From b8aaee2f4025a0dd807a630708f1570eb9fc0749 Mon Sep 17 00:00:00 2001 From: Joshua Drake Date: Sat, 3 Sep 2022 16:07:26 -0500 Subject: Parser works with headers for all paragraphs... not otherwise. --- FBGM.py | 53 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/FBGM.py b/FBGM.py index 150b9d1..69f6e0b 100644 --- a/FBGM.py +++ b/FBGM.py @@ -1,37 +1,60 @@ from ftplib import parse150 import os, sys +from string import punctuation def paragraph_parser(contents): - paragraph_end = [".\n", "?\n", "!\n", '"\n', "|*E"] + punctuation = [".", "?", "!", '"'] + paragraph_end = [".\n\n", "?\n\n", "!\n\n", '"\n\n', "|E"] article_end = '\n' paragraph_index = [0] + article_index = [0] paragraph_number = 1 - paragraph = [] + paragraph=[] + article=[] x = 0 - paragraph_count = 0 - article_count = 0 + output = '' contents=contents.strip() - contents+="|*E" + contents+="|E" while x in range (len(paragraph_end)): if contents.find(paragraph_end[x]) == -1: paragraph_end.pop(x) else: - paragraph_count += contents.count(paragraph_end[x]) - x += 1 - article_count = contents.count(article_end)-paragraph_count - + x += 1 while paragraph_end: minimum = contents[paragraph_index[paragraph_number-1]:].index(paragraph_end[0]) +paragraph_index[paragraph_number-1] for x in paragraph_end: if contents[paragraph_index[paragraph_number-1]:].index(x)+paragraph_index[paragraph_number-1]+1 < minimum: minimum = contents[paragraph_index[paragraph_number-1]:].index(x)+paragraph_index[paragraph_number-1] paragraph_index.append(minimum+1) - paragraph.append(contents[paragraph_index[paragraph_number-1]+1:paragraph_index[paragraph_number]]) + paragraph.append(contents[paragraph_index[paragraph_number-1]:paragraph_index[paragraph_number]]) for x in paragraph_end: if contents[paragraph_index[paragraph_number]:].find(x) == -1: paragraph_end.remove(x) + if contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].find(article_end) != -1: + first_nl =contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].index(article_end)+paragraph_index[paragraph_number-1]+2 + print(contents[first_nl-1:first_nl]) + if contents[first_nl-1:first_nl] not in punctuation: + article_index.append(contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].index(article_end)+paragraph_index[paragraph_number-1]) paragraph_number +=1 - return paragraph + + paragraph_number = 0 + for x in range(1,len(article_index)): + if contents[article_index[x-1]:article_index[x]-2].rfind('\n') != -1: + article.append(contents[contents[:article_index[x]].rfind('\n')+1:article_index[x]+2]) + elif x==1: + article.append(contents[:article_index[x]+2]) + output+='
' + output+="

"+article[x-1]+'

' + while paragraph_index[paragraph_number] < article_index[x]: + if article_index[x] in range(paragraph_index[paragraph_number],paragraph_index[paragraph_number+1]): + paragraph[paragraph_number]=paragraph[paragraph_number][article_index[x]-paragraph_index[paragraph_number]+3:] + output+=paragraph[paragraph_number] + paragraph_number+=1 + output+='
' + #contents=contents[:contents[:x-1].rfind('\n')]+'
'+contents[x:] + + print(output) + return output @@ -43,7 +66,7 @@ def txthandler(path, htmlfile): contents = f.read() title = filename main_header = contents[0:contents.find('\n')] - paragraph = paragraph_parser(contents[contents.find('\n'):]) + paragraphs = paragraph_parser(contents[contents.find('\n'):]) htmlfile.write('' '' '' @@ -58,8 +81,7 @@ def txthandler(path, htmlfile): '

  Introduction  

' '

''

' '
') - for x in paragraph: - htmlfile.write('
' + x + '
') + htmlfile.write(paragraphs) @@ -83,7 +105,8 @@ def dochandler(path): #Attempting to pass txt file in path -path = 'C:/Users/a big fuck/Documents/beansandtoast.txt' +#path = 'C:/Users/a big fuck/Documents/beansandtoast.txt' +path = 'C:/Users/Josh/Desktop/porkandbeans/texttohtml/memes.txt' extension = path[path.rfind('.')+1: len(path)] -- cgit v1.2.3