From c23160f81eee39f42ba5315ecfa61c668fe2b969 Mon Sep 17 00:00:00 2001 From: Joshua Drake Date: Sun, 4 Sep 2022 01:18:34 -0500 Subject: Paragraph Parser now works. Edge cases and user errors must now be accounted for. --- FBGM.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/FBGM.py b/FBGM.py index 69f6e0b..8c326e5 100644 --- a/FBGM.py +++ b/FBGM.py @@ -12,7 +12,6 @@ def paragraph_parser(contents): paragraph=[] article=[] x = 0 - output = '' contents=contents.strip() contents+="|E" while x in range (len(paragraph_end)): @@ -26,13 +25,11 @@ def paragraph_parser(contents): if contents[paragraph_index[paragraph_number-1]:].index(x)+paragraph_index[paragraph_number-1]+1 < minimum: minimum = contents[paragraph_index[paragraph_number-1]:].index(x)+paragraph_index[paragraph_number-1] paragraph_index.append(minimum+1) - paragraph.append(contents[paragraph_index[paragraph_number-1]:paragraph_index[paragraph_number]]) + paragraph.append(contents[paragraph_index[paragraph_number-1]:paragraph_index[paragraph_number]]+'

') for x in paragraph_end: if contents[paragraph_index[paragraph_number]:].find(x) == -1: paragraph_end.remove(x) - if contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].find(article_end) != -1: - first_nl =contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].index(article_end)+paragraph_index[paragraph_number-1]+2 - print(contents[first_nl-1:first_nl]) + first_nl =contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].index(article_end)+paragraph_index[paragraph_number-1]+2 if contents[first_nl-1:first_nl] not in punctuation: article_index.append(contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].index(article_end)+paragraph_index[paragraph_number-1]) paragraph_number +=1 @@ -42,19 +39,16 @@ def paragraph_parser(contents): if contents[article_index[x-1]:article_index[x]-2].rfind('\n') != -1: article.append(contents[contents[:article_index[x]].rfind('\n')+1:article_index[x]+2]) elif x==1: - article.append(contents[:article_index[x]+2]) - output+='
' - output+="

"+article[x-1]+'

' - while paragraph_index[paragraph_number] < article_index[x]: - if article_index[x] in range(paragraph_index[paragraph_number],paragraph_index[paragraph_number+1]): - paragraph[paragraph_number]=paragraph[paragraph_number][article_index[x]-paragraph_index[paragraph_number]+3:] - output+=paragraph[paragraph_number] - paragraph_number+=1 - output+='
' - #contents=contents[:contents[:x-1].rfind('\n')]+'
'+contents[x:] - - print(output) - return output + article.append(contents[contents[:article_index[x]].rfind('\n')+1:article_index[x]+2]) + for x in article_index[1:]: + while x not in range(paragraph_index[paragraph_number],paragraph_index[paragraph_number+1]): + paragraph[paragraph_number]="

"+paragraph[paragraph_number] + paragraph_number +=1 + paragraph[paragraph_number]=paragraph[paragraph_number].replace("\n"," ") + paragraph[paragraph_number]=paragraph[paragraph_number].strip() + paragraph[paragraph_number]="

"+article[article_index.index(x)-1]+"

\n

"+paragraph[paragraph_number][len(article[article_index.index(x)-1]):] + print("Header "+str(x)+" is in paragraph "+str(paragraph_number)) + return ''.join(paragraph) -- cgit v1.2.3