diff options
author | Joshua Drake <joshua.ellis.drake@gmail.com> | 2022-09-03 16:07:26 -0500 |
---|---|---|
committer | Joshua Drake <joshua.ellis.drake@gmail.com> | 2022-09-03 16:07:26 -0500 |
commit | b8aaee2f4025a0dd807a630708f1570eb9fc0749 (patch) | |
tree | 6affad1edb990694e799e7a0f9504aade9b8d46d /FBGM.py | |
parent | 928f802ca53710ec7215edb2d6c9baf7156a9605 (diff) |
Parser works with headers for all paragraphs... not otherwise.
Diffstat (limited to 'FBGM.py')
-rw-r--r-- | FBGM.py | 53 |
1 files changed, 38 insertions, 15 deletions
@@ -1,37 +1,60 @@ from ftplib import parse150 import os, sys +from string import punctuation def paragraph_parser(contents): - paragraph_end = [".\n", "?\n", "!\n", '"\n', "|*E"] + punctuation = [".", "?", "!", '"'] + paragraph_end = [".\n\n", "?\n\n", "!\n\n", '"\n\n', "|E"] article_end = '\n' paragraph_index = [0] + article_index = [0] paragraph_number = 1 - paragraph = [] + paragraph=[] + article=[] x = 0 - paragraph_count = 0 - article_count = 0 + output = '' contents=contents.strip() - contents+="|*E" + contents+="|E" while x in range (len(paragraph_end)): if contents.find(paragraph_end[x]) == -1: paragraph_end.pop(x) else: - paragraph_count += contents.count(paragraph_end[x]) - x += 1 - article_count = contents.count(article_end)-paragraph_count - + x += 1 while paragraph_end: minimum = contents[paragraph_index[paragraph_number-1]:].index(paragraph_end[0]) +paragraph_index[paragraph_number-1] for x in paragraph_end: if contents[paragraph_index[paragraph_number-1]:].index(x)+paragraph_index[paragraph_number-1]+1 < minimum: minimum = contents[paragraph_index[paragraph_number-1]:].index(x)+paragraph_index[paragraph_number-1] paragraph_index.append(minimum+1) - paragraph.append(contents[paragraph_index[paragraph_number-1]+1:paragraph_index[paragraph_number]]) + paragraph.append(contents[paragraph_index[paragraph_number-1]:paragraph_index[paragraph_number]]) for x in paragraph_end: if contents[paragraph_index[paragraph_number]:].find(x) == -1: paragraph_end.remove(x) + if contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].find(article_end) != -1: + first_nl =contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].index(article_end)+paragraph_index[paragraph_number-1]+2 + print(contents[first_nl-1:first_nl]) + if contents[first_nl-1:first_nl] not in punctuation: + article_index.append(contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].index(article_end)+paragraph_index[paragraph_number-1]) paragraph_number +=1 - return paragraph + + paragraph_number = 0 + for x in range(1,len(article_index)): + if contents[article_index[x-1]:article_index[x]-2].rfind('\n') != -1: + article.append(contents[contents[:article_index[x]].rfind('\n')+1:article_index[x]+2]) + elif x==1: + article.append(contents[:article_index[x]+2]) + output+='<article class="bodysection">' + output+="<h2>"+article[x-1]+'</h2>' + while paragraph_index[paragraph_number] < article_index[x]: + if article_index[x] in range(paragraph_index[paragraph_number],paragraph_index[paragraph_number+1]): + paragraph[paragraph_number]=paragraph[paragraph_number][article_index[x]-paragraph_index[paragraph_number]+3:] + output+=paragraph[paragraph_number] + paragraph_number+=1 + output+='</article>' + #contents=contents[:contents[:x-1].rfind('\n')]+'<article class="bodysection"> '+contents[x:] + + print(output) + return output @@ -43,7 +66,7 @@ def txthandler(path, htmlfile): contents = f.read() title = filename main_header = contents[0:contents.find('\n')] - paragraph = paragraph_parser(contents[contents.find('\n'):]) + paragraphs = paragraph_parser(contents[contents.find('\n'):]) htmlfile.write('<!DOCTYPE html>' '<html lang ="en">' '<head>' @@ -58,8 +81,7 @@ def txthandler(path, htmlfile): '<h2>  Introduction  </h2>' '<p>''</p>' '</article>') - for x in paragraph: - htmlfile.write('<article class="bodyparagraph">' + x + '</article>') + htmlfile.write(paragraphs) @@ -83,7 +105,8 @@ def dochandler(path): #Attempting to pass txt file in path -path = 'C:/Users/a big fuck/Documents/beansandtoast.txt' +#path = 'C:/Users/a big fuck/Documents/beansandtoast.txt' +path = 'C:/Users/Josh/Desktop/porkandbeans/texttohtml/memes.txt' extension = path[path.rfind('.')+1: len(path)] |