summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Joshua Drake <joshua.ellis.drake@gmail.com> 2022-09-03 16:07:26 -0500
committer Joshua Drake <joshua.ellis.drake@gmail.com> 2022-09-03 16:07:26 -0500
commit b8aaee2f4025a0dd807a630708f1570eb9fc0749 (patch)
tree 6affad1edb990694e799e7a0f9504aade9b8d46d
parent 928f802ca53710ec7215edb2d6c9baf7156a9605 (diff)
Parser works when every paragraph has a header; it does not work otherwise.
-rw-r--r-- FBGM.py 53
1 file changed, 38 insertions, 15 deletions
diff --git a/FBGM.py b/FBGM.py
index 150b9d1..69f6e0b 100644
--- a/FBGM.py
+++ b/FBGM.py
@@ -1,37 +1,60 @@
from ftplib import parse150
import os, sys
+from string import punctuation
def paragraph_parser(contents):
- paragraph_end = [".\n", "?\n", "!\n", '"\n', "|*E"]
+ punctuation = [".", "?", "!", '"']
+ paragraph_end = [".\n\n", "?\n\n", "!\n\n", '"\n\n', "|E"]
article_end = '\n'
paragraph_index = [0]
+ article_index = [0]
paragraph_number = 1
- paragraph = []
+ paragraph=[]
+ article=[]
x = 0
- paragraph_count = 0
- article_count = 0
+ output = ''
contents=contents.strip()
- contents+="|*E"
+ contents+="|E"
while x in range (len(paragraph_end)):
if contents.find(paragraph_end[x]) == -1:
paragraph_end.pop(x)
else:
- paragraph_count += contents.count(paragraph_end[x])
- x += 1
- article_count = contents.count(article_end)-paragraph_count
-
+ x += 1
while paragraph_end:
minimum = contents[paragraph_index[paragraph_number-1]:].index(paragraph_end[0]) +paragraph_index[paragraph_number-1]
for x in paragraph_end:
if contents[paragraph_index[paragraph_number-1]:].index(x)+paragraph_index[paragraph_number-1]+1 < minimum:
minimum = contents[paragraph_index[paragraph_number-1]:].index(x)+paragraph_index[paragraph_number-1]
paragraph_index.append(minimum+1)
- paragraph.append(contents[paragraph_index[paragraph_number-1]+1:paragraph_index[paragraph_number]])
+ paragraph.append(contents[paragraph_index[paragraph_number-1]:paragraph_index[paragraph_number]])
for x in paragraph_end:
if contents[paragraph_index[paragraph_number]:].find(x) == -1:
paragraph_end.remove(x)
+ if contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].find(article_end) != -1:
+ first_nl =contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].index(article_end)+paragraph_index[paragraph_number-1]+2
+ print(contents[first_nl-1:first_nl])
+ if contents[first_nl-1:first_nl] not in punctuation:
+ article_index.append(contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].index(article_end)+paragraph_index[paragraph_number-1])
paragraph_number +=1
- return paragraph
+
+ paragraph_number = 0
+ for x in range(1,len(article_index)):
+ if contents[article_index[x-1]:article_index[x]-2].rfind('\n') != -1:
+ article.append(contents[contents[:article_index[x]].rfind('\n')+1:article_index[x]+2])
+ elif x==1:
+ article.append(contents[:article_index[x]+2])
+ output+='<article class="bodysection">'
+ output+="<h2>"+article[x-1]+'</h2>'
+ while paragraph_index[paragraph_number] < article_index[x]:
+ if article_index[x] in range(paragraph_index[paragraph_number],paragraph_index[paragraph_number+1]):
+ paragraph[paragraph_number]=paragraph[paragraph_number][article_index[x]-paragraph_index[paragraph_number]+3:]
+ output+=paragraph[paragraph_number]
+ paragraph_number+=1
+ output+='</article>'
+ #contents=contents[:contents[:x-1].rfind('\n')]+'<article class="bodysection"> '+contents[x:]
+
+ print(output)
+ return output
@@ -43,7 +66,7 @@ def txthandler(path, htmlfile):
contents = f.read()
title = filename
main_header = contents[0:contents.find('\n')]
- paragraph = paragraph_parser(contents[contents.find('\n'):])
+ paragraphs = paragraph_parser(contents[contents.find('\n'):])
htmlfile.write('<!DOCTYPE html>'
'<html lang ="en">'
'<head>'
@@ -58,8 +81,7 @@ def txthandler(path, htmlfile):
'<h2>&nbsp Introduction &nbsp</h2>'
'<p>''</p>'
'</article>')
- for x in paragraph:
- htmlfile.write('<article class="bodyparagraph">' + x + '</article>')
+ htmlfile.write(paragraphs)
@@ -83,7 +105,8 @@ def dochandler(path):
#Attempting to pass txt file in path
-path = 'C:/Users/a big fuck/Documents/beansandtoast.txt'
+#path = 'C:/Users/a big fuck/Documents/beansandtoast.txt'
+path = 'C:/Users/Josh/Desktop/porkandbeans/texttohtml/memes.txt'
extension = path[path.rfind('.')+1: len(path)]