Parser works when every paragraph has a header; it does not work otherwise.

author: Joshua Drake <joshua.ellis.drake@gmail.com> 2022-09-03 16:07:26 -0500
committer: Joshua Drake <joshua.ellis.drake@gmail.com> 2022-09-03 16:07:26 -0500
commit: b8aaee2f4025a0dd807a630708f1570eb9fc0749 (patch)
tree: 6affad1edb990694e799e7a0f9504aade9b8d46d /FBGM.py
parent: 928f802ca53710ec7215edb2d6c9baf7156a9605 (diff)
1 files changed, 38 insertions, 15 deletions
diff --git a/FBGM.py b/FBGM.py
index 150b9d1..69f6e0b 100644
--- a/FBGM.py
+++ b/FBGM.py
@@ -1,37 +1,60 @@
 from ftplib import parse150
 import os, sys
+from string import punctuation
 
 def paragraph_parser(contents):
-        paragraph_end = [".\n", "?\n", "!\n", '"\n', "|*E"]
+        punctuation = [".", "?", "!", '"']
+        paragraph_end = [".\n\n", "?\n\n", "!\n\n", '"\n\n', "|E"]
         article_end = '\n'
         paragraph_index = [0]
+        article_index = [0]
         paragraph_number = 1
-        paragraph = []
+        paragraph=[]
+        article=[]
         x = 0
-        paragraph_count = 0
-        article_count = 0
+        output = ''
         contents=contents.strip()
-        contents+="|*E"
+        contents+="|E"
         while x in range (len(paragraph_end)):
             if contents.find(paragraph_end[x]) == -1:
                 paragraph_end.pop(x)
             else:
-                paragraph_count += contents.count(paragraph_end[x])
-                x += 1
-        article_count = contents.count(article_end)-paragraph_count
-               
+                x += 1   
         while paragraph_end:
             minimum = contents[paragraph_index[paragraph_number-1]:].index(paragraph_end[0]) +paragraph_index[paragraph_number-1]
             for x in paragraph_end:
                 if contents[paragraph_index[paragraph_number-1]:].index(x)+paragraph_index[paragraph_number-1]+1 < minimum:
                    minimum = contents[paragraph_index[paragraph_number-1]:].index(x)+paragraph_index[paragraph_number-1]
             paragraph_index.append(minimum+1)
-            paragraph.append(contents[paragraph_index[paragraph_number-1]+1:paragraph_index[paragraph_number]])
+            paragraph.append(contents[paragraph_index[paragraph_number-1]:paragraph_index[paragraph_number]])
             for x in paragraph_end:
                 if contents[paragraph_index[paragraph_number]:].find(x) == -1:
                     paragraph_end.remove(x)
+            if contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].find(article_end) != -1:
+                first_nl =contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].index(article_end)+paragraph_index[paragraph_number-1]+2
+                print(contents[first_nl-1:first_nl])
+            if contents[first_nl-1:first_nl] not in punctuation:
+                article_index.append(contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].index(article_end)+paragraph_index[paragraph_number-1])
             paragraph_number +=1
-        return paragraph
+
+        paragraph_number = 0
+        for x in range(1,len(article_index)):
+            if contents[article_index[x-1]:article_index[x]-2].rfind('\n') != -1:
+                article.append(contents[contents[:article_index[x]].rfind('\n')+1:article_index[x]+2])
+            elif x==1:
+                article.append(contents[:article_index[x]+2])
+            output+='<article class="bodysection">'
+            output+="<h2>"+article[x-1]+'</h2>'
+            while paragraph_index[paragraph_number] < article_index[x]:
+                if article_index[x] in range(paragraph_index[paragraph_number],paragraph_index[paragraph_number+1]):
+                    paragraph[paragraph_number]=paragraph[paragraph_number][article_index[x]-paragraph_index[paragraph_number]+3:]
+                output+=paragraph[paragraph_number]
+                paragraph_number+=1
+            output+='</article>'
+            #contents=contents[:contents[:x-1].rfind('\n')]+'<article class="bodysection"> '+contents[x:]
+
+        print(output)
+        return output
             
             
 
@@ -43,7 +66,7 @@ def txthandler(path, htmlfile):
     contents = f.read()
     title = filename
     main_header = contents[0:contents.find('\n')]
-    paragraph = paragraph_parser(contents[contents.find('\n'):])    
+    paragraphs = paragraph_parser(contents[contents.find('\n'):])    
     htmlfile.write('<!DOCTYPE html>'
 '<html lang ="en">'
     '<head>'
@@ -58,8 +81,7 @@ def txthandler(path, htmlfile):
 		'<h2>&nbsp Introduction &nbsp</h2>'
         '<p>''</p>'
 	 '</article>')
-    for x in paragraph:
-        htmlfile.write('<article class="bodyparagraph">' + x + '</article>')
+    htmlfile.write(paragraphs)
         
 
 
@@ -83,7 +105,8 @@ def dochandler(path):
 
 
 #Attempting to pass txt file in path 
-path = 'C:/Users/a big fuck/Documents/beansandtoast.txt'
+#path = 'C:/Users/a big fuck/Documents/beansandtoast.txt'
+path = 'C:/Users/Josh/Desktop/porkandbeans/texttohtml/memes.txt'
 
 
 extension = path[path.rfind('.')+1: len(path)]
author	Joshua Drake <joshua.ellis.drake@gmail.com>	2022-09-03 16:07:26 -0500
committer	Joshua Drake <joshua.ellis.drake@gmail.com>	2022-09-03 16:07:26 -0500
commit	b8aaee2f4025a0dd807a630708f1570eb9fc0749 (patch)
tree	6affad1edb990694e799e7a0f9504aade9b8d46d /FBGM.py
parent	928f802ca53710ec7215edb2d6c9baf7156a9605 (diff)