Added Line Break and Ascii handlers. (HEAD, master)

author: Joshua Drake <joshua.ellis.drake@gmail.com> 2022-10-23 00:22:13 -0500
committer: Joshua Drake <joshua.ellis.drake@gmail.com> 2022-10-23 00:22:13 -0500
commit: 5789a123268db253f75836def35df3b54529bc88 (patch)
tree: 68e38221d2a0c08526b714c7b73c9f4c1673a96f /FBGM.py
parent: 0a086697e3266ce68bee1b3628d7e7de95620f15 (diff)
1 file changed, 9 insertions, 4 deletions
diff --git a/FBGM.py b/FBGM.py
index 7344207..3a37c7c 100644
--- a/FBGM.py
+++ b/FBGM.py
@@ -4,8 +4,14 @@ import os, sys, docx, PyPDF2
 from string import punctuation
 from PyPDF2 import PdfReader
 
+def process_text(string):
+    string = remove_non_ascii(string)
+    string = remove_line_break(string)
+    return string
 def remove_non_ascii(string):
     return string.encode('ascii', errors='xmlcharrefreplace').decode()
+def remove_line_break(string):
+    return string.replace("- ","")
 
 def html_file_generator(path, page_header, page_contents):
     filename = path[path.rfind('/')+1: path.rfind('.')]
@@ -24,7 +30,7 @@ def html_file_generator(path, page_header, page_contents):
      '</head>'
      '<body>'
         '<h1>' +page_header+ '</h1>')
-    htmlfile.write(remove_non_ascii(page_contents))
+    htmlfile.write(process_text(page_contents))
     htmlfile.write('</body>'
     '</html>')
     htmlfile.close()
@@ -99,7 +105,6 @@ def dochandler(path):
         elif para.style.name in paragraph_style:
             fullText+="<p>"+para.text
             fullText+="</p>\n"
-        print(para.text, para.style.name)
     html_file_generator(path, title, fullText)
     return '\n'.join(path)
     
@@ -139,8 +144,8 @@ def pdfhandler(path):
 
 
 #Attempting to pass txt file in path 
-path = 'C:/Code/texttohtml/kac.pdf'
-#path = 'C:/Users/Josh/Desktop/porkandbeans/texttohtml/memes.docx'
+#path = 'C:/Code/texttohtml/kac.pdf'
+path = 'C:/Users/Josh/Desktop/porkandbeans/texttohtml/memes.docx'
 
 extension = path[path.rfind('.')+1: len(path)]
author	Joshua Drake <joshua.ellis.drake@gmail.com>	2022-10-23 00:22:13 -0500
committer	Joshua Drake <joshua.ellis.drake@gmail.com>	2022-10-23 00:22:13 -0500
commit	5789a123268db253f75836def35df3b54529bc88 (patch)
tree	68e38221d2a0c08526b714c7b73c9f4c1673a96f /FBGM.py
parent	0a086697e3266ce68bee1b3628d7e7de95620f15 (diff)