Simplified code and added basic functionality for Word document processing.

author: Joshua Drake <joshua.ellis.drake@gmail.com> 2022-09-17 13:46:26 -0500
committer: Joshua Drake <joshua.ellis.drake@gmail.com> 2022-09-17 13:46:26 -0500
commit: 3fab0721a01002e6df67b7e5d999b1e306265202 (patch)
tree: af5f5730ca8cd2397accdd58e74c034b7d195bb8 /FBGM.py
parent: 9adca6837c95537a09af07668bccbd06d9f2ca08 (diff)
1 files changed, 40 insertions, 41 deletions
diff --git a/FBGM.py b/FBGM.py
index 4f2a8cd..349693e 100644
--- a/FBGM.py
+++ b/FBGM.py
@@ -1,7 +1,29 @@
+from email import header
 from ftplib import parse150
 import os, sys, docx
 from string import punctuation
 
+def html_file_generator(path, page_header, page_contents):
+    filename = path[path.rfind('/')+1: path.rfind('.')]
+
+    if os.path.exists(filename+".html") == True:
+        os.remove(filename+".html")
+
+    htmlfile = open(filename+".html", "x")
+    htmlfile.write('<!DOCTYPE html>'
+'<html lang ="en">'
+    '<head>'
+        '<title>' +filename+ '</title>'
+        '<link rel="stylesheet" type="text/css" href="stylesheet.css">'
+        '<link rel="icon" type="image/x-icon" href="images/favicon.ico">'
+        '<meta charset="utf-8"/>'
+     '</head>'
+     '<body>'
+        '<h1>' +page_header+ '</h1>')
+    htmlfile.write(page_contents)
+    htmlfile.write('</body>'
+    '</html>')
+
 
 #Text file handler
 def paragraph_parser(contents):
@@ -57,10 +79,21 @@ def paragraph_parser(contents):
 #Document file Handler
 def dochandler(path):
     doc = docx.Document(path)
-    fullText = []
+    header_styles= ["Subtitle","Heading 1", "Heading 2"]
+    paragraph_style= ["Normal","No Spacing"]
+    title = (path[path.rfind('/')+1: path.rfind('.')]).capitalize()
+    if doc.paragraphs[0].style.name == "Title":
+        title = doc.paragraphs[0].text
+    fullText = ""
     for para in doc.paragraphs:
-        fullText.append(para.text)
+        if para.style.name in header_styles:
+            fullText+="<h2>"+para.text
+            fullText+="</h2>\n"
+        elif para.style.name in paragraph_style:
+            fullText+="<p>"+para.text
+            fullText+="</p>\n"
         print(para.text, para.style.name)
+    html_file_generator(path, title, fullText)
     return '\n'.join(path)
     
 
@@ -68,35 +101,12 @@ def dochandler(path):
             
 
 
-def txthandler(path, htmlfile):
-
-    filename = path[path.rfind('/')+1: path.rfind('.')]
+def txthandler(path):
     f = open(path, "r")
     contents = f.read()
-    title = filename
     main_header = contents[0:contents.find('\n')]
     paragraphs = paragraph_parser(contents[contents.find('\n'):])    
-    htmlfile.write('<!DOCTYPE html>'
-'<html lang ="en">'
-    '<head>'
-        '<title>' +title+ '</title>'
-        '<link rel="stylesheet" type="text/css" href="stylesheet.css">'
-        '<link rel="icon" type="image/x-icon" href="images/favicon.ico">'
-        '<meta charset="utf-8"/>'
-     '</head>'
-     '<body>'
-        '<h1>' +main_header+ '</h1>'
-     '<article class="introduction">'
-		'<h2>&nbsp Introduction &nbsp</h2>'
-        '<p>''</p>'
-	 '</article>')
-    htmlfile.write(paragraphs)
-        
-
-
-     
-    htmlfile.write('</body>'
-    '</html>')
+    html_file_generator(path,main_header,paragraphs)
     print(f.read())
     return 0
 
@@ -108,25 +118,14 @@ def pdfhandler(path):
 
 
 #Attempting to pass txt file in path 
-path = 'C:/Code/texttohtml/ThisOne.docx'
-#path = 'C:/Code/texttohtml/memes.txt'
-
+#path = 'C:/Code/texttohtml/ThisOne.docx'
+path = 'C:/Users/Josh/Desktop/porkandbeans/texttohtml/memes.docx'
 
 extension = path[path.rfind('.')+1: len(path)]
-filename = path[path.rfind('/')+1: path.rfind('.')]
-
-
-if os.path.exists(filename+".html") == True:
-    os.remove(filename+".html")
-
-htmlfile = open(filename+".html", "x")
-
-
-
 
 
 if extension =='txt':
-    txthandler(path, htmlfile)
+    txthandler(path)
 
 elif extension == 'pdf':
     pdfhandler(path)
author	Joshua Drake <joshua.ellis.drake@gmail.com>	2022-09-17 13:46:26 -0500
committer	Joshua Drake <joshua.ellis.drake@gmail.com>	2022-09-17 13:46:26 -0500
commit	3fab0721a01002e6df67b7e5d999b1e306265202 (patch)
tree	af5f5730ca8cd2397accdd58e74c034b7d195bb8 /FBGM.py
parent	9adca6837c95537a09af07668bccbd06d9f2ca08 (diff)