From 3fab0721a01002e6df67b7e5d999b1e306265202 Mon Sep 17 00:00:00 2001 From: Joshua Drake Date: Sat, 17 Sep 2022 13:46:26 -0500 Subject: Simplified code and added basic functionality for word document processing. --- FBGM.py | 81 ++++++++++++++++++++++++++++++++--------------------------------- 1 file changed, 40 insertions(+), 41 deletions(-) (limited to 'FBGM.py') diff --git a/FBGM.py b/FBGM.py index 4f2a8cd..349693e 100644 --- a/FBGM.py +++ b/FBGM.py @@ -1,7 +1,29 @@ +from email import header from ftplib import parse150 import os, sys, docx from string import punctuation +def html_file_generator(path, page_header, page_contents): + filename = path[path.rfind('/')+1: path.rfind('.')] + + if os.path.exists(filename+".html") == True: + os.remove(filename+".html") + + htmlfile = open(filename+".html", "x") + htmlfile.write('' +'' + '' + '' +filename+ '' + '' + '' + '' + '' + '' + '

' +page_header+ '

') + htmlfile.write(page_contents) + htmlfile.write('' + '') + #Text file handler def paragraph_parser(contents): @@ -57,10 +79,21 @@ def paragraph_parser(contents): #Document file Handler def dochandler(path): doc = docx.Document(path) - fullText = [] + header_styles= ["Subtitle","Heading 1", "Heading 2"] + paragraph_style= ["Normal","No Spacing"] + title = (path[path.rfind('/')+1: path.rfind('.')]).capitalize() + if doc.paragraphs[0].style.name == "Title": + title = doc.paragraphs[0].text + fullText = "" for para in doc.paragraphs: - fullText.append(para.text) + if para.style.name in header_styles: + fullText+="

"+para.text + fullText+="

\n" + elif para.style.name in paragraph_style: + fullText+="

"+para.text + fullText+="

\n" print(para.text, para.style.name) + html_file_generator(path, title, fullText) return '\n'.join(path) @@ -68,35 +101,12 @@ def dochandler(path): -def txthandler(path, htmlfile): - - filename = path[path.rfind('/')+1: path.rfind('.')] +def txthandler(path): f = open(path, "r") contents = f.read() - title = filename main_header = contents[0:contents.find('\n')] paragraphs = paragraph_parser(contents[contents.find('\n'):]) - htmlfile.write('' -'' - '' - '' +title+ '' - '' - '' - '' - '' - '' - '

' +main_header+ '

' - '
' - '

  Introduction  

' - '

''

' - '
') - htmlfile.write(paragraphs) - - - - - htmlfile.write('' - '') + html_file_generator(path,main_header,paragraphs) print(f.read()) return 0 @@ -108,25 +118,14 @@ def pdfhandler(path): #Attempting to pass txt file in path -path = 'C:/Code/texttohtml/ThisOne.docx' -#path = 'C:/Code/texttohtml/memes.txt' - +#path = 'C:/Code/texttohtml/ThisOne.docx' +path = 'C:/Users/Josh/Desktop/porkandbeans/texttohtml/memes.docx' extension = path[path.rfind('.')+1: len(path)] -filename = path[path.rfind('/')+1: path.rfind('.')] - - -if os.path.exists(filename+".html") == True: - os.remove(filename+".html") - -htmlfile = open(filename+".html", "x") - - - if extension =='txt': - txthandler(path, htmlfile) + txthandler(path) elif extension == 'pdf': pdfhandler(path) -- cgit v1.2.3