diff options
author | Joshua Drake <joshua.ellis.drake@gmail.com> | 2022-09-17 13:46:26 -0500 |
---|---|---|
committer | Joshua Drake <joshua.ellis.drake@gmail.com> | 2022-09-17 13:46:26 -0500 |
commit | 3fab0721a01002e6df67b7e5d999b1e306265202 (patch) | |
tree | af5f5730ca8cd2397accdd58e74c034b7d195bb8 /FBGM.py | |
parent | 9adca6837c95537a09af07668bccbd06d9f2ca08 (diff) |
Simplified code and added basic functionality for Word document (.docx) processing.
Diffstat (limited to 'FBGM.py')
-rw-r--r-- | FBGM.py | 81 |
1 file changed, 40 insertions, 41 deletions
@@ -1,7 +1,29 @@ +from email import header from ftplib import parse150 import os, sys, docx from string import punctuation +def html_file_generator(path, page_header, page_contents): + filename = path[path.rfind('/')+1: path.rfind('.')] + + if os.path.exists(filename+".html") == True: + os.remove(filename+".html") + + htmlfile = open(filename+".html", "x") + htmlfile.write('<!DOCTYPE html>' +'<html lang ="en">' + '<head>' + '<title>' +filename+ '</title>' + '<link rel="stylesheet" type="text/css" href="stylesheet.css">' + '<link rel="icon" type="image/x-icon" href="images/favicon.ico">' + '<meta charset="utf-8"/>' + '</head>' + '<body>' + '<h1>' +page_header+ '</h1>') + htmlfile.write(page_contents) + htmlfile.write('</body>' + '</html>') + #Text file handler def paragraph_parser(contents): @@ -57,10 +79,21 @@ def paragraph_parser(contents): #Document file Handler def dochandler(path): doc = docx.Document(path) - fullText = [] + header_styles= ["Subtitle","Heading 1", "Heading 2"] + paragraph_style= ["Normal","No Spacing"] + title = (path[path.rfind('/')+1: path.rfind('.')]).capitalize() + if doc.paragraphs[0].style.name == "Title": + title = doc.paragraphs[0].text + fullText = "" for para in doc.paragraphs: - fullText.append(para.text) + if para.style.name in header_styles: + fullText+="<h2>"+para.text + fullText+="</h2>\n" + elif para.style.name in paragraph_style: + fullText+="<p>"+para.text + fullText+="</p>\n" print(para.text, para.style.name) + html_file_generator(path, title, fullText) return '\n'.join(path) @@ -68,35 +101,12 @@ def dochandler(path): -def txthandler(path, htmlfile): - - filename = path[path.rfind('/')+1: path.rfind('.')] +def txthandler(path): f = open(path, "r") contents = f.read() - title = filename main_header = contents[0:contents.find('\n')] paragraphs = paragraph_parser(contents[contents.find('\n'):]) - htmlfile.write('<!DOCTYPE html>' -'<html lang ="en">' - '<head>' - '<title>' +title+ '</title>' - '<link rel="stylesheet" 
type="text/css" href="stylesheet.css">' - '<link rel="icon" type="image/x-icon" href="images/favicon.ico">' - '<meta charset="utf-8"/>' - '</head>' - '<body>' - '<h1>' +main_header+ '</h1>' - '<article class="introduction">' - '<h2>  Introduction  </h2>' - '<p>''</p>' - '</article>') - htmlfile.write(paragraphs) - - - - - htmlfile.write('</body>' - '</html>') + html_file_generator(path,main_header,paragraphs) print(f.read()) return 0 @@ -108,25 +118,14 @@ def pdfhandler(path): #Attempting to pass txt file in path -path = 'C:/Code/texttohtml/ThisOne.docx' -#path = 'C:/Code/texttohtml/memes.txt' - +#path = 'C:/Code/texttohtml/ThisOne.docx' +path = 'C:/Users/Josh/Desktop/porkandbeans/texttohtml/memes.docx' extension = path[path.rfind('.')+1: len(path)] -filename = path[path.rfind('/')+1: path.rfind('.')] - - -if os.path.exists(filename+".html") == True: - os.remove(filename+".html") - -htmlfile = open(filename+".html", "x") - - - if extension =='txt': - txthandler(path, htmlfile) + txthandler(path) elif extension == 'pdf': pdfhandler(path) |