diff options
| -rw-r--r-- | FBGM.py | 81 | 
1 files changed, 40 insertions, 41 deletions
| @@ -1,7 +1,29 @@ +from email import header  from ftplib import parse150  import os, sys, docx  from string import punctuation +def html_file_generator(path, page_header, page_contents): +    filename = path[path.rfind('/')+1: path.rfind('.')] + +    if os.path.exists(filename+".html") == True: +        os.remove(filename+".html") + +    htmlfile = open(filename+".html", "x") +    htmlfile.write('<!DOCTYPE html>' +'<html lang ="en">' +    '<head>' +        '<title>' +filename+ '</title>' +        '<link rel="stylesheet" type="text/css" href="stylesheet.css">' +        '<link rel="icon" type="image/x-icon" href="images/favicon.ico">' +        '<meta charset="utf-8"/>' +     '</head>' +     '<body>' +        '<h1>' +page_header+ '</h1>') +    htmlfile.write(page_contents) +    htmlfile.write('</body>' +    '</html>') +  #Text file handler  def paragraph_parser(contents): @@ -57,10 +79,21 @@ def paragraph_parser(contents):  #Document file Handler  def dochandler(path):      doc = docx.Document(path) -    fullText = [] +    header_styles= ["Subtitle","Heading 1", "Heading 2"] +    paragraph_style= ["Normal","No Spacing"] +    title = (path[path.rfind('/')+1: path.rfind('.')]).capitalize() +    if doc.paragraphs[0].style.name == "Title": +        title = doc.paragraphs[0].text +    fullText = ""      for para in doc.paragraphs: -        fullText.append(para.text) +        if para.style.name in header_styles: +            fullText+="<h2>"+para.text +            fullText+="</h2>\n" +        elif para.style.name in paragraph_style: +            fullText+="<p>"+para.text +            fullText+="</p>\n"          print(para.text, para.style.name) +    html_file_generator(path, title, fullText)      return '\n'.join(path) @@ -68,35 +101,12 @@ def dochandler(path): -def txthandler(path, htmlfile): - -    filename = path[path.rfind('/')+1: path.rfind('.')] +def txthandler(path):      f = open(path, "r")      contents = f.read() -    title = filename      main_header = contents[0:contents.find('\n')]      paragraphs = paragraph_parser(contents[contents.find('\n'):])     -    htmlfile.write('<!DOCTYPE html>' -'<html lang ="en">' -    '<head>' -        '<title>' +title+ '</title>' -        '<link rel="stylesheet" type="text/css" href="stylesheet.css">' -        '<link rel="icon" type="image/x-icon" href="images/favicon.ico">' -        '<meta charset="utf-8"/>' -     '</head>' -     '<body>' -        '<h1>' +main_header+ '</h1>' -     '<article class="introduction">' -		'<h2>  Introduction  </h2>' -        '<p>''</p>' -	 '</article>') -    htmlfile.write(paragraphs) -         - - -      -    htmlfile.write('</body>' -    '</html>') +    html_file_generator(path,main_header,paragraphs)      print(f.read())      return 0 @@ -108,25 +118,14 @@ def pdfhandler(path):  #Attempting to pass txt file in path  -path = 'C:/Code/texttohtml/ThisOne.docx' -#path = 'C:/Code/texttohtml/memes.txt' - +#path = 'C:/Code/texttohtml/ThisOne.docx' +path = 'C:/Users/Josh/Desktop/porkandbeans/texttohtml/memes.docx'  extension = path[path.rfind('.')+1: len(path)] -filename = path[path.rfind('/')+1: path.rfind('.')] - - -if os.path.exists(filename+".html") == True: -    os.remove(filename+".html") - -htmlfile = open(filename+".html", "x") - - -  if extension =='txt': -    txthandler(path, htmlfile) +    txthandler(path)  elif extension == 'pdf':      pdfhandler(path) | 
