diff options
| author | Joshua Drake <joshua.ellis.drake@gmail.com> | 2022-10-23 00:22:13 -0500 | 
|---|---|---|
| committer | Joshua Drake <joshua.ellis.drake@gmail.com> | 2022-10-23 00:22:13 -0500 | 
| commit | 5789a123268db253f75836def35df3b54529bc88 (patch) | |
| tree | 68e38221d2a0c08526b714c7b73c9f4c1673a96f /FBGM.py | |
| parent | 0a086697e3266ce68bee1b3628d7e7de95620f15 (diff) | |
Diffstat (limited to 'FBGM.py')
| -rw-r--r-- | FBGM.py | 13 | 
1 file changed, 9 insertions, 4 deletions
| @@ -4,8 +4,14 @@ import os, sys, docx, PyPDF2  from string import punctuation  from PyPDF2 import PdfReader +def process_text(string): +    string = remove_non_ascii(string) +    string = remove_line_break(string) +    return string  def remove_non_ascii(string):      return string.encode('ascii', errors='xmlcharrefreplace').decode() +def remove_line_break(string): +    return string.replace("- ","")  def html_file_generator(path, page_header, page_contents):      filename = path[path.rfind('/')+1: path.rfind('.')] @@ -24,7 +30,7 @@ def html_file_generator(path, page_header, page_contents):       '</head>'       '<body>'          '<h1>' +page_header+ '</h1>') -    htmlfile.write(remove_non_ascii(page_contents)) +    htmlfile.write(process_text(page_contents))      htmlfile.write('</body>'      '</html>')      htmlfile.close() @@ -99,7 +105,6 @@ def dochandler(path):          elif para.style.name in paragraph_style:              fullText+="<p>"+para.text              fullText+="</p>\n" -        print(para.text, para.style.name)      html_file_generator(path, title, fullText)      return '\n'.join(path) @@ -139,8 +144,8 @@ def pdfhandler(path):  #Attempting to pass txt file in path  -path = 'C:/Code/texttohtml/kac.pdf' -#path = 'C:/Users/Josh/Desktop/porkandbeans/texttohtml/memes.docx' +#path = 'C:/Code/texttohtml/kac.pdf' +path = 'C:/Users/Josh/Desktop/porkandbeans/texttohtml/memes.docx'  extension = path[path.rfind('.')+1: len(path)] | 
