diff options
author | Daniel Behmer <behmer_d@yahoo.com> | 2022-09-17 08:59:27 -0700 |
---|---|---|
committer | Daniel Behmer <behmer_d@yahoo.com> | 2022-09-17 08:59:27 -0700 |
commit | 9adca6837c95537a09af07668bccbd06d9f2ca08 (patch) | |
tree | 9927ea51f124af4f8ef1eb8ca8057e9114c5f5c0 | |
parent | 906b98f25dd62ee5afbe80b0d07e91270aad24ab (diff) |
Added sample document file and proper docx library
-rw-r--r-- | FBGM.py | 27 |
1 file changed, 17 insertions, 10 deletions
Diff (line breaks restored from the hunk markers; indentation was lost in extraction and is approximate):

```diff
@@ -1,7 +1,9 @@
 from ftplib import parse150
-import os, sys
+import os, sys, docx
 from string import punctuation
 
+
+#Text file handler
 def paragraph_parser(contents):
     punctuation = [".", "?", "!", '"']
     paragraph_end = [".\n\n", "?\n\n", "!\n\n", '"\n\n', "|E"]
@@ -49,8 +51,19 @@ def paragraph_parser(contents):
             paragraph[paragraph_number]="<h2>"+article[article_index.index(x)-1]+"</h2>\n<p>"+paragraph[paragraph_number][len(article[article_index.index(x)-1]):]
             print("Header "+str(x)+" is in paragraph "+str(paragraph_number))
     paragraph[-1] = paragraph[-1][:-5]+paragraph[-1][-4:]
-    print(paragraph[-1])
     return ''.join(paragraph)
+
+
+#Document file Handler
+def dochandler(path):
+    doc = docx.Document(path)
+    fullText = []
+    for para in doc.paragraphs:
+        fullText.append(para.text)
+        print(para.text, para.style.name)
+    return '\n'.join(path)
+
+
 
 
 
@@ -93,16 +106,10 @@ def pdfhandler(path):
     return 0
 
 
-def dochandler(path):
-    f = open(path)
-    print(f.read())
-    return 0
-
-
 
 #Attempting to pass txt file in path
-#path = 'C:/Users/a big fuck/Documents/beansandtoast.txt'
-path = 'C:/Code/texttohtml/memes.txt'
+path = 'C:/Code/texttohtml/ThisOne.docx'
+#path = 'C:/Code/texttohtml/memes.txt'
 extension = path[path.rfind('.')+1: len(path)]
 
 
```