From 9adca6837c95537a09af07668bccbd06d9f2ca08 Mon Sep 17 00:00:00 2001 From: Daniel Behmer Date: Sat, 17 Sep 2022 08:59:27 -0700 Subject: Added sample document file and proper docx library --- FBGM.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'FBGM.py') diff --git a/FBGM.py b/FBGM.py index 09821f8..4f2a8cd 100644 --- a/FBGM.py +++ b/FBGM.py @@ -1,7 +1,9 @@ from ftplib import parse150 -import os, sys +import os, sys, docx from string import punctuation + +#Text file handler def paragraph_parser(contents): punctuation = [".", "?", "!", '"'] paragraph_end = [".\n\n", "?\n\n", "!\n\n", '"\n\n', "|E"] @@ -49,8 +51,19 @@ def paragraph_parser(contents): paragraph[paragraph_number]="

"+article[article_index.index(x)-1]+"

\n

"+paragraph[paragraph_number][len(article[article_index.index(x)-1]):] print("Header "+str(x)+" is in paragraph "+str(paragraph_number)) paragraph[-1] = paragraph[-1][:-5]+paragraph[-1][-4:] - print(paragraph[-1]) return ''.join(paragraph) + + +#Document file Handler +def dochandler(path): + doc = docx.Document(path) + fullText = [] + for para in doc.paragraphs: + fullText.append(para.text) + print(para.text, para.style.name) + return '\n'.join(fullText) + + @@ -93,16 +106,10 @@ def pdfhandler(path): return 0 -def dochandler(path): - f = open(path) - print(f.read()) - return 0 - - #Attempting to pass txt file in path -#path = 'C:/Users/a big fuck/Documents/beansandtoast.txt' -path = 'C:/Code/texttohtml/memes.txt' +path = 'C:/Code/texttohtml/ThisOne.docx' +#path = 'C:/Code/texttohtml/memes.txt' extension = path[path.rfind('.')+1: len(path)] -- cgit v1.2.3