summaryrefslogtreecommitdiff
path: root/FBGM.py
diff options
context:
space:
mode:
authorDaniel Behmer <behmer_d@yahoo.com>2022-09-17 08:59:27 -0700
committerDaniel Behmer <behmer_d@yahoo.com>2022-09-17 08:59:27 -0700
commit9adca6837c95537a09af07668bccbd06d9f2ca08 (patch)
tree9927ea51f124af4f8ef1eb8ca8057e9114c5f5c0 /FBGM.py
parent906b98f25dd62ee5afbe80b0d07e91270aad24ab (diff)
Added sample document file and proper docx library
Diffstat (limited to 'FBGM.py')
-rw-r--r--FBGM.py27
1 files changed, 17 insertions, 10 deletions
diff --git a/FBGM.py b/FBGM.py
index 09821f8..4f2a8cd 100644
--- a/FBGM.py
+++ b/FBGM.py
@@ -1,7 +1,9 @@
from ftplib import parse150
-import os, sys
+import os, sys, docx
from string import punctuation
+
+#Text file handler
def paragraph_parser(contents):
punctuation = [".", "?", "!", '"']
paragraph_end = [".\n\n", "?\n\n", "!\n\n", '"\n\n', "|E"]
@@ -49,8 +51,19 @@ def paragraph_parser(contents):
paragraph[paragraph_number]="<h2>"+article[article_index.index(x)-1]+"</h2>\n<p>"+paragraph[paragraph_number][len(article[article_index.index(x)-1]):]
print("Header "+str(x)+" is in paragraph "+str(paragraph_number))
paragraph[-1] = paragraph[-1][:-5]+paragraph[-1][-4:]
- print(paragraph[-1])
return ''.join(paragraph)
+
+
+#Document file Handler
+def dochandler(path):
+    """Return the text of the .docx file at *path*, one paragraph per line.
+
+    Each paragraph's text and style name are also printed as it is read.
+    """
+    doc = docx.Document(path)
+    fullText = []
+    for para in doc.paragraphs:
+        fullText.append(para.text)
+        print(para.text, para.style.name)
+    # Join the collected paragraph texts; joining `path` would only
+    # interleave newlines between the characters of the file name.
+    return '\n'.join(fullText)
+
+
@@ -93,16 +106,10 @@ def pdfhandler(path):
return 0
-def dochandler(path):
- f = open(path)
- print(f.read())
- return 0
-
-
#Attempting to pass txt file in path
-#path = 'C:/Users/a big fuck/Documents/beansandtoast.txt'
-path = 'C:/Code/texttohtml/memes.txt'
+path = 'C:/Code/texttohtml/ThisOne.docx'
+#path = 'C:/Code/texttohtml/memes.txt'
extension = path[path.rfind('.')+1: len(path)]