from email import header
from ftplib import parse150
import os, sys, docx
from string import punctuation
def html_file_generator(path, page_header, page_contents):
    """Write a standalone HTML page named after ``path``.

    The output file is ``<base name of path without extension>.html`` and is
    created relative to the current working directory (not next to ``path``).
    An existing file with that name is overwritten.

    Args:
        path: Path of the source document; only its base name is used to
            derive the output file name and the page title.
        page_header: Text placed in the page heading. Inserted verbatim.
        page_contents: Markup placed in the page body after the heading.
            Inserted verbatim (no escaping is applied).

    Returns:
        None.

    Raises:
        OSError: If the output file cannot be created or written.
    """
    # os.path handles the cases manual rfind() slicing got wrong: a path
    # with no extension (rfind('.') == -1 chopped the last character) and a
    # dot that appears only in a directory component.
    filename = os.path.splitext(os.path.basename(path))[0]

    # NOTE(review): the markup literals of this template were not readable
    # in the reviewed copy of the file. This is a minimal skeleton rebuilt
    # from the surviving structure (title = filename, heading = page_header,
    # then the contents, then the closing tags) -- confirm against the
    # intended page layout and restore any project-specific markup.
    document = "".join([
        "<!DOCTYPE html>",
        "<html>",
        "<head>",
        '<meta charset="utf-8">',
        "<title>" + filename + "</title>",
        "</head>",
        "<body>",
        "<h1>" + page_header + "</h1>",
        page_contents,
        "</body>",
        "</html>",
    ])

    # Mode "w" truncates an existing file, replacing the exists/remove/"x"
    # sequence; the context manager guarantees the handle is flushed and
    # closed. UTF-8 matches the charset declared in the page head.
    with open(filename + ".html", "w", encoding="utf-8") as htmlfile:
        htmlfile.write(document)
#Text file handler
def paragraph_parser(contents):
punctuation = [".", "?", "!", '"']
paragraph_end = [".\n\n", "?\n\n", "!\n\n", '"\n\n', "|E"]
article_end = '\n'
paragraph_index = [0]
article_index = [0]
paragraph_number = 1
paragraph=[]
article=[]
x = 0
contents=contents.strip()
contents+="|E\n"
while x in range (len(paragraph_end)):
if contents.find(paragraph_end[x]) == -1:
paragraph_end.pop(x)
else:
x += 1
while paragraph_end:
minimum = contents[paragraph_index[paragraph_number-1]:].index(paragraph_end[0]) +paragraph_index[paragraph_number-1]
for x in paragraph_end:
if contents[paragraph_index[paragraph_number-1]:].index(x)+paragraph_index[paragraph_number-1]+1 < minimum:
minimum = contents[paragraph_index[paragraph_number-1]:].index(x)+paragraph_index[paragraph_number-1]
paragraph_index.append(minimum+1)
paragraph.append(contents[paragraph_index[paragraph_number-1]:paragraph_index[paragraph_number]]+'')
for x in paragraph_end:
if contents[paragraph_index[paragraph_number]:].find(x) == -1:
paragraph_end.remove(x)
first_nl =contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].index(article_end)+paragraph_index[paragraph_number-1]+2
if contents[first_nl-1:first_nl] not in punctuation:
article_index.append(contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].index(article_end)+paragraph_index[paragraph_number-1])
paragraph_number +=1
paragraph_number = 0
for x in range(1,len(article_index)):
if contents[article_index[x-1]:article_index[x]-2].rfind('\n') != -1:
article.append(contents[contents[:article_index[x]].rfind('\n')+1:article_index[x]+2])
elif x==1:
article.append(contents[contents[:article_index[x]].rfind('\n')+1:article_index[x]+2])
for x in article_index[1:]:
while x not in range(paragraph_index[paragraph_number],paragraph_index[paragraph_number+1]):
paragraph[paragraph_number]="