blob: 150b9d1d920075ad13c12885fdcfac7b2b6dbef1 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
from ftplib import parse150
import os, sys
def paragraph_parser(contents):
    """Split *contents* into a list of paragraph strings.

    A paragraph ends at a line whose last character is one of ``.``, ``?``,
    ``!`` or ``"``; the end of the text closes the final paragraph. Lines
    that end any other way (a heading, for instance) stay joined, newline
    included, to the lines that follow them.

    Args:
        contents: Text to split. Leading and trailing whitespace is ignored.

    Returns:
        The paragraphs in their original order. Each one keeps its closing
        punctuation but not the newline after it. An empty list is returned
        when *contents* holds nothing but whitespace.
    """
    sentence_endings = ('.', '?', '!', '"')
    text = contents.strip()
    if not text:
        return []

    paragraphs = []
    pending_lines = []
    for line in text.split('\n'):
        pending_lines.append(line)
        # str.endswith accepts a tuple, so one call tests every terminator.
        if line.endswith(sentence_endings):
            paragraphs.append('\n'.join(pending_lines))
            pending_lines = []

    # Text left over after the last terminator forms the final paragraph.
    if pending_lines:
        paragraphs.append('\n'.join(pending_lines))
    return paragraphs
def txthandler(path, htmlfile):
    """Render the plain-text file at *path* as an HTML page in *htmlfile*.

    The file name (without directory and extension) becomes the page title,
    the first line of the file becomes the <h1> heading, and the remaining
    text is split by paragraph_parser() into one <article> per paragraph.

    Args:
        path: Location of the text file to convert.
        htmlfile: Writable text file object that receives the markup.

    Returns:
        Always 0.
    """
    from html import escape

    title = os.path.splitext(os.path.basename(path))[0]
    with open(path, "r") as f:
        contents = f.read()

    # partition() keeps the whole line as the header even when the file
    # contains no newline, in which case the body is simply empty.
    main_header, _, body = contents.partition('\n')
    paragraph = paragraph_parser(body)

    # The source is plain text, so &, < and > must be escaped before they
    # are embedded in markup.
    htmlfile.write('<!DOCTYPE html>'
                   '<html lang ="en">'
                   '<head>'
                   '<title>' + escape(title, quote=False) + '</title>'
                   '<link rel="stylesheet" type="text/css" href="stylesheet.css">'
                   '<link rel="icon" type="image/x-icon" href="images/favicon.ico">'
                   '<meta charset="utf-8"/>'
                   '</head>'
                   '<body>'
                   '<h1>' + escape(main_header, quote=False) + '</h1>'
                   '<article class="introduction">'
                   '<h2>  Introduction  </h2>'
                   '<p>''</p>'
                   '</article>')
    for x in paragraph:
        htmlfile.write('<article class="bodyparagraph">'
                       + escape(x, quote=False)
                       + '</article>')
    htmlfile.write('</body>'
                   '</html>')
    return 0
def pdfhandler(path):
    """Print the contents of the file at *path* to stdout.

    No HTML is produced; the file is read in text mode and echoed as-is.
    NOTE(review): PDF data is binary, so a text-mode read may fail to
    decode -- confirm how PDFs are meant to be handled here.

    Args:
        path: Location of the file to read.

    Returns:
        Always 0.
    """
    # The context manager closes the handle even if read() or print() raises.
    with open(path) as f:
        print(f.read())
    return 0
def dochandler(path):
    """Print the contents of the file at *path* to stdout.

    No HTML is produced; the file is read in text mode and echoed as-is.
    NOTE(review): .doc/.docx files are binary, so a text-mode read may fail
    to decode -- confirm how these documents are meant to be handled here.

    Args:
        path: Location of the file to read.

    Returns:
        Always 0.
    """
    # The context manager closes the handle even if read() or print() raises.
    with open(path) as f:
        print(f.read())
    return 0
# Convert the document at `path` into "<filename>.html" in the current
# working directory, choosing the handler from the file extension.
path = 'C:/Users/a big fuck/Documents/beansandtoast.txt'
extension = path[path.rfind('.')+1:]
filename = path[path.rfind('/')+1: path.rfind('.')]

# Mode "w" truncates any previous output, and the with-block guarantees the
# file is closed, including when sys.exit() is reached below.
with open(filename+".html", "w") as htmlfile:
    if extension == 'txt':
        txthandler(path, htmlfile)
    elif extension == 'pdf':
        pdfhandler(path)
    # A membership test is required here: `extension == 'doc' or 'docx'` is
    # always truthy and would make the final branch unreachable.
    elif extension in ('doc', 'docx'):
        dochandler(path)
    else:
        print("Extension not recognized")
        sys.exit()
|