FBGM.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118

from ftplib import parse150
import os, sys

def paragraph_parser(contents):
    """Split *contents* into a list of paragraph strings.

    A paragraph ends at the first line that finishes with ``.``, ``?``,
    ``!`` or ``"`` immediately followed by a newline, or at the end of the
    text. Lines that do not end with one of those characters are kept in
    the same paragraph as the line(s) that follow them.

    Args:
        contents: The text to split. Leading and trailing whitespace is
            ignored.

    Returns:
        A list of paragraphs in document order. Each paragraph keeps its
        closing punctuation but not the newline that terminated it, and has
        surrounding whitespace removed. Empty input yields an empty list.
    """
    terminators = (".\n", "?\n", "!\n", '"\n')
    text = contents.strip()
    paragraphs = []
    start = 0

    while start < len(text):
        # Position of every terminator that still occurs at or after `start`.
        hits = [
            pos
            for pos in (text.find(mark, start) for mark in terminators)
            if pos != -1
        ]
        if not hits:
            # No terminator left: the remainder is the final paragraph.
            paragraphs.append(text[start:].strip())
            break

        # Keep the punctuation character (first char of the terminator) ...
        end = min(hits) + 1
        paragraphs.append(text[start:end].strip())
        # ... and skip the newline that follows it.
        start = end + 1

    return paragraphs
            
            
def txthandler(path, htmlfile):
    """Convert a plain-text file into an HTML page written to *htmlfile*.

    The file's base name (without extension) becomes the page title, the
    first line of the file becomes the ``<h1>`` header, and the remaining
    text is split by ``paragraph_parser`` into one
    ``<article class="bodyparagraph">`` element per paragraph.

    Args:
        path: Path of the text file to read.
        htmlfile: Writable text file object that receives the HTML.

    Returns:
        Always 0.
    """
    import html  # local import so the function does not depend on file-level imports

    # os.path handles names without a dot, where slicing on rfind('.') == -1
    # would silently chop the last character off the name.
    title = os.path.splitext(os.path.basename(path))[0]

    # Context manager guarantees the input file is closed.
    with open(path, "r") as f:
        contents = f.read()

    # partition() copes with a single-line file (no '\n'): the whole line is
    # the header and the body is empty.
    main_header, _, body = contents.partition('\n')
    paragraph = paragraph_parser(body)

    # The text comes from an arbitrary file, so escape it before embedding
    # it in markup; otherwise '<' or '&' in the text corrupts the page.
    htmlfile.write(
        '<!DOCTYPE html>'
        '<html lang ="en">'
        '<head>'
        '<title>' + html.escape(title, quote=False) + '</title>'
        '<link rel="stylesheet" type="text/css" href="stylesheet.css">'
        '<link rel="icon" type="image/x-icon" href="images/favicon.ico">'
        '<meta charset="utf-8"/>'
        '</head>'
        '<body>'
        '<h1>' + html.escape(main_header, quote=False) + '</h1>'
        '<article class="introduction">'
        '<h2>&nbsp Introduction &nbsp</h2>'
        '<p></p>'
        '</article>'
    )

    for x in paragraph:
        htmlfile.write(
            '<article class="bodyparagraph">'
            + html.escape(x, quote=False)
            + '</article>'
        )

    htmlfile.write('</body>'
                   '</html>')
    return 0

def pdfhandler(path):
    """Print the contents of the file at *path* to standard output.

    The file is opened in the default text mode.
    NOTE(review): a real binary PDF will probably not decode as text;
    confirm whether this handler is only a placeholder.

    Args:
        path: Path of the file to read.

    Returns:
        Always 0.
    """
    # Context manager closes the handle even if reading or printing fails.
    with open(path) as f:
        print(f.read())
    return 0
    

def dochandler(path):
    """Print the contents of the file at *path* to standard output.

    The file is opened in the default text mode.
    NOTE(review): binary .doc/.docx files will probably not decode as text;
    confirm whether this handler is only a placeholder.

    Args:
        path: Path of the file to read.

    Returns:
        Always 0.
    """
    # Context manager closes the handle even if reading or printing fails.
    with open(path) as f:
        print(f.read())
    return 0


# Path of the document to convert (currently a hard-coded .txt file).
path = 'C:/Users/a big fuck/Documents/beansandtoast.txt'

# os.path yields an empty extension for names without a dot instead of
# mis-slicing the path; the extension is lowercased so ".TXT" also matches.
extension = os.path.splitext(path)[1][1:].lower()
filename = os.path.splitext(os.path.basename(path))[0]

# Validate the extension before touching the output file, so an unsupported
# input does not leave an empty .html file behind.
# A membership test is required here: `extension == 'doc' or 'docx'` is
# always truthy because the non-empty string 'docx' is truthy on its own.
if extension not in ('txt', 'pdf', 'doc', 'docx'):
    print("Extension not recognized")
    sys.exit()

# Mode "w" replaces any previous output file; the context manager makes sure
# the HTML is flushed and the handle is closed.
with open(filename + ".html", "w") as htmlfile:
    if extension == 'txt':
        txthandler(path, htmlfile)
    elif extension == 'pdf':
        pdfhandler(path)
    else:
        dochandler(path)