from ftplib import parse150
import os, sys
from string import punctuation
def paragraph_parser(contents):
punctuation = [".", "?", "!", '"']
paragraph_end = [".\n\n", "?\n\n", "!\n\n", '"\n\n', "|E"]
article_end = '\n'
paragraph_index = [0]
article_index = [0]
paragraph_number = 1
paragraph=[]
article=[]
x = 0
output = ''
contents=contents.strip()
contents+="|E"
while x in range (len(paragraph_end)):
if contents.find(paragraph_end[x]) == -1:
paragraph_end.pop(x)
else:
x += 1
while paragraph_end:
minimum = contents[paragraph_index[paragraph_number-1]:].index(paragraph_end[0]) +paragraph_index[paragraph_number-1]
for x in paragraph_end:
if contents[paragraph_index[paragraph_number-1]:].index(x)+paragraph_index[paragraph_number-1]+1 < minimum:
minimum = contents[paragraph_index[paragraph_number-1]:].index(x)+paragraph_index[paragraph_number-1]
paragraph_index.append(minimum+1)
paragraph.append(contents[paragraph_index[paragraph_number-1]:paragraph_index[paragraph_number]])
for x in paragraph_end:
if contents[paragraph_index[paragraph_number]:].find(x) == -1:
paragraph_end.remove(x)
if contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].find(article_end) != -1:
first_nl =contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].index(article_end)+paragraph_index[paragraph_number-1]+2
print(contents[first_nl-1:first_nl])
if contents[first_nl-1:first_nl] not in punctuation:
article_index.append(contents[paragraph_index[paragraph_number-1]+2:paragraph_index[paragraph_number]].index(article_end)+paragraph_index[paragraph_number-1])
paragraph_number +=1
paragraph_number = 0
for x in range(1,len(article_index)):
if contents[article_index[x-1]:article_index[x]-2].rfind('\n') != -1:
article.append(contents[contents[:article_index[x]].rfind('\n')+1:article_index[x]+2])
elif x==1:
article.append(contents[:article_index[x]+2])
output+=''
output+="