-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
86 lines (66 loc) · 2.87 KB
/
app.py
File metadata and controls
86 lines (66 loc) · 2.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import streamlit as st
import collections
import math
import networkx as nx
import operator
from textpre import cleanText
from wl import process_word_list
def calculate_word_scores(word_dict, size):
    """Assign a positional weight to every word occurrence in the document.

    Each occurrence is weighted by the arcsine-distribution density
    1 / (pi * sqrt(p * (1 - p))), where p is the word's relative position
    in the whole document — words near the beginning or end score highest.

    Args:
        word_dict: mapping of sentence index -> list of words in that sentence.
        size: total number of words across all sentences (used to normalize
            the running position into (0, 1)).

    Returns:
        tuple (word_score, nodeHash):
            word_score: dict of dicts; word_score[i][j] is the positional
                weight of the j-th word of sentence i.
            nodeHash: dict mapping each distinct word to the maximum
                positional weight over all of its occurrences.
    """
    # NOTE(review): removed unused local `window = 3` — the co-occurrence
    # window belongs to build_graph(), which takes it as a parameter.
    word_score = collections.defaultdict(dict)
    nodeHash = {}
    count = 0
    for i in word_dict.keys():
        for j in range(len(word_dict[i])):
            count += 1
            # Relative position in (0, 1); the +1 keeps the last word's
            # position strictly below 1 so the sqrt never hits zero.
            position = float(count) / (float(size) + 1.0)
            score = 1.0 / (math.pi * math.sqrt(position * (1 - position)))
            word_score[i][j] = score
            word = word_dict[i][j]
            # Keep the best score seen so far for each distinct word.
            nodeHash[word] = max(nodeHash.get(word, score), score)
    return word_score, nodeHash
def build_graph(word_dict, nodeHash, window):
    """Build an undirected word co-occurrence graph for TextRank.

    Every scored word becomes a node.  Within each sentence, a word is
    connected to the up-to ``window - 1`` words that follow it; the edge
    weight is the mean of the two endpoints' positional scores.

    Args:
        word_dict: mapping of sentence index -> list of words.
        nodeHash: word -> best positional score (from calculate_word_scores).
        window: co-occurrence window size.

    Returns:
        networkx.Graph with weighted co-occurrence edges.
    """
    graph = nx.Graph()
    graph.add_nodes_from(nodeHash.keys())
    for words in word_dict.values():
        for pos, current in enumerate(words):
            # Pair the current word with its near right-hand neighbours.
            for neighbour in words[pos + 1:pos + window]:
                weight = (nodeHash[current] + nodeHash[neighbour]) / 2
                graph.add_edge(current, neighbour, weight=weight)
    return graph
def text_rank_summary(sentences, word_dict, textRank, keyphrases, numberofSentences):
    """Pick the highest-scoring sentences and return them in document order.

    A sentence's score is the sum of the TextRank values of the keyphrases
    it contains, multiplied by an arcsine-shaped positional weight that
    favors sentences near either end of the document.

    Args:
        sentences: list of original sentence strings, in document order.
        word_dict: mapping of sentence index -> list of words in it.
        textRank: word -> TextRank score (e.g. from nx.pagerank).
        keyphrases: words eligible to contribute to a sentence's score.
        numberofSentences: how many sentences to keep in the summary.

    Returns:
        List of the selected sentence strings, restored to document order.
    """
    scores = {}
    for idx in word_dict:
        # Positional weight peaks for sentences near the start or the end.
        p = float(idx + 1) / (float(len(sentences)) + 1.0)
        positional = 1.0 / (math.pi * math.sqrt(p * (1.0 - p)))
        keyphrase_mass = sum(textRank[kp] for kp in keyphrases if kp in word_dict[idx])
        scores[idx] = keyphrase_mass * positional
    # Take the top-N by score, then restore original document order.
    top = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)[:numberofSentences]
    top.sort(key=operator.itemgetter(0))
    return [sentences[idx] for idx, _ in top]
st.title("Marathi Text Summarization")

uploaded_file = st.file_uploader("Choose a text file", type="txt")
if uploaded_file is not None:
    # Persist the upload so the preprocessing step can read it from disk.
    with open("uploaded_text.txt", "wb") as out:
        out.write(uploaded_file.getbuffer())

    # Preprocess, score words positionally, and rank them via PageRank
    # over the co-occurrence graph.
    word_dict, sentences, size = cleanText("uploaded_text.txt")
    word_score, nodeHash = calculate_word_scores(word_dict, size)
    graph = build_graph(word_dict, nodeHash, window=3)
    textRank = nx.pagerank(graph, weight='weight')

    # Keyphrase count: 10% of the words, capped logarithmically for long texts.
    n = int(math.ceil(min(0.1 * size, 7 * math.log(size))))
    keyphrases = sorted(textRank, key=textRank.get, reverse=True)[:n]

    summary = text_rank_summary(sentences, word_dict, textRank, keyphrases, numberofSentences=6)
    st.write("## Summary")
    for sentence in summary:
        st.write(sentence)