Spaces:
Running
Running
Commit
·
0723a2c
1
Parent(s):
04d9da1
Update app.py
Browse files
app.py
CHANGED
@@ -6,55 +6,63 @@ from txtai.graph import GraphFactory
|
|
6 |
from datasets import load_dataset
|
7 |
import streamlit as st
|
8 |
import streamlit.components.v1 as components
|
9 |
-
|
10 |
|
11 |
|
12 |
st.set_page_config(page_title="DebateKG")
|
13 |
st.title("DebateKG - Automatic Policy Debate Case Creation")
|
14 |
-
st.write("WIP, give me a few more days before reviewing!")
|
15 |
st.caption("github: https://github.com/Hellisotherpeople/DebateKG")
|
16 |
|
17 |
|
18 |
form = st.sidebar.form("Main Settings")
|
19 |
form.header("Main Settings")
|
20 |
-
|
21 |
-
|
22 |
-
show_extract = form.checkbox("Show extracts", value = False)
|
23 |
show_abstract = form.checkbox("Show abstract", value = False)
|
24 |
show_full_doc = form.checkbox("Show full doc", value = False)
|
25 |
-
show_citation = form.checkbox("Show citation", value =
|
26 |
-
rerank_word = form.
|
27 |
-
|
28 |
-
|
29 |
-
form.
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
"
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
graph = embeddings.graph
|
59 |
|
60 |
def david_distance(source, target, attrs):
|
@@ -64,13 +72,13 @@ def david_distance(source, target, attrs):
|
|
64 |
def david_showpath(source, target, the_graph):
    """Return one shortest path from ``source`` to ``target`` in ``the_graph``.

    ``david_distance`` is handed to ``nx.shortest_path`` as the edge-weight
    callable (``nx`` is presumably networkx; its import is outside this view).
    """
    return nx.shortest_path(the_graph, source, target, weight=david_distance)
|
66 |
|
|
|
|
|
67 |
|
68 |
|
69 |
-
import string
|
70 |
-
|
71 |
def highlight(index, result):
|
72 |
output = f"{index}. "
|
73 |
-
spans = [(token, score, "#fff59d" if score >
|
74 |
|
75 |
for token, _, color in spans:
|
76 |
output += f"<span style='background-color: {color}'>{token}</span> " if color else f"{token} "
|
@@ -91,27 +99,37 @@ def showpath_any(list_of_arguments, strip_punctuation = True, the_graph=graph.ba
|
|
91 |
if strip_punctuation:
|
92 |
text = text.translate(str.maketrans("","", string.punctuation))
|
93 |
list_of_evidence_ids.append(int(embeddings.search(f"select id from txtai where similar('{text}') limit 1")[0]['id']))
|
94 |
-
print(list_of_evidence_ids)
|
95 |
|
96 |
sections = []
|
|
|
97 |
for x, p in enumerate(path):
|
98 |
if x == 0:
|
99 |
# Print start node
|
100 |
|
101 |
sections.append(f"{x + 1}. {p}")
|
102 |
-
|
103 |
-
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
if x < len(path) - 1:
|
107 |
# Explain and highlight next path element
|
108 |
results = embeddings.explain(p, [path[x + 1]], limit=1)[0]
|
109 |
sections.append(highlight(x + 2, results))
|
110 |
-
|
111 |
-
|
112 |
-
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
-
return components.html("<br/><br/>".join(sections), scrolling = True, width =
|
115 |
|
116 |
def question(text, rerank_word = "", rerank_topic = "", limit = 100):
    """Run a semantic search against the module-level ``embeddings`` index.

    Args:
        text: Query passed to ``similar(...)``.
        rerank_word: Substring that the ``text`` column must contain
            (``like '%word%'``); the empty default matches every row.
        rerank_topic: Substring that the ``topic`` column must contain.
        limit: Maximum number of rows; coerced with ``int()``.

    Returns:
        Whatever ``embeddings.search`` returns for the assembled statement.

    Raises:
        ValueError / TypeError: if ``limit`` cannot be converted to an int.
    """
    # Coerce before interpolation so only a bare integer can ever reach the
    # LIMIT clause.
    row_limit = int(limit)

    # NOTE(review): the three string inputs are still interpolated verbatim, so
    # a single quote in any of them breaks (or alters) the statement. Newer
    # txtai releases appear to support bind parameters for this -- confirm
    # against the installed version before switching.
    sql = (
        "select id, text, topic, evidence_id, score from txtai "
        f"where similar('{text}') "
        f"and text like '%{rerank_word}%' "
        f"and topic like '%{rerank_topic}%' "
        f"limit {row_limit}"
    )
    return embeddings.search(sql)
|
@@ -119,11 +137,36 @@ def question(text, rerank_word = "", rerank_topic = "", limit = 100):
|
|
119 |
|
120 |
|
121 |
query_form = st.form("Query the Index:")
|
122 |
-
query_form.write("
|
123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
|
125 |
|
126 |
-
#showpath_any([3, 12, 15])
|
127 |
|
128 |
-
with st.expander("mine", expanded = False):
|
129 |
-
st.write(embeddings.search(f"select * from txtai where similar('you') and text like '%the%' limit 10"))
|
|
|
6 |
from datasets import load_dataset
|
7 |
import streamlit as st
|
8 |
import streamlit.components.v1 as components
|
9 |
+
import string
|
10 |
|
11 |
|
12 |
# Page header.
st.set_page_config(page_title="DebateKG")
st.title("DebateKG - Automatic Policy Debate Case Creation")
st.caption("github: https://github.com/Hellisotherpeople/DebateKG")

# Sidebar form: every value below is read as a module-level global by the
# functions defined later in this file.
form = st.sidebar.form("Main Settings")
form.header("Main Settings")

# Tokens scoring above this value get a highlight colour in highlight().
highlight_threshold = form.number_input(
    "Enter the minimum similarity value needed to highlight",
    value=0.05,
)

# Which dataset columns to print under each argument of the generated case.
show_extract = form.checkbox("Show extracts", value=True)
show_abstract = form.checkbox("Show abstract", value=False)
show_full_doc = form.checkbox("Show full doc", value=False)
show_citation = form.checkbox("Show citation", value=True)

rerank_word = form.text_input(
    "(Optional) Constrain all evidence in the case to have this word within its text",
    value="",
)
form.caption(
    "Doing this may create graphs which are so constrained that DebateKG can't find a valid path in the graph to build a case"
)

# Size of the HTML component that displays the generated case.
html_window_width = form.number_input(
    "Enter the pixel width of the output debate case window",
    value=1000,
)
html_window_height = form.number_input(
    "Enter the pixel height of the output debate case window",
    value=1000,
)

# Index archives the user can choose from; index=2 preselects the third entry.
knowledge_graph_archives = (
    'DebateSum_SemanticGraph_longformer_extract.tar.gz',
    'DebateSum_SemanticGraph_longformer_abstract.tar.gz',
    'DebateSum_SemanticGraph_mpnet_abstract.tar.gz',
    'DebateSum_SemanticGraph_legalbert_abstract.tar.gz',
    'DebateSum_SemanticGraph_legalbert_extract.tar.gz',
    'DebateSum_SemanticGraph_mpnet_extract.tar.gz',
    'DebateSum_SemanticGraph_mpnet_sentence.tar.gz',
)
option = form.selectbox(
    'Which Knowledge Graph do you want to use?',
    knowledge_graph_archives,
    index=2,
)

form.form_submit_button("Change Settings")
|
33 |
+
|
34 |
+
@st.cache(allow_output_mutation=True)
def load_my_dataset():
    """Load the ``train`` split of the ``Hellisotherpeople/DebateSum`` dataset.

    Wrapped in ``st.cache`` so Streamlit can reuse the result instead of
    calling ``load_dataset`` again on every script rerun.
    """
    return load_dataset("Hellisotherpeople/DebateSum", split="train")
|
38 |
+
|
39 |
+
@st.cache(allow_output_mutation=True)
def load_embeddings():
    """Build a txtai ``Embeddings`` object and load the selected index into it.

    The archive to load is read from the module-level ``option`` chosen in the
    sidebar form.

    Returns:
        The ``Embeddings`` instance after ``load(option)`` has been called.
    """
    # SQL helper plus the two derived columns that expose graph attributes.
    sql_functions = [
        {"name": "graph", "function": "graph.attribute"},
    ]
    sql_expressions = [
        {"name": "topic", "expression": "graph(indexid, 'topic')"},
        {"name": "topicrank", "expression": "graph(indexid, 'topicrank')"},
    ]

    # Graph construction / topic-modelling settings.
    topic_settings = {
        "terms": 4,
        "resolution": 100,
    }
    graph_settings = {
        "limit": 100,
        "minscore": 0.10,
        "topics": topic_settings,
    }

    index = Embeddings({
        "path": "sentence-transformers/all-mpnet-base-v2",
        "content": True,
        "functions": sql_functions,
        "expressions": sql_expressions,
        "graph": graph_settings,
    })

    # NOTE(review): presumably load() restores the configuration stored in the
    # archive, which may supersede parts of the dict above -- confirm against
    # the txtai documentation.
    index.load(option)
    return index
|
62 |
+
|
63 |
+
# Module-level state shared by every function below.
dataset = load_my_dataset()
embeddings = load_embeddings()

# Graph attached to the loaded index; path searches run on graph.backend.
graph = embeddings.graph
|
67 |
|
68 |
def david_distance(source, target, attrs):
|
|
|
72 |
def david_showpath(source, target, the_graph):
    """Return one shortest path from ``source`` to ``target`` in ``the_graph``.

    ``david_distance`` is handed to ``nx.shortest_path`` as the edge-weight
    callable (``nx`` is presumably networkx; its import is outside this view).
    """
    return nx.shortest_path(the_graph, source, target, weight=david_distance)
|
74 |
|
75 |
+
def david_show_all_paths(source, target, the_graph):
    """Return every shortest path from ``source`` to ``target`` in ``the_graph``.

    Uses the same ``david_distance`` edge weighting as ``david_showpath``. The
    value is returned exactly as ``nx.all_shortest_paths`` produces it, so
    callers should iterate over it rather than index into it.
    """
    return nx.all_shortest_paths(the_graph, source, target, weight=david_distance)
|
77 |
|
78 |
|
|
|
|
|
79 |
def highlight(index, result):
|
80 |
output = f"{index}. "
|
81 |
+
spans = [(token, score, "#fff59d" if score > highlight_threshold else None) for token, score in result["tokens"]]
|
82 |
|
83 |
for token, _, color in spans:
|
84 |
output += f"<span style='background-color: {color}'>{token}</span> " if color else f"{token} "
|
|
|
99 |
if strip_punctuation:
|
100 |
text = text.translate(str.maketrans("","", string.punctuation))
|
101 |
list_of_evidence_ids.append(int(embeddings.search(f"select id from txtai where similar('{text}') limit 1")[0]['id']))
|
|
|
102 |
|
103 |
sections = []
|
104 |
+
#sections.append(list_of_evidence_ids)
|
105 |
for x, p in enumerate(path):
|
106 |
if x == 0:
|
107 |
# Print start node
|
108 |
|
109 |
sections.append(f"{x + 1}. {p}")
|
110 |
+
if show_abstract:
|
111 |
+
sections.append(dataset["Abstract"][list_of_evidence_ids[x]])
|
112 |
+
if show_citation:
|
113 |
+
sections.append(dataset["Citation"][list_of_evidence_ids[x]])
|
114 |
+
if show_extract:
|
115 |
+
sections.append(dataset["Extract"][list_of_evidence_ids[x]])
|
116 |
+
if show_full_doc:
|
117 |
+
sections.append(dataset["Full-Document"][list_of_evidence_ids[x]])
|
118 |
|
119 |
if x < len(path) - 1:
|
120 |
# Explain and highlight next path element
|
121 |
results = embeddings.explain(p, [path[x + 1]], limit=1)[0]
|
122 |
sections.append(highlight(x + 2, results))
|
123 |
+
if show_abstract:
|
124 |
+
sections.append(dataset["Abstract"][list_of_evidence_ids[x+1]])
|
125 |
+
if show_citation:
|
126 |
+
sections.append(dataset["Citation"][list_of_evidence_ids[x+1]])
|
127 |
+
if show_extract:
|
128 |
+
sections.append(dataset["Extract"][list_of_evidence_ids[x+1]])
|
129 |
+
if show_full_doc:
|
130 |
+
sections.append(dataset["Full-Document"][list_of_evidence_ids[x+1]])
|
131 |
|
132 |
+
return components.html("<br/><br/>".join(sections), scrolling = True, width = html_window_width, height = html_window_height)
|
133 |
|
134 |
def question(text, rerank_word = "", rerank_topic = "", limit = 100):
    """Run a semantic search against the module-level ``embeddings`` index.

    Args:
        text: Query passed to ``similar(...)``.
        rerank_word: Substring that the ``text`` column must contain
            (``like '%word%'``); the empty default matches every row.
        rerank_topic: Substring that the ``topic`` column must contain.
        limit: Maximum number of rows; coerced with ``int()``.

    Returns:
        Whatever ``embeddings.search`` returns for the assembled statement.

    Raises:
        ValueError / TypeError: if ``limit`` cannot be converted to an int.
    """
    # Coerce before interpolation so only a bare integer can ever reach the
    # LIMIT clause.
    row_limit = int(limit)

    # NOTE(review): the three string inputs are still interpolated verbatim, so
    # a single quote in any of them breaks (or alters) the statement. Newer
    # txtai releases appear to support bind parameters for this -- confirm
    # against the installed version before switching.
    sql = (
        "select id, text, topic, evidence_id, score from txtai "
        f"where similar('{text}') "
        f"and text like '%{rerank_word}%' "
        f"and topic like '%{rerank_topic}%' "
        f"limit {row_limit}"
    )
    return embeddings.search(sql)
|
|
|
137 |
|
138 |
|
139 |
# Step 1: let the user run an arbitrary txtai SQL statement to discover
# arguments and their indexids.
query_form = st.form("Query the Index:")
query_form.write("Step 1: Find Arguments")
query_form.write("Use semantic SQL from txtai to find some arguments, we use indexids to keep track of them.")
query_form.caption("You can use the semantic SQL to explore the dataset too! The possibilities are limitless!")

# Example statement pre-filled in the text area.
default_query = "select topic, * from txtai where similar('Trump and US relations with China') and topic like '%trump%' and text like '%Donald%' limit 1"
query_sql = query_form.text_area("Enter a semantic SQL statement", value=default_query)

query_form_submitted = query_form.form_submit_button("Query")

# The statement is only executed after the form's submit button is pressed.
if query_form_submitted:
    with st.expander("Output (Open Me)", expanded=False):
        query_results = embeddings.search(query_sql)
        st.write(query_results)
|
152 |
+
|
153 |
+
|
154 |
+
# Step 2: turn a list of indexids into a debate case by linking them through
# the knowledge graph.
paths_form = st.form("Build the Arguments")
paths_form.write("Step 2: Build a Policy Debate Case")
paths_form.write("Enter any number of indexids (arguments), DebateKG will build a debate case out of it which links them all together")
user_paths_string = paths_form.text_area("Enter a list of indexids seperated by whitespace", value = "250 10000 2405")
user_paths_list_of_strings = user_paths_string.split()

# This code runs on every script rerun, not only on submit. An unguarded int()
# conversion would raise on the first non-numeric token and take the whole
# page down, so remember the failure and report it when the user submits.
try:
    user_paths_list = [int(token) for token in user_paths_list_of_strings]
except ValueError:
    user_paths_list = None

paths_form_submitted = paths_form.form_submit_button("Build a Policy Debate Case")

if paths_form_submitted:
    if user_paths_list is None:
        st.error("Could not read the indexids: enter whole numbers separated by whitespace.")
    elif rerank_word:
        # Restrict the search to nodes whose text contains the sidebar word.
        # NOTE(review): assumes every node carries a 'text' attribute -- confirm.
        selected_nodes = [n for n, v in graph.backend.nodes(data=True) if rerank_word in v['text']]  # also works for topic
        H = graph.backend.subgraph(selected_nodes)
        showpath_any(user_paths_list, the_graph = H)
    else:
        showpath_any(user_paths_list)
|
170 |
|
171 |
|
|
|
172 |
|
|
|
|