Spaces:
Runtime error
Runtime error
Nick Canu
committed on
Commit
·
a79e4c0
1
Parent(s):
abb14f6
report button and tab update
Browse files
.vscode/launch.json
DELETED
@@ -1,16 +0,0 @@
|
|
1 |
-
{
|
2 |
-
// Use IntelliSense to learn about possible attributes.
|
3 |
-
// Hover to view descriptions of existing attributes.
|
4 |
-
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
5 |
-
"version": "0.2.0",
|
6 |
-
"configurations": [
|
7 |
-
{
|
8 |
-
"name": "Python: Module",
|
9 |
-
"type": "python",
|
10 |
-
"request": "launch",
|
11 |
-
"module": "streamlit",
|
12 |
-
"args": ["run", "Home.py"],
|
13 |
-
"justMyCode": true
|
14 |
-
}
|
15 |
-
]
|
16 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Alternate Class Files for Appendix/Community Aggregation - Input Manager.py
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#Alternative input manager for description generator
|
2 |
+
class input_manager:
|
3 |
+
#initialize key dictionary from vector data frame and set community top N
|
4 |
+
def __init__(self,key_df, slim_df, search_tokens, top_n=10):
|
5 |
+
self.key_df = key_df
|
6 |
+
self.slim_df = slim_df
|
7 |
+
self.search_tokens = search_tokens
|
8 |
+
self.key = dict(zip(list(key_df.columns),np.zeros(len(key_df.columns))))
|
9 |
+
self.top_n = top_n
|
10 |
+
self.nlp = spacy.load("en_core_web_md")
|
11 |
+
#translate input text to vector
|
12 |
+
def set_input(self,input_cats):
|
13 |
+
|
14 |
+
#need setup to apply correct group tag to values
|
15 |
+
#separate known/unknown features
|
16 |
+
k_flags = [cat for cat in input_cats if cat in list(self.key.keys())]
|
17 |
+
unk_flags = [cat for cat in input_cats if cat not in list(self.key.keys())]
|
18 |
+
|
19 |
+
#process within feature class similarity for each unknown input
|
20 |
+
if len(unk_flags)>0:
|
21 |
+
outs = []
|
22 |
+
|
23 |
+
for word in unk_flags:
|
24 |
+
if re.match(r"game_type_",word):
|
25 |
+
tok = self.nlp(word.split("_")[-1])
|
26 |
+
mtch = max([(key,key.similarity(tok)) for key in self.search_tokens[0]],key=itemgetter(1))
|
27 |
+
#if no known match is found (model doesn't recognize input word), we're going to discard - other solutions performance prohibitive
|
28 |
+
if mtch[1]>0:
|
29 |
+
outs.append("game_type_"+mtch[0])
|
30 |
+
elif re.match(r"mechanic_",word):
|
31 |
+
tok = self.nlp(word.split("_")[-1])
|
32 |
+
mtch = max([(key,key.similarity(tok)) for key in self.search_tokens[1]],key=itemgetter(1))
|
33 |
+
if mtch[1]>0:
|
34 |
+
outs.append("mechanic_"+mtch[0])
|
35 |
+
elif re.match(r"category_",word):
|
36 |
+
tok = self.nlp(word.split("_")[-1])
|
37 |
+
mtch=max([(key,key.similarity(tok)) for key in self.search_tokens[2]],key=itemgetter(1))
|
38 |
+
if mtch[1]>0:
|
39 |
+
outs.append("category_"+mtch[0])
|
40 |
+
elif re.match(r"family_",word):
|
41 |
+
tok = self.nlp(word.split("_")[-1])
|
42 |
+
mtch=max([(key,key.similarity(tok)) for key in self.search_tokens[3]],key=itemgetter(1))
|
43 |
+
if mtch[1]>0:
|
44 |
+
outs.append("family_"+str(mtch[0]))
|
45 |
+
|
46 |
+
#if unks are processed, rejoin nearest match to known.
|
47 |
+
k_flags = list(set(k_flags+outs))
|
48 |
+
|
49 |
+
#preserve global key and ouput copy w/input keys activated to 1
|
50 |
+
d = self.key.copy()
|
51 |
+
for cat in k_flags:
|
52 |
+
d[cat] = 1.0
|
53 |
+
return d
|
54 |
+
|
55 |
+
def input_parser(self,in_vec):
|
56 |
+
#extracting keys from processed vector
|
57 |
+
ks = [k for k,v in in_vec.items() if v == 1]
|
58 |
+
|
59 |
+
#finding raw "total" match score - how many of the how input columns are hot in each existing vector
|
60 |
+
inter = self.key_df[ks].sum(axis=1)
|
61 |
+
|
62 |
+
#performing operation on each df seems to be slightly quicker than transforming the df here - may refactor though
|
63 |
+
|
64 |
+
#dropping any row without 3 matches (minimum match check)
|
65 |
+
cand_vec = self.key_df.iloc[list(inter[inter>=3].index)]
|
66 |
+
#if parsing returns less ranked matches than specificed top n, reduce threshold to 1 match and check again
|
67 |
+
if len(cand_vec) < self.top_n:
|
68 |
+
cand_vec = self.key_df.iloc[list(inter[inter>=1].index)]
|
69 |
+
|
70 |
+
cand_slim = self.slim_df.iloc[list(inter[inter>=3].index)]
|
71 |
+
if len(cand_slim) < self.top_n:
|
72 |
+
cand_slim = self.key_df.iloc[list(inter[inter>=1].index)]
|
73 |
+
|
74 |
+
return ks,cand_slim,in_vec.values()
|
75 |
+
|
76 |
+
#calculating per community vector pairwise jaccard similarity to input split by feature class
|
77 |
+
def ret_jaccard(self,in_vec,t_vec):
|
78 |
+
gt_score = sklearn.metrics.jaccard_score(in_vec[1:9],t_vec[1:9],zero_division=0)
|
79 |
+
cat_score = sklearn.metrics.jaccard_score(in_vec[192:276],t_vec[192:276],zero_division=0)
|
80 |
+
mech_score = sklearn.metrics.jaccard_score(in_vec[9:192],t_vec[9:192],zero_division=0)
|
81 |
+
fam_score = sklearn.metrics.jaccard_score(in_vec[276:3901],t_vec[276:3901],zero_division=0)
|
82 |
+
if in_vec[0] == t_vec[0]:
|
83 |
+
coop_score = 1
|
84 |
+
else:
|
85 |
+
coop_score = 0
|
86 |
+
|
87 |
+
#initial weighting treats all feature classes as equal - looking into updating this as a feedback mechanism
|
88 |
+
return np.mean([gt_score,cat_score,mech_score,fam_score,coop_score])
|
89 |
+
|
90 |
+
#function to actually return community neighbors
|
91 |
+
def n_neighbors(self,in_data):
|
92 |
+
#applies jaccard func to each row using vectors and maps to "full" df w/text
|
93 |
+
slim, vec, in_vec = in_data
|
94 |
+
vec['score']=vec.apply(lambda x: self.ret_jaccard(in_vec,x),raw=True,axis=1)
|
95 |
+
slim['score']=vec['score']
|
96 |
+
|
97 |
+
#converts to rank - this avoids splitting equal scoring groups inappropriately
|
98 |
+
slim['rank'] = slim['score'].rank(ascending=False)
|
99 |
+
return slim[slim['rank']<self.top_n].sort_values(by=['rank'])
|
100 |
+
|
101 |
+
def query_score(self,outframe, gen_text):
|
102 |
+
#requires text processing function, nearest neighbor community dataframe, and piece of generated text
|
103 |
+
query = doc_text_preprocessing(pd.Series(gen_text))
|
104 |
+
desc_tokens = pd.concat([outframe['cleaned_descriptions'],pd.Series(query)])
|
105 |
+
desc_dict = corpora.Dictionary()
|
106 |
+
desc_corpus = [desc_dict.doc2bow(doc, allow_update=True) for doc in desc_tokens]
|
107 |
+
temp_index = get_tmpfile("index")
|
108 |
+
index = similarities.Similarity(temp_index, desc_corpus, num_features=len(desc_dict.token2id))
|
109 |
+
|
110 |
+
sim_stack = []
|
111 |
+
for sims in index:
|
112 |
+
sim_stack.append(sims)
|
113 |
+
|
114 |
+
return (gen_text,np.mean(np.multiply(out['score'],sim_stack[-1][:-1])))
|
Home.py
CHANGED
@@ -2,6 +2,8 @@ import streamlit as st
|
|
2 |
|
3 |
st.set_page_config(page_title='Auto-BG: The Game Concept Generator', layout='wide')
|
4 |
|
|
|
|
|
5 |
def application():
|
6 |
###Imports
|
7 |
import pandas as pd
|
@@ -14,6 +16,7 @@ def application():
|
|
14 |
from title_generator import Title_Generator
|
15 |
import gzip
|
16 |
import io
|
|
|
17 |
from description_generator import input_manager, model_control
|
18 |
from pathlib import Path
|
19 |
|
@@ -39,7 +42,7 @@ def application():
|
|
39 |
if 'coop_d' not in st.session_state:
|
40 |
st.session_state.coop_d = 0
|
41 |
|
42 |
-
#
|
43 |
#reader code extended from https://gist.github.com/thearn/5424244 for alternate load format
|
44 |
def reader(path):
|
45 |
f = gzip.GzipFile(filename=path)
|
@@ -81,7 +84,7 @@ def application():
|
|
81 |
inter_pair = Tgen.candidate_generator(clean_desc)
|
82 |
out = Tgen.candidate_score(inter_pair,ex_check)
|
83 |
descs.append(out)
|
84 |
-
|
85 |
st.session_state.output_dict = {0:descs[0],1:descs[1],2:descs[2]}
|
86 |
|
87 |
def title_check(next=0):
|
@@ -131,8 +134,23 @@ def application():
|
|
131 |
st.session_state.title_iter = 0
|
132 |
show_title(0)
|
133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
|
135 |
-
|
136 |
###Variables
|
137 |
|
138 |
###Data
|
@@ -161,8 +179,6 @@ def application():
|
|
161 |
|
162 |
Tgen, iman, mctrl = setup_models()
|
163 |
|
164 |
-
|
165 |
-
|
166 |
#UI
|
167 |
|
168 |
#Application
|
@@ -336,7 +352,9 @@ def application():
|
|
336 |
|
337 |
with d_col2:
|
338 |
st.button("See Next Description", on_click=ND_button_clicked, use_container_width=True)
|
339 |
-
|
|
|
|
|
340 |
def blog():
|
341 |
"""
|
342 |
Blog describing the Auto-BG project
|
@@ -386,12 +404,11 @@ def about_us():
|
|
386 |
*MADS (Master of Applied Data Science)*\n
|
387 |
""")
|
388 |
|
389 |
-
|
390 |
-
|
391 |
-
"Blog": blog,
|
392 |
-
"About Us": about_us,
|
393 |
-
}
|
394 |
|
395 |
-
|
396 |
-
|
397 |
|
|
|
|
|
|
2 |
|
3 |
st.set_page_config(page_title='Auto-BG: The Game Concept Generator', layout='wide')
|
4 |
|
5 |
+
tab1, tab2, tab3 = st.tabs(['App', 'Blog', 'About Us'])
|
6 |
+
|
7 |
def application():
|
8 |
###Imports
|
9 |
import pandas as pd
|
|
|
16 |
from title_generator import Title_Generator
|
17 |
import gzip
|
18 |
import io
|
19 |
+
from datetime import date
|
20 |
from description_generator import input_manager, model_control
|
21 |
from pathlib import Path
|
22 |
|
|
|
42 |
if 'coop_d' not in st.session_state:
|
43 |
st.session_state.coop_d = 0
|
44 |
|
45 |
+
#helper functions
|
46 |
#reader code extended from https://gist.github.com/thearn/5424244 for alternate load format
|
47 |
def reader(path):
|
48 |
f = gzip.GzipFile(filename=path)
|
|
|
84 |
inter_pair = Tgen.candidate_generator(clean_desc)
|
85 |
out = Tgen.candidate_score(inter_pair,ex_check)
|
86 |
descs.append(out)
|
87 |
+
results.success("Prompt " +str(status+1)+ "/3 Generated!")
|
88 |
st.session_state.output_dict = {0:descs[0],1:descs[1],2:descs[2]}
|
89 |
|
90 |
def title_check(next=0):
|
|
|
134 |
st.session_state.title_iter = 0
|
135 |
show_title(0)
|
136 |
|
137 |
+
def report():
|
138 |
+
inputs = '|'.join(str(x) for x in st.session_state.inputs)
|
139 |
+
data = {'rprtd': date.today(),'inpts': inputs, 'title': st.session_state.output_dict[st.session_state.desc_iter]['titles'][st.session_state.title_iter][0], 'desc':st.session_state.output_dict[st.session_state.desc_iter]['text']}
|
140 |
+
try:
|
141 |
+
r_df = pd.DataFrame(data, index=[0])
|
142 |
+
r_p = pd.read_pickle(Path(__file__).parent / "Persistent_Data/reported_df.PICKLE")
|
143 |
+
w_p = pd.concat([r_df, r_p])
|
144 |
+
w_p = w_p.drop_duplicates()
|
145 |
+
print('try')
|
146 |
+
print(w_p)
|
147 |
+
w_p.to_pickle(Path(__file__).parent / "Persistent_Data/reported_df.PICKLE")
|
148 |
+
except:
|
149 |
+
print('except')
|
150 |
+
print(r_df)
|
151 |
+
r_df.to_pickle(Path(__file__).parent / "Persistent_Data/reported_df.PICKLE")
|
152 |
|
153 |
+
|
154 |
###Variables
|
155 |
|
156 |
###Data
|
|
|
179 |
|
180 |
Tgen, iman, mctrl = setup_models()
|
181 |
|
|
|
|
|
182 |
#UI
|
183 |
|
184 |
#Application
|
|
|
352 |
|
353 |
with d_col2:
|
354 |
st.button("See Next Description", on_click=ND_button_clicked, use_container_width=True)
|
355 |
+
|
356 |
+
st.button('Report', on_click=report, use_container_width=True)
|
357 |
+
|
358 |
def blog():
|
359 |
"""
|
360 |
Blog describing the Auto-BG project
|
|
|
404 |
*MADS (Master of Applied Data Science)*\n
|
405 |
""")
|
406 |
|
407 |
+
with tab1:
|
408 |
+
application()
|
|
|
|
|
|
|
409 |
|
410 |
+
with tab2:
|
411 |
+
blog()
|
412 |
|
413 |
+
with tab3:
|
414 |
+
about_us()
|