Files changed (1) hide show
  1. app.py +3 -331
app.py CHANGED
@@ -1,336 +1,8 @@
1
- import functions as funky # need to enable this for Hugging Face
2
- import pandas as pd
3
  import gradio as gr
4
- import os
5
- from datasets import load_dataset
6
- from huggingface_hub import login
7
- import numpy as np
8
- from fastapi import FastAPI, Request
9
- import uvicorn
10
- from starlette.middleware.sessions import SessionMiddleware
11
- import fastapi
12
- from datetime import datetime
13
- import re
14
 
15
- login(token = os.environ['HUB_TOKEN'])
16
 
17
- # logger = gr.HuggingFaceDatasetSaver(os.environ['HUB_TOKEN'], dataset_name='illustration_gdrive_logging_main', private=True)
18
- # logger.setup([gr.Text(label="clicked_url"), gr.Text(label="seach_term"), gr.Text(label = 'sessionhash'), gr.Text(label = 'datetime')], './flagged_data_points')
19
-
20
-
21
- logging_js = '''
22
- function magicFunc(x){
23
- let script = document.createElement('script');
24
- script.src = "file/js_functions.js"
25
- document.head.appendChild(script);
26
- }
27
- '''
28
-
29
# --- Load the illustration catalogue and the AI-generated captions from the Hub ---
dataset = load_dataset("bradley6597/illustration-test", data_files = 'data.csv')
df = pd.DataFrame(dataset['train']).drop_duplicates()

dataset_ai = load_dataset("bradley6597/illustration-test", data_files = 'ai_captions_data.csv')
ai_captions = pd.DataFrame(dataset_ai['train']).drop_duplicates()

# Left join so illustrations without an AI caption are kept (caption becomes '').
df = df.merge(ai_captions, how = 'left', on = 'clean_link')
df['ai_description'] = df['ai_description'].fillna('')

# Rows whose description is exactly 'Moved' are excluded from the search.
ill_links = df[df['Description'] != 'Moved'].copy()

# The Drive file id is what is left of the share link once the fixed prefix
# and suffix are stripped.
ill_links['code'] = (ill_links['link']
                     .str.replace("https://drive.google.com/file/d/", "", regex = False)
                     .str.replace("/view?usp=drivesdk", "", regex = False))
ill_links['filename'] = ill_links['file'].str.replace(".*\\/", "", regex = True)

# Thumbnail URL for Google account slot 0; search_index later swaps "/u/0/"
# for the slot the user selected.
thumbnail_url = 'https://lh3.google.com/u/0/d/' + ill_links['code'] + '=w320-h304'

# HTML card shared by every image: clickable thumbnail + filename, a download
# icon and an "open parent folder" icon. The closing </center> is appended below.
card_html = ('<center><a href="' + ill_links['link'] + '" target="_blank" onclick="magicFunc(\'' + ill_links['code'] + '\')"><img src="' + thumbnail_url + '" style="max-height:400px; max-width:200px"> ' + ill_links['filename'] + '</a><br><a class="icon" href="https://drive.google.com/u/0/uc?id=' + ill_links['code'] + '&export=download"><img src="/file/file-download.png" width="30" height="30"></a><a class="icon" href="https://drive.google.com/drive/u/0/folders/' + ill_links['parent_id'] + '" target="_blank"><img src="/file/folder-small.png" width="30" height="30"></a>')

# Only files ending in ".png" additionally get the "Make Draggable" button.
ill_links['image_code'] = np.where(
    ill_links['file'].str.contains("\\.png$", regex = True),
    card_html + '<button class="submit-btn" onclick="mdFunc(this.parentNode)">Make Draggable</button></center>',
    card_html + '</center>',
)

# First path component after the Shared Drives mount point.
ill_links['shared_drive'] = ill_links['file'].str.replace("/content/drive/Shareddrives/", "", regex = False)
ill_links['shared_drive'] = ill_links['shared_drive'].str.replace("(.*?)\\/.*", "\\1", regex = True)
ill_links['Description'] = ill_links['Description'].str.replace("No Description", "", regex = False)

# Column names handed to funky.index_documents below.
ill_links['ID'] = ill_links.index
ill_links['title'] = ill_links['filename']
ill_links['url'] = ill_links['image_code']
ill_links['filepath'] = ill_links['file']
ill_links['post_filepath'] = ill_links['filepath'].str.replace(".*?\\/KS1 EYFS\\/", "", regex = True)

# Three variants of the same rows that differ only in the text put in 'abstract':
#   main  -> filename + description, title -> filename only, ai -> AI caption.
ill_links_title = ill_links.copy()
ill_links_ai = ill_links.copy()

ill_links['abstract'] = ill_links['filename'].str.replace("\\-|\\_", " ", regex = True) + ' ' + ill_links['Description'].str.replace(",", " ", regex = False).astype(str)
ill_links_title['abstract'] = ill_links_title['filename'].str.replace('\\-|\\_', " ", regex = True)
# Read the caption from the AI frame itself rather than from the sibling copy.
ill_links_ai['abstract'] = ill_links_ai['ai_description']

# One-row table showing the first illustration's thumbnail as seen from Google
# account slots 0-4, each labelled with its slot number.
first_code = ill_links['code'].iloc[0]
ill_check_lst = [
    f'<p>{i}</p>'
    + '<img onmousedown="mdFunc(this)" src="'
    + f'https://lh3.google.com/u/{i}/d/' + first_code + '=w320-h304'
    + '" style="max-height:400px; max-width:25%">'
    for i in range(0, 5)
]
ill_check_df = pd.DataFrame(ill_check_lst).T
ill_check_html = ill_check_df.to_html(escape = False, render_links = True, index = False, header = False)

ill_links = ill_links[['ID', 'title', 'url', 'abstract', 'filepath', 'Date Created', 'post_filepath', 'parent_id']]
ill_links_title = ill_links_title[['ID', 'title', 'url', 'abstract', 'filepath', 'Date Created', 'Description', 'post_filepath', 'parent_id']]
ill_links_ai = ill_links_ai[['ID', 'title', 'url', 'abstract', 'filepath', 'Date Created', 'Description', 'post_filepath', 'parent_id']]

# Build one index per variant; each frame is dropped as soon as it is indexed.
ind_main, doc_main, tf_main = funky.index_documents(ill_links)
del ill_links
ind_title, doc_title, tf_title = funky.index_documents(ill_links_title)
del ill_links_title
ind_ai, doc_ai, tf_ai = funky.index_documents(ill_links_ai)
del ill_links_ai
85
-
86
def same_auth(username, password):
    """Return True only when both credentials match the configured ones.

    The expected values are read from the ``username`` and ``password``
    environment variables. If either variable is unset, or either argument
    is not a string, the login is rejected instead of raising.
    """
    import hmac

    expected_username = os.environ.get('username')
    expected_password = os.environ.get('password')
    if expected_username is None or expected_password is None:
        # Fail closed: no configured credentials means nobody can log in.
        return False
    if not isinstance(username, str) or not isinstance(password, str):
        return False

    # Compare as bytes so non-ASCII input is accepted by compare_digest, and
    # evaluate both checks before combining them so the time taken does not
    # reveal which of the two fields was wrong.
    username_ok = hmac.compare_digest(username.encode('utf-8'), expected_username.encode('utf-8'))
    password_ok = hmac.compare_digest(password.encode('utf-8'), expected_password.encode('utf-8'))
    return username_ok and password_ok
88
-
89
-
90
def search_index(search_text, sd, ks, sort_by, max_results, user_num, search_title, image_type, do_not_use, increase = None):
    """Search the illustration indexes and render the hits as an HTML grid.

    Parameters
    ----------
    search_text : query string handed to ``funky.search``.
    sd : shared-drive names; when non-empty only rows whose ``filepath``
        contains one of them are kept.
    ks : key-stage labels; when non-empty they are joined into a regex and
        matched against the lower-cased ``abstract`` / ``post_filepath``.
    sort_by : ``'Relevance'``, ``'Date Created'`` or ``'A-Z'``.
    max_results : one of the dropdown strings (``'10'`` ... ``'All'``).
    user_num : Google account slot substituted for ``/u/0/`` in result URLs;
        ``None`` is treated as slot 0.
    search_title : when truthy only the title index is searched, otherwise
        the main index plus the AI-caption index.
    image_type : extensions (e.g. ``'PNG'``); a file is kept when its path
        ends with one of them (``jpeg`` is matched as ``jpg``).
    do_not_use : when truthy, drops rows whose path matches the
        "do not use" patterns.
    increase : when truthy, ``max_results`` is stepped to the next larger
        dropdown option (staying at ``'All'`` if already there).

    Returns
    -------
    tuple
        ``(html, dropdown_update, load_more_update)``: the centred results
        table, a ``gr.update`` carrying the total count in the label and the
        effective ``max_results``, and a ``gr.update`` toggling the
        "load more" button.
    """
    max_results_list = ['10', '25', '50', '75', '100', '250', '500', '1000', '5000', '10000', 'All']
    if increase:
        # Step to the next option, clamped so that 'All' stays 'All' instead
        # of indexing past the end of the list.
        next_position = max_results_list.index(max_results) + 1
        max_results = max_results_list[min(next_position, len(max_results_list) - 1)]

    if search_title:
        output = funky.search(tf_title, doc_title, ind_title, search_text, search_type = 'AND', ranking = True)
    else:
        output = funky.search(tf_main, doc_main, ind_main, search_text, search_type = 'AND', ranking = True)
        # Don't need to order by AI as the AI ranking numbers are much lower than the default numbers
        output.extend(funky.search(tf_ai, doc_ai, ind_ai, search_text, search_type = 'AND', ranking = True))

    # Flatten the per-hit sequences and discard their float members.
    output = [x for o in output for x in o if type(x) is not float]
    load_more_visible = False
    # A cleared number box arrives as None; fall back to account slot 0.
    account_slot = 0 if user_num is None else int(user_num)

    if output:
        # One row per distinct card (url); the first hit for a url wins.
        output_df = (pd.DataFrame(output)
                     .groupby('url')
                     .first()
                     .reset_index()
                     .drop_duplicates())
        output_df['Date Created'] = pd.to_datetime(output_df['Date Created'], format = 'mixed')
        if do_not_use:
            flagged = output_df['filepath'].str.lower().str.contains("do.*not.*use|not.*general|don\\'t.*use|do.*no.*use|numberblock", regex = True)
            output_df = output_df[~flagged].copy()

        output_df['url'] = output_df['url'].str.replace("/u/0/", f"/u/{account_slot}/", regex = False)

        if sd:
            # Keep rows located in any selected drive. A single boolean mask
            # preserves row order and never duplicates a row that matches
            # more than one drive name.
            in_selected_drive = pd.Series(False, index = output_df.index)
            for shared in sd:
                in_selected_drive |= output_df['filepath'].str.contains(str(shared), regex = False, na = False)
            output_df = output_df[in_selected_drive].copy()

        if ks and search_title:
            # The title index leaves the description out of 'abstract'; add it
            # back so the key-stage match below can see it.
            output_df['abstract'] = output_df['abstract'] + ' ' + output_df['Description']

        output_df['abstract'] = output_df['abstract'].str.lower()
        output_df['post_filepath'] = output_df['post_filepath'].str.lower()
        # 1 when the text names no key stage at all, 0 otherwise.
        output_df['missing_desc'] = np.where(output_df['abstract'].str.contains('eyfs|ks1|ks2|ks3', regex = True), 0, 1)

        if ks:
            keystage_filter = '|'.join(ks).lower()
            output_df2 = output_df[(output_df['abstract'].str.contains(keystage_filter, regex = True) | (output_df['missing_desc'] == 1))].copy()
            output_df2 = output_df2[(output_df2['post_filepath'].str.contains(keystage_filter, regex = True))]
            if output_df2.shape[0] == 0:
                # Nothing survived both checks: fall back to the path check alone.
                output_df2 = output_df[(output_df['post_filepath'].str.contains(keystage_filter, regex = True))]
        else:
            output_df2 = output_df

        # Give every row the smallest row label found among rows that share
        # its parent folder; 'Relevance' sorting uses it to group by folder.
        output_df2 = output_df2.copy()
        output_df2['ind'] = output_df2.index
        min_parent_score = output_df2.groupby('parent_id')['ind'].min().reset_index()
        min_parent_score.columns = ['parent_id', 'min_parent_ind']
        output_df2 = output_df2.merge(min_parent_score, how = 'left', on = 'parent_id')

        if sort_by == 'Relevance':
            output_df2 = output_df2.sort_values(by = ['missing_desc', 'min_parent_ind'], ascending = [True, True])
        elif sort_by == 'Date Created':
            output_df2 = output_df2.sort_values(by = ['Date Created'], ascending = False)
        elif sort_by == 'A-Z':
            output_df2 = output_df2.sort_values(by = ['title'], ascending = True)

        # e.g. ['PNG', 'JPEG'] -> 'png$|jpg$'
        image_type_filter = '$|'.join(image_type).lower().replace("jpeg", "jpg") + '$'

        output_df2 = output_df2[output_df2['filepath'].str.contains(image_type_filter, regex = True)].reset_index(drop = True)
        total_returned = 'No. of Results to Return (Total: ' + str(output_df2.shape[0]) + ')'

        if max_results != 'All':
            result_limit = int(max_results)
            if output_df2.shape[0] > result_limit:
                load_more_visible = True
            output_df2 = output_df2.head(result_limit)
        output_df2 = output_df2[['url']].reset_index(drop = True)

        # Lay the cards out row-major in a grid of max_cols columns:
        # result i goes to column i % max_cols.
        max_cols = 5
        grid_columns = [
            output_df2.loc[output_df2.index % max_cols == x, ['url']].reset_index(drop = True)
            for x in range(0, max_cols)
        ]
        final_df = pd.concat(grid_columns, axis = 1).fillna('')
    else:
        final_df = pd.DataFrame(['<h3>No Results Found :(</h3>'])
        total_returned = 'No. of Results to Return (Total: 0)'

    if final_df.shape[0] == 0:
        # Hits existed but every one of them was filtered out.
        final_df = pd.DataFrame(['<h3>No Results Found :(</h3>'])

    return('<center>' +
           final_df.to_html(escape = False, render_links = True, index = False, header = False) +
           '</center>',
           gr.update(label = total_returned, value = max_results),
           gr.update(visible = load_more_visible))
191
-
192
-
193
def search_logging(x: str, request: gr.Request):
    """Placeholder hook for search logging; currently does nothing.

    The dataset-logger call that would have recorded the search term, the
    ``access-token`` cookie and a timestamp is disabled, so both arguments
    are accepted and ignored.
    """
    # Intentionally a no-op while logging is switched off.
    return None
197
-
198
- back_to_top_btn_html = '''
199
- <button id="toTopBtn" onclick="'parentIFrame' in window ? window.parentIFrame.scrollTo({top: 0, behavior:'smooth'}) : window.scrollTo({ top: 0 })">
200
- <a style="color:white; text-decoration:none;">Back to Top!</a>
201
- </button>
202
- '''
203
-
204
- style = '''
205
- footer{
206
- display: none !important;
207
- }
208
-
209
- td img{
210
- background-image:
211
- linear-gradient(45deg, lightgrey 25%, transparent 25%),
212
- linear-gradient(135deg, lightgrey 25%, transparent 25%),
213
- linear-gradient(45deg, transparent 75%, lightgrey 75%),
214
- linear-gradient(135deg, transparent 75%, lightgrey 75%);
215
-
216
- background-size: 20px 20px;
217
- background-position: 0 0, 10px 0, 10px -10px, 0px 10px;
218
- }
219
- #toTopBtn {
220
- position: fixed;
221
- bottom: 10px;
222
- float: right;
223
- right: 18.5%;
224
- left: 77.25%;
225
- height: 30px;
226
- max-width: 100px;
227
- width: 100%;
228
- font-size: 12px;
229
- border-color: rgba(217,24,120, .5);
230
- background-color: rgba(35,153,249,.5);
231
- padding: .5px;
232
- border-radius: 4px;
233
- }
234
-
235
- .submit-btn{
236
- display:inline-block !important;
237
- padding:0.7em 1.4em !important;
238
- margin:0 0.3em 0.3em 0 !important;
239
- border-radius:0.15em !important;
240
- box-sizing: border-box !important;
241
- text-decoration:none !important;
242
- font-family:'Roboto',sans-serif !important;
243
- text-transform:uppercase !important;
244
- font-weight:400 !important;
245
- color:#FFFFFF !important;
246
- background-color:#3369ff !important;
247
- box-shadow:inset 0 -0.6em 0 -0.35em rgba(0,0,0,0.17) !important;
248
- text-align:center !important;
249
- position:relative !important;
250
- }
251
- .submit-btn:active{
252
- top:0.1em !important;
253
- }
254
- @media all and (max-width:30em){
255
- .submit-btn{
256
- display:block !important;
257
- margin:0.4em auto !important;
258
- }
259
- }
260
- #mapBorder {
261
- border-radius: 25px;
262
- border: 2px solid orange;
263
- }
264
- .icon {
265
- width:50%;
266
- float: left;
267
- }
268
- '''
269
-
270
- with gr.Blocks(css=style,
271
- js = logging_js
272
- ) as app:
273
  with gr.Row():
274
- with gr.Column(min_width = 10):
275
- with gr.Row():
276
- gr.HTML("<center><p>If you can't see the images please make sure you are signed in to your Twinkl account on Google & you have access to the Shared Drives you are searching :)</p><p>To drag images click 'Make Draggable' button and wait until it says 'Drag It!'. After this you can drag the image into a folder on your computer</p></center>")
277
- gr.HTML(ill_check_html)
278
- user_num = gr.Number(value = 0, label = 'Put lowest number of the alarm clock you can see')
279
- with gr.Row():
280
- with gr.Column(min_width = 0):
281
- search_prompt = gr.Textbox(placeholder = 'search for an illustration', label = 'Search', elem_id = 'search_term')
282
- title_search = gr.Checkbox(label = 'Search title only')
283
- do_not_use = gr.Checkbox(label = 'Remove Do Not Use Images', value = True)
284
-
285
- with gr.Column(min_width = 0):
286
- shared_drive = gr.Dropdown(choices = ['Accurate Maps and Flags', 'Aus and Nz - Phonics Illustrations', 'Australia - Rhino Readers Illustrations', 'Beyond - Illustrations', 'DO NOT USE IN GENERAL RESOURCES - South Africa', 'Illustrations - 01-10 to 07-22', 'Illustrations - Now', 'Shutter Stock Images', 'Twinkl Art Gallery', 'USA 3rd-8th Grade Illustrations '], multiselect = True, label = 'Shared Drive', value = ['Illustrations - 01-10 to 07-22', 'Illustrations - Now'])
287
- with gr.Column(min_width = 0):
288
- key_stage = gr.Dropdown(choices = ['EYFS', 'KS1', 'KS2', 'KS3'], multiselect = True, label = 'Key Stage', value = ['EYFS', 'KS1', 'KS2', 'KS3'])
289
- with gr.Column(min_width = 0):
290
- image_type = gr.Dropdown(choices = ['JPEG', 'PNG', 'TIF', 'TIFF'], multiselect = True, label = 'Image Type', value = ['PNG', 'JPEG', 'TIF', 'TIFF'])
291
- with gr.Column(min_width = 0):
292
- sort_by = gr.Dropdown(choices = ['Relevance', 'Date Created', 'A-Z'], value = 'Relevance', multiselect = False, label = 'Sort By')
293
- max_return = gr.Dropdown(choices = ['10', '25', '50', '75', '100', '250', '500', '1000', '5000', '10000', 'All'], value = '50', multiselect = False, label = 'No. of Results to Return (Total: 0)')
294
- with gr.Row():
295
- search_button = gr.Button(value="Search!", interactive = True)
296
- with gr.Row():
297
- output_df = gr.HTML()
298
- back_top_btn = gr.HTML(back_to_top_btn_html)
299
- load_more_results_btn = gr.Button(value = 'Load More Results', interactive = True, visible = False)
300
- search_button.click(search_index, inputs=[search_prompt, shared_drive, key_stage, sort_by, max_return, user_num, title_search, image_type, do_not_use], outputs=[output_df, max_return, load_more_results_btn])
301
- search_prompt.submit(search_index, inputs=[search_prompt, shared_drive, key_stage, sort_by, max_return, user_num, title_search, image_type, do_not_use], outputs=[output_df, max_return, load_more_results_btn])
302
- search_button.click(search_logging, inputs=[search_prompt], outputs=None)
303
- search_prompt.submit(search_logging, inputs=[search_prompt], outputs=None)
304
- load_more_results_btn.click(search_index, inputs=[search_prompt, shared_drive, key_stage, sort_by, max_return, user_num, title_search, image_type, do_not_use, load_more_results_btn], outputs=[output_df, max_return, load_more_results_btn])
305
- app.load()
306
-
307
- app.auth = (same_auth)
308
- app.auth_message = ''
309
-
310
- fapi = FastAPI()
311
-
312
- fapi.add_middleware(SessionMiddleware, secret_key=os.environ['session_key'])
313
-
314
@fapi.middleware("http")
async def add_session_hash(request: Request, call_next):
    """Echo the incoming ``session`` cookie back on the response as HttpOnly.

    The request is handled first; if it carried a non-empty ``session``
    cookie, that same value is set again on the outgoing response with
    ``httponly=True``. Requests without the cookie pass through unchanged.
    """
    response = await call_next(request)

    incoming_session = request.cookies.get('session')
    if not incoming_session:
        return response

    response.set_cookie(key='session', value=incoming_session, httponly=True)
    return response
321
-
322
# custom get request handler with params to flag clicks
@fapi.get("/track")
async def track(url: str, q: str, request: Request):
    """Acknowledge a click-tracking request.

    ``url`` and ``q`` are required string parameters and ``request`` is the
    incoming request. None of them is read at present because the logger
    call that recorded them is disabled; the handler only returns a fixed
    acknowledgement.
    """
    # The former `if q is None` fallback was removed: `q` is declared as a
    # required `str`, and nothing below reads it.
    return {"message": "ok"}
331
 
332
# Attach the Gradio Blocks UI to the FastAPI app at the site root, guarded by
# the same credential check used for the Gradio login.
app2 = gr.mount_gradio_app(
    fapi,
    app,
    path = "/",
    allowed_paths = ["."],
    auth = same_auth,
)

# Start the ASGI server only when this file is executed as a script.
if __name__ == "__main__":
    uvicorn.run(app2, host = "0.0.0.0", port = 7860)
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
2
 
 
3
 
4
# Single-page notice: the search tool is retired and users are sent to its
# replacement site.
retirement_notice_html = "<h1>This tool is no longer updated. Please go to this <a href='https://sites.google.com/twinkl.co.uk/gdrive-illustration-search/home'>link</a></h1>"

with gr.Blocks() as app:
    with gr.Row():
        gr.HTML(retirement_notice_html)

# Start serving the notice page.
app.launch()