etukurudinesh committed on
Commit
f8bc2f2
·
1 Parent(s): 73ab785

feat: interface for users

Browse files
Files changed (1) hide show
  1. app.py +313 -4
app.py CHANGED
@@ -1,7 +1,316 @@
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- with gr.Blocks(theme=gr.themes.Glass()) as demo:
4
- open_google = gr.Button(value="Client", link="https://huggingface.co/spaces/WD101/OneClientToRuleThemAll")
5
- open_bing = gr.Button(value="Server", link="https://huggingface.co/spaces/WD101/OneServerToRuleThemAll")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- demo.launch()
 
1
+ #import gradio as gr
2
+
3
+ #with gr.Blocks(theme=gr.themes.Glass()) as demo:
4
+ # open_google = gr.Button(value="Client", link="https://huggingface.co/spaces/WD101/OneClientToRuleThemAll")
5
+ # open_bing = gr.Button(value="Server", link="https://huggingface.co/spaces/WD101/OneServerToRuleThemAll")
6
+
7
+ #demo.launch()
8
+
9
  import gradio as gr
10
+ import pandas as pd
11
+ import requests
12
+ import math
13
+ import json
14
+
15
+
16
def scrape_websites(urls_input):
    """Scrape multiple URLs via the local scraping service and paginate the results.

    Args:
        urls_input (str): A string containing URLs, one per line.

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: First page (up to 5 rows) with columns ["URL", "Content", "Status"].
            - str: Status message (e.g., "Scraping completed", "No URLs provided").
            - pd.DataFrame: Full DataFrame with all results.
            - int: Current page number (always 1 after a fresh scrape).
            - str: Page information (e.g., "Page 1").
    """
    if not urls_input:
        return pd.DataFrame(), "No URLs provided", pd.DataFrame(), 1, "Page 1"

    urls = [url.strip() for url in urls_input.split("\n") if url.strip()]
    if not urls:
        return pd.DataFrame(), "No valid URLs provided", pd.DataFrame(), 1, "Page 1"

    results = []
    for url in urls:
        try:
            # timeout= keeps the UI from hanging forever if the scraper
            # service at localhost:8000 is down or unresponsive.
            response = requests.post(
                "http://localhost:8000/scrape", json={"url": url}, timeout=30
            )
            result = response.json()
            if "error" in result:
                results.append({"URL": url, "Content": "", "Status": f"Error: {result['error']}"})
            else:
                results.append({"URL": url, "Content": result.get("text", "No content extracted"), "Status": "Success"})
        except Exception as e:
            # Per-URL failures are recorded as rows so one bad URL
            # doesn't abort the whole batch.
            results.append({"URL": url, "Content": "", "Status": f"Error: {str(e)}"})

    df = pd.DataFrame(results, columns=["URL", "Content", "Status"])
    paginated_df = df.head(5)
    status = "Scraping completed" if results else "No results to display"
    page = 1
    page_info = f"Page {page}"
    return paginated_df, status, df, page, page_info
55
+
56
+
57
def change_page(full_df, current_page, direction):
    """Move the displayed results view one page forward or backward.

    Args:
        full_df (pd.DataFrame): Every scraped row.
        current_page (int): Page currently shown (1-based).
        direction (str): Navigation direction, "next" or "prev".

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: The visible slice (up to 5 rows).
            - str: Status message.
            - pd.DataFrame: The unchanged full DataFrame.
            - int: Updated page number.
            - str: Page label such as "Page 2 of 3".
    """
    if full_df.empty:
        return pd.DataFrame(), "No results to display", full_df, current_page, f"Page {current_page}"

    rows_per_page = 5
    total_pages = math.ceil(len(full_df) / rows_per_page)

    # Step forward/backward only while the target page stays in range.
    if direction == "next" and current_page + 1 <= total_pages:
        current_page += 1
    elif direction == "prev" and current_page - 1 >= 1:
        current_page -= 1

    start = rows_per_page * (current_page - 1)
    paginated_df = full_df.iloc[start:start + rows_per_page]

    page_info = f"Page {current_page} of {total_pages}"
    status = "Page updated" if not paginated_df.empty else "No results on this page"
    return paginated_df, status, full_df, current_page, page_info
92
+
93
+
94
def retrieve_notes():
    """Fetch notes from Server 2 and return them as a DataFrame.

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: DataFrame with columns ["id", "topic", "notes", "url", "tag"].
            - str: Status message (e.g., "Notes loaded successfully", "No notes found").
    """
    try:
        # timeout= keeps the UI responsive if the notes service at
        # localhost:8001 is unreachable.
        response = requests.get("http://localhost:8001/notes", timeout=30)
        result = response.json()
        if "error" in result or not result:
            return pd.DataFrame(), "No notes found"
        # Normalize each record so missing fields get stable defaults.
        processed_results = [
            {
                "id": item.get("id", "N/A"),
                "topic": item.get("topic", "Summary"),
                "notes": item.get("notes", ""),
                "url": item.get("url", ""),
                "tag": item.get("tag", "General")
            } for item in result
        ]
        df = pd.DataFrame(processed_results, columns=["id", "topic", "notes", "url", "tag"])
        return df, "Notes loaded successfully"
    except Exception as e:
        return pd.DataFrame(), f"Error: {str(e)}"
123
+
124
+
125
def filter_notes(notes_df, max_rows, search_query, search_field):
    """Filter and search notes based on user input.

    Args:
        notes_df (pd.DataFrame): DataFrame containing notes.
        max_rows (str): Maximum rows to display ("5", "10", "25", or "All").
        search_query (str): Search term to filter notes (plain substring,
            case-insensitive).
        search_field (str): Field to search ("id", "topic", "notes", "url",
            "tag", or "all").

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: Filtered DataFrame.
            - str: Status message (e.g., "Filtered notes loaded",
              "No matching notes found").
    """
    if notes_df.empty:
        return pd.DataFrame(), "No notes available"
    try:
        filtered_df = notes_df.copy()
        if search_query and search_field:
            search_query = search_query.lower()
            if search_field == "all":
                filtered_df = filtered_df[
                    filtered_df.apply(
                        lambda row: any(search_query in str(val).lower() for val in row), axis=1
                    )
                ]
            else:
                # astype(str): numeric columns (e.g. "id") have no .str
                # accessor and would raise AttributeError otherwise.
                # regex=False: treat the query as a literal substring, matching
                # the behavior of the "all" branch above.
                filtered_df = filtered_df[
                    filtered_df[search_field].astype(str).str.lower().str.contains(
                        search_query, na=False, regex=False
                    )
                ]
        if max_rows != "All":
            max_rows = int(max_rows)
            filtered_df = filtered_df.head(max_rows)
        status = "Filtered notes loaded" if not filtered_df.empty else "No matching notes found"
        return filtered_df, status
    except Exception as e:
        return pd.DataFrame(), f"Error: {str(e)}"
162
+
163
+
164
def view_note_content(selected_row: int, notes_df):
    """Return the body text of the note at the given row index.

    Args:
        selected_row (int): Positional index of the chosen row.
        notes_df (pd.DataFrame): Notes table containing a "notes" column.

    Returns:
        str: The content of the selected note or an error/status message.
    """
    nothing_to_show = selected_row is None or notes_df.empty
    if nothing_to_show:
        return "No note selected or no data available"
    try:
        row = notes_df.iloc[selected_row]
        return row["notes"]
    except Exception as e:
        return f"Error: {str(e)}"
180
+
181
+
182
def download_notes(notes_df, format_choice):
    """Serialize the notes table and expose it for download.

    Args:
        notes_df (pd.DataFrame): DataFrame containing notes.
        format_choice (str): Download format ("CSV" or "JSON").

    Returns:
        tuple: A tuple containing:
            - gr.File or None: File component pointing at the exported file,
              or None when there is nothing to download.
            - str: Status message (e.g., "Download ready",
              "Data not available to download").
    """
    if notes_df.empty:
        return None, "Data not available to download"
    try:
        if format_choice == "CSV":
            content = notes_df.to_csv(index=False)
            suffix = ".csv"
        elif format_choice == "JSON":
            content = notes_df.to_json(orient="records", lines=True)
            suffix = ".json"
        else:
            return None, "Invalid format selected"

        # gr.File takes a file path as its value, not raw bytes, and has no
        # ``filename``/``mime_type`` kwargs — write the export to a temp file
        # and hand its path to the component.
        import tempfile
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=suffix, prefix="notes_", delete=False
        ) as tmp:
            tmp.write(content)
            export_path = tmp.name
        return gr.File(value=export_path, visible=True), "Download ready"
    except Exception as e:
        return None, f"Error: {str(e)}"
211
+
212
+
213
# Gradio Tabbed Interface
# Two tabs: a URL scraper (posts to the scraping service) and a notes
# viewer (fetches, filters, inspects, and downloads stored notes).
with gr.Blocks() as app:
    gr.Markdown("# Knowledge Store App")
    with gr.Tabs():
        # Tab 1: Input Client with Multi-URL Support and Pagination
        with gr.Tab(label="URL Scraper"):
            url_input = gr.Textbox(
                label="Enter Webpage URLs (one per line)",
                placeholder="https://example.com\nhttps://wikipedia.org",
                lines=5
            )
            scrape_button = gr.Button("Scrape URLs")
            scrape_output = gr.Dataframe(
                headers=["URL", "Content", "Status"],
                label="Scraped Results",
                wrap=False
            )
            scrape_status = gr.Textbox(label="Status")
            with gr.Row():
                prev_button = gr.Button("Previous Page")
                next_button = gr.Button("Next Page")
            page_info = gr.Textbox(label="Page", value="Page 1", interactive=False)
            # Hidden state: the complete scrape results and the 1-based page index,
            # threaded through every pagination callback below.
            full_results = gr.State(pd.DataFrame())
            current_page = gr.State(1)

            scrape_button.click(
                fn=scrape_websites,
                inputs=url_input,
                outputs=[scrape_output, scrape_status, full_results, current_page, page_info]
            )
            # gr.State("prev") / gr.State("next") supply a constant third
            # argument so one change_page function serves both buttons.
            prev_button.click(
                fn=change_page,
                inputs=[full_results, current_page, gr.State("prev")],
                outputs=[scrape_output, scrape_status, full_results, current_page, page_info]
            )
            next_button.click(
                fn=change_page,
                inputs=[full_results, current_page, gr.State("next")],
                outputs=[scrape_output, scrape_status, full_results, current_page, page_info]
            )

        # Tab 2: Retrieval Client with Enhanced Columns
        with gr.Tab(label="View Notes"):
            with gr.Row():
                max_rows = gr.Dropdown(
                    choices=["5", "10", "25", "All"],
                    value="All",
                    label="Max Rows to Display"
                )
                search_query = gr.Textbox(label="Search Notes", placeholder="Enter search term")
                search_field = gr.Dropdown(
                    choices=["id", "topic", "notes", "url", "tag", "all"],
                    value="all",
                    label="Search Field"
                )
            retrieve_button = gr.Button("Fetch Notes")
            notes_table = gr.Dataframe(
                headers=["id", "topic", "notes", "url", "tag"],
                label="Stored Notes",
                interactive=True,
                wrap=False
            )
            notes_status = gr.Textbox(label="Status")
            content_view = gr.Textbox(label="Selected Note Content", lines=5)
            with gr.Row():
                format_choice = gr.Dropdown(
                    choices=["CSV", "JSON"],
                    value="CSV",
                    label="Download Format"
                )
                download_button = gr.Button("Download Notes")
            download_file = gr.File(label="Download File", visible=False)

            retrieve_button.click(
                fn=retrieve_notes,
                outputs=[notes_table, notes_status]
            )
            # NOTE(review): all three filter triggers read from AND write back to
            # notes_table, so successive filters operate on already-filtered data
            # rather than the full fetched set — confirm this is intended.
            max_rows.change(
                fn=filter_notes,
                inputs=[notes_table, max_rows, search_query, search_field],
                outputs=[notes_table, notes_status]
            )
            search_query.change(
                fn=filter_notes,
                inputs=[notes_table, max_rows, search_query, search_field],
                outputs=[notes_table, notes_status]
            )
            search_field.change(
                fn=filter_notes,
                inputs=[notes_table, max_rows, search_query, search_field],
                outputs=[notes_table, notes_status]
            )
            # NOTE(review): view_note_content expects (row_index, notes_df) but only
            # the table is wired as an input — the selected row index is likely
            # missing (Gradio usually delivers it via a gr.SelectData event
            # parameter). Verify against the Gradio select-event docs.
            notes_table.select(
                fn=view_note_content,
                inputs=[notes_table],
                outputs=content_view
            )
            download_button.click(
                fn=download_notes,
                inputs=[notes_table, format_choice],
                outputs=[download_file, notes_status]
            )

app.launch()