Spaces:
Sleeping
Sleeping
#import gradio as gr | |
#with gr.Blocks(theme=gr.themes.Glass()) as demo: | |
# open_google = gr.Button(value="Client", link="https://huggingface.co/spaces/WD101/OneClientToRuleThemAll") | |
# open_bing = gr.Button(value="Server", link="https://huggingface.co/spaces/WD101/OneServerToRuleThemAll") | |
#demo.launch() | |
import gradio as gr | |
import pandas as pd | |
import requests | |
import math | |
import json | |
def scrape_websites(urls_input, *, endpoint="http://localhost:8000/scrape",
                    timeout=30, rows_per_page=5):
    """Scrape every URL listed in ``urls_input`` and return the first page of results.

    Each URL is POSTed as ``{"url": ...}`` to the scraper endpoint. The reply is
    expected to be a JSON object carrying either an ``"error"`` message or the
    extracted ``"text"``. A failure on one URL is recorded in that URL's
    ``Status`` cell and does not abort the rest of the batch.

    Args:
        urls_input (str): A string containing URLs, one per line. Blank lines
            and surrounding whitespace are ignored.
        endpoint (str): URL of the scraper service.
        timeout (float): Seconds to wait for each request, so that a single
            unresponsive URL cannot block the whole batch forever.
        rows_per_page (int): Number of rows shown on one page.

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: First page of results (up to ``rows_per_page`` rows),
              columns ["URL", "Content", "Status"].
            - str: Status message (e.g., "Scraping completed", "No URLs provided").
            - pd.DataFrame: Full DataFrame with all results.
            - int: Current page number (always 1 after a scrape).
            - str: Page information (e.g., "Page 1 of 3").
    """
    columns = ["URL", "Content", "Status"]

    def _no_results(message):
        # Keep the column headers so the results table does not lose them.
        return (
            pd.DataFrame(columns=columns),
            message,
            pd.DataFrame(columns=columns),
            1,
            "Page 1",
        )

    if not urls_input:
        return _no_results("No URLs provided")

    urls = [line.strip() for line in urls_input.splitlines() if line.strip()]
    if not urls:
        return _no_results("No valid URLs provided")

    results = []
    for url in urls:
        try:
            response = requests.post(endpoint, json={"url": url}, timeout=timeout)
            payload = response.json()
        except Exception as exc:
            # Boundary handler: connection errors, timeouts and invalid JSON
            # are all reported per URL instead of failing the whole batch.
            results.append({"URL": url, "Content": "", "Status": f"Error: {exc}"})
            continue

        if not isinstance(payload, dict):
            # A list/string/null reply has neither "error" nor "text".
            results.append(
                {"URL": url, "Content": "", "Status": "Error: unexpected response format"}
            )
        elif "error" in payload:
            results.append(
                {"URL": url, "Content": "", "Status": f"Error: {payload['error']}"}
            )
        else:
            results.append(
                {
                    "URL": url,
                    "Content": payload.get("text", "No content extracted"),
                    "Status": "Success",
                }
            )

    df = pd.DataFrame(results, columns=columns)
    total_pages = max(1, math.ceil(len(df) / rows_per_page))
    # Same "Page N of M" wording that change_page() produces.
    return df.head(rows_per_page), "Scraping completed", df, 1, f"Page 1 of {total_pages}"
def change_page(full_df, current_page, direction, rows_per_page=5):
    """Move one page forward or backward through the scraped results.

    The requested move is applied first and the resulting page number is then
    clamped to ``1..total_pages``. A stale or out-of-range ``current_page``
    therefore always lands on a real page instead of producing an empty slice.

    Args:
        full_df (pd.DataFrame): Full DataFrame containing all scraped results.
        current_page (int): Current page number (1-based).
        direction (str): Navigation direction ("next" or "prev"). Any other
            value keeps the current page.
        rows_per_page (int): Number of rows shown on one page.

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: Rows of the selected page (up to ``rows_per_page``).
            - str: Status message (e.g., "Page updated").
            - pd.DataFrame: Unchanged full DataFrame.
            - int: Updated current page number.
            - str: Page information (e.g., "Page 2 of 3").

    Raises:
        ValueError: If ``rows_per_page`` is smaller than 1.
    """
    if rows_per_page < 1:
        raise ValueError("rows_per_page must be at least 1")

    if full_df is None or full_df.empty:
        remembered = pd.DataFrame() if full_df is None else full_df
        return pd.DataFrame(), "No results to display", remembered, current_page, f"Page {current_page}"

    total_pages = math.ceil(len(full_df) / rows_per_page)

    try:
        page = int(current_page)
    except (TypeError, ValueError):
        page = 1

    if direction == "next":
        page += 1
    elif direction == "prev":
        page -= 1
    # Clamp after moving: "prev" from beyond the end lands on the last page,
    # "next" from below 1 lands on the first page.
    page = min(max(page, 1), total_pages)

    start = (page - 1) * rows_per_page
    page_df = full_df.iloc[start:start + rows_per_page]
    return page_df, "Page updated", full_df, page, f"Page {page} of {total_pages}"
def retrieve_notes(endpoint="http://localhost:8001/notes", timeout=30):
    """Fetch the stored notes from the notes service and return them as a DataFrame.

    The service is expected to answer with a JSON list of note objects. Missing
    fields fall back to defaults ("N/A" id, "Summary" topic, empty notes/url,
    "General" tag). A JSON object containing an ``"error"`` key is reported as
    an error rather than as an empty result.

    Args:
        endpoint (str): URL of the notes service.
        timeout (float): Seconds to wait for the service before giving up.

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: DataFrame with columns ["id", "topic", "notes", "url", "tag"].
            - str: Status message (e.g., "Notes loaded successfully", "No notes found",
              or "Error: ...").
    """
    columns = ["id", "topic", "notes", "url", "tag"]

    def _empty(message):
        # Keep the headers so the notes table stays well-formed when empty.
        return pd.DataFrame(columns=columns), message

    try:
        response = requests.get(endpoint, timeout=timeout)
        payload = response.json()
    except Exception as exc:
        # Boundary handler: network failures, timeouts and invalid JSON all
        # end up as a status message in the UI.
        return _empty(f"Error: {exc}")

    if not payload:
        return _empty("No notes found")
    if isinstance(payload, dict) and "error" in payload:
        # Surface the server's message instead of pretending there are no notes.
        return _empty(f"Error: {payload['error']}")
    if not isinstance(payload, list):
        return _empty("Error: unexpected response format")

    rows = [
        {
            "id": item.get("id", "N/A"),
            "topic": item.get("topic", "Summary"),
            "notes": item.get("notes", ""),
            "url": item.get("url", ""),
            "tag": item.get("tag", "General"),
        }
        for item in payload
        if isinstance(item, dict)  # skip malformed entries, keep the rest
    ]
    if not rows:
        return _empty("No notes found")
    return pd.DataFrame(rows, columns=columns), "Notes loaded successfully"
def filter_notes(notes_df, max_rows, search_query, search_field):
    """Filter notes by a case-insensitive search term and limit the row count.

    The search term is always matched as a literal substring (never as a
    regular expression), and cell values are converted to text before matching
    so numeric columns such as ``id`` can be searched as well.

    Args:
        notes_df (pd.DataFrame): DataFrame containing notes.
        max_rows (str): Maximum rows to display ("5", "10", "25", or "All").
            ``None`` or an empty string is treated like "All".
        search_query (str): Search term to filter notes. Empty means no filtering.
        search_field (str): Field to search ("id", "topic", "notes", "url", "tag", or "all").

    Returns:
        tuple: A tuple containing:
            - pd.DataFrame: Filtered DataFrame (a new object; the input is not modified).
            - str: Status message (e.g., "Filtered notes loaded", "No matching notes found").
    """
    if notes_df is None or notes_df.empty:
        return pd.DataFrame(), "No notes available"

    try:
        keep = pd.Series(True, index=notes_df.index)

        if search_query and search_field:
            needle = search_query.lower()

            def _contains(column):
                # astype(str) makes the .str accessor usable on numeric columns;
                # regex=False keeps characters such as "(" or "+" literal.
                return column.astype(str).str.lower().str.contains(
                    needle, regex=False, na=False
                )

            if search_field == "all":
                keep = notes_df.apply(_contains).any(axis=1)
            elif search_field in notes_df.columns:
                column = notes_df[search_field]
                # Missing values never match in a single-field search.
                keep = column.notna() & _contains(column)
            else:
                return pd.DataFrame(), f"Error: unknown search field '{search_field}'"

        filtered_df = notes_df[keep]

        if max_rows not in (None, "", "All"):
            filtered_df = filtered_df.head(max(int(max_rows), 0))

        status = "No matching notes found" if filtered_df.empty else "Filtered notes loaded"
        return filtered_df, status
    except Exception as exc:
        # Boundary handler: e.g. a non-numeric max_rows value.
        return pd.DataFrame(), f"Error: {exc}"
def view_note_content(selected_row: int, notes_df):
    """Return the text of the note stored in the selected table row.

    Args:
        selected_row (int): Zero-based position of the selected row. A
            ``[row, column]`` pair, as reported by a table cell selection, is
            accepted as well; only the row part is used.
        notes_df (pd.DataFrame): DataFrame containing notes; the text is read
            from its "notes" column.

    Returns:
        str: The content of the selected note, an empty string if the note is
        missing, or an error/status message.
    """
    if selected_row is None or notes_df is None or notes_df.empty:
        return "No note selected or no data available"

    if isinstance(selected_row, (list, tuple)):
        if not selected_row:
            return "No note selected or no data available"
        selected_row = selected_row[0]

    try:
        row = int(selected_row)
    except (TypeError, ValueError):
        return f"Error: invalid row index {selected_row!r}"

    # Reject negatives explicitly: iloc would silently wrap around and show
    # a different note than the one that was selected.
    if not 0 <= row < len(notes_df):
        return f"Error: row {row} is out of range (0-{len(notes_df) - 1})"
    if "notes" not in notes_df.columns:
        return "Error: the table has no 'notes' column"

    value = notes_df.iloc[row]["notes"]
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ""
    return str(value)
def download_notes(notes_df, format_choice):
    """Export the notes to a CSV or JSON file and offer it for download.

    The file component takes a path to a file on disk, not raw bytes, so the
    notes are written to a fresh temporary directory under a stable file name
    ("notes.csv" / "notes.json") and that path is handed to the component.
    JSON output is a single valid JSON array of records.

    Args:
        notes_df (pd.DataFrame): DataFrame containing notes.
        format_choice (str): Download format ("CSV" or "JSON").

    Returns:
        tuple: A tuple containing:
            - gr.File or None: Visible file component pointing at the exported
              file, or None if nothing could be exported.
            - str: Status message (e.g., "Download ready", "Data not available to download").
    """
    # Local imports: only this function needs them.
    import tempfile
    from pathlib import Path

    if notes_df is None or notes_df.empty:
        return None, "Data not available to download"

    if format_choice == "CSV":
        filename = "notes.csv"
    elif format_choice == "JSON":
        filename = "notes.json"
    else:
        return None, "Invalid format selected"

    try:
        # A dedicated directory keeps the friendly file name without
        # colliding with exports made by other sessions.
        target = Path(tempfile.mkdtemp(prefix="notes_")) / filename
        if format_choice == "CSV":
            notes_df.to_csv(target, index=False)
        else:
            notes_df.to_json(target, orient="records", force_ascii=False, indent=2)
        return gr.File(value=str(target), visible=True), "Download ready"
    except Exception as exc:
        # Boundary handler: report export problems in the status box.
        return None, f"Error: {exc}"
# Gradio Tabbed Interface
#
# Tab 1 scrapes a list of URLs and pages through the results.
# Tab 2 fetches stored notes, filters them, shows a selected note and exports
# the currently displayed rows.
with gr.Blocks() as app:
    gr.Markdown("# Knowledge Store App")
    with gr.Tabs():
        # Tab 1: Input Client with Multi-URL Support and Pagination
        with gr.Tab(label="URL Scraper"):
            url_input = gr.Textbox(
                label="Enter Webpage URLs (one per line)",
                placeholder="https://example.com\nhttps://wikipedia.org",
                lines=5,
            )
            scrape_button = gr.Button("Scrape URLs")
            scrape_output = gr.Dataframe(
                headers=["URL", "Content", "Status"],
                label="Scraped Results",
                wrap=False,
            )
            scrape_status = gr.Textbox(label="Status")
            with gr.Row():
                prev_button = gr.Button("Previous Page")
                next_button = gr.Button("Next Page")
                page_info = gr.Textbox(label="Page", value="Page 1", interactive=False)
            # Per-session state: every scraped row and the page being shown.
            full_results = gr.State(pd.DataFrame())
            current_page = gr.State(1)

            paging_outputs = [scrape_output, scrape_status, full_results, current_page, page_info]
            scrape_button.click(
                fn=scrape_websites,
                inputs=url_input,
                outputs=paging_outputs,
            )
            # The direction is fixed per button, so bind it in a small wrapper
            # instead of routing a constant through an extra State component.
            prev_button.click(
                fn=lambda results, page: change_page(results, page, "prev"),
                inputs=[full_results, current_page],
                outputs=paging_outputs,
            )
            next_button.click(
                fn=lambda results, page: change_page(results, page, "next"),
                inputs=[full_results, current_page],
                outputs=paging_outputs,
            )

        # Tab 2: Retrieval Client with Enhanced Columns
        with gr.Tab(label="View Notes"):
            with gr.Row():
                max_rows = gr.Dropdown(
                    choices=["5", "10", "25", "All"],
                    value="All",
                    label="Max Rows to Display",
                )
                search_query = gr.Textbox(label="Search Notes", placeholder="Enter search term")
                search_field = gr.Dropdown(
                    choices=["id", "topic", "notes", "url", "tag", "all"],
                    value="all",
                    label="Search Field",
                )
            retrieve_button = gr.Button("Fetch Notes")
            notes_table = gr.Dataframe(
                headers=["id", "topic", "notes", "url", "tag"],
                label="Stored Notes",
                interactive=True,
                wrap=False,
            )
            notes_status = gr.Textbox(label="Status")
            content_view = gr.Textbox(label="Selected Note Content", lines=5)
            with gr.Row():
                format_choice = gr.Dropdown(
                    choices=["CSV", "JSON"],
                    value="CSV",
                    label="Download Format",
                )
                download_button = gr.Button("Download Notes")
            download_file = gr.File(label="Download File", visible=False)
            # Unfiltered copy of the last fetch. Filters always start from this,
            # so relaxing or clearing a filter brings hidden rows back.
            all_notes = gr.State(pd.DataFrame())

            def _fetch_notes():
                """Load notes for display and remember the unfiltered set."""
                notes, status = retrieve_notes()
                return notes, status, notes

            def _show_selected_note(notes_df, evt: gr.SelectData):
                """Show the note of the clicked row; for a table, evt.index is [row, column]."""
                position = evt.index
                row = position[0] if isinstance(position, (list, tuple)) else position
                return view_note_content(row, notes_df)

            retrieve_button.click(
                fn=_fetch_notes,
                outputs=[notes_table, notes_status, all_notes],
            )
            # Any change to a filter control re-filters the full note set.
            for filter_control in (max_rows, search_query, search_field):
                filter_control.change(
                    fn=filter_notes,
                    inputs=[all_notes, max_rows, search_query, search_field],
                    outputs=[notes_table, notes_status],
                )
            # The row index refers to the table as displayed, so pass that table.
            notes_table.select(
                fn=_show_selected_note,
                inputs=[notes_table],
                outputs=content_view,
            )
            # Export exactly what is currently shown in the table.
            download_button.click(
                fn=download_notes,
                inputs=[notes_table, format_choice],
                outputs=[download_file, notes_status],
            )

# Start the server only when run as a script, so importing this module
# (tests, tooling) does not launch the app.
if __name__ == "__main__":
    app.launch()