#import gradio as gr
#with gr.Blocks(theme=gr.themes.Glass()) as demo:
# open_google = gr.Button(value="Client", link="https://huggingface.co/spaces/WD101/OneClientToRuleThemAll")
# open_bing = gr.Button(value="Server", link="https://huggingface.co/spaces/WD101/OneServerToRuleThemAll")
#demo.launch()
import gradio as gr
import pandas as pd
import requests
import math
import json
import os
import tempfile
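# This client talks to two local backend services (URLs assumed from the calls below):
#   * the scraper service -- POST http://localhost:8000/scrape with {"url": ...},
#     expected to return {"text": ...} on success or {"error": ...} on failure;
#   * the notes service (Server 2) -- GET http://localhost:8001/notes, expected to
#     return a list of note objects with id/topic/notes/url/tag fields.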
def scrape_websites(urls_input):
"""Scrapes multiple URLs and returns results in a paginated DataFrame.
Args:
urls_input (str): A string containing URLs, one per line.
Returns:
tuple: A tuple containing:
- pd.DataFrame: Paginated DataFrame with up to 5 rows, columns ["URL", "Content", "Status"].
- str: Status message (e.g., "Scraping completed", "No URLs provided").
- pd.DataFrame: Full DataFrame with all results.
- int: Current page number (initially 1).
- str: Page information (e.g., "Page 1").
"""
if not urls_input:
return pd.DataFrame(), "No URLs provided", pd.DataFrame(), 1, "Page 1"
urls = [url.strip() for url in urls_input.split("\n") if url.strip()]
if not urls:
return pd.DataFrame(), "No valid URLs provided", pd.DataFrame(), 1, "Page 1"
results = []
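    # Each URL is scraped independently so one failed request does not abort the
    # batch; failures are recorded as an "Error: ..." status on that row.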
for url in urls:
try:
response = requests.post("http://localhost:8000/scrape", json={"url": url})
result = response.json()
if "error" in result:
results.append({"URL": url, "Content": "", "Status": f"Error: {result['error']}"})
else:
results.append({"URL": url, "Content": result.get("text", "No content extracted"), "Status": "Success"})
except Exception as e:
results.append({"URL": url, "Content": "", "Status": f"Error: {str(e)}"})
df = pd.DataFrame(results, columns=["URL", "Content", "Status"])
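    # Show only the first 5 rows up front; the full DataFrame is also returned so
    # it can be kept in gr.State for pagination.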
paginated_df = df.head(5)
status = "Scraping completed" if results else "No results to display"
page = 1
page_info = f"Page {page}"
return paginated_df, status, df, page, page_info
def change_page(full_df, current_page, direction):
"""Changes the displayed page of scraped results.
Args:
full_df (pd.DataFrame): Full DataFrame containing all scraped results.
current_page (int): Current page number.
direction (str): Navigation direction ("next" or "prev").
Returns:
tuple: A tuple containing:
- pd.DataFrame: Paginated DataFrame with up to 5 rows.
- str: Status message (e.g., "Page updated").
- pd.DataFrame: Unchanged full DataFrame.
- int: Updated current page number.
- str: Page information (e.g., "Page 2 of 3").
"""
if full_df.empty:
return pd.DataFrame(), "No results to display", full_df, current_page, f"Page {current_page}"
rows_per_page = 5
total_rows = len(full_df)
total_pages = math.ceil(total_rows / rows_per_page)
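    # Clamp navigation so the current page always stays within 1..total_pages.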
if direction == "next" and current_page < total_pages:
current_page += 1
elif direction == "prev" and current_page > 1:
current_page -= 1
start_idx = (current_page - 1) * rows_per_page
end_idx = start_idx + rows_per_page
paginated_df = full_df.iloc[start_idx:end_idx]
page_info = f"Page {current_page} of {total_pages}"
status = "Page updated" if not paginated_df.empty else "No results on this page"
return paginated_df, status, full_df, current_page, page_info
def retrieve_notes():
"""Fetches notes from Server 2 and returns them as a DataFrame.
Args:
None
Returns:
tuple: A tuple containing:
- pd.DataFrame: DataFrame with columns ["id", "topic", "notes", "url", "tag"].
- str: Status message (e.g., "Notes loaded successfully", "No notes found").
"""
try:
response = requests.get("http://localhost:8001/notes")
result = response.json()
if "error" in result or not result:
return pd.DataFrame(), "No notes found"
processed_results = [
{
"id": item.get("id", "N/A"),
"topic": item.get("topic", "Summary"),
"notes": item.get("notes", ""),
"url": item.get("url", ""),
"tag": item.get("tag", "General")
} for item in result
]
df = pd.DataFrame(processed_results, columns=["id", "topic", "notes", "url", "tag"])
return df, "Notes loaded successfully"
except Exception as e:
return pd.DataFrame(), f"Error: {str(e)}"
def filter_notes(notes_df, max_rows, search_query, search_field):
"""Filters and searches notes based on user input.
Args:
notes_df (pd.DataFrame): DataFrame containing notes.
max_rows (str): Maximum rows to display ("5", "10", "25", or "All").
search_query (str): Search term to filter notes.
search_field (str): Field to search ("id", "topic", "notes", "url", "tag", or "all").
Returns:
tuple: A tuple containing:
- pd.DataFrame: Filtered DataFrame.
- str: Status message (e.g., "Filtered notes loaded", "No matching notes found").
"""
if notes_df.empty:
return pd.DataFrame(), "No notes available"
try:
filtered_df = notes_df.copy()
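        # Case-insensitive substring search, either across every column ("all")
        # or restricted to a single field.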
if search_query and search_field:
search_query = search_query.lower()
if search_field == "all":
filtered_df = filtered_df[
filtered_df.apply(
lambda row: any(search_query in str(val).lower() for val in row), axis=1
)
]
else:
filtered_df = filtered_df[
                    filtered_df[search_field].astype(str).str.lower().str.contains(search_query, na=False)
]
if max_rows != "All":
max_rows = int(max_rows)
filtered_df = filtered_df.head(max_rows)
status = "Filtered notes loaded" if not filtered_df.empty else "No matching notes found"
return filtered_df, status
except Exception as e:
return pd.DataFrame(), f"Error: {str(e)}"
def view_note_content(notes_df, evt: gr.SelectData):
    """Displays the content of the selected note.
    Args:
        notes_df (pd.DataFrame): DataFrame containing notes.
        evt (gr.SelectData): Selection event carrying the clicked cell's (row, column) index.
    Returns:
        str: The content of the selected note or an error/status message.
    """
    if notes_df is None or notes_df.empty or evt.index is None:
        return "No note selected or no data available"
    try:
        row_idx = evt.index[0]
        return notes_df.iloc[row_idx]["notes"]
    except Exception as e:
        return f"Error: {str(e)}"
def download_notes(notes_df, format_choice):
"""Downloads notes in CSV or JSON format.
Args:
notes_df (pd.DataFrame): DataFrame containing notes.
format_choice (str): Download format ("CSV" or "JSON").
Returns:
tuple: A tuple containing:
            - gr.File or None: File component update pointing at the exported file, or None if no data.
- str: Status message (e.g., "Download ready", "Data not available to download").
"""
if notes_df.empty:
return None, "Data not available to download"
try:
        if format_choice == "CSV":
            content = notes_df.to_csv(index=False)
            filename = "notes.csv"
        elif format_choice == "JSON":
            content = notes_df.to_json(orient="records")
            filename = "notes.json"
        else:
            return None, "Invalid format selected"
        # gr.File expects a path on disk rather than raw bytes, so write the export
        # to a temporary directory and keep the user-facing filename.
        filepath = os.path.join(tempfile.mkdtemp(), filename)
        with open(filepath, "w") as f:
            f.write(content)
        return gr.File(value=filepath, visible=True), "Download ready"
except Exception as e:
return None, f"Error: {str(e)}"
# Gradio Tabbed Interface
with gr.Blocks() as app:
gr.Markdown("# Knowledge Store App")
with gr.Tabs():
# Tab 1: Input Client with Multi-URL Support and Pagination
with gr.Tab(label="URL Scraper"):
url_input = gr.Textbox(
label="Enter Webpage URLs (one per line)",
placeholder="https://example.com\nhttps://wikipedia.org",
lines=5
)
scrape_button = gr.Button("Scrape URLs")
scrape_output = gr.Dataframe(
headers=["URL", "Content", "Status"],
label="Scraped Results",
wrap=False
)
scrape_status = gr.Textbox(label="Status")
with gr.Row():
prev_button = gr.Button("Previous Page")
next_button = gr.Button("Next Page")
page_info = gr.Textbox(label="Page", value="Page 1", interactive=False)
full_results = gr.State(pd.DataFrame())
current_page = gr.State(1)
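            # gr.State holds the full result set and current page between clicks;
            # the inline gr.State("prev") / gr.State("next") inputs below pass the
            # navigation direction to change_page as a constant.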
scrape_button.click(
fn=scrape_websites,
inputs=url_input,
outputs=[scrape_output, scrape_status, full_results, current_page, page_info]
)
prev_button.click(
fn=change_page,
inputs=[full_results, current_page, gr.State("prev")],
outputs=[scrape_output, scrape_status, full_results, current_page, page_info]
)
next_button.click(
fn=change_page,
inputs=[full_results, current_page, gr.State("next")],
outputs=[scrape_output, scrape_status, full_results, current_page, page_info]
)
# Tab 2: Retrieval Client with Enhanced Columns
with gr.Tab(label="View Notes"):
with gr.Row():
max_rows = gr.Dropdown(
choices=["5", "10", "25", "All"],
value="All",
label="Max Rows to Display"
)
search_query = gr.Textbox(label="Search Notes", placeholder="Enter search term")
search_field = gr.Dropdown(
choices=["id", "topic", "notes", "url", "tag", "all"],
value="all",
label="Search Field"
)
retrieve_button = gr.Button("Fetch Notes")
notes_table = gr.Dataframe(
headers=["id", "topic", "notes", "url", "tag"],
label="Stored Notes",
interactive=True,
wrap=False
)
notes_status = gr.Textbox(label="Status")
content_view = gr.Textbox(label="Selected Note Content", lines=5)
with gr.Row():
format_choice = gr.Dropdown(
choices=["CSV", "JSON"],
value="CSV",
label="Download Format"
)
download_button = gr.Button("Download Notes")
download_file = gr.File(label="Download File", visible=False)
retrieve_button.click(
fn=retrieve_notes,
outputs=[notes_table, notes_status]
)
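            # The filter callbacks read the currently displayed table, so each
            # filter narrows what is already on screen; press "Fetch Notes" to
            # reload the full set from the server.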
max_rows.change(
fn=filter_notes,
inputs=[notes_table, max_rows, search_query, search_field],
outputs=[notes_table, notes_status]
)
search_query.change(
fn=filter_notes,
inputs=[notes_table, max_rows, search_query, search_field],
outputs=[notes_table, notes_status]
)
search_field.change(
fn=filter_notes,
inputs=[notes_table, max_rows, search_query, search_field],
outputs=[notes_table, notes_status]
)
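            # .select injects the clicked cell's coordinates via gr.SelectData,
            # so only the table itself needs to be passed as an explicit input.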
notes_table.select(
fn=view_note_content,
inputs=[notes_table],
outputs=content_view
)
download_button.click(
fn=download_notes,
inputs=[notes_table, format_choice],
outputs=[download_file, notes_status]
)
app.launch()