Spaces:

WD101
/

One_App_To_Rule_Them_All

Sleeping

App Files Files Community

etukurudinesh committed on Jun 5

Commit

f8bc2f2

1 Parent(s): 73ab785

feat: interface for users

Browse files

Files changed (1) hide show

app.py +313 -4

app.py CHANGED Viewed

@@ -1,7 +1,316 @@
 import gradio as gr
-with gr.Blocks(theme=gr.themes.Glass()) as demo:
-  open_google = gr.Button(value="Client", link="https://huggingface.co/spaces/WD101/OneClientToRuleThemAll")
-  open_bing = gr.Button(value="Server", link="https://huggingface.co/spaces/WD101/OneServerToRuleThemAll")
-demo.launch()

+#import gradio as gr
+#with gr.Blocks(theme=gr.themes.Glass()) as demo:
+#  open_google = gr.Button(value="Client", link="https://huggingface.co/spaces/WD101/OneClientToRuleThemAll")
+#  open_bing = gr.Button(value="Server", link="https://huggingface.co/spaces/WD101/OneServerToRuleThemAll")
+#demo.launch()
 import gradio as gr
+import pandas as pd
+import requests
+import math
+import json
def scrape_websites(urls_input, *, timeout=30):
  """Scrapes multiple URLs and returns the first page of results.

  Each URL is POSTed on its own to the scraping service at
  http://localhost:8000/scrape. A failure for one URL is recorded in that
  row's "Status" column and does not stop the remaining URLs.

  Args:
      urls_input (str): A string containing URLs, one per line. Blank lines are ignored.
      timeout (float): Keyword-only. Seconds to wait for the scraping service per URL.
  Returns:
      tuple: A tuple containing:
          - pd.DataFrame: First page (up to 5 rows), columns ["URL", "Content", "Status"].
          - str: Status message (e.g., "Scraping completed", "No URLs provided").
          - pd.DataFrame: Full DataFrame with all results.
          - int: Current page number (always 1).
          - str: Page information ("Page 1 of N", or "Page 1" when nothing was scraped).
  """
  rows_per_page = 5
  if not urls_input:
    return pd.DataFrame(), "No URLs provided", pd.DataFrame(), 1, "Page 1"
  urls = [line.strip() for line in urls_input.splitlines() if line.strip()]
  if not urls:
    return pd.DataFrame(), "No valid URLs provided", pd.DataFrame(), 1, "Page 1"

  results = []
  for url in urls:
    try:
      # Without a timeout a single unresponsive call would block the whole batch.
      response = requests.post(
        "http://localhost:8000/scrape", json={"url": url}, timeout=timeout
      )
      payload = response.json()
      if not isinstance(payload, dict):
        raise ValueError("unexpected response format from scraper")
      if "error" in payload:
        row = {"URL": url, "Content": "", "Status": f"Error: {payload['error']}"}
      else:
        row = {
          "URL": url,
          "Content": payload.get("text", "No content extracted"),
          "Status": "Success",
        }
    except Exception as exc:
      # Per-URL boundary: any failure becomes a row so the batch keeps going.
      row = {"URL": url, "Content": "", "Status": f"Error: {exc}"}
    results.append(row)

  df = pd.DataFrame(results, columns=["URL", "Content", "Status"])
  total_pages = math.ceil(len(df) / rows_per_page)
  # Same "Page X of Y" wording that change_page produces.
  return df.head(rows_per_page), "Scraping completed", df, 1, f"Page 1 of {total_pages}"
def change_page(full_df, current_page, direction, *, rows_per_page=5):
  """Changes the displayed page of scraped results.

  The incoming page number is clamped to the valid range before the move is
  applied, so a stale or out-of-range value can never produce an empty page.

  Args:
      full_df (pd.DataFrame): Full DataFrame containing all scraped results.
      current_page (int): Current page number (1-based).
      direction (str): Navigation direction ("next" or "prev"). Any other
          value leaves the page where it is.
      rows_per_page (int): Keyword-only. Positive number of rows per page.
  Returns:
      tuple: A tuple containing:
          - pd.DataFrame: Paginated DataFrame with up to rows_per_page rows.
          - str: Status message ("Page updated" or "No results to display").
          - pd.DataFrame: The full DataFrame (an empty one if None was given).
          - int: Updated current page number.
          - str: Page information (e.g., "Page 2 of 3").
  """
  if full_df is None or full_df.empty:
    state_df = pd.DataFrame() if full_df is None else full_df
    return pd.DataFrame(), "No results to display", state_df, current_page, f"Page {current_page}"

  total_pages = math.ceil(len(full_df) / rows_per_page)
  try:
    page = int(current_page)
  except (TypeError, ValueError):
    page = 1
  page = min(max(page, 1), total_pages)

  if direction == "next":
    page = min(page + 1, total_pages)
  elif direction == "prev":
    page = max(page - 1, 1)

  start_idx = (page - 1) * rows_per_page
  paginated_df = full_df.iloc[start_idx:start_idx + rows_per_page]
  return paginated_df, "Page updated", full_df, page, f"Page {page} of {total_pages}"
def retrieve_notes(*, timeout=30):
  """Fetches notes from the notes service and returns them as a DataFrame.

  Sends a GET request to http://localhost:8001/notes and expects a JSON list
  of note objects. Missing fields fall back to defaults; entries that are not
  JSON objects are ignored.

  Args:
      timeout (float): Keyword-only. Seconds to wait for the notes service.
  Returns:
      tuple: A tuple containing:
          - pd.DataFrame: DataFrame with columns ["id", "topic", "notes", "url", "tag"]
            (empty on failure or when there are no notes).
          - str: Status message ("Notes loaded successfully", "No notes found",
            or "Error: ..." when the request fails or the service reports an error).
  """
  columns = ["id", "topic", "notes", "url", "tag"]
  try:
    # Without a timeout an unresponsive service would block the UI indefinitely.
    response = requests.get("http://localhost:8001/notes", timeout=timeout)
    payload = response.json()
  except Exception as exc:
    # UI boundary: report the failure in the status box instead of raising.
    return pd.DataFrame(), f"Error: {exc}"

  if isinstance(payload, dict) and "error" in payload:
    # Surface the server's message rather than pretending there are no notes.
    return pd.DataFrame(), f"Error: {payload['error']}"
  if not payload:
    return pd.DataFrame(), "No notes found"
  if not isinstance(payload, list):
    return pd.DataFrame(), "Error: unexpected response format"

  processed_results = [
    {
      "id": item.get("id", "N/A"),
      "topic": item.get("topic", "Summary"),
      "notes": item.get("notes", ""),
      "url": item.get("url", ""),
      "tag": item.get("tag", "General"),
    }
    for item in payload
    if isinstance(item, dict)
  ]
  if not processed_results:
    return pd.DataFrame(), "No notes found"
  return pd.DataFrame(processed_results, columns=columns), "Notes loaded successfully"
def filter_notes(notes_df, max_rows, search_query, search_field):
  """Filters and searches notes based on user input.

  Matching is a case-insensitive, literal substring test (the query is not
  interpreted as a regular expression). Values are compared by their string
  form, so non-text columns such as a numeric "id" can be searched too.

  Args:
      notes_df (pd.DataFrame): DataFrame containing notes.
      max_rows (str): Maximum rows to display ("5", "10", "25", or "All").
      search_query (str): Search term to filter notes. Empty means no search.
      search_field (str): Field to search ("id", "topic", "notes", "url", "tag", or "all").
  Returns:
      tuple: A tuple containing:
          - pd.DataFrame: Filtered DataFrame.
          - str: Status message (e.g., "Filtered notes loaded", "No matching notes found").
  """
  if notes_df is None or notes_df.empty:
    return pd.DataFrame(), "No notes available"
  try:
    filtered_df = notes_df.copy()
    if search_query and search_field:
      needle = search_query.lower()
      if search_field == "all":
        as_text = filtered_df.astype(str)
        mask = as_text.apply(
          lambda col: col.str.lower().str.contains(needle, regex=False, na=False)
        ).any(axis=1)
      elif search_field in filtered_df.columns:
        column = filtered_df[search_field]
        # astype(str) keeps `.str` usable on numeric columns; notna() stops
        # missing values from matching through their string form.
        mask = column.notna() & column.astype(str).str.lower().str.contains(
          needle, regex=False, na=False
        )
      else:
        return pd.DataFrame(), f"Error: unknown search field '{search_field}'"
      filtered_df = filtered_df[mask]
    if max_rows not in (None, "", "All"):
      filtered_df = filtered_df.head(int(max_rows))
    status = "Filtered notes loaded" if not filtered_df.empty else "No matching notes found"
    return filtered_df, status
  except Exception as e:
    # UI boundary: show the problem in the status box instead of raising.
    return pd.DataFrame(), f"Error: {str(e)}"
def view_note_content(selected_row, notes_df=None, evt: "gr.SelectData" = None):
  """Displays the content of a selected note.

  Can be called directly as view_note_content(row, notes_df). It also works
  when registered on a table "select" event whose only input is the table:
  the DataFrame then arrives as the first positional argument and the clicked
  cell is read from the event payload.

  Args:
      selected_row (int): Positional index of the selected row. A DataFrame
          passed here (with notes_df omitted) is treated as notes_df.
      notes_df (pd.DataFrame): DataFrame containing notes.
      evt (gr.SelectData): Optional selection event; its index supplies the
          row when selected_row is not given. The annotation is a string so
          it is only resolved where gradio is imported.
  Returns:
      str: The content of the selected note or an error/status message.
  """
  if notes_df is None and isinstance(selected_row, pd.DataFrame):
    selected_row, notes_df = None, selected_row
  if selected_row is None and evt is not None:
    index = getattr(evt, "index", None)
    # A table cell selection is reported as (row, column); keep only the row.
    selected_row = index[0] if isinstance(index, (list, tuple)) and index else index
  if selected_row is None or notes_df is None or notes_df.empty:
    return "No note selected or no data available"
  try:
    return notes_df.iloc[int(selected_row)]["notes"]
  except (IndexError, KeyError, TypeError, ValueError) as e:
    return f"Error: {str(e)}"
def download_notes(notes_df, format_choice):
  """Exports notes to a CSV or JSON file and offers it for download.

  The export is written to a fresh temporary directory and the file path is
  handed to gr.File, which takes a path as its value rather than raw bytes.
  JSON output is a single array of records, so the file parses as one JSON
  document.

  Args:
      notes_df (pd.DataFrame): DataFrame containing notes.
      format_choice (str): Download format ("CSV" or "JSON").
  Returns:
      tuple: A tuple containing:
          - gr.File or None: Visible file component pointing at the export, or None on failure.
          - str: Status message (e.g., "Download ready", "Data not available to download").
  """
  import tempfile
  from pathlib import Path

  if notes_df is None or notes_df.empty:
    return None, "Data not available to download"
  try:
    if format_choice == "CSV":
      content = notes_df.to_csv(index=False)
      filename = "notes.csv"
    elif format_choice == "JSON":
      content = notes_df.to_json(orient="records", indent=2)
      filename = "notes.json"
    else:
      return None, "Invalid format selected"
    # A dedicated directory lets the download keep its plain file name.
    export_path = Path(tempfile.mkdtemp(prefix="notes_")) / filename
    export_path.write_text(content, encoding="utf-8")
    return gr.File(value=str(export_path), visible=True), "Download ready"
  except Exception as e:
    # UI boundary: show the problem in the status box instead of raising.
    return None, f"Error: {str(e)}"
# Gradio Tabbed Interface
def _load_notes():
  """Fetches notes once and returns them for both the cached copy and the table."""
  fetched_df, status = retrieve_notes()
  return fetched_df, fetched_df, status


def _show_selected_note(table_df, evt: gr.SelectData):
  """Resolves the clicked table cell to its row and returns that note's text."""
  index = evt.index
  # A table cell selection is reported as (row, column); keep only the row.
  row = index[0] if isinstance(index, (list, tuple)) else index
  return view_note_content(row, table_df)


with gr.Blocks() as app:
  gr.Markdown("# Knowledge Store App")
  with gr.Tabs():
    # Tab 1: Input Client with Multi-URL Support and Pagination
    with gr.Tab(label="URL Scraper"):
      url_input = gr.Textbox(
        label="Enter Webpage URLs (one per line)",
        placeholder="https://example.com\nhttps://wikipedia.org",
        lines=5
      )
      scrape_button = gr.Button("Scrape URLs")
      scrape_output = gr.Dataframe(
        headers=["URL", "Content", "Status"],
        label="Scraped Results",
        wrap=False
      )
      scrape_status = gr.Textbox(label="Status")
      with gr.Row():
        prev_button = gr.Button("Previous Page")
        next_button = gr.Button("Next Page")
        page_info = gr.Textbox(label="Page", value="Page 1", interactive=False)
      full_results = gr.State(pd.DataFrame())
      current_page = gr.State(1)

      # Scraping and paging all refresh the same five outputs.
      paging_outputs = [scrape_output, scrape_status, full_results, current_page, page_info]
      scrape_button.click(
        fn=scrape_websites,
        inputs=url_input,
        outputs=paging_outputs
      )
      for nav_button, direction in ((prev_button, "prev"), (next_button, "next")):
        nav_button.click(
          fn=change_page,
          inputs=[full_results, current_page, gr.State(direction)],
          outputs=paging_outputs
        )

    # Tab 2: Retrieval Client with Enhanced Columns
    with gr.Tab(label="View Notes"):
      with gr.Row():
        max_rows = gr.Dropdown(
          choices=["5", "10", "25", "All"],
          value="All",
          label="Max Rows to Display"
        )
        search_query = gr.Textbox(label="Search Notes", placeholder="Enter search term")
        search_field = gr.Dropdown(
          choices=["id", "topic", "notes", "url", "tag", "all"],
          value="all",
          label="Search Field"
        )
      retrieve_button = gr.Button("Fetch Notes")
      notes_table = gr.Dataframe(
        headers=["id", "topic", "notes", "url", "tag"],
        label="Stored Notes",
        interactive=True,
        wrap=False
      )
      notes_status = gr.Textbox(label="Status")
      content_view = gr.Textbox(label="Selected Note Content", lines=5)
      with gr.Row():
        format_choice = gr.Dropdown(
          choices=["CSV", "JSON"],
          value="CSV",
          label="Download Format"
        )
        download_button = gr.Button("Download Notes")
      download_file = gr.File(label="Download File", visible=False)

      # Unfiltered copy of the last fetch. Filters always start from this, so
      # relaxing a filter brings rows back instead of narrowing the already
      # narrowed table. Manual edits made in the table are not written back
      # here and are therefore dropped the next time a filter changes.
      all_notes = gr.State(pd.DataFrame())

      retrieve_button.click(
        fn=_load_notes,
        outputs=[all_notes, notes_table, notes_status]
      )
      for filter_control in (max_rows, search_query, search_field):
        filter_control.change(
          fn=filter_notes,
          inputs=[all_notes, max_rows, search_query, search_field],
          outputs=[notes_table, notes_status]
        )
      # The handler receives the visible table plus the selection event, so the
      # clicked row index refers to exactly what the user sees.
      notes_table.select(
        fn=_show_selected_note,
        inputs=[notes_table],
        outputs=content_view
      )
      download_button.click(
        fn=download_notes,
        inputs=[notes_table, format_choice],
        outputs=[download_file, notes_status]
      )
app.launch()