Spaces:

WD101
/

One_App_To_Rule_Them_All

Sleeping

App Files Files Community

One_App_To_Rule_Them_All / app.py

etukurudinesh

feat: interface for users

f8bc2f2 4 months ago

raw

history blame contribute delete

10.9 kB

	#import gradio as gr

	#with gr.Blocks(theme=gr.themes.Glass()) as demo:
	# open_google = gr.Button(value="Client", link="https://huggingface.co/spaces/WD101/OneClientToRuleThemAll")
	# open_bing = gr.Button(value="Server", link="https://huggingface.co/spaces/WD101/OneServerToRuleThemAll")

	#demo.launch()

	import gradio as gr
	import pandas as pd
	import requests
	import math
	import json


	def scrape_websites(urls_input):
	    """Scrape every URL in ``urls_input`` via the local scraper service.

	    Each non-blank line of the input is sent as ``{"url": <line>}`` in a POST
	    request to ``http://localhost:8000/scrape``. A failure for one URL is
	    recorded in that URL's ``Status`` cell and does not abort the others.

	    Args:
	        urls_input (str): URLs separated by newlines.

	    Returns:
	        tuple: A 5-tuple of
	            - pd.DataFrame: First page (up to 5 rows) with columns
	              ["URL", "Content", "Status"].
	            - str: Status message ("Scraping completed", "No URLs provided", ...).
	            - pd.DataFrame: All results.
	            - int: Current page number (always 1).
	            - str: Page label ("Page 1").
	    """
	    # Without a timeout a single unresponsive scrape would block the UI forever.
	    request_timeout_seconds = 60

	    if not urls_input:
	        return pd.DataFrame(), "No URLs provided", pd.DataFrame(), 1, "Page 1"

	    urls = [url.strip() for url in urls_input.split("\n") if url.strip()]
	    if not urls:
	        return pd.DataFrame(), "No valid URLs provided", pd.DataFrame(), 1, "Page 1"

	    results = []
	    for url in urls:
	        try:
	            response = requests.post(
	                "http://localhost:8000/scrape",
	                json={"url": url},
	                timeout=request_timeout_seconds,
	            )
	            result = response.json()
	            if not isinstance(result, dict):
	                # The rest of this branch relies on dict access; report a
	                # readable status instead of an AttributeError message.
	                results.append({"URL": url, "Content": "", "Status": "Error: unexpected response format"})
	            elif "error" in result:
	                results.append({"URL": url, "Content": "", "Status": f"Error: {result['error']}"})
	            else:
	                results.append({"URL": url, "Content": result.get("text", "No content extracted"), "Status": "Success"})
	        except Exception as e:
	            # Deliberately broad: this is a per-URL best-effort boundary, so any
	            # network/JSON failure becomes a row in the table rather than a crash.
	            results.append({"URL": url, "Content": "", "Status": f"Error: {str(e)}"})

	    df = pd.DataFrame(results, columns=["URL", "Content", "Status"])
	    paginated_df = df.head(5)
	    status = "Scraping completed" if results else "No results to display"
	    page = 1
	    page_info = f"Page {page}"
	    return paginated_df, status, df, page, page_info


	def change_page(full_df, current_page, direction, rows_per_page=5):
	    """Move the displayed page of scraped results forward or backward.

	    Args:
	        full_df (pd.DataFrame): All scraped results.
	        current_page (int): 1-based page currently shown.
	        direction (str): "next" or "prev"; any other value re-renders the
	            current page.
	        rows_per_page (int): Rows shown per page. Defaults to 5.

	    Returns:
	        tuple: A 5-tuple of
	            - pd.DataFrame: Rows of the resulting page.
	            - str: Status message (e.g., "Page updated").
	            - pd.DataFrame: The unchanged full DataFrame.
	            - int: Resulting page number.
	            - str: Page label (e.g., "Page 2 of 3").

	    Raises:
	        ValueError: If ``rows_per_page`` is smaller than 1.
	    """
	    if rows_per_page < 1:
	        raise ValueError("rows_per_page must be at least 1")

	    if full_df is None or full_df.empty:
	        return pd.DataFrame(), "No results to display", full_df, current_page, f"Page {current_page}"

	    total_rows = len(full_df)
	    total_pages = math.ceil(total_rows / rows_per_page)

	    if direction == "next" and current_page < total_pages:
	        current_page += 1
	    elif direction == "prev" and current_page > 1:
	        current_page -= 1

	    # A stale or out-of-range page number would otherwise slice an empty page.
	    current_page = min(max(current_page, 1), total_pages)

	    start_idx = (current_page - 1) * rows_per_page
	    end_idx = start_idx + rows_per_page
	    paginated_df = full_df.iloc[start_idx:end_idx]

	    page_info = f"Page {current_page} of {total_pages}"
	    status = "Page updated" if not paginated_df.empty else "No results on this page"
	    return paginated_df, status, full_df, current_page, page_info


	def retrieve_notes():
	    """Fetch stored notes from the notes service and tabulate them.

	    Issues a GET request to ``http://localhost:8001/notes`` and expects the
	    JSON reply to be iterable as a sequence of dict-like items. Missing keys
	    are filled with defaults ("N/A", "Summary", "", "", "General").

	    Returns:
	        tuple: A 2-tuple of
	            - pd.DataFrame: Columns ["id", "topic", "notes", "url", "tag"];
	              empty when nothing was found or the request failed.
	            - str: Status message ("Notes loaded successfully",
	              "No notes found", or "Error: ...").
	    """
	    # Without a timeout an unresponsive notes service would block the UI forever.
	    request_timeout_seconds = 30
	    try:
	        response = requests.get("http://localhost:8001/notes", timeout=request_timeout_seconds)
	        result = response.json()
	        # Test emptiness first: `"error" in None` would raise TypeError for a
	        # JSON `null` reply instead of reporting that there are no notes.
	        if not result or "error" in result:
	            return pd.DataFrame(), "No notes found"
	        processed_results = [
	            {
	                "id": item.get("id", "N/A"),
	                "topic": item.get("topic", "Summary"),
	                "notes": item.get("notes", ""),
	                "url": item.get("url", ""),
	                "tag": item.get("tag", "General"),
	            }
	            for item in result
	        ]
	        df = pd.DataFrame(processed_results, columns=["id", "topic", "notes", "url", "tag"])
	        return df, "Notes loaded successfully"
	    except Exception as e:
	        # Deliberately broad: any network/JSON/shape failure is shown in the
	        # status box rather than crashing the UI callback.
	        return pd.DataFrame(), f"Error: {str(e)}"


	def filter_notes(notes_df, max_rows, search_query, search_field):
	    """Filter notes by a case-insensitive substring and cap the row count.

	    Args:
	        notes_df (pd.DataFrame): Notes to filter.
	        max_rows (str): Maximum rows to keep ("5", "10", "25", or "All").
	        search_query (str): Text to look for; empty means no text filter.
	        search_field (str): Column to search ("id", "topic", "notes", "url",
	            "tag") or "all" to search every column.

	    Returns:
	        tuple: A 2-tuple of
	            - pd.DataFrame: The filtered rows (empty on error).
	            - str: Status message ("Filtered notes loaded",
	              "No matching notes found", "No notes available", or "Error: ...").
	    """
	    if notes_df is None or notes_df.empty:
	        return pd.DataFrame(), "No notes available"
	    try:
	        filtered_df = notes_df.copy()
	        if search_query and search_field:
	            needle = str(search_query).lower()
	            if search_field == "all":
	                mask = filtered_df.apply(
	                    lambda row: any(needle in str(val).lower() for val in row), axis=1
	                )
	            else:
	                column = filtered_df[search_field]
	                # astype(str) lets numeric columns such as "id" be searched;
	                # regex=False keeps the query literal (e.g. "c++", "("),
	                # matching the plain substring test used for "all".
	                # Missing values never match, as before.
	                mask = column.notna() & column.astype(str).str.lower().str.contains(
	                    needle, regex=False, na=False
	                )
	            filtered_df = filtered_df[mask]
	        if max_rows is not None and max_rows != "All":
	            filtered_df = filtered_df.head(int(max_rows))
	        status = "Filtered notes loaded" if not filtered_df.empty else "No matching notes found"
	        return filtered_df, status
	    except Exception as e:
	        # Deliberately broad: report the problem in the status box instead of
	        # crashing the UI callback (e.g. unknown column, bad max_rows value).
	        return pd.DataFrame(), f"Error: {str(e)}"


	def view_note_content(selected_row: int, notes_df):
	    """Return the ``notes`` cell of the row at position ``selected_row``.

	    Args:
	        selected_row (int): Positional index of the chosen row.
	        notes_df (pd.DataFrame): Table of notes to read from.

	    Returns:
	        The value stored in the "notes" column of that row, a fixed message
	        when nothing is selected or the table is empty, or an "Error: ..."
	        string when the lookup fails.
	    """
	    has_selection = selected_row is not None
	    if not (has_selection and not notes_df.empty):
	        return "No note selected or no data available"

	    try:
	        chosen = notes_df.iloc[selected_row]
	        return chosen["notes"]
	    except Exception as exc:
	        # Out-of-range positions or a missing "notes" column end up here.
	        return f"Error: {exc}"


	def download_notes(notes_df, format_choice):
	    """Export the notes table to a temporary CSV or JSON file for download.

	    The file is written into a fresh temporary directory so that it keeps a
	    readable name ("notes.csv" / "notes.json"). JSON output is a single array
	    of row objects.

	    Args:
	        notes_df (pd.DataFrame): Notes to export.
	        format_choice (str): "CSV" or "JSON".

	    Returns:
	        tuple: A 2-tuple of
	            - gr.File or None: A visible file component pointing at the
	              written file, or None when nothing could be exported.
	            - str: Status message ("Download ready",
	              "Data not available to download", "Invalid format selected",
	              or "Error: ...").
	    """
	    import tempfile
	    from pathlib import Path

	    if notes_df is None or notes_df.empty:
	        return None, "Data not available to download"

	    if format_choice == "CSV":
	        filename = "notes.csv"
	    elif format_choice == "JSON":
	        filename = "notes.json"
	    else:
	        return None, "Invalid format selected"

	    try:
	        target = Path(tempfile.mkdtemp(prefix="notes_")) / filename
	        if format_choice == "CSV":
	            notes_df.to_csv(target, index=False)
	        else:
	            # A JSON array (not JSON Lines) so the .json file parses as one document.
	            notes_df.to_json(target, orient="records", indent=2)
	        # gr.File takes a file path as its value; it has no filename or
	        # mime_type keywords and does not accept raw bytes.
	        return gr.File(value=str(target), visible=True), "Download ready"
	    except Exception as e:
	        # Deliberately broad: surface export problems in the status box.
	        return None, f"Error: {str(e)}"


	# Gradio Tabbed Interface
	# Two tabs: "URL Scraper" (submit URLs, page through results) and
	# "View Notes" (fetch, search, inspect and download stored notes).
	with gr.Blocks() as app:
	    gr.Markdown("# Knowledge Store App")
	    with gr.Tabs():
	        # Tab 1: Input Client with Multi-URL Support and Pagination
	        with gr.Tab(label="URL Scraper"):
	            url_input = gr.Textbox(
	                label="Enter Webpage URLs (one per line)",
	                placeholder="https://example.com\nhttps://wikipedia.org",
	                lines=5
	            )
	            scrape_button = gr.Button("Scrape URLs")
	            scrape_output = gr.Dataframe(
	                headers=["URL", "Content", "Status"],
	                label="Scraped Results",
	                wrap=False
	            )
	            scrape_status = gr.Textbox(label="Status")
	            with gr.Row():
	                prev_button = gr.Button("Previous Page")
	                next_button = gr.Button("Next Page")
	            page_info = gr.Textbox(label="Page", value="Page 1", interactive=False)
	            # Per-session state: every scraped row and the page being shown.
	            full_results = gr.State(pd.DataFrame())
	            current_page = gr.State(1)

	            scrape_button.click(
	                fn=scrape_websites,
	                inputs=url_input,
	                outputs=[scrape_output, scrape_status, full_results, current_page, page_info]
	            )
	            prev_button.click(
	                fn=change_page,
	                inputs=[full_results, current_page, gr.State("prev")],
	                outputs=[scrape_output, scrape_status, full_results, current_page, page_info]
	            )
	            next_button.click(
	                fn=change_page,
	                inputs=[full_results, current_page, gr.State("next")],
	                outputs=[scrape_output, scrape_status, full_results, current_page, page_info]
	            )

	        # Tab 2: Retrieval Client with Enhanced Columns
	        with gr.Tab(label="View Notes"):
	            with gr.Row():
	                max_rows = gr.Dropdown(
	                    choices=["5", "10", "25", "All"],
	                    value="All",
	                    label="Max Rows to Display"
	                )
	                search_query = gr.Textbox(label="Search Notes", placeholder="Enter search term")
	                search_field = gr.Dropdown(
	                    choices=["id", "topic", "notes", "url", "tag", "all"],
	                    value="all",
	                    label="Search Field"
	                )
	            retrieve_button = gr.Button("Fetch Notes")
	            notes_table = gr.Dataframe(
	                headers=["id", "topic", "notes", "url", "tag"],
	                label="Stored Notes",
	                interactive=True,
	                wrap=False
	            )
	            notes_status = gr.Textbox(label="Status")
	            content_view = gr.Textbox(label="Selected Note Content", lines=5)
	            with gr.Row():
	                format_choice = gr.Dropdown(
	                    choices=["CSV", "JSON"],
	                    value="CSV",
	                    label="Download Format"
	                )
	                download_button = gr.Button("Download Notes")
	            download_file = gr.File(label="Download File", visible=False)
	            # Unfiltered copy of the last fetch. Filters read from here rather
	            # than from the visible table, so widening a search or raising the
	            # row limit can bring rows back.
	            all_notes = gr.State(pd.DataFrame())

	            def _fetch_notes():
	                """Fetch notes and return them for both the table and the unfiltered store."""
	                notes, status = retrieve_notes()
	                return notes, notes, status

	            def _show_selected_note(displayed_notes, evt: gr.SelectData):
	                """Show the note of the clicked row; Gradio supplies the click via ``evt``."""
	                # For a Dataframe the index is a (row, column) pair.
	                index = evt.index
	                row = index[0] if isinstance(index, (list, tuple)) else index
	                return view_note_content(row, displayed_notes)

	            retrieve_button.click(
	                fn=_fetch_notes,
	                outputs=[notes_table, all_notes, notes_status]
	            )
	            max_rows.change(
	                fn=filter_notes,
	                inputs=[all_notes, max_rows, search_query, search_field],
	                outputs=[notes_table, notes_status]
	            )
	            search_query.change(
	                fn=filter_notes,
	                inputs=[all_notes, max_rows, search_query, search_field],
	                outputs=[notes_table, notes_status]
	            )
	            search_field.change(
	                fn=filter_notes,
	                inputs=[all_notes, max_rows, search_query, search_field],
	                outputs=[notes_table, notes_status]
	            )
	            notes_table.select(
	                fn=_show_selected_note,
	                inputs=[notes_table],
	                outputs=content_view
	            )
	            download_button.click(
	                fn=download_notes,
	                inputs=[notes_table, format_choice],
	                outputs=[download_file, notes_status]
	            )

	app.launch()