davanstrien HF staff committed on
Commit
2834fe9
·
1 Parent(s): 35db041

update views

Browse files
Files changed (1) hide show
  1. app.py +71 -7
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import asyncio
2
- import json
3
  import re
4
  from typing import Dict, List
5
 
@@ -7,17 +6,20 @@ import gradio as gr
7
  import httpx
8
  from cashews import cache
9
  from huggingface_hub import ModelCard
10
- from ragatouille_search import create_ragatouille_interface, search_with_ragatouille
 
11
 
12
  cache.setup("mem://")
13
- API_URL = "https://davanstrien-huggingface-datasets-search-v2.hf.space/similar"
14
  HF_API_URL = "https://huggingface.co/api/datasets"
15
  README_URL_TEMPLATE = "https://huggingface.co/datasets/{}/raw/main/README.md"
16
 
17
 
18
  async def fetch_similar_datasets(dataset_id: str, limit: int = 10) -> List[Dict]:
19
  async with httpx.AsyncClient() as client:
20
- response = await client.get(f"{API_URL}?dataset_id={dataset_id}&n={limit + 1}")
 
 
21
  if response.status_code == 200:
22
  results = response.json()["results"]
23
  # Remove the input dataset from the results
@@ -26,9 +28,9 @@ async def fetch_similar_datasets(dataset_id: str, limit: int = 10) -> List[Dict]
26
 
27
 
28
  async def fetch_similar_datasets_by_text(query: str, limit: int = 10) -> List[Dict]:
29
- async with httpx.AsyncClient() as client:
30
- response = await client.post(
31
- f"{API_URL}-by-text", params={"query": query, "n": limit + 1}
32
  )
33
  if response.status_code == 200:
34
  results = response.json()["results"]
@@ -150,6 +152,37 @@ async def search_similar_datasets(dataset_id: str, limit: int = 10):
150
  return format_results(results, dataset_cards, dataset_infos)
151
 
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  with gr.Blocks() as demo:
154
  gr.Markdown("## 🤗 Dataset Search and Similarity")
155
 
@@ -219,4 +252,35 @@ with gr.Blocks() as demo:
219
  with gr.TabItem("RAGatouille Search"):
220
  ragatouille_interface = create_ragatouille_interface()
221
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  demo.launch()
 
1
  import asyncio
 
2
  import re
3
  from typing import Dict, List
4
 
 
6
  import httpx
7
  from cashews import cache
8
  from huggingface_hub import ModelCard
9
+
10
+ from ragatouille_search import create_ragatouille_interface
11
 
12
  cache.setup("mem://")
13
+ API_URL = "https://davanstrien-huggingface-datasets-search-v2.hf.space"
14
  HF_API_URL = "https://huggingface.co/api/datasets"
15
  README_URL_TEMPLATE = "https://huggingface.co/datasets/{}/raw/main/README.md"
16
 
17
 
18
  async def fetch_similar_datasets(dataset_id: str, limit: int = 10) -> List[Dict]:
19
  async with httpx.AsyncClient() as client:
20
+ response = await client.get(
21
+ f"{API_URL}/similar?dataset_id={dataset_id}&n={limit + 1}"
22
+ )
23
  if response.status_code == 200:
24
  results = response.json()["results"]
25
  # Remove the input dataset from the results
 
28
 
29
 
30
  async def fetch_similar_datasets_by_text(query: str, limit: int = 10) -> List[Dict]:
31
+ async with httpx.AsyncClient(timeout=30) as client:
32
+ response = await client.get(
33
+ f"{API_URL}/similar-text", params={"query": query, "n": limit + 1}
34
  )
35
  if response.status_code == 200:
36
  results = response.json()["results"]
 
152
  return format_results(results, dataset_cards, dataset_infos)
153
 
154
 
155
async def search_viewer(query: str, limit: int = 10):
    """Query the ``/search-viewer`` endpoint and render the hits as HTML.

    Args:
        query: Free-text search query forwarded as the ``query`` parameter.
        limit: Maximum number of results requested (sent as ``n``).

    Returns:
        An HTML string produced by ``format_viewer_results`` when the service
        returns at least one hit, otherwise the plain message
        ``"No results found."`` (non-200 response, or an empty/missing
        ``results`` list).
    """
    async with httpx.AsyncClient(timeout=30) as client:
        response = await client.get(
            f"{API_URL}/search-viewer", params={"query": query, "n": limit}
        )
        if response.status_code == 200:
            # A successful response may still carry no hits; treat an empty or
            # absent "results" list like a failed lookup instead of rendering
            # an empty scroll container.
            results = response.json().get("results") or []
            if results:
                return format_viewer_results(results)
        return "No results found."
164
+
165
+
166
def format_viewer_results(results: List[Dict]) -> str:
    """Render search hits as a scrollable list of embedded dataset viewers.

    Args:
        results: Items with a ``"dataset_id"`` string and a numeric
            ``"similarity"`` value.

    Returns:
        One HTML string: a fixed-height scrolling ``<div>`` containing, per
        result, the dataset id, its similarity score to four decimals, and an
        ``<iframe>`` pointing at the dataset's embedded viewer.
    """
    # Local imports keep this block self-contained without touching the
    # file-level import section.
    from html import escape
    from urllib.parse import quote

    cards = []
    for result in results:
        dataset_id = str(result["dataset_id"])
        # The id comes from a remote service: escape it for the text node and
        # percent-encode it for the URL so it cannot break out of the markup.
        title = escape(dataset_id)
        url_path = quote(dataset_id, safe="/")
        score = result["similarity"]
        cards.append(
            f"""
        <div style='margin-bottom: 20px; border: 1px solid #ddd; padding: 10px;'>
            <h3>{title}</h3>
            <p><strong>Similarity Score:</strong> {score:.4f}</p>
            <iframe
                src="https://huggingface.co/datasets/{url_path}/embed/viewer/default/train"
                frameborder="0"
                width="100%"
                height="560px"
            ></iframe>
        </div>
        """
        )
    # Join once instead of growing a string with += inside the loop.
    return (
        "<div style='height: 600px; overflow-y: auto;'>"
        + "".join(cards)
        + "</div>"
    )
184
+
185
+
186
  with gr.Blocks() as demo:
187
  gr.Markdown("## &#129303; Dataset Search and Similarity")
188
 
 
252
  with gr.TabItem("RAGatouille Search"):
253
  ragatouille_interface = create_ragatouille_interface()
254
 
255
+ with gr.TabItem("Search Viewer"):
256
+ gr.Markdown("## &#128269; Search Viewer")
257
+ with gr.Row():
258
+ gr.Markdown(
259
+ "This tab allows you to search for datasets using the Search Viewer endpoint. "
260
+ "Enter a query to find relevant datasets and preview them."
261
+ )
262
+
263
+ with gr.Row():
264
+ viewer_query = gr.Textbox(
265
+ label="Search Query", placeholder="Enter your search query here"
266
+ )
267
+
268
+ with gr.Row():
269
+ viewer_search_btn = gr.Button("Search")
270
+ viewer_max_results = gr.Slider(
271
+ minimum=1,
272
+ maximum=50,
273
+ step=1,
274
+ value=10,
275
+ label="Maximum number of results",
276
+ )
277
+
278
+ viewer_results = gr.HTML()
279
+
280
+ viewer_search_btn.click(
281
+ lambda query, limit: asyncio.run(search_viewer(query, limit)),
282
+ inputs=[viewer_query, viewer_max_results],
283
+ outputs=viewer_results,
284
+ )
285
+
286
  demo.launch()