Spaces:

Agents-MCP-Hackathon
/

YouTwo

Sleeping

App Files Files Community

Tanuj committed on Jun 10

Commit

2cf474e

1 Parent(s): f2c4e2e

Update requirements

Browse files

Files changed (4) hide show

.gitignore +1 -0
app.py +4 -3
rag.py +15 -13
requirements.txt +4 -1

.gitignore CHANGED Viewed

@@ -2,3 +2,4 @@
 .DS_Store
 .venv/
 __pycache__/

 .DS_Store
 .venv/
 __pycache__/
+.env

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 from pathlib import Path
-from rag import is_allowed_filetype, upload_pdf_to_vectara, retrieve_chunks
 import logging
 # ---------------------------
@@ -56,7 +56,8 @@ def natural_language_handler(query: str) -> str:
     Returns:
         str: Simulated or generated action and result.
     """
-    return f"💬 Got your request: “{query}”. This would be processed by the language understanding agent system."
 def placeholder(feature_name: str = "unknown") -> str:
@@ -98,7 +99,7 @@ def handle_file_input(file_path: str | None, uploaded_file: gr.File | None):
         file_contents = file.read()
-    upload_result = upload_pdf_to_vectara(file_contents, filepath.name)
     return f"Uploaded document: {upload_result['id']}"

 import gradio as gr
 from pathlib import Path
+from rag import is_allowed_filetype, upload_file_to_vectara, retrieve_chunks
 import logging
 # ---------------------------
     Returns:
         str: Simulated or generated action and result.
     """
+    chunks, response = retrieve_chunks(query, limit=5)
+    return f"💬 Got {len(chunks)} chunks for your request: “{query}”. Response: {response}"
 def placeholder(feature_name: str = "unknown") -> str:
         file_contents = file.read()
+    upload_result = upload_file_to_vectara(file_contents, filepath.name)
     return f"Uploaded document: {upload_result['id']}"

rag.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import json
 import logging
 import os
 import requests
 from pprint import pprint
 from schemas import UploadResult
@@ -57,13 +58,13 @@ def save_response_to_file(response_json: dict, filename: str):
     with open(filename, "w") as f:
         json.dump(response_json, f, indent=2)
-def upload_pdf_to_vectara(pdf_bytes: bytes, filename: str)  -> UploadResult:
     """
-    Uploads a PDF file to Vectara for processing.
     Args:
-        pdf_bytes (bytes): The PDF file content in bytes.
-        filename (str): The name of the PDF file.
     Returns:
         None
@@ -74,12 +75,13 @@ def upload_pdf_to_vectara(pdf_bytes: bytes, filename: str)  -> UploadResult:
     """
     CORPUS_KEY = "YouTwo"  # Replace with your actual corpus key
-    # Check if pdf_bytes is provided
-    if not pdf_bytes:
-        raise IndexingError("No PDF bytes provided.")
     # Ensure valid filename
-    if not filename.endswith(".pdf"):
         raise IndexingError("Invalid filename. Please provide a filename ending with .pdf")
     # Replace with your actual corpus_key and API key
@@ -92,11 +94,11 @@ def upload_pdf_to_vectara(pdf_bytes: bytes, filename: str)  -> UploadResult:
         "Accept": "application/json",
         "x-api-key": api_key,
     }
     files = {
-        'file': (filename, pdf_bytes, 'application/pdf')
     }
     try:
         response = requests.post(url, headers=headers, files=files)
         response.raise_for_status()  # Raise an exception for HTTP errors
@@ -133,7 +135,7 @@ def process_upload_response(response_json: dict) -> UploadResult:
         storage_usage=response_json["storage_usage"]
     )
 # See https://docs.vectara.com/docs/rest-api/query-corpus
-def retrieve_chunks(query: str) -> tuple[list[str], str]:
     """
     Retrieves relevant chunks and a generated summary from the Vectara corpus based on the query.
@@ -157,7 +159,7 @@ def retrieve_chunks(query: str) -> tuple[list[str], str]:
     payload = {
         "query": query,
         "search": {
-            "limit": 10,  # Number of search results to retrieve
             # "reranker": {
             #     "type": "customer_reranker",
             #     "reranker_name": "Rerank_Multilingual_v1",
@@ -244,7 +246,7 @@ def test_file_upload():
         pdf_path = Path(FILEPATH).expanduser()
         with open(pdf_path, "rb") as f:
             pdf_bytes = f.read()
-        upload_pdf_to_vectara(pdf_bytes, pdf_path.name)
     except Exception as e:
         raise IndexingError(f"Error occurred while uploading PDF: {e}")

 import json
 import logging
 import os
+from pathlib import Path
 import requests
 from pprint import pprint
 from schemas import UploadResult
     with open(filename, "w") as f:
         json.dump(response_json, f, indent=2)
+def upload_file_to_vectara(file_bytes: bytes, filename: str)  -> UploadResult:
     """
+    Uploads a supported file type to Vectara for processing.
     Args:
+        file_bytes (bytes): The file content in bytes.
+        filename (str): The name of the file.
     Returns:
         None
     """
     CORPUS_KEY = "YouTwo"  # Replace with your actual corpus key
+    # Check if file_bytes is provided
+    if not file_bytes:
+        raise IndexingError("No file bytes provided.")
+    suffix = Path(filename).suffix
     # Ensure valid filename
+    if not is_allowed_filetype(suffix):
         raise IndexingError("Invalid filename. Please provide a filename ending with .pdf")
     # Replace with your actual corpus_key and API key
         "Accept": "application/json",
         "x-api-key": api_key,
     }
     files = {
+        'file': (filename, file_bytes)
     }
     try:
         response = requests.post(url, headers=headers, files=files)
         response.raise_for_status()  # Raise an exception for HTTP errors
         storage_usage=response_json["storage_usage"]
     )
 # See https://docs.vectara.com/docs/rest-api/query-corpus
+def retrieve_chunks(query: str, limit: int = 10) -> tuple[list[str], str]:
     """
     Retrieves relevant chunks and a generated summary from the Vectara corpus based on the query.
     payload = {
         "query": query,
         "search": {
+            "limit": limit,  # Number of search results to retrieve
             # "reranker": {
             #     "type": "customer_reranker",
             #     "reranker_name": "Rerank_Multilingual_v1",
         pdf_path = Path(FILEPATH).expanduser()
         with open(pdf_path, "rb") as f:
             pdf_bytes = f.read()
+        upload_file_to_vectara(pdf_bytes, pdf_path.name)
     except Exception as e:
         raise IndexingError(f"Error occurred while uploading PDF: {e}")

requirements.txt CHANGED Viewed

@@ -1,2 +1,5 @@
 gradio[mcp]==5.33.0
-python-dotenv

 gradio[mcp]==5.33.0
+requests
+python-dotenv
+smolagents
+fastrtc