Spaces:
Sleeping
Sleeping
Tanuj
committed on
Commit
·
2cf474e
1
Parent(s):
f2c4e2e
Update requirements
Browse files- .gitignore +1 -0
- app.py +4 -3
- rag.py +15 -13
- requirements.txt +4 -1
.gitignore
CHANGED
@@ -2,3 +2,4 @@
|
|
2 |
.DS_Store
|
3 |
.venv/
|
4 |
__pycache__/
|
|
|
|
2 |
.DS_Store
|
3 |
.venv/
|
4 |
__pycache__/
|
5 |
+
.env
|
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
from pathlib import Path
|
3 |
-
from rag import is_allowed_filetype,
|
4 |
import logging
|
5 |
|
6 |
# ---------------------------
|
@@ -56,7 +56,8 @@ def natural_language_handler(query: str) -> str:
|
|
56 |
Returns:
|
57 |
str: Simulated or generated action and result.
|
58 |
"""
|
59 |
-
|
|
|
60 |
|
61 |
|
62 |
def placeholder(feature_name: str = "unknown") -> str:
|
@@ -98,7 +99,7 @@ def handle_file_input(file_path: str | None, uploaded_file: gr.File | None):
|
|
98 |
file_contents = file.read()
|
99 |
|
100 |
|
101 |
-
upload_result =
|
102 |
|
103 |
return f"Uploaded document: {upload_result['id']}"
|
104 |
|
|
|
1 |
import gradio as gr
|
2 |
from pathlib import Path
|
3 |
+
from rag import is_allowed_filetype, upload_file_to_vectara, retrieve_chunks
|
4 |
import logging
|
5 |
|
6 |
# ---------------------------
|
|
|
56 |
Returns:
|
57 |
str: Simulated or generated action and result.
|
58 |
"""
|
59 |
+
chunks, response = retrieve_chunks(query, limit=5)
|
60 |
+
return f"💬 Got {len(chunks)} chunks for your request: “{query}”. Response: {response}"
|
61 |
|
62 |
|
63 |
def placeholder(feature_name: str = "unknown") -> str:
|
|
|
99 |
file_contents = file.read()
|
100 |
|
101 |
|
102 |
+
upload_result = upload_file_to_vectara(file_contents, filepath.name)
|
103 |
|
104 |
return f"Uploaded document: {upload_result['id']}"
|
105 |
|
rag.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import json
|
2 |
import logging
|
3 |
import os
|
|
|
4 |
import requests
|
5 |
from pprint import pprint
|
6 |
from schemas import UploadResult
|
@@ -57,13 +58,13 @@ def save_response_to_file(response_json: dict, filename: str):
|
|
57 |
with open(filename, "w") as f:
|
58 |
json.dump(response_json, f, indent=2)
|
59 |
|
60 |
-
def
|
61 |
"""
|
62 |
-
Uploads a
|
63 |
|
64 |
Args:
|
65 |
-
|
66 |
-
filename (str): The name of the
|
67 |
|
68 |
Returns:
|
69 |
None
|
@@ -74,12 +75,13 @@ def upload_pdf_to_vectara(pdf_bytes: bytes, filename: str) -> UploadResult:
|
|
74 |
"""
|
75 |
CORPUS_KEY = "YouTwo" # Replace with your actual corpus key
|
76 |
|
77 |
-
# Check if
|
78 |
-
if not
|
79 |
-
raise IndexingError("No
|
80 |
|
|
|
81 |
# Ensure valid filename
|
82 |
-
if not
|
83 |
raise IndexingError("Invalid filename. Please provide a filename ending with .pdf")
|
84 |
|
85 |
# Replace with your actual corpus_key and API key
|
@@ -92,11 +94,11 @@ def upload_pdf_to_vectara(pdf_bytes: bytes, filename: str) -> UploadResult:
|
|
92 |
"Accept": "application/json",
|
93 |
"x-api-key": api_key,
|
94 |
}
|
95 |
-
|
96 |
files = {
|
97 |
-
'file': (filename,
|
98 |
}
|
99 |
|
|
|
100 |
try:
|
101 |
response = requests.post(url, headers=headers, files=files)
|
102 |
response.raise_for_status() # Raise an exception for HTTP errors
|
@@ -133,7 +135,7 @@ def process_upload_response(response_json: dict) -> UploadResult:
|
|
133 |
storage_usage=response_json["storage_usage"]
|
134 |
)
|
135 |
# See https://docs.vectara.com/docs/rest-api/query-corpus
|
136 |
-
def retrieve_chunks(query: str) -> tuple[list[str], str]:
|
137 |
"""
|
138 |
Retrieves relevant chunks and a generated summary from the Vectara corpus based on the query.
|
139 |
|
@@ -157,7 +159,7 @@ def retrieve_chunks(query: str) -> tuple[list[str], str]:
|
|
157 |
payload = {
|
158 |
"query": query,
|
159 |
"search": {
|
160 |
-
"limit":
|
161 |
# "reranker": {
|
162 |
# "type": "customer_reranker",
|
163 |
# "reranker_name": "Rerank_Multilingual_v1",
|
@@ -244,7 +246,7 @@ def test_file_upload():
|
|
244 |
pdf_path = Path(FILEPATH).expanduser()
|
245 |
with open(pdf_path, "rb") as f:
|
246 |
pdf_bytes = f.read()
|
247 |
-
|
248 |
except Exception as e:
|
249 |
raise IndexingError(f"Error occurred while uploading PDF: {e}")
|
250 |
|
|
|
1 |
import json
|
2 |
import logging
|
3 |
import os
|
4 |
+
from pathlib import Path
|
5 |
import requests
|
6 |
from pprint import pprint
|
7 |
from schemas import UploadResult
|
|
|
58 |
with open(filename, "w") as f:
|
59 |
json.dump(response_json, f, indent=2)
|
60 |
|
61 |
+
def upload_file_to_vectara(file_bytes: bytes, filename: str) -> UploadResult:
|
62 |
"""
|
63 |
+
Uploads a supported file type to Vectara for processing.
|
64 |
|
65 |
Args:
|
66 |
+
file_bytes (bytes): The file content in bytes.
|
67 |
+
filename (str): The name of the file.
|
68 |
|
69 |
Returns:
|
70 |
None
|
|
|
75 |
"""
|
76 |
CORPUS_KEY = "YouTwo" # Replace with your actual corpus key
|
77 |
|
78 |
+
# Check if file_bytes is provided
|
79 |
+
if not file_bytes:
|
80 |
+
raise IndexingError("No file bytes provided.")
|
81 |
|
82 |
+
suffix = Path(filename).suffix
|
83 |
# Ensure valid filename
|
84 |
+
if not is_allowed_filetype(suffix):
|
85 |
raise IndexingError("Invalid filename. Please provide a filename ending with .pdf")
|
86 |
|
87 |
# Replace with your actual corpus_key and API key
|
|
|
94 |
"Accept": "application/json",
|
95 |
"x-api-key": api_key,
|
96 |
}
|
|
|
97 |
files = {
|
98 |
+
'file': (filename, file_bytes)
|
99 |
}
|
100 |
|
101 |
+
|
102 |
try:
|
103 |
response = requests.post(url, headers=headers, files=files)
|
104 |
response.raise_for_status() # Raise an exception for HTTP errors
|
|
|
135 |
storage_usage=response_json["storage_usage"]
|
136 |
)
|
137 |
# See https://docs.vectara.com/docs/rest-api/query-corpus
|
138 |
+
def retrieve_chunks(query: str, limit: int = 10) -> tuple[list[str], str]:
|
139 |
"""
|
140 |
Retrieves relevant chunks and a generated summary from the Vectara corpus based on the query.
|
141 |
|
|
|
159 |
payload = {
|
160 |
"query": query,
|
161 |
"search": {
|
162 |
+
"limit": limit, # Number of search results to retrieve
|
163 |
# "reranker": {
|
164 |
# "type": "customer_reranker",
|
165 |
# "reranker_name": "Rerank_Multilingual_v1",
|
|
|
246 |
pdf_path = Path(FILEPATH).expanduser()
|
247 |
with open(pdf_path, "rb") as f:
|
248 |
pdf_bytes = f.read()
|
249 |
+
upload_file_to_vectara(pdf_bytes, pdf_path.name)
|
250 |
except Exception as e:
|
251 |
raise IndexingError(f"Error occurred while uploading PDF: {e}")
|
252 |
|
requirements.txt
CHANGED
@@ -1,2 +1,5 @@
|
|
1 |
gradio[mcp]==5.33.0
|
2 |
-
|
|
|
|
|
|
|
|
1 |
gradio[mcp]==5.33.0
|
2 |
+
requests
|
3 |
+
python-dotenv
|
4 |
+
smolagents
|
5 |
+
fastrtc
|