Tanuj committed on
Commit
2cf474e
·
1 Parent(s): f2c4e2e

Update requirements

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +4 -3
  3. rag.py +15 -13
  4. requirements.txt +4 -1
.gitignore CHANGED
@@ -2,3 +2,4 @@
2
  .DS_Store
3
  .venv/
4
  __pycache__/
 
 
2
  .DS_Store
3
  .venv/
4
  __pycache__/
5
+ .env
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  from pathlib import Path
3
- from rag import is_allowed_filetype, upload_pdf_to_vectara, retrieve_chunks
4
  import logging
5
 
6
  # ---------------------------
@@ -56,7 +56,8 @@ def natural_language_handler(query: str) -> str:
56
  Returns:
57
  str: Simulated or generated action and result.
58
  """
59
- return f"💬 Got your request: “{query}”. This would be processed by the language understanding agent system."
 
60
 
61
 
62
  def placeholder(feature_name: str = "unknown") -> str:
@@ -98,7 +99,7 @@ def handle_file_input(file_path: str | None, uploaded_file: gr.File | None):
98
  file_contents = file.read()
99
 
100
 
101
- upload_result = upload_pdf_to_vectara(file_contents, filepath.name)
102
 
103
  return f"Uploaded document: {upload_result['id']}"
104
 
 
1
  import gradio as gr
2
  from pathlib import Path
3
+ from rag import is_allowed_filetype, upload_file_to_vectara, retrieve_chunks
4
  import logging
5
 
6
  # ---------------------------
 
56
  Returns:
57
  str: Simulated or generated action and result.
58
  """
59
+ chunks, response = retrieve_chunks(query, limit=5)
60
+ return f"💬 Got {len(chunks)} chunks for your request: “{query}”. Response: {response}"
61
 
62
 
63
  def placeholder(feature_name: str = "unknown") -> str:
 
99
  file_contents = file.read()
100
 
101
 
102
+ upload_result = upload_file_to_vectara(file_contents, filepath.name)
103
 
104
  return f"Uploaded document: {upload_result['id']}"
105
 
rag.py CHANGED
@@ -1,6 +1,7 @@
1
  import json
2
  import logging
3
  import os
 
4
  import requests
5
  from pprint import pprint
6
  from schemas import UploadResult
@@ -57,13 +58,13 @@ def save_response_to_file(response_json: dict, filename: str):
57
  with open(filename, "w") as f:
58
  json.dump(response_json, f, indent=2)
59
 
60
- def upload_pdf_to_vectara(pdf_bytes: bytes, filename: str) -> UploadResult:
61
  """
62
- Uploads a PDF file to Vectara for processing.
63
 
64
  Args:
65
- pdf_bytes (bytes): The PDF file content in bytes.
66
- filename (str): The name of the PDF file.
67
 
68
  Returns:
69
  None
@@ -74,12 +75,13 @@ def upload_pdf_to_vectara(pdf_bytes: bytes, filename: str) -> UploadResult:
74
  """
75
  CORPUS_KEY = "YouTwo" # Replace with your actual corpus key
76
 
77
- # Check if pdf_bytes is provided
78
- if not pdf_bytes:
79
- raise IndexingError("No PDF bytes provided.")
80
 
 
81
  # Ensure valid filename
82
- if not filename.endswith(".pdf"):
83
  raise IndexingError("Invalid filename. Please provide a filename ending with .pdf")
84
 
85
  # Replace with your actual corpus_key and API key
@@ -92,11 +94,11 @@ def upload_pdf_to_vectara(pdf_bytes: bytes, filename: str) -> UploadResult:
92
  "Accept": "application/json",
93
  "x-api-key": api_key,
94
  }
95
-
96
  files = {
97
- 'file': (filename, pdf_bytes, 'application/pdf')
98
  }
99
 
 
100
  try:
101
  response = requests.post(url, headers=headers, files=files)
102
  response.raise_for_status() # Raise an exception for HTTP errors
@@ -133,7 +135,7 @@ def process_upload_response(response_json: dict) -> UploadResult:
133
  storage_usage=response_json["storage_usage"]
134
  )
135
  # See https://docs.vectara.com/docs/rest-api/query-corpus
136
- def retrieve_chunks(query: str) -> tuple[list[str], str]:
137
  """
138
  Retrieves relevant chunks and a generated summary from the Vectara corpus based on the query.
139
 
@@ -157,7 +159,7 @@ def retrieve_chunks(query: str) -> tuple[list[str], str]:
157
  payload = {
158
  "query": query,
159
  "search": {
160
- "limit": 10, # Number of search results to retrieve
161
  # "reranker": {
162
  # "type": "customer_reranker",
163
  # "reranker_name": "Rerank_Multilingual_v1",
@@ -244,7 +246,7 @@ def test_file_upload():
244
  pdf_path = Path(FILEPATH).expanduser()
245
  with open(pdf_path, "rb") as f:
246
  pdf_bytes = f.read()
247
- upload_pdf_to_vectara(pdf_bytes, pdf_path.name)
248
  except Exception as e:
249
  raise IndexingError(f"Error occurred while uploading PDF: {e}")
250
 
 
1
  import json
2
  import logging
3
  import os
4
+ from pathlib import Path
5
  import requests
6
  from pprint import pprint
7
  from schemas import UploadResult
 
58
  with open(filename, "w") as f:
59
  json.dump(response_json, f, indent=2)
60
 
61
+ def upload_file_to_vectara(file_bytes: bytes, filename: str) -> UploadResult:
62
  """
63
+ Uploads a supported file type to Vectara for processing.
64
 
65
  Args:
66
+ file_bytes (bytes): The file content in bytes.
67
+ filename (str): The name of the file.
68
 
69
  Returns:
70
  None
 
75
  """
76
  CORPUS_KEY = "YouTwo" # Replace with your actual corpus key
77
 
78
+ # Check if file_bytes is provided
79
+ if not file_bytes:
80
+ raise IndexingError("No file bytes provided.")
81
 
82
+ suffix = Path(filename).suffix
83
  # Ensure valid filename
84
+ if not is_allowed_filetype(suffix):
85
  raise IndexingError("Invalid filename. Please provide a filename ending with .pdf")
86
 
87
  # Replace with your actual corpus_key and API key
 
94
  "Accept": "application/json",
95
  "x-api-key": api_key,
96
  }
 
97
  files = {
98
+ 'file': (filename, file_bytes)
99
  }
100
 
101
+
102
  try:
103
  response = requests.post(url, headers=headers, files=files)
104
  response.raise_for_status() # Raise an exception for HTTP errors
 
135
  storage_usage=response_json["storage_usage"]
136
  )
137
  # See https://docs.vectara.com/docs/rest-api/query-corpus
138
+ def retrieve_chunks(query: str, limit: int = 10) -> tuple[list[str], str]:
139
  """
140
  Retrieves relevant chunks and a generated summary from the Vectara corpus based on the query.
141
 
 
159
  payload = {
160
  "query": query,
161
  "search": {
162
+ "limit": limit, # Number of search results to retrieve
163
  # "reranker": {
164
  # "type": "customer_reranker",
165
  # "reranker_name": "Rerank_Multilingual_v1",
 
246
  pdf_path = Path(FILEPATH).expanduser()
247
  with open(pdf_path, "rb") as f:
248
  pdf_bytes = f.read()
249
+ upload_file_to_vectara(pdf_bytes, pdf_path.name)
250
  except Exception as e:
251
  raise IndexingError(f"Error occurred while uploading PDF: {e}")
252
 
requirements.txt CHANGED
@@ -1,2 +1,5 @@
1
  gradio[mcp]==5.33.0
2
- python-dotenv
 
 
 
 
1
  gradio[mcp]==5.33.0
2
+ requests
3
+ python-dotenv
4
+ smolagents
5
+ fastrtc