debugged-blah / app.py
DrishtiSharma's picture
Update app.py
5633d52 verified
import os
import chromadb
from datetime import datetime
import streamlit as st
from patentwiz import preprocess_data, qa_agent
# Refuse to render the app when no OpenAI credentials are configured:
# show an error in the page and halt the script run.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    st.error("OPENAI_API_KEY not found! Please set it in the environment variables or Hugging Face Secrets.")
    st.stop()

# Reset ChromaDB's shared system cache (workaround for the tenant issue).
chromadb.api.client.SharedSystemClient.clear_system_cache()
# Instruction text passed as the first argument to qa_agent.call_QA_to_json
# for every analyzed patent. It asks for RF-related physical measurements
# plus patent metadata, and shows the JSON layout the answer should follow.
# The text is sent verbatim, so any edit to it changes what the model is asked.
PROMPT = """
Task: Carefully review the provided patent text and extract all relevant technical information, specifically for RF devices, antennas, and related hardware. Focus on the following parameters:
1. **Physical Measurements**:
- Frequency, impedance, voltage, current, power, gain, bandwidth, radiation efficiency, and other measurable RF parameters.
- For each measurement, provide the following details:
- Substance or component being measured.
- Specific value or range of the measurement.
- Unit of measurement (if provided).
- Measurement type or context (e.g., frequency, impedance, gain, etc.).
2. **Patent Metadata**:
- Title of the patent.
- Abstract summarizing the technical focus.
- Metadata, including:
- Patent number.
- Filing date.
- Inventors.
- Assignee (if applicable).
### Output Format:
The response should be formatted as a structured JSON object, as shown below:
{
"Patent_Title": "Title",
"Patent_Abstract": "Abstract",
"Patent_Metadata": {
"Patent_Number": "Number",
"Filing_Date": "Date",
"Inventors": ["Name1", "Name2"],
"Assignee": "Assignee Name"
},
"Content": [
{
"Measurement_substance": "substance",
"Measured_value": "value",
"Measured_unit": "unit",
"measurement_type": "type"
}
// Additional measurements
]
}
"""
# --- Page heading and short description -------------------------------------
st.title("Blah")
st.write(
    "Analyze patents to extract physical measurements such as frequency, bandwidth, and more. "
    "Provide a date range to download patents and analyze them using GPT models."
)

# --- Analysis parameters -----------------------------------------------------
# Dates are collected as free text here and parsed when the button is pressed.
st.header("Enter Date Range for Patent Analysis")
start_date_input = st.text_input(
    label="Enter the start date (YYYY-MM-DD):",
    value="2024-06-20",
)
end_date_input = st.text_input(
    label="Enter the end date (YYYY-MM-DD):",
    value="2024-06-27",
)
num_patents_to_analyze = st.number_input(
    label="Number of patents to analyze:",
    min_value=1,
    value=3,
    step=1,
    help="Specify how many patents you want to analyze.",
)
model_choice = st.selectbox(
    label="Select a model for analysis:",
    options=["gpt-3.5-turbo", "gpt-4"],
    help="Choose the OpenAI GPT model for the analysis.",
)
logging_enabled = st.checkbox(
    label="Enable logging?",
    value=False,
    help="Toggle logging for debugging purposes.",
)

# --- Keyword management ------------------------------------------------------
st.header("Manage Keywords")
st.write("Add or delete keywords for filtering patents.")
default_keywords = [
    "RF",
    "Radio Frequency",
    "Wireless Communication",
    "Antenna",
    "Microwave",
    "Electromagnetic Waves",
    "Beamforming",
    "5G",
    "6G",
    "Patch Antenna",
    "Dipole Antenna",
    "Phased Array",
    "Radiation Pattern",
    "IoT",
    "Wireless Charging",
]
keywords_input = st.text_area(
    label="Enter keywords for filtering (comma-separated):",
    value=", ".join(default_keywords),
)
# Split on commas, trim surrounding whitespace, and drop entries that end up empty.
user_keywords = list(filter(None, map(str.strip, keywords_input.split(","))))

# --- Field selection ---------------------------------------------------------
st.header("Choose Fields for Filtering")
fields = st.multiselect(
    label="Select fields to search for keywords:",
    options=["Title", "Abstract", "Claims", "Summary", "Detailed Description"],
    default=["Title", "Abstract"],
)
# Run Analysis Button
# Pipeline executed on click: validate inputs -> parse dates -> download and
# preprocess patents -> keyword-filter them -> send the first N filtered
# patents to the QA agent -> show the accumulated cost and each JSON result.
if st.button("Analyze Patents"):
    if not start_date_input or not end_date_input:
        st.error("Please enter both start and end dates!")
    elif not user_keywords:
        st.error("Please provide at least one keyword for filtering.")
    elif not fields:
        st.error("Please select at least one field for filtering.")
    else:
        # Parse the dates in their own try block so that only genuine
        # format problems are reported as "Invalid date format". A
        # ValueError raised later in the pipeline is a different failure
        # and is reported by the generic handler further down.
        try:
            start_date = datetime.strptime(start_date_input, "%Y-%m-%d")
            end_date = datetime.strptime(end_date_input, "%Y-%m-%d")
        except ValueError as ve:
            st.error(f"Invalid date format: {ve}")
            st.stop()

        # Validate date range (equal start and end dates are accepted).
        if start_date > end_date:
            st.error("End date must be after start date!")
            st.stop()

        try:
            # Step 1: Download and preprocess patents
            with st.spinner("Downloading and extracting patents..."):
                saved_patent_names = preprocess_data.parse_and_save_patents(
                    start_date, end_date, logging_enabled
                )
            if not saved_patent_names:
                st.error("No patents found for the given date range.")
                st.stop()
            st.success(f"{len(saved_patent_names)} patents found and processed!")

            # Step 2: Filter patents based on user input
            with st.spinner("Filtering patents..."):
                filtered_patents = preprocess_data.filter_rf_patents(
                    saved_patent_names, keywords=user_keywords, fields=fields
                )
            if not filtered_patents:
                st.error("No patents matched the filtering criteria.")
                st.stop()
            st.success(f"{len(filtered_patents)} relevant patents found and processed!")

            # Step 3: Analyze patents using GPT
            # Despite the name, this is simply the first N filtered patents;
            # no random sampling takes place.
            random_patents = filtered_patents[:num_patents_to_analyze]
            total_cost = 0
            results = []
            st.write("Starting patent analysis...")
            for i, _ in enumerate(random_patents):
                # Pass the filtered selection, not saved_patent_names: the
                # index `i` counts positions in random_patents, so indexing
                # the unfiltered list would analyze patents that never
                # passed the keyword filter.
                # NOTE(review): assumes filter_rf_patents returns entries of
                # the same kind as parse_and_save_patents and that
                # call_QA_to_json looks up entry `i` of the list it is
                # given -- confirm against patentwiz.
                cost, output = qa_agent.call_QA_to_json(
                    PROMPT,
                    start_date.year, start_date.month, start_date.day,
                    random_patents, i, logging_enabled, model_choice
                )
                total_cost += cost
                results.append(output)

            # Step 4: Display results
            st.write(f"**Total Cost:** ${total_cost:.4f}")
            st.write("### Analysis Results:")
            for idx, result in enumerate(results):
                st.subheader(f"Patent {idx + 1}")
                st.json(result)
        except Exception as e:
            # Top-level boundary for the pipeline: surface the failure in
            # the UI instead of letting the script crash.
            st.error(f"An unexpected error occurred: {e}")