TejaCherukuri committed on
Commit 2544e0a · 1 parent: 77bf1df

Bug fixes and a new feature (job description input option)

Files changed (4)
  1. app.py +55 -24
  2. src/job_extractor.py +46 -11
  3. src/message_writer.py +10 -5
  4. src/resume_loader.py +2 -2
app.py CHANGED
@@ -1,6 +1,7 @@
 import streamlit as st
 from src.resume_loader import ResumeLoaderFactory
 from src.job_extractor import JobExtractor
+# from src.job_extractor_2 import JobExtractor2
 from src.message_writer import MessageWriter
 
 def main():
@@ -18,35 +19,61 @@ def main():
     st.subheader("Upload Your Resume")
     uploaded_file = st.file_uploader("Upload a PDF Resume", type=["pdf"])
 
-    # Input field for the job URL
-    job_url = st.text_input(
-        "Enter the Job URL",
-        placeholder="https://amazon.jobs/en/jobs/2831138/software-development-engineer-2025-ai-ml"
+    # Job Input Option (Radio Buttons for Job URL or Description)
+    input_option = st.radio(
+        "How would you like to provide the job information?",
+        ("Job URL", "Job Description")
     )
 
+    job_url = None
+    job_description = None
+
+    # Show corresponding input field based on the selection
+    if input_option == "Job URL":
+        job_url = st.text_input(
+            "Enter the Job URL",
+            placeholder="https://amazon.jobs/en/jobs/2831138/software-development-engineer-2025-ai-ml"
+        )
+
+        # Display the alert with color
+        with st.expander("🔔 **Alert!** Job URL Instructions", expanded=True):
+            st.markdown(
+                """
+                <p style="color:red;">If using a LinkedIn job URL (Easy Apply), paste the job description instead.</p>
+                """, unsafe_allow_html=True)
+
+    elif input_option == "Job Description":
+        job_description = st.text_area("Enter the Job Description", height=200)
+
+
     # Button to trigger the flow
     if st.button("Generate Message"):
-        if job_url:
-            st.info("Processing your request...")
-            # Trigger the flow (replace with your logic)
-            thought, response = generate_message_for_job(job_url, uploaded_file)
-
-            # Create two columns for displaying outputs side by side
-            col1, col2 = st.columns(2)
-
-            # Display Thought Process in the first column
-            with col1:
-                st.subheader("DeepThink")
-                st.text_area(" ", value=thought, height=500)
-
-            # Display Generated Message in the second column
-            with col2:
-                st.subheader("Generated Message")
-                st.text_area(" ", value=response, height=500)
+        if job_url or job_description:
+            try:
+                st.info("Processing your request...")
+                # Trigger the flow (replace with your logic)
+                thought, response = generate_message_for_job(job_url, uploaded_file, job_description)
+
+                # Create two columns for displaying outputs side by side
+                col1, col2 = st.columns(2)
+
+                # Display Thought Process in the first column
+                with col1:
+                    st.subheader("DeepThink")
+                    st.text_area(" ", value=thought, height=500)
+
+                # Display Generated Message in the second column
+                with col2:
+                    st.subheader("Generated Message")
+                    st.text_area(" ", value=response, height=500)
+            except ValueError as e:
+                st.error(f"Error: {e}")
+            except Exception as e:
+                st.error(f"Unexpected Error: {e}")
         else:
             st.error("Please provide a valid job URL.")
 
-def generate_message_for_job(job_url, uploaded_file):
+def generate_message_for_job(job_url, uploaded_file, job_description=None):
 
     # Load the resume using the appropriate method (PDF or text)
     if uploaded_file:
@@ -58,8 +85,12 @@ def generate_message_for_job(job_url, uploaded_file):
 
     # Extract the key info from job URL
    extractor = JobExtractor()
-    job = extractor.parse_job_from_web(job_url)
-    job = extractor.extract_jobdata(job)
+    if job_url:
+        job_description = extractor.parse_job_from_web(job_url)
+
+    job = extractor.extract_jobdata(job_description)
+    if not job or not job.get('job_postings'):
+        raise ValueError(f"Cannot fetch job details from this url: {job_url}, Use the 'Job Description' field for better assistance!")
 
     # Invoke chat model
     writer = MessageWriter()
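To make the new flow easier to review in isolation, here is a minimal, self-contained sketch of the branching that app.py now performs. `StubExtractor`, `resolve_job`, and the example URL are hypothetical stand-ins, not part of this commit; only the guard on `job_postings` mirrors the committed code.

```python
# Hypothetical stand-in for JobExtractor, used only to illustrate the new flow.
class StubExtractor:
    def parse_job_from_web(self, url):
        return None  # simulate a blocked or unscrapable page

    def extract_jobdata(self, text):
        # mirrors the extractor's graceful empty fallback shape
        return {"job_postings": []} if not text else {"job_postings": [text]}


def resolve_job(job_url, job_description, extractor):
    # URL mode scrapes first; description mode uses the pasted text directly.
    if job_url:
        job_description = extractor.parse_job_from_web(job_url)
    job = extractor.extract_jobdata(job_description)
    if not job or not job.get("job_postings"):
        raise ValueError(
            f"Cannot fetch job details from this url: {job_url}, "
            "Use the 'Job Description' field for better assistance!"
        )
    return job


if __name__ == "__main__":
    try:
        resolve_job("https://example.com/blocked-posting", None, StubExtractor())
    except ValueError as e:
        print(f"Error: {e}")  # app.py surfaces this same message via st.error
```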
src/job_extractor.py CHANGED
@@ -4,6 +4,9 @@ from langchain_core.prompts import PromptTemplate
 from langchain_core.output_parsers import JsonOutputParser
 from langchain_core.exceptions import OutputParserException
 from src.utils import clean_text
+import json
+import requests
+
 
 class JobExtractor:
     """
@@ -46,6 +49,8 @@ class JobExtractor:
             `role`, `experience`, `skills`, `responsibilities`, `basic qualifications`,
             `preferred qualifications`, and `description`.
             Only return the valid JSON.
+            If you do not find any data to form a JSON, return
+            ```json{{'job_postings': []}}```
             ### VALID JSON (NO PREAMBLE):
             """
         )
@@ -71,15 +76,30 @@ class JobExtractor:
            ValueError: If the content could not be loaded or cleaned properly.
        """
        try:
-            loader = WebBaseLoader(url)
+            headers = {
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
+            }
+            loader = WebBaseLoader(url, headers)
             page_data = loader.load().pop().page_content
+
+            # Check for blocking or unsupported browser messages
+            if "unsupported browser" in page_data.lower():
+                raise ValueError(f"Unsupported browser message detected.")
+            # return None
+
             if not page_data:
-                raise ValueError("The scraped page content is empty.")
+                raise ValueError(f"Failed to fetch content from the URL {url}.")
+
+            print(f"===Page Data===\n {page_data}")
+
             cleaned_data = clean_text(page_data)
-            print(f"Scraped and cleaned data: {cleaned_data[:200]}...")  # Displaying a snippet of data for debugging
+            print(f"=== Scraped and cleaned data ===\n {cleaned_data}...")  # Displaying a snippet of data for debugging
             return cleaned_data
         except Exception as e:
-            raise ValueError(f"Error scraping or cleaning the content from the URL {url}: {e}")
+            print(f"WebBaseLoader Error: {e}")
+            # raise ValueError(f"Failed to fetch content from the URL {url}.")
+            return None
+
 
     def extract_jobdata(self, text):
         """
@@ -104,14 +124,29 @@ class JobExtractor:
             extract_chain = self.extract_prompt | self.chat_model.groq
             res = extract_chain.invoke(input={"page_data": text})
 
-            # Try parsing the response content into JSON format
-            job_data = self.json_parser.parse(res.content)
-            print("=====================JSON Job Data==================")
-            print(job_data)
-            return job_data
-
+            print(f"=== Result Content ===\n {res.content}")
+
+            if not res.content.strip():  # Check if response is empty
+                raise ValueError("No valid job data extracted.")
+
+            try:
+                job_data = self.json_parser.parse(res.content)
+                print(f"=== JSON Job Data ===\n {job_data}")
+                return job_data
+            except json.decoder.JSONDecodeError:
+                print("Invalid JSON received. Returning empty job data.")
+                return {"job_postings": []}  # Fail gracefully
+
+        except requests.exceptions.HTTPError as http_err:
+            if http_err.response.status_code == 413:
+                raise ValueError("The input is too large. Please reduce the size and try again.")
+            elif http_err.response.status_code == 429:
+                raise ValueError("Too many requests. Please try again later.")
+            else:
+                raise ValueError(f"HTTP error occurred: {http_err}") from http_err
         except OutputParserException as e:
-            raise OutputParserException("Unable to parse job data as valid JSON. The response might be malformed or incomplete.") from e
+            raise OutputParserException("Unable to parse job data as valid JSON.") from e
         except Exception as e:
             raise ValueError(f"An error occurred during job extraction: {e}") from e
 
+
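Both this file and src/message_writer.py now translate `requests.exceptions.HTTPError` responses with status 413 and 429 into user-facing `ValueError`s with identical wording. A standalone sketch of that mapping follows; the helper name `to_user_error` and the simulated response are illustrative only, not part of the commit.

```python
import requests


def to_user_error(http_err: requests.exceptions.HTTPError) -> ValueError:
    """Map an HTTPError onto the user-facing messages used in this commit."""
    status = http_err.response.status_code if http_err.response is not None else None
    if status == 413:
        return ValueError("The input is too large. Please reduce the size and try again.")
    if status == 429:
        return ValueError("Too many requests. Please try again later.")
    return ValueError(f"HTTP error occurred: {http_err}")


# Simulate a 429 response and show the mapped message.
resp = requests.models.Response()
resp.status_code = 429
try:
    raise requests.exceptions.HTTPError("429 Client Error", response=resp)
except requests.exceptions.HTTPError as e:
    print(to_user_error(e))  # -> Too many requests. Please try again later.
```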
src/message_writer.py CHANGED
@@ -1,6 +1,7 @@
 from src.chat_model import ChatModel
 from langchain_core.prompts import PromptTemplate
 import re
+import requests
 
 class MessageWriter:
     """
@@ -106,18 +107,22 @@ class MessageWriter:
                 extracted_text = extracted_text.strip()  # Strip leading/trailing whitespace and newlines
 
                 # Print the well-formatted text
-                print("======Thought Process======")
-                print(extracted_text)
+                print(f"=== Thought Process ===\n {extracted_text}")
                 think_content = extracted_text
             else:
                 print("No content found between <think> and </think> tags.")
 
-            print("======Cleaned Response======")
-            print(cleaned_response)
+            print(f"=== Cleaned Response ===\n {cleaned_response}")
 
             # Return the extracted thought process and the cleaned email content
             return think_content, cleaned_response.strip()
-
+        except requests.exceptions.HTTPError as http_err:
+            if http_err.response.status_code == 413:
+                raise ValueError("The input is too large. Please reduce the size and try again.")
+            elif http_err.response.status_code == 429:
+                raise ValueError("Too many requests. Please try again later.")
+            else:
+                raise ValueError(f"HTTP error occurred: {http_err}") from http_err
         except Exception as e:
             # Raise a ValueError with additional context if there was an error in processing
             raise ValueError(f"An error occurred while generating the email: {e}") from e
src/resume_loader.py CHANGED
@@ -67,7 +67,7 @@ class TextResumeLoader(ResumeLoader):
 
         text_loader = TextLoader(self.file_path)
         resume = text_loader.load()  # Directly load the full text without chunking
-        print(resume[0].page_content)
+        print(f"=== Resume Content ===\n {resume[0].page_content}")
 
         return resume[0]
 
@@ -118,7 +118,7 @@ class PdfResumeLoader(ResumeLoader):
             pdf_loader = PyPDFLoader(temp_file_path)
             resume = pdf_loader.load()  # Extract text from PDF
 
-            print(resume[0].page_content)  # Debug: Print first page content
+            print(f"=== Resume Content ===\n {resume[0].page_content}")
             return resume[0]
 
         except Exception as e: