TejaCherukuri committed on
Commit 2544e0a · 1 parent: 77bf1df

Bug fixes and a new feature (job description input option)

Files changed (4)
  1. app.py +55 -24
  2. src/job_extractor.py +46 -11
  3. src/message_writer.py +10 -5
  4. src/resume_loader.py +2 -2
app.py CHANGED
@@ -1,6 +1,7 @@
 import streamlit as st
 from src.resume_loader import ResumeLoaderFactory
 from src.job_extractor import JobExtractor
+# from src.job_extractor_2 import JobExtractor2
 from src.message_writer import MessageWriter
 
 def main():
@@ -18,35 +19,61 @@ def main():
     st.subheader("Upload Your Resume")
     uploaded_file = st.file_uploader("Upload a PDF Resume", type=["pdf"])
 
-    # Input field for the job URL
-    job_url = st.text_input(
-        "Enter the Job URL",
-        placeholder="https://amazon.jobs/en/jobs/2831138/software-development-engineer-2025-ai-ml"
+    # Job Input Option (Radio Buttons for Job URL or Description)
+    input_option = st.radio(
+        "How would you like to provide the job information?",
+        ("Job URL", "Job Description")
     )
 
+    job_url = None
+    job_description = None
+
+    # Show corresponding input field based on the selection
+    if input_option == "Job URL":
+        job_url = st.text_input(
+            "Enter the Job URL",
+            placeholder="https://amazon.jobs/en/jobs/2831138/software-development-engineer-2025-ai-ml"
+        )
+
+        # Display the alert with color
+        with st.expander("🔔 **Alert!** Job URL Instructions", expanded=True):
+            st.markdown(
+                """
+                <p style="color:red;">If using a LinkedIn job URL (Easy Apply), paste the job description instead.</p>
+                """, unsafe_allow_html=True)
+
+    elif input_option == "Job Description":
+        job_description = st.text_area("Enter the Job Description", height=200)
+
+
     # Button to trigger the flow
     if st.button("Generate Message"):
-        if job_url:
-            st.info("Processing your request...")
-            # Trigger the flow (replace with your logic)
-            thought, response = generate_message_for_job(job_url, uploaded_file)
-
-            # Create two columns for displaying outputs side by side
-            col1, col2 = st.columns(2)
-
-            # Display Thought Process in the first column
-            with col1:
-                st.subheader("DeepThink")
-                st.text_area(" ", value=thought, height=500)
-
-            # Display Generated Message in the second column
-            with col2:
-                st.subheader("Generated Message")
-                st.text_area(" ", value=response, height=500)
+        if job_url or job_description:
+            try:
+                st.info("Processing your request...")
+                # Trigger the flow (replace with your logic)
+                thought, response = generate_message_for_job(job_url, uploaded_file, job_description)
+
+                # Create two columns for displaying outputs side by side
+                col1, col2 = st.columns(2)
+
+                # Display Thought Process in the first column
+                with col1:
+                    st.subheader("DeepThink")
+                    st.text_area(" ", value=thought, height=500)
+
+                # Display Generated Message in the second column
+                with col2:
+                    st.subheader("Generated Message")
+                    st.text_area(" ", value=response, height=500)
+            except ValueError as e:
+                st.error(f"Error: {e}")
+            except Exception as e:
+                st.error(f"Unexpected Error: {e}")
         else:
             st.error("Please provide a valid job URL.")
 
-def generate_message_for_job(job_url, uploaded_file):
+def generate_message_for_job(job_url, uploaded_file, job_description=None):
 
     # Load the resume using the appropriate method (PDF or text)
     if uploaded_file:
@@ -58,8 +85,12 @@ def generate_message_for_job(job_url, uploaded_file):
 
     # Extract the key info from job URL
    extractor = JobExtractor()
-    job = extractor.parse_job_from_web(job_url)
-    job = extractor.extract_jobdata(job)
+    if job_url:
+        job_description = extractor.parse_job_from_web(job_url)
+
+    job = extractor.extract_jobdata(job_description)
+    if not job or not job.get('job_postings'):
+        raise ValueError(f"Cannot fetch job details from this url: {job_url}, Use the 'Job Description' field for better assistance!")
 
     # Invoke chat model
     writer = MessageWriter()
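To make the new flow easier to review in isolation, here is a minimal, self-contained sketch of the branching that app.py now performs. `StubExtractor`, `resolve_job`, and the example URL are hypothetical stand-ins, not part of this commit; only the guard on `job_postings` mirrors the committed code.

```python
# Hypothetical stand-in for JobExtractor, used only to illustrate the new flow.
class StubExtractor:
    def parse_job_from_web(self, url):
        return None  # simulate a blocked or unscrapable page

    def extract_jobdata(self, text):
        # mirrors the extractor's graceful empty fallback shape
        return {"job_postings": []} if not text else {"job_postings": [text]}


def resolve_job(job_url, job_description, extractor):
    # URL mode scrapes first; description mode uses the pasted text directly.
    if job_url:
        job_description = extractor.parse_job_from_web(job_url)
    job = extractor.extract_jobdata(job_description)
    if not job or not job.get("job_postings"):
        raise ValueError(
            f"Cannot fetch job details from this url: {job_url}, "
            "Use the 'Job Description' field for better assistance!"
        )
    return job


if __name__ == "__main__":
    try:
        resolve_job("https://example.com/blocked-posting", None, StubExtractor())
    except ValueError as e:
        print(f"Error: {e}")  # app.py surfaces this same message via st.error
```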
src/job_extractor.py CHANGED
@@ -4,6 +4,9 @@ from langchain_core.prompts import PromptTemplate
 from langchain_core.output_parsers import JsonOutputParser
 from langchain_core.exceptions import OutputParserException
 from src.utils import clean_text
+import json
+import requests
+
 
 class JobExtractor:
     """
@@ -46,6 +49,8 @@ class JobExtractor:
             `role`, `experience`, `skills`, `responsibilities`, `basic qualifications`,
             `preferred qualifications`, and `description`.
             Only return the valid JSON.
+            If you do not find any data to form a JSON, return
+            ```json{{'job_postings': []}}```
             ### VALID JSON (NO PREAMBLE):
             """
         )
@@ -71,15 +76,30 @@ class JobExtractor:
            ValueError: If the content could not be loaded or cleaned properly.
        """
        try:
-            loader = WebBaseLoader(url)
+            headers = {
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
+            }
+            loader = WebBaseLoader(url, headers)
             page_data = loader.load().pop().page_content
+
+            # Check for blocking or unsupported browser messages
+            if "unsupported browser" in page_data.lower():
+                raise ValueError(f"Unsupported browser message detected.")
+            # return None
+
             if not page_data:
-                raise ValueError("The scraped page content is empty.")
+                raise ValueError(f"Failed to fetch content from the URL {url}.")
+
+            print(f"===Page Data===\n {page_data}")
+
             cleaned_data = clean_text(page_data)
-            print(f"Scraped and cleaned data: {cleaned_data[:200]}...")  # Displaying a snippet of data for debugging
+            print(f"=== Scraped and cleaned data ===\n {cleaned_data}...")  # Displaying a snippet of data for debugging
             return cleaned_data
         except Exception as e:
-            raise ValueError(f"Error scraping or cleaning the content from the URL {url}: {e}")
+            print(f"WebBaseLoader Error: {e}")
+            # raise ValueError(f"Failed to fetch content from the URL {url}.")
+            return None
+
 
     def extract_jobdata(self, text):
         """
@@ -104,14 +124,29 @@ class JobExtractor:
             extract_chain = self.extract_prompt | self.chat_model.groq
             res = extract_chain.invoke(input={"page_data": text})
 
-            # Try parsing the response content into JSON format
-            job_data = self.json_parser.parse(res.content)
-            print("=====================JSON Job Data==================")
-            print(job_data)
-            return job_data
-
+            print(f"=== Result Content ===\n {res.content}")
+
+            if not res.content.strip():  # Check if response is empty
+                raise ValueError("No valid job data extracted.")
+
+            try:
+                job_data = self.json_parser.parse(res.content)
+                print(f"=== JSON Job Data ===\n {job_data}")
+                return job_data
+            except json.decoder.JSONDecodeError:
+                print("Invalid JSON received. Returning empty job data.")
+                return {"job_postings": []}  # Fail gracefully
+
+        except requests.exceptions.HTTPError as http_err:
+            if http_err.response.status_code == 413:
+                raise ValueError("The input is too large. Please reduce the size and try again.")
+            elif http_err.response.status_code == 429:
+                raise ValueError("Too many requests. Please try again later.")
+            else:
+                raise ValueError(f"HTTP error occurred: {http_err}") from http_err
         except OutputParserException as e:
-            raise OutputParserException("Unable to parse job data as valid JSON. The response might be malformed or incomplete.") from e
+            raise OutputParserException("Unable to parse job data as valid JSON.") from e
         except Exception as e:
             raise ValueError(f"An error occurred during job extraction: {e}") from e
 
+
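Both this file and src/message_writer.py now translate `requests.exceptions.HTTPError` responses with status 413 and 429 into user-facing `ValueError`s with identical wording. A standalone sketch of that mapping follows; the helper name `to_user_error` and the simulated response are illustrative only, not part of the commit.

```python
import requests


def to_user_error(http_err: requests.exceptions.HTTPError) -> ValueError:
    """Map an HTTPError onto the user-facing messages used in this commit."""
    status = http_err.response.status_code if http_err.response is not None else None
    if status == 413:
        return ValueError("The input is too large. Please reduce the size and try again.")
    if status == 429:
        return ValueError("Too many requests. Please try again later.")
    return ValueError(f"HTTP error occurred: {http_err}")


# Simulate a 429 response and show the mapped message.
resp = requests.models.Response()
resp.status_code = 429
try:
    raise requests.exceptions.HTTPError("429 Client Error", response=resp)
except requests.exceptions.HTTPError as e:
    print(to_user_error(e))  # -> Too many requests. Please try again later.
```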
src/message_writer.py CHANGED
@@ -1,6 +1,7 @@
 from src.chat_model import ChatModel
 from langchain_core.prompts import PromptTemplate
 import re
+import requests
 
 class MessageWriter:
     """
@@ -106,18 +107,22 @@ class MessageWriter:
                 extracted_text = extracted_text.strip()  # Strip leading/trailing whitespace and newlines
 
                 # Print the well-formatted text
-                print("======Thought Process======")
-                print(extracted_text)
+                print(f"=== Thought Process ===\n {extracted_text}")
                 think_content = extracted_text
             else:
                 print("No content found between <think> and </think> tags.")
 
-            print("======Cleaned Response======")
-            print(cleaned_response)
+            print(f"=== Cleaned Response ===\n {cleaned_response}")
 
             # Return the extracted thought process and the cleaned email content
             return think_content, cleaned_response.strip()
-
+        except requests.exceptions.HTTPError as http_err:
+            if http_err.response.status_code == 413:
+                raise ValueError("The input is too large. Please reduce the size and try again.")
+            elif http_err.response.status_code == 429:
+                raise ValueError("Too many requests. Please try again later.")
+            else:
+                raise ValueError(f"HTTP error occurred: {http_err}") from http_err
         except Exception as e:
             # Raise a ValueError with additional context if there was an error in processing
             raise ValueError(f"An error occurred while generating the email: {e}") from e
src/resume_loader.py CHANGED
@@ -67,7 +67,7 @@ class TextResumeLoader(ResumeLoader):
 
         text_loader = TextLoader(self.file_path)
         resume = text_loader.load()  # Directly load the full text without chunking
-        print(resume[0].page_content)
+        print(f"=== Resume Content ===\n {resume[0].page_content}")
 
         return resume[0]
 
@@ -118,7 +118,7 @@ class PdfResumeLoader(ResumeLoader):
             pdf_loader = PyPDFLoader(temp_file_path)
             resume = pdf_loader.load()  # Extract text from PDF
 
-            print(resume[0].page_content)  # Debug: Print first page content
+            print(f"=== Resume Content ===\n {resume[0].page_content}")
             return resume[0]
 
         except Exception as e: