Spaces:
Running
Running
TejaCherukuri
committed on
Commit
·
2544e0a
1
Parent(s):
77bf1df
bug fixes and added feature
Browse files
- app.py +55 -24
- src/job_extractor.py +46 -11
- src/message_writer.py +10 -5
- src/resume_loader.py +2 -2
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import streamlit as st
|
2 |
from src.resume_loader import ResumeLoaderFactory
|
3 |
from src.job_extractor import JobExtractor
|
|
|
4 |
from src.message_writer import MessageWriter
|
5 |
|
6 |
def main():
|
@@ -18,35 +19,61 @@ def main():
|
|
18 |
st.subheader("Upload Your Resume")
|
19 |
uploaded_file = st.file_uploader("Upload a PDF Resume", type=["pdf"])
|
20 |
|
21 |
-
# Input
|
22 |
-
|
23 |
-
"
|
24 |
-
|
25 |
)
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
# Button to trigger the flow
|
28 |
if st.button("Generate Message"):
|
29 |
-
if job_url:
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
46 |
else:
|
47 |
st.error("Please provide a valid job URL.")
|
48 |
|
49 |
-
def generate_message_for_job(job_url, uploaded_file):
|
50 |
|
51 |
# Load the resume using the appropriate method (PDF or text)
|
52 |
if uploaded_file:
|
@@ -58,8 +85,12 @@ def generate_message_for_job(job_url, uploaded_file):
|
|
58 |
|
59 |
# Extract the key info from job URL
|
60 |
extractor = JobExtractor()
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
|
|
63 |
|
64 |
# Invoke chat model
|
65 |
writer = MessageWriter()
|
|
|
1 |
import streamlit as st
|
2 |
from src.resume_loader import ResumeLoaderFactory
|
3 |
from src.job_extractor import JobExtractor
|
4 |
+
# from src.job_extractor_2 import JobExtractor2
|
5 |
from src.message_writer import MessageWriter
|
6 |
|
7 |
def main():
|
|
|
19 |
st.subheader("Upload Your Resume")
|
20 |
uploaded_file = st.file_uploader("Upload a PDF Resume", type=["pdf"])
|
21 |
|
22 |
+
# Job Input Option (Radio Buttons for Job URL or Description)
|
23 |
+
input_option = st.radio(
|
24 |
+
"How would you like to provide the job information?",
|
25 |
+
("Job URL", "Job Description")
|
26 |
)
|
27 |
|
28 |
+
job_url = None
|
29 |
+
job_description = None
|
30 |
+
|
31 |
+
# Show corresponding input field based on the selection
|
32 |
+
if input_option == "Job URL":
|
33 |
+
job_url = st.text_input(
|
34 |
+
"Enter the Job URL",
|
35 |
+
placeholder="https://amazon.jobs/en/jobs/2831138/software-development-engineer-2025-ai-ml"
|
36 |
+
)
|
37 |
+
|
38 |
+
# Display the alert with color
|
39 |
+
with st.expander("🔔 **Alert!** Job URL Instructions", expanded=True):
|
40 |
+
st.markdown(
|
41 |
+
"""
|
42 |
+
<p style="color:red;">If using a LinkedIn job URL (Easy Apply), paste the job description instead.</p>
|
43 |
+
""", unsafe_allow_html=True)
|
44 |
+
|
45 |
+
elif input_option == "Job Description":
|
46 |
+
job_description = st.text_area("Enter the Job Description", height=200)
|
47 |
+
|
48 |
+
|
49 |
# Button to trigger the flow
|
50 |
if st.button("Generate Message"):
|
51 |
+
if job_url or job_description:
|
52 |
+
try:
|
53 |
+
st.info("Processing your request...")
|
54 |
+
# Trigger the flow (replace with your logic)
|
55 |
+
thought, response = generate_message_for_job(job_url, uploaded_file, job_description)
|
56 |
+
|
57 |
+
# Create two columns for displaying outputs side by side
|
58 |
+
col1, col2 = st.columns(2)
|
59 |
+
|
60 |
+
# Display Thought Process in the first column
|
61 |
+
with col1:
|
62 |
+
st.subheader("DeepThink")
|
63 |
+
st.text_area(" ", value=thought, height=500)
|
64 |
+
|
65 |
+
# Display Generated Message in the second column
|
66 |
+
with col2:
|
67 |
+
st.subheader("Generated Message")
|
68 |
+
st.text_area(" ", value=response, height=500)
|
69 |
+
except ValueError as e:
|
70 |
+
st.error(f"Error: {e}")
|
71 |
+
except Exception as e:
|
72 |
+
st.error(f"Unexpected Error: {e}")
|
73 |
else:
|
74 |
st.error("Please provide a valid job URL.")
|
75 |
|
76 |
+
def generate_message_for_job(job_url, uploaded_file, job_description=None):
|
77 |
|
78 |
# Load the resume using the appropriate method (PDF or text)
|
79 |
if uploaded_file:
|
|
|
85 |
|
86 |
# Extract the key info from job URL
|
87 |
extractor = JobExtractor()
|
88 |
+
if job_url:
|
89 |
+
job_description = extractor.parse_job_from_web(job_url)
|
90 |
+
|
91 |
+
job = extractor.extract_jobdata(job_description)
|
92 |
+
if not job or not job.get('job_postings'):
|
93 |
+
raise ValueError(f"Cannot fetch job details from this url: {job_url}, Use the 'Job Description' field for better assistance!")
|
94 |
|
95 |
# Invoke chat model
|
96 |
writer = MessageWriter()
|
src/job_extractor.py
CHANGED
@@ -4,6 +4,9 @@ from langchain_core.prompts import PromptTemplate
|
|
4 |
from langchain_core.output_parsers import JsonOutputParser
|
5 |
from langchain_core.exceptions import OutputParserException
|
6 |
from src.utils import clean_text
|
|
|
|
|
|
|
7 |
|
8 |
class JobExtractor:
|
9 |
"""
|
@@ -46,6 +49,8 @@ class JobExtractor:
|
|
46 |
`role`, `experience`, `skills`, `responsibilities`, `basic qualifications`,
|
47 |
`preferred qualifications`, and `description`.
|
48 |
Only return the valid JSON.
|
|
|
|
|
49 |
### VALID JSON (NO PREAMBLE):
|
50 |
"""
|
51 |
)
|
@@ -71,15 +76,30 @@ class JobExtractor:
|
|
71 |
ValueError: If the content could not be loaded or cleaned properly.
|
72 |
"""
|
73 |
try:
|
74 |
-
|
|
|
|
|
|
|
75 |
page_data = loader.load().pop().page_content
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
if not page_data:
|
77 |
-
raise ValueError("
|
|
|
|
|
|
|
78 |
cleaned_data = clean_text(page_data)
|
79 |
-
print(f"Scraped and cleaned data
|
80 |
return cleaned_data
|
81 |
except Exception as e:
|
82 |
-
|
|
|
|
|
|
|
83 |
|
84 |
def extract_jobdata(self, text):
|
85 |
"""
|
@@ -104,14 +124,29 @@ class JobExtractor:
|
|
104 |
extract_chain = self.extract_prompt | self.chat_model.groq
|
105 |
res = extract_chain.invoke(input={"page_data": text})
|
106 |
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
except OutputParserException as e:
|
114 |
-
raise OutputParserException("Unable to parse job data as valid JSON.
|
115 |
except Exception as e:
|
116 |
raise ValueError(f"An error occurred during job extraction: {e}") from e
|
117 |
|
|
|
|
4 |
from langchain_core.output_parsers import JsonOutputParser
|
5 |
from langchain_core.exceptions import OutputParserException
|
6 |
from src.utils import clean_text
|
7 |
+
import json
|
8 |
+
import requests
|
9 |
+
|
10 |
|
11 |
class JobExtractor:
|
12 |
"""
|
|
|
49 |
`role`, `experience`, `skills`, `responsibilities`, `basic qualifications`,
|
50 |
`preferred qualifications`, and `description`.
|
51 |
Only return the valid JSON.
|
52 |
+
If you do not find any data to form a JSON, return
|
53 |
+
```json{{'job_postings': []}}```
|
54 |
### VALID JSON (NO PREAMBLE):
|
55 |
"""
|
56 |
)
|
|
|
76 |
ValueError: If the content could not be loaded or cleaned properly.
|
77 |
"""
|
78 |
try:
|
79 |
+
headers = {
|
80 |
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
|
81 |
+
}
|
82 |
+
loader = WebBaseLoader(url, headers)
|
83 |
page_data = loader.load().pop().page_content
|
84 |
+
|
85 |
+
# Check for blocking or unsupported browser messages
|
86 |
+
if "unsupported browser" in page_data.lower():
|
87 |
+
raise ValueError(f"Unsupported browser message detected.")
|
88 |
+
# return None
|
89 |
+
|
90 |
if not page_data:
|
91 |
+
raise ValueError(f"Failed to fetch content from the URL {url}.")
|
92 |
+
|
93 |
+
print(f"===Page Data===\n {page_data}")
|
94 |
+
|
95 |
cleaned_data = clean_text(page_data)
|
96 |
+
print(f"=== Scraped and cleaned data ===\n {cleaned_data}...") # Displaying a snippet of data for debugging
|
97 |
return cleaned_data
|
98 |
except Exception as e:
|
99 |
+
print(f"WebBaseLoader Error: {e}")
|
100 |
+
# raise ValueError(f"Failed to fetch content from the URL {url}.")
|
101 |
+
return None
|
102 |
+
|
103 |
|
104 |
def extract_jobdata(self, text):
|
105 |
"""
|
|
|
124 |
extract_chain = self.extract_prompt | self.chat_model.groq
|
125 |
res = extract_chain.invoke(input={"page_data": text})
|
126 |
|
127 |
+
print(f"=== Result Content ===\n {res.content}")
|
128 |
+
|
129 |
+
if not res.content.strip(): # Check if response is empty
|
130 |
+
raise ValueError("No valid job data extracted.")
|
131 |
+
|
132 |
+
try:
|
133 |
+
job_data = self.json_parser.parse(res.content)
|
134 |
+
print(f"=== JSON Job Data ===\n {job_data}")
|
135 |
+
return job_data
|
136 |
+
except json.decoder.JSONDecodeError:
|
137 |
+
print("Invalid JSON received. Returning empty job data.")
|
138 |
+
return {"job_postings": []} # Fail gracefully
|
139 |
+
|
140 |
+
except requests.exceptions.HTTPError as http_err:
|
141 |
+
if http_err.response.status_code == 413:
|
142 |
+
raise ValueError("The input is too large. Please reduce the size and try again.")
|
143 |
+
elif http_err.response.status_code == 429:
|
144 |
+
raise ValueError("Too many requests. Please try again later.")
|
145 |
+
else:
|
146 |
+
raise ValueError(f"HTTP error occurred: {http_err}") from http_err
|
147 |
except OutputParserException as e:
|
148 |
+
raise OutputParserException("Unable to parse job data as valid JSON.") from e
|
149 |
except Exception as e:
|
150 |
raise ValueError(f"An error occurred during job extraction: {e}") from e
|
151 |
|
152 |
+
|
src/message_writer.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
from src.chat_model import ChatModel
|
2 |
from langchain_core.prompts import PromptTemplate
|
3 |
import re
|
|
|
4 |
|
5 |
class MessageWriter:
|
6 |
"""
|
@@ -106,18 +107,22 @@ class MessageWriter:
|
|
106 |
extracted_text = extracted_text.strip() # Strip leading/trailing whitespace and newlines
|
107 |
|
108 |
# Print the well-formatted text
|
109 |
-
print("
|
110 |
-
print(extracted_text)
|
111 |
think_content = extracted_text
|
112 |
else:
|
113 |
print("No content found between <think> and </think> tags.")
|
114 |
|
115 |
-
print("
|
116 |
-
print(cleaned_response)
|
117 |
|
118 |
# Return the extracted thought process and the cleaned email content
|
119 |
return think_content, cleaned_response.strip()
|
120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
except Exception as e:
|
122 |
# Raise a ValueError with additional context if there was an error in processing
|
123 |
raise ValueError(f"An error occurred while generating the email: {e}") from e
|
|
|
1 |
from src.chat_model import ChatModel
|
2 |
from langchain_core.prompts import PromptTemplate
|
3 |
import re
|
4 |
+
import requests
|
5 |
|
6 |
class MessageWriter:
|
7 |
"""
|
|
|
107 |
extracted_text = extracted_text.strip() # Strip leading/trailing whitespace and newlines
|
108 |
|
109 |
# Print the well-formatted text
|
110 |
+
print(f"=== Thought Process ===\n {extracted_text}")
|
|
|
111 |
think_content = extracted_text
|
112 |
else:
|
113 |
print("No content found between <think> and </think> tags.")
|
114 |
|
115 |
+
print(f"=== Cleaned Response ===\n {cleaned_response}")
|
|
|
116 |
|
117 |
# Return the extracted thought process and the cleaned email content
|
118 |
return think_content, cleaned_response.strip()
|
119 |
+
except requests.exceptions.HTTPError as http_err:
|
120 |
+
if http_err.response.status_code == 413:
|
121 |
+
raise ValueError("The input is too large. Please reduce the size and try again.")
|
122 |
+
elif http_err.response.status_code == 429:
|
123 |
+
raise ValueError("Too many requests. Please try again later.")
|
124 |
+
else:
|
125 |
+
raise ValueError(f"HTTP error occurred: {http_err}") from http_err
|
126 |
except Exception as e:
|
127 |
# Raise a ValueError with additional context if there was an error in processing
|
128 |
raise ValueError(f"An error occurred while generating the email: {e}") from e
|
src/resume_loader.py
CHANGED
@@ -67,7 +67,7 @@ class TextResumeLoader(ResumeLoader):
|
|
67 |
|
68 |
text_loader = TextLoader(self.file_path)
|
69 |
resume = text_loader.load() # Directly load the full text without chunking
|
70 |
-
print(resume[0].page_content)
|
71 |
|
72 |
return resume[0]
|
73 |
|
@@ -118,7 +118,7 @@ class PdfResumeLoader(ResumeLoader):
|
|
118 |
pdf_loader = PyPDFLoader(temp_file_path)
|
119 |
resume = pdf_loader.load() # Extract text from PDF
|
120 |
|
121 |
-
print(resume[0].page_content)
|
122 |
return resume[0]
|
123 |
|
124 |
except Exception as e:
|
|
|
67 |
|
68 |
text_loader = TextLoader(self.file_path)
|
69 |
resume = text_loader.load() # Directly load the full text without chunking
|
70 |
+
print(f"=== Resume Content ===\n {resume[0].page_content}")
|
71 |
|
72 |
return resume[0]
|
73 |
|
|
|
118 |
pdf_loader = PyPDFLoader(temp_file_path)
|
119 |
resume = pdf_loader.load() # Extract text from PDF
|
120 |
|
121 |
+
print(f"=== Resume Content ===\n {resume[0].page_content}")
|
122 |
return resume[0]
|
123 |
|
124 |
except Exception as e:
|