Akchhaya committed on
Commit
b82edc9
·
verified ·
1 Parent(s): 4717c7f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -0
app.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import pandas as pd
4
+ import spacy
5
+ import re
6
+ from pathlib import Path
7
+ import PyPDF2
8
+ import docx
9
+ import json
10
+
11
# Models are loaded once, at import time, and shared by every request.

# spaCy pipeline: extract_information() uses it for sentence segmentation.
nlp = spacy.load("en_core_web_sm")

# Token-classification (NER) pipeline: analyze_resume() counts its detections.
keyword_extractor = pipeline(
    task="token-classification",
    model="jean-baptiste/roberta-large-ner-english",
)

# NOTE(review): nothing in the visible code calls `classifier`; it is kept
# because it is a module-level name other code may rely on.
classifier = pipeline(
    task="text-classification",
    model="microsoft/MiniLM-L12-H384-uncased",
)
15
+
16
def extract_text_from_resume(file):
    """Return the plain text of an uploaded resume.

    Args:
        file: The uploaded file. Either a filesystem path (``str`` or
            path-like object) or an object exposing the path as ``.name``
            (for example a tempfile wrapper). ``None`` means nothing was
            uploaded.

    Returns:
        The extracted text with surrounding whitespace stripped. An empty
        string is returned when ``file`` is ``None`` or the extension is not
        one of ``.pdf``, ``.docx`` or ``.txt``.
    """
    if file is None:
        return ""

    # Path-likes are converted with str(): their ``.name`` attribute would be
    # only the final path component, not something open() can use.
    if isinstance(file, str) or hasattr(file, "__fspath__"):
        file_path = str(file)
    else:
        file_path = file.name

    # Compare extensions case-insensitively so "CV.PDF" is handled as a PDF.
    lowered_path = file_path.lower()
    text = ""

    if lowered_path.endswith(".pdf"):
        with open(file_path, "rb") as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # A page without a text layer may yield None/""; treat it as
            # empty. Pages are newline-separated so the last word of one page
            # is not glued to the first word of the next.
            text = "\n".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )

    elif lowered_path.endswith(".docx"):
        document = docx.Document(file_path)
        text = "".join(
            paragraph.text + "\n" for paragraph in document.paragraphs
        )

    elif lowered_path.endswith(".txt"):
        # errors="replace" keeps one stray non-UTF-8 byte from aborting the
        # whole analysis.
        with open(
            file_path, "r", encoding="utf-8", errors="replace"
        ) as txt_file:
            text = txt_file.read()

    return text.strip()
36
+
37
def extract_information(text):
    """Extract skills, education, experience and contact details from text.

    Args:
        text: Plain-text resume content.

    Returns:
        A dict with the keys ``"skills"``, ``"education"``, ``"experience"``
        and ``"contact"``, each mapping to a list of strings:

        * skills: entries of a fixed skill list found in the text,
        * education / experience: sentences containing a related keyword,
        * contact: e-mail addresses followed by phone numbers.
    """
    doc = nlp(text)

    # Skills come from a predefined list. Each one is matched as a whole word
    # (no letter directly before or after) so that "javascript" is not also
    # counted as "java"; digits are allowed to follow, e.g. "python3".
    common_skills = ["python", "java", "javascript", "sql", "machine learning", "data analysis"]
    text_lower = text.lower()
    skills = [
        skill
        for skill in common_skills
        if re.search(r"(?<![a-z])" + re.escape(skill) + r"(?![a-z])", text_lower)
    ]

    # Education and experience are keyword-matched per sentence. Substring
    # matching is intentional here so inflected forms such as "worked" or
    # "universities" still count. Both lists are filled in one pass.
    education_keywords = ["university", "college", "bachelor", "master", "phd", "degree"]
    experience_keywords = ["experience", "work", "job", "position", "role"]
    education = []
    experience = []
    for sent in doc.sents:
        sentence = sent.text.strip()
        sentence_lower = sentence.lower()
        if any(keyword in sentence_lower for keyword in education_keywords):
            education.append(sentence)
        if any(keyword in sentence_lower for keyword in experience_keywords):
            experience.append(sentence)

    # Contact information. The top-level-domain class is [A-Za-z]; a "|"
    # inside a character class is a literal pipe, not alternation.
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    phone_pattern = r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b'
    emails = re.findall(email_pattern, text)
    phones = re.findall(phone_pattern, text)

    return {
        "skills": skills,
        "education": education,
        "experience": experience,
        "contact": emails + phones,
    }
74
+
75
def analyze_resume(text, entities):
    """Score a resume on four scales, each ranging from 0 to 100.

    Args:
        text: Plain-text resume content.
        entities: Dict with the list-valued keys "skills", "education",
            "experience" and "contact".

    Returns:
        A dict with the keys "completeness", "skills_match", "formatting"
        and "keyword_optimization".
    """
    # Completeness: a quarter of the score for every non-empty entity group.
    sections = ("skills", "education", "experience", "contact")
    filled_sections = sum(1 for section in sections if entities[section])
    completeness = (filled_sections / 4) * 100

    # Skills match: share of the desired skills that were detected.
    desired_skills = ["python", "java", "javascript", "sql", "machine learning"]
    hits = len([skill for skill in entities["skills"] if skill in desired_skills])
    skills_match = (hits / len(desired_skills)) * 100

    # Formatting: five heuristics worth 20 points each.
    heuristics = (
        text.count('\n') >= 5,                      # more than five lines
        len(text) > 200,                            # more than 200 characters
        any(char.isupper() for char in text),       # some upper-case letter
        re.search(r'\b\d{4}\b', text) is not None,  # a standalone 4-digit number
        len(re.findall(r'[.!?]', text)) > 3,        # over three sentence enders
    )
    formatting = 20 * sum(heuristics)

    # Keyword optimization: 10 points per item the extractor returns for the
    # first 512 characters, capped at 100.
    detections = keyword_extractor(text[:512])
    keyword_optimization = min(10 * len(detections), 100)

    return {
        "completeness": completeness,
        "skills_match": skills_match,
        "formatting": formatting,
        "keyword_optimization": keyword_optimization,
    }
110
+
111
def generate_recommendations(scores, entities):
    """Build newline-separated advice from the scores and detected entities.

    Args:
        scores: Dict with the numeric keys "completeness", "skills_match",
            "formatting" and "keyword_optimization".
        entities: Dict with the list-valued keys "skills", "education",
            "experience" and "contact".

    Returns:
        The advice lines joined with newlines; an empty string when every
        score meets its threshold.
    """
    advice = []

    if scores["completeness"] < 75:
        advice.append("📋 Add more sections to your resume to improve completeness.")
        # One hint per entity group that came back empty.
        missing_section_hints = (
            ("skills", "- Add a skills section"),
            ("education", "- Add education details"),
            ("experience", "- Add work experience"),
            ("contact", "- Add contact information"),
        )
        advice.extend(
            hint for section, hint in missing_section_hints if not entities[section]
        )

    # (score key, minimum acceptable value, lines added when below it)
    threshold_advice = (
        ("skills_match", 60, (
            "\n💡 Consider adding more relevant skills:",
            "- Focus on technical skills like Python, Java, SQL",
            "- Include both hard and soft skills",
        )),
        ("formatting", 80, (
            "\n📑 Improve resume formatting:",
            "- Use clear section headings",
            "- Include dates for experiences",
            "- Use bullet points for better readability",
        )),
        ("keyword_optimization", 70, (
            "\n🔍 Optimize keywords usage:",
            "- Use more industry-specific terms",
            "- Include action verbs",
            "- Mention specific technologies and tools",
        )),
    )
    for metric, minimum, lines in threshold_advice:
        if scores[metric] < minimum:
            advice.extend(lines)

    return "\n".join(advice)
143
+
144
def process_resume(file):
    """Run the whole pipeline on an uploaded resume.

    Args:
        file: The value delivered by the upload component; ``None`` when the
            user pressed the button without uploading anything.

    Returns:
        A ``(scores, recommendations)`` tuple: a dict of scores and a string
        of advice. When there is nothing to analyze, ``scores`` is an empty
        dict and the string explains why.
    """
    # Guard: pressing "Analyze" with no upload would otherwise raise inside
    # the text extractor.
    if file is None:
        return {}, "Please upload a resume (PDF, DOCX, or TXT) before running the analysis."

    text = extract_text_from_resume(file)
    if not text:
        # Unsupported extension or a document without extractable text;
        # scoring an empty string would only produce misleading zeros.
        return {}, (
            "No readable text could be extracted from this file. "
            "Please upload a text-based PDF, DOCX, or TXT resume."
        )

    entities = extract_information(text)
    scores = analyze_resume(text, entities)
    recommendations = generate_recommendations(scores, entities)

    return scores, recommendations
151
+
152
def create_interface():
    """Build the Gradio Blocks UI for the resume analyzer.

    The layout has a file upload, an "Analyze Resume" button, a JSON panel
    for the scores and a textbox for the recommendations. Clicking the button
    calls ``process_resume`` with the uploaded file.

    Returns:
        The assembled ``gr.Blocks`` app, not yet launched.
    """
    with gr.Blocks() as app:
        gr.Markdown("""
        # Resume Analyzer and Optimizer
        Upload your resume to get personalized analysis and recommendations.
        """)

        with gr.Row():
            file_input = gr.File(
                label="Upload Resume (PDF, DOCX, or TXT)",
                # Extensions need a leading dot; a bare word such as "pdf"
                # is interpreted as a file category rather than an extension.
                file_types=[".pdf", ".docx", ".txt"],
            )

        with gr.Row():
            analyze_button = gr.Button("Analyze Resume", variant="primary")

        with gr.Row():
            with gr.Column():
                score_output = gr.JSON(label="Analysis Scores")
            with gr.Column():
                recommendations_output = gr.Textbox(
                    label="Recommendations",
                    lines=10,
                )

        # process_resume returns (scores, recommendations), matching the
        # order of the two output components.
        analyze_button.click(
            fn=process_resume,
            inputs=[file_input],
            outputs=[score_output, recommendations_output],
        )

    return app
184
+
185
# Script entry point: build the UI and start the server. Nothing is launched
# when the module is merely imported.
if __name__ == "__main__":
    create_interface().launch()