rivapereira123 committed on
Commit
a8f42ad
·
verified ·
1 Parent(s): c593899

Upload 7 files

modules/analysis.py ADDED
@@ -0,0 +1,712 @@
1
+ import re
2
+ import pandas as pd
3
+ from utils.data_helpers import clean_text, extract_actions_from_feedback
4
+ import requests
5
+ import time
6
+ import os
7
+
8
+ def clean_linkedin_input(text):
9
+ junk_patterns = [
10
+ r"Add verification badge", r"Contact info", r"followers", r"connections",
11
+ r"Add profile section", r"Enhance profile", r"Open to work.*?Show details",
12
+ r"Show all analytics", r"Get started", r"Edit", r"See more", r"…see more",
13
+ r"Subscribe", r"View .*? graphic link", r"Activate to view larger image",
14
+ r"Create a post", r"Loaded .*? posts", r"Visible to anyone.*?", r"Β· Remote",
15
+ r"\\d+\\s+(followers|connections|comments|likes?)", r"Issued .*?",
16
+ r"Posts.*?Comments.*?Videos.*?Images.*?Newsletter", r"Show all .*?",
17
+ r"–", r"β€”", r"…"
18
+ ]
19
+ for pattern in junk_patterns:
20
+ text = re.sub(pattern, "", text, flags=re.IGNORECASE)
21
+
22
+ emoji_pattern = re.compile("["
23
+ u"\\U0001F600-\\U0001F64F"
24
+ u"\\U0001F300-\\U0001F5FF"
25
+ u"\\U0001F680-\\U0001F6FF"
26
+ u"\\U0001F1E0-\\U0001F1FF"
27
+ u"\\U00002700-\\U000027BF"
28
+ u"\\U000024C2-\\U0001F251"
29
+ "]+", flags=re.UNICODE)
30
+ text = emoji_pattern.sub(\'\', text)
31
+
32
+ text = re.sub(r'\n+', r'\n', text)
33
+ text = re.sub(r'\s{2,}', r' ', text)
34
+ return text.strip()
35
+
36
+
37
+ def clean_name_headline_section(text):
38
+ lines = text.splitlines()
39
+ debug = []
40
+
41
+ name = None
42
+ headline = None
43
+ location = None
44
+ followers = None
45
+ open_roles = None
46
+
47
+ for line in lines:
48
+ line = line.strip()
49
+
50
+ # Name is usually first and alphabetic
51
+ if not name and line and line[0].isalpha() and " " in line:
52
+ name = line
53
+ debug.append(f"πŸ‘€ Name: {name}")
54
+ continue
55
+
56
+ # Headline: usually comes after name or has "β€’" or "|"
57
+ if not headline and ("β€’" in line or "|" in line):
58
+ headline = line
59
+ debug.append(f"🧠 Headline: {headline}")
60
+ continue
61
+
62
+ # Location
63
+ if "united arab emirates" in line.lower() or "city" in line.lower():
64
+ location = line
65
+ debug.append(f"πŸ“ Location: {location}")
66
+ continue
67
+
68
+ # Followers
69
+ if "followers" in line.lower():
70
+ followers = line
71
+ debug.append(f"πŸ“Š {followers}")
72
+ continue
73
+
74
+ # Open to work
75
+ if "open to" in line.lower() and "roles" in line.lower():
76
+ open_roles = line
77
+ debug.append(f"πŸ’Ό {open_roles}")
78
+ continue
79
+
80
+ feedback = []
81
+ if not name:
82
+ feedback.append("⚠️ Your full name is missing or unclear.")
83
+ if not headline:
84
+ feedback.append("⚠️ Headline/tagline is missing. Add a short, keyword-rich sentence.")
85
+ if not location:
86
+ feedback.append("⚠️ Location info not found. Add your city for recruiters.")
87
+ if not followers:
88
+ feedback.append("πŸ” Tip: Add or grow your follower count for visibility.")
89
+ if not open_roles:
90
+ feedback.append("πŸ“£ Mention your \'Open to work\' roles clearly in your profile.")
91
+
92
+ feedback.append("πŸ“Έ Bonus: Did you upload a banner and cover photo? If not, add one to personalize your profile!")
93
+
94
+ return "\\n".join(feedback), "\\n".join(debug)
95
+
96
+
97
+ def analyze_apify_name_headline(row):
98
+ feedback = []
99
+
100
+ def safe_str(x):
101
+ return str(x).strip() if x else ""
102
+
103
+ name = safe_str(row.get("fullName"))
104
+ headline = safe_str(row.get("headline"))
105
+ location = safe_str(row.get("location"))
106
+ followers = row.get("followersCount", "")
107
+ open_to_work = safe_str(row.get("openToWork"))
108
+ featured = safe_str(row.get("featured"))
109
+ creative_mode = safe_str(row.get("creatorMode"))
110
+ profile_photo = safe_str(row.get("profilePhotoUrl"))
111
+ background_photo = safe_str(row.get("backgroundImageUrl"))
112
+
113
+ if not profile_photo:
114
+ feedback.append("πŸ–ΌοΈ No clear headshot found. Add a professional, well-lit photo.")
115
+ if not background_photo:
116
+ feedback.append("🧒 Add a cover/banner image with your name and tagline centered for impact.")
117
+ if not name:
118
+ feedback.append("⚠️ Your full name is missing or unclear.")
119
+ if not headline:
120
+ feedback.append("⚠️ Headline/tagline is missing. Add a short, keyword-rich sentence.")
121
+ if not location:
122
+ feedback.append("πŸ“ Add your city or country. Recruiters filter by location.")
123
+ if not followers:
124
+ feedback.append("πŸ” Grow your follower count β€” it boosts visibility.")
125
+ if not open_to_work:
126
+ feedback.append("πŸ“£ Add your \'Open to work\' roles. It helps LinkedIn match you.")
127
+ if not featured:
128
+ feedback.append("⭐ Add a featured post or portfolio item to your profile.")
129
+ if not creative_mode:
130
+ feedback.append("🎨 Turn on Creator Mode for extra reach if you post content.")
131
+
132
+ return "### 🧾 Name & Headline Checklist\\n" + "\\n".join(feedback)
133
+
134
+
135
+ def clean_about_section(text):
136
+ feedback = []
137
+ debug_info = []
138
+
139
+ # Remove repeated lines
140
+ sentences = list(dict.fromkeys(text.strip().split(".")))
141
+ cleaned_text = ". ".join([s.strip() for s in sentences if s.strip()])
142
+
143
+ debug_info.append(f"🧹 Cleaned Sentences Count: {len(sentences)}")
144
+ debug_info.append(f"πŸ“ Cleaned Text:\\n{cleaned_text[:500]}...")
145
+
146
+ # Heuristics
147
+ if len(cleaned_text) < 200:
148
+ feedback.append("⚠️ Your About section seems short. Aim for 3-5 strong paragraphs.")
149
+ if cleaned_text.lower().count("i am") + cleaned_text.lower().count("i\u2019m") == 0:
150
+ feedback.append("πŸ€” Add more personal voice. Use \'I am...\' or \'I\u2019m...\' to connect with the reader.")
151
+ if "impact" in cleaned_text.lower() and "mentor" in cleaned_text.lower():
152
+ feedback.append("βœ… Nice! You\u2019re showing leadership and purpose.")
153
+
154
+ # Detect keyword stuffing
155
+ keywords = ['python', 'machine learning', 'data', 'power bi', 'ai', 'artificial intelligence']
156
+ keyword_hits = [kw for kw in keywords if cleaned_text.lower().count(kw) > 2]
157
+ if keyword_hits:
158
+ feedback.append(f"⚠️ These keywords are mentioned too often: {', '.join(keyword_hits)}. Avoid overusing them.")
159
+
160
+ return "\n".join(feedback), "\n".join(debug_info)
161
+
162
+
163
+
164
+ def analyze_apify_about_section(row):
165
+ feedback = []
166
+ raw = row.get("about", "")
167
+
168
+ # Ensure it\u2019s a string
169
+ if isinstance(raw, list):
170
+ about = " ".join(str(x) for x in raw)
171
+ else:
172
+ about = str(raw).strip()
173
+
174
+ if not about or len(about) < 20:
175
+ return "### πŸ“˜ About Me\\n⚠️ You haven\u2019t written an About Me section \u2014 that\u2019s a missed opportunity!"
176
+
177
+ if len(about) < 200:
178
+ feedback.append("πŸ“ Your About Me is short. Aim for 3\u20135 short paragraphs with story, skills, and goals.")
179
+
180
+ if "I am" not in about and "I\u2019m" not in about:
181
+ feedback.append("πŸ—£οΈ Use personal voice \u2014 write in first person (e.g. \'I\u2019m a data analyst...\').")
182
+
183
+ if not any(word in about.lower() for word in ["mission", "values", "why", "story"]):
184
+ feedback.append("πŸ’‘ Try adding a personal story or your \'why\'. It makes you memorable.")
185
+
186
+ if not any(word in about.lower() for word in ["python", "data", "ai", "sql", "ml", "analytics"]):
187
+ feedback.append("πŸ” Add industry-relevant keywords like tools or domains (e.g. Python, Analytics).")
188
+
189
+ return "### πŸ“˜ About Me Feedback\\n" + "\\n".join(feedback)
190
+
191
+
192
+
193
+ def analyze_experience_section(text):
194
+ feedback, debug = [], []
195
+ total_skills = set()
196
+
197
+ # Normalize
198
+ text = re.sub(r"(logo|pdf).*?\\.pdf", "", text, flags=re.IGNORECASE)
199
+ text = re.sub(r"\\.?\\s*see more", "", text, flags=re.IGNORECASE)
200
+ text = re.sub(r"\\s+", " ", text)
201
+ entries = re.split(r"(?:\\d{4}.*?(?:mo|mos|yr|yrs))", text)
202
+
203
+ work_types = {"remote": 0, "hybrid": 1, "onsite": 2}
204
+ format_score = 0
205
+ short_roles = 0
206
+ roles_found = 0
207
+
208
+ for entry in entries:
209
+ if len(entry.strip()) < 50:
210
+ continue
211
+ roles_found += 1
212
+
213
+ # Determine format
214
+ format_detected = "onsite"
215
+ if "remote" in entry.lower():
216
+ format_detected = "remote"
217
+ elif "hybrid" in entry.lower():
218
+ format_detected = "hybrid"
219
+ format_score += work_types[format_detected]
220
+
221
+ # Detect time span
222
+ if re.search(r"(\\d+\\s*(mo|mos|yr|yrs))", entry):
223
+ months = sum([
224
+ int(x) if "mo" in unit else int(x) * 12
225
+ for x, unit in re.findall(r"(\\d+)\\s*(mo|mos|yr|yrs)", entry)
226
+ ])
227
+ if months < 3:
228
+ short_roles += 1
229
+ else:
230
+ feedback.append("⚠️ One experience entry is missing a time span.")
231
+
232
+ # Remove duplicated bullets and body
233
+ cleaned_entry = re.sub(r"(▶️.*?)\1+", r"\1", entry)
234
+
235
+ # Extract skills
236
+ skill_matches = re.findall(r"\b([A-Z][a-z]+(?: [A-Z][a-z]+)?)\b", cleaned_entry)
237
+ for s in skill_matches:
238
+ if len(s) <= 20:
239
+ total_skills.add(s.strip().lower())
240
+
241
+ # Summary logic
242
+ if roles_found == 0:
243
+ feedback.append("⚠️ Couldn\u2019t find valid experience entries. Double-check formatting.")
244
+ else:
245
+ feedback.append(f"βœ… Found **{roles_found}** experience roles.")
246
+ if short_roles > 0:
247
+ feedback.append(f"πŸ•’ {short_roles} roles seem too short (<3 months). Consider explaining a little on these roles, Use Harvard Referencing Words.")
248
+ if format_score / max(1, roles_found) < 1.2:
249
+ feedback.append("πŸ“ Most of your roles are **Remote** or **Hybrid**. Consider getting (if you can-easier said then done) onsite or longer-term internships for variety.")
250
+ feedback.append(f"🧠 Extracted **{len(total_skills)}** possible skills so far.")
251
+
252
+ debug.append("πŸ›  Extracted Sample Skills:\\n" + ", ".join(list(total_skills)[:20]))
253
+ debug.append(f"πŸ”Ž Total Raw Experience Entries: {roles_found}")
254
+
255
+ return "\\n".join(feedback), "\\n".join(debug), total_skills
256
+
257
+
258
+
259
+ def analyze_apify_experience_section(row):
260
+ raw = row.get("experience", "")
261
+ feedback = []
262
+
263
+ # Handle list format from Apify
264
+ if isinstance(raw, list):
265
+ flattened = []
266
+ for entry in raw:
267
+ if isinstance(entry, dict):
268
+ flattened.append(" ".join(str(v) for v in entry.values()))
269
+ text = "\\n".join(flattened)
270
+ else:
271
+ text = str(raw).strip()
272
+
273
+ if not text or len(text.strip()) < 50:
274
+ return "### πŸ’Ό Experience\\n😬 Your experience section is very light or missing! Add at least one role."
275
+
276
+ # Approximate heuristic
277
+ jobs = sum(text.lower().count(term) for term in ["intern", "analyst", "engineer"])
278
+ if jobs == 0:
279
+ feedback.append("⚠️ We couldn\u2019t find any formal roles. Add internships, part-time jobs, or freelance gigs.")
280
+ elif jobs < 2:
281
+ feedback.append("🧱 Add another experience or project \u2014 even university or club work counts.")
282
+
283
+ if "remote" in text.lower():
284
+ feedback.append("🌍 You\u2019ve worked remotely \u2014 highlight how you managed communication or independence.")
285
+
286
+ return "### πŸ’Ό Experience Review\\n" + "\\n".join(feedback)
287
+
288
+
289
+
290
+ def analyze_education_section(text):
291
+ if not text or len(text.strip()) < 30:
292
+ return "⚠️ Your education section looks empty or too short. Add your university, field of study, and time period."
293
+
294
+ suggestions = []
295
+
296
+ # Check for institution and degree/field
297
+ has_university = re.search(r"(university|college|institute|school)", text, re.IGNORECASE)
298
+ has_field = re.search(r"(computer|data|science|engineering|business|design|marketing|ai|big data|cs|it)", text, re.IGNORECASE)
299
+ has_dates = re.search(r"\b20\d{2}\b", text)
300
+
301
+ if not has_university:
302
+ suggestions.append("πŸŽ“ Add your **university or institution name**.")
303
+ if not has_field:
304
+ suggestions.append("πŸ“˜ Add your **field of study** like Data Science, Business, or AI.")
305
+ if not has_dates:
306
+ suggestions.append("πŸ“… Include your **education timeline**, like 2022\u20132025.")
307
+
308
+ # Skill extraction (optional)
309
+ skills_found = re.findall(r"[A-Za-z]{3,}", text)
310
+ if len(skills_found) < 3:
311
+ suggestions.append("🧠 List **a few relevant skills** you learned (e.g., Python, SQL, Problem Solving).")
312
+
313
+ if not suggestions:
314
+ return "βœ… Your education section looks complete and informative!"
315
+ else:
316
+ return "\\n".join(suggestions), "No debug info"
317
+
318
+
319
+
320
+ def analyze_apify_education_section(row):
321
+ raw = row.get("education", "")
322
+ suggestions = []
323
+
324
+ # Handle list format
325
+ if isinstance(raw, list):
326
+ flattened = []
327
+ for entry in raw:
328
+ if isinstance(entry, dict):
329
+ flattened.append(" ".join(str(v) for v in entry.values()))
330
+ text = "\\n".join(flattened)
331
+ else:
332
+ text = str(raw).strip()
333
+
334
+ if not text or len(text.strip()) < 20:
335
+ return "### πŸŽ“ Education\\n⚠️ Your education section looks empty. Add your university or field of study."
336
+
337
+ if not re.search(r"university|college|institute|school", text, re.IGNORECASE):
338
+ suggestions.append("🏫 Add the name of your university/institute.")
339
+ if not re.search(r"data|science|engineering|business|design|marketing", text, re.IGNORECASE):
340
+ suggestions.append("πŸ“˜ Add your major/field of study.")
341
+ if not re.search(r"20\\d{2}", text):
342
+ suggestions.append("πŸ“… Include the years (e.g., 2022\u20132025).")
343
+
344
+ return "### πŸŽ“ Education\\n" + "\\n".join(suggestions) if suggestions else "### πŸŽ“ Education\\nβœ… Looks complete."
345
+
346
+
347
+
348
+ def analyze_skills_section(text):
349
+ if not text or len(text.strip()) < 20:
350
+ return "😬 DUDE PUT SOMETHING IN THERE. Add your technical, analytical, or soft skills. This helps with visibility and matching."
351
+
352
+ # Clean noisy parts
353
+ clean = re.sub(r"Company logo.*?", "", text, flags=re.DOTALL)
354
+ clean = re.sub(r"Show all \\d+ details", "", clean)
355
+ clean = re.sub(r"\\b\\d+\\s+endorsement[s]?", "", clean, flags=re.IGNORECASE)
356
+ clean = re.sub(r"\\s{2,}", " ", clean)
357
+ clean = re.sub(r"[^\\x00-\\x7F]+", "", clean) # remove emojis, logos, etc.
358
+
359
+ # Extract skills
360
+ lines = clean.splitlines()
361
+ skills = set()
362
+ endorsements = 0
363
+ for line in lines:
364
+ skill = line.strip()
365
+ if skill.lower().endswith("endorsement"):
366
+ endorsements += 1
367
+ elif len(skill.split()) < 5 and not re.search(r'\d', skill) and len(skill) > 2:
368
+ skills.add(skill)
369
+
370
+ feedback = []
371
+ skill_count = len(skills)
372
+
373
+ # Skill quantity logic
374
+ if skill_count == 0:
375
+ feedback.append("😬 You didn\u2019t list any skills. Add at least 5\u201310 to improve discoverability.")
376
+ elif skill_count < 10:
377
+ feedback.append(f"🧠 You listed {skill_count} skills. Maybe add more as you study and grow.")
378
+ elif skill_count < 50:
379
+ feedback.append(f"βœ… You have {skill_count} skills \u2014 solid! Most professionals have up to 50 over time.")
380
+ else:
381
+ feedback.append(f"πŸ”₯ You\u2019ve listed {skill_count}+ skills \u2014 that\u2019s fantastic!")
382
+
383
+ # Endorsements check
384
+ if endorsements == 0:
385
+ feedback.append(
386
+ "πŸ“£ None of your skills are endorsed. Ask your friends, lecturers, or mentors to endorse them. "
387
+ "They just need to visit your profile, scroll to skills, and click **Endorse**."
388
+ )
389
+ else:
390
+ feedback.append(f"πŸ‘ You\u2019ve got {endorsements} endorsement{\'s\' if endorsements > 1 else \'\'} \u2014 nice!")
391
+
392
+ return "\\n".join(feedback), "No debug info"
393
+
394
+
395
+
396
+ def analyze_apify_skills_section(row):
397
+ raw = row.get("skills", "")
398
+ feedback = []
399
+
400
+ # Handle list format
401
+ if isinstance(raw, list):
402
+ skills = [s.get("name", "").strip() for s in raw if isinstance(s, dict) and s.get("name")]
403
+ else:
404
+ text = str(raw).strip()
405
+ skills = [line.strip() for line in text.splitlines() if len(line.strip()) > 2]
406
+
407
+ if not skills:
408
+ return "### 🧠 Skills\\n😬 No skills listed. Add at least 5\u201310 relevant skills to boost search visibility."
409
+
410
+ if len(skills) < 5:
411
+ feedback.append("🧱 You only have a few skills. Try adding more \u2014 aim for 10+.")
412
+ elif len(skills) > 25:
413
+ feedback.append("βœ… Great! You\u2019ve added many skills. Keep them updated.")
414
+
415
+ feedback.append("πŸ“£ Ask friends, mentors, or teammates to endorse your top 3\u20135 skills.")
416
+ feedback.append("🀝 Add a skill every time you complete a certificate, internship, or project.")
417
+
418
+ return "### 🧠 Skills\\n" + "\\n".join(feedback)
419
+
420
+
421
+
422
+ def analyze_volunteering_section(text):
423
+ if not text or len(text.strip()) < 15:
424
+ return "πŸ™‹β€β™€οΈ No volunteering found. If you\u2019ve done any kind of volunteering \u2014 at uni, events, or clubs \u2014 add it! It boosts credibility and empathy."
425
+
426
+ # Clean the text: remove repeated logos, pdf links, and duplicates
427
+ text = re.sub(r"Company logo", "", text)
428
+ text = re.sub(r"\\.pdf", "", text)
429
+ text = re.sub(r"\\s{2,}", " ", text)
430
+ text = re.sub(r"[^\\x00-\\x7F]+", "", text) # remove emojis, non-ASCII noise
431
+
432
+ # Parse entries
433
+ volunteering_entries = re.findall(r"(.*?)\\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\s+\\d{4}", text, flags=re.IGNORECASE)
434
+ count = len(volunteering_entries)
435
+
436
+ # Determine recency
437
+ if count == 0:
438
+ return "πŸ™‹β€β™€οΈ No formal volunteering roles found. Consider listing any academic or community events you\u2019ve supported."
439
+
440
+ feedback = [f"βœ… You\u2019ve listed {count} volunteering experience{\'s\' if count > 1 else \'\'} \u2014 that\u2019s awesome!"]
441
+ if count < 2:
442
+ feedback.append("πŸ’‘ Try adding another \u2014 even a one-day academic event helps build social capital.")
443
+ else:
444
+ feedback.append("🌟 Keep highlighting these \u2014 volunteering shows initiative and collaboration!")
445
+
446
+ return "\\n".join(feedback), "No debug info"
447
+
448
+
449
+
450
+ def analyze_certifications_section(text):
451
+ if not text or len(text.strip()) < 15:
452
+ return "πŸ“œ No certifications listed. Consider adding a few! Start with free options on Coursera, edX, or Google Career Certificates."
453
+
454
+ # Clean redundant patterns
455
+ text = re.sub(r"(Company logo|Show credential|Project Capstone\\.pdf|\\.png|\\.pdf|Credential ID.*?|https?://\\S+)", "", text)
456
+ text = re.sub(r"\\b(Issued|Skills):.*", "", text)
457
+ text = re.sub(r"\\s{2,}", " ", text)
458
+ text = re.sub(r"[^\\x00-\\x7F]+", "", text) # remove emojis/non-ASCII
459
+ text = text.strip()
460
+
461
+ # Count approximate number of certifications
462
+ cert_titles = re.findall(r"(Certificate|Professional Certificate|Internship|Developer|Challenge|Recognition|Capstone|Analytics|Power BI|Sales Dashboard)", text, flags=re.IGNORECASE)
463
+ cert_count = len(cert_titles)
464
+
465
+ # Logic-based advice
466
+ feedback = [f"βœ… You have about **{cert_count} certification{\'s\' if cert_count != 1 else \'\'}** listed. Great!"]
467
+
468
+ if cert_count < 3:
469
+ feedback.append("πŸ’‘ Consider adding a few more. They help boost your visibility to recruiters.")
470
+ elif cert_count >= 5:
471
+ feedback.append("🌟 Nice variety! Just make sure you\u2019ve described what you learned in a line or two.")
472
+
473
+ # Check for missing descriptions or visuals
474
+ if "learned" not in text.lower() and "description" not in text.lower():
475
+ feedback.append("πŸ“ Add a short description under each certificate explaining what you learned or applied.")
476
+
477
+ if "pdf" not in text.lower() and "image" not in text.lower():
478
+ feedback.append("πŸ–ΌοΈ It\u2019s a good practice to upload the certificate image or PDF to validate your learning!")
479
+
480
+ return "\\n".join(feedback), "No debug info"
481
+
482
+
483
+
484
+ def analyze_apify_certifications_section(row):
485
+ text = row.get("certifications", "")
486
+ feedback = []
487
+
488
+ if not text.strip():
489
+ return "### πŸ“„ Certifications\\nπŸ“œ No certifications listed. Pick one today to get started!"
490
+
491
+ certs = re.findall(r"(Certificate|Coursera|edX|Google|IBM|Professional|Udemy|Specialization)", text, re.IGNORECASE)
492
+ if len(certs) < 2:
493
+ feedback.append("🧠 Add at least 2\u20133 certifications relevant to your career path.")
494
+ else:
495
+ feedback.append(f"βœ… Found {len(certs)} certification(s). Nice!")
496
+
497
+ if not any(word in text.lower() for word in ["description", "learned", "project"]):
498
+ feedback.append("πŸ“ Add what you learned for each \u2014 helps recruiters understand your skills.")
499
+
500
+ return "### πŸ“„ Certifications\\n" + "\\n".join(feedback)
501
+
502
+
503
+
504
+ def analyze_linkedin(name_headline, about, experience, education, skills, certs, analytics):
505
+ output_sections = []
506
+
507
+ # Name + Headline
508
+ name_feedback, name_debug = clean_name_headline_section(name_headline)
509
+ output_sections.append(f"## 🧾 Name & Headline\\n{name_feedback}\\n\\n<details><summary>Debug</summary>\\n{name_debug}\\n</details>")
510
+
511
+ # About
512
+ about_feedback, about_debug = clean_about_section(about)
513
+ output_sections.append(f"## πŸ“˜ About Section\\n{about_feedback}\\n\\n<details><summary>Debug</summary>\\n{about_debug}\\n</details>")
514
+
515
+ # Experience
516
+ exp_feedback, exp_debug = analyze_experience_section(experience)
517
+ output_sections.append(f"## πŸ’Ό Experience\\n{exp_feedback}\\n\\n<details><summary>Debug</summary>\\n{exp_debug}\\n</details>")
518
+
519
+ # Education
520
+ edu_feedback = analyze_education_section(education)
521
+ output_sections.append(f"## πŸŽ“ Education\\n{edu_feedback}")
522
+
523
+ # Skills
524
+ skills_feedback = analyze_skills_section(skills)
525
+ output_sections.append(f"## 🧠 Skills\\n{skills_feedback}")
526
+
527
+ # # Volunteering (optional reuse of experience parser)
528
+ # vol_feedback, vol_debug = analyze_volunteering_section(experience) # Adjust if volunteering is separate
529
+ # output_sections.append(f"## 🌿 Volunteering\\n{vol_feedback}\\n\\n<details><summary>Debug</summary>\\n{vol_debug}\\n</details>")
530
+
531
+ # Certifications
532
+ cert_feedback = analyze_certifications_section(certs)
533
+ output_sections.append(f"## πŸ“„ Certifications\\n{cert_feedback}")
534
+
535
+ return clean_text("\\n\\n---\\n\\n".join(output_sections))
536
+
537
+
538
+
539
+ def analyze_scraped_linkedin_profile(row):
540
+ if not isinstance(row, dict):
541
+
542
+ if isinstance(row, pd.Series):
543
+ row = row.to_dict()
544
+ else:
545
+ return "❌ Invalid profile format. Expected a dictionary or dataframe row."
546
+
547
+ insights = []
548
+
549
+ insights.append(analyze_apify_about_section(row))
550
+
553
+
554
+ if row.get("experience"):
555
+ insights.append(analyze_apify_experience_section(row))
556
+
557
+ if row.get("education"):
558
+ insights.append(analyze_apify_education_section(row))
559
+
560
+ if row.get("skills"):
561
+ insights.append(analyze_apify_skills_section(row))
562
+
563
+ if row.get("certifications"):
564
+ insights.append(analyze_apify_certifications_section(row))
565
+
566
+ return "\\n\\n".join(insights)
567
+
568
+
569
+
570
+ def analyze_apify_dataset_ui():
571
+
572
+ path = "/mnt/data/dataset_linkedin-profile-full-sections-scraper_2025-06-09_23-12-43-671.csv"
573
+
574
+ try:
575
+ df = pd.read_csv(path)
576
+ if df.empty:
577
+ return "⚠️ No data found in the CSV."
578
+
579
+ result_md = []
580
+ for i, row in df.iterrows():
581
+ profile_insight = analyze_scraped_linkedin_profile(row)
582
+ result_md.append(f"## πŸ” Profile {i+1}\\n\\n{profile_insight}")
583
+
584
+ return "\\n\\n---\\n\\n".join(result_md)
585
+
586
+ except Exception as e:
587
+ return f"❌ Failed to analyze dataset: {e}"
588
+
589
+
590
+ #=====================
591
+
592
+
593
+ # Make sure this is defined globally
594
+ memo_data = []
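+ # Collects {"type", "text", "source"} action dicts extracted from the LinkedIn feedback below.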
595
+
596
+ def fetch_and_analyze_linkedin(linkedin_url):
597
+ if not linkedin_url.strip():
598
+ return "⚠️ Please enter a valid LinkedIn profile URL."
599
+
600
+ apify_token = os.getenv("APIFY_TOKEN")
601
+ if not apify_token:
602
+ return "⚠️ APIFY_TOKEN not found in environment variables."
603
+
604
+ actor_id = "rivapereira268~linkedin-profile-full-sections-scraper---no-cookies-task"
605
+ start_url = f"https://api.apify.com/v2/actor-tasks/{actor_id}/runs?token={apify_token}"
606
+ input_payload = {"profileUrls": [linkedin_url]}
607
+
608
+ try:
609
+ # Step 1: Start Apify run
610
+ run_response = requests.post(start_url, json=input_payload)
611
+ run_data = run_response.json()
612
+ if "data" not in run_data or "id" not in run_data["data"]:
613
+ return "❌ Failed to start Apify task."
614
+
615
+ run_id = run_data["data"]["id"]
616
+ print(f"[DEBUG] Apify task started. Run ID: {run_id}")
617
+
618
+ # Step 2: Poll for status
619
+ status_url = f"https://api.apify.com/v2/actor-runs/{run_id}?token={apify_token}"
620
+ for _ in range(30):
621
+ time.sleep(2)
622
+ run_status = requests.get(status_url).json()
623
+ status = run_status["data"]["status"]
624
+ print(f"[DEBUG] Apify task status: {status}")
625
+ if status == "SUCCEEDED":
626
+ break
627
+ elif status in ["FAILED", "ABORTED"]:
628
+ return f"❌ Apify task failed: {status}"
629
+
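+ # Caveat: the loop above waits at most ~60 s (30 polls x 2 s); if the Apify run is
+ # still in progress after that, execution falls through with the last polled status,
+ # so very slow scrapes may reach the dataset fetch before results are ready.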
630
+ # Step 3: Fetch dataset
631
+ dataset_id = run_status["data"]["defaultDatasetId"]
632
+ items_url = f"https://api.apify.com/v2/datasets/{dataset_id}/items?format=json"
633
+ items = requests.get(items_url).json()
634
+ print(f"[DEBUG] Items fetched: {len(items)}")
635
+
636
+ if not items or not isinstance(items, list):
637
+ return "❌ No data returned from Apify. LinkedIn profile may be private or blocked."
638
+
639
+ # Step 4: Unwrap nested list if needed
640
+ while isinstance(items[0], list):
641
+ items = items[0]
642
+ profile_data = items[0]
643
+
644
+ if not isinstance(profile_data, dict):
645
+ return "❌ Apify returned unexpected data format."
646
+
647
+ # Step 5: Analyze profile and extract actions
648
+ result = analyze_scraped_linkedin_profile(profile_data)
649
+ print(f"[DEBUG] Analysis Result: {result[:100]}...")
650
+
651
+ actions = extract_actions_from_feedback(result, source="Linky")
652
+ print(f"[DEBUG] Actions Extracted: {len(actions)}")
653
+ memo_data.extend(actions)
654
+
655
+ # Step 6: Add generic Linky nudges if missing
656
+ existing_texts = [entry["text"] for entry in memo_data]
657
+ linky_nudges = [
658
+ {"type": "Action", "text": "🧩 Create a free Linktree to unify your portfolio links", "source": "Linky"},
659
+ {"type": "Action", "text": "🀝 Reach out for 1\u20132 professional recommendations on LinkedIn", "source": "Linky"},
660
+ {"type": "Action", "text": "🌿 Add a volunteering experience \u2014 even academic or event-based", "source": "Linky"},
661
+ {"type": "Action", "text": "πŸ“Š Review your LinkedIn Analytics this month", "source": "Linky"},
662
+ ]
663
+ for nudge in linky_nudges:
664
+ if nudge["text"] not in existing_texts:
665
+ memo_data.append(nudge)
666
+
667
+ return result
668
+
669
+ except Exception as e:
670
+ print(f"[ERROR] Exception during LinkedIn analysis: {e}")
671
+ return f"❌ Internal error: {e}"
672
+
673
+
674
+
675
+
676
+
677
+ #==================================================== GIT HUB HERE=====================================================
678
+ def analyze_github(readme_text=None):
679
+ """Provides GitHub README improvement checklist and personalized tips"""
680
+ if not readme_text or not readme_text.strip():
681
+ return "⚠️ Please paste your GitHub README content above."
682
+
683
+ # Clean and lowercase for analysis
684
+ text = readme_text.strip().lower()
685
+
686
+ tips = ["### πŸ—‚ GitHub README Optimization Tips"]
687
+
688
+ # Required Section Checks
689
+ if "hi there" in text and "hello"in text:
690
+ tips.append("- 🟑 Add a warm **intro greeting**. Sets the tone!")
691
+ if "skills" not in text:
692
+ tips.append("- ⚠️ Add a **Skills & Technologies** section to highlight your toolset.")
693
+ if "experience" not in text and "projects" not in text:
694
+ tips.append("- ❌ You\u2019re missing your **experience/projects** \u2014 showcase at least 1!")
695
+ if "collaborations" not in text and "open to" not in text:
696
+ tips.append("- 🟑 Mention you\u2019re open to **collaborations or freelance**.")
697
+ if "badge" not in text and "shields.io" not in text:
698
+ tips.append("- 🟨 Add some **GitHub badges** (license, language, build status).")
699
+
700
+ # Bonus Points
701
+ if "banner" in text or "header" in text:
702
+ tips.append("- βœ… Good job adding a visual **banner** to brand your README.")
703
+ if "cupid" in text or "dino" in text:
704
+ tips.append("- βœ… Project-specific highlights detected. Great work linking real repos!")
705
+ if "streamlit" in text or "gradio" in text:
706
+ tips.append("- βœ… Noticed interactive tools mentioned \u2014 excellent!")
707
+
708
+ tips.append("\\n---\\nβœ… You can also [check out Isham\\'s GitHub](https://github.com/di37) as a solid reference for advanced formatting, badge use, and depth.")
709
+
710
+ return clean_text("\\n".join(tips))
711
+
712
+
modules/rag.py ADDED
@@ -0,0 +1,284 @@
1
+ import os
2
+ import requests
3
+ from pathlib import Path
4
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
5
+ from langchain_community.embeddings import HuggingFaceEmbeddings
6
+ from transformers import pipeline
7
+ from utils.data_helpers import smart_label_converter, clean_text
8
+ from utils.api_clients import pine_index, client, TAVILY_API_KEY, OPENAI_API_KEY
9
+
10
+ # ==== Load RAG Vector Index ====
11
+ def load_docs():
12
+ try:
13
+ docs = SimpleDirectoryReader("data/memo").load_data()
14
+ return VectorStoreIndex.from_documents(docs).as_query_engine()
15
+ except Exception as e:
16
+ print("❌ Error loading RAG docs:", e)
17
+ return None
18
+
19
+ memo_rag_engine = load_docs()
20
+
21
+
22
+
23
+ # ==== Ingest Popular Courses from Class Central ====
24
+ def batch_ingest_from_classcentral():
25
+ course_descriptions = [
26
+ "CS50’s Introduction to Computer Science from Harvard University",
27
+ "Google Data Analytics from Google",
28
+ "Neural Networks and Deep Learning from DeepLearning.AI",
29
+ "Python for Everybody from University of Michigan",
30
+ "Introduction to Psychology from Yale University",
31
+ "Foundations of User Experience (UX) Design from Google",
32
+ "Financial Markets from Yale University",
33
+ "Introduction to Data Science in Python from University of Michigan",
34
+ "AI For Everyone from DeepLearning.AI",
35
+ "Introduction to HTML5 from University of Michigan"
36
+ ]
37
+ for title in course_descriptions:
38
+ try:
39
+ response = requests.post("https://api.tavily.com/search", json={
40
+ "api_key": TAVILY_API_KEY,
41
+ "query": title,
42
+ "include_answer": True
43
+ }, timeout=15)
44
+ response.raise_for_status()
45
+ answer = response.json().get("answer", "")
46
+ if not answer:
47
+ continue
48
+ # Assuming summarizer is initialized elsewhere or passed as an argument
49
+ # summary = summarizer(f"Summarize this course for roadmap purposes:\n{answer}", max_new_tokens=300)[0]["generated_text"]
50
+ goal = title.split(" from ")[0].strip().lower().replace(" ", "_")
51
+ save_to_rag(goal, answer) # + "\n\n---\n" + summary)
52
+
53
+ print(f"βœ… Ingested: {title}")
54
+ except Exception as e:
55
+ print(f"❌ Failed to ingest {title}: {e}")
56
+
57
+
58
+ # ==== Save Tavilly Result to RAG ====
59
+ def save_to_rag(goal, content):
60
+ goal_slug = goal.lower().replace(" ", "_")
61
+ path = Path(f"data/memo/{goal_slug}_tavilly.txt")
62
+ path.parent.mkdir(parents=True, exist_ok=True)
63
+ path.write_text(content)
64
+ print(f"πŸ“„ Saved to: {path}")
65
+
66
+ # ==== RAG from Memo ====
67
+ def call_rag(goal):
68
+ # Load saved content
69
+ path = Path(f"data/memo/{goal.lower().replace(" ", "_")}_tavilly.txt")
70
+ if not path.exists():
71
+ return "❌ No memory found for this goal yet. Try running Tavilly first."
72
+
73
+ # Example fixed formula-based roadmap
74
+ base_plan = f"""
75
+ ## 📅 4-Week Roadmap for Becoming a {goal.title()}
76
+ ### 🎓 Step 1: Choose a Top-Rated Course
77
+ - Search for a course on Coursera, edX, or Class Central.
78
+ - Prefer those with ★★★★☆ or ★★★★★.
79
+ - Example: Google {goal.title()} Certificate.
80
+ ### 💰 Step 2: Check Accessibility
81
+ - ✅ Can you audit it for free?
82
+ - 💳 Can you afford a paid certificate?
83
+ - 🎓 See if your university provides access.
84
+ ### 🧠 Step 3: Weekly Breakdown
85
+ - **Week 1–3**: Complete 75% of the course.
86
+ - **Week 4**: Build a project related to the course topic.
87
+ - Example: For UX → Design a landing page wireframe
88
+ - For Data → Create a dashboard in Google Sheets or Tableau
89
+ ### 📌 Tip:
90
+ Document your work in Notion or a public portfolio. Practice explaining your learnings.
91
+ ---
92
+ 📚 Course inspiration: https://www.classcentral.com/report/most-popular-online-courses/
93
+ """
94
+ return clean_text(base_plan)
95
+
96
+
97
+ def save_to_memory(user_id, goal, summary, steps, courses):
98
+ try:
99
+ from datetime import datetime
100
+ text_blob = f"Goal: {goal}\nSummary: {summary}\nSteps: {" | ".join(steps)}\nCourses: {" | ".join([c[0] for c in courses])}"
101
+ embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
102
+ embedding = embed_model.embed_query(text_blob)
103
+ metadata = {
104
+ "user_id": user_id,
105
+ "goal": goal,
106
+ "summary": summary,
107
+ "steps": steps,
108
+ "courses": [f"{c[0]} | {c[1]}" for c in courses],
109
+ "timestamp": datetime.utcnow().isoformat()
110
+ }
111
+ pine_index.upsert([(user_id + ":" + goal.replace(" ", "_"), embedding, metadata)])
112
+ return True
113
+ except Exception as e:
114
+ print(f"❌ Failed to save memory: {e}")
115
+ return False
116
+
117
+ def recall_from_memory(user_id, goal):
118
+ try:
119
+ query = user_id + ":" + goal.replace(" ", "_")
120
+ result = pine_index.fetch([query]) # βœ… returns a FetchResponse object
121
+
122
+ if query not in result.vectors:
123
+ return "❌ No saved plan found for this goal."
124
+
125
+ metadata = result.vectors[query].get("metadata", {})
126
+ steps = metadata.get("steps", [])
127
+ steps = [smart_label_converter(s) for s in steps if isinstance(s, str) and len(s.strip()) > 1]
128
+ summary = metadata.get("summary", "")
129
+ courses = metadata.get("courses", [])
130
+ course_section = ""
131
+
132
+ # Assuming render_text_roadmap is defined elsewhere or passed as an argument
133
+ # diagram = render_text_roadmap(goal, steps)
134
+ diagram = ""
135
+
136
+ if courses:
137
+ course_section = "\n\n### πŸ“š Recommended Courses\n" + "\n".join([f"- [{c["name"]}]({c["url"]})" for c in courses if 'name' in c and 'url' in c])
138
+
139
+ return f"""### πŸ” Recalled Plan for {goal}
140
+
141
+ {diagram}
142
+
143
+ {summary}{course_section}
144
+
145
+ **πŸ—“ Book your weekly study check-in:** [Click here]({CALENDLY_LINK})
146
+ """
147
+ except Exception as e:
148
+ return f"❌ Error recalling memory: {e}"
149
+
150
+
151
+ # ==== Tavilly + Summary + Course Suggestion ====
152
+ # UI wiring reminder:
153
+ # rag_button.click(fn=call_tavilly_rag, inputs=career_goal, outputs=rag_output)
154
+ # Ensure gr.Markdown() is assigned to rag_output
155
+ def call_tavilly_rag(user_id, goal):
156
+ # completed_tasks.clear() # This should be handled in app.py
157
+
158
+ if not TAVILY_API_KEY:
159
+ return "❌ Tavilly API key not found.", "", []
160
+
161
+ try:
162
+ headers = {"Authorization": TAVILY_API_KEY}
163
+ payload = {
164
+ "query": f"{goal} career weekly roadmap",
165
+ "search_depth": "advanced",
166
+ "include_answer": True
167
+ }
168
+ response = requests.post("https://api.tavily.com/search", headers=headers, json=payload, timeout=15)
169
+ response.raise_for_status()
170
+ result = response.json()
171
+ web_content = result.get("answer", "")
172
+ if len(web_content.split()) < 100:
173
+ web_content += "\n\nSuggested steps: Learn Figma, build portfolio, network, and apply for internships."
174
+ except Exception as e:
175
+ import traceback
176
+ traceback.print_exc()
177
+ return f"❌ Tavilly error: {str(e)}", "", []
178
+
179
+
180
+ try:
181
+ # Get 6 week short roadmap steps
182
+ messages = [
183
+ {"role": "system", "content": "Create a personalized 6-step weekly career roadmap. The roadmap should be goal-focused and iterative β€” each step should build upon the previous one. Encourage the user to start by selecting a course from the recommended list, then move toward applying that knowledge through projects, certifications, or content creation. End the roadmap by demonstrating expertise (e.g., GitHub repo, portfolio update, mock interview). Each step should be 1–2 sentences and mention a clear action, resource, and milestone outcome."}
184
+ ]
185
+
186
+ # client = openai.OpenAI() # client should be imported
187
+ res = client.chat.completions.create(model="gpt-4o", messages=messages, max_tokens=300, temperature=0.5)
188
+ response_text = res.choices[0].message.content
189
+ raw_steps = response_text.split("\n")
190
+
191
+ steps = [s.strip("* ").strip() for s in raw_steps if s.strip() and not s.strip().lower().startswith("**week")]
192
+ steps = [smart_label_converter(s) for s in steps if isinstance(s, str) and len(s.strip()) > 1]
193
+
194
+
195
+ if not steps:
196
+ print("⚠️ No valid steps found from LLM β€” using fallback tasks. Tavilly can't find it, maybe RAG can-?")
197
+ steps = [
198
+ "Action: Find a course or learn the skill by hand. Head to Memo with your research.",
199
+ "Resource: Watch the first 2 modules.",
200
+ "Milestone: Create a short reflection post on what you learned."
201
+ ]
202
+
203
+ # diagram = render_text_roadmap(goal, steps) # This should be handled in app.py
204
+ diagram = ""
205
+
206
+ # Summarize with FLAN-T5
207
+ # prompt = f"Create a weekly roadmap for someone becoming a {goal}. Use:\n{web_content}"
208
+ # summary = summarizer(prompt, max_new_tokens=300, do_sample=False)[0]["generated_text"]
209
+ summary = ""
210
+
211
+ # goal_key = goal.lower().strip()
212
+ # courses = course_suggestions.get(goal_key, []) # This should be handled in app.py
213
+ courses = []
214
+ course_section = "" # + "\n\n### πŸ“š Recommended Courses\n" + "\n".join([f"- [{name}]({url})" for name, url in courses]) if courses else ""
215
+
216
+ save_to_memory(user_id, goal, summary, steps, courses)
217
+
218
+ return f"""
219
+ ### 🧠 Weekly Plan for {goal}
220
+
221
+ ```
222
+ {diagram}
223
+ ```
224
+
225
+ {summary}{course_section}
226
+
227
+ **πŸ—“ Do your study check-ins yourself and weekly follow up in the Memo tab.**
228
+
229
+ """, "", steps
230
+
231
+ except Exception as e:
232
+ print(f"❌ GPT-4o fallback failed: {e}")
233
+ fallback_steps = [
234
+ "Action: Search YouTube or Coursera for a beginner course.",
235
+ "Resource: Choose any free learning platform.",
236
+ "Milestone: Finish one hour of learning and reflect."
237
+ ]
238
+ # diagram = render_text_roadmap(goal, fallback_steps) # This should be handled in app.py
239
+ diagram = ""
240
+ fallback_summary = "This is a basic roadmap you can follow to get started until dynamic generation is fixed."
241
+
242
+ return f"""
243
+ ### 🧠 Starter Plan for {goal}
244
+
245
+ ```
246
+ {diagram}
247
+ ```
248
+
249
+ {fallback_summary}
250
+
251
+ **πŸ—“ Do your study check-ins yourself and weekly follow up in the Memo tab.**
252
+ """, "", fallback_steps
253
+
254
+ def run_gpt_fallback(goal):
255
+ messages = [
256
+ {
257
+ "role": "system",
258
+ "content": (
259
+ "Create a personalized 6-step weekly career roadmap for becoming a "
260
+ f"{goal}. The roadmap should be goal-focused and iterative β€” each step "
261
+ "should build upon the previous one. Encourage the user to start with a course, then move toward applying that knowledge. "
262
+ "Each step must include an action, a resource, and a milestone."
263
+ )
264
+ }
265
+ ]
266
+
267
+ # client = openai.OpenAI() # client should be imported
268
+ res = client.chat.completions.create(
269
+ model="gpt-4o",
270
+ messages=messages,
271
+ max_tokens=300,
272
+ temperature=0.5
273
+ )
274
+
275
+ response_text = res.choices[0].message.content
276
+ raw_steps = response_text.split("\n")
277
+ steps = [s.strip("* ").strip() for s in raw_steps if s.strip() and not s.strip().lower().startswith("**week")]
278
+ steps = [smart_label_converter(s) for s in steps if isinstance(s, str) and len(s.strip()) > 1]
279
+
280
+ if not steps:
281
+ raise ValueError("GPT fallback returned no usable steps")
282
+
283
+ return steps
284
+
modules/task_management.py ADDED
@@ -0,0 +1,111 @@
1
+ from collections import defaultdict
2
+ import gradio as gr
3
+ import datetime
4
+ import random
5
+
6
+ from utils.constants import reward_pool, task_data, claimed_rewards, available_rewards, last_reset
7
+ from utils.data_helpers import clean_text
8
+
9
+ def display_tasks():
10
+ if not task_data:
11
+ return "No tasks yet."
12
+
13
+ # πŸ”§ Patch missing points from old task entries
14
+ for t in task_data:
15
+ if "Points" not in t:
16
+ if "πŸ”₯" in t.get("Difficulty", ""):
17
+ t["Points"] = 30
18
+ elif "πŸ”" in t.get("Difficulty", ""):
19
+ t["Points"] = 20
20
+ elif "🧊" in t.get("Difficulty", ""):
21
+ t["Points"] = 10
22
+ else:
23
+ t["Points"] = 15
24
+
25
+ # Group tasks by Milestone, Action, Resource
26
+ grouped = defaultdict(list)
27
+ for t in task_data:
28
+ if t["Task"].lower().startswith("milestone"):
29
+ grouped["Milestone"].append(t)
30
+ elif t["Task"].lower().startswith("resource"):
31
+ grouped["Resource"].append(t)
32
+ elif t["Task"].lower().startswith("action"):
33
+ grouped["Action"].append(t)
34
+ else:
35
+ grouped["Other"].append(t)
36
+
37
+ display = ""
38
+
39
+ # Emoji headers
40
+ emoji_map = {
41
+ "Milestone": "🎯 Milestones",
42
+ "Action": "πŸ”Ž Actions",
43
+ "Resource": "πŸŽ“ Resources",
44
+ "Other": "πŸ“Œ Other Tasks"
45
+ }
46
+
47
+
48
+ for group in ["Milestone", "Action", "Resource", "Other"]:
49
+ if grouped[group]:
50
+ display += f"\n### {emoji_map[group]}\n"
51
+ display += "| πŸ“ Task | ⏱ Duration | 🧱 Difficulty | πŸ”₯ Priority | πŸ’― Points |\n"
52
+ display += "|---------|-------------|---------------|-------------|-------------|\n"
53
+
54
+ for t in grouped[group]:
55
+ priority_emoji = {"Critical πŸ”΄": "πŸ”΄", "Important 🟠": "🟠", "Optional 🟒": "🟒"}
56
+ emoji = priority_emoji.get(t["Tag"], "")
57
+ tag_display = f"{emoji} {t["Tag"]}" if emoji else t["Tag"]
58
+ points = t.get("Points", 0)
59
+ display += f"| {t["Task"]} | {t["Duration"]} hr | {t["Difficulty"]}" \
60
+ f" | {tag_display} | {t["Points"]} |\n"
61
+
62
+ return display
63
+
64
+
65
+ def add_reward(new_reward):
66
+ if new_reward and new_reward not in reward_pool:
67
+ reward_pool.append(new_reward)
68
+ return gr.update(choices=reward_pool, value=reward_pool)
69
+
70
+ def calculate_progress(user_id, completed):
71
+ completed_count = len(completed)
72
+ total = len(task_data)
73
+ percent = int((completed_count / total) * 100) if total else 0
74
+ points = completed_count * 25
75
+ bar = f"[{\'β–ˆ\' * (percent // 10)}{\'-\' * (10 - percent // 10)}]"
76
+ global available_rewards
77
+ available_rewards = reward_pool if percent == 100 else reward_pool[:2] if percent >= 50 else reward_pool[:1]
78
+ return f"Progress: {bar} {percent}% Points: {points} / {total * 25}", completed, task_data
79
+
80
+ def claim_reward(completed, tasks):
81
+ if not available_rewards:
82
+ return gr.update(value="πŸ”’ No rewards unlocked yet.")
83
+
84
+ if len(claimed_rewards) >= 1:
85
+ return gr.update(value="β›” Already claimed reward this week.")
86
+
87
+ chosen = random.choice(available_rewards)
88
+ claimed_rewards.append(chosen)
89
+
90
+ return gr.update(value=f"""
91
+ <div style=\'border: 2px solid #FFD700; padding: 12px; background: #fff3cd; font-size: 18px; border-radius:10px;\'>
92
+ πŸŽ‰ <strong>Reward Unlocked!</strong><br><br>
93
+ <span style=\'font-size: 22px;\'>✨ You claimed: <strong>{chosen}</strong> 🎁</span><br><br>
94
+ Past Rewards: {" ".join(claimed_rewards)}
95
+ </div>
96
+ """)
97
+
98
+ def add_course_to_memo(course_title):
99
+ # reset_weekly_data() # This should be handled in app.py
100
+ task = f"Finish Week 1 of {course_title}"
101
+ for t in task_data:
102
+ if t["Task"].strip().lower() == task.strip().lower():
103
+ return "⚠️ Course task already added."
104
+ task_data.append({
105
+ "Task": task,
106
+ "Duration": 3,
107
+ "Difficulty": "Moderate",
108
+ "Tag": "Critical πŸ”΄"
109
+ })
110
+ return display_tasks()
111
+
utils/api_clients.py ADDED
@@ -0,0 +1,24 @@
1
+ import os
2
+ from openai import OpenAI
3
+ from pinecone import Pinecone
4
+
5
+
6
+ def initialize_api_clients():
7
+ pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
8
+ pine_index = pc.Index("career-buddy-memo")
9
+ APIFY_TOKEN = os.environ.get("APIFY_TOKEN")
10
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
11
+ TAVILY_API_KEY = os.getenv("TAVILY_API_KEY", "")
12
+ client = OpenAI(api_key=OPENAI_API_KEY)
13
+
14
+ return pc, pine_index, APIFY_TOKEN, OPENAI_API_KEY, TAVILY_API_KEY, client
15
+
16
+
+ # Module-level instances so other modules can `from utils.api_clients import pine_index, client, ...`
+ pc, pine_index, APIFY_TOKEN, OPENAI_API_KEY, TAVILY_API_KEY, client = initialize_api_clients()
17
+
18
+ def unload_model(model):
19
+ # This is a placeholder for actual model unloading logic
20
+ # In a real scenario, this would depend on the specific model and framework
21
+ print(f"[DEBUG] Unloading model: {model}")
22
+ del model
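+ # A fuller cleanup (assumption, framework-dependent) would also run gc.collect()
+ # and, for CUDA-backed transformers models, torch.cuda.empty_cache() to free VRAM.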
23
+
24
+
utils/constants.py ADDED
@@ -0,0 +1,11 @@
1
+ import datetime
+
+ TASK_DIFFICULTIES = ["Simple", "Moderate", "Challenging"]
2
+ TASK_TAGS = ["Critical 🔴", "Important 🟠", "Optional 🟢"]
3
+ reward_pool = ["Ice Cream 🍦", "Watch Party 🎬", "Spa Day 💆‍♀️"]
4
+ task_data, claimed_rewards, available_rewards = [], [], []
5
+ memo_data = []
6
+ completed_tasks = set()
7
+ completed_steps_box = set()
8
+
9
+ visual_steps = []
10
+ last_reset = datetime.date.today()
11
+
utils/data_helpers.py ADDED
@@ -0,0 +1,174 @@
1
+ import re
2
+ import datetime
3
+ import difflib
4
+ from utils.constants import memo_data, task_data, completed_tasks
5
+
6
+ def smart_label_converter(step_label):
7
+ if step_label.lower().startswith("milestone:"):
8
+ return "🎯 Build: " + step_label[len("milestone:"):].strip()
9
+ elif step_label.lower().startswith("action:"):
10
+ return "πŸ”Ž Research: " + step_label[len("action:"):].strip()
11
+ elif step_label.lower().startswith("resource:"):
12
+ return "πŸŽ“ Learn: " + step_label[len("resource:"):].strip()
13
+ else:
14
+ return step_label
15
+
16
+ def extract_actions_from_feedback(feedback_text, source="AI"):
17
+ sentences = re.split(r'[.?!]\s+', feedback_text)
18
+ actions = []
19
+
20
+ for sentence in sentences:
21
+ lower = sentence.lower()
22
+ if any(kw in lower for kw in ["fix", "add", "update", "change", "optimize", "remove", "improve", "include", "enhance", "refactor"]):
23
+ cleaned = sentence.strip("β€’- ").strip().capitalize()
24
+ if cleaned:
25
+ actions.append({"type": "Action", "text": cleaned, "source": source})
26
+
27
+ return actions
28
+
29
+ def render_memo():
30
+ if not memo_data:
31
+ return "πŸ“­ No roadmap data yet."
32
+
33
+ grouped = {"Milestone": [], "Resource": [], "Action": []}
34
+ for item in memo_data:
35
+ grouped[item["type"]].append(item)
36
+
37
+ output = ""
38
+ if grouped["Milestone"]:
39
+ output += "### 🎯 Milestones\n"
40
+ for m in grouped["Milestone"]:
41
+ output += f"- {m['text']}\n"
42
+
43
+ if grouped["Resource"]:
44
+ output += "\n### πŸŽ“ Resources\n"
45
+ for r in grouped["Resource"]:
46
+ output += f"- {r['text']}\n"
47
+
48
+ if grouped["Action"]:
49
+ output += "\n### πŸ”Ž Actions\n"
50
+ for a in grouped["Action"]:
51
+ output += f"- {a['text']} \n πŸ‘‰ [Add to Tasks]\n"
52
+
53
+ return output
54
+
55
+ def convert_actions_to_tasks():
56
+ added = 0
57
+ for item in memo_data:
58
+ if item["type"] == "Action":
59
+ task_data.append({
60
+ "Task": item["text"],
61
+ "Duration": 1,
62
+ "Difficulty": "Simple",
63
+ "Tag": "Important 🟠",
64
+ "Points": 10
65
+ })
66
+ added += 1
67
+ # Assuming display_tasks() is a Gradio component update, it will be handled in app.py
68
+ return "Tasks converted!"
69
+
70
+ course_suggestions = {
71
+ "data analyst": [
72
+ ("Google Data Analytics Professional Certificate", "https://www.coursera.org/professional-certificates/google-data-analytics"),
73
+ ("IBM Data Analyst Professional Certificate", "https://www.coursera.org/professional-certificates/ibm-data-analyst"),
74
+ ("Introduction to Data Analytics by IBM", "https://www.coursera.org/learn/introduction-to-data-analytics"),
75
+ ("Excel Basics for Data Analysis by IBM", "https://www.coursera.org/learn/excel-basics-data-analysis"),
76
+ ("Data Analysis using Excel and Tableau by EntryLevel", "https://www.entrylevel.net/post/beginner-data-analysis-courses-by-platform-with-certificates")
77
+ ],
78
+ "ux designer": [
79
+ ("Google UX Design Professional Certificate", "https://www.coursera.org/professional-certificates/google-ux-design"),
80
+ ("Introduction to UI and UX Design by Codecademy", "https://www.codecademy.com/learn/intro-to-ui-ux"),
81
+ ("UX Design Institute's Introduction to UX Design", "https://www.uxdesigninstitute.com/blog/best-free-ux-design-courses-in-2022/"),
82
+ ("Introduction to User Experience Design by Georgia Tech", "https://www.coursera.org/learn/user-experience-design"),
83
+ ("CareerFoundry UX Design Program", "https://careerfoundry.com/en/blog/ux-design/ux-design-course-online/")
84
+ ],
85
+ "software engineer": [
86
+ ("Introduction to Software Engineering by IBM", "https://www.coursera.org/learn/introduction-to-software-engineering"),
87
+ ("Python for Everybody Specialization by University of Michigan", "https://www.coursera.org/specializations/python"),
88
+ ("Full-Stack Engineer Career Path by Codecademy", "https://www.codecademy.com/learn/paths/full-stack-engineer-career-path"),
89
+ ("Software Engineering for Beginners by Udemy", "https://www.udemy.com/course/software-engineering-for-beginners/"),
90
+ ("Software Engineering Bootcamp by TripleTen", "https://tripleten.com/software-engineer/")
91
+ ],
92
+ "digital marketing": [
93
+ ("Fundamentals of Digital Marketing by Google Digital Garage", "https://learndigital.withgoogle.com/digitalgarage/course/digital-marketing"),
94
+ ("Digital Marketing Specialization by Coursera", "https://www.coursera.org/specializations/digital-marketing"),
95
+ ("The Complete Digital Marketing Course by Udemy", "https://www.udemy.com/course/learn-digital-marketing-course/"),
96
+ ("Digital Marketing Fundamentals by University of Edinburgh on edX", "https://www.edx.org/course/digital-marketing-fundamentals"),
97
+ ("Digital Marketing Course by CareerFoundry", "https://careerfoundry.com/en/blog/digital-marketing/online-digital-marketing-courses/")
98
+ ],
99
+ "project manager": [
100
+ ("Google Project Management Professional Certificate", "https://www.coursera.org/professional-certificates/google-project-management"),
101
+ ("Foundations of Project Management by Coursera", "https://www.coursera.org/learn/project-management-foundations"),
102
+ ("Project Management Basics by PMI", "https://www.pmi.org/learning/free-online-courses"),
103
+ ("Introduction to Project Management by University of Adelaide on edX", "https://www.edx.org/course/introduction-to-project-management"),
104
+ ("Project Management Principles and Practices Specialization by Coursera", "https://www.coursera.org/specializations/project-management")
105
+ ]
106
+ }
107
+
108
+ def get_courses_for_goal(goal_key):
109
+ if goal_key not in course_suggestions:
110
+ match = difflib.get_close_matches(goal_key, course_suggestions.keys(), n=1, cutoff=0.6)
111
+ if match:
112
+ goal_key = match[0]
113
+ return course_suggestions.get(goal_key, [])
114
+
115
+ class RoadmapUnlockManager:
116
+ def __init__(self):
117
+ self.weekly_steps = {}
118
+ self.current_week = "Week 1"
119
+ self.completed_tasks = set()
120
+
121
+ def load_steps(self, steps: list[str]):
122
+ self.weekly_steps = {}
123
+ current_label = None
124
+
125
+ for step in steps:
126
+ stripped = step.strip().strip("*")
127
+ if stripped.lower().startswith("week"):
128
+ current_label = stripped.split(":")[0].strip()
129
+ self.weekly_steps[current_label] = []
130
+ elif current_label:
131
+ self.weekly_steps[current_label].append(stripped)
132
+
133
+ self.current_week = list(self.weekly_steps.keys())[0] if self.weekly_steps else "Week 1"
134
+ self.completed_tasks.clear()
135
+
136
+ def get_current_choices(self):
137
+ return [
138
+ s for s in self.weekly_steps.get(self.current_week, [])
139
+ if not s.lower().startswith("week") and not s.startswith("**")
140
+ ]
141
+
142
+ def get_current_week_title(self):
143
+ return f"**πŸ“… Current Focus: {self.current_week}**"
144
+
145
+ def update_completion(self, selected):
146
+ self.completed_tasks.update(selected)
147
+ all_current = set(self.get_current_choices())
148
+ if all_current.issubset(self.completed_tasks):
149
+ return self._unlock_next_week()
150
+ return f"βœ… Progress: {len(self.completed_tasks)}/{len(all_current)}"
151
+
152
+ def _unlock_next_week(self):
153
+ weeks = list(self.weekly_steps.keys())
154
+ current_index = weeks.index(self.current_week)
155
+ if current_index + 1 < len(weeks):
156
+ self.current_week = weeks[current_index + 1]
157
+ self.completed_tasks.clear()
158
+ return f"πŸŽ‰ All tasks done! Unlocked: {self.current_week}"
159
+ return "βœ… All weeks completed!"
160
+
161
+ def greet_user(uid, goal):
162
+ feedback = f"βœ… Welcome back, **{uid}**!"
163
+ # Assuming recall_from_memory will be moved to memo.py or a separate data handling module
164
+ # For now, keep it as a placeholder or import if already moved
165
+ # recalled = recall_from_memory(uid, goal)
166
+ return feedback #, recalled
167
+
168
+ def clean_text(text):
169
+ if not isinstance(text, str):
170
+ return ""
171
+ text = text.encode('utf-8', 'ignore').decode('utf-8', 'ignore')
172
+ text = re.sub(r'[^\x00-\x7F]+', '', text)
173
+ return text.strip()
174
+
utils/summarizer.py ADDED
@@ -0,0 +1,5 @@
1
+ from transformers import pipeline
2
+
3
+ def initialize_summarizer():
4
+ return pipeline("summarization", model="facebook/bart-large-cnn")
5
+
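+ # Example usage (illustrative sketch):
+ #   summarizer = initialize_summarizer()
+ #   summary = summarizer(long_text, max_length=150, min_length=30, do_sample=False)[0]["summary_text"]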