Spaces:

siddhartharya
/

email_writing_ai_agent

Sleeping

App Files Files Community

siddhartharya committed on Oct 16, 2024

Commit

2f50c94

verified ·

1 Parent(s): ebcf536

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -49

app.py CHANGED Viewed

@@ -1,11 +1,11 @@
-import gradio as gr
 import requests
 import os
-from bs4 import BeautifulSoup  # For scraping company and role info
 # Load API keys securely from environment variables
 proxycurl_api_key = os.getenv("PROXYCURL_API_KEY")  # Proxycurl API key
 groq_api_key = os.getenv("GROQ_CLOUD_API_KEY")  # Groq Cloud API key
 class EmailAgent:
     def __init__(self, linkedin_url, company_name, role, word_limit, user_name, email, phone, linkedin):
@@ -23,7 +23,7 @@ class EmailAgent:
         self.company_info = None
         self.role_description = None
-    # Reason: Decide what information is needed and if we need to take additional steps
     def reason_about_data(self):
         print("Reasoning: Deciding what data we need...")
         if not self.linkedin_url:
@@ -56,55 +56,29 @@ class EmailAgent:
                 self.skills = ["Adaptable", "Hardworking"]
                 self.experiences = ["Worked across various industries"]
-    # Action: Fetch company information via Proxycurl or use defaults
-    def fetch_company_info(self):
         if not self.company_name:
             print("Action: No company name provided, using default company info.")
             self.company_info = "A leading company in its field."
         else:
-            print(f"Action: Fetching company info for {self.company_name}.")
-            headers = {"Authorization": f"Bearer {proxycurl_api_key}"}
-            url = f"https://nubela.co/proxycurl/api/v2/linkedin/company?company_name={self.company_name}"
-            response = requests.get(url, headers=headers)
             if response.status_code == 200:
-                data = response.json()
-                self.company_info = data.get("description", "No detailed company info available.")
             else:
-                print(f"Error: Unable to fetch company info for {self.company_name}. Using default info.")
                 self.company_info = "A leading company in its field."
-    # Action: Scrape the company's website for role-specific information or use defaults
-    def scrape_role_from_website(self):
-        print(f"Action: Scraping role description from the company's website for {self.role}.")
-        if not self.company_name:
-            print("Error: No company name or URL provided for scraping.")
-            return False
-        # Try scraping the website for role descriptions
-        try:
-            response = requests.get(f"https://{self.company_name}.com/careers")
-            if response.status_code == 200:
-                soup = BeautifulSoup(response.text, 'html.parser')
-                role_descriptions = soup.find_all(string=lambda text: self.role.lower() in text.lower())
-                if role_descriptions:
-                    self.role_description = role_descriptions[0]
-                    print(f"Found role description: {self.role_description}")
-                    return True
-                else:
-                    print(f"No specific role description found on the website for {self.role}.")
-                    return False
-            else:
-                print(f"Error: Unable to reach company's website at {self.company_name}.com.")
-                return False
-        except Exception as e:
-            print(f"Error during scraping: {e}")
-            return False
-    # Action: Use default logic for role description if no role is available
-    def use_default_role_description(self):
-        print(f"Action: Using default logic for the role of {self.role}.")
-        self.role_description = f"The role of {self.role} at {self.company_name} involves leadership and management."
     # Reflection: Check if we have enough data to generate the email
     def reflect_on_data(self):
         print("Reflection: Do we have enough data?")
@@ -116,7 +90,7 @@ class EmailAgent:
     def generate_email(self):
         print("Action: Generating the email with the gathered information.")
-        # Updated and fully dynamic LLM prompt
         prompt = f"""
         Write a professional email applying for the {self.role} position at {self.company_name}.
@@ -161,10 +135,7 @@ class EmailAgent:
     def run(self):
         self.reason_about_data()  # Reasoning step
         self.fetch_linkedin_data()  # Fetch LinkedIn data
-        self.fetch_company_info()  # Fetch company data
-        # Scrape the company's website for role-specific information or use defaults
-        if not self.scrape_role_from_website():
-            self.use_default_role_description()
         # Reflect on whether the data is sufficient
         if self.reflect_on_data():
             return self.generate_email()  # Final action: generate email

 import requests
+import gradio as gr
 import os
 # Load API keys securely from environment variables
 proxycurl_api_key = os.getenv("PROXYCURL_API_KEY")  # Proxycurl API key
 groq_api_key = os.getenv("GROQ_CLOUD_API_KEY")  # Groq Cloud API key
+firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")  # Firecrawl API key
 class EmailAgent:
     def __init__(self, linkedin_url, company_name, role, word_limit, user_name, email, phone, linkedin):
         self.company_info = None
         self.role_description = None
+    # Reason: Decide what information is needed
     def reason_about_data(self):
         print("Reasoning: Deciding what data we need...")
         if not self.linkedin_url:
                 self.skills = ["Adaptable", "Hardworking"]
                 self.experiences = ["Worked across various industries"]
+    # Action: Fetch company information via Firecrawl API
+    def fetch_company_info_with_firecrawl(self):
         if not self.company_name:
             print("Action: No company name provided, using default company info.")
             self.company_info = "A leading company in its field."
         else:
+            print(f"Action: Fetching company info for {self.company_name} using Firecrawl.")
+            headers = {"Authorization": f"Bearer {firecrawl_api_key}"}
+            firecrawl_url = "https://api.firecrawl.dev/v1/scrape"
+            data = {
+                "url": f"https://{self.company_name}.com",
+                "patterns": ["description", "about", "careers", "company overview"]
+            }
+            response = requests.post(firecrawl_url, json=data, headers=headers)
             if response.status_code == 200:
+                firecrawl_data = response.json()
+                self.company_info = firecrawl_data.get("description", "No detailed company info available.")
+                print(f"Company info fetched: {self.company_info}")
             else:
+                print(f"Error: Unable to fetch company info via Firecrawl. Using default info.")
                 self.company_info = "A leading company in its field."
     # Reflection: Check if we have enough data to generate the email
     def reflect_on_data(self):
         print("Reflection: Do we have enough data?")
     def generate_email(self):
         print("Action: Generating the email with the gathered information.")
+        # Dynamic LLM prompt
         prompt = f"""
         Write a professional email applying for the {self.role} position at {self.company_name}.
     def run(self):
         self.reason_about_data()  # Reasoning step
         self.fetch_linkedin_data()  # Fetch LinkedIn data
+        self.fetch_company_info_with_firecrawl()  # Fetch company data using Firecrawl
         # Reflect on whether the data is sufficient
         if self.reflect_on_data():
             return self.generate_email()  # Final action: generate email