""" For HF, the interface should be called app.py """ import json import concurrent.futures import streamlit as st from utils.process_doc import parse_docx, parse_pdf from backend import process_cv, process_job_posting from utils.gpt import test_api_key st.set_page_config(layout="wide") with st.sidebar: COHERE_API_KEY = st.text_input( "Cohere API Key Entry", value="", placeholder="Enter your Free Tier Cohere API Key", ) if "state" not in st.session_state: st.session_state.state = {"formatted": False} STATE = st.session_state.state cv_upload_box = st.file_uploader( "CV Upload Box", help="Upload your CV in .docx or .pdf form. This CV will be parsed, and used to analyse against the given job post.", type=["docx", "pdf"], accept_multiple_files=False, ) job_posting_upload_box = st.text_area( "Job Description Upload Box", placeholder="Copy and Paste a job post you are interested in. Make sure to include the full post! More information is better.", help="In this box, please dump text content for a job description you are interested in. This could easily be setup to work directly with a webpage (we'd simply need to scrape said page) however I do not want to do that on HF spaces.", ) if cv_upload_box and job_posting_upload_box != "": process_files = st.button("Process Files", type="primary") if process_files: if test_api_key(COHERE_API_KEY): # Process our two uploaded files into state variables STATE["job_posting"] = job_posting_upload_box cv_filetype = cv_upload_box.name.split(".")[-1] cv_file_contents = cv_upload_box.getvalue() STATE["cv"] = ( parse_docx(cv_file_contents) if cv_filetype == "docx" else parse_pdf(cv_file_contents) ) # Now, use Cohere to get structured output for both cv and job_posting # Making these calls in parallel with concurrent.futures.ThreadPoolExecutor() as executor: future1 = executor.submit(process_cv, STATE["cv"], COHERE_API_KEY) future2 = executor.submit( process_job_posting, STATE["job_posting"], COHERE_API_KEY ) cv_json_text = future1.result() job_posting_json_text = future2.result() cv_json_text = ( "{" + cv_json_text.lstrip().lstrip("{").rstrip().rstrip("}") + "}" ) job_posting_json_text = ( "{" + job_posting_json_text.lstrip().lstrip("{").rstrip().rstrip("}") + "}" ) try: STATE["cv_json"] = json.loads(cv_json_text) except json.JSONDecodeError as e: print( f"Error parsing JSON Output for CV: {e}. Response content: {cv_json_text}" ) STATE["cv_json"] = {"name": "Failed"} try: STATE["job_posting_json"] = json.loads(job_posting_json_text) except json.JSONDecodeError as e: print( f"Error parsing JSON Output for Job Posting: {e}. Response content: {job_posting_json_text}" ) STATE["job_posting_json"] = {"companyName": "Failed"} STATE["formatted"] = True else: st.error( "You entered an invalid Cohere API Key. Please enter a valid API key in the sidebar." ) # Now, we can work with this ! if STATE["formatted"]: lcol, rcol = st.columns((0.5, 0.5)) with lcol: st.download_button( label="Download Job Posting JSON", data=json.dumps(STATE["job_posting_json"], indent=4), file_name=f"job_posting_formatted_{STATE['job_posting_json']['companyName']}.json", mime="application/json", use_container_width=True, ) with rcol: st.download_button( label="Download CV JSON", data=json.dumps(STATE["cv_json"], indent=4), file_name=f"cv_formatted_{STATE['cv_json']['name']}.json", mime="application/json", use_container_width=True, ) cv_critique, practice_interview, general_cv_critique = st.tabs( ["Role Specific CV Critique", "Practice Interview", "General CV Critique"] )