import os

import pandas as pd
import streamlit as st
from google.cloud import bigquery


def preprocess_csv(file_path):
    """Normalize a raw CSV so it matches the BigQuery table's expectations.

    - Strips a spurious decimal suffix from ``bag_id_cn`` (e.g. ``123.0`` ->
      ``123``) and stores it as a nullable integer.
    - Converts the known DD/MM/YY date columns to ``YYYY-MM-DD 00:00:00 UTC``
      strings; unparseable values become empty cells in the output CSV.

    Args:
        file_path: Path to the uploaded CSV file.

    Returns:
        Path of the preprocessed CSV, written to the current directory as
        ``preprocessed_<original basename>``.
    """
    df = pd.read_csv(file_path)

    # Columns known to arrive as DD/MM/YY text.
    date_columns = ['Order_Date', 'State_Date', 'Entry_Month']

    if 'bag_id_cn' in df.columns:
        # Raw-string regex (fixes the invalid '\.' escape): drop everything
        # from the first '.' onward so float-formatted IDs ('123.0') become
        # clean integers. 'Int64' keeps missing values as <NA> instead of
        # forcing the column back to float.
        df['bag_id_cn'] = (
            df['bag_id_cn']
            .replace({r'\..*': ''}, regex=True)
            .astype('Int64')
        )

    for column in date_columns:
        if column in df.columns:
            # errors='coerce' turns unparseable dates into NaT, which
            # strftime maps to NaN and to_csv writes as an empty cell.
            df[column] = pd.to_datetime(
                df[column], format='%d/%m/%y', errors='coerce'
            ).dt.strftime('%Y-%m-%d 00:00:00 UTC')

    preprocessed_file_path = 'preprocessed_' + os.path.basename(file_path)
    df.to_csv(preprocessed_file_path, index=False)
    return preprocessed_file_path


def upload_to_bigquery(credentials_path, csv_file_path, project_name, table_id):
    """Append the rows of a CSV file to an existing BigQuery table.

    Reuses the destination table's existing schema, so the CSV columns must
    line up with it. Success and failure are reported through Streamlit
    widgets rather than raised to the caller.

    Args:
        credentials_path: Path to a service-account JSON key file.
        csv_file_path: Path to the (preprocessed) CSV to load.
        project_name: GCP project used to construct the BigQuery client.
        table_id: Fully qualified table id (``project.dataset.table``).
    """
    try:
        # Point the client library at the uploaded service-account key.
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
        client = bigquery.Client(project=project_name)

        # Fetch the destination table's schema so the load job validates the
        # CSV against what is already there instead of autodetecting.
        table = client.get_table(table_id)
        schema = table.schema

        job_config = bigquery.LoadJobConfig(
            source_format=bigquery.SourceFormat.CSV,
            skip_leading_rows=1,  # first row of the CSV is the header
            schema=schema,  # use the existing schema from BigQuery
            write_disposition=bigquery.WriteDisposition.WRITE_APPEND,  # append
        )

        with open(csv_file_path, "rb") as file:
            load_job = client.load_table_from_file(
                file, table_id, job_config=job_config
            )

        # Block until the load job finishes; raises on job-level errors,
        # which the except below converts into a UI message.
        load_job.result()

        destination_table = client.get_table(table_id)
        st.success(f"Loaded {destination_table.num_rows} rows into {table_id}.")
    except Exception as e:
        # Surface any failure (auth, schema mismatch, malformed CSV) in the
        # Streamlit UI instead of crashing the script.
        st.error(f"An error occurred: {str(e)}")


# ---- Streamlit UI (the script reruns top-to-bottom on every interaction) ----

html_subject = """

Enter project name

"""
st.markdown(html_subject, unsafe_allow_html=True)
project_name = st.text_input("", key="project_name")

html_subject = """

Enter table id

"""
st.markdown(html_subject, unsafe_allow_html=True)
table_id = st.text_input("", key="table_id_name")

html_subject = """

Upload credentials file

"""
st.markdown(html_subject, unsafe_allow_html=True)
credentials_file = st.file_uploader("", type=["json"])

html_subject = """

Upload the CSV file

"""
st.markdown(html_subject, unsafe_allow_html=True)
uploaded_file = st.file_uploader("", type=["csv"])

button_styles = """ """
st.markdown(button_styles, unsafe_allow_html=True)

if st.button("Submit"):
    if uploaded_file is not None and credentials_file is not None and project_name and table_id:
        # NOTE(review): the service-account key is persisted to the working
        # directory with a fixed name; ensure this directory is neither
        # shared between sessions nor publicly served.
        credentials_path = "./credentials.json"
        with open(credentials_path, "wb") as f:
            f.write(credentials_file.getbuffer())

        csv_file_path = f"./{uploaded_file.name}"
        with open(csv_file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # Normalize ids/dates, then append the result to the BigQuery table.
        preprocessed_file_path = preprocess_csv(csv_file_path)
        upload_to_bigquery(credentials_path, preprocessed_file_path, project_name, table_id)
    else:
        st.error("Please fill out all fields, including the credentials file and CSV file.")