# Source: 2sls / app_archive3.py
# Commit 37deadf (verified) by pvaluedotone: "Rename app.py to app_archive3.py"
import gradio as gr
import pandas as pd
import statsmodels.formula.api as smf
from linearmodels.iv import IV2SLS
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# Most recently uploaded dataset, shared between the Gradio callbacks.
# A module-level ``global`` statement binds nothing, so start from an explicit
# None; process_file() rebinds this name after a CSV has been read.
df = None
def process_file(file):
    """Load the uploaded CSV into the module-level ``df`` and refresh the dropdowns.

    Args:
        file: Value delivered by the file-upload component. It may be an
            object exposing the path as ``.name``, a plain path string, or
            ``None`` when the upload has been cleared.

    Returns:
        A tuple of five ``gr.update`` objects, one per dropdown in the order
        they are wired to the upload event: available columns, dependent
        variable, endogenous variables, instruments, exogenous variables.
    """
    global df
    if file is None:
        # Upload was cleared: forget the old dataset and empty every dropdown
        # instead of failing on ``None.name``.
        df = None
        columns = []
    else:
        # Accept both a file-like wrapper (``.name``) and a bare path string.
        path = getattr(file, "name", file)
        df = pd.read_csv(path)
        columns = df.columns.tolist()

    # The role dropdowns are reset so selections made for a previous file
    # cannot linger and point at columns that no longer exist.
    return (
        gr.update(choices=columns, value=columns),
        gr.update(choices=columns, value=None),
        gr.update(choices=columns, value=[]),
        gr.update(choices=columns, value=[]),
        gr.update(choices=columns, value=[]),
    )
def _as_list(selection):
    """Return a dropdown selection as a list of column names."""
    if not selection:
        return []
    if isinstance(selection, str):
        # A bare string must become one name, not a list of characters.
        return [selection]
    return list(selection)


def run_2sls(dependent_var, endogenous_vars, instruments, exogenous_vars):
    """Estimate a two-stage least squares model on the uploaded dataset.

    Args:
        dependent_var: Name of the outcome column.
        endogenous_vars: Name(s) of the endogenous regressors.
        instruments: Name(s) of the excluded instruments.
        exogenous_vars: Name(s) of the exogenous controls; may be empty or None.

    Returns:
        The estimation summary as plain text, or a string starting with
        ``"Error:"`` describing why the model could not be estimated.
    """
    if not all([dependent_var, endogenous_vars, instruments]):
        return "Error: Please select all required variables."

    endogenous_vars = _as_list(endogenous_vars)
    instruments = _as_list(instruments)
    exogenous_vars = _as_list(exogenous_vars)

    if len(instruments) < len(endogenous_vars):
        return "Error: The number of instruments must be at least equal to the number of endogenous variables."

    selected = [dependent_var, *endogenous_vars, *instruments, *exogenous_vars]
    repeated = sorted({name for name in selected if selected.count(name) > 1}, key=str)
    if repeated:
        return (
            "Error: Each variable may be used in only one role. "
            "Selected more than once: " + ", ".join(map(str, repeated))
        )

    # ``df`` only exists (or is non-None) once a CSV has been uploaded.
    try:
        data = df
    except NameError:
        data = None
    if data is None:
        return "Error: Please upload a CSV file first."

    missing = [name for name in selected if name not in data.columns]
    if missing:
        return "Error: Column(s) not found in the uploaded file: " + ", ".join(map(str, missing))

    try:
        sample = data[selected].dropna()
        if sample.empty:
            return "Error: No complete observations remain after dropping rows with missing values."

        # Exogenous regressors plus an explicit intercept column.
        exog = sample[exogenous_vars].copy()
        exog.insert(0, "Intercept", 1.0)

        # Estimate both stages with IV2SLS rather than running OLS on
        # first-stage fitted values: the manual approach gives the same
        # coefficients but standard errors that ignore the first stage.
        # Passing columns directly also avoids building formula strings from
        # column names taken from the uploaded file.
        model = IV2SLS(
            sample[dependent_var],
            exog,
            sample[endogenous_vars],
            sample[instruments],
        )
        # "unadjusted" requests the classical homoskedastic covariance.
        results = model.fit(cov_type="unadjusted")
        return str(results.summary)
    except Exception as e:  # UI boundary: report the failure instead of crashing
        return f"Error: {str(e)}"
# Build the Gradio interface: CSV upload, variable pickers, run button, results box.
# NOTE(review): the nesting below is reconstructed from the `with` statements;
# confirm that the button and output box belong at the top level of the Blocks
# layout rather than inside the second column.
with gr.Blocks() as app:
    gr.Markdown("## Two-Stage Least Squares Regression (2SLS)")
    file_input = gr.File(label="Upload CSV File")
    with gr.Row():
        with gr.Column():
            # Filled by process_file, which also pre-selects every column here.
            available_columns = gr.Dropdown(label="Available Columns", choices=[], multiselect=True)
        with gr.Column():
            # Role pickers; their choices stay empty until a file is processed.
            dependent_var = gr.Dropdown(label="Dependent Variable", choices=[])
            endogenous_vars = gr.Dropdown(label="Endogenous Variables", choices=[], multiselect=True)
            instruments = gr.Dropdown(label="Instruments", choices=[], multiselect=True)
            exogenous_vars = gr.Dropdown(label="Exogenous Variables (optional)", choices=[], multiselect=True)
    run_button = gr.Button("Run 2SLS Regression")
    output = gr.Textbox(label="Regression Output", lines=20)
    # Whenever the uploaded file changes, process_file updates all five dropdowns.
    file_input.change(process_file, inputs=file_input, outputs=[available_columns, dependent_var, endogenous_vars, instruments, exogenous_vars])
    # Clicking the button passes the current selections to run_2sls and shows its text result.
    run_button.click(run_2sls, inputs=[dependent_var, endogenous_vars, instruments, exogenous_vars], outputs=output)
# Launch the app (runs unconditionally when this module is executed).
app.launch()