"""Gradio app that runs a Two-Stage Least Squares (2SLS) regression on a CSV.

The user uploads a CSV file, picks a dependent variable and types the
endogenous regressors, instruments and (optionally) exogenous controls as
comma-separated column names. The regression summary is shown as text.
"""

import warnings

import gradio as gr
import pandas as pd
import statsmodels.formula.api as smf
from linearmodels.iv import IV2SLS

warnings.simplefilter(action='ignore', category=FutureWarning)

# Most recently uploaded dataset; stays None until a CSV has been loaded.
df = None


def process_file(file):
    """Load the uploaded CSV into the module-level ``df``.

    Args:
        file: The value produced by the ``gr.File`` component. Either a
            path string, an object exposing the path as ``.name``, or
            ``None`` when the upload was cleared.

    Returns:
        The list of column names of the loaded file, or an empty list when
        ``file`` is ``None`` (in which case ``df`` is reset to ``None``).
    """
    global df
    if file is None:
        df = None
        return []
    # Depending on the Gradio version the component hands over either a
    # temp-file wrapper (path in ``.name``) or the path string itself.
    path = file if isinstance(file, str) else file.name
    df = pd.read_csv(path)
    return df.columns.tolist()


def _parse_var_list(text):
    """Split a comma-separated string into stripped, unique, non-empty names."""
    if not text:
        return []
    stripped = (part.strip() for part in text.split(","))
    # dict.fromkeys de-duplicates while preserving the order the user typed.
    return list(dict.fromkeys(name for name in stripped if name))


def _on_file_change(file):
    """UI callback: reload the data and refresh the column widgets.

    Returns a pair ``(label_text, dropdown_update)``: the column names joined
    into one string (``gr.Label`` does not accept a list) and an update that
    fills the dependent-variable dropdown with the available columns.
    """
    columns = process_file(file)
    return ", ".join(columns), gr.update(choices=columns, value=None)


def run_2sls(dependent_var, endogenous_vars, instruments, exogenous_vars):
    """Estimate a 2SLS regression on the uploaded data.

    Args:
        dependent_var: Name of the dependent (outcome) column.
        endogenous_vars: Comma-separated names of endogenous regressors.
        instruments: Comma-separated names of excluded instruments.
        exogenous_vars: Comma-separated names of exogenous controls; may be
            empty or ``None``.

    Returns:
        The regression summary as text, or a string starting with
        ``"Error: "`` describing why the model could not be estimated.
        This function never raises; it is the UI boundary.
    """
    if not all([dependent_var, endogenous_vars, instruments]):
        return "Error: Please select all required variables."

    endogenous = _parse_var_list(endogenous_vars)
    instrument_names = _parse_var_list(instruments)
    exogenous = _parse_var_list(exogenous_vars)

    # Inputs such as "," or "  " are truthy but contain no variable names.
    if not endogenous or not instrument_names:
        return "Error: Please select all required variables."
    if len(instrument_names) < len(endogenous):
        return "Error: The number of instruments must be at least equal to the number of endogenous variables."
    if df is None:
        return "Error: Please upload a CSV file first."

    columns = [dependent_var] + endogenous + instrument_names + exogenous
    if len(set(columns)) != len(columns):
        return "Error: Each variable may be used in only one role."
    missing = [name for name in columns if name not in df.columns]
    if missing:
        return f"Error: Column(s) not found in the uploaded file: {', '.join(missing)}"

    try:
        data = df[columns].dropna()
        if data.empty:
            return "Error: No complete observations remain after dropping missing values."

        # IV2SLS does not add a constant on its own.
        exog = data[exogenous].copy()
        exog.insert(0, "Intercept", 1.0)

        # Estimate both stages jointly. Running OLS on first-stage fitted
        # values gives the same coefficients but wrong standard errors,
        # because the second-stage residuals ignore first-stage estimation.
        model = IV2SLS(
            dependent=data[dependent_var],
            exog=exog,
            endog=data[endogenous],
            instruments=data[instrument_names],
        )
        # "unadjusted" = classical homoskedastic covariance.
        results = model.fit(cov_type="unadjusted")
        return str(results.summary)
    except Exception as e:
        return f"Error: {str(e)}"


with gr.Blocks() as app:
    gr.Markdown("## Two-Stage Least Squares Regression (2SLS)")

    file_input = gr.File(label="Upload CSV File")
    column_output = gr.Label(label="Available Columns")

    dependent_var = gr.Dropdown(label="Dependent Variable")
    endogenous_vars = gr.Textbox(label="Endogenous Variables (comma-separated)")
    instruments = gr.Textbox(label="Instruments (comma-separated)")
    exogenous_vars = gr.Textbox(label="Exogenous Variables (comma-separated, optional)")

    run_button = gr.Button("Run 2SLS Regression")
    output = gr.Textbox(label="Regression Output", lines=20)

    # Wired after the dropdown exists so the upload can populate its choices.
    file_input.change(
        _on_file_change,
        inputs=file_input,
        outputs=[column_output, dependent_var],
    )
    run_button.click(
        run_2sls,
        inputs=[dependent_var, endogenous_vars, instruments, exogenous_vars],
        outputs=output,
    )

if __name__ == "__main__":
    app.launch()