Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import statsmodels.formula.api as smf
|
4 |
+
from linearmodels.iv import IV2SLS
|
5 |
+
from scipy import stats
|
6 |
+
import warnings
|
7 |
+
|
8 |
+
# Silence FutureWarning noise so it does not clutter the app's console output.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Dataset shared by the handlers below: process_file() assigns the DataFrame
# it reads from the uploaded CSV, and run_2sls() reads it. A module-level
# ``global df`` statement binds nothing, so the name is initialised explicitly
# to make "no file uploaded yet" a defined state rather than a NameError.
df = None
|
11 |
+
|
12 |
+
def process_file(file):
    """Load the uploaded CSV into the module-level ``df`` and refresh the dropdowns.

    Args:
        file: Value delivered by the ``gr.File`` input. It may be an object
            exposing the file path as ``.name``, a plain path string, or
            ``None`` (presumably what Gradio sends when the upload is
            cleared -- confirm against the installed Gradio version).

    Returns:
        A tuple of five ``gr.update`` objects, one per dropdown wired to this
        handler: the first lists every column and pre-selects all of them,
        the remaining four only receive the column names as choices. When
        ``file`` is ``None`` all five dropdowns are emptied instead.
    """
    global df

    if file is None:
        # Nothing to read: reset the selectors rather than failing on
        # ``None.name``. The previously loaded ``df`` is left untouched.
        return (
            gr.update(choices=[], value=[]),
            gr.update(choices=[], value=None),
            gr.update(choices=[], value=[]),
            gr.update(choices=[], value=[]),
            gr.update(choices=[], value=[]),
        )

    # Accept both a file-like wrapper (path stored in ``.name``) and a bare path.
    path = getattr(file, "name", file)
    df = pd.read_csv(path)
    columns = df.columns.tolist()
    return (
        gr.update(choices=columns, value=columns),
        gr.update(choices=columns),
        gr.update(choices=columns),
        gr.update(choices=columns),
        gr.update(choices=columns),
    )
|
17 |
+
|
18 |
+
def _as_list(selection):
    """Return a dropdown selection (``None``, one value, or a sequence) as a list."""
    if not selection:
        return []
    if isinstance(selection, (list, tuple)):
        return list(selection)
    return [selection]


def run_2sls(dependent_var, endogenous_vars, instruments, exogenous_vars):
    """Fit a two-stage least squares model on the uploaded data and report it as text.

    Args:
        dependent_var: Name of the outcome column.
        endogenous_vars: Name(s) of the endogenous regressors.
        instruments: Name(s) of the excluded instruments; there must be at
            least as many as endogenous regressors.
        exogenous_vars: Optional name(s) of exogenous controls.

    Returns:
        A string. On success it contains, in order: the 2SLS coefficient
        table, a regression-based Hausman (Durbin-Wu-Hausman) endogeneity
        test, one first-stage F statistic per endogenous regressor, and the
        Sargan overidentification test. On any failure it is a message
        starting with ``"Error: "``; exceptions are not propagated because
        the string is displayed directly in the UI.
    """
    if not all([dependent_var, endogenous_vars, instruments]):
        return "Error: Please select all required variables."

    endogenous_vars = _as_list(endogenous_vars)
    instruments = _as_list(instruments)
    exogenous_vars = _as_list(exogenous_vars)

    if len(instruments) < len(endogenous_vars):
        return "Error: The number of instruments must be at least equal to the number of endogenous variables."

    selected = [dependent_var] + endogenous_vars + instruments + exogenous_vars
    if len(set(selected)) != len(selected):
        # A column used twice would yield duplicate DataFrame columns and a
        # collinear or meaningless model.
        return "Error: Each variable can be used in only one role."

    # The dataset lives in a module-level name that only exists (or is
    # non-None) once a file has been uploaded.
    data = globals().get("df")
    if data is None:
        return "Error: Please upload a CSV file first."

    try:
        df_selected = data[selected].dropna()

        # First stage: each endogenous regressor on instruments + controls.
        # The weak-instrument statistic is the partial F on the excluded
        # instruments, i.e. the full first stage compared with the same
        # regression without the instruments. With no controls this equals
        # the overall first-stage F.
        first_stage_rhs = ' + '.join(instruments + exogenous_vars)
        restricted_rhs = ' + '.join(exogenous_vars) if exogenous_vars else '1'
        first_stage_resids = {}
        weak_instrument_lines = []
        for position, var in enumerate(endogenous_vars):
            first_stage = smf.ols(f'{var} ~ {first_stage_rhs}', data=df_selected).fit()
            without_instruments = smf.ols(f'{var} ~ {restricted_rhs}', data=df_selected).fit()
            f_stat, _, _ = first_stage.compare_f_test(without_instruments)
            weak_instrument_lines.append(
                f"\nWeak instrument test for {var}: First-stage F-statistic = {f_stat:.5f}"
            )
            first_stage_resids[f'_first_stage_resid_{position}'] = first_stage.resid

        # Hausman (Durbin-Wu-Hausman) test: add the first-stage residuals to
        # the structural OLS regression and test their joint significance.
        augmented_data = df_selected.assign(**first_stage_resids)
        structural_terms = endogenous_vars + exogenous_vars
        ols_model = smf.ols(
            f'{dependent_var} ~ ' + ' + '.join(structural_terms),
            data=augmented_data,
        ).fit()
        augmented_model = smf.ols(
            f'{dependent_var} ~ ' + ' + '.join(structural_terms + list(first_stage_resids)),
            data=augmented_data,
        ).fit()
        hausman_stat, hausman_p_value, hausman_df = augmented_model.compare_f_test(ols_model)

        # 2SLS estimates. A manual second-stage OLS on fitted values gives the
        # right coefficients but wrong standard errors, so the reported table
        # comes from the IV estimator. Terms are joined from a list so that an
        # empty control set does not leave a dangling '+' in the formula.
        iv_formula = (
            f'{dependent_var} ~ '
            + ' + '.join(['1'] + exogenous_vars)
            + ' + [' + ' + '.join(endogenous_vars)
            + ' ~ ' + ' + '.join(instruments) + ']'
        )
        iv_model = IV2SLS.from_formula(iv_formula, data=df_selected).fit(cov_type='unadjusted')

        # Sargan test: n * R^2 from regressing the IV residuals on all
        # instruments and controls, chi-square with (#instruments - #endogenous)
        # degrees of freedom. It is undefined for an exactly identified model.
        overid_df = len(instruments) - len(endogenous_vars)
        if overid_df > 0:
            sargan_data = df_selected.assign(iv_resid=iv_model.resids)
            sargan_test = smf.ols(f'iv_resid ~ {first_stage_rhs}', data=sargan_data).fit()
            sargan_stat = len(sargan_data) * sargan_test.rsquared
            sargan_p_value = stats.chi2.sf(sargan_stat, df=overid_df)
            sargan_text = (
                f"Sargan test:\nSargan statistic = {sargan_stat:.5f}, "
                f"p-value = {sargan_p_value:.5f}\n"
            )
        else:
            sargan_text = (
                "Sargan test:\nNot applicable: the model is exactly identified "
                "(no overidentifying restrictions).\n"
            )

        report_parts = [
            f"2SLS regression results:\n{iv_model.summary}\n\n",
            (
                f"Hausman test:\nF({int(hausman_df)}, {int(augmented_model.df_resid)}) = "
                f"{hausman_stat:.5f}, p-value = {hausman_p_value:.5f}\n"
            ),
            ''.join(weak_instrument_lines) + "\n",
            sargan_text,
        ]
        return ''.join(report_parts)

    except Exception as e:
        # UI boundary: show the failure in the output box instead of raising.
        return f"Error: {str(e)}"
|
78 |
+
|
79 |
+
# Build the UI: a CSV upload, the variable selectors, a run button and a
# text box for the report.
with gr.Blocks() as app:
    gr.Markdown("## Two-Stage Least Squares Regression (2SLS)")

    file_input = gr.File(label="Upload CSV File")

    with gr.Row():
        # Left column: every column found in the uploaded file.
        with gr.Column():
            available_columns = gr.Dropdown(
                label="Available Columns",
                choices=[],
                multiselect=True,
            )

        # Right column: the role each variable plays in the model.
        with gr.Column():
            dependent_var = gr.Dropdown(
                label="Dependent Variable",
                choices=[],
            )
            endogenous_vars = gr.Dropdown(
                label="Endogenous Variables",
                choices=[],
                multiselect=True,
            )
            instruments = gr.Dropdown(
                label="Instruments",
                choices=[],
                multiselect=True,
            )
            exogenous_vars = gr.Dropdown(
                label="Exogenous Variables (optional)",
                choices=[],
                multiselect=True,
            )

    run_button = gr.Button("Run 2SLS Regression")
    output = gr.Textbox(label="Regression Output", lines=20)

    # A change of the uploaded file repopulates all five dropdowns.
    file_input.change(
        process_file,
        inputs=file_input,
        outputs=[
            available_columns,
            dependent_var,
            endogenous_vars,
            instruments,
            exogenous_vars,
        ],
    )
    # The button runs the estimation on the current selections.
    run_button.click(
        run_2sls,
        inputs=[
            dependent_var,
            endogenous_vars,
            instruments,
            exogenous_vars,
        ],
        outputs=output,
    )

app.launch()
|