pvaluedotone committed on
Commit
d332a2c
·
verified ·
1 Parent(s): 37deadf

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import statsmodels.formula.api as smf
4
+ from linearmodels.iv import IV2SLS
5
+ from scipy import stats
6
+ import warnings
7
+
8
# Silence FutureWarning noise so it does not clutter the app's console output.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Module-level holder for the most recently uploaded dataset.
# A bare ``global df`` statement at module scope binds nothing, so the name is
# initialised explicitly; ``process_file`` rebinds it after each upload.
df = None
11
+
12
def process_file(file):
    """Load the uploaded CSV into the module-level ``df`` and refresh the pickers.

    Args:
        file: Value delivered by the upload component. It may be an object
            exposing the path as ``.name``, a plain path string, or ``None``
            (for example when the upload is cleared).

    Returns:
        A tuple of five ``gr.update`` objects, in the order: available
        columns, dependent variable, endogenous variables, instruments,
        exogenous variables.
    """
    global df

    if file is None:
        # Nothing to read: forget the previous dataset and empty every picker
        # instead of failing on ``None.name``.
        df = None
        return (
            gr.update(choices=[], value=[]),
            gr.update(choices=[], value=None),
            gr.update(choices=[], value=[]),
            gr.update(choices=[], value=[]),
            gr.update(choices=[], value=[]),
        )

    # Accept both file-like objects carrying ``.name`` and bare path strings.
    path = getattr(file, "name", file)
    df = pd.read_csv(path)
    columns = df.columns.tolist()
    return (
        # The overview picker shows every column pre-selected.
        gr.update(choices=columns, value=columns),
        gr.update(choices=columns),
        gr.update(choices=columns),
        gr.update(choices=columns),
        gr.update(choices=columns),
    )
17
+
18
def run_2sls(dependent_var, endogenous_vars, instruments, exogenous_vars):
    """Run a two-stage least squares regression on the uploaded data.

    The routine fits one OLS first stage per endogenous variable, fits the
    second stage on the fitted values, and appends three diagnostics: an
    endogeneity ("Hausman") regression, the first-stage F statistics and a
    Sargan overidentification test.

    Args:
        dependent_var: Name of the outcome column.
        endogenous_vars: Column name or list of names treated as endogenous.
        instruments: Column name or list of names used as instruments.
        exogenous_vars: Optional list of exogenous control columns.

    Returns:
        A text report on success, or a string starting with ``"Error: "``
        describing why the regression could not be run. Exceptions raised
        while estimating are converted into such a string, not propagated.
    """
    if not all([dependent_var, endogenous_vars, instruments]):
        return "Error: Please select all required variables."

    endogenous_vars = list(endogenous_vars) if isinstance(endogenous_vars, list) else [endogenous_vars]
    instruments = list(instruments) if isinstance(instruments, list) else [instruments]
    exogenous_vars = list(exogenous_vars) if exogenous_vars else []

    if len(instruments) < len(endogenous_vars):
        return "Error: The number of instruments must be at least equal to the number of endogenous variables."

    # Look the dataset up defensively: the module-level name may be unbound or
    # None when no CSV has been uploaded yet.
    data = globals().get("df")
    if data is None:
        return "Error: Please upload a CSV file first."

    try:
        selected_columns = [dependent_var] + endogenous_vars + instruments + exogenous_vars
        # Work on an independent copy so the added helper columns never touch
        # the uploaded frame.
        df_selected = data[selected_columns].dropna().copy()
        if df_selected.empty:
            return "Error: No complete rows remain after dropping missing values."

        # First stage: each fit supplies both the fitted values used in the
        # second stage and the F statistic reported as the weak-instrument
        # diagnostic, so every model is estimated only once.
        # NOTE(review): ``fvalue`` is the overall regression F, which also
        # reflects the exogenous controls, not only the instruments.
        first_stage_rhs = ' + '.join(instruments + exogenous_vars)
        predicted_vars = []
        weak_instrument_lines = []
        for var in endogenous_vars:
            first_stage = smf.ols(f'{var} ~ {first_stage_rhs}', data=df_selected).fit()
            df_selected[f'{var}_hat'] = first_stage.fittedvalues
            predicted_vars.append(f'{var}_hat')
            weak_instrument_lines.append(
                f"\nWeak instrument test for {var}: First-stage F-statistic = {first_stage.fvalue:.5f}"
            )
        weak_instrument_results = "".join(weak_instrument_lines)

        # Second stage.
        # NOTE(review): this is plain OLS on the fitted values, so the standard
        # errors in the printed summary are not 2SLS-corrected.
        second_stage_formula = f'{dependent_var} ~ ' + ' + '.join(predicted_vars + exogenous_vars)
        second_stage = smf.ols(second_stage_formula, data=df_selected).fit()

        # Hausman-style check: regress the OLS residuals on the fitted values.
        ols_formula = f'{dependent_var} ~ ' + ' + '.join(endogenous_vars + exogenous_vars)
        ols_model = smf.ols(ols_formula, data=df_selected).fit()
        residuals = ols_model.resid
        endogeneity_test_formula = 'residuals ~ ' + ' + '.join(predicted_vars)
        endogeneity_test = smf.ols(
            endogeneity_test_formula, data=df_selected.assign(residuals=residuals)
        ).fit()
        hausman_stat = endogeneity_test.fvalue
        # Report the p-value of the same joint F statistic; the single
        # coefficient p-value only matches it when there is one regressor.
        hausman_p_value = endogeneity_test.f_pvalue

        # Sargan test: only defined when there are more instruments than
        # endogenous regressors (chi-square with zero degrees of freedom
        # would yield NaN).
        overid_df = len(instruments) - len(endogenous_vars)
        if overid_df > 0:
            # Join the intercept with the controls so an empty control list
            # does not leave a dangling '+' in the formula.
            formula = (
                f'{dependent_var} ~ ' + ' + '.join(['1'] + exogenous_vars)
                + ' + [' + ' + '.join(endogenous_vars) + ' ~ ' + ' + '.join(instruments) + ']'
            )
            iv_model = IV2SLS.from_formula(formula, data=df_selected).fit()
            df_selected['iv_resid'] = iv_model.resids
            sargan_formula = 'iv_resid ~ ' + ' + '.join(instruments + exogenous_vars)
            sargan_test = smf.ols(sargan_formula, data=df_selected).fit()
            sargan_stat = len(df_selected) * sargan_test.rsquared
            sargan_p_value = stats.chi2.sf(sargan_stat, df=overid_df)
            sargan_report = (
                f"Sargan test:\nSargan statistic = {sargan_stat:.5f}, p-value = {sargan_p_value:.5f}\n"
            )
        else:
            sargan_report = (
                "Sargan test:\nNot applicable: the model is exactly identified "
                "(no overidentifying restrictions).\n"
            )

        results = f"2SLS regression results:\n{second_stage.summary()}\n\n"
        # NOTE(review): the label says Chi-square(1) but the value printed is
        # the F statistic of the auxiliary regression; kept for compatibility.
        results += f"Hausman test:\nChi-square(1) = {hausman_stat:.5f}, p-value = {hausman_p_value:.5f}\n"
        results += weak_instrument_results + "\n"
        results += sargan_report

        return results

    except Exception as e:
        # UI boundary: surface any estimation failure as text in the output box.
        return f"Error: {str(e)}"
78
+
79
# Build the interface: an upload widget, a row holding the column pickers,
# and a button that writes the regression report into a text box.
# NOTE(review): nesting below is reconstructed from statement order because
# the original indentation was not available; confirm the layout.
with gr.Blocks() as app:
    gr.Markdown("## Two-Stage Least Squares Regression (2SLS)")

    file_input = gr.File(label="Upload CSV File")

    with gr.Row():
        with gr.Column():
            # Overview of every column found in the uploaded file.
            available_columns = gr.Dropdown(
                choices=[],
                multiselect=True,
                label="Available Columns",
            )

        with gr.Column():
            # Role pickers; their choices are filled in after an upload.
            dependent_var = gr.Dropdown(
                choices=[],
                label="Dependent Variable",
            )
            endogenous_vars = gr.Dropdown(
                choices=[],
                multiselect=True,
                label="Endogenous Variables",
            )
            instruments = gr.Dropdown(
                choices=[],
                multiselect=True,
                label="Instruments",
            )
            exogenous_vars = gr.Dropdown(
                choices=[],
                multiselect=True,
                label="Exogenous Variables (optional)",
            )

    run_button = gr.Button("Run 2SLS Regression")
    output = gr.Textbox(lines=20, label="Regression Output")

    # A new upload repopulates all five dropdowns.
    file_input.change(
        fn=process_file,
        inputs=file_input,
        outputs=[
            available_columns,
            dependent_var,
            endogenous_vars,
            instruments,
            exogenous_vars,
        ],
    )
    # The button runs the regression on the current selections.
    run_button.click(
        fn=run_2sls,
        inputs=[
            dependent_var,
            endogenous_vars,
            instruments,
            exogenous_vars,
        ],
        outputs=output,
    )

app.launch()