Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -1,103 +1,79 @@
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
|
|
|
|
|
|
3 |
import plotly.express as px
|
4 |
import folium
|
5 |
from folium.plugins import HeatMap
|
6 |
from scipy.stats import chi2_contingency
|
7 |
-
import
|
8 |
-
|
9 |
-
|
10 |
-
# Cargar los datos
|
11 |
-
data = pd.read_csv("data.csv")
|
12 |
|
13 |
-
#
|
14 |
-
|
|
|
15 |
|
16 |
-
#
|
|
|
17 |
to_exclude = [20685087, 2998627, 1978488, 196634, 945893623091961, 30285097, 19244622, 5473678, 20566938, 14249650]
|
18 |
-
|
|
|
19 |
|
20 |
-
#
|
21 |
-
|
22 |
-
|
23 |
-
# Transformar valores 0 y 1 a etiquetas significativas
|
24 |
-
transformation_dict = {
|
25 |
'Antecedente personal de diabetes': {0: 'No', 1: 'Sí'},
|
26 |
'Antecedente personal de falla cardíaca': {0: 'No', 1: 'Sí'},
|
27 |
'Antecedente personal de EPOC': {0: 'No', 1: 'Sí'},
|
28 |
'Antecedente personal de Hipertensión arterial': {0: 'No', 1: 'Sí'}
|
29 |
}
|
30 |
-
for col, mapping in
|
31 |
-
if col in
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
if data[column1].dtype == 'object' or data[column2].dtype == 'object':
|
52 |
-
sns.countplot(data=data, x=column1, hue=column2)
|
53 |
-
plt.title(f"Relación entre {column1} y {column2}")
|
54 |
else:
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
return
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
return px.histogram(data_filtered, x='Síndrome renal al ingreso', title='Síndrome Renal al Ingreso')
|
78 |
-
|
79 |
-
def plot_gender_age_distribution():
|
80 |
-
return px.histogram(data_filtered, x='Edad en años del paciente', color='Género', title='Distribución de Edad por Género', nbins=10, marginal="box")
|
81 |
-
|
82 |
-
def plot_stratum_renal_distribution():
|
83 |
-
return px.histogram(data_filtered, x='Síndrome renal al ingreso', color='Estrato socioeconómico', title='Distribución de Síndrome Renal por Estrato', barmode='group')
|
84 |
-
|
85 |
-
# Crear mapa interactivo con Folium con puntos únicos y etiquetas
|
86 |
-
|
87 |
-
def create_heatmap_with_points(variable=None):
|
88 |
-
m = folium.Map(location=[4.7, -74.1], zoom_start=10)
|
89 |
-
|
90 |
-
# Agregar capa de calor
|
91 |
-
if variable and variable in data_filtered.columns:
|
92 |
-
heat_data = data_filtered[['Coordenada de residencia-latitud', 'Coordenada de residencia-longitud', variable]].dropna()
|
93 |
-
heat_data = [[row[0], row[1], row[2]] for row in heat_data.values]
|
94 |
-
HeatMap(heat_data, gradient=None, max_zoom=18, radius=10).add_to(m)
|
95 |
-
else:
|
96 |
-
heat_data = data_filtered[['Coordenada de residencia-latitud', 'Coordenada de residencia-longitud']].dropna()
|
97 |
-
HeatMap(heat_data.values.tolist(), radius=10).add_to(m)
|
98 |
-
|
99 |
-
# Agregar puntos únicos con etiquetas
|
100 |
-
for _, row in data_filtered.iterrows():
|
101 |
folium.Marker(
|
102 |
location=[row['Coordenada de residencia-latitud'], row['Coordenada de residencia-longitud']],
|
103 |
popup=folium.Popup(
|
@@ -106,104 +82,88 @@ def create_heatmap_with_points(variable=None):
|
|
106 |
f"<b>Proteinuria:</b> {row.get('Proteinuria', 'N/A')}<br>"
|
107 |
f"<b>Creatinina:</b> {row.get('Creatinina', 'N/A')}"
|
108 |
)
|
109 |
-
).add_to(
|
|
|
|
|
|
|
110 |
|
111 |
-
|
112 |
-
|
113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
return f.read()
|
115 |
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
# Crear resumen de antecedentes
|
139 |
-
|
140 |
-
def antecedent_summary():
|
141 |
-
cols = ['Antecedente personal de diabetes', 'Antecedente personal de falla cardíaca',
|
142 |
-
'Antecedente personal de EPOC', 'Antecedente personal de Hipertensión arterial']
|
143 |
-
summary = data_filtered[cols].apply(pd.Series.value_counts).fillna(0).astype(int).reset_index()
|
144 |
-
new_columns = ['Antecedente'] + list(summary.columns[1:])
|
145 |
-
summary.columns = new_columns
|
146 |
-
return summary
|
147 |
-
|
148 |
-
# Variables categóricas seleccionadas para tablas de contingencia
|
149 |
-
contingency_vars = [
|
150 |
-
'Género', 'Régimen de afiliación', 'Estrato socioeconómico',
|
151 |
-
'Síndrome renal al ingreso', 'Antecedente personal de falla cardíaca'
|
152 |
-
]
|
153 |
-
|
154 |
-
# Interfaz del tablero interactivo con Gradio
|
155 |
with gr.Blocks() as demo:
|
156 |
-
gr.Markdown("
|
157 |
|
158 |
with gr.Tab("Gráficos Interactivos"):
|
159 |
-
gr.
|
160 |
-
gr.Plot(
|
161 |
-
gr.Plot(
|
162 |
-
gr.Plot(
|
163 |
-
gr.Plot(
|
164 |
-
gr.Plot(plot_renal_syndrome_distribution)
|
165 |
-
gr.Plot(plot_gender_age_distribution)
|
166 |
-
gr.Plot(plot_stratum_renal_distribution)
|
167 |
-
|
168 |
-
with gr.Tab("Mapa de Calor"):
|
169 |
-
gr.Markdown("### Mapa Interactivo con Filtro por Variable")
|
170 |
-
variable_dropdown = gr.Dropdown(choices=['', 'Creatinina'], label="Selecciona una variable para ponderar (opcional)")
|
171 |
-
heatmap_output = gr.HTML()
|
172 |
-
|
173 |
-
def update_heatmap(variable):
|
174 |
-
return f"<iframe srcdoc='{create_heatmap_with_points(variable)}' width='100%' height='500'></iframe>"
|
175 |
-
|
176 |
-
variable_dropdown.change(update_heatmap, inputs=variable_dropdown, outputs=heatmap_output)
|
177 |
-
gr.HTML(lambda: f"<iframe srcdoc='{create_heatmap_with_points()}' width='100%' height='500'></iframe>")
|
178 |
-
|
179 |
-
with gr.Tab("Tabla Resumen"): # Corrected indentation
|
180 |
-
gr.Markdown("### Tabla Resumen de Estadísticas Descriptivas")
|
181 |
-
summary_table = create_summary_table()
|
182 |
-
gr.DataFrame(summary_table)
|
183 |
-
|
184 |
-
with gr.Tab("Tablas de Contingencia y Chi-cuadrado"): # Corrected indentation
|
185 |
-
gr.Markdown("### Tablas de Contingencia y Pruebas de Chi-cuadrado")
|
186 |
-
var1 = gr.Dropdown(choices=data_filtered.select_dtypes(include=['object']).columns.tolist(), label="Variable 1")
|
187 |
-
var2 = gr.Dropdown(choices=data_filtered.select_dtypes(include=['object']).columns.tolist(), label="Variable 2")
|
188 |
-
|
189 |
-
def calculate_chi2(v1, v2):
|
190 |
-
result = chi_squared_table(v1, v2)
|
191 |
-
return f"<h4>Chi2 Statistic:</h4> {result['Chi2 Statistic']}<br><h4>P-value:</h4> {result['P-value']}<br><h4>Degrees of Freedom:</h4> {result['Degrees of Freedom']}<br><h4>Significant Relationship:</h4> {result['Significant Relationship']}<br><h4>Contingency Table:</h4><br>{result['Contingency Table'].to_html()}"
|
192 |
-
|
193 |
-
gr.Button("Calcular").click(calculate_chi2, inputs=[var1, var2], outputs=gr.HTML())
|
194 |
|
195 |
with gr.Tab("Análisis Univariado"):
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
btn_uni.click(univariate_analysis, inputs=column, outputs=output_uni)
|
200 |
|
201 |
with gr.Tab("Análisis Bivariado"):
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
207 |
|
|
|
|
|
|
|
208 |
|
|
|
209 |
demo.launch()
|
|
|
1 |
+
# Vasculitis ANCA - App de Análisis Estadístico y Geoespacial (Unificada con PM2.5 y lista para Hugging Face)
|
2 |
+
|
3 |
import gradio as gr
|
4 |
import pandas as pd
|
5 |
+
import numpy as np
|
6 |
+
import seaborn as sns
|
7 |
+
import matplotlib.pyplot as plt
|
8 |
import plotly.express as px
|
9 |
import folium
|
10 |
from folium.plugins import HeatMap
|
11 |
from scipy.stats import chi2_contingency
|
12 |
+
import geopandas as gpd
|
13 |
+
from sklearn.cluster import DBSCAN
|
14 |
+
from sklearn.preprocessing import StandardScaler
|
|
|
|
|
15 |
|
16 |
+
# Load the patient records and the per-station PM2.5 averages.
df = pd.read_csv("data.csv")
pm25 = pd.read_csv("pm25_promedio_por_estacion.csv")

# Basic cleaning: column headers may carry stray whitespace in the CSV files.
df.columns = df.columns.str.strip()
pm25.columns = pm25.columns.str.strip()

# Patient identifiers that must be left out of every analysis.
to_exclude = [20685087, 2998627, 1978488, 196634, 945893623091961, 30285097, 19244622, 5473678, 20566938, 14249650]

# Apply both row filters in one pass and take an explicit copy: the columns
# are reassigned below, and assigning into the result of a boolean selection
# is what triggers pandas' SettingWithCopyWarning.
# `between` is inclusive on both ends, i.e. 0 <= age <= 120; missing ages
# compare as False and are dropped, exactly as with the chained comparisons.
_keep_rows = (
    ~df['Documento de identidad'].isin(to_exclude)
    & df['Edad en años del paciente'].between(0, 120)
)
df = df[_keep_rows].copy()

# Recode the 0/1 medical-history flags into readable labels.
# NOTE: `Series.map` turns any value other than 0 or 1 into NaN.
transform_dict = {
    'Antecedente personal de diabetes': {0: 'No', 1: 'Sí'},
    'Antecedente personal de falla cardíaca': {0: 'No', 1: 'Sí'},
    'Antecedente personal de EPOC': {0: 'No', 1: 'Sí'},
    'Antecedente personal de Hipertensión arterial': {0: 'No', 1: 'Sí'},
}
for col, mapping in transform_dict.items():
    if col in df.columns:
        df[col] = df[col].map(mapping)

# Key descriptive variables offered in the univariate/bivariate dropdowns.
vars_desc = [
    'Edad en años del paciente', 'Género', 'Régimen de afiliación', 'Estrato socioeconómico',
    'Síndrome renal al ingreso', 'Proteinuria', 'Creatinina',
]
|
40 |
+
|
41 |
+
# Análisis univariado
|
42 |
+
def univariado(var):
    """Return the distribution plot of one column as embeddable HTML.

    The result feeds a ``gr.HTML`` component, so it has to be markup. The
    previous version returned the file name ``"univariado.html"``, which the
    component would display as plain text instead of the chart. The figure is
    now rendered in memory and wrapped in an ``<iframe srcdoc=...>`` so the
    Plotly page (including its scripts) loads in its own document.

    Args:
        var: Name of a column of the module-level ``df``.

    Returns:
        An HTML string; empty when no variable is selected.
    """
    import html  # local import keeps this function self-contained

    # A cleared dropdown sends None/"" and `df[None]` would raise KeyError.
    if not var:
        return ""

    is_categorical = df[var].dtype == 'object'
    fig = px.histogram(
        df,
        x=var,
        # Colour by category only for text columns; numeric columns stay single-colour.
        color=var if is_categorical else None,
        marginal="box",
    )
    fig.update_layout(title=f"Distribución de {var}")

    # No file is written any more: a fixed file name would be shared (and
    # overwritten) by every concurrent user of the app.
    page = fig.to_html(include_plotlyjs="cdn", full_html=True)
    return (
        f'<iframe srcdoc="{html.escape(page, quote=True)}" '
        'width="100%" height="500" style="border:0"></iframe>'
    )
|
47 |
+
|
48 |
+
# Análisis bivariado
|
49 |
+
def bivariado(x, y):
    """Return a plot relating two columns of ``df`` as embeddable HTML.

    Chart type depends on the column dtypes:
      * both text columns      -> grouped histogram of ``x`` coloured by ``y``
      * exactly one text column -> box plot coloured by the text column
      * both numeric           -> scatter plot with an OLS trend line

    The result feeds a ``gr.HTML`` component. The previous version returned
    the file name ``"bivariado.html"``, which would be shown as text rather
    than as the chart, so the figure is now rendered in memory and wrapped in
    an ``<iframe srcdoc=...>``.

    Args:
        x: Column name for the horizontal axis.
        y: Column name for the vertical axis / colour.

    Returns:
        An HTML string (a short message when a variable is missing).
    """
    import html  # local import keeps this function self-contained

    # Unselected dropdowns arrive as None/""; indexing df with them would raise.
    if not x or not y:
        return "<p>Seleccione las variables X e Y.</p>"

    x_is_text = df[x].dtype == 'object'
    y_is_text = df[y].dtype == 'object'

    if x_is_text and y_is_text:
        fig = px.histogram(df, x=x, color=y, barmode='group')
    elif x_is_text or y_is_text:
        fig = px.box(df, x=x, y=y, color=x if x_is_text else y)
    else:
        try:
            fig = px.scatter(df, x=x, y=y, trendline="ols")
        except ImportError:
            # Plotly's OLS trend line needs statsmodels, which this file does
            # not import; fall back to a plain scatter instead of failing.
            fig = px.scatter(df, x=x, y=y)

    fig.update_layout(title=f"Relación entre {x} y {y}")

    page = fig.to_html(include_plotlyjs="cdn", full_html=True)
    return (
        f'<iframe srcdoc="{html.escape(page, quote=True)}" '
        'width="100%" height="500" style="border:0"></iframe>'
    )
|
59 |
+
|
60 |
+
# Chi-cuadrado
|
61 |
+
def chi2_test(var1, var2):
    """Run a chi-squared independence test between two columns of ``df``.

    Args:
        var1: Column used for the rows of the contingency table.
        var2: Column used for the columns of the contingency table.

    Returns:
        A 3-tuple matching the Gradio outputs (Textbox, DataFrame, HTML):
        a one-line summary of the test, the contingency table, and the
        heat map of the table as embeddable HTML. The third element used to
        be the file name ``"chi2_heatmap.html"``, which an HTML component
        would display as text; it is now the rendered figure inside an
        ``<iframe srcdoc=...>``.
    """
    import html  # local import keeps this function self-contained

    # Unselected dropdowns arrive as None/""; indexing df with them would raise.
    if not var1 or not var2:
        return "Seleccione dos variables.", pd.DataFrame(), ""

    table = pd.crosstab(df[var1], df[var2])
    # chi2_contingency raises on an empty table (no row has both values present).
    if table.size == 0:
        return "No hay datos suficientes para la prueba.", table, ""

    chi2, p, dof, _ = chi2_contingency(table)

    fig = px.imshow(
        table.values,
        x=table.columns.astype(str),
        y=table.index.astype(str),
        text_auto=True,
        color_continuous_scale='Blues',
        title=f"Contingencia: {var1} vs {var2}",
    )
    page = fig.to_html(include_plotlyjs="cdn", full_html=True)
    heatmap_html = (
        f'<iframe srcdoc="{html.escape(page, quote=True)}" '
        'width="100%" height="500" style="border:0"></iframe>'
    )
    return f"Chi2 = {chi2:.2f}, p = {p:.4f}, gl = {dof}", table, heatmap_html
|
69 |
+
|
70 |
+
# Mapa de calor
|
71 |
+
def mapa_calor():
|
72 |
+
mapa = folium.Map(location=[4.7,-74.1], zoom_start=11)
|
73 |
+
puntos = df.dropna(subset=['Coordenada de residencia-latitud','Coordenada de residencia-longitud'])
|
74 |
+
heat = [[row['Coordenada de residencia-latitud'], row['Coordenada de residencia-longitud']] for _, row in puntos.iterrows()]
|
75 |
+
HeatMap(heat).add_to(mapa)
|
76 |
+
for _, row in puntos.iterrows():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
folium.Marker(
|
78 |
location=[row['Coordenada de residencia-latitud'], row['Coordenada de residencia-longitud']],
|
79 |
popup=folium.Popup(
|
|
|
82 |
f"<b>Proteinuria:</b> {row.get('Proteinuria', 'N/A')}<br>"
|
83 |
f"<b>Creatinina:</b> {row.get('Creatinina', 'N/A')}"
|
84 |
)
|
85 |
+
).add_to(mapa)
|
86 |
+
mapa.save("mapa.html")
|
87 |
+
with open("mapa.html", 'r', encoding='utf-8') as f:
|
88 |
+
return f.read()
|
89 |
|
90 |
+
# Mapa de estaciones PM2.5
|
91 |
+
def mapa_pm():
    """Return a map of the PM2.5 monitoring stations as embeddable HTML.

    Each row of the module-level ``pm25`` table becomes a red circle marker
    whose popup shows the station, its locality and its 2019 average.

    The map is rendered in memory instead of being saved to ``pm25.html`` and
    read back: the fixed file name was shared by every concurrent request.
    The page is wrapped in an ``<iframe srcdoc=...>`` so that the map's own
    scripts run in a separate document when shown in a ``gr.HTML`` component.

    Returns:
        An HTML string containing the map.
    """
    import html  # local import keeps this function self-contained

    mapa = folium.Map(location=[4.7, -74.1], zoom_start=11)

    # folium rejects NaN coordinates, so skip stations without a position
    # (the patient heat map drops missing coordinates the same way).
    estaciones = pm25.dropna(subset=['LATITUD', 'LONGITUD'])
    for _, row in estaciones.iterrows():
        folium.CircleMarker(
            location=[row['LATITUD'], row['LONGITUD']],
            radius=7,
            fill=True,
            popup=f"{row['Estacion']} ({row['LOCALIDAD']}): {row['PM25_promedio_2019']} ug/m3",
            color="red",
            fill_opacity=0.7,
        ).add_to(mapa)

    page = mapa.get_root().render()
    return (
        f'<iframe srcdoc="{html.escape(page, quote=True)}" '
        'width="100%" height="500" style="border:0"></iframe>'
    )
|
105 |
|
106 |
+
# Cluster DBSCAN
|
107 |
+
def mapa_cluster(eps=0.5, min_samples=3):
    """Cluster patient residences with DBSCAN and return the map as HTML.

    Coordinates are standardised before clustering, so ``eps`` is expressed
    in standard deviations of latitude/longitude, not in degrees.

    Changes with respect to the previous version:
      * Colours come from a fixed palette. ``hash(label) & 0xFFFFFF`` mapped
        clusters 0, 1, 2... to ``#000000``, ``#000001``, ``#000002``..., which
        cannot be told apart on the map. Noise points (label -1) are grey.
      * An empty selection (no row with coordinates) returns an empty map
        instead of failing inside ``StandardScaler``.
      * The labels are stored on an explicit copy of the filtered rows.
      * The map is rendered in memory rather than through ``cluster.html``,
        a fixed file name shared by every concurrent request.

    Args:
        eps: DBSCAN neighbourhood radius (default keeps the original 0.5).
        min_samples: DBSCAN minimum neighbourhood size (default keeps 3).

    Returns:
        An HTML string containing the map inside an ``<iframe srcdoc=...>``.
    """
    import html  # local import keeps this function self-contained

    lat_col = 'Coordenada de residencia-latitud'
    lon_col = 'Coordenada de residencia-longitud'
    noise_color = "#7f7f7f"
    palette = [
        "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
        "#8c564b", "#e377c2", "#bcbd22", "#17becf", "#393b79",
    ]

    puntos = df.dropna(subset=[lat_col, lon_col]).copy()
    mapa = folium.Map(location=[4.7, -74.1], zoom_start=11)

    if not puntos.empty:
        scaled = StandardScaler().fit_transform(puntos[[lat_col, lon_col]])
        puntos['Cluster'] = DBSCAN(eps=eps, min_samples=min_samples).fit(scaled).labels_

        for lat, lon, label in zip(puntos[lat_col], puntos[lon_col], puntos['Cluster']):
            label = int(label)
            # DBSCAN marks points that belong to no cluster with -1.
            color = noise_color if label < 0 else palette[label % len(palette)]
            folium.CircleMarker(
                location=[lat, lon],
                radius=5,
                fill=True,
                color=color,
                popup=f"Cluster {label}",
            ).add_to(mapa)

    page = mapa.get_root().render()
    return (
        f'<iframe srcdoc="{html.escape(page, quote=True)}" '
        'width="100%" height="500" style="border:0"></iframe>'
    )
|
125 |
+
|
126 |
+
# Interfaz Gradio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
# Dashboard layout: one tab per analysis. Every callback used here is a
# module-level function defined earlier in the file.
with gr.Blocks() as demo:
    gr.Markdown("# Tablero Vasculitis ANCA - Integrado para Hugging Face")

    with gr.Tab("Gráficos Interactivos"):
        # (column, extra px.histogram options) for each overview chart, in display order.
        overview_charts = [
            ('Género', {'title': 'Distribución por Género'}),
            ('Edad en años del paciente', {'nbins': 10, 'title': 'Distribución por Edad', 'marginal': 'box'}),
            ('Régimen de afiliación', {'title': 'Régimen de Afiliación'}),
            ('Estrato socioeconómico', {'title': 'Estrato Socioeconómico'}),
            ('Síndrome renal al ingreso', {'title': 'Síndrome Renal al Ingreso'}),
        ]
        for chart_column, chart_options in overview_charts:
            # Bind the loop values as defaults so each callable keeps its own chart.
            gr.Plot(
                lambda chart_column=chart_column, chart_options=chart_options:
                px.histogram(df, x=chart_column, **chart_options)
            )

    with gr.Tab("Análisis Univariado"):
        uni_variable = gr.Dropdown(choices=vars_desc, label="Variable")
        uni_output = gr.HTML()
        # Redraw as soon as the selection changes; no button needed.
        uni_variable.change(univariado, inputs=uni_variable, outputs=uni_output)

    with gr.Tab("Análisis Bivariado"):
        bi_x = gr.Dropdown(choices=vars_desc, label="X")
        bi_y = gr.Dropdown(choices=vars_desc, label="Y")
        bi_output = gr.HTML()
        bi_button = gr.Button("Analizar")
        bi_button.click(bivariado, inputs=[bi_x, bi_y], outputs=bi_output)

    with gr.Tab("Chi-cuadrado"):
        # Only text (object dtype) columns are offered for the contingency table.
        chi_first = gr.Dropdown(
            choices=df.select_dtypes(include='object').columns.tolist(),
            label="Variable 1",
        )
        chi_second = gr.Dropdown(
            choices=df.select_dtypes(include='object').columns.tolist(),
            label="Variable 2",
        )
        chi_summary = gr.Textbox()
        chi_table = gr.DataFrame()
        chi_figure = gr.HTML()
        chi_button = gr.Button("Calcular")
        chi_button.click(
            chi2_test,
            inputs=[chi_first, chi_second],
            outputs=[chi_summary, chi_table, chi_figure],
        )

    with gr.Tab("Mapa de Calor"):
        heatmap_view = gr.HTML()
        heatmap_button = gr.Button("Generar Mapa")
        heatmap_button.click(mapa_calor, outputs=heatmap_view)

    with gr.Tab("Clúster Espacial"):
        cluster_view = gr.HTML()
        cluster_button = gr.Button("Detectar Clústeres")
        cluster_button.click(mapa_cluster, outputs=cluster_view)

    with gr.Tab("Mapa Calidad Aire PM2.5"):
        pm_view = gr.HTML()
        pm_button = gr.Button("Visualizar PM2.5")
        pm_button.click(mapa_pm, outputs=pm_view)

# Start the app.
demo.launch()
|