|
import numpy as np |
|
import pandas as pd |
|
import statsmodels.formula.api as smf |
|
import statsmodels.api as sm |
|
import plotly.graph_objects as go |
|
from plotly.subplots import make_subplots |
|
from scipy.optimize import minimize |
|
import plotly.express as px |
|
from scipy.stats import t, f |
|
import gradio as gr |
|
import io |
|
import os |
|
from zipfile import ZipFile |
|
import warnings |
|
|
|
|
|
warnings.filterwarnings('ignore', category=UserWarning) |
|
warnings.filterwarnings('ignore', category=RuntimeWarning) |
|
|
|
class RSM_BoxBehnken: |
|
def __init__(self, data, x1_name, x2_name, x3_name, y_name, x1_levels, x2_levels, x3_levels): |
|
""" |
|
Initialize the Response Surface Methodology Box-Behnken Design class |
|
|
|
Parameters: |
|
----------- |
|
data : pandas.DataFrame |
|
Experimental design data |
|
x1_name, x2_name, x3_name : str |
|
Names of independent variables |
|
y_name : str |
|
Name of dependent variable |
|
x1_levels, x2_levels, x3_levels : list |
|
Levels of each independent variable |
|
""" |
|
self.data = data.copy() |
|
self.model = None |
|
self.model_simplified = None |
|
self.optimized_results = None |
|
self.optimal_levels = None |
|
|
|
|
|
self.x1_name = x1_name |
|
self.x2_name = x2_name |
|
self.x3_name = x3_name |
|
self.y_name = y_name |
|
|
|
|
|
self.x1_levels = x1_levels |
|
self.x2_levels = x2_levels |
|
self.x3_levels = x3_levels |
|
|
|
def _get_levels(self, variable_name): |
|
""" |
|
Get levels for a specific variable |
|
|
|
Parameters: |
|
----------- |
|
variable_name : str |
|
Name of the variable |
|
|
|
Returns: |
|
-------- |
|
list |
|
Levels of the variable |
|
""" |
|
level_map = { |
|
self.x1_name: self.x1_levels, |
|
self.x2_name: self.x2_levels, |
|
self.x3_name: self.x3_levels |
|
} |
|
|
|
if variable_name not in level_map: |
|
raise ValueError(f"Unknown variable: {variable_name}") |
|
|
|
return level_map[variable_name] |
|
|
|
def fit_model(self, simplified=False): |
|
""" |
|
Fit the response surface model |
|
|
|
Parameters: |
|
----------- |
|
simplified : bool, optional |
|
Whether to fit a simplified model, by default False |
|
|
|
Returns: |
|
-------- |
|
tuple |
|
Fitted model and Pareto chart |
|
""" |
|
if simplified: |
|
formula = f'{self.y_name} ~ {self.x1_name} + {self.x2_name} + {self.x3_name} + ' \ |
|
f'I({self.x1_name}**2) + I({self.x2_name}**2) + I({self.x3_name}**2)' |
|
self.model_simplified = smf.ols(formula, data=self.data).fit() |
|
print("\nSimplified Model:") |
|
print(self.model_simplified.summary()) |
|
return self.model_simplified, self.pareto_chart(self.model_simplified, "Pareto - Simplified Model") |
|
else: |
|
formula = f'{self.y_name} ~ {self.x1_name} + {self.x2_name} + {self.x3_name} + ' \ |
|
f'I({self.x1_name}**2) + I({self.x2_name}**2) + I({self.x3_name}**2) + ' \ |
|
f'{self.x1_name}:{self.x2_name} + {self.x1_name}:{self.x3_name} + {self.x2_name}:{self.x3_name}' |
|
self.model = smf.ols(formula, data=self.data).fit() |
|
print("Full Model:") |
|
print(self.model.summary()) |
|
return self.model, self.pareto_chart(self.model, "Pareto - Full Model") |
|
|
|
def optimize(self, method='Nelder-Mead'): |
|
""" |
|
Optimize the response surface model |
|
|
|
Parameters: |
|
----------- |
|
method : str, optional |
|
Optimization method, by default 'Nelder-Mead' |
|
|
|
Returns: |
|
-------- |
|
pandas.DataFrame |
|
Optimization results table |
|
""" |
|
if self.model_simplified is None: |
|
raise ValueError("Fit the simplified model first.") |
|
|
|
def objective_function(x): |
|
"""Objective function for optimization""" |
|
return -self.model_simplified.predict(pd.DataFrame({ |
|
self.x1_name: [x[0]], |
|
self.x2_name: [x[1]], |
|
self.x3_name: [x[2]] |
|
})) |
|
|
|
bounds = [(-1, 1), (-1, 1), (-1, 1)] |
|
x0 = [0, 0, 0] |
|
|
|
self.optimized_results = minimize( |
|
objective_function, |
|
x0, |
|
method=method, |
|
bounds=bounds |
|
) |
|
self.optimal_levels = self.optimized_results.x |
|
|
|
|
|
optimal_levels_natural = [ |
|
round(self.coded_to_natural(self.optimal_levels[i], var), 3) |
|
for i, var in enumerate([self.x1_name, self.x2_name, self.x3_name]) |
|
] |
|
|
|
optimization_table = pd.DataFrame({ |
|
'Variable': [self.x1_name, self.x2_name, self.x3_name], |
|
'Optimal Level (Natural)': optimal_levels_natural, |
|
'Optimal Level (Coded)': [round(x, 3) for x in self.optimal_levels] |
|
}) |
|
|
|
return optimization_table |
|
|
|
def coded_to_natural(self, coded_value, variable_name): |
|
""" |
|
Convert coded value to natural level |
|
|
|
Parameters: |
|
----------- |
|
coded_value : float |
|
Coded value of the variable |
|
variable_name : str |
|
Name of the variable |
|
|
|
Returns: |
|
-------- |
|
float |
|
Natural level of the variable |
|
""" |
|
levels = self._get_levels(variable_name) |
|
return levels[0] + (coded_value + 1) * (levels[-1] - levels[0]) / 2 |
|
|
|
def natural_to_coded(self, natural_value, variable_name): |
|
""" |
|
Convert natural level to coded value |
|
|
|
Parameters: |
|
----------- |
|
natural_value : float |
|
Natural level of the variable |
|
variable_name : str |
|
Name of the variable |
|
|
|
Returns: |
|
-------- |
|
float |
|
Coded value of the variable |
|
""" |
|
levels = self._get_levels(variable_name) |
|
return -1 + 2 * (natural_value - levels[0]) / (levels[-1] - levels[0]) |
|
|
|
def pareto_chart(self, model, title): |
|
""" |
|
Create Pareto chart of standardized effects |
|
|
|
Parameters: |
|
----------- |
|
model : statsmodels.regression.linear_model.RegressionResultsWrapper |
|
Fitted regression model |
|
title : str |
|
Title of the Pareto chart |
|
|
|
Returns: |
|
-------- |
|
plotly.graph_objects.Figure |
|
Pareto chart |
|
""" |
|
tvalues = model.tvalues[1:] |
|
abs_tvalues = np.abs(tvalues) |
|
sorted_idx = np.argsort(abs_tvalues)[::-1] |
|
sorted_tvalues = abs_tvalues[sorted_idx] |
|
sorted_names = tvalues.index[sorted_idx] |
|
|
|
alpha = 0.05 |
|
dof = model.df_resid |
|
t_critical = t.ppf(1 - alpha / 2, dof) |
|
|
|
fig = px.bar( |
|
x=sorted_tvalues, |
|
y=sorted_names, |
|
orientation='h', |
|
labels={'x': 'Standardized Effect', 'y': 'Term'}, |
|
title=title |
|
) |
|
fig.update_yaxes(autorange="reversed") |
|
fig.add_vline(x=t_critical, line_dash="dot", |
|
annotation_text=f"Critical t = {t_critical:.2f}", |
|
annotation_position="bottom right") |
|
|
|
return fig |
|
|
|
def generate_prediction_table(self): |
|
""" |
|
Generate prediction table with predicted and residual values |
|
|
|
Returns: |
|
-------- |
|
pandas.DataFrame |
|
Prediction table |
|
""" |
|
if self.model_simplified is None: |
|
raise ValueError("Fit the simplified model first.") |
|
|
|
predictions = self.model_simplified.predict(self.data) |
|
residuals = self.data[self.y_name] - predictions |
|
|
|
prediction_table = self.data.copy() |
|
prediction_table['Predicted'] = predictions.round(3) |
|
prediction_table['Residual'] = residuals.round(3) |
|
|
|
return prediction_table[[self.y_name, 'Predicted', 'Residual']] |
|
|
|
def calculate_contribution_percentage(self): |
|
""" |
|
Calculate percentage contribution of model terms |
|
|
|
Returns: |
|
-------- |
|
pandas.DataFrame |
|
Contribution percentage table |
|
""" |
|
if self.model_simplified is None: |
|
raise ValueError("Fit the simplified model first.") |
|
|
|
anova_table = sm.stats.anova_lm(self.model_simplified, typ=2) |
|
ss_total = anova_table['sum_sq'].sum() |
|
|
|
contribution_table = [] |
|
|
|
for index, row in anova_table.iterrows(): |
|
if index != 'Residual': |
|
factor_name = index.replace('I(', '').replace('**2)', '^2') |
|
ss_factor = row['sum_sq'] |
|
contribution_percentage = (ss_factor / ss_total) * 100 |
|
|
|
contribution_table.append({ |
|
'Factor': factor_name, |
|
'Sum of Squares': round(ss_factor, 3), |
|
'% Contribution': round(contribution_percentage, 3) |
|
}) |
|
|
|
return pd.DataFrame(contribution_table) |
|
|
|
def calculate_detailed_anova(self): |
|
""" |
|
Perform detailed ANOVA analysis |
|
|
|
Returns: |
|
-------- |
|
pandas.DataFrame |
|
Detailed ANOVA table |
|
""" |
|
if self.model_simplified is None: |
|
raise ValueError("Fit the simplified model first.") |
|
|
|
|
|
ss_total = np.sum((self.data[self.y_name] - self.data[self.y_name].mean())**2) |
|
df_total = len(self.data) - 1 |
|
|
|
|
|
formula_reduced = f'{self.y_name} ~ {self.x1_name} + {self.x2_name} + {self.x3_name} + ' \ |
|
f'I({self.x1_name}**2) + I({self.x2_name}**2) + I({self.x3_name}**2)' |
|
model_reduced = smf.ols(formula_reduced, data=self.data).fit() |
|
anova_reduced = sm.stats.anova_lm(model_reduced, typ=2) |
|
|
|
|
|
ss_regression = anova_reduced['sum_sq'][:-1].sum() |
|
df_regression = len(anova_reduced) - 1 |
|
|
|
ss_residual = self.model_simplified.ssr |
|
df_residual = self.model_simplified.df_resid |
|
|
|
|
|
replicas = self.data[self.data.duplicated(subset=[self.x1_name, self.x2_name, self.x3_name], keep=False)] |
|
ss_pure_error = replicas.groupby([self.x1_name, self.x2_name, self.x3_name])[self.y_name].var().sum() |
|
df_pure_error = len(replicas) - len(replicas.groupby([self.x1_name, self.x2_name, self.x3_name])) |
|
|
|
|
|
ss_lack_of_fit = ss_residual - ss_pure_error |
|
df_lack_of_fit = df_residual - df_pure_error |
|
|
|
|
|
ms_regression = ss_regression / df_regression |
|
ms_residual = ss_residual / df_residual |
|
ms_lack_of_fit = ss_lack_of_fit / df_lack_of_fit |
|
ms_pure_error = ss_pure_error / df_pure_error |
|
|
|
f_lack_of_fit = ms_lack_of_fit / ms_pure_error |
|
p_lack_of_fit = 1 - f.cdf(f_lack_of_fit, df_lack_of_fit, df_pure_error) |
|
|
|
|
|
detailed_anova_table = pd.DataFrame({ |
|
'Source of Variation': ['Regression', 'Residual', 'Lack of Fit', 'Pure Error', 'Total'], |
|
'Sum of Squares': [ |
|
round(ss_regression, 3), |
|
round(ss_residual, 3), |
|
round(ss_lack_of_fit, 3), |
|
round(ss_pure_error, 3), |
|
round(ss_total, 3) |
|
], |
|
'Degrees of Freedom': [df_regression, df_residual, df_lack_of_fit, df_pure_error, df_total], |
|
'Mean Square': [ |
|
round(ms_regression, 3), |
|
round(ms_residual, 3), |
|
round(ms_lack_of_fit, 3), |
|
round(ms_pure_error, 3), |
|
np.nan |
|
], |
|
'F': [np.nan, np.nan, round(f_lack_of_fit, 3), np.nan, np.nan], |
|
'p-value': [np.nan, np.nan, round(p_lack_of_fit, 3), np.nan, np.nan] |
|
}) |
|
|
|
return detailed_anova_table |
|
|
|
|
|
def load_data(x1_name, x2_name, x3_name, y_name, x1_levels_str, x2_levels_str, x3_levels_str, data_str): |
|
try: |
|
x1_levels = [float(x.strip()) for x in x1_levels_str.split(',')] |
|
x2_levels = [float(x.strip()) for x in x2_levels_str.split(',')] |
|
x3_levels = [float(x.strip()) for x in x3_levels_str.split(',')] |
|
|
|
data_list = [row.split(',') for row in data_str.strip().split('\n')] |
|
column_names = ['Exp.', x1_name, x2_name, x3_name, y_name] |
|
data = pd.DataFrame(data_list, columns=column_names) |
|
data = data.apply(pd.to_numeric, errors='coerce') |
|
|
|
if not all(col in data.columns for col in column_names): |
|
raise ValueError("El formato de los datos no es correcto.") |
|
|
|
global rsm |
|
rsm = RSM_BoxBehnken(data, x1_name, x2_name, x3_name, y_name, x1_levels, x2_levels, x3_levels) |
|
|
|
return data, x1_name, x2_name, x3_name, y_name, x1_levels, x2_levels, x3_levels, gr.update(visible=True) |
|
|
|
except Exception as e: |
|
return None, "", "", "", "", [], [], [], gr.update(visible=False), f"Error: {e}" |
|
|
|
def fit_and_optimize_model(): |
|
if 'rsm' not in globals(): |
|
return None, None, None, None, None, None, "Error: Carga los datos primero." |
|
|
|
model_completo, pareto_completo = rsm.fit_model() |
|
model_simplificado, pareto_simplificado = rsm.fit_simplified_model() |
|
optimization_table = rsm.optimize() |
|
equation = rsm.get_simplified_equation() |
|
prediction_table = rsm.generate_prediction_table() |
|
contribution_table = rsm.calculate_contribution_percentage() |
|
anova_table = rsm.calculate_detailed_anova() |
|
|
|
equation_formatted = equation.replace(" + ", "<br>+ ").replace(" ** ", "^").replace("*", " × ") |
|
equation_formatted = f"### Ecuación del Modelo Simplificado:<br>{equation_formatted}" |
|
|
|
|
|
return model_completo.summary().as_html(), pareto_completo, model_simplificado.summary().as_html(), pareto_simplificado, equation_formatted, optimization_table, prediction_table, contribution_table, anova_table |
|
|
|
def generate_rsm_plot(fixed_variable, fixed_level): |
|
if 'rsm' not in globals(): |
|
return None, "Error: Carga los datos primero." |
|
|
|
|
|
all_figs = rsm.generate_all_plots() |
|
|
|
|
|
plot_outputs = [] |
|
for fig in all_figs: |
|
|
|
img_bytes = fig.to_image(format="png") |
|
plot_outputs.append(img_bytes) |
|
|
|
|
|
return plot_outputs |
|
|
|
def download_excel(): |
|
if 'rsm' not in globals(): |
|
return None, "Error: Carga los datos y ajusta el modelo primero." |
|
|
|
output = io.BytesIO() |
|
with pd.ExcelWriter(output, engine='xlsxwriter') as writer: |
|
rsm.data.to_excel(writer, sheet_name='Datos', index=False) |
|
rsm.generate_prediction_table().to_excel(writer, sheet_name='Predicciones', index=False) |
|
rsm.optimize().to_excel(writer, sheet_name='Optimizacion', index=False) |
|
rsm.calculate_contribution_percentage().to_excel(writer, sheet_name='Contribucion', index=False) |
|
rsm.calculate_detailed_anova().to_excel(writer, sheet_name='ANOVA', index=False) |
|
|
|
output.seek(0) |
|
return gr.File.update(value=output, visible=True, filename="resultados_rsm.xlsx") |
|
|
|
def download_images(): |
|
if 'rsm' not in globals(): |
|
return None, "Error: Carga los datos y ajusta el modelo primero." |
|
|
|
|
|
temp_dir = "temp_images" |
|
os.makedirs(temp_dir, exist_ok=True) |
|
|
|
|
|
all_figs = rsm.generate_all_plots() |
|
for i, fig in enumerate(all_figs): |
|
img_path = os.path.join(temp_dir, f"plot_{i}.png") |
|
fig.write_image(img_path) |
|
|
|
|
|
zip_buffer = io.BytesIO() |
|
with ZipFile(zip_buffer, "w") as zip_file: |
|
for filename in os.listdir(temp_dir): |
|
file_path = os.path.join(temp_dir, filename) |
|
zip_file.write(file_path, arcname=filename) |
|
|
|
|
|
for filename in os.listdir(temp_dir): |
|
file_path = os.path.join(temp_dir, filename) |
|
os.remove(file_path) |
|
os.rmdir(temp_dir) |
|
|
|
zip_buffer.seek(0) |
|
return gr.File.update(value=zip_buffer, visible=True, filename="graficos_rsm.zip") |
|
|
|
|
|
|
|
with gr.Blocks() as demo: |
|
gr.Markdown("# Optimización de la producción de AIA usando RSM Box-Behnken") |
|
|
|
with gr.Row(): |
|
with gr.Column(): |
|
gr.Markdown("## Configuración del Diseño") |
|
x1_name_input = gr.Textbox(label="Nombre de la Variable X1 (ej. Glucosa)", value="Glucosa") |
|
x2_name_input = gr.Textbox(label="Nombre de la Variable X2 (ej. Extracto de Levadura)", value="Extracto_de_Levadura") |
|
x3_name_input = gr.Textbox(label="Nombre de la Variable X3 (ej. Triptófano)", value="Triptofano") |
|
y_name_input = gr.Textbox(label="Nombre de la Variable Dependiente (ej. AIA (ppm))", value="AIA_ppm") |
|
x1_levels_input = gr.Textbox(label="Niveles de X1 (separados por comas)", value="1, 3.5, 5.5") |
|
x2_levels_input = gr.Textbox(label="Niveles de X2 (separados por comas)", value="0.03, 0.2, 0.3") |
|
x3_levels_input = gr.Textbox(label="Niveles de X3 (separados por comas)", value="0.4, 0.65, 0.9") |
|
data_input = gr.Textbox(label="Datos del Experimento (formato CSV)", lines=5, value="""1,-1,-1,0,166.594 |
|
2,1,-1,0,177.557 |
|
3,-1,1,0,127.261 |
|
4,1,1,0,147.573 |
|
5,-1,0,-1,188.883 |
|
6,1,0,-1,224.527 |
|
7,-1,0,1,190.238 |
|
8,1,0,1,226.483 |
|
9,0,-1,-1,195.550 |
|
10,0,1,-1,149.493 |
|
11,0,-1,1,187.683 |
|
12,0,1,1,148.621 |
|
13,0,0,0,278.951 |
|
14,0,0,0,297.238 |
|
15,0,0,0,280.896""") |
|
load_button = gr.Button("Cargar Datos") |
|
|
|
|
|
with gr.Column(): |
|
gr.Markdown("## Datos Cargados") |
|
data_output = gr.Dataframe(label="Tabla de Datos") |
|
|
|
|
|
with gr.Row(visible=False) as analysis_row: |
|
with gr.Column(): |
|
fit_button = gr.Button("Ajustar Modelo y Optimizar") |
|
download_excel_button = gr.Button("Descargar Tablas en Excel") |
|
download_images_button = gr.Button("Descargar Gráficos en ZIP") |
|
gr.Markdown("**Modelo Completo**") |
|
model_completo_output = gr.HTML() |
|
pareto_completo_output = gr.Plot() |
|
gr.Markdown("**Modelo Simplificado**") |
|
model_simplificado_output = gr.HTML() |
|
pareto_simplificado_output = gr.Plot() |
|
equation_output = gr.HTML() |
|
optimization_table_output = gr.Dataframe(label="Tabla de Optimización") |
|
prediction_table_output = gr.Dataframe(label="Tabla de Predicciones") |
|
contribution_table_output = gr.Dataframe(label="Tabla de % de Contribución") |
|
anova_table_output = gr.Dataframe(label="Tabla ANOVA Detallada") |
|
with gr.Column(): |
|
gr.Markdown("## Generar Gráficos de Superficie de Respuesta") |
|
fixed_variable_input = gr.Dropdown(label="Variable Fija", choices=["Glucosa", "Extracto_de_Levadura", "Triptofano"], value="Glucosa") |
|
fixed_level_input = gr.Slider(label="Nivel de Variable Fija", minimum=0, maximum=1, step=0.01, value=0.5) |
|
plot_button = gr.Button("Generar Gráfico") |
|
rsm_plot_output = gr.Gallery(label="Gráficos RSM", columns=3, preview=True, height="auto") |
|
|
|
load_button.click( |
|
load_data, |
|
inputs=[x1_name_input, x2_name_input, x3_name_input, y_name_input, x1_levels_input, x2_levels_input, x3_levels_input, data_input], |
|
outputs=[data_output, x1_name_input, x2_name_input, x3_name_input, y_name_input, x1_levels_input, x2_levels_input, x3_levels_input, analysis_row] |
|
) |
|
|
|
fit_button.click(fit_and_optimize_model, outputs=[model_completo_output, pareto_completo_output, model_simplificado_output, pareto_simplificado_output, equation_output, optimization_table_output, prediction_table_output, contribution_table_output, anova_table_output]) |
|
|
|
plot_button.click(generate_rsm_plot, inputs=[fixed_variable_input, fixed_level_input], outputs=[rsm_plot_output]) |
|
|
|
download_excel_button.click(download_excel, outputs=[gr.File()]) |
|
download_images_button.click(download_images, outputs=[gr.File()]) |
|
|
|
|
|
gr.Markdown("## Ejemplo de uso") |
|
gr.Markdown("1. Introduce los nombres de las variables y sus niveles en las cajas de texto correspondientes.") |
|
gr.Markdown("2. Copia y pega los datos del experimento en la caja de texto 'Datos del Experimento'.") |
|
gr.Markdown("3. Haz clic en 'Cargar Datos' para cargar los datos en la tabla.") |
|
gr.Markdown("4. Haz clic en 'Ajustar Modelo y Optimizar' para ajustar el modelo y encontrar los niveles óptimos de los factores.") |
|
gr.Markdown("5. Selecciona una variable fija y su nivel en los controles deslizantes.") |
|
gr.Markdown("6. Haz clic en 'Generar Gráfico' para generar un gráfico de superficie de respuesta.") |
|
gr.Markdown("7. Haz clic en 'Descargar Tablas en Excel' para obtener un archivo Excel con todas las tablas generadas.") |
|
gr.Markdown("8. Haz clic en 'Descargar Gráficos en ZIP' para obtener un archivo ZIP con todos los gráficos generados.") |
|
|
|
demo.launch() |