HaryaniAnjali committed on
Commit
802ca33
·
verified ·
1 Parent(s): 6d73ebd

Update demo.py

Browse files
Files changed (1) hide show
  1. demo.py +504 -5
demo.py CHANGED
@@ -1,15 +1,473 @@
1
  # Example script to run the demo without AI model dependencies for local testing
2
- # Saves this as demo.py
3
 
4
  import gradio as gr
5
- from app import read_file, analyze_data, generate_visualizations, display_analysis
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  def simple_process_file(file):
8
  """Simplified version without AI models for testing"""
9
  # Read the file
10
  df = read_file(file)
11
 
12
- if isinstance(df, str): # If error message
13
  return df, None, None, None
14
 
15
  # Analyze data
@@ -51,18 +509,38 @@ def demo_ui(file):
51
  if file is None:
52
  return "Please upload a file to begin analysis.", None, None, None
53
 
 
 
54
  # Process the file
55
  analysis, visualizations, cleaning_recommendations, analysis_insights = simple_process_file(file)
56
 
 
 
 
 
57
  # Format analysis for display
58
  analysis_html = display_analysis(analysis)
59
 
60
  # Prepare visualizations for display
61
  viz_html = ""
62
  if visualizations and not isinstance(visualizations, str):
 
63
  for viz_name, fig in visualizations.items():
64
- # Convert plotly figure to HTML
65
- viz_html += f'<div style="margin-bottom: 30px;">{fig.to_html(full_html=False, include_plotlyjs="cdn")}</div>'
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  # Combine analysis and visualizations
68
  result_html = f"""
@@ -75,6 +553,23 @@ def demo_ui(file):
75
 
76
  return result_html, visualizations, cleaning_recommendations, analysis_insights
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  # Create Gradio interface for demo mode
79
  with gr.Blocks(title="Data Visualization & Cleaning AI (Demo Mode)") as demo:
80
  gr.Markdown("# Data Visualization & Cleaning AI")
@@ -83,6 +578,10 @@ with gr.Blocks(title="Data Visualization & Cleaning AI (Demo Mode)") as demo:
83
  with gr.Row():
84
  file_input = gr.File(label="Upload Data File")
85
 
 
 
 
 
86
  with gr.Tabs():
87
  with gr.TabItem("Data Analysis"):
88
  with gr.Row():
 
1
  # Example script to run the demo without AI model dependencies for local testing
2
+ # Save this as demo.py
3
 
4
  import gradio as gr
5
+ import pandas as pd
6
+ import numpy as np
7
+ import matplotlib.pyplot as plt
8
+ import seaborn as sns
9
+ import plotly.express as px
10
+ import plotly.graph_objects as go
11
+ import io
12
+ from sklearn.decomposition import PCA
13
+ from sklearn.preprocessing import StandardScaler
14
+ import os
15
+ import json
16
+ import re
17
+
18
+ # Set plot styling
19
+ sns.set(style="whitegrid")
20
+ plt.rcParams["figure.figsize"] = (10, 6)
21
+
22
def read_file(file):
    """Read an uploaded data file into a pandas DataFrame.

    Supports .csv (comma, semicolon, or sniffed separator), .xls/.xlsx,
    .json, and .txt (tab or sniffed separator). Separator detection for
    CSV: try comma first; if the result is a single column whose header
    contains ';', re-read with a semicolon; on error, fall back to
    semicolon, then to Python's csv sniffer (sep=None, engine='python').

    Args:
        file: A file-like object (e.g. a Gradio upload). Its ``name``
            attribute, when present, selects the parser by extension.

    Returns:
        A ``pd.DataFrame`` on success, ``None`` when ``file`` is None,
        or an error-message string on failure (callers check for str).
    """
    if file is None:
        return None

    file_name = file.name if hasattr(file, 'name') else ''
    print(f"Reading file: {file_name}")

    try:
        # Handle different file types
        if file_name.endswith('.csv'):
            # First try with comma
            try:
                df = pd.read_csv(file)

                # A single column whose header contains ';' usually means
                # the file is semicolon-separated — re-read accordingly.
                if len(df.columns) == 1 and ';' in str(df.columns[0]):
                    print("Detected potential semicolon-separated file")
                    # Reset file position before re-reading
                    file.seek(0)
                    df = pd.read_csv(file, sep=';')
                    print(f"Read file with semicolon separator: {df.shape}")
                else:
                    print(f"Read file with comma separator: {df.shape}")

                # Coerce string columns that are actually numeric.
                # try/except replaces the deprecated errors='ignore'
                # (removed in pandas 2.2+) with identical behavior.
                for col in df.columns:
                    if df[col].dtype == 'object':
                        try:
                            df[col] = pd.to_numeric(df[col])
                        except (ValueError, TypeError):
                            pass  # genuinely non-numeric — leave as-is

                return df
            except Exception as e:
                print(f"Error with standard separators: {e}")
                # Try with semicolon
                file.seek(0)
                try:
                    df = pd.read_csv(file, sep=';')
                    print(f"Read file with semicolon separator after error: {df.shape}")
                    return df
                except Exception:
                    # Final attempt with Python's csv sniffer
                    file.seek(0)
                    return pd.read_csv(file, sep=None, engine='python')

        elif file_name.endswith(('.xls', '.xlsx')):
            return pd.read_excel(file)
        elif file_name.endswith('.json'):
            return pd.read_json(file)
        elif file_name.endswith('.txt'):
            # Try tab separator first for text files
            try:
                df = pd.read_csv(file, delimiter='\t')
                if len(df.columns) <= 1:
                    # Tab didn't split anything — fall back to sniffing
                    file.seek(0)
                    df = pd.read_csv(file, sep=None, engine='python')
                return df
            except Exception:
                # Fall back to separator detection
                file.seek(0)
                return pd.read_csv(file, sep=None, engine='python')
        else:
            return "Unsupported file format. Please upload .csv, .xlsx, .xls, .json, or .txt files."
    except Exception as e:
        print(f"Error reading file: {str(e)}")
        return f"Error reading file: {str(e)}"
90
+
91
def analyze_data(df):
    """Generate basic statistics and information about the dataset.

    Returns a dict of analysis sections (shape, columns, dtypes, missing
    values, quality issues, numeric stats, outliers, category counts),
    or passes a non-DataFrame input (error string) through unchanged.
    """
    if not isinstance(df, pd.DataFrame):
        return df  # Pass the error string straight through

    report = {
        'Shape': df.shape,
        'Columns': df.columns.tolist(),
        'Data Types': df.dtypes.astype(str).to_dict(),
    }

    # Missing-value summary: per-column counts, or a friendly message
    nulls = df.isnull().sum()
    if nulls.sum() > 0:
        report['Missing Values'] = nulls[nulls > 0].to_dict()
    else:
        report['Missing Values'] = "No missing values found"

    # Common quality problems (duplicates, cardinality, dates, sparsity)
    report['Data Quality Issues'] = identify_data_quality_issues(df)

    # Numeric columns: describe() table plus IQR-based outlier report
    numeric = df.select_dtypes(include=[np.number]).columns.tolist()
    if numeric:
        report['Numeric Columns'] = numeric
        report['Statistics'] = df[numeric].describe().to_html()
        found = detect_outliers(df, numeric)
        if found:
            report['Outliers'] = found

    # Categorical columns: top-10 value counts for up to 5 columns
    cats = df.select_dtypes(include=['object', 'category']).columns.tolist()
    if cats:
        report['Categorical Columns'] = cats
        top_values = {}
        for name in cats[:5]:  # keep output compact
            top_values[name] = df[name].value_counts().head(10).to_dict()
        report['Category Counts'] = top_values

    return report
134
+
135
def identify_data_quality_issues(df):
    """Identify common data quality issues in a DataFrame.

    Checks performed:
      * duplicate rows,
      * high-cardinality categorical columns (> 50 unique values),
      * object columns whose string values look like unparsed dates,
      * columns with more than 50% missing values.

    Args:
        df: The DataFrame to inspect.

    Returns:
        dict: Issue name -> details; empty when nothing is flagged.
    """
    issues = {}

    # Check for duplicate rows
    duplicate_count = df.duplicated().sum()
    if duplicate_count > 0:
        issues['Duplicate Rows'] = duplicate_count

    # Check for high cardinality in categorical columns
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
    high_cardinality = {}
    for col in categorical_cols:
        unique_count = df[col].nunique()
        if unique_count > 50:  # Arbitrary threshold
            high_cardinality[col] = unique_count

    if high_cardinality:
        issues['High Cardinality Columns'] = high_cardinality

    # Check for potential date columns not properly formatted.
    # Pattern is loop-invariant, so compile it once outside the loop.
    date_pattern = re.compile(r'\d{1,4}[-/\.]\d{1,2}[-/\.]\d{1,4}')
    potential_date_cols = []
    for col in df.select_dtypes(include=['object']).columns:
        # Sample the first 10 non-null values
        sample = df[col].dropna().head(10).tolist()
        if all(isinstance(x, str) for x in sample):
            # Simple date pattern check on the sample
            if any(date_pattern.search(str(x)) for x in sample):
                potential_date_cols.append(col)

    if potential_date_cols:
        issues['Potential Date Columns'] = potential_date_cols

    # Check for columns with mostly missing values
    high_missing = {}
    for col in df.columns:
        missing_pct = df[col].isnull().mean() * 100
        if missing_pct > 50:  # More than 50% missing
            high_missing[col] = f"{missing_pct:.2f}%"

    if high_missing:
        issues['Columns with >50% Missing'] = high_missing

    return issues
180
+
181
def detect_outliers(df, numeric_cols):
    """Detect outliers in numeric columns using the IQR method.

    A value is an outlier when it lies outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR].
    Columns where nearly every value is unique (likely IDs) are skipped, and
    a column is only reported when more than 1% of its values are outliers.
    """
    found = {}
    total_rows = df.shape[0]

    for column in numeric_cols:
        # Skip likely ID columns: almost every value is unique
        if df[column].nunique() > total_rows * 0.9:
            continue

        # Interquartile range and the standard 1.5*IQR fences
        q1 = df[column].quantile(0.25)
        q3 = df[column].quantile(0.75)
        spread = q3 - q1
        low = q1 - 1.5 * spread
        high = q3 + 1.5 * spread

        # Count values falling outside the fences
        outside = (df[column] < low) | (df[column] > high)
        n_out = outside.sum()

        if n_out > 0:
            share = (n_out / total_rows) * 100
            if share > 1:  # only worth reporting above 1%
                found[column] = {
                    'count': n_out,
                    'percentage': f"{share:.2f}%",
                    'lower_bound': low,
                    'upper_bound': high,
                }

    return found
213
+
214
def generate_visualizations(df):
    """Generate appropriate visualizations based on the data types.

    Builds a dict of Plotly figures keyed by short names: a sanity-check
    histogram ('test_plot'), histograms for up to 5 numeric columns
    ('dist_<col>'), bar charts for up to 5 categorical columns
    ('bar_<col>'), a correlation heatmap ('correlation'), a scatter-plot
    matrix ('scatter_matrix'), a time-series line when a date-like column
    exists ('time_series'), and a 2-component PCA scatter ('pca').
    Each chart is wrapped in its own try/except so one failure does not
    abort the rest; errors are printed, never raised.

    Args:
        df: The DataFrame to visualize. Anything else (e.g. an error
            string from read_file) is returned unchanged.

    Returns:
        dict: name -> Plotly figure. When no chart could be produced, a
        single 'fallback' figure is returned instead. Non-DataFrame input
        is passed through as-is.
    """
    if not isinstance(df, pd.DataFrame):
        print(f"Not a DataFrame: {type(df)}")
        return df  # Return error message if df is not a DataFrame

    print(f"Starting visualization generation for DataFrame with shape: {df.shape}")

    visualizations = {}

    # Identify column types. A column counts as a date column when it is
    # already datetime64, or is an object column whose every value parses
    # as a date. NOTE(review): pd.to_datetime over each whole object
    # column may be slow for large frames — confirm acceptable here.
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
    date_cols = [col for col in df.columns if df[col].dtype == 'datetime64[ns]' or
                 (df[col].dtype == 'object' and pd.to_datetime(df[col], errors='coerce').notna().all())]

    print(f"Found {len(numeric_cols)} numeric columns: {numeric_cols}")
    print(f"Found {len(categorical_cols)} categorical columns: {categorical_cols}")
    print(f"Found {len(date_cols)} date columns: {date_cols}")

    try:
        # Simple test plot to verify Plotly is working
        if len(df) > 0 and len(df.columns) > 0:
            col = df.columns[0]
            try:
                test_data = df[col].head(100)
                fig = px.histogram(x=test_data, title=f"Test Plot for {col}")
                visualizations['test_plot'] = fig
                print(f"Generated test plot for column: {col}")
            except Exception as e:
                print(f"Error creating test plot: {e}")

        # 1. Distribution plots for numeric columns (first 5)
        if numeric_cols:
            for i, col in enumerate(numeric_cols[:5]):  # Limit to first 5 numeric columns
                try:
                    fig = px.histogram(df, x=col, marginal="box", title=f"Distribution of {col}")
                    visualizations[f'dist_{col}'] = fig
                    print(f"Generated distribution plot for {col}")
                except Exception as e:
                    print(f"Error creating histogram for {col}: {e}")

        # 2. Bar charts for categorical columns (first 5)
        if categorical_cols:
            for i, col in enumerate(categorical_cols[:5]):  # Limit to first 5 categorical columns
                try:
                    # Get value counts and handle potential large number of categories
                    value_counts = df[col].value_counts().nlargest(10)  # Top 10 categories

                    # Convert indices to strings to ensure they can be plotted
                    value_counts.index = value_counts.index.astype(str)

                    fig = px.bar(x=value_counts.index, y=value_counts.values,
                                 title=f"Top 10 categories in {col}")
                    fig.update_xaxes(title=col)
                    fig.update_yaxes(title="Count")
                    visualizations[f'bar_{col}'] = fig
                    print(f"Generated bar chart for {col}")
                except Exception as e:
                    print(f"Error creating bar chart for {col}: {e}")

        # 3. Correlation heatmap for numeric columns
        if len(numeric_cols) > 1:
            try:
                corr_matrix = df[numeric_cols].corr()
                fig = px.imshow(corr_matrix, text_auto=True, aspect="auto",
                                title="Correlation Heatmap")
                visualizations['correlation'] = fig
                print("Generated correlation heatmap")
            except Exception as e:
                print(f"Error creating correlation heatmap: {e}")

        # 4. Scatter plot matrix (first 3 numeric columns to keep it manageable)
        if len(numeric_cols) >= 2:
            try:
                plot_cols = numeric_cols[:3]  # Limit to first 3 numeric columns
                fig = px.scatter_matrix(df, dimensions=plot_cols, title="Scatter Plot Matrix")
                visualizations['scatter_matrix'] = fig
                print("Generated scatter plot matrix")
            except Exception as e:
                print(f"Error creating scatter matrix: {e}")

        # 5. Time series plot if date column exists
        if date_cols and numeric_cols:
            try:
                date_col = date_cols[0]  # Use the first date column
                # Convert to datetime if not already.
                # NOTE(review): this assignment mutates the caller's
                # DataFrame in place — confirm that is intended.
                if df[date_col].dtype != 'datetime64[ns]':
                    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

                # Sort by date
                df_sorted = df.sort_values(by=date_col)

                # Create time series for first numeric column
                num_col = numeric_cols[0]
                fig = px.line(df_sorted, x=date_col, y=num_col,
                              title=f"{num_col} over Time")
                visualizations['time_series'] = fig
                print("Generated time series plot")
            except Exception as e:
                print(f"Error creating time series plot: {e}")

        # 6. PCA visualization if enough numeric columns
        if len(numeric_cols) >= 3:
            try:
                # Apply PCA to numeric data
                numeric_data = df[numeric_cols].select_dtypes(include=[np.number])
                # Fill NaN values with mean for PCA (PCA cannot handle NaN)
                numeric_data = numeric_data.fillna(numeric_data.mean())

                # Standardize the data so each feature contributes equally
                scaler = StandardScaler()
                scaled_data = scaler.fit_transform(numeric_data)

                # Apply PCA with 2 components
                pca = PCA(n_components=2)
                pca_result = pca.fit_transform(scaled_data)

                # Create a DataFrame with PCA results
                pca_df = pd.DataFrame(data=pca_result, columns=['PC1', 'PC2'])

                # If categorical column exists, use it for color
                if categorical_cols:
                    cat_col = categorical_cols[0]
                    pca_df[cat_col] = df[cat_col].values
                    fig = px.scatter(pca_df, x='PC1', y='PC2', color=cat_col,
                                     title="PCA Visualization")
                else:
                    fig = px.scatter(pca_df, x='PC1', y='PC2',
                                     title="PCA Visualization")

                # Annotate the explained-variance ratio of each component
                variance_ratio = pca.explained_variance_ratio_
                fig.update_layout(
                    annotations=[
                        dict(
                            text=f"PC1 explained variance: {variance_ratio[0]:.2f}",
                            showarrow=False,
                            x=0.5,
                            y=1.05,
                            xref="paper",
                            yref="paper"
                        ),
                        dict(
                            text=f"PC2 explained variance: {variance_ratio[1]:.2f}",
                            showarrow=False,
                            x=0.5,
                            y=1.02,
                            xref="paper",
                            yref="paper"
                        )
                    ]
                )

                visualizations['pca'] = fig
                print("Generated PCA visualization")
            except Exception as e:
                print(f"Error creating PCA visualization: {e}")

    except Exception as e:
        print(f"Error in visualization generation: {e}")

    print(f"Generated {len(visualizations)} visualizations")

    # If no visualizations were created, add a fallback
    if not visualizations:
        print("No visualizations generated, creating fallback")
        try:
            # Create simple fallback visualization
            fig = go.Figure()

            # Add a simple scatter plot with the first column's head if possible
            if len(df) > 0:
                fig.add_trace(go.Scatter(
                    x=list(range(min(20, len(df)))),
                    y=df.iloc[:min(20, len(df)), 0] if len(df.columns) > 0 else list(range(min(20, len(df)))),
                    mode='markers',
                    name='Fallback Plot'
                ))
            else:
                fig.add_annotation(text="No data to visualize", showarrow=False)

            fig.update_layout(title="Fallback Visualization")
            visualizations['fallback'] = fig
        except Exception as e:
            print(f"Error creating fallback visualization: {e}")

    return visualizations
401
+
402
def display_analysis(analysis):
    """Format the analysis results for display.

    Renders the dict produced by analyze_data as an HTML fragment.
    A None input yields a placeholder message; a string input (an error
    message) is returned unchanged.
    """
    if analysis is None:
        return "No analysis available."

    if isinstance(analysis, str):  # Error message — pass straight through
        return analysis

    parts = ["<h2>Data Analysis</h2>"]

    # Basic info
    shape = analysis['Shape']
    parts.append(f"<p><strong>Shape:</strong> {shape[0]} rows, {shape[1]} columns</p>")
    parts.append(f"<p><strong>Columns:</strong> {', '.join(analysis['Columns'])}</p>")

    # Missing values: either a message string or a per-column dict
    parts.append("<h3>Missing Values</h3>")
    missing = analysis['Missing Values']
    if isinstance(missing, str):
        parts.append(f"<p>{missing}</p>")
    else:
        items = "".join(f"<li>{col}: {count}</li>" for col, count in missing.items())
        parts.append(f"<ul>{items}</ul>")

    # Data quality issues (dict details become a list, strings a paragraph)
    if analysis.get('Data Quality Issues'):
        parts.append("<h3>Data Quality Issues</h3>")
        for issue_type, issue_details in analysis['Data Quality Issues'].items():
            parts.append(f"<h4>{issue_type}</h4>")
            if isinstance(issue_details, dict):
                rows = "".join(f"<li>{key}: {value}</li>" for key, value in issue_details.items())
                parts.append(f"<ul>{rows}</ul>")
            else:
                parts.append(f"<p>{issue_details}</p>")

    # Outliers detected by the IQR method
    if analysis.get('Outliers'):
        parts.append("<h3>Outliers Detected</h3>")
        parts.append("<ul>")
        for col, details in analysis['Outliers'].items():
            parts.append(
                f"<li><strong>{col}:</strong> {details['count']} outliers ({details['percentage']})<br>"
                f"Values outside range: [{details['lower_bound']:.2f}, {details['upper_bound']:.2f}]</li>"
            )
        parts.append("</ul>")

    # Numeric statistics table (already rendered HTML from describe())
    if 'Statistics' in analysis:
        parts.append("<h3>Numeric Statistics</h3>")
        parts.append(analysis['Statistics'])

    # Top values per categorical column
    if 'Category Counts' in analysis:
        parts.append("<h3>Categorical Data (Top Values)</h3>")
        for col, counts in analysis['Category Counts'].items():
            entries = "".join(f"<li>{val}: {count}</li>" for val, count in counts.items())
            parts.append(f"<h4>{col}</h4><ul>{entries}</ul>")

    return "".join(parts)
464
 
465
  def simple_process_file(file):
466
  """Simplified version without AI models for testing"""
467
  # Read the file
468
  df = read_file(file)
469
 
470
+ if isinstance(df, str): # Error message
471
  return df, None, None, None
472
 
473
  # Analyze data
 
509
  if file is None:
510
  return "Please upload a file to begin analysis.", None, None, None
511
 
512
+ print(f"Processing file in demo_ui: {file.name if hasattr(file, 'name') else 'unknown'}")
513
+
514
  # Process the file
515
  analysis, visualizations, cleaning_recommendations, analysis_insights = simple_process_file(file)
516
 
517
+ if isinstance(analysis, str): # Error message
518
+ print(f"Error in analysis: {analysis}")
519
+ return analysis, None, None, None
520
+
521
  # Format analysis for display
522
  analysis_html = display_analysis(analysis)
523
 
524
  # Prepare visualizations for display
525
  viz_html = ""
526
  if visualizations and not isinstance(visualizations, str):
527
+ print(f"Processing {len(visualizations)} visualizations for display")
528
  for viz_name, fig in visualizations.items():
529
+ try:
530
+ # For debugging, print visualization object info
531
+ print(f"Visualization {viz_name}: type={type(fig)}")
532
+
533
+ # Convert plotly figure to HTML
534
+ html_content = fig.to_html(full_html=False, include_plotlyjs="cdn")
535
+ print(f"Generated HTML for {viz_name}, length: {len(html_content)}")
536
+
537
+ viz_html += f'<div style="margin-bottom: 30px;">{html_content}</div>'
538
+ print(f"Added visualization: {viz_name}")
539
+ except Exception as e:
540
+ print(f"Error rendering visualization {viz_name}: {e}")
541
+ else:
542
+ print(f"No visualizations to display: {visualizations}")
543
+ viz_html = "<p>No visualizations could be generated for this dataset.</p>"
544
 
545
  # Combine analysis and visualizations
546
  result_html = f"""
 
553
 
554
  return result_html, visualizations, cleaning_recommendations, analysis_insights
555
 
556
def test_visualization():
    """Create a simple test visualization to verify plotly is working."""
    import plotly.express as px
    import numpy as np

    # Random sample points for the sanity-check scatter
    xs = np.random.rand(100)
    ys = np.random.rand(100)
    scatter = px.scatter(x=xs, y=ys, title="Test Plot")

    # Return a standalone HTML fragment, pulling plotly.js from the CDN
    return scatter.to_html(full_html=False, include_plotlyjs="cdn")
572
+
573
  # Create Gradio interface for demo mode
574
  with gr.Blocks(title="Data Visualization & Cleaning AI (Demo Mode)") as demo:
575
  gr.Markdown("# Data Visualization & Cleaning AI")
 
578
  with gr.Row():
579
  file_input = gr.File(label="Upload Data File")
580
 
581
+ # Add test visualization to verify Plotly is working
582
+ test_viz_html = test_visualization()
583
+ gr.HTML(f"<details><summary>Plotly Test (Click to expand)</summary>{test_viz_html}</details>", visible=True)
584
+
585
  with gr.Tabs():
586
  with gr.TabItem("Data Analysis"):
587
  with gr.Row():