Karthik1610 committed on
Commit ffa226d · verified · 1 Parent(s): 74ff2c6

Update app.py

Files changed (1)
  1. app.py +299 -237
app.py CHANGED
@@ -1,292 +1,354 @@
  import os
- import time
  import json
- import gradio as gr
- import pandas as pd
- from typing import List, Optional, Dict, Any
  from datasets import load_dataset
- from huggingface_hub import InferenceClient
  import evaluate

  # Load evaluation metrics
  rouge = evaluate.load("rouge")
  sacrebleu = evaluate.load("sacrebleu")

- # Model configurations
- MODELS = {
-     "sentiment": [
-         "distilbert-base-uncased-finetuned-sst-2-english",
-         "cardiffnlp/twitter-roberta-base-sentiment-latest"
-     ],
-     "summarization": [
-         "facebook/bart-large-cnn",
-         "google/pegasus-xsum"
-     ],
-     "translation": [
-         "Helsinki-NLP/opus-mt-en-fr",
-         "facebook/m2m100_418M"
-     ]
  }

- def validate_token(token: str) -> bool:
-     """Validate HF token format"""
-     return token and token.strip().startswith("hf_")
-
- def accuracy_score(predictions: List[str], labels: List[str]) -> float:
-     """Calculate accuracy without sklearn"""
-     if len(predictions) != len(labels):
-         return 0.0
-     correct = sum(1 for p, l in zip(predictions, labels) if str(p).lower() == str(l).lower())
-     return correct / len(labels) if labels else 0.0

- def run_inference(model_id: str, texts: List[str], task: str, token: str) -> List[Dict]:
      """Run inference using HF Inference API"""
      client = InferenceClient(model=model_id, token=token)
      results = []

      for text in texts:
          try:
-             if task == "sentiment":
                  result = client.text_classification(text)
                  results.append(result[0] if isinstance(result, list) else result)
              elif task == "summarization":
-                 result = client.summarization(text, max_length=150)
                  results.append(result)
              elif task == "translation":
-                 result = client.translation(text, src_lang="en", tgt_lang="fr")
                  results.append(result)
              else:
                  results.append({"error": "Unsupported task"})
          except Exception as e:
              results.append({"error": str(e)})

-     return results

- def compute_metrics(task: str, predictions: List[Dict], references: Optional[List[str]] = None) -> Dict[str, float]:
      """Compute task-specific metrics"""
-     metrics = {}

-     if task == "sentiment" and references:
-         pred_labels = []
-         for pred in predictions:
-             if isinstance(pred, dict) and "label" in pred:
-                 pred_labels.append(pred["label"])
-             else:
-                 pred_labels.append("UNKNOWN")
-
-         metrics["accuracy"] = accuracy_score(pred_labels, references)
-
-     elif task == "summarization" and references:
-         pred_texts = []
-         for pred in predictions:
-             if isinstance(pred, dict) and "summary_text" in pred:
-                 pred_texts.append(pred["summary_text"])
-             else:
-                 pred_texts.append("")
-
          rouge_scores = rouge.compute(predictions=pred_texts, references=references)
-         metrics.update(rouge_scores)

-     elif task == "translation" and references:
-         pred_texts = []
-         for pred in predictions:
-             if isinstance(pred, dict) and "translation_text" in pred:
-                 pred_texts.append(pred["translation_text"])
-             else:
-                 pred_texts.append("")
-
-         bleu_scores = sacrebleu.compute(predictions=pred_texts, references=[[ref] for ref in references])
-         metrics.update(bleu_scores)

-     return metrics

- def benchmark_models(
-     hf_token: str,
-     task: str,
-     selected_models: List[str],
-     dataset_name: str,
-     sample_size: int,
-     text_column: str,
-     label_column: str
- ):
-     """Main benchmarking function"""

-     # Validate token
-     if not validate_token(hf_token):
-         return "❌ Invalid HuggingFace token. Please provide a token starting with 'hf_'", "", ""

-     if not selected_models:
-         return " Please select at least one model", "", ""

      try:
-         # Load dataset
-         dataset = load_dataset(dataset_name, split="test")
-         if sample_size > 0:
-             dataset = dataset.select(range(min(sample_size, len(dataset))))

-         df = dataset.to_pandas()

-         if text_column not in df.columns:
-             return f"❌ Text column '{text_column}' not found in dataset", "", ""
-
-         texts = df[text_column].astype(str).tolist()
-         references = df[label_column].tolist() if label_column in df.columns else None
-
-         # Results storage
-         all_results = []
-         detailed_results = {"text": texts}
-
-         # Run benchmarks
-         for model_id in selected_models:
-             print(f"Running inference with {model_id}...")
-
-             start_time = time.time()
-             predictions = run_inference(model_id, texts, task, hf_token)
-             inference_time = time.time() - start_time
-
-             # Compute metrics
-             metrics = compute_metrics(task, predictions, references)
-             metrics["model"] = model_id
-             metrics["inference_time"] = round(inference_time, 2)
-             metrics["samples"] = len(texts)
-
-             all_results.append(metrics)
-
-             # Store predictions for detailed view
-             pred_texts = []
-             for pred in predictions:
-                 if isinstance(pred, dict):
-                     if "label" in pred:
-                         pred_texts.append(pred["label"])
-                     elif "summary_text" in pred:
-                         pred_texts.append(pred["summary_text"])
-                     elif "translation_text" in pred:
-                         pred_texts.append(pred["translation_text"])
-                     else:
-                         pred_texts.append(str(pred))
-                 else:
-                     pred_texts.append(str(pred))
-
-             detailed_results[model_id] = pred_texts
-
-         # Create results DataFrames
-         results_df = pd.DataFrame(all_results)
-         detailed_df = pd.DataFrame(detailed_results)
-
-         # Format results for display
-         results_str = "📊 **Benchmark Results:**\n\n"
-         results_str += results_df.to_string(index=False)
-
-         detailed_str = "🔍 **Detailed Predictions (first 10 samples):**\n\n"
-         detailed_str += detailed_df.head(10).to_string(index=False)
-
-         # Create summary
-         summary = f"✅ **Benchmark Complete!**\n\n"
-         summary += f"**Task:** {task}\n"
-         summary += f"**Dataset:** {dataset_name}\n"
-         summary += f"**Models tested:** {len(selected_models)}\n"
-         summary += f"**Samples processed:** {len(texts)}\n"
-         summary += f"**Total time:** {sum(r['inference_time'] for r in all_results):.2f}s\n"
-
-         return summary, results_str, detailed_str
-
-     except Exception as e:
-         return f"❌ Error: {str(e)}", "", ""
-
- # Create Gradio interface
- def create_interface():
-     with gr.Blocks(title="AI Model Benchmark Hub") as demo:
-         gr.Markdown("# 🧪 AI Model Benchmark Hub")
-         gr.Markdown("Compare AI models on various tasks using HuggingFace Inference API")
-
-         with gr.Row():
-             with gr.Column():
-                 gr.Markdown("### 🔑 Authentication")
-                 hf_token = gr.Textbox(
-                     label="HuggingFace Token",
-                     type="password",
-                     placeholder="hf_...",
-                     info="Get your token from https://huggingface.co/settings/tokens"
-                 )

-                 gr.Markdown("### 📋 Task Selection")
-                 task = gr.Dropdown(
-                     choices=["sentiment", "summarization", "translation"],
-                     label="Task",
-                     value="sentiment"
-                 )
-
-                 model_choices = gr.CheckboxGroup(
-                     choices=MODELS["sentiment"],
-                     label="Select Models",
-                     value=[MODELS["sentiment"][0]]
-                 )

-                 def update_models(selected_task):
-                     return gr.update(choices=MODELS[selected_task], value=[MODELS[selected_task][0]])

-                 task.change(update_models, inputs=[task], outputs=[model_choices])
-
-             with gr.Column():
-                 gr.Markdown("### 📊 Dataset Configuration")
-                 dataset_name = gr.Textbox(
-                     label="Dataset Name",
-                     value="imdb",
-                     placeholder="e.g., imdb, amazon_reviews_multi"
                  )

-                 sample_size = gr.Slider(
-                     minimum=10,
-                     maximum=1000,
-                     value=50,
-                     step=10,
-                     label="Sample Size"
-                 )

-                 text_column = gr.Textbox(
-                     label="Text Column Name",
-                     value="text",
-                     placeholder="e.g., text, review, sentence"
-                 )

-                 label_column = gr.Textbox(
-                     label="Label Column Name (optional)",
-                     value="label",
-                     placeholder="e.g., label, sentiment, rating"
-                 )

-         run_btn = gr.Button("🚀 Run Benchmark", variant="primary", size="lg")

-         gr.Markdown("---")

-         with gr.Row():
-             with gr.Column():
-                 summary_output = gr.Markdown(label="Summary")
-
-         with gr.Row():
-             with gr.Column():
-                 results_output = gr.Markdown(label="Results")
-             with gr.Column():
-                 detailed_output = gr.Markdown(label="Detailed Output")

-         # Connect the interface
-         run_btn.click(
-             benchmark_models,
-             inputs=[
-                 hf_token,
-                 task,
-                 model_choices,
-                 dataset_name,
-                 sample_size,
-                 text_column,
-                 label_column
-             ],
-             outputs=[summary_output, results_output, detailed_output]
-         )

-     return demo

- # Launch the app
- if __name__ == "__main__":
-     app = create_interface()
-     app.launch()
  import os
  import json
+ import time
+ import yaml
+ from datetime import datetime
+ from flask import Flask, render_template, request, jsonify, send_file
+ from huggingface_hub import InferenceClient, ModelCard, model_info
  from datasets import load_dataset
+ import pandas as pd
  import evaluate
+ from typing import Dict, List, Any, Optional
+ import io
+ import zipfile
+
+ app = Flask(__name__)

  # Load evaluation metrics
  rouge = evaluate.load("rouge")
  sacrebleu = evaluate.load("sacrebleu")

+ # Benchmark packs (manifests)
+ BENCHMARK_PACKS = {
+     "sentiment": {
+         "name": "Sentiment Analysis Pack",
+         "datasets": [
+             {"id": "imdb", "split": "test", "text_col": "text", "label_col": "label", "sample_size": 100},
+             {"id": "emotion", "split": "test", "text_col": "text", "label_col": "label", "sample_size": 100}
+         ],
+         "metrics": ["accuracy", "f1_macro"],
+         "params": {"max_new_tokens": 32, "temperature": 0.1}
+     },
+     "summarization": {
+         "name": "Text Summarization Pack",
+         "datasets": [
+             {"id": "cnn_dailymail", "config": "3.0.0", "split": "test", "text_col": "article", "label_col": "highlights", "sample_size": 50},
+             {"id": "xsum", "split": "test", "text_col": "document", "label_col": "summary", "sample_size": 50}
+         ],
+         "metrics": ["rouge1", "rouge2", "rougeL"],
+         "params": {"max_new_tokens": 150, "temperature": 0.3}
+     },
+     "translation": {
+         "name": "EN→FR Translation Pack",
+         "datasets": [
+             {"id": "wmt14", "config": "fr-en", "split": "test", "text_col": "translation.en", "label_col": "translation.fr", "sample_size": 50}
+         ],
+         "metrics": ["sacrebleu", "chrf"],
+         "params": {"max_new_tokens": 200, "temperature": 0.1}
+     }
  }

+ def lint_model(model_id: str, token: str = None) -> Dict[str, Any]:
+     """Import and lint a HuggingFace model"""
+     try:
+         # Get model info
+         info = model_info(model_id, token=token)
+
+         # Get model card
+         try:
+             card = ModelCard.load(model_id, token=token)
+             card_data = card.data.to_dict() if hasattr(card, 'data') else {}
+         except:
+             card_data = {}
+
+         # Lint checks
+         checks = {
+             "pipeline_tag": bool(info.pipeline_tag),
+             "license": bool(card_data.get("license")),
+             "model_card": bool(card.content if 'card' in locals() else False),
+             "tags": bool(info.tags),
+             "language": bool(card_data.get("language")),
+             "datasets": bool(card_data.get("datasets")),
+             "metrics": bool(card_data.get("metrics")),
+             "intended_use": "intended use" in (card.content.lower() if 'card' in locals() and card.content else ""),
+             "limitations": "limitation" in (card.content.lower() if 'card' in locals() and card.content else ""),
+             "bias_risks": any(word in (card.content.lower() if 'card' in locals() and card.content else "")
+                               for word in ["bias", "fairness", "risk"])
+         }
+
+         # Calculate readiness score
+         score = sum(checks.values()) / len(checks) * 100
+
+         # Generate recommendations
+         recommendations = []
+         if not checks["license"]: recommendations.append("Add license information")
+         if not checks["model_card"]: recommendations.append("Add detailed model card")
+         if not checks["intended_use"]: recommendations.append("Specify intended use cases")
+         if not checks["limitations"]: recommendations.append("Document known limitations")
+         if not checks["bias_risks"]: recommendations.append("Address bias and safety considerations")
+
+         return {
+             "model_id": model_id,
+             "task": info.pipeline_tag,
+             "readiness_score": round(score),
+             "checks": checks,
+             "recommendations": recommendations,
+             "downloads": info.downloads or 0,
+             "likes": info.likes or 0,
+             "created_at": info.created_at.isoformat() if info.created_at else None,
+             "library_name": info.library_name
+         }
+
+     except Exception as e:
+         return {"error": str(e)}

+ def run_inference(model_id: str, texts: List[str], task: str, token: str, params: Dict = None) -> List[Dict]:
      """Run inference using HF Inference API"""
      client = InferenceClient(model=model_id, token=token)
      results = []
+     params = params or {}
+
+     start_time = time.time()

      for text in texts:
          try:
+             if task == "text-classification":
                  result = client.text_classification(text)
                  results.append(result[0] if isinstance(result, list) else result)
              elif task == "summarization":
+                 result = client.summarization(text, **params)
                  results.append(result)
              elif task == "translation":
+                 result = client.translation(text, **params)
                  results.append(result)
              else:
                  results.append({"error": "Unsupported task"})
          except Exception as e:
              results.append({"error": str(e)})

+     total_time = time.time() - start_time
+     avg_latency = total_time / len(texts) if texts else 0
+
+     return results, avg_latency

+ def compute_metrics(task: str, predictions: List[Dict], references: List[str]) -> Dict[str, float]:
      """Compute task-specific metrics"""
+     if task == "text-classification":
+         pred_labels = [p.get("label", "UNKNOWN") if isinstance(p, dict) else "UNKNOWN" for p in predictions]
+         accuracy = sum(1 for p, r in zip(pred_labels, references) if str(p).lower() == str(r).lower()) / len(references)
+         return {"accuracy": round(accuracy, 4)}

+     elif task == "summarization":
+         pred_texts = [p.get("summary_text", "") if isinstance(p, dict) else "" for p in predictions]
          rouge_scores = rouge.compute(predictions=pred_texts, references=references)
+         return {k: round(v, 4) for k, v in rouge_scores.items()}

+     elif task == "translation":
+         pred_texts = [p.get("translation_text", "") if isinstance(p, dict) else "" for p in predictions]
+         bleu_scores = sacrebleu.compute(predictions=pred_texts, references=[[r] for r in references])
+         return {k: round(v, 4) for k, v in bleu_scores.items()}
+
+     return {}
+
+ def generate_readme_section(results: Dict) -> str:
+     """Generate README section for model"""
+     readme = f"""## Benchmark Results
+
+ **Evaluated on:** {datetime.now().strftime('%Y-%m-%d')}
+ **Task:** {results['task']}
+ **Readiness Score:** {results['readiness_score']}/100
+
+ ### Performance Metrics
+ """
+
+     for dataset_result in results.get('benchmark_results', []):
+         readme += f"\n**Dataset:** {dataset_result['dataset']}\n"
+         for metric, value in dataset_result['metrics'].items():
+             readme += f"- {metric}: {value}\n"
+         readme += f"- Average Latency: {dataset_result['avg_latency']:.3f}s\n"
+
+     readme += f"""
+ ### Quick Start
+ ```python
+ from transformers import pipeline
+ classifier = pipeline("text-classification", model="{results['model_id']}")
+ result = classifier("Your text here")
+ ```
+
+ *Benchmarked with [Clarifai Community Bench](https://huggingface.co/spaces/your-space)*
+ """
+     return readme
+
+ @app.route('/')
+ def index():
+     return render_template('index.html', benchmark_packs=BENCHMARK_PACKS)
+
+ @app.route('/api/lint-model', methods=['POST'])
+ def api_lint_model():
+     data = request.json
+     model_id = data.get('model_id')
+     token = data.get('token')
+
+     if not model_id:
+         return jsonify({"error": "Model ID is required"}), 400

+     result = lint_model(model_id, token)
+     return jsonify(result)

+ @app.route('/api/run-benchmark', methods=['POST'])
+ def api_run_benchmark():
+     data = request.json
+     model_id = data.get('model_id')
+     pack_name = data.get('pack')
+     token = data.get('token')

+     if not all([model_id, pack_name, token]):
+         return jsonify({"error": "Missing required parameters"}), 400

+     if pack_name not in BENCHMARK_PACKS:
+         return jsonify({"error": "Invalid benchmark pack"}), 400

      try:
+         # First lint the model
+         lint_result = lint_model(model_id, token)
+         if "error" in lint_result:
+             return jsonify(lint_result), 400

+         pack = BENCHMARK_PACKS[pack_name]
+         benchmark_results = []

+         # Run benchmark on each dataset in the pack
+         for dataset_config in pack['datasets']:
+             try:
+                 # Load dataset
+                 ds_params = {"path": dataset_config['id']}
+                 if dataset_config.get('config'):
+                     ds_params['name'] = dataset_config['config']

+                 dataset = load_dataset(**ds_params, split=dataset_config['split'])
+                 sample_size = min(dataset_config['sample_size'], len(dataset))
+                 dataset = dataset.select(range(sample_size))

+                 # Extract text and references
+                 texts = [item[dataset_config['text_col']] for item in dataset]
+                 references = [item[dataset_config['label_col']] for item in dataset] if dataset_config.get('label_col') else None

+                 # Run inference
+                 predictions, avg_latency = run_inference(
+                     model_id, texts, lint_result['task'], token, pack['params']
                  )

+                 # Compute metrics
+                 metrics = compute_metrics(lint_result['task'], predictions, references) if references else {}

+                 benchmark_results.append({
+                     "dataset": dataset_config['id'],
+                     "samples": len(texts),
+                     "metrics": metrics,
+                     "avg_latency": round(avg_latency, 3),
+                     "predictions": predictions[:5]  # First 5 for preview
+                 })

+             except Exception as e:
+                 benchmark_results.append({
+                     "dataset": dataset_config['id'],
+                     "error": str(e)
+                 })
+
+         # Combine results
+         result = {
+             **lint_result,
+             "benchmark_results": benchmark_results,
+             "pack_name": pack['name'],
+             "timestamp": datetime.now().isoformat()
+         }

+         return jsonify(result)
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+ @app.route('/api/generate-readme', methods=['POST'])
+ def api_generate_readme():
+     data = request.json
+     readme_content = generate_readme_section(data)
+     return jsonify({"readme": readme_content})
+
+ @app.route('/api/export-artifacts', methods=['POST'])
+ def api_export_artifacts():
+     data = request.json
+
+     # Create ZIP file in memory
+     zip_buffer = io.BytesIO()
+
+     with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
+         # Add benchmark results as JSON
+         zip_file.writestr('benchmark_results.json', json.dumps(data, indent=2))

+         # Add YAML manifest
+         manifest = {
+             'model_id': data.get('model_id'),
+             'task': data.get('task'),
+             'benchmark_pack': data.get('pack_name'),
+             'results': data.get('benchmark_results'),
+             'timestamp': data.get('timestamp')
+         }
+         zip_file.writestr('manifest.yaml', yaml.dump(manifest, default_flow_style=False))

+         # Add README section
+         readme_content = generate_readme_section(data)
+         zip_file.writestr('README_section.md', readme_content)

+         # Add Python utility script
+         python_script = f'''
+ """
+ Model Registration Utility
+ Generated by Clarifai Community Bench
+ """
+
+ import json
+ from datetime import datetime
+
+ class ModelArtifact:
+     def __init__(self, manifest_path="manifest.yaml"):
+         with open(manifest_path, 'r') as f:
+             import yaml
+             self.manifest = yaml.safe_load(f)
+
+     def get_model_info(self):
+         return {{
+             "id": self.manifest["model_id"],
+             "task": self.manifest["task"],
+             "readiness_score": self.manifest.get("readiness_score", 0),
+             "avg_latency": self._calculate_avg_latency(),
+             "best_dataset": self._get_best_performing_dataset()
+         }}
+
+     def _calculate_avg_latency(self):
+         results = self.manifest.get("results", [])
+         if not results:
+             return None
+         latencies = [r.get("avg_latency", 0) for r in results if "avg_latency" in r]
+         return sum(latencies) / len(latencies) if latencies else None
+
+     def _get_best_performing_dataset(self):
+         # Implementation depends on task-specific metrics
+         return self.manifest.get("results", [{{}}])[0].get("dataset")
+
+ # Usage example:
+ # artifact = ModelArtifact()
+ # print(artifact.get_model_info())
+ '''
+         zip_file.writestr('model_utility.py', python_script)
+
+     zip_buffer.seek(0)

+     return send_file(
+         io.BytesIO(zip_buffer.read()),
+         mimetype='application/zip',
+         as_attachment=True,
+         download_name=f'{data.get("model_id", "model").replace("/", "_")}_artifacts.zip'
+     )

+ if __name__ == '__main__':
+     app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)), debug=False)
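
Below is a minimal client sketch (not part of the commit) showing how the Flask endpoints added in this revision might be exercised once the app is running. It assumes the default port 7860 from the `__main__` block; the `model_id`, `pack`, and `token` values are placeholders.

```python
# Hypothetical client for the new /api/lint-model and /api/run-benchmark routes.
# Assumes the app is running locally on port 7860; model_id, pack, and token are placeholders.
import requests

BASE = "http://localhost:7860"
payload = {
    "model_id": "distilbert-base-uncased-finetuned-sst-2-english",  # placeholder model id
    "token": "hf_xxx",  # placeholder HF token
}

# Lint the model card/metadata and get the readiness score and recommendations.
lint = requests.post(f"{BASE}/api/lint-model", json=payload).json()
print(lint.get("readiness_score"), lint.get("recommendations"))

# Run one of the packs defined in BENCHMARK_PACKS and print per-dataset results.
bench = requests.post(f"{BASE}/api/run-benchmark", json={**payload, "pack": "sentiment"}).json()
for ds in bench.get("benchmark_results", []):
    print(ds.get("dataset"), ds.get("metrics"), ds.get("avg_latency"))
```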