n0w0f committed on
Commit
dc2eaff
·
1 Parent(s): 7762410

simplify and unpin

Files changed (2)
  1. app.py +268 -405
  2. requirements.txt +5 -5
app.py CHANGED
@@ -1,44 +1,87 @@
 import datetime
 import os
 import re
-
-import gradio as gr
-import pandas as pd
 import yaml

 # Constants
-EVAL_CARDS_DIR = "eval_cards"
-TEMPLATE_PATH = "template.yaml"
-
-
-# Ensure the eval cards directory exists
-os.makedirs(EVAL_CARDS_DIR, exist_ok=True)
-
-# Copy the template to the appropriate location
-with open("template.yaml", "w") as f:
-    with open("yaml_template.yaml", "r") as template_file:
-        f.write(template_file.read())

-def load_template():
-    """Load the YAML template"""
-    with open(TEMPLATE_PATH, "r") as file:
-        return file.read()

-def yaml_to_dict(yaml_str):
-    """Convert YAML string to Python dictionary"""
     try:
-        return yaml.safe_load(yaml_str)
-    except yaml.YAMLError as e:
-        return {"error": str(e)}

 def compute_coverage_score(eval_data):
-    """
-    Compute a coverage score for the eval card
-    Returns a score from 0-100 and a breakdown of coverage by section
-    """
     sections = {
         "metadata": 5,
         "evaluation_design": 10,
@@ -51,32 +94,23 @@ def compute_coverage_score(eval_data):
         "citation_and_usage": 5,
     }

-    scores = {}
-    total_score = 0
-
-    def count_filled_fields(data, prefix=""):
         if isinstance(data, dict):
-            filled = 0
-            total = 0
-            for key, value in data.items():
                 if isinstance(value, (dict, list)):
-                    sub_filled, sub_total = count_filled_fields(
-                        value, f"{prefix}.{key}" if prefix else key
-                    )
                     filled += sub_filled
                     total += sub_total
                 else:
                     total += 1
-                    if value and not (
-                        isinstance(value, str) and value.strip() in ["", "[]", "{}"]
-                    ):
                         filled += 1
             return filled, total
         elif isinstance(data, list):
             if not data:
                 return 0, 1
-            filled = 0
-            total = 0
             for item in data:
                 sub_filled, sub_total = count_filled_fields(item)
                 filled += sub_filled
@@ -85,426 +119,255 @@ def compute_coverage_score(eval_data):
         else:
             return 1 if data else 0, 1

-    # Compute scores for each section
     for section, weight in sections.items():
         if section in eval_data:
             filled, total = count_filled_fields(eval_data[section])
             completion_rate = filled / total if total > 0 else 0
-            scores[section] = {
-                "score": round(completion_rate * weight, 2),
-                "max_score": weight,
-                "completion_rate": round(completion_rate * 100, 2),
-                "fields_filled": filled,
-                "fields_total": total,
-            }
-            total_score += scores[section]["score"]
-        else:
-            scores[section] = {
-                "score": 0,
-                "max_score": weight,
-                "completion_rate": 0,
-                "fields_filled": 0,
-                "fields_total": 0,
-            }

-    return min(round(total_score, 2), 100), scores


-def get_llm_feedback(yaml_content, api_token=None):
-    """
-    Get feedback on the eval card from Groq's LLM
-    Uses GROQ_API_KEY from environment variables if no token is provided
-    """
-    import os

-    import requests
-    from dotenv import load_dotenv

-    # Load environment variables from .env file if it exists
-    load_dotenv()

-    # Use provided token or get from environment
-    api_token = api_token or os.environ.get("GROQ_API_KEY")

-    if not api_token:
-        return "API token is required for LLM feedback. Please set the GROQ_API_KEY environment variable or provide a token."

-    try:
-        headers = {
-            "Content-Type": "application/json",
-            "Authorization": f"Bearer {api_token}",
-        }
-
-        prompt = f"""
-        I'm reviewing an Evaluation Card in YAML format. Please analyze it for completeness,
-        consistency, and clarity. Provide specific recommendations for improvement.
-
-        Focus on:
-        1. Sections that need more detail
-        2. Inconsistencies or contradictions
-        3. Clarity of language and explanations
-        4. Alignment with best practices for ML evaluation
-
-        Here's the YAML content:
-
-        ```yaml
-        {yaml_content}
-        ```
-
-        Provide your feedback in a structured format with specific, actionable recommendations.
-        """
-
-        payload = {
-            "model": "llama-3.3-70b-versatile",  # or another groq supported model
-            "messages": [{"role": "user", "content": prompt}],
-        }

-        response = requests.post(
-            "https://api.groq.com/openai/v1/chat/completions",
-            headers=headers,
-            json=payload,
         )

-        if response.status_code == 200:
-            return response.json()["choices"][0]["message"]["content"]
-        else:
-            return f"Error getting Groq LLM feedback: {response.status_code} - {response.text}"

     except Exception as e:
-        return f"Error getting Groq LLM feedback: {str(e)}"


-def save_eval_card(yaml_content, paper_url="", repo_url=""):
-    """Save an eval card with additional metadata"""
     try:
-        eval_data = yaml.safe_load(yaml_content)

-        # Add paper and repository links to metadata
-        if paper_url:
-            eval_data["metadata"]["paper_link"] = paper_url
-        if repo_url:
-            eval_data["metadata"]["repository_link"] = repo_url

-        # Update the YAML content with the new metadata
-        yaml_content = yaml.dump(eval_data)

-        filename = re.sub(r"[^\w\-_]", "_", eval_data.get("title", "Unnamed"))
-        filename = (
-            f"{filename}_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.yaml"
         )
-        file_path = os.path.join(EVAL_CARDS_DIR, filename)

-        with open(file_path, "w") as file:
-            file.write(yaml_content)

-        return f"Evaluation card saved successfully as {filename}", file_path
     except Exception as e:
-        return f"Error saving evaluation card: {str(e)}", None
-
-
-def load_all_eval_cards():
-    """Load all eval cards from the repository"""
-    eval_cards = []
-
-    for filename in os.listdir(EVAL_CARDS_DIR):
-        if filename.endswith(".yaml"):
-            file_path = os.path.join(EVAL_CARDS_DIR, filename)
-            try:
-                with open(file_path, "r") as file:
-                    yaml_content = file.read()
-                eval_data = yaml.safe_load(yaml_content)
-
-                # Compute coverage score
-                score, score_details = compute_coverage_score(eval_data)
-                score = min(score, 100)
-
-                # Extract key metadata
-                eval_cards.append(
-                    {
-                        "filename": filename,
-                        "title": eval_data.get("title", "Unnamed Evaluation"),
-                        "summary": eval_data.get("summary", ""),
-                        "authors": ", ".join(
-                            eval_data.get("metadata", {}).get("authors", [])
-                        ),
-                        "creation_date": eval_data.get("metadata", {}).get(
-                            "creation_date", ""
-                        ),
-                        "coverage_score": score,
-                        "score_details": score_details,
-                        "yaml_content": yaml_content,
-                        "data": eval_data,
-                    }
-                )
-            except Exception as e:
-                print(f"Error loading {filename}: {str(e)}")
-
-    return eval_cards
-
-
-def format_eval_card_as_html(eval_card):
-    """Format an eval card as HTML for display"""
-    html = f"""
-    <div style="border: 1px solid #ddd; padding: 15px; margin-bottom: 20px; border-radius: 5px;">
-        <h3>{eval_card["title"]}</h3>
-        <p>{eval_card["summary"]}</p>
-        <p><strong>Authors:</strong> {eval_card["authors"]}</p>
-        <p><strong>Created:</strong> {eval_card["creation_date"]}</p>
-
-        <!-- Add repository and paper links if available -->
-        {f'<p><strong>Repository:</strong> <a href="{eval_card["data"]["metadata"].get("repository_link", "")}" target="_blank">{eval_card["data"]["metadata"].get("repository_link", "")}</a></p>' if eval_card["data"]["metadata"].get("repository_link") else ""}
-        {f'<p><strong>Paper:</strong> <a href="{eval_card["data"]["metadata"].get("paper_link", "")}" target="_blank">{eval_card["data"]["metadata"].get("paper_link", "")}</a></p>' if eval_card["data"]["metadata"].get("paper_link") else ""}
-
-        <p><strong>Coverage Score:</strong> {eval_card["coverage_score"]}%</p>
-
-        <h4>Coverage by Section:</h4>
-        <table style="width: 100%; border-collapse: collapse;">
-            <tr>
-                <th style="text-align: left; padding: 5px; border-bottom: 1px solid #ddd;">Section</th>
-                <th style="text-align: right; padding: 5px; border-bottom: 1px solid #ddd;">Score</th>
-                <th style="text-align: right; padding: 5px; border-bottom: 1px solid #ddd;">Completion</th>
-            </tr>
-    """
-
-    for section, details in eval_card["score_details"].items():
-        html += f"""
-            <tr>
-                <td style="padding: 5px; border-bottom: 1px solid #eee;">{section}</td>
-                <td style="text-align: right; padding: 5px; border-bottom: 1px solid #eee;">{details["score"]}/{details["max_score"]}</td>
-                <td style="text-align: right; padding: 5px; border-bottom: 1px solid #eee;">{details["completion_rate"]}%</td>
-            </tr>
-        """
-
-    html += """
-        </table>
-        <div style="margin-top: 15px;">
-            <!-- Additional actions can go here -->
-        </div>
-    </div>
-    """
-
-    return html
-
-
-def create_eval_cards_table(eval_cards):
-    """Create an HTML table of eval cards"""
-    if not eval_cards:
-        return "<p>No evaluation cards found.</p>"
-
-    # Sort by coverage score (highest first)
-    eval_cards.sort(key=lambda x: x["coverage_score"], reverse=True)
-
-    html = ""
-    for eval_card in eval_cards:
-        html += format_eval_card_as_html(eval_card)
-
-    return html
-
-
-def upload_file(file):
-    """Process an uploaded YAML file"""
-    if file is None:
-        return "No file uploaded", None

-    try:
-        yaml_content = file.decode("utf-8")
-        # Validate YAML
-        eval_data = yaml.safe_load(yaml_content)
-        return yaml_content, eval_data
-    except Exception as e:
-        return f"Error processing file: {str(e)}", None


-def get_feedback(yaml_content):
-    """Get LLM feedback on the eval card"""
-    if not yaml_content:
-        return "Please upload or paste a YAML file first."

-    # Use provided token or get from environment
-    api_token = os.environ.get("GROQ_API_KEY")

-    if not api_token:
-        return (
-            "Please provide an API token or set the GROQ_API_KEY environment variable."
         )

-    feedback = get_llm_feedback(yaml_content, api_token)
-    return feedback


-def submit_eval_card(yaml_content, paper_url="", repo_url=""):
-    """Submit an eval card to the repository"""
-    if not yaml_content:
-        return "Please upload or paste a YAML file first.", None, None

     try:
-        # Validate YAML
-        eval_data = yaml.safe_load(yaml_content)

-        # Compute coverage score
-        score, score_details = compute_coverage_score(eval_data)

-        # Save eval card with URLs
-        result, file_path = save_eval_card(yaml_content, paper_url, repo_url)

-        if file_path:
-            return (
-                f"Evaluation card saved successfully! Coverage score: {score}%",
-                score,
-                score_details,
-            )
-        else:
-            return f"Error saving evaluation card: {result}", None, None

-    except Exception as e:
-        return f"Error processing evaluation card: {str(e)}", None, None


 def refresh_gallery():
-    """Refresh the gallery of eval cards"""
-    eval_cards = load_all_eval_cards()
-    html = create_eval_cards_table(eval_cards)
-
-    # Convert data to pandas DataFrame for table view
-    table_data = []
-
-    # First, count authors across all cards
-    author_counts = {}
-    for card in eval_cards:
-        authors = card["authors"].split(", ")
-        for author in authors:
-            if author and author.strip():  # Skip empty authors
-                if author in author_counts:
-                    author_counts[author] += 1
-                else:
-                    author_counts[author] = 1
-
-    # Get top authors
-    top_authors = sorted(author_counts.items(), key=lambda x: x[1], reverse=True)[:20]
-    top_authors = [author for author, count in top_authors]
-
-    # Create table data with one entry per card
-    for card in eval_cards:
-        authors = card["authors"].split(", ")
-        filtered_authors = [author for author in authors if author in top_authors]
-        table_data.append(
-            {
-                "Title": card["title"],
-                "Authors": ", ".join(filtered_authors),
-                "Creation Date": card["creation_date"],
-                "Coverage Score": f"{card['coverage_score']}%",
-            }
-        )

-    df = pd.DataFrame(table_data)

-    return html, df if not df.empty else None


-def handle_upload_tab(file_obj, yaml_text):
-    """Handle upload tab actions - either use uploaded file or pasted text"""
-    if file_obj is not None:
-        yaml_content, eval_data = upload_file(file_obj)
-        return yaml_content
-    else:
-        return yaml_text


-# Create the Gradio interface
-with gr.Blocks(title="Evaluation Cards Gallery") as app:
-    with gr.Row():
-        with gr.Column(scale=2):
-            gr.Markdown(
-                "# Evaluation Cards for Machine Learning in Materials Science. "
             )
-            gr.Markdown("""
-            Upload your evaluation card in YAML format, get feedback from an LLM, and submit it to the gallery.
-            checkout the [GitHub repository](https://github.com/lamalab-org/eval-cards) for more information.
-            """)

-    with gr.Tabs():
-        with gr.TabItem("Upload & Review"):
-            with gr.Row():
-                with gr.Column():
-                    file_upload = gr.File(
-                        label="Upload YAML File", file_types=[".yaml", ".yml"]
-                    )
-
-                    with gr.Accordion("Or paste YAML content", open=False):
-                        yaml_input = gr.TextArea(
-                            label="YAML Content",
-                            placeholder="Paste your YAML content here...",
-                            lines=10,
-                        )
-                    paper_url_input = gr.Textbox(
-                        label="Paper URL (Optional)",
-                        placeholder="https://arxiv.org/abs/...",
-                    )
-
-                    repo_url_input = gr.Textbox(
-                        label="Repository URL (Optional)",
-                        placeholder="https://github.com/...",
-                    )
-
-                    load_template_btn = gr.Button("Load Template")
-
-                    # api_token = gr.Textbox(label="API Token (for LLM feedback)", type="password")
-
-                    with gr.Row():
-                        get_feedback_btn = gr.Button("Get LLM Feedback")
-                        submit_btn = gr.Button(
-                            "Submit Evaluation Card", variant="primary"
-                        )
-
-                with gr.Column():
-                    yaml_display = gr.TextArea(label="Current YAML", lines=20)
-
-                    with gr.Accordion("LLM Feedback", open=True):
-                        feedback_display = gr.Markdown()
-
-                    with gr.Accordion("Submission Result", open=True):
-                        result_display = gr.Markdown()
-                        coverage_score = gr.Number(
-                            label="Coverage Score", visible=False
-                        )
-                        coverage_details = gr.JSON(
-                            label="Coverage Details", visible=False
-                        )
-
-        with gr.TabItem("Gallery"):
-            refresh_btn = gr.Button("Refresh Gallery")
-
-            with gr.Tabs():
-                with gr.TabItem("Card View"):
-                    gallery_html = gr.HTML()
-
-                with gr.TabItem("Table View"):
-                    gallery_table = gr.DataFrame()
-
-    # Set up event handlers
-    load_template_btn.click(fn=load_template, outputs=[yaml_display])
-
-    file_upload.change(
-        fn=handle_upload_tab, inputs=[file_upload, yaml_input], outputs=[yaml_display]
-    )

-    yaml_input.change(fn=lambda x: x, inputs=[yaml_input], outputs=[yaml_display])

-    get_feedback_btn.click(
-        fn=get_feedback, inputs=[yaml_display], outputs=[feedback_display]
-    )

     submit_btn.click(
-        fn=submit_eval_card,
-        inputs=[yaml_display, paper_url_input, repo_url_input],
-        outputs=[result_display, coverage_score, coverage_details],
     )

-    refresh_btn.click(fn=refresh_gallery, outputs=[gallery_html, gallery_table])
-
-    # Initialize the gallery on app start
-    app.load(fn=refresh_gallery, outputs=[gallery_html, gallery_table])

-# Launch the app
 if __name__ == "__main__":
-    app.launch()
 
 import datetime
 import os
 import re
 import yaml
+from datasets import Dataset, load_dataset
+from huggingface_hub import create_repo, login
+import gradio as gr

 # Constants
+DATASET_NAME = "jablonkagroup/eval-cards-dataset"


+def setup_hf_auth():
+    """Setup Hugging Face authentication"""
+    try:
+        hf_token = os.environ.get("HF_TOKEN")
+        if hf_token:
+            login(token=hf_token)
+            return True
+        return False
+    except Exception:
+        return False


+def ensure_dataset_exists():
+    """Ensure the dataset repository exists"""
     try:
+        load_dataset(DATASET_NAME, split="train")
+        return True
+    except Exception:
+        try:
+            if not setup_hf_auth():
+                return False
+            create_repo(
+                repo_id=DATASET_NAME, repo_type="dataset", private=False, exist_ok=True
+            )
+            empty_data = {
+                "filename": [],
+                "title": [],
+                "summary": [],
+                "authors": [],
+                "creation_date": [],
+                "coverage_score": [],
+                "yaml_content": [],
+                "paper_link": [],
+                "repository_link": [],
+                "timestamp": [],
+            }
+            empty_dataset = Dataset.from_dict(empty_data)
+            empty_dataset.push_to_hub(DATASET_NAME)
+            return True
+        except Exception:
+            return False
+
+
+def get_template():
+    """Get a basic YAML template"""
+    return """title: "Your Evaluation Title"
+summary: "Brief description of your evaluation"
+metadata:
+  authors: []
+  creation_date: ""
+evaluation_design:
+  purpose: ""
+  scope: ""
+estimand:
+  definition: ""
+estimator:
+  method: ""
+estimate:
+  results: ""
+results_communication:
+  format: ""
+known_issues_and_limitations:
+  issues: []
+version_and_maintenance:
+  version: "1.0"
+citation_and_usage:
+  citation: ""
+"""


 def compute_coverage_score(eval_data):
+    """Compute a coverage score for the eval card"""
     sections = {
         "metadata": 5,
         "evaluation_design": 10,
         "citation_and_usage": 5,
     }

+    def count_filled_fields(data):
         if isinstance(data, dict):
+            filled = total = 0
+            for value in data.values():
                 if isinstance(value, (dict, list)):
+                    sub_filled, sub_total = count_filled_fields(value)
                     filled += sub_filled
                     total += sub_total
                 else:
                     total += 1
+                    if value and str(value).strip() not in ["", "[]", "{}"]:
                         filled += 1
             return filled, total
         elif isinstance(data, list):
             if not data:
                 return 0, 1
+            filled = total = 0
             for item in data:
                 sub_filled, sub_total = count_filled_fields(item)
                 filled += sub_filled
         else:
             return 1 if data else 0, 1

+    total_score = 0
     for section, weight in sections.items():
         if section in eval_data:
             filled, total = count_filled_fields(eval_data[section])
             completion_rate = filled / total if total > 0 else 0
+            total_score += completion_rate * weight

+    return min(round(total_score, 2), 100)


+def save_eval_card(yaml_content, paper_url="", repo_url=""):
+    """Save an eval card to the dataset"""
+    try:
+        if not setup_hf_auth():
+            return "Error: HF_TOKEN not found. Please set your Hugging Face token in Space settings."

+        eval_data = yaml.safe_load(yaml_content)

+        if paper_url:
+            eval_data.setdefault("metadata", {})["paper_link"] = paper_url
+        if repo_url:
+            eval_data.setdefault("metadata", {})["repository_link"] = repo_url

+        yaml_content = yaml.dump(eval_data)
+        filename = re.sub(r"[^\w\-_]", "_", eval_data.get("title", "Unnamed"))
+        filename = (
+            f"{filename}_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.yaml"
+        )

+        try:
+            dataset = load_dataset(DATASET_NAME, split="train")
+            existing_data = dataset.to_dict()
+        except Exception:
+            existing_data = {
+                "filename": [],
+                "title": [],
+                "summary": [],
+                "authors": [],
+                "creation_date": [],
+                "coverage_score": [],
+                "yaml_content": [],
+                "paper_link": [],
+                "repository_link": [],
+                "timestamp": [],
+            }

+        score = compute_coverage_score(eval_data)

+        existing_data["filename"].append(filename)
+        existing_data["title"].append(eval_data.get("title", "Unnamed"))
+        existing_data["summary"].append(eval_data.get("summary", ""))
+        existing_data["authors"].append(
+            ", ".join(eval_data.get("metadata", {}).get("authors", []))
+        )
+        existing_data["creation_date"].append(
+            eval_data.get("metadata", {}).get("creation_date", "")
         )
+        existing_data["coverage_score"].append(float(score))
+        existing_data["yaml_content"].append(yaml_content)
+        existing_data["paper_link"].append(paper_url or "")
+        existing_data["repository_link"].append(repo_url or "")
+        existing_data["timestamp"].append(datetime.datetime.now().isoformat())

+        updated_dataset = Dataset.from_dict(existing_data)
+        updated_dataset.push_to_hub(DATASET_NAME)
+
+        return f"✅ Successfully saved! Coverage score: {score}%\nFilename: {filename}"

     except Exception as e:
+        return f"❌ Error: {str(e)}"


+def load_gallery():
+    """Load all evaluation cards from dataset"""
     try:
+        if not setup_hf_auth():
+            return "Please set HF_TOKEN in Space settings to view gallery."

+        if not ensure_dataset_exists():
+            return "Dataset not accessible. Please check HF_TOKEN."

+        dataset = load_dataset(DATASET_NAME, split="train")

+        if len(dataset) == 0:
+            return "No evaluation cards found. Submit your first card!"
+
+        cards_html = "<h3>📊 Evaluation Cards Gallery</h3>"
+
+        # Sort by coverage score
+        sorted_indices = sorted(
+            range(len(dataset)),
+            key=lambda i: dataset[i]["coverage_score"],
+            reverse=True,
         )

+        for i in sorted_indices[:10]:  # Show top 10
+            row = dataset[i]
+            cards_html += f"""
+            <div style="border: 1px solid #ddd; padding: 15px; margin: 10px 0; border-radius: 8px; background-color: #f9f9f9;">
+                <h4>🎯 {row["title"]}</h4>
+                <p><strong>Summary:</strong> {row["summary"]}</p>
+                <p><strong>Authors:</strong> {row["authors"]}</p>
+                <p><strong>Coverage Score:</strong> <span style="color: #2e7d32; font-weight: bold;">{row["coverage_score"]}%</span></p>
+                <p><strong>Created:</strong> {row["creation_date"]}</p>
+                {f'<p><strong>📄 Paper:</strong> <a href="{row["paper_link"]}" target="_blank">View</a></p>' if row.get("paper_link") else ""}
+                {f'<p><strong>💻 Repository:</strong> <a href="{row["repository_link"]}" target="_blank">View</a></p>' if row.get("repository_link") else ""}
+            </div>
+            """
+
+        return cards_html

     except Exception as e:
+        return f"Error loading gallery: {str(e)}"


+def get_llm_feedback(yaml_content):
+    """Get LLM feedback using Groq"""
+    api_token = os.environ.get("GROQ_API_KEY")
+    if not api_token:
+        return "Please set GROQ_API_KEY in Space settings to get LLM feedback."

+    if not yaml_content.strip():
+        return "Please provide YAML content first."

+    try:
+        import requests

+        response = requests.post(
+            "https://api.groq.com/openai/v1/chat/completions",
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {api_token}",
+            },
+            json={
+                "model": "llama-3.3-70b-versatile",
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": f"Analyze this evaluation card YAML and provide specific improvement suggestions:\n\n```yaml\n{yaml_content}\n```\n\nFocus on completeness, clarity, and best practices.",
+                    }
+                ],
+            },
         )

+        if response.status_code == 200:
+            return response.json()["choices"][0]["message"]["content"]
+        else:
+            return f"API Error {response.status_code}: {response.text}"
+
+    except Exception as e:
+        return f"Error getting feedback: {str(e)}"


+# Simple functions for the interface
+def submit_card(yaml_text, paper_url, repo_url):
+    if not yaml_text.strip():
+        return "Please provide YAML content", ""

     try:
+        yaml.safe_load(yaml_text)  # Validate YAML
+        result = save_eval_card(yaml_text, paper_url, repo_url)
+        gallery = load_gallery()
+        return result, gallery
+    except yaml.YAMLError as e:
+        return f"Invalid YAML: {str(e)}", ""
+    except Exception as e:
+        return f"Error: {str(e)}", ""


+def load_template_text():
+    return get_template()


+def get_feedback(yaml_text):
+    return get_llm_feedback(yaml_text)


 def refresh_gallery():
+    return load_gallery()
+
+
+# Create the interface using older, more stable Gradio approach
+with gr.Blocks(
+    title="Evaluation Cards Gallery",
+    theme=gr.themes.Soft(),
+    css="footer {visibility: hidden}",
+) as demo:
+    gr.Markdown("""
+    # 📊 Evaluation Cards for Machine Learning
+
+    Upload your evaluation card in YAML format and submit it to the persistent gallery.
+    Data is stored in HF dataset: [jablonkagroup/eval-cards-dataset](https://huggingface.co/datasets/jablonkagroup/eval-cards-dataset)
+    """)

+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### ✏️ Create/Edit Evaluation Card")
+
+            yaml_editor = gr.Textbox(
+                label="YAML Content",
+                lines=15,
+                placeholder="Paste your YAML content or click 'Load Template'...",
+                value="",
+            )

+            with gr.Row():
+                template_btn = gr.Button("📝 Load Template", size="sm")
+                feedback_btn = gr.Button("🤖 Get AI Feedback", size="sm")

+            paper_url = gr.Textbox(
+                label="📄 Paper URL (Optional)",
+                placeholder="https://arxiv.org/abs/...",
+                value="",
+            )

+            repo_url = gr.Textbox(
+                label="💻 Repository URL (Optional)",
+                placeholder="https://github.com/...",
+                value="",
+            )

+            submit_btn = gr.Button("🚀 Submit to Gallery", variant="primary", size="lg")

+            result_box = gr.Textbox(
+                label="📤 Submission Result", lines=3, interactive=False
             )

+        with gr.Column(scale=1):
+            gr.Markdown("### 🎯 AI Feedback")
+
+            feedback_box = gr.Textbox(
+                label="💡 Improvement Suggestions", lines=8, interactive=False
+            )

+    gr.Markdown("### 🏛️ Gallery")

+    refresh_btn = gr.Button("🔄 Refresh Gallery", size="sm")
+
+    gallery_display = gr.HTML(value=load_gallery())

+    # Event handlers
+    template_btn.click(load_template_text, outputs=[yaml_editor])
+    feedback_btn.click(get_feedback, inputs=[yaml_editor], outputs=[feedback_box])
     submit_btn.click(
+        submit_card,
+        inputs=[yaml_editor, paper_url, repo_url],
+        outputs=[result_box, gallery_display],
     )
+    refresh_btn.click(refresh_gallery, outputs=[gallery_display])

 if __name__ == "__main__":
+    demo.launch()
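
For reference, the heart of the new app.py is a read-append-push round trip against the Hub dataset. Below is a minimal sketch of that pattern, distilled from save_eval_card above (it assumes HF_TOKEN is already set and the dataset repo exists; auth, coverage scoring, and error handling are omitted, and the appended row values are placeholder examples):

import datetime

from datasets import Dataset, load_dataset

DATASET_NAME = "jablonkagroup/eval-cards-dataset"

# Pull the current rows down as plain Python column lists.
existing = load_dataset(DATASET_NAME, split="train").to_dict()

# Append one row by extending every column in lockstep.
existing["filename"].append("example_card.yaml")
existing["title"].append("Example")
existing["summary"].append("")
existing["authors"].append("")
existing["creation_date"].append("")
existing["coverage_score"].append(0.0)
existing["yaml_content"].append('title: "Example"\n')
existing["paper_link"].append("")
existing["repository_link"].append("")
existing["timestamp"].append(datetime.datetime.now().isoformat())

# Rebuild the dataset and push; this rewrites the whole train split.
Dataset.from_dict(existing).push_to_hub(DATASET_NAME)

Note that every submission re-uploads the full split, which is fine at gallery scale but worth revisiting if the dataset grows large.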
requirements.txt CHANGED
@@ -1,5 +1,5 @@
-gradio==4.36.0
-pyyaml>=6.0
-pandas>=2.0.0
-requests>=2.31.0
-python-dotenv>=1.0.0
+gradio
+pyyaml
+datasets
+huggingface_hub
+requests
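
With every pin removed, the Space now builds against the latest release of each dependency on every rebuild. If a future release breaks the build, the file can be re-pinned to the last known-good versions; the placeholders below are illustrative, to be filled in from pip freeze on a working build:

gradio==<last-known-good>
pyyaml==<last-known-good>
datasets==<last-known-good>
huggingface_hub==<last-known-good>
requests==<last-known-good>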