Gwaldo committed (verified)
Commit 4e70e1a · 1 Parent(s): 6336285

Update app.py

Files changed (1)
  app.py +131 -200
app.py CHANGED
@@ -1,241 +1,172 @@
  import gradio as gr
  from huggingface_hub import HfApi, hf_hub_download
  from huggingface_hub.repocard import metadata_load
  import requests
  import re
  import pandas as pd
- from huggingface_hub import ModelCard
- import os
-

  def pass_emoji(passed):
-     if passed is True:
-         passed = "✅"
-     else:
-         passed = "❌"
-     return passed

  api = HfApi()
  USERNAMES_DATASET_ID = "huggingface-course/audio-course-u7-hands-on"
  HF_TOKEN = os.environ.get("HF_TOKEN")

-
  def get_user_models(hf_username, task):
      """
      List the user's models for a given task
-     :param hf_username: User HF username
      """
-     models = api.list_models(author=hf_username, filter=[task])
      user_model_ids = [x.modelId for x in models]

-     match task:
-         case "audio-classification":
              dataset = 'marsyas/gtzan'
-         case "automatic-speech-recognition":
              dataset = 'PolyAI/minds14'
-         case "text-to-speech":
              dataset = ""
-         case _:
-             print("Unsupported task")

      dataset_specific_models = []
-
-     if dataset == "":
-         return user_model_ids
-     else:
-         for model in user_model_ids:
-             meta = get_metadata(model)
-             if meta is None:
-                 continue
-             try:
-                 if meta["datasets"] == [dataset]:
                      dataset_specific_models.append(model)
-             except:
                  continue
-     return dataset_specific_models
-
- def calculate_best_result(user_models, task):
-     """
-     Calculate the best results of a unit for a given task
-     :param user_model_ids: models of a user
-     """
-
-     best_model = ""
-
-     if task == "audio-classification":
-         best_result = -100
-         larger_is_better = True
-     elif task == "automatic-speech-recognition":
-         best_result = 100
-         larger_is_better = False
-
-     for model in user_models:
-         meta = get_metadata(model)
-         if meta is None:
-             continue
-         metric = parse_metrics(model, task)
-
-         if metric == None:
-             continue
-
-         if larger_is_better:
-             if metric > best_result:
-                 best_result = metric
-                 best_model = meta['model-index'][0]["name"]
-         else:
-             if metric < best_result:
-                 best_result = metric
-                 best_model = meta['model-index'][0]["name"]
-
-     return best_result, best_model


  def get_metadata(model_id):
-     """
-     Get model metadata (contains evaluation data)
-     :param model_id
-     """
-     try:
-         readme_path = hf_hub_download(model_id, filename="README.md")
-         return metadata_load(readme_path)
-     except requests.exceptions.HTTPError:
-         # 404 README.md not found
-         return None
-

  def extract_metric(model_card_content, task):
-     """
-     Extract the metric value from the models' model card
-     :param model_card_content: model card content
-     """
      accuracy_pattern = r"(?:Accuracy|eval_accuracy): (\d+\.\d+)"
      wer_pattern = r"Wer: (\d+\.\d+)"
-
-     if task == "audio-classification":
-         pattern = accuracy_pattern
-     elif task == "automatic-speech-recognition":
-         pattern = wer_pattern
-
      match = re.search(pattern, model_card_content)
-     if match:
-         metric = match.group(1)
-         return float(metric)
-     else:
-         return None
-

  def parse_metrics(model, task):
-     """
-     Get model card and parse it
-     :param model_id: model id
-     """
-     card = ModelCard.load(model)
-     return extract_metric(card.content, task)
-

  def certification(hf_username):
-     results_certification = [
-         {
-             "unit": "Unit 4: Audio Classification",
-             "task": "audio-classification",
-             "baseline_metric": 0.87,
-             "best_result": 0,
-             "best_model_id": "",
-             "passed_": False
-         },
-         {
-             "unit": "Unit 5: Automatic Speech Recognition",
-             "task": "automatic-speech-recognition",
-             "baseline_metric": 0.37,
-             "best_result": 0,
-             "best_model_id": "",
-             "passed_": False
-         },
-         {
-             "unit": "Unit 6: Text-to-Speech",
-             "task": "text-to-speech",
-             "baseline_metric": 0,
-             "best_result": 0,
-             "best_model_id": "",
-             "passed_": False
-         },
-         {
-             "unit": "Unit 7: Audio applications",
-             "task": "demo",
-             "baseline_metric": 0,
-             "best_result": 0,
-             "best_model_id": "",
-             "passed_": False
-         },
-     ]
-
-     for unit in results_certification:
-         unit["passed"] = pass_emoji(unit["passed_"])
-
-         match unit["task"]:
-             case "audio-classification":
-                 try:
-                     user_ac_models = get_user_models(hf_username, task = "audio-classification")
-                     best_result, best_model_id = calculate_best_result(user_ac_models, task = "audio-classification")
-                     unit["best_result"] = best_result
-                     unit["best_model_id"] = best_model_id
-                     if unit["best_result"] >= unit["baseline_metric"]:
-                         unit["passed_"] = True
-                         unit["passed"] = pass_emoji(unit["passed_"])
-                 except: print("Either no relevant models found, or no metrics in the model card for audio classificaiton")
-             case "automatic-speech-recognition":
-                 try:
-                     user_asr_models = get_user_models(hf_username, task = "automatic-speech-recognition")
-                     best_result, best_model_id = calculate_best_result(user_asr_models, task = "automatic-speech-recognition")
-                     unit["best_result"] = best_result
-                     unit["best_model_id"] = best_model_id
-                     if unit["best_result"] <= unit["baseline_metric"]:
-                         unit["passed_"] = True
-                         unit["passed"] = pass_emoji(unit["passed_"])
-                 except: print("Either no relevant models found, or no metrics in the model card for automatic speech recognition")
-             case "text-to-speech":
-                 try:
-                     user_tts_models = get_user_models(hf_username, task = "text-to-speech")
-                     if user_tts_models:
-                         unit["best_result"] = 0
-                         unit["best_model_id"] = user_tts_models[0]
-                         unit["passed_"] = True
-                         unit["passed"] = pass_emoji(unit["passed_"])
-                 except: print("Either no relevant models found, or no metrics in the model card for automatic speech recognition")
-             case "demo":
-                 u7_usernames = hf_hub_download(USERNAMES_DATASET_ID, repo_type = "dataset", filename="usernames.csv", token=HF_TOKEN)
-                 u7_users = pd.read_csv(u7_usernames)
-                 if hf_username in u7_users['username'].tolist():
-                     unit["best_result"] = 0
-                     unit["best_model_id"] = "Demo check passed, no model id"
-                     unit["passed_"] = True
-                     unit["passed"] = pass_emoji(unit["passed_"])
-             case _:
-                 print("Unknown task")
-
-     print(results_certification)
-
-     df = pd.DataFrame(results_certification)
-     df = df[['passed', 'unit', 'task', 'baseline_metric', 'best_result', 'best_model_id']]
-     return df
-
  with gr.Blocks() as demo:
-     gr.Markdown(f"""
      # 🏆 Check your progress in the Audio Course 🏆
-
-     - To get a certificate of completion, you must **pass 3 out of 4 assignments**.
-     - To get an honors certificate, you must **pass 4 out of 4 assignments**.
-
-     For the assignments where you have to train a model, your model's metric should be equal to or better than the baseline metric.
-     For the Unit 7 assignment, first, check your demo with the [Unit 7 assessment space](https://huggingface.co/spaces/huggingface-course/audio-course-u7-assessment)
-
-     Make sure that you have uploaded your model(s) to Hub, and that your Unit 7 demo is public.
-     To check your progress, type your Hugging Face Username here (in my case MariaK)
      """)
-
-     hf_username = gr.Textbox(placeholder="MariaK", label="Your Hugging Face Username")
-     check_progress_button = gr.Button(value="Check my progress")
-     output = gr.components.Dataframe(value=certification(hf_username))
-     check_progress_button.click(fn=certification, inputs=hf_username, outputs=output)
-
- demo.launch()
  import gradio as gr
  from huggingface_hub import HfApi, hf_hub_download
  from huggingface_hub.repocard import metadata_load
+ from huggingface_hub import ModelCard
  import requests
  import re
  import pandas as pd
+ import os

+ # --------------------
+ # Helper functions
+ # --------------------
  def pass_emoji(passed):
+     return "✅" if passed else "❌"

  api = HfApi()
  USERNAMES_DATASET_ID = "huggingface-course/audio-course-u7-hands-on"
  HF_TOKEN = os.environ.get("HF_TOKEN")
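+ # HF_TOKEN comes from the environment (e.g. a Space secret) and is passed to the Hub downloads below.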

  def get_user_models(hf_username, task):
      """
      List the user's models for a given task
      """
+     try:
+         models = api.list_models(author=hf_username, filter=[task])
+     except Exception:
+         return []
+
      user_model_ids = [x.modelId for x in models]

+     # map task to dataset
+     if task == "audio-classification":
          dataset = 'marsyas/gtzan'
+     elif task == "automatic-speech-recognition":
          dataset = 'PolyAI/minds14'
+     elif task == "text-to-speech":
          dataset = ""
+     else:
+         print(f"Unsupported task: {task}")
+         return []

      dataset_specific_models = []
+     for model in user_model_ids:
+         try:
+             meta = get_metadata(model)
+             if meta is None:
+                 continue
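+             # An empty dataset string (the text-to-speech case) keeps every model for the task.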
+             if dataset == "" or meta.get("datasets") == [dataset]:
                  dataset_specific_models.append(model)
+         except Exception:
              continue

+     return dataset_specific_models

  def get_metadata(model_id):
+     """Load model metadata safely"""
+     try:
+         readme_path = hf_hub_download(model_id, filename="README.md", token=HF_TOKEN)
+         return metadata_load(readme_path)
+     except requests.exceptions.HTTPError:
+         return None
+     except Exception:
+         return None

  def extract_metric(model_card_content, task):
+     """Extract metric from model card content"""
      accuracy_pattern = r"(?:Accuracy|eval_accuracy): (\d+\.\d+)"
      wer_pattern = r"Wer: (\d+\.\d+)"
+     pattern = accuracy_pattern if task == "audio-classification" else wer_pattern
      match = re.search(pattern, model_card_content)
+     return float(match.group(1)) if match else None

  def parse_metrics(model, task):
+     try:
+         card = ModelCard.load(model)
+         return extract_metric(card.content, task)
+     except Exception:
+         return None

+ def calculate_best_result(user_models, task):
+     """Calculate best result for a task"""
+     best_model = ""
+     best_result = -100 if task == "audio-classification" else 100
+     larger_is_better = task == "audio-classification"
+
+     for model in user_models:
+         metric = parse_metrics(model, task)
+         if metric is None:
+             continue
+         if (larger_is_better and metric > best_result) or (not larger_is_better and metric < best_result):
+             best_result = metric
+             meta = get_metadata(model)
+             if meta:
+                 best_model = meta.get('model-index', [{}])[0].get("name", model)
+     return best_result, best_model
+
+ # --------------------
+ # Certification logic
+ # --------------------
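+ # A unit passes when the user's best model meets its baseline (>= for accuracy, <= for WER);
+ # the TTS and demo units only check that a matching model or registered demo exists.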
  def certification(hf_username):
+     results_certification = [
+         {"unit": "Unit 4: Audio Classification", "task": "audio-classification", "baseline_metric": 0.87, "best_result": 0, "best_model_id": "", "passed_": False},
+         {"unit": "Unit 5: Automatic Speech Recognition", "task": "automatic-speech-recognition", "baseline_metric": 0.37, "best_result": 0, "best_model_id": "", "passed_": False},
+         {"unit": "Unit 6: Text-to-Speech", "task": "text-to-speech", "baseline_metric": 0, "best_result": 0, "best_model_id": "", "passed_": False},
+         {"unit": "Unit 7: Audio applications", "task": "demo", "baseline_metric": 0, "best_result": 0, "best_model_id": "", "passed_": False},
+     ]
+
+     for unit in results_certification:
+         task = unit["task"]
+         if task == "audio-classification":
+             try:
+                 models = get_user_models(hf_username, task)
+                 best_result, best_model_id = calculate_best_result(models, task)
+                 unit["best_result"] = best_result
+                 unit["best_model_id"] = best_model_id
+                 unit["passed_"] = best_result >= unit["baseline_metric"]
+             except Exception:
+                 pass
+         elif task == "automatic-speech-recognition":
+             try:
+                 models = get_user_models(hf_username, task)
+                 best_result, best_model_id = calculate_best_result(models, task)
+                 unit["best_result"] = best_result
+                 unit["best_model_id"] = best_model_id
+                 unit["passed_"] = best_result <= unit["baseline_metric"]
+             except Exception:
+                 pass
+         elif task == "text-to-speech":
+             try:
+                 models = get_user_models(hf_username, task)
+                 if models:
+                     unit["best_result"] = 0
+                     unit["best_model_id"] = models[0]
+                     unit["passed_"] = True
+             except Exception:
+                 pass
+         elif task == "demo":
+             try:
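+                 # usernames.csv holds the usernames recorded for the Unit 7 hands-on check.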
+                 u7_file = hf_hub_download(USERNAMES_DATASET_ID, repo_type="dataset", filename="usernames.csv", token=HF_TOKEN)
+                 u7_users = pd.read_csv(u7_file)
+                 if hf_username in u7_users['username'].tolist():
+                     unit["best_result"] = 0
+                     unit["best_model_id"] = "Demo check passed"
+                     unit["passed_"] = True
+             except Exception:
+                 pass
+
+         unit["passed"] = pass_emoji(unit["passed_"])
+
+     df = pd.DataFrame(results_certification)
+     df = df[['passed', 'unit', 'task', 'baseline_metric', 'best_result', 'best_model_id']]
+     return df
+
+ # --------------------
+ # Gradio UI
+ # --------------------
  with gr.Blocks() as demo:
+     gr.Markdown("""
      # 🏆 Check your progress in the Audio Course 🏆
+     - Pass 3 out of 4 assignments for a certificate.
+     - Pass 4 out of 4 assignments for honors.
+     - For Unit 7, first check your demo with the [Unit 7 assessment space](https://huggingface.co/spaces/huggingface-course/audio-course-u7-assessment).
+     - Make sure your models are uploaded to the Hub and that your demo is public.
      """)
+
+     hf_username_input = gr.Textbox(label="Your Hugging Face Username", placeholder="MariaK")
+     check_button = gr.Button("Check my progress")
+     output_table = gr.Dataframe()
+
+     check_button.click(fn=certification, inputs=hf_username_input, outputs=output_table)
+
+ demo.launch()