choukrani commited on
Commit
b694cb3
·
verified ·
1 Parent(s): 70e0d7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +197 -196
app.py CHANGED
@@ -1,196 +1,197 @@
1
- from utils import (
2
- update_leaderboard_multilingual,
3
- update_leaderboard_one_vs_all,
4
- handle_evaluation,
5
- process_results_file,
6
- create_html_image,
7
- )
8
- import os
9
- import gradio as gr
10
-
11
- from constants import *
12
-
13
- if __name__ == "__main__":
14
-
15
- with gr.Blocks() as app:
16
-
17
- base_path = os.path.dirname(__file__)
18
- local_image_path = os.path.join(base_path, 'open_arabic_lid_arena.png')
19
-
20
- gr.HTML(create_html_image(local_image_path))
21
- gr.Markdown("# 🏅 Open Arabic Dialect Identification Leaderboard")
22
-
23
- # Multi-dialects leaderboard
24
- with gr.Tab("Multi-dialects model leaderboard"):
25
- gr.Markdown("""
26
- Complete leaderboard across multiple arabic dialects.
27
- Compare the performance of different models across various metrics such as FNR, FPR, and other clasical metrics.
28
- """
29
- )
30
-
31
- with gr.Row():
32
- with gr.Column(scale=1):
33
- gr.Markdown("### Select country to display")
34
- country_selector = gr.Dropdown(
35
- choices=supported_dialects,
36
- value='Morocco', # Default to Morocco of course
37
- label="Country"
38
- )
39
-
40
- with gr.Column(scale=2):
41
- gr.Markdown("### Select metrics to display")
42
- metric_checkboxes = gr.CheckboxGroup(
43
- choices=metrics,
44
- value=default_metrics,
45
- label="Metrics"
46
- )
47
-
48
- with gr.Row():
49
- leaderboard_table = gr.DataFrame(
50
- interactive=False
51
- )
52
-
53
- gr.Markdown("</br>")
54
-
55
- gr.Markdown("## Contribute to the Leaderboard")
56
- gr.Markdown("""
57
- We welcome contributions from the community!
58
- If you have a model that you would like to see on the leaderboard, please use the 'Evaluate a model' or 'Upload your results' tabs to submit your model's performance.
59
- Let's work together to improve Arabic dialect identification! 🚀
60
- """)
61
-
62
- # Dialect confusion leaderboard
63
- with gr.Tab("Dialect confusion leaderboard"): # use to be "One-vs-All leaderboard"
64
-
65
- gr.Markdown("""
66
- Detailed analysis of how well models distinguish specific dialects from others.
67
- For each target dialect, see how often models incorrectly classify other dialects as the target.
68
- Lower `false_positive_rate` indicate better ability to identify the true dialect by
69
- showing **how often it misclassifies other dialects as the target dialect**.
70
- """
71
- )
72
-
73
- with gr.Row():
74
- with gr.Column(scale=1):
75
- gr.Markdown("### Select your target language")
76
- target_language_selector = gr.Dropdown(
77
- choices=languages_to_display_one_vs_all,
78
- value='Morocco', # Default to Morocco of course
79
- label="Target Language"
80
- )
81
-
82
- with gr.Column(scale=2):
83
- gr.Markdown("### Select languages to compare to")
84
- languages_checkboxes = gr.CheckboxGroup(
85
- choices=languages_to_display_one_vs_all,
86
- value=default_languages,
87
- label="Languages"
88
- )
89
-
90
- with gr.Row():
91
- binary_leaderboard_table = gr.DataFrame(
92
- interactive=False
93
- )
94
-
95
-
96
- with gr.Tab("Evaluate a model"):
97
- gr.Markdown("Suggest a model to evaluate 🤗 (Supports only **Fasttext** models as SfayaLID, GlotLID, OpenLID, etc.)")
98
- gr.Markdown("For other models, you are welcome to **submit your results** through the upload section.")
99
-
100
- model_path = gr.Textbox(label="Model Path", placeholder='path/to/model')
101
- model_path_bin = gr.Textbox(label=".bin filename", placeholder='model.bin')
102
- gr.Markdown("### **⚠️ To ensure correct results, tick this when the model's labels are the iso_codes**")
103
- use_mapping = gr.Checkbox(label="Does not map to country", value=True) # Initially enabled
104
- eval_button = gr.Button("Evaluate", value=False) # Initially disabled
105
-
106
- # Status message area
107
- status_message = gr.Markdown(value="")
108
- def update_status_message():
109
- return "### **⚠️Evaluating... Please wait...**"
110
-
111
- eval_button.click(update_status_message, outputs=[status_message])
112
-
113
- eval_button.click(handle_evaluation, inputs=[model_path, model_path_bin, use_mapping], outputs=[leaderboard_table, status_message])
114
-
115
- with gr.Tab("Upload your results"):
116
-
117
- # Define a code block to display
118
- code_snippet = """
119
- ```python
120
-
121
- # Load your model
122
- model = ... # Load your model here
123
-
124
- # Load evaluation benchmark
125
- eval_dataset = load_dataset("atlasia/Arabic-LID-Leaderboard", split='test').to_pandas() # do not change this line :)
126
-
127
- # Predict labels using your model
128
- eval_dataset['preds'] = eval_dataset['text'].apply(lambda text: predict_label(text, model)) # predict_label is a function that you need to define for your model
129
-
130
- # now drop the columns that are not needed, i.e. 'text', 'metadata' and 'dataset_source'
131
- df_eval = df_eval.drop(columns=['text', 'metadata', 'dataset_source'])
132
- df_eval.to_csv('your_model_name.csv')
133
-
134
- # submit your results: 'your_model_name.csv' to the leaderboard
135
-
136
- ```
137
- """
138
-
139
- gr.Markdown("## Upload your results to the leaderboard 🚀")
140
- gr.Markdown("### Submission guidelines: Run the test dataset on your model and save the results in a CSV file. Bellow a code snippet to help you with that.")
141
- gr.Markdown("### Nota Bene: The One-vs-All leaderboard evaluation is currently unavailable with the csv upload but will be implemented soon. Stay tuned!")
142
- gr.Markdown(code_snippet)
143
-
144
- uploaded_model_name = gr.Textbox(label="Model name", placeholder='Your model/team name')
145
- file = gr.File(label="Upload your results")
146
- upload_button = gr.Button("Upload")
147
-
148
- # Status message area
149
- status_message = gr.Markdown(value="")
150
- def update_status_message():
151
- return "### **⚠️Evaluating... Please wait...**"
152
-
153
- upload_button.click(update_status_message, outputs=[status_message])
154
-
155
- upload_button.click(process_results_file, inputs=[file, uploaded_model_name], outputs=[leaderboard_table, status_message])
156
-
157
- # Update multilangual table when any input changes
158
- country_selector.change(
159
- update_leaderboard_multilingual,
160
- inputs=[country_selector, metric_checkboxes],
161
- outputs=leaderboard_table
162
- )
163
-
164
- metric_checkboxes.change(
165
- update_leaderboard_multilingual,
166
- inputs=[country_selector, metric_checkboxes],
167
- outputs=leaderboard_table
168
- )
169
-
170
- # Update binary table when any input changes
171
- target_language_selector.change(
172
- update_leaderboard_one_vs_all,
173
- inputs=[target_language_selector, languages_checkboxes],
174
- outputs=[binary_leaderboard_table, languages_checkboxes]
175
- )
176
-
177
- languages_checkboxes.change(
178
- update_leaderboard_one_vs_all,
179
- inputs=[target_language_selector, languages_checkboxes],
180
- outputs=[binary_leaderboard_table, languages_checkboxes]
181
- )
182
-
183
- # Define load event to run at startup
184
- app.load(
185
- update_leaderboard_one_vs_all,
186
- inputs=[target_language_selector, languages_checkboxes],
187
- outputs=[binary_leaderboard_table, languages_checkboxes]
188
- )
189
-
190
- app.load(
191
- update_leaderboard_multilingual,
192
- inputs=[country_selector, metric_checkboxes],
193
- outputs=leaderboard_table
194
- )
195
-
196
- app.launch(allowed_paths=[base_path])
 
 
from utils import (
    update_leaderboard_multilingual,
    update_leaderboard_one_vs_all,
    handle_evaluation,
    process_results_file,
    create_html_image,
)

import os

import gradio as gr

# NOTE: star import kept for backward compatibility; it must provide
# supported_dialects, metrics, default_metrics,
# languages_to_display_one_vs_all and default_languages.
from constants import *

if __name__ == "__main__":

    with gr.Blocks() as app:

        # Resolve assets relative to this file so the app works from any CWD.
        base_path = os.path.dirname(__file__)
        local_image_path = os.path.join(base_path, 'open_arabic_lid_arena.png')

        gr.HTML(create_html_image(local_image_path))
        gr.Markdown("# 🏅 Open Arabic Dialect Identification Leaderboard")

        def update_status_message():
            """Transient banner shown while a long-running job executes.

            Defined once here; it was previously duplicated verbatim in both
            the 'Evaluate a model' and 'Upload your results' tabs.
            """
            return "### **⚠️Evaluating... Please wait...**"

        # Multi-dialects leaderboard
        with gr.Tab("Multi-dialects model leaderboard"):
            gr.Markdown(
                """
                Complete leaderboard across multiple Arabic dialects.
                Compare the performance of different models across various metrics such as FNR, FPR, and other classical metrics.
                """
            )

            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Select country to display")
                    country_selector = gr.Dropdown(
                        choices=supported_dialects,
                        value='Morocco',  # Default to Morocco of course
                        label="Country"
                    )

                with gr.Column(scale=2):
                    gr.Markdown("### Select metrics to display")
                    metric_checkboxes = gr.CheckboxGroup(
                        choices=metrics,
                        value=default_metrics,
                        label="Metrics"
                    )

            with gr.Row():
                leaderboard_table = gr.DataFrame(
                    interactive=False
                )

            gr.Markdown("</br>")

            gr.Markdown("## Contribute to the Leaderboard")
            gr.Markdown(
                """
                We welcome contributions from the community!
                If you have a model that you would like to see on the leaderboard, please use the 'Evaluate a model' or 'Upload your results' tabs to submit your model's performance.
                Let's work together to improve Arabic dialect identification! 🚀
                """
            )

        # Dialect confusion leaderboard (used to be "One-vs-All leaderboard")
        with gr.Tab("Dialect confusion leaderboard"):

            gr.Markdown(
                """
                Detailed analysis of how well models distinguish specific dialects from others.
                For each target dialect, see how often models incorrectly classify other dialects as the target.
                Lower `false_positive_rate` indicates better ability to identify the true dialect by
                showing **how often it misclassifies other dialects as the target dialect**.
                """
            )

            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Select your target language")
                    target_language_selector = gr.Dropdown(
                        choices=languages_to_display_one_vs_all,
                        value='Morocco',  # Default to Morocco of course
                        label="Target Language"
                    )

                with gr.Column(scale=2):
                    gr.Markdown("### Select languages to compare to")
                    languages_checkboxes = gr.CheckboxGroup(
                        choices=languages_to_display_one_vs_all,
                        value=default_languages,
                        label="Languages"
                    )

            with gr.Row():
                binary_leaderboard_table = gr.DataFrame(
                    interactive=False
                )

        with gr.Tab("Evaluate a model"):
            gr.Markdown("Suggest a model to evaluate 🤗 (Supports only **Fasttext** models as SfayaLID, GlotLID, OpenLID, etc.)")
            gr.Markdown("For other models, you are welcome to **submit your results** through the upload section.")

            model_path = gr.Textbox(label="Model Path", placeholder='path/to/model')
            model_path_bin = gr.Textbox(label=".bin filename", placeholder='model.bin')
            gr.Markdown("### **⚠️ To ensure correct results, tick this when the model's labels are the iso_codes**")
            use_mapping = gr.Checkbox(label="Does not map to country", value=True)  # Initially enabled
            # BUG FIX: gr.Button("Evaluate", value=False) passed `value` both
            # positionally and by keyword, which raises a TypeError on current
            # Gradio; the "initially disabled" intent was never wired up anyway.
            eval_button = gr.Button("Evaluate")

            # Status message area
            status_message = gr.Markdown(value="")

            # Chain the events so the banner renders BEFORE the (potentially
            # long) evaluation starts, instead of firing two independent
            # click handlers that race each other.
            eval_button.click(
                update_status_message,
                outputs=[status_message],
            ).then(
                handle_evaluation,
                inputs=[model_path, model_path_bin, use_mapping],
                outputs=[leaderboard_table, status_message],
            )

        with gr.Tab("Upload your results"):

            # Code snippet displayed to guide contributors.
            # (Fixed: the snippet previously used `df_eval` before defining it;
            # it must be derived from `eval_dataset`.)
            code_snippet = """
            ```python

            # Load your model
            model = ... # Load your model here

            # Load evaluation benchmark
            eval_dataset = load_dataset("atlasia/Arabic-LID-Leaderboard", split='test').to_pandas() # do not change this line :)

            # Predict labels using your model
            eval_dataset['preds'] = eval_dataset['text'].apply(lambda text: predict_label(text, model)) # predict_label is a function that you need to define for your model

            # now drop the columns that are not needed, i.e. 'text', 'metadata' and 'dataset_source'
            df_eval = eval_dataset.drop(columns=['text', 'metadata', 'dataset_source'])
            df_eval.to_csv('your_model_name.csv')

            # submit your results: 'your_model_name.csv' to the leaderboard

            ```
            """

            gr.Markdown("## Upload your results to the leaderboard 🚀")
            gr.Markdown("### Submission guidelines: Run the test dataset on your model and save the results in a CSV file. Below a code snippet to help you with that.")
            gr.Markdown("### Nota Bene: The One-vs-All leaderboard evaluation is currently unavailable with the csv upload but will be implemented soon. Stay tuned!")
            gr.Markdown(code_snippet)

            uploaded_model_name = gr.Textbox(label="Model name", placeholder='Your model/team name')
            file = gr.File(label="Upload your results")
            upload_button = gr.Button("Upload")

            # Status message area
            status_message = gr.Markdown(value="")

            # Same chaining as the evaluate tab: banner first, then processing.
            upload_button.click(
                update_status_message,
                outputs=[status_message],
            ).then(
                process_results_file,
                inputs=[file, uploaded_model_name],
                outputs=[leaderboard_table, status_message],
            )

        # Update multilingual table when any input changes
        country_selector.change(
            update_leaderboard_multilingual,
            inputs=[country_selector, metric_checkboxes],
            outputs=leaderboard_table
        )

        metric_checkboxes.change(
            update_leaderboard_multilingual,
            inputs=[country_selector, metric_checkboxes],
            outputs=leaderboard_table
        )

        # Update binary table when any input changes
        target_language_selector.change(
            update_leaderboard_one_vs_all,
            inputs=[target_language_selector, languages_checkboxes],
            outputs=[binary_leaderboard_table, languages_checkboxes]
        )

        languages_checkboxes.change(
            update_leaderboard_one_vs_all,
            inputs=[target_language_selector, languages_checkboxes],
            outputs=[binary_leaderboard_table, languages_checkboxes]
        )

        # Load events: populate both tables at startup.
        app.load(
            update_leaderboard_one_vs_all,
            inputs=[target_language_selector, languages_checkboxes],
            outputs=[binary_leaderboard_table, languages_checkboxes]
        )

        app.load(
            update_leaderboard_multilingual,
            inputs=[country_selector, metric_checkboxes],
            outputs=leaderboard_table
        )

    # allowed_paths lets Gradio serve the local banner image from disk.
    app.launch(allowed_paths=[base_path])