tfrere committed on
Commit 970eef1 · 0 Parent(s)

first commit

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .env.example +3 -0
  2. .gitignore +58 -0
  3. Dockerfile +61 -0
  4. README.md +11 -0
  5. backend/Dockerfile.dev +25 -0
  6. backend/README.md +352 -0
  7. backend/main.py +38 -0
  8. backend/old-pyproject.toml +26 -0
  9. backend/poetry.lock +0 -0
  10. backend/pyproject.toml +47 -0
  11. backend/routes/__init__.py +23 -0
  12. backend/routes/benchmark.py +132 -0
  13. backend/routes/download.py +74 -0
  14. backend/routes/evaluation.py +150 -0
  15. backend/routes/health.py +13 -0
  16. backend/routes/questions.py +87 -0
  17. backend/routes/upload.py +52 -0
  18. backend/tasks/__init__.py +3 -0
  19. backend/tasks/createBench.py +317 -0
  20. backend/tasks/createBenchConfigFile.py +313 -0
  21. backend/tasks/evaluationTask.py +471 -0
  22. backend/tasks/yourbench_lighteval_task.py +273 -0
  23. backend/tests/test_evaluation.py +165 -0
  24. backend/tests/test_hf_upload.py +78 -0
  25. backend/tests/test_inference.py +84 -0
  26. backend/tests/test_lighteval.py +151 -0
  27. backend/tests/test_openai.py +31 -0
  28. backend/tests/test_parallel_lighteval.py +278 -0
  29. backend/tests/test_provider_parallel_support.py +227 -0
  30. backend/tests/test_yourbench_results.py +394 -0
  31. docker-compose.yml +33 -0
  32. frontend/Dockerfile.dev +15 -0
  33. frontend/README.md +80 -0
  34. frontend/package.json +55 -0
  35. frontend/public/index.html +96 -0
  36. frontend/public/logo256.png +0 -0
  37. frontend/public/logo32.png +0 -0
  38. frontend/public/og-image.jpg +0 -0
  39. frontend/public/robots.txt +3 -0
  40. frontend/server.js +85 -0
  41. frontend/src/App.js +427 -0
  42. frontend/src/components/BenchmarkCreateForm.jsx +295 -0
  43. frontend/src/components/BenchmarkDisplay.jsx +161 -0
  44. frontend/src/components/BenchmarkEvaluation.jsx +364 -0
  45. frontend/src/components/BenchmarkGenerator.jsx +398 -0
  46. frontend/src/components/EvaluationDisplay.jsx +196 -0
  47. frontend/src/components/Footer/Footer.js +30 -0
  48. frontend/src/components/LogDisplay.jsx +67 -0
  49. frontend/src/components/Logo/HFLogo.js +19 -0
  50. frontend/src/components/Logo/Logo.js +56 -0
.env.example ADDED
@@ -0,0 +1,3 @@
+ ENVIRONMENT=development
+ HF_TOKEN=xxx
+ HF_HOME=.cache
.gitignore ADDED
@@ -0,0 +1,58 @@
+ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
+
+ __pycache__
+ .cache/
+
+ # dependencies
+
+ frontend/node_modules
+ /.pnp
+ .pnp.js
+
+ # testing
+ /coverage
+
+ /backend/uploaded_files
+ /backend/logs
+
+ .venv/
+
+ __pycache__/
+ *.pkl
+ model_info_cache.pkl
+ model_size_cache.pkl
+ gif.gif
+ src/assets/scale-hf-logo.png
+
+ .litellm_cache/
+
+ # production
+
+ /build
+
+ # misc
+
+ .DS_Store
+ .env.local
+ .env.development.local
+ .env.test.local
+ .env.production.local
+
+ npm-debug.log*
+ yarn-debug.log*
+ yarn-error.log*
+
+ src/dataframe.json
+
+ yarn.lock
+ package-lock.json
+
+ /public
+
+ .claudesync/
+
+ # Environment variables
+ .env
+ .env.*
+ !.env.example
+
Dockerfile ADDED
@@ -0,0 +1,61 @@
+ # Build frontend
+ FROM node:18 as frontend-build
+ WORKDIR /app
+ COPY frontend/package*.json ./
+ RUN npm install
+ COPY frontend/ ./
+
+ RUN npm run build
+
+ # Build backend
+ FROM python:3.12-slim
+ WORKDIR /app
+
+ # Create non-root user
+ RUN useradd -m -u 1000 user
+
+ # Install uv instead of poetry
+ RUN pip install uv
+
+ # Create and configure cache directory
+ RUN mkdir -p /app/.cache && \
+     chown -R user:user /app
+
+ # Copy and install backend dependencies
+ COPY backend/pyproject.toml ./
+ RUN uv pip install -e . --system
+
+ # Copy backend code
+ COPY backend/ .
+
+ # Install Node.js and npm
+ RUN apt-get update && apt-get install -y \
+     curl \
+     netcat-openbsd \
+     && curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
+     && apt-get install -y nodejs \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy frontend server and build
+ COPY --from=frontend-build /app/build ./frontend/build
+ COPY --from=frontend-build /app/package*.json ./frontend/
+ COPY --from=frontend-build /app/server.js ./frontend/
+
+ # Install frontend production dependencies
+ WORKDIR /app/frontend
+ RUN npm install --production
+ WORKDIR /app
+
+ # Environment variables
+ ENV HF_HOME=/app/.cache \
+     HF_DATASETS_CACHE=/app/.cache \
+     INTERNAL_API_PORT=7861 \
+     PORT=7860 \
+     NODE_ENV=production
+
+ # Note: HF_TOKEN should be provided at runtime, not build time
+ USER user
+ EXPOSE 7860
+
+ # Start both servers with wait-for
+ CMD ["sh", "-c", "uvicorn app.asgi:app --host 0.0.0.0 --port 7861 & while ! nc -z localhost 7861; do sleep 1; done && cd frontend && npm run serve"]
README.md ADDED
@@ -0,0 +1,11 @@
+ ---
+ title: Yourbench simple demo
+ emoji: 🏆
+ colorFrom: blue
+ colorTo: red
+ sdk: docker
+ hf_oauth: true
+ pinned: true
+ license: apache-2.0
+ short_description: Yourbench demo
+ ---
backend/Dockerfile.dev ADDED
@@ -0,0 +1,25 @@
+ FROM python:3.12-slim
+
+ WORKDIR /app
+
+ # Install required system dependencies
+ RUN apt-get update && apt-get install -y \
+     build-essential \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Install poetry
+ RUN pip install poetry
+
+ # Copy Poetry configuration files
+ COPY pyproject.toml poetry.lock* ./
+
+ # Install dependencies
+ RUN poetry config virtualenvs.create false && \
+     poetry install --no-interaction --no-ansi --no-root
+
+ # Environment variable configuration for logs
+ ENV PYTHONUNBUFFERED=1
+ ENV LOG_LEVEL=INFO
+
+ # In dev, mount volume directly
+ CMD ["uvicorn", "app.asgi:app", "--host", "0.0.0.0", "--port", "7860", "--reload", "--log-level", "warning", "--no-access-log"]
backend/README.md ADDED
@@ -0,0 +1,352 @@
+ # Backend - Open LLM Leaderboard 🏆
+
+ FastAPI backend for the Open LLM Leaderboard. This service is part of a larger architecture that includes a React frontend. For complete project installation, see the [main README](../README.md).
+
+ ## ✨ Features
+
+ - 📊 REST API for LLM models leaderboard management
+ - 🗳️ Voting and ranking system
+ - 🔄 HuggingFace Hub integration
+ - 🚀 Caching and performance optimizations
+
+ ## 🏗 Architecture
+
+ ```mermaid
+ flowchart TD
+     Client(["**Frontend**<br><br>React Application"]) --> API["**API Server**<br><br>FastAPI REST Endpoints"]
+
+     subgraph Backend
+         API --> Core["**Core Layer**<br><br>• Middleware<br>• Cache<br>• Rate Limiting"]
+         Core --> Services["**Services Layer**<br><br>• Business Logic<br>• Data Processing"]
+
+         subgraph Services Layer
+             Services --> Models["**Model Service**<br><br>• Model Submission<br>• Evaluation Pipeline"]
+             Services --> Votes["**Vote Service**<br><br>• Vote Management<br>• Data Synchronization"]
+             Services --> Board["**Leaderboard Service**<br><br>• Rankings<br>• Performance Metrics"]
+         end
+
+         Models --> Cache["**Cache Layer**<br><br>• In-Memory Store<br>• Auto Invalidation"]
+         Votes --> Cache
+         Board --> Cache
+
+         Models --> HF["**HuggingFace Hub**<br><br>• Models Repository<br>• Datasets Access"]
+         Votes --> HF
+         Board --> HF
+     end
+
+     style Client fill:#f9f,stroke:#333,stroke-width:2px
+     style Models fill:#bbf,stroke:#333,stroke-width:2px
+     style Votes fill:#bbf,stroke:#333,stroke-width:2px
+     style Board fill:#bbf,stroke:#333,stroke-width:2px
+     style HF fill:#bfb,stroke:#333,stroke-width:2px
+ ```
+
+ ## 🛠️ HuggingFace Datasets
+
+ The application uses several datasets on the HuggingFace Hub:
+
+ ### 1. Requests Dataset (`{HF_ORGANIZATION}/requests`)
+
+ - **Operations**:
+   - 📤 `POST /api/models/submit`: Adds a JSON file for each new model submission
+   - 📥 `GET /api/models/status`: Reads files to get models status
+ - **Format**: One JSON file per model with submission details
+ - **Updates**: On each new model submission
+
+ ### 2. Votes Dataset (`{HF_ORGANIZATION}/votes`)
+
+ - **Operations**:
+   - 📤 `POST /api/votes/{model_id}`: Adds a new vote
+   - 📥 `GET /api/votes/model/{provider}/{model}`: Reads model votes
+   - 📥 `GET /api/votes/user/{user_id}`: Reads user votes
+ - **Format**: JSONL with one vote per line
+ - **Sync**: Bidirectional between local cache and Hub
+
+ ### 3. Contents Dataset (`{HF_ORGANIZATION}/contents`)
+
+ - **Operations**:
+   - 📥 `GET /api/leaderboard`: Reads raw data
+   - 📥 `GET /api/leaderboard/formatted`: Reads and formats data
+ - **Format**: Main dataset containing all scores and metrics
+ - **Updates**: Automatic after model evaluations
+
+ ### 4. Official Providers Dataset (`{HF_ORGANIZATION}/official-providers`)
+
+ - **Operations**:
+   - 📥 Read-only access for highlighted models
+ - **Format**: List of models selected by maintainers
+ - **Updates**: Manual by maintainers
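+
+ As an illustration of the JSONL vote format described above, here is a minimal sketch of appending a vote record and syncing it back to the Hub. The repo id, file name, and field names are assumptions based on the descriptions in this section, not the actual implementation:
+
+ ```python
+ import json
+ from huggingface_hub import HfApi, hf_hub_download
+
+ REPO_ID = "{HF_ORGANIZATION}/votes"  # hypothetical placeholder for the real org
+ VOTES_FILE = "votes.jsonl"           # hypothetical file name
+
+ # Pull the current votes file from the Hub (the Hub -> local direction of the sync)
+ local_path = hf_hub_download(repo_id=REPO_ID, filename=VOTES_FILE, repo_type="dataset")
+
+ # Append one vote per line, matching the JSONL format described above
+ vote = {"model_id": "org/model", "vote_type": "up", "user_id": "some-user"}
+ with open(local_path, "a") as f:
+     f.write(json.dumps(vote) + "\n")
+
+ # Push the updated file back (the local -> Hub direction of the sync)
+ HfApi().upload_file(
+     path_or_fileobj=local_path,
+     path_in_repo=VOTES_FILE,
+     repo_id=REPO_ID,
+     repo_type="dataset",
+ )
+ ```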
+
+ ## 🛠 Local Development
+
+ ### Prerequisites
+
+ - Python 3.9+
+ - [Poetry](https://python-poetry.org/docs/#installation)
+
+ ### Standalone Installation (without Docker)
+
+ ```bash
+ # Install dependencies
+ poetry install
+
+ # Setup configuration
+ cp .env.example .env
+
+ # Start development server
+ poetry run uvicorn app.asgi:app --host 0.0.0.0 --port 7860 --reload
+ ```
+
+ The server will be available at http://localhost:7860
+
+ ## ⚙️ Configuration
+
+ | Variable     | Description                          | Default     |
+ | ------------ | ------------------------------------ | ----------- |
+ | ENVIRONMENT  | Environment (development/production) | development |
+ | HF_TOKEN     | HuggingFace authentication token     | -           |
+ | PORT         | Server port                          | 7860        |
+ | LOG_LEVEL    | Logging level (INFO/DEBUG/WARNING)   | INFO        |
+ | CORS_ORIGINS | Allowed CORS origins                 | ["*"]       |
+ | CACHE_TTL    | Cache Time To Live in seconds        | 300         |
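+
+ A minimal sketch of how these settings might be read, assuming plain `os.getenv` lookups with the defaults from the table; how the list-valued CORS_ORIGINS is parsed is also an assumption (the actual backend may centralize this in a settings module):
+
+ ```python
+ import json
+ import os
+
+ ENVIRONMENT = os.getenv("ENVIRONMENT", "development")
+ HF_TOKEN = os.getenv("HF_TOKEN")  # no default: required for Hub access
+ PORT = int(os.getenv("PORT", "7860"))
+ LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
+ CORS_ORIGINS = json.loads(os.getenv("CORS_ORIGINS", '["*"]'))  # assumed JSON-encoded
+ CACHE_TTL = int(os.getenv("CACHE_TTL", "300"))  # seconds
+ ```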
+
+ ## 🔧 Middleware
+
+ The backend uses several middleware layers for optimal performance and security:
+
+ - **CORS Middleware**: Handles Cross-Origin Resource Sharing
+ - **GZIP Middleware**: Compresses responses > 500 bytes
+ - **Rate Limiting**: Prevents API abuse
+ - **Caching**: In-memory caching with automatic invalidation
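+
+ A sketch of how the CORS and GZIP layers can be wired up in FastAPI (illustrative; the exact options live in the backend's core configuration):
+
+ ```python
+ from fastapi import FastAPI
+ from fastapi.middleware.cors import CORSMiddleware
+ from fastapi.middleware.gzip import GZipMiddleware
+
+ app = FastAPI()
+
+ # CORS: in practice the origins come from the CORS_ORIGINS setting
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # GZIP: only compress responses larger than 500 bytes, as noted above
+ app.add_middleware(GZipMiddleware, minimum_size=500)
+ ```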
+
+ ## 📝 Logging
+
+ The application uses a structured logging system with:
+
+ - Formatted console output
+ - Different log levels per component
+ - Request/Response logging
+ - Performance metrics
+ - Error tracking
+
+ ## 📁 File Structure
+
+ ```
+ backend/
+ ├── app/                # Source code
+ │   ├── api/            # Routes and endpoints
+ │   │   └── endpoints/  # Endpoint handlers
+ │   ├── core/           # Configurations
+ │   ├── services/       # Business logic
+ │   └── utils/          # Utilities
+ └── tests/              # Tests
+ ```
+
+ ## 📚 API
+
+ Swagger documentation available at http://localhost:7860/docs
+
+ ### Main Endpoints & Data Structures
+
+ #### Leaderboard
+
+ - `GET /api/leaderboard/formatted` - Formatted data with computed fields and metadata
+
+   ```typescript
+   Response {
+     models: [{
+       id: string,                   // eval_name
+       model: {
+         name: string,               // fullname
+         sha: string,                // Model sha
+         precision: string,          // e.g. "fp16", "int8"
+         type: string,               // e.g. "fine-tuned-on-domain-specific-dataset"
+         weight_type: string,
+         architecture: string,
+         average_score: number,
+         has_chat_template: boolean
+       },
+       evaluations: {
+         ifeval: {
+           name: "IFEval",
+           value: number,            // Raw score
+           normalized_score: number
+         },
+         bbh: {
+           name: "BBH",
+           value: number,
+           normalized_score: number
+         },
+         math: {
+           name: "MATH Level 5",
+           value: number,
+           normalized_score: number
+         },
+         gpqa: {
+           name: "GPQA",
+           value: number,
+           normalized_score: number
+         },
+         musr: {
+           name: "MUSR",
+           value: number,
+           normalized_score: number
+         },
+         mmlu_pro: {
+           name: "MMLU-PRO",
+           value: number,
+           normalized_score: number
+         }
+       },
+       features: {
+         is_not_available_on_hub: boolean,
+         is_merged: boolean,
+         is_moe: boolean,
+         is_flagged: boolean,
+         is_official_provider: boolean
+       },
+       metadata: {
+         upload_date: string,
+         submission_date: string,
+         generation: string,
+         base_model: string,
+         hub_license: string,
+         hub_hearts: number,
+         params_billions: number,
+         co2_cost: number            // CO₂ cost in kg
+       }
+     }]
+   }
+   ```
+
+ - `GET /api/leaderboard` - Raw data from the HuggingFace dataset
+   ```typescript
+   Response {
+     models: [{
+       eval_name: string,
+       Precision: string,
+       Type: string,
+       "Weight type": string,
+       Architecture: string,
+       Model: string,
+       fullname: string,
+       "Model sha": string,
+       "Average ⬆️": number,
+       "Hub License": string,
+       "Hub ❤️": number,
+       "#Params (B)": number,
+       "Available on the hub": boolean,
+       Merged: boolean,
+       MoE: boolean,
+       Flagged: boolean,
+       "Chat Template": boolean,
+       "CO₂ cost (kg)": number,
+       "IFEval Raw": number,
+       IFEval: number,
+       "BBH Raw": number,
+       BBH: number,
+       "MATH Lvl 5 Raw": number,
+       "MATH Lvl 5": number,
+       "GPQA Raw": number,
+       GPQA: number,
+       "MUSR Raw": number,
+       MUSR: number,
+       "MMLU-PRO Raw": number,
+       "MMLU-PRO": number,
+       "Maintainer's Highlight": boolean,
+       "Upload To Hub Date": string,
+       "Submission Date": string,
+       Generation: string,
+       "Base Model": string
+     }]
+   }
+   ```
+
+ #### Models
+
+ - `GET /api/models/status` - Get all models grouped by status
+   ```typescript
+   Response {
+     pending: [{
+       name: string,
+       submitter: string,
+       revision: string,
+       wait_time: string,
+       submission_time: string,
+       status: "PENDING" | "EVALUATING" | "FINISHED",
+       precision: string
+     }],
+     evaluating: Array<Model>,
+     finished: Array<Model>
+   }
+   ```
+ - `GET /api/models/pending` - Get pending models only
+ - `POST /api/models/submit` - Submit model
+
+   ```typescript
+   Request {
+     user_id: string,
+     model_id: string,
+     base_model?: string,
+     precision?: string,
+     model_type: string
+   }
+
+   Response {
+     status: string,
+     message: string
+   }
+   ```
+
+ - `GET /api/models/{model_id}/status` - Get model status
+
+ #### Votes
+
+ - `POST /api/votes/{model_id}` - Vote
+
+   ```typescript
+   Request {
+     vote_type: "up" | "down",
+     user_id: string               // HuggingFace username
+   }
+
+   Response {
+     success: boolean,
+     message: string
+   }
+   ```
+
+ - `GET /api/votes/model/{provider}/{model}` - Get model votes
+   ```typescript
+   Response {
+     total_votes: number,
+     up_votes: number,
+     down_votes: number
+   }
+   ```
+ - `GET /api/votes/user/{user_id}` - Get user votes
+   ```typescript
+   Response Array<{
+     model_id: string,
+     vote_type: string,
+     timestamp: string
+   }>
+   ```
+
+ ## 🔒 Authentication
+
+ The backend uses HuggingFace token-based authentication for secure API access. Make sure to:
+
+ 1. Set your HF_TOKEN in the .env file
+ 2. Include the token in API requests via Bearer authentication
+ 3. Keep your token secure and never commit it to version control
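+
+ For example, a client call with Bearer authentication might look like this (a sketch; endpoint and response shape as documented above):
+
+ ```python
+ import os
+ import requests
+
+ # Assumes the server from this README is running locally on its default port
+ headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}
+ resp = requests.get("http://localhost:7860/api/leaderboard/formatted", headers=headers)
+ resp.raise_for_status()
+ print(len(resp.json()["models"]), "models")
+ ```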
+
+ ## 🚀 Performance
+
+ The backend implements several optimizations:
+
+ - In-memory caching with configurable TTL (Time To Live)
+ - Batch processing for model evaluations
+ - Rate limiting for API endpoints
+ - Efficient database queries with proper indexing
+ - Automatic cache invalidation for votes
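+
+ As a sketch of the TTL caching idea (not the actual cache layer), a minimal in-memory store with automatic invalidation could look like:
+
+ ```python
+ import time
+
+ class TTLCache:
+     """Minimal in-memory cache with a time-to-live (illustrative sketch)."""
+
+     def __init__(self, ttl_seconds: float = 300.0):  # matches the CACHE_TTL default
+         self.ttl = ttl_seconds
+         self._store = {}
+
+     def get(self, key):
+         entry = self._store.get(key)
+         if entry is None:
+             return None
+         expires_at, value = entry
+         if time.monotonic() > expires_at:  # automatic invalidation on expiry
+             del self._store[key]
+             return None
+         return value
+
+     def set(self, key, value):
+         self._store[key] = (time.monotonic() + self.ttl, value)
+ ```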
backend/main.py ADDED
@@ -0,0 +1,38 @@
+ from fastapi import FastAPI
+ from fastapi.middleware.cors import CORSMiddleware
+ import os
+ from dotenv import load_dotenv
+ from routes import routers, session_files, active_bench_tasks
+
+ # Load environment variables from .env file
+ load_dotenv()
+
+ # Verify environment variables are loaded
+ hf_token = os.getenv("HF_TOKEN")
+ if not hf_token:
+     print("Warning: HF_TOKEN environment variable is not set. Make sure it's defined in your .env file.")
+
+ hf_organization = os.getenv("HF_ORGANIZATION")
+ if not hf_organization:
+     print("Warning: HF_ORGANIZATION environment variable is not set. Make sure it's defined in your .env file.")
+
+ app = FastAPI(title="Yourbench API")
+
+ # Enable CORS to allow requests from the frontend
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],  # In a production environment, specify exact origins
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Add an event handler to display session_files at startup
+ @app.on_event("startup")
+ async def startup_event():
+     print("Application startup")
+     print(f"Initial session_files: {session_files}")
+
+ # Register all routes
+ for router in routers:
+     app.include_router(router)
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "llm-leaderboard-backend"
3
+ version = "0.1.0"
4
+ description = "Backend for the Open LLM Leaderboard"
5
+ authors = ["Your Name <[email protected]>"]
6
+
7
+ [tool.poetry.dependencies]
8
+ python = ">=3.12,<3.13"
9
+ fastapi = "^0.115.6"
10
+ huggingface-hub = "0.29.3"
11
+ python-dotenv = "^1.0.1"
12
+ python-multipart = "^0.0.9"
13
+ uvicorn = {extras = ["standard"], version = "^0.27.0"}
14
+ loguru = "^0.7.3"
15
+ lighteval = {version = ">=0.8.0", extras = ["math"]}
16
+ tqdm = "^4.67.1"
17
+ asyncio = "^3.4.3"
18
+ datasets = "^3.3.0"
19
+ yourbench = {git = "https://github.com/huggingface/yourbench.git"}
20
+ tiktoken = "^0.9.0"
21
+ requests = {extras = ["socks"], version = "^2.32.3"}
22
+ httpx-socks = "^0.10.0"
23
+
24
+ [build-system]
25
+ requires = ["poetry-core>=1.0.0"]
26
+ build-backend = "poetry.core.masonry.api"
backend/poetry.lock ADDED
The diff for this file is too large to render.
backend/pyproject.toml ADDED
@@ -0,0 +1,47 @@
+ [project]
+ name = "yourbench-simple-demo"
+ version = "0.1.0"
+ authors = [
+     { name = "Sumuk Shashidhar", email = "[email protected]" },
+     { name = "Alina Lozovskaia", email = "[email protected]" },
+     { name = "Clémentine Fourrier", email = "[email protected]" },
+     { name = "Nathan Habib", email = "[email protected]" },
+ ]
+ requires-python = ">=3.12, <3.13"
+
+ dependencies = [
+     "yourbench @ git+https://github.com/huggingface/yourbench.git@main",
+     "asyncio>=3.4.3",
+     "datasets>=3.3.0",
+     "loguru>=0.7.3",
+     "python-dotenv>=1.0.1",
+     "tqdm>=4.67.1",
+     "ruff>=0.11.2",
+     "lighteval>=0.8.0",
+     "huggingface-hub>=0.22.0",
+ ]
+
+ [build-system]
+ requires = ["setuptools>=61.0"]
+ build-backend = "setuptools.build_meta"
+
+ [tool.ruff]
+ line-length = 119
+ exclude = ["**/*.ipynb"]
+
+ lint.ignore = ["E501", "C901", "F841"]
+ lint.select = ["C", "E", "F", "I", "W"]
+ lint.fixable = ["A", "B", "C", "D", "E", "F", "I", "W"]
+ preview = true
+
+ [tool.ruff.lint.isort]
+ length-sort = true
+ lines-after-imports = 2
+ no-lines-before = ["standard-library", "local-folder"]
+ split-on-trailing-comma = true
+
+ [tool.ruff.format]
+ quote-style = "double"
+ indent-style = "space"
+ skip-magic-trailing-comma = false
+ line-ending = "auto"
backend/routes/__init__.py ADDED
@@ -0,0 +1,23 @@
+ # Module routes
+ from .health import router as health_router
+ from .upload import router as upload_router, session_files
+ from .benchmark import router as benchmark_router, active_bench_tasks
+ from .questions import router as questions_router
+ from .download import router as download_router
+ from .evaluation import router as evaluation_router, active_evaluation_tasks
+
+ # Expose the routers
+ routers = [
+     health_router,
+     upload_router,
+     benchmark_router,
+     questions_router,
+     download_router,
+     evaluation_router
+ ]
+
+ # Reference data shared between routes
+ benchmark_router.session_files = session_files
+
+ # Expose shared variables for main.py
+ __all__ = ['routers', 'session_files', 'active_bench_tasks', 'active_evaluation_tasks']
backend/routes/benchmark.py ADDED
@@ -0,0 +1,132 @@
+ from fastapi import APIRouter, HTTPException
+ from typing import Dict, Any
+ import os
+ import time
+ from tasks.createBenchConfigFile import CreateBenchConfigTask
+ from tasks.createBench import CreateBenchTask
+
+ router = APIRouter(tags=["benchmark"])
+
+ # Store active tasks by session_id (imported in main.py)
+ active_bench_tasks = {}
+ active_config_tasks = {}
+
+ # Reference to session_files (provided by main.py)
+ # This declaration is overwritten by the assignment in __init__.py
+ session_files = {}
+
+ @router.post("/generate-benchmark")
+ async def generate_benchmark(data: Dict[str, Any]):
+     """
+     Generate a benchmark configuration and run the ingestion process
+
+     Args:
+         data: Dictionary containing session_id
+
+     Returns:
+         Dictionary with logs and config_path
+     """
+     session_id = data.get("session_id")
+
+     # Debugging: check session_files and the received session_id
+     print(f"DEBUG: Received session ID: {session_id}")
+     print(f"DEBUG: Available session files: {list(router.session_files.keys())}")
+
+     if not session_id or session_id not in router.session_files:
+         return {"error": "Invalid or missing session ID"}
+
+     file_path = router.session_files[session_id]
+     all_logs = []
+
+     try:
+         # Step 1: Generate configuration file
+         config_task = CreateBenchConfigTask(session_uid=session_id)
+         # Store the config task for later log retrieval
+         active_config_tasks[session_id] = config_task
+
+         # Start configuration generation asynchronously
+         config_path = config_task.run(file_path=file_path)
+
+         # Add initial logs
+         all_logs.extend(config_task.get_logs())
+
+         # Step 2: Run the createBench task with the generated config
+         # Note: This will be started by a separate endpoint once configuration is done
+
+         return {
+             "status": "running",
+             "config_path": config_path,
+             "logs": all_logs
+         }
+     except Exception as e:
+         return {
+             "status": "error",
+             "error": str(e),
+             "logs": all_logs
+         }
+
+ @router.get("/config-logs/{session_id}")
+ async def get_config_logs(session_id: str):
+     """
+     Get the logs for a running configuration task
+
+     Args:
+         session_id: Session ID for the task
+
+     Returns:
+         Dictionary with logs and completion status
+     """
+     if session_id not in active_config_tasks:
+         raise HTTPException(status_code=404, detail="Configuration task not found")
+
+     config_task = active_config_tasks[session_id]
+     logs = config_task.get_logs()
+     is_completed = config_task.is_task_completed()
+
+     # If configuration is finished and the benchmark has not started yet,
+     # start the benchmark automatically
+     if is_completed and session_id not in active_bench_tasks:
+         try:
+             # Ensure the config_path is a string
+             config_path_str = f"uploaded_files/{session_id}/config.yml"
+             bench_task = CreateBenchTask(session_uid=session_id, config_path=config_path_str)
+
+             # Store the bench task for later log retrieval
+             active_bench_tasks[session_id] = bench_task
+
+             # Add a transition log
+             logs.append("[INFO] Configuration file generated, starting benchmark creation")
+
+             # Run the task
+             bench_task.run()
+         except Exception as bench_error:
+             error_msg = f"Error starting benchmark creation: {str(bench_error)}"
+             logs.append(f"[ERROR] {error_msg}")
+
+     return {
+         "logs": logs,
+         "is_completed": is_completed
+     }
+
+ @router.get("/benchmark-logs/{session_id}")
+ async def get_benchmark_logs(session_id: str):
+     """
+     Get the logs for a running benchmark task
+
+     Args:
+         session_id: Session ID for the task
+
+     Returns:
+         Dictionary with logs and completion status
+     """
+     if session_id not in active_bench_tasks:
+         raise HTTPException(status_code=404, detail="Benchmark task not found")
+
+     bench_task = active_bench_tasks[session_id]
+     logs = bench_task.get_logs()
+     is_completed = bench_task.is_task_completed()
+
+     return {
+         "logs": logs,
+         "is_completed": is_completed
+     }
backend/routes/download.py ADDED
@@ -0,0 +1,74 @@
+ from fastapi import APIRouter, HTTPException
+ from fastapi.responses import StreamingResponse
+ from huggingface_hub import hf_hub_download, snapshot_download
+ import os
+ import tempfile
+ import shutil
+ import zipfile
+ import io
+ import logging
+
+ router = APIRouter(tags=["download"])
+
+ @router.get("/download-dataset/{session_id}")
+ async def download_dataset(session_id: str):
+     """
+     Download the HuggingFace dataset associated with a session and return it to the client
+
+     Args:
+         session_id: Session identifier
+
+     Returns:
+         ZIP file containing the dataset
+     """
+     try:
+         # Create a temporary directory to store the dataset files
+         with tempfile.TemporaryDirectory() as temp_dir:
+             # HuggingFace repo identifier
+             repo_id = f"yourbench/yourbench_{session_id}"
+
+             try:
+                 # Download the dataset snapshot from HuggingFace
+                 logging.info(f"Downloading dataset {repo_id}")
+                 snapshot_path = snapshot_download(
+                     repo_id=repo_id,
+                     repo_type="dataset",
+                     local_dir=temp_dir,
+                     token=os.environ.get("HF_TOKEN")
+                 )
+
+                 logging.info(f"Dataset downloaded to {snapshot_path}")
+
+                 # Create a ZIP file in memory
+                 zip_io = io.BytesIO()
+                 with zipfile.ZipFile(zip_io, 'w', zipfile.ZIP_DEFLATED) as zip_file:
+                     # Walk through all dataset files and add them to the ZIP
+                     for root, _, files in os.walk(snapshot_path):
+                         for file in files:
+                             file_path = os.path.join(root, file)
+                             arc_name = os.path.relpath(file_path, snapshot_path)
+                             zip_file.write(file_path, arcname=arc_name)
+
+                 # Reset the cursor to the beginning of the stream
+                 zip_io.seek(0)
+
+                 # Return the ZIP to the client
+                 filename = f"yourbench_{session_id}_dataset.zip"
+                 return StreamingResponse(
+                     zip_io,
+                     media_type="application/zip",
+                     headers={"Content-Disposition": f"attachment; filename={filename}"}
+                 )
+
+             except Exception as e:
+                 logging.error(f"Error downloading dataset: {str(e)}")
+                 raise HTTPException(
+                     status_code=500,
+                     detail=f"Error downloading dataset: {str(e)}"
+                 )
+     except Exception as e:
+         logging.error(f"General error: {str(e)}")
+         raise HTTPException(
+             status_code=500,
+             detail=f"Error during download: {str(e)}"
+         )
backend/routes/evaluation.py ADDED
@@ -0,0 +1,150 @@
+ from fastapi import APIRouter, HTTPException
+ from typing import Dict, Any
+ import os
+ from tasks.evaluationTask import EvaluationTask
+
+ router = APIRouter(tags=["evaluation"])
+
+ # Store active evaluation tasks by session_id
+ active_evaluation_tasks = {}
+
+ @router.post("/evaluate-benchmark")
+ async def evaluate_benchmark(data: Dict[str, Any]):
+     """
+     Start the evaluation of a benchmark for a given session
+
+     Args:
+         data: Dictionary containing session_id
+
+     Returns:
+         Dictionary with status and initial logs
+     """
+     session_id = data.get("session_id")
+
+     if not session_id:
+         return {"error": "Missing or invalid session ID"}
+
+     # Check whether an evaluation is already running for this session
+     if session_id in active_evaluation_tasks:
+         evaluation_task = active_evaluation_tasks[session_id]
+         # If the evaluation is already finished, a new one can be started
+         if evaluation_task.is_task_completed():
+             # Remove the old task
+             del active_evaluation_tasks[session_id]
+         else:
+             # An evaluation is already in progress
+             return {
+                 "status": "already_running",
+                 "message": "An evaluation is already running for this session",
+                 "logs": evaluation_task.get_logs()
+             }
+
+     try:
+         # Dataset name based on the session ID
+         dataset_name = f"yourbench_{session_id}"
+
+         # Create and start a new evaluation task
+         evaluation_task = EvaluationTask(session_uid=session_id, dataset_name=dataset_name)
+         active_evaluation_tasks[session_id] = evaluation_task
+
+         # Start the evaluation asynchronously
+         evaluation_task.run()
+
+         # Retrieve the initial logs
+         initial_logs = evaluation_task.get_logs()
+
+         return {
+             "status": "started",
+             "message": f"Evaluation started for benchmark {dataset_name}",
+             "logs": initial_logs
+         }
+     except Exception as e:
+         return {
+             "status": "error",
+             "error": str(e),
+             "message": f"Error starting evaluation: {str(e)}"
+         }
+
+ @router.get("/evaluation-logs/{session_id}")
+ async def get_evaluation_logs(session_id: str):
+     """
+     Retrieve the logs of a running evaluation
+
+     Args:
+         session_id: Session ID to retrieve logs for
+
+     Returns:
+         Dictionary with logs and completion status
+     """
+     if session_id not in active_evaluation_tasks:
+         raise HTTPException(status_code=404, detail="Evaluation task not found")
+
+     evaluation_task = active_evaluation_tasks[session_id]
+     logs = evaluation_task.get_logs()
+     is_completed = evaluation_task.is_task_completed()
+
+     # Retrieve results if available and the evaluation is finished
+     results = None
+     if is_completed and hasattr(evaluation_task, 'results') and evaluation_task.results:
+         results = evaluation_task.results
+
+     return {
+         "logs": logs,
+         "is_completed": is_completed,
+         "results": results
+     }
+
+ @router.get("/evaluation-results/{session_id}")
+ async def get_evaluation_results(session_id: str):
+     """
+     Retrieve results of a completed evaluation
+
+     Args:
+         session_id: Session ID to retrieve results for
+
+     Returns:
+         Dictionary with evaluation results
+     """
+     # First, check if the task is in memory
+     if session_id in active_evaluation_tasks:
+         evaluation_task = active_evaluation_tasks[session_id]
+
+         if not evaluation_task.is_task_completed():
+             return {
+                 "success": False,
+                 "message": "Evaluation is still in progress"
+             }
+
+         if hasattr(evaluation_task, 'results') and evaluation_task.results:
+             return {
+                 "success": True,
+                 "results": evaluation_task.results
+             }
+
+     # If we get here, either the task is not in memory or it doesn't have results
+     # Try to load results from file
+     try:
+         # Construct the path to the results file
+         results_path = f"uploaded_files/{session_id}/lighteval_results/models_comparison.json"
+
+         # Check if the file exists
+         if not os.path.exists(results_path):
+             return {
+                 "success": False,
+                 "message": "No evaluation results found for this session"
+             }
+
+         # Read the file
+         import json
+         with open(results_path, 'r') as f:
+             results = json.load(f)
+
+         return {
+             "success": True,
+             "results": results
+         }
+     except Exception as e:
+         return {
+             "success": False,
+             "message": f"Error retrieving evaluation results: {str(e)}"
+         }
backend/routes/health.py ADDED
@@ -0,0 +1,13 @@
+ from fastapi import APIRouter
+
+ router = APIRouter(tags=["health"])
+
+ @router.get("/health")
+ async def health_check():
+     """
+     Check if the API is running properly
+
+     Returns:
+         Dictionary with status
+     """
+     return {"status": "ok"}
backend/routes/questions.py ADDED
@@ -0,0 +1,87 @@
+ from fastapi import APIRouter, HTTPException
+ import random
+ from datasets import load_dataset
+ from huggingface_hub import HfApi, dataset_info
+ import os
+
+ router = APIRouter(tags=["benchmark"])
+
+ @router.get("/benchmark-questions/{session_id}")
+ async def get_benchmark_questions(session_id: str):
+     """
+     Get example questions from the generated benchmark
+
+     Args:
+         session_id: Session ID for the benchmark
+
+     Returns:
+         Dictionary with sample questions from the dataset
+     """
+     try:
+         # Dataset path on Hugging Face
+         dataset_repo_id = f"yourbench/yourbench_{session_id}"
+
+         # Initialize response
+         response = {
+             "success": False,
+             "questions": [],
+             "dataset_url": f"https://huggingface.co/datasets/{dataset_repo_id}"
+         }
+
+         # Try to load the dataset
+         questions = []
+
+         try:
+             # Try to load single-shot questions directly with the config name
+             single_dataset = load_dataset(dataset_repo_id, 'single_shot_questions')
+             if single_dataset and len(single_dataset['train']) > 0:
+                 # Get a random sample (up to 2) from single-shot questions
+                 sample_indices = random.sample(range(len(single_dataset['train'])), min(2, len(single_dataset['train'])))
+                 for idx in sample_indices:
+                     questions.append({
+                         "id": str(idx),
+                         "question": single_dataset['train'][idx].get("question", ""),
+                         "type": "single_shot"
+                     })
+                 print(f"Loaded {len(questions)} single-shot questions")
+         except Exception as e:
+             print(f"Error loading single-shot questions: {str(e)}")
+
+         try:
+             # Try to load multi-hop questions if needed
+             if len(questions) < 2:
+                 multi_dataset = load_dataset(dataset_repo_id, 'multi_hop_questions')
+                 if multi_dataset and len(multi_dataset['train']) > 0:
+                     # Get remaining questions from multi-hop questions
+                     remaining = 2 - len(questions)
+                     sample_indices = random.sample(range(len(multi_dataset['train'])), min(remaining, len(multi_dataset['train'])))
+                     for idx in sample_indices:
+                         questions.append({
+                             "id": str(idx),
+                             "question": multi_dataset['train'][idx].get("question", ""),
+                             "type": "multi_hop"
+                         })
+                     print(f"Loaded {len(questions)} multi-hop questions")
+         except Exception as e:
+             print(f"Error loading multi-hop questions: {str(e)}")
+
+         # If we couldn't load any questions, the dataset might not exist
+         if len(questions) == 0:
+             # Check if we have a directory for this session locally as fallback
+             session_dir = os.path.join("uploaded_files", session_id)
+             if not os.path.exists(session_dir):
+                 raise HTTPException(status_code=404, detail="Dataset not found")
+
+         # Update the response
+         response["success"] = len(questions) > 0
+         response["questions"] = questions
+         return response
+
+     except HTTPException:
+         # Re-raise HTTP exceptions
+         raise
+     except Exception as e:
+         return {
+             "success": False,
+             "error": f"Error retrieving benchmark questions: {str(e)}"
+         }
backend/routes/upload.py ADDED
@@ -0,0 +1,52 @@
+ from fastapi import APIRouter, UploadFile, File
+ import os
+ import shutil
+ import uuid
+
+ router = APIRouter(tags=["files"])
+
+ # Per-session file storage (imported in main.py)
+ session_files = {}
+
+ # Root folder for uploads
+ UPLOAD_ROOT = "uploaded_files"
+ os.makedirs(UPLOAD_ROOT, exist_ok=True)
+
+ @router.post("/upload")
+ async def upload_file(file: UploadFile = File(...)):
+     """
+     Upload a file to the server and generate a session ID
+
+     Args:
+         file: The file to upload
+
+     Returns:
+         Dictionary with filename, status and session_id
+     """
+     # Check that the file is a PDF, TXT, HTML or MD
+     if not file.filename.endswith(('.pdf', '.txt', '.html', '.md')):
+         return {"error": "Only PDF, TXT, HTML and MD files are accepted"}
+
+     # Generate a session ID for this file
+     session_id = str(uuid.uuid4())
+
+     # Create the session directory structure
+     session_dir = os.path.join(UPLOAD_ROOT, session_id)
+     uploaded_files_dir = os.path.join(session_dir, "uploaded_files")
+     os.makedirs(uploaded_files_dir, exist_ok=True)
+
+     # Create the full path to save the file
+     file_path = os.path.join(uploaded_files_dir, file.filename)
+
+     # Save the file
+     with open(file_path, "wb") as buffer:
+         shutil.copyfileobj(file.file, buffer)
+
+     # Store file path for later use
+     session_files[session_id] = file_path
+
+     # Debugging: check the state of session_files
+     print(f"DEBUG UPLOAD: File uploaded with session_id: {session_id}")
+     print(f"DEBUG UPLOAD: Current session_files: {session_files}")
+
+     return {"filename": file.filename, "status": "uploaded", "session_id": session_id}
backend/tasks/__init__.py ADDED
@@ -0,0 +1,3 @@
+ """
+ Tasks module for YourbenchSimpleDemo
+ """
backend/tasks/createBench.py ADDED
@@ -0,0 +1,317 @@
+ """
+ Task to ingest and transform documents to markdown using yourbench
+ """
+ import os
+ import time
+ import pathlib
+ import subprocess
+ import threading
+ from typing import Optional, List, Tuple, Dict, Any
+ import yaml
+
+ from loguru import logger
+
+
+ class CreateBenchTask:
+     """
+     Task to ingest and transform documents to markdown using yourbench
+     """
+
+     def __init__(self, session_uid: str, config_path: Optional[str] = None):
+         """
+         Initialize the ingestion task
+
+         Args:
+             session_uid: Session ID for this task
+             config_path: Path to the configuration file, will be generated if None
+         """
+         self.session_uid = session_uid
+         self.logs: List[str] = []
+         self.is_completed = False
+         self.process = None
+         self.is_running_flag = threading.Event()
+
+         # Default config path if not provided
+         if config_path is None:
+             config_path = f"uploaded_files/{session_uid}/config.yml"
+         self.config_path = config_path
+
+         # Command to run yourbench - modified to avoid error with uv run
+         self.command = ["yourbench", "run", "--config", str(self.config_path)]
+
+         self._add_log("[INFO] Initializing ingestion task")
+         self._add_log(f"[INFO] Using configuration file: {self.config_path}")
+
+     def _add_log(self, message: str) -> None:
+         """
+         Add a log message to the logs list
+
+         Args:
+             message: Log message to add
+         """
+         if message not in self.logs:  # Avoid duplicates
+             self.logs.append(message)
+             # Force copy of the list to avoid reference problems
+             self.logs = self.logs.copy()
+             # Log to system logs
+             logger.info(f"[{self.session_uid}] {message}")
+
+     def get_logs(self) -> List[str]:
+         """
+         Get all logs for this task
+
+         Returns:
+             List of log messages
+         """
+         return self.logs.copy()  # Return a copy to avoid reference problems
+
+     def is_task_completed(self) -> bool:
+         """
+         Check if the task is completed
+
+         Returns:
+             True if completed, False otherwise
+         """
+         return self.is_completed
+
+     def is_running(self) -> bool:
+         """
+         Check if the process is running
+
+         Returns:
+             True if running, False otherwise
+         """
+         return self.is_running_flag.is_set()
+
+     def stop(self) -> None:
+         """
+         Stop the process if it's running
+         """
+         if self.process and self.is_running():
+             self._add_log("[INFO] Stopping ingestion process")
+             try:
+                 self.process.terminate()
+                 # Wait 5 seconds for termination
+                 self.process.wait(timeout=5)
+             except subprocess.TimeoutExpired:
+                 self._add_log("[WARN] Process not responding, forcing termination")
+                 self.process.kill()
+             finally:
+                 self.is_running_flag.clear()
+                 self.is_completed = True
+                 self._add_log("[INFO] Ingestion process stopped")
+
+     def _capture_output(self) -> None:
+         """
+         Capture and process the output from the yourbench process
+         """
+         self._add_log("[INFO] Starting output capture")
+
+         try:
+             while self.is_running() and self.process:
+                 line = self.process.stdout.readline()
+                 if not line:
+                     # If no line is read and the process is no longer running
+                     if self.process.poll() is not None:
+                         self.is_running_flag.clear()
+                         break
+                     # Otherwise, wait a bit and continue
+                     time.sleep(0.1)
+                     continue
+
+                 # Process the output line
+                 line = line.strip()
+                 if line:
+                     # Filter and format the line as needed
+                     if "ERROR" in line:
+                         self._add_log(f"[ERROR] {line}")
+                     elif "WARNING" in line:
+                         self._add_log(f"[WARN] {line}")
+                     else:
+                         # Detect completed stages
+                         if "Completed stage:" in line:
+                             stage = line.split("'")[1] if "'" in line else line
+                             self._add_log(f"[SUCCESS] Stage completed: {stage}")
+                         else:
+                             self._add_log(f"[INFO] {line}")
+
+             # Check exit code once the process is finished
+             if self.process:
+                 exit_code = self.process.poll()
+                 if exit_code == 0:
+                     self._add_log("[SUCCESS] Ingestion process completed successfully")
+                 else:
+                     self._add_log(f"[ERROR] Ingestion process terminated with error code: {exit_code}")
+         except Exception as e:
+             self._add_log(f"[ERROR] Error during output capture: {str(e)}")
+         finally:
+             self.is_completed = True
+             self.is_running_flag.clear()
+             self._add_log("[INFO] Output capture completed")
+
+     def run(self, token: Optional[str] = None) -> None:
+         """
+         Run the ingestion task
+
+         Args:
+             token: Hugging Face token
+         """
+         try:
+             self._add_log("[INFO] Starting ingestion process")
+
+             # Check if the configuration file exists
+             if not os.path.exists(self.config_path):
+                 raise FileNotFoundError(f"Configuration file does not exist: {self.config_path}")
+
+             # Examine the configuration to get information
+             try:
+                 with open(self.config_path, 'r') as f:
+                     config_yaml = yaml.safe_load(f)
+
+                 # Get source and destination paths
+                 source_dir = config_yaml.get("pipeline", {}).get("ingestion", {}).get("source_documents_dir", "")
+                 output_dir = config_yaml.get("pipeline", {}).get("ingestion", {}).get("output_dir", "")
+
+                 if source_dir:
+                     self._add_log(f"[INFO] Source directory: {source_dir}")
+                 if output_dir:
+                     self._add_log(f"[INFO] Output directory: {output_dir}")
+
+                 # List files to process if the directory exists
+                 if source_dir and os.path.exists(source_dir):
+                     files = os.listdir(source_dir)
+                     if files:
+                         self._add_log(f"[INFO] Files to process: {', '.join(files)}")
+                     else:
+                         self._add_log("[WARN] No files found in source directory")
+
+             except Exception as e:
+                 self._add_log(f"[WARN] Unable to read configuration: {str(e)}")
+
+             # Environment preparation
+             env = os.environ.copy()
+
+             # Explicitly define environment variables for authentication
+             hf_token = os.getenv("HF_TOKEN")
+             if hf_token:
+                 # Explicitly export these variables for yourbench
+                 env["HF_TOKEN"] = hf_token
+                 env["HUGGING_FACE_HUB_TOKEN"] = hf_token
+                 env["HF_ORGANIZATION"] = os.getenv("HF_ORGANIZATION", "yourbench")
+                 self._add_log("[INFO] Environment variables HF_TOKEN, HUGGING_FACE_HUB_TOKEN and HF_ORGANIZATION exported")
+
+             # In development mode, only simulate ingestion
+             if os.environ.get("DEVELOPMENT_MODE", "").lower() == "true":
+                 self._add_log("[INFO] Development mode enabled, simulating ingestion")
+                 self._simulate_ingestion_process()
+                 return
+
+             # Start the process
+             self._add_log(f"[INFO] Executing command: {' '.join(self.command)}")
+
+             self.process = subprocess.Popen(
+                 self.command,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.STDOUT,
+                 text=True,
+                 bufsize=1,
+                 universal_newlines=True,
+                 env=env
+             )
+
+             # Mark the process as running
+             self.is_running_flag.set()
+
+             # Start a thread to capture output
+             output_thread = threading.Thread(target=self._capture_output)
+             output_thread.daemon = True
+             output_thread.start()
+
+             self._add_log(f"[INFO] Process started with PID: {self.process.pid}")
+
+         except Exception as e:
+             self._add_log(f"[ERROR] Error starting ingestion process: {str(e)}")
+             self.is_completed = True
+
+     def _simulate_ingestion_process(self) -> None:
+         """
+         Simulate the ingestion process for testing/development
+         This will be removed in production
+         """
+         # This method is just to simulate logs during development
+         # It will be removed in production
+
+         threading.Thread(target=self._simulate_logs).start()
+
+     def _simulate_logs(self) -> None:
+         """
+         Simulate logs for testing/development
+         This will be used when yourbench isn't installed or in development mode
+         """
+         # Log simulation (used when yourbench is not available)
+         self._add_log("[INFO] Simulation mode enabled (yourbench is not actually running)")
+
+         # Get filenames from source directory
+         source_files = []
+         try:
+             with open(self.config_path, 'r') as f:
+                 config_yaml = yaml.safe_load(f)
+
+             source_dir = config_yaml.get("pipeline", {}).get("ingestion", {}).get("source_documents_dir", "")
+             if source_dir and os.path.exists(source_dir):
+                 source_files = [f for f in os.listdir(source_dir)
+                                 if os.path.isfile(os.path.join(source_dir, f))]
+         except Exception:
+             source_files = ["document.pdf", "document.txt"]  # Fallback
+
+         # Create output directory if it doesn't exist
+         output_dir = ""
+         try:
+             output_dir = config_yaml.get("pipeline", {}).get("ingestion", {}).get("output_dir", "")
+             if output_dir:
+                 os.makedirs(output_dir, exist_ok=True)
+         except Exception:
+             pass
+
+         # Simulate file processing
+         time.sleep(1)
+         self._add_log("[INFO] Initializing document ingestion")
+         time.sleep(1.5)
+         self._add_log("[INFO] Loading configuration parameters")
+         time.sleep(1)
+         self._add_log("[INFO] Verifying source files")
+
+         # Process each file
+         for file in source_files:
+             time.sleep(1.5)
+             self._add_log(f"[INFO] Processing file: {file}")
+             time.sleep(2)
+             self._add_log(f"[INFO] Extracting content from {file}")
+             time.sleep(1.5)
+             self._add_log(f"[INFO] Converting to markdown: {file}")
+
+             # Create a simulated markdown file if an output directory is defined
+             if output_dir:
+                 base_name = os.path.splitext(file)[0]
+                 output_file = os.path.join(output_dir, f"{base_name}.md")
+                 try:
+                     with open(output_file, 'w') as f:
+                         f.write(f"# {base_name}\n\n")
+                         f.write("This is a markdown document automatically generated by the simulation.\n\n")
+                         f.write("## Section 1\n\n")
+                         f.write("Content of section 1...\n\n")
+                         f.write("## Section 2\n\n")
+                         f.write("Content of section 2...\n\n")
+                     self._add_log(f"[INFO] Markdown file created: {output_file}")
+                 except Exception as e:
+                     self._add_log(f"[ERROR] Error creating markdown file: {str(e)}")
+
+         time.sleep(2)
+         self._add_log("[INFO] Finalizing processing")
+         time.sleep(1)
+         self._add_log("[SUCCESS] Stage completed: ingestion")
+         time.sleep(0.5)
+         self._add_log("[SUCCESS] Ingestion completed successfully")
+
+         # Mark task as completed
+         self.is_completed = True
backend/tasks/createBenchConfigFile.py ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Task to create and save the configuration file
3
+ """
4
+ import os
5
+ import pathlib
6
+ import uuid
7
+ import yaml
8
+ import shutil
9
+ import time
10
+ import threading
11
+ from typing import Optional, Dict, Any, List, Tuple
12
+
13
+ from loguru import logger
14
+ from huggingface_hub import HfApi
15
+
16
+
17
+ class CreateBenchConfigTask:
18
+ """
19
+ Task to create and save a configuration file for YourbenchSimpleDemo
20
+ """
21
+
22
+ def __init__(self, session_uid: Optional[str] = None):
23
+ """
24
+ Initialize the task with a session ID
25
+
26
+ Args:
27
+ session_uid: Optional session ID, will be generated if None
28
+ """
29
+ self.session_uid = session_uid or str(uuid.uuid4())
30
+ self.logs: List[str] = []
31
+ self.is_completed = False
32
+ self.is_running_flag = threading.Event()
33
+ self.thread = None
34
+ self._add_log("[INFO] Initializing configuration creation task")
35
+
36
+ def _add_log(self, message: str) -> None:
37
+ """
38
+ Add a log message to the logs list
39
+
40
+ Args:
41
+ message: Log message to add
42
+ """
43
+ if message not in self.logs: # Avoid duplicates
44
+ self.logs.append(message)
45
+ # Force a copy of the list to avoid reference issues
46
+ self.logs = self.logs.copy()
47
+ # Log to system logs
48
+ logger.info(f"[{self.session_uid}] {message}")
49
+
50
+ def get_logs(self) -> List[str]:
51
+ """
52
+ Get all logs for this task
53
+
54
+ Returns:
55
+ List of log messages
56
+ """
57
+ return self.logs.copy() # Retourner une copie pour éviter les problèmes de référence
58
+
59
+ def save_uploaded_file(self, file_path: str) -> str:
60
+ """
61
+ Process the uploaded file that is already in the correct directory
62
+
63
+ Args:
64
+ file_path: Path to the uploaded file
65
+
66
+ Returns:
67
+ Path to the file (same as input)
68
+ """
69
+ try:
70
+ # The file is already in the correct location: uploaded_files/{session_id}/uploaded_files/
71
+ # Just log that we're processing it and return the path
72
+ self._add_log(f"[INFO] Processing file: {os.path.basename(file_path)}")
73
+ return file_path
74
+ except Exception as e:
75
+ error_msg = f"Error processing file: {str(e)}"
76
+ self._add_log(f"[ERROR] {error_msg}")
77
+ raise RuntimeError(error_msg)
78
+
79
+ def generate_base_config(self, hf_org: str, hf_dataset_name: str) -> Dict[str, Any]:
80
+ """
81
+ Create the base configuration dictionary
82
+
83
+ Args:
84
+ hf_org: Hugging Face organization name
85
+ hf_dataset_name: Hugging Face dataset name
86
+
87
+ Returns:
88
+ Configuration dictionary
89
+ """
90
+ self._add_log(f"[INFO] Generating base configuration for {hf_dataset_name}")
91
+
92
+ # Check if HF token is available
93
+ hf_token = os.getenv("HF_TOKEN")
94
+ if not hf_token:
95
+ raise RuntimeError("HF_TOKEN environment variable is not defined")
96
+
97
+ return {
98
+ "hf_configuration": {
99
+ "token": "$HF_TOKEN", # Utiliser directement le token de l'environnement
100
+ "hf_organization": "$HF_ORGANIZATION",
101
+ "private": True,
102
+ "hf_dataset_name": hf_dataset_name,
103
+ "concat_if_exist": False,
104
+ },
105
+ "model_list": [
106
+ {
107
+ "model_name": "Qwen/Qwen2.5-VL-72B-Instruct",
108
+ "provider": "novita",
109
+ "api_key": "$HF_TOKEN",
110
+ "max_concurrent_requests": 32,
111
+ },
112
+ {
113
+ "model_name": "Qwen/Qwen2.5-72B-Instruct",
114
+ "provider": "novita",
115
+ "api_key": "$HF_TOKEN",
116
+ "max_concurrent_requests": 32,
117
+ },
118
+ ],
119
+
120
+ "model_roles": {
121
+ "ingestion": ["Qwen/Qwen2.5-VL-72B-Instruct"],
122
+ "summarization": ["Qwen/Qwen2.5-72B-Instruct"],
123
+ "chunking": ["intfloat/multilingual-e5-large-instruct"],
124
+ "single_shot_question_generation": ["Qwen/Qwen2.5-72B-Instruct"],
125
+ "multi_hop_question_generation": ["Qwen/Qwen2.5-72B-Instruct"],
126
+ },
127
+ "pipeline": {
128
+ "ingestion": {
129
+ "source_documents_dir": f"uploaded_files/{self.session_uid}/uploaded_files/",
130
+ "output_dir": f"uploaded_files/{self.session_uid}/ingested",
131
+ "run": True,
132
+ },
133
+ "upload_ingest_to_hub": {
134
+ "source_documents_dir": f"uploaded_files/{self.session_uid}/ingested",
135
+ "run": True, # Réactivé pour l'upload sur le Hub
136
+ },
137
+ "summarization": {
138
+ "run": True,
139
+ },
140
+ "chunking": {
141
+ "run": True,
142
+ "chunking_configuration": {
143
+ "l_min_tokens": 64,
144
+ "l_max_tokens": 128,
145
+ "tau_threshold": 0.8,
146
+ "h_min": 2,
147
+ "h_max": 5,
148
+ "num_multihops_factor": 2,
149
+ },
150
+ },
151
+ "single_shot_question_generation": {
152
+ "run": True,
153
+ "additional_instructions": "Generate questions to test a curious adult",
154
+ "chunk_sampling": {
155
+ "mode": "count",
156
+ "value": 5,
157
+ "random_seed": 123,
158
+ },
159
+ },
160
+ "multi_hop_question_generation": {
161
+ "run": True,
162
+ "additional_instructions": "Generate questions to test a curious adult",
163
+ "chunk_sampling": {
164
+ "mode": "percentage",
165
+ "value": 0.3,
166
+ "random_seed": 42,
167
+ },
168
+ },
169
+ "lighteval": {
170
+ "run": True,
171
+ },
172
+ },
173
+ }
174
+
175
+ def save_yaml_file(self, config: Dict[str, Any], path: str) -> str:
176
+ """
177
+ Save the given configuration dictionary to a YAML file
178
+
179
+ Args:
180
+ config: Configuration dictionary
181
+ path: Path to save the file
182
+
183
+ Returns:
184
+ Path to the saved file
185
+ """
186
+ try:
187
+ # Create directory if it doesn't exist
188
+ os.makedirs(os.path.dirname(path), exist_ok=True)
189
+
190
+ with open(path, "w") as file:
191
+ yaml.dump(config, file, default_flow_style=False, sort_keys=False)
192
+
193
+ self._add_log(f"[INFO] Configuration saved: {path}")
194
+ return path
195
+ except Exception as e:
196
+ error_msg = f"Error saving configuration: {str(e)}"
197
+ self._add_log(f"[ERROR] {error_msg}")
198
+ raise RuntimeError(error_msg)
199
+
200
+ def _run_task(self, file_path: str) -> str:
201
+ """
202
+ Internal method to run the task in a separate thread
203
+
204
+ Args:
205
+ file_path: Path to the uploaded file
206
+
207
+ Returns:
208
+ Path to the configuration file
209
+ """
210
+ try:
211
+ # Use the organization configured in the environment
212
+ org_name = os.getenv("HF_ORGANIZATION")
213
+
214
+ # Check if HF token is available
215
+ hf_token = os.getenv("HF_TOKEN")
216
+ if not hf_token:
217
+ raise RuntimeError("HF_TOKEN environment variable is not defined")
218
+
219
+ self._add_log(f"[INFO] Organization: {org_name}")
220
+
221
+ time.sleep(0.5) # Simulate delay
222
+
223
+ # Save the uploaded file
224
+ saved_file_path = self.save_uploaded_file(file_path)
225
+
226
+ time.sleep(1) # Simulate delay
227
+
228
+ # Path for the config file
229
+ config_dir = pathlib.Path(f"uploaded_files/{self.session_uid}")
230
+ config_path = config_dir / "config.yml"
231
+
232
+ # Generate dataset name based on session ID
233
+ dataset_name = f"yourbench_{self.session_uid}"
234
+ self._add_log(f"[INFO] Dataset name: {dataset_name}")
235
+
236
+ time.sleep(0.8) # Simulate delay
237
+
238
+ # Generate and save the configuration
239
+ config = self.generate_base_config(org_name, dataset_name)
240
+
241
+ time.sleep(1.2) # Simulate delay
242
+
243
+ config_file_path = self.save_yaml_file(config, str(config_path))
244
+
245
+ self._add_log(f"[INFO] Configuration generated successfully: {config_file_path}")
246
+
247
+ # Simulate additional processing
248
+ time.sleep(1.5) # Simulate delay
249
+ self._add_log("[INFO] Starting ingestion")
250
+
251
+ time.sleep(2) # Simulate delay
252
+ self._add_log(f"[INFO] Processing file: {dataset_name}")
253
+
254
+ time.sleep(2) # Simulate delay
255
+ self._add_log("[SUCCESS] Stage completed: config_generation")
256
+
257
+ # Task completed
258
+ self.mark_task_completed()
259
+
260
+ return str(config_path)
261
+ except Exception as e:
262
+ error_msg = f"Error generating configuration: {str(e)}"
263
+ self._add_log(f"[ERROR] {error_msg}")
264
+ self.mark_task_completed()
265
+ raise RuntimeError(error_msg)
266
+
267
+ def run(self, file_path: str, token: Optional[str] = None) -> str:
268
+ """
269
+ Run the task to create and save the configuration file asynchronously
270
+
271
+ Args:
272
+ file_path: Path to the uploaded file
273
+ token: Hugging Face token (not used, using HF_TOKEN from environment)
274
+
275
+ Returns:
276
+ Path to the configuration file
277
+ """
278
+ # Mark the task as running
279
+ self.is_running_flag.set()
280
+
281
+ # Start the task in a separate thread
282
+ self.thread = threading.Thread(target=self._run_task, args=(file_path,))
283
+ self.thread.daemon = True
284
+ self.thread.start()
285
+
286
+ # Return the expected config path
287
+ return f"uploaded_files/{self.session_uid}/config.yml"
288
+
289
+ def is_running(self) -> bool:
290
+ """
291
+ Check if the task is running
292
+
293
+ Returns:
294
+ True if running, False otherwise
295
+ """
296
+ return self.is_running_flag.is_set() and not self.is_completed
297
+
298
+ def is_task_completed(self) -> bool:
299
+ """
300
+ Check if the task is completed
301
+
302
+ Returns:
303
+ True if completed, False otherwise
304
+ """
305
+ return self.is_completed
306
+
307
+ def mark_task_completed(self) -> None:
308
+ """
309
+ Mark the task as completed
310
+ """
311
+ self.is_completed = True
312
+ self.is_running_flag.clear()
313
+ self._add_log("[INFO] Configuration generation task completed")
backend/tasks/evaluationTask.py ADDED
@@ -0,0 +1,471 @@
1
+ """
2
+ Task to evaluate models on a YourBench dataset using LightEval
3
+ """
4
+ import os
5
+ import sys
6
+ import json
7
+ import time
8
+ import tempfile
9
+ import asyncio
10
+ import threading
11
+ from pathlib import Path
12
+ from typing import Optional, List, Dict, Any, Tuple
13
+
14
+ from loguru import logger
15
+ from huggingface_hub import HfApi, CommitOperationAdd
16
+
17
+ from tasks.yourbench_lighteval_task import create_yourbench_task
18
+
19
+
20
+ class EvaluationTask:
21
+ """
22
+ Task to evaluate models using LightEval on a YourBench dataset
23
+ """
24
+
25
+ def __init__(self, session_uid: str, dataset_name: str):
26
+ """
27
+ Initialize the evaluation task
28
+
29
+ Args:
30
+ session_uid: Session ID for this task
31
+ dataset_name: Name of the dataset to evaluate
32
+ """
33
+ self.session_uid = session_uid
34
+ self.dataset_name = dataset_name
35
+ self.logs: List[str] = []
36
+ self.is_completed = False
37
+ self.organization = os.getenv("HF_ORGANIZATION", "yourbench")
38
+ self.results: Dict[str, Any] = {}
39
+ self.output_dir = f"uploaded_files/{session_uid}/lighteval_results"
40
+
41
+ # Models to evaluate - can be modified to allow customization
42
+ self.models = [
43
+ ("Qwen/Qwen2.5-72B-Instruct", "novita"),
44
+ ("Qwen/QwQ-32B", "novita"),
45
+ ]
46
+
47
+ self._add_log("[INFO] Initializing evaluation task")
48
+ self._add_log(f"[INFO] Dataset to evaluate: {self.organization}/{dataset_name}")
49
+ self._add_log(f"[INFO] Output directory: {self.output_dir}")
50
+
51
+ def _add_log(self, message: str) -> None:
52
+ """
53
+ Add a log message to the logs list
54
+
55
+ Args:
56
+ message: Log message to add
57
+ """
58
+ if message not in self.logs: # Avoid duplicates
59
+ self.logs.append(message)
60
+ # Force copy of the list to avoid reference problems
61
+ self.logs = self.logs.copy()
62
+ # Record in system logs
63
+ logger.info(f"[{self.session_uid}] {message}")
64
+
65
+ def get_logs(self) -> List[str]:
66
+ """
67
+ Get all logs for this task
68
+
69
+ Returns:
70
+ List of log messages
71
+ """
72
+ return self.logs.copy() # Return a copy to avoid reference issues
73
+
74
+ def is_task_completed(self) -> bool:
75
+ """
76
+ Check if the task is completed
77
+
78
+ Returns:
79
+ True if completed, False otherwise
80
+ """
81
+ return self.is_completed
82
+
83
+ async def _evaluate_model(self, model_info: Tuple[str, str]) -> Dict[str, Any]:
84
+ """
85
+ Evaluate a specific model
86
+
87
+ Args:
88
+ model_info: Tuple of (model_name, provider)
89
+
90
+ Returns:
91
+ Dictionary with evaluation results
92
+ """
93
+ model_name, provider = model_info
94
+ self._add_log(f"[INFO] Starting evaluation for {model_name} with {provider}")
95
+
96
+ # Create output directory
97
+ os.makedirs(self.output_dir, exist_ok=True)
98
+
99
+ # Define full dataset path
100
+ dataset_path = f"{self.organization}/{self.dataset_name}"
101
+
102
+ # Create temporary file
103
+ temp_file_path = tempfile.mktemp(suffix=".py")
104
+ self._add_log(f"[INFO] Creating temporary file for {model_name}: {temp_file_path}")
105
+
106
+ with open(temp_file_path, 'w') as temp_file:
107
+ temp_file.write(f"""
108
+ import os
109
+ import sys
110
+ sys.path.append("{os.getcwd()}")
111
+
112
+ from tasks.yourbench_lighteval_task import create_yourbench_task
113
+
114
+ # Create yourbench task
115
+ yourbench = create_yourbench_task("{dataset_path}", "lighteval")
116
+
117
+ # Define TASKS_TABLE needed by lighteval
118
+ TASKS_TABLE = [yourbench]
119
+ """)
120
+
121
+ # Build lighteval command args
122
+ cmd_args = [
123
+ "lighteval",
124
+ "endpoint",
125
+ "inference-providers",
126
+ f"model={model_name},provider={provider}",
127
+ "custom|yourbench|0|0",
128
+ "--custom-tasks",
129
+ temp_file_path,
130
+ "--max-samples", "5",
131
+ "--output-dir", self.output_dir,
132
+ "--save-details",
133
+ "--no-push-to-hub"
134
+ ]
135
+
136
+ self._add_log(f"[INFO] Running command for {model_name}: {' '.join(cmd_args)}")
137
+
138
+ results = {
139
+ "model_name": model_name,
140
+ "provider": provider,
141
+ "success": False,
142
+ "error": None,
143
+ "results": None,
144
+ "return_code": None
145
+ }
146
+
147
+ try:
148
+ # Prepare environment with needed tokens
149
+ env = os.environ.copy()
150
+ hf_token = os.getenv("HF_TOKEN")
151
+ if hf_token:
152
+ env["HF_TOKEN"] = hf_token
153
+ env["HUGGING_FACE_HUB_TOKEN"] = hf_token
154
+ env["HF_ORGANIZATION"] = self.organization
155
+
156
+ # Run the process asynchronously
157
+ process = await asyncio.create_subprocess_exec(
158
+ *cmd_args,
159
+ stdout=asyncio.subprocess.PIPE,
160
+ stderr=asyncio.subprocess.PIPE,
161
+ env=env
162
+ )
163
+
164
+ # Wait for the process to complete
165
+ stdout, stderr = await process.communicate()
166
+
167
+ # Store return code
168
+ exit_code = process.returncode
169
+ results["return_code"] = exit_code
170
+
171
+ # Log output
172
+ if stdout:
173
+ stdout_lines = stdout.decode().strip().split('\n')
174
+ for line in stdout_lines[:5]: # Log only first 5 lines
175
+ self._add_log(f"[INFO] {model_name} - {line}")
176
+
177
+ # Log errors if any
178
+ if stderr and exit_code != 0:
179
+ stderr_lines = stderr.decode().strip().split('\n')
180
+ for line in stderr_lines[:5]: # Log only first 5 lines
181
+ self._add_log(f"[ERROR] {model_name} - {line}")
182
+
183
+ # Find any JSON result files - LightEval organizes by model name in different ways
184
+ result_files = []
185
+ results_dir = Path(self.output_dir) / "results"
186
+ if results_dir.exists():
187
+ # Recursively walk all directories to find JSON files
188
+ for json_file in results_dir.glob("**/*.json"):
189
+ # Check if the filename or path contains parts of the model name
190
+ model_parts = [
191
+ model_name, # Full name
192
+ model_name.replace('/', '_'), # Name with / replaced by _
193
+ model_name.split('/')[-1] # Just the model name without the organization
194
+ ]
195
+
196
+ if any(part in str(json_file) for part in model_parts):
197
+ result_files.append(json_file)
198
+
199
+ # Process the result files found
200
+ if result_files:
201
+ # Take the most recent file
202
+ result_files.sort(key=lambda x: x.stat().st_mtime, reverse=True)
203
+ latest_result = result_files[0]
204
+ self._add_log(f"[INFO] {model_name} - Found result file: {latest_result}")
205
+
206
+ try:
207
+ with open(latest_result, 'r') as f:
208
+ test_results = json.load(f)
209
+
210
+ # Check that the results contain the essential information
211
+ if (test_results and
212
+ isinstance(test_results, dict) and
213
+ "results" in test_results and
214
+ "all" in test_results["results"]):
215
+
216
+ # Record the results
217
+ results["results"] = test_results
218
+ results["success"] = True
219
+
220
+ # Display the accuracy
221
+ accuracy = test_results["results"]["all"]["accuracy"]
222
+ accuracy_stderr = test_results["results"]["all"]["accuracy_stderr"]
223
+ self._add_log(f"[SUCCESS] {model_name} - Accuracy: {accuracy:.4f} ± {accuracy_stderr:.4f}")
224
+ else:
225
+ results["error"] = "Incomplete or unexpected result format"
226
+ self._add_log(f"[WARNING] {model_name} - Unexpected result format")
227
+
228
+ except (json.JSONDecodeError, KeyError) as e:
229
+ results["error"] = f"Error reading results: {str(e)}"
230
+ self._add_log(f"[ERROR] {model_name} - {results['error']}")
231
+
232
+ # If no results were found
233
+ if not results["success"]:
234
+ if exit_code == 0:
235
+ results["error"] = "Execution completed without error but no results found"
236
+ self._add_log(f"[WARNING] {model_name} - {results['error']}")
237
+ else:
238
+ results["error"] = f"Execution error (code: {exit_code})"
239
+ self._add_log(f"[ERROR] {model_name} - {results['error']}")
240
+
241
+ except Exception as e:
242
+ results["error"] = f"Exception: {str(e)}"
243
+ self._add_log(f"[ERROR] Exception during evaluation of {model_name}: {str(e)}")
244
+ finally:
245
+ # Delete temporary file
246
+ try:
247
+ os.unlink(temp_file_path)
248
+ except:
249
+ pass
250
+
251
+ return results
252
+
253
+ async def _run_evaluations(self) -> List[Dict[str, Any]]:
254
+ """
255
+ Run evaluations for all models
256
+
257
+ Returns:
258
+ List of evaluation results
259
+ """
260
+ self._add_log(f"[INFO] Starting evaluations for {len(self.models)} models")
261
+
262
+ # Create tasks for each model
263
+ tasks = [self._evaluate_model(model) for model in self.models]
264
+
265
+ # Run all tasks concurrently and gather results
266
+ model_results = await asyncio.gather(*tasks, return_exceptions=True)
267
+
268
+ # Process results
269
+ results = []
270
+ for i, result in enumerate(model_results):
271
+ if isinstance(result, Exception):
272
+ # Handle exception
273
+ model_name, provider = self.models[i]
274
+ self._add_log(f"[ERROR] Evaluation failed for {model_name}: {str(result)}")
275
+ results.append({
276
+ "model_name": model_name,
277
+ "provider": provider,
278
+ "success": False,
279
+ "error": str(result),
280
+ "results": None,
281
+ "return_code": None
282
+ })
283
+ else:
284
+ # Valid result
285
+ results.append(result)
286
+
287
+ return results
288
+
289
+ def _format_comparison_results(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
290
+ """
291
+ Format results for easy comparison between models
292
+
293
+ Args:
294
+ results: List of evaluation results
295
+
296
+ Returns:
297
+ Dictionary with formatted comparison results
298
+ """
299
+ comparison = {
300
+ "metadata": {
301
+ "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
302
+ "dataset": f"{self.organization}/{self.dataset_name}",
303
+ "total_models_tested": len(results),
304
+ "successful_tests": len([r for r in results if r["success"]])
305
+ },
306
+ "models_comparison": []
307
+ }
308
+
309
+ # Lists of successful and failed models
310
+ successful_models = [r for r in results if r["success"]]
311
+ failed_models = [r for r in results if not r["success"]]
312
+
313
+ # Sort successful models by accuracy (highest first)
314
+ if successful_models:
315
+ sorted_successful = sorted(
316
+ successful_models,
317
+ key=lambda x: x["results"]["results"]["all"]["accuracy"],
318
+ reverse=True # Highest to lowest
319
+ )
320
+ else:
321
+ sorted_successful = []
322
+
323
+ # Sort failed models by name
324
+ sorted_failed = sorted(failed_models, key=lambda x: x["model_name"])
325
+
326
+ # Concatenate: successes first, then failures
327
+ sorted_results = sorted_successful + sorted_failed
328
+
329
+ # Create the entry for each model
330
+ for result in sorted_results:
331
+ model_result = {
332
+ "model_name": result["model_name"],
333
+ "provider": result["provider"],
334
+ "success": result["success"]
335
+ }
336
+
337
+ if result["success"]:
338
+ # Add accuracy metrics and evaluation time
339
+ model_result.update({
340
+ "accuracy": result["results"]["results"]["all"]["accuracy"],
341
+ "accuracy_stderr": result["results"]["results"]["all"]["accuracy_stderr"],
342
+ "evaluation_time": float(result["results"]["config_general"]["total_evaluation_time_secondes"])
343
+ })
344
+ else:
345
+ # Add the error
346
+ model_result["error"] = result.get("error", "Unknown reason")
347
+
348
+ comparison["models_comparison"].append(model_result)
349
+
350
+ return comparison
351
+
352
+ async def _upload_results_to_dataset(self, comparison_results: Dict[str, Any]) -> bool:
353
+ """
354
+ Upload evaluation results to the HuggingFace dataset
355
+
356
+ Args:
357
+ comparison_results: The formatted comparison results
358
+
359
+ Returns:
360
+ bool: True if upload succeeded, False otherwise
361
+ """
362
+ try:
363
+ # Timestamp used in the commit message below
364
+ timestamp = time.strftime("%Y%m%d_%H%M%S")
365
+ result_filename = f"lighteval_results.json"
366
+
367
+ # Create temporary file for upload
368
+ temp_file_path = tempfile.mktemp(suffix=".json")
369
+ with open(temp_file_path, 'w') as f:
370
+ json.dump(comparison_results, f, indent=2)
371
+
372
+ # Initialize HF API
373
+ hf_token = os.getenv("HF_TOKEN")
374
+ if not hf_token:
375
+ self._add_log("[ERROR] HF_TOKEN not found, cannot upload results to dataset")
376
+ return False
377
+
378
+ api = HfApi(token=hf_token)
379
+ dataset_id = f"{self.organization}/{self.dataset_name}"
380
+
381
+ # Prepare the file operation
382
+ operation = CommitOperationAdd(
383
+ path_in_repo=f"lighteval_results/{result_filename}",
384
+ path_or_fileobj=temp_file_path
385
+ )
386
+
387
+ # Upload the file
388
+ self._add_log(f"[INFO] Uploading results to dataset {dataset_id}")
389
+ api.create_commit(
390
+ repo_id=dataset_id,
391
+ repo_type="dataset",
392
+ operations=[operation],
393
+ commit_message=f"Add evaluation results from {timestamp}"
394
+ )
395
+
396
+ # Cleanup temporary file
397
+ os.unlink(temp_file_path)
398
+
399
+ self._add_log(f"[SUCCESS] Results uploaded to dataset {dataset_id} at lighteval_results/{result_filename}")
400
+ return True
401
+
402
+ except Exception as e:
403
+ self._add_log(f"[ERROR] Failed to upload results to dataset: {str(e)}")
404
+ return False
405
+
406
+ async def _process_evaluation_results(self, results: List[Dict[str, Any]]) -> None:
407
+ """
408
+ Process evaluation results, create summaries and save files
409
+
410
+ Args:
411
+ results: List of evaluation results
412
+ """
413
+ if results:
414
+ try:
415
+ # Save detailed results
416
+ detailed_output_file = f"{self.output_dir}/detailed_results.json"
417
+ os.makedirs(os.path.dirname(detailed_output_file), exist_ok=True)
418
+ with open(detailed_output_file, 'w') as f:
419
+ json.dump(results, f, indent=2)
420
+ self._add_log(f"[INFO] Detailed results saved in {detailed_output_file}")
421
+
422
+ # Generate and save comparison results
423
+ comparison = self._format_comparison_results(results)
424
+ comparison_file = f"{self.output_dir}/models_comparison.json"
425
+ with open(comparison_file, 'w') as f:
426
+ json.dump(comparison, f, indent=2)
427
+ self._add_log(f"[INFO] Models comparison saved in {comparison_file}")
428
+
429
+ # Upload results to the dataset
430
+ await self._upload_results_to_dataset(comparison)
431
+
432
+ # Store results for later access
433
+ self.results = comparison
434
+ self._add_log("[SUCCESS] Evaluation completed")
435
+ except Exception as e:
436
+ self._add_log(f"[ERROR] Error during evaluation execution: {str(e)}")
437
+ finally:
438
+ self.is_completed = True
439
+
440
+ def _async_run(self) -> None:
441
+ """
442
+ Run the evaluation asynchronously
443
+ """
444
+ async def run_async():
445
+ try:
446
+ # Run evaluations
447
+ results = await self._run_evaluations()
448
+
449
+ # Process evaluation results
450
+ await self._process_evaluation_results(results)
451
+ except Exception as e:
452
+ self._add_log(f"[ERROR] Error during evaluation execution: {str(e)}")
453
+ finally:
454
+ self.is_completed = True
455
+
456
+ # Create and run the asyncio event loop
457
+ loop = asyncio.new_event_loop()
458
+ asyncio.set_event_loop(loop)
459
+ loop.run_until_complete(run_async())
460
+ loop.close()
461
+
462
+ def run(self) -> None:
463
+ """
464
+ Run the evaluation task in a separate thread
465
+ """
466
+ self._add_log("[INFO] Starting evaluation")
467
+
468
+ # Run in a separate thread to not block the main thread
469
+ thread = threading.Thread(target=self._async_run)
470
+ thread.daemon = True
471
+ thread.start()
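+ # Usage sketch (session id and dataset name are hypothetical):
+ #   task = EvaluationTask("session-1234", "yourbench_1234")
+ #   task.run()  # returns immediately; poll task.is_task_completed() and task.get_logs()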
backend/tasks/yourbench_lighteval_task.py ADDED
@@ -0,0 +1,273 @@
1
+ #!/usr/bin/env python3
2
+ # MIT License
3
+
4
+ # Copyright (c) 2024 The HuggingFace Team
5
+
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+
13
+ # The above copyright notice and this permission notice shall be included in all
14
+ # copies or substantial portions of the Software.
15
+
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ # SOFTWARE.
23
+
24
+ import logging
25
+ import re
26
+
27
+ import numpy as np
28
+ from aenum import extend_enum
29
+
30
+ from lighteval.metrics.metrics import Metrics
31
+ from lighteval.metrics.metrics_sample import JudgeLLM
32
+ from lighteval.metrics.utils.metric_utils import (
33
+ CorpusLevelMetricGrouping,
34
+ MetricCategory,
35
+ MetricUseCase,
36
+ )
37
+ from lighteval.tasks.lighteval_task import LightevalTaskConfig
38
+ from lighteval.tasks.requests import Doc
39
+
40
+
41
+ logger = logging.getLogger(__name__)
42
+
43
+ JUDGE_ANSWER_SYSTEM_PROMPT = """You will be provided with the summary of a document, a piece of text, a question generated from that text, and the correct or "gold" answer to the question. Additionally, you will receive a model answer. Your task is to determine whether the model answer is correct using the provided "gold" answer as a reference.
44
+ # Steps
45
+ 1. **Document Understanding**:
46
+ - Analyze the provided document summary to grasp the context and main themes.
47
+ 2. **Chunk Understanding**:
48
+ - Examine the provided text (chunk) to understand its content.
49
+ 3. **Question Understanding**:
50
+ - Interpret the given question to fully comprehend what is being asked.
51
+ 4. **Ground Truth Answer Understanding**:
52
+ - Understand the provided ground truth answer, identifying its key points.
53
+ 5. **Answer Understanding**:
54
+ - Examine the Model Answer, identifying key points and assessing accuracy and factuality.
55
+ 6. **Final Answer**:
56
+ - 0 or 1 (0 if the model answer is incorrect, 1 if it is correct).
57
+ # Output Format
58
+ - Provide your final evaluation of whether the answer is correct within `<final_answer>` XML tags.
59
+ - Include a detailed analysis for each part within the designated XML tags: `<document_understanding>`, `<chunk_understanding>`, `<question_understanding>`, `<ground_truth_answer_understanding>`, `<model_answer_understanding>`, and `<final_answer>`.
60
+ # Examples
61
+ **Input**:
62
+ ```xml
63
+ <document_summary>
64
+ [Summary]
65
+ </document_summary>
66
+ <piece_of_text>
67
+ [Text]
68
+ </piece_of_text>
69
+ <question>
70
+ [Question]
71
+ </question>
72
+ <gold_answer>
73
+ [Gold Answer]
74
+ </gold_answer>
75
+ <model_answer>
76
+ [Model Answer]
77
+ </model_answer>
78
+ ```
79
+ **Output**:
80
+ ```xml
81
+ <document_understanding>
82
+ Understanding of the summary including key themes
83
+ </document_understanding>
84
+ <chunk_understanding>
85
+ Analysis of the piece of text
86
+ </chunk_understanding>
87
+ <question_understanding>
88
+ Comprehension of the question being asked
89
+ </question_understanding>
90
+ <ground_truth_answer_understanding>
91
+ Key points from the gold answer
92
+ </ground_truth_answer_understanding>
93
+ <model_answer_understanding>
94
+ Key points and accuracy of the model answer
95
+ </model_answer_understanding>
96
+ <final_answer>
97
+ 1 or 0 (1 if the model answer is correct, 0 if it is incorrect)
98
+ </final_answer>
99
+ ```
100
+ # Notes
101
+ - Always focus on key points and factual correctness as per the ground truth.
102
+ - Avoid any biases and rely solely on the evidence presented.
103
+ - Enclose all evaluations and analyses in the specified XML tags for clarity and structure."""
104
+
105
+
106
+ JUDGE_ANSWER_USER_PROMPT = """<document_summary>
107
+ {summary}
108
+ </document_summary>
109
+ <piece_of_text>
110
+ {chunk}
111
+ </piece_of_text>
112
+ <question>
113
+ {question}
114
+ </question>
115
+ <gold_answer>
116
+ {oracle_answer}
117
+ </gold_answer>
118
+ <model_answer>
119
+ {model_answer}
120
+ </model_answer>"""
121
+
122
+
123
+ def get_judge_prompt(question: str, answer: str, gold: str, **kwargs):
124
+ chunk = kwargs.get("chunks", "")
125
+ summary = kwargs.get("documents", "")
126
+
127
+ return [
128
+ {"role": "system", "content": JUDGE_ANSWER_SYSTEM_PROMPT},
129
+ {
130
+ "role": "user",
131
+ "content": JUDGE_ANSWER_USER_PROMPT.format(
132
+ summary=summary, chunk=chunk, question=question, oracle_answer=gold, model_answer=answer
133
+ ),
134
+ },
135
+ ]
136
+
137
+
138
+ def process_judge_response_yourbench(response):
139
+ # extract the final answer using regex from the response xml
140
+ try:
141
+ # Try the XML format first
142
+ match = re.search(r"<final_answer>(.*?)</final_answer>", response, re.DOTALL)
143
+ if match:
144
+ answer_text = match.group(1).strip()
145
+ # Convert various possible formats to 0 or 1
146
+ if answer_text in ["1", "correct", "true", "yes", "True", "TRUE"]:
147
+ return 1
148
+ elif answer_text in ["0", "incorrect", "false", "no", "False", "FALSE"]:
149
+ return 0
150
+ # Try converting directly to a number
151
+ try:
152
+ value = int(answer_text)
153
+ return 1 if value > 0 else 0
154
+ except ValueError:
155
+ pass
156
+
157
+ # Look for keywords in the response
158
+ if re.search(r"\b(correct|vrai|true|yes)\b", response, re.IGNORECASE):
159
+ return 1
160
+ if re.search(r"\b(incorrect|faux|false|no)\b", response, re.IGNORECASE):
161
+ return 0
162
+
163
+ logger.warning(f"Réponse du juge non reconnue, retournant 0 par défaut: {response[:100]}...")
164
+ except Exception as e:
165
+ logger.error(f"Error processing judge response: {e}")
166
+ return 0
167
+
168
+
169
+ class JudgeLLMYourBench(JudgeLLM):
170
+ def __init__(self):
171
+ super().__init__(
172
+ judge_model_name="gpt-4o-2024-08-06",
173
+ template=get_judge_prompt,
174
+ process_judge_response=process_judge_response_yourbench,
175
+ judge_backend="openai",
176
+ short_judge_name="yourbench_judge",
177
+ )
178
+
179
+ def compute(self, sample_ids: list[str], responses: list, formatted_docs: list[Doc]) -> list[dict[str, float]]:
180
+ # If we are evaluating a multiturn task, we need to have specific field in the formatted doc
181
+ questions = [formatted_doc.specific["question"] for formatted_doc in formatted_docs]
182
+ golds = [formatted_doc.get_golds()[0] for formatted_doc in formatted_docs]
183
+ predictions = [response[0].result[0] for response in responses]
184
+ options = [None] * len(questions)
185
+ chunks = [formatted_doc.specific["chunks"][0] for formatted_doc in formatted_docs]
186
+ documents = [formatted_doc.specific["document"] for formatted_doc in formatted_docs]
187
+
188
+ score, _, _ = self.judge.evaluate_answer_batch(
189
+ questions, predictions, options, golds, chunks=chunks, documents=documents
190
+ )
191
+
192
+ metrics = []
193
+ for i in range(len(sample_ids)):
194
+ metrics.append(
195
+ {
196
+ "accuracy": score[i],
197
+ }
198
+ )
199
+
200
+ return metrics
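+ # Note: each sample yields a 0/1 accuracy from the judge; the corpus-level score
+ # is their mean (see corpus_level_fn={"accuracy": np.mean} in create_yourbench_task).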
201
+
202
+
203
+ ZEROSHOT_QA_USER_PROMPT = """Answer the following question:
204
+ <question>
205
+ {question}
206
+ </question>
207
+ Enclose your full answer in <answer> XML tags. For example:
208
+ <answer>
209
+ [your answer here]
210
+ </answer>"""
211
+
212
+
213
+ def yourbench_prompt(line, task_name: str = ""):
214
+ return Doc(
215
+ task_name=task_name,
216
+ query=ZEROSHOT_QA_USER_PROMPT.format(question=line["question"]),
217
+ choices=[line["ground_truth_answer"]],
218
+ gold_index=0,
219
+ specific={
220
+ "question_category": line["question_category"],
221
+ "kind": line["kind"],
222
+ "estimated_difficulty": line["estimated_difficulty"],
223
+ "document_id": line["document_id"],
224
+ "question_generating_model": line["question_generating_model"],
225
+ "chunks": line["chunks"],
226
+ "question": line["question"],
227
+ "document": line["document"],
228
+ },
229
+ )
230
+
231
+
232
+ def create_yourbench_task(hf_dataset_name, subset="lighteval_single_shot_questions"):
233
+ """
234
+ Create a custom yourbench task for lighteval.
235
+
236
+ Args:
237
+ hf_dataset_name: Dataset name on the HF Hub (format: "org/name")
238
+ subset: Name of the subset to use
239
+
240
+ Returns:
241
+ LightevalTaskConfig: Configuration of the yourbench task
242
+ """
243
+ yourbench_metrics = CorpusLevelMetricGrouping(
244
+ metric_name=["accuracy"],
245
+ higher_is_better={"accuracy": True},
246
+ category=MetricCategory.LLM_AS_JUDGE,
247
+ use_case=MetricUseCase.ACCURACY,
248
+ sample_level_fn=JudgeLLMYourBench().compute,
249
+ corpus_level_fn={"accuracy": np.mean},
250
+ )
251
+
252
+ try:
253
+ extend_enum(Metrics, "accuracy", yourbench_metrics)
254
+ except Exception:
255
+ # The enum may already have been added; ignore the error
256
+ pass
257
+
258
+ return LightevalTaskConfig(
259
+ name="yourbench",
260
+ suite=["custom"],
261
+ prompt_function=yourbench_prompt,
262
+ hf_repo=hf_dataset_name,
263
+ hf_subset=subset,
264
+ hf_avail_splits=["train"],
265
+ evaluation_splits=["train"],
266
+ few_shots_split=None,
267
+ few_shots_select=None,
268
+ generation_size=8192,
269
+ metric=[Metrics.accuracy],
270
+ stop_sequence=[],
271
+ trust_dataset=True,
272
+ version=0,
273
+ )
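+ # Usage sketch: a lighteval --custom-tasks file only needs to expose a TASKS_TABLE
+ # built from this factory (dataset name here is hypothetical):
+ #   TASKS_TABLE = [create_yourbench_task("my-org/yourbench_demo", "lighteval")]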
backend/tests/test_evaluation.py ADDED
@@ -0,0 +1,165 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script to test the evaluation task in standalone mode
4
+ """
5
+ import os
6
+ import sys
7
+ import uuid
8
+ import json
9
+ import time
10
+ import argparse
11
+ from dotenv import load_dotenv
12
+ from pathlib import Path
13
+ import traceback
14
+
15
+ # Ensure the environment is properly configured
16
+ load_dotenv()
17
+
18
+ # Add the current directory to the path to import modules
19
+ sys.path.append(os.getcwd())
20
+ from tasks.evaluationTask import EvaluationTask
21
+
22
+
23
+ def setup_environment():
24
+ """
25
+ Configure the environment for testing
26
+ """
27
+ # Check if the HF token is defined
28
+ hf_token = os.getenv("HF_TOKEN")
29
+ if not hf_token:
30
+ print("⚠️ The HF_TOKEN is not defined in the environment or .env file")
31
+ print(" Please define this variable before continuing.")
32
+ sys.exit(1)
33
+
34
+ # Set the default organization if not defined
35
+ if not os.getenv("HF_ORGANIZATION"):
36
+ os.environ["HF_ORGANIZATION"] = "yourbench"
37
+ print("ℹ️ The HF_ORGANIZATION variable is not defined, using 'yourbench' as default")
38
+
39
+
40
+ def run_standalone_evaluation(dataset_name, models=None, max_wait_time=3600):
41
+ """
42
+ Run the evaluation task in standalone mode
43
+
44
+ Args:
45
+ dataset_name: Name of the dataset to evaluate
46
+ models: List of models to evaluate (optional)
47
+ max_wait_time: Maximum waiting time in seconds
48
+ """
49
+ # Generate a unique session ID
50
+ session_uid = str(uuid.uuid4())
51
+ print(f"🔧 Session ID: {session_uid}")
52
+
53
+ # Create the evaluation task instance
54
+ evaluation_task = EvaluationTask(session_uid, dataset_name)
55
+
56
+ # If specific models are provided, use them
57
+ if models:
58
+ evaluation_task.models = models
59
+ print(f"🤖 Using custom models: {models}")
60
+
61
+ # Display dataset information
62
+ organization = os.getenv("HF_ORGANIZATION", "yourbench")
63
+ print(f"📊 Evaluating dataset: {organization}/{dataset_name}")
64
+ print(f"💾 Results saved in: {evaluation_task.output_dir}")
65
+
66
+ # Start the evaluation task
67
+ print("🚀 Starting evaluation...")
68
+ evaluation_task.run()
69
+
70
+ # Wait for the task to complete while displaying logs
71
+ start_time = time.time()
72
+ last_log_count = 0
73
+
74
+ while not evaluation_task.is_task_completed():
75
+ current_logs = evaluation_task.get_logs()
76
+
77
+ # Display only new logs
78
+ if len(current_logs) > last_log_count:
79
+ for log in current_logs[last_log_count:]:
80
+ print(f" {log}")
81
+ last_log_count = len(current_logs)
82
+
83
+ # Check if the maximum time is reached
84
+ elapsed_time = time.time() - start_time
85
+ if elapsed_time > max_wait_time:
86
+ print("⚠️ Maximum waiting time reached, forced stop")
87
+ break
88
+
89
+ time.sleep(1)
90
+
91
+ # Check if results are available
92
+ results_file = Path(f"{evaluation_task.output_dir}/models_comparison.json")
93
+ if results_file.exists():
94
+ try:
95
+ with open(results_file, 'r') as f:
96
+ results = json.load(f)
97
+
98
+ print("\n📈 Evaluation Results:")
99
+ print(f" Dataset: {results['metadata']['dataset']}")
100
+ print(f" Models tested: {results['metadata']['total_models_tested']}")
101
+ print(f" Successful tests: {results['metadata']['successful_tests']}")
102
+ print(f" Timestamp: {results['metadata']['timestamp']}")
103
+
104
+ if results['metadata']['successful_tests'] > 0:
105
+ print("\n📊 Model ranking by accuracy:")
106
+ successful_models = [m for m in results['models_comparison'] if m['success']]
107
+ for i, model in enumerate(successful_models):
108
+ print(f" {i+1}. ✅ {model['model_name']} ({model['provider']})")
109
+ print(f" Accuracy: {model['accuracy']:.4f} ± {model['accuracy_stderr']:.4f}")
110
+ print(f" Evaluation time: {model['evaluation_time']:.2f}s")
111
+
112
+ failed_models = [m for m in results['models_comparison'] if not m['success']]
113
+ if failed_models:
114
+ print("\n❌ Unevaluated models:")
115
+ for i, model in enumerate(failed_models):
116
+ print(f" {i+1}. {model['model_name']} ({model['provider']})")
117
+ error_msg = model.get('error', 'Unknown reason')
118
+ print(f" Reason: {error_msg}")
119
+
120
+ # Check detailed results files
121
+ detailed_file = Path(f"{evaluation_task.output_dir}/detailed_results.json")
122
+ if detailed_file.exists():
123
+ print(f"\n📄 Detailed results available in: {detailed_file}")
124
+
125
+ # Check raw files
126
+ raw_results = list(Path(f"{evaluation_task.output_dir}/results").glob("**/*.json"))
127
+ if raw_results:
128
+ print(f"\n📁 {len(raw_results)} raw result files available in: {evaluation_task.output_dir}/results")
129
+
130
+ print(f"\n✅ Evaluation completed!")
131
+ except Exception as e:
132
+ print(f"❌ Error reading results: {str(e)}")
133
+ print(f" Details: {traceback.format_exc()}")
134
+ else:
135
+ print(f"❌ No evaluation results found in {results_file}")
136
+
137
+
138
+ if __name__ == "__main__":
139
+ # Configure the argument parser
140
+ parser = argparse.ArgumentParser(description="Test the evaluation task in standalone mode")
141
+ parser.add_argument("dataset_name", type=str, help="Name of the dataset to evaluate (without the organization)")
142
+ parser.add_argument("--model", action="append", dest="models",
143
+ help="Model to evaluate in the format 'name/model,provider'. Can be used multiple times.")
144
+ parser.add_argument("--timeout", type=int, default=3600,
145
+ help="Maximum waiting time in seconds (default: 3600)")
146
+
147
+ args = parser.parse_args()
148
+
149
+ # Configure the environment
150
+ setup_environment()
151
+
152
+ # Transform models into tuples if specified
153
+ models_to_evaluate = None
154
+ if args.models:
155
+ models_to_evaluate = []
156
+ for model_spec in args.models:
157
+ try:
158
+ model_name, provider = model_spec.split(",")
159
+ models_to_evaluate.append((model_name, provider))
160
+ except ValueError:
161
+ print(f"⚠️ Invalid model format: {model_spec}. Use 'name/model,provider'")
162
+ sys.exit(1)
163
+
164
+ # Run the evaluation
165
+ run_standalone_evaluation(args.dataset_name, models_to_evaluate, args.timeout)
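+ # Example invocation (dataset name is hypothetical):
+ #   python tests/test_evaluation.py yourbench_1234 --model "Qwen/Qwen2.5-72B-Instruct,novita" --timeout 1800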
backend/tests/test_hf_upload.py ADDED
@@ -0,0 +1,78 @@
1
+ """
2
+ Test script to verify uploads to the Hugging Face Hub.
3
+ This script creates a simple dataset and attempts to upload it to the Hub
4
+ using the token and organization defined in the environment variables.
5
+ """
6
+ import os
7
+ import sys
8
+ import datasets
9
+ from huggingface_hub import HfApi, login
10
+ from datasets import Dataset
11
+ from dotenv import load_dotenv
12
+ from pathlib import Path
13
+
14
+ def test_hf_upload():
15
+ # Load environment variables from the .env file
16
+ dotenv_path = Path('.env')
17
+ load_dotenv(dotenv_path=dotenv_path)
18
+
19
+ # Get the token and organization from the environment variables
20
+ hf_token = os.getenv("HF_TOKEN")
21
+ org_name = os.getenv("HF_ORGANIZATION", "yourbench")
22
+
23
+ if not hf_token:
24
+ print("Erreur: La variable HF_TOKEN n'est pas définie dans le fichier .env.")
25
+ sys.exit(1)
26
+
27
+ dataset_name = "test_dataset_upload"
28
+ repo_id = f"{org_name}/{dataset_name}"
29
+
30
+ print(f"Tentative d'upload vers {repo_id} avec le token {hf_token[:5]}... (token tronqué pour la sécurité)")
31
+
32
+ try:
33
+ # Log in to the Hugging Face API
34
+ print("Connecting to the Hugging Face API...")
35
+ login(token=hf_token)
36
+ api = HfApi(token=hf_token)
37
+
38
+ # Create a simple dataset
39
+ print("Creating a test dataset...")
40
+ data = {
41
+ "text": ["This is a test", "Another example", "Third example"],
42
+ "label": [1, 0, 1]
43
+ }
44
+ dataset = Dataset.from_dict(data)
45
+
46
+ # Check whether the repo already exists and delete it if necessary
47
+ try:
48
+ api.delete_repo(repo_id=repo_id, repo_type="dataset")
49
+ print(f"Repo existant {repo_id} supprimé.")
50
+ except Exception:
51
+ print(f"Le repo {repo_id} n'existait pas encore.")
52
+
53
+ # Upload the dataset
54
+ print(f"Uploading dataset to {repo_id}...")
55
+ dataset.push_to_hub(
56
+ repo_id=repo_id,
57
+ token=hf_token,
58
+ private=True,
59
+ commit_message="Test d'upload de dataset"
60
+ )
61
+
62
+ print(f"Succès! Dataset uploadé vers https://huggingface.co/datasets/{repo_id}")
63
+ return True
64
+
65
+ except Exception as e:
66
+ print(f"Erreur lors de l'upload: {str(e)}")
67
+ print("\nTraceback complet:")
68
+ import traceback
69
+ traceback.print_exc()
70
+ return False
71
+
72
+ if __name__ == "__main__":
73
+ print("=== Test d'upload vers Hugging Face Hub ===")
74
+ success = test_hf_upload()
75
+ if success:
76
+ print("\n✅ Le test a réussi! L'upload fonctionne correctement.")
77
+ else:
78
+ print("\n❌ Le test a échoué. Vérifiez les erreurs ci-dessus.")
backend/tests/test_inference.py ADDED
@@ -0,0 +1,84 @@
1
+ import time
2
+ import signal
3
+ from huggingface_hub import InferenceClient
4
+
5
+ # Configuration - models and their providers
6
+ MODELS = [
7
+ ("Qwen/Qwen2.5-72B-Instruct", "sambanova"),
8
+ ("meta-llama/Llama-3.3-70B-Instruct", "sambanova"),
9
+ ("deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "sambanova"),
10
+ ("Qwen/QwQ-32B", "novita"),
11
+ # ("mistralai/Mistral-Small-24B-Instruct-2501", "novita")
12
+ ]
13
+ QUESTION = "What is the capital of France?"
14
+ TIMEOUT = 10 # seconds
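+ # Note: the signal.SIGALRM-based timeout used below is Unix-only; on Windows this
+ # script would need a thread-based timeout instead.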
15
+
16
+
17
+ class TimeoutException(Exception):
18
+ pass
19
+
20
+
21
+ def timeout_handler(signum, frame):
22
+ raise TimeoutException("Timeout")
23
+
24
+
25
+ def test_model(model, provider):
26
+ client = InferenceClient(provider=provider)
27
+
28
+ # Set up the timeout
29
+ signal.signal(signal.SIGALRM, timeout_handler)
30
+ signal.alarm(TIMEOUT)
31
+
32
+ start_time = time.time()
33
+ try:
34
+ response = client.chat_completion(
35
+ model=model,
36
+ messages=[{"role": "user", "content": QUESTION}]
37
+ )
38
+ result = response.choices[0].message.content
39
+ success = True
40
+ except TimeoutException:
41
+ result = f"TIMEOUT ({TIMEOUT}s)"
42
+ success = False
43
+ except Exception as e:
44
+ result = str(e)
45
+ success = False
46
+ finally:
47
+ # Disable the alarm
48
+ signal.alarm(0)
49
+
50
+ execution_time = time.time() - start_time
51
+
52
+ status = "✅" if success else "❌"
53
+ print(f"{status} {model} ({provider}) - Temps: {execution_time:.2f}s")
54
+ if success:
55
+ print(f" Réponse: {result[:80]}..." if len(result) > 80 else f" Réponse: {result}")
56
+ else:
57
+ print(f" Erreur: {result}")
58
+
59
+ return success, execution_time, result
60
+
61
+
62
+ def main():
63
+ print(f"\nTest de {len(MODELS)} modèles avec leurs providers spécifiques")
64
+ print(f"Question: {QUESTION}")
65
+ print(f"Timeout: {TIMEOUT}s\n")
66
+
67
+ results = []
68
+ for model, provider in MODELS:
69
+ success, time_taken, response = test_model(model, provider)
70
+ results.append({
71
+ "model": model,
72
+ "provider": provider,
73
+ "success": success,
74
+ "time": time_taken
75
+ })
76
+
77
+ print("\n=== RÉSUMÉ ===")
78
+ for result in results:
79
+ status = "✅" if result["success"] else "❌"
80
+ print(f"{status} {result['model']} ({result['provider']}): {result['time']:.2f}s")
81
+
82
+
83
+ if __name__ == "__main__":
84
+ main()
backend/tests/test_lighteval.py ADDED
@@ -0,0 +1,151 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Minimal script to test lighteval directly with the yourbench task
4
+ """
5
+ import os
6
+ import sys
7
+ import subprocess
8
+ import json
9
+ import time
10
+ from pathlib import Path
11
+ import logging
12
+
13
+ # Ensure the environment is properly configured
14
+ from dotenv import load_dotenv
15
+ load_dotenv()
16
+
17
+ # Import the yourbench task definition module
18
+ sys.path.append(os.getcwd())
19
+ from tasks.yourbench_lighteval_task import create_yourbench_task
20
+
21
+ def run_lighteval_test():
22
+ """
23
+ Run a minimal test with lighteval
24
+ """
25
+ # Parameters
26
+ dataset_name = "yourbench_a"
27
+ organization = "yourbench"
28
+ model_name = "Qwen/Qwen2.5-72B-Instruct"
29
+ provider = "novita"
30
+ output_dir = f"uploaded_files/test_{provider}/lighteval_results"
31
+
32
+ # Create the output directory
33
+ os.makedirs(output_dir, exist_ok=True)
34
+
35
+ # Define the full dataset path
36
+ dataset_path = f"{organization}/{dataset_name}"
37
+ print(f"Dataset to evaluate: {dataset_path}")
38
+
39
+ # Create a temporary file
40
+ import tempfile
41
+ temp_file_path = tempfile.mktemp(suffix=".py")
42
+ print(f"Creating temporary file: {temp_file_path}")
43
+
44
+ with open(temp_file_path, 'w') as temp_file:
45
+ # Write the file contents
46
+ temp_file.write(f"""
47
+ import os
48
+ import sys
49
+ import logging
50
+ sys.path.append("{os.getcwd()}")
51
+
52
+ from tasks.yourbench_lighteval_task import create_yourbench_task
53
+
54
+ # Configure logging
55
+ logging.basicConfig(level=logging.INFO)
56
+
57
+ # Create the yourbench task
58
+ yourbench = create_yourbench_task("{dataset_path}", "lighteval")
59
+
60
+ # Define the TASKS_TABLE variable that lighteval needs
61
+ TASKS_TABLE = [yourbench]
62
+ """)
63
+
64
+ # Build the lighteval command
65
+ cmd = [
66
+ "lighteval",
67
+ "endpoint",
68
+ "inference-providers",
69
+ f"model={model_name},provider={provider}",
70
+ "custom|yourbench|0|0",
71
+ "--custom-tasks",
72
+ temp_file_path,
73
+ "--max-samples", "5", # Seulement 1 échantillon
74
+ "--output-dir", output_dir,
75
+ "--save-details",
76
+ "--no-push-to-hub" # Pas de push pour gagner du temps
77
+ ]
78
+
79
+ # Display the command
80
+ print(f"Running command: {' '.join(cmd)}")
81
+ print(f"Start time: {time.strftime('%H:%M:%S')}")
82
+
83
+ # Run the command
84
+ try:
85
+ # Run with output capture
86
+ result = subprocess.run(cmd, capture_output=True, text=True)
87
+
88
+ # Display the results
89
+ print(f"Return code: {result.returncode}")
90
+ print("--- STANDARD OUTPUT ---")
91
+ print(result.stdout)
92
+ print("--- STANDARD ERROR ---")
93
+ print(result.stderr)
94
+
95
+ # Check whether results were generated
96
+ results_dir = Path(output_dir) / "results"
97
+ if results_dir.exists():
98
+ print(f"Dossier de résultats créé: {results_dir}")
99
+ # Lister les fichiers de résultats
100
+ result_files = list(results_dir.glob("**/*.json"))
101
+ if result_files:
102
+ print(f"Fichiers de résultats trouvés: {result_files}")
103
+ # Trier les fichiers par date de modification pour prendre le plus récent
104
+ result_files.sort(key=lambda x: x.stat().st_mtime, reverse=True)
105
+ latest_result = result_files[0]
106
+ print(f"Fichier de résultats le plus récent: {latest_result}")
107
+
108
+ # Read the result file
109
+ with open(latest_result, 'r') as f:
110
+ results = json.load(f)
111
+ print("Contenu du fichier de résultats:")
112
+ print(json.dumps(results, indent=2))
113
+
114
+ # Analyze the results
115
+ print("\n==== RESULTS ANALYSIS ====")
116
+ if "results" in results:
117
+ for task_name, task_results in results["results"].items():
118
+ print(f"Tâche: {task_name}")
119
+ for metric_name, metric_value in task_results.items():
120
+ print(f" {metric_name}: {metric_value}")
121
+ else:
122
+ print("Aucun résultat trouvé dans le fichier JSON")
123
+
124
+ # Check the details
125
+ details_dir = Path(output_dir) / "details"
126
+ if details_dir.exists():
127
+ print(f"\nDossier de détails trouvé: {details_dir}")
128
+ model_details_dirs = list(details_dir.glob("**/*"))
129
+ if model_details_dirs:
130
+ print(f"Dossiers de détails par modèle: {model_details_dirs}")
131
+ else:
132
+ print("Aucun fichier de résultats trouvé.")
133
+ else:
134
+ print(f"Aucun dossier de résultats créé.")
135
+
136
+ except subprocess.CalledProcessError as e:
137
+ print(f"Erreur lors de l'exécution de la commande: {e}")
138
+ except Exception as e:
139
+ print(f"Exception: {e}")
140
+ finally:
141
+ # Delete the temporary file
142
+ try:
143
+ os.unlink(temp_file_path)
144
+ print(f"Fichier temporaire supprimé: {temp_file_path}")
145
+ except:
146
+ pass
147
+
148
+ print(f"Heure de fin: {time.strftime('%H:%M:%S')}")
149
+
150
+ if __name__ == "__main__":
151
+ run_lighteval_test()
backend/tests/test_openai.py ADDED
@@ -0,0 +1,31 @@
1
+ import os
2
+ from openai import OpenAI
3
+ from dotenv import load_dotenv
4
+
5
+ # Load environment variables
6
+ load_dotenv()
7
+
8
+ def test_openai_connection():
9
+ try:
10
+ # Initialize OpenAI client
11
+ client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
12
+
13
+ # Make a simple request
14
+ response = client.chat.completions.create(
15
+ model="gpt-3.5-turbo",
16
+ messages=[
17
+ {"role": "user", "content": "Say 'Hello World'"}
18
+ ]
19
+ )
20
+
21
+ print("✅ OpenAI API connection successful!")
22
+ print(f"Response: {response.choices[0].message.content}")
23
+ return True
24
+
25
+ except Exception as e:
26
+ print("❌ OpenAI API connection failed!")
27
+ print(f"Error: {str(e)}")
28
+ return False
29
+
30
+ if __name__ == "__main__":
31
+ test_openai_connection()
backend/tests/test_parallel_lighteval.py ADDED
@@ -0,0 +1,278 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script to run lighteval tests in parallel for multiple models
4
+ """
5
+ import os
6
+ import sys
7
+ import json
8
+ import time
9
+ import tempfile
10
+ import asyncio
11
+ from pathlib import Path
12
+ from typing import Tuple, List, Dict, Any
13
+
14
+ # Ensure environment is properly configured
15
+ from dotenv import load_dotenv
16
+ load_dotenv()
17
+
18
+ # Import yourbench task module
19
+ sys.path.append(os.getcwd())
20
+ from tasks.yourbench_lighteval_task import create_yourbench_task
21
+
22
+ # Define models to test
23
+ INIT_MODELS = [
24
+ # 70B
25
+ ("Qwen/Qwen2.5-72B-Instruct", "novita"),
26
+ ("meta-llama/Llama-3.3-70B-Instruct", "novita"),
27
+ ("deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "novita"),
28
+ # 20 to 30B
29
+ ("Qwen/QwQ-32B", "novita"),
30
+ # ("mistralai/Mistral-Small-24B-Instruct-2501", "sambanova"),
31
+ ]
32
+
33
+ async def run_lighteval_test_for_model(model_info: Tuple[str, str]) -> Dict[str, Any]:
34
+ """
35
+ Run lighteval test for a specific model
36
+ """
37
+ model_name, provider = model_info
38
+
39
+ # Parameters
40
+ dataset_name = "yourbench_a"
41
+ organization = "yourbench"
42
+ output_dir = f"uploaded_files/test_parallel_{provider}/lighteval_results"
43
+
44
+ # Create output directory
45
+ os.makedirs(output_dir, exist_ok=True)
46
+
47
+ # Define full dataset path
48
+ dataset_path = f"{organization}/{dataset_name}"
49
+ print(f"Dataset to evaluate for {model_name}: {dataset_path}")
50
+
51
+ # Create temporary file
52
+ temp_file_path = tempfile.mktemp(suffix=".py")
53
+ print(f"Creating temporary file for {model_name}: {temp_file_path}")
54
+
55
+ with open(temp_file_path, 'w') as temp_file:
56
+ temp_file.write(f"""
57
+ import os
58
+ import sys
59
+ sys.path.append("{os.getcwd()}")
60
+
61
+ from tasks.yourbench_lighteval_task import create_yourbench_task
62
+
63
+ # Create yourbench task
64
+ yourbench = create_yourbench_task("{dataset_path}", "lighteval")
65
+
66
+ # Define TASKS_TABLE needed by lighteval
67
+ TASKS_TABLE = [yourbench]
68
+ """)
69
+
70
+ # Build lighteval command args
71
+ cmd_args = [
72
+ "lighteval",
73
+ "endpoint",
74
+ "inference-providers",
75
+ f"model={model_name},provider={provider}",
76
+ "custom|yourbench|0|0",
77
+ "--custom-tasks",
78
+ temp_file_path,
79
+ "--max-samples", "5",
80
+ "--output-dir", output_dir,
81
+ "--save-details",
82
+ "--no-push-to-hub"
83
+ ]
84
+
85
+ print(f"Running command for {model_name}: {' '.join(cmd_args)}")
86
+ print(f"Start time for {model_name}: {time.strftime('%H:%M:%S')}")
87
+
88
+ results = {
89
+ "model_name": model_name,
90
+ "provider": provider,
91
+ "success": False,
92
+ "error": None,
93
+ "results": None,
94
+ "return_code": None
95
+ }
96
+
97
+ try:
98
+ # Prepare environment with needed tokens
99
+ env = os.environ.copy()
100
+ hf_token = os.getenv("HF_TOKEN")
101
+ if hf_token:
102
+ env["HF_TOKEN"] = hf_token
103
+ env["HUGGING_FACE_HUB_TOKEN"] = hf_token
104
+ env["HF_ORGANIZATION"] = organization
105
+
106
+ # Run the process asynchronously
107
+ process = await asyncio.create_subprocess_exec(
108
+ *cmd_args,
109
+ stdout=asyncio.subprocess.PIPE,
110
+ stderr=asyncio.subprocess.PIPE,
111
+ env=env
112
+ )
113
+
114
+ # Wait for the process to complete
115
+ stdout, stderr = await process.communicate()
116
+
117
+ # Store return code
118
+ exit_code = process.returncode
119
+ results["return_code"] = exit_code
120
+
121
+ # Log some output for debugging
122
+ if stdout:
123
+ stdout_lines = stdout.decode().strip().split('\n')
124
+ if stdout_lines and len(stdout_lines) > 0:
125
+ print(f"Output from {model_name}: {stdout_lines[0]}")
126
+
127
+ # Check if results were generated
128
+ results_dir = Path(output_dir) / "results"
129
+ if results_dir.exists():
130
+ result_files = list(results_dir.glob("**/*.json"))
131
+ if result_files:
132
+ # Read the first results file
133
+ with open(result_files[0], 'r') as f:
134
+ test_results = json.load(f)
135
+ results["results"] = test_results
136
+ results["success"] = True
137
+
138
+ except asyncio.CancelledError:
139
+ results["error"] = "Task cancelled"
140
+ print(f"Task cancelled for {model_name}")
141
+ except Exception as e:
142
+ results["error"] = f"Exception: {str(e)}"
143
+ print(f"Error running test for {model_name}: {str(e)}")
144
+ finally:
145
+ # Delete temporary file
146
+ try:
147
+ os.unlink(temp_file_path)
148
+ except:
149
+ pass
150
+
151
+ print(f"End time for {model_name}: {time.strftime('%H:%M:%S')}")
152
+ return results
153
+
154
+ async def run_parallel_tests(models: List[Tuple[str, str]]) -> List[Dict[str, Any]]:
155
+ """
156
+ Run tests in parallel for multiple models using asyncio
157
+ """
158
+ print(f"Starting parallel tests for {len(models)} models")
159
+
160
+ # Create tasks for each model
161
+ tasks = [run_lighteval_test_for_model(model) for model in models]
162
+
163
+ # Run all tasks concurrently and gather results
164
+ model_results = await asyncio.gather(*tasks, return_exceptions=True)
165
+
166
+ # Process results
167
+ results = []
168
+ for i, result in enumerate(model_results):
169
+ if isinstance(result, Exception):
170
+ # Handle exception
171
+ model_name, provider = models[i]
172
+ print(f"Test failed for {model_name}: {str(result)}")
173
+ results.append({
174
+ "model_name": model_name,
175
+ "provider": provider,
176
+ "success": False,
177
+ "error": str(result),
178
+ "results": None,
179
+ "return_code": None
180
+ })
181
+ else:
182
+ # Valid result
183
+ results.append(result)
184
+ print(f"Test completed for {result['model_name']}")
185
+
186
+ return results
187
+
188
+ def format_comparison_results(results: List[Dict[str, Any]]) -> Dict[str, Any]:
189
+ """
190
+ Format results for easy comparison between models
191
+ """
192
+ comparison = {
193
+ "metadata": {
194
+ "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
195
+ "total_models_tested": len(results),
196
+ "successful_tests": len([r for r in results if r["success"]])
197
+ },
198
+ "models_comparison": []
199
+ }
200
+
201
+ # Sort models by accuracy (if available) or name
202
+ sorted_results = sorted(
203
+ results,
204
+ key=lambda x: (
205
+ # negate accuracy so higher scores sort first; ties break A-Z by name
206
+ -(x["results"]["results"]["all"]["accuracy"] if x["success"] and x["results"] else -1),
207
+ x["model_name"]
208
+ )
209
+ )
210
+
211
+ for result in sorted_results:
212
+ model_result = {
213
+ "model_name": result["model_name"],
214
+ "provider": result["provider"],
215
+ "success": result["success"]
216
+ }
217
+
218
+ if result["success"] and result["results"]:
219
+ model_result.update({
220
+ "accuracy": result["results"]["results"]["all"]["accuracy"],
221
+ "accuracy_stderr": result["results"]["results"]["all"]["accuracy_stderr"],
222
+ "evaluation_time": float(result["results"]["config_general"]["total_evaluation_time_secondes"])
223
+ })
224
+ else:
225
+ model_result["error"] = result["error"]
226
+
227
+ comparison["models_comparison"].append(model_result)
228
+
229
+ return comparison
230
+
231
+ async def main_async():
232
+ """
233
+ Async main function to run parallel tests
234
+ """
235
+ print("Starting parallel lighteval tests")
236
+ start_time = time.time()
237
+
238
+ # Run tests in parallel
239
+ results = await run_parallel_tests(INIT_MODELS)
240
+
241
+ # Save detailed results
242
+ detailed_output_file = "parallel_test_detailed_results.json"
243
+ with open(detailed_output_file, 'w') as f:
244
+ json.dump(results, f, indent=2)
245
+
246
+ # Generate and save comparison results
247
+ comparison = format_comparison_results(results)
248
+ comparison_file = "models_comparison.json"
249
+ with open(comparison_file, 'w') as f:
250
+ json.dump(comparison, f, indent=2)
251
+
252
+ # Print summary
253
+ print("\nTest Summary:")
254
+ for model in comparison["models_comparison"]:
255
+ status = "✅" if model["success"] else "❌"
256
+ print(f"{status} {model['model_name']} ({model['provider']})")
257
+ if not model["success"]:
258
+ print(f" Error: {model['error']}")
259
+ else:
260
+ print(f" Accuracy: {model['accuracy']:.2%} (±{model['accuracy_stderr']:.2%})")
261
+ print(f" Evaluation time: {model['evaluation_time']:.2f}s")
262
+
263
+ duration = time.time() - start_time
264
+ print(f"\nTotal execution time: {duration:.2f} seconds")
265
+ print(f"Detailed results saved to: {detailed_output_file}")
266
+ print(f"Comparison results saved to: {comparison_file}")
267
+
268
+ def main():
269
+ """
270
+ Main function to run parallel tests
271
+ """
272
+ # asyncio.run() creates, runs, and closes the event loop
273
+ asyncio.run(main_async())
276
+
277
+ if __name__ == "__main__":
278
+ main()
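+
+ # Run directly with:
+ #   python backend/tests/test_parallel_lighteval.py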
backend/tests/test_provider_parallel_support.py ADDED
@@ -0,0 +1,227 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script to test whether an API provider actually supports parallel requests
4
+ """
5
+ import os
6
+ import sys
7
+ import time
8
+ import asyncio
9
+ import json
10
+ from pathlib import Path
11
+ from datetime import datetime
12
+
13
+ # Ensure environment is properly configured
14
+ from dotenv import load_dotenv
15
+ load_dotenv()
16
+
17
+ # Model and provider under test
19
+ MODEL_NAME = "Qwen/QwQ-32B"
20
+ PROVIDER = "novita"
21
+ REQUEST_COUNT = 5 # Number of requests
22
+
23
+ # Prompt list
23
+ PROMPTS = [
24
+ "Explain in detail how parallel computing has transformed modern data processing.",
25
+ "Describe the fundamental differences between CPU and GPU architectures.",
26
+ "Analyze the key challenges in distributed systems design.",
27
+ "Discuss the evolution of natural language processing from rule-based systems to modern transformer architectures.",
28
+ "Explain the concept of quantum computing and how it differs from classical computing paradigms."
29
+ ]
30
+
31
+ async def send_request(prompt, request_id=None, show_logs=True):
32
+ """Send a request to the model and measure its execution time"""
34
+ if show_logs and request_id is not None:
35
+ print(f"Starting request {request_id} at {datetime.now().strftime('%H:%M:%S.%f')[:-3]}")
35
+
36
+ start_time = time.time()
37
+
38
+ cmd_args = [
39
+ "curl", "-s",
40
+ "-X", "POST",
41
+ f"https://api-inference.huggingface.co/models/{MODEL_NAME}",
42
+ "-H", f"Authorization: Bearer {os.environ.get('HF_TOKEN')}",
43
+ "-H", "Content-Type: application/json",
44
+ "-d", json.dumps({
45
+ "inputs": prompt,
46
+ "parameters": {
47
+ "provider": PROVIDER,
48
+ "max_new_tokens": 20
49
+ }
50
+ })
51
+ ]
52
+
53
+ process = await asyncio.create_subprocess_exec(
54
+ *cmd_args,
55
+ stdout=asyncio.subprocess.PIPE,
56
+ stderr=asyncio.subprocess.PIPE
57
+ )
58
+
59
+ stdout, stderr = await process.communicate()
60
+
61
+ end_time = time.time()
62
+ duration = end_time - start_time
63
+
64
+ response = stdout.decode("utf-8")
65
+ stderr_output = stderr.decode("utf-8")
66
+
67
+ # Determine success
68
+ is_success = False
69
+ try:
70
+ response_json = json.loads(response)
71
+ is_success = process.returncode == 0 and isinstance(response_json, list) and "generated_text" in response_json[0]
72
+ except json.JSONDecodeError:
73
+ is_success = process.returncode == 0 and not ("error" in response.lower())
74
+ except Exception:
75
+ is_success = process.returncode == 0
76
+
77
+ # Extract an error message on failure
78
+ error_message = None
79
+ if not is_success:
80
+ try:
81
+ if "error" in response.lower():
82
+ try:
83
+ response_json = json.loads(response)
84
+ if "error" in response_json:
85
+ error_message = response_json["error"]
86
+ except Exception:
87
+ error_message = f"Non-JSON error: {response}"
88
+ elif stderr_output:
89
+ error_message = stderr_output
90
+ else:
91
+ error_message = f"Response: {response}"
92
+ except Exception:
93
+ error_message = f"Unknown error. Return code: {process.returncode}"
94
+
95
+ if show_logs and request_id is not None:
96
+ print(f"Request {request_id} finished at {datetime.now().strftime('%H:%M:%S.%f')[:-3]} (duration: {duration:.2f}s)")
97
+ if not is_success:
98
+ print(f"ERROR in request {request_id}: {error_message[:100] + '...' if error_message and len(error_message) > 100 else error_message}")
99
+
100
+ return {
101
+ "request_id": request_id,
102
+ "prompt": prompt,
103
+ "start_time": start_time,
104
+ "end_time": end_time,
105
+ "duration": duration,
106
+ "success": is_success,
107
+ "response": response,
108
+ "error_message": error_message
109
+ }
110
+
111
+ async def run_parallel_requests(prompts):
112
+ """Run the requests in parallel"""
113
+ print(f"\n=== Parallel test: {len(prompts)} requests for {MODEL_NAME} ({PROVIDER}) ===")
114
+ print(f"Start time: {datetime.now().strftime('%H:%M:%S')}")
115
+
116
+ # Synchronize the start of all requests
117
+ start_event = asyncio.Event()
118
+
119
+ async def synchronized_request(prompt, req_id):
120
+ await start_event.wait()
121
+ return await send_request(prompt, req_id)
122
+
123
+ # Create all the tasks
124
+ tasks = [asyncio.create_task(synchronized_request(prompts[i], i)) for i in range(len(prompts))]
125
+
126
+ # Give every task a moment to be scheduled
127
+ await asyncio.sleep(1)
128
+
129
+ # Fire all the requests at the same time
130
+ parallel_start_time = time.time()
131
+ print(f"Synchronized start at {datetime.now().strftime('%H:%M:%S.%f')[:-3]}")
132
+ start_event.set()
133
+
134
+ # Wait for every task to finish
135
+ results = await asyncio.gather(*tasks)
136
+ parallel_end_time = time.time()
137
+ parallel_duration = parallel_end_time - parallel_start_time
138
+
139
+ print(f"Parallel test finished in {parallel_duration:.2f}s\n")
140
+ return results, parallel_duration
141
+
142
+ async def run_sequential_requests(prompts):
143
+ """Run the same requests sequentially"""
144
+ print(f"\n=== Sequential test: {len(prompts)} requests for {MODEL_NAME} ({PROVIDER}) ===")
145
+ print(f"Start time: {datetime.now().strftime('%H:%M:%S')}")
146
+
147
+ sequential_start_time = time.time()
148
+ results = []
149
+
150
+ for i, prompt in enumerate(prompts):
151
+ print(f"Sequential request {i}...")
152
+ result = await send_request(prompt, i)
153
+ results.append(result)
154
+
155
+ sequential_end_time = time.time()
156
+ sequential_duration = sequential_end_time - sequential_start_time
157
+
158
+ print(f"Sequential test finished in {sequential_duration:.2f}s\n")
159
+ return results, sequential_duration
160
+
161
+ async def run_tests():
162
+ """Run the parallel test, then the sequential test, and compare the results"""
163
+ global_start = time.time()
164
+ prompts = PROMPTS[:REQUEST_COUNT] # use the configured number of prompts
165
+
166
+ # 1. Parallel test
167
+ parallel_results, parallel_duration = await run_parallel_requests(prompts)
168
+
169
+ # 2. Sequential test
170
+ sequential_results, sequential_duration = await run_sequential_requests(prompts)
171
+
172
+ # 3. Analyze the results
173
+ global_end = time.time()
174
+ total_duration = global_end - global_start
175
+
176
+ # Compute the metrics
177
+ parallel_success = sum(1 for r in parallel_results if r["success"])
178
+ sequential_success = sum(1 for r in sequential_results if r["success"])
179
+
180
+ # Compute the effective parallelism factor (sequential time / parallel time)
181
+ if parallel_duration > 0:
182
+ parallelism_factor = sequential_duration / parallel_duration
183
+ else:
184
+ parallelism_factor = 0
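+
+ # Worked example with made-up numbers: five ~2 s requests take ~10 s
+ # sequentially; if the parallel pass takes ~2.5 s, the factor is
+ # 10 / 2.5 = 4.0x and the improvement is (1 - 2.5/10) * 100 = 75%,
+ # close to the theoretical maximum of 5x for five requests.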
185
+
186
+ # Improvement percentage
187
+ improvement_percent = (1 - (parallel_duration / sequential_duration)) * 100 if sequential_duration > 0 else 0
188
+
189
+ # Print the summary
190
+ print("\n====== TEST SUMMARY ======")
191
+ print(f"Model: {MODEL_NAME}, Provider: {PROVIDER}, Requests: {len(prompts)}")
192
+ print(f"\nParallel test duration: {parallel_duration:.2f}s ({parallel_success}/{len(prompts)} succeeded)")
193
+ print(f"Sequential test duration: {sequential_duration:.2f}s ({sequential_success}/{len(prompts)} succeeded)")
194
+ print(f"Parallelism factor: {parallelism_factor:.2f}x")
195
+ print(f"Improvement: {improvement_percent:.1f}%")
196
+
197
+ if parallelism_factor >= len(prompts) * 0.8:
198
+ conclusion = "EXCELLENT parallelism (close to the theoretical maximum)"
199
+ elif parallelism_factor >= 2:
200
+ conclusion = "GOOD parallelism (significantly better than sequential)"
201
+ elif parallelism_factor >= 1.3:
202
+ conclusion = "AVERAGE parallelism (slightly better than sequential)"
203
+ else:
204
+ conclusion = "WEAK or NO parallelism (no significant advantage)"
205
+
206
+ print(f"\nConclusion: {conclusion}")
207
+
208
+ # Save the results
209
+ output_file = f"parallel_test_{PROVIDER}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
210
+ with open(output_file, 'w') as f:
211
+ json.dump({
212
+ "model": MODEL_NAME,
213
+ "provider": PROVIDER,
214
+ "request_count": len(prompts),
215
+ "parallel_duration": parallel_duration,
216
+ "sequential_duration": sequential_duration,
217
+ "parallelism_factor": parallelism_factor,
218
+ "improvement_percent": improvement_percent,
219
+ "conclusion": conclusion,
220
+ "parallel_results": parallel_results,
221
+ "sequential_results": sequential_results
222
+ }, f, indent=2)
223
+
224
+ print(f"\nDetailed results saved to {output_file}")
225
+
226
+ if __name__ == "__main__":
227
+ asyncio.run(run_tests())
backend/tests/test_yourbench_results.py ADDED
@@ -0,0 +1,394 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script to test Yourbench results and verify the datasets on the Hugging Face Hub.
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ import json
9
+ import argparse
10
+ import requests
11
+ import tempfile
12
+ from datetime import datetime
13
+ from typing import Dict, List, Any, Optional, Tuple
14
+
15
+ # Check that the required libraries are installed
16
+ try:
17
+ from dotenv import load_dotenv
18
+ from huggingface_hub import HfApi, DatasetInfo, ModelInfo
19
+ from loguru import logger
20
+ import pandas as pd
21
+ except ImportError:
22
+ print("Installing dependencies...")
23
+ import subprocess
24
+ subprocess.run(["pip", "install", "python-dotenv", "huggingface_hub", "loguru", "pandas", "pyarrow"], check=True)
25
+ from dotenv import load_dotenv
26
+ from huggingface_hub import HfApi, DatasetInfo, ModelInfo
27
+ from loguru import logger
28
+ import pandas as pd
29
+
30
+ # Load environment variables from .env
31
+ load_dotenv()
32
+
33
+ # Logging configuration
34
+ logger.remove()
35
+ logger.add(sys.stderr, format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <level>{message}</level>")
36
+ logger.add("yourbench_tests.log", rotation="10 MB", retention="1 week")
37
+
38
+ def configure_argument_parser() -> argparse.ArgumentParser:
39
+ """Configure the argument parser."""
40
+ parser = argparse.ArgumentParser(description="Test Yourbench results and verify datasets")
41
+ parser.add_argument("--dataset", type=str, help="Name of the dataset to check (without the organization name)")
42
+ parser.add_argument("--org", type=str, default=os.environ.get("HF_ORGANIZATION", "yourbench"),
43
+ help="Hugging Face organization (default: HF_ORGANIZATION from .env, or 'yourbench')")
44
+ parser.add_argument("--verbose", "-v", action="store_true", help="Show detailed information")
45
+ return parser
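+
+ # Example invocation (the dataset name is illustrative):
+ #   python backend/tests/test_yourbench_results.py --dataset yourbench_abc123 -v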
46
+
47
+ class YourbenchTester:
48
+ """Class for testing Yourbench results and datasets."""
49
+
50
+ def __init__(self, organization: str, verbose: bool = False):
51
+ """Initialize the Yourbench tester.
52
+
53
+ Args:
54
+ organization: Name of the organization on Hugging Face
55
+ verbose: Show detailed information
56
+ """
57
+ self.organization = organization
58
+ self.verbose = verbose
59
+ self.hf_token = os.environ.get("HF_TOKEN")
60
+
61
+ if not self.hf_token:
62
+ logger.error("HF_TOKEN environment variable not found in the .env file")
63
+ sys.exit(1)
64
+
65
+ self.api = HfApi(token=self.hf_token)
66
+ logger.info(f"Initializing tester for organization: {organization}")
67
+
68
+ def test_dataset_exists(self, dataset_name: str) -> Optional[DatasetInfo]:
69
+ """Check whether a dataset exists on the Hub.
70
+
71
+ Args:
72
+ dataset_name: Name of the dataset to check
73
+
74
+ Returns:
75
+ Dataset info if it exists, None otherwise
76
+ """
77
+ full_dataset_name = f"{self.organization}/{dataset_name}"
78
+ logger.info(f"Checking that the dataset exists: {full_dataset_name}")
79
+
80
+ try:
81
+ dataset_info = self.api.dataset_info(full_dataset_name)
82
+ logger.success(f"Dataset {full_dataset_name} found!")
83
+
84
+ if self.verbose:
85
+ logger.info(f"ID: {dataset_info.id}")
86
+ logger.info(f"Last modified: {dataset_info.lastModified}")
87
+ logger.info(f"SHA: {dataset_info.sha}")
88
+
89
+ return dataset_info
90
+
91
+ except Exception as e:
92
+ logger.error(f"Could not find dataset {full_dataset_name}: {str(e)}")
93
+ return None
94
+
95
+ def analyze_dataset_content(self, dataset_name: str) -> Tuple[bool, Dict[str, Any]]:
96
+ """Analyze the contents of a dataset.
97
+
98
+ Args:
99
+ dataset_name: Name of the dataset to analyze
100
+
101
+ Returns:
102
+ Tuple of (success flag, statistics dictionary)
103
+ """
104
+ full_dataset_name = f"{self.organization}/{dataset_name}"
105
+ logger.info(f"Analyzing dataset contents: {full_dataset_name}")
106
+
107
+ stats = {
108
+ "fichiers": 0,
109
+ "taille_totale": 0,
110
+ "fichiers_json": 0,
111
+ "fichiers_parquet": 0,
112
+ "a_questions": False,
113
+ "nb_questions": 0,
114
+ "structure_parquet": {},
115
+ "types_documents": set()
116
+ }
117
+
118
+ try:
119
+ # List the files in the dataset
120
+ files = self.api.list_repo_files(full_dataset_name, repo_type="dataset")
121
+ stats["fichiers"] = len(files)
122
+
123
+ if self.verbose:
124
+ logger.info(f"Files found in the dataset: {len(files)}")
125
+ for file in files[:10]: # cap at 10 files to keep the output readable
126
+ logger.info(f" - {file}")
127
+ if len(files) > 10:
128
+ logger.info(f" ... and {len(files) - 10} more files")
129
+
130
+ # Check for question files
131
+ question_files = [f for f in files if "question" in f.lower() and f.endswith(".json")]
132
+ stats["fichiers_json"] = len([f for f in files if f.endswith(".json")])
133
+
134
+ # Check the Parquet files used by Yourbench
135
+ parquet_files = [f for f in files if f.endswith(".parquet")]
136
+ stats["fichiers_parquet"] = len(parquet_files)
137
+
138
+ if parquet_files:
139
+ logger.info(f"Parquet files found: {len(parquet_files)}")
140
+
141
+ # Analyze a sample of the Parquet files
142
+ for parquet_file in parquet_files[:3]: # analyze at most 3 files
143
+ category = parquet_file.split('/')[0] if '/' in parquet_file else "unknown"
144
+
145
+ logger.info(f"Analyzing Parquet file: {parquet_file} (category: {category})")
146
+
147
+ try:
148
+ # Download the Parquet file
149
+ temp_file = self.api.hf_hub_download(
150
+ repo_id=full_dataset_name,
151
+ filename=parquet_file,
152
+ repo_type="dataset"
153
+ )
154
+
155
+ # Read the Parquet file with pandas
156
+ df = pd.read_parquet(temp_file)
157
+
158
+ # Record statistics
159
+ stats["structure_parquet"][category] = {
160
+ "colonnes": list(df.columns),
161
+ "nb_lignes": len(df),
162
+ "exemple": df.iloc[0].to_dict() if len(df) > 0 else {}
163
+ }
164
+
165
+ # Check whether this file contains questions
166
+ if any(col for col in df.columns if "question" in col.lower()):
167
+ stats["a_questions"] = True
168
+ question_col = next(col for col in df.columns if "question" in col.lower())
169
+ stats["nb_questions"] = len(df)
170
+
171
+ # Grab a sample question
172
+ if len(df) > 0 and question_col in df.columns:
173
+ logger.info(f"Sample question: {df[question_col].iloc[0][:100]}...")
174
+
175
+ # Identify document types when available
176
+ if "doc_type" in df.columns and len(df) > 0:
177
+ doc_types = df["doc_type"].unique()
178
+ stats["types_documents"].update(doc_types)
179
+
180
+ except Exception as e:
181
+ logger.warning(f"Error analyzing file {parquet_file}: {str(e)}")
182
+
183
+ # Convert the set to a list for JSON serialization
184
+ stats["types_documents"] = list(stats["types_documents"])
185
+
186
+ if question_files:
187
+ stats["a_questions"] = True
188
+
189
+ # Inspect one question file to understand its structure
190
+ sample_file = question_files[0]
191
+ content = self.api.hf_hub_download(
192
+ repo_id=full_dataset_name,
193
+ filename=sample_file,
194
+ repo_type="dataset"
195
+ )
196
+
197
+ with open(content, 'r') as f:
198
+ data = json.load(f)
199
+
200
+ if isinstance(data, list):
201
+ stats["nb_questions"] = len(data)
202
+ elif isinstance(data, dict) and "questions" in data:
203
+ stats["nb_questions"] = len(data["questions"])
204
+
205
+ logger.success(f"Question files found: {len(question_files)}")
206
+ logger.info(f"Sample file analyzed: {sample_file}")
207
+ logger.info(f"Number of questions found: {stats['nb_questions']}")
208
+
209
+ return True, stats
210
+
211
+ except Exception as e:
212
+ logger.error(f"Error analyzing dataset {full_dataset_name}: {str(e)}")
213
+ return False, stats
214
+
215
+ def check_evaluation_results(self, dataset_name: str) -> bool:
216
+ """Check whether evaluation results exist for this dataset.
217
+
218
+ Args:
219
+ dataset_name: Name of the dataset to check
220
+
221
+ Returns:
222
+ True if evaluation results exist, False otherwise
223
+ """
224
+ logger.info(f"Looking for evaluation results for dataset: {dataset_name}")
225
+
226
+ try:
227
+ # List all of the organization's datasets
228
+ datasets = self.api.list_datasets(author=self.organization)
229
+
230
+ # Look for evaluation datasets
231
+ eval_datasets = [ds for ds in datasets if ds.id.startswith(f"{self.organization}/evaluation-")]
232
+
233
+ if self.verbose:
234
+ logger.info(f"Evaluation datasets found: {len(eval_datasets)}")
235
+ for ds in eval_datasets[:5]:
236
+ logger.info(f" - {ds.id}")
237
+
238
+ # Check whether the specified dataset is mentioned in the evaluations
239
+ for eval_ds in eval_datasets:
240
+ try:
241
+ # Download the README to see whether the dataset is mentioned
242
+ readme_path = self.api.hf_hub_download(
243
+ repo_id=eval_ds.id,
244
+ filename="README.md",
245
+ repo_type="dataset"
246
+ )
247
+
248
+ with open(readme_path, 'r') as f:
249
+ readme_content = f.read()
250
+
251
+ if dataset_name in readme_content:
252
+ logger.success(f"Evaluation results found in: {eval_ds.id}")
253
+ return True
254
+ except Exception:
255
+ continue
256
+
257
+ logger.warning(f"No evaluation results found for dataset: {dataset_name}")
258
+ return False
259
+
260
+ except Exception as e:
261
+ logger.error(f"Error while searching for evaluation results: {str(e)}")
262
+ return False
263
+
264
+ def check_model_performances(self, dataset_name: str) -> Dict[str, float]:
265
+ """Check model performance on the specified dataset.
266
+
267
+ Args:
268
+ dataset_name: Name of the dataset to check
269
+
270
+ Returns:
271
+ Dictionary of model performance (model_name -> score)
272
+ """
273
+ logger.info(f"Checking model performance on dataset: {dataset_name}")
274
+ performances = {}
275
+
276
+ try:
277
+ # This part is speculative since we do not know the exact structure of the
278
+ # results. One possible approach is to look for JSON files containing
279
+ # metrics inside the evaluation datasets.
280
+
281
+ # Look for evaluation datasets
282
+ datasets = self.api.list_datasets(author=self.organization)
283
+ eval_datasets = [ds for ds in datasets if ds.id.startswith(f"{self.organization}/evaluation-")]
284
+
285
+ for eval_ds in eval_datasets:
286
+ try:
287
+ files = self.api.list_repo_files(eval_ds.id, repo_type="dataset")
288
+ result_files = [f for f in files if "result" in f.lower() and f.endswith(".json")]
289
+
290
+ for result_file in result_files:
291
+ file_path = self.api.hf_hub_download(
292
+ repo_id=eval_ds.id,
293
+ filename=result_file,
294
+ repo_type="dataset"
295
+ )
296
+
297
+ with open(file_path, 'r') as f:
298
+ results = json.load(f)
299
+
300
+ # Basic analysis of the results (adapt to the actual structure)
301
+ if "model_name" in results and "metrics" in results:
302
+ model_name = results["model_name"]
303
+ metrics = results["metrics"]
304
+
305
+ # Use the first metric found as the score
306
+ if metrics and isinstance(metrics, dict):
307
+ first_metric = list(metrics.keys())[0]
308
+ performances[model_name] = metrics[first_metric]
309
+ except Exception:
310
+ continue
311
+
312
+ if performances:
313
+ logger.success(f"Performance found for {len(performances)} models")
314
+ for model, score in performances.items():
315
+ logger.info(f" - {model}: {score}")
316
+ else:
317
+ logger.warning("No model performance found")
318
+
319
+ return performances
320
+
321
+ except Exception as e:
322
+ logger.error(f"Error while checking performance: {str(e)}")
323
+ return {}
324
+
325
+ def main():
326
+ """Main entry point."""
327
+ parser = configure_argument_parser()
328
+ args = parser.parse_args()
329
+
330
+ if not args.dataset:
331
+ logger.error("Please specify a dataset with --dataset")
332
+ parser.print_help()
333
+ return
334
+
335
+ # Create the tester
336
+ tester = YourbenchTester(args.org, args.verbose)
337
+
338
+ # 1. Check that the dataset exists
339
+ dataset_info = tester.test_dataset_exists(args.dataset)
340
+
341
+ if not dataset_info:
342
+ logger.error(f"Dataset {args.org}/{args.dataset} does not exist or is not accessible")
343
+ return
344
+
345
+ # 2. Analyze the dataset contents
346
+ success, stats = tester.analyze_dataset_content(args.dataset)
347
+
348
+ if success:
349
+ logger.info("\n=== Dataset statistics ===")
350
+ logger.info(f"Number of files: {stats['fichiers']}")
351
+ logger.info(f"JSON files: {stats['fichiers_json']}")
352
+ logger.info(f"Parquet files: {stats['fichiers_parquet']}")
353
+ logger.info(f"Contains questions: {'Yes' if stats['a_questions'] else 'No'}")
354
+
355
+ if stats['a_questions']:
356
+ logger.info(f"Number of questions: {stats['nb_questions']}")
357
+
358
+ if 'types_documents' in stats and stats['types_documents']:
359
+ logger.info(f"Document types: {', '.join(stats['types_documents'])}")
360
+
361
+ # Print the structure of the Parquet files
362
+ if 'structure_parquet' in stats and stats['structure_parquet']:
363
+ logger.info("\n=== Parquet file structure ===")
364
+ for category, info in stats['structure_parquet'].items():
365
+ logger.info(f"\nCategory: {category}")
366
+ logger.info(f"Number of rows: {info['nb_lignes']}")
367
+ logger.info(f"Columns: {', '.join(info['colonnes'])}")
368
+
369
+ if args.verbose and 'exemple' in info and info['exemple']:
370
+ logger.info("\nSample row:")
371
+ for key, value in info['exemple'].items():
372
+ # Truncate overly long values
373
+ if isinstance(value, str) and len(value) > 100:
374
+ value = value[:100] + "..."
375
+ logger.info(f" {key}: {value}")
376
+
377
+ # 3. Check whether evaluation results exist
378
+ has_evaluations = tester.check_evaluation_results(args.dataset)
379
+
380
+ if has_evaluations:
381
+ # 4. Check model performance
382
+ performances = tester.check_model_performances(args.dataset)
383
+
384
+ if performances:
385
+ logger.info("\n=== Model ranking ===")
387
+ # Sort models by score (highest to lowest)
387
+ sorted_models = sorted(performances.items(), key=lambda x: x[1], reverse=True)
388
+ for i, (model, score) in enumerate(sorted_models, 1):
389
+ logger.info(f"{i}. {model}: {score:.4f}")
390
+
391
+ logger.success("Test finished!")
392
+
393
+ if __name__ == "__main__":
394
+ main()
docker-compose.yml ADDED
@@ -0,0 +1,33 @@
1
+ services:
2
+ backend:
3
+ build:
4
+ context: ./backend
5
+ dockerfile: Dockerfile.dev
6
+ args:
7
+ - HF_TOKEN=${HF_TOKEN}
8
+ ports:
9
+ - "${BACKEND_PORT:-8000}:8000"
10
+ volumes:
11
+ - ./backend:/app
12
+ environment:
13
+ - ENVIRONMENT=${ENVIRONMENT:-development}
14
+ - HF_TOKEN=${HF_TOKEN}
15
+ - HF_HOME=${HF_HOME:-/.cache}
16
+ command: uvicorn app.asgi:app --host 0.0.0.0 --port 8000 --reload
17
+
18
+ frontend:
19
+ build:
20
+ context: ./frontend
21
+ dockerfile: Dockerfile.dev
22
+ ports:
23
+ - "${FRONTEND_PORT:-7860}:7860"
24
+ volumes:
25
+ - ./frontend:/app
26
+ - /app/node_modules
27
+ environment:
28
+ - NODE_ENV=${ENVIRONMENT:-development}
29
+ - CHOKIDAR_USEPOLLING=true
30
+ - PORT=${FRONTEND_PORT:-7860}
31
+ command: npm start
32
+ stdin_open: true
33
+ tty: true
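+
+ # Typical usage (Compose reads HF_TOKEN and the port variables from a .env
+ # file, e.g. one copied from .env.example):
+ #   docker compose up --build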
frontend/Dockerfile.dev ADDED
@@ -0,0 +1,15 @@
1
+ FROM node:18
2
+
3
+ WORKDIR /app
4
+
5
+ # Install required global dependencies
6
+ RUN npm install -g react-scripts
7
+
8
+ # Copy package.json and package-lock.json
9
+ COPY package*.json ./
10
+
11
+ # Install project dependencies
12
+ RUN npm install
13
+
14
+ # Volume will be mounted here, no need for COPY
15
+ CMD ["npm", "start"]
frontend/README.md ADDED
@@ -0,0 +1,80 @@
1
+ # Frontend - Open LLM Leaderboard 🏆
2
+
3
+ React interface for exploring and comparing open-source language models.
4
+
5
+ ## 🏗 Architecture
6
+
7
+ ```mermaid
8
+ flowchart TD
9
+ Client(["User Browser"]) --> Components["React Components"]
10
+
11
+ subgraph Frontend
12
+ Components --> Context["Context Layer<br>• LeaderboardContext<br>• Global State"]
13
+
14
+ API["API Layer<br>• /api/leaderboard/formatted<br>• TanStack Query"] --> |Data Feed| Context
15
+
16
+ Context --> Hooks["Hooks Layer<br>• Data Processing<br>• Filtering<br>• Caching"]
17
+
18
+ Hooks --> Features["Features<br>• Table Management<br>• Search & Filters<br>• Display Options"]
19
+ Features --> Cache["Cache Layer<br>• LocalStorage<br>• URL State"]
20
+ end
21
+
22
+ API --> Backend["Backend Server"]
23
+
24
+ style Backend fill:#f96,stroke:#333,stroke-width:2px
25
+ ```
26
+
27
+ ## ✨ Core Features
28
+
29
+ - 🔍 **Search & Filters**: Real-time filtering, regex search, advanced filters
30
+ - 📊 **Data Visualization**: Interactive table, customizable columns, sorting
31
+ - 🔄 **State Management**: URL sync, client-side caching (5min TTL)
32
+ - 📱 **Responsive Design**: Mobile-friendly, dark/light themes
33
+
34
+ ## 🛠 Tech Stack
35
+
36
+ - React 18 + Material-UI
37
+ - TanStack Query & Table
38
+ - React Router v6
39
+
40
+ ## 📁 Project Structure
41
+
42
+ ```
43
+ src/
44
+ ├── pages/
45
+ │ └── LeaderboardPage/
46
+ │ ├── components/ # UI Components
47
+ │ ├── context/ # Global State
48
+ │ └── hooks/ # Data Processing
49
+ ├── components/ # Shared Components
50
+ └── utils/ # Helper Functions
51
+ ```
52
+
53
+ ## 🚀 Development
54
+
55
+ ```bash
56
+ # Install dependencies
57
+ npm install
58
+
59
+ # Start development server
60
+ npm start
61
+
62
+ # Production build
63
+ npm run build
64
+ ```
65
+
66
+ ## 🔧 Environment Variables
67
+
68
+ ```env
69
+ # API Configuration
70
+ REACT_APP_API_URL=http://localhost:8000
71
+ REACT_APP_CACHE_DURATION=300000 # 5 minutes
72
+ ```
73
+
74
+ ## 🔄 Data Flow
75
+
76
+ 1. API fetches leaderboard data from the backend (sketched below)
77
+ 2. Context stores and manages global state
78
+ 3. Hooks handle data processing and filtering
79
+ 4. Components render based on processed data
80
+ 5. Cache maintains user preferences and URL state
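+
+ A rough sketch of steps 1-3, assuming a TanStack Query hook wired to the formatted endpoint (the hook name and response handling below are illustrative, not the actual implementation):
+
+ ```js
+ // Hypothetical data-fetch hook; the endpoint matches the API layer above
+ import { useQuery } from "@tanstack/react-query";
+
+ const API_URL = process.env.REACT_APP_API_URL || "http://localhost:8000";
+
+ export function useLeaderboardData() {
+   return useQuery({
+     queryKey: ["leaderboard"],
+     queryFn: async () => {
+       const res = await fetch(`${API_URL}/api/leaderboard/formatted`);
+       if (!res.ok) throw new Error(`HTTP ${res.status}`);
+       return res.json(); // fed into the context layer for the filtering hooks
+     },
+     staleTime: 300000, // mirrors the 5-minute client cache TTL
+   });
+ }
+ ```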
frontend/package.json ADDED
@@ -0,0 +1,55 @@
1
+ {
2
+ "name": "open-llm-leaderboard",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "dependencies": {
6
+ "@emotion/react": "^11.13.3",
7
+ "@emotion/styled": "^11.13.0",
8
+ "@huggingface/hub": "^0.14.0",
9
+ "@mui/icons-material": "^6.1.7",
10
+ "@mui/lab": "^6.0.0-beta.16",
11
+ "@mui/material": "^6.1.6",
12
+ "@mui/x-data-grid": "^7.22.2",
13
+ "@tanstack/react-query": "^5.62.2",
14
+ "@tanstack/react-table": "^8.20.5",
15
+ "@tanstack/react-virtual": "^3.10.9",
16
+ "@testing-library/jest-dom": "^5.17.0",
17
+ "@testing-library/react": "^13.4.0",
18
+ "@testing-library/user-event": "^13.5.0",
19
+ "compression": "^1.7.4",
20
+ "cors": "^2.8.5",
21
+ "express": "^4.18.2",
+ "http-proxy-middleware": "^2.0.6",
22
+ "react": "^18.3.1",
23
+ "react-dom": "^18.3.1",
24
+ "react-router-dom": "^6.28.0",
25
+ "react-scripts": "5.0.1",
26
+ "serve-static": "^1.15.0",
27
+ "web-vitals": "^2.1.4"
28
+ },
29
+ "scripts": {
30
+ "start": "react-scripts start",
31
+ "build": "react-scripts build",
32
+ "test": "react-scripts test",
33
+ "eject": "react-scripts eject",
34
+ "serve": "node server.js"
35
+ },
36
+ "eslintConfig": {
37
+ "extends": [
38
+ "react-app",
39
+ "react-app/jest"
40
+ ]
41
+ },
42
+ "browserslist": {
43
+ "production": [
44
+ ">0.2%",
45
+ "not dead",
46
+ "not op_mini all"
47
+ ],
48
+ "development": [
49
+ "last 1 chrome version",
50
+ "last 1 firefox version",
51
+ "last 1 safari version"
52
+ ]
53
+ },
54
+ "proxy": "http://backend:8000"
55
+ }
frontend/public/index.html ADDED
@@ -0,0 +1,96 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <link rel="icon" href="%PUBLIC_URL%/logo32.png" />
6
+ <meta
7
+ name="viewport"
8
+ content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no, viewport-fit=cover"
9
+ />
10
+ <meta
11
+ name="description"
12
+ content="Interactive leaderboard tracking and comparing open-source Large Language Models across multiple benchmarks: IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
13
+ />
14
+
15
+ <!-- Open Graph / Facebook -->
16
+ <meta property="og:type" content="website" />
17
+ <meta
18
+ property="og:url"
19
+ content="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard"
20
+ />
21
+ <meta
22
+ property="og:title"
23
+ content="Open LLM Leaderboard - Compare Open Source Large Language Models"
24
+ />
25
+ <meta
26
+ property="og:description"
27
+ content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
28
+ />
29
+ <meta property="og:image" content="%PUBLIC_URL%/og-image.jpg" />
30
+
31
+ <!-- Twitter -->
32
+ <meta property="twitter:card" content="summary_large_image" />
33
+ <meta
34
+ property="twitter:url"
35
+ content="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard"
36
+ />
37
+ <meta
38
+ property="twitter:title"
39
+ content="Open LLM Leaderboard - Compare Open Source Large Language Models"
40
+ />
41
+ <meta
42
+ property="twitter:description"
43
+ content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
44
+ />
45
+ <meta property="twitter:image" content="%PUBLIC_URL%/og-image.jpg" />
46
+ <!--
47
+ Notice the use of %PUBLIC_URL% in the tags above.
48
+ It will be replaced with the URL of the `public` folder during the build.
49
+ Only files inside the `public` folder can be referenced from the HTML.
50
+
51
+ Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
52
+ work correctly both with client-side routing and a non-root public URL.
53
+ Learn how to configure a non-root public URL by running `npm run build`.
54
+ -->
55
+ <title>
56
+ Open LLM Leaderboard - Compare Open Source Large Language Models
57
+ </title>
58
+ <link
59
+ href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700&display=swap"
60
+ rel="stylesheet"
61
+ />
62
+ <style>
63
+ html,
64
+ body {
65
+ position: fixed;
66
+ width: 100%;
67
+ height: 100%;
68
+ overflow: hidden;
69
+ -webkit-overflow-scrolling: touch;
70
+ }
71
+ #root {
72
+ position: absolute;
73
+ top: 0;
74
+ left: 0;
75
+ right: 0;
76
+ bottom: 0;
77
+ overflow-y: auto;
78
+ -webkit-overflow-scrolling: touch;
79
+ }
80
+ </style>
81
+ </head>
82
+ <body>
83
+ <noscript>You need to enable JavaScript to run this app.</noscript>
84
+ <div id="root"></div>
85
+ <!--
86
+ This HTML file is a template.
87
+ If you open it directly in the browser, you will see an empty page.
88
+
89
+ You can add webfonts, meta tags, or analytics to this file.
90
+ The build step will place the bundled scripts into the <body> tag.
91
+
92
+ To begin the development, run `npm start` or `yarn start`.
93
+ To create a production bundle, use `npm run build` or `yarn build`.
94
+ -->
95
+ </body>
96
+ </html>
frontend/public/logo256.png ADDED
frontend/public/logo32.png ADDED
frontend/public/og-image.jpg ADDED
frontend/public/robots.txt ADDED
@@ -0,0 +1,3 @@
1
+ # https://www.robotstxt.org/robotstxt.html
2
+ User-agent: *
3
+ Disallow:
frontend/server.js ADDED
@@ -0,0 +1,85 @@
1
+ const express = require("express");
2
+ const cors = require("cors");
3
+ const compression = require("compression");
4
+ const path = require("path");
5
+ const serveStatic = require("serve-static");
6
+ const { createProxyMiddleware } = require("http-proxy-middleware");
7
+
8
+ const app = express();
9
+ const port = process.env.PORT || 7860;
10
+ const apiPort = process.env.INTERNAL_API_PORT || 7861;
11
+
12
+ // Enable CORS for all routes
13
+ app.use(cors());
14
+
15
+ // Enable GZIP compression
16
+ app.use(compression());
17
+
18
+ // Proxy all API requests to the Python backend
19
+ app.use(
20
+ "/api",
21
+ createProxyMiddleware({
22
+ target: `http://127.0.0.1:${apiPort}`,
23
+ changeOrigin: true,
24
+ onError: (err, req, res) => {
25
+ console.error("Proxy Error:", err);
26
+ res.status(500).json({ error: "Proxy Error", details: err.message });
27
+ },
28
+ })
29
+ );
30
+
31
+ // Serve static files from the build directory
32
+ app.use(
33
+ express.static(path.join(__dirname, "build"), {
34
+ // Don't cache HTML files
35
+ setHeaders: (res, path) => {
36
+ if (path.endsWith(".html")) {
37
+ res.setHeader("Cache-Control", "no-cache, no-store, must-revalidate");
38
+ res.setHeader("Pragma", "no-cache");
39
+ res.setHeader("Expires", "0");
40
+ } else {
41
+ // Cache other static resources for 1 year
42
+ res.setHeader("Cache-Control", "public, max-age=31536000");
43
+ }
44
+ },
45
+ })
46
+ );
47
+
48
+ // Middleware to preserve URL parameters
49
+ app.use((req, res, next) => {
50
+ // Don't interfere with API requests
51
+ if (req.url.startsWith("/api")) {
52
+ return next();
53
+ }
54
+
55
+ // Preserve original URL parameters
56
+ req.originalUrl = req.url;
57
+ next();
58
+ });
59
+
60
+ // Handle all other routes by serving index.html
61
+ app.get("*", (req, res, next) => {
62
+ // Don't interfere with API requests
63
+ if (req.url.startsWith("/api")) {
64
+ return next();
65
+ }
66
+
67
+ // Headers for client-side routing
68
+ res.set({
69
+ "Cache-Control": "no-cache, no-store, must-revalidate",
70
+ Pragma: "no-cache",
71
+ Expires: "0",
72
+ });
73
+
74
+ // Send index.html for all other routes
75
+ res.sendFile(path.join(__dirname, "build", "index.html"));
76
+ });
77
+
78
+ app.listen(port, "0.0.0.0", () => {
79
+ console.log(
80
+ `Frontend server is running on port ${port} in ${
81
+ process.env.NODE_ENV || "development"
82
+ } mode`
83
+ );
84
+ console.log(`API proxy target: http://127.0.0.1:${apiPort}`);
85
+ });
frontend/src/App.js ADDED
@@ -0,0 +1,427 @@
1
+ import React, { useState, useEffect } from "react";
2
+ import {
3
+ Box,
4
+ Container,
5
+ CssBaseline,
6
+ Typography,
7
+ CircularProgress,
8
+ } from "@mui/material";
9
+ import {
10
+ BrowserRouter as Router,
11
+ Routes,
12
+ Route,
13
+ Navigate,
14
+ useNavigate,
15
+ useSearchParams,
16
+ } from "react-router-dom";
17
+ import getTheme from "./config/theme";
18
+ import { useThemeMode } from "./hooks/useThemeMode";
19
+ import { ThemeProvider } from "@mui/material/styles";
20
+ import BenchmarkGenerator from "./components/BenchmarkGenerator";
21
+ import BenchmarkCreateForm from "./components/BenchmarkCreateForm";
22
+ import BenchmarkDisplay from "./components/BenchmarkDisplay";
23
+ import BenchmarkEvaluation from "./components/BenchmarkEvaluation";
24
+ import EvaluationDisplay from "./components/EvaluationDisplay";
25
+
26
+ // Shared header component
27
+ const Header = () => (
28
+ <Box sx={{ textAlign: "center", mb: 8 }}>
29
+ <h1>Yourbench Demo</h1>
30
+ <p>
31
+ Quickly create <b>zero-shot benchmarks</b> from your documents – keeping
32
+ models accurate and adaptable
33
+ </p>
34
+ </Box>
35
+ );
36
+
37
+ // Home page with the creation form
38
+ function HomePage() {
39
+ const navigate = useNavigate();
40
+
41
+ const handleStartGeneration = (sid) => {
42
+ navigate(`/benchmark-generation?session=${sid}`);
43
+ };
44
+
45
+ return (
46
+ <>
47
+ <Header />
48
+ <BenchmarkCreateForm onStartGeneration={handleStartGeneration} />
49
+ </>
50
+ );
51
+ }
52
+
53
+ // Benchmark generation page
54
+ function BenchmarkGenerationPage() {
55
+ const navigate = useNavigate();
56
+ const [searchParams] = useSearchParams();
57
+ const sessionId = searchParams.get("session");
58
+ const [isValidSession, setIsValidSession] = useState(true);
59
+
60
+ // Check that the session is valid
61
+ useEffect(() => {
62
+ if (!sessionId) {
63
+ setIsValidSession(false);
64
+ }
65
+ }, [sessionId]);
66
+
67
+ const handleGenerationComplete = (result) => {
68
+ console.log("Benchmark generation completed:", result);
69
+ if (result && result.success) {
70
+ navigate(`/benchmark-display?session=${sessionId}`);
71
+ }
72
+ };
73
+
74
+ if (!isValidSession) {
75
+ return <Navigate to="/" />;
76
+ }
77
+
78
+ return (
79
+ <>
80
+ <Header />
81
+ <BenchmarkGenerator
82
+ sessionId={sessionId}
83
+ onComplete={handleGenerationComplete}
84
+ />
85
+ </>
86
+ );
87
+ }
88
+
89
+ // Benchmark display page
90
+ function BenchmarkDisplayPage() {
91
+ const navigate = useNavigate();
92
+ const [searchParams] = useSearchParams();
93
+ const sessionId = searchParams.get("session");
94
+ const [benchmarkQuestions, setBenchmarkQuestions] = useState([]);
95
+ const [datasetUrl, setDatasetUrl] = useState(null);
96
+ const [isValidSession, setIsValidSession] = useState(true);
97
+ const [isLoading, setIsLoading] = useState(true);
98
+
99
+ // Fetch the benchmark questions from the API
100
+ useEffect(() => {
101
+ console.log("BenchmarkDisplayPage useEffect - sessionId:", sessionId);
102
+
103
+ if (!sessionId) {
104
+ console.log("Missing session ID, redirecting to home");
105
+ setIsValidSession(false);
106
+ return;
107
+ }
108
+
109
+ setIsLoading(true);
110
+
111
+ const fetchBenchmarkQuestions = async () => {
112
+ console.log(
113
+ "Fetching questions for session:",
114
+ sessionId
115
+ );
116
+ try {
117
+ const apiUrl = `http://localhost:3001/benchmark-questions/${sessionId}`;
118
+ console.log("API call:", apiUrl);
119
+
120
+ const response = await fetch(apiUrl);
121
+ console.log("API response received:", response.status);
122
+
123
+ // Check if the response status is 404 (Not Found) or other error
124
+ if (!response.ok) {
125
+ if (response.status === 404) {
126
+ console.error("Session not found");
127
+ setIsValidSession(false);
128
+ return;
129
+ } else {
130
+ console.error(`Server error: ${response.status}`);
131
+ setIsLoading(false);
132
+ return;
133
+ }
134
+ }
135
+
136
+ const data = await response.json();
137
+ console.log("API data:", data);
138
+
139
+ if (data.success && data.questions && data.questions.length > 0) {
140
+ console.log("Questions loaded successfully:", data.questions);
141
+ setBenchmarkQuestions(data.questions);
142
+ } else {
143
+ console.warn(
144
+ "Failed to load questions, falling back to defaults"
145
+ );
146
+ }
147
+
148
+ if (data.dataset_url) {
149
+ setDatasetUrl(data.dataset_url);
150
+ } else {
151
+ const url = `https://huggingface.co/datasets/yourbench/yourbench_${sessionId}`;
152
+ setDatasetUrl(url);
153
+ console.log("Generated dataset URL:", url);
154
+ }
155
+ } catch (error) {
156
+ console.error("Error fetching questions:", error);
157
+ setIsValidSession(false);
158
+ } finally {
159
+ setIsLoading(false);
160
+ }
161
+ };
162
+
163
+ fetchBenchmarkQuestions();
164
+ }, [sessionId]);
165
+
166
+ const handleStartEvaluation = () => {
167
+ console.log("Starting evaluation with session ID:", sessionId);
168
+ navigate(`/benchmark-evaluation?session=${sessionId}`);
169
+ };
170
+
171
+ // Default questions if the API call fails
172
+ const defaultSampleQuestions = [
173
+ {
174
+ id: 1,
175
+ question: "What are the key features discussed in the document?",
176
+ type: "single_shot",
177
+ },
178
+ {
179
+ id: 2,
180
+ question:
181
+ "How does the proposed solution address the challenges mentioned in section 2 in relation to the overall market trends?",
182
+ type: "multi_hop",
183
+ },
184
+ ];
185
+
186
+ if (!isValidSession) {
187
+ return <Navigate to="/" />;
188
+ }
189
+
190
+ return (
191
+ <>
192
+ <Header />
193
+ {isLoading ? (
194
+ <Box
195
+ sx={{
196
+ display: "flex",
197
+ justifyContent: "center",
198
+ alignItems: "center",
199
+ mt: 8,
200
+ mb: 8,
201
+ }}
202
+ >
203
+ <CircularProgress size={60} />
204
+ </Box>
205
+ ) : (
206
+ <BenchmarkDisplay
207
+ onStartEvaluation={handleStartEvaluation}
208
+ sessionId={sessionId}
209
+ datasetUrl={datasetUrl}
210
+ sampleQuestions={
211
+ benchmarkQuestions.length > 0
212
+ ? benchmarkQuestions
213
+ : defaultSampleQuestions
214
+ }
215
+ />
216
+ )}
217
+ </>
218
+ );
219
+ }
220
+
221
+ // Benchmark evaluation page
222
+ function BenchmarkEvaluationPage() {
223
+ const navigate = useNavigate();
224
+ const [searchParams] = useSearchParams();
225
+ const sessionId = searchParams.get("session");
226
+ const [isValidSession, setIsValidSession] = useState(true);
227
+ const [isLoading, setIsLoading] = useState(true);
228
+
229
+ // Check that the session is valid
230
+ useEffect(() => {
231
+ if (!sessionId) {
232
+ console.log(
233
+ "Missing session ID for evaluation, redirecting to home"
234
+ );
235
+ setIsValidSession(false);
236
+ return;
237
+ }
238
+
239
+ // Verify session exists by calling the API
240
+ const checkSession = async () => {
241
+ try {
242
+ const response = await fetch(
243
+ `http://localhost:3001/benchmark-questions/${sessionId}`
244
+ );
245
+
246
+ if (!response.ok) {
247
+ console.error(
248
+ `Invalid session or server error: ${response.status}`
249
+ );
250
+ setIsValidSession(false);
251
+ }
252
+ } catch (error) {
253
+ console.error("Error verifying session:", error);
254
+ setIsValidSession(false);
255
+ } finally {
256
+ setIsLoading(false);
257
+ }
258
+ };
259
+
260
+ checkSession();
261
+ }, [sessionId]);
262
+
263
+ const handleEvaluationComplete = (result) => {
264
+ console.log("Evaluation finished:", result);
265
+ // Stay on this page: the results are rendered directly
266
+ // inside the BenchmarkEvaluation component
267
+ };
268
+
269
+ if (!isValidSession) {
270
+ return <Navigate to="/" />;
271
+ }
272
+
273
+ return (
274
+ <>
275
+ <Header />
276
+ {isLoading ? (
277
+ <Box
278
+ sx={{
279
+ display: "flex",
280
+ justifyContent: "center",
281
+ alignItems: "center",
282
+ mt: 8,
283
+ mb: 8,
284
+ }}
285
+ >
286
+ <CircularProgress size={60} />
287
+ </Box>
288
+ ) : (
289
+ <BenchmarkEvaluation
290
+ sessionId={sessionId}
291
+ onComplete={handleEvaluationComplete}
292
+ />
293
+ )}
294
+ </>
295
+ );
296
+ }
297
+
298
+ // Evaluation results display page
299
+ function EvaluationDisplayPage() {
300
+ const navigate = useNavigate();
301
+ const [searchParams] = useSearchParams();
302
+ const sessionId = searchParams.get("session");
303
+ const [isValidSession, setIsValidSession] = useState(true);
304
+ const [isLoading, setIsLoading] = useState(true);
305
+
306
+ // Check that the session is valid
307
+ useEffect(() => {
308
+ if (!sessionId) {
309
+ console.log(
310
+ "Missing session ID for results display, redirecting to home"
311
+ );
312
+ setIsValidSession(false);
313
+ return;
314
+ }
315
+
316
+ // Verify session exists by calling the API
317
+ const checkSession = async () => {
318
+ try {
319
+ const response = await fetch(
320
+ `http://localhost:3001/benchmark-questions/${sessionId}`
321
+ );
322
+
323
+ if (!response.ok) {
324
+ console.error(
325
+ `Invalid session or server error: ${response.status}`
326
+ );
327
+ setIsValidSession(false);
328
+ }
329
+ } catch (error) {
330
+ console.error("Error verifying session:", error);
331
+ setIsValidSession(false);
332
+ } finally {
333
+ setIsLoading(false);
334
+ }
335
+ };
336
+
337
+ checkSession();
338
+ }, [sessionId]);
339
+
340
+ if (!isValidSession) {
341
+ return <Navigate to="/" />;
342
+ }
343
+
344
+ return (
345
+ <>
346
+ <Header />
347
+ {isLoading ? (
348
+ <Box
349
+ sx={{
350
+ display: "flex",
351
+ justifyContent: "center",
352
+ alignItems: "center",
353
+ mt: 8,
354
+ mb: 8,
355
+ }}
356
+ >
357
+ <CircularProgress size={60} />
358
+ </Box>
359
+ ) : (
360
+ <EvaluationDisplay sessionId={sessionId} />
361
+ )}
362
+ </>
363
+ );
364
+ }
365
+
366
+ // Keyboard shortcuts
367
+ function KeyboardShortcuts() {
368
+ useEffect(() => {
369
+ const handleKeyDown = (e) => {
370
+ if (e.key === "p") {
371
+ console.log("Debug key pressed: Clearing auth data and refreshing");
372
+ localStorage.removeItem("hf_oauth");
373
+ localStorage.removeItem("auth_return_to");
374
+ alert("Auth data cleared. Page will reload.");
375
+ window.location.reload();
376
+ }
377
+ };
378
+
379
+ window.addEventListener("keydown", handleKeyDown);
380
+ return () => {
381
+ window.removeEventListener("keydown", handleKeyDown);
382
+ };
383
+ }, []);
384
+
385
+ return null;
386
+ }
387
+
388
+ // Main component with the routes
389
+ function App() {
390
+ const { mode } = useThemeMode();
391
+ const theme = getTheme(mode);
392
+
393
+ return (
394
+ <ThemeProvider theme={theme}>
395
+ <CssBaseline />
396
+ <Router>
397
+ <Container maxWidth="md">
398
+ <Box sx={{ pt: 12, pb: 4 }}>
399
+ <KeyboardShortcuts />
400
+ <Routes>
401
+ <Route path="/" element={<HomePage />} />
402
+ <Route
403
+ path="/benchmark-generation"
404
+ element={<BenchmarkGenerationPage />}
405
+ />
406
+ <Route
407
+ path="/benchmark-display"
408
+ element={<BenchmarkDisplayPage />}
409
+ />
410
+ <Route
411
+ path="/benchmark-evaluation"
412
+ element={<BenchmarkEvaluationPage />}
413
+ />
414
+ <Route
415
+ path="/evaluation-display"
416
+ element={<EvaluationDisplayPage />}
417
+ />
418
+ <Route path="*" element={<Navigate to="/" replace />} />
419
+ </Routes>
420
+ </Box>
421
+ </Container>
422
+ </Router>
423
+ </ThemeProvider>
424
+ );
425
+ }
426
+
427
+ export default App;
frontend/src/components/BenchmarkCreateForm.jsx ADDED
@@ -0,0 +1,295 @@
1
+ import React, { useState, useRef, useEffect } from "react";
2
+ import {
3
+ Box,
4
+ Paper,
5
+ Typography,
6
+ CircularProgress,
7
+ Alert,
8
+ Button,
9
+ Stepper,
10
+ Step,
11
+ StepLabel,
12
+ } from "@mui/material";
13
+ import { useLocation } from "react-router-dom";
14
+ import CloudUploadIcon from "@mui/icons-material/CloudUpload";
15
+ import PlayArrowIcon from "@mui/icons-material/PlayArrow";
16
+ import AuthContainer from "./shared/AuthContainer";
17
+ import { useThemeMode } from "../hooks/useThemeMode";
18
+ import getTheme from "../config/theme";
19
+
20
+ /**
21
+ * Component to display a stepper with three steps: Login, Upload File, and Generate
22
+ *
23
+ * @param {Object} props - Component props
24
+ * @param {number} props.activeStep - Current active step (0-based index)
25
+ * @returns {JSX.Element} Stepper component
26
+ */
27
+ const StepsDisplay = ({ activeStep }) => {
28
+ const steps = ["Login", "Upload File", "Generate"];
29
+
30
+ return (
31
+ <Box sx={{ width: "100%", mb: 4 }}>
32
+ <Stepper activeStep={activeStep} alternativeLabel>
33
+ {steps.map((label) => (
34
+ <Step key={label}>
35
+ <StepLabel>{label}</StepLabel>
36
+ </Step>
37
+ ))}
38
+ </Stepper>
39
+ </Box>
40
+ );
41
+ };
42
+
43
+ /**
44
+ * Component for creating a new benchmark, including authentication, file upload, and generation initiation
45
+ *
46
+ * @param {Object} props - Component props
47
+ * @param {Function} props.onStartGeneration - Callback when generation starts with sessionId
48
+ * @returns {JSX.Element} BenchmarkCreateForm component
49
+ */
50
+ function BenchmarkCreateForm({ onStartGeneration }) {
51
+ const { mode } = useThemeMode();
52
+ const theme = getTheme(mode);
53
+ const [isDragging, setIsDragging] = useState(false);
54
+ const [uploadStatus, setUploadStatus] = useState(null);
55
+ const [isLoading, setIsLoading] = useState(false);
56
+ const [activeStep, setActiveStep] = useState(0);
57
+ const [sessionId, setSessionId] = useState(null);
58
+ const fileInputRef = useRef(null);
59
+ const location = useLocation();
60
+
61
+ // Check if we're coming back from an OAuth redirect
62
+ useEffect(() => {
63
+ // If we have code in URL parameters, it's an OAuth callback
64
+ const params = new URLSearchParams(window.location.search);
65
+ if (params.has("code")) {
66
+ console.log("Detected OAuth callback, cleaning URL");
67
+
68
+ // Remove the query parameters from the URL without reloading
69
+ window.history.replaceState({}, document.title, window.location.pathname);
70
+
71
+ // Check if we have auth data in localStorage after a brief delay to let OAuth process complete
72
+ setTimeout(() => {
73
+ const storedAuth = localStorage.getItem("hf_oauth");
74
+ if (storedAuth) {
75
+ console.log("Found auth data after redirect, refreshing UI state");
76
+ setActiveStep(1); // Move to next step if authenticated
77
+ }
78
+ }, 1000);
79
+ }
80
+ }, [location]);
81
+
82
+ const handleDragOver = (e) => {
83
+ e.preventDefault();
84
+ setIsDragging(true);
85
+ };
86
+
87
+ const handleDragLeave = () => {
88
+ setIsDragging(false);
89
+ };
90
+
91
+ const handleClick = () => {
92
+ fileInputRef.current.click();
93
+ };
94
+
95
+ const handleFileChange = (e) => {
96
+ const file = e.target.files[0];
97
+ if (!file) return;
98
+
99
+ // Check that the file is a PDF, TXT, HTML or MD
100
+ if (
101
+ !file.name.endsWith(".pdf") &&
102
+ !file.name.endsWith(".txt") &&
103
+ !file.name.endsWith(".html") &&
104
+ !file.name.endsWith(".md")
105
+ ) {
106
+ setUploadStatus({
107
+ success: false,
108
+ message: "Only PDF, TXT, HTML and MD files are accepted",
109
+ });
110
+ return;
111
+ }
112
+
113
+ handleFileUpload(file);
114
+ };
115
+
116
+ const handleFileUpload = async (file) => {
117
+ setIsLoading(true);
118
+ setUploadStatus(null);
119
+
120
+ try {
121
+ const formData = new FormData();
122
+ formData.append("file", file);
123
+
124
+ const response = await fetch("http://localhost:3001/upload", {
125
+ method: "POST",
126
+ body: formData,
127
+ });
128
+
129
+ const result = await response.json();
130
+
131
+ if (response.ok) {
132
+ setUploadStatus({
133
+ success: true,
134
+ message: `File ${result.filename} uploaded successfully`,
135
+ });
136
+ // Store the session ID for the benchmark generation
137
+ setSessionId(result.session_id);
138
+ setActiveStep(2); // Advance to Generate step after successful upload
139
+ } else {
140
+ setUploadStatus({
141
+ success: false,
142
+ message: result.error || "Upload failed",
143
+ });
144
+ }
145
+ } catch (error) {
146
+ setUploadStatus({
147
+ success: false,
148
+ message: "Server connection error",
149
+ });
150
+ } finally {
151
+ setIsLoading(false);
152
+ }
153
+ };
154
+
155
+ const handleDrop = async (e) => {
156
+ e.preventDefault();
157
+ setIsDragging(false);
158
+
159
+ const file = e.dataTransfer.files[0];
160
+ if (!file) {
161
+ setUploadStatus({ success: false, message: "No file detected" });
162
+ return;
163
+ }
164
+
165
+ // Check that the file is a PDF, TXT, HTML or MD
166
+ if (
167
+ !file.name.endsWith(".pdf") &&
168
+ !file.name.endsWith(".txt") &&
169
+ !file.name.endsWith(".html") &&
170
+ !file.name.endsWith(".md")
171
+ ) {
172
+ setUploadStatus({
173
+ success: false,
174
+ message: "Only PDF, TXT, HTML and MD files are accepted",
175
+ });
176
+ return;
177
+ }
178
+
179
+ handleFileUpload(file);
180
+ };
181
+
182
+ const handleGenerateClick = () => {
183
+ if (onStartGeneration && sessionId) {
184
+ onStartGeneration(sessionId);
185
+ }
186
+ };
187
+
188
+ return (
189
+ <>
190
+ <StepsDisplay activeStep={activeStep} />
191
+
192
+ {/* Authentication step */}
193
+ {activeStep === 0 && (
194
+ <AuthContainer
195
+ actionText="use this demo"
196
+ onSuccess={() => setActiveStep(1)}
197
+ />
198
+ )}
199
+
200
+ {/* File upload step */}
201
+ {activeStep === 1 && (
202
+ <Paper
203
+ elevation={3}
204
+ sx={{
205
+ p: 4,
206
+ mt: 3,
207
+ mb: 3,
208
+ border: isDragging
209
+ ? `2px dashed ${theme.palette.primary.main}`
210
+ : "2px dashed #ccc",
211
+ backgroundColor: isDragging ? "rgba(0, 0, 0, 0.05)" : "transparent",
212
+ display: "flex",
213
+ flexDirection: "column",
214
+ alignItems: "center",
215
+ justifyContent: "center",
216
+ minHeight: 200,
217
+ cursor: "pointer",
218
+ transition: "all 0.3s ease",
219
+ }}
220
+ onDragOver={handleDragOver}
221
+ onDragLeave={handleDragLeave}
222
+ onDrop={handleDrop}
223
+ onClick={handleClick}
224
+ >
225
+ <input
226
+ type="file"
227
+ ref={fileInputRef}
228
+ onChange={handleFileChange}
229
+ accept=".pdf,.txt,.html,.md"
230
+ style={{ display: "none" }}
231
+ />
232
+ <CloudUploadIcon
233
+ sx={{ fontSize: 60, color: "text.secondary", mb: 1 }}
234
+ />
235
+ <Typography variant="h6" component="div" gutterBottom>
236
+ Drag and drop your file here or click to browse
237
+ </Typography>
238
+ <Typography variant="body2" color="text.secondary">
239
+ Accepted formats: PDF, TXT, HTML, MD
240
+ </Typography>
241
+
242
+ {isLoading && (
243
+ <Box sx={{ mt: 2 }}>
244
+ <CircularProgress size={30} />
245
+ </Box>
246
+ )}
247
+
248
+ {uploadStatus && (
249
+ <Alert
250
+ severity={uploadStatus.success ? "success" : "error"}
251
+ sx={{ mt: 2, width: "100%" }}
252
+ >
253
+ {uploadStatus.message}
254
+ </Alert>
255
+ )}
256
+ </Paper>
257
+ )}
258
+
259
+ {/* Generate button step */}
260
+ {activeStep === 2 && (
261
+ <Paper
262
+ elevation={3}
263
+ sx={{
264
+ p: 4,
265
+ mt: 3,
266
+ mb: 3,
267
+ display: "flex",
268
+ flexDirection: "column",
269
+ alignItems: "center",
270
+ justifyContent: "center",
271
+ minHeight: 200,
272
+ }}
273
+ >
274
+ <PlayArrowIcon
275
+ sx={{ fontSize: 60, color: "text.secondary", mb: 1 }}
276
+ />
277
+ <Typography variant="h6" component="div" gutterBottom>
278
+ Ready to generate your benchmark
279
+ </Typography>
280
+ <Button
281
+ variant="contained"
282
+ color="primary"
283
+ onClick={handleGenerateClick}
284
+ sx={{ mt: 2 }}
285
+ startIcon={<PlayArrowIcon />}
286
+ >
287
+ Generate Benchmark
288
+ </Button>
289
+ </Paper>
290
+ )}
291
+ </>
292
+ );
293
+ }
294
+
295
+ export default BenchmarkCreateForm;
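
For reference, a minimal sketch of the /upload response shape this component relies on. The field names are inferred from `result.filename` and `result.session_id` above; the real backend route may return additional fields.

// Hypothetical example payload consumed by handleFileUpload.
const exampleUploadResponse = {
  filename: "document.pdf", // echoed back in the success message
  session_id: "a1b2c3d4",   // stored in state and later passed to onStartGeneration
};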
frontend/src/components/BenchmarkDisplay.jsx ADDED
@@ -0,0 +1,161 @@
1
+ import React, { useState } from "react";
2
+ import {
3
+ Box,
4
+ Typography,
5
+ Paper,
6
+ Button,
7
+ Divider,
8
+ Card,
9
+ CardContent,
10
+ Link,
11
+ CircularProgress,
12
+ Tooltip,
13
+ } from "@mui/material";
14
+ import PlayArrowIcon from "@mui/icons-material/PlayArrow";
15
+ import AssessmentIcon from "@mui/icons-material/Assessment";
16
+ import LinkIcon from "@mui/icons-material/Link";
17
+ import DownloadIcon from "@mui/icons-material/Download";
18
+ import CheckCircleIcon from "@mui/icons-material/CheckCircle";
19
+
20
+ /**
21
+ * Component to display benchmark information and evaluation button
22
+ *
23
+ * @param {Object} props - Component props
24
+ * @param {Array} props.sampleQuestions - Array of sample questions to display
25
+ * @param {Function} props.onStartEvaluation - Function to call when evaluation button is clicked
26
+ * @param {string} props.sessionId - Session ID used for the benchmark generation
27
+ * @param {string} props.datasetUrl - URL to the Hugging Face dataset
28
+ * @returns {JSX.Element} Benchmark display component
29
+ */
30
+ const BenchmarkDisplay = ({
31
+ sampleQuestions = [],
32
+ onStartEvaluation,
33
+ sessionId,
34
+ datasetUrl,
35
+ }) => {
36
+ const [isDownloading, setIsDownloading] = useState(false);
37
+
38
+ // Default questions if none provided
39
+ const questions =
40
+ sampleQuestions.length > 0
41
+ ? sampleQuestions
42
+ : [
43
+ {
44
+ id: 1,
45
+ question: "What are the key benefits of the described technology?",
46
+ type: "single_shot",
47
+ },
48
+ {
49
+ id: 2,
50
+ question:
51
+ "Based on the context about machine learning frameworks, how does TensorFlow compare to PyTorch in terms of deployment capabilities?",
52
+ type: "multi_hop",
53
+ },
54
+ ];
55
+
56
+ const handleEvaluationClick = () => {
57
+ if (onStartEvaluation) {
58
+ onStartEvaluation();
59
+ }
60
+ };
61
+
62
+ const handleDownloadClick = async () => {
63
+ if (!sessionId) return;
64
+
65
+ setIsDownloading(true);
66
+ try {
67
+ // Request the dataset download from the backend
68
+ const downloadUrl = `http://localhost:3001/download-dataset/${sessionId}`;
69
+
70
+ // Create a temporary anchor element to trigger the download
71
+ const link = document.createElement("a");
72
+ link.href = downloadUrl;
73
+ link.setAttribute("download", `yourbench_${sessionId}_dataset.zip`);
74
+ document.body.appendChild(link);
75
+ link.click();
76
+ document.body.removeChild(link);
77
+ } catch (error) {
78
+ console.error("Erreur lors du téléchargement du dataset:", error);
79
+ alert("Erreur lors du téléchargement. Veuillez réessayer.");
80
+ } finally {
81
+ setIsDownloading(false);
82
+ }
83
+ };
84
+
85
+ return (
86
+ <Box sx={{ width: "100%", mt: 3 }}>
87
+ {/* Header with the title and download button aligned */}
88
+ <Box
89
+ sx={{
90
+ mb: 4,
91
+ display: "flex",
92
+ justifyContent: "space-between",
93
+ alignItems: "center",
94
+ }}
95
+ >
96
+ <Box sx={{ display: "flex", alignItems: "center" }}>
97
+ <CheckCircleIcon color="success" sx={{ mr: 1.5, fontSize: 28 }} />
98
+ <Typography variant="h6">Benchmark Created Successfully</Typography>
99
+ </Box>
100
+
101
+ <Tooltip title="Télécharger le benchmark complet">
102
+ <Button
103
+ variant="contained"
104
+ color="primary"
105
+ endIcon={
106
+ isDownloading ? <CircularProgress size={16} /> : <DownloadIcon />
107
+ }
108
+ onClick={handleDownloadClick}
109
+ disabled={isDownloading || !sessionId}
110
+ >
111
+ {isDownloading ? "Téléchargement..." : "Download Benchmark"}
112
+ </Button>
113
+ </Tooltip>
114
+ </Box>
115
+
116
+ <Typography variant="body2" color="text.secondary" sx={{ mb: 2 }}>
117
+ Your benchmark has been generated. Here are some example questions:
118
+ </Typography>
119
+
120
+ <Box sx={{ mb: 3 }}>
121
+ {questions.map((q, index) => (
122
+ <Card
123
+ key={q.id || index}
124
+ variant="outlined"
125
+ sx={{
126
+ mb: 2,
127
+ backgroundColor: "#fafafa",
128
+ }}
129
+ >
130
+ <CardContent>
131
+ <Typography
132
+ variant="caption"
133
+ color="text.secondary"
134
+ sx={{ display: "block", mb: 1 }}
135
+ >
136
+ {q.type === "multi_hop"
137
+ ? "Multi-hop Question"
138
+ : "Single-shot Question"}
139
+ </Typography>
140
+ <Typography variant="body1">{q.question}</Typography>
141
+ </CardContent>
142
+ </Card>
143
+ ))}
144
+ </Box>
145
+
146
+ <Box sx={{ display: "flex", justifyContent: "center", mt: 8 }}>
147
+ <Button
148
+ variant="contained"
149
+ color="primary"
150
+ size="large"
151
+ startIcon={<AssessmentIcon />}
152
+ onClick={handleEvaluationClick}
153
+ >
154
+ Start Evaluation
155
+ </Button>
156
+ </Box>
157
+ </Box>
158
+ );
159
+ };
160
+
161
+ export default BenchmarkDisplay;
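
A minimal usage sketch, assuming a parent that already holds a session ID and question objects of the `{ id, question, type }` shape rendered above (all values are illustrative):

import React from "react";
import BenchmarkDisplay from "./BenchmarkDisplay";

// Hypothetical parent component wiring BenchmarkDisplay's props.
const BenchmarkDisplayExample = () => (
  <BenchmarkDisplay
    sessionId="a1b2c3d4"
    sampleQuestions={[
      { id: 1, question: "What is the main contribution?", type: "single_shot" },
    ]}
    onStartEvaluation={() => console.log("start evaluation")}
  />
);

export default BenchmarkDisplayExample;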
frontend/src/components/BenchmarkEvaluation.jsx ADDED
@@ -0,0 +1,364 @@
1
+ import React, { useState, useEffect, useRef } from "react";
2
+ import {
3
+ Box,
4
+ Typography,
5
+ CircularProgress,
6
+ Alert,
7
+ Paper,
8
+ Divider,
9
+ Button,
10
+ } from "@mui/material";
11
+ import AccessTimeIcon from "@mui/icons-material/AccessTime";
12
+ import LogDisplay from "./LogDisplay";
13
+ import { useNavigate } from "react-router-dom";
14
+
15
+ // Evaluation steps
16
+ const EVALUATION_STEPS = [
17
+ "preparation",
18
+ "model_evaluation",
19
+ "results_compilation",
20
+ ];
21
+
22
+ // Friendly step names for display
23
+ const STEP_LABELS = {
24
+ preparation: "Preparation",
25
+ model_evaluation: "Model Evaluation",
26
+ results_compilation: "Results Compilation",
27
+ };
28
+
29
+ /**
30
+ * Component to handle benchmark evaluation and display logs
31
+ *
32
+ * @param {Object} props - Component props
33
+ * @param {string} props.sessionId - Session ID of the benchmark to evaluate
34
+ * @param {Function} props.onComplete - Function to call when evaluation is complete
35
+ * @returns {JSX.Element} Benchmark evaluation component
36
+ */
37
+ const BenchmarkEvaluation = ({ sessionId, onComplete }) => {
38
+ const [evaluating, setEvaluating] = useState(false);
39
+ const [evaluationComplete, setEvaluationComplete] = useState(false);
40
+ const [evaluationLogs, setEvaluationLogs] = useState([]);
41
+ const [error, setError] = useState(null);
42
+ const [currentPhase, setCurrentPhase] = useState("initializing");
43
+ const [completedSteps, setCompletedSteps] = useState([]);
44
+ const [activeStep, setActiveStep] = useState(0);
45
+ const [elapsedTime, setElapsedTime] = useState(0);
46
+
47
+ const pollingIntervalRef = useRef(null);
48
+ const timerIntervalRef = useRef(null);
49
+ const startTimeRef = useRef(null);
50
+
51
+ const navigate = useNavigate();
52
+
53
+ // Start evaluation when component mounts
54
+ useEffect(() => {
55
+ // Set start time
56
+ startTimeRef.current = Date.now();
57
+
58
+ // Start timer
59
+ timerIntervalRef.current = setInterval(() => {
60
+ const timeElapsed = Math.floor(
61
+ (Date.now() - startTimeRef.current) / 1000
62
+ );
63
+ setElapsedTime(timeElapsed);
64
+ }, 1000);
65
+
66
+ startEvaluation();
67
+
68
+ // Clean up intervals on unmount
69
+ return () => {
70
+ if (pollingIntervalRef.current) {
71
+ clearInterval(pollingIntervalRef.current);
72
+ }
73
+ if (timerIntervalRef.current) {
74
+ clearInterval(timerIntervalRef.current);
75
+ }
76
+ };
77
+ }, []);
78
+
79
+ // Determine current phase and completed steps from logs
80
+ useEffect(() => {
81
+ if (evaluationLogs.length === 0) return;
82
+
83
+ // Check all logs for completed steps
84
+ const newCompletedSteps = [...completedSteps];
85
+ let newActiveStep = activeStep;
86
+
87
+ evaluationLogs.forEach((log) => {
88
+ // Detect completed steps (format: [SUCCESS] Stage completed: step_name)
89
+ const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
90
+ if (match && match[1]) {
91
+ const completedStep = match[1].trim();
92
+ if (
93
+ EVALUATION_STEPS.includes(completedStep) &&
94
+ !newCompletedSteps.includes(completedStep)
95
+ ) {
96
+ newCompletedSteps.push(completedStep);
97
+ // Set active step to index of next step
98
+ const stepIndex = EVALUATION_STEPS.indexOf(completedStep);
99
+ if (stepIndex >= 0 && stepIndex + 1 > newActiveStep) {
100
+ newActiveStep = stepIndex + 1;
101
+ if (newActiveStep >= EVALUATION_STEPS.length) {
102
+ newActiveStep = EVALUATION_STEPS.length;
103
+ }
104
+ }
105
+ }
106
+ }
107
+ });
108
+
109
+ // Update state if there are new completed steps
110
+ if (newCompletedSteps.length > completedSteps.length) {
111
+ setCompletedSteps(newCompletedSteps);
112
+ setActiveStep(newActiveStep);
113
+ }
114
+
115
+ // Check recent logs to determine current phase
116
+ const recentLogs = evaluationLogs.slice(-10);
117
+
118
+ // Detect completion conditions
119
+ const isComplete =
120
+ recentLogs.some((log) =>
121
+ log.includes("[SUCCESS] Evaluation completed")
122
+ ) ||
123
+ completedSteps.includes("results_compilation") ||
124
+ newCompletedSteps.includes("results_compilation");
125
+
126
+ if (isComplete) {
127
+ setCurrentPhase("complete");
128
+ setEvaluationComplete(true);
129
+ // Stop polling when evaluation is complete
130
+ if (pollingIntervalRef.current) {
131
+ clearInterval(pollingIntervalRef.current);
132
+ }
133
+ if (timerIntervalRef.current) {
134
+ clearInterval(timerIntervalRef.current);
135
+ }
136
+ // Notify parent component that evaluation is complete
137
+ if (onComplete) {
138
+ onComplete({
139
+ success: true,
140
+ sessionId,
141
+ logs: evaluationLogs,
142
+ });
143
+ }
144
+ } else if (recentLogs.some((log) => log.includes("Comparing models"))) {
145
+ setCurrentPhase("compiling_results");
146
+ } else if (recentLogs.some((log) => log.includes("Starting evaluations"))) {
147
+ setCurrentPhase("evaluating");
148
+ } else if (recentLogs.some((log) => log.includes("Initialization"))) {
149
+ setCurrentPhase("preparing");
150
+ }
151
+ }, [evaluationLogs, completedSteps, activeStep, sessionId, onComplete]);
152
+
153
+ // Format elapsed time as HH:MM:SS
154
+ const formatElapsedTime = () => {
155
+ const hours = Math.floor(elapsedTime / 3600);
156
+ const minutes = Math.floor((elapsedTime % 3600) / 60);
157
+ const seconds = elapsedTime % 60;
158
+
159
+ return [
160
+ hours.toString().padStart(2, "0"),
161
+ minutes.toString().padStart(2, "0"),
162
+ seconds.toString().padStart(2, "0"),
163
+ ].join(":");
164
+ };
165
+
166
+ // Start benchmark evaluation
167
+ const startEvaluation = async () => {
168
+ if (!sessionId) {
169
+ setError("Missing session ID");
170
+ return;
171
+ }
172
+
173
+ setEvaluating(true);
174
+ setEvaluationLogs([]);
175
+ setError(null);
176
+ setCurrentPhase("initializing");
177
+ setCompletedSteps([]);
178
+ setActiveStep(0);
179
+
180
+ try {
181
+ // Call API to start evaluation
182
+ const response = await fetch("http://localhost:3001/evaluate-benchmark", {
183
+ method: "POST",
184
+ headers: {
185
+ "Content-Type": "application/json",
186
+ },
187
+ body: JSON.stringify({
188
+ session_id: sessionId,
189
+ }),
190
+ });
191
+
192
+ const result = await response.json();
193
+
194
+ if (response.ok) {
195
+ setEvaluationLogs(result.logs || []);
196
+
197
+ // Set up polling to retrieve more logs
198
+ pollingIntervalRef.current = setInterval(async () => {
199
+ // Check if we're already done
200
+ if (evaluationComplete) {
201
+ clearInterval(pollingIntervalRef.current);
202
+ return;
203
+ }
204
+
205
+ try {
206
+ // Call API to get latest logs
207
+ const logsResponse = await fetch(
208
+ `http://localhost:3001/evaluation-logs/${sessionId}`
209
+ );
210
+
211
+ if (logsResponse.ok) {
212
+ const logsResult = await logsResponse.json();
213
+
214
+ // Update logs if there are new ones
215
+ if (
216
+ logsResult.logs &&
217
+ logsResult.logs.length > evaluationLogs.length
218
+ ) {
219
+ setEvaluationLogs(logsResult.logs);
220
+ }
221
+
222
+ // Check if evaluation is complete
223
+ if (logsResult.is_completed) {
224
+ setEvaluationComplete(true);
225
+ clearInterval(pollingIntervalRef.current);
226
+ }
227
+ }
228
+ } catch (error) {
229
+ console.log("Error polling logs:", error);
230
+ // Don't stop polling on network errors
231
+ }
232
+ }, 2000); // Poll every 2 seconds
233
+ } else {
234
+ // Handle error
235
+ setEvaluationLogs([`Error: ${result.error || "Unknown error"}`]);
236
+ setError(result.error || "Benchmark evaluation failed");
237
+ }
238
+ } catch (error) {
239
+ console.error("Error starting evaluation:", error);
240
+ setEvaluationLogs([`Error: ${error.message || "Unknown error"}`]);
241
+ setError("Error connecting to server");
242
+ } finally {
243
+ setEvaluating(false);
244
+ }
245
+ };
246
+
247
+ // Get title based on current phase
248
+ const getPhaseTitle = () => {
249
+ switch (currentPhase) {
250
+ case "initializing":
251
+ return "Preparing evaluation...";
252
+ case "preparing":
253
+ return "Preparing models...";
254
+ case "evaluating":
255
+ return "Evaluating models...";
256
+ case "compiling_results":
257
+ return "Compiling results...";
258
+ case "complete":
259
+ return "Evaluation completed successfully!";
260
+ default:
261
+ return "Processing...";
262
+ }
263
+ };
264
+
265
+ // Get current step info for display
266
+ const getCurrentStepInfo = () => {
267
+ const totalSteps = EVALUATION_STEPS.length;
268
+ const currentStepIndex = activeStep;
269
+
270
+ // If no active step yet
271
+ if (currentStepIndex === 0 && completedSteps.length === 0) {
272
+ return `Starting... (0%)`;
273
+ }
274
+
275
+ // If all steps completed
276
+ if (currentStepIndex >= totalSteps) {
277
+ return `Completed (100%)`;
278
+ }
279
+
280
+ // Calculate percentage
281
+ const percentage = Math.round((currentStepIndex / totalSteps) * 100);
282
+
283
+ // Get current step name
284
+ const currentStepName =
285
+ STEP_LABELS[EVALUATION_STEPS[currentStepIndex]] || "Processing";
286
+
287
+ return `${currentStepName} (${percentage}%)`;
288
+ };
289
+
290
+ // Function to navigate to results page
291
+ const viewResults = () => {
292
+ navigate(`/evaluation-display?session=${sessionId}`);
293
+ };
294
+
295
+ return (
296
+ <Paper
297
+ elevation={3}
298
+ sx={{
299
+ p: 4,
300
+ mt: 3,
301
+ mb: 3,
302
+ display: "flex",
303
+ flexDirection: "column",
304
+ alignItems: "center",
305
+ justifyContent: "center",
306
+ minHeight: 200,
307
+ }}
308
+ >
309
+ {error ? (
310
+ <Alert severity="error" sx={{ width: "100%" }}>
311
+ {error}
312
+ </Alert>
313
+ ) : (
314
+ <>
315
+ {evaluationComplete ? (
316
+ <>
317
+ <Alert severity="success" sx={{ width: "100%", mb: 3 }}>
318
+ Evaluation completed successfully!
319
+ </Alert>
320
+ <Button
321
+ variant="contained"
322
+ color="primary"
323
+ onClick={viewResults}
324
+ sx={{ mb: 3 }}
325
+ >
326
+ View Results Leaderboard
327
+ </Button>
328
+ </>
329
+ ) : (
330
+ <>
331
+ <CircularProgress size={60} sx={{ mb: 2 }} />
332
+ <Typography variant="h6" component="div" gutterBottom>
333
+ {getPhaseTitle()}
334
+ </Typography>
335
+
336
+ {/* Step progress indicator */}
337
+ <Typography variant="body1" color="text.secondary">
338
+ {getCurrentStepInfo()}
339
+ </Typography>
340
+
341
+ {/* Timer display */}
342
+ <Box
343
+ sx={{
344
+ display: "flex",
345
+ alignItems: "center",
346
+ mt: 1,
347
+ color: "text.secondary",
348
+ opacity: 0.5,
349
+ }}
350
+ >
351
+ <Typography variant="body2">{formatElapsedTime()}</Typography>
352
+ </Box>
353
+ </>
354
+ )}
355
+ </>
356
+ )}
357
+
358
+ {/* Use the LogDisplay component for logs */}
359
+ <LogDisplay logs={evaluationLogs} height={300} />
360
+ </Paper>
361
+ );
362
+ };
363
+
364
+ export default BenchmarkEvaluation;
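
The step tracking above hinges on a single log convention, `[SUCCESS] Stage completed: <step>`. A standalone sketch of that parsing, handy for unit-testing it outside the component (the function name is illustrative, not part of this commit):

// Extracts completed step names from raw log lines, mirroring the
// regex used in the useEffect above.
const parseCompletedSteps = (logs, knownSteps) =>
  logs.reduce((completed, log) => {
    const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
    if (match && knownSteps.includes(match[1]) && !completed.includes(match[1])) {
      completed.push(match[1]);
    }
    return completed;
  }, []);

// parseCompletedSteps(["[SUCCESS] Stage completed: preparation"], EVALUATION_STEPS)
// => ["preparation"]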
frontend/src/components/BenchmarkGenerator.jsx ADDED
@@ -0,0 +1,398 @@
1
+ import React, { useState, useEffect, useRef } from "react";
2
+ import { Box, Typography, CircularProgress, Alert, Paper } from "@mui/material";
3
+ import PlayArrowIcon from "@mui/icons-material/PlayArrow";
4
+ import AccessTimeIcon from "@mui/icons-material/AccessTime";
5
+ import LogDisplay from "./LogDisplay";
6
+
7
+ // Define all benchmark steps in sequence
8
+ const BENCHMARK_STEPS = [
9
+ "ingestion",
10
+ "upload_ingest_to_hub",
11
+ "summarization",
12
+ "chunking",
13
+ "single_shot_question_generation",
14
+ "multi_hop_question_generation",
15
+ "lighteval",
16
+ ];
17
+
18
+ // Step labels for display (more user-friendly names)
19
+ const STEP_LABELS = {
20
+ ingestion: "Ingestion",
21
+ upload_ingest_to_hub: "Upload to Hub",
22
+ summarization: "Summarization",
23
+ chunking: "Chunking",
24
+ single_shot_question_generation: "Single-shot QG",
25
+ multi_hop_question_generation: "Multi-hop QG",
26
+ lighteval: "LightEval",
27
+ };
28
+
29
+ /**
30
+ * Component to handle benchmark generation and display logs
31
+ *
32
+ * @param {Object} props - Component props
33
+ * @param {string} props.sessionId - The session ID for the uploaded file
34
+ * @param {Function} props.onComplete - Function to call when generation is complete
35
+ * @returns {JSX.Element} Benchmark generator component
36
+ */
37
+ const BenchmarkGenerator = ({ sessionId, onComplete }) => {
38
+ const [generating, setGenerating] = useState(false);
39
+ const [generationComplete, setGenerationComplete] = useState(false);
40
+ const [generationLogs, setGenerationLogs] = useState([]);
41
+ const [error, setError] = useState(null);
42
+ const [currentPhase, setCurrentPhase] = useState("initializing");
43
+ const [completedSteps, setCompletedSteps] = useState([]);
44
+ const [activeStep, setActiveStep] = useState(0);
45
+ const [elapsedTime, setElapsedTime] = useState(0);
46
+
47
+ // Reference to keep track of the polling interval
48
+ const pollingIntervalRef = useRef(null);
49
+
50
+ // Reference to keep track of the timer interval
51
+ const timerIntervalRef = useRef(null);
52
+
53
+ // Reference for starting time
54
+ const startTimeRef = useRef(null);
55
+
56
+ // Start generation on component mount
57
+ useEffect(() => {
58
+ // Set start time
59
+ startTimeRef.current = Date.now();
60
+
61
+ // Start timer
62
+ timerIntervalRef.current = setInterval(() => {
63
+ const timeElapsed = Math.floor(
64
+ (Date.now() - startTimeRef.current) / 1000
65
+ );
66
+ setElapsedTime(timeElapsed);
67
+ }, 1000);
68
+
69
+ generateBenchmark();
70
+
71
+ // Clean up the polling interval and timer when the component unmounts
72
+ return () => {
73
+ if (pollingIntervalRef.current) {
74
+ clearInterval(pollingIntervalRef.current);
75
+ }
76
+ if (timerIntervalRef.current) {
77
+ clearInterval(timerIntervalRef.current);
78
+ }
79
+ };
80
+ }, []);
81
+
82
+ // Determine the current phase and completed steps based on logs
83
+ useEffect(() => {
84
+ if (generationLogs.length === 0) return;
85
+
86
+ // Check all logs for completed stages
87
+ const newCompletedSteps = [...completedSteps];
88
+ let newActiveStep = activeStep;
89
+
90
+ generationLogs.forEach((log) => {
91
+ const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
92
+ if (match && match[1]) {
93
+ const completedStep = match[1].trim();
94
+ if (
95
+ BENCHMARK_STEPS.includes(completedStep) &&
96
+ !newCompletedSteps.includes(completedStep)
97
+ ) {
98
+ newCompletedSteps.push(completedStep);
99
+ // Set active step to the index of the next step
100
+ const stepIndex = BENCHMARK_STEPS.indexOf(completedStep);
101
+ if (stepIndex >= 0 && stepIndex + 1 > newActiveStep) {
102
+ newActiveStep = stepIndex + 1;
103
+ if (newActiveStep >= BENCHMARK_STEPS.length) {
104
+ newActiveStep = BENCHMARK_STEPS.length;
105
+ }
106
+ }
107
+ }
108
+ }
109
+ });
110
+
111
+ // Update state if there are new completed steps
112
+ if (newCompletedSteps.length > completedSteps.length) {
113
+ setCompletedSteps(newCompletedSteps);
114
+ setActiveStep(newActiveStep);
115
+ }
116
+
117
+ // Check the latest logs to determine the current phase
118
+ const recentLogs = generationLogs.slice(-10); // Inspect the 10 most recent log lines
119
+
120
+ // Detect completion conditions
121
+ const isComplete =
122
+ recentLogs.some((log) =>
123
+ log.includes("[SUCCESS] Ingestion process completed successfully")
124
+ ) ||
125
+ recentLogs.some((log) =>
126
+ log.includes(
127
+ "[SUCCESS] Configuration and ingestion completed successfully"
128
+ )
129
+ ) ||
130
+ completedSteps.includes("lighteval") ||
131
+ newCompletedSteps.includes("lighteval");
132
+
133
+ if (isComplete) {
134
+ setCurrentPhase("complete");
135
+ setGenerationComplete(true);
136
+ // Stop polling when benchmark is complete
137
+ if (pollingIntervalRef.current) {
138
+ clearInterval(pollingIntervalRef.current);
139
+ }
140
+ // Notify parent component that generation is complete
141
+ if (onComplete) {
142
+ console.log("Notifying parent that generation is complete");
143
+ onComplete({
144
+ success: true,
145
+ sessionId,
146
+ logs: generationLogs,
147
+ });
148
+ }
149
+ } else if (
150
+ recentLogs.some((log) => log.includes("starting benchmark creation"))
151
+ ) {
152
+ setCurrentPhase("benchmarking");
153
+ } else if (
154
+ recentLogs.some((log) => log.includes("Generating base configuration"))
155
+ ) {
156
+ setCurrentPhase("configuring");
157
+ }
158
+ }, [generationLogs, completedSteps, activeStep, sessionId, onComplete]);
159
+
160
+ const generateBenchmark = async () => {
161
+ if (!sessionId) {
162
+ setError("Missing session ID");
163
+ return;
164
+ }
165
+
166
+ setGenerating(true);
167
+ setGenerationLogs([]);
168
+ setError(null);
169
+ setCurrentPhase("initializing");
170
+ setCompletedSteps([]);
171
+ setActiveStep(0);
172
+
173
+ try {
174
+ // Call the API to generate the benchmark
175
+ const response = await fetch("http://localhost:3001/generate-benchmark", {
176
+ method: "POST",
177
+ headers: {
178
+ "Content-Type": "application/json",
179
+ },
180
+ body: JSON.stringify({
181
+ session_id: sessionId,
182
+ }),
183
+ });
184
+
185
+ const result = await response.json();
186
+
187
+ if (response.ok) {
188
+ setGenerationLogs(result.logs || []);
189
+
190
+ // First, poll the configuration logs
191
+ const pollConfigLogs = async () => {
192
+ try {
193
+ // Call the API to get the config logs
194
+ const configLogsResponse = await fetch(
195
+ `http://localhost:3001/config-logs/${sessionId}`
196
+ );
197
+
198
+ if (configLogsResponse.ok) {
199
+ const configLogsResult = await configLogsResponse.json();
200
+
201
+ // Update logs if there are new ones
202
+ if (
203
+ configLogsResult.logs &&
204
+ configLogsResult.logs.length > generationLogs.length
205
+ ) {
206
+ setGenerationLogs(configLogsResult.logs);
207
+ }
208
+
209
+ // If config task is completed, switch to polling benchmark logs
210
+ if (configLogsResult.is_completed) {
211
+ // Wait briefly to give the server time to start the benchmark
212
+ setTimeout(() => {
213
+ console.log(
214
+ "Configuration completed, switching to benchmark polling"
215
+ );
216
+ clearInterval(configPollingIntervalRef.current);
217
+ pollBenchmarkLogs();
218
+ }, 1000);
219
+ }
220
+ }
221
+ } catch (error) {
222
+ console.log("Error polling for config logs:", error);
223
+ // Don't stop polling on network errors
224
+ }
225
+ };
226
+
227
+ // Poll the benchmark logs
228
+ const pollBenchmarkLogs = async () => {
229
+ // Set up polling for benchmark logs
230
+ pollingIntervalRef.current = setInterval(async () => {
231
+ // Check if we already completed
232
+ if (generationComplete) {
233
+ clearInterval(pollingIntervalRef.current);
234
+ return;
235
+ }
236
+
237
+ try {
238
+ // Call the API to get the latest benchmark logs
239
+ const logsResponse = await fetch(
240
+ `http://localhost:3001/benchmark-logs/${sessionId}`
241
+ );
242
+
243
+ if (logsResponse.ok) {
244
+ const logsResult = await logsResponse.json();
245
+
246
+ // Update logs if there are new ones
247
+ if (
248
+ logsResult.logs &&
249
+ logsResult.logs.length > generationLogs.length
250
+ ) {
251
+ setGenerationLogs(logsResult.logs);
252
+ }
253
+
254
+ // Check if the task is completed
255
+ if (logsResult.is_completed) {
256
+ setGenerationComplete(true);
257
+ clearInterval(pollingIntervalRef.current);
258
+ // Notification is now handled in the useEffect above
259
+ }
260
+ }
261
+ } catch (error) {
262
+ console.log("Error polling for benchmark logs:", error);
263
+ // Don't stop polling on network errors
264
+ }
265
+ }, 3000); // Poll every 3 seconds
266
+ };
267
+
268
+ // Start polling the configuration logs (note: this plain-object holder is not cleared by the unmount cleanup above)
269
+ const configPollingIntervalRef = { current: null };
270
+ configPollingIntervalRef.current = setInterval(pollConfigLogs, 1000); // Poll config logs more frequently (every second)
271
+ } else {
272
+ // Handle error
273
+ setGenerationLogs([`Error: ${result.error || "Unknown error"}`]);
274
+ setError(result.error || "Benchmark generation failed");
275
+ }
276
+ } catch (error) {
277
+ console.error("Error generating benchmark:", error);
278
+ setGenerationLogs([`Error: ${error.message || "Unknown error"}`]);
279
+ setError("Server connection error");
280
+ } finally {
281
+ setGenerating(false);
282
+ }
283
+ };
284
+
285
+ // Get title based on current phase
286
+ const getPhaseTitle = () => {
287
+ switch (currentPhase) {
288
+ case "initializing":
289
+ return "Benchmark generation...";
290
+ case "configuring":
291
+ return "Generating configuration file...";
292
+ case "benchmarking":
293
+ return "Creating benchmark...";
294
+ case "complete":
295
+ return "Benchmark generated successfully!";
296
+ default:
297
+ return "Processing...";
298
+ }
299
+ };
300
+
301
+ // Get the current step information for display
302
+ const getCurrentStepInfo = () => {
303
+ const totalSteps = BENCHMARK_STEPS.length;
304
+ const currentStepIndex = activeStep;
305
+
306
+ // If there's no active step yet
307
+ if (currentStepIndex === 0 && completedSteps.length === 0) {
308
+ return `Starting... (0%)`;
309
+ }
310
+
311
+ // If all steps are completed
312
+ if (currentStepIndex >= totalSteps) {
313
+ return `Complete (100%)`;
314
+ }
315
+
316
+ // Calculate percentage
317
+ const percentage = Math.round((currentStepIndex / totalSteps) * 100);
318
+
319
+ // Get current step name
320
+ const currentStepName =
321
+ STEP_LABELS[BENCHMARK_STEPS[currentStepIndex]] || "Processing";
322
+
323
+ return `${currentStepName} (${percentage}%)`;
324
+ };
325
+
326
+ // Format elapsed time in HH:MM:SS
327
+ const formatElapsedTime = () => {
328
+ const hours = Math.floor(elapsedTime / 3600);
329
+ const minutes = Math.floor((elapsedTime % 3600) / 60);
330
+ const seconds = elapsedTime % 60;
331
+
332
+ return [
333
+ hours.toString().padStart(2, "0"),
334
+ minutes.toString().padStart(2, "0"),
335
+ seconds.toString().padStart(2, "0"),
336
+ ].join(":");
337
+ };
338
+
339
+ // If complete, stop the timer
340
+ useEffect(() => {
341
+ if (generationComplete && timerIntervalRef.current) {
342
+ clearInterval(timerIntervalRef.current);
343
+ }
344
+ }, [generationComplete]);
345
+
346
+ return (
347
+ <Paper
348
+ elevation={3}
349
+ sx={{
350
+ p: 4,
351
+ mt: 3,
352
+ mb: 3,
353
+ display: "flex",
354
+ flexDirection: "column",
355
+ alignItems: "center",
356
+ justifyContent: "center",
357
+ minHeight: 200,
358
+ }}
359
+ >
360
+ {error ? (
361
+ <Alert severity="error" sx={{ width: "100%" }}>
362
+ {error}
363
+ </Alert>
364
+ ) : (
365
+ <>
366
+ <CircularProgress size={60} sx={{ mb: 2 }} />
367
+ <Typography variant="h6" component="div" gutterBottom>
368
+ {getPhaseTitle()}
369
+ </Typography>
370
+
371
+ {/* Step progress indicator */}
372
+ <Typography variant="body1" color="text.secondary">
373
+ {getCurrentStepInfo()}
374
+ </Typography>
375
+
376
+ {/* Timer display */}
377
+ <Box
378
+ sx={{
379
+ display: "flex",
380
+ alignItems: "center",
381
+ mt: 1,
382
+ color: "text.secondary",
383
+ }}
384
+ >
385
+ <Typography variant="body2" sx={{ opacity: 0.5 }}>
386
+ {formatElapsedTime()}
387
+ </Typography>
388
+ </Box>
389
+ </>
390
+ )}
391
+
392
+ {/* Use the LogDisplay component */}
393
+ <LogDisplay logs={generationLogs} height={300} />
394
+ </Paper>
395
+ );
396
+ };
397
+
398
+ export default BenchmarkGenerator;
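
Both polling endpoints used above appear to share one response contract. A hedged sketch of the JSON this component expects from /config-logs/:id and /benchmark-logs/:id, with the fields inferred from `logsResult.logs` and `logsResult.is_completed`:

// Hypothetical example response; the real backend may include more fields.
const examplePollingResponse = {
  logs: [
    "[INFO] Generating base configuration...",
    "[SUCCESS] Stage completed: ingestion",
  ],
  is_completed: false, // true once the server-side task has finished
};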
frontend/src/components/EvaluationDisplay.jsx ADDED
@@ -0,0 +1,196 @@
1
+ import React, { useState, useEffect } from "react";
2
+ import {
3
+ Box,
4
+ Paper,
5
+ Typography,
6
+ Table,
7
+ TableBody,
8
+ TableCell,
9
+ TableContainer,
10
+ TableHead,
11
+ TableRow,
12
+ Alert,
13
+ LinearProgress,
14
+ Card,
15
+ CardContent,
16
+ Link,
17
+ } from "@mui/material";
18
+ import OpenInNewIcon from "@mui/icons-material/OpenInNew";
19
+
20
+ const EvaluationDisplay = ({ sessionId }) => {
21
+ const [results, setResults] = useState(null);
22
+ const [loading, setLoading] = useState(true);
23
+ const [error, setError] = useState(null);
24
+
25
+ useEffect(() => {
26
+ const fetchEvaluationResults = async () => {
27
+ if (!sessionId) {
28
+ setError("No session ID provided");
29
+ setLoading(false);
30
+ return;
31
+ }
32
+
33
+ try {
34
+ // Fetch evaluation results from the API
35
+ const response = await fetch(
36
+ `http://localhost:3001/evaluation-results/${sessionId}`
37
+ );
38
+
39
+ if (!response.ok) {
40
+ throw new Error(`Failed to fetch results: ${response.status}`);
41
+ }
42
+
43
+ const data = await response.json();
44
+
45
+ if (!data.success) {
46
+ throw new Error(data.message || "Failed to fetch evaluation results");
47
+ }
48
+
49
+ setResults(data.results);
50
+ } catch (err) {
51
+ console.error("Error fetching evaluation results:", err);
52
+ setError(err.message);
53
+ } finally {
54
+ setLoading(false);
55
+ }
56
+ };
57
+
58
+ fetchEvaluationResults();
59
+ }, [sessionId]);
60
+
61
+ // Format accuracy as percentage
62
+ const formatAccuracy = (value) => {
63
+ return `${(value * 100).toFixed(2)}%`;
64
+ };
65
+
66
+ // Format evaluation time
67
+ const formatTime = (seconds) => {
68
+ return `${seconds.toFixed(2)}s`;
69
+ };
70
+
71
+ if (loading) {
72
+ return (
73
+ <Box sx={{ width: "100%", mt: 4, mb: 4 }}>
74
+ <Typography variant="h5" gutterBottom>
75
+ Loading Evaluation Results...
76
+ </Typography>
77
+ <LinearProgress />
78
+ </Box>
79
+ );
80
+ }
81
+
82
+ if (error) {
83
+ return (
84
+ <Alert severity="error" sx={{ mt: 4, mb: 4 }}>
85
+ {error}
86
+ </Alert>
87
+ );
88
+ }
89
+
90
+ if (
91
+ !results ||
92
+ !results.models_comparison ||
93
+ results.models_comparison.length === 0
94
+ ) {
95
+ return (
96
+ <Alert severity="info" sx={{ mt: 4, mb: 4 }}>
97
+ No evaluation results found for this benchmark.
98
+ </Alert>
99
+ );
100
+ }
101
+
102
+ return (
103
+ <Box sx={{ mt: 4, mb: 6 }}>
104
+ <Typography variant="h4" gutterBottom>
105
+ Evaluation Results
106
+ </Typography>
107
+
108
+ <TableContainer
109
+ component={Paper}
110
+ sx={{
111
+ border: "1px solid rgba(224, 224, 224, 1)",
112
+ boxShadow: "0 2px 4px rgba(0,0,0,0.05)",
113
+ }}
114
+ >
115
+ <Table sx={{ minWidth: 650 }}>
116
+ <TableHead>
117
+ <TableRow>
118
+ <TableCell>Rank</TableCell>
119
+ <TableCell>Model</TableCell>
120
+ <TableCell>Provider</TableCell>
121
+ <TableCell align="center">Accuracy</TableCell>
122
+ <TableCell align="center">Std Error</TableCell>
123
+ <TableCell align="center">Eval Time</TableCell>
124
+ <TableCell align="center">Status</TableCell>
125
+ </TableRow>
126
+ </TableHead>
127
+ <TableBody>
128
+ {results.models_comparison.map((model, index) => (
129
+ <TableRow
130
+ key={`${model.model_name}-${model.provider}`}
131
+ sx={{
132
+ "&:last-child td, &:last-child th": { border: 0 },
133
+ backgroundColor: model.success
134
+ ? "inherit"
135
+ : "rgba(0, 0, 0, 0.04)",
136
+ }}
137
+ >
138
+ <TableCell>{index + 1}</TableCell>
139
+ <TableCell component="th" scope="row">
140
+ <Link
141
+ href={`https://huggingface.co/${model.model_name}`}
142
+ target="_blank"
143
+ rel="noopener noreferrer"
144
+ sx={{
145
+ textDecoration: "none",
146
+ "&:hover": {
147
+ textDecoration: "underline",
148
+ },
149
+ display: "flex",
150
+ alignItems: "center",
151
+ }}
152
+ >
153
+ {model.model_name}
154
+ <OpenInNewIcon sx={{ ml: 0.5, fontSize: 16 }} />
155
+ </Link>
156
+ </TableCell>
157
+ <TableCell>{model.provider}</TableCell>
158
+ <TableCell align="center">
159
+ {model.success ? formatAccuracy(model.accuracy) : "-"}
160
+ </TableCell>
161
+ <TableCell align="center">
162
+ {model.success ? formatAccuracy(model.accuracy_stderr) : "-"}
163
+ </TableCell>
164
+ <TableCell align="center">
165
+ {model.success ? formatTime(model.evaluation_time) : "-"}
166
+ </TableCell>
167
+ <TableCell align="center">
168
+ {model.success ? (
169
+ <span style={{ color: "green" }}>✓ Success</span>
170
+ ) : (
171
+ <span style={{ color: "red" }}>✗ Failed</span>
172
+ )}
173
+ </TableCell>
174
+ </TableRow>
175
+ ))}
176
+ </TableBody>
177
+ </Table>
178
+ </TableContainer>
179
+
180
+ <Box sx={{ mt: 4, textAlign: "center" }}>
181
+ <Typography variant="body2" color="textSecondary">
182
+ Need a larger evaluation?{" "}
183
+ <Link
184
+ href="https://huggingface.co/spaces/yourbench/yourbench"
185
+ target="_blank"
186
+ rel="noopener noreferrer"
187
+ >
188
+ Try the full YourBench Space
189
+ </Link>
190
+ </Typography>
191
+ </Box>
192
+ </Box>
193
+ );
194
+ };
195
+
196
+ export default EvaluationDisplay;
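
For reference, a sketch of the /evaluation-results/:id payload this table renders. Only the fields actually read above are shown; the model id, provider, and numbers are illustrative:

// Hypothetical example payload for the results table.
const exampleEvaluationResults = {
  success: true,
  results: {
    models_comparison: [
      {
        model_name: "Qwen/Qwen2.5-72B-Instruct", // hypothetical model id
        provider: "sambanova", // hypothetical provider
        accuracy: 0.82,
        accuracy_stderr: 0.03,
        evaluation_time: 42.7, // seconds
        success: true,
      },
    ],
  },
};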
frontend/src/components/Footer/Footer.js ADDED
@@ -0,0 +1,30 @@
1
+ import React from "react";
2
+ import { Box, Typography, Link } from "@mui/material";
3
+
4
+ const Footer = () => {
5
+ return (
6
+ <Box
7
+ component="footer"
8
+ sx={{
9
+ width: "100%",
10
+ py: 4,
11
+ textAlign: "center",
12
+ }}
13
+ >
14
+ <Typography variant="body2" color="text.secondary" sx={{ mx: 4 }}>
15
+ © 2024 Hugging Face - Open LLM Leaderboard - Made with 🤗 by the HF team
16
+ -{" "}
17
+ <Link
18
+ href="https://huggingface.co"
19
+ target="_blank"
20
+ rel="noopener noreferrer"
21
+ color="inherit"
22
+ >
23
+ huggingface.co
24
+ </Link>
25
+ </Typography>
26
+ </Box>
27
+ );
28
+ };
29
+
30
+ export default Footer;
frontend/src/components/LogDisplay.jsx ADDED
@@ -0,0 +1,67 @@
1
+ import React, { useRef, useEffect } from "react";
2
+ import { Box, Typography } from "@mui/material";
3
+
4
+ /**
5
+ * A reusable component for displaying logs with auto-scrolling and styling
6
+ *
7
+ * @param {Object} props - Component props
8
+ * @param {Array<string>} props.logs - Array of log messages to display
9
+ * @param {number} props.height - Height of the log container in pixels (default: 300)
10
+ * @param {Object} props.containerStyle - Additional styles for the container
11
+ * @returns {JSX.Element} Log display component
12
+ */
13
+ const LogDisplay = ({ logs = [], height = 300, containerStyle = {} }) => {
14
+ const logsEndRef = useRef(null);
15
+
16
+ // Auto-scroll logs to bottom when new logs are added
17
+ useEffect(() => {
18
+ if (logsEndRef.current) {
19
+ logsEndRef.current.scrollIntoView({ behavior: "smooth" });
20
+ }
21
+ }, [logs]);
22
+
23
+ return (
24
+ <Box
25
+ sx={{
26
+ mt: 3,
27
+ width: "100%",
28
+ height: `${height}px`,
29
+ overflowY: "auto",
30
+ backgroundColor: "#f9f9f9",
31
+ p: 2,
32
+ borderRadius: 1,
33
+ fontFamily: "monospace",
34
+ fontSize: "0.85rem",
35
+ border: "1px solid #e0e0e0",
36
+ ...containerStyle,
37
+ }}
38
+ >
39
+ {logs.length === 0 ? (
40
+ <Typography color="text.secondary" variant="body2">
41
+ Waiting for logs...
42
+ </Typography>
43
+ ) : (
44
+ logs.map((log, index) => {
45
+ // Style logs based on content
46
+ let style = { opacity: 0.7 };
47
+ if (log.includes("[ERROR]")) {
48
+ style = { ...style, color: "#d32f2f" }; // Red for errors
49
+ } else if (log.includes("[WARN]")) {
50
+ style = { ...style, color: "#ed6c02" }; // Orange for warnings
51
+ } else if (log.includes("[SUCCESS]")) {
52
+ style = { ...style, color: "#2e7d32", opacity: 0.8 }; // Green for success
53
+ }
54
+
55
+ return (
56
+ <div key={index} style={{ ...style, marginBottom: "4px" }}>
57
+ {log}
58
+ </div>
59
+ );
60
+ })
61
+ )}
62
+ <div ref={logsEndRef} />
63
+ </Box>
64
+ );
65
+ };
66
+
67
+ export default LogDisplay;
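
A minimal usage sketch of LogDisplay, matching how BenchmarkGenerator and BenchmarkEvaluation embed it above:

import React from "react";
import LogDisplay from "./LogDisplay";

// Hypothetical parent: renders a 300px auto-scrolling log panel;
// [ERROR]/[WARN]/[SUCCESS] prefixes are colorized by LogDisplay itself.
const LogDisplayExample = () => (
  <LogDisplay
    logs={["[INFO] Starting...", "[SUCCESS] Stage completed: ingestion"]}
    height={300}
  />
);

export default LogDisplayExample;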
frontend/src/components/Logo/HFLogo.js ADDED
@@ -0,0 +1,19 @@
 
1
+ import React from 'react';
2
+
3
+ const HFLogo = () => (
4
+ <svg width="100%" viewBox="0 0 236 220" version="1.1" xmlns="http://www.w3.org/2000/svg" >
5
+ <title>hg-logo</title>
6
+ <g id="hg-logo" stroke="none" strokeWidth="1" fill="none">
7
+ <g id="Group" transform="translate(-1.000000, 0.000000)">
8
+ <path d="M236.188357,161.726225 C235.570415,159.393906 234.569281,157.181253 233.22638,155.176863 C233.514062,154.120588 233.732701,153.048205 233.879994,151.965466 C234.832798,145.089325 232.449638,138.794251 227.956041,133.922501 C225.522249,131.262254 222.913547,129.506398 220.150646,128.428262 C221.964195,120.669591 222.882477,112.729122 222.88708,104.761037 C222.88708,101.122758 222.681099,97.5581193 222.335881,94.046409 C222.155216,92.2928551 221.937728,90.5427531 221.683417,88.7984042 C220.891716,83.6516545 219.717972,78.5709507 218.171392,73.5986359 C217.1576,70.3316637 215.985007,67.1160095 214.658216,63.9632945 C212.668606,59.2945148 210.345284,54.7746261 207.706662,50.4388375 C205.974815,47.5549087 204.089921,44.7659066 202.058884,42.0841428 C201.063504,40.7298561 200.026697,39.4075568 198.947313,38.1190859 C195.750588,34.2338824 192.277687,30.5855928 188.552777,27.2030978 C187.316894,26.0660493 186.045339,24.9682371 184.739261,23.9111571 C183.453897,22.8390039 182.139764,21.8011393 180.799165,20.798714 C178.100706,18.7906417 175.311338,16.9066068 172.44142,15.1525926 C156.583223,5.52185376 137.986291,0 118.109749,0 C60.2385495,0 13.3336831,46.9018135 13.3336831,104.76564 C13.3321871,112.833829 14.2670394,120.874403 16.1195981,128.726274 C13.6340233,129.805561 11.2508635,131.486626 9.04261448,133.920199 C4.55016831,138.788498 2.16585774,145.06171 3.11981211,151.9367 C3.26365324,153.029795 3.48229176,154.111383 3.77227548,155.174561 C2.4301802,157.180102 1.42939115,159.393906 0.810298929,161.726225 C-0.570575919,166.97423 -0.116037948,171.706754 1.63882384,175.85246 C-0.267934182,181.273058 0.208467641,187.044598 2.69289164,192.062477 C4.49953623,195.727221 7.08522438,198.561213 10.2715931,201.096041 C14.0609438,204.107229 18.8042489,206.667372 24.5268244,209.121657 C31.3529491,212.032741 39.6842274,214.767779 43.4735781,215.771124 C53.2616793,218.305953 62.6470253,219.912227 72.1599872,219.989319 C85.7109724,220.115888 97.3816663,216.928654 105.738261,208.774168 C109.842911,209.276992 113.974028,209.528979 118.109749,209.527828 C122.479067,209.518623 126.843782,209.242473 131.179729,208.70398 C139.51561,216.910244 151.231182,220.126243 164.8328,219.996223 C174.343575,219.921432 183.728921,218.315158 193.491706,215.776877 C197.306373,214.773532 205.63535,212.038494 212.464927,209.128561 C218.187502,206.668523 222.929657,204.109531 226.745474,201.101795 C229.907678,198.568116 232.491064,195.732974 234.29886,192.068231 C236.8086,187.050351 237.260836,181.278811 235.378244,175.858213 C237.116995,171.712507 237.568081,166.969627 236.188357,161.726225 Z M226.477354,175.501519 C228.400223,179.150153 228.523351,183.272846 226.826025,187.112485 C224.252995,192.932351 217.861846,197.515294 205.448932,202.436521 C197.730992,205.498336 190.662064,207.4544 190.599924,207.47281 C180.390656,210.1204 171.157207,211.464332 163.164243,211.464332 C149.928557,211.464332 140.080618,207.813396 133.834461,200.601272 C123.271919,202.399701 112.486136,202.460684 101.904031,200.781921 C95.6509699,207.874379 85.857115,211.464332 72.7330503,211.464332 C64.7390507,211.464332 55.5067517,210.1204 45.2974836,207.47281 C45.2341935,207.4544 38.1698679,205.498336 30.4484761,202.436521 C18.0355619,197.515294 11.6432621,192.934652 9.07138271,187.112485 C7.37405737,183.272846 7.49718538,179.150153 9.4200536,175.501519 C9.59841661,175.163235 9.7882869,174.831854 9.99196594,174.513131 C8.83939573,172.78259 8.06645104,170.826526 7.72364885,168.77611 C7.38096175,166.725695 7.47635718,164.624652 8.00350616,162.613358 
C8.76759024,159.711479 10.3463905,157.297466 12.489048,155.563473 C11.4573043,153.86745 10.7801003,151.980424 10.4982867,150.015155 C9.88149595,145.74173 11.2991941,141.474059 14.4913165,137.995716 C16.9757405,135.288294 20.4889162,133.798233 24.3795311,133.798233 L24.4830967,133.798233 C21.5502336,124.39877 20.0630314,114.608094 20.0723523,104.762188 C20.0723523,51.0601755 63.612487,7.52279222 117.324951,7.52279222 C171.038681,7.52279222 214.577665,51.0567236 214.577665,104.762188 C214.58457,114.634558 213.087471,124.450548 210.137002,133.871873 C210.606499,133.825848 211.066791,133.801685 211.517877,133.801685 C215.407341,133.801685 218.921668,135.290595 221.406092,137.998018 C224.595912,141.474059 226.017063,145.745182 225.399121,150.017456 C225.117193,151.982725 224.440564,153.870902 223.40836,155.566925 C225.551018,157.299767 227.129818,159.71378 227.893902,162.61681 C228.419785,164.628104 228.515296,166.727996 228.172378,168.779562 C227.829461,170.829977 227.057322,172.784891 225.905442,174.516583 C226.109121,174.831854 226.301293,175.163235 226.477354,175.501519 Z" id="Shape" fill="#FFFFFF" fillRule="nonzero"></path>
9
+ <path d="M226.52977,174.037682 C227.682419,172.305523 228.455074,170.350082 228.798221,168.299114 C229.141367,166.246994 229.045793,164.146536 228.519558,162.134699 C227.754964,159.232038 226.175108,156.817373 224.031019,155.082913 C225.062761,153.386432 225.740993,151.498897 226.02311,149.533098 C226.640313,145.258521 225.221668,140.989698 222.027412,137.510418 C219.541328,134.802265 216.024653,133.311802 212.13259,133.311802 C211.680051,133.311802 211.220603,133.334821 210.750792,133.382009 C213.699779,123.958143 215.195575,114.139506 215.186363,104.265624 C215.186363,50.5501622 171.617132,7 117.873265,7 C64.1293973,7 20.5555606,50.5455585 20.5555606,104.265624 C20.5462334,114.114185 22.0344295,123.907502 24.9692525,133.3095 L24.8656177,133.3095 C20.9735543,133.3095 17.4580309,134.799963 14.9719466,137.508116 C11.7799941,140.985094 10.3590456,145.256219 10.9762485,149.530796 C11.2580201,151.496595 11.9356766,153.384131 12.9683401,155.080611 C10.8242508,156.815072 9.24439546,159.229736 8.48095227,162.133548 C7.95379648,164.145385 7.85868274,166.246994 8.20205945,168.299114 C8.54543616,170.350082 9.31935798,172.306674 10.4730439,174.037682 C10.2669257,174.356491 10.07808,174.687961 9.90074934,175.026336 C7.97774764,178.675955 7.85338585,182.79976 9.55184544,186.640434 C12.1254435,192.460719 18.522015,197.0472 30.9432242,201.968603 C38.6663215,205.031245 45.7399738,206.987836 45.8021547,207.006251 C56.0182452,209.654556 65.2567139,211 73.2550191,211 C86.3890056,211 96.1882538,207.409079 102.446646,200.313557 C113.035821,201.992773 123.828812,201.931773 134.398413,200.13286 C140.647592,207.346928 150.503264,211 163.747794,211 C171.7461,211 180.98572,209.654556 191.20181,207.006251 C191.263991,206.987836 198.334189,205.031245 206.060741,201.968603 C218.48195,197.0472 224.878521,192.460719 227.45212,186.640434 C229.150579,182.79976 229.027369,178.675955 227.103216,175.026336 C226.927036,174.684508 226.733585,174.354189 226.52977,174.037682 Z M97.9684697,189.207022 C97.4295686,190.149639 96.8526681,191.069237 96.2377682,191.963514 C94.6199135,194.33099 92.4919451,196.139111 90.0231334,197.484555 C85.30084,200.056898 79.3257167,200.95693 73.2538676,200.95693 C63.6641921,200.95693 53.832702,198.713755 48.3216324,197.284293 C48.0510304,197.214085 14.5435894,187.75454 18.7857081,179.70259 C19.4996369,178.349089 20.6753163,177.808149 22.1538398,177.808149 C28.1266601,177.808149 39.0026741,186.697981 43.677756,186.697981 C44.7210132,186.697981 45.4578568,186.252568 45.7583978,185.169537 C47.7504894,178.027978 15.4820603,175.026336 18.1995956,164.686325 C18.6797703,162.856336 19.9798115,162.113982 21.8095419,162.113982 C29.7053639,162.112831 47.4292214,175.993123 51.1358936,175.993123 C51.4203136,175.993123 51.6241287,175.910255 51.7346726,175.735313 L51.7830355,175.655898 C53.5217975,172.784312 52.5246002,170.696514 40.6042927,163.399578 L39.4597036,162.703262 C26.3441411,154.767556 17.1367629,149.993472 22.3737759,144.296338 C22.9760094,143.638002 23.8292694,143.346815 24.8667692,143.346815 C26.0977205,143.346815 27.5866075,143.758851 29.2263407,144.448261 C36.1537528,147.368187 45.7549433,155.331515 49.7656109,158.80504 C50.9481994,159.833977 51.6448557,160.462389 51.6448557,160.462389 C51.6448557,160.462389 56.722962,165.740582 59.7940072,165.740582 C60.501027,165.740582 61.099806,165.463207 61.5062848,164.773796 C63.684919,161.104611 41.282525,144.137509 40.0193317,137.137514 C39.1637686,132.393355 40.6204136,129.991351 43.3160705,129.991351 C44.5965363,129.991351 46.1602706,130.535744 
47.8863662,131.630284 C53.240832,135.027848 63.5789812,152.784493 67.3639552,159.691261 C68.6329061,162.006945 70.7988738,162.985241 72.750663,162.985241 C76.6231508,162.985241 79.6504392,159.137661 73.1053244,154.248484 C63.2680768,146.890548 66.7202678,134.865566 71.4149253,134.125514 C71.6152859,134.094439 71.8179496,134.078326 72.0194617,134.077175 C76.2892164,134.077175 78.1730672,141.431658 78.1730672,141.431658 C78.1730672,141.431658 83.6921972,155.286628 93.1747834,164.756532 C101.779928,173.352875 102.980941,180.408114 97.9684697,189.207022 Z M128.631711,190.829842 L128.140021,190.88854 L127.300579,190.985218 C126.859555,191.030105 126.418531,191.07384 125.975205,191.115274 L125.542241,191.154406 L125.148429,191.187783 L124.58765,191.23267 C124.381531,191.247632 124.175413,191.263745 123.969295,191.276405 L123.352092,191.317839 L123.216215,191.325896 L122.730283,191.354669 L122.524165,191.365027 L121.948416,191.393801 L121.279396,191.421423 L120.671405,191.44214 L120.266077,191.453649 L120.061111,191.453649 C119.934446,191.453649 119.808933,191.460555 119.682268,191.461706 L119.480756,191.461706 C119.354091,191.461706 119.228578,191.461706 119.101913,191.468612 L118.587193,191.474366 L117.866356,191.474366 C117.30097,191.474366 116.737888,191.468612 116.174805,191.458253 L115.718812,191.447895 C115.589844,191.447895 115.459725,191.440989 115.330757,191.437536 L114.848279,191.422574 L114.247197,191.399555 L113.707145,191.376537 L113.566662,191.370782 L113.051942,191.34316 C112.909157,191.335103 112.766371,191.328197 112.624737,191.31899 L112.291954,191.299424 C111.87396,191.272952 111.455966,191.243028 111.037972,191.210802 L110.600403,191.176274 L110.047684,191.129085 L109.401694,191.069237 C109.041275,191.03586 108.680856,190.99903 108.320437,190.958747 L108.303164,190.958747 C113.56551,179.224952 110.904399,168.266887 100.270314,157.646048 C93.2968422,150.685185 88.6563052,140.403871 87.6948043,138.146885 C85.7464697,131.4657 80.58891,124.038709 72.0252192,124.038709 C71.300927,124.03986 70.5789377,124.097406 69.8638574,124.210198 C66.1111254,124.799478 62.831659,126.958634 60.491815,130.206576 C57.9642769,127.063369 55.5058286,124.564687 53.2834374,123.152488 C49.9325781,121.030161 46.5897794,119.952885 43.33104,119.952885 C39.2662519,119.952885 35.6309727,121.621743 33.097677,124.648705 L33.0343446,124.725818 L32.8915589,124.12618 L32.8858014,124.100859 C32.4040146,122.040683 31.9992631,119.962092 31.6721225,117.871992 C31.6721225,117.85703 31.6721225,117.843219 31.6652135,117.829408 L31.5938206,117.356373 C31.5552454,117.100865 31.5175914,116.844206 31.4809738,116.588698 C31.4510349,116.375775 31.4210959,116.162852 31.39346,115.95108 C31.365824,115.739307 31.3358851,115.526384 31.3105521,115.313461 C31.2829161,115.099387 31.2575832,114.886464 31.2322502,114.67354 C31.2079536,114.470976 31.1845782,114.268411 31.1620089,114.065846 L31.155215,114.014054 C31.0513499,113.079494 30.9623391,112.143782 30.8879523,111.20692 L30.8476499,110.664829 C30.8361349,110.516359 30.8257714,110.366737 30.8165594,110.215964 C30.8165594,110.181436 30.8119535,110.145757 30.8096505,110.113531 L30.7717662,109.512742 C30.7579482,109.295215 30.7474696,109.077688 30.7359546,108.860161 C30.7244396,108.643785 30.7129246,108.426258 30.7037126,108.20758 L30.6806827,107.637867 L30.6737737,107.465226 L30.6565012,106.938098 L30.6439499,106.491534 C30.6439499,106.313139 30.6357743,106.133593 30.6323198,105.955198 L30.6231078,105.39239 C30.6208048,105.204788 30.6231078,105.018336 30.6185018,104.830733 
C30.6127443,104.64198 30.6185018,104.454377 30.6185018,104.265624 C30.6185018,56.0965241 69.6899812,17.0441057 117.887083,17.0441057 C166.084184,17.0441057 205.154512,56.0942222 205.154512,104.265624 L205.154512,105.39239 C205.154512,105.579993 205.149906,105.768746 205.1453,105.955198 C205.1453,106.111725 205.139542,106.2648 205.134936,106.421327 C205.134936,106.560591 205.129179,106.698703 205.123421,106.833362 C205.123421,107.009456 205.113058,107.184398 205.1073,107.360491 L205.1073,107.375453 C205.092331,107.757564 205.07621,108.139675 205.060089,108.521786 L205.05318,108.648389 L205.023241,109.219253 L204.995605,109.671571 C204.931121,110.743093 204.847062,111.814615 204.744579,112.883834 L204.744579,112.898797 C204.726155,113.09906 204.705428,113.300473 204.683549,113.500736 C204.632883,113.966865 204.581066,114.432995 204.529248,114.899124 L204.470522,115.367555 L204.397978,115.917702 C204.372645,116.119116 204.345009,116.319379 204.316221,116.519642 C204.285131,116.744075 204.251737,116.967356 204.219495,117.190638 L204.138891,117.717767 L204.044468,118.316254 C204.012226,118.515366 203.979984,118.713327 203.941984,118.912439 C203.902833,119.1104 203.872894,119.308361 203.83835,119.507473 C203.76926,119.903395 203.697867,120.298166 203.620716,120.692937 C203.467567,121.479026 203.307509,122.262814 203.138239,123.045451 C203.095633,123.239959 203.051876,123.435618 203.006968,123.630126 C200.550823,121.244235 197.298992,119.944828 193.710924,119.944828 C190.45564,119.944828 187.109386,121.020954 183.759679,123.144431 C181.536136,124.555479 179.079991,127.055313 176.550149,130.19852 C174.206851,126.949427 170.927385,124.791421 167.179258,124.200991 C166.464178,124.08935 165.741037,124.031803 165.016745,124.030652 C156.450751,124.030652 151.296646,131.457644 149.34716,138.138829 C148.381053,140.395815 143.740516,150.675978 136.758984,157.648349 C126.13296,168.234661 123.451121,179.144386 128.631711,190.829842 Z M218.724916,167.341535 L218.690371,167.443968 C218.66619,167.509571 218.642008,167.575175 218.615524,167.639627 C218.57407,167.737457 218.530313,167.832984 218.484253,167.928512 C218.372558,168.156398 218.245893,168.377377 218.106562,168.58915 C217.914261,168.875733 217.703537,169.149656 217.474389,169.407465 C217.42142,169.467314 217.370754,169.526012 217.310876,169.584709 C217.230272,169.673332 217.143909,169.759652 217.058698,169.845972 C215.507631,171.382472 213.144757,172.727916 210.473281,173.964022 C210.170437,174.100983 209.864139,174.237945 209.553234,174.374906 L209.244633,174.511868 C209.038515,174.602792 208.833548,174.693716 208.617066,174.782338 C208.406342,174.872111 208.194467,174.960733 207.982591,175.048204 L207.340055,175.31407 C205.83735,175.932123 204.297797,176.520251 202.795092,177.102625 L202.153708,177.351227 L201.518081,177.59983 C201.096633,177.764414 200.67979,177.928997 200.268706,178.093581 L199.65726,178.339882 L199.05733,178.586182 L198.761395,178.709332 C198.56564,178.7922 198.372188,178.872765 198.18104,178.955633 C193.76159,180.850074 190.583456,182.777892 191.251325,185.170688 C191.269749,185.238594 191.290476,185.303046 191.313506,185.365197 C191.373383,185.542441 191.45514,185.710477 191.556472,185.867005 C191.61635,185.961382 191.685439,186.050004 191.76259,186.130569 C192.547911,186.945432 193.97692,186.816527 195.779015,186.169701 C195.925255,186.116758 196.07495,186.060362 196.225796,186.000513 C196.329431,185.961382 196.431914,185.919948 196.535549,185.878514 L196.691001,185.812911 C197.103238,185.637968 197.539655,185.44346 
197.986437,185.230537 C198.098132,185.178745 198.210979,185.128104 198.323826,185.068255 C200.526641,183.99213 203.02424,182.540799 205.502264,181.220675 C205.882259,181.014658 206.263404,180.817847 206.642247,180.622188 C207.261753,180.301077 207.887017,179.991475 208.518038,179.695685 C210.86479,178.593088 213.069909,177.810451 214.844368,177.810451 C215.675749,177.810451 216.411556,177.98079 217.023002,178.372108 L217.125485,178.440013 C217.435238,178.658691 217.704689,178.926859 217.924625,179.23531 C217.975291,179.307819 218.02826,179.38263 218.073168,179.459743 C218.121531,179.539157 218.167591,179.620874 218.211348,179.703741 C219.087638,181.365693 218.354134,183.088645 216.638402,184.777068 C214.990608,186.397586 212.427373,187.987029 209.512932,189.459077 C209.297602,189.568416 209.079969,189.677755 208.861184,189.783641 C200.189252,194.039803 188.835482,197.245161 188.676575,197.285443 C185.650438,198.069231 181.323109,199.099319 176.448818,199.869295 L175.726828,199.980936 L175.609375,199.9982 C174.503937,200.163935 173.395045,200.310104 172.283849,200.436707 L172.181366,200.447065 C170.160487,200.677253 168.08779,200.844138 166.011639,200.914346 L165.980549,200.914346 C165.234378,200.941968 164.489359,200.954628 163.743188,200.954628 L162.884171,200.954628 C161.746491,200.938515 160.609962,200.887874 159.475737,200.800403 C159.449252,200.800403 159.421616,200.800403 159.39398,200.794648 C158.988653,200.763573 158.582174,200.725592 158.177998,200.680705 C157.482494,200.605895 156.78814,200.51382 156.09609,200.405632 C155.826639,200.360745 155.538765,200.313557 155.261254,200.265217 C155.134589,200.242199 155.009076,200.21918 154.883563,200.193859 L154.853624,200.188105 C154.454054,200.112143 154.056787,200.028125 153.660672,199.937201 C153.431524,199.885408 153.201224,199.833616 152.974379,199.772617 L152.838502,199.736938 C152.725655,199.709315 152.615111,199.679391 152.504568,199.649467 L152.443538,199.633353 L152.087725,199.53092 C151.958758,199.49409 151.830941,199.456109 151.701974,199.418128 L151.655914,199.404317 L151.320828,199.301884 C151.194163,199.262752 151.06865,199.221318 150.943136,199.181036 C150.839501,199.146507 150.737018,199.110828 150.633383,199.077451 L150.406538,198.998037 C150.187754,198.922075 149.971272,198.841509 149.75479,198.758642 L149.549824,198.679227 L149.380554,198.611322 C149.05468,198.480115 148.729957,198.343154 148.408689,198.199287 L148.194511,198.096854 L148.158814,198.08074 C148.082815,198.045061 148.007968,198.009382 147.931969,197.977156 C147.783426,197.905798 147.636034,197.832138 147.489794,197.757327 L147.446037,197.73546 L147.234161,197.623819 C146.857621,197.42816 146.48799,197.218689 146.12642,196.995408 L145.928362,196.874559 C145.825879,196.812409 145.732608,196.749107 145.63473,196.685806 L145.376795,196.514316 C145.283524,196.452166 145.190252,196.388864 145.099284,196.323261 L144.933468,196.205865 C144.75844,196.080413 144.586867,195.952659 144.417597,195.820302 C144.32778,195.751245 144.239115,195.679887 144.150449,195.614284 C143.953543,195.456606 143.761243,195.294324 143.571246,195.127438 L143.563185,195.120532 C143.377794,194.960552 143.197009,194.794817 143.02083,194.62563 L143.011618,194.61412 C142.919498,194.526649 142.829681,194.439178 142.739864,194.348254 C142.651199,194.25733 142.561382,194.167557 142.47502,194.076633 C142.387506,193.984558 142.299992,193.891332 142.214781,193.795805 C142.131873,193.706032 142.051269,193.615108 141.970664,193.523033 L141.943028,193.491958 C141.527337,193.009715 
141.138131,192.504454 140.777712,191.978476 L140.634926,191.767855 C140.540503,191.628592 140.447232,191.488178 140.355112,191.347763 L140.083358,190.929973 C139.949785,190.726258 139.819665,190.52024 139.693001,190.313072 C139.602032,190.165752 139.512215,190.018432 139.42355,189.869961 C139.348703,189.743358 139.273855,189.617906 139.202462,189.493605 C139.16216,189.4257 139.124161,189.356644 139.086161,189.287587 C139.048162,189.216229 139.003253,189.14372 138.962951,189.071211 L138.898467,188.957269 C138.876589,188.919288 138.86968,188.907778 138.858165,188.882458 C138.779863,188.745496 138.703864,188.606233 138.630168,188.464668 C138.596775,188.403668 138.563381,188.341518 138.527685,188.280518 L138.42405,188.082557 L138.321566,187.885747 L138.123509,187.486372 C138.091267,187.420769 138.060176,187.355165 138.029086,187.289562 C137.961148,187.147997 137.896664,187.008734 137.83218,186.86947 C137.806847,186.813074 137.780362,186.756679 137.757332,186.700283 C137.668667,186.508076 137.584608,186.313568 137.505154,186.116758 C137.451034,185.986702 137.40152,185.857797 137.348551,185.728892 C137.200007,185.349083 137.060676,184.965822 136.93286,184.577956 C136.89486,184.462863 136.856861,184.34892 136.823468,184.233826 C136.74056,183.972564 136.665712,183.708999 136.598925,183.443133 C136.517169,183.133531 136.444625,182.820477 136.383595,182.506271 C136.344444,182.32097 136.309899,182.134518 136.281112,181.946916 C136.250022,181.761615 136.223537,181.576314 136.201659,181.392165 C136.154447,181.025016 136.121054,180.656716 136.101478,180.287266 C136.101478,180.226266 136.095721,180.164116 136.094569,180.103116 C136.088812,179.981117 136.085357,179.859118 136.084206,179.737118 C136.019722,174.820319 138.510412,170.091121 143.833788,164.772645 C153.315222,155.303892 158.835504,141.447771 158.835504,141.447771 C158.835504,141.447771 158.984047,140.866548 159.2938,140.030968 C159.397435,139.753592 159.505676,139.477368 159.619674,139.204596 C159.749793,138.896145 159.889124,138.591147 160.038819,138.291903 L160.082576,138.204432 C160.274876,137.822321 160.483297,137.450569 160.710143,137.088024 C160.76196,137.004006 160.812626,136.919987 160.872504,136.83712 C161.040622,136.586216 161.219105,136.343368 161.406799,136.107426 C161.551888,135.926729 161.706188,135.751787 161.86855,135.586052 C162.418966,135.019792 163.046532,134.557115 163.757006,134.309664 L163.846823,134.280891 C163.906701,134.261325 163.965427,134.24291 164.026457,134.225646 C164.09785,134.207231 164.169243,134.189967 164.240636,134.175004 L164.272877,134.16925 C164.423724,134.139325 164.57457,134.118608 164.727719,134.10825 L164.741537,134.10825 C164.820991,134.10825 164.900444,134.100194 164.9822,134.100194 C165.186015,134.101344 165.388679,134.117458 165.590191,134.148533 C166.444603,134.283192 167.258712,134.791906 167.958823,135.576845 L168.053245,135.687335 C168.281242,135.954352 168.488512,136.239784 168.672751,136.539027 C168.810931,136.761158 168.94105,136.994798 169.064261,137.239948 C169.113775,137.343532 169.160987,137.437909 169.208198,137.539191 C169.231228,137.590983 169.254258,137.641624 169.276136,137.694567 C169.322196,137.797001 169.364802,137.900585 169.407407,138.008773 C169.64807,138.625675 169.834613,139.262143 169.965884,139.911271 C170.105215,140.612191 170.18582,141.32347 170.20885,142.038201 C170.21691,142.352407 170.21691,142.670065 170.20885,142.992327 C170.203092,143.114326 170.197335,143.237477 170.188123,143.360627 C170.0937,144.72909 169.795462,146.075685 169.302621,147.356677 
C169.253106,147.48328 169.198986,147.612185 169.14832,147.74109 C168.900748,148.332671 168.612873,148.905838 168.288151,149.458287 C168.10276,149.773644 167.902399,150.086698 167.688221,150.397451 L167.55695,150.583903 C167.049139,151.292879 166.482602,151.958121 165.863096,152.572721 C165.252802,153.183868 164.598751,153.748977 163.906701,154.265748 C163.212348,154.777914 162.561751,155.346477 161.960669,155.96453 C160.229968,157.780707 159.826944,159.381659 160.21615,160.595897 C160.26221,160.732859 160.31633,160.867518 160.378511,160.997574 C160.522448,161.279553 160.708991,161.538514 160.932382,161.764098 L160.993411,161.825097 C161.056743,161.886097 161.12353,161.944795 161.191469,162.00119 L161.260559,162.057586 C161.428677,162.186491 161.606008,162.301585 161.791399,162.401716 C161.844368,162.43049 161.893883,162.459263 161.954912,162.486886 C162.209393,162.613489 162.475389,162.714771 162.749446,162.791884 L162.825444,162.81145 L162.927928,162.839072 L163.017745,162.86094 L163.113319,162.881657 L163.208893,162.901223 L163.299862,162.916185 C163.3678,162.928845 163.43689,162.939203 163.504828,162.948411 L163.571615,162.957618 L163.690219,162.970279 L163.763915,162.977184 L163.885974,162.986392 L163.956215,162.986392 L164.08288,162.992147 L164.48245,162.992147 L164.596448,162.986392 L164.727719,162.978335 L164.887777,162.963373 L165.037472,162.944958 C165.070866,162.944958 165.105411,162.935751 165.139955,162.929996 C165.667342,162.852883 166.18091,162.704412 166.666842,162.488036 C166.770477,162.444301 166.87296,162.395962 166.967383,162.34532 C167.170046,162.244038 167.366953,162.130095 167.555798,162.004643 C167.819491,161.832003 168.068215,161.637495 168.301969,161.425722 L168.46433,161.271497 C168.489663,161.246176 168.516148,161.222007 168.540329,161.195535 C168.699236,161.036706 168.848931,160.868669 168.988262,160.692576 C169.234682,160.381823 169.453467,160.051504 169.643464,159.703922 C171.896946,155.60889 174.258668,151.573708 176.726329,147.604129 L177.064869,147.062038 L177.406864,146.518796 C177.578437,146.244873 177.75001,145.973252 177.921584,145.705084 L178.09546,145.434614 L178.442061,144.894825 C178.907266,144.17549 179.373623,143.464211 179.843434,142.759838 L180.194641,142.235011 C180.899357,141.192263 181.602923,140.177137 182.298428,139.211501 L182.64618,138.735014 C183.402714,137.689964 184.19149,136.669083 185.010205,135.672372 L185.336079,135.283356 C185.389048,135.217753 185.438563,135.154451 185.497289,135.09115 L185.816254,134.721699 C185.869223,134.660699 185.919889,134.600851 185.975161,134.542153 L186.283762,134.193419 L186.439214,134.025383 C186.588909,133.863101 186.740907,133.703121 186.895208,133.544291 C186.998842,133.441858 187.094417,133.338274 187.194597,133.242746 C187.778407,132.64541 188.422094,132.110225 189.116447,131.646397 L189.276506,131.543964 L189.438867,131.44038 C189.542502,131.375927 189.647288,131.313777 189.754377,131.255079 C192.476519,129.711674 194.731152,129.597731 196.027738,130.893685 C196.814212,131.679775 197.248326,132.981483 197.225296,134.791906 C197.225296,134.872472 197.225296,134.951886 197.218387,135.034754 L197.218387,135.124527 C197.218387,135.206243 197.211478,135.290262 197.205721,135.37428 C197.205721,135.476713 197.193054,135.580298 197.182691,135.682731 C197.172327,135.786315 197.167722,135.864579 197.157358,135.955503 C197.157358,135.981974 197.151601,136.008446 197.148146,136.034917 C197.140086,136.115483 197.129722,136.197199 197.119359,136.277765 C197.119359,136.303086 197.119359,136.327255 
197.108995,136.352576 C197.09748,136.461915 197.081359,136.571254 197.061784,136.679442 C197.049117,136.779573 197.032996,136.879705 197.013421,136.979836 C197.00536,137.037383 196.993845,137.093779 196.984633,137.151326 C196.954694,137.314759 196.915543,137.477041 196.868332,137.63587 C196.779667,137.932811 196.671426,138.223998 196.54361,138.507128 C196.488338,138.630279 196.429611,138.753429 196.369733,138.874277 C196.245372,139.12403 196.104889,139.379538 195.950588,139.644253 C195.873438,139.774309 195.793984,139.908969 195.708774,140.043628 L195.579806,140.248495 C195.448535,140.454513 195.311507,140.662832 195.168721,140.873453 C195.025935,141.084075 194.872786,141.2993 194.708122,141.516827 C194.495095,141.806863 194.270552,142.102653 194.033344,142.401897 L193.85371,142.628631 C193.3459,143.260495 192.825422,143.882001 192.29343,144.493148 L191.870831,144.970787 L191.6555,145.212483 C191.361868,145.5405 191.067085,145.866215 190.769998,146.189628 L190.542002,146.435928 C190.391156,146.599361 190.2334,146.765096 190.082554,146.92968 C189.928253,147.096566 189.77165,147.262301 189.615046,147.428035 L189.140629,147.927542 L188.660454,148.428199 C188.500396,148.595085 188.339186,148.764272 188.176825,148.931158 L187.199203,149.938227 C182.529879,154.724971 177.609528,159.495602 175.944461,162.469621 C175.833918,162.66413 175.733737,162.863242 175.642769,163.066957 C175.406712,163.607897 175.307683,164.070574 175.37447,164.444628 C175.396348,164.567778 175.440105,164.686325 175.503438,164.793362 C175.597861,164.955644 175.711859,165.105266 175.84313,165.238774 C175.904159,165.298623 175.967491,165.355019 176.036581,165.40566 C176.378576,165.649658 176.793115,165.77511 177.213412,165.76245 L177.344683,165.76245 L177.479408,165.753243 L177.615285,165.73713 L177.72698,165.721016 C177.74195,165.719865 177.756919,165.716413 177.771889,165.71296 L177.874372,165.693394 L177.902008,165.686488 L178.014855,165.662319 L178.055157,165.65196 L178.176065,165.620885 L178.320002,165.5783 C178.414425,165.549527 178.508848,165.517301 178.605573,165.481622 C178.813994,165.407962 179.01781,165.322792 179.217019,165.227265 C179.267685,165.203095 179.319502,165.180076 179.369017,165.153605 L179.522166,165.077643 C179.722526,164.976361 179.921736,164.867022 180.122096,164.748475 C180.282154,164.654099 180.441061,164.55742 180.599968,164.454987 L180.754269,164.352553 C180.804935,164.320327 180.856752,164.28695 180.907418,164.248969 L181.060567,164.146536 L181.141172,164.09014 L181.36226,163.935914 C181.568378,163.793198 181.760678,163.647029 181.956433,163.498559 L181.973705,163.485898 L182.282307,163.246503 C182.704906,162.912732 183.106779,162.576659 183.469501,162.260151 L183.711316,162.047228 L183.734346,162.026511 L183.859859,161.91487 C184.156945,161.649004 184.42179,161.400402 184.642877,161.194384 L184.732694,161.106913 L184.950327,160.900895 L185.080447,160.774292 L185.125355,160.728255 L185.138022,160.715595 C185.248565,160.605105 185.248565,160.605105 185.359109,160.493464 L185.368321,160.484256 L185.409775,160.447426 C185.422442,160.433615 185.439714,160.419804 185.45929,160.40254 L185.475411,160.387578 L185.519167,160.348446 L185.752922,160.142428 L185.884192,160.023882 C185.948676,159.968637 186.018918,159.905335 186.090311,159.83743 L186.248066,159.699318 C186.275702,159.676299 186.303338,159.649828 186.332125,159.624507 L186.496789,159.48064 L186.739755,159.268868 L186.867572,159.157227 C187.368473,158.721022 187.978767,158.195044 188.677727,157.602312 L188.96445,157.360615 
L189.438867,156.963542 L189.922496,156.558413 C190.55582,156.033586 191.237507,155.475382 191.958344,154.89531 L192.43161,154.515501 C192.834634,154.195541 193.249174,153.867524 193.668319,153.538356 C193.836438,153.405998 194.008011,153.273641 194.183039,153.142434 C194.612547,152.810964 195.044359,152.480646 195.477322,152.154931 C196.465308,151.409124 197.484383,150.662166 198.514974,149.933624 L198.958301,149.626324 C199.266902,149.41225 199.575504,149.199326 199.885257,148.991007 L200.165071,148.801102 C200.718941,148.428199 201.277418,148.063352 201.8405,147.706562 L202.120315,147.530469 L202.397826,147.357828 C202.675337,147.186339 202.952848,147.016 203.226904,146.851416 L203.502112,146.687983 L204.05368,146.366872 C204.234465,146.262137 204.414099,146.159704 204.593732,146.058421 L204.701973,145.997422 L205.13033,145.763782 C205.307661,145.667103 205.486143,145.573877 205.662322,145.482953 L205.927167,145.347143 L206.1828,145.217087 C206.767761,144.923598 207.363085,144.651977 207.968773,144.403375 C208.051681,144.368847 208.133437,144.33547 208.215194,144.299791 C208.377555,144.231885 208.538764,144.169735 208.698823,144.11449 C208.858881,144.060396 209.016636,144.004 209.172088,143.948755 C209.313723,143.901567 209.450751,143.857831 209.584325,143.816397 L209.629233,143.802586 C209.700626,143.780718 209.772019,143.760002 209.843412,143.741587 L209.864139,143.735832 C210.009227,143.694398 210.154316,143.656417 210.300556,143.621889 L210.312071,143.621889 C210.527402,143.570097 210.736974,143.526362 210.941941,143.491833 C211.012182,143.480324 211.081272,143.468815 211.148059,143.459607 C211.347268,143.429683 211.547629,143.408966 211.747989,143.395155 C211.874654,143.385947 212.002471,143.382494 212.129135,143.382494 L212.225861,143.382494 C212.354829,143.382494 212.479191,143.390551 212.602401,143.403211 C212.658824,143.403211 212.715248,143.41357 212.77052,143.421626 L212.79355,143.421626 C212.848822,143.427381 212.905245,143.436588 212.959365,143.448098 C213.013486,143.459607 213.068758,143.467664 213.121727,143.480324 L213.140151,143.480324 C213.19312,143.491833 213.242634,143.506796 213.297906,143.521758 C213.737778,143.640304 214.141954,143.863586 214.47704,144.172037 L214.506979,144.201961 L214.565705,144.259508 L214.62328,144.320508 C215.085031,144.806203 215.467328,145.362105 215.755203,145.967497 L215.800111,146.069931 C215.897988,146.300118 215.975139,146.538362 216.029259,146.78236 C216.178954,147.462564 216.138651,148.17154 215.911806,148.829876 C215.813929,149.116459 215.694173,149.394986 215.552539,149.662003 L215.458116,149.832341 C215.098849,150.434281 214.675097,150.994787 214.19262,151.503501 L214.088985,151.61284 C213.86214,151.853385 213.626083,152.087025 213.383117,152.311458 C213.305966,152.383967 213.226513,152.456476 213.145908,152.528985 L213.023849,152.639475 C212.77052,152.863907 212.511433,153.082585 212.248891,153.296659 C212.202831,153.333489 212.15562,153.370319 212.110711,153.409451 C211.927623,153.556771 211.738777,153.706393 211.543023,153.856014 C210.868245,154.371634 210.180801,154.868838 209.48069,155.348779 C209.068454,155.630758 208.642399,155.915039 208.202527,156.203924 C207.38266,156.741412 206.557036,157.26739 205.723352,157.783009 C205.482689,157.93148 205.240874,158.082252 204.994454,158.231874 C202.729457,159.624507 200.21804,161.113819 197.539655,162.733186 L196.846454,163.154429 C196.087616,163.617105 195.371385,164.059064 194.697758,164.479156 L194.358066,164.69323 L193.71553,165.105266 C193.461049,165.266397 193.213477,165.425226 
192.972814,165.581753 C192.790878,165.697998 192.613547,165.813091 192.439671,165.927034 L192.096524,166.152618 C191.929557,166.261956 191.763741,166.371295 191.597926,166.482936 L191.436716,166.58537 C191.266294,166.699312 191.100478,166.810953 190.939269,166.921443 L190.675576,167.100989 L190.365823,167.316214 L190.080251,167.515326 C189.628864,167.831834 189.183234,168.155247 188.741059,168.484414 L188.586758,168.60181 C188.412882,168.733017 188.244763,168.864223 188.082402,168.991977 L187.870526,169.161165 C187.513562,169.447748 187.189991,169.722822 186.897511,169.985235 L186.754725,170.116442 C186.672969,170.191253 186.592364,170.266064 186.516365,170.339724 C186.462244,170.390365 186.41273,170.442157 186.35861,170.492798 L186.283762,170.566458 C186.119098,170.731042 185.95904,170.90023 185.803588,171.074021 L185.727589,171.161492 C185.558318,171.357151 185.410927,171.544754 185.280807,171.727753 L185.223232,171.81062 C185.073538,172.029298 184.941115,172.260636 184.828269,172.501182 L184.789118,172.5852 L184.764936,172.643898 L184.745361,172.694539 L184.718876,172.764746 C184.710816,172.788916 184.694695,172.831501 184.685483,172.868331 C184.638272,173.005292 184.603727,173.146857 184.580697,173.290724 L184.569182,173.367837 L184.55997,173.438044 L184.554212,173.505949 L184.554212,173.867343 C184.554212,173.897268 184.554212,173.928343 184.561121,173.959418 C184.562273,173.977833 184.564576,173.996248 184.566879,174.014663 C184.566879,174.044587 184.574939,174.074512 184.578394,174.105587 L184.600272,174.23219 L184.600272,174.237945 C184.608333,174.278228 184.616393,174.31851 184.626757,174.358793 C184.63712,174.399076 184.647483,174.443962 184.66015,174.486547 C184.696998,174.616603 184.741906,174.745508 184.793724,174.87096 C184.793724,174.880167 184.801784,174.890526 184.805239,174.899733 C184.819057,174.930808 184.831723,174.963035 184.846693,174.99411 C184.903116,175.125317 184.966448,175.253071 185.035538,175.378523 L185.108083,175.508578 L185.184082,175.639785 L185.263535,175.770992 C185.277353,175.79286 185.292322,175.812426 185.309595,175.831992 L185.336079,175.859614 L185.363715,175.884935 L185.394806,175.909104 C185.415533,175.924066 185.437411,175.936727 185.460441,175.949387 C185.483471,175.959745 185.508804,175.971255 185.534137,175.980462 C185.561773,175.98967 185.589409,175.997726 185.617045,176.004632 C186.281459,176.154254 187.647136,175.604106 189.460745,174.646527 L189.786619,174.473887 L190.338187,174.173492 L190.608789,174.023871 C190.799937,173.920286 190.996844,173.805193 191.196053,173.69125 L191.561078,173.485232 C193.959648,172.10526 196.825727,170.331667 199.78738,168.600659 C200.063739,168.438377 200.342402,168.277246 200.621064,168.116115 L201.180692,167.759325 C201.460506,167.598193 201.739169,167.439364 202.018983,167.282837 C203.209632,166.606086 204.416402,165.955807 205.635838,165.333151 L206.1828,165.055775 C206.546673,164.876229 206.907092,164.701287 207.262905,164.530948 C207.952652,164.204082 208.649308,163.894481 209.352874,163.600992 L209.738625,163.444465 L209.785837,163.42605 C211.828594,162.616941 213.681355,162.116284 215.180605,162.116284 C215.505328,162.113982 215.828898,162.140454 216.149015,162.195699 L216.158227,162.195699 C216.261862,162.214114 216.355133,162.23483 216.449556,162.259 L216.466828,162.259 C216.605008,162.292377 216.740885,162.334962 216.872156,162.387905 C216.976942,162.425886 217.078274,162.470772 217.176151,162.523715 C217.60566,162.741242 217.972988,163.062354 218.247044,163.458276 C218.270074,163.491653 
218.291953,163.526181 218.31268,163.56186 C218.414012,163.716085 218.501525,163.878367 218.572918,164.048706 C218.615524,164.146536 218.654675,164.247818 218.691523,164.357157 C218.728371,164.467647 218.760612,164.573533 218.794006,164.687476 C219.038123,165.559885 219.013942,166.482936 218.724916,167.341535 Z" id="Shape" fill="#FF9D00" fillRule="nonzero"></path>
10
+ <path d="M205.169417,104.827718 L205.169417,104.263487 C205.169417,56.0689579 166.09852,17 117.882512,17 C69.6688088,17 30.5830519,56.0712609 30.5830519,104.263487 L30.5805699,104.452331 C30.5805699,104.577844 30.5805699,104.702205 30.5805699,104.827718 C30.5874292,105.015411 30.5893875,105.203104 30.5876596,105.390797 L30.5945712,105.805334 L30.596875,105.955028 L30.6026346,106.159993 C30.6049385,106.270536 30.6083942,106.379928 30.6083942,106.490471 L30.6199134,106.938402 L30.6383442,107.464633 L30.6441038,107.637357 L30.6671422,108.182012 L30.6671422,108.207345 L30.6993961,108.84412 L30.6993961,108.860241 C30.7109153,109.077873 30.7224345,109.295505 30.7362576,109.511985 C30.7477768,109.713496 30.7604479,109.913856 30.774271,110.114215 L30.7777268,110.149912 C30.7878637,110.321484 30.7993829,110.494208 30.8122844,110.66578 L30.8157402,110.710688 C30.8272594,110.876503 30.8399306,111.043469 30.8537536,111.208132 L30.8560575,111.240374 L30.8790959,111.528247 C30.9480961,112.358472 31.0287306,113.188698 31.1209996,114.01662 L31.1267592,114.068437 L31.1958745,114.676425 L31.2753572,115.316654 C31.3286912,115.742706 31.3855961,116.167607 31.4458417,116.592507 C31.4817816,116.84929 31.5191039,117.104921 31.5575781,117.360552 L31.5633377,117.391642 C31.6493863,117.969691 31.7422312,118.54774 31.8421029,119.123486 C32.1285859,120.7943 32.4650624,122.457054 32.8511867,124.108293 L32.8569463,124.134777 L32.8938079,124.290228 C32.9283655,124.437619 32.9629232,124.58501 32.9997847,124.733552 L33.0631404,124.656403 C35.5973693,121.62798 39.2328357,119.958317 43.300273,119.958317 C46.5602128,119.958317 49.9053949,121.034962 53.2563365,123.159464 C55.4795464,124.571192 57.9377483,127.072232 60.4673695,130.216955 C62.8080754,126.966295 66.0886346,124.807248 69.8415971,124.217684 C70.5580927,124.104838 71.2814999,124.047264 72.004907,124.046112 C80.5717524,124.046112 85.7312119,131.477839 87.6802643,138.161096 C88.6421193,140.420322 93.2843658,150.704294 100.283445,157.661606 C110.921447,168.286419 113.583539,179.250921 108.319254,190.990378 L108.336533,190.990378 C108.695933,191.029529 109.056484,191.066377 109.418188,191.09977 C109.633597,191.120497 109.849007,191.141224 110.064416,191.159647 L110.140443,191.166556 C110.298257,191.181526 110.457222,191.195344 110.616187,191.206859 L111.053918,191.242555 C111.470913,191.274797 111.889061,191.303584 112.309513,191.33122 L112.641266,191.350795 L112.905057,191.365764 L113.069781,191.374976 L113.583539,191.402612 L113.724073,191.40837 L114.265477,191.431399 L114.865628,191.454429 L115.349435,191.468247 L115.43007,191.471702 C115.531439,191.475156 115.635112,191.479762 115.737633,191.479762 L115.847066,191.482065 C116.5267,191.498186 117.205182,191.506246 117.884816,191.506246 L118.605919,191.506246 L119.121981,191.500489 C119.248692,191.49358 119.374252,191.49358 119.500963,191.49358 L119.701397,191.49358 L119.875338,191.490126 C119.943301,191.487823 120.012416,191.48552 120.08038,191.48552 L120.286574,191.48552 L120.690899,191.474005 L121.299114,191.453278 L121.969532,191.424491 C122.161903,191.41643 122.353122,191.407218 122.545493,191.396855 L122.751687,191.38534 L123.056947,191.368067 L123.237799,191.357704 L123.372574,191.349644 L123.991156,191.30819 C124.19735,191.295523 124.403544,191.279403 124.609738,191.263282 L125.169572,191.219525 L125.564681,191.186132 L125.996652,191.146981 C126.440142,191.105527 126.88248,191.061771 127.322515,191.015711 C127.603584,190.984621 127.882349,190.95353 128.162266,190.920137 L128.654137,190.861411 
C123.471639,179.170317 126.154466,168.255328 136.749846,157.671969 C143.732798,150.697385 148.375045,140.41111 149.342659,138.153035 C151.292864,131.468627 156.447716,124.038052 165.016865,124.038052 C165.741424,124.039203 166.464831,124.095626 167.180175,124.209624 C170.929681,124.799188 174.211508,126.959386 176.555669,130.208895 C179.086443,127.064171 181.543493,124.563132 183.766702,123.151403 C187.117644,121.026901 190.46513,119.950256 193.721614,119.950256 C197.311004,119.950256 200.565184,121.251442 203.022234,123.637333 C203.157009,123.036254 203.287176,122.435175 203.411584,121.831794 C203.48761,121.455256 203.562485,121.077567 203.636208,120.698726 C203.726058,120.233523 203.811301,119.766018 203.889631,119.298512 C203.910366,119.171848 203.932252,119.045184 203.956443,118.917369 C203.994456,118.71816 204.027862,118.521255 204.058964,118.320896 L204.071635,118.244897 L204.153421,117.723272 L204.234056,117.194737 L204.235208,117.18898 C204.267462,116.967893 204.300867,116.745655 204.330817,116.523418 C204.355008,116.3553 204.378046,116.187182 204.399933,116.017913 L204.413756,115.921187 L204.485175,115.370774 L204.543923,114.902117 C204.579633,114.593518 204.61419,114.284918 204.647596,113.975167 C204.662571,113.851957 204.675242,113.727596 204.687913,113.603235 L204.699433,113.503055 C204.721319,113.296938 204.740902,113.091973 204.759333,112.885856 L204.805409,112.406835 C204.901019,111.34516 204.978198,110.283485 205.038098,109.219506 L205.038098,109.212597 L205.0692,108.648367 L205.074959,108.521703 L205.097998,107.986259 C205.107213,107.784748 205.115276,107.580934 205.122188,107.374817 L205.122188,107.359847 L205.127948,107.211305 C205.133707,107.084641 205.138315,106.957977 205.138315,106.832464 L205.140619,106.780647 C205.145226,106.663195 205.149834,106.541137 205.149834,106.42023 L205.15329,106.316596 C205.156746,106.19569 205.160201,106.075934 205.160201,105.955028 L205.162505,105.852545 C205.165961,105.698245 205.169417,105.545097 205.169417,105.390797 L205.169417,104.827718 Z M96.2309827,192.003691 C103.143668,181.868262 102.654101,174.261508 93.1680216,164.784733 C83.6819423,155.307957 78.1607792,141.448604 78.1607792,141.448604 C78.1607792,141.448604 76.0999904,133.397375 71.4001479,134.137784 C66.7003053,134.877042 63.2527174,146.906674 73.0923216,154.270463 C82.932041,161.634252 71.1352058,166.627119 67.3476856,159.717018 C63.5591287,152.805766 53.219475,135.041705 47.8572772,131.643653 C42.4962313,128.245602 38.7259899,130.150169 39.9896486,137.153541 C40.6174462,140.629894 46.4553879,146.562378 51.9097395,152.105659 C57.4435737,157.730695 62.5822986,162.95271 61.4845168,164.800854 C59.3039281,168.471809 51.6240628,160.488518 51.6240628,160.488518 C51.6240628,160.488518 27.576535,138.613632 22.3410486,144.314667 C17.5156465,149.568923 24.9570639,154.035559 36.4256013,160.919175 C37.4012794,161.505285 38.4057556,162.108666 39.4344221,162.730472 C52.5548159,170.671158 53.575419,172.765721 51.7139127,175.769962 C51.0262152,176.88 46.6339358,174.244236 41.2314207,171.001636 C32.0218028,165.474476 19.877087,158.186686 18.1630268,164.709886 C16.6781991,170.355649 25.6125077,173.81473 33.7151284,176.952544 C40.4653925,179.56643 46.6396954,181.956927 45.7319807,185.20183 C44.7908603,188.565337 39.6889969,185.760303 34.1113896,182.69273 C27.8506925,179.248619 20.9909958,175.475181 18.7470514,179.733396 C14.50337,187.782323 48.0173944,197.257947 48.2927038,197.327036 C59.1207724,200.134373 86.6193439,206.082978 96.2309827,192.003691 Z M140.768903,192.003691 
C133.855066,181.868262 134.345784,174.261508 143.831864,164.784733 C153.317943,155.307957 158.840258,141.448604 158.840258,141.448604 C158.840258,141.448604 160.899895,133.397375 165.599737,134.137784 C170.29958,134.877042 173.748435,146.906674 163.907564,154.270463 C154.066692,161.634252 165.86468,166.627119 169.653352,159.717018 C173.440872,152.805766 183.775918,135.041705 189.136964,131.643653 C194.49801,128.245602 198.269403,130.150169 197.005744,137.153541 C196.377947,140.629894 190.540005,146.56353 185.084501,152.10681 C179.551819,157.730695 174.413094,162.95271 175.510876,164.800854 C177.690313,168.471809 185.37709,160.483912 185.37709,160.483912 C185.37709,160.483912 209.423465,138.609026 214.658952,144.310061 C219.484354,149.564317 212.041785,154.032104 200.573247,160.916873 C199.597569,161.50183 198.594245,162.10406 197.565578,162.727017 C184.444033,170.666552 183.424581,172.762266 185.286088,175.765356 C185.973785,176.875394 190.366065,174.23963 195.767428,170.998181 C204.978198,165.471022 217.122913,158.18208 218.836974,164.70528 C220.321801,170.351043 211.387493,173.811275 203.28372,176.94909 C196.533456,179.562976 190.360305,181.952321 191.266868,185.198376 C192.207988,188.561882 197.307548,185.755697 202.882851,182.688124 C209.143548,179.244013 216.004397,175.469423 218.249493,179.72879 C222.493175,187.783474 188.974543,197.248735 188.702689,197.318976 C177.87462,200.134373 150.375934,206.082978 140.768903,192.003691 Z" id="Shape" fill="#FFD21E"></path>
11
+ <path d="M146.614758,80.6109193 C147.976342,81.0911 148.989747,82.5534685 149.953577,83.9434653 C151.256362,85.8239816 152.46922,87.5723908 154.330012,86.5867567 C156.5263,85.424214 158.325988,83.6390445 159.503106,81.456405 C160.680224,79.2743398 161.180586,76.793253 160.941934,74.3272149 C160.764386,72.4833439 160.178709,70.7015059 159.226408,69.1104765 C158.274107,67.5193323 156.979392,66.1585139 155.436803,65.1258957 C153.89306,64.0933923 152.138336,63.414936 150.300602,63.139349 C148.461715,62.8637621 146.584783,62.9980518 144.804694,63.532339 C142.423941,64.2467514 140.30951,65.6481209 138.727721,67.5595388 C137.147086,69.4708418 136.169421,71.806381 135.91924,74.2709257 C135.670212,76.7360448 136.160198,79.2193143 137.326939,81.4070084 C138.189314,83.0233104 140.10314,82.2616841 142.123033,81.456405 C143.707127,80.825737 145.355784,80.1686476 146.614758,80.6109193 Z M87.3830077,80.6120681 C86.0214245,81.0922488 85.0068663,82.5546173 84.043036,83.9446141 C82.7402511,85.8239816 81.5273929,87.5735396 79.666601,86.5879055 C78.0260143,85.7182959 76.5998684,84.4960178 75.4896189,83.0106741 C74.3793695,81.5253304 73.6138392,79.8136815 73.2472148,77.9977254 C72.8805903,76.1821139 72.922095,74.3082604 73.3694229,72.5107992 C73.8179037,70.7133381 74.6595259,69.036956 75.8343381,67.6023875 C77.0114562,66.168623 78.4917888,65.0125133 80.1704214,64.2172284 C81.849054,63.4219434 83.684482,63.0074717 85.5429681,63.0033066 C87.4014542,62.9993155 89.238035,63.4058608 90.9201264,64.1936788 C92.6022177,64.9816117 94.0883149,66.1312884 95.2711974,67.5598834 C96.8529859,69.4711864 97.830651,71.8071852 98.0808318,74.2723042 C98.3298598,76.7374233 97.8398743,79.2208077 96.6719795,81.4081571 C95.8072992,83.0244592 93.8934736,82.2616841 91.8735805,81.456405 C90.2906392,80.825737 88.6419821,80.1686476 87.3830077,80.6120681 Z M137.451453,134.317638 C146.839575,126.947209 150.28792,114.91627 150.28792,107.504485 C150.28792,101.645821 146.331143,103.489577 139.998225,106.613049 L139.641977,106.789958 C133.827867,109.658406 126.089555,113.476876 117.594936,113.476876 C109.099164,113.476876 101.360852,109.657257 95.549048,106.788809 C89.0109124,103.561949 84.9100221,101.537838 84.9100221,107.505634 C84.9100221,115.150616 88.5785722,127.705389 98.649215,135 C99.9773639,132.311907 101.847379,129.925937 104.14397,127.991429 C106.439408,126.056922 109.111846,124.616379 111.992961,123.758257 C112.998296,123.45958 114.033606,125.183866 115.094281,126.948357 C116.116909,128.651965 117.161443,130.393482 118.225576,130.393482 C119.357731,130.393482 120.471439,128.677238 121.55863,127.0012 C122.694244,125.250494 123.801034,123.54344 124.872085,123.884621 C130.263078,125.608906 134.772098,129.348112 137.451453,134.317638 Z" id="Shape" fill="#32343D"></path>
12
+ <path d="M137,134.589063 C132.085163,138.426668 125.552521,141 117.046296,141 C109.053457,141 102.801266,138.72867 98,135.271151 C99.3361959,132.585286 101.217068,130.200275 103.524411,128.267693 C105.831753,126.333962 108.518052,124.892851 111.41295,124.033925 C113.398122,123.447146 115.500342,130.666483 117.67673,130.666483 C120.00725,130.666483 122.252013,123.493078 124.358868,124.160238 C129.775503,125.884979 134.306748,129.622682 137,134.589063 Z" id="Path" fill="#FF323D" fillRule="nonzero"></path>
13
+ <path d="M64.7091655,90.6371141 C63.8188025,91.2251504 62.8188988,91.6348571 61.7668713,91.8425321 C60.7147288,92.0502071 59.6311738,92.0524645 58.5782258,91.8493041 C57.5252778,91.6450151 56.5236482,91.2398231 55.6307538,90.6563014 C54.7378594,90.0716511 53.971305,89.3210865 53.3749298,88.4452396 C52.7785546,87.5705215 52.3642101,86.5874511 52.1555996,85.555848 C51.9469891,84.5231163 51.9481398,83.4599105 52.1590515,82.4271787 C52.3699633,81.3955756 52.786609,80.4147626 53.3848253,79.5400444 C53.9831565,78.6662292 54.7514369,77.9166804 55.645597,77.3345131 C57.4440428,76.1637452 59.642335,75.7399301 61.7587018,76.1560702 C63.8750686,76.5722103 65.7370296,77.7943327 66.9366838,79.5547171 C68.1356477,81.3154401 68.5751911,83.4700685 68.1575098,85.5468187 C67.7398285,87.6246976 66.4994417,89.455398 64.7091655,90.6371141 Z M181.39746,90.6371141 C180.506867,91.2251504 179.506963,91.6348571 178.455281,91.8425321 C177.403599,92.0502071 176.319699,92.0524645 175.266866,91.8493041 C174.214033,91.6450151 173.211828,91.2398231 172.318933,90.6563014 C171.426039,90.0716511 170.659715,89.3210865 170.063685,88.4452396 C169.466504,87.5705215 169.052275,86.5874511 168.844009,85.555848 C168.635744,84.5231163 168.636895,83.4599105 168.847461,82.4271787 C169.058028,81.3955756 169.474559,80.4147626 170.07289,79.5400444 C170.671221,78.6662292 171.439847,77.9166804 172.333892,77.3345131 C174.132338,76.1637452 176.331205,75.7399301 178.447227,76.1560702 C180.563248,76.5722103 182.424979,77.7943327 183.625094,79.5547171 C184.824057,81.3154401 185.263601,83.4700685 184.84592,85.5468187 C184.428238,87.6246976 183.187852,89.455398 181.39746,90.6371141 Z" id="Shape" fill="#FFAD03"></path>
14
+ </g>
15
+ </g>
16
+ </svg>
17
+ );
18
+
19
+ export default HFLogo;
frontend/src/components/Logo/Logo.js ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from "react";
2
+ import { useNavigate, useSearchParams, useLocation } from "react-router-dom";
3
+ import { Box } from "@mui/material";
4
+ import HFLogo from "./HFLogo";
5
+ import { useLeaderboard } from "../../pages/LeaderboardPage/components/Leaderboard/context/LeaderboardContext";
6
+
7
+ const Logo = ({ height = "40px" }) => {
8
+   const navigate = useNavigate();
9
+   const [searchParams, setSearchParams] = useSearchParams();
10
+   const location = useLocation();
11
+   const { actions } = useLeaderboard();
12
+
13
+   const handleReset = () => {
14
+     // Reset all leaderboard state first
15
+     actions.resetAll();
16
+
17
+     // Then clean the URL in one go
18
+     if (
19
+       location.pathname !== "/" ||
20
+       searchParams.toString() !== "" ||
21
+       location.hash !== ""
22
+     ) {
23
+       window.history.replaceState(null, "", "/");
24
+       navigate("/", { replace: true, state: { skipUrlSync: true } });
25
+       setSearchParams({}, { replace: true, state: { skipUrlSync: true } });
26
+     }
27
+   };
28
+
29
+   return (
30
+     <Box
31
+       onClick={handleReset}
32
+       sx={{
33
+         height,
34
+         display: "flex",
35
+         alignItems: "center",
36
+         justifyContent: "center",
37
+         cursor: "pointer",
38
+         transition: "opacity 0.2s ease",
39
+         "&:hover": {
40
+           opacity: 0.8,
41
+         },
42
+       }}
43
+     >
44
+       <Box
45
+         sx={{
46
+           height: "100%",
47
+           aspectRatio: "95/88", // Aspect ratio of the original SVG (width/height)
48
+         }}
49
+       >
50
+         <HFLogo />
51
+       </Box>
52
+     </Box>
53
+   );
54
+ };
55
+
56
+ export default Logo;
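
A minimal usage sketch for this component (illustrative only, not part of the commit: Logo relies on a react-router context for useNavigate/useSearchParams and on the leaderboard context for useLeaderboard, so both must wrap it; the App layout and the LeaderboardProvider export name below are assumptions — only the context module path is taken from Logo.js's import):

// Hypothetical mount point for Logo; everything except the Logo import
// path and the LeaderboardContext module path is assumed for illustration.
import React from "react";
import { BrowserRouter } from "react-router-dom";
import Logo from "./components/Logo/Logo";
// Provider export name is an assumption; the module path matches Logo.js's import.
import { LeaderboardProvider } from "./pages/LeaderboardPage/components/Leaderboard/context/LeaderboardContext";

const App = () => (
  <BrowserRouter>
    <LeaderboardProvider>
      {/* Clicking the logo resets leaderboard state and navigates back to "/" */}
      <Logo height="32px" />
    </LeaderboardProvider>
  </BrowserRouter>
);

export default App;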