feat: update about session
Files changed:
- .gitignore +2 -0
- app.py +4 -0
- src/about.py +58 -35
- src/static/MacBench_logo.png +0 -0
- src/static/MacBench_logo_black_wbg.png +0 -0
- src/static/MacBench_logo_white_wbg.png +0 -0
- src/static/MacBench_white.svg +31 -0
.gitignore
CHANGED
@@ -11,3 +11,5 @@ eval-results/
 eval-queue-bk/
 eval-results-bk/
 logs/
+
+
app.py
CHANGED
@@ -1,4 +1,5 @@
 import json
+import os
 import time
 from pathlib import Path
 
@@ -23,6 +24,8 @@ from src.submission.check_validity import (
 )
 from src.submission.submit import update_dataset_with_scores
 
+STATIC_DIR = str(Path(__file__).parent / "src" / "static")
+
 # Global state for leaderboard data
 current_leaderboard_df = None
 
@@ -102,6 +105,7 @@ def process_submission(
 
 # Create the Gradio interface
 demo = gr.Blocks().queue()
+demo.static_dir = STATIC_DIR
 
 with demo:
     gr.HTML(TITLE)
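The `demo.static_dir = STATIC_DIR` assignment above only attaches a plain Python attribute to the `Blocks` object; Gradio does not read a `static_dir` attribute on its own. Below is a minimal sketch of how the logo files added under `src/static` could be exposed to the UI, assuming a Gradio version where `launch(allowed_paths=...)` is available; the `gr.Image` usage is illustrative and not part of this commit.

```python
from pathlib import Path

import gradio as gr

# Directory added in this commit; the PNG name matches one of the files added below.
STATIC_DIR = Path(__file__).parent / "src" / "static"

demo = gr.Blocks()

with demo:
    # Render the logo directly from disk; Gradio copies the file into its cache.
    gr.Image(
        value=str(STATIC_DIR / "MacBench_logo.png"),
        show_label=False,
        interactive=False,
    )

if __name__ == "__main__":
    # allowed_paths whitelists extra directories Gradio may serve files from.
    demo.launch(allowed_paths=[str(STATIC_DIR)])
```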
src/about.py
CHANGED
@@ -14,51 +14,71 @@ NUM_FEWSHOT = 0 # Change with your few shot
 
 
 # Your leaderboard name
-TITLE = """<h1 align="center"
+TITLE = """<h1 align="center">👩🔬 MaCBench Leaderboard </h1>"""
 
-TITLE_MARKDOWN_DESCRIPTION = """
+TITLE_MARKDOWN_DESCRIPTION = """Can VLMs assist in scientific discovery?"""
 
 ABOUT_TEXT = """
-
-
-
-
-
-
-
-
-
-
-##
-
+<h1 align="center">
+Can VLMs assist in scientific discovery?
+</h1>
+
+*MaCBench* is a benchmark for multimodal language models in chemistry and materials research. We manually curated a benchmark of tasks spanning the entire scientific lifecycle, from data extraction and experiment execution to data analysis.
+
+This repository contains the reports and is used to collect new contributions.
+The benchmark dataset itself is hosted on [HuggingFace](https://huggingface.co/datasets/jablonkagroup/MaCBench) and can be run using the [*ChemBench* benchmark engine](https://github.com/lamalab-org/chembench). You can find more details on how to do so in the [*ChemBench* documentation](https://lamalab-org.github.io/chembench/).
+
+
+## Running *MaCBench* Evaluation 🚀
+
+*MaCBench* can be run using the [*ChemBench*](https://github.com/lamalab-org/chembench) pipeline; all that is needed is a short run script. The following example runs the benchmark with the `gpt-4o` model.
+
 ```python
-from chembench.evaluate import ChemBenchmark
-from chembench.prompter import PrompterBuilder
-from chembench.
+from chembench.evaluate import ChemBenchmark, save_topic_reports
+from chembench.prompter import PrompterBuilder, PrompterPipeline
+from chembench.utils import (
+    enable_caching,
+    enable_logging,
+)
 from dotenv import load_dotenv
-
+
+# Load environment variables
+load_dotenv("../../.env", override=True)
+
+# Enable logging and caching
 enable_logging()
-
-
+enable_caching()
+
+# Define the prompter object, specifying the model and the "multimodal_instruction" prompt type
 prompter = PrompterBuilder.from_model_object(
-
+    "openai/gpt-4o-2024-08-06", prompt_type="multimodal_instruction"
 )
-
-
-)
-
+
+# Load the benchmark from HuggingFace
+benchmark = ChemBenchmark.from_huggingface("jablonkagroup/MaCBench")
+
+# Run the benchmark with a batch size of 1
+results = benchmark.bench(prompter, batch_size=1, model_kwargs={"temperature": 0})
+
+benchmark.submit(results)  # submit results to the leaderboard
 ```
-
+
+
+For more details, check out our [documentation](https://lamalab-org.github.io/chembench/getting_started/#how-to-benchmark-on-multi-modal-tasks).
+
 ## Scientific Foundation 📚
-
+For detailed methodology, check our papers:
+- [*MaCBench* Paper](https://arxiv.org/pdf/2411.16955)
 - [*ChemBench* Paper](https://arxiv.org/abs/2404.01475)
-
+
+
 Ready to elevate your chemistry AI research? 🧬
-
+
+*MaCBench* is open-source software licensed under the MIT license.
 Its development is led by the [Lab for AI for Materials (LamaLab)](https://jablonkagroup.uni-jena.de/) at the [University of Jena](https://www.uni-jena.de/) and the [Helmholtz Institute for Polymers in Energy Applications Jena](https://www.hipole-jena.de/).
 ## Connect With Us 🔗
 <div class="social-links">
-  <a href="https://github.com/lamalab-org/
+  <a href="https://github.com/lamalab-org/macbench.git" target="_blank">
     <img src="https://raw.githubusercontent.com/FortAwesome/Font-Awesome/master/svgs/brands/github.svg" width="20" height="20" alt="GitHub"/> GitHub
   </a>
   <a href="https://twitter.com/jablonkagroup" target="_blank">
@@ -93,10 +113,13 @@ Its development is led by the [Lab for AI for Materials (LamaLab)](https://jablo
 
 
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
-CITATION_BUTTON_TEXT = r"""@
-
-
-
-
+CITATION_BUTTON_TEXT = r"""@misc{alampara2024probinglimitationsmultimodallanguage,
+      title={Probing the limitations of multimodal language models for chemistry and materials research},
+      author={Nawaf Alampara and Mara Schilling-Wilhelmi and Martiño Ríos-García and Indrajeet Mandal and Pranav Khetarpal and Hargun Singh Grover and N. M. Anoop Krishnan and Kevin Maik Jablonka},
+      year={2024},
+      eprint={2411.16955},
+      archivePrefix={arXiv},
+      primaryClass={cs.LG},
+      url={https://arxiv.org/abs/2411.16955},
 }
 """
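The constants defined in `src/about.py` are consumed by `app.py`; only the `gr.HTML(TITLE)` call is visible in the hunks above. The sketch below shows one common way such constants are wired into a leaderboard `Blocks` layout; the tab, accordion, and textbox arrangement is an assumption, not code taken from this repository.

```python
import gradio as gr

from src.about import (
    ABOUT_TEXT,
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    TITLE,
    TITLE_MARKDOWN_DESCRIPTION,
)

demo = gr.Blocks()

with demo:
    gr.HTML(TITLE)                           # matches the gr.HTML(TITLE) call in app.py
    gr.Markdown(TITLE_MARKDOWN_DESCRIPTION)  # one-line tagline under the title

    with gr.Tab("About"):
        gr.Markdown(ABOUT_TEXT)              # the long markdown block edited in this commit

    with gr.Accordion("Citation", open=False):
        gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            lines=9,
            show_copy_button=True,
        )

if __name__ == "__main__":
    demo.launch()
```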
src/static/MacBench_logo.png
ADDED
src/static/MacBench_logo_black_wbg.png
ADDED
src/static/MacBench_logo_white_wbg.png
ADDED
src/static/MacBench_white.svg
ADDED