Spaces:
Running
on
Zero
Running
on
Zero
Add files
Browse files- .pre-commit-config.yaml +33 -0
- .python-version +1 -0
- .vscode/extensions.json +8 -0
- .vscode/settings.json +17 -0
- README.md +6 -3
- app.py +208 -0
- app_pr.py +403 -0
- pyproject.toml +54 -0
- requirements.txt +225 -0
- style.css +4 -0
- table.py +116 -0
- uv.lock +0 -0
.pre-commit-config.yaml
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
repos:
|
| 2 |
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
| 3 |
+
rev: v5.0.0
|
| 4 |
+
hooks:
|
| 5 |
+
- id: check-executables-have-shebangs
|
| 6 |
+
- id: check-json
|
| 7 |
+
- id: check-merge-conflict
|
| 8 |
+
- id: check-shebang-scripts-are-executable
|
| 9 |
+
- id: check-toml
|
| 10 |
+
- id: check-yaml
|
| 11 |
+
- id: end-of-file-fixer
|
| 12 |
+
- id: mixed-line-ending
|
| 13 |
+
args: ["--fix=lf"]
|
| 14 |
+
- id: requirements-txt-fixer
|
| 15 |
+
- id: trailing-whitespace
|
| 16 |
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
| 17 |
+
rev: v0.11.2
|
| 18 |
+
hooks:
|
| 19 |
+
- id: ruff
|
| 20 |
+
args: ["--fix"]
|
| 21 |
+
- id: ruff-format
|
| 22 |
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
| 23 |
+
rev: v1.15.0
|
| 24 |
+
hooks:
|
| 25 |
+
- id: mypy
|
| 26 |
+
args: ["--ignore-missing-imports"]
|
| 27 |
+
additional_dependencies:
|
| 28 |
+
[
|
| 29 |
+
"types-python-slugify",
|
| 30 |
+
"types-pytz",
|
| 31 |
+
"types-PyYAML",
|
| 32 |
+
"types-requests",
|
| 33 |
+
]
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.10
|
.vscode/extensions.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"recommendations": [
|
| 3 |
+
"ms-python.python",
|
| 4 |
+
"charliermarsh.ruff",
|
| 5 |
+
"streetsidesoftware.code-spell-checker",
|
| 6 |
+
"tamasfe.even-better-toml"
|
| 7 |
+
]
|
| 8 |
+
}
|
.vscode/settings.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"editor.formatOnSave": true,
|
| 3 |
+
"files.insertFinalNewline": false,
|
| 4 |
+
"[python]": {
|
| 5 |
+
"editor.defaultFormatter": "charliermarsh.ruff",
|
| 6 |
+
"editor.formatOnType": true,
|
| 7 |
+
"editor.codeActionsOnSave": {
|
| 8 |
+
"source.fixAll.ruff": "explicit",
|
| 9 |
+
"source.organizeImports": "explicit"
|
| 10 |
+
}
|
| 11 |
+
},
|
| 12 |
+
"[jupyter]": {
|
| 13 |
+
"files.insertFinalNewline": false
|
| 14 |
+
},
|
| 15 |
+
"notebook.output.scrolling": true,
|
| 16 |
+
"notebook.formatOnSave.enabled": true
|
| 17 |
+
}
|
README.md
CHANGED
|
@@ -1,12 +1,15 @@
|
|
| 1 |
---
|
| 2 |
title: ICLR2025
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 5.
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
title: ICLR2025
|
| 3 |
+
emoji: ⚡
|
| 4 |
+
colorFrom: red
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 5.25.2
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
hf_oauth: true
|
| 11 |
+
hf_oauth_scopes:
|
| 12 |
+
- write-discussions
|
| 13 |
---
|
| 14 |
|
| 15 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import polars as pl
|
| 5 |
+
|
| 6 |
+
from app_pr import demo as demo_pr
|
| 7 |
+
from table import df_orig
|
| 8 |
+
|
| 9 |
+
DESCRIPTION = "# ICLR 2025"
|
| 10 |
+
|
| 11 |
+
TUTORIAL = """\
|
| 12 |
+
#### Claiming Authorship for Papers on arXiv
|
| 13 |
+
|
| 14 |
+
If your ICLR 2025 paper is available on arXiv and listed in the table below, you can claim authorship by following these steps:
|
| 15 |
+
|
| 16 |
+
1. Find your paper in the table.
|
| 17 |
+
2. Click the link to the paper page in the table.
|
| 18 |
+
3. On that page, click your name.
|
| 19 |
+
4. Click **"Claim authorship"**.
|
| 20 |
+
- You'll be redirected to the *Papers* section of your Settings.
|
| 21 |
+
5. Confirm the request on the redirected page.
|
| 22 |
+
|
| 23 |
+
The admin team will review your request shortly.
|
| 24 |
+
Once confirmed, your paper page will be marked as verified, and you'll be able to add a project page and a GitHub repository.
|
| 25 |
+
|
| 26 |
+
If you need further help, check out the [guide here](https://huggingface.co/docs/hub/paper-pages).
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
#### Updating Missing or Incorrect Information in the Table
|
| 30 |
+
|
| 31 |
+
If you notice any missing or incorrect information in the table, feel free to submit a PR via the "Open PR" page, which you can find at the top right of this page.
|
| 32 |
+
"""
|
| 33 |
+
|
| 34 |
+
# TODO: remove this once https://github.com/gradio-app/gradio/issues/10916 https://github.com/gradio-app/gradio/issues/11001 https://github.com/gradio-app/gradio/issues/11002 are fixed # noqa: TD002, FIX002
|
| 35 |
+
NOTE = """\
|
| 36 |
+
Note: Sorting by upvotes or comments may not work correctly due to a known bug in Gradio.
|
| 37 |
+
"""
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# Columns pulled from the source table for the main page, in raw (pre-rename) order.
df_main = df_orig.select(
    "title",
    "authors_str",
    "openreview_md",
    "type",
    "paper_page_md",
    "upvotes",
    "num_comments",
    "project_page_md",
    "github_md",
    "Spaces",
    "Models",
    "Datasets",
    "claimed",
)

# Human-friendly header names for display; columns not listed keep their source names.
df_main = df_main.rename(
    {
        "title": "Title",
        "authors_str": "Authors",
        "openreview_md": "OpenReview",
        "type": "Type",
        "paper_page_md": "Paper page",
        "upvotes": "👍",
        "num_comments": "💬",
        "project_page_md": "Project page",
        "github_md": "GitHub",
    }
)

# Display metadata per column: (gradio datatype, CSS column width or None for auto).
# The key order here is canonical — update_df() sorts the selected columns by it.
COLUMN_INFO = {
    "Title": ("str", "40%"),
    "Authors": ("str", "20%"),
    "Type": ("str", None),
    "Paper page": ("markdown", "135px"),
    "👍": ("number", "50px"),
    "💬": ("number", "50px"),
    "OpenReview": ("markdown", None),
    "Project page": ("markdown", None),
    "GitHub": ("markdown", None),
    "Spaces": ("markdown", None),
    "Models": ("markdown", None),
    "Datasets": ("markdown", None),
    "claimed": ("markdown", None),
}


# Columns checked by default in the column selector ("Title" is always shown regardless).
DEFAULT_COLUMNS = [
    "Title",
    "Type",
    "Paper page",
    "👍",
    "💬",
    "OpenReview",
    "Project page",
    "GitHub",
    "Spaces",
    "Models",
]
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def update_num_papers(df: pl.DataFrame) -> str:
    """Return a "shown / total" summary, with a claimed-paper count when available."""
    shown, total = len(df), len(df_main)
    if "claimed" not in df.columns:
        return f"{shown} / {total}"
    n_claimed = df.select(pl.col("claimed").str.contains("✅").sum()).item()
    return f"{shown} / {total} ({n_claimed} claimed)"
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def update_df(
    title_search_query: str,
    presentation_type: str,
    column_names: list[str],
    case_insensitive: bool = True,
) -> gr.Dataframe:
    """Filter and project the main table for display.

    Args:
        title_search_query: Regex applied to the "Title" column; empty disables filtering.
        presentation_type: One of "(ALL)", "Oral", "Spotlight", "Poster"; "(ALL)" disables filtering.
        column_names: Columns selected by the user; "Title" is always included.
        case_insensitive: When True, the search regex is prefixed with "(?i)".

    Returns:
        A gr.Dataframe update with the filtered frame plus per-column datatypes and widths.

    Raises:
        gr.Error: When the search query is not a valid regex (surfaced from polars).
    """
    df = df_main.clone()
    # "Title" is always displayed regardless of the checkbox selection.
    column_names = ["Title", *column_names]

    if title_search_query:
        if case_insensitive:
            title_search_query = f"(?i){title_search_query}"
        try:
            df = df.filter(pl.col("Title").str.contains(title_search_query))
        except pl.exceptions.ComputeError as e:
            # Invalid user regex — show it as a Gradio error rather than crashing.
            raise gr.Error(str(e)) from e
    if presentation_type != "(ALL)":
        df = df.filter(pl.col("Type").str.contains(presentation_type))

    # Reorder the selected columns to the canonical order defined by COLUMN_INFO.
    sorted_column_names = [col for col in COLUMN_INFO if col in column_names]
    df = df.select(sorted_column_names)
    return gr.Dataframe(
        value=df,
        datatype=[COLUMN_INFO[col][0] for col in sorted_column_names],
        column_widths=[COLUMN_INFO[col][1] for col in sorted_column_names],
    )
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
# Main page UI: search/filter controls above a table of ICLR 2025 papers.
with gr.Blocks(css_paths="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Accordion(label="Tutorial", open=True):
        gr.Markdown(TUTORIAL)
    with gr.Group():
        search_title = gr.Textbox(label="Search title")
        presentation_type = gr.Radio(
            label="Presentation Type",
            choices=["(ALL)", "Oral", "Spotlight", "Poster"],
            value="(ALL)",
        )
        column_names = gr.CheckboxGroup(
            label="Columns",
            choices=[col for col in COLUMN_INFO if col != "Title"],
            value=[col for col in DEFAULT_COLUMNS if col != "Title"],
        )

    num_papers = gr.Textbox(label="Number of papers", value=update_num_papers(df_orig), interactive=False)

    gr.Markdown(NOTE)
    df = gr.Dataframe(
        value=df_main,
        # BUG FIX: COLUMN_INFO.values() yields (datatype, width) tuples, so the original
        # passed tuples like ("str", "40%") where gradio expects datatype strings.
        # Use only the datatype element, matching what update_df() does on every refresh.
        datatype=[COLUMN_INFO[col][0] for col in COLUMN_INFO],
        type="polars",
        row_count=(0, "dynamic"),
        show_row_numbers=True,
        interactive=False,
        max_height=1000,
        elem_id="table",
        column_widths=[COLUMN_INFO[col][1] for col in COLUMN_INFO],
    )

    inputs = [
        search_title,
        presentation_type,
        column_names,
    ]
    # Re-filter the table on any control change, then refresh the paper counter.
    gr.on(
        triggers=[
            search_title.submit,
            presentation_type.input,
            column_names.input,
        ],
        fn=update_df,
        inputs=inputs,
        outputs=df,
        api_name=False,
    ).then(
        fn=update_num_papers,
        inputs=df,
        outputs=num_papers,
        queue=False,
        api_name=False,
    )
    # Apply the default filter state once on page load.
    demo.load(
        fn=update_df,
        inputs=inputs,
        outputs=df,
        api_name=False,
    ).then(
        fn=update_num_papers,
        inputs=df,
        outputs=num_papers,
        queue=False,
        api_name=False,
    )


# The "Open PR" page (defined in app_pr.py) is mounted as a second route.
with demo.route("Open PR"):
    demo_pr.render()


if __name__ == "__main__":
    demo.queue(api_open=False).launch(show_api=False)
|
app_pr.py
ADDED
|
@@ -0,0 +1,403 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import datetime
|
| 2 |
+
import difflib
|
| 3 |
+
import json
|
| 4 |
+
import re
|
| 5 |
+
import tempfile
|
| 6 |
+
|
| 7 |
+
import gradio as gr
|
| 8 |
+
import polars as pl
|
| 9 |
+
from gradio_modal import Modal
|
| 10 |
+
from huggingface_hub import CommitOperationAdd, HfApi
|
| 11 |
+
|
| 12 |
+
from table import PATCH_REPO_ID, PATCH_REPO_PR_BRANCH, df_orig
|
| 13 |
+
|
| 14 |
+
# TODO: remove this once https://github.com/gradio-app/gradio/issues/11022 is fixed # noqa: FIX002, TD002
|
| 15 |
+
NOTE = """\
|
| 16 |
+
#### ⚠️ Note
|
| 17 |
+
You may encounter an issue when selecting table data after using the search bar.
|
| 18 |
+
This is due to a known bug in Gradio.
|
| 19 |
+
|
| 20 |
+
The issue typically occurs when multiple rows remain after filtering.
|
| 21 |
+
If only one row remains, the selection should work as expected.
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
api = HfApi()
|
| 25 |
+
|
| 26 |
+
# Columns shown in the PR-selection table. NOTE: "paper_id" must remain last —
# df_pr_row_selected() reads it positionally via evt.row_value[-1].
PR_VIEW_COLUMNS = [
    "title",
    "authors_str",
    "openreview_md",
    "arxiv_id",
    "github_md",
    "Spaces",
    "Models",
    "Datasets",
    "paper_id",
]
# Raw (unrendered) fields used to prefill the edit modal and to diff against submissions.
PR_RAW_COLUMNS = [
    "paper_id",
    "title",
    "authors",
    "arxiv_id",
    "project_page",
    "github",
    "space_ids",
    "model_ids",
    "dataset_ids",
]

# Prepend a clickable "📝" marker column; clicking it opens the edit modal.
df_pr_view = df_orig.with_columns(pl.lit("📝").alias("Fix")).select(["Fix", *PR_VIEW_COLUMNS])
df_pr_view = df_pr_view.with_columns(pl.col("arxiv_id").fill_null(""))
df_pr_raw = df_orig.select(PR_RAW_COLUMNS)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def df_pr_row_selected(
    evt: gr.SelectData,
) -> tuple[
    Modal,
    gr.Textbox,  # title
    gr.Textbox,  # authors
    gr.Textbox,  # arxiv_id
    gr.Textbox,  # project_page
    gr.Textbox,  # github
    gr.Textbox,  # space_ids
    gr.Textbox,  # model_ids
    gr.Textbox,  # dataset_ids
    dict | None,  # original_data
]:
    """Open the edit modal pre-filled with the clicked paper's raw data.

    Only a click on the "📝" marker cell (the "Fix" column) opens the modal;
    clicks on any other cell return no-op component updates so the UI is unchanged.
    """
    if evt.value != "📝":
        return (
            Modal(),
            gr.Textbox(),  # title
            gr.Textbox(),  # authors
            gr.Textbox(),  # arxiv_id
            gr.Textbox(),  # project_page
            gr.Textbox(),  # github
            gr.Textbox(),  # space_ids
            gr.Textbox(),  # model_ids
            gr.Textbox(),  # dataset_ids
            None,  # original_data
        )

    # "paper_id" is the last column of the view table (see PR_VIEW_COLUMNS ordering).
    paper_id = evt.row_value[-1]
    row = df_pr_raw.filter(pl.col("paper_id") == paper_id)
    # Assumes paper_id uniquely identifies one row in df_pr_raw — TODO confirm upstream.
    original_data = row.to_dicts()[0]
    authors = original_data["authors"]
    space_ids = original_data["space_ids"]
    model_ids = original_data["model_ids"]
    dataset_ids = original_data["dataset_ids"]
    # Multi-valued fields are rendered one item per line in the textboxes.
    return (
        Modal(visible=True),
        gr.Textbox(value=row["title"].item()),  # title
        gr.Textbox(value="\n".join(authors)),  # authors
        gr.Textbox(value=row["arxiv_id"].item()),  # arxiv_id
        gr.Textbox(value=row["project_page"].item()),  # project_page
        gr.Textbox(value=row["github"].item()),  # github
        gr.Textbox(value="\n".join(space_ids)),  # space_ids
        gr.Textbox(value="\n".join(model_ids)),  # model_ids
        gr.Textbox(value="\n".join(dataset_ids)),  # dataset_ids
        original_data,  # original_data
    )
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# Compiled once at import time; reused by the validators below.
URL_PATTERN = re.compile(r"^(https?://)?([a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}(:\d+)?(/.*)?$")
GITHUB_PATTERN = re.compile(r"^https://github\.com/[^/\s]+/[^/\s]+(/tree/[^/\s]+/[^/\s].*)?$")
REPO_ID_PATTERN = re.compile(r"^[a-zA-Z0-9_-]+/[a-zA-Z0-9_-]+$")
ARXIV_ID_PATTERN = re.compile(r"^\d{4}\.\d{4,5}$")


def is_valid_url(url: str) -> bool:
    """Check whether *url* looks like a web URL (scheme optional, dotted host required)."""
    return bool(URL_PATTERN.match(url))


def is_valid_github_url(url: str) -> bool:
    """Check whether *url* is a https://github.com/<owner>/<repo> link (optional /tree/ suffix)."""
    return bool(GITHUB_PATTERN.match(url))


def is_valid_repo_id(repo_id: str) -> bool:
    """Check whether *repo_id* matches the Hub's 'org_name/repo_name' shape."""
    return bool(REPO_ID_PATTERN.match(repo_id))


def is_valid_arxiv_id(arxiv_id: str) -> bool:
    """Check whether *arxiv_id* matches the modern numeric arXiv form NNNN.NNNN(N)."""
    return bool(ARXIV_ID_PATTERN.match(arxiv_id))
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def validate_pr_data(
    title_pr: str,
    authors_pr: str,
    arxiv_id_pr: str,
    project_page_pr: str,
    github_pr: str,
    space_ids: list[str],
    model_ids: list[str],
    dataset_ids: list[str],
) -> None:
    """Validate user-submitted PR form data, raising gr.Error on the first problem.

    Args:
        title_pr: Paper title; required.
        authors_pr: Newline-joined author list; required.
        arxiv_id_pr: Optional arXiv identifier (modern YYMM.NNNNN form).
        project_page_pr: Optional project-page URL.
        github_pr: Optional GitHub repository URL.
        space_ids: Hub Space IDs, each 'org_name/repo_name'.
        model_ids: Hub model IDs, each 'org_name/repo_name'.
        dataset_ids: Hub dataset IDs, each 'org_name/repo_name'.

    Raises:
        gr.Error: On the first field that fails validation.
    """
    if not title_pr:
        raise gr.Error("Title cannot be empty", print_exception=False)
    if not authors_pr:
        raise gr.Error("Authors cannot be empty", print_exception=False)

    if arxiv_id_pr and not is_valid_arxiv_id(arxiv_id_pr):
        # BUG FIX: modern arXiv IDs are YYMM.NNNNN (year+month), not a four-digit year.
        # The previous message showed "'YYYY.NNNNN' (e.g., '2023.01234')", which is not a
        # real arXiv ID and misled users into typing an unmatchable value.
        raise gr.Error(
            "Invalid arXiv ID format. Expected format: 'YYMM.NNNNN' (e.g., '2301.01234')", print_exception=False
        )
    if project_page_pr and not is_valid_url(project_page_pr):
        raise gr.Error("Project page must be a valid URL", print_exception=False)
    if github_pr and not is_valid_github_url(github_pr):
        raise gr.Error("GitHub must be a valid GitHub URL", print_exception=False)

    for repo_id in space_ids + model_ids + dataset_ids:
        if not is_valid_repo_id(repo_id):
            error_msg = f"Space/Model/Dataset ID must be in the format 'org_name/repo_name'. Got: {repo_id}"
            raise gr.Error(error_msg, print_exception=False)
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def format_submitted_data(
    title_pr: str,
    authors_pr: str,
    arxiv_id_pr: str,
    project_page_pr: str,
    github_pr: str,
    space_ids_pr: str,
    model_ids_pr: str,
    dataset_ids_pr: str,
) -> dict:
    """Parse the raw form fields into a validated record dict.

    Multi-valued fields are newline-separated in the form; blank lines are dropped.
    Raises gr.Error (via validate_pr_data) when any field is invalid.
    """

    def _non_blank_lines(text: str) -> list[str]:
        # One value per line; skip blank/whitespace-only lines.
        return [line for line in text.split("\n") if line.strip()]

    space_ids = _non_blank_lines(space_ids_pr)
    model_ids = _non_blank_lines(model_ids_pr)
    dataset_ids = _non_blank_lines(dataset_ids_pr)

    validate_pr_data(title_pr, authors_pr, arxiv_id_pr, project_page_pr, github_pr, space_ids, model_ids, dataset_ids)

    # Optional scalar fields are stored as None when left empty.
    return {
        "title": title_pr,
        "authors": _non_blank_lines(authors_pr),
        "arxiv_id": arxiv_id_pr or None,
        "project_page": project_page_pr or None,
        "github": github_pr or None,
        "space_ids": space_ids,
        "model_ids": model_ids,
        "dataset_ids": dataset_ids,
    }
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def preview_diff(
    title_pr: str,
    authors_pr: str,
    arxiv_id_pr: str,
    project_page_pr: str,
    github_pr: str,
    space_ids_pr: str,
    model_ids_pr: str,
    dataset_ids_pr: str,
    original_data: dict,
) -> tuple[gr.Markdown, gr.Button]:
    """Render a unified diff between the stored record and the edited form values.

    Returns the diff as a fenced ```diff``` Markdown block and makes the
    "Open PR" button visible.
    """
    submitted_data = format_submitted_data(
        title_pr,
        authors_pr,
        arxiv_id_pr,
        project_page_pr,
        github_pr,
        space_ids_pr,
        model_ids_pr,
        dataset_ids_pr,
    )
    # Put paper_id first so both JSON dumps share the same key order in the diff.
    submitted_data = {"paper_id": original_data["paper_id"], **submitted_data}

    before_lines = json.dumps(original_data, indent=2).splitlines()
    after_lines = json.dumps(submitted_data, indent=2).splitlines()
    diff_str = "\n".join(
        difflib.unified_diff(before_lines, after_lines, fromfile="before", tofile="after", lineterm="")
    )
    return gr.Markdown(value=f"```diff\n{diff_str}\n```"), gr.Button(visible=True)
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
def open_pr(
    title_pr: str,
    authors_pr: str,
    arxiv_id_pr: str,
    project_page_pr: str,
    github_pr: str,
    space_ids_pr: str,
    model_ids_pr: str,
    dataset_ids_pr: str,
    original_data: dict,
    oauth_token: gr.OAuthToken | None,
) -> gr.Markdown:
    """Open a PR on the patch dataset repo containing the user's changed fields.

    The changed fields (plus the paper_id, a human-readable diff, and a UTC
    timestamp) are written as one JSON file under data/ and committed with
    create_pr=True, on behalf of the logged-in user when a token is present.

    Returns:
        A visible gr.Markdown with the PR URL, or "" when nothing changed.
        (NOTE(review): the "" branch does not match the gr.Markdown annotation,
        though Gradio accepts a plain string as a Markdown update.)
    """
    submitted_data = format_submitted_data(
        title_pr,
        authors_pr,
        arxiv_id_pr,
        project_page_pr,
        github_pr,
        space_ids_pr,
        model_ids_pr,
        dataset_ids_pr,
    )

    # Record only the fields that actually differ from the stored record.
    diff_dict = {key: submitted_data[key] for key in submitted_data if submitted_data[key] != original_data[key]}

    if not diff_dict:
        gr.Info("No data to submit")
        return ""

    paper_id = original_data["paper_id"]
    diff_dict["paper_id"] = paper_id

    # Human-readable diff, stored alongside the structured changes for reviewers.
    original_json = json.dumps(original_data, indent=2)
    submitted_json = json.dumps(submitted_data, indent=2)
    diff = "\n".join(
        difflib.unified_diff(
            original_json.splitlines(),
            submitted_json.splitlines(),
            fromfile="before",
            tofile="after",
            lineterm="",
        )
    )
    diff_dict["diff"] = diff
    # Timezone-aware UTC timestamp; also used to make the uploaded filename unique.
    timestamp = datetime.datetime.now(datetime.timezone.utc)
    diff_dict["timestamp"] = timestamp.isoformat()

    # delete=False so the file path stays valid while the upload reads it;
    # flush() ensures the JSON is on disk before the commit is created.
    with tempfile.NamedTemporaryFile(suffix=".json", mode="w", delete=False) as f:
        json.dump(diff_dict, f, indent=2)
        f.flush()

        commit = CommitOperationAdd(f"data/{paper_id}--{timestamp.strftime('%Y-%m-%d-%H-%M-%S')}.json", f.name)
        res = api.create_commit(
            repo_id=PATCH_REPO_ID,
            operations=[commit],
            commit_message=f"Update {paper_id}",
            repo_type="dataset",
            revision=PATCH_REPO_PR_BRANCH,
            create_pr=True,
            # Commit as the logged-in user when OAuth provided a token.
            token=oauth_token.token if oauth_token else None,
        )
    return gr.Markdown(value=res.pr_url, visible=True)
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
def render_open_pr_page(profile: gr.OAuthProfile | None) -> dict:
    """Show the PR column only when the user is logged in (profile present)."""
    logged_in = profile is not None
    return gr.Column(visible=logged_in)
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
# "Open PR" page: a searchable paper table; clicking a row's 📝 cell opens an edit
# modal from which the user can preview a diff and open a PR on the patch repo.
# NOTE(review): nesting levels reconstructed from a flattened source — verify against
# the original file.
with gr.Blocks() as demo:
    gr.LoginButton()
    # Hidden until demo.load() confirms the user is logged in (render_open_pr_page).
    with gr.Column(visible=False) as open_pr_col:
        gr.Markdown(NOTE)
        df_pr = gr.Dataframe(
            value=df_pr_view,
            datatype=[
                "str",  # Fix
                "str",  # Title
                "str",  # Authors
                "markdown",  # openreview
                "str",  # arxiv_id
                "markdown",  # github
                "markdown",  # spaces
                "markdown",  # models
                "markdown",  # datasets
                "str",  # paper id
            ],
            column_widths=[
                "50px",  # Fix
                "40%",  # Title
                "20%",  # Authors
                None,  # openreview
                "100px",  # arxiv_id
                None,  # github
                None,  # spaces
                None,  # models
                None,  # datasets
                None,  # paper id
            ],
            type="polars",
            row_count=(0, "dynamic"),
            interactive=False,
            max_height=1000,
            show_search="search",
        )
        with Modal(visible=False) as pr_modal:
            with gr.Group():
                title_pr = gr.Textbox(label="Title")
                authors_pr = gr.Textbox(label="Authors")
                arxiv_id_pr = gr.Textbox(label="arXiv ID")
                project_page_pr = gr.Textbox(label="Project page")
                github_pr = gr.Textbox(label="GitHub")
                spaces_pr = gr.Textbox(
                    label="Spaces",
                    info="Enter one space ID (e.g., 'org_name/space_name') per line.",
                )
                models_pr = gr.Textbox(
                    label="Models",
                    info="Enter one model ID (e.g., 'org_name/model_name') per line.",
                )
                datasets_pr = gr.Textbox(
                    label="Datasets",
                    info="Enter one dataset ID (e.g., 'org_name/dataset_name') per line.",
                )
            # Holds the unedited record so diffs can be computed on submit.
            original_data = gr.State()
            preview_diff_button = gr.Button("Preview diff")
            diff_view = gr.Markdown()
            open_pr_button = gr.Button("Open PR", visible=False)
            pr_url = gr.Markdown(visible=False)

        # Reset the diff preview, PR button, and PR link whenever the modal closes.
        pr_modal.blur(
            fn=lambda: (None, gr.Button(visible=False), gr.Markdown(visible=False)),
            outputs=[diff_view, open_pr_button, pr_url],
        )

        # Row click → open the modal prefilled with that paper's raw data.
        df_pr.select(
            fn=df_pr_row_selected,
            outputs=[
                pr_modal,
                title_pr,
                authors_pr,
                arxiv_id_pr,
                project_page_pr,
                github_pr,
                spaces_pr,
                models_pr,
                datasets_pr,
                original_data,
            ],
        )
        preview_diff_button.click(
            fn=preview_diff,
            inputs=[
                title_pr,
                authors_pr,
                arxiv_id_pr,
                project_page_pr,
                github_pr,
                spaces_pr,
                models_pr,
                datasets_pr,
                original_data,
            ],
            outputs=[diff_view, open_pr_button],
        )
        open_pr_button.click(
            fn=open_pr,
            inputs=[
                title_pr,
                authors_pr,
                arxiv_id_pr,
                project_page_pr,
                github_pr,
                spaces_pr,
                models_pr,
                datasets_pr,
                original_data,
            ],
            outputs=pr_url,
        )

    demo.load(fn=render_open_pr_page, outputs=open_pr_col)


if __name__ == "__main__":
    demo.queue(api_open=False).launch(show_api=False)
|
pyproject.toml
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "iclr2025"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = ""
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.10"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"datasets>=3.5.0",
|
| 9 |
+
"gradio[oauth]>=5.25.2",
|
| 10 |
+
"gradio-modal>=0.0.4",
|
| 11 |
+
"hf-transfer>=0.1.9",
|
| 12 |
+
"polars>=1.27.1",
|
| 13 |
+
]
|
| 14 |
+
|
| 15 |
+
[tool.ruff]
|
| 16 |
+
line-length = 119
|
| 17 |
+
|
| 18 |
+
[tool.ruff.lint]
|
| 19 |
+
select = ["ALL"]
|
| 20 |
+
ignore = [
|
| 21 |
+
"COM812", # missing-trailing-comma
|
| 22 |
+
"D203", # one-blank-line-before-class
|
| 23 |
+
"D213", # multi-line-summary-second-line
|
| 24 |
+
"E501", # line-too-long
|
| 25 |
+
"SIM117", # multiple-with-statements
|
| 26 |
+
#
|
| 27 |
+
"D100", # undocumented-public-module
|
| 28 |
+
"D101", # undocumented-public-class
|
| 29 |
+
"D102", # undocumented-public-method
|
| 30 |
+
"D103", # undocumented-public-function
|
| 31 |
+
"D104", # undocumented-public-package
|
| 32 |
+
"D105", # undocumented-magic-method
|
| 33 |
+
"D107", # undocumented-public-init
|
| 34 |
+
"EM101", # raw-string-in-exception
|
| 35 |
+
"FBT001", # boolean-type-hint-positional-argument
|
| 36 |
+
"FBT002", # boolean-default-value-positional-argument
|
| 37 |
+
"PD901", # pandas-df-variable-name
|
| 38 |
+
"PGH003", # blanket-type-ignore
|
| 39 |
+
"PLR0913", # too-many-arguments
|
| 40 |
+
"PLR0915", # too-many-statements
|
| 41 |
+
"TRY003", # raise-vanilla-args
|
| 42 |
+
]
|
| 43 |
+
unfixable = [
|
| 44 |
+
"F401", # unused-import
|
| 45 |
+
]
|
| 46 |
+
|
| 47 |
+
[tool.ruff.lint.pydocstyle]
|
| 48 |
+
convention = "google"
|
| 49 |
+
|
| 50 |
+
[tool.ruff.lint.per-file-ignores]
|
| 51 |
+
"*.ipynb" = ["T201", "T203"]
|
| 52 |
+
|
| 53 |
+
[tool.ruff.format]
|
| 54 |
+
docstring-code-format = true
|
requirements.txt
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file was autogenerated by uv via the following command:
|
| 2 |
+
# uv pip compile pyproject.toml -o requirements.txt
|
| 3 |
+
aiofiles==24.1.0
|
| 4 |
+
# via gradio
|
| 5 |
+
aiohappyeyeballs==2.6.1
|
| 6 |
+
# via aiohttp
|
| 7 |
+
aiohttp==3.11.16
|
| 8 |
+
# via
|
| 9 |
+
# datasets
|
| 10 |
+
# fsspec
|
| 11 |
+
aiosignal==1.3.2
|
| 12 |
+
# via aiohttp
|
| 13 |
+
annotated-types==0.7.0
|
| 14 |
+
# via pydantic
|
| 15 |
+
anyio==4.9.0
|
| 16 |
+
# via
|
| 17 |
+
# gradio
|
| 18 |
+
# httpx
|
| 19 |
+
# starlette
|
| 20 |
+
async-timeout==5.0.1
|
| 21 |
+
# via aiohttp
|
| 22 |
+
attrs==25.3.0
|
| 23 |
+
# via aiohttp
|
| 24 |
+
authlib==1.5.2
|
| 25 |
+
# via gradio
|
| 26 |
+
certifi==2025.1.31
|
| 27 |
+
# via
|
| 28 |
+
# httpcore
|
| 29 |
+
# httpx
|
| 30 |
+
# requests
|
| 31 |
+
cffi==1.17.1
|
| 32 |
+
# via cryptography
|
| 33 |
+
charset-normalizer==3.4.1
|
| 34 |
+
# via requests
|
| 35 |
+
click==8.1.8
|
| 36 |
+
# via
|
| 37 |
+
# typer
|
| 38 |
+
# uvicorn
|
| 39 |
+
cryptography==44.0.2
|
| 40 |
+
# via authlib
|
| 41 |
+
datasets==3.5.0
|
| 42 |
+
# via iclr2025 (pyproject.toml)
|
| 43 |
+
dill==0.3.8
|
| 44 |
+
# via
|
| 45 |
+
# datasets
|
| 46 |
+
# multiprocess
|
| 47 |
+
exceptiongroup==1.2.2
|
| 48 |
+
# via anyio
|
| 49 |
+
fastapi==0.115.12
|
| 50 |
+
# via gradio
|
| 51 |
+
ffmpy==0.5.0
|
| 52 |
+
# via gradio
|
| 53 |
+
filelock==3.18.0
|
| 54 |
+
# via
|
| 55 |
+
# datasets
|
| 56 |
+
# huggingface-hub
|
| 57 |
+
frozenlist==1.5.0
|
| 58 |
+
# via
|
| 59 |
+
# aiohttp
|
| 60 |
+
# aiosignal
|
| 61 |
+
fsspec==2024.12.0
|
| 62 |
+
# via
|
| 63 |
+
# datasets
|
| 64 |
+
# gradio-client
|
| 65 |
+
# huggingface-hub
|
| 66 |
+
gradio==5.25.2
|
| 67 |
+
# via
|
| 68 |
+
# iclr2025 (pyproject.toml)
|
| 69 |
+
# gradio-modal
|
| 70 |
+
gradio-client==1.8.0
|
| 71 |
+
# via gradio
|
| 72 |
+
gradio-modal==0.0.4
|
| 73 |
+
# via iclr2025 (pyproject.toml)
|
| 74 |
+
groovy==0.1.2
|
| 75 |
+
# via gradio
|
| 76 |
+
h11==0.14.0
|
| 77 |
+
# via
|
| 78 |
+
# httpcore
|
| 79 |
+
# uvicorn
|
| 80 |
+
hf-transfer==0.1.9
|
| 81 |
+
# via iclr2025 (pyproject.toml)
|
| 82 |
+
httpcore==1.0.8
|
| 83 |
+
# via httpx
|
| 84 |
+
httpx==0.28.1
|
| 85 |
+
# via
|
| 86 |
+
# gradio
|
| 87 |
+
# gradio-client
|
| 88 |
+
# safehttpx
|
| 89 |
+
huggingface-hub==0.30.2
|
| 90 |
+
# via
|
| 91 |
+
# datasets
|
| 92 |
+
# gradio
|
| 93 |
+
# gradio-client
|
| 94 |
+
idna==3.10
|
| 95 |
+
# via
|
| 96 |
+
# anyio
|
| 97 |
+
# httpx
|
| 98 |
+
# requests
|
| 99 |
+
# yarl
|
| 100 |
+
itsdangerous==2.2.0
|
| 101 |
+
# via gradio
|
| 102 |
+
jinja2==3.1.6
|
| 103 |
+
# via gradio
|
| 104 |
+
markdown-it-py==3.0.0
|
| 105 |
+
# via rich
|
| 106 |
+
markupsafe==3.0.2
|
| 107 |
+
# via
|
| 108 |
+
# gradio
|
| 109 |
+
# jinja2
|
| 110 |
+
mdurl==0.1.2
|
| 111 |
+
# via markdown-it-py
|
| 112 |
+
multidict==6.4.3
|
| 113 |
+
# via
|
| 114 |
+
# aiohttp
|
| 115 |
+
# yarl
|
| 116 |
+
multiprocess==0.70.16
|
| 117 |
+
# via datasets
|
| 118 |
+
numpy==2.2.4
|
| 119 |
+
# via
|
| 120 |
+
# datasets
|
| 121 |
+
# gradio
|
| 122 |
+
# pandas
|
| 123 |
+
orjson==3.10.16
|
| 124 |
+
# via gradio
|
| 125 |
+
packaging==24.2
|
| 126 |
+
# via
|
| 127 |
+
# datasets
|
| 128 |
+
# gradio
|
| 129 |
+
# gradio-client
|
| 130 |
+
# huggingface-hub
|
| 131 |
+
pandas==2.2.3
|
| 132 |
+
# via
|
| 133 |
+
# datasets
|
| 134 |
+
# gradio
|
| 135 |
+
pillow==11.2.1
|
| 136 |
+
# via gradio
|
| 137 |
+
polars==1.27.1
|
| 138 |
+
# via iclr2025 (pyproject.toml)
|
| 139 |
+
propcache==0.3.1
|
| 140 |
+
# via
|
| 141 |
+
# aiohttp
|
| 142 |
+
# yarl
|
| 143 |
+
pyarrow==19.0.1
|
| 144 |
+
# via datasets
|
| 145 |
+
pycparser==2.22
|
| 146 |
+
# via cffi
|
| 147 |
+
pydantic==2.11.3
|
| 148 |
+
# via
|
| 149 |
+
# fastapi
|
| 150 |
+
# gradio
|
| 151 |
+
pydantic-core==2.33.1
|
| 152 |
+
# via pydantic
|
| 153 |
+
pydub==0.25.1
|
| 154 |
+
# via gradio
|
| 155 |
+
pygments==2.19.1
|
| 156 |
+
# via rich
|
| 157 |
+
python-dateutil==2.9.0.post0
|
| 158 |
+
# via pandas
|
| 159 |
+
python-multipart==0.0.20
|
| 160 |
+
# via gradio
|
| 161 |
+
pytz==2025.2
|
| 162 |
+
# via pandas
|
| 163 |
+
pyyaml==6.0.2
|
| 164 |
+
# via
|
| 165 |
+
# datasets
|
| 166 |
+
# gradio
|
| 167 |
+
# huggingface-hub
|
| 168 |
+
requests==2.32.3
|
| 169 |
+
# via
|
| 170 |
+
# datasets
|
| 171 |
+
# huggingface-hub
|
| 172 |
+
rich==14.0.0
|
| 173 |
+
# via typer
|
| 174 |
+
ruff==0.11.5
|
| 175 |
+
# via gradio
|
| 176 |
+
safehttpx==0.1.6
|
| 177 |
+
# via gradio
|
| 178 |
+
semantic-version==2.10.0
|
| 179 |
+
# via gradio
|
| 180 |
+
shellingham==1.5.4
|
| 181 |
+
# via typer
|
| 182 |
+
six==1.17.0
|
| 183 |
+
# via python-dateutil
|
| 184 |
+
sniffio==1.3.1
|
| 185 |
+
# via anyio
|
| 186 |
+
starlette==0.46.2
|
| 187 |
+
# via
|
| 188 |
+
# fastapi
|
| 189 |
+
# gradio
|
| 190 |
+
tomlkit==0.13.2
|
| 191 |
+
# via gradio
|
| 192 |
+
tqdm==4.67.1
|
| 193 |
+
# via
|
| 194 |
+
# datasets
|
| 195 |
+
# huggingface-hub
|
| 196 |
+
typer==0.15.2
|
| 197 |
+
# via gradio
|
| 198 |
+
typing-extensions==4.13.2
|
| 199 |
+
# via
|
| 200 |
+
# anyio
|
| 201 |
+
# fastapi
|
| 202 |
+
# gradio
|
| 203 |
+
# gradio-client
|
| 204 |
+
# huggingface-hub
|
| 205 |
+
# multidict
|
| 206 |
+
# pydantic
|
| 207 |
+
# pydantic-core
|
| 208 |
+
# rich
|
| 209 |
+
# typer
|
| 210 |
+
# typing-inspection
|
| 211 |
+
# uvicorn
|
| 212 |
+
typing-inspection==0.4.0
|
| 213 |
+
# via pydantic
|
| 214 |
+
tzdata==2025.2
|
| 215 |
+
# via pandas
|
| 216 |
+
urllib3==2.4.0
|
| 217 |
+
# via requests
|
| 218 |
+
uvicorn==0.34.1
|
| 219 |
+
# via gradio
|
| 220 |
+
websockets==15.0.1
|
| 221 |
+
# via gradio-client
|
| 222 |
+
xxhash==3.5.0
|
| 223 |
+
# via datasets
|
| 224 |
+
yarl==1.19.0
|
| 225 |
+
# via aiohttp
|
style.css
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
h1 {
|
| 2 |
+
text-align: center;
|
| 3 |
+
display: block;
|
| 4 |
+
}
|
table.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import datasets
|
| 2 |
+
import polars as pl
|
| 3 |
+
|
| 4 |
+
# Hugging Face repo IDs backing the conference paper table.
BASE_REPO_ID = "ai-conferences/ICLR2025"  # base paper metadata, loaded as the main table below
PATCH_REPO_ID = "ai-conferences/ICLR2025-patches"  # append-only patch records merged over the base data
PATCH_REPO_PR_BRANCH = "raw-jsons"  # NOTE(review): unused in this module — presumably consumed elsewhere (app_pr.py?); confirm
PAPER_PAGE_REPO_ID = "hysts-bot-data/paper-pages-slim"  # HF paper-page info, joined in on "arxiv_id"
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def get_patch_latest_values(
    df: pl.DataFrame, all_columns: list[str], id_col: str, timestamp_col: str = "timestamp"
) -> pl.DataFrame:
    """Collapse a patch log into one row per id holding the latest value of each column.

    Each row of ``df`` is a (possibly partial) patch: non-null cells are updates to the
    record identified by ``id_col``, ordered by ``timestamp_col``. The result has one row
    per id where every column carries its most recent non-null patch value; columns that
    were never patched are null.

    Args:
        df: Patch records; must contain ``id_col`` and ``timestamp_col``.
        all_columns: Full column layout of the target table (including ``id_col``).
        id_col: Name of the record-id column.
        timestamp_col: Name of the patch-timestamp column.

    Returns:
        A DataFrame with ``id_col`` first, followed by the remaining ``all_columns``.
    """
    df = df.sort(timestamp_col)
    update_columns = [col for col in df.columns if col not in (id_col, timestamp_col)]

    # Long format: one row per (timestamp, id, column, value); null value == "no update".
    melted = df.unpivot(on=update_columns, index=[timestamp_col, id_col])
    melted = melted.drop_nulls()

    # Keep only the newest value per (id, column), then pivot back to wide format.
    latest_rows = (
        melted.sort(timestamp_col)
        .group_by([id_col, "variable"])
        .agg(pl.col("value").last())
        .pivot("variable", index=id_col, values="value")
    )

    # Add null columns for anything never patched so the final select() cannot
    # fail on a missing column.
    # Fix: compare against the id_col parameter, not the stale literal "id"
    # (previously only accidentally harmless because the pivot index already
    # contains id_col, so the membership check short-circuited the bug).
    for col in all_columns:
        if col != id_col and col not in latest_rows.columns:
            latest_rows = latest_rows.with_columns(pl.lit(None).alias(col))

    return latest_rows.select([id_col] + [col for col in all_columns if col != id_col])
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def format_author_claim_ratio(row: dict) -> str:
    """Render a "linked/total" author-claim ratio, with a check mark when any author is linked.

    Returns an empty string when either count is missing (null), e.g. for papers
    without paper-page data.
    """
    linked = row["n_linked_authors"]
    total = row["n_authors"]

    # No counts available — render nothing rather than a bogus ratio.
    if linked is None or total is None:
        return ""

    suffix = " ✅" if linked > 0 else ""
    return f"{linked}/{total}{suffix}"
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# ---------------------------------------------------------------------------
# Module-level pipeline: build the display table (df_orig) at import time.
# Runs network I/O via datasets.load_dataset; statement order matters because
# each step rewrites df_orig in place.
# ---------------------------------------------------------------------------

# Load the base paper list, normalize column names, and seed empty list
# columns that the patch merge below may overwrite.
df_orig = (
    datasets.load_dataset(BASE_REPO_ID, split="train")
    .to_polars()
    .rename({"paper_url": "openreview", "submission_number": "paper_id"})
    .with_columns(
        pl.lit([], dtype=pl.List(pl.Utf8)).alias(col_name) for col_name in ["space_ids", "model_ids", "dataset_ids"]
    )
)
# Paper-page metadata (upvotes, comments, author usernames, ...) keyed by arxiv_id;
# heavy text columns are dropped before the join.
df_paper_page = (
    datasets.load_dataset(PAPER_PAGE_REPO_ID, split="train")
    .to_polars()
    .drop(["summary", "author_names", "ai_keywords"])
)
df_orig = df_orig.join(df_paper_page, on="arxiv_id", how="left")

# Merge in the newest patch value per (paper_id, column): after the left join,
# patched values arrive in "<col>_right" columns and win via coalesce.
df_patches = datasets.load_dataset(PATCH_REPO_ID, revision="main", split="train").to_polars().drop("diff")
df_patches = get_patch_latest_values(df_patches, df_orig.columns, id_col="paper_id", timestamp_col="timestamp")
df_orig = (
    df_orig.join(df_patches, on="paper_id", how="left")
    .with_columns(
        [pl.coalesce([pl.col(col + "_right"), pl.col(col)]).alias(col) for col in df_orig.columns if col != "paper_id"]
    )
    .select(df_orig.columns)
)

# format authors
df_orig = df_orig.with_columns(pl.col("authors").list.join(", ").alias("authors_str"))
# format links
df_orig = df_orig.with_columns(
    [
        pl.format("[link]({})", pl.col(col)).fill_null("").alias(f"{col}_md")
        for col in ["openreview", "project_page", "github"]
    ]
)
# format paper page link
# Papers without an arxiv_id get a null URL, which pl.format propagates and
# fill_null("") blanks out in the markdown column.
df_orig = df_orig.with_columns(
    (pl.lit("https://huggingface.co/papers/") + pl.col("arxiv_id")).alias("paper_page")
).with_columns(pl.format("[{}]({})", pl.col("arxiv_id"), pl.col("paper_page")).fill_null("").alias("paper_page_md"))

# count authors
df_orig = df_orig.with_columns(pl.col("authors").list.len().alias("n_authors"))
# Count authors with a linked HF account (non-null username); a null list
# (no paper-page data) yields a null count.
df_orig = df_orig.with_columns(
    pl.col("author_usernames")
    .map_elements(lambda lst: sum(x is not None for x in lst) if lst is not None else None, return_dtype=pl.Int64)
    .alias("n_linked_authors")
)
# Human-readable "claimed" column, e.g. "2/5 ✅".
df_orig = df_orig.with_columns(
    pl.struct(["n_linked_authors", "n_authors"])
    .map_elements(format_author_claim_ratio, return_dtype=pl.Utf8)
    .alias("claimed")
)

# TODO: Fix this once https://github.com/gradio-app/gradio/issues/10916 is fixed  # noqa: FIX002, TD002
# format numbers as strings
df_orig = df_orig.with_columns(
    [pl.col(col).cast(pl.Utf8).fill_null("").alias(col) for col in ["upvotes", "num_comments"]]
)

# format spaces, models, datasets
# NOTE(review): the lambda closes over the loop variable base_url (hence noqa: B023);
# this appears safe only because map_elements evaluates eagerly on each iteration of
# an eager DataFrame — confirm before converting this pipeline to lazy.
for repo_id_col, markdown_col, base_url in [
    ("space_ids", "Spaces", "https://huggingface.co/spaces/"),
    ("model_ids", "Models", "https://huggingface.co/"),
    ("dataset_ids", "Datasets", "https://huggingface.co/datasets/"),
]:
    df_orig = df_orig.with_columns(
        pl.col(repo_id_col)
        .map_elements(
            lambda lst: "\n".join([f"[link]({base_url}{x})" for x in lst]) if lst is not None else None,  # noqa: B023
            return_dtype=pl.Utf8,
        )
        .fill_null("")
        .alias(markdown_col)
    )
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|