Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Add comments to generated config
Browse files
- yourbench_space/config.py +19 -4
- yourbench_space/utils.py +17 -0
yourbench_space/config.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
-
import
|
2 |
from loguru import logger
|
3 |
|
4 |
from yourbench_space import PATH
|
|
|
5 |
|
6 |
|
7 |
def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
|
@@ -82,10 +83,24 @@ def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
|
|
82 |
}
|
83 |
|
84 |
|
85 |
-
def save_yaml_file(config:
|
86 |
-
"""Saves the given config dictionary to a YAML file"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
with open(path, "w") as file:
|
88 |
-
yaml.dump(
|
|
|
89 |
return path
|
90 |
|
91 |
|
|
|
1 |
+
from ruamel.yaml import YAML
|
2 |
from loguru import logger
|
3 |
|
4 |
from yourbench_space import PATH
|
5 |
+
from yourbench_space.utils import to_commentable_yaml
|
6 |
|
7 |
|
8 |
def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
|
|
|
83 |
}
|
84 |
|
85 |
|
86 |
+
def save_yaml_file(config: dict, path: str):
|
87 |
+
"""Saves the given config dictionary to a YAML file with helpful comments."""
|
88 |
+
yaml = YAML()
|
89 |
+
yaml.indent(mapping=2, sequence=4, offset=2)
|
90 |
+
|
91 |
+
config_cm = to_commentable_yaml(config)
|
92 |
+
|
93 |
+
# Now we can add inline comments
|
94 |
+
ingestion = config_cm["pipeline"]["ingestion"]
|
95 |
+
ingestion.yaml_set_comment_before_after_key("source_documents_dir", before="⚠️ Change this path to match your local directory")
|
96 |
+
ingestion.yaml_set_comment_before_after_key("output_dir", before="⚠️ This is where ingested data will be saved")
|
97 |
+
|
98 |
+
upload = config_cm["pipeline"]["upload_ingest_to_hub"]
|
99 |
+
upload.yaml_set_comment_before_after_key("source_documents_dir", before="⚠️ Same as output_dir from ingestion — adjust as needed")
|
100 |
+
|
101 |
with open(path, "w") as file:
|
102 |
+
yaml.dump(config_cm, file)
|
103 |
+
|
104 |
return path
|
105 |
|
106 |
|
yourbench_space/utils.py
CHANGED
@@ -5,6 +5,7 @@ import shutil
|
|
5 |
import pathlib
|
6 |
import subprocess
|
7 |
from typing import List, Union, Optional
|
|
|
8 |
|
9 |
import pandas as pd
|
10 |
from loguru import logger
|
@@ -34,6 +35,22 @@ STAGE_DISPLAY_MAP = {
|
|
34 |
"lighteval": "Generate Lighteval Subset",
|
35 |
}
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
def map_stage_names(stages: list[str]) -> list[str]:
|
39 |
return [STAGE_DISPLAY_MAP.get(stage, stage) for stage in stages]
|
|
|
5 |
import pathlib
|
6 |
import subprocess
|
7 |
from typing import List, Union, Optional
|
8 |
+
from ruamel.yaml.comments import CommentedMap, CommentedSeq
|
9 |
|
10 |
import pandas as pd
|
11 |
from loguru import logger
|
|
|
35 |
"lighteval": "Generate Lighteval Subset",
|
36 |
}
|
37 |
|
38 |
+
def to_commentable_yaml(obj):
|
39 |
+
"""
|
40 |
+
Recursively converts standard Python dicts and lists into
|
41 |
+
ruamel.yaml's CommentedMap and CommentedSeq so that comments
|
42 |
+
can be attached when dumping YAML
|
43 |
+
"""
|
44 |
+
# Convert dict to CommentedMap with recursively processed values
|
45 |
+
if isinstance(obj, dict):
|
46 |
+
return CommentedMap({k: to_commentable_yaml(v) for k, v in obj.items()})
|
47 |
+
|
48 |
+
# Convert list to CommentedSeq with recursively processed elements
|
49 |
+
elif isinstance(obj, list):
|
50 |
+
return CommentedSeq([to_commentable_yaml(i) for i in obj])
|
51 |
+
|
52 |
+
# Return non-container values as-is
|
53 |
+
return obj
|
54 |
|
55 |
def map_stage_names(stages: list[str]) -> list[str]:
|
56 |
return [STAGE_DISPLAY_MAP.get(stage, stage) for stage in stages]
|