Spaces:
Runtime error
Runtime error
Migrate from yapf to black
Browse files
- .pre-commit-config.yaml +26 -12
- .style.yapf +0 -5
- .vscode/settings.json +11 -8
- app.py +29 -38
- scheduler.py +20 -21
.pre-commit-config.yaml
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
repos:
|
2 |
- repo: https://github.com/pre-commit/pre-commit-hooks
|
3 |
-
rev: v4.
|
4 |
hooks:
|
5 |
- id: check-executables-have-shebangs
|
6 |
- id: check-json
|
@@ -8,29 +8,43 @@ repos:
|
|
8 |
- id: check-shebang-scripts-are-executable
|
9 |
- id: check-toml
|
10 |
- id: check-yaml
|
11 |
-
- id: double-quote-string-fixer
|
12 |
- id: end-of-file-fixer
|
13 |
- id: mixed-line-ending
|
14 |
-
args: [
|
15 |
- id: requirements-txt-fixer
|
16 |
- id: trailing-whitespace
|
17 |
- repo: https://github.com/myint/docformatter
|
18 |
-
rev: v1.
|
19 |
hooks:
|
20 |
- id: docformatter
|
21 |
-
args: [
|
22 |
- repo: https://github.com/pycqa/isort
|
23 |
rev: 5.12.0
|
24 |
hooks:
|
25 |
- id: isort
|
|
|
26 |
- repo: https://github.com/pre-commit/mirrors-mypy
|
27 |
-
rev:
|
28 |
hooks:
|
29 |
- id: mypy
|
30 |
-
args: [
|
31 |
-
additional_dependencies: [
|
32 |
-
- repo: https://github.com/
|
33 |
-
rev:
|
34 |
hooks:
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
repos:
|
2 |
- repo: https://github.com/pre-commit/pre-commit-hooks
|
3 |
+
rev: v4.4.0
|
4 |
hooks:
|
5 |
- id: check-executables-have-shebangs
|
6 |
- id: check-json
|
|
|
8 |
- id: check-shebang-scripts-are-executable
|
9 |
- id: check-toml
|
10 |
- id: check-yaml
|
|
|
11 |
- id: end-of-file-fixer
|
12 |
- id: mixed-line-ending
|
13 |
+
args: ["--fix=lf"]
|
14 |
- id: requirements-txt-fixer
|
15 |
- id: trailing-whitespace
|
16 |
- repo: https://github.com/myint/docformatter
|
17 |
+
rev: v1.7.5
|
18 |
hooks:
|
19 |
- id: docformatter
|
20 |
+
args: ["--in-place"]
|
21 |
- repo: https://github.com/pycqa/isort
|
22 |
rev: 5.12.0
|
23 |
hooks:
|
24 |
- id: isort
|
25 |
+
args: ["--profile", "black"]
|
26 |
- repo: https://github.com/pre-commit/mirrors-mypy
|
27 |
+
rev: v1.5.1
|
28 |
hooks:
|
29 |
- id: mypy
|
30 |
+
args: ["--ignore-missing-imports"]
|
31 |
+
additional_dependencies: ["types-python-slugify", "types-requests", "types-PyYAML"]
|
32 |
+
- repo: https://github.com/psf/black
|
33 |
+
rev: 23.7.0
|
34 |
hooks:
|
35 |
+
- id: black
|
36 |
+
language_version: python3.10
|
37 |
+
args: ["--line-length", "119"]
|
38 |
+
- repo: https://github.com/kynan/nbstripout
|
39 |
+
rev: 0.6.1
|
40 |
+
hooks:
|
41 |
+
- id: nbstripout
|
42 |
+
args: ["--extra-keys", "metadata.interpreter metadata.kernelspec cell.metadata.pycharm"]
|
43 |
+
- repo: https://github.com/nbQA-dev/nbQA
|
44 |
+
rev: 1.7.0
|
45 |
+
hooks:
|
46 |
+
- id: nbqa-black
|
47 |
+
- id: nbqa-pyupgrade
|
48 |
+
args: ["--py37-plus"]
|
49 |
+
- id: nbqa-isort
|
50 |
+
args: ["--float-to-top"]
|
.style.yapf
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
[style]
|
2 |
-
based_on_style = pep8
|
3 |
-
blank_line_before_nested_class_or_def = false
|
4 |
-
spaces_before_comment = 2
|
5 |
-
split_before_logical_operator = true
|
|
|
|
|
|
|
|
|
|
|
|
.vscode/settings.json
CHANGED
@@ -1,18 +1,21 @@
|
|
1 |
{
|
2 |
-
"python.linting.enabled": true,
|
3 |
-
"python.linting.flake8Enabled": true,
|
4 |
-
"python.linting.pylintEnabled": false,
|
5 |
-
"python.linting.lintOnSave": true,
|
6 |
-
"python.formatting.provider": "yapf",
|
7 |
-
"python.formatting.yapfArgs": [
|
8 |
-
"--style={based_on_style: pep8, indent_width: 4, blank_line_before_nested_class_or_def: false, spaces_before_comment: 2, split_before_logical_operator: true}"
|
9 |
-
],
|
10 |
"[python]": {
|
|
|
11 |
"editor.formatOnType": true,
|
12 |
"editor.codeActionsOnSave": {
|
13 |
"source.organizeImports": true
|
14 |
}
|
15 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
"editor.formatOnSave": true,
|
17 |
"files.insertFinalNewline": true
|
18 |
}
|
|
|
1 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
"[python]": {
|
3 |
+
"editor.defaultFormatter": "ms-python.black-formatter",
|
4 |
"editor.formatOnType": true,
|
5 |
"editor.codeActionsOnSave": {
|
6 |
"source.organizeImports": true
|
7 |
}
|
8 |
},
|
9 |
+
"black-formatter.args": [
|
10 |
+
"--line-length=119"
|
11 |
+
],
|
12 |
+
"isort.args": ["--profile", "black"],
|
13 |
+
"flake8.args": [
|
14 |
+
"--max-line-length=119"
|
15 |
+
],
|
16 |
+
"ruff.args": [
|
17 |
+
"--line-length=119"
|
18 |
+
],
|
19 |
"editor.formatOnSave": true,
|
20 |
"files.insertFinalNewline": true
|
21 |
}
|
app.py
CHANGED
@@ -12,12 +12,12 @@ from gradio_client import Client
|
|
12 |
|
13 |
from scheduler import ParquetScheduler
|
14 |
|
15 |
-
HF_TOKEN = os.environ[
|
16 |
-
UPLOAD_REPO_ID = os.environ[
|
17 |
-
UPLOAD_FREQUENCY = int(os.getenv(
|
18 |
-
USE_PUBLIC_REPO = os.getenv(
|
19 |
|
20 |
-
ABOUT_THIS_SPACE =
|
21 |
This Space is a sample Space that collects user preferences for the results generated by a diffusion model.
|
22 |
This demo calls the [stable diffusion Space](https://huggingface.co/spaces/stabilityai/stable-diffusion) with the [`gradio_client`](https://pypi.org/project/gradio-client/) library.
|
23 |
|
@@ -25,34 +25,29 @@ The user preference data is periodically archived in parquet format and uploaded
|
|
25 |
|
26 |
The periodic upload is done using [`huggingface_hub.CommitScheduler`](https://huggingface.co/docs/huggingface_hub/main/en/package_reference/hf_api#huggingface_hub.CommitScheduler).
|
27 |
See [this Space](https://huggingface.co/spaces/Wauplin/space_to_dataset_saver) for more general usage.
|
28 |
-
|
29 |
|
30 |
-
scheduler = ParquetScheduler(
|
31 |
-
|
32 |
-
|
33 |
-
token=HF_TOKEN)
|
34 |
|
35 |
-
client = Client(
|
36 |
|
37 |
|
38 |
def generate(prompt: str) -> tuple[str, list[str]]:
|
39 |
-
negative_prompt =
|
40 |
guidance_scale = 9.0
|
41 |
-
out_dir = client.predict(prompt,
|
42 |
-
negative_prompt,
|
43 |
-
guidance_scale,
|
44 |
-
fn_index=1)
|
45 |
|
46 |
config = {
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
}
|
51 |
-
with tempfile.NamedTemporaryFile(mode=
|
52 |
-
delete=False) as config_file:
|
53 |
json.dump(config, config_file)
|
54 |
|
55 |
-
with (pathlib.Path(out_dir) /
|
56 |
paths = list(json.load(f).keys())
|
57 |
return config_file.name, paths
|
58 |
|
@@ -61,19 +56,18 @@ def get_selected_index(evt: gr.SelectData) -> int:
|
|
61 |
return evt.index
|
62 |
|
63 |
|
64 |
-
def save_preference(config_path: str, gallery: list[dict[str, Any]],
|
65 |
-
selected_index: int) -> None:
|
66 |
# Load config
|
67 |
with open(config_path) as f:
|
68 |
data = json.load(f)
|
69 |
|
70 |
# Add selected item + timestamp
|
71 |
-
data[
|
72 |
-
data[
|
73 |
|
74 |
# Add images
|
75 |
-
for index, path in enumerate(x[
|
76 |
-
data[f
|
77 |
|
78 |
# Send to scheduler
|
79 |
scheduler.append(data)
|
@@ -91,21 +85,18 @@ def clear() -> tuple[dict, dict, dict]:
|
|
91 |
)
|
92 |
|
93 |
|
94 |
-
with gr.Blocks(css=
|
95 |
with gr.Group():
|
96 |
-
prompt = gr.Text(show_label=False, placeholder=
|
97 |
-
gallery = gr.Gallery(
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
object_fit='scale-down',
|
102 |
-
allow_preview=False)
|
103 |
-
save_preference_button = gr.Button('Save preference', interactive=False)
|
104 |
|
105 |
config_path = gr.Text(visible=False)
|
106 |
selected_index = gr.Number(visible=False, precision=0, value=-1)
|
107 |
|
108 |
-
with gr.Accordion(label=
|
109 |
gr.Markdown(ABOUT_THIS_SPACE)
|
110 |
|
111 |
prompt.submit(
|
|
|
12 |
|
13 |
from scheduler import ParquetScheduler
|
14 |
|
15 |
+
HF_TOKEN = os.environ["HF_TOKEN"]
|
16 |
+
UPLOAD_REPO_ID = os.environ["UPLOAD_REPO_ID"]
|
17 |
+
UPLOAD_FREQUENCY = int(os.getenv("UPLOAD_FREQUENCY", "15"))
|
18 |
+
USE_PUBLIC_REPO = os.getenv("USE_PUBLIC_REPO") == "1"
|
19 |
|
20 |
+
ABOUT_THIS_SPACE = """
|
21 |
This Space is a sample Space that collects user preferences for the results generated by a diffusion model.
|
22 |
This demo calls the [stable diffusion Space](https://huggingface.co/spaces/stabilityai/stable-diffusion) with the [`gradio_client`](https://pypi.org/project/gradio-client/) library.
|
23 |
|
|
|
25 |
|
26 |
The periodic upload is done using [`huggingface_hub.CommitScheduler`](https://huggingface.co/docs/huggingface_hub/main/en/package_reference/hf_api#huggingface_hub.CommitScheduler).
|
27 |
See [this Space](https://huggingface.co/spaces/Wauplin/space_to_dataset_saver) for more general usage.
|
28 |
+
"""
|
29 |
|
30 |
+
scheduler = ParquetScheduler(
|
31 |
+
repo_id=UPLOAD_REPO_ID, every=UPLOAD_FREQUENCY, private=not USE_PUBLIC_REPO, token=HF_TOKEN
|
32 |
+
)
|
|
|
33 |
|
34 |
+
client = Client("stabilityai/stable-diffusion")
|
35 |
|
36 |
|
37 |
def generate(prompt: str) -> tuple[str, list[str]]:
    """Run the remote generation endpoint and record the settings that were used.

    Calls ``client.predict`` (``fn_index=1``) with the prompt, an empty negative
    prompt and a guidance scale of 9.0, dumps those three settings to a
    persistent temporary ``.json`` file, and reads ``captions.json`` from the
    directory returned by the call.

    Args:
        prompt: Text prompt forwarded to the remote endpoint.

    Returns:
        A ``(config_path, paths)`` tuple: the name of the temporary JSON file
        holding the settings, and the keys of ``captions.json`` (presumably the
        generated image paths; confirm against the remote Space's output).
    """
    settings = {
        "prompt": prompt,
        "negative_prompt": "",
        "guidance_scale": 9.0,
    }
    result_dir = client.predict(
        settings["prompt"],
        settings["negative_prompt"],
        settings["guidance_scale"],
        fn_index=1,
    )

    # delete=False: the file has to outlive this call, since only its name is returned.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp:
        json.dump(settings, tmp)
        config_path = tmp.name

    captions_file = pathlib.Path(result_dir) / "captions.json"
    with captions_file.open() as fh:
        captions = json.load(fh)
    return config_path, list(captions.keys())
|
53 |
|
|
|
56 |
return evt.index
|
57 |
|
58 |
|
59 |
+
def save_preference(config_path: str, gallery: list[dict[str, Any]], selected_index: int) -> None:
|
|
|
60 |
# Load config
|
61 |
with open(config_path) as f:
|
62 |
data = json.load(f)
|
63 |
|
64 |
# Add selected item + timestamp
|
65 |
+
data["selected_index"] = selected_index
|
66 |
+
data["timestamp"] = datetime.datetime.utcnow().isoformat()
|
67 |
|
68 |
# Add images
|
69 |
+
for index, path in enumerate(x["name"] for x in gallery):
|
70 |
+
data[f"image_{index:03d}"] = path
|
71 |
|
72 |
# Send to scheduler
|
73 |
scheduler.append(data)
|
|
|
85 |
)
|
86 |
|
87 |
|
88 |
+
with gr.Blocks(css="style.css") as demo:
|
89 |
with gr.Group():
|
90 |
+
prompt = gr.Text(show_label=False, placeholder="Prompt")
|
91 |
+
gallery = gr.Gallery(
|
92 |
+
show_label=False, columns=2, rows=2, height="600px", object_fit="scale-down", allow_preview=False
|
93 |
+
)
|
94 |
+
save_preference_button = gr.Button("Save preference", interactive=False)
|
|
|
|
|
|
|
95 |
|
96 |
config_path = gr.Text(visible=False)
|
97 |
selected_index = gr.Number(visible=False, precision=0, value=-1)
|
98 |
|
99 |
+
with gr.Accordion(label="About this Space", open=False):
|
100 |
gr.Markdown(ABOUT_THIS_SPACE)
|
101 |
|
102 |
prompt.submit(
|
scheduler.py
CHANGED
@@ -41,6 +41,7 @@ class ParquetScheduler(CommitScheduler):
|
|
41 |
See https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Value for the list of
|
42 |
possible values.
|
43 |
"""
|
|
|
44 |
def __init__(
|
45 |
self,
|
46 |
*,
|
@@ -58,7 +59,7 @@ class ParquetScheduler(CommitScheduler):
|
|
58 |
repo_id=repo_id,
|
59 |
folder_path=tempfile.tempdir, # not used by the scheduler
|
60 |
every=every,
|
61 |
-
repo_type=
|
62 |
revision=revision,
|
63 |
private=private,
|
64 |
token=token,
|
@@ -82,7 +83,7 @@ class ParquetScheduler(CommitScheduler):
|
|
82 |
self._rows = []
|
83 |
if not rows:
|
84 |
return
|
85 |
-
print(f
|
86 |
|
87 |
# Load images + create 'features' config for datasets library
|
88 |
schema: Dict[str, Dict] = self._schema or {}
|
@@ -94,13 +95,13 @@ class ParquetScheduler(CommitScheduler):
|
|
94 |
schema[key] = _infer_schema(key, value)
|
95 |
|
96 |
# Load binary files if necessary
|
97 |
-
if schema[key][
|
98 |
# It's an image or audio: we load the bytes and remember to cleanup the file
|
99 |
file_path = Path(value)
|
100 |
if file_path.is_file():
|
101 |
row[key] = {
|
102 |
-
|
103 |
-
|
104 |
}
|
105 |
path_to_cleanup.append(file_path)
|
106 |
|
@@ -114,10 +115,7 @@ class ParquetScheduler(CommitScheduler):
|
|
114 |
table = pa.Table.from_pylist(rows)
|
115 |
|
116 |
# Add metadata (used by datasets library)
|
117 |
-
table = table.replace_schema_metadata(
|
118 |
-
{'huggingface': json.dumps({'info': {
|
119 |
-
'features': schema
|
120 |
-
}})})
|
121 |
|
122 |
# Write to parquet file
|
123 |
archive_file = tempfile.NamedTemporaryFile()
|
@@ -128,10 +126,10 @@ class ParquetScheduler(CommitScheduler):
|
|
128 |
repo_id=self.repo_id,
|
129 |
repo_type=self.repo_type,
|
130 |
revision=self.revision,
|
131 |
-
path_in_repo=f
|
132 |
path_or_fileobj=archive_file.name,
|
133 |
)
|
134 |
-
print(
|
135 |
|
136 |
# Cleanup
|
137 |
archive_file.close()
|
@@ -142,19 +140,20 @@ class ParquetScheduler(CommitScheduler):
|
|
142 |
def _infer_schema(key: str, value: Any) -> Dict[str, str]:
|
143 |
"""Infer schema for the `datasets` library.
|
144 |
|
145 |
-
See
|
|
|
146 |
"""
|
147 |
-
if
|
148 |
-
return {
|
149 |
-
if
|
150 |
-
return {
|
151 |
if isinstance(value, int):
|
152 |
-
return {
|
153 |
if isinstance(value, float):
|
154 |
-
return {
|
155 |
if isinstance(value, bool):
|
156 |
-
return {
|
157 |
if isinstance(value, bytes):
|
158 |
-
return {
|
159 |
# Otherwise in last resort => convert it to a string
|
160 |
-
return {
|
|
|
41 |
See https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Value for the list of
|
42 |
possible values.
|
43 |
"""
|
44 |
+
|
45 |
def __init__(
|
46 |
self,
|
47 |
*,
|
|
|
59 |
repo_id=repo_id,
|
60 |
folder_path=tempfile.tempdir, # not used by the scheduler
|
61 |
every=every,
|
62 |
+
repo_type="dataset",
|
63 |
revision=revision,
|
64 |
private=private,
|
65 |
token=token,
|
|
|
83 |
self._rows = []
|
84 |
if not rows:
|
85 |
return
|
86 |
+
print(f"Got {len(rows)} item(s) to commit.")
|
87 |
|
88 |
# Load images + create 'features' config for datasets library
|
89 |
schema: Dict[str, Dict] = self._schema or {}
|
|
|
95 |
schema[key] = _infer_schema(key, value)
|
96 |
|
97 |
# Load binary files if necessary
|
98 |
+
if schema[key]["_type"] in ("Image", "Audio"):
|
99 |
# It's an image or audio: we load the bytes and remember to cleanup the file
|
100 |
file_path = Path(value)
|
101 |
if file_path.is_file():
|
102 |
row[key] = {
|
103 |
+
"path": file_path.name,
|
104 |
+
"bytes": file_path.read_bytes(),
|
105 |
}
|
106 |
path_to_cleanup.append(file_path)
|
107 |
|
|
|
115 |
table = pa.Table.from_pylist(rows)
|
116 |
|
117 |
# Add metadata (used by datasets library)
|
118 |
+
table = table.replace_schema_metadata({"huggingface": json.dumps({"info": {"features": schema}})})
|
|
|
|
|
|
|
119 |
|
120 |
# Write to parquet file
|
121 |
archive_file = tempfile.NamedTemporaryFile()
|
|
|
126 |
repo_id=self.repo_id,
|
127 |
repo_type=self.repo_type,
|
128 |
revision=self.revision,
|
129 |
+
path_in_repo=f"{uuid.uuid4()}.parquet",
|
130 |
path_or_fileobj=archive_file.name,
|
131 |
)
|
132 |
+
print("Commit completed.")
|
133 |
|
134 |
# Cleanup
|
135 |
archive_file.close()
|
|
|
140 |
def _infer_schema(key: str, value: Any) -> Dict[str, str]:
|
141 |
"""Infer schema for the `datasets` library.
|
142 |
|
143 |
+
See
|
144 |
+
https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Value.
|
145 |
"""
|
146 |
+
if "image" in key:
|
147 |
+
return {"_type": "Image"}
|
148 |
+
if "audio" in key:
|
149 |
+
return {"_type": "Audio"}
|
150 |
if isinstance(value, int):
|
151 |
+
return {"_type": "Value", "dtype": "int64"}
|
152 |
if isinstance(value, float):
|
153 |
+
return {"_type": "Value", "dtype": "float64"}
|
154 |
if isinstance(value, bool):
|
155 |
+
return {"_type": "Value", "dtype": "bool"}
|
156 |
if isinstance(value, bytes):
|
157 |
+
return {"_type": "Value", "dtype": "binary"}
|
158 |
# Otherwise in last resort => convert it to a string
|
159 |
+
return {"_type": "Value", "dtype": "string"}
|