hysts HF Staff committed on
Commit
e97aac1
·
1 Parent(s): 6673231

Migrate from yapf to black

Browse files
Files changed (5) hide show
  1. .pre-commit-config.yaml +26 -12
  2. .style.yapf +0 -5
  3. .vscode/settings.json +11 -8
  4. app.py +29 -38
  5. scheduler.py +20 -21
.pre-commit-config.yaml CHANGED
@@ -1,6 +1,6 @@
1
  repos:
2
  - repo: https://github.com/pre-commit/pre-commit-hooks
3
- rev: v4.2.0
4
  hooks:
5
  - id: check-executables-have-shebangs
6
  - id: check-json
@@ -8,29 +8,43 @@ repos:
8
  - id: check-shebang-scripts-are-executable
9
  - id: check-toml
10
  - id: check-yaml
11
- - id: double-quote-string-fixer
12
  - id: end-of-file-fixer
13
  - id: mixed-line-ending
14
- args: ['--fix=lf']
15
  - id: requirements-txt-fixer
16
  - id: trailing-whitespace
17
  - repo: https://github.com/myint/docformatter
18
- rev: v1.4
19
  hooks:
20
  - id: docformatter
21
- args: ['--in-place']
22
  - repo: https://github.com/pycqa/isort
23
  rev: 5.12.0
24
  hooks:
25
  - id: isort
 
26
  - repo: https://github.com/pre-commit/mirrors-mypy
27
- rev: v0.991
28
  hooks:
29
  - id: mypy
30
- args: ['--ignore-missing-imports']
31
- additional_dependencies: ['types-python-slugify']
32
- - repo: https://github.com/google/yapf
33
- rev: v0.32.0
34
  hooks:
35
- - id: yapf
36
- args: ['--parallel', '--in-place']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  repos:
2
  - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v4.4.0
4
  hooks:
5
  - id: check-executables-have-shebangs
6
  - id: check-json
 
8
  - id: check-shebang-scripts-are-executable
9
  - id: check-toml
10
  - id: check-yaml
 
11
  - id: end-of-file-fixer
12
  - id: mixed-line-ending
13
+ args: ["--fix=lf"]
14
  - id: requirements-txt-fixer
15
  - id: trailing-whitespace
16
  - repo: https://github.com/myint/docformatter
17
+ rev: v1.7.5
18
  hooks:
19
  - id: docformatter
20
+ args: ["--in-place"]
21
  - repo: https://github.com/pycqa/isort
22
  rev: 5.12.0
23
  hooks:
24
  - id: isort
25
+ args: ["--profile", "black"]
26
  - repo: https://github.com/pre-commit/mirrors-mypy
27
+ rev: v1.5.1
28
  hooks:
29
  - id: mypy
30
+ args: ["--ignore-missing-imports"]
31
+ additional_dependencies: ["types-python-slugify", "types-requests", "types-PyYAML"]
32
+ - repo: https://github.com/psf/black
33
+ rev: 23.7.0
34
  hooks:
35
+ - id: black
36
+ language_version: python3.10
37
+ args: ["--line-length", "119"]
38
+ - repo: https://github.com/kynan/nbstripout
39
+ rev: 0.6.1
40
+ hooks:
41
+ - id: nbstripout
42
+ args: ["--extra-keys", "metadata.interpreter metadata.kernelspec cell.metadata.pycharm"]
43
+ - repo: https://github.com/nbQA-dev/nbQA
44
+ rev: 1.7.0
45
+ hooks:
46
+ - id: nbqa-black
47
+ - id: nbqa-pyupgrade
48
+ args: ["--py37-plus"]
49
+ - id: nbqa-isort
50
+ args: ["--float-to-top"]
.style.yapf DELETED
@@ -1,5 +0,0 @@
1
- [style]
2
- based_on_style = pep8
3
- blank_line_before_nested_class_or_def = false
4
- spaces_before_comment = 2
5
- split_before_logical_operator = true
 
 
 
 
 
 
.vscode/settings.json CHANGED
@@ -1,18 +1,21 @@
1
  {
2
- "python.linting.enabled": true,
3
- "python.linting.flake8Enabled": true,
4
- "python.linting.pylintEnabled": false,
5
- "python.linting.lintOnSave": true,
6
- "python.formatting.provider": "yapf",
7
- "python.formatting.yapfArgs": [
8
- "--style={based_on_style: pep8, indent_width: 4, blank_line_before_nested_class_or_def: false, spaces_before_comment: 2, split_before_logical_operator: true}"
9
- ],
10
  "[python]": {
 
11
  "editor.formatOnType": true,
12
  "editor.codeActionsOnSave": {
13
  "source.organizeImports": true
14
  }
15
  },
 
 
 
 
 
 
 
 
 
 
16
  "editor.formatOnSave": true,
17
  "files.insertFinalNewline": true
18
  }
 
1
  {
 
 
 
 
 
 
 
 
2
  "[python]": {
3
+ "editor.defaultFormatter": "ms-python.black-formatter",
4
  "editor.formatOnType": true,
5
  "editor.codeActionsOnSave": {
6
  "source.organizeImports": true
7
  }
8
  },
9
+ "black-formatter.args": [
10
+ "--line-length=119"
11
+ ],
12
+ "isort.args": ["--profile", "black"],
13
+ "flake8.args": [
14
+ "--max-line-length=119"
15
+ ],
16
+ "ruff.args": [
17
+ "--line-length=119"
18
+ ],
19
  "editor.formatOnSave": true,
20
  "files.insertFinalNewline": true
21
  }
app.py CHANGED
@@ -12,12 +12,12 @@ from gradio_client import Client
12
 
13
  from scheduler import ParquetScheduler
14
 
15
- HF_TOKEN = os.environ['HF_TOKEN']
16
- UPLOAD_REPO_ID = os.environ['UPLOAD_REPO_ID']
17
- UPLOAD_FREQUENCY = int(os.getenv('UPLOAD_FREQUENCY', '15'))
18
- USE_PUBLIC_REPO = os.getenv('USE_PUBLIC_REPO') == '1'
19
 
20
- ABOUT_THIS_SPACE = '''
21
  This Space is a sample Space that collects user preferences for the results generated by a diffusion model.
22
  This demo calls the [stable diffusion Space](https://huggingface.co/spaces/stabilityai/stable-diffusion) with the [`gradio_client`](https://pypi.org/project/gradio-client/) library.
23
 
@@ -25,34 +25,29 @@ The user preference data is periodically archived in parquet format and uploaded
25
 
26
  The periodic upload is done using [`huggingface_hub.CommitScheduler`](https://huggingface.co/docs/huggingface_hub/main/en/package_reference/hf_api#huggingface_hub.CommitScheduler).
27
  See [this Space](https://huggingface.co/spaces/Wauplin/space_to_dataset_saver) for more general usage.
28
- '''
29
 
30
- scheduler = ParquetScheduler(repo_id=UPLOAD_REPO_ID,
31
- every=UPLOAD_FREQUENCY,
32
- private=not USE_PUBLIC_REPO,
33
- token=HF_TOKEN)
34
 
35
- client = Client('stabilityai/stable-diffusion')
36
 
37
 
38
  def generate(prompt: str) -> tuple[str, list[str]]:
39
- negative_prompt = ''
40
  guidance_scale = 9.0
41
- out_dir = client.predict(prompt,
42
- negative_prompt,
43
- guidance_scale,
44
- fn_index=1)
45
 
46
  config = {
47
- 'prompt': prompt,
48
- 'negative_prompt': negative_prompt,
49
- 'guidance_scale': guidance_scale,
50
  }
51
- with tempfile.NamedTemporaryFile(mode='w', suffix='.json',
52
- delete=False) as config_file:
53
  json.dump(config, config_file)
54
 
55
- with (pathlib.Path(out_dir) / 'captions.json').open() as f:
56
  paths = list(json.load(f).keys())
57
  return config_file.name, paths
58
 
@@ -61,19 +56,18 @@ def get_selected_index(evt: gr.SelectData) -> int:
61
  return evt.index
62
 
63
 
64
- def save_preference(config_path: str, gallery: list[dict[str, Any]],
65
- selected_index: int) -> None:
66
  # Load config
67
  with open(config_path) as f:
68
  data = json.load(f)
69
 
70
  # Add selected item + timestamp
71
- data['selected_index'] = selected_index
72
- data['timestamp'] = datetime.datetime.utcnow().isoformat()
73
 
74
  # Add images
75
- for index, path in enumerate(x['name'] for x in gallery):
76
- data[f'image_{index:03d}'] = path
77
 
78
  # Send to scheduler
79
  scheduler.append(data)
@@ -91,21 +85,18 @@ def clear() -> tuple[dict, dict, dict]:
91
  )
92
 
93
 
94
- with gr.Blocks(css='style.css') as demo:
95
  with gr.Group():
96
- prompt = gr.Text(show_label=False, placeholder='Prompt')
97
- gallery = gr.Gallery(show_label=False,
98
- columns=2,
99
- rows=2,
100
- height='600px',
101
- object_fit='scale-down',
102
- allow_preview=False)
103
- save_preference_button = gr.Button('Save preference', interactive=False)
104
 
105
  config_path = gr.Text(visible=False)
106
  selected_index = gr.Number(visible=False, precision=0, value=-1)
107
 
108
- with gr.Accordion(label='About this Space', open=False):
109
  gr.Markdown(ABOUT_THIS_SPACE)
110
 
111
  prompt.submit(
 
12
 
13
  from scheduler import ParquetScheduler
14
 
15
+ HF_TOKEN = os.environ["HF_TOKEN"]
16
+ UPLOAD_REPO_ID = os.environ["UPLOAD_REPO_ID"]
17
+ UPLOAD_FREQUENCY = int(os.getenv("UPLOAD_FREQUENCY", "15"))
18
+ USE_PUBLIC_REPO = os.getenv("USE_PUBLIC_REPO") == "1"
19
 
20
+ ABOUT_THIS_SPACE = """
21
  This Space is a sample Space that collects user preferences for the results generated by a diffusion model.
22
  This demo calls the [stable diffusion Space](https://huggingface.co/spaces/stabilityai/stable-diffusion) with the [`gradio_client`](https://pypi.org/project/gradio-client/) library.
23
 
 
25
 
26
  The periodic upload is done using [`huggingface_hub.CommitScheduler`](https://huggingface.co/docs/huggingface_hub/main/en/package_reference/hf_api#huggingface_hub.CommitScheduler).
27
  See [this Space](https://huggingface.co/spaces/Wauplin/space_to_dataset_saver) for more general usage.
28
+ """
29
 
30
+ scheduler = ParquetScheduler(
31
+ repo_id=UPLOAD_REPO_ID, every=UPLOAD_FREQUENCY, private=not USE_PUBLIC_REPO, token=HF_TOKEN
32
+ )
 
33
 
34
+ client = Client("stabilityai/stable-diffusion")
35
 
36
 
37
  def generate(prompt: str) -> tuple[str, list[str]]:
38
+ negative_prompt = ""
39
  guidance_scale = 9.0
40
+ out_dir = client.predict(prompt, negative_prompt, guidance_scale, fn_index=1)
 
 
 
41
 
42
  config = {
43
+ "prompt": prompt,
44
+ "negative_prompt": negative_prompt,
45
+ "guidance_scale": guidance_scale,
46
  }
47
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as config_file:
 
48
  json.dump(config, config_file)
49
 
50
+ with (pathlib.Path(out_dir) / "captions.json").open() as f:
51
  paths = list(json.load(f).keys())
52
  return config_file.name, paths
53
 
 
56
  return evt.index
57
 
58
 
59
+ def save_preference(config_path: str, gallery: list[dict[str, Any]], selected_index: int) -> None:
 
60
  # Load config
61
  with open(config_path) as f:
62
  data = json.load(f)
63
 
64
  # Add selected item + timestamp
65
+ data["selected_index"] = selected_index
66
+ data["timestamp"] = datetime.datetime.utcnow().isoformat()
67
 
68
  # Add images
69
+ for index, path in enumerate(x["name"] for x in gallery):
70
+ data[f"image_{index:03d}"] = path
71
 
72
  # Send to scheduler
73
  scheduler.append(data)
 
85
  )
86
 
87
 
88
+ with gr.Blocks(css="style.css") as demo:
89
  with gr.Group():
90
+ prompt = gr.Text(show_label=False, placeholder="Prompt")
91
+ gallery = gr.Gallery(
92
+ show_label=False, columns=2, rows=2, height="600px", object_fit="scale-down", allow_preview=False
93
+ )
94
+ save_preference_button = gr.Button("Save preference", interactive=False)
 
 
 
95
 
96
  config_path = gr.Text(visible=False)
97
  selected_index = gr.Number(visible=False, precision=0, value=-1)
98
 
99
+ with gr.Accordion(label="About this Space", open=False):
100
  gr.Markdown(ABOUT_THIS_SPACE)
101
 
102
  prompt.submit(
scheduler.py CHANGED
@@ -41,6 +41,7 @@ class ParquetScheduler(CommitScheduler):
41
  See https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Value for the list of
42
  possible values.
43
  """
 
44
  def __init__(
45
  self,
46
  *,
@@ -58,7 +59,7 @@ class ParquetScheduler(CommitScheduler):
58
  repo_id=repo_id,
59
  folder_path=tempfile.tempdir, # not used by the scheduler
60
  every=every,
61
- repo_type='dataset',
62
  revision=revision,
63
  private=private,
64
  token=token,
@@ -82,7 +83,7 @@ class ParquetScheduler(CommitScheduler):
82
  self._rows = []
83
  if not rows:
84
  return
85
- print(f'Got {len(rows)} item(s) to commit.')
86
 
87
  # Load images + create 'features' config for datasets library
88
  schema: Dict[str, Dict] = self._schema or {}
@@ -94,13 +95,13 @@ class ParquetScheduler(CommitScheduler):
94
  schema[key] = _infer_schema(key, value)
95
 
96
  # Load binary files if necessary
97
- if schema[key]['_type'] in ('Image', 'Audio'):
98
  # It's an image or audio: we load the bytes and remember to cleanup the file
99
  file_path = Path(value)
100
  if file_path.is_file():
101
  row[key] = {
102
- 'path': file_path.name,
103
- 'bytes': file_path.read_bytes(),
104
  }
105
  path_to_cleanup.append(file_path)
106
 
@@ -114,10 +115,7 @@ class ParquetScheduler(CommitScheduler):
114
  table = pa.Table.from_pylist(rows)
115
 
116
  # Add metadata (used by datasets library)
117
- table = table.replace_schema_metadata(
118
- {'huggingface': json.dumps({'info': {
119
- 'features': schema
120
- }})})
121
 
122
  # Write to parquet file
123
  archive_file = tempfile.NamedTemporaryFile()
@@ -128,10 +126,10 @@ class ParquetScheduler(CommitScheduler):
128
  repo_id=self.repo_id,
129
  repo_type=self.repo_type,
130
  revision=self.revision,
131
- path_in_repo=f'{uuid.uuid4()}.parquet',
132
  path_or_fileobj=archive_file.name,
133
  )
134
- print('Commit completed.')
135
 
136
  # Cleanup
137
  archive_file.close()
@@ -142,19 +140,20 @@ class ParquetScheduler(CommitScheduler):
142
  def _infer_schema(key: str, value: Any) -> Dict[str, str]:
143
  """Infer schema for the `datasets` library.
144
 
145
- See https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Value.
 
146
  """
147
- if 'image' in key:
148
- return {'_type': 'Image'}
149
- if 'audio' in key:
150
- return {'_type': 'Audio'}
151
  if isinstance(value, int):
152
- return {'_type': 'Value', 'dtype': 'int64'}
153
  if isinstance(value, float):
154
- return {'_type': 'Value', 'dtype': 'float64'}
155
  if isinstance(value, bool):
156
- return {'_type': 'Value', 'dtype': 'bool'}
157
  if isinstance(value, bytes):
158
- return {'_type': 'Value', 'dtype': 'binary'}
159
  # Otherwise in last resort => convert it to a string
160
- return {'_type': 'Value', 'dtype': 'string'}
 
41
  See https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Value for the list of
42
  possible values.
43
  """
44
+
45
  def __init__(
46
  self,
47
  *,
 
59
  repo_id=repo_id,
60
  folder_path=tempfile.tempdir, # not used by the scheduler
61
  every=every,
62
+ repo_type="dataset",
63
  revision=revision,
64
  private=private,
65
  token=token,
 
83
  self._rows = []
84
  if not rows:
85
  return
86
+ print(f"Got {len(rows)} item(s) to commit.")
87
 
88
  # Load images + create 'features' config for datasets library
89
  schema: Dict[str, Dict] = self._schema or {}
 
95
  schema[key] = _infer_schema(key, value)
96
 
97
  # Load binary files if necessary
98
+ if schema[key]["_type"] in ("Image", "Audio"):
99
  # It's an image or audio: we load the bytes and remember to cleanup the file
100
  file_path = Path(value)
101
  if file_path.is_file():
102
  row[key] = {
103
+ "path": file_path.name,
104
+ "bytes": file_path.read_bytes(),
105
  }
106
  path_to_cleanup.append(file_path)
107
 
 
115
  table = pa.Table.from_pylist(rows)
116
 
117
  # Add metadata (used by datasets library)
118
+ table = table.replace_schema_metadata({"huggingface": json.dumps({"info": {"features": schema}})})
 
 
 
119
 
120
  # Write to parquet file
121
  archive_file = tempfile.NamedTemporaryFile()
 
126
  repo_id=self.repo_id,
127
  repo_type=self.repo_type,
128
  revision=self.revision,
129
+ path_in_repo=f"{uuid.uuid4()}.parquet",
130
  path_or_fileobj=archive_file.name,
131
  )
132
+ print("Commit completed.")
133
 
134
  # Cleanup
135
  archive_file.close()
 
140
  def _infer_schema(key: str, value: Any) -> Dict[str, str]:
141
  """Infer schema for the `datasets` library.
142
 
143
+ See
144
+ https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Value.
145
  """
146
+ if "image" in key:
147
+ return {"_type": "Image"}
148
+ if "audio" in key:
149
+ return {"_type": "Audio"}
150
  if isinstance(value, int):
151
+ return {"_type": "Value", "dtype": "int64"}
152
  if isinstance(value, float):
153
+ return {"_type": "Value", "dtype": "float64"}
154
  if isinstance(value, bool):
155
+ return {"_type": "Value", "dtype": "bool"}
156
  if isinstance(value, bytes):
157
+ return {"_type": "Value", "dtype": "binary"}
158
  # Otherwise in last resort => convert it to a string
159
+ return {"_type": "Value", "dtype": "string"}