hysts HF Staff committed on
Commit
e97aac1
·
1 Parent(s): 6673231

Migrate from yapf to black

Browse files
Files changed (5) hide show
  1. .pre-commit-config.yaml +26 -12
  2. .style.yapf +0 -5
  3. .vscode/settings.json +11 -8
  4. app.py +29 -38
  5. scheduler.py +20 -21
.pre-commit-config.yaml CHANGED
@@ -1,6 +1,6 @@
1
  repos:
2
  - repo: https://github.com/pre-commit/pre-commit-hooks
3
- rev: v4.2.0
4
  hooks:
5
  - id: check-executables-have-shebangs
6
  - id: check-json
@@ -8,29 +8,43 @@ repos:
8
  - id: check-shebang-scripts-are-executable
9
  - id: check-toml
10
  - id: check-yaml
11
- - id: double-quote-string-fixer
12
  - id: end-of-file-fixer
13
  - id: mixed-line-ending
14
- args: ['--fix=lf']
15
  - id: requirements-txt-fixer
16
  - id: trailing-whitespace
17
  - repo: https://github.com/myint/docformatter
18
- rev: v1.4
19
  hooks:
20
  - id: docformatter
21
- args: ['--in-place']
22
  - repo: https://github.com/pycqa/isort
23
  rev: 5.12.0
24
  hooks:
25
  - id: isort
 
26
  - repo: https://github.com/pre-commit/mirrors-mypy
27
- rev: v0.991
28
  hooks:
29
  - id: mypy
30
- args: ['--ignore-missing-imports']
31
- additional_dependencies: ['types-python-slugify']
32
- - repo: https://github.com/google/yapf
33
- rev: v0.32.0
34
  hooks:
35
- - id: yapf
36
- args: ['--parallel', '--in-place']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  repos:
2
  - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v4.4.0
4
  hooks:
5
  - id: check-executables-have-shebangs
6
  - id: check-json
 
8
  - id: check-shebang-scripts-are-executable
9
  - id: check-toml
10
  - id: check-yaml
 
11
  - id: end-of-file-fixer
12
  - id: mixed-line-ending
13
+ args: ["--fix=lf"]
14
  - id: requirements-txt-fixer
15
  - id: trailing-whitespace
16
  - repo: https://github.com/myint/docformatter
17
+ rev: v1.7.5
18
  hooks:
19
  - id: docformatter
20
+ args: ["--in-place"]
21
  - repo: https://github.com/pycqa/isort
22
  rev: 5.12.0
23
  hooks:
24
  - id: isort
25
+ args: ["--profile", "black"]
26
  - repo: https://github.com/pre-commit/mirrors-mypy
27
+ rev: v1.5.1
28
  hooks:
29
  - id: mypy
30
+ args: ["--ignore-missing-imports"]
31
+ additional_dependencies: ["types-python-slugify", "types-requests", "types-PyYAML"]
32
+ - repo: https://github.com/psf/black
33
+ rev: 23.7.0
34
  hooks:
35
+ - id: black
36
+ language_version: python3.10
37
+ args: ["--line-length", "119"]
38
+ - repo: https://github.com/kynan/nbstripout
39
+ rev: 0.6.1
40
+ hooks:
41
+ - id: nbstripout
42
+ args: ["--extra-keys", "metadata.interpreter metadata.kernelspec cell.metadata.pycharm"]
43
+ - repo: https://github.com/nbQA-dev/nbQA
44
+ rev: 1.7.0
45
+ hooks:
46
+ - id: nbqa-black
47
+ - id: nbqa-pyupgrade
48
+ args: ["--py37-plus"]
49
+ - id: nbqa-isort
50
+ args: ["--float-to-top"]
.style.yapf DELETED
@@ -1,5 +0,0 @@
1
- [style]
2
- based_on_style = pep8
3
- blank_line_before_nested_class_or_def = false
4
- spaces_before_comment = 2
5
- split_before_logical_operator = true
 
 
 
 
 
 
.vscode/settings.json CHANGED
@@ -1,18 +1,21 @@
1
  {
2
- "python.linting.enabled": true,
3
- "python.linting.flake8Enabled": true,
4
- "python.linting.pylintEnabled": false,
5
- "python.linting.lintOnSave": true,
6
- "python.formatting.provider": "yapf",
7
- "python.formatting.yapfArgs": [
8
- "--style={based_on_style: pep8, indent_width: 4, blank_line_before_nested_class_or_def: false, spaces_before_comment: 2, split_before_logical_operator: true}"
9
- ],
10
  "[python]": {
 
11
  "editor.formatOnType": true,
12
  "editor.codeActionsOnSave": {
13
  "source.organizeImports": true
14
  }
15
  },
 
 
 
 
 
 
 
 
 
 
16
  "editor.formatOnSave": true,
17
  "files.insertFinalNewline": true
18
  }
 
1
  {
 
 
 
 
 
 
 
 
2
  "[python]": {
3
+ "editor.defaultFormatter": "ms-python.black-formatter",
4
  "editor.formatOnType": true,
5
  "editor.codeActionsOnSave": {
6
  "source.organizeImports": true
7
  }
8
  },
9
+ "black-formatter.args": [
10
+ "--line-length=119"
11
+ ],
12
+ "isort.args": ["--profile", "black"],
13
+ "flake8.args": [
14
+ "--max-line-length=119"
15
+ ],
16
+ "ruff.args": [
17
+ "--line-length=119"
18
+ ],
19
  "editor.formatOnSave": true,
20
  "files.insertFinalNewline": true
21
  }
app.py CHANGED
@@ -12,12 +12,12 @@ from gradio_client import Client
12
 
13
  from scheduler import ParquetScheduler
14
 
15
- HF_TOKEN = os.environ['HF_TOKEN']
16
- UPLOAD_REPO_ID = os.environ['UPLOAD_REPO_ID']
17
- UPLOAD_FREQUENCY = int(os.getenv('UPLOAD_FREQUENCY', '15'))
18
- USE_PUBLIC_REPO = os.getenv('USE_PUBLIC_REPO') == '1'
19
 
20
- ABOUT_THIS_SPACE = '''
21
  This Space is a sample Space that collects user preferences for the results generated by a diffusion model.
22
  This demo calls the [stable diffusion Space](https://huggingface.co/spaces/stabilityai/stable-diffusion) with the [`gradio_client`](https://pypi.org/project/gradio-client/) library.
23
 
@@ -25,34 +25,29 @@ The user preference data is periodically archived in parquet format and uploaded
25
 
26
  The periodic upload is done using [`huggingface_hub.CommitScheduler`](https://huggingface.co/docs/huggingface_hub/main/en/package_reference/hf_api#huggingface_hub.CommitScheduler).
27
  See [this Space](https://huggingface.co/spaces/Wauplin/space_to_dataset_saver) for more general usage.
28
- '''
29
 
30
- scheduler = ParquetScheduler(repo_id=UPLOAD_REPO_ID,
31
- every=UPLOAD_FREQUENCY,
32
- private=not USE_PUBLIC_REPO,
33
- token=HF_TOKEN)
34
 
35
- client = Client('stabilityai/stable-diffusion')
36
 
37
 
38
  def generate(prompt: str) -> tuple[str, list[str]]:
39
- negative_prompt = ''
40
  guidance_scale = 9.0
41
- out_dir = client.predict(prompt,
42
- negative_prompt,
43
- guidance_scale,
44
- fn_index=1)
45
 
46
  config = {
47
- 'prompt': prompt,
48
- 'negative_prompt': negative_prompt,
49
- 'guidance_scale': guidance_scale,
50
  }
51
- with tempfile.NamedTemporaryFile(mode='w', suffix='.json',
52
- delete=False) as config_file:
53
  json.dump(config, config_file)
54
 
55
- with (pathlib.Path(out_dir) / 'captions.json').open() as f:
56
  paths = list(json.load(f).keys())
57
  return config_file.name, paths
58
 
@@ -61,19 +56,18 @@ def get_selected_index(evt: gr.SelectData) -> int:
61
  return evt.index
62
 
63
 
64
- def save_preference(config_path: str, gallery: list[dict[str, Any]],
65
- selected_index: int) -> None:
66
  # Load config
67
  with open(config_path) as f:
68
  data = json.load(f)
69
 
70
  # Add selected item + timestamp
71
- data['selected_index'] = selected_index
72
- data['timestamp'] = datetime.datetime.utcnow().isoformat()
73
 
74
  # Add images
75
- for index, path in enumerate(x['name'] for x in gallery):
76
- data[f'image_{index:03d}'] = path
77
 
78
  # Send to scheduler
79
  scheduler.append(data)
@@ -91,21 +85,18 @@ def clear() -> tuple[dict, dict, dict]:
91
  )
92
 
93
 
94
- with gr.Blocks(css='style.css') as demo:
95
  with gr.Group():
96
- prompt = gr.Text(show_label=False, placeholder='Prompt')
97
- gallery = gr.Gallery(show_label=False,
98
- columns=2,
99
- rows=2,
100
- height='600px',
101
- object_fit='scale-down',
102
- allow_preview=False)
103
- save_preference_button = gr.Button('Save preference', interactive=False)
104
 
105
  config_path = gr.Text(visible=False)
106
  selected_index = gr.Number(visible=False, precision=0, value=-1)
107
 
108
- with gr.Accordion(label='About this Space', open=False):
109
  gr.Markdown(ABOUT_THIS_SPACE)
110
 
111
  prompt.submit(
 
12
 
13
  from scheduler import ParquetScheduler
14
 
15
+ HF_TOKEN = os.environ["HF_TOKEN"]
16
+ UPLOAD_REPO_ID = os.environ["UPLOAD_REPO_ID"]
17
+ UPLOAD_FREQUENCY = int(os.getenv("UPLOAD_FREQUENCY", "15"))
18
+ USE_PUBLIC_REPO = os.getenv("USE_PUBLIC_REPO") == "1"
19
 
20
+ ABOUT_THIS_SPACE = """
21
  This Space is a sample Space that collects user preferences for the results generated by a diffusion model.
22
  This demo calls the [stable diffusion Space](https://huggingface.co/spaces/stabilityai/stable-diffusion) with the [`gradio_client`](https://pypi.org/project/gradio-client/) library.
23
 
 
25
 
26
  The periodic upload is done using [`huggingface_hub.CommitScheduler`](https://huggingface.co/docs/huggingface_hub/main/en/package_reference/hf_api#huggingface_hub.CommitScheduler).
27
  See [this Space](https://huggingface.co/spaces/Wauplin/space_to_dataset_saver) for more general usage.
28
+ """
29
 
30
+ scheduler = ParquetScheduler(
31
+ repo_id=UPLOAD_REPO_ID, every=UPLOAD_FREQUENCY, private=not USE_PUBLIC_REPO, token=HF_TOKEN
32
+ )
 
33
 
34
+ client = Client("stabilityai/stable-diffusion")
35
 
36
 
37
  def generate(prompt: str) -> tuple[str, list[str]]:
38
+ negative_prompt = ""
39
  guidance_scale = 9.0
40
+ out_dir = client.predict(prompt, negative_prompt, guidance_scale, fn_index=1)
 
 
 
41
 
42
  config = {
43
+ "prompt": prompt,
44
+ "negative_prompt": negative_prompt,
45
+ "guidance_scale": guidance_scale,
46
  }
47
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as config_file:
 
48
  json.dump(config, config_file)
49
 
50
+ with (pathlib.Path(out_dir) / "captions.json").open() as f:
51
  paths = list(json.load(f).keys())
52
  return config_file.name, paths
53
 
 
56
  return evt.index
57
 
58
 
59
+ def save_preference(config_path: str, gallery: list[dict[str, Any]], selected_index: int) -> None:
 
60
  # Load config
61
  with open(config_path) as f:
62
  data = json.load(f)
63
 
64
  # Add selected item + timestamp
65
+ data["selected_index"] = selected_index
66
+ data["timestamp"] = datetime.datetime.utcnow().isoformat()
67
 
68
  # Add images
69
+ for index, path in enumerate(x["name"] for x in gallery):
70
+ data[f"image_{index:03d}"] = path
71
 
72
  # Send to scheduler
73
  scheduler.append(data)
 
85
  )
86
 
87
 
88
+ with gr.Blocks(css="style.css") as demo:
89
  with gr.Group():
90
+ prompt = gr.Text(show_label=False, placeholder="Prompt")
91
+ gallery = gr.Gallery(
92
+ show_label=False, columns=2, rows=2, height="600px", object_fit="scale-down", allow_preview=False
93
+ )
94
+ save_preference_button = gr.Button("Save preference", interactive=False)
 
 
 
95
 
96
  config_path = gr.Text(visible=False)
97
  selected_index = gr.Number(visible=False, precision=0, value=-1)
98
 
99
+ with gr.Accordion(label="About this Space", open=False):
100
  gr.Markdown(ABOUT_THIS_SPACE)
101
 
102
  prompt.submit(
scheduler.py CHANGED
@@ -41,6 +41,7 @@ class ParquetScheduler(CommitScheduler):
41
  See https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Value for the list of
42
  possible values.
43
  """
 
44
  def __init__(
45
  self,
46
  *,
@@ -58,7 +59,7 @@ class ParquetScheduler(CommitScheduler):
58
  repo_id=repo_id,
59
  folder_path=tempfile.tempdir, # not used by the scheduler
60
  every=every,
61
- repo_type='dataset',
62
  revision=revision,
63
  private=private,
64
  token=token,
@@ -82,7 +83,7 @@ class ParquetScheduler(CommitScheduler):
82
  self._rows = []
83
  if not rows:
84
  return
85
- print(f'Got {len(rows)} item(s) to commit.')
86
 
87
  # Load images + create 'features' config for datasets library
88
  schema: Dict[str, Dict] = self._schema or {}
@@ -94,13 +95,13 @@ class ParquetScheduler(CommitScheduler):
94
  schema[key] = _infer_schema(key, value)
95
 
96
  # Load binary files if necessary
97
- if schema[key]['_type'] in ('Image', 'Audio'):
98
  # It's an image or audio: we load the bytes and remember to cleanup the file
99
  file_path = Path(value)
100
  if file_path.is_file():
101
  row[key] = {
102
- 'path': file_path.name,
103
- 'bytes': file_path.read_bytes(),
104
  }
105
  path_to_cleanup.append(file_path)
106
 
@@ -114,10 +115,7 @@ class ParquetScheduler(CommitScheduler):
114
  table = pa.Table.from_pylist(rows)
115
 
116
  # Add metadata (used by datasets library)
117
- table = table.replace_schema_metadata(
118
- {'huggingface': json.dumps({'info': {
119
- 'features': schema
120
- }})})
121
 
122
  # Write to parquet file
123
  archive_file = tempfile.NamedTemporaryFile()
@@ -128,10 +126,10 @@ class ParquetScheduler(CommitScheduler):
128
  repo_id=self.repo_id,
129
  repo_type=self.repo_type,
130
  revision=self.revision,
131
- path_in_repo=f'{uuid.uuid4()}.parquet',
132
  path_or_fileobj=archive_file.name,
133
  )
134
- print('Commit completed.')
135
 
136
  # Cleanup
137
  archive_file.close()
@@ -142,19 +140,20 @@ class ParquetScheduler(CommitScheduler):
142
  def _infer_schema(key: str, value: Any) -> Dict[str, str]:
143
  """Infer schema for the `datasets` library.
144
 
145
- See https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Value.
 
146
  """
147
- if 'image' in key:
148
- return {'_type': 'Image'}
149
- if 'audio' in key:
150
- return {'_type': 'Audio'}
151
  if isinstance(value, int):
152
- return {'_type': 'Value', 'dtype': 'int64'}
153
  if isinstance(value, float):
154
- return {'_type': 'Value', 'dtype': 'float64'}
155
  if isinstance(value, bool):
156
- return {'_type': 'Value', 'dtype': 'bool'}
157
  if isinstance(value, bytes):
158
- return {'_type': 'Value', 'dtype': 'binary'}
159
  # Otherwise in last resort => convert it to a string
160
- return {'_type': 'Value', 'dtype': 'string'}
 
41
  See https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Value for the list of
42
  possible values.
43
  """
44
+
45
  def __init__(
46
  self,
47
  *,
 
59
  repo_id=repo_id,
60
  folder_path=tempfile.tempdir, # not used by the scheduler
61
  every=every,
62
+ repo_type="dataset",
63
  revision=revision,
64
  private=private,
65
  token=token,
 
83
  self._rows = []
84
  if not rows:
85
  return
86
+ print(f"Got {len(rows)} item(s) to commit.")
87
 
88
  # Load images + create 'features' config for datasets library
89
  schema: Dict[str, Dict] = self._schema or {}
 
95
  schema[key] = _infer_schema(key, value)
96
 
97
  # Load binary files if necessary
98
+ if schema[key]["_type"] in ("Image", "Audio"):
99
  # It's an image or audio: we load the bytes and remember to cleanup the file
100
  file_path = Path(value)
101
  if file_path.is_file():
102
  row[key] = {
103
+ "path": file_path.name,
104
+ "bytes": file_path.read_bytes(),
105
  }
106
  path_to_cleanup.append(file_path)
107
 
 
115
  table = pa.Table.from_pylist(rows)
116
 
117
  # Add metadata (used by datasets library)
118
+ table = table.replace_schema_metadata({"huggingface": json.dumps({"info": {"features": schema}})})
 
 
 
119
 
120
  # Write to parquet file
121
  archive_file = tempfile.NamedTemporaryFile()
 
126
  repo_id=self.repo_id,
127
  repo_type=self.repo_type,
128
  revision=self.revision,
129
+ path_in_repo=f"{uuid.uuid4()}.parquet",
130
  path_or_fileobj=archive_file.name,
131
  )
132
+ print("Commit completed.")
133
 
134
  # Cleanup
135
  archive_file.close()
 
140
  def _infer_schema(key: str, value: Any) -> Dict[str, str]:
141
  """Infer schema for the `datasets` library.
142
 
143
+ See
144
+ https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Value.
145
  """
146
+ if "image" in key:
147
+ return {"_type": "Image"}
148
+ if "audio" in key:
149
+ return {"_type": "Audio"}
150
  if isinstance(value, int):
151
+ return {"_type": "Value", "dtype": "int64"}
152
  if isinstance(value, float):
153
+ return {"_type": "Value", "dtype": "float64"}
154
  if isinstance(value, bool):
155
+ return {"_type": "Value", "dtype": "bool"}
156
  if isinstance(value, bytes):
157
+ return {"_type": "Value", "dtype": "binary"}
158
  # Otherwise in last resort => convert it to a string
159
+ return {"_type": "Value", "dtype": "string"}