Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -14,6 +14,7 @@ import soundfile as sf
|
|
14 |
from tenacity import retry, stop_after_attempt, wait_exponential
|
15 |
import re
|
16 |
import numpy as np
|
|
|
17 |
|
18 |
# Log in with Hugging Face token if available
|
19 |
token = os.getenv("hf_token")
|
@@ -36,14 +37,42 @@ audio_backup = {} # To store original audio paths for each sample
|
|
36 |
|
37 |
# Load saved labels from JSON file (and convert old formats if needed)
|
38 |
def load_saved_labels():
|
|
|
39 |
if os.path.exists(SAVE_PATH):
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
saved_labels = load_saved_labels()
|
49 |
|
|
|
14 |
from tenacity import retry, stop_after_attempt, wait_exponential
|
15 |
import re
|
16 |
import numpy as np
|
17 |
+
from huggingface_hub import hf_hub_download
|
18 |
|
19 |
# Log in with Hugging Face token if available
|
20 |
token = os.getenv("hf_token")
|
|
|
37 |
|
38 |
# Load saved labels from JSON file (and convert old formats if needed)
|
39 |
def _read_label_file(path):
    """Parse the JSON file at *path* and return it only if it is a dict.

    Any other top-level JSON type (list, string, number, null) is reported
    and replaced by an empty dict, because the callers iterate the result
    with ``.items()``.

    Raises:
        OSError: if the file cannot be opened or read.
        ValueError: if the content is not valid UTF-8 or not valid JSON
            (``json.JSONDecodeError`` and ``UnicodeDecodeError`` are both
            ``ValueError`` subclasses).
    """
    with open(path, "r", encoding="utf-8") as f:
        loaded = json.load(f)
    if not isinstance(loaded, dict):
        print(
            "Ignoring label file with unexpected top-level type: "
            + type(loaded).__name__
        )
        return {}
    return loaded


def load_saved_labels():
    """Load the saved labels, preferring the local file over the HF dataset repo.

    Steps:
      1. If ``SAVE_PATH`` exists locally, read it.
      2. If that yields nothing (missing, unreadable, empty, or not a JSON
         object), download ``SAVE_PATH`` from the ``HF_DATASET_NAME`` dataset
         repository and try to cache it at ``SAVE_PATH``.
      3. Convert old-format entries (plain string values) to the dict format
         ``{"transcript": <value>, "reviewer": "unreviewed"}``.

    Returns:
        dict: the labels keyed as in the file; an empty dict when nothing
        could be loaded. Errors are printed, never raised.
    """
    data = {}

    # First, try to load from the local file system.
    if os.path.exists(SAVE_PATH):
        try:
            data = _read_label_file(SAVE_PATH)
        except (OSError, ValueError) as e:
            print("Error loading local JSON file: " + str(e))
            data = {}

    # If no local file found or it's empty, try to download from the HF dataset repo.
    if not data:
        try:
            # Download the file from HF repo (make sure repo_type="dataset" is specified)
            hf_path = hf_hub_download(
                repo_id=HF_DATASET_NAME,
                filename=SAVE_PATH,
                repo_type="dataset",
                token=token,
            )
            data = _read_label_file(hf_path)
        except Exception as e:
            # Deliberately broad: this is a best-effort fallback and the hub
            # client can fail in many ways (network, auth, missing file).
            print("Error loading JSON file from HF repo: " + str(e))
            data = {}
        else:
            # Caching is kept outside the download try-block so that a failed
            # write does not throw away labels that were downloaded fine.
            try:
                with open(SAVE_PATH, "w", encoding="utf-8") as f:
                    json.dump(data, f, ensure_ascii=False, indent=4)
                print("Loaded JSON file from HF dataset repository and cached it locally.")
            except OSError as e:
                print(
                    "Loaded JSON file from HF dataset repository but could not cache it locally: "
                    + str(e)
                )

    # Convert any old string-formats to the new dict format. A new dict is
    # built instead of rewriting the one being iterated.
    return {
        key: (
            {"transcript": value, "reviewer": "unreviewed"}
            if isinstance(value, str)
            else value
        )
        for key, value in data.items()
    }
|
76 |
|
77 |
saved_labels = load_saved_labels()
|
78 |
|