navidved committed on
Commit
6eab7ac
·
verified ·
1 Parent(s): 9806c6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -7
app.py CHANGED
@@ -14,6 +14,7 @@ import soundfile as sf
14
  from tenacity import retry, stop_after_attempt, wait_exponential
15
  import re
16
  import numpy as np
 
17
 
18
  # Log in with Hugging Face token if available
19
  token = os.getenv("hf_token")
@@ -36,14 +37,42 @@ audio_backup = {} # To store original audio paths for each sample
36
 
37
  # Load saved labels from JSON file (and convert old formats if needed)
38
  def load_saved_labels():
 
39
  if os.path.exists(SAVE_PATH):
40
- with open(SAVE_PATH, "r", encoding="utf-8") as f:
41
- data = json.load(f)
42
- for key, value in data.items():
43
- if isinstance(value, str):
44
- data[key] = {"transcript": value, "reviewer": "unreviewed"}
45
- return data
46
- return {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  saved_labels = load_saved_labels()
49
 
 
14
  from tenacity import retry, stop_after_attempt, wait_exponential
15
  import re
16
  import numpy as np
17
+ from huggingface_hub import hf_hub_download
18
 
19
  # Log in with Hugging Face token if available
20
  token = os.getenv("hf_token")
 
37
 
38
  # Load saved labels from JSON file (and convert old formats if needed)
39
  def load_saved_labels():
40
+ # First, try to load from the local file system.
41
  if os.path.exists(SAVE_PATH):
42
+ try:
43
+ with open(SAVE_PATH, "r", encoding="utf-8") as f:
44
+ data = json.load(f)
45
+ except Exception as e:
46
+ print("Error loading local JSON file: " + str(e))
47
+ data = {}
48
+ else:
49
+ data = {}
50
+
51
+ # If no local file found or it's empty, try to download from the HF dataset repo.
52
+ if not data:
53
+ try:
54
+ # Download the file from HF repo (make sure repo_type="dataset" is specified)
55
+ hf_path = hf_hub_download(
56
+ repo_id=HF_DATASET_NAME,
57
+ filename=SAVE_PATH,
58
+ repo_type="dataset",
59
+ token=token
60
+ )
61
+ with open(hf_path, "r", encoding="utf-8") as f:
62
+ data = json.load(f)
63
+ # Write the downloaded file to the local path for caching.
64
+ with open(SAVE_PATH, "w", encoding="utf-8") as f:
65
+ json.dump(data, f, ensure_ascii=False, indent=4)
66
+ print("Loaded JSON file from HF dataset repository and cached it locally.")
67
+ except Exception as e:
68
+ print("Error loading JSON file from HF repo: " + str(e))
69
+ data = {}
70
+
71
+ # Convert any old string-formats to the new dict format.
72
+ for key, value in data.items():
73
+ if isinstance(value, str):
74
+ data[key] = {"transcript": value, "reviewer": "unreviewed"}
75
+ return data
76
 
77
  saved_labels = load_saved_labels()
78