Spaces:
Sleeping
updates
Browse files
app.py
CHANGED
@@ -42,16 +42,16 @@ class VideoAnnotator:
|
|
42 |
logger.info(f"Loading dataset from HuggingFace: {HF_REPO_ID}")
|
43 |
self.dataset = load_dataset(HF_REPO_ID, token=self.hf_token)
|
44 |
|
45 |
-
#
|
46 |
split = list(self.dataset.keys())[0]
|
47 |
self.dataset_split = self.dataset[split]
|
48 |
|
49 |
-
# Get all
|
50 |
all_video_files = [item['video'] if 'video' in item else item['path']
|
51 |
for item in self.dataset_split]
|
52 |
logger.info(f"Found {len(all_video_files)} potential video files in source dataset.")
|
53 |
|
54 |
-
#
|
55 |
logger.info(f"Checking for existing annotations in {self.annotation_repo_id}")
|
56 |
try:
|
57 |
# List files in the 'annotations' directory of the annotation repo
|
@@ -65,7 +65,7 @@ class VideoAnnotator:
|
|
65 |
annotated_video_basenames = set(
|
66 |
os.path.basename(f).replace('.jsonl', '')
|
67 |
for f in annotated_files
|
68 |
-
if f.startswith("annotations/") and f.endswith(".jsonl")
|
69 |
)
|
70 |
logger.info(f"Found {len(annotated_video_basenames)} existing annotation files.")
|
71 |
|
|
|
42 |
logger.info(f"Loading dataset from HuggingFace: {HF_REPO_ID}")
|
43 |
self.dataset = load_dataset(HF_REPO_ID, token=self.hf_token)
|
44 |
|
45 |
+
# Usually "train" split
|
46 |
split = list(self.dataset.keys())[0]
|
47 |
self.dataset_split = self.dataset[split]
|
48 |
|
49 |
+
# Get all the video files from the CricBench dataset
|
50 |
all_video_files = [item['video'] if 'video' in item else item['path']
|
51 |
for item in self.dataset_split]
|
52 |
logger.info(f"Found {len(all_video_files)} potential video files in source dataset.")
|
53 |
|
54 |
+
# Filter out already annotated videos
|
55 |
logger.info(f"Checking for existing annotations in {self.annotation_repo_id}")
|
56 |
try:
|
57 |
# List files in the 'annotations' directory of the annotation repo
|
|
|
65 |
annotated_video_basenames = set(
|
66 |
os.path.basename(f).replace('.jsonl', '')
|
67 |
for f in annotated_files
|
68 |
+
if f.startswith("annotations/") and f.endswith(".jsonl")
|
69 |
)
|
70 |
logger.info(f"Found {len(annotated_video_basenames)} existing annotation files.")
|
71 |
|