ashu1069 committed on
Commit
49e1823
·
1 Parent(s): 6f4b473
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -42,16 +42,16 @@ class VideoAnnotator:
42
  logger.info(f"Loading dataset from HuggingFace: {HF_REPO_ID}")
43
  self.dataset = load_dataset(HF_REPO_ID, token=self.hf_token)
44
 
45
- # Get the split (usually 'train')
46
  split = list(self.dataset.keys())[0]
47
  self.dataset_split = self.dataset[split]
48
 
49
- # Get all potential video files from the dataset
50
  all_video_files = [item['video'] if 'video' in item else item['path']
51
  for item in self.dataset_split]
52
  logger.info(f"Found {len(all_video_files)} potential video files in source dataset.")
53
 
54
- # --- Start Edit: Filter out already annotated videos ---
55
  logger.info(f"Checking for existing annotations in {self.annotation_repo_id}")
56
  try:
57
  # List files in the 'annotations' directory of the annotation repo
@@ -65,7 +65,7 @@ class VideoAnnotator:
65
  annotated_video_basenames = set(
66
  os.path.basename(f).replace('.jsonl', '')
67
  for f in annotated_files
68
- if f.startswith("annotations/") and f.endswith(".jsonl") # Ensure it's in the correct folder and has the right extension
69
  )
70
  logger.info(f"Found {len(annotated_video_basenames)} existing annotation files.")
71
 
 
42
  logger.info(f"Loading dataset from HuggingFace: {HF_REPO_ID}")
43
  self.dataset = load_dataset(HF_REPO_ID, token=self.hf_token)
44
 
45
+ # Usually "train" split
46
  split = list(self.dataset.keys())[0]
47
  self.dataset_split = self.dataset[split]
48
 
49
+ # Get all the video files from the CricBench dataset
50
  all_video_files = [item['video'] if 'video' in item else item['path']
51
  for item in self.dataset_split]
52
  logger.info(f"Found {len(all_video_files)} potential video files in source dataset.")
53
 
54
+ # Filter out already annotated videos
55
  logger.info(f"Checking for existing annotations in {self.annotation_repo_id}")
56
  try:
57
  # List files in the 'annotations' directory of the annotation repo
 
65
  annotated_video_basenames = set(
66
  os.path.basename(f).replace('.jsonl', '')
67
  for f in annotated_files
68
+ if f.startswith("annotations/") and f.endswith(".jsonl")
69
  )
70
  logger.info(f"Found {len(annotated_video_basenames)} existing annotation files.")
71