svjack committed on
Commit
00243f1
·
verified ·
1 Parent(s): e74b88f

Update run_video_ccip.py

Browse files
Files changed (1) hide show
  1. run_video_ccip.py +10 -1
run_video_ccip.py CHANGED
@@ -24,6 +24,8 @@ df = df[
24
  ###df[0].sort_values().drop_duplicates()
25
  df
26
 
 
 
27
  import os
28
  from shutil import copy2
29
  s = df[
@@ -31,8 +33,15 @@ df["difference"] <= 0.1
31
  ][0].sort_values().map(
32
  lambda x: x.replace("_named", "").replace(".json", ".mp4")
33
  )
 
 
 
 
 
 
 
34
  os.makedirs("tgt_dir", exist_ok=True)
35
- for ele in s.values.tolist():
36
  copy2(ele, os.path.join("tgt_dir", ele.split("/")[-1]))
37
 
38
  '''
 
24
  ###df[0].sort_values().drop_duplicates()
25
  df
26
 
27
+ !git clone https://huggingface.co/datasets/svjack/Beyond_the_Boundary_Videos_Captioned
28
+
29
  import os
30
  from shutil import copy2
31
  s = df[
 
33
  ][0].sort_values().map(
34
  lambda x: x.replace("_named", "").replace(".json", ".mp4")
35
  )
36
+ import pathlib
37
+ import numpy as np
38
+ all_paths_mp4 = pd.Series(list(pathlib.Path("Beyond_the_Boundary_Videos_Captioned").rglob("*.mp4"))).map(str).map(
39
+ lambda x: x if any(map(lambda y: x.endswith(y.split("/")[-1]), s.values.tolist())) else np.nan
40
+ ).dropna()
41
+ all_paths_txt = all_paths_mp4.map(lambda x: x.replace(".mp4", ".txt")).map(lambda x: x if os.path.exists(x) else np.nan).dropna()
42
+
43
  os.makedirs("tgt_dir", exist_ok=True)
44
+ for ele in all_paths_mp4.values.tolist() + all_paths_txt.values.tolist():
45
  copy2(ele, os.path.join("tgt_dir", ele.split("/")[-1]))
46
 
47
  '''