Spaces:
Runtime error
Runtime error
sradc
Create a .gitignore in the images folder, include the base64-encoded image in the parquet file, and have app.py display images from the base64 data
4343947
| import base64 | |
| import cv2 | |
| import pandas as pd | |
| from PIL import Image | |
| from tqdm import tqdm | |
| from pipeline.clip_wrapper import MODEL_DIM, ClipWrapper | |
| from pipeline.download_videos import DATA_DIR, REPO_ROOT, VIDEO_DIR | |
# Sampling interval: one frame is extracted from each video every this many seconds.
FRAME_EXTRACT_RATE_SECONDS = 5

# Directory under DATA_DIR where extracted frames are written (one subfolder per video).
IMAGES_DIR = DATA_DIR.joinpath("images")

# Parquet file under DATA_DIR that receives the per-frame results table.
DATAFRAME_PATH = DATA_DIR.joinpath("dataset.parquet")
def process_videos() -> None:
    """Run CLIP on sampled video frames and save the results to a parquet file.

    For every ``*.mp4`` in ``VIDEO_DIR`` that has no ``complete`` marker yet,
    frames yielded by ``get_clip_vectors`` are saved as JPEGs under
    ``IMAGES_DIR/<video_id>/``. Each frame contributes one row holding the
    video id, frame index, timestamp, base64-encoded JPEG and the CLIP vector
    (one ``dim_<i>`` column per component).

    Videos already marked ``complete`` are skipped. Their rows are carried
    over from the existing parquet file instead of being dropped, so rerunning
    the pipeline extends the dataset rather than overwriting it with only the
    newly processed videos.
    """
    IMAGES_DIR.mkdir(exist_ok=True, parents=True)
    # Keep the extracted frames out of version control.
    (IMAGES_DIR / ".gitignore").write_text("*")

    clip_wrapper = ClipWrapper()
    results = []
    for video_path in tqdm(list(VIDEO_DIR.glob("*.mp4")), desc="Processing videos"):
        video_id = video_path.stem
        extracted_images_dir = IMAGES_DIR / video_id
        extracted_images_dir.mkdir(exist_ok=True, parents=True)

        # The marker file is only created after every frame of the video has
        # been handled, so its presence means the video can be skipped.
        complete_file = extracted_images_dir / "complete"
        if complete_file.exists():
            continue

        for clip_vector, image, timestamp_secs, frame_idx in get_clip_vectors(
            video_path, clip_wrapper
        ):
            image_path = extracted_images_dir / f"{frame_idx}.jpg"
            image.save(image_path)
            # Encode the JPEG bytes exactly as written to disk so the parquet
            # file is self-contained.
            encoded_image = base64.b64encode(image_path.read_bytes()).decode()
            results.append(
                [
                    video_id,
                    frame_idx,
                    timestamp_secs,
                    encoded_image,
                    *clip_vector,
                ]
            )
        complete_file.touch()

    columns = ["video_id", "frame_idx", "timestamp", "base64_image"] + [
        f"dim_{i}" for i in range(MODEL_DIM)
    ]
    df = pd.DataFrame(results, columns=columns)

    if DATAFRAME_PATH.exists():
        if not results:
            # Every video was already processed: leave the dataset untouched
            # instead of overwriting it with an empty table.
            print(f"No new videos processed, keeping existing {DATAFRAME_PATH}")
            return
        # Merge with rows from earlier runs. If a video was reprocessed (its
        # marker was removed), the freshly computed rows win.
        existing_df = pd.read_parquet(DATAFRAME_PATH)
        df = pd.concat([existing_df, df], ignore_index=True).drop_duplicates(
            subset=["video_id", "frame_idx"], keep="last"
        )

    print(f"Saving data to {DATAFRAME_PATH}")
    df.to_parquet(DATAFRAME_PATH, index=False)
def get_clip_vectors(video_path, clip_wrapper):
    """Yield CLIP vectors for frames sampled from a video at a fixed interval.

    One frame is sampled roughly every ``FRAME_EXTRACT_RATE_SECONDS`` seconds,
    starting with the first frame.

    Args:
        video_path: Path of the video file to read.
        clip_wrapper: Object exposing ``images2vec(list_of_images)``; its
            result is squeezed and converted with ``.numpy()`` (presumably a
            torch tensor - confirm against ``ClipWrapper``).

    Yields:
        Tuples ``(clip_vector, image, timestamp_secs, frame_idx)`` where
        ``image`` is a PIL image of the frame and ``timestamp_secs`` is the
        frame index divided by the video's frame rate.
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        num_video_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 skews
        # both the sampling interval and the computed timestamps.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Unreadable file or missing metadata; without a frame rate neither
            # the sampling stride nor the timestamps can be computed.
            print(f"Could not determine frame rate of {video_path}, skipping")
            return
        # Never let the stride drop to 0 (very low frame rates), which would
        # make the modulo below raise ZeroDivisionError.
        extract_every_n_frames = max(1, round(FRAME_EXTRACT_RATE_SECONDS * fps))

        for frame_idx in tqdm(range(num_video_frames), desc="Running CLIP on video"):
            ret, frame = cap.read()
            if not ret:
                # The reported frame count can exceed what is decodable; stop
                # at the first failed read instead of indexing into None.
                break
            if frame_idx % extract_every_n_frames != 0:
                continue
            # OpenCV decodes frames as BGR; reverse the channel axis for PIL.
            image = Image.fromarray(frame[..., ::-1])
            clip_vector = clip_wrapper.images2vec([image]).squeeze().numpy()
            timestamp_secs = frame_idx / fps
            yield clip_vector, image, timestamp_secs, frame_idx
    finally:
        # Runs on normal exhaustion, on errors, and when the consumer abandons
        # the generator early, so the capture handle is always released.
        cap.release()
# Script entry point: build the dataset only when this file is executed directly.
if __name__ == "__main__":
    process_videos()