Spaces:

qubvel-hf
/

vjepa2-streaming-video-classification

Running on L4

App Files Files Community

qubvel-hf HF Staff committed 2 days ago

Commit

585cd1b

1 Parent(s): 5893d39

Add logging

Browse files

Files changed (2) hide show

app.py +15 -4
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -12,12 +12,13 @@ The system uses FastRTC for video streaming and Gradio for the web interface.
 import os
 import cv2
-import uuid
 import time
 import torch
 import gradio as gr
 import numpy as np
 from gradio.utils import get_space
 from fastrtc import (
     Stream,
@@ -163,10 +164,17 @@ class RunningResult:
         return self.predictions[-1][1] if self.predictions else "Starting..."
-def process_frames(image: np.ndarray, frames_state: list, result_state: list):
     # Initialize frames cache if not exists (and put in gradio state)
     if not frames_state:
         running_frames_cache = RunningFramesCache(
             save_every_k_frame=128 / frames_per_clip,
             max_frames=frames_per_clip,
@@ -177,6 +185,7 @@ def process_frames(image: np.ndarray, frames_state: list, result_state: list):
     # Initialize result cache if not exists (and put in gradio state)
     if not result_state:
         running_result = RunningResult(4)
         result_state.append(running_result)
     else:
@@ -205,6 +214,7 @@ def process_frames(image: np.ndarray, frames_state: list, result_state: list):
         # Get top prediction
         top_index = logits.argmax(dim=-1).item()
         class_name = model.config.id2label[top_index]
         running_result.add_prediction(class_name)
     # Get formatted predictions and last prediction
@@ -220,13 +230,14 @@ async def get_credentials():
 frames_cache = gr.State([])
 result_cache = gr.State([])
 # Initialize the video stream with processing callback
 stream = Stream(
     handler=VideoStreamHandler(process_frames, skip_frames=True),
     modality="video",
     mode="send-receive",
-    additional_inputs=[frames_cache, result_cache],
     additional_outputs=[gr.TextArea(label="Actions", value="", lines=5)],
     additional_outputs_handler=lambda _, output: output,
     rtc_configuration=get_credentials if get_space() else None,

 import os
 import cv2
 import time
 import torch
+import random
 import gradio as gr
 import numpy as np
+from loguru import logger
 from gradio.utils import get_space
 from fastrtc import (
     Stream,
         return self.predictions[-1][1] if self.predictions else "Starting..."
+def process_frames(image: np.ndarray, frames_state: list, result_state: list, session_cache: list):
+    if not session_cache:
+        session_id = random.randint(1, 1000)
+        session_cache.append(session_id)
+    else:
+        session_id = session_cache[0]
     # Initialize frames cache if not exists (and put in gradio state)
     if not frames_state:
+        logger.info(f"({session_id}) initialized frames cache")
         running_frames_cache = RunningFramesCache(
             save_every_k_frame=128 / frames_per_clip,
             max_frames=frames_per_clip,
     # Initialize result cache if not exists (and put in gradio state)
     if not result_state:
+        logger.info(f"({session_id}) initialized result cache")
         running_result = RunningResult(4)
         result_state.append(running_result)
     else:
         # Get top prediction
         top_index = logits.argmax(dim=-1).item()
         class_name = model.config.id2label[top_index]
+        logger.info(f"({session_id}) action: '{class_name}'")
         running_result.add_prediction(class_name)
     # Get formatted predictions and last prediction
 frames_cache = gr.State([])
 result_cache = gr.State([])
+session_id = gr.State([])
 # Initialize the video stream with processing callback
 stream = Stream(
     handler=VideoStreamHandler(process_frames, skip_frames=True),
     modality="video",
     mode="send-receive",
+    additional_inputs=[frames_cache, result_cache, session_id],
     additional_outputs=[gr.TextArea(label="Actions", value="", lines=5)],
     additional_outputs_handler=lambda _, output: output,
     rtc_configuration=get_credentials if get_space() else None,

requirements.txt CHANGED Viewed

@@ -3,4 +3,5 @@ transformers @ git+https://github.com/huggingface/transformers
 torch
 torchvision
 opencv-python-headless
-fastrtc>=0.0.28

 torch
 torchvision
 opencv-python-headless
+fastrtc>=0.0.28
+loguru