Update app.py
app.py
CHANGED
@@ -24,7 +24,6 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F

-
 from scipy import signal
 from scipy.signal import find_peaks, welch, get_window

@@ -43,7 +42,8 @@ from matplotlib.gridspec import GridSpec

 import gradio as gr

-

 class PhysMambaattention_viz:
     """Simplified Grad-CAM for PhysMamba."""
@@ -228,55 +228,86 @@ class PhysMambaattention_viz:


 def apply_diff_normalized(frames: List[np.ndarray]) -> np.ndarray:
-    """
     if len(frames) < 2:
-
-
     diff_frames = []
-
     for i in range(len(frames)):
         if i == 0:
             diff_frames.append(np.zeros_like(frames[0], dtype=np.float32))
         else:
             curr = frames[i].astype(np.float32)
-            prev = frames[i-1].astype(np.float32)
-
-            diff = (curr - prev) /
             diff_frames.append(diff)
-
-    diff_array = np.stack(diff_frames)
-    std = diff_array.std()
-
-    diff_array = diff_array / std
-
     return diff_array


-def preprocess_for_physmamba(
-
-
-
     if len(frames) < target_frames:
         frames = frames + [frames[-1]] * (target_frames - len(frames))
     elif len(frames) > target_frames:
-
-        frames = [frames[i] for i in
-
-
     frames_resized = [cv2.resize(f, (target_size, target_size)) for f in frames_rgb]
-    frames_diff = apply_diff_normalized(frames_resized)
-    frames_transposed = np.transpose(frames_diff, (3, 0, 1, 2))
-    frames_batched = np.expand_dims(frames_transposed, axis=0)
-    tensor = torch.from_numpy(frames_batched.astype(np.float32))
-
-    return tensor

-
 MODEL_DIR = HERE / "final_model_release"
 LOG_DIR = HERE / "logs"
 ANALYSIS_DIR = HERE / "analysis"
-for d in
-    d.mkdir(exist_ok=True)

 DEVICE = (
     torch.device("cuda") if torch.cuda.is_available()
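The replacement (shown in full further down) computes the DiffNormalized representation described in its new docstring: each frame pair is mapped to diff_t = (I_t - I_{t-1}) / (I_t + I_{t-1} + eps) and the stacked result is divided by its global standard deviation. A minimal, hedged sketch of that recipe on dummy data (standalone, not the app's exact function):

import numpy as np

def diff_normalize(frames: np.ndarray, eps: float = 1e-8) -> np.ndarray:
    # frames: (T, H, W, C) float array
    curr, prev = frames[1:], frames[:-1]
    diff = (curr - prev) / (curr + prev + eps)            # frame-pair contrast
    diff = np.concatenate([np.zeros_like(frames[:1]), diff], axis=0)
    return diff / (diff.std() + eps)                      # global std-normalize

clip = np.random.rand(8, 4, 4, 3).astype(np.float32)      # tiny dummy clip
print(diff_normalize(clip).shape)                         # (8, 4, 4, 3)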
@@ -284,7 +315,9 @@ DEVICE = (
     else torch.device("cpu")
 )

-FACE_CASCADE = cv2.CascadeClassifier(

 DEFAULT_SIZE = 128  # input H=W to model
 DEFAULT_T = 128     # clip length
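The new version builds the cascade from OpenCV's bundled data directory (see the added lines in the second rendering below). For reference, a hedged sketch of how that classifier is typically constructed and used; the parameter values mirror one of the detect_face() combinations shown later in this diff:

import cv2

cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
)

def largest_face(frame_bgr):
    gray = cv2.equalizeHist(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY))
    faces = cascade.detectMultiScale(gray, scaleFactor=1.05, minNeighbors=3)
    if len(faces) == 0:
        return None
    return max(faces, key=lambda f: f[2] * f[3])   # (x, y, w, h) with largest area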
@@ -302,22 +335,52 @@ MAX_JUMP_BPM = 8.0
 GT_FILENAMES = {"ground_truth.txt", "gtdump.txt", "gt.txt"}
 GT_EXTS = {".txt", ".csv", ".json"}

 def _as_path(maybe) -> Optional[str]:
-    """
     if maybe is None:
         return None
     if isinstance(maybe, str):
         return maybe
     if isinstance(maybe, dict):
-
-
-
         return name
     try:
         return str(maybe)
     except Exception:
         return None

 def _import_from_file(py_path: Path):
     spec = importlib.util.spec_from_file_location(py_path.stem, str(py_path))
     if not spec or not spec.loader:
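The rewritten _as_path (shown in full in the second rendering of this hunk) normalizes whatever Gradio hands back into a filesystem path. A hedged, standalone sketch of the same idea:

from pathlib import Path
from typing import Optional

def as_path(maybe) -> Optional[str]:
    """Best-effort conversion of Gradio upload values to a path string."""
    if maybe is None:
        return None
    if isinstance(maybe, (list, tuple)):            # multi-file upload: take first
        return as_path(maybe[0]) if maybe else None
    if isinstance(maybe, (str, Path)):
        return str(maybe)
    if isinstance(maybe, dict):                     # Gradio dict payloads
        for key in ("name", "path", "file"):
            v = maybe.get(key)
            if isinstance(v, str) and v:
                return v
        return None
    name = getattr(maybe, "name", None)             # tempfile-like objects
    return name if isinstance(name, str) else None

print(as_path({"path": "/tmp/video.mp4"}))          # /tmp/video.mp4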
@@ -326,27 +389,35 @@ def _import_from_file(py_path: Path):
     spec.loader.exec_module(mod)
     return mod

 def _looks_like_video(p: Path) -> bool:
     if p.suffix.lower() == ".mat":
         return True
     return p.suffix.lower() in VIDEO_EXTENSIONS

 class SimpleActivationAttention:
-    """Lightweight attention visualization

     def __init__(self, model: nn.Module, device: torch.device):
         self.model = model
         self.device = device
-        self.activations = None
-        self.hook_handle = None

     def _activation_hook(self, module, input, output):
         """Capture activations during forward pass."""
-

     def register_hook(self):
-        """Register hook on a suitable layer."""
-        # Find the last convolutional layer before Mamba
         target = None
         target_name = None
@@ -354,7 +425,7 @@ class SimpleActivationAttention:
         if isinstance(module, (nn.Conv2d, nn.Conv3d)) and 'mamba' not in name.lower() and 'ssm' not in name.lower():
             target = module
             target_name = name
-
         if target is None:
             print("⚠ [attention_viz] No suitable conv layer found, attention disabled")
             return
@@ -363,30 +434,36 @@ class SimpleActivationAttention:
         print(f"✓ [attention_viz] Hook registered on {target_name} ({type(target).__name__})")

     def generate(self, clip_tensor: torch.Tensor) -> Optional[np.ndarray]:
-        """
         try:
             if self.activations is None:
                 return None

-            # Process activations to create spatial attention
             act = self.activations
-
             # Handle different tensor shapes
             if act.dim() == 5:  # [B, C, T, H, W]
-                # Average over
-                attention = act.mean(dim=[1, 2])
             elif act.dim() == 4:  # [B, C, H, W]
                 attention = act.mean(dim=1)  # -> [B, H, W]
             else:
                 print(f"⚠ [attention_viz] Unexpected activation shape: {act.shape}")
                 return None

-            #
-            attention = attention.
-
             # Normalize to [0, 1]
-
-

             return attention

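What generate() does with the hooked activations: collapse the channel (and time) dimensions into a spatial map, then min-max it into [0, 1]. A minimal hedged sketch with a dummy tensor (mirrors the reduction shown in the updated version further down):

import torch
import numpy as np

def activations_to_heatmap(act: torch.Tensor) -> np.ndarray:
    # act: [B, C, T, H, W] or [B, C, H, W] captured by a forward hook
    if act.dim() == 5:
        attn = act.mean(dim=[1, 2])     # average channels + time -> [B, H, W]
    elif act.dim() == 4:
        attn = act.mean(dim=1)          # average channels -> [B, H, W]
    else:
        raise ValueError(f"unexpected shape {tuple(act.shape)}")
    attn = attn[0].detach().cpu().numpy()
    lo, hi = attn.min(), attn.max()
    return (attn - lo) / (hi - lo) if hi > lo else np.zeros_like(attn)

heat = activations_to_heatmap(torch.randn(1, 32, 8, 16, 16))
print(heat.shape, heat.min(), heat.max())   # (16, 16) 0.0 1.0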
@@ -396,16 +473,22 @@ class SimpleActivationAttention:

     def visualize(self, heatmap: np.ndarray, frame: np.ndarray, alpha: float = 0.4) -> np.ndarray:
         """Overlay heatmap on frame."""
         h, w = frame.shape[:2]
         heatmap_resized = cv2.resize(heatmap, (w, h))
         heatmap_uint8 = (heatmap_resized * 255).astype(np.uint8)
         heatmap_colored = cv2.applyColorMap(heatmap_uint8, cv2.COLORMAP_JET)
-        overlay = cv2.addWeighted(frame, 1-alpha, heatmap_colored, alpha, 0)
         return overlay

     def cleanup(self):
         if self.hook_handle is not None:
             self.hook_handle.remove()

 class VideoReader:
     """
@@ -421,6 +504,7 @@ class VideoReader:
|
|
| 421 |
self._idx = 0
|
| 422 |
self._len = 0
|
| 423 |
self._shape = None
|
|
|
|
| 424 |
|
| 425 |
if self.path.lower().endswith(".mat") and MAT_SUPPORT:
|
| 426 |
self._open_mat(self.path)
|
|
@@ -433,6 +517,7 @@ class VideoReader:
|
|
| 433 |
raise RuntimeError("Cannot open video")
|
| 434 |
self._cap = cap
|
| 435 |
self._len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
|
|
|
|
| 436 |
|
| 437 |
def _open_mat(self, path: str):
|
| 438 |
try:
|
|
@@ -444,6 +529,7 @@ class VideoReader:
|
|
| 444 |
break
|
| 445 |
else:
|
| 446 |
arr = next((v for v in md.values() if isinstance(v, np.ndarray)), None)
|
|
|
|
| 447 |
if arr is None:
|
| 448 |
raise RuntimeError("No ndarray found in .mat")
|
| 449 |
|
|
@@ -451,16 +537,18 @@ class VideoReader:
|
|
| 451 |
# Normalize to (T,H,W,3)
|
| 452 |
if a.ndim == 4:
|
| 453 |
if a.shape[-1] == 3:
|
| 454 |
-
if
|
|
|
|
| 455 |
v = a
|
| 456 |
-
else:
|
| 457 |
v = np.transpose(a, (2, 0, 1, 3))
|
| 458 |
else:
|
| 459 |
-
v = a[..., :1]
|
| 460 |
elif a.ndim == 3:
|
| 461 |
-
|
|
|
|
| 462 |
v = a
|
| 463 |
-
else:
|
| 464 |
v = np.transpose(a, (2, 0, 1))
|
| 465 |
v = v[..., None]
|
| 466 |
else:
|
|
@@ -473,6 +561,7 @@ class VideoReader:
|
|
| 473 |
self._mat = v
|
| 474 |
self._len = v.shape[0]
|
| 475 |
self._shape = v.shape[1:3]
|
|
|
|
| 476 |
except Exception as e:
|
| 477 |
raise RuntimeError(f"Failed to open .mat video: {e}")
|
| 478 |
|
|
@@ -489,9 +578,11 @@ class VideoReader:
|
|
| 489 |
|
| 490 |
def fps(self, fallback: int = 30) -> int:
|
| 491 |
if self._mat is not None:
|
| 492 |
-
return fallback
|
|
|
|
|
|
|
| 493 |
f = self._cap.get(cv2.CAP_PROP_FPS)
|
| 494 |
-
return int(f) if f and f > 0 else fallback
|
| 495 |
|
| 496 |
def length(self) -> int:
|
| 497 |
return self._len
|
|
@@ -500,6 +591,7 @@ class VideoReader:
|
|
| 500 |
if self._cap is not None:
|
| 501 |
self._cap.release()
|
| 502 |
|
|
|
|
| 503 |
def roi_candidates(face: Tuple[int, int, int, int], frame: np.ndarray) -> Dict[str, np.ndarray]:
|
| 504 |
x, y, w, h = face
|
| 505 |
# forehead
|
|
@@ -510,23 +602,25 @@ def roi_candidates(face: Tuple[int, int, int, int], frame: np.ndarray) -> Dict[s
|
|
| 510 |
ff = frame[y:y + h, x:x + w]
|
| 511 |
return {"forehead": fh, "cheeks": ck, "face": ff}
|
| 512 |
|
|
|
|
| 513 |
def roi_quality_score(patch: Optional[np.ndarray], fs: int = 30) -> float:
|
| 514 |
if patch is None or patch.size == 0:
|
| 515 |
return -1e9
|
| 516 |
g = patch[..., 1].astype(np.float32) / 255.0 # green channel
|
| 517 |
g = cv2.resize(g, (64, 64)).mean(axis=1) # crude spatial pooling
|
| 518 |
g = g - g.mean()
|
| 519 |
-
b, a = signal.butter(4, [0.7 / (fs / 2), 3.5 / (fs / 2)], btype="band")
|
| 520 |
try:
|
|
|
|
| 521 |
y = signal.filtfilt(b, a, g, method="gust")
|
| 522 |
except Exception:
|
| 523 |
y = g
|
| 524 |
return float((y ** 2).mean())
|
| 525 |
|
|
|
|
| 526 |
def pick_auto_roi(face: Tuple[int, int, int, int],
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
"""Simple ROI selection."""
|
| 530 |
cands = roi_candidates(face, frame)
|
| 531 |
scores = {k: roi_quality_score(v) for k, v in cands.items()}
|
| 532 |
|
|
@@ -539,14 +633,15 @@ def pick_auto_roi(face: Tuple[int, int, int, int],
|
|
| 539 |
ck_attn = attn_resized[int(y + 0.55 * h):int(y + 0.85 * h), int(x + 0.15 * w):int(x + 0.85 * w)].mean() if attn_resized.size > 0 else 0.0
|
| 540 |
ff_attn = attn_resized[y:y+h, x:x+w].mean() if attn_resized.size > 0 else 0.0
|
| 541 |
scores['forehead'] += fh_attn * 0.2
|
| 542 |
-
scores['cheeks']
|
| 543 |
-
scores['face']
|
| 544 |
except Exception:
|
| 545 |
pass
|
| 546 |
|
| 547 |
best = max(scores, key=scores.get)
|
| 548 |
return cands[best], best
|
| 549 |
|
|
|
|
| 550 |
def discover_subjects(root_dir: Path) -> List[Tuple[str, Optional[str]]]:
|
| 551 |
"""
|
| 552 |
Walk root_dir; for each subject folder (or single-folder dataset), return (video_path, gt_path or None).
|
|
@@ -601,6 +696,7 @@ def discover_subjects(root_dir: Path) -> List[Tuple[str, Optional[str]]]:
|
|
| 601 |
uniq.append((v, g))
|
| 602 |
return uniq
|
| 603 |
|
|
|
|
| 604 |
def find_physmamba_builder(repo_root: Path, model_file: str = "", model_class: str = "PhysMamba"):
|
| 605 |
import inspect
|
| 606 |
|
|
@@ -639,7 +735,8 @@ def find_physmamba_builder(repo_root: Path, model_file: str = "", model_class: s
|
|
| 639 |
except Exception:
|
| 640 |
continue
|
| 641 |
|
| 642 |
-
raise ImportError(
|
|
|
|
| 643 |
|
| 644 |
def load_physmamba_model(ckpt_path: Path, device: torch.device,
|
| 645 |
model_file: str = "", model_class: str = "PhysMamba"):
|
|
@@ -680,6 +777,7 @@ def load_physmamba_model(ckpt_path: Path, device: torch.device,
|
|
| 680 |
state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
|
| 681 |
model.load_state_dict(state_dict, strict=False)
|
| 682 |
except Exception:
|
|
|
|
| 683 |
pass
|
| 684 |
|
| 685 |
model.to(device).eval()
|
|
@@ -688,13 +786,15 @@ def load_physmamba_model(ckpt_path: Path, device: torch.device,
|
|
| 688 |
with torch.no_grad():
|
| 689 |
_ = model(torch.zeros(1, 3, 8, 128, 128, device=device))
|
| 690 |
except Exception:
|
|
|
|
| 691 |
pass
|
| 692 |
|
| 693 |
-
#
|
| 694 |
attention_viz = None
|
| 695 |
|
| 696 |
return model, attention_viz
|
| 697 |
|
|
|
|
| 698 |
def bandpass_filter(x: np.ndarray, fs: int = 30, low: float = 0.7, high: float = 3.5, order: int = 4) -> np.ndarray:
|
| 699 |
"""
|
| 700 |
Stable band-pass with edge-safety and parameter clipping.
|
|
@@ -712,12 +812,12 @@ def bandpass_filter(x: np.ndarray, fs: int = 30, low: float = 0.7, high: float =
|
|
| 712 |
|
| 713 |
try:
|
| 714 |
b, a = signal.butter(order, [lo, hi], btype="band")
|
| 715 |
-
# padlen must be < len(x); reduce when short
|
| 716 |
padlen = min(3 * max(len(a), len(b)), max(0, x.size - 1))
|
| 717 |
return signal.filtfilt(b, a, x, padlen=padlen)
|
| 718 |
except Exception:
|
| 719 |
return x
|
| 720 |
|
|
|
|
| 721 |
def hr_from_welch(x: np.ndarray, fs: int = 30, lo: float = 0.7, hi: float = 3.5) -> float:
|
| 722 |
"""
|
| 723 |
HR (BPM) via Welch PSD peak in [lo, hi] Hz.
|
|
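hr_from_welch() picks the Welch PSD peak inside the cardiac band and converts it to BPM. A hedged, self-contained sanity check on a synthetic 1.4 Hz (84 BPM) pulse, using the same nperseg rule and band limits as this function:

import numpy as np
from scipy.signal import welch, get_window

fs = 30                                   # frames per second
t = np.arange(0, 20, 1.0 / fs)            # 20 s of signal
bvp = np.sin(2 * np.pi * 1.4 * t) + 0.1 * np.random.randn(t.size)

nper = int(min(max(64, fs * 2), min(512, bvp.size)))
f, pxx = welch(bvp, fs=fs, window=get_window("hann", nper),
               nperseg=nper, detrend="constant")
band = (f >= 0.7) & (f <= 3.5)            # 42-210 BPM cardiac band
hr_bpm = f[band][np.argmax(pxx[band])] * 60.0
print(round(hr_bpm, 1))                    # ~84 BPM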
@@ -726,7 +826,6 @@ def hr_from_welch(x: np.ndarray, fs: int = 30, lo: float = 0.7, hi: float = 3.5)
|
|
| 726 |
if x.size < int(fs * 4.0): # need ~4s for a usable PSD
|
| 727 |
return 0.0
|
| 728 |
try:
|
| 729 |
-
# nperseg tuned for short windows while avoiding tiny segments
|
| 730 |
nper = int(min(max(64, fs * 2), min(512, x.size)))
|
| 731 |
f, pxx = welch(x, fs=fs, window=get_window("hann", nper), nperseg=nper, detrend="constant")
|
| 732 |
|
|
@@ -741,11 +840,11 @@ def hr_from_welch(x: np.ndarray, fs: int = 30, lo: float = 0.7, hi: float = 3.5)
|
|
| 741 |
|
| 742 |
fpk = float(f_band[np.argmax(p_band)])
|
| 743 |
bpm = fpk * 60.0
|
| 744 |
-
# clip to plausible range
|
| 745 |
return float(np.clip(bpm, 30.0, 220.0))
|
| 746 |
except Exception:
|
| 747 |
return 0.0
|
| 748 |
|
|
|
|
| 749 |
def compute_rmssd(x: np.ndarray, fs: int = 30) -> float:
|
| 750 |
"""
|
| 751 |
HRV RMSSD from peaks; robust to short/flat segments.
|
|
@@ -754,7 +853,6 @@ def compute_rmssd(x: np.ndarray, fs: int = 30) -> float:
|
|
| 754 |
if x.size < int(fs * 5.0):
|
| 755 |
return 0.0
|
| 756 |
try:
|
| 757 |
-
# peak distance ~ 0.5s minimum (avoid double counting)
|
| 758 |
peaks, _ = find_peaks(x, distance=max(1, int(0.5 * fs)))
|
| 759 |
if len(peaks) < 3:
|
| 760 |
return 0.0
|
|
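compute_rmssd() derives HRV from successive peak-to-peak intervals. A hedged sketch of the same calculation from detected peaks (not the app's exact helper):

import numpy as np
from scipy.signal import find_peaks

def rmssd_ms(bvp: np.ndarray, fs: int = 30) -> float:
    # Peaks at least ~0.5 s apart to avoid double-counting beats
    peaks, _ = find_peaks(bvp, distance=max(1, int(0.5 * fs)))
    if len(peaks) < 3:
        return 0.0
    ibi = np.diff(peaks) / fs * 1000.0     # inter-beat intervals in ms
    return float(np.sqrt(np.mean(np.diff(ibi) ** 2)))

t = np.arange(0, 30, 1.0 / 30)
print(rmssd_ms(np.sin(2 * np.pi * 1.2 * t), fs=30))   # ~0 for a perfectly regular pulse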
@@ -765,6 +863,7 @@ def compute_rmssd(x: np.ndarray, fs: int = 30) -> float:
|
|
| 765 |
except Exception:
|
| 766 |
return 0.0
|
| 767 |
|
|
|
|
| 768 |
def postprocess_bvp(pred: np.ndarray, fs: int = 30) -> Tuple[np.ndarray, float]:
|
| 769 |
"""
|
| 770 |
Filters BVP to HR band + returns smoothed HR (BPM) with gentle pull toward resting band.
|
|
@@ -787,7 +886,6 @@ def postprocess_bvp(pred: np.ndarray, fs: int = 30) -> Tuple[np.ndarray, float]:
|
|
| 787 |
lo, hi = REST_HR_RANGE
|
| 788 |
if hr < lo or hr > hi:
|
| 789 |
dist = abs(hr - REST_HR_TARGET)
|
| 790 |
-
# farther away -> stronger pull
|
| 791 |
alpha = float(np.clip(0.25 + 0.02 * dist, 0.25, 0.65))
|
| 792 |
hr = alpha * hr + (1.0 - alpha) * REST_HR_TARGET
|
| 793 |
|
|
@@ -802,6 +900,7 @@ def postprocess_bvp(pred: np.ndarray, fs: int = 30) -> Tuple[np.ndarray, float]:
|
|
| 802 |
|
| 803 |
return y_filt, float(hr)
|
| 804 |
|
|
|
|
| 805 |
def draw_face_and_roi(frame_bgr: np.ndarray,
|
| 806 |
face_bbox: Optional[Tuple[int, int, int, int]],
|
| 807 |
roi_bbox: Optional[Tuple[int, int, int, int]],
|
|
@@ -820,6 +919,7 @@ def draw_face_and_roi(frame_bgr: np.ndarray,
|
|
| 820 |
cv2.putText(vis, label, (rx, max(20, ry - 8)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 220, 0), 2)
|
| 821 |
return vis
|
| 822 |
|
|
|
|
| 823 |
def roi_bbox_from_face(face_bbox: Tuple[int, int, int, int],
|
| 824 |
roi_type: str,
|
| 825 |
frame_shape: Tuple[int, int, int]) -> Tuple[int, int, int, int]:
|
|
@@ -845,6 +945,7 @@ def roi_bbox_from_face(face_bbox: Tuple[int, int, int, int],
|
|
| 845 |
return (0, 0, 0, 0)
|
| 846 |
return (rx, ry, rx2 - rx, ry2 - ry)
|
| 847 |
|
|
|
|
| 848 |
def render_preprocessed_roi(chw: np.ndarray) -> np.ndarray:
|
| 849 |
"""
|
| 850 |
Visualize the model input (C,H,W, normalized). Returns HxWx3 uint8 BGR.
|
|
@@ -853,9 +954,7 @@ def render_preprocessed_roi(chw: np.ndarray) -> np.ndarray:
|
|
| 853 |
if chw is None or chw.ndim != 3 or chw.shape[0] != 3:
|
| 854 |
return np.zeros((128, 128, 3), dtype=np.uint8)
|
| 855 |
|
| 856 |
-
# Undo channel-first & normalization to a viewable image
|
| 857 |
img = chw.copy()
|
| 858 |
-
# Re-normalize to 0..1 by min-max of the tensor to "show" contrast
|
| 859 |
vmin, vmax = float(img.min()), float(img.max())
|
| 860 |
if vmax <= vmin + 1e-6:
|
| 861 |
img = np.zeros_like(img)
|
|
@@ -865,6 +964,7 @@ def render_preprocessed_roi(chw: np.ndarray) -> np.ndarray:
|
|
| 865 |
img = (img.transpose(1, 2, 0)[:, :, ::-1] * 255.0).clip(0, 255).astype(np.uint8) # RGB->BGR
|
| 866 |
return img
|
| 867 |
|
|
|
|
| 868 |
def _gt_time_axis(gt_len: int, gt_fs: float) -> Optional[np.ndarray]:
|
| 869 |
if gt_len <= 1:
|
| 870 |
return None
|
|
@@ -872,6 +972,7 @@ def _gt_time_axis(gt_len: int, gt_fs: float) -> Optional[np.ndarray]:
|
|
| 872 |
return np.arange(gt_len, dtype=float) / float(gt_fs)
|
| 873 |
return None # will fall back to length-matching overlay
|
| 874 |
|
|
|
|
| 875 |
def plot_signals_with_gt(time_axis: np.ndarray,
|
| 876 |
raw_signal: np.ndarray,
|
| 877 |
post_signal: np.ndarray,
|
|
@@ -916,20 +1017,17 @@ def plot_signals_with_gt(time_axis: np.ndarray,
|
|
| 916 |
t_new = _np.asarray(t_new, dtype=float).ravel()
|
| 917 |
|
| 918 |
if x_t.size < 2 or y.size != x_t.size:
|
| 919 |
-
# Fallback: length-based resize to t_new length
|
| 920 |
if y.size == 0 or t_new.size == 0:
|
| 921 |
return _np.zeros_like(t_new)
|
| 922 |
idx = _np.linspace(0, y.size - 1, num=t_new.size)
|
| 923 |
return _np.interp(_np.arange(t_new.size), idx, y)
|
| 924 |
|
| 925 |
-
# Enforce strictly increasing time (dedup if needed)
|
| 926 |
order = _np.argsort(x_t)
|
| 927 |
x_t = x_t[order]
|
| 928 |
y = y[order]
|
| 929 |
mask = _np.concatenate(([True], _np.diff(x_t) > 0))
|
| 930 |
x_t = x_t[mask]
|
| 931 |
y = y[mask]
|
| 932 |
-
# Clip t_new to the valid domain to avoid edge extrapolation artifacts
|
| 933 |
t_clip = _np.clip(t_new, x_t[0], x_t[-1])
|
| 934 |
return _np.interp(t_clip, x_t, y)
|
| 935 |
|
|
@@ -941,9 +1039,7 @@ def plot_signals_with_gt(time_axis: np.ndarray,
|
|
| 941 |
n = int(min(len(x), len(y)))
|
| 942 |
x = x[:n]; y = y[:n]
|
| 943 |
max_lag = int(max(1, min(n - 1, round(max_lag_s * fs_local))))
|
| 944 |
-
# valid lags: negative means GT should be shifted left (advance) relative to Pred
|
| 945 |
lags = _np.arange(-max_lag, max_lag + 1)
|
| 946 |
-
# compute correlation for each lag
|
| 947 |
best_corr = -_np.inf
|
| 948 |
best_lag = 0
|
| 949 |
for L in lags:
|
|
@@ -975,10 +1071,8 @@ def plot_signals_with_gt(time_axis: np.ndarray,
|
|
| 975 |
out = _np.empty_like(y)
|
| 976 |
out[:] = _np.nan
|
| 977 |
if shift > 0:
|
| 978 |
-
# delay: move content right
|
| 979 |
out[shift:] = y[:-shift]
|
| 980 |
else:
|
| 981 |
-
# advance: move content left
|
| 982 |
out[:shift] = y[-shift:]
|
| 983 |
return out
|
| 984 |
|
|
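The _best_lag/_apply_lag helpers in the hunks above align ground truth to the prediction by scanning a ±max_lag window for the highest correlation and then shifting GT by that lag (negative lag means GT trails the prediction and must be advanced). A hedged sketch of the same idea:

import numpy as np

def best_lag(pred: np.ndarray, gt: np.ndarray, fs: int, max_lag_s: float = 5.0) -> float:
    n = min(len(pred), len(gt))
    pred, gt = pred[:n], gt[:n]
    max_lag = int(max(1, min(n - 1, round(max_lag_s * fs))))
    best_r, best_l = -np.inf, 0
    for lag in range(-max_lag, max_lag + 1):
        if lag >= 0:
            a, b = pred[lag:], gt[:n - lag]
        else:
            a, b = pred[:lag], gt[-lag:]
        if len(a) < 8:
            continue
        r = np.corrcoef(a, b)[0, 1]
        if np.isfinite(r) and r > best_r:
            best_r, best_l = r, lag
    return best_l / fs   # seconds; negative => GT lags and should be advanced

rng = np.random.default_rng(0)
x = np.cumsum(rng.standard_normal(900))    # non-periodic smooth signal, 30 s @ 30 fps
pred, gt = x[15:], x[:-15]                 # gt is pred delayed by 15 samples (0.5 s)
print(best_lag(pred, gt, fs=30))           # ≈ -0.5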
@@ -986,7 +1080,6 @@ def plot_signals_with_gt(time_axis: np.ndarray,
|
|
| 986 |
raw = _np.asarray(raw_signal, dtype=float)
|
| 987 |
post = _np.asarray(post_signal, dtype=float)
|
| 988 |
|
| 989 |
-
# guard
|
| 990 |
if t.size == 0:
|
| 991 |
t = _np.arange(post.size, dtype=float) / max(fs, 1)
|
| 992 |
|
|
@@ -1003,21 +1096,17 @@ def plot_signals_with_gt(time_axis: np.ndarray,
|
|
| 1003 |
gt_t = _np.asarray(gt_time, dtype=float).ravel()
|
| 1004 |
gt_on_pred = _safe_interp(gt_t, gt, t)
|
| 1005 |
else:
|
| 1006 |
-
|
| 1007 |
-
|
| 1008 |
-
|
|
|
|
| 1009 |
|
| 1010 |
-
# Band-limit both before correlation/HR
|
| 1011 |
pred_bp = _bandpass(post, fs)
|
| 1012 |
gt_bp = _bandpass(gt_on_pred, fs)
|
| 1013 |
|
| 1014 |
-
# Estimate best lag (sec) of GT relative to Pred
|
| 1015 |
lag_sec = _best_lag(pred_bp, gt_bp, fs_local=fs, max_lag_s=5.0)
|
| 1016 |
-
|
| 1017 |
-
# Apply lag to GT for visualization and correlation
|
| 1018 |
gt_aligned = _apply_lag(gt_on_pred, lag_sec, fs_local=fs)
|
| 1019 |
|
| 1020 |
-
# Compute Pearson r on overlapping valid samples
|
| 1021 |
valid = _np.isfinite(gt_aligned) & _np.isfinite(pred_bp)
|
| 1022 |
if valid.sum() >= 16:
|
| 1023 |
pearson_r = float(_np.corrcoef(z(pred_bp[valid]), z(gt_aligned[valid]))[0, 1])
|
|
@@ -1026,25 +1115,21 @@ def plot_signals_with_gt(time_axis: np.ndarray,
|
|
| 1026 |
|
| 1027 |
hr_gt = _welch_hr(gt_bp[_np.isfinite(gt_bp)], fs)
|
| 1028 |
|
| 1029 |
-
|
| 1030 |
_plt.figure(figsize=(13, 6), dpi=110)
|
| 1031 |
gs = _GridSpec(2, 2, height_ratios=[1, 1], width_ratios=[1, 1], wspace=0.25, hspace=0.35)
|
| 1032 |
|
| 1033 |
-
# (1) Raw Pred
|
| 1034 |
ax1 = _plt.subplot(gs[0, 0])
|
| 1035 |
ax1.plot(t, raw - (raw.mean() if raw.size else 0.0), linewidth=1.5)
|
| 1036 |
ax1.set_title(f"Predicted (Raw) — fs={fs} Hz")
|
| 1037 |
ax1.set_xlabel("Time (s)"); ax1.set_ylabel("Amplitude")
|
| 1038 |
ax1.grid(True, alpha=0.3)
|
| 1039 |
|
| 1040 |
-
# (2) Post Pred
|
| 1041 |
ax2 = _plt.subplot(gs[0, 1])
|
| 1042 |
ax2.plot(t, post - (post.mean() if post.size else 0.0), linewidth=1.5)
|
| 1043 |
ax2.set_title("Predicted (Post-processed)")
|
| 1044 |
ax2.set_xlabel("Time (s)"); ax2.set_ylabel("Amplitude")
|
| 1045 |
ax2.grid(True, alpha=0.3)
|
| 1046 |
|
| 1047 |
-
# (3) Overlay Pred vs GT (z-scored) OR just post
|
| 1048 |
ax3 = _plt.subplot(gs[1, :])
|
| 1049 |
ax3.plot(t, z(post), label="Pred (post)", linewidth=1.6)
|
| 1050 |
|
|
@@ -1053,7 +1138,6 @@ def plot_signals_with_gt(time_axis: np.ndarray,
|
|
| 1053 |
gt_aligned = _apply_lag(gt_bp, lag_sec, fs_local=fs)
|
| 1054 |
ax3.plot(t, z(gt_aligned), label=f"GT (aligned {lag_sec:+.2f}s)", linewidth=1.2, alpha=0.9)
|
| 1055 |
|
| 1056 |
-
# metrics box
|
| 1057 |
txt = [
|
| 1058 |
f"HR_pred: {hr_pred:.1f} BPM",
|
| 1059 |
f"HR_gt: {hr_gt:.1f} BPM",
|
|
@@ -1088,16 +1172,23 @@ def detect_face(frame: np.ndarray) -> Optional[Tuple[int, int, int, int]]:
|
|
| 1088 |
if frame is None or frame.size == 0:
|
| 1089 |
return None
|
| 1090 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1091 |
try:
|
| 1092 |
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
| 1093 |
except Exception:
|
| 1094 |
# If color conversion fails, assume already gray
|
| 1095 |
-
|
|
|
|
|
|
|
|
|
|
| 1096 |
|
| 1097 |
# Light preproc to improve Haar performance
|
| 1098 |
gray = cv2.equalizeHist(gray)
|
| 1099 |
|
| 1100 |
-
faces_all = []
|
| 1101 |
# Try a couple of parameter combos to be more forgiving
|
| 1102 |
params = [
|
| 1103 |
dict(scaleFactor=1.05, minNeighbors=3),
|
|
@@ -1118,6 +1209,7 @@ def detect_face(frame: np.ndarray) -> Optional[Tuple[int, int, int, int]]:
|
|
| 1118 |
# Return the largest (by area)
|
| 1119 |
return max(faces_all, key=lambda f: f[2] * f[3])
|
| 1120 |
|
|
|
|
| 1121 |
def crop_roi(face_bbox: Tuple[int, int, int, int], roi_type: str, frame: np.ndarray) -> Optional[np.ndarray]:
|
| 1122 |
"""
|
| 1123 |
Crop ROI from the frame based on a face bbox and the selected roi_type.
|
|
@@ -1151,9 +1243,15 @@ def crop_roi(face_bbox: Tuple[int, int, int, int], roi_type: str, frame: np.ndar
|
|
| 1151 |
return None
|
| 1152 |
return roi
|
| 1153 |
|
| 1154 |
-
|
| 1155 |
-
|
| 1156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1157 |
if face_bbox is None or frame is None or frame.size == 0:
|
| 1158 |
return None, None
|
| 1159 |
|
|
@@ -1180,6 +1278,7 @@ def crop_roi_with_bbox(face_bbox: Tuple[int, int, int, int],
|
|
| 1180 |
|
| 1181 |
return roi, (rx, ry, rx2 - rx, ry2 - ry)
|
| 1182 |
|
|
|
|
| 1183 |
def normalize_frame(face_bgr: np.ndarray, size: int) -> np.ndarray:
|
| 1184 |
"""
|
| 1185 |
PhysMamba-compatible normalization with DiffNormalized support.
|
|
@@ -1204,21 +1303,28 @@ def normalize_frame(face_bgr: np.ndarray, size: int) -> np.ndarray:
|
|
| 1204 |
chw = face[..., ::-1].transpose(2, 0, 1).astype(np.float32, copy=False)
|
| 1205 |
return chw
|
| 1206 |
|
| 1207 |
-
|
| 1208 |
-
|
| 1209 |
"""Attention visualization disabled - model architecture incompatible."""
|
| 1210 |
return None
|
| 1211 |
|
| 1212 |
-
|
| 1213 |
-
|
| 1214 |
-
|
| 1215 |
-
|
|
|
|
|
|
|
|
|
|
| 1216 |
return frame
|
| 1217 |
|
|
|
|
| 1218 |
def occlusion_saliency(roi_bgr, model, fs, patch=16, stride=12):
|
| 1219 |
H, W = roi_bgr.shape[:2]
|
| 1220 |
-
base_bvp = forward_bvp(
|
| 1221 |
-
|
|
|
|
|
|
|
|
|
|
| 1222 |
base_power = hr_from_welch(bandpass_filter(base_bvp, fs=fs), fs=fs)
|
| 1223 |
|
| 1224 |
heat = np.zeros((H, W), np.float32)
|
|
@@ -1226,15 +1332,20 @@ def occlusion_saliency(roi_bgr, model, fs, patch=16, stride=12):
|
|
| 1226 |
for x in range(0, W - patch + 1, stride):
|
| 1227 |
tmp = roi_bgr.copy()
|
| 1228 |
tmp[y:y+patch, x:x+patch] = 127 # occlude
|
| 1229 |
-
bvp = forward_bvp(
|
| 1230 |
-
|
|
|
|
|
|
|
|
|
|
| 1231 |
power = hr_from_welch(bandpass_filter(bvp, fs=fs), fs=fs)
|
| 1232 |
drop = max(0.0, base_power - power)
|
| 1233 |
heat[y:y+patch, x:x+patch] += drop
|
| 1234 |
heat -= heat.min()
|
| 1235 |
-
if heat.max() > 1e-8:
|
|
|
|
| 1236 |
return heat
|
| 1237 |
|
|
|
|
| 1238 |
def _call_model_try_orders(model: nn.Module, clip_tensor: torch.Tensor):
|
| 1239 |
"""
|
| 1240 |
Try common 5D layouts:
|
|
@@ -1251,6 +1362,7 @@ def _call_model_try_orders(model: nn.Module, clip_tensor: torch.Tensor):
|
|
| 1251 |
last_err = e
|
| 1252 |
raise last_err
|
| 1253 |
|
|
|
|
| 1254 |
def forward_bvp(model: nn.Module, clip_tensor: torch.Tensor) -> np.ndarray:
|
| 1255 |
"""
|
| 1256 |
Forward and extract a 1D time-like BVP vector with length T_clip.
|
|
@@ -1297,7 +1409,8 @@ def forward_bvp(model: nn.Module, clip_tensor: torch.Tensor) -> np.ndarray:
|
|
| 1297 |
B, K = out.shape
|
| 1298 |
if B == 1:
|
| 1299 |
v = out[0]
|
| 1300 |
-
return (v.numpy() if v.shape[0] == T_clip
|
|
|
|
| 1301 |
if B == T_clip:
|
| 1302 |
return out[:, 0].numpy()
|
| 1303 |
if K == T_clip:
|
|
@@ -1342,6 +1455,7 @@ def forward_bvp(model: nn.Module, clip_tensor: torch.Tensor) -> np.ndarray:
|
|
| 1342 |
val = float(out.mean().item()) if out.numel() else 0.0
|
| 1343 |
return np.full(T_clip, val, dtype=np.float32)
|
| 1344 |
|
|
|
|
| 1345 |
def _fallback_bvp_from_means(means, fs: int) -> np.ndarray:
|
| 1346 |
"""
|
| 1347 |
Classical rPPG from green-channel means when the model yields nothing.
|
|
@@ -1366,19 +1480,25 @@ def _fallback_bvp_from_means(means, fs: int) -> np.ndarray:
|
|
| 1366 |
std = float(np.std(y)) + 1e-6
|
| 1367 |
return (y / std).astype(np.float32)
|
| 1368 |
|
|
|
|
| 1369 |
def _to_floats(s: str) -> List[float]:
|
| 1370 |
"""
|
| 1371 |
Extract all real numbers from free-form text, including scientific notation.
|
| 1372 |
-
Gracefully ignores
|
| 1373 |
"""
|
| 1374 |
if not isinstance(s, str) or not s:
|
| 1375 |
return []
|
| 1376 |
|
|
|
|
| 1377 |
s = re.sub(r"(#|//|;).*?$", "", s, flags=re.MULTILINE)
|
| 1378 |
|
|
|
|
| 1379 |
s = s.replace(",", " ").replace(";", " ")
|
| 1380 |
|
| 1381 |
-
toks = re.findall(
|
|
|
|
|
|
|
|
|
|
| 1382 |
out: List[float] = []
|
| 1383 |
for t in toks:
|
| 1384 |
try:
|
|
@@ -1418,7 +1538,6 @@ def parse_ground_truth_file(gt_path: str) -> Tuple[np.ndarray, float, float]:
|
|
| 1418 |
diffs = diffs[np.isfinite(diffs) & (diffs > 0)]
|
| 1419 |
return (1.0 / float(np.median(diffs))) if diffs.size else 0.0
|
| 1420 |
|
| 1421 |
-
|
| 1422 |
def _hr_from_bvp(bvp: np.ndarray, fs_hint: float) -> float:
|
| 1423 |
if bvp is None or bvp.size == 0:
|
| 1424 |
return 0.0
|
|
@@ -1426,9 +1545,16 @@ def parse_ground_truth_file(gt_path: str) -> Tuple[np.ndarray, float, float]:
|
|
| 1426 |
bp = bandpass_filter(bvp.astype(float), fs=fs_use)
|
| 1427 |
return hr_from_welch(bp, fs=fs_use)
|
| 1428 |
|
| 1429 |
-
|
|
|
|
|
|
|
|
|
|
| 1430 |
try:
|
| 1431 |
-
lines = [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1432 |
ppg_vals = _to_floats(lines[0]) if len(lines) >= 1 else []
|
| 1433 |
hr_vals = _to_floats(lines[1]) if len(lines) >= 2 else []
|
| 1434 |
t_vals = _to_floats(lines[2]) if len(lines) >= 3 else []
|
|
@@ -1452,7 +1578,7 @@ def parse_ground_truth_file(gt_path: str) -> Tuple[np.ndarray, float, float]:
|
|
| 1452 |
# Fall through to generic handlers
|
| 1453 |
pass
|
| 1454 |
|
| 1455 |
-
|
| 1456 |
if ext == ".txt":
|
| 1457 |
try:
|
| 1458 |
nums = _to_floats(p.read_text(encoding="utf-8", errors="ignore"))
|
|
@@ -1462,12 +1588,10 @@ def parse_ground_truth_file(gt_path: str) -> Tuple[np.ndarray, float, float]:
|
|
| 1462 |
except Exception:
|
| 1463 |
return np.array([]), 0.0, 0.0
|
| 1464 |
|
| 1465 |
-
|
| 1466 |
if ext == ".json":
|
| 1467 |
try:
|
| 1468 |
data = json.loads(p.read_text(encoding="utf-8", errors="ignore"))
|
| 1469 |
-
# Try several paths for BVP array
|
| 1470 |
-
bvp = None
|
| 1471 |
|
| 1472 |
def _seek(obj, keys):
|
| 1473 |
for k in keys:
|
|
@@ -1475,9 +1599,7 @@ def parse_ground_truth_file(gt_path: str) -> Tuple[np.ndarray, float, float]:
|
|
| 1475 |
return obj[k]
|
| 1476 |
return None
|
| 1477 |
|
| 1478 |
-
# Direct top-level
|
| 1479 |
bvp = _seek(data, ("ppg", "bvp", "signal", "wave"))
|
| 1480 |
-
# Common nested containers
|
| 1481 |
if bvp is None:
|
| 1482 |
for container_key in ("FullPackage", "package", "data", "gt", "ground_truth"):
|
| 1483 |
if container_key in data:
|
|
@@ -1491,7 +1613,6 @@ def parse_ground_truth_file(gt_path: str) -> Tuple[np.ndarray, float, float]:
|
|
| 1491 |
else:
|
| 1492 |
bvp = np.array([], dtype=float)
|
| 1493 |
|
| 1494 |
-
# fs / hr (accept scalar or array)
|
| 1495 |
fs_hint = 0.0
|
| 1496 |
if "fs" in data and isinstance(data["fs"], (int, float)) and data["fs"] > 0:
|
| 1497 |
fs_hint = float(data["fs"])
|
|
@@ -1507,11 +1628,10 @@ def parse_ground_truth_file(gt_path: str) -> Tuple[np.ndarray, float, float]:
|
|
| 1507 |
except Exception:
|
| 1508 |
return np.array([]), 0.0, 0.0
|
| 1509 |
|
| 1510 |
-
|
| 1511 |
if ext == ".csv":
|
| 1512 |
try:
|
| 1513 |
df = pd.read_csv(p)
|
| 1514 |
-
# Normalize column names
|
| 1515 |
cols = {str(c).strip().lower(): c for c in df.columns}
|
| 1516 |
|
| 1517 |
def _first_match(names):
|
|
@@ -1537,18 +1657,16 @@ def parse_ground_truth_file(gt_path: str) -> Tuple[np.ndarray, float, float]:
|
|
| 1537 |
except Exception:
|
| 1538 |
return np.array([]), 0.0, 0.0
|
| 1539 |
|
| 1540 |
-
|
| 1541 |
if ext == ".mat":
|
| 1542 |
try:
|
| 1543 |
md = loadmat(str(p))
|
| 1544 |
-
# look for most likely array
|
| 1545 |
arr = None
|
| 1546 |
for key in ("ppg", "bvp", "signal", "wave"):
|
| 1547 |
if key in md and isinstance(md[key], np.ndarray):
|
| 1548 |
arr = md[key]
|
| 1549 |
break
|
| 1550 |
if arr is None:
|
| 1551 |
-
# fallback: first 1-D array
|
| 1552 |
for v in md.values():
|
| 1553 |
if isinstance(v, np.ndarray) and v.ndim == 1:
|
| 1554 |
arr = v
|
|
@@ -1582,7 +1700,7 @@ def parse_ground_truth_file(gt_path: str) -> Tuple[np.ndarray, float, float]:
|
|
| 1582 |
try:
|
| 1583 |
bvp = np.asarray(np.load(str(p)), dtype=float).ravel()
|
| 1584 |
fs_hint, hr = 0.0, 0.0
|
| 1585 |
-
|
| 1586 |
sidecar = p.with_suffix(".json")
|
| 1587 |
if sidecar.exists():
|
| 1588 |
try:
|
|
@@ -1594,6 +1712,7 @@ def parse_ground_truth_file(gt_path: str) -> Tuple[np.ndarray, float, float]:
|
|
| 1594 |
hr = float(np.nanmean(v)) if isinstance(v, (list, tuple, np.ndarray)) else float(v)
|
| 1595 |
except Exception:
|
| 1596 |
pass
|
|
|
|
| 1597 |
if hr == 0.0 and bvp.size:
|
| 1598 |
hr = _hr_from_bvp(bvp, fs_hint)
|
| 1599 |
return bvp, hr, fs_hint
|
|
@@ -1603,6 +1722,7 @@ def parse_ground_truth_file(gt_path: str) -> Tuple[np.ndarray, float, float]:
|
|
| 1603 |
# Fallback (unsupported extension)
|
| 1604 |
return np.array([]), 0.0, 0.0
|
| 1605 |
|
|
|
|
| 1606 |
def scan_models() -> List[str]:
|
| 1607 |
if not MODEL_DIR.exists():
|
| 1608 |
return []
|
|
@@ -1614,8 +1734,10 @@ def scan_models() -> List[str]:
|
|
| 1614 |
|
| 1615 |
return models
|
| 1616 |
|
|
|
|
| 1617 |
_GLOBAL_CONTROLS: Dict[str, Dict] = {}
|
| 1618 |
|
|
|
|
| 1619 |
def ensure_controls(control_id: str) -> Tuple[str, Dict]:
|
| 1620 |
# Use a stable default so Pause/Resume/Stop work for the current run
|
| 1621 |
if not control_id:
|
|
@@ -1627,6 +1749,7 @@ def ensure_controls(control_id: str) -> Tuple[str, Dict]:
|
|
| 1627 |
}
|
| 1628 |
return control_id, _GLOBAL_CONTROLS[control_id]
|
| 1629 |
|
|
|
|
| 1630 |
def process_video_file(
|
| 1631 |
video_path: str,
|
| 1632 |
gt_file: Optional[str],
|
|
@@ -1639,9 +1762,13 @@ def process_video_file(
|
|
| 1639 |
"""
|
| 1640 |
Enhanced video processing with Grad-CAM attention visualization,
|
| 1641 |
plus per-frame illumination and motion logging.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1642 |
"""
|
| 1643 |
global _HR_SMOOTH
|
| 1644 |
-
global FRAME_METRICS #
|
| 1645 |
_HR_SMOOTH = None
|
| 1646 |
FRAME_METRICS = [] # reset per run
|
| 1647 |
|
|
@@ -1655,8 +1782,19 @@ def process_video_file(
|
|
| 1655 |
control_id, controls = ensure_controls(control_id)
|
| 1656 |
controls['stop'].clear()
|
| 1657 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1658 |
if not model_name:
|
| 1659 |
-
yield ("ERROR: No model selected"
|
| 1660 |
return
|
| 1661 |
|
| 1662 |
if isinstance(model_name, int):
|
|
@@ -1664,24 +1802,24 @@ def process_video_file(
|
|
| 1664 |
|
| 1665 |
model_path = MODEL_DIR / model_name
|
| 1666 |
if not model_path.exists():
|
| 1667 |
-
yield ("ERROR: Model not found"
|
| 1668 |
return
|
| 1669 |
|
| 1670 |
try:
|
| 1671 |
model, attention_viz = load_physmamba_model(model_path, DEVICE)
|
| 1672 |
except Exception as e:
|
| 1673 |
-
yield (f"ERROR loading model: {str(e)}"
|
| 1674 |
return
|
| 1675 |
|
| 1676 |
gt_bvp, gt_hr, gt_fs = parse_ground_truth_file(gt_file) if gt_file else (np.array([]), 0.0, 0.0)
|
| 1677 |
|
| 1678 |
if not video_path or not os.path.exists(video_path):
|
| 1679 |
-
yield ("ERROR: Video not found"
|
| 1680 |
return
|
| 1681 |
|
| 1682 |
cap = cv2.VideoCapture(video_path)
|
| 1683 |
if not cap.isOpened():
|
| 1684 |
-
yield ("ERROR: Cannot open video"
|
| 1685 |
return
|
| 1686 |
|
| 1687 |
fps = int(fps_input) if fps_input else int(cap.get(cv2.CAP_PROP_FPS) or 30)
|
|
@@ -1698,7 +1836,7 @@ def process_video_file(
|
|
| 1698 |
last_rmssd = 0.0
|
| 1699 |
last_attention = None
|
| 1700 |
|
| 1701 |
-
#
|
| 1702 |
prev_gray = None
|
| 1703 |
prev_roi_gray = None
|
| 1704 |
|
|
@@ -1712,8 +1850,15 @@ def process_video_file(
|
|
| 1712 |
raw_path = tmpdir / "raw_signal.png"
|
| 1713 |
post_path = tmpdir / "post_signal.png"
|
| 1714 |
|
| 1715 |
-
|
| 1716 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1717 |
|
| 1718 |
while True:
|
| 1719 |
if controls['stop'].is_set():
|
|
@@ -1730,13 +1875,13 @@ def process_video_file(
|
|
| 1730 |
|
| 1731 |
frame_idx += 1
|
| 1732 |
|
| 1733 |
-
#
|
| 1734 |
global_brightness = None
|
| 1735 |
global_motion = None
|
| 1736 |
roi_brightness = None
|
| 1737 |
roi_motion = None
|
| 1738 |
|
| 1739 |
-
#
|
| 1740 |
try:
|
| 1741 |
frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
| 1742 |
except Exception:
|
|
@@ -1762,7 +1907,7 @@ def process_video_file(
|
|
| 1762 |
roi = crop_roi(face, roi_type, frame)
|
| 1763 |
|
| 1764 |
if roi is not None and roi.size > 0:
|
| 1765 |
-
#
|
| 1766 |
try:
|
| 1767 |
roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
|
| 1768 |
except Exception:
|
|
@@ -1799,7 +1944,9 @@ def process_video_file(
|
|
| 1799 |
try:
|
| 1800 |
raw = forward_bvp(model, clip_t)
|
| 1801 |
if isinstance(raw, np.ndarray):
|
| 1802 |
-
raw = np.nan_to_num(
|
|
|
|
|
|
|
| 1803 |
bvp_out = raw if raw.size > 0 else None
|
| 1804 |
else:
|
| 1805 |
bvp_out = None
|
|
@@ -1807,7 +1954,7 @@ def process_video_file(
|
|
| 1807 |
print(f"[infer] forward_bvp error: {e}")
|
| 1808 |
bvp_out = None
|
| 1809 |
|
| 1810 |
-
#
|
| 1811 |
try:
|
| 1812 |
last_attention = extract_attention_map(model, clip_t, attention_viz)
|
| 1813 |
except Exception as e:
|
|
@@ -1815,7 +1962,9 @@ def process_video_file(
|
|
| 1815 |
last_attention = None
|
| 1816 |
|
| 1817 |
if bvp_out is None or bvp_out.size == 0:
|
| 1818 |
-
gbuf = np.nan_to_num(
|
|
|
|
|
|
|
| 1819 |
fb = _fallback_bvp_from_means(gbuf, fs=fps)
|
| 1820 |
if isinstance(fb, np.ndarray) and fb.size > 0:
|
| 1821 |
bvp_out = fb
|
|
@@ -1829,7 +1978,9 @@ def process_video_file(
|
|
| 1829 |
bvp_stream = bvp_stream[-MAX_SIGNAL_LENGTH:]
|
| 1830 |
|
| 1831 |
if len(bvp_stream) >= int(5 * fps):
|
| 1832 |
-
seg = np.asarray(
|
|
|
|
|
|
|
| 1833 |
_, last_bpm = postprocess_bvp(seg, fs=fps)
|
| 1834 |
last_rmssd = compute_rmssd(seg, fs=fps)
|
| 1835 |
|
|
@@ -1841,10 +1992,17 @@ def process_video_file(
|
|
| 1841 |
last_infer = frame_idx
|
| 1842 |
|
| 1843 |
else:
|
| 1844 |
-
cv2.putText(
|
| 1845 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1846 |
|
| 1847 |
-
#
|
| 1848 |
try:
|
| 1849 |
time_s = frame_idx / float(fps) if fps > 0 else float(frame_idx)
|
| 1850 |
except Exception:
|
|
@@ -1859,11 +2017,24 @@ def process_video_file(
|
|
| 1859 |
"roi_motion": float(roi_motion) if roi_motion is not None else None,
|
| 1860 |
})
|
| 1861 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1862 |
if last_bpm > 0:
|
| 1863 |
color = (0, 255, 0) if 55 <= last_bpm <= 100 else (0, 165, 255)
|
| 1864 |
cv2.rectangle(vis_frame, (10, 10), (360, 65), (0, 0, 0), -1)
|
| 1865 |
-
cv2.putText(
|
| 1866 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1867 |
|
| 1868 |
vis_attention = create_attention_overlay(frame, last_attention, attention_viz)
|
| 1869 |
|
|
@@ -1934,6 +2105,7 @@ def process_video_file(
|
|
| 1934 |
elapsed = now - start_time
|
| 1935 |
status = f"Frame {frame_idx}/{total_frames} | Time {elapsed:.1f}s | HR {last_bpm:.1f} BPM"
|
| 1936 |
|
|
|
|
| 1937 |
yield (
|
| 1938 |
status,
|
| 1939 |
f"{last_bpm:.1f}" if last_bpm > 0 else None,
|
|
@@ -1944,7 +2116,11 @@ def process_video_file(
|
|
| 1944 |
str(signal_path) if signal_path.exists() else None,
|
| 1945 |
str(raw_path) if raw_path.exists() else None,
|
| 1946 |
str(post_path) if post_path.exists() else None,
|
| 1947 |
-
None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1948 |
)
|
| 1949 |
|
| 1950 |
next_display = now + (1.0 / DISPLAY_FPS)
|
|
@@ -1984,7 +2160,7 @@ def process_video_file(
|
|
| 1984 |
except Exception:
|
| 1985 |
pass
|
| 1986 |
|
| 1987 |
-
#
|
| 1988 |
frame_metrics_path = None
|
| 1989 |
if FRAME_METRICS:
|
| 1990 |
try:
|
|
@@ -1995,9 +2171,13 @@ def process_video_file(
|
|
| 1995 |
print(f"[metrics] Failed to save frame metrics CSV: {e}")
|
| 1996 |
frame_metrics_path = None
|
| 1997 |
|
|
|
|
|
|
|
|
|
|
| 1998 |
elapsed = time.time() - start_time
|
| 1999 |
final_status = f"Complete | {frame_idx} frames | {elapsed:.1f}s | HR {last_bpm:.1f} BPM"
|
| 2000 |
|
|
|
|
| 2001 |
yield (
|
| 2002 |
final_status,
|
| 2003 |
f"{last_bpm:.1f}" if last_bpm > 0 else None,
|
|
@@ -2008,9 +2188,14 @@ def process_video_file(
|
|
| 2008 |
str(signal_path) if signal_path.exists() else None,
|
| 2009 |
str(raw_path) if raw_path.exists() else None,
|
| 2010 |
str(post_path) if post_path.exists() else None,
|
| 2011 |
-
str(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2012 |
)
|
| 2013 |
|
|
|
|
| 2014 |
def process_stream(
|
| 2015 |
input_source: str,
|
| 2016 |
video_path: Optional[str],
|
|
@@ -2024,8 +2209,10 @@ def process_stream(
|
|
| 2024 |
if input_source == "Live Webcam":
|
| 2025 |
yield from process_live_webcam(model_name, fps_input, roi_type, control_id)
|
| 2026 |
else:
|
| 2027 |
-
yield from process_video_file(
|
| 2028 |
-
|
|
|
|
|
|
|
| 2029 |
|
| 2030 |
def pause_processing(control_id: str) -> str:
|
| 2031 |
_, controls = ensure_controls(control_id)
|
|
@@ -2044,7 +2231,23 @@ def stop_processing(control_id: str) -> str:
|
|
| 2044 |
return "Stopped"
|
| 2045 |
|
| 2046 |
def reset_ui():
|
| 2047 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2048 |
|
| 2049 |
def handle_folder_upload(files):
|
| 2050 |
if not files:
|
|
@@ -2111,8 +2314,10 @@ with gr.Blocks(title="rPPG Analysis with Attention", theme=gr.themes.Soft()) as
|
|
| 2111 |
with gr.Column():
|
| 2112 |
video_upload = gr.Video(label="Upload Video", sources=["upload"])
|
| 2113 |
with gr.Column():
|
| 2114 |
-
gt_upload = gr.File(
|
| 2115 |
-
|
|
|
|
|
|
|
| 2116 |
|
| 2117 |
with gr.Row(visible=False) as folder_inputs:
|
| 2118 |
with gr.Column():
|
|
@@ -2177,7 +2382,11 @@ with gr.Blocks(title="rPPG Analysis with Attention", theme=gr.themes.Soft()) as
|
|
| 2177 |
)
|
| 2178 |
|
| 2179 |
with gr.Row():
|
| 2180 |
-
roi_dropdown = gr.Dropdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2181 |
|
| 2182 |
control_state = gr.State(value="")
|
| 2183 |
placeholder_state = gr.State(value=None)
|
|
@@ -2194,6 +2403,21 @@ with gr.Blocks(title="rPPG Analysis with Attention", theme=gr.themes.Soft()) as
|
|
| 2194 |
hr_output = gr.Textbox(label="HR (BPM)", interactive=False)
|
| 2195 |
gt_hr_output = gr.Textbox(label="GT HR (BPM)", interactive=False)
|
| 2196 |
rmssd_output = gr.Textbox(label="HRV RMSSD (ms)", interactive=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2197 |
|
| 2198 |
with gr.Row():
|
| 2199 |
with gr.Column():
|
|
@@ -2232,20 +2456,51 @@ with gr.Blocks(title="rPPG Analysis with Attention", theme=gr.themes.Soft()) as
|
|
| 2232 |
).then(
|
| 2233 |
reset_ui,
|
| 2234 |
inputs=None,
|
| 2235 |
-
outputs=[
|
| 2236 |
-
|
| 2237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2238 |
)
|
| 2239 |
|
| 2240 |
-
def run_processing(
|
| 2241 |
-
|
| 2242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2243 |
|
| 2244 |
if isinstance(model_name, int):
|
| 2245 |
model_name = str(model_name)
|
| 2246 |
|
| 2247 |
if not model_name:
|
| 2248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2249 |
return
|
| 2250 |
|
| 2251 |
if input_source == "Video File":
|
|
@@ -2257,12 +2512,12 @@ with gr.Blocks(title="rPPG Analysis with Attention", theme=gr.themes.Soft()) as
|
|
| 2257 |
else: # Live Webcam
|
| 2258 |
video_path, gt_file = None, None
|
| 2259 |
|
|
|
|
| 2260 |
yield from process_stream(
|
| 2261 |
input_source, video_path, gt_file,
|
| 2262 |
model_name, fps, max_sec, roi, ctrl_id
|
| 2263 |
)
|
| 2264 |
|
| 2265 |
-
|
| 2266 |
run_btn.click(
|
| 2267 |
fn=run_processing,
|
| 2268 |
inputs=[
|
|
@@ -2287,35 +2542,11 @@ with gr.Blocks(title="rPPG Analysis with Attention", theme=gr.themes.Soft()) as
|
|
| 2287 |
signal_output,
|
| 2288 |
raw_signal_output,
|
| 2289 |
post_signal_output,
|
| 2290 |
-
csv_output
|
| 2291 |
-
|
| 2292 |
-
|
| 2293 |
-
|
| 2294 |
-
|
| 2295 |
-
run_btn.click(
|
| 2296 |
-
fn=run_processing,
|
| 2297 |
-
inputs=[
|
| 2298 |
-
input_source,
|
| 2299 |
-
video_upload,
|
| 2300 |
-
folder_video,
|
| 2301 |
-
folder_gt,
|
| 2302 |
-
model_dropdown,
|
| 2303 |
-
fps_slider,
|
| 2304 |
-
max_seconds_slider,
|
| 2305 |
-
roi_dropdown,
|
| 2306 |
-
control_state
|
| 2307 |
-
],
|
| 2308 |
-
outputs=[
|
| 2309 |
-
status_text,
|
| 2310 |
-
hr_output,
|
| 2311 |
-
gt_hr_output,
|
| 2312 |
-
rmssd_output,
|
| 2313 |
-
frame_output,
|
| 2314 |
-
attention_output,
|
| 2315 |
-
signal_output,
|
| 2316 |
-
raw_signal_output,
|
| 2317 |
-
post_signal_output,
|
| 2318 |
-
csv_output
|
| 2319 |
]
|
| 2320 |
)
|
| 2321 |
|
|
|
|
The same hunks rendered from the updated file, with added lines marked +:

 import torch.nn as nn
 import torch.nn.functional as F

 from scipy import signal
 from scipy.signal import find_peaks, welch, get_window

 ...

 import gradio as gr

+# Global buffer for per-frame illumination/motion metrics
+FRAME_METRICS: List[Dict] = []

 class PhysMambaattention_viz:
     """Simplified Grad-CAM for PhysMamba."""


 def apply_diff_normalized(frames: List[np.ndarray]) -> np.ndarray:
+    """
+    Apply DiffNormalized preprocessing from the PhysMamba paper:
+
+        diff_t = (I_t - I_{t-1}) / (I_t + I_{t-1} + eps)
+
+    then global std-normalize.
+
+    frames: list of HxWx3 uint8 or float32 arrays (RGB or BGR, consistent).
+    Returns: (T, H, W, C) float32.
+    """
+    if not frames:
+        return np.zeros((0,), dtype=np.float32)
+
     if len(frames) < 2:
+        f0 = frames[0].astype(np.float32)
+        return np.stack([np.zeros_like(f0, dtype=np.float32)], axis=0)
+
     diff_frames = []
     for i in range(len(frames)):
         if i == 0:
             diff_frames.append(np.zeros_like(frames[0], dtype=np.float32))
         else:
             curr = frames[i].astype(np.float32)
+            prev = frames[i - 1].astype(np.float32)
+            denom = curr + prev + 1e-8
+            diff = (curr - prev) / denom
             diff_frames.append(diff)
+
+    diff_array = np.stack(diff_frames).astype(np.float32)
+    std = float(diff_array.std()) + 1e-8
+    diff_array /= std
     return diff_array

+def preprocess_for_physmamba(
+    frames: List[np.ndarray],
+    target_frames: int = 128,
+    target_size: int = 128
+) -> torch.Tensor:
+    """
+    Complete DiffNormalized preprocessing pipeline to produce
+    a PhysMamba-compatible clip tensor of shape [1, 3, T, H, W].
+
+    NOTE: This path is *not* used in the current live demo, which instead
+    uses normalize_frame() + forward_bvp(). Keep for future experiments.
+    """
+    if not frames:
+        # Dummy tensor; caller should guard length > 0
+        return torch.zeros(1, 3, target_frames, target_size, target_size, dtype=torch.float32)
+
+    # Temporal sampling / padding to target_frames
     if len(frames) < target_frames:
         frames = frames + [frames[-1]] * (target_frames - len(frames))
     elif len(frames) > target_frames:
+        idx = np.linspace(0, len(frames) - 1, target_frames).astype(int)
+        frames = [frames[i] for i in idx]
+
+    # Convert to RGB and resize
+    frames_rgb = [f[..., ::-1].copy() for f in frames]  # BGR->RGB
     frames_resized = [cv2.resize(f, (target_size, target_size)) for f in frames_rgb]

+    # DiffNormalized
+    diff_array = apply_diff_normalized(frames_resized)   # (T, H, W, C)
+
+    # To [B, C, T, H, W]
+    diff_array = np.transpose(diff_array, (3, 0, 1, 2))  # (C, T, H, W)
+    diff_array = np.expand_dims(diff_array, axis=0)      # (1, C, T, H, W)
+
+    return torch.from_numpy(diff_array.astype(np.float32))
+
+
+# ---------------------------------------------------------------------------
+# Paths, device, constants
+# ---------------------------------------------------------------------------
+
+HERE = Path(__file__).resolve().parent
 MODEL_DIR = HERE / "final_model_release"
 LOG_DIR = HERE / "logs"
 ANALYSIS_DIR = HERE / "analysis"
+for d in (MODEL_DIR, LOG_DIR, ANALYSIS_DIR):
+    d.mkdir(exist_ok=True, parents=True)

 DEVICE = (
     torch.device("cuda") if torch.cuda.is_available()
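A hedged usage sketch of this preprocessing path (the live demo does not call it, per the NOTE above). It assumes the functions and constants from app.py are in scope; the video path is a placeholder:

import cv2

cap = cv2.VideoCapture("subject1.avi")      # placeholder path
frames = []
while len(frames) < 128:
    ok, frame = cap.read()
    if not ok:
        break
    frames.append(frame)
cap.release()

clip = preprocess_for_physmamba(frames, target_frames=128, target_size=128)
print(clip.shape)                            # torch.Size([1, 3, 128, 128, 128])
# bvp = forward_bvp(model, clip.to(DEVICE))  # then band-pass + Welch for HR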
| 315 |
else torch.device("cpu")
|
| 316 |
)
|
| 317 |
|
| 318 |
+
FACE_CASCADE = cv2.CascadeClassifier(
|
| 319 |
+
cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
|
| 320 |
+
)
|
| 321 |
|
| 322 |
DEFAULT_SIZE = 128 # input H=W to model
|
| 323 |
DEFAULT_T = 128 # clip length
|
|
|
|
| 335 |
GT_FILENAMES = {"ground_truth.txt", "gtdump.txt", "gt.txt"}
|
| 336 |
GT_EXTS = {".txt", ".csv", ".json"}
|
| 337 |
|
| 338 |
+
|
| 339 |
def _as_path(maybe) -> Optional[str]:
|
| 340 |
+
"""
|
| 341 |
+
Return a filesystem path from Gradio values (str, dict, Path, tempfile objects, lists).
|
| 342 |
+
|
| 343 |
+
Handles:
|
| 344 |
+
- plain strings
|
| 345 |
+
- pathlib.Path
|
| 346 |
+
- Gradio dicts with keys: 'name', 'path', 'file'
|
| 347 |
+
- file-like objects with .name
|
| 348 |
+
- lists (takes first element)
|
| 349 |
+
"""
|
| 350 |
if maybe is None:
|
| 351 |
return None
|
| 352 |
+
|
| 353 |
+
# Gradio can pass a list (e.g., multiple files / directory upload)
|
| 354 |
+
if isinstance(maybe, list):
|
| 355 |
+
if not maybe:
|
| 356 |
+
return None
|
| 357 |
+
return _as_path(maybe[0])
|
| 358 |
+
|
| 359 |
if isinstance(maybe, str):
|
| 360 |
return maybe
|
| 361 |
+
|
| 362 |
+
if isinstance(maybe, Path):
|
| 363 |
+
return str(maybe)
|
| 364 |
+
|
| 365 |
+
# Gradio v4 File/Video components often pass a dict
|
| 366 |
if isinstance(maybe, dict):
|
| 367 |
+
for key in ("name", "path", "file"):
|
| 368 |
+
v = maybe.get(key)
|
| 369 |
+
if isinstance(v, str) and v:
|
| 370 |
+
return v
|
| 371 |
+
return None
|
| 372 |
+
|
| 373 |
+
# tempfile-like / UploadedFile objects
|
| 374 |
+
name = getattr(maybe, "name", None)
|
| 375 |
+
if isinstance(name, str) and name:
|
| 376 |
return name
|
| 377 |
+
|
| 378 |
try:
|
| 379 |
return str(maybe)
|
| 380 |
except Exception:
|
| 381 |
return None
|
| 382 |
|
| 383 |
+
|
| 384 |
def _import_from_file(py_path: Path):
|
| 385 |
spec = importlib.util.spec_from_file_location(py_path.stem, str(py_path))
|
| 386 |
if not spec or not spec.loader:
|
|
|
|
| 389 |
spec.loader.exec_module(mod)
|
| 390 |
return mod
|
| 391 |
|
| 392 |
+
|
| 393 |
def _looks_like_video(p: Path) -> bool:
|
| 394 |
+
"""
|
| 395 |
+
Heuristic for 'video-like' files used in subject-folder discovery.
|
| 396 |
+
Treat .mat as video, plus common video extensions.
|
| 397 |
+
"""
|
| 398 |
if p.suffix.lower() == ".mat":
|
| 399 |
return True
|
| 400 |
return p.suffix.lower() in VIDEO_EXTENSIONS
|
| 401 |
|
| 402 |
+
|
| 403 |
class SimpleActivationAttention:
|
| 404 |
+
"""Lightweight attention visualization using forward activations (no gradients)."""
|
| 405 |
|
| 406 |
def __init__(self, model: nn.Module, device: torch.device):
|
| 407 |
self.model = model
|
| 408 |
self.device = device
|
| 409 |
+
self.activations: Optional[torch.Tensor] = None
|
| 410 |
+
self.hook_handle: Optional[Any] = None
|
| 411 |
|
| 412 |
def _activation_hook(self, module, input, output):
|
| 413 |
"""Capture activations during forward pass."""
|
| 414 |
+
try:
|
| 415 |
+
self.activations = output.detach()
|
| 416 |
+
except Exception:
|
| 417 |
+
self.activations = None
|
| 418 |
|
| 419 |
def register_hook(self):
|
| 420 |
+
"""Register hook on a suitable conv layer (last conv before Mamba if possible)."""
|
|
|
|
| 421 |
target = None
|
| 422 |
target_name = None
|
| 423 |
|
|
|
|
| 425 |
if isinstance(module, (nn.Conv2d, nn.Conv3d)) and 'mamba' not in name.lower() and 'ssm' not in name.lower():
|
| 426 |
target = module
|
| 427 |
target_name = name
|
| 428 |
+
|
| 429 |
if target is None:
|
| 430 |
print("⚠ [attention_viz] No suitable conv layer found, attention disabled")
|
| 431 |
return
|
|
|
|
| 434 |
print(f"✓ [attention_viz] Hook registered on {target_name} ({type(target).__name__})")
|
| 435 |
|
| 436 |
def generate(self, clip_tensor: torch.Tensor) -> Optional[np.ndarray]:
|
| 437 |
+
"""
|
| 438 |
+
Generate attention map from stored activations (call AFTER the forward pass).
|
| 439 |
+
|
| 440 |
+
Returns a 2D numpy array in [0,1] or None if unavailable.
|
| 441 |
+
"""
|
| 442 |
try:
|
| 443 |
if self.activations is None:
|
| 444 |
return None
|
| 445 |
|
|
|
|
| 446 |
act = self.activations
|
| 447 |
+
|
| 448 |
# Handle different tensor shapes
|
| 449 |
if act.dim() == 5: # [B, C, T, H, W]
|
| 450 |
+
# Average over channels and time: -> [B, H, W]
|
| 451 |
+
attention = act.mean(dim=[1, 2])
|
| 452 |
elif act.dim() == 4: # [B, C, H, W]
|
| 453 |
attention = act.mean(dim=1) # -> [B, H, W]
|
| 454 |
else:
|
| 455 |
print(f"⚠ [attention_viz] Unexpected activation shape: {act.shape}")
|
| 456 |
return None
|
| 457 |
|
| 458 |
+
# Take first batch
|
| 459 |
+
attention = attention[0].detach().cpu().numpy()
|
| 460 |
+
|
| 461 |
# Normalize to [0, 1]
|
| 462 |
+
a_min, a_max = attention.min(), attention.max()
|
| 463 |
+
if a_max > a_min:
|
| 464 |
+
attention = (attention - a_min) / (a_max - a_min)
|
| 465 |
+
else:
|
| 466 |
+
attention = np.zeros_like(attention, dtype=np.float32)
|
| 467 |
|
| 468 |
return attention
|
| 469 |
|
|
|
|
| 473 |
|
| 474 |
def visualize(self, heatmap: np.ndarray, frame: np.ndarray, alpha: float = 0.4) -> np.ndarray:
|
| 475 |
"""Overlay heatmap on frame."""
|
| 476 |
+
if heatmap is None or frame is None or frame.size == 0:
|
| 477 |
+
return frame
|
| 478 |
+
|
| 479 |
h, w = frame.shape[:2]
|
| 480 |
heatmap_resized = cv2.resize(heatmap, (w, h))
|
| 481 |
heatmap_uint8 = (heatmap_resized * 255).astype(np.uint8)
|
| 482 |
heatmap_colored = cv2.applyColorMap(heatmap_uint8, cv2.COLORMAP_JET)
|
| 483 |
+
overlay = cv2.addWeighted(frame, 1 - alpha, heatmap_colored, alpha, 0)
|
| 484 |
return overlay
|
| 485 |
|
| 486 |
def cleanup(self):
|
| 487 |
if self.hook_handle is not None:
|
| 488 |
self.hook_handle.remove()
|
| 489 |
+
self.hook_handle = None
|
| 490 |
+
self.activations = None
|
| 491 |
+
|
| 492 |
|
| 493 |
class VideoReader:
|
| 494 |
"""
|
|
|
|
| 504 |
self._idx = 0
|
| 505 |
self._len = 0
|
| 506 |
self._shape = None
|
| 507 |
+
self._fps = 0
|
| 508 |
|
| 509 |
if self.path.lower().endswith(".mat") and MAT_SUPPORT:
|
| 510 |
self._open_mat(self.path)
|
|
|
|
| 517 |
raise RuntimeError("Cannot open video")
|
| 518 |
self._cap = cap
|
| 519 |
self._len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
|
| 520 |
+
self._fps = float(cap.get(cv2.CAP_PROP_FPS) or 0.0)
|
| 521 |
|
| 522 |
def _open_mat(self, path: str):
|
| 523 |
try:
|
|
|
|
| 529 |
break
|
| 530 |
else:
|
| 531 |
arr = next((v for v in md.values() if isinstance(v, np.ndarray)), None)
|
| 532 |
+
|
| 533 |
if arr is None:
|
| 534 |
raise RuntimeError("No ndarray found in .mat")
|
| 535 |
|
|
|
|
| 537 |
# Normalize to (T,H,W,3)
|
| 538 |
if a.ndim == 4:
|
| 539 |
if a.shape[-1] == 3:
|
| 540 |
+
# Heuristic: if first dim is much smaller than spatial dims -> assume T
|
| 541 |
+
if a.shape[0] <= a.shape[1] and a.shape[0] <= a.shape[2]: # (T,H,W,3)
|
| 542 |
v = a
|
| 543 |
+
else: # (H,W,T,3) -> (T,H,W,3)
|
| 544 |
v = np.transpose(a, (2, 0, 1, 3))
|
| 545 |
else:
|
| 546 |
+
v = a[..., :1] # take first channel
|
| 547 |
elif a.ndim == 3:
|
| 548 |
+
# (T,H,W) or (H,W,T)
|
| 549 |
+
if a.shape[0] <= a.shape[1] and a.shape[0] <= a.shape[2]: # (T,H,W)
|
| 550 |
v = a
|
| 551 |
+
else: # (H,W,T) -> (T,H,W)
|
| 552 |
v = np.transpose(a, (2, 0, 1))
|
| 553 |
v = v[..., None]
|
| 554 |
else:
|
|
|
|
| 561 |
self._mat = v
|
| 562 |
self._len = v.shape[0]
|
| 563 |
self._shape = v.shape[1:3]
|
| 564 |
+
self._fps = 0.0 # unknown; caller can override
|
| 565 |
except Exception as e:
|
| 566 |
raise RuntimeError(f"Failed to open .mat video: {e}")
|
| 567 |
|
|
|
|
| 578 |
|
| 579 |
def fps(self, fallback: int = 30) -> int:
|
| 580 |
if self._mat is not None:
|
| 581 |
+
return int(fallback)
|
| 582 |
+
if self._fps and self._fps > 0:
|
| 583 |
+
return int(self._fps)
|
| 584 |
f = self._cap.get(cv2.CAP_PROP_FPS)
|
| 585 |
+
return int(f) if f and f > 0 else int(fallback)
|
| 586 |
|
| 587 |
def length(self) -> int:
|
| 588 |
return self._len
|
|
|
|
| 591 |
if self._cap is not None:
|
| 592 |
self._cap.release()
|
| 593 |
|
| 594 |
+
|
| 595 |
def roi_candidates(face: Tuple[int, int, int, int], frame: np.ndarray) -> Dict[str, np.ndarray]:
    x, y, w, h = face
    # forehead
    ...
    ff = frame[y:y + h, x:x + w]
    return {"forehead": fh, "cheeks": ck, "face": ff}

+
def roi_quality_score(patch: Optional[np.ndarray], fs: int = 30) -> float:
    if patch is None or patch.size == 0:
        return -1e9
    g = patch[..., 1].astype(np.float32) / 255.0  # green channel
    g = cv2.resize(g, (64, 64)).mean(axis=1)  # crude spatial pooling
    g = g - g.mean()
    try:
+       b, a = signal.butter(4, [0.7 / (fs / 2), 3.5 / (fs / 2)], btype="band")
        y = signal.filtfilt(b, a, g, method="gust")
    except Exception:
        y = g
    return float((y ** 2).mean())

+
def pick_auto_roi(face: Tuple[int, int, int, int],
+                 frame: np.ndarray,
+                 attn: Optional[np.ndarray] = None) -> Tuple[np.ndarray, str]:
+   """Simple ROI selection using signal quality + optional attention weighting."""
    cands = roi_candidates(face, frame)
    scores = {k: roi_quality_score(v) for k, v in cands.items()}
    ...
        ck_attn = attn_resized[int(y + 0.55 * h):int(y + 0.85 * h), int(x + 0.15 * w):int(x + 0.85 * w)].mean() if attn_resized.size > 0 else 0.0
        ff_attn = attn_resized[y:y+h, x:x+w].mean() if attn_resized.size > 0 else 0.0
        scores['forehead'] += fh_attn * 0.2
+       scores['cheeks'] += ck_attn * 0.2
+       scores['face'] += ff_attn * 0.2
    except Exception:
        pass

    best = max(scores, key=scores.get)
    return cands[best], best

+
def discover_subjects(root_dir: Path) -> List[Tuple[str, Optional[str]]]:
    """
    Walk root_dir; for each subject folder (or single-folder dataset), return (video_path, gt_path or None).
    ...
        uniq.append((v, g))
    return uniq

+
def find_physmamba_builder(repo_root: Path, model_file: str = "", model_class: str = "PhysMamba"):
    import inspect
    ...
        except Exception:
            continue

+   raise ImportError("Could not find PhysMamba model class")
+

def load_physmamba_model(ckpt_path: Path, device: torch.device,
                         model_file: str = "", model_class: str = "PhysMamba"):
    ...
        state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
        model.load_state_dict(state_dict, strict=False)
    except Exception:
+       # If loading fails, you still get an uninitialized model (for debugging)
        pass

    model.to(device).eval()
    ...
        with torch.no_grad():
            _ = model(torch.zeros(1, 3, 8, 128, 128, device=device))
    except Exception:
+       # Shape sanity check failed, but we keep the model usable.
        pass

+   # For now: attention visualization disabled (extract_attention_map returns None)
    attention_viz = None

    return model, attention_viz

+
def bandpass_filter(x: np.ndarray, fs: int = 30, low: float = 0.7, high: float = 3.5, order: int = 4) -> np.ndarray:
    """
    Stable band-pass with edge-safety and parameter clipping.
    ...

    try:
        b, a = signal.butter(order, [lo, hi], btype="band")
        padlen = min(3 * max(len(a), len(b)), max(0, x.size - 1))
        return signal.filtfilt(b, a, x, padlen=padlen)
    except Exception:
        return x

+
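# Illustrative sketch only (not called by the app): the elided body of bandpass_filter
# presumably normalizes the cut-offs by the Nyquist frequency and clips them into (0, 1)
# before calling signal.butter. The helper below shows that idea; names are assumptions.
def _sketch_band_edges(fs: int = 30, low: float = 0.7, high: float = 3.5) -> Tuple[float, float]:
    nyq = fs / 2.0                                     # Nyquist frequency
    lo = float(np.clip(low / nyq, 1e-4, 0.98))         # normalized lower edge
    hi = float(np.clip(high / nyq, lo + 1e-4, 0.999))  # normalized upper edge, kept above lo
    return lo, hi
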
def hr_from_welch(x: np.ndarray, fs: int = 30, lo: float = 0.7, hi: float = 3.5) -> float:
    """
    HR (BPM) via Welch PSD peak in [lo, hi] Hz.
    ...
    if x.size < int(fs * 4.0):  # need ~4s for a usable PSD
        return 0.0
    try:
        nper = int(min(max(64, fs * 2), min(512, x.size)))
        f, pxx = welch(x, fs=fs, window=get_window("hann", nper), nperseg=nper, detrend="constant")
        ...

        fpk = float(f_band[np.argmax(p_band)])
        bpm = fpk * 60.0
        return float(np.clip(bpm, 30.0, 220.0))
    except Exception:
        return 0.0

+
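# Illustrative sketch only (not called by the app): sanity-check hr_from_welch on a
# synthetic 84-BPM (1.4 Hz) pulse. The estimate is quantized by the Welch bin spacing,
# so expect a value within a few BPM of 84 rather than an exact match.
def _sketch_hr_from_welch_check() -> float:
    fs = 30
    t = np.arange(0, 20, 1.0 / fs)                                   # 20 s of samples at 30 Hz
    x = np.sin(2 * np.pi * 1.4 * t) + 0.3 * np.random.randn(t.size)  # pulse + broadband noise
    return hr_from_welch(bandpass_filter(x, fs=fs), fs=fs)
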
def compute_rmssd(x: np.ndarray, fs: int = 30) -> float:
    """
    HRV RMSSD from peaks; robust to short/flat segments.
    ...
    if x.size < int(fs * 5.0):
        return 0.0
    try:
        peaks, _ = find_peaks(x, distance=max(1, int(0.5 * fs)))
        if len(peaks) < 3:
            return 0.0
        ...
    except Exception:
        return 0.0

+
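# Illustrative sketch only (not called by the app): the standard RMSSD definition the
# elided lines above presumably implement — inter-beat intervals in milliseconds, then
# the root mean square of their successive differences.
def _sketch_rmssd_from_peaks(peaks: np.ndarray, fs: int = 30) -> float:
    ibi_ms = np.diff(peaks) * (1000.0 / fs)   # inter-beat intervals (ms)
    if ibi_ms.size < 2:
        return 0.0
    return float(np.sqrt(np.mean(np.diff(ibi_ms) ** 2)))
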
def postprocess_bvp(pred: np.ndarray, fs: int = 30) -> Tuple[np.ndarray, float]:
    """
    Filters BVP to HR band + returns smoothed HR (BPM) with gentle pull toward resting band.
    ...
    lo, hi = REST_HR_RANGE
    if hr < lo or hr > hi:
        dist = abs(hr - REST_HR_TARGET)
        alpha = float(np.clip(0.25 + 0.02 * dist, 0.25, 0.65))
        hr = alpha * hr + (1.0 - alpha) * REST_HR_TARGET

    ...
    return y_filt, float(hr)

+
def draw_face_and_roi(frame_bgr: np.ndarray,
                      face_bbox: Optional[Tuple[int, int, int, int]],
                      roi_bbox: Optional[Tuple[int, int, int, int]],
    ...
    cv2.putText(vis, label, (rx, max(20, ry - 8)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 220, 0), 2)
    return vis

+
def roi_bbox_from_face(face_bbox: Tuple[int, int, int, int],
                       roi_type: str,
                       frame_shape: Tuple[int, int, int]) -> Tuple[int, int, int, int]:
    ...
        return (0, 0, 0, 0)
    return (rx, ry, rx2 - rx, ry2 - ry)

+
def render_preprocessed_roi(chw: np.ndarray) -> np.ndarray:
    """
    Visualize the model input (C,H,W, normalized). Returns HxWx3 uint8 BGR.
    ...
    if chw is None or chw.ndim != 3 or chw.shape[0] != 3:
        return np.zeros((128, 128, 3), dtype=np.uint8)

    img = chw.copy()
    vmin, vmax = float(img.min()), float(img.max())
    if vmax <= vmin + 1e-6:
        img = np.zeros_like(img)
    ...
    img = (img.transpose(1, 2, 0)[:, :, ::-1] * 255.0).clip(0, 255).astype(np.uint8)  # RGB->BGR
    return img

+
def _gt_time_axis(gt_len: int, gt_fs: float) -> Optional[np.ndarray]:
    if gt_len <= 1:
        return None
    ...
        return np.arange(gt_len, dtype=float) / float(gt_fs)
    return None  # will fall back to length-matching overlay

+
def plot_signals_with_gt(time_axis: np.ndarray,
                         raw_signal: np.ndarray,
                         post_signal: np.ndarray,
    ...
        t_new = _np.asarray(t_new, dtype=float).ravel()

        if x_t.size < 2 or y.size != x_t.size:
            if y.size == 0 or t_new.size == 0:
                return _np.zeros_like(t_new)
            idx = _np.linspace(0, y.size - 1, num=t_new.size)
            return _np.interp(_np.arange(t_new.size), idx, y)

        order = _np.argsort(x_t)
        x_t = x_t[order]
        y = y[order]
        mask = _np.concatenate(([True], _np.diff(x_t) > 0))
        x_t = x_t[mask]
        y = y[mask]
        t_clip = _np.clip(t_new, x_t[0], x_t[-1])
        return _np.interp(t_clip, x_t, y)

    ...
        n = int(min(len(x), len(y)))
        x = x[:n]; y = y[:n]
        max_lag = int(max(1, min(n - 1, round(max_lag_s * fs_local))))
        lags = _np.arange(-max_lag, max_lag + 1)
        best_corr = -_np.inf
        best_lag = 0
        for L in lags:
            ...

    ...
        out = _np.empty_like(y)
        out[:] = _np.nan
        if shift > 0:
            out[shift:] = y[:-shift]
        else:
            out[:shift] = y[-shift:]
        return out

    ...
    raw = _np.asarray(raw_signal, dtype=float)
    post = _np.asarray(post_signal, dtype=float)

    if t.size == 0:
        t = _np.arange(post.size, dtype=float) / max(fs, 1)

    ...
        gt_t = _np.asarray(gt_time, dtype=float).ravel()
        gt_on_pred = _safe_interp(gt_t, gt, t)
    else:
+       gt_on_pred = _safe_interp(
+           _np.linspace(0, t[-1] if t.size else (gt.size - 1) / max(fs, 1), num=gt.size),
+           gt, t
+       )

    pred_bp = _bandpass(post, fs)
    gt_bp = _bandpass(gt_on_pred, fs)

    lag_sec = _best_lag(pred_bp, gt_bp, fs_local=fs, max_lag_s=5.0)
    gt_aligned = _apply_lag(gt_on_pred, lag_sec, fs_local=fs)

    valid = _np.isfinite(gt_aligned) & _np.isfinite(pred_bp)
    if valid.sum() >= 16:
        pearson_r = float(_np.corrcoef(z(pred_bp[valid]), z(gt_aligned[valid]))[0, 1])
    ...

    hr_gt = _welch_hr(gt_bp[_np.isfinite(gt_bp)], fs)

    _plt.figure(figsize=(13, 6), dpi=110)
    gs = _GridSpec(2, 2, height_ratios=[1, 1], width_ratios=[1, 1], wspace=0.25, hspace=0.35)

    ax1 = _plt.subplot(gs[0, 0])
    ax1.plot(t, raw - (raw.mean() if raw.size else 0.0), linewidth=1.5)
    ax1.set_title(f"Predicted (Raw) — fs={fs} Hz")
    ax1.set_xlabel("Time (s)"); ax1.set_ylabel("Amplitude")
    ax1.grid(True, alpha=0.3)

    ax2 = _plt.subplot(gs[0, 1])
    ax2.plot(t, post - (post.mean() if post.size else 0.0), linewidth=1.5)
    ax2.set_title("Predicted (Post-processed)")
    ax2.set_xlabel("Time (s)"); ax2.set_ylabel("Amplitude")
    ax2.grid(True, alpha=0.3)

    ax3 = _plt.subplot(gs[1, :])
    ax3.plot(t, z(post), label="Pred (post)", linewidth=1.6)
    ...
        gt_aligned = _apply_lag(gt_bp, lag_sec, fs_local=fs)
        ax3.plot(t, z(gt_aligned), label=f"GT (aligned {lag_sec:+.2f}s)", linewidth=1.2, alpha=0.9)

    txt = [
        f"HR_pred: {hr_pred:.1f} BPM",
        f"HR_gt: {hr_gt:.1f} BPM",
    ...
    if frame is None or frame.size == 0:
        return None

+   # If cascade is missing, fail fast (prevents cryptic OpenCV errors)
+   if FACE_CASCADE is None:
+       return None
+
    try:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    except Exception:
        # If color conversion fails, assume already gray
+       if frame.ndim == 2:
+           gray = frame.copy()
+       else:
+           gray = cv2.cvtColor(frame[..., :3], cv2.COLOR_BGR2GRAY)

    # Light preproc to improve Haar performance
    gray = cv2.equalizeHist(gray)

+   faces_all: List[Tuple[int, int, int, int]] = []
    # Try a couple of parameter combos to be more forgiving
    params = [
        dict(scaleFactor=1.05, minNeighbors=3),
    ...
    # Return the largest (by area)
    return max(faces_all, key=lambda f: f[2] * f[3])

+
def crop_roi(face_bbox: Tuple[int, int, int, int], roi_type: str, frame: np.ndarray) -> Optional[np.ndarray]:
    """
    Crop ROI from the frame based on a face bbox and the selected roi_type.
    ...
        return None
    return roi

+
+def crop_roi_with_bbox(
+    face_bbox: Tuple[int, int, int, int],
+    roi_type: str,
+    frame: np.ndarray
+) -> Tuple[Optional[np.ndarray], Optional[Tuple[int,int,int,int]]]:
+    """
+    Same as crop_roi, but also returns the ROI bbox (x, y, w, h) in frame coords.
+    """
    if face_bbox is None or frame is None or frame.size == 0:
        return None, None
    ...
    return roi, (rx, ry, rx2 - rx, ry2 - ry)

+
def normalize_frame(face_bgr: np.ndarray, size: int) -> np.ndarray:
    """
    PhysMamba-compatible normalization with DiffNormalized support.
    ...
    chw = face[..., ::-1].transpose(2, 0, 1).astype(np.float32, copy=False)
    return chw

+
+def extract_attention_map(model, clip_tensor: torch.Tensor, attention_viz) -> Optional[np.ndarray]:
    """Attention visualization disabled - model architecture incompatible."""
    return None

+
+def create_attention_overlay(
+    frame: np.ndarray,
+    attention: Optional[np.ndarray],
+    attention_viz: Optional[SimpleActivationAttention] = None
+) -> np.ndarray:
+    """Create attention heatmap overlay (currently passthrough)."""
    return frame

+
def occlusion_saliency(roi_bgr, model, fs, patch=16, stride=12):
    H, W = roi_bgr.shape[:2]
+   base_bvp = forward_bvp(
+       model,
+       torch.from_numpy(normalize_frame(roi_bgr, DEFAULT_SIZE))
+       .unsqueeze(0).unsqueeze(2).to(DEVICE)  # fake T=1 path if needed
+   )
    base_power = hr_from_welch(bandpass_filter(base_bvp, fs=fs), fs=fs)

    heat = np.zeros((H, W), np.float32)
    ...
        for x in range(0, W - patch + 1, stride):
            tmp = roi_bgr.copy()
            tmp[y:y+patch, x:x+patch] = 127  # occlude
+           bvp = forward_bvp(
+               model,
+               torch.from_numpy(normalize_frame(tmp, DEFAULT_SIZE))
+               .unsqueeze(0).unsqueeze(2).to(DEVICE)
+           )
            power = hr_from_welch(bandpass_filter(bvp, fs=fs), fs=fs)
            drop = max(0.0, base_power - power)
            heat[y:y+patch, x:x+patch] += drop
    heat -= heat.min()
+   if heat.max() > 1e-8:
+       heat /= heat.max()
    return heat

+
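# Illustrative sketch only (not called by the app): one way to visualize the normalized
# occlusion-saliency map returned above on top of the ROI it was computed from.
def _sketch_overlay_saliency(roi_bgr: np.ndarray, heat: np.ndarray) -> np.ndarray:
    heat_u8 = (np.clip(heat, 0.0, 1.0) * 255).astype(np.uint8)
    heat_color = cv2.applyColorMap(heat_u8, cv2.COLORMAP_JET)   # HxW -> HxWx3 BGR heatmap
    return cv2.addWeighted(roi_bgr, 0.6, heat_color, 0.4, 0.0)  # alpha-blend over the ROI
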
def _call_model_try_orders(model: nn.Module, clip_tensor: torch.Tensor):
    """
    Try common 5D layouts:
    ...
            last_err = e
    raise last_err

+
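# Illustrative sketch only (not called by the app): the idea behind the layout fallback —
# some rPPG backbones expect (B, C, T, H, W), others (B, T, C, H, W); swapping dims 1 and 2
# covers both. The real helper above additionally remembers the last error before re-raising.
def _sketch_try_orders(model: nn.Module, clip: torch.Tensor) -> torch.Tensor:
    try:
        return model(clip)                           # assume (B, C, T, H, W) first
    except Exception:
        return model(clip.permute(0, 2, 1, 3, 4))    # retry as (B, T, C, H, W)
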
def forward_bvp(model: nn.Module, clip_tensor: torch.Tensor) -> np.ndarray:
    """
    Forward and extract a 1D time-like BVP vector with length T_clip.
    ...
        B, K = out.shape
        if B == 1:
            v = out[0]
+           return (v.numpy() if v.shape[0] == T_clip
+                   else np.resize(v.numpy(), T_clip).astype(np.float32))
        if B == T_clip:
            return out[:, 0].numpy()
        if K == T_clip:
            ...
    val = float(out.mean().item()) if out.numel() else 0.0
    return np.full(T_clip, val, dtype=np.float32)

+
def _fallback_bvp_from_means(means, fs: int) -> np.ndarray:
    """
    Classical rPPG from green-channel means when the model yields nothing.
    ...
    std = float(np.std(y)) + 1e-6
    return (y / std).astype(np.float32)

+
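# Illustrative sketch only (not called by the app): the classical green-channel fallback in
# its simplest form — detrend the per-frame green means, band-pass to the HR band, and
# z-normalize. The elided body above presumably follows a similar recipe.
def _sketch_green_fallback(means: np.ndarray, fs: int = 30) -> np.ndarray:
    y = np.asarray(means, dtype=float)
    y = y - np.convolve(y, np.ones(fs) / fs, mode="same")   # remove slow illumination drift
    y = bandpass_filter(y, fs=fs)                           # keep ~0.7–3.5 Hz
    return (y / (np.std(y) + 1e-6)).astype(np.float32)
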
def _to_floats(s: str) -> List[float]:
    """
    Extract all real numbers from free-form text, including scientific notation.
+   Gracefully ignores comments, units, and non-numeric junk.
    """
    if not isinstance(s, str) or not s:
        return []

+   # Strip comments starting with #, //, or ;
    s = re.sub(r"(#|//|;).*?$", "", s, flags=re.MULTILINE)

+   # Normalize common delimiters
    s = s.replace(",", " ").replace(";", " ")

+   toks = re.findall(
+       r"[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?",
+       s
+   )
    out: List[float] = []
    for t in toks:
        try:
            ...
        diffs = diffs[np.isfinite(diffs) & (diffs > 0)]
        return (1.0 / float(np.median(diffs))) if diffs.size else 0.0

    def _hr_from_bvp(bvp: np.ndarray, fs_hint: float) -> float:
        if bvp is None or bvp.size == 0:
            return 0.0
        ...
        bp = bandpass_filter(bvp.astype(float), fs=fs_use)
        return hr_from_welch(bp, fs=fs_use)

+   # ================= UBFC-style TXT (3 lines) =================
+   if p.name.lower() == "ground_truth.txt" or (
+       ext == ".txt" and p.read_text(errors="ignore").count("\n") >= 2
+   ):
        try:
+           lines = [
+               ln.strip()
+               for ln in p.read_text(encoding="utf-8", errors="ignore").splitlines()
+               if ln.strip()
+           ]
            ppg_vals = _to_floats(lines[0]) if len(lines) >= 1 else []
            hr_vals = _to_floats(lines[1]) if len(lines) >= 2 else []
            t_vals = _to_floats(lines[2]) if len(lines) >= 3 else []
            ...
            # Fall through to generic handlers
            pass

+   # ================= Generic TXT =================
    if ext == ".txt":
        try:
            nums = _to_floats(p.read_text(encoding="utf-8", errors="ignore"))
            ...
        except Exception:
            return np.array([]), 0.0, 0.0

+   # ================= JSON =================
    if ext == ".json":
        try:
            data = json.loads(p.read_text(encoding="utf-8", errors="ignore"))

            def _seek(obj, keys):
                for k in keys:
                    ...
                        return obj[k]
                return None

            bvp = _seek(data, ("ppg", "bvp", "signal", "wave"))
            if bvp is None:
                for container_key in ("FullPackage", "package", "data", "gt", "ground_truth"):
                    if container_key in data:
                        ...
            ...
            else:
                bvp = np.array([], dtype=float)

            fs_hint = 0.0
            if "fs" in data and isinstance(data["fs"], (int, float)) and data["fs"] > 0:
                fs_hint = float(data["fs"])
            ...
        except Exception:
            return np.array([]), 0.0, 0.0

+   # ================= CSV =================
    if ext == ".csv":
        try:
            df = pd.read_csv(p)
            cols = {str(c).strip().lower(): c for c in df.columns}

            def _first_match(names):
                ...
        except Exception:
            return np.array([]), 0.0, 0.0

+   # ================= MAT =================
    if ext == ".mat":
        try:
            md = loadmat(str(p))
            arr = None
            for key in ("ppg", "bvp", "signal", "wave"):
                if key in md and isinstance(md[key], np.ndarray):
                    arr = md[key]
                    break
            if arr is None:
                for v in md.values():
                    if isinstance(v, np.ndarray) and v.ndim == 1:
                        arr = v
            ...

        try:
            bvp = np.asarray(np.load(str(p)), dtype=float).ravel()
            fs_hint, hr = 0.0, 0.0
+
            sidecar = p.with_suffix(".json")
            if sidecar.exists():
                try:
                    ...
                    hr = float(np.nanmean(v)) if isinstance(v, (list, tuple, np.ndarray)) else float(v)
                except Exception:
                    pass
+
            if hr == 0.0 and bvp.size:
                hr = _hr_from_bvp(bvp, fs_hint)
            return bvp, hr, fs_hint
    ...
    # Fallback (unsupported extension)
    return np.array([]), 0.0, 0.0

+
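# Illustrative sketch only (not called by the app): the UBFC-style ground_truth.txt handled
# above is assumed to hold three lines — PPG samples, HR values (BPM), timestamps (s) —
# and _to_floats tolerates units/comments on each line. Synthetic values for a quick test:
def _sketch_parse_ubfc_gt(tmp_dir: Path) -> Tuple[np.ndarray, float, float]:
    gt = tmp_dir / "ground_truth.txt"
    gt.write_text(
        "0.01 0.05 0.11 0.07 0.02\n"    # line 1: PPG waveform samples
        "71.8 72.2 72.0\n"              # line 2: HR values (BPM)
        "0.00 0.03 0.07 0.10 0.13\n"    # line 3: timestamps (s)
    )
    return parse_ground_truth_file(str(gt))
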
def scan_models() -> List[str]:
    if not MODEL_DIR.exists():
        return []
    ...

    return models

+
_GLOBAL_CONTROLS: Dict[str, Dict] = {}

+
def ensure_controls(control_id: str) -> Tuple[str, Dict]:
    # Use a stable default so Pause/Resume/Stop work for the current run
    if not control_id:
        ...
        }
    return control_id, _GLOBAL_CONTROLS[control_id]

+
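# Illustrative sketch only (not called by the app): the controls dict consumed below is used
# as event flags (controls['stop'].is_set(), .clear(), ...), so a minimal registration with
# threading.Event flags would look like this (field names beyond 'stop' are assumptions).
import threading

def _sketch_register_controls(control_id: str) -> Dict:
    return _GLOBAL_CONTROLS.setdefault(control_id, {
        "stop": threading.Event(),    # set -> abort the processing loop
        "pause": threading.Event(),   # set -> temporarily hold processing
    })
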
def process_video_file(
    video_path: str,
    gt_file: Optional[str],
    ...
    """
    Enhanced video processing with Grad-CAM attention visualization,
    plus per-frame illumination and motion logging.
+   Returns 14 outputs per yield, matching Gradio UI:
+       status, pred_hr, gt_hr, rmssd,
+       frame_path, attention_path, signal_path, raw_path, post_path, csv_path,
+       global_brightness, global_motion, roi_brightness, roi_motion
    """
    global _HR_SMOOTH
+   global FRAME_METRICS  # global frame metrics buffer
    _HR_SMOOTH = None
    FRAME_METRICS = []  # reset per run

    ...
    control_id, controls = ensure_controls(control_id)
    controls['stop'].clear()

+   # Helper for consistent error yields (14 outputs)
+   def _error_status(msg: str):
+       return (
+           msg,
+           None, None, None,  # HR, GT HR, RMSSD
+           None, None, None,  # frame, attention, signal
+           None, None,  # raw, post
+           None,  # csv
+           None, None, None, None  # brightness & motion
+       )
+
    if not model_name:
+       yield _error_status("ERROR: No model selected")
        return

    if isinstance(model_name, int):
        ...

    model_path = MODEL_DIR / model_name
    if not model_path.exists():
+       yield _error_status("ERROR: Model not found")
        return

    try:
        model, attention_viz = load_physmamba_model(model_path, DEVICE)
    except Exception as e:
+       yield _error_status(f"ERROR loading model: {str(e)}")
        return

    gt_bvp, gt_hr, gt_fs = parse_ground_truth_file(gt_file) if gt_file else (np.array([]), 0.0, 0.0)

    if not video_path or not os.path.exists(video_path):
+       yield _error_status("ERROR: Video not found")
        return

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
+       yield _error_status("ERROR: Cannot open video")
        return

    fps = int(fps_input) if fps_input else int(cap.get(cv2.CAP_PROP_FPS) or 30)
    ...
    last_rmssd = 0.0
    last_attention = None

+   # previous grayscale frames for motion
    prev_gray = None
    prev_roi_gray = None
    ...
    raw_path = tmpdir / "raw_signal.png"
    post_path = tmpdir / "post_signal.png"

+   # Initial status yield (14 outputs)
+   yield (
+       "Starting… reading video frames",
+       None,
+       f"{gt_hr:.1f}" if gt_hr > 0 else "--",
+       None,
+       None, None, None, None, None, None,
+       None, None, None, None
+   )

    while True:
        if controls['stop'].is_set():
            ...

        frame_idx += 1

+       # default per-frame metrics for this iteration
        global_brightness = None
        global_motion = None
        roi_brightness = None
        roi_motion = None

+       # global illumination & motion (full frame)
        try:
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        except Exception:
            ...

        ...
            roi = crop_roi(face, roi_type, frame)

            if roi is not None and roi.size > 0:
+               # ROI-level brightness & motion
                try:
                    roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
                except Exception:
                    ...

                ...
                    try:
                        raw = forward_bvp(model, clip_t)
                        if isinstance(raw, np.ndarray):
+                           raw = np.nan_to_num(
+                               raw, nan=0.0, posinf=0.0, neginf=0.0
+                           ).astype(np.float32, copy=False)
                            bvp_out = raw if raw.size > 0 else None
                        else:
                            bvp_out = None
                    ...
                        print(f"[infer] forward_bvp error: {e}")
                        bvp_out = None

+                   # Generate attention with Grad-CAM
                    try:
                        last_attention = extract_attention_map(model, clip_t, attention_viz)
                    except Exception as e:
                        ...
                        last_attention = None

                    if bvp_out is None or bvp_out.size == 0:
+                       gbuf = np.nan_to_num(
+                           np.asarray(list(raw_g_means), dtype=np.float32), nan=0.0
+                       )
                        fb = _fallback_bvp_from_means(gbuf, fs=fps)
                        if isinstance(fb, np.ndarray) and fb.size > 0:
                            bvp_out = fb
                    ...
                        bvp_stream = bvp_stream[-MAX_SIGNAL_LENGTH:]

                        if len(bvp_stream) >= int(5 * fps):
+                           seg = np.asarray(
+                               bvp_stream[-int(10 * fps):], dtype=np.float32
+                           )
                            _, last_bpm = postprocess_bvp(seg, fs=fps)
                            last_rmssd = compute_rmssd(seg, fs=fps)
                        ...
                        last_infer = frame_idx

        else:
+           cv2.putText(
+               vis_frame,
+               "No face detected",
+               (20, 40),
+               cv2.FONT_HERSHEY_SIMPLEX,
+               0.7,
+               (30, 200, 255),
+               2,
+           )

+       # Log per-frame metrics into global FRAME_METRICS
        try:
            time_s = frame_idx / float(fps) if fps > 0 else float(frame_idx)
        except Exception:
            ...
        ...
            "roi_motion": float(roi_motion) if roi_motion is not None else None,
        })

+       # Pretty strings for UI
+       gb_str = f"{global_brightness:.2f}" if global_brightness is not None else None
+       gm_str = f"{global_motion:.2f}" if global_motion is not None else None
+       rb_str = f"{roi_brightness:.2f}" if roi_brightness is not None else None
+       rm_str = f"{roi_motion:.2f}" if roi_motion is not None else None
+
        if last_bpm > 0:
            color = (0, 255, 0) if 55 <= last_bpm <= 100 else (0, 165, 255)
            cv2.rectangle(vis_frame, (10, 10), (360, 65), (0, 0, 0), -1)
+           cv2.putText(
+               vis_frame,
+               f"HR: {last_bpm:.1f} BPM",
+               (20, 48),
+               cv2.FONT_HERSHEY_SIMPLEX,
+               0.9,
+               color,
+               2,
+           )

        vis_attention = create_attention_overlay(frame, last_attention, attention_viz)
        ...
            elapsed = now - start_time
            status = f"Frame {frame_idx}/{total_frames} | Time {elapsed:.1f}s | HR {last_bpm:.1f} BPM"

+           # Periodic UI update: 14 outputs
            yield (
                status,
                f"{last_bpm:.1f}" if last_bpm > 0 else None,
                ...
                str(signal_path) if signal_path.exists() else None,
                str(raw_path) if raw_path.exists() else None,
                str(post_path) if post_path.exists() else None,
+               None,  # CSV placeholder (filled at end)
+               gb_str,  # global brightness
+               gm_str,  # global motion
+               rb_str,  # ROI brightness
+               rm_str  # ROI motion
            )

            next_display = now + (1.0 / DISPLAY_FPS)
    ...
    except Exception:
        pass

+   # Save per-frame illumination & motion metrics
    frame_metrics_path = None
    if FRAME_METRICS:
        try:
            ...
            print(f"[metrics] Failed to save frame metrics CSV: {e}")
            frame_metrics_path = None

+   # Decide which CSV to expose: metrics preferred, else BVP
+   final_csv = frame_metrics_path or csv_path
+
    elapsed = time.time() - start_time
    final_status = f"Complete | {frame_idx} frames | {elapsed:.1f}s | HR {last_bpm:.1f} BPM"

+   # Final yield: 14 outputs
    yield (
        final_status,
        f"{last_bpm:.1f}" if last_bpm > 0 else None,
        ...
        str(signal_path) if signal_path.exists() else None,
        str(raw_path) if raw_path.exists() else None,
        str(post_path) if post_path.exists() else None,
+       str(final_csv) if final_csv else None,
+       None,  # final global brightness (leave None or reuse gb_str)
+       None,  # final global motion
+       None,  # final ROI brightness
+       None  # final ROI motion
    )

+
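# Illustrative sketch only (not called by the app): process_video_file is a generator that
# yields 14-tuples; outside Gradio it can be drained like this. The argument values here are
# placeholders — the UI normally supplies them via run_processing further below.
def _sketch_drain_process_video(video: str, model: str):
    last = None
    for outputs in process_video_file(video, None, model, 30, 30, "auto", ""):
        last = outputs   # each item is the 14-tuple documented in the docstring above
    return last
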
def process_stream(
    input_source: str,
    video_path: Optional[str],
    ...
    if input_source == "Live Webcam":
        yield from process_live_webcam(model_name, fps_input, roi_type, control_id)
    else:
+       yield from process_video_file(
+           video_path, gt_file, model_name, fps_input,
+           max_seconds, roi_type, control_id
+       )

def pause_processing(control_id: str) -> str:
    _, controls = ensure_controls(control_id)
...
    return "Stopped"

def reset_ui():
+   # 14 values in total, matching all outputs from run_processing / process_stream
+   return (
+       "Ready",  # status_text
+       None,  # hr_output
+       None,  # gt_hr_output
+       None,  # rmssd_output
+       None,  # frame_output
+       None,  # attention_output
+       None,  # signal_output
+       None,  # raw_signal_output
+       None,  # post_signal_output
+       None,  # csv_output
+       None,  # global_brightness_output
+       None,  # global_motion_output
+       None,  # roi_brightness_output
+       None  # roi_motion_output
+   )

def handle_folder_upload(files):
    if not files:
...
        with gr.Column():
            video_upload = gr.Video(label="Upload Video", sources=["upload"])
        with gr.Column():
+           gt_upload = gr.File(
+               label="Ground Truth (optional)",
+               file_types=[".txt", ".csv", ".json"]
+           )

    with gr.Row(visible=False) as folder_inputs:
        with gr.Column():
        ...
            )

    with gr.Row():
+       roi_dropdown = gr.Dropdown(
+           choices=["auto", "forehead", "cheeks", "face"],
+           value="auto",
+           label="ROI"
+       )

    control_state = gr.State(value="")
    placeholder_state = gr.State(value=None)
    ...
        hr_output = gr.Textbox(label="HR (BPM)", interactive=False)
        gt_hr_output = gr.Textbox(label="GT HR (BPM)", interactive=False)
        rmssd_output = gr.Textbox(label="HRV RMSSD (ms)", interactive=False)
+
+   # NEW: illumination & motion front-end outputs
+   with gr.Row():
+       global_brightness_output = gr.Textbox(
+           label="Global Brightness", interactive=False
+       )
+       global_motion_output = gr.Textbox(
+           label="Global Motion", interactive=False
+       )
+       roi_brightness_output = gr.Textbox(
+           label="ROI Brightness", interactive=False
+       )
+       roi_motion_output = gr.Textbox(
+           label="ROI Motion", interactive=False
+       )

    with gr.Row():
        with gr.Column():
        ...
    ).then(
        reset_ui,
        inputs=None,
+       outputs=[
+           status_text,
+           hr_output,
+           gt_hr_output,
+           rmssd_output,
+           frame_output,
+           attention_output,
+           signal_output,
+           raw_signal_output,
+           post_signal_output,
+           csv_output,
+           global_brightness_output,
+           global_motion_output,
+           roi_brightness_output,
+           roi_motion_output,
+       ]
    )

+   def run_processing(
+       input_source,
+       video_upload,
+       gt_upload,
+       folder_video,
+       folder_gt,
+       model_name,
+       fps,
+       max_sec,
+       roi,
+       ctrl_id
+   ):
+       """Wrapper that resolves paths and streams from process_stream."""

        if isinstance(model_name, int):
            model_name = str(model_name)

        if not model_name:
+           # must return 14 outputs to match UI wiring
+           yield (
+               "ERROR: No model selected",
+               None, None, None,  # HR, GT HR, RMSSD
+               None, None, None,  # frame, attention, signal
+               None, None,  # raw, post
+               None,  # CSV
+               None, None, None, None  # brightness & motion fields
+           )
            return

        if input_source == "Video File":
        ...
        else:  # Live Webcam
            video_path, gt_file = None, None

+       # This yields 14-tuples from process_video_file / process_live_webcam
        yield from process_stream(
            input_source, video_path, gt_file,
            model_name, fps, max_sec, roi, ctrl_id
        )

    run_btn.click(
        fn=run_processing,
        inputs=[
        ...
            signal_output,
            raw_signal_output,
            post_signal_output,
+           csv_output,
+           global_brightness_output,
+           global_motion_output,
+           roi_brightness_output,
+           roi_motion_output,
        ]
    )