"""Estimate head pose according to the facial landmarks""" import cv2 import numpy as np class PoseEstimator: """Estimate head pose according to the facial landmarks""" def __init__(self, image_width, image_height): """Init a pose estimator. Args: image_width (int): input image width image_height (int): input image height """ self.size = (image_height, image_width) self.model_points_68 = self._get_full_model_points() # Camera internals self.focal_length = self.size[1] self.camera_center = (self.size[1] / 2, self.size[0] / 2) self.camera_matrix = np.array( [[self.focal_length, 0, self.camera_center[0]], [0, self.focal_length, self.camera_center[1]], [0, 0, 1]], dtype="double") # Assuming no lens distortion self.dist_coeefs = np.zeros((4, 1)) # Rotation vector and translation vector self.r_vec = np.array([[0.01891013], [0.08560084], [-3.14392813]]) self.t_vec = np.array( [[-14.97821226], [-10.62040383], [-2053.03596872]]) def _get_full_model_points(self, filename='assets/model.txt'): """Get all 68 3D model points from file""" raw_value = [] with open(filename) as file: for line in file: raw_value.append(line) model_points = np.array(raw_value, dtype=np.float32) model_points = np.reshape(model_points, (3, -1)).T # Transform the model into a front view. model_points[:, 2] *= -1 return model_points def solve(self, points): """Solve pose with all the 68 image points Args: points (np.ndarray): points on image. Returns: Tuple: (rotation_vector, translation_vector) as pose. """ if self.r_vec is None: (_, rotation_vector, translation_vector) = cv2.solvePnP( self.model_points_68, points, self.camera_matrix, self.dist_coeefs) self.r_vec = rotation_vector self.t_vec = translation_vector (_, rotation_vector, translation_vector) = cv2.solvePnP( self.model_points_68, points, self.camera_matrix, self.dist_coeefs, rvec=self.r_vec, tvec=self.t_vec, useExtrinsicGuess=True) return (rotation_vector, translation_vector) def visualize(self, image, pose, color=(255, 255, 255), line_width=2): """Draw a 3D box as annotation of pose""" rotation_vector, translation_vector = pose point_3d = [] rear_size = 75 rear_depth = 0 point_3d.append((-rear_size, -rear_size, rear_depth)) point_3d.append((-rear_size, rear_size, rear_depth)) point_3d.append((rear_size, rear_size, rear_depth)) point_3d.append((rear_size, -rear_size, rear_depth)) point_3d.append((-rear_size, -rear_size, rear_depth)) front_size = 100 front_depth = 100 point_3d.append((-front_size, -front_size, front_depth)) point_3d.append((-front_size, front_size, front_depth)) point_3d.append((front_size, front_size, front_depth)) point_3d.append((front_size, -front_size, front_depth)) point_3d.append((-front_size, -front_size, front_depth)) point_3d = np.array(point_3d, dtype=np.float32).reshape(-1, 3) # Map to 2d image points (point_2d, _) = cv2.projectPoints(point_3d, rotation_vector, translation_vector, self.camera_matrix, self.dist_coeefs) point_2d = np.int32(point_2d.reshape(-1, 2)) # Draw all the lines cv2.polylines(image, [point_2d], True, color, line_width, cv2.LINE_AA) cv2.line(image, tuple(point_2d[1]), tuple( point_2d[6]), color, line_width, cv2.LINE_AA) cv2.line(image, tuple(point_2d[2]), tuple( point_2d[7]), color, line_width, cv2.LINE_AA) cv2.line(image, tuple(point_2d[3]), tuple( point_2d[8]), color, line_width, cv2.LINE_AA) def draw_axes(self, img, pose): R, t = pose img = cv2.drawFrameAxes(img, self.camera_matrix, self.dist_coeefs, R, t, 30) def show_3d_model(self): from matplotlib import pyplot from mpl_toolkits.mplot3d import Axes3D fig = 
    def show_3d_model(self):
        """Show the 68 3D model points in a matplotlib scatter plot."""
        from matplotlib import pyplot

        fig = pyplot.figure()
        ax = fig.add_subplot(projection='3d')

        x = self.model_points_68[:, 0]
        y = self.model_points_68[:, 1]
        z = self.model_points_68[:, 2]

        ax.scatter(x, y, z)
        ax.set_box_aspect((1, 1, 1))
        ax.set_xlabel('x')
        ax.set_ylabel('y')
        pyplot.show()

    ###
    # yhm: from ChatGPT, to detect distraction
    ###

    def rotation_matrix_to_angles(self, rotation_vector):
        """Convert a rotation vector to pitch, yaw, and roll angles in degrees.

        The vector is first converted to a rotation matrix with cv2.Rodrigues
        and then decomposed into Euler angles.
        """
        rotation_matrix, _ = cv2.Rodrigues(rotation_vector)

        sy = np.sqrt(rotation_matrix[0, 0]**2 + rotation_matrix[1, 0]**2)
        singular = sy < 1e-6

        if not singular:
            pitch = np.arctan2(rotation_matrix[2, 1], rotation_matrix[2, 2])
            yaw = np.arctan2(-rotation_matrix[2, 0], sy)
            roll = np.arctan2(rotation_matrix[1, 0], rotation_matrix[0, 0])
        else:
            # Gimbal lock: roll is not recoverable, so set it to zero.
            pitch = np.arctan2(-rotation_matrix[1, 2], rotation_matrix[1, 1])
            yaw = np.arctan2(-rotation_matrix[2, 0], sy)
            roll = 0

        return np.degrees(pitch), np.degrees(yaw), np.degrees(roll)

    def is_distracted(self, rotation_vector):
        """Determine if the user is distracted based on head pose angles."""
        pitch, yaw, roll = self.rotation_matrix_to_angles(rotation_vector)

        # Thresholds for a head considered roughly 'facing forward'; adjust
        # them based on further testing. The roll range is effectively
        # unconstrained because the reference pose has roll near +/-180
        # degrees. An earlier variant used symmetric thresholds instead:
        # abs(pitch) < 15, abs(yaw) < 20, abs(roll) < 10.
        pitch_threshold = (-15, 10)  # allow some variability in pitch
        yaw_threshold = (-20, 16)    # reasonable range for yaw
        roll_threshold = (-180, 180)

        # print("pitch, yaw, roll", pitch, yaw, roll)

        focus_pitch = pitch_threshold[0] < pitch < pitch_threshold[1]
        focus_yaw = yaw_threshold[0] < yaw < yaw_threshold[1]
        focus_roll = roll_threshold[0] < roll < roll_threshold[1]

        return not (focus_pitch and focus_yaw and focus_roll)

    def detect_distraction(self, points):
        """Solve pose and detect distraction status based on pose."""
        rotation_vector, translation_vector = self.solve(points)
        distraction_status = self.is_distracted(rotation_vector)
        return distraction_status, (rotation_vector, translation_vector)
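
# A minimal sanity check for the Euler-angle convention used above; a sketch
# added for illustration, not part of the original module's API. It uses
# PoseEstimator.__new__ to skip __init__, so it runs without assets/model.txt
# (rotation_matrix_to_angles does not touch instance state).
def _check_euler_convention():
    # A pure 30-degree rotation about the y-axis should decompose to roughly
    # (pitch, yaw, roll) == (0, 30, 0) under the convention above.
    rotation_vector = np.array([[0.0], [np.radians(30.0)], [0.0]])
    estimator = PoseEstimator.__new__(PoseEstimator)
    pitch, yaw, roll = estimator.rotation_matrix_to_angles(rotation_vector)
    print("pitch, yaw, roll:", pitch, yaw, roll)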
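
# A minimal end-to-end sketch, assuming assets/model.txt is available relative
# to the working directory. No camera or landmark detector is needed: the 68
# "detected" landmarks are synthesized by projecting the 3D model points with
# the estimator's stored initial extrinsics, so the solved pose should land
# close to them. The 640x480 frame size is an arbitrary choice for the demo.
if __name__ == "__main__":
    _check_euler_convention()

    estimator = PoseEstimator(image_width=640, image_height=480)

    # Synthesize 2D landmarks from the 3D model points.
    projected, _ = cv2.projectPoints(
        estimator.model_points_68, estimator.r_vec, estimator.t_vec,
        estimator.camera_matrix, estimator.dist_coeffs)
    landmarks = projected.reshape(-1, 2).astype(np.float32)

    distracted, pose = estimator.detect_distraction(landmarks)
    print("distracted:", distracted)

    # Annotate a blank frame with the 3D box and axes and display it.
    frame = np.zeros((480, 640, 3), dtype=np.uint8)
    estimator.visualize(frame, pose)
    estimator.draw_axes(frame, pose)
    cv2.imshow("head pose", frame)
    cv2.waitKey(0)
    cv2.destroyAllWindows()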