extraction
Browse files
lips_coords_extractor.py
ADDED
@@ -0,0 +1,203 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
import dlib
|
3 |
+
import json
|
4 |
+
import glob
|
5 |
+
import os
|
6 |
+
from multiprocessing import Pool
|
7 |
+
|
8 |
+
# Output directory for the extracted per-video lip-landmark JSON files.
LIP_COORDINATES_DIRECTORY = "lip_coordinates"
# Directory holding the running log of videos that failed processing.
ERROR_DIRECTORY = "error_videos"

# path to the original GRID dataset whose videos are converted to frames
GRID_IMAGES_DIRECTORY = "lip/GRID_imgs"
# Split-list files: one video path per line (see load_data_list for the
# expected path layout).
# NOTE(review): train_unseen_list and test_unseen_list point at the same
# file ("data/unseen_val.txt") — possibly a copy-paste slip; confirm the
# train split should not reference a separate unseen_train file.
train_unseen_list = "data/unseen_val.txt"
train_overlap_list = "data/overlap_train.txt"
test_unseen_list = "data/unseen_val.txt"
test_overlap_list = "data/overlap_val.txt"
|
17 |
+
|
18 |
+
|
19 |
+
def load_data_list(data_path, dictionary):
    """Register every video listed in *data_path* into *dictionary*.

    Each non-empty line is a path whose 4th-from-last component is the
    speaker and whose last component is the video id; the entry is stored
    as "<speaker>/<vid>" -> 1 (the dict is used as a membership set).

    Args:
        data_path: text file with one video path per line.
        dictionary: dict to update in place (allows chaining several lists).

    Returns:
        The same *dictionary*, updated.
    """
    with open(data_path, "r") as f:
        # Iterate the file lazily instead of readlines() — no need to
        # materialize the whole list in memory.
        for raw in f:
            line = raw.strip()
            if not line:
                continue  # tolerate blank/trailing lines
            parts = line.split("/")
            dictionary[f"{parts[-4]}/{parts[-1]}"] = 1
    return dictionary
|
27 |
+
|
28 |
+
|
29 |
+
def extract_lip_coordinates(detector, predictor, img_path):
    """Extract the 20 lip-landmark coordinates from a single image frame.

    Used to preprocess the original image frames in the GRID dataset.
    The image is resized to 600x500 before detection, so the returned
    coordinates are in that resized space.

    Args:
        detector: dlib frontal face detector.
        predictor: dlib 68-landmark shape predictor.
        img_path: path to the frame image on disk.

    Returns:
        [x, y]: two parallel lists of 20 ints — dlib landmarks 48..67
        (the mouth region of the 68-point model).

    Raises:
        ValueError: if the image cannot be read or the number of detected
        faces is not exactly one (callers catch Exception and retry).
    """
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread silently returns None on unreadable files; fail loudly
        # so the caller's retry/error logging sees a real exception.
        raise ValueError(f"could not read image: {img_path}")
    image = cv2.resize(image, (600, 500))
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    rects = detector(gray)
    # Raise instead of assert: asserts are stripped under `python -O`.
    if len(rects) != 1:
        raise ValueError(f"expected exactly 1 face, found {len(rects)}: {img_path}")

    # Apply the shape predictor to the single detected face ROI.
    # (The original also computed the bounding-box corners, but never used
    # them — dropped.)
    shape = predictor(gray, rects[0])
    x = [shape.part(n).x for n in range(48, 68)]
    y = [shape.part(n).y for n in range(48, 68)]
    return [x, y]
|
52 |
+
|
53 |
+
|
54 |
+
def log_error_video(video_path):
    """Record a failed video path in ERROR_DIRECTORY/error_videos.txt.

    Appends, so the log accumulates across runs/processes.
    """
    print("Error: ", video_path)
    # The original crashed with FileNotFoundError when ERROR_DIRECTORY did
    # not exist yet; create it on demand (exist_ok makes this idempotent
    # and safe across the worker processes).
    os.makedirs(ERROR_DIRECTORY, exist_ok=True)
    with open(ERROR_DIRECTORY + "/error_videos.txt", "a") as f:
        f.write(video_path + "\n")
|
58 |
+
|
59 |
+
|
60 |
+
# Build the membership set of videos that belong to any train/test split.
# (Currently only consulted by the commented-out filter inside
# generate_lip_coordinates.)
data_dict = {}
for _split_list in (
    train_unseen_list,
    train_overlap_list,
    test_unseen_list,
    test_overlap_list,
):
    data_dict = load_data_list(_split_list, data_dict)


# One directory per speaker under the GRID image root.
speakers = glob.glob(GRID_IMAGES_DIRECTORY + "/*")
print(speakers[0])
|
69 |
+
|
70 |
+
|
71 |
+
def generate_lip_coordinates(speakers):
    # NOTE(review): this definition is DEAD CODE — it is shadowed by the
    # near-identical redefinition of generate_lip_coordinates further down
    # in this file; one of the two should be deleted.
    #
    # For each video (>= 50 .jpg frames) of each speaker, run the dlib
    # detector/predictor on every frame and dump a
    # {frame_id: [[x...], [y...]]} mapping to
    # LIP_COORDINATES_DIRECTORY/<speaker>/<video>.json.
    #
    # NOTE(review): "\\" assumes Windows-style paths from glob — this
    # breaks frame sorting and save paths on POSIX; confirm target OS.
    file_path_sep = "\\"
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(
        "lip_coordinate_extraction/shape_predictor_68_face_landmarks_GTX.dat"
    )
    for speaker in speakers:
        print(speaker)
        videos = glob.glob(speaker + "/*")
        for video in videos:
            print(video)
            frames = glob.glob(video + "/*.jpg")
            if len(frames) < 50:  # filter out bad videos
                continue
            vid = {}
            try:
                # Frame files are named <index>.jpg; sort numerically.
                frames = sorted(
                    frames,
                    key=lambda x: int(x.split(file_path_sep)[-1].split(".")[0]),
                )
                for frame in frames:
                    # Landmark extraction is retried up to 3 times per frame.
                    retry = 3
                    while retry > 0:
                        try:
                            coords = extract_lip_coordinates(detector, predictor, frame)
                            break
                        except Exception as e:
                            retry -= 1
                            print("Error: ", video)
                            print(e)
                            print("retrying...")

                    # NOTE(review): if all retries fail, `coords` is stale
                    # from the previous frame (or unbound on the first one);
                    # a failure guard after the while-loop seems intended.
                    vid[frame.split(file_path_sep)[-1].split(".")[0]] = coords
                vid_path = video.split(file_path_sep)
                # Output: LIP_COORDINATES_DIRECTORY/<speaker>/<video>.json
                save_path = (
                    LIP_COORDINATES_DIRECTORY
                    + "/"
                    + vid_path[-2]
                    + "/"
                    + vid_path[-1]
                    + ".json"
                )

                if not os.path.exists(LIP_COORDINATES_DIRECTORY + "/" + vid_path[-2]):
                    os.makedirs(LIP_COORDINATES_DIRECTORY + "/" + vid_path[-2])

                with open(
                    save_path,
                    "w",
                ) as f:
                    json.dump(vid, f)
            except Exception as e:
                # Any per-video failure is logged and the video skipped.
                print(e)
                log_error_video(video)
|
125 |
+
|
126 |
+
|
127 |
+
def generate_lip_coordinates(speakers):
    """Extract lip-landmark coordinates for every video of *speakers*.

    For each video directory containing at least 50 .jpg frames, runs the
    dlib detector/68-landmark predictor on every frame and writes a
    {frame_id: [[x...], [y...]]} mapping to
    LIP_COORDINATES_DIRECTORY/<speaker>/<video>.json.
    Videos that fail are recorded via log_error_video() and skipped.

    Args:
        speakers: list of speaker directory paths (one group per worker).
    """
    # Use the platform separator: glob returns "\\"-separated paths on
    # Windows and "/"-separated paths elsewhere (the original hard-coded
    # "\\", which broke frame sorting and save paths on POSIX).
    file_path_sep = os.sep
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(
        "lip_coordinate_extraction/shape_predictor_68_face_landmarks_GTX.dat"
    )
    for speaker in speakers:
        print(speaker)
        for video in glob.glob(speaker + "/*"):
            print(video)
            frames = glob.glob(video + "/*.jpg")
            if len(frames) < 50:  # filter out bad (incomplete) videos
                continue
            vid = {}
            try:
                # Frame files are named <index>.jpg; sort numerically.
                frames = sorted(
                    frames,
                    key=lambda p: int(p.split(file_path_sep)[-1].split(".")[0]),
                )
                for frame in frames:
                    coords = None
                    # Landmark extraction is flaky; retry up to 3 times.
                    for _attempt in range(3):
                        try:
                            coords = extract_lip_coordinates(detector, predictor, frame)
                            break
                        except Exception as e:
                            print("Error: ", video)
                            print(e)
                            print("retrying...")
                    if coords is None:
                        # All retries failed: abort this video instead of
                        # silently reusing the previous frame's coordinates
                        # (the original did exactly that, or raised
                        # NameError on the very first frame).
                        raise RuntimeError(f"landmark extraction failed: {frame}")
                    vid[frame.split(file_path_sep)[-1].split(".")[0]] = coords

                # Output: LIP_COORDINATES_DIRECTORY/<speaker>/<video>.json
                vid_path = video.split(file_path_sep)
                out_dir = LIP_COORDINATES_DIRECTORY + "/" + vid_path[-2]
                # exist_ok avoids the exists()/makedirs() race between the
                # parallel worker processes.
                os.makedirs(out_dir, exist_ok=True)
                with open(out_dir + "/" + vid_path[-1] + ".json", "w") as f:
                    json.dump(vid, f)
            except Exception as e:
                # Any per-video failure is logged and the video skipped.
                print(e)
                log_error_video(video)
|
186 |
+
|
187 |
+
|
188 |
+
num_processes = 8

# Partition the speaker directories into num_processes contiguous groups;
# the LAST group absorbs the division remainder.
# BUG FIX: the original special-cased `i == 4` (a leftover from a 5-process
# run). With num_processes = 8 that duplicated every speaker from group 4
# onward into groups 4-7 AND dropped the remainder past 8 * interval.
speaker_groups = []
speaker_interval = len(speakers) // num_processes
for i in range(num_processes):
    if i == num_processes - 1:
        speaker_groups.append(speakers[i * speaker_interval :])
    else:
        speaker_groups.append(
            speakers[i * speaker_interval : (i + 1) * speaker_interval]
        )
|
199 |
+
|
200 |
+
|
201 |
+
# Entry point: fan the speaker groups out over a pool of worker processes.
# The Pool must be created under the __main__ guard so that child
# processes re-importing this module do not spawn recursively.
if __name__ == "__main__":
    with Pool(num_processes) as p:
        p.map(generate_lip_coordinates, speaker_groups)
|
shape_predictor_68_face_landmarks_GTX.dat
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:249a69a1d5f2d7c714a92934d35367d46eb52dc308d46717e82d49e8386b3b80
|
3 |
+
size 66435981
|