feat(): adding mouth masking

2024-10-24 00:24:42 +05:30 · 2024-10-24 00:24:42 +05:30 · d616c513c9
parent 6f6f93a4ad
commit d616c513c9
3 changed files with 208 additions and 33 deletions
--- a/modules/globals.py
+++ b/modules/globals.py
@ -36,3 +36,6 @@ fp_ui: Dict[str, bool] = {"face_enhancer": False}
 camera_input_combobox = None
 webcam_preview_running = False
 show_fps = False
+# Whether mouth masking is enabled; written by toggle_mouthmask() in modules/ui.py.
+mouth_mask = False
+# Written by the "Show Mouth Mask Box" switch created in modules/ui.py.
+show_mouth_mask_box = False
+# Preview-window "Mouth Mask" switch; stays None until create_preview() in
+# modules/ui.py assigns the widget.
+mouth_mask_switch_preview = None
--- a/modules/processors/frame/face_swapper.py
+++ b/modules/processors/frame/face_swapper.py
@ -1,5 +1,6 @@
-from typing import Any, List
+from typing import Any, List, Tuple
 import cv2
+import numpy as np
 import insightface
 import threading

@ -8,29 +9,48 @@ import modules.processors.frame.core
 from modules.core import update_status
 from modules.face_analyser import get_one_face, get_many_faces, default_source_face
 from modules.typing import Face, Frame
-from modules.utilities import conditional_download, resolve_relative_path, is_image, is_video
+from modules.utilities import (
+    conditional_download,
+    resolve_relative_path,
+    is_image,
+    is_video,
+)
 from modules.cluster_analysis import find_closest_centroid

 FACE_SWAPPER = None
 THREAD_LOCK = threading.Lock()
-NAME = 'DLC.FACE-SWAPPER'
+NAME = "DLC.FACE-SWAPPER"
+
+# Landmark indices that create_mouth_mask() uses as the mouth polygon vertices.
+# NOTE(review): in the widely used 68-point layout the mouth is 48..67, while
+# this range starts at 46 -- confirm which landmark scheme `face.kps` follows.
+MOUTH_LANDMARKS = list(
+    range(46, 68)
+)  # indices 46..67 inclusive (22 points)


 def pre_check() -> bool:
-    download_directory_path = resolve_relative_path('../models')
-    conditional_download(download_directory_path, ['https://huggingface.co/hacksider/deep-live-cam/blob/main/inswapper_128_fp16.onnx'])
+    download_directory_path = resolve_relative_path("../models")
+    conditional_download(
+        download_directory_path,
+        [
+            "https://huggingface.co/hacksider/deep-live-cam/blob/main/inswapper_128_fp16.onnx"
+        ],
+    )
    return True


def pre_start() -> bool:
+    """Validate the configured source and target paths before processing.
+
+    Reports the problem through ``update_status`` and returns ``False`` when,
+    with ``map_faces`` off, the source path is not an image or no face is
+    found in it, or when the target path is neither an image nor a video.
+    Returns ``True`` otherwise.
+    """
    if not modules.globals.map_faces and not is_image(modules.globals.source_path):
-        update_status('Select an image for source path.', NAME)
+        update_status("Select an image for source path.", NAME)
        return False
-    elif not modules.globals.map_faces and not get_one_face(cv2.imread(modules.globals.source_path)):
-        update_status('No face in source path detected.', NAME)
+    elif not modules.globals.map_faces and not get_one_face(
+        cv2.imread(modules.globals.source_path)
+    ):
+        update_status("No face in source path detected.", NAME)
        return False
-    if not is_image(modules.globals.target_path) and not is_video(modules.globals.target_path):
-        update_status('Select an image or video for target path.', NAME)
+    if not is_image(modules.globals.target_path) and not is_video(
+        modules.globals.target_path
+    ):
+        update_status("Select an image or video for target path.", NAME)
        return False
    return True

@ -40,20 +60,60 @@ def get_face_swapper() -> Any:

    with THREAD_LOCK:
        if FACE_SWAPPER is None:
-            model_path = resolve_relative_path('../models/inswapper_128_fp16.onnx')
-            FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=modules.globals.execution_providers)
+            model_path = resolve_relative_path("../models/inswapper_128_fp16.onnx")
+            FACE_SWAPPER = insightface.model_zoo.get_model(
+                model_path, providers=modules.globals.execution_providers
+            )
    return FACE_SWAPPER


+def create_mouth_mask(face: Face, frame_shape: Tuple[int, ...]) -> np.ndarray:
+    """Build a soft-edged ``uint8`` mask over the mouth polygon of ``face``.
+
+    Args:
+        face: Detected face; its ``kps`` array is indexed with
+            ``MOUTH_LANDMARKS`` to obtain the polygon vertices.
+        frame_shape: Shape of the frame the mask is for. Only the first two
+            entries are used, so a full ``frame.shape`` can be passed.
+
+    Returns:
+        A 2-D ``uint8`` array sized ``frame_shape[:2]``: the filled mouth
+        polygon, dilated and then Gaussian-blurred. An all-zero mask is
+        returned when ``face.kps`` is missing or has too few points to be
+        indexed by ``MOUTH_LANDMARKS``.
+    """
+    mask = np.zeros(frame_shape[:2], dtype=np.uint8)
+
+    landmarks = getattr(face, "kps", None)
+    # Indexing past the end of the array raises IndexError; a face that only
+    # carries a handful of keypoints must not abort frame processing.
+    # NOTE(review): confirm `kps` really holds a dense landmark set here.
+    if landmarks is None or len(landmarks) <= max(MOUTH_LANDMARKS):
+        return mask
+
+    # Fill the polygon spanned by the selected landmarks.
+    mouth_points = np.asarray(landmarks)[MOUTH_LANDMARKS].astype(np.int32)
+    cv2.fillPoly(mask, [mouth_points], 255)
+
+    # Grow the region slightly (5x5 kernel, two passes) before feathering.
+    kernel = np.ones((5, 5), np.uint8)
+    mask = cv2.dilate(mask, kernel, iterations=2)
+
+    # Blur the edge so the later blend has no hard seam.
+    return cv2.GaussianBlur(mask, (15, 15), 10)
+
+
+def blend_with_mask(
+    swapped_frame: Frame, original_frame: Frame, mask: np.ndarray
+) -> Frame:
+    """Mix two frames per pixel, weighted by ``mask``.
+
+    The mask is expanded to three channels and divided by 255. The result is
+    ``swapped_frame * (1 - w) + original_frame * w``, cast to ``uint8``, so
+    higher mask values keep more of ``original_frame``.
+    """
+    # Per-pixel weight of the original frame (assumes an 8-bit mask).
+    original_weight = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR) / 255.0
+    swapped_weight = 1 - original_weight
+
+    swapped_part = swapped_frame * swapped_weight
+    original_part = original_frame * original_weight
+    return (swapped_part + original_part).astype(np.uint8)
+
+
 def swap_face(source_face: Face, target_face: Face, temp_frame: Frame) -> Frame:
-    return get_face_swapper().get(temp_frame, target_face, source_face, paste_back=True)
+    # Store the original frame for mouth preservation
+    original_frame = temp_frame.copy()
+
+    # Perform the face swap
+    swapped_frame = get_face_swapper().get(
+        temp_frame, target_face, source_face, paste_back=True
+    )


 def process_frame(source_face: Face, temp_frame: Frame) -> Frame:
    # Ensure the frame is in RGB format if color correction is enabled
    if modules.globals.color_correction:
        temp_frame = cv2.cvtColor(temp_frame, cv2.COLOR_BGR2RGB)
-        
+
    if modules.globals.many_faces:
        many_faces = get_many_faces(temp_frame)
        if many_faces:
@ -71,34 +131,42 @@ def process_frame_v2(temp_frame: Frame, temp_frame_path: str = "") -> Frame:
        if modules.globals.many_faces:
            source_face = default_source_face()
            for map in modules.globals.souce_target_map:
-                target_face = map['target']['face']
+                target_face = map["target"]["face"]
                temp_frame = swap_face(source_face, target_face, temp_frame)

        elif not modules.globals.many_faces:
            for map in modules.globals.souce_target_map:
                if "source" in map:
-                    source_face = map['source']['face']
-                    target_face = map['target']['face']               
+                    source_face = map["source"]["face"]
+                    target_face = map["target"]["face"]
                    temp_frame = swap_face(source_face, target_face, temp_frame)

    elif is_video(modules.globals.target_path):
        if modules.globals.many_faces:
            source_face = default_source_face()
            for map in modules.globals.souce_target_map:
-                target_frame = [f for f in map['target_faces_in_frame'] if f['location'] == temp_frame_path]
+                target_frame = [
+                    f
+                    for f in map["target_faces_in_frame"]
+                    if f["location"] == temp_frame_path
+                ]

                for frame in target_frame:
-                    for target_face in frame['faces']:
+                    for target_face in frame["faces"]:
                        temp_frame = swap_face(source_face, target_face, temp_frame)

        elif not modules.globals.many_faces:
            for map in modules.globals.souce_target_map:
                if "source" in map:
-                    target_frame = [f for f in map['target_faces_in_frame'] if f['location'] == temp_frame_path]
-                    source_face = map['source']['face']
+                    target_frame = [
+                        f
+                        for f in map["target_faces_in_frame"]
+                        if f["location"] == temp_frame_path
+                    ]
+                    source_face = map["source"]["face"]

                    for frame in target_frame:
-                        for target_face in frame['faces']:
+                        for target_face in frame["faces"]:
                            temp_frame = swap_face(source_face, target_face, temp_frame)
    else:
        detected_faces = get_many_faces(temp_frame)
@ -110,25 +178,46 @@ def process_frame_v2(temp_frame: Frame, temp_frame_path: str = "") -> Frame:

        elif not modules.globals.many_faces:
            if detected_faces:
-                if len(detected_faces) <= len(modules.globals.simple_map['target_embeddings']):
+                if len(detected_faces) <= len(
+                    modules.globals.simple_map["target_embeddings"]
+                ):
                    for detected_face in detected_faces:
-                        closest_centroid_index, _ = find_closest_centroid(modules.globals.simple_map['target_embeddings'], detected_face.normed_embedding)
+                        closest_centroid_index, _ = find_closest_centroid(
+                            modules.globals.simple_map["target_embeddings"],
+                            detected_face.normed_embedding,
+                        )

-                        temp_frame = swap_face(modules.globals.simple_map['source_faces'][closest_centroid_index], detected_face, temp_frame)
+                        temp_frame = swap_face(
+                            modules.globals.simple_map["source_faces"][
+                                closest_centroid_index
+                            ],
+                            detected_face,
+                            temp_frame,
+                        )
                else:
                    detected_faces_centroids = []
                    for face in detected_faces:
-                            detected_faces_centroids.append(face.normed_embedding)
+                        detected_faces_centroids.append(face.normed_embedding)
                    i = 0
-                    for target_embedding in modules.globals.simple_map['target_embeddings']:
-                        closest_centroid_index, _ = find_closest_centroid(detected_faces_centroids, target_embedding)
+                    for target_embedding in modules.globals.simple_map[
+                        "target_embeddings"
+                    ]:
+                        closest_centroid_index, _ = find_closest_centroid(
+                            detected_faces_centroids, target_embedding
+                        )

-                        temp_frame = swap_face(modules.globals.simple_map['source_faces'][i], detected_faces[closest_centroid_index], temp_frame)
+                        temp_frame = swap_face(
+                            modules.globals.simple_map["source_faces"][i],
+                            detected_faces[closest_centroid_index],
+                            temp_frame,
+                        )
                        i += 1
    return temp_frame


-def process_frames(source_path: str, temp_frame_paths: List[str], progress: Any = None) -> None:
+def process_frames(
+    source_path: str, temp_frame_paths: List[str], progress: Any = None
+) -> None:
    if not modules.globals.map_faces:
        source_face = get_one_face(cv2.imread(source_path))
        for temp_frame_path in temp_frame_paths:
@ -162,7 +251,9 @@ def process_image(source_path: str, target_path: str, output_path: str) -> None:
        cv2.imwrite(output_path, result)
    else:
        if modules.globals.many_faces:
-            update_status('Many faces enabled. Using first source image. Progressing...', NAME)
+            update_status(
+                "Many faces enabled. Using first source image. Progressing...", NAME
+            )
        target_frame = cv2.imread(output_path)
        result = process_frame_v2(target_frame)
        cv2.imwrite(output_path, result)
@ -170,5 +261,9 @@ def process_image(source_path: str, target_path: str, output_path: str) -> None:

 def process_video(source_path: str, temp_frame_paths: List[str]) -> None:
+    """Hand the frame paths to the shared frame-processor core.
+
+    Passes ``source_path``, ``temp_frame_paths`` and ``process_frames`` to
+    ``modules.processors.frame.core.process_video``. Emits a status notice
+    first when both ``map_faces`` and ``many_faces`` are enabled.
+    """
    if modules.globals.map_faces and modules.globals.many_faces:
-        update_status('Many faces enabled. Using first source image. Progressing...', NAME)
-    modules.processors.frame.core.process_video(source_path, temp_frame_paths, process_frames)
+        update_status(
+            "Many faces enabled. Using first source image. Progressing...", NAME
+        )
+    modules.processors.frame.core.process_video(
+        source_path, temp_frame_paths, process_frames
+    )
--- a/modules/ui.py
+++ b/modules/ui.py
@ -59,6 +59,8 @@ RECENT_DIRECTORY_SOURCE = None
 RECENT_DIRECTORY_TARGET = None
 RECENT_DIRECTORY_OUTPUT = None

+BLUR_SIZE = 1
+
 preview_label = None
 preview_slider = None
 source_label = None
@ -317,6 +319,47 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.C
    live_button.place(relx=0.65, rely=0.86, relwidth=0.2, relheight=0.05)
    # --- End Camera Selection ---

+    show_mouth_mask_var = ctk.BooleanVar(value=modules.globals.show_mouth_mask_box)
+    show_mouth_mask_switch = ctk.CTkSwitch(
+        root,
+        text="Show Mouth Mask Box",
+        variable=show_mouth_mask_var,
+        cursor="hand2",
+        command=lambda: setattr(
+            modules.globals, "show_mouth_mask_box", show_mouth_mask_var.get()
+        ),
+        progress_color="#3a7ebf",
+        font=("Roboto", 14, "bold"),
+    )
+    show_mouth_mask_switch.pack(pady=5, anchor="w")
+
+    # Create frame for mouth mask
+    mouth_mask_frame = ctk.CTkFrame(
+        root, fg_color="#2a2d2e", border_width=1, border_color="grey"
+    )
+    mouth_mask_frame.pack(pady=5, anchor="w", fill="x")
+
+    # Create a shared BooleanVar in modules.globals
+    if not hasattr(modules.globals, "mouth_mask_var"):
+        modules.globals.mouth_mask_var = ctk.BooleanVar(
+            value=modules.globals.mouth_mask
+        )
+
+    # Mouth mask switch
+    mouth_mask_switch = ctk.CTkSwitch(
+        mouth_mask_frame,
+        text="Mouth Mask",
+        variable=modules.globals.mouth_mask_var,
+        cursor="hand2",
+        command=toggle_mouthmask,
+        progress_color="#3a7ebf",
+        font=("Roboto", 14, "bold"),
+    )
+    mouth_mask_switch.pack(pady=5, anchor="w")
+
+    # Store the switch in modules.globals for access from create_preview
+    modules.globals.mouth_mask_switch_root = mouth_mask_switch
+
    status_label = ctk.CTkLabel(root, text=None, justify="center")
    status_label.place(relx=0.1, rely=0.9, relwidth=0.8)

@ -478,6 +521,29 @@ def update_popup_source(
        return map


+def toggle_mouthmask() -> None:
+    """Apply the mouth-mask switch state globally and mirror it on each switch.
+
+    Reads ``modules.globals.mouth_mask_var``, stores the value in
+    ``modules.globals.mouth_mask``, then selects or deselects the root and
+    preview switches to match. A switch that does not exist yet (attribute
+    missing or still ``None``) is skipped.
+    """
+    is_mouthmask = modules.globals.mouth_mask_var.get()
+    modules.globals.mouth_mask = is_mouthmask
+
+    # `mouth_mask_switch_preview` is pre-declared as None in modules.globals,
+    # so a hasattr() test alone would let None through and fail on .select().
+    for attr_name in ("mouth_mask_switch_root", "mouth_mask_switch_preview"):
+        switch = getattr(modules.globals, attr_name, None)
+        if switch is None:
+            continue
+        if is_mouthmask:
+            switch.select()
+        else:
+            switch.deselect()
+
+
 def create_preview(parent: ctk.CTkToplevel) -> ctk.CTkToplevel:
    global preview_label, preview_slider

@ -494,6 +560,17 @@ def create_preview(parent: ctk.CTkToplevel) -> ctk.CTkToplevel:
    preview_slider = ctk.CTkSlider(
        preview, from_=0, to=0, command=lambda frame_value: update_preview(frame_value)
    )
+    mouth_mask_switch_preview = ctk.CTkSwitch(
+        preview,
+        text="Mouth Mask",
+        variable=modules.globals.mouth_mask_var,
+        cursor="hand2",
+        command=toggle_mouthmask,
+    )
+    mouth_mask_switch_preview.pack(side="left", padx=5, pady=5)
+
+    # Store the switch in modules.globals for access from create_root
+    modules.globals.mouth_mask_switch_preview = mouth_mask_switch_preview

    return preview