Added two source face feature

2024-08-18 11:06:26 -05:00 · 2024-08-18 11:06:26 -05:00 · 2c9141a00e
parent e4b494174d
commit 2c9141a00e
10 changed files with 186 additions and 26 deletions
--- a/DemoTwoFace.gif
+++ b/DemoTwoFace.gif
--- a/DemoTwoFace.png
+++ b/DemoTwoFace.png
--- a/FaceControls.png
+++ b/FaceControls.png
--- a/README.md
+++ b/README.md
@ -1,5 +1,17 @@
+Deep-Live-Cam is compatible with webcam, video or an image
+
+One face replaced with live webcam
+
 ![demo-gif](demo.gif)

+Two faces replaced on webcam
+
+![DemoTwoFace.gif](DemoTwoFace.gif)
+
+It also supports processing an image or video file with one or two faces in the source image.
+
+![DemoTwoFace.png](DemoTwoFace.png)
+

 ## Disclaimer
 This software is meant to be a productive contribution to the rapidly growing AI-generated media industry. It will help artists with tasks such as animating a custom character or using the character as a model for clothing etc.
@ -137,7 +149,7 @@ Choose a face (image with desired face) and the target image/video (image/video

 ## For the webcam mode
 Just follow the clicks on the screenshot
-1. Select a face
+1. Select a face. You can select an image that has one or two faces. The left face will be used as face one and the right face as face two.
 2. Click live
 3. Wait for a few seconds (it takes a longer time, usually 10 to 30 seconds before the preview shows up)

@ -146,6 +158,20 @@ Just follow the clicks on the screenshot
 Just use your favorite screencapture to stream like OBS
 > Note: In case you want to change your face, just select another picture, the preview mode will then restart (so just wait a bit).

+When you select an image with two faces, both faces will be replaced on the webcam. The person on the left will use the left face and the person on the right will use the right face from your image.
+
+![DemoTwoFace.gif](DemoTwoFace.gif)
+
+## One/Two face controls for webcam mode, video or image
+You can control how the source face(s) replace the target face(s).
+
+![FaceControls.png](FaceControls.png)
+
+1. Show both faces - When you select a source image with two faces, you need to enable this option to replace both target faces on webcam, video or image. By default the left face is always used. If your video has only one target face, or one of two target faces goes out of frame, then the left face is used on the remaining target face.
+
+2. Flip left/right faces - Turning this option on flips the source faces on the target faces (right->left). You can flip both source faces in your webcam, video or image when two target faces are present. By default the left face replaces the left target face and the right face replaces the right target face.
+
+3. Detect face from right - Turning this option on replaces the target face detected from the right of the frame when two target faces are present. By default the target face(s) are detected from the left of the webcam, video or image.

 Additional command line arguments are given below. To learn out what they do, check [this guide](https://github.com/s0md3v/roop/wiki/Advanced-Options).

--- a/instruction.png
+++ b/instruction.png
--- a/modules/core.py
+++ b/modules/core.py
@ -45,6 +45,9 @@ def parse_args() -> None:
    program.add_argument('--execution-provider', help='execution provider', dest='execution_provider', default=['cpu'], choices=suggest_execution_providers(), nargs='+')
    program.add_argument('--execution-threads', help='number of execution threads', dest='execution_threads', type=int, default=suggest_execution_threads())
    program.add_argument('-v', '--version', action='version', version=f'{modules.metadata.name} {modules.metadata.version}')
+    program.add_argument('--both-faces', help='use two faces in source image', dest='both_faces', action='store_true', default=False)
+    program.add_argument('--flip-faces', help='flip two faces in source image from right to left', dest='flip_faces', action='store_true', default=False)
+    program.add_argument('--detect-face-right', help='detect target face from right of frame', dest='detect_face_right', action='store_true', default=False)

    # register deprecated args
    program.add_argument('-f', '--face', help=argparse.SUPPRESS, dest='source_path_deprecated')
@ -68,6 +71,10 @@ def parse_args() -> None:
    modules.globals.max_memory = args.max_memory
    modules.globals.execution_providers = decode_execution_providers(args.execution_provider)
    modules.globals.execution_threads = args.execution_threads
+    modules.globals.both_faces = args.both_faces
+    modules.globals.flip_faces = args.flip_faces
+    modules.globals.detect_face_right = args.detect_face_right
+

    #for ENHANCER tumbler:
    if 'face_enhancer' in args.frame_processor:
--- a/modules/face_analyser.py
+++ b/modules/face_analyser.py
@ -29,3 +29,17 @@ def get_many_faces(frame: Frame) -> Any:
        return get_face_analyser().get(frame)
    except IndexError:
        return None
+
+def get_one_face_left(frame: Frame) -> Any:
+    """Return the detected face with the smallest ``bbox[0]`` value.
+
+    ``bbox[0]`` is treated as the x-coordinate of the bounding box, so the
+    result is the face furthest to the left of ``frame``. Returns None when
+    the analyser reports no faces.
+    """
+    detected_faces = get_face_analyser().get(frame)
+    try:
+        leftmost_face = min(detected_faces, key=lambda candidate: candidate.bbox[0])
+    except ValueError:
+        # min() raises ValueError on an empty sequence, i.e. no face was found.
+        return None
+    return leftmost_face
+    
+def get_one_face_right(frame: Frame) -> Any:
+    """Return the detected face with the largest ``bbox[0]`` value.
+
+    ``bbox[0]`` is treated as the x-coordinate of the bounding box, so the
+    result is the face furthest to the right of ``frame``. Returns None when
+    the analyser reports no faces.
+    """
+    detected_faces = get_face_analyser().get(frame)
+    try:
+        rightmost_face = max(detected_faces, key=lambda candidate: candidate.bbox[0])
+    except ValueError:
+        # max() raises ValueError on an empty sequence, i.e. no face was found.
+        return None
+    return rightmost_face
--- a/modules/globals.py
+++ b/modules/globals.py
@ -27,4 +27,7 @@ log_level = 'error'
 fp_ui: Dict[str, bool] = {}
 nsfw = None
 camera_input_combobox = None
-webcam_preview_running = False
+webcam_preview_running = False
+both_faces = None
+flip_faces = None
+detect_face_right = None
--- a/modules/processors/frame/face_swapper.py
+++ b/modules/processors/frame/face_swapper.py
@ -6,7 +6,7 @@ import threading
 import modules.globals
 import modules.processors.frame.core
 from modules.core import update_status
-from modules.face_analyser import get_one_face, get_many_faces
+from modules.face_analyser import get_one_face, get_many_faces, get_one_face_left, get_one_face_right
 from modules.typing import Face, Frame
 from modules.utilities import conditional_download, resolve_relative_path, is_image, is_video

@ -48,25 +48,80 @@ def swap_face(source_face: Face, target_face: Face, temp_frame: Frame) -> Frame:
    return get_face_swapper().get(temp_frame, target_face, source_face, paste_back=True)


-def process_frame(source_face: Face, temp_frame: Frame) -> Frame:
+def process_frame(source_face: List[Face], temp_frame: Frame) -> Frame:
+    """Swap the source face(s) onto the face(s) detected in ``temp_frame``.
+
+    Args:
+        source_face: Source faces as ``[left_source_face, right_source_face]``.
+        temp_frame: Frame whose faces are replaced.
+
+    Returns:
+        The frame with the swaps applied, or the frame unchanged when no
+        target face is detected.
+
+    Behaviour depends on ``modules.globals``:
+        * ``many_faces``: every detected face receives the left source face.
+        * ``both_faces`` (two target faces present): the left/right source
+          faces replace the left/right target faces; ``flip_faces`` crosses
+          the pairing.
+        * otherwise a single target face is replaced: the right one when
+          ``detect_face_right`` is set and two targets exist, else the left
+          one; ``flip_faces`` selects the right source face instead of the
+          left.
+    """
     if modules.globals.many_faces:
         many_faces = get_many_faces(temp_frame)
         if many_faces:
             for target_face in many_faces:
-                temp_frame = swap_face(source_face, target_face, temp_frame)
+                temp_frame = swap_face(source_face[0], target_face, temp_frame)
     else:
-        target_face = get_one_face(temp_frame)
-        if target_face:
-            temp_frame = swap_face(source_face, target_face, temp_frame)
+        # Up to two target faces, ordered left to right.
+        target_faces = get_two_faces(temp_frame)
+        has_two_targets = len(target_faces) >= 2
+
+        if has_two_targets and modules.globals.both_faces:
+            # Replace both target faces. Works best when the source image has
+            # two faces; with a single source face it is used for both.
+            if modules.globals.flip_faces:
+                # Right source face -> left target, left source face -> right target
+                left_index, right_index = 1, 0
+            else:
+                # Left source face -> left target, right source face -> right target
+                left_index, right_index = 0, 1
+            temp_frame = swap_face(source_face[left_index], target_faces[0], temp_frame)
+            temp_frame = swap_face(source_face[right_index], target_faces[1], temp_frame)
+        elif target_faces:
+            # Replace exactly one target face. This branch is exclusive with
+            # the two-face branch above so a face is never swapped twice and
+            # the right target is not overwritten after a two-face swap.
+            # flip_faces picks the right source face instead of the left one.
+            source_index = 1 if modules.globals.flip_faces else 0
+            # The right target face can only be chosen when two were detected.
+            use_right_target = has_two_targets and modules.globals.detect_face_right
+            target_index = 1 if use_right_target else 0
+            temp_frame = swap_face(source_face[source_index], target_faces[target_index], temp_frame)
+
     return temp_frame


 def process_frames(source_path: str, temp_frame_paths: List[str], progress: Any = None) -> None:
-    source_face = get_one_face(cv2.imread(source_path))
+    
+    source_image_left = None  # Initialize variable for the selected face image
+    source_image_right = None  # Initialize variable for the selected face image
+
+    if source_image_left is None and source_path:
+        source_image_left = get_one_face_left(cv2.imread(source_path))
+    if source_image_right is None and source_path:
+        source_image_right = get_one_face_right(cv2.imread(source_path))
+
+
    for temp_frame_path in temp_frame_paths:
        temp_frame = cv2.imread(temp_frame_path)
        try:
-            result = process_frame(source_face, temp_frame)
+            result = process_frame([source_image_left,source_image_right], temp_frame)
            cv2.imwrite(temp_frame_path, result)
        except Exception as exception:
            print(exception)
@ -76,11 +131,28 @@ def process_frames(source_path: str, temp_frame_paths: List[str], progress: Any


 def process_image(source_path: str, target_path: str, output_path: str) -> None:
+    """Swap the source face(s) into a single target image and save the result.
+
+    Args:
+        source_path: Path of the image containing the source face(s).
+        target_path: Path of the image to process.
+        output_path: Path the processed image is written to.
+    """
+    source_image_left = None  # Leftmost face detected in the source image
+    source_image_right = None  # Rightmost face detected in the source image
+
+    if source_path:
+        # Decode the source image once and reuse it for both detections.
+        source_frame = cv2.imread(source_path)
+        source_image_left = get_one_face_left(source_frame)
+        source_image_right = get_one_face_right(source_frame)
+
-    source_face = get_one_face(cv2.imread(source_path))
     target_frame = cv2.imread(target_path)
-    result = process_frame(source_face, target_frame)
+    result = process_frame([source_image_left, source_image_right], target_frame)
     cv2.imwrite(output_path, result)


 def process_video(source_path: str, temp_frame_paths: List[str]) -> None:
+    # Delegates to modules.processors.frame.core.process_video, handing it
+    # this module's process_frames as the frame-processing function.
    modules.processors.frame.core.process_video(source_path, temp_frame_paths, process_frames)
+
+def get_two_faces(frame: Frame) -> List[Face]:
+    """Return at most two detected faces, ordered left to right.
+
+    Faces are sorted by ``bbox[0]`` (the x-coordinate of the bounding box) and
+    the first two of that ordering are returned; an empty list is returned
+    when nothing is detected.
+    """
+    detected_faces = get_many_faces(frame)
+    if not detected_faces:
+        return []
+    left_to_right = sorted(detected_faces, key=lambda candidate: candidate.bbox[0])
+    # NOTE(review): with three or more faces this yields the two leftmost
+    # faces, not the leftmost and rightmost - confirm that is intended.
+    return left_to_right[:2]
--- a/modules/ui.py
+++ b/modules/ui.py
@ -7,7 +7,7 @@ from PIL import Image, ImageOps

 import modules.globals
 import modules.metadata
-from modules.face_analyser import get_one_face
+from modules.face_analyser import get_one_face, get_one_face_left, get_one_face_right
 from modules.capturer import get_video_frame, get_video_frame_total
 from modules.processors.frame.core import get_frame_processors_modules
 from modules.utilities import is_image, is_video, resolve_relative_path
@ -61,28 +61,40 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.C
    target_label = ctk.CTkLabel(root, text=None)
    target_label.place(relx=0.6, rely=0.1, relwidth=0.3, relheight=0.25)

-    select_face_button = ctk.CTkButton(root, text='Select a face', cursor='hand2', command=lambda: select_source_path())
+    select_face_button = ctk.CTkButton(root, text='Select a face/s \n(left face)(right face)', cursor='hand2', command=lambda: select_source_path())
    select_face_button.place(relx=0.1, rely=0.4, relwidth=0.3, relheight=0.1)

    select_target_button = ctk.CTkButton(root, text='Select a target', cursor='hand2', command=lambda: select_target_path())
    select_target_button.place(relx=0.6, rely=0.4, relwidth=0.3, relheight=0.1)

+    both_faces_value = ctk.BooleanVar(value=modules.globals.both_faces)
+    both_faces_checkbox = ctk.CTkSwitch(root, text='Show both faces', variable=both_faces_value, cursor='hand2', command=lambda: setattr(modules.globals, 'both_faces', not modules.globals.both_faces))
+    both_faces_checkbox.place(relx=0.1, rely=0.55)
+
+    flip_faces_value = ctk.BooleanVar(value=modules.globals.flip_faces)
+    flip_faces_checkbox = ctk.CTkSwitch(root, text='Flip left/right faces', variable=flip_faces_value, cursor='hand2', command=lambda: setattr(modules.globals, 'flip_faces', not modules.globals.flip_faces))
+    flip_faces_checkbox.place(relx=0.1, rely=0.60)
+
    keep_fps_value = ctk.BooleanVar(value=modules.globals.keep_fps)
    keep_fps_checkbox = ctk.CTkSwitch(root, text='Keep fps', variable=keep_fps_value, cursor='hand2', command=lambda: setattr(modules.globals, 'keep_fps', not modules.globals.keep_fps))
-    keep_fps_checkbox.place(relx=0.1, rely=0.6)
+    keep_fps_checkbox.place(relx=0.1, rely=0.65)

    keep_frames_value = ctk.BooleanVar(value=modules.globals.keep_frames)
    keep_frames_switch = ctk.CTkSwitch(root, text='Keep frames', variable=keep_frames_value, cursor='hand2', command=lambda: setattr(modules.globals, 'keep_frames', keep_frames_value.get()))
-    keep_frames_switch.place(relx=0.1, rely=0.65)
+    keep_frames_switch.place(relx=0.1, rely=0.70)

    # for FRAME PROCESSOR ENHANCER tumbler:
    enhancer_value = ctk.BooleanVar(value=modules.globals.fp_ui['face_enhancer'])
    enhancer_switch = ctk.CTkSwitch(root, text='Face Enhancer', variable=enhancer_value, cursor='hand2', command=lambda: update_tumbler('face_enhancer',enhancer_value.get()))
-    enhancer_switch.place(relx=0.1, rely=0.7)
+    enhancer_switch.place(relx=0.1, rely=0.75)
+
+    detect_face_right_value = ctk.BooleanVar(value=modules.globals.detect_face_right)
+    detect_face_right_checkbox = ctk.CTkSwitch(root, text='Detect face from right', variable=detect_face_right_value, cursor='hand2', command=lambda: setattr(modules.globals, 'detect_face_right', not modules.globals.detect_face_right))
+    detect_face_right_checkbox.place(relx=0.6, rely=0.55)

    keep_audio_value = ctk.BooleanVar(value=modules.globals.keep_audio)
    keep_audio_switch = ctk.CTkSwitch(root, text='Keep audio', variable=keep_audio_value, cursor='hand2', command=lambda: setattr(modules.globals, 'keep_audio', keep_audio_value.get()))
-    keep_audio_switch.place(relx=0.6, rely=0.6)
+    keep_audio_switch.place(relx=0.6, rely=0.60)

    many_faces_value = ctk.BooleanVar(value=modules.globals.many_faces)
    many_faces_switch = ctk.CTkSwitch(root, text='Many faces', variable=many_faces_value, cursor='hand2', command=lambda: setattr(modules.globals, 'many_faces', many_faces_value.get()))
@ -239,9 +251,25 @@ def update_preview(frame_number: int = 0) -> None:
            from modules.predicter import predict_frame
            if predict_frame(temp_frame):
                quit()
+        
+        source_image_left = None  # Left source face image
+        source_image_right = None  # Right source face image
+        
+        # Initialize variables for the selected source face(s).
+        # The source image can have one face or two faces: we simply detect a face from the
+        # left of the frame, then from the right. This ensures we always have a face to work with.
+        if source_image_left is None and modules.globals.source_path:
+            source_image_left = get_one_face_left(cv2.imread(modules.globals.source_path))
+        if source_image_right is None and modules.globals.source_path:
+            source_image_right = get_one_face_right(cv2.imread(modules.globals.source_path))
+
+        # no face found
+        if source_image_left is None:
+            print('No face found in source image')
+            return
+    
        for frame_processor in get_frame_processors_modules(modules.globals.frame_processors):
-            temp_frame = frame_processor.process_frame(
-                get_one_face(cv2.imread(modules.globals.source_path)),
+            temp_frame = frame_processor.process_frame([source_image_left,source_image_right],
                temp_frame
            )
        image = Image.fromarray(cv2.cvtColor(temp_frame, cv2.COLOR_BGR2RGB))
@ -269,21 +297,31 @@ def webcam_preview():

    frame_processors = get_frame_processors_modules(modules.globals.frame_processors)

-    source_image = None  # Initialize variable for the selected face image
-
+    source_image_left = None  # Left source face image
+    source_image_right = None  # Right source face image
+        
+    # Initialize variables for the selected source face(s).
+    # The source image can have one face or two faces: we simply detect a face from the
+    # left of the frame, then from the right. This ensures we always have a face to work with.
+    if source_image_left is None and modules.globals.source_path:
+        source_image_left = get_one_face_left(cv2.imread(modules.globals.source_path))
+    if source_image_right is None and modules.globals.source_path:
+        source_image_right = get_one_face_right(cv2.imread(modules.globals.source_path))
+    
+    # no face found
+    if source_image_left is None:
+        print('No face found in source image')
+        return
+    
    while True:
        ret, frame = cap.read()
        if not ret:
            break

-        # Select and save face image only once
-        if source_image is None and modules.globals.source_path:
-            source_image = get_one_face(cv2.imread(modules.globals.source_path))
-
        temp_frame = frame.copy()  #Create a copy of the frame

        for frame_processor in frame_processors:
-            temp_frame = frame_processor.process_frame(source_image, temp_frame)
+            temp_frame = frame_processor.process_frame([source_image_left,source_image_right], temp_frame)

        image = cv2.cvtColor(temp_frame, cv2.COLOR_BGR2RGB)  # Convert the image to RGB format to display it with Tkinter
        image = Image.fromarray(image)