fixes for ffmpeg, coreml, metal and resolution changes

2024-08-19 21:11:54 +01:00 · 2024-08-19 21:11:54 +01:00 · f1c158c9fa
parent 0c08c59afa
commit f1c158c9fa
5 changed files with 22 additions and 15 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -25,3 +25,5 @@ models/DMDNet.pth
 .venv/
 tf_env/
 .tf_env/
+.deepcamlive/
+deep-live-cam/
--- a/modules/core.py
+++ b/modules/core.py
@@ -75,13 +75,13 @@ def parse_args() -> None:
    program.add_argument('--keep-audio', help='keep original audio', dest='keep_audio', action='store_true', default=True)
    program.add_argument('--keep-frames', help='keep temporary frames', dest='keep_frames', action='store_true', default=True)
    program.add_argument('--many-faces', help='process every face', dest='many_faces', action='store_true', default=False)
-    program.add_argument('--video-encoder', help='adjust output video encoder', dest='video_encoder', default='libvpx-vp9', choices=['libx264', 'libx265', 'libvpx-vp9'])
+    program.add_argument('--video-encoder', help='adjust output video encoder', dest='video_encoder', default='libx265', choices=['libx264', 'libx265', 'libvpx-vp9'])
    program.add_argument('--video-quality', help='adjust output video quality', dest='video_quality', type=int, default=1, choices=range(52), metavar='[0-51]')
    program.add_argument('--max-memory', help='maximum amount of RAM in GB', dest='max_memory', type=int, default=suggest_max_memory())
    program.add_argument('--execution-provider', help='execution provider', dest='execution_provider', default=['coreml'], choices=suggest_execution_providers(), nargs='+')
    program.add_argument('--execution-threads', help='number of execution threads', dest='execution_threads', type=int, default=suggest_execution_threads())
-    program.add_argument('--video-processor', help='video processor to use', dest='video_processor', default='cv2', choices=['cv2', 'ffmpeg'])
-    program.add_argument('--model', help='model to use for face swapping', dest='model', default='inswapper_128.onnx')
+    program.add_argument('--video-processor', help='video processor to use', dest='video_processor', default='ffmpeg', choices=['cv2', 'ffmpeg'])
+    program.add_argument('--model', help='model to use for face swapping', dest='model', default='inswapper_128_fp16.onnx')
    program.add_argument('-v', '--version', action='version', version=f'{modules.metadata.name} {modules.metadata.version}')

    args = program.parse_args()
--- a/modules/predicter.py
+++ b/modules/predicter.py
@@ -4,7 +4,7 @@ from PIL import Image

 from modules.typing import Frame

-MAX_PROBABILITY = 0.85
+MAX_PROBABILITY = 0.7


 def predict_frame(target_frame: Frame) -> bool:
@@ -21,5 +21,5 @@ def predict_image(target_path: str) -> bool:


 def predict_video(target_path: str) -> bool:
-    _, probabilities = opennsfw2.predict_video_frames(video_path=target_path, frame_interval=100)
+    _, probabilities = opennsfw2.predict_video_frames(video_path=target_path, frame_interval=20)
    return any(probability > MAX_PROBABILITY for probability in probabilities)
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -14,10 +14,12 @@ from modules.processors.frame.core import get_frame_processors_modules
 from modules.utilities import is_image, is_video, resolve_relative_path

 ROOT = None
-ROOT_HEIGHT = 700
+# this is the main ui
+ROOT_HEIGHT = 900   
 ROOT_WIDTH = 600

 PREVIEW = None
+# this is the preview ui
 PREVIEW_MAX_HEIGHT = 720
 PREVIEW_MAX_WIDTH = 1280

@@ -88,9 +90,9 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.C
    many_faces_switch = ctk.CTkSwitch(root, text='Many faces', variable=many_faces_value, cursor='hand2', command=lambda: setattr(modules.globals, 'many_faces', many_faces_value.get()))
    many_faces_switch.place(relx=0.6, rely=0.65)

-#    nsfw_value = ctk.BooleanVar(value=modules.globals.nsfw)
-#    nsfw_switch = ctk.CTkSwitch(root, text='NSFW', variable=nsfw_value, cursor='hand2', command=lambda: setattr(modules.globals, 'nsfw', nsfw_value.get()))
-#    nsfw_switch.place(relx=0.6, rely=0.7)
+    nsfw_value = ctk.BooleanVar(value=modules.globals.nsfw)
+    nsfw_switch = ctk.CTkSwitch(root, text='NSFW', variable=nsfw_value, cursor='hand2', command=lambda: setattr(modules.globals, 'nsfw', nsfw_value.get()))
+    nsfw_switch.place(relx=0.6, rely=0.7)

    video_processor_label = ctk.CTkLabel(root, text="Video Processor:")
    video_processor_label.place(relx=0.1, rely=0.75)
@@ -296,9 +298,9 @@ def webcam_preview():
        if not cap.isOpened():
            update_status("Error: Unable to open webcam. Please check your camera connection.")
            return
-        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)  # Set the width of the resolution
-        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)  # Set the height of the resolution
-        cap.set(cv2.CAP_PROP_FPS, 30)  # Set the frame rate of the webcam
+        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 320)  # Set the width of the resolution
+        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)  # Set the height of the resolution
+        cap.set(cv2.CAP_PROP_FPS, 60)  # Set the frame rate of the webcam
    else:
        import ffmpeg
        import subprocess
@@ -307,7 +309,7 @@ def webcam_preview():
            'ffmpeg',
            '-f', 'avfoundation',
            '-framerate', '30',
-            '-video_size', '1280x720',
+            '-video_size', '240',
            '-i', '0:none',
            '-f', 'rawvideo',
            '-pix_fmt', 'rgb24',
@@ -342,10 +344,10 @@ def webcam_preview():
                break
            temp_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        else:
-            in_bytes = process.stdout.read(1280 * 720 * 3)
+            in_bytes = process.stdout.read(320 * 240 * 3)
            if not in_bytes:
                break
-            temp_frame = np.frombuffer(in_bytes, np.uint8).reshape([720, 1280, 3])
+            temp_frame = np.frombuffer(in_bytes, np.uint8).reshape([240, 320, 3])

        for frame_processor in frame_processors:
            temp_frame = frame_processor.process_frame(source_face, temp_frame)
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,6 +6,7 @@ onnxruntime-silicon==1.16.3
 pillow==9.5.0
 insightface==0.7.3
 torch==2.1.0
+torchvision==0.19.0
 tensorflow-macos==2.16.2
 tensorflow-metal==1.1.0

@@ -25,6 +26,8 @@ prettytable==3.11.0
 opencv-python==4.8.1.78  # Optional: for cv2 video processing
 ffmpeg-python==0.2.0  # For ffmpeg video processing

+customtkinter==5.2.2
+
 # Optional dependencies (comment out if not needed)
 # albumentations==1.4.13
 # coloredlogs==15.0.1