webcam live portrait #80

Open
wants to merge 11 commits into base: main

Changes from all commits
Binary file added assets/examples/source/MY_photo.jpg
Binary file added assets/examples/source/k1.png
Binary file added assets/examples/source/k2.png
Binary file removed assets/examples/source/s10.jpg
Binary file added assets/examples/source/solo.png

163 changes: 156 additions & 7 deletions inference.py
@@ -1,33 +1,182 @@
# coding: utf-8

import tyro
from src.config.argument_config import ArgumentConfig
from src.config.inference_config import InferenceConfig
from src.config.crop_config import CropConfig
from src.live_portrait_pipeline import LivePortraitPipeline

import cv2
import time
import numpy as np


def partial_fields(target_class, kwargs):
    return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})


def main():
    # set tyro theme
    tyro.extras.set_accent_color("bright_cyan")
    args = tyro.cli(ArgumentConfig)

    # specify configs for inference
    inference_cfg = partial_fields(InferenceConfig, args.__dict__)  # use attributes of args to initialize InferenceConfig
    crop_cfg = partial_fields(CropConfig, args.__dict__)  # use attributes of args to initialize CropConfig

    live_portrait_pipeline = LivePortraitPipeline(
        inference_cfg=inference_cfg,
        crop_cfg=crop_cfg
    )

    # Original offline run, replaced by the webcam loop below:
    # live_portrait_pipeline.execute(args)

    # Initialize webcam (a driving video such as 'assets/examples/driving/d6.mp4' can be used instead of device 0)
    cap = cv2.VideoCapture(0)

    # Process the first frame to initialize the source features
    ret, frame = cap.read()
    if not ret:
        print("Failed to capture image")
        return

    source_image_path = args.source_image  # Set the source image path here
    x_s, f_s, R_s, x_s_info, lip_delta_before_animation, crop_info, img_rgb = live_portrait_pipeline.execute_frame(frame, source_image_path)

    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            break

        # Process the frame
        result = live_portrait_pipeline.generate_frame(x_s, f_s, R_s, x_s_info, lip_delta_before_animation, crop_info, img_rgb, frame)

        # Debug windows: the cropped source image (img_rgb is RGB, so its colors appear swapped here) and the raw webcam frame
        cv2.imshow('img_rgb Image', img_rgb)
        cv2.imshow('Source Frame', frame)

        # [Key Change] Convert the result from RGB to BGR before displaying
        result_bgr = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)

        # Display the resulting frame
        cv2.imshow('Live Portrait', result_bgr)

        # Press 'q' to exit the loop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # When everything is done, release the capture
    cap.release()
    cv2.destroyAllWindows()

    # live_portrait_pipeline.execute_frame(result_bgr)


if __name__ == '__main__':
    st = time.time()
    main()
    print("Generation time:", (time.time() - st) * 1000)  # total runtime in milliseconds

# 3. Reduced webcam latency 350 to 160

# import cv2
# import time
# import threading
# import numpy as np
# import tyro
# from src.config.argument_config import ArgumentConfig
# from src.config.inference_config import InferenceConfig
# from src.config.crop_config import CropConfig
# from src.live_portrait_pipeline import LivePortraitPipeline

# def partial_fields(target_class, kwargs):
#     return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})

# class VideoCaptureThread:
#     def __init__(self, src=0):
#         self.cap = cv2.VideoCapture(src)
#         self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 480)
#         self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
#         self.cap.set(cv2.CAP_PROP_FPS, 60)

#         if not self.cap.isOpened():
#             print("Failed to open camera")
#             self.running = False
#         else:
#             self.ret = False
#             self.frame = None
#             self.running = True
#             self.thread = threading.Thread(target=self.update, args=())
#             self.thread.start()

#     def update(self):
#         while self.running:
#             self.ret, self.frame = self.cap.read()
#             if not self.ret:
#                 print("Failed to read frame")
#                 break

#     def read(self):
#         return self.ret, self.frame

#     def release(self):
#         self.running = False
#         self.thread.join()
#         self.cap.release()

# def main():
#     # Set tyro theme
#     tyro.extras.set_accent_color("bright_cyan")
#     args = tyro.cli(ArgumentConfig)

#     # Specify configs for inference
#     inference_cfg = partial_fields(InferenceConfig, args.__dict__)
#     crop_cfg = partial_fields(CropConfig, args.__dict__)

#     live_portrait_pipeline = LivePortraitPipeline(
#         inference_cfg=inference_cfg,
#         crop_cfg=crop_cfg
#     )

#     # Initialize webcam 'assets/examples/driving/d6.mp4'
#     cap_thread = VideoCaptureThread(0)

#     # Wait for the first frame to be captured
#     while not cap_thread.ret and cap_thread.running:
#         time.sleep(0.1)

#     if not cap_thread.ret:
#         print("Failed to capture image")
#         cap_thread.release()
#         return

#     source_image_path = args.source_image  # Set the source image path here
#     ret, frame = cap_thread.read()
#     x_s, f_s, R_s, x_s_info, lip_delta_before_animation, crop_info, img_rgb = live_portrait_pipeline.execute_frame(frame, source_image_path)

#     while cap_thread.running:
#         # Capture frame-by-frame
#         ret, frame = cap_thread.read()
#         if not ret:
#             break

#         # Process the frame
#         result = live_portrait_pipeline.generate_frame(x_s, f_s, R_s, x_s_info, lip_delta_before_animation, crop_info, img_rgb, frame)
#         # cv2.imshow('img_rgb Image', img_rgb)
#         cv2.imshow('Webcam Frame', frame)

#         # Convert the result from RGB to BGR before displaying
#         result_bgr = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)

#         # Display the resulting frame
#         cv2.imshow('Webcam Live Portrait', result_bgr)

#         # Press 'q' to exit the loop
#         if cv2.waitKey(1) & 0xFF == ord('q'):
#             break

#     # When everything is done, release the capture
#     cap_thread.release()
#     cv2.destroyAllWindows()

# if __name__ == '__main__':
#     st = time.time()
#     main()
#     print("Generation time:", (time.time() - st) * 1000)
88 changes: 37 additions & 51 deletions readme.md
@@ -1,52 +1,43 @@
<h1 align="center">LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control</h1>

<div align='center'>
<a href='https://github.com/cleardusk' target='_blank'><strong>Jianzhu Guo</strong></a><sup> 1†</sup>&emsp;
<a href='https://github.com/KwaiVGI' target='_blank'><strong>Dingyun Zhang</strong></a><sup> 1,2</sup>&emsp;
<a href='https://github.com/KwaiVGI' target='_blank'><strong>Xiaoqiang Liu</strong></a><sup> 1</sup>&emsp;
<a href='https://github.com/KwaiVGI' target='_blank'><strong>Zhizhou Zhong</strong></a><sup> 1,3</sup>&emsp;
<a href='https://scholar.google.com.hk/citations?user=_8k1ubAAAAAJ' target='_blank'><strong>Yuan Zhang</strong></a><sup> 1</sup>&emsp;
</div>

<div align='center'>
<a href='https://scholar.google.com/citations?user=P6MraaYAAAAJ' target='_blank'><strong>Pengfei Wan</strong></a><sup> 1</sup>&emsp;
<a href='https://openreview.net/profile?id=~Di_ZHANG3' target='_blank'><strong>Di Zhang</strong></a><sup> 1</sup>&emsp;
</div>

<div align='center'>
<sup>1 </sup>Kuaishou Technology&emsp; <sup>2 </sup>University of Science and Technology of China&emsp; <sup>3 </sup>Fudan University&emsp;
</div>

<br>
<div align="center">
<!-- <a href='LICENSE'><img src='https://img.shields.io/badge/license-MIT-yellow'></a> -->
<a href='https://arxiv.org/pdf/2407.03168'><img src='https://img.shields.io/badge/arXiv-LivePortrait-red'></a>
<a href='https://liveportrait.github.io'><img src='https://img.shields.io/badge/Project-LivePortrait-green'></a>
<a href='https://huggingface.co/spaces/KwaiVGI/liveportrait'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue'></a>
</div>
<br>
<h1 align="center"> Webcam Live Portrait</h1>

<p align="center">
<img src="./assets/docs/showcase2.gif" alt="showcase">
<br>
🔥 For more results, visit our <a href="https://liveportrait.github.io/"><strong>homepage</strong></a> 🔥
🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥
</p>

# Webcam result

https://github.com/Mrkomiljon/Webcam_Live_Portrait/assets/92161283/4e16fbc7-8c13-4415-b946-dd731ac00b6e

# Live_Portrait_Monitor

See the [Live_Portrait_Monitor repo](https://github.com/Mrkomiljon/Live_Portrait_Monitor) for the monitor-based version.

https://github.com/user-attachments/assets/80020a36-6ec9-4efa-abf7-c1adbbfc6f39

https://github.com/user-attachments/assets/471c65a2-567f-4822-93af-882f0d041f18

https://github.com/user-attachments/assets/3bf96941-3a93-475b-9d47-8c70e0bd1e48

https://github.com/user-attachments/assets/23a83942-48a6-4922-8a50-b8f7ebdaa143

https://github.com/user-attachments/assets/c65006c6-bfc2-4c99-b7d0-4d8853f0c9da

## 🔥 Updates
- **`2024/07/04`**: 🔥 We released the initial version of the inference code and models. Continuous updates, stay tuned!
- **`2024/07/04`**: 😊 We released the [homepage](https://liveportrait.github.io) and technical report on [arXiv](https://arxiv.org/pdf/2407.03168).
- **`2024/07/10`**: 🔥 I released the initial version of the webcam inference code. Continuous updates, stay tuned!


## Introduction
This repo, named **LivePortrait**, contains the official PyTorch implementation of our paper [LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control](https://arxiv.org/pdf/2407.03168).
We are actively updating and improving this repository. If you find any bugs or have suggestions, welcome to raise issues or submit pull requests (PR) 💖.
This repo, named **Webcam Live Portrait**, builds on the authors' official PyTorch implementation of the paper [LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control](https://arxiv.org/pdf/2407.03168).
I am actively updating and improving this repository. If you find any bugs or have suggestions, feel free to raise issues or submit pull requests (PRs) 💖.
The webcam version (Webcam_Live_Portrait, this repo) and the monitor version ([Live_Portrait_Monitor](https://github.com/Mrkomiljon/Live_Portrait_Monitor)) are maintained as two separate repositories.

## 🔥 Getting Started
### 1. Clone the code and prepare the environment
```bash
git clone https://github.com/KwaiVGI/LivePortrait
cd LivePortrait
git clone https://github.com/Mrkomiljon/Webcam_Live_Portrait.git
cd Webcam_Live_Portrait

# create env using conda
conda create -n LivePortrait python==3.9.18
@@ -56,7 +47,7 @@
pip install -r requirements.txt
```
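
Before downloading weights, you can sanity-check that the core dependencies installed correctly — a minimal check (illustrative; it only assumes `torch` and `opencv-python` are among the installed requirements):

```python
import torch
import cv2

# A CUDA-enabled build of PyTorch is needed for real-time webcam inference
print("PyTorch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("OpenCV:", cv2.__version__)
```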

### 2. Download pretrained weights
Download our pretrained LivePortrait weights and face detection models of InsightFace from [Google Drive](https://drive.google.com/drive/folders/1UtKgzKjFAOmZkhNK-OYT0caJ_w2XAnib) or [Baidu Yun](https://pan.baidu.com/s/1MGctWmNla_vZxDbEp2Dtzw?pwd=z5cn). We have packed all weights in one directory 😊. Unzip and place them in `./pretrained_weights` ensuring the directory structure is as follows:
Download pretrained LivePortrait weights and face detection models of InsightFace from [Google Drive](https://drive.google.com/drive/folders/1UtKgzKjFAOmZkhNK-OYT0caJ_w2XAnib) or [Baidu Yun](https://pan.baidu.com/s/1MGctWmNla_vZxDbEp2Dtzw?pwd=z5cn). We have packed all weights in one directory 😊. Unzip and place them in `./pretrained_weights` ensuring the directory structure is as follows:
```text
pretrained_weights
├── insightface
@@ -84,13 +75,19 @@ python inference.py
If the script runs successfully, you will get an output mp4 file named `animations/s6--d0_concat.mp4`. This file includes the following results: driving video, input image, and generated result.

<p align="center">
<img src="./assets/docs/inference.gif" alt="image">
<img src="https://github.com/Mrkomiljon/Webcam_Live_Portrait/assets/92161283/7c4daf41-838d-4eb8-a762-9188cd337ee6">
</p>

Or, you can change the input by specifying the `-s` and `-d` arguments:
# Non-real-time result

https://github.com/Mrkomiljon/Webcam_Live_Portrait/assets/92161283/7c4daf41-838d-4eb8-a762-9188cd337ee6



Or, you can change the source image by specifying the `-s` argument; the driving frames come from the webcam:

```bash
python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d0.mp4
python inference.py -s assets/examples/source/MY_photo.jpg

# or disable pasting back
python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d0.mp4 --no_flag_pasteback
@@ -99,7 +96,6 @@ python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/
python inference.py -h
```
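
In the webcam script, the driving input is simply whatever source `cv2.VideoCapture` is opened on. To drive the portrait from a different camera index, or from a driving video such as the `assets/examples/driving/d6.mp4` mentioned in the code comments, only the capture line needs to change — a small illustrative sketch:

```python
import cv2

# Default: the first webcam device
cap = cv2.VideoCapture(0)

# Alternatives: another camera index, or a driving video file
# cap = cv2.VideoCapture(1)
# cap = cv2.VideoCapture('assets/examples/driving/d6.mp4')

if not cap.isOpened():
    raise RuntimeError("Could not open the capture source")
```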

**More interesting results can be found in our [Homepage](https://liveportrait.github.io)** 😊

### 4. Gradio interface

@@ -130,15 +126,5 @@ Below are the results of inferring one frame on an RTX 4090 GPU using the native


## Acknowledgements
We would like to thank the contributors of [FOMM](https://github.com/AliaksandrSiarohin/first-order-model), [Open Facevid2vid](https://github.com/zhanglonghao1992/One-Shot_Free-View_Neural_Talking_Head_Synthesis), [SPADE](https://github.com/NVlabs/SPADE), [InsightFace](https://github.com/deepinsight/insightface) repositories, for their open research and contributions.

## Citation 💖
If you find LivePortrait useful for your research, welcome to 🌟 this repo and cite our work using the following BibTeX:
```bibtex
@article{guo2024live,
title = {LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control},
author = {Jianzhu Guo and Dingyun Zhang and Xiaoqiang Liu and Zhizhou Zhong and Yuan Zhang and Pengfei Wan and Di Zhang},
year = {2024},
journal = {arXiv preprint:2407.03168},
}
```
I would like to thank the contributors of the [FOMM](https://github.com/AliaksandrSiarohin/first-order-model), [Open Facevid2vid](https://github.com/zhanglonghao1992/One-Shot_Free-View_Neural_Talking_Head_Synthesis), [SPADE](https://github.com/NVlabs/SPADE), and [InsightFace](https://github.com/deepinsight/insightface) repositories for their open research, as well as the main [LivePortrait authors](https://github.com/KwaiVGI/LivePortrait).
