webcam live portrait #80

Open
wants to merge 11 commits into base: main

Changes from all commits
Binary file added assets/examples/source/MY_photo.jpg
Binary file added assets/examples/source/k1.png
Binary file added assets/examples/source/k2.png
Binary file removed assets/examples/source/s10.jpg
Binary file added assets/examples/source/solo.png

163 changes: 156 additions & 7 deletions inference.py
@@ -1,33 +1,182 @@
# coding: utf-8

import tyro
from src.config.argument_config import ArgumentConfig
from src.config.inference_config import InferenceConfig
from src.config.crop_config import CropConfig
from src.live_portrait_pipeline import LivePortraitPipeline

import cv2
import time
import numpy as np


def partial_fields(target_class, kwargs):
    return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})


def main():
    # set tyro theme
    tyro.extras.set_accent_color("bright_cyan")
    args = tyro.cli(ArgumentConfig)

    # specify configs for inference
    inference_cfg = partial_fields(InferenceConfig, args.__dict__)  # use attributes of args to initialize InferenceConfig
    crop_cfg = partial_fields(CropConfig, args.__dict__)  # use attributes of args to initialize CropConfig

    live_portrait_pipeline = LivePortraitPipeline(
        inference_cfg=inference_cfg,
        crop_cfg=crop_cfg
    )

    # Original offline run, replaced by the webcam loop below:
    # live_portrait_pipeline.execute(args)

    # Initialize webcam (a driving video such as 'assets/examples/driving/d6.mp4' can be used instead of device 0)
    cap = cv2.VideoCapture(0)

    # Process the first frame to initialize the source features
    ret, frame = cap.read()
    if not ret:
        print("Failed to capture image")
        return

    source_image_path = args.source_image  # Set the source image path here
    x_s, f_s, R_s, x_s_info, lip_delta_before_animation, crop_info, img_rgb = live_portrait_pipeline.execute_frame(frame, source_image_path)

    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            break

        # Process the frame
        result = live_portrait_pipeline.generate_frame(x_s, f_s, R_s, x_s_info, lip_delta_before_animation, crop_info, img_rgb, frame)

        # Debug windows: the cropped source image (img_rgb is RGB, so its colors appear swapped here) and the raw webcam frame
        cv2.imshow('img_rgb Image', img_rgb)
        cv2.imshow('Source Frame', frame)

        # [Key Change] Convert the result from RGB to BGR before displaying
        result_bgr = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)

        # Display the resulting frame
        cv2.imshow('Live Portrait', result_bgr)

        # Press 'q' to exit the loop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # When everything is done, release the capture
    cap.release()
    cv2.destroyAllWindows()

    # live_portrait_pipeline.execute_frame(result_bgr)


if __name__ == '__main__':
    st = time.time()
    main()
    print("Generation time:", (time.time() - st) * 1000)  # total runtime in milliseconds

# 3. Reduced webcam latency 350 to 160

# import cv2
# import time
# import threading
# import numpy as np
# import tyro
# from src.config.argument_config import ArgumentConfig
# from src.config.inference_config import InferenceConfig
# from src.config.crop_config import CropConfig
# from src.live_portrait_pipeline import LivePortraitPipeline

# def partial_fields(target_class, kwargs):
#     return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})

# class VideoCaptureThread:
#     def __init__(self, src=0):
#         self.cap = cv2.VideoCapture(src)
#         self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 480)
#         self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
#         self.cap.set(cv2.CAP_PROP_FPS, 60)

#         if not self.cap.isOpened():
#             print("Failed to open camera")
#             self.running = False
#         else:
#             self.ret = False
#             self.frame = None
#             self.running = True
#             self.thread = threading.Thread(target=self.update, args=())
#             self.thread.start()

#     def update(self):
#         while self.running:
#             self.ret, self.frame = self.cap.read()
#             if not self.ret:
#                 print("Failed to read frame")
#                 break

#     def read(self):
#         return self.ret, self.frame

#     def release(self):
#         self.running = False
#         self.thread.join()
#         self.cap.release()

# def main():
#     # Set tyro theme
#     tyro.extras.set_accent_color("bright_cyan")
#     args = tyro.cli(ArgumentConfig)

#     # Specify configs for inference
#     inference_cfg = partial_fields(InferenceConfig, args.__dict__)
#     crop_cfg = partial_fields(CropConfig, args.__dict__)

#     live_portrait_pipeline = LivePortraitPipeline(
#         inference_cfg=inference_cfg,
#         crop_cfg=crop_cfg
#     )

#     # Initialize webcam 'assets/examples/driving/d6.mp4'
#     cap_thread = VideoCaptureThread(0)

#     # Wait for the first frame to be captured
#     while not cap_thread.ret and cap_thread.running:
#         time.sleep(0.1)

#     if not cap_thread.ret:
#         print("Failed to capture image")
#         cap_thread.release()
#         return

#     source_image_path = args.source_image  # Set the source image path here
#     ret, frame = cap_thread.read()
#     x_s, f_s, R_s, x_s_info, lip_delta_before_animation, crop_info, img_rgb = live_portrait_pipeline.execute_frame(frame, source_image_path)

#     while cap_thread.running:
#         # Capture frame-by-frame
#         ret, frame = cap_thread.read()
#         if not ret:
#             break

#         # Process the frame
#         result = live_portrait_pipeline.generate_frame(x_s, f_s, R_s, x_s_info, lip_delta_before_animation, crop_info, img_rgb, frame)
#         # cv2.imshow('img_rgb Image', img_rgb)
#         cv2.imshow('Webcam Frame', frame)

#         # Convert the result from RGB to BGR before displaying
#         result_bgr = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)

#         # Display the resulting frame
#         cv2.imshow('Webcam Live Portrait', result_bgr)

#         # Press 'q' to exit the loop
#         if cv2.waitKey(1) & 0xFF == ord('q'):
#             break

#     # When everything is done, release the capture
#     cap_thread.release()
#     cv2.destroyAllWindows()

# if __name__ == '__main__':
#     st = time.time()
#     main()
#     print("Generation time:", (time.time() - st) * 1000)
88 changes: 37 additions & 51 deletions readme.md
@@ -1,52 +1,43 @@
<h1 align="center">LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control</h1>

<div align='center'>
<a href='https://github.com/cleardusk' target='_blank'><strong>Jianzhu Guo</strong></a><sup> 1†</sup>&emsp;
<a href='https://github.com/KwaiVGI' target='_blank'><strong>Dingyun Zhang</strong></a><sup> 1,2</sup>&emsp;
<a href='https://github.com/KwaiVGI' target='_blank'><strong>Xiaoqiang Liu</strong></a><sup> 1</sup>&emsp;
<a href='https://github.com/KwaiVGI' target='_blank'><strong>Zhizhou Zhong</strong></a><sup> 1,3</sup>&emsp;
<a href='https://scholar.google.com.hk/citations?user=_8k1ubAAAAAJ' target='_blank'><strong>Yuan Zhang</strong></a><sup> 1</sup>&emsp;
</div>

<div align='center'>
<a href='https://scholar.google.com/citations?user=P6MraaYAAAAJ' target='_blank'><strong>Pengfei Wan</strong></a><sup> 1</sup>&emsp;
<a href='https://openreview.net/profile?id=~Di_ZHANG3' target='_blank'><strong>Di Zhang</strong></a><sup> 1</sup>&emsp;
</div>

<div align='center'>
<sup>1 </sup>Kuaishou Technology&emsp; <sup>2 </sup>University of Science and Technology of China&emsp; <sup>3 </sup>Fudan University&emsp;
</div>

<br>
<div align="center">
<!-- <a href='LICENSE'><img src='https://img.shields.io/badge/license-MIT-yellow'></a> -->
<a href='https://arxiv.org/pdf/2407.03168'><img src='https://img.shields.io/badge/arXiv-LivePortrait-red'></a>
<a href='https://liveportrait.github.io'><img src='https://img.shields.io/badge/Project-LivePortrait-green'></a>
<a href='https://huggingface.co/spaces/KwaiVGI/liveportrait'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue'></a>
</div>
<br>
<h1 align="center"> Webcam Live Portrait</h1>

<p align="center">
<img src="./assets/docs/showcase2.gif" alt="showcase">
<br>
🔥 For more results, visit our <a href="https://liveportrait.github.io/"><strong>homepage</strong></a> 🔥
🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥
</p>

# Webcam result

https://github.com/Mrkomiljon/Webcam_Live_Portrait/assets/92161283/4e16fbc7-8c13-4415-b946-dd731ac00b6e

# Live_Portrait_Monitor

See the [Live_Portrait_Monitor repo](https://github.com/Mrkomiljon/Live_Portrait_Monitor) for the monitor-based version.

https://github.com/user-attachments/assets/80020a36-6ec9-4efa-abf7-c1adbbfc6f39

https://github.com/user-attachments/assets/471c65a2-567f-4822-93af-882f0d041f18

https://github.com/user-attachments/assets/3bf96941-3a93-475b-9d47-8c70e0bd1e48

https://github.com/user-attachments/assets/23a83942-48a6-4922-8a50-b8f7ebdaa143

https://github.com/user-attachments/assets/c65006c6-bfc2-4c99-b7d0-4d8853f0c9da

## 🔥 Updates
- **`2024/07/04`**: 🔥 We released the initial version of the inference code and models. Continuous updates, stay tuned!
- **`2024/07/04`**: 😊 We released the [homepage](https://liveportrait.github.io) and technical report on [arXiv](https://arxiv.org/pdf/2407.03168).
- **`2024/07/10`**: 🔥 I released the initial version of the webcam inference code. Continuous updates, stay tuned!


## Introduction
This repo, named **LivePortrait**, contains the official PyTorch implementation of our paper [LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control](https://arxiv.org/pdf/2407.03168).
We are actively updating and improving this repository. If you find any bugs or have suggestions, welcome to raise issues or submit pull requests (PR) 💖.
This repo, named **Webcam Live Portrait**, builds on the authors' official PyTorch implementation of the paper [LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control](https://arxiv.org/pdf/2407.03168).
I am actively updating and improving this repository. If you find any bugs or have suggestions, feel free to raise issues or submit pull requests (PRs) 💖.
The webcam version (Webcam_Live_Portrait, this repo) and the monitor version ([Live_Portrait_Monitor](https://github.com/Mrkomiljon/Live_Portrait_Monitor)) are maintained as two separate repositories.

## 🔥 Getting Started
### 1. Clone the code and prepare the environment
```bash
git clone https://github.com/KwaiVGI/LivePortrait
cd LivePortrait
git clone https://github.com/Mrkomiljon/Webcam_Live_Portrait.git
cd Webcam_Live_Portrait

# create env using conda
conda create -n LivePortrait python==3.9.18
@@ -56,7 +47,7 @@
pip install -r requirements.txt
```
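
Before downloading weights, you can sanity-check that the core dependencies installed correctly — a minimal check (illustrative; it only assumes `torch` and `opencv-python` are among the installed requirements):

```python
import torch
import cv2

# A CUDA-enabled build of PyTorch is needed for real-time webcam inference
print("PyTorch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("OpenCV:", cv2.__version__)
```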

### 2. Download pretrained weights
Download our pretrained LivePortrait weights and face detection models of InsightFace from [Google Drive](https://drive.google.com/drive/folders/1UtKgzKjFAOmZkhNK-OYT0caJ_w2XAnib) or [Baidu Yun](https://pan.baidu.com/s/1MGctWmNla_vZxDbEp2Dtzw?pwd=z5cn). We have packed all weights in one directory 😊. Unzip and place them in `./pretrained_weights` ensuring the directory structure is as follows:
Download pretrained LivePortrait weights and face detection models of InsightFace from [Google Drive](https://drive.google.com/drive/folders/1UtKgzKjFAOmZkhNK-OYT0caJ_w2XAnib) or [Baidu Yun](https://pan.baidu.com/s/1MGctWmNla_vZxDbEp2Dtzw?pwd=z5cn). We have packed all weights in one directory 😊. Unzip and place them in `./pretrained_weights` ensuring the directory structure is as follows:
```text
pretrained_weights
├── insightface
@@ -84,13 +75,19 @@ python inference.py
If the script runs successfully, you will get an output mp4 file named `animations/s6--d0_concat.mp4`. This file includes the following results: driving video, input image, and generated result.

<p align="center">
<img src="./assets/docs/inference.gif" alt="image">
<img src="https://github.com/Mrkomiljon/Webcam_Live_Portrait/assets/92161283/7c4daf41-838d-4eb8-a762-9188cd337ee6">
</p>

Or, you can change the input by specifying the `-s` and `-d` arguments:
# Non-real-time result

https://github.com/Mrkomiljon/Webcam_Live_Portrait/assets/92161283/7c4daf41-838d-4eb8-a762-9188cd337ee6



Or, you can change the source image by specifying the `-s` argument; the driving frames come from the webcam:

```bash
python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d0.mp4
python inference.py -s assets/examples/source/MY_photo.jpg

# or disable pasting back
python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d0.mp4 --no_flag_pasteback
@@ -99,7 +96,6 @@ python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/
python inference.py -h
```
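
In the webcam script, the driving input is simply whatever source `cv2.VideoCapture` is opened on. To drive the portrait from a different camera index, or from a driving video such as the `assets/examples/driving/d6.mp4` mentioned in the code comments, only the capture line needs to change — a small illustrative sketch:

```python
import cv2

# Default: the first webcam device
cap = cv2.VideoCapture(0)

# Alternatives: another camera index, or a driving video file
# cap = cv2.VideoCapture(1)
# cap = cv2.VideoCapture('assets/examples/driving/d6.mp4')

if not cap.isOpened():
    raise RuntimeError("Could not open the capture source")
```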

**More interesting results can be found in our [Homepage](https://liveportrait.github.io)** 😊

### 4. Gradio interface

@@ -130,15 +126,5 @@ Below are the results of inferring one frame on an RTX 4090 GPU using the native


## Acknowledgements
We would like to thank the contributors of [FOMM](https://github.com/AliaksandrSiarohin/first-order-model), [Open Facevid2vid](https://github.com/zhanglonghao1992/One-Shot_Free-View_Neural_Talking_Head_Synthesis), [SPADE](https://github.com/NVlabs/SPADE), [InsightFace](https://github.com/deepinsight/insightface) repositories, for their open research and contributions.

## Citation 💖
If you find LivePortrait useful for your research, welcome to 🌟 this repo and cite our work using the following BibTeX:
```bibtex
@article{guo2024live,
title = {LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control},
author = {Jianzhu Guo and Dingyun Zhang and Xiaoqiang Liu and Zhizhou Zhong and Yuan Zhang and Pengfei Wan and Di Zhang},
year = {2024},
journal = {arXiv preprint:2407.03168},
}
```
I would like to thank the contributors of the [FOMM](https://github.com/AliaksandrSiarohin/first-order-model), [Open Facevid2vid](https://github.com/zhanglonghao1992/One-Shot_Free-View_Neural_Talking_Head_Synthesis), [SPADE](https://github.com/NVlabs/SPADE), and [InsightFace](https://github.com/deepinsight/insightface) repositories for their open research, as well as the main [LivePortrait authors](https://github.com/KwaiVGI/LivePortrait).
