Merge branch 'dev' into mp3_len_fix

coqui-ai · Dec 6, 2023 · 87fa4fa · 87fa4fa
2 parents 118553e + 6b2ba52
commit 87fa4fa
Show file tree

Hide file tree

Showing 95 changed files with 18,028 additions and 4,264 deletions.
diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml
@@ -10,7 +10,7 @@ jobs:
   build-sdist:
     runs-on: ubuntu-20.04
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - name: Verify tag matches version
         run: |
           set -ex
@@ -38,7 +38,7 @@ jobs:
       matrix:
         python-version: ["3.9", "3.10", "3.11"]
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - uses: actions/setup-python@v2
         with:
           python-version: ${{ matrix.python-version }}

diff --git a/.github/workflows/style_check.yml b/.github/workflows/style_check.yml
@@ -42,6 +42,5 @@ jobs:
         run: |
           python3 -m pip install .[all]
           python3 setup.py egg_info
-      # - name: Lint check
-      #   run: |
-      #     make lint
+      - name: Style check
+        run: make style
diff --git a/.github/workflows/xtts_tests.yml b/.github/workflows/xtts_tests.yml
@@ -0,0 +1,64 @@
+# Runs the XTTS test suite (`make test_xtts`) on pushes to main and on pull requests.
+name: xtts-tests
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    types: [opened, synchronize, reopened]
+jobs:
+  check_skip:
+    runs-on: ubuntu-latest
+    if: "! contains(github.event.head_commit.message, '[ci skip]')"
+    steps:
+      # The commit message is untrusted input: hand it to the shell through an
+      # environment variable instead of expanding it inside the script text.
+      - env:
+          HEAD_COMMIT_MESSAGE: ${{ github.event.head_commit.message }}
+        run: echo "$HEAD_COMMIT_MESSAGE"
+
+  # NOTE(review): this job declares no `needs: check_skip`, so it runs even when
+  # check_skip is skipped -- confirm whether that is intended.
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        # Versions are quoted so YAML keeps them as strings (an unquoted 3.10 would load as 3.1).
+        python-version: ["3.9", "3.10", "3.11"]
+        experimental: [false]
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+          architecture: x64
+          cache: 'pip'
+          cache-dependency-path: 'requirements*'
+      - name: check OS
+        run: cat /etc/os-release
+      - name: set ENV
+        # `export` only affects the current step's shell; writing to GITHUB_ENV
+        # makes the variable visible to the later steps of this job.
+        run: echo "TRAINER_TELEMETRY=0" >> "$GITHUB_ENV"
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends git make gcc
+          sudo apt-get install -y espeak
+          sudo apt-get install -y espeak-ng
+          make system-deps
+      - name: Install/upgrade Python setup deps
+        run: python3 -m pip install --upgrade pip setuptools wheel
+      - name: Replace scarf urls
+        # Rewrites the scarf gateway URL prefix in TTS/.models.json to the GitHub release download prefix.
+        run: |
+          sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
+      - name: Install TTS
+        run: |
+          python3 -m pip install .[all]
+          python3 setup.py egg_info
+      - name: Unit tests
+        run: make test_xtts
diff --git a/.gitignore b/.gitignore
@@ -169,3 +169,4 @@ wandb
 depot/*
 coqui_recipes/*
 local_scripts/*
+coqui_demos/*
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -128,6 +128,32 @@ The following steps are tested on an Ubuntu system.
 
 14. Once things look perfect, We merge it to the ```dev``` branch and make it ready for the next version.
 
+## Development in Docker container
+
+If you prefer working within a Docker container as your development environment, you can do the following:
+
+1. Fork [🐸TTS](https://github.com/coqui-ai/TTS) by clicking the fork button at the top right corner of the project page.
+
+2. Clone 🐸TTS and add the main repo as a new remote named ```upstream```.
+
+    ```bash
+    $ git clone git@github.com:<your Github name>/TTS.git
+    $ cd TTS
+    $ git remote add upstream https://github.com/coqui-ai/TTS.git
+    ```
+
+3. Build the Docker Image as your development environment (it installs all of the dependencies for you):
+
+    ```
+    docker build --tag=tts-dev:latest -f ./dockerfiles/Dockerfile.dev .
+    ```
+
+4. Run the container with GPU support:
+
+    ```
+    docker run -it --gpus all tts-dev:latest /bin/bash
+    ```
+
 Feel free to ping us at any step you need help using our communication channels.
 
 If you are new to Github or open-source contribution, These are good resources.

diff --git a/Dockerfile b/Dockerfile
@@ -1,13 +1,19 @@
 ARG BASE=nvidia/cuda:11.8.0-base-ubuntu22.04
 FROM ${BASE}
+
 RUN apt-get update && apt-get upgrade -y
 RUN apt-get install -y --no-install-recommends gcc g++ make python3 python3-dev python3-pip python3-venv python3-wheel espeak-ng libsndfile1-dev && rm -rf /var/lib/apt/lists/*
 RUN pip3 install llvmlite --ignore-installed
 
-WORKDIR /root
-COPY . /root
+# Install Dependencies:
 RUN pip3 install torch torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
 RUN rm -rf /root/.cache/pip
+
+# Copy TTS repository contents:
+WORKDIR /root
+COPY . /root
+
 RUN make install
+
 ENTRYPOINT ["tts"]
 CMD ["--help"]
diff --git a/Makefile b/Makefile
@@ -22,6 +22,9 @@ test_tts:	## run tts tests.
 test_tts2:	## run tts tests.
 	nose2 -F -v -B --with-coverage --coverage TTS tests.tts_tests2
 
+test_xtts:	## run xtts tests.
+	nose2 -F -v -B --with-coverage --coverage TTS tests.xtts_tests
+
 test_aux:	## run aux tests.
 	nose2 -F -v -B --with-coverage --coverage TTS tests.aux_tests
 	./run_bash_tests.sh

diff --git a/README.md b/README.md
@@ -1,5 +1,8 @@
 
 ## 🐸Coqui.ai News
+- 📣 ⓍTTSv2 is here with 16 languages and better performance across the board.
+- 📣 ⓍTTS fine-tuning code is out. Check the [example recipes](https://github.com/coqui-ai/TTS/tree/dev/recipes/ljspeech).
+- 📣 ⓍTTS can now stream with <200ms latency.
 - 📣 ⓍTTS, our production TTS model that can speak 13 languages, is released [Blog Post](https://coqui.ai/blog/tts/open_xtts), [Demo](https://huggingface.co/spaces/coqui/xtts), [Docs](https://tts.readthedocs.io/en/dev/models/xtts.html)
 - 📣 [🐶Bark](https://github.com/suno-ai/bark) is now available for inference with unconstrained voice cloning. [Docs](https://tts.readthedocs.io/en/dev/models/bark.html)
 - 📣 You can use [~1100 Fairseq models](https://github.com/facebookresearch/fairseq/tree/main/examples/mms) with 🐸TTS.
@@ -25,7 +28,7 @@
 📚 Utilities for dataset analysis and curation.
 ______________________________________________________________________
 
-[![Dicord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv)
+[![Discord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv)
 [![License](<https://img.shields.io/badge/License-MPL%202.0-brightgreen.svg>)](https://opensource.org/licenses/MPL-2.0)
 [![PyPI version](https://badge.fury.io/py/TTS.svg)](https://badge.fury.io/py/TTS)
 [![Covenant](https://camo.githubusercontent.com/7d620efaa3eac1c5b060ece5d6aacfcc8b81a74a04d05cd0398689c01c4463bb/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f436f6e7472696275746f72253230436f76656e616e742d76322e3025323061646f707465642d6666363962342e737667)](https://github.com/coqui-ai/TTS/blob/master/CODE_OF_CONDUCT.md)
@@ -69,7 +72,7 @@ Please use our dedicated channels for questions and discussion. Help is much mor
 | Type                            | Links                               |
 | ------------------------------- | --------------------------------------- |
 | 💼 **Documentation**              | [ReadTheDocs](https://tts.readthedocs.io/en/latest/)
-| 💾 **Installation**               | [TTS/README.md](https://github.com/coqui-ai/TTS/tree/dev#install-tts)|
+| 💾 **Installation**               | [TTS/README.md](https://github.com/coqui-ai/TTS/tree/dev#installation)|
 | 👩‍💻 **Contributing**               | [CONTRIBUTING.md](https://github.com/coqui-ai/TTS/blob/main/CONTRIBUTING.md)|
 | 📌 **Road Map**                   | [Main Development Plans](https://github.com/coqui-ai/TTS/issues/378)
 | 🚀 **Released Models**            | [TTS Releases](https://github.com/coqui-ai/TTS/releases) and [Experimental Models](https://github.com/coqui-ai/TTS/wiki/Experimental-Released-Models)|
@@ -202,7 +205,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 print(TTS().list_models())
 
 # Init TTS
-tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1").to(device)
+tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
 
 # Run TTS
 # ❗ Since this model is multi-lingual voice cloning model, we must set the target speaker_wav and language
@@ -264,19 +267,13 @@ models = TTS(cs_api_model="XTTS").list_models()
 # Init TTS with the target studio speaker
 tts = TTS(model_name="coqui_studio/en/Torcull Diarmuid/coqui_studio", progress_bar=False)
 # Run TTS
-tts.tts_to_file(text="This is a test.", file_path=OUTPUT_PATH)
+tts.tts_to_file(text="This is a test.", language="en", file_path=OUTPUT_PATH)
 
 # V1 model
 models = TTS(cs_api_model="V1").list_models()
 # Run TTS with emotion and speed control
 # Emotion control only works with V1 model
 tts.tts_to_file(text="This is a test.", file_path=OUTPUT_PATH, emotion="Happy", speed=1.5)
-
-# XTTS-multilingual
-models = TTS(cs_api_model="XTTS-multilingual").list_models()
-# Run TTS with emotion and speed control
-# Emotion control only works with V1 model
-tts.tts_to_file(text="Das ist ein Test.", file_path=OUTPUT_PATH, language="de", speed=1.0)
 ```
 
 #### Example text to speech using **Fairseq models in ~1100 languages** 🤯.

diff --git a/TTS/.models.json b/TTS/.models.json
@@ -2,28 +2,30 @@
     "tts_models": {
         "multilingual": {
             "multi-dataset": {
-                "xtts_v1": {
-                    "description": "XTTS-v1 by Coqui with 13 languages and cross-language voice cloning.",
+                "xtts_v2": {
+                    "description": "XTTS-v2.0.2 by Coqui with 16 languages.",
                     "hf_url": [
-                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/hifigan/model.pth",
-                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/hifigan/config.json",
-                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/hifigan/vocab.json"
+                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/model.pth",
+                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/config.json",
+                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/vocab.json",
+                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/hash.md5"
                     ],
+                    "model_hash": "10f92b55c512af7a8d39d650547a15a7",
                     "default_vocoder": null,
-                    "commit": "e5140314",
+                    "commit": "480a6cdf7",
                     "license": "CPML",
                     "contact": "[email protected]",
                     "tos_required": true
                 },
                 "xtts_v1.1": {
                     "description": "XTTS-v1.1 by Coqui with 14 languages, cross-language voice cloning and reference leak fixed.",
                     "hf_url": [
-                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1/model.pth",
-                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1/config.json",
-                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1/vocab.json",
-                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1/hash.md5"
+                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/model.pth",
+                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/config.json",
+                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/vocab.json",
+                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/hash.md5"
                     ],
-                    "model_hash": "10163afc541dc86801b33d1f3217b456",
+                    "model_hash": "7c62beaf58d39b729de287330dc254e7b515677416839b649a50e7cf74c3df59",
                     "default_vocoder": null,
                     "commit": "82910a63",
                     "license": "CPML",

diff --git a/TTS/VERSION b/TTS/VERSION
@@ -1 +1 @@
-0.18.0
+0.21.3