From bb359869e3c98102727b778fd488641ca0afc833 Mon Sep 17 00:00:00 2001 From: Aleksandr Mokrov Date: Fri, 29 Nov 2024 05:10:31 +0100 Subject: [PATCH 1/5] Move test_replace under metadata (#2566) CVS-157823 test_replace was added in an incorrect section, it should be inside metadata --- .../jax-to-openvino/jax-classification-to-openvino.ipynb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/notebooks/jax-to-openvino/jax-classification-to-openvino.ipynb b/notebooks/jax-to-openvino/jax-classification-to-openvino.ipynb index 65bb20adb4d..117790477d2 100644 --- a/notebooks/jax-to-openvino/jax-classification-to-openvino.ipynb +++ b/notebooks/jax-to-openvino/jax-classification-to-openvino.ipynb @@ -292,10 +292,11 @@ "cell_type": "code", "execution_count": 29, "id": "39b8e0f4-9c51-4e61-bc3f-fcd7c016b217", - "metadata": {}, - "test_replace": { - "image_path = download_file(f\"https://picsum.photos/{resolution}\", filename=\"picsum.jpg\")\n": "image_path = download_file(\"https://github.com/user-attachments/assets/18f0b8cd-7fb1-42a0-a3d7-9091bfbae65b\")" - }, + "metadata": { + "test_replace": { + "image_path = download_file(f\"https://picsum.photos/{resolution}\", filename=\"picsum.jpg\")\n": "image_path = download_file(\"https://github.com/user-attachments/assets/18f0b8cd-7fb1-42a0-a3d7-9091bfbae65b\")\n" + } + }, "outputs": [ { "data": { From 99c1124a204b9569430685f20d9f31289e5f5327 Mon Sep 17 00:00:00 2001 From: Ekaterina Aidova Date: Fri, 29 Nov 2024 12:19:37 +0400 Subject: [PATCH 2/5] separate ov install in controlnet sd (#2568) --- .ci/skipped_notebooks.yml | 7 --- .../controlnet-stable-diffusion.ipynb | 5 +- notebooks/efficient-sam/efficient-sam.ipynb | 9 +++- .../llava-multimodal-chatbot-genai.ipynb | 2 +- .../minicpm-v-multimodal-chatbot.ipynb | 48 ++++++++++--------- .../parler-tts-text-to-speech.ipynb | 3 +- .../whisper-subtitles-generation.ipynb | 21 ++++++-- 7 files changed, 59 insertions(+), 36 deletions(-) diff --git 
a/.ci/skipped_notebooks.yml b/.ci/skipped_notebooks.yml index 2566487502d..c82710a0853 100644 --- a/.ci/skipped_notebooks.yml +++ b/.ci/skipped_notebooks.yml @@ -468,13 +468,6 @@ skips: - python: - '3.9' -- notebook: notebooks/minicpm-v-multimodal-chatbot/minicpm-v-multimodal-chatbot.ipynb - skips: - - os: - - macos-13 - - ubuntu-20.04 - - ubuntu-22.04 - - windows-2019 - notebook: notebooks/stable-audio/stable-audio.ipynb skips: - os: diff --git a/notebooks/controlnet-stable-diffusion/controlnet-stable-diffusion.ipynb b/notebooks/controlnet-stable-diffusion/controlnet-stable-diffusion.ipynb index a85b63c213a..6ef4e670c8a 100644 --- a/notebooks/controlnet-stable-diffusion/controlnet-stable-diffusion.ipynb +++ b/notebooks/controlnet-stable-diffusion/controlnet-stable-diffusion.ipynb @@ -138,10 +138,13 @@ " \"transformers>=4.30.2\",\n", " \"controlnet-aux>=0.0.6\",\n", " \"gradio>=3.36\",\n", + " \"datasets>=2.14.6\",\n", + " \"nncf>=2.7.0\",\n", + " \"opencv-python\",\n", " \"--extra-index-url\",\n", " \"https://download.pytorch.org/whl/cpu\",\n", ")\n", - "pip_install(\"openvino>=2023.1.0\", \"datasets>=2.14.6\", \"nncf>=2.7.0\", \"opencv-python\")" + "pip_install(\"openvino>=2023.1.0\")" ] }, { diff --git a/notebooks/efficient-sam/efficient-sam.ipynb b/notebooks/efficient-sam/efficient-sam.ipynb index b9332330afa..e2829f2cd86 100644 --- a/notebooks/efficient-sam/efficient-sam.ipynb +++ b/notebooks/efficient-sam/efficient-sam.ipynb @@ -67,7 +67,14 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install -q \"openvino>=2023.3.0\" \"nncf>=2.7.0\" opencv-python \"gradio>=4.13\" \"matplotlib>=3.4\" torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu" + "import platform\n", + "\n", + "%pip install -q \"openvino>=2024.5.0\" \"nncf>=2.14.0\"\n", + "%pip install -q \"torch>=2.2.0\" \"torchaudio>=2.2.0\" \"torchvision>=0.17.0\" --extra-index-url https://download.pytorch.org/whl/cpu\n", + "%pip install -q opencv-python \"gradio>=4.13\" 
\"matplotlib>=3.4\" tqdm\n", + "\n", + "if platform.system() == \"Darwin\":\n", + " %pip install -q \"numpy<2.0.0\"" ] }, { diff --git a/notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb b/notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb index cc3ec17194e..2d79ed09301 100644 --- a/notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb +++ b/notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb @@ -97,7 +97,7 @@ "from pathlib import Path\n", "import requests\n", "\n", - "%pip install -q \"torch>=2.1.0\" \"torchvision\" \"torchaudio\" --index-url https://download.pytorch.org/whl/cpu\n", + "%pip install -q \"torch>=2.3.0\" \"torchvision\" \"torchaudio\" --index-url https://download.pytorch.org/whl/cpu\n", "%pip install -q \"git+https://github.com/huggingface/optimum-intel.git\" --index-url https://download.pytorch.org/whl/cpu\n", "%pip install -q \"nncf>=2.14.0\" \"sentencepiece\" \"tokenizers>=0.12.1\" \"transformers>=4.45.0\" \"gradio>=4.36\"\n", "%pip install -q -U \"openvino-tokenizers>=2024.5.0\" \"openvino>=2024.5.0\" \"openvino-genai>=2024.5.0\"|\n", diff --git a/notebooks/minicpm-v-multimodal-chatbot/minicpm-v-multimodal-chatbot.ipynb b/notebooks/minicpm-v-multimodal-chatbot/minicpm-v-multimodal-chatbot.ipynb index b266b019b5b..13661075b3a 100644 --- a/notebooks/minicpm-v-multimodal-chatbot/minicpm-v-multimodal-chatbot.ipynb +++ b/notebooks/minicpm-v-multimodal-chatbot/minicpm-v-multimodal-chatbot.ipynb @@ -131,7 +131,11 @@ "cell_type": "code", "execution_count": 3, "id": "82e846bb", - "metadata": {}, + "metadata": { + "test_replace": { + "openbmb/MiniCPM-V-2_6": "katuni4ka/tiny-random-minicpmv-2_6" + } + }, "outputs": [ { "name": "stdout", @@ -169,7 +173,7 @@ "model_dir = Path(model_id.split(\"/\")[-1] + \"-ov\")\n", "\n", "if not model_dir.exists():\n", - " optimum_cli(model_id, model_dir, additional_args={\"trust-remote-code\": \"\", \"weight-format\": \"fp16\"})\n", + " 
optimum_cli(model_id, model_dir, additional_args={\"trust-remote-code\": \"\", \"weight-format\": \"fp16\", \"task\": \"image-text-to-text\"})\n", " compress_lm_weights(model_dir)" ] }, @@ -204,14 +208,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "id": "626fef57", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "84bba5eaf8cc4b7e97a5e5d3768146e3", + "model_id": "2362638a795340e6b3effb0805848768", "version_major": 2, "version_minor": 0 }, @@ -219,7 +223,7 @@ "Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO')" ] }, - "execution_count": 4, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -434,21 +438,7 @@ "widgets": { "application/vnd.jupyter.widget-state+json": { "state": { - "41592555658f4eb69616c541894b88f0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "2.0.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "658a0c15a9cb47078c9c8647bff53d1e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "2.0.0", - "model_name": "LayoutModel", - "state": {} - }, - "84bba5eaf8cc4b7e97a5e5d3768146e3": { + "2362638a795340e6b3effb0805848768": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "DropdownModel", @@ -459,9 +449,23 @@ ], "description": "Device:", "index": 1, - "layout": "IPY_MODEL_658a0c15a9cb47078c9c8647bff53d1e", - "style": "IPY_MODEL_41592555658f4eb69616c541894b88f0" + "layout": "IPY_MODEL_d737bcde20ac4ba38ecf0902eec67998", + "style": "IPY_MODEL_49b230bc877e422788033f49884843a2" + } + }, + "49b230bc877e422788033f49884843a2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" } + }, + "d737bcde20ac4ba38ecf0902eec67998": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": {} } }, "version_major": 2, diff --git a/notebooks/parler-tts-text-to-speech/parler-tts-text-to-speech.ipynb b/notebooks/parler-tts-text-to-speech/parler-tts-text-to-speech.ipynb index 5f2591cbfc4..11ed9aa65a8 100644 --- a/notebooks/parler-tts-text-to-speech/parler-tts-text-to-speech.ipynb +++ b/notebooks/parler-tts-text-to-speech/parler-tts-text-to-speech.ipynb @@ -58,8 +58,9 @@ "\n", "os.environ[\"GIT_CLONE_PROTECTION_ACTIVE\"] = \"false\"\n", "\n", + "%pip uninstall -q -y torch torchvision torchaudio\n", "%pip install -q \"openvino>=2024.2.0\"\n", - "%pip install -q git+https://github.com/huggingface/parler-tts.git \"gradio>=4.19\" transformers \"torch>=2.2\" --extra-index-url https://download.pytorch.org/whl/cpu" + "%pip install -q git+https://github.com/huggingface/parler-tts.git \"gradio>=4.19\" transformers \"torch>=2.2\" \"torchaudio\" --extra-index-url https://download.pytorch.org/whl/cpu" ] }, { diff --git a/notebooks/whisper-subtitles-generation/whisper-subtitles-generation.ipynb b/notebooks/whisper-subtitles-generation/whisper-subtitles-generation.ipynb index d715ae4e044..e0945dccd39 100644 --- a/notebooks/whisper-subtitles-generation/whisper-subtitles-generation.ipynb +++ b/notebooks/whisper-subtitles-generation/whisper-subtitles-generation.ipynb @@ -83,15 +83,27 @@ "outputs": [], "source": [ "import platform\n", + "import importlib.metadata\n", + "import importlib.util\n", "\n", "%pip install -q \"nncf>=2.14.0\"\n", "%pip install -q -U \"openvino>=2024.5.0\" \"openvino-tokenizers>=2024.5.0\" \"openvino-genai>=2024.5.0\"\n", "%pip install -q \"python-ffmpeg<=1.0.16\" \"ffmpeg\" \"moviepy\" \"transformers>=4.45\" \"git+https://github.com/huggingface/optimum-intel.git\" \"torch>=2.1\" --extra-index-url https://download.pytorch.org/whl/cpu\n", - "%pip install -q -U \"yt_dlp>=2024.8.6\" soundfile librosa jiwer\n", + "%pip install -q -U \"yt_dlp>=2024.8.6\" soundfile librosa jiwer 
packaging\n", "%pip install -q \"gradio>=4.19\" \"typing_extensions>=4.9\"\n", "\n", "if platform.system() == \"Darwin\":\n", - " %pip install -q \"numpy<2.0\"" + " %pip install -q \"numpy<2.0\"\n", + "\n", + "\n", + "from packaging import version\n", + "\n", + "if (\n", + " importlib.util.find_spec(\"tensorflow\") is not None\n", + " and version.parse(importlib.metadata.version(\"tensorflow\")) < version.parse(\"2.18.0\")\n", + " and version.parse(importlib.metadata.version(\"numpy\")) >= version.parse(\"2.0.0\")\n", + "):\n", + " %pip uninstall -q -y tensorflow" ] }, { @@ -379,7 +391,10 @@ "metadata": {}, "outputs": [], "source": [ - "from moviepy.editor import VideoFileClip\n", + "try:\n", + " from moviepy import VideoFileClip\n", + "except ImportError:\n", + " from moviepy.editor import VideoFileClip\n", "from transformers.pipelines.audio_utils import ffmpeg_read\n", "\n", "\n", From 605d3f21d4bb1e088c05a5cc54d5192b3970b673 Mon Sep 17 00:00:00 2001 From: Ekaterina Aidova Date: Fri, 29 Nov 2024 12:20:32 +0400 Subject: [PATCH 3/5] update outetts according new repo structure (#2569) --- .ci/skipped_notebooks.yml | 4 +- .../outetts-text-to-speech.ipynb | 215 ++++++------------ .../ov_outetts_helper.py | 23 +- 3 files changed, 96 insertions(+), 146 deletions(-) diff --git a/.ci/skipped_notebooks.yml b/.ci/skipped_notebooks.yml index c82710a0853..5feef39d2b4 100644 --- a/.ci/skipped_notebooks.yml +++ b/.ci/skipped_notebooks.yml @@ -545,6 +545,8 @@ skips: - os: - macos-13 + - python: + - '3.9' - notebook: notebooks/mobileclip-video-search/mobileclip-video-search.ipynb skips: - os: @@ -555,4 +557,4 @@ - macos-13 - ubuntu-20.04 - ubuntu-22.04 - - windows-2019 \ No newline at end of file + - windows-2019 diff --git a/notebooks/outetts-text-to-speech/outetts-text-to-speech.ipynb b/notebooks/outetts-text-to-speech/outetts-text-to-speech.ipynb index 043a8a768af..aaf103e0b72 100644 --- a/notebooks/outetts-text-to-speech/outetts-text-to-speech.ipynb +++ 
b/notebooks/outetts-text-to-speech/outetts-text-to-speech.ipynb @@ -7,6 +7,8 @@ "source": [ "# Text-to-Speech synthesis using OuteTTS and OpenVINO\n", "\n", + "
Important note: This notebook requires Python >= 3.10. Please make sure that your environment fulfills this requirement before running it.
\n", + "\n", "[OuteTTS-0.1-350M](https://huggingface.co/OuteAI/OuteTTS-0.1-350M) is a novel text-to-speech synthesis model that leverages pure language modeling without external adapters or complex architectures, built upon the LLaMa architecture. It demonstrates that high-quality speech synthesis is achievable through a straightforward approach using crafted prompts and audio tokens.\n", "\n", "More details about model can be found in [original repo](https://github.com/edwko/OuteTTS).\n", @@ -50,7 +52,7 @@ "source": [ "import platform\n", "\n", - "%pip install -q \"torch>=2.1\" \"torchaudio\" \"einops\" \"transformers>=4.46.1\" \"loguru\" \"inflect\" \"pesq\" \"torchcrepe\" \"natsort\" \"polars\" --extra-index-url https://download.pytorch.org/whl/cpu\n", + "%pip install -q \"torch>=2.1\" \"torchaudio\" \"einops\" \"transformers>=4.46.1\" \"loguru\" \"inflect\" \"pesq\" \"torchcrepe\" \"natsort\" \"polars\" uroman mecab-python3 unidic-lite --extra-index-url https://download.pytorch.org/whl/cpu\n", "%pip install -q \"gradio>=4.19\" \"openvino>=2024.4.0\" \"tqdm\" \"pyyaml\" \"librosa\" \"soundfile\"\n", "%pip install -q \"git+https://github.com/huggingface/optimum-intel.git\" --extra-index-url https://download.pytorch.org/whl/cpu\n", "\n", @@ -97,10 +99,15 @@ "\n", "repo_path = clone_repo(\"https://github.com/edwko/OuteTTS.git\")\n", "\n", - "interface_path = repo_path / \"outetts/v0_1/interface.py\"\n", + "interface_path = repo_path / \"outetts/version/v1/interface.py\"\n", + "\n", + "updated_version = interface_path.exists()\n", + "\n", + "if not updated_version:\n", + " interface_path = repo_path / \"outetts/v0_1/interface.py\"\n", "orig_interface_path = interface_path.parent / \"_orig_interface.py\"\n", "\n", - "if not orig_interface_path.exists():\n", + "if not updated_version and not orig_interface_path.exists():\n", " interface_path.rename(orig_interface_path)\n", " # sounddevice requires to install manually additional libraries, as we do not plan to use it 
for audio playing\n", " # move it closer to its usage for avoid errors\n", @@ -173,7 +180,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2a96203216d5405ebdc7a57a81629d72", + "model_id": "c9c530c48b164157a29d10cd1ccc0d93", "version_major": 2, "version_minor": 0 }, @@ -196,9 +203,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-11-29 11:48:51.975233: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2024-11-29 11:48:51.989550: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", + "E0000 00:00:1732866532.005718 2314480 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "E0000 00:00:1732866532.010517 2314480 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-11-29 11:48:52.027376: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + } + ], "source": [ "from ov_outetts_helper import InterfaceOV, OVHFModel # noqa: F401\n", "\n", @@ -209,9 +230,17 @@ }, { "cell_type": "code", - "execution_count": 
null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "making attention of type 'vanilla' with 768 in_channels\n" + ] + } + ], "source": [ "interface = InterfaceOV(model_dir, device.value)" ] @@ -230,16 +259,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n", + "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n", + "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n" + ] + } + ], "source": [ "output = interface.generate(text=\"Hello, I'm working!\", temperature=0.1, repetition_penalty=1.1, max_length=4096)" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -247,7 +286,7 @@ "text/html": [ "\n", " \n", " " @@ -256,7 +295,7 @@ "" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -281,22 +320,15 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "metadata": {}, "outputs": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f9c2179b13c04d549465e76e9bbe2404", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "2.wav: 0%| | 0.00/160k [00:00" ] }, - "execution_count": 14, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -337,7 +369,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -364,7 
+396,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -372,7 +404,7 @@ "text/html": [ "\n", " \n", " " @@ -381,7 +413,7 @@ "" ] }, - "execution_count": 16, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -453,38 +485,13 @@ "widgets": { "application/vnd.jupyter.widget-state+json": { "state": { - "0377fbdd36504debb12e1eef2a54315e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "2.0.0", - "model_name": "HTMLModel", - "state": { - "layout": "IPY_MODEL_fdf387c2b6f74e0cb9ea662e9ff77970", - "style": "IPY_MODEL_a8c3fe2083ee4155876ddbdcdf28d0ae", - "value": " 160k/160k [00:00<00:00, 312kB/s]" - } - }, - "251544740d8b45ada075ffa665dbef9f": { + "34dd5fcc238d47b381faaf57c4533034": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", "state": {} }, - "2a96203216d5405ebdc7a57a81629d72": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "2.0.0", - "model_name": "DropdownModel", - "state": { - "_options_labels": [ - "CPU", - "AUTO" - ], - "description": "Device:", - "index": 1, - "layout": "IPY_MODEL_251544740d8b45ada075ffa665dbef9f", - "style": "IPY_MODEL_41680c4b94a64b86a89217ee6250cb35" - } - }, - "41680c4b94a64b86a89217ee6250cb35": { + "b3d79e47520c41fbad29c3279cbe6aa2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", "model_name": "DescriptionStyleModel", @@ -492,92 +499,20 @@ "description_width": "" } }, - "6a151fff521c4e18a058c10fef945659": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "2.0.0", - "model_name": "LayoutModel", - "state": {} - }, - "9dd00e1f9d734be698e1f3dbaf0f6f9c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "2.0.0", - "model_name": "LayoutModel", - "state": {} - }, - "a8c3fe2083ee4155876ddbdcdf28d0ae": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": 
"2.0.0", - "model_name": "HTMLStyleModel", - "state": { - "description_width": "", - "font_size": null, - "text_color": null - } - }, - "abd65e3b68f84ad58dd712aefbc1caf9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "2.0.0", - "model_name": "LayoutModel", - "state": {} - }, - "afab87f4ae3b474297a5fba38dcbfd30": { + "c9c530c48b164157a29d10cd1ccc0d93": { "model_module": "@jupyter-widgets/controls", "model_module_version": "2.0.0", - "model_name": "FloatProgressModel", - "state": { - "bar_style": "success", - "layout": "IPY_MODEL_9dd00e1f9d734be698e1f3dbaf0f6f9c", - "max": 163918, - "style": "IPY_MODEL_f331f1d9e7d141fb8d814f0d03a74d4f", - "value": 163918 - } - }, - "da377e9239e242ab87632f61989a4c4f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "2.0.0", - "model_name": "HTMLModel", - "state": { - "layout": "IPY_MODEL_6a151fff521c4e18a058c10fef945659", - "style": "IPY_MODEL_e1411e7eb53e4272bd3f653cb5f545e6", - "value": "2.wav: 100%" - } - }, - "e1411e7eb53e4272bd3f653cb5f545e6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "2.0.0", - "model_name": "HTMLStyleModel", - "state": { - "description_width": "", - "font_size": null, - "text_color": null - } - }, - "f331f1d9e7d141fb8d814f0d03a74d4f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "2.0.0", - "model_name": "ProgressStyleModel", - "state": { - "description_width": "" - } - }, - "f9c2179b13c04d549465e76e9bbe2404": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "2.0.0", - "model_name": "HBoxModel", + "model_name": "DropdownModel", "state": { - "children": [ - "IPY_MODEL_da377e9239e242ab87632f61989a4c4f", - "IPY_MODEL_afab87f4ae3b474297a5fba38dcbfd30", - "IPY_MODEL_0377fbdd36504debb12e1eef2a54315e" + "_options_labels": [ + "CPU", + "AUTO" ], - "layout": "IPY_MODEL_abd65e3b68f84ad58dd712aefbc1caf9" + "description": "Device:", + "index": 1, + "layout": 
"IPY_MODEL_34dd5fcc238d47b381faaf57c4533034", + "style": "IPY_MODEL_b3d79e47520c41fbad29c3279cbe6aa2" } - }, - "fdf387c2b6f74e0cb9ea662e9ff77970": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "2.0.0", - "model_name": "LayoutModel", - "state": {} } }, "version_major": 2, diff --git a/notebooks/outetts-text-to-speech/ov_outetts_helper.py b/notebooks/outetts-text-to-speech/ov_outetts_helper.py index aff5ef97b2e..be71556c007 100644 --- a/notebooks/outetts-text-to-speech/ov_outetts_helper.py +++ b/notebooks/outetts-text-to-speech/ov_outetts_helper.py @@ -1,10 +1,19 @@ -from outetts.v0_1.interface import InterfaceHF -from outetts.v0_1.audio_codec import AudioCodec -from outetts.v0_1.prompt_processor import PromptProcessor -from outetts.v0_1.model import HFModel import torch from optimum.intel.openvino import OVModelForCausalLM +try: + from outetts.version.v1.interface import InterfaceHF + from outetts.version.v1.prompt_processor import PromptProcessor + from outetts.version.v1.model import HFModel + from outetts.wav_tokenizer.audio_codec import AudioCodec + updated_version = True +except ImportError: + from outetts.v0_1.interface import InterfaceHF + from outetts.v0_1.audio_codec import AudioCodec + from outetts.v0_1.prompt_processor import PromptProcessor + from outetts.v0_1.model import HFModel + updated_version = False + class OVHFModel(HFModel): def __init__(self, model_path, device): @@ -20,5 +29,9 @@ def __init__( ) -> None: self.device = torch.device("cpu") self.audio_codec = AudioCodec(self.device) - self.prompt_processor = PromptProcessor(model_path) + self.prompt_processor = PromptProcessor(model_path) if not updated_version else PromptProcessor(model_path, ["en"]) self.model = OVHFModel(model_path, device) + self.language = "en" + self.verbose = False + self.languages = ["en"] + self._device = torch.device("cpu") From af5aac8b1e5e7f0a790d06bf6d10a01e2ab2b1f3 Mon Sep 17 00:00:00 2001 From: Shira Guskin 
<30695324+shira-g@users.noreply.github.com> Date: Fri, 29 Nov 2024 00:21:27 -0800 Subject: [PATCH 4/5] provide better speedup prompt to speculative decoding notebook (#2565) --- .../speculative-sampling.ipynb | 40 +++++++++++++------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/notebooks/speculative-sampling/speculative-sampling.ipynb b/notebooks/speculative-sampling/speculative-sampling.ipynb index 8c3a97b5784..a764b50017b 100644 --- a/notebooks/speculative-sampling/speculative-sampling.ipynb +++ b/notebooks/speculative-sampling/speculative-sampling.ipynb @@ -188,7 +188,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "553148f5", "metadata": {}, "outputs": [ @@ -207,9 +207,23 @@ "pipe = ov_genai.LLMPipeline(target_model_path, device.value)\n", "\n", "config = ov_genai.GenerationConfig()\n", - "config.max_new_tokens = 100\n", - "\n", - "\n", + "config.max_new_tokens = 330\n", + "prompt = '''\n", + "\n", + "def prime_fib(n: int):\n", + " \"\"\"\n", + " prime_fib returns n-th number that is a Fibonacci number and it's also prime.\n", + " >>> prime_fib(1)\n", + " 2\n", + " >>> prime_fib(2)\n", + " 3\n", + " >>> prime_fib(3)\n", + " 5\n", + " >>> prime_fib(4)\n", + " 13\n", + " >>> prime_fib(5)\n", + " 89\n", + " \"\"\"'''\n", "def streamer(subword):\n", " print(subword, end=\"\", flush=True)\n", " # Return flag corresponds whether generation should be stopped.\n", @@ -218,13 +232,13 @@ "\n", "\n", "start_time = time.perf_counter()\n", - "pipe.generate([\"Sun is yellow because\"], config, streamer=streamer)\n", + "pipe.generate(prompt, config, streamer=streamer)\n", "end_time = time.perf_counter()" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "c40d9901-ceb2-4c4c-a686-303590292ab3", "metadata": {}, "outputs": [ @@ -241,7 +255,7 @@ "\n", "print(f\"Generation time: {end_time - start_time:.2f}s\")\n", "del pipe\n", - "gc.collect();" + "gc.collect()" ] }, { @@ -263,7 +277,7 @@ 
}, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "9fde1b3c", "metadata": {}, "outputs": [ @@ -278,17 +292,19 @@ "source": [ "scheduler_config = ov_genai.SchedulerConfig()\n", "# cache params\n", - "scheduler_config.cache_size = 2\n", + "scheduler_config.cache_size = 0\n", + "scheduler_config.num_kv_blocks = 2048 // 8\n", + "scheduler_config.max_num_batched_tokens = 2048\n", "\n", "draft_model = ov_genai.draft_model(draft_model_path, device.value)\n", "\n", "pipe = ov_genai.LLMPipeline(target_model_path, device.value, draft_model=draft_model, scheduler_config=scheduler_config)\n", "\n", "config = ov_genai.GenerationConfig()\n", - "config.max_new_tokens = 100\n", - "config.num_assistant_tokens = 3\n", + "config.max_new_tokens = 330\n", + "config.num_assistant_tokens = 5\n", "start_time = time.perf_counter()\n", - "result = pipe.generate([\"Sun is yellow because\"], config, streamer=streamer)\n", + "result = pipe.generate(prompt, config, streamer=streamer)\n", "end_time = time.perf_counter()" ] }, From 60d0cde7f7a7860efe2aacdafac5dcb6952e8fac Mon Sep 17 00:00:00 2001 From: Ekaterina Aidova Date: Fri, 29 Nov 2024 12:29:43 +0400 Subject: [PATCH 5/5] fix code style after speculative decoding update (#2570) --- notebooks/speculative-sampling/speculative-sampling.ipynb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/notebooks/speculative-sampling/speculative-sampling.ipynb b/notebooks/speculative-sampling/speculative-sampling.ipynb index a764b50017b..58d4178ac6b 100644 --- a/notebooks/speculative-sampling/speculative-sampling.ipynb +++ b/notebooks/speculative-sampling/speculative-sampling.ipynb @@ -224,6 +224,8 @@ " >>> prime_fib(5)\n", " 89\n", " \"\"\"'''\n", + "\n", + "\n", "def streamer(subword):\n", " print(subword, end=\"\", flush=True)\n", " # Return flag corresponds whether generation should be stopped.\n",