diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 8e7eb099540439..3598e32166a809 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -108,8 +108,6 @@ /tools/ @openvinotoolkit/openvino-tools-maintainers /tools/benchmark_tool/ @openvinotoolkit/openvino-ie-python-api-maintainers /tools/legacy/ @openvinotoolkit/openvino-samples-maintainers -/tools/openvino_dev/ @openvinotoolkit/openvino-tools-maintainers @openvinotoolkit/openvino-ie-python-api-maintainers -/tools/mo/ @openvinotoolkit/openvino-mo-maintainers /tools/ovc/ @openvinotoolkit/openvino-ovc-maintainers /thirdparty/open_model_zoo/ @openvinotoolkit/omz-maintainers diff --git a/.github/actions/cache/package-lock.json b/.github/actions/cache/package-lock.json index d452b53dccbc22..3c3887ba4b29c3 100644 --- a/.github/actions/cache/package-lock.json +++ b/.github/actions/cache/package-lock.json @@ -4283,9 +4283,9 @@ } }, "node_modules/cross-spawn": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", - "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", "dev": true, "license": "MIT", "dependencies": { diff --git a/.github/actions/common/constants.py b/.github/actions/common/constants.py index da55ece2ee4258..6a1d165fc7df13 100644 --- a/.github/actions/common/constants.py +++ b/.github/actions/common/constants.py @@ -16,6 +16,7 @@ class EventType(Enum): 'public_linux_ubuntu_24_04_x86_64_release', 'public_windows_vs2019_Release', 'public_windows_vs2019_Debug', + 'public_manylinux2014_x86_64_release', ) ProductType = Enum('ProductType', {t.upper(): t for t in productTypes}) diff --git a/.github/actions/install_ov_wheels/action.yml b/.github/actions/install_ov_wheels/action.yml new 
file mode 100644 index 00000000000000..82c03aeb4e4f2c --- /dev/null +++ b/.github/actions/install_ov_wheels/action.yml @@ -0,0 +1,48 @@ +name: 'Find and install OpenVINO Python wheels' +description: 'Finds the OpenVINO Python wheels suitable for the "python3" executable and installs them' +inputs: + wheels-dir-path: + description: 'Path to the directory in which wheels are located' + required: true + wheels-to-install: + description: 'List of wheel names to install in the form of "openvino openvino_tokenizers"' +runs: + using: 'composite' + steps: + - name: Install OpenVINO Python wheels (Windows) + shell: pwsh + if: runner.os == 'Windows' + run: | + # Get the Python version + $pyVersion = python3 -c "import sys; print(f'{sys.version_info.major}{sys.version_info.minor}')" + + foreach ($wheel in $("${{ inputs.wheels-to-install }}" -split ' ')) { + # Search for the python-specific wheel version and install it if exists + $wheelPath = Get-ChildItem -Path ${{ inputs.wheels-dir-path }} -Filter "$wheel-*cp$pyVersion*.whl" | Select-Object -First 1 + if ($wheelPath) { + python3 -m pip install $wheelPath.FullName + } else { + # If the python-specific version does not exist, install by name only + $wheelPathByName = Get-ChildItem -Path ${{ inputs.wheels-dir-path }} -Filter "$wheel-*.whl" | Select-Object -First 1 + python3 -m pip install $wheelPathByName.FullName + } + } + + - name: Install OpenVINO Python wheels (Linux and macOS) + shell: bash + if: runner.os != 'Windows' + run: | + py_version=$(python3 -c "import sys; print(f'{sys.version_info.major}{sys.version_info.minor}')") + for wheel in ${{ inputs.wheels-to-install }}; do + echo "Installing the ${wheel} wheel" + + # Search for the python-specific wheel version and install it if exists + wheel_path=$(find ${{ inputs.wheels-dir-path }} -name "$wheel-*cp$py_version*.whl") + echo "Wheel path: ${wheel_path}" + if [ -n "${wheel_path}" ]; then + python3 -m pip install $wheel_path + else + # If the python-specific version 
does not exist, install by name only + python3 -m pip install ${{ inputs.wheels-dir-path }}/$wheel-*.whl + fi + done diff --git a/.github/actions/openvino_provider/action.yml b/.github/actions/openvino_provider/action.yml index dd1078bb0d4353..a17986f35d3723 100644 --- a/.github/actions/openvino_provider/action.yml +++ b/.github/actions/openvino_provider/action.yml @@ -177,7 +177,7 @@ runs: else ov_package_url=$(curl -s ${{ inputs.nightly_package_source }} | jq -r '.${{ inputs.platform }}_${{ inputs.arch }}') fi - cd ${{ inputs.install_dir || env.GITHUB_WORKSPACE }} + cd ${{ inputs.install_dir || github.workspace }} package_basename=$(basename $ov_package_url) wget $ov_package_url --progress=bar:force:noscroll -O $package_basename package_folder=${package_basename%.*} @@ -196,7 +196,7 @@ runs: uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: name: ${{ steps.openvino_s3_download.outputs.ov_artifact_name }} - path: ${{ steps.openvino_s3_download.outputs.ov_package_path }} + path: ${{ github.workspace }}/${{ steps.openvino_s3_download.outputs.ov_package_path }} if-no-files-found: 'error' - name: Get wheel diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 1511e6a2c30170..359ff683c9b22a 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -41,7 +41,7 @@ updates: - "rkazants" versioning-strategy: increase-if-necessary - # Model Optimizer, openvino_dev and Benchmark tool + # ovc and Benchmark tools - package-ecosystem: pip directory: "/tools" schedule: diff --git a/.github/labeler.yml b/.github/labeler.yml index daa5375b175bd3..e9b2acb26c9072 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -100,10 +100,6 @@ 'category: LP transformations': - 'src/common/low_precision_transformations/**/*' -'category: MO': -- 'tools/mo/**/*' -- 'tests/layer_tests/mo_python_api_tests/**/*' - 'category: OVC': - 'tools/ovc/**/*' - 'tests/layer_tests/ovc_python_api_tests/**/*' @@ -119,7 +115,6 @@ - any: 
['src/bindings/js/node/CMakeLists.txt', 'src/bindings/js/node/package.json', 'src/bindings/js/node/package-lock.json'] -- 'tools/openvino_dev/**/*' 'category: PDPD FE': - 'src/frontends/paddle/**/*' @@ -183,7 +178,6 @@ 'category: tools': - any: ['tools/**', - '!tools/mo/**/*', '!tools/ovc/**/*'] 'category: transformations': diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 6cb0b2c5b6233c..cde1b9cf67e2fc 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -50,12 +50,6 @@ jobs: python3 -m pip install -r ${{ github.workspace }}/src/frontends/onnx/tests/requirements.txt # For running TensorFlow frontend unit tests python3 -m pip install -r ${{ github.workspace }}/src/frontends/tensorflow/tests/requirements.txt - # For MO unit tests - python3 -m pip install -r ${{ github.workspace }}/tools/mo/requirements_caffe.txt - python3 -m pip install -r ${{ github.workspace }}/tools/mo/requirements_kaldi.txt - python3 -m pip install -r ${{ github.workspace }}/tools/mo/requirements_onnx.txt - python3 -m pip install -r ${{ github.workspace }}/tools/mo/requirements_tf2.txt - python3 -m pip install -r ${{ github.workspace }}/tools/mo/requirements_dev.txt - name: Build OpenVINO with CMake uses: ashutoshvarma/action-cmake-build@ade188313bc7eaa6f14349569a64d8bc716342ff # master @@ -84,9 +78,6 @@ jobs: - name: Install wheel packages run: cmake -DCOMPONENT=python_wheels -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/install_pkg -P '${{ github.workspace }}/build/cmake_install.cmake' - - name: Install python wheels - run: python3 -m pip install openvino-dev --find-links=${{ github.workspace }}/install_pkg/tools - - name: List binaries run: ls -la ${{ github.workspace }}/bin/intel64/${{ env.CMAKE_BUILD_TYPE }} diff --git a/.github/workflows/debian_10_arm.yml b/.github/workflows/debian_10_arm.yml index 73426222253adb..cf628d12c29b89 100644 --- a/.github/workflows/debian_10_arm.yml +++ b/.github/workflows/debian_10_arm.yml @@ 
-49,7 +49,7 @@ jobs: Docker: needs: Smart_CI if: "!needs.smart_ci.outputs.skip_workflow" - runs-on: aks-linux-16-cores-arm-docker-build + runs-on: aks-linux-4-cores-8gb-arm-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 volumes: @@ -75,7 +75,7 @@ jobs: if: "!needs.smart_ci.outputs.skip_workflow" uses: ./.github/workflows/job_build_linux.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-16-cores-32gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.debian_10_arm }}", "volumes": ["/mount:/mount"], "options": "-e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING"}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} event-name: ${{ github.event_name }} @@ -104,7 +104,7 @@ jobs: needs: [ Docker, Build, Smart_CI ] uses: ./.github/workflows/job_cxx_unit_tests.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-8-cores-16gb-arm' image: ${{ fromJSON(needs.docker.outputs.images).ov_test.debian_10_arm }} affected-components: ${{ needs.smart_ci.outputs.affected_components }} os: 'debian_10' @@ -116,6 +116,8 @@ jobs: needs: [ Docker, Build, Smart_CI ] uses: ./.github/workflows/job_cpu_functional_tests.yml with: + # Further investigation is needed into why CPU functional tests fail on the v6 version of the VM size, + # so leave the runner unchanged for now runner: 'aks-linux-16-cores-arm' image: ${{ fromJSON(needs.docker.outputs.images).ov_test.debian_10_arm }} python-version: '3.7' diff --git a/.github/workflows/job_build_windows.yml b/.github/workflows/job_build_windows.yml index 8a39f348d824c3..7b682f208c3435 100644 --- a/.github/workflows/job_build_windows.yml +++ b/.github/workflows/job_build_windows.yml @@ -21,6 +21,11 @@ on: description: 'A string of options passed to CMake' type: string required: true + build-additional-python-wheels: + description: 'Whether to build additional, i.e., non-system Python wheels. 
Should have Python 3.9-3.12 installed' + type: boolean + required: false + default: false permissions: read-all @@ -157,8 +162,7 @@ jobs: run: echo SSL_CERT_FILE=$(python3 -m certifi) >> $env:GITHUB_ENV - name: CMake configure - run: | - cmake -S ${{ env.OPENVINO_REPO }} -B ${{ env.BUILD_DIR }} ${{ inputs.cmake-options }} + run: cmake -S ${{ env.OPENVINO_REPO }} -B ${{ env.BUILD_DIR }} ${{ inputs.cmake-options }} - name: Clean ccache stats run: '& ccache --zero-stats' @@ -176,6 +180,54 @@ jobs: cmake --install . --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_TEST_DIR }} --component tests working-directory: ${{ env.BUILD_DIR }} + # Setup additional Python versions for wheels building + - name: Setup Python 3.9 + if: ${{ inputs.build-additional-python-wheels }} + uses: ./openvino/.github/actions/setup_python + with: + version: '3.9' + pip-cache-path: ${{ env.PIP_CACHE_PATH }} + should-setup-pip-paths: 'true' + self-hosted-runner: 'true' + + # Setup additional Python versions for wheels building + - name: Setup Python 3.10 + if: ${{ inputs.build-additional-python-wheels }} + uses: ./openvino/.github/actions/setup_python + with: + version: '3.10' + pip-cache-path: ${{ env.PIP_CACHE_PATH }} + should-setup-pip-paths: 'true' + self-hosted-runner: 'true' + + # Setup additional Python versions for wheels building + - name: Setup Python 3.12 + if: ${{ inputs.build-additional-python-wheels }} + uses: ./openvino/.github/actions/setup_python + with: + version: '3.12' + pip-cache-path: ${{ env.PIP_CACHE_PATH }} + should-setup-pip-paths: 'true' + self-hosted-runner: 'true' + + - name: Build additional Python wheels + if: ${{ inputs.build-additional-python-wheels }} + run: | + $pyVersions = '3.9', '3.10', '3.12' + foreach ($pyVersion in $pyVersions) { + $pyBuildDir = "${{ github.workspace }}/py$pyVersion" + New-Item -ItemType Directory -Path "$pyBuildDir" -Force + + $pythonCommand = "py -$pyVersion -c `"import sys; print(f'{sys.executable}')`"" + 
$pythonExecutablePath = & cmd /c $pythonCommand + + & $pythonExecutablePath -m pip install -r ${{ env.OPENVINO_REPO }}/src/bindings/python/wheel/requirements-dev.txt + + cmake -DPython3_EXECUTABLE="$pythonExecutablePath" -DOpenVINODeveloperPackage_DIR=${{ env.BUILD_DIR }} -S ${{ env.OPENVINO_REPO }}/src/bindings/python -B "$pyBuildDir" + cmake --build "$pyBuildDir" --parallel --config ${{ env.CMAKE_BUILD_TYPE }} + cmake --install "$pyBuildDir" --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_WHEELS_DIR }} --component python_wheels + } + - name: Pack Artifacts run: | $file = Get-ChildItem -Path "${{ env.INSTALL_DIR }}" @@ -220,7 +272,7 @@ jobs: uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_wheels - path: ${{ env.BUILD_DIR }}/wheels/*.whl + path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl if-no-files-found: 'error' - name: Upload openvino tests package diff --git a/.github/workflows/job_cxx_unit_tests.yml b/.github/workflows/job_cxx_unit_tests.yml index 8fab17043b7465..52a2b3f4d287c8 100644 --- a/.github/workflows/job_cxx_unit_tests.yml +++ b/.github/workflows/job_cxx_unit_tests.yml @@ -29,7 +29,7 @@ on: description: 'Timeout in minutes for the job' type: number required: false - default: 35 + default: 45 permissions: read-all diff --git a/.github/workflows/job_onnx_models_tests.yml b/.github/workflows/job_onnx_models_tests.yml index c879f0cb6a1efc..39a4b467e74fc1 100644 --- a/.github/workflows/job_onnx_models_tests.yml +++ b/.github/workflows/job_onnx_models_tests.yml @@ -86,14 +86,11 @@ jobs: run: | # Install the core OV wheel python3 -m pip install ./openvino-*.whl - - extras_to_install="onnx" - - # Find and install OV dev wheel - ov_dev_wheel_name=$(find . 
-name 'openvino_dev*.whl') - python3 -m pip install $ov_dev_wheel_name[$extras_to_install] working-directory: ${{ env.INSTALL_WHEELS_DIR }} + - name: Install ONNX Models tests requirements + run: python3 -m pip install -r ${INSTALL_TEST_DIR}/requirements_onnx + - name: Install Python tests dependencies run: | # To enable pytest parallel features diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index 64be9ef4bbcc44..8075f3299fe063 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -91,20 +91,11 @@ jobs: should-setup-pip-paths: ${{ runner.os == 'Linux' }} self-hosted-runner: ${{ runner.os == 'Linux' }} - # - # Tests - # - - name: Install OpenVINO Python wheels run: | # Install the core OV wheel python3 -m pip install ./openvino-*.whl - extras_to_install="caffe,kaldi,onnx,tensorflow2,pytorch" - - # Find and install OV dev wheel - ov_dev_wheel_name=$(find . -name 'openvino_dev*.whl') - python3 -m pip install $ov_dev_wheel_name[$extras_to_install] working-directory: ${{ env.INSTALL_WHEELS_DIR }} - name: Install Python API tests dependencies @@ -112,7 +103,19 @@ jobs: # To enable pytest parallel features python3 -m pip install pytest-xdist[psutil] python3 -m pip install -r ${INSTALL_TEST_DIR}/bindings/python/requirements_test.txt - python3 -m pip install -r ${INSTALL_TEST_DIR}/mo/requirements_dev.txt + + - name: Install Python Layer tests dependencies and for OVC unit tests + run: | + # For torchvision to OpenVINO preprocessing converter + python3 -m pip install -r ${INSTALL_TEST_DIR}/python/preprocess/torchvision/requirements.txt + + # layer test requirements + python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt + + - name: Install ONNX tests dependencies + run: | + # ONNX tests requirements + python3 -m pip install -r ${INSTALL_TEST_DIR}/requirements_onnx # # Tests @@ -127,18 +130,6 @@ jobs: --junitxml=${INSTALL_TEST_DIR}/TEST-Pyngraph.xml 
\ --ignore=${INSTALL_TEST_DIR}/pyopenvino/tests/test_utils/test_utils.py - - name: Model Optimizer unit tests - if: fromJSON(inputs.affected-components).MO.test - run: | - if [[ "${{ runner.os }}" == "Linux" ]] && [[ "${{ runner.arch }}" != "ARM64" ]]; then - # required for MxNet - apt-get install -y libgomp1 libquadmath0 - fi - - # Skips under tickets: 133405, 122666 - python3 -m pytest -s ${INSTALL_TEST_DIR}/mo/unit_tests \ - --junitxml=${INSTALL_TEST_DIR}/TEST-ModelOptimizer.xml - - name: Python ONNX operators tests if: (fromJSON(inputs.affected-components).Python_API.test || fromJSON(inputs.affected-components).ONNX_FE.test) && @@ -153,32 +144,6 @@ jobs: if: fromJSON(inputs.affected-components).MO.test run: python3 -m pytest -s ${INSTALL_TEST_DIR}/ovc/unit_tests --junitxml=${INSTALL_TEST_DIR}/TEST-OpenVinoConversion.xml - - name: Install Python Layer tests dependencies - run: | - # For torchvision to OpenVINO preprocessing converter - python3 -m pip install -r ${INSTALL_TEST_DIR}/python/preprocess/torchvision/requirements.txt - - # layer test requirements - python3 -m pip install -r ${LAYER_TESTS_INSTALL_DIR}/requirements.txt - - - name: MO Python API Tests - if: fromJSON(inputs.affected-components).MO.test - run: | - # Import 'test_utils' installed in '/tests/python/openvino' - export LD_LIBRARY_PATH=${PIP_INSTALL_PATH}/openvino/libs:$LD_LIBRARY_PATH - export PYTHONPATH=${INSTALL_TEST_DIR}/python - - if [[ "${{ runner.os }}" == "Linux" ]] && [[ "${{ runner.arch }}" == "ARM64" ]]; then - # Find gomp lib - GOMP_LIB=$(find "${PIP_INSTALL_PATH}/torch/lib/../../torch.libs/" -name '*libgomp-*so*') - export LD_PRELOAD=${GOMP_LIB} - fi - - python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/mo_python_api_tests -n logical --junitxml=${INSTALL_TEST_DIR}/TEST-test_mo_convert.xml - env: - TEST_DEVICE: CPU - TEST_PRECISION: FP16 - - name: OVC Python API Tests if: fromJSON(inputs.affected-components).MO.test run: | @@ -205,16 +170,6 @@ jobs: export 
LD_LIBRARY_PATH=${PIP_INSTALL_PATH}/openvino/libs:$LD_LIBRARY_PATH python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/py_frontend_tests --junitxml=${INSTALL_TEST_DIR}/TEST-test_py_fontend.xml - - name: ONNX Layer Tests - if: ${{ fromJSON(inputs.affected-components).ONNX_FE.test }} - run: | - # requires 'unit_tests' from 'tools/mo' - export PYTHONPATH=${INSTALL_TEST_DIR}/mo:$PYTHONPATH - python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/onnx_tests -m "not launch_only_if_manually_specified and precommit" --junitxml=${INSTALL_TEST_DIR}/TEST-onnx.xml - env: - TEST_DEVICE: CPU - TEST_PRECISION: FP16 - - name: JAX Layer Tests - JAX FE if: ${{ fromJSON(inputs.affected-components).JAX_FE.test && runner.arch != 'ARM64' && runner.os != 'macOS' }} run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/jax_tests/ -m precommit_jax_fe --junitxml=${INSTALL_TEST_DIR}/TEST-jax_fe.xml @@ -230,22 +185,6 @@ jobs: TEST_DEVICE: CPU TEST_PRECISION: FP16 - - name: TensorFlow 1 Layer Tests - Legacy FE - if: fromJSON(inputs.affected-components).TF_FE.test - run: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow_tests/test_tf_Roll.py --use_legacy_frontend --ir_version=10 --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf_Roll.xml - env: - TEST_DEVICE: CPU - TEST_PRECISION: FP16 - - - name: TensorFlow 2 Layer Tests - Legacy FE - # no longer workable since TF 2.17 - # will be removed in 2024.5 - if: ${{ 'false' }} - run: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/tensorflow2_keras_tests/test_tf2_keras_activation.py --use_legacy_frontend --ir_version=11 -k "sigmoid" --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-tf2_Activation.xml - env: - TEST_DEVICE: CPU - TEST_PRECISION: FP16 - - name: Clone API snippets if: runner.os != 'macOS' uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml index b0eba0a278e582..271b7948d435dc 100644 --- 
a/.github/workflows/job_pytorch_layer_tests.yml +++ b/.github/workflows/job_pytorch_layer_tests.yml @@ -65,6 +65,7 @@ jobs: echo "OPENVINO_REPO=$GITHUB_WORKSPACE/openvino" >> "$GITHUB_ENV" echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" + echo "INSTALL_WHEELS_DIR=$GITHUB_WORKSPACE/install/wheels" >> "$GITHUB_ENV" echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV" - name: Install OpenVINO dependencies (mac) @@ -83,11 +84,12 @@ jobs: Expand-Archive openvino_tests.zip -DestinationPath ${{ env.INSTALL_DIR }} working-directory: ${{ env.INSTALL_DIR }} - - name: Fetch setup_python action + - name: Fetch setup_python and install wheels actions uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml + .github/actions/install_ov_wheels/action.yml sparse-checkout-cone-mode: false path: 'openvino' @@ -99,20 +101,11 @@ jobs: should-setup-pip-paths: ${{ runner.os != 'macOS' }} self-hosted-runner: ${{ runner.os != 'macOS' }} - - name: Install OpenVINO Python wheels (Linux and macOS) - if: runner.os != 'Windows' - run: | - # Install the core OV wheel - python3 -m pip install ./openvino-*.whl - working-directory: ${{ env.INSTALL_WHEELS_DIR }} - - - name: Install OpenVINO Python wheels (Windows) - if: runner.os == 'Windows' - run: | - # Find and install the core OV wheel - $ovCoreWheelPath=Get-ChildItem -Path . 
-Filter openvino-*.whl | % { $_.FullName } - python3 -m pip install "$ovCoreWheelPath" - working-directory: ${{ env.INSTALL_WHEELS_DIR }} + - name: Install OpenVINO Python wheels + uses: ./openvino/.github/actions/install_ov_wheels + with: + wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} + wheels-to-install: 'openvino' - name: Install Pytorch Layer tests dependencies run: | diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml index 0de1708527739a..98f385e990f5e6 100644 --- a/.github/workflows/job_tensorflow_layer_tests.yml +++ b/.github/workflows/job_tensorflow_layer_tests.yml @@ -66,6 +66,7 @@ jobs: echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV" + echo "INSTALL_WHEELS_DIR=$GITHUB_WORKSPACE/install/wheels" >> "$GITHUB_ENV" - name: Install OpenVINO dependencies (mac) if: runner.os == 'macOS' @@ -83,11 +84,12 @@ jobs: Expand-Archive openvino_tests.zip -DestinationPath ${{ env.INSTALL_DIR }} working-directory: ${{ env.INSTALL_DIR }} - - name: Fetch setup_python action + - name: Fetch setup_python and install wheels actions uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml + .github/actions/install_ov_wheels/action.yml sparse-checkout-cone-mode: false path: 'openvino' @@ -99,27 +101,11 @@ jobs: should-setup-pip-paths: ${{ runner.os != 'macOS' }} self-hosted-runner: ${{ runner.os != 'macOS' }} - - name: Install OpenVINO Python wheels (Linux and macOS) - if: runner.os != 'Windows' - run: | - # Install the core OV wheel - python3 -m pip install ./openvino-*.whl - - # Install the core OV Tokenizers wheel - python3 -m pip install ./openvino_tokenizers-*.whl - working-directory: ${{ env.INSTALL_WHEELS_DIR }} - - - name: Install OpenVINO Python wheels 
(Windows) - if: runner.os == 'Windows' - run: | - # Find and install the core OV wheel - $ovCoreWheelPath=Get-ChildItem -Path . -Filter openvino-*.whl | % { $_.FullName } - python3 -m pip install "$ovCoreWheelPath" - - # Find and install the core OV Tokenizers wheel - $ovCoreWheelPath=Get-ChildItem -Path . -Filter openvino_tokenizers-*.whl | % { $_.FullName } - python3 -m pip install "$ovCoreWheelPath" - working-directory: ${{ env.INSTALL_WHEELS_DIR }} + - name: Install OpenVINO Python wheels + uses: ./openvino/.github/actions/install_ov_wheels + with: + wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} + wheels-to-install: 'openvino openvino_tokenizers' - name: Install Python Layer tests dependencies run: | diff --git a/.github/workflows/job_tokenizers.yml b/.github/workflows/job_tokenizers.yml index 4b84bee25c78f4..1068ec550d1752 100644 --- a/.github/workflows/job_tokenizers.yml +++ b/.github/workflows/job_tokenizers.yml @@ -54,13 +54,15 @@ jobs: echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" echo "OPENVINO_TOKENIZERS_REPO=$GITHUB_WORKSPACE/openvino_tokenizers" >> "$GITHUB_ENV" echo "EXTENSION_BUILD_DIR=$GITHUB_WORKSPACE/build" >> "$GITHUB_ENV" + echo "INSTALL_WHEELS_DIR=$GITHUB_WORKSPACE/install/wheels" >> "$GITHUB_ENV" - - name: checkout action + - name: checkout actions uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python .github/actions/cache + .github/actions/install_ov_wheels/action.yml install_build_dependencies.sh - name: Install OpenVINO dependencies (mac) @@ -93,22 +95,11 @@ jobs: # Dependencies # - - name: Install OpenVINO Python wheel (Linux and macOS) - if: runner.os != 'Windows' - run: | - # Find and install wheel - wheel_name=$(find . 
-name 'openvino-*.whl') - python3 -m pip install $wheel_name - working-directory: ${{ env.INSTALL_WHEELS_DIR }} - - - - name: Install OpenVINO Python wheel (Windows) - if: runner.os == 'Windows' - run: | - # Find and install wheel - $ovCoreWheelPath=Get-ChildItem -Path . -Filter openvino-*.whl | % { $_.FullName } - python3 -m pip install "$ovCoreWheelPath" - working-directory: ${{ env.INSTALL_WHEELS_DIR }} + - name: Install OpenVINO Python wheels + uses: ./.github/actions/install_ov_wheels + with: + wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} + wheels-to-install: 'openvino' # # Build diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 10de6867c7d0e2..66ce9461f05fe8 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -53,7 +53,7 @@ jobs: Docker: needs: Smart_CI if: "!needs.smart_ci.outputs.skip_workflow" - runs-on: aks-linux-16-cores-arm-docker-build + runs-on: aks-linux-4-cores-8gb-arm-docker-build container: image: openvinogithubactions.azurecr.io/docker_build:0.2 volumes: @@ -78,7 +78,7 @@ jobs: needs: [ Docker, Smart_CI ] uses: ./.github/workflows/job_build_linux.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-16-cores-32gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"], "options": "-e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING"}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} event-name: ${{ github.event_name }} @@ -105,7 +105,7 @@ jobs: if: ${{ 'false' }} uses: ./.github/workflows/job_debian_packages.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-4-cores-8gb-arm' image: 'openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04' Samples: @@ -113,7 +113,7 @@ jobs: if: fromJSON(needs.smart_ci.outputs.affected_components).samples uses: ./.github/workflows/job_samples_tests.yml with: - runner: 'aks-linux-16-cores-arm' + 
runner: 'aks-linux-8-cores-16gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} @@ -123,7 +123,7 @@ jobs: if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API uses: ./.github/workflows/job_openvino_js.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-4-cores-8gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_arm64 }}"}' ONNX_Runtime: @@ -133,7 +133,7 @@ jobs: needs: [ Build, Smart_CI, Docker ] uses: ./.github/workflows/job_onnx_runtime.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-16-cores-32gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"], "options": "-e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING"}' sccache-azure-key-prefix: 'ubuntu20_aarch64_onnxruntime' @@ -142,7 +142,7 @@ jobs: needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_tokenizers.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-8-cores-16gb-arm' shell: bash container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} @@ -154,7 +154,7 @@ jobs: needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_cxx_unit_tests.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-8-cores-16gb-arm' image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }} affected-components: ${{ needs.smart_ci.outputs.affected_components }} os: 'ubuntu_20_04' @@ -164,7 +164,7 @@ jobs: needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_python_unit_tests.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-8-cores-16gb-arm' container: '{"image": "${{ 
fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' @@ -174,7 +174,7 @@ jobs: needs: [ Build, Docker, Smart_CI, Openvino_tokenizers ] uses: ./.github/workflows/job_tensorflow_layer_tests.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-16-cores-32gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' @@ -184,7 +184,7 @@ jobs: needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_pytorch_layer_tests.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-16-cores-32gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' @@ -195,6 +195,8 @@ jobs: needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_cpu_functional_tests.yml with: + # Further investigation is needed into why CPU functional tests fail on the v6 version of the VM size, + # so leave the runner unchanged for now runner: 'aks-linux-16-cores-arm' image: ${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }} python-version: '3.11' @@ -207,7 +209,7 @@ jobs: needs: [ Build, Docker, Smart_CI, Openvino_tokenizers] uses: ./.github/workflows/job_tensorflow_models_tests.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-16-cores-32gb-arm' container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}"}' model_scope: 'precommit' @@ -218,7 +220,7 @@ jobs: needs: [ Build, Docker, Smart_CI ] uses: ./.github/workflows/job_pytorch_models_tests.yml with: - runner: 'aks-linux-16-cores-arm' + runner: 'aks-linux-16-cores-32gb-arm' container: 
'{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}"}' model_scope: 'precommit' diff --git a/.github/workflows/manylinux_2014.yml b/.github/workflows/manylinux_2014.yml index ed375fb868459f..bd5da965226a50 100644 --- a/.github/workflows/manylinux_2014.yml +++ b/.github/workflows/manylinux_2014.yml @@ -88,6 +88,7 @@ jobs: options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING -e DOCKER_CONFIG -v ${{ github.workspace }}:${{ github.workspace }} env: CMAKE_BUILD_TYPE: 'Release' + ARCH: 'x86_64' OPENVINO_REPO: ${{ github.workspace }}/src INSTALL_DIR: ${{ github.workspace }}/install/openvino INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels @@ -99,6 +100,9 @@ jobs: SCCACHE_SERVER_PORT: 35555 SCCACHE_CACHE_SIZE: 50G SCCACHE_AZURE_KEY_PREFIX: manylinux_2014 + ARTIFACTS_SHARE: "/mount/build-artifacts" + MANIFEST_PATH: ${{ github.workspace }}/manifest.yml + PRODUCT_TYPE: public_manylinux2014_x86_64_release steps: - name: Clone OpenVINO @@ -109,6 +113,17 @@ jobs: - name: System info uses: ./src/.github/actions/system_info + + - name: Generate product manifest and set CI_BUILD_NUMBER & CI_BUILD_DEV_TAG + id: create_manifest + uses: ./src/.github/actions/create_manifest + with: + repos: | + ${{ env.OPENVINO_REPO }} + product_type: ${{ env.PRODUCT_TYPE }} + target_arch: ${{ env.ARCH }} + build_type: ${{ env.CMAKE_BUILD_TYPE }} + save_to: ${{ env.MANIFEST_PATH }} - name: Create docker build cache run: | @@ -128,6 +143,8 @@ jobs: -e SCCACHE_AZURE_KEY_PREFIX \ -e CMAKE_CXX_COMPILER_LAUNCHER \ -e CMAKE_C_COMPILER_LAUNCHER \ + -e CI_BUILD_NUMBER \ + -e CI_BUILD_DEV_TAG \ -w /work/src \ ${{ fromJSON(needs.docker.outputs.images).ov_build.manylinux2014_x86_64 }} \ /bin/bash -c " @@ -158,6 +175,8 @@ jobs: -e SCCACHE_AZURE_KEY_PREFIX \ -e CMAKE_CXX_COMPILER_LAUNCHER \ -e CMAKE_C_COMPILER_LAUNCHER \ + -e CI_BUILD_NUMBER \ + -e CI_BUILD_DEV_TAG \ -w /work/src \ ${{ 
fromJSON(needs.docker.outputs.images).ov_build.manylinux2014_x86_64 }} \ /bin/bash -c " @@ -188,4 +207,30 @@ jobs: with: name: openvino_wheels path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl - if-no-files-found: 'error' \ No newline at end of file + if-no-files-found: 'error' + + - name: Store artifacts to a shared drive + id: store_artifacts + if: ${{ always() }} + uses: ./src/.github/actions/store_artifacts + with: + artifacts: | + ${{ env.BUILD_DIR }}/openvino_package.tar.gz + ${{ env.MANIFEST_PATH }} + ${{ env.INSTALL_WHEELS_DIR }}/wheels + storage_dir: ${{ env.PRODUCT_TYPE }} + storage_root: ${{ env.ARTIFACTS_SHARE }} + + Overall_Status: + name: ci/gha_overall_status_manylinux2014 + needs: [Smart_CI, Build] + if: ${{ always() }} + runs-on: ubuntu-latest + steps: + - name: Check status of all jobs + if: >- + ${{ + contains(needs.*.result, 'failure') || + contains(needs.*.result, 'cancelled') + }} + run: exit 1 \ No newline at end of file diff --git a/.github/workflows/merge_queue_stub.yml b/.github/workflows/merge_queue_stub.yml new file mode 100644 index 00000000000000..a3d2e0b456a106 --- /dev/null +++ b/.github/workflows/merge_queue_stub.yml @@ -0,0 +1,13 @@ +on: + merge_group: + +jobs: + merge_group_stub_check: + name: ci/jenkins + runs-on: ubuntu-latest + defaults: + run: + shell: bash + if: ${{ github.event_name == 'merge_group' }} + steps: + - run: echo "Just a stub check to keep Jenkins running in pre-commits but not in merge queue" diff --git a/.github/workflows/mo.yml b/.github/workflows/mo.yml deleted file mode 100644 index f48986d4a0d304..00000000000000 --- a/.github/workflows/mo.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: MO -on: - push: - paths: - - 'tools/mo/**' - - '.github/workflows/mo.yml' - branches: - - 'master' - - 'releases/**' - pull_request: - paths: - - 'tools/mo/**' - - '.github/workflows/mo.yml' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -permissions: read-all - -jobs: - Pylint-UT: - 
runs-on: ubuntu-22.04 - if: ${{ github.repository_owner == 'openvinotoolkit' }} - steps: - - name: Clone OpenVINO - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 - with: - python-version: '3.10' - - - name: Cache pip - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('tools/mo/requirements*.txt') }} - restore-keys: | - ${{ runner.os }}-pip- - ${{ runner.os }}- - - - name: Install dependencies - run: | - python -m pip install --upgrade pip setuptools - # For UT - pip install unittest-xml-reporting==3.0.2 - # MO requirements - pip install -r requirements_caffe.txt - pip install -r requirements_kaldi.txt - pip install -r requirements_onnx.txt - pip install -r requirements_tf2.txt - pip install -r requirements_dev.txt - working-directory: tools/mo - - - name: Pylint-MO - run: pylint -d C,R,W openvino/tools/mo - working-directory: tools/mo diff --git a/.github/workflows/windows_vs2019_release.yml b/.github/workflows/windows_vs2019_release.yml index a416f577cdb3e1..c42475fea9cd64 100644 --- a/.github/workflows/windows_vs2019_release.yml +++ b/.github/workflows/windows_vs2019_release.yml @@ -59,6 +59,7 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} build-type: 'Release' target-branch: ${{ needs.smart_ci.outputs.target_branch }} + build-additional-python-wheels: true cmake-options: |- -G "Ninja Multi-Config" ` -DENABLE_PYTHON=ON ` @@ -69,7 +70,7 @@ jobs: -DENABLE_STRICT_DEPENDENCIES=OFF ` -DCMAKE_DISABLE_FIND_PACKAGE_PkgConfig=ON ` -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose" ` - -DOPENVINO_EXTRA_MODULES="${env:OPENVINO_CONTRIB_REPO }}/modules/custom_operations;${env:OPENVINO_CONTRIB_REPO}/modules/java_api" + 
-DOPENVINO_EXTRA_MODULES="${env:OPENVINO_CONTRIB_REPO}/modules/custom_operations;${env:OPENVINO_CONTRIB_REPO}/modules/java_api" Samples: needs: [ Build, Smart_CI ] @@ -108,11 +109,12 @@ jobs: Expand-Archive openvino_tests.zip -DestinationPath . working-directory: ${{ env.INSTALL_DIR }} - - name: Fetch setup_python action + - name: Fetch setup_python and install wheels actions uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml + .github/actions/install_ov_wheels/action.yml sparse-checkout-cone-mode: false path: 'openvino' @@ -135,11 +137,16 @@ jobs: run: | & ${{ env.SAMPLES_INSTALL_DIR }}/c/build_samples.ps1 -InstallDirectory ${{ env.INSTALL_DIR }} -BuildDirectory ${{ env.BUILD_DIR }}/c_samples + # Install Python benchmark_app by installing openvino-*.whl + - name: Install OpenVINO Python wheels + uses: ./openvino/.github/actions/install_ov_wheels + with: + wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} + wheels-to-install: 'openvino' + - name: Samples tests run: | - # Install Python benchmark_app by installing openvino-*.whl - $ovCoreWheelPath=Get-ChildItem -Path ./wheels -Filter openvino-*.whl | % { $_.FullName } - python3 -m pip install --ignore-installed PyYAML -r ./tests/smoke_tests/requirements.txt "$ovCoreWheelPath" + python3 -m pip install --ignore-installed PyYAML -r ./tests/smoke_tests/requirements.txt . "./setupvars.ps1" $Env:PYTHONCOERCECLOCALE="warn" python3 -bb -W error -X dev -X warn_default_encoding -m pytest ./tests/smoke_tests --numprocesses auto @@ -270,15 +277,15 @@ jobs: merge-multiple: true - name: Extract OpenVINO packages - run: | - Expand-Archive openvino_tests.zip -DestinationPath . + run: Expand-Archive openvino_tests.zip -DestinationPath . 
working-directory: ${{ env.INSTALL_DIR }} - - name: Fetch setup_python action + - name: Fetch setup_python and install wheels actions uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml + .github/actions/install_ov_wheels/action.yml sparse-checkout-cone-mode: false path: 'openvino' @@ -291,15 +298,10 @@ jobs: self-hosted-runner: 'true' - name: Install OpenVINO Python wheels - run: | - # Find and install the core OV wheel - $ovCoreWheelPath=Get-ChildItem -Path . -Filter openvino-*.whl | % { $_.FullName } - python3 -m pip install "$ovCoreWheelPath" - - # Find and install the dev OV wheel - $ovDevWheelPath=Get-ChildItem -Path . -Filter openvino_dev*.whl | % { $_.FullName } - python3 -m pip install "$ovDevWheelPath[caffe,kaldi,onnx,tensorflow2,pytorch]" - working-directory: ${{ env.INSTALL_WHEELS_DIR }} + uses: ./openvino/.github/actions/install_ov_wheels + with: + wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} + wheels-to-install: 'openvino' - name: Install Python API tests dependencies run: | @@ -309,8 +311,11 @@ jobs: # For torchvision to OpenVINO preprocessing converter python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/python/preprocess/torchvision/requirements.txt - # TODO: replace with Python API tests requirements - python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/mo/requirements_dev.txt + # For validation of Python API + python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/bindings/python/requirements_test.txt + + # ONNX tests requirements + python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/requirements_onnx # For getting rid of SSL issues during model downloading for unit tests python3 -m pip install certifi @@ -318,6 +323,9 @@ jobs: - name: Set SSL_CERT_FILE for model downloading for unit tests run: echo SSL_CERT_FILE=$(python3 -m certifi) >> $env:GITHUB_ENV + - name: Install Python Layer tests dependencies + run: python3 -m pip install -r ${{ 
env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt + - name: Python API Tests #if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test # Ticket: 127101 shell: cmd @@ -325,28 +333,6 @@ jobs: set PYTHONPATH=${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% python3 -m pytest -sv ${{ env.INSTALL_TEST_DIR }}/pyopenvino ${{ env.PYTHON_STATIC_ARGS }} --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-Pyngraph.xml --ignore=${{ env.INSTALL_TEST_DIR }}/pyopenvino/tests/test_utils/test_utils.py - - name: Model Optimizer UT - if: fromJSON(needs.smart_ci.outputs.affected_components).MO.test - shell: cmd - run: | - python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/mo/unit_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-ModelOptimizer.xml - - - name: Install Python Layer tests dependencies - run: | - # layer test requirements - python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - - - name: ONNX Layer Tests - if: fromJSON(needs.smart_ci.outputs.affected_components).ONNX_FE.test - shell: cmd - run: | - :: requires 'unit_tests' from 'tools/mo' - set PYTHONPATH=${{ env.INSTALL_TEST_DIR }}\mo;%PYTHONPATH% - python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/onnx_tests -n logical -m "not launch_only_if_manually_specified and precommit" --junitxml=${INSTALL_TEST_DIR}/TEST-onnx.xml - env: - TEST_DEVICE: CPU - TEST_PRECISION: FP16 - - name: TensorFlow Lite Layer Tests - TFL FE if: fromJSON(needs.smart_ci.outputs.affected_components).TFL_FE.test shell: cmd @@ -366,18 +352,6 @@ jobs: --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-onnx_frontend.xml ^ --ignore=${{ env.INSTALL_TEST_DIR }}/onnx/test_python/test_zoo_models.py - - name: MO Python API Tests - if: fromJSON(needs.smart_ci.outputs.affected_components).MO.test - shell: cmd - run: | - :: Used for 'test_utils' installed in '\python\openvino\test_utils' - set PYTHONPATH=${{ env.INSTALL_TEST_DIR }}\python\openvino\test_utils;${{ env.INSTALL_TEST_DIR }}\python;%PYTHONPATH% - - python3 -m pytest ${{ 
env.LAYER_TESTS_INSTALL_DIR }}/mo_python_api_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_mo_convert.xml - env: - TEST_DEVICE: CPU - TEST_PRECISION: FP16 - - name: OVC Python API Tests if: fromJSON(needs.smart_ci.outputs.affected_components).MO.test shell: cmd @@ -406,10 +380,101 @@ jobs: uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: - name: test-results-python + name: test-results-python-unittests path: ${{ env.INSTALL_TEST_DIR }}/TEST*.xml if-no-files-found: 'error' + Python_API_Tests: + name: OpenVINO Python API Tests + if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test + needs: [ Build, Smart_CI ] + timeout-minutes: 35 + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] + defaults: + run: + shell: pwsh + runs-on: aks-win-8-cores-16gb + env: + OPENVINO_REPO: "${{ github.workspace }}\\openvino" + INSTALL_DIR: "${{ github.workspace }}\\install" + INSTALL_TEST_DIR: "${{ github.workspace }}\\install\\tests" + INSTALL_WHEELS_DIR: "${{ github.workspace }}\\install\\wheels" + PYTHON_STATIC_ARGS: -m "not dynamic_library and not template_plugin" + steps: + - name: Download OpenVINO artifacts (tarballs) + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + pattern: openvino_[tests]* + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Download OpenVINO artifacts (wheels) + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + pattern: openvino_[wheels]* + path: ${{ env.INSTALL_WHEELS_DIR }} + merge-multiple: true + + - name: Extract OpenVINO packages + run: Expand-Archive openvino_tests.zip -DestinationPath . 
+ working-directory: ${{ env.INSTALL_DIR }} + + - name: Fetch setup_python and install wheels actions + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + sparse-checkout: | + .github/actions/setup_python/action.yml + .github/actions/install_ov_wheels/action.yml + sparse-checkout-cone-mode: false + path: 'openvino' + + - name: Setup Python ${{ matrix.python-version }} + uses: ./openvino/.github/actions/setup_python + with: + version: ${{ matrix.python-version }} + pip-cache-path: ${{ env.PIP_CACHE_PATH }} + should-setup-pip-paths: 'false' + self-hosted-runner: 'true' + + - name: Install OpenVINO Python wheels + uses: ./openvino/.github/actions/install_ov_wheels + with: + wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} + wheels-to-install: 'openvino' + + - name: Install Python API tests dependencies + run: python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/bindings/python/requirements_test.txt + + - name: Python API Tests + shell: cmd + run: | + set PYTHONPATH=${{ env.INSTALL_TEST_DIR }};%PYTHONPATH% + python3 -m pytest -sv ${{ env.INSTALL_TEST_DIR }}/pyopenvino ${{ env.PYTHON_STATIC_ARGS }} --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-Pyngraph.xml --ignore=${{ env.INSTALL_TEST_DIR }}/pyopenvino/tests/test_utils/test_utils.py + + - name: Python API Tests -- numpy>=2.0.0 + shell: cmd + run: | + python3 -m pip uninstall -y numpy + python3 -m pip install "numpy>=2.0.0,<2.1.0" + python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/bindings/python/requirements_test.txt + # for 'template' extension + set PYTHONPATH=${{ env.INSTALL_TEST_DIR }};%PYTHONPATH% + set PATH=${{ env.INSTALL_TEST_DIR }};%PATH% + python3 -m pytest -sv ${{ env.INSTALL_TEST_DIR }}/pyopenvino --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-Pyngraph_new_numpy.xml --ignore=${{ env.INSTALL_TEST_DIR }}/pyopenvino/tests/test_utils/test_utils.py + + - name: Upload Test Results + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + if: ${{ 
!cancelled() }} + with: + name: test-results-python-${{ matrix.python-version }} + path: | + ${{ env.INSTALL_TEST_DIR }}/TEST*.html + ${{ env.INSTALL_TEST_DIR }}/TEST*.xml + if-no-files-found: 'error' + TensorFlow_Layer_Tests: name: TensorFlow Layer Tests needs: [ Build, Smart_CI, Openvino_tokenizers ] diff --git a/.github/workflows/workflows_scans.yml b/.github/workflows/workflows_scans.yml new file mode 100644 index 00000000000000..0a293a4152b9a0 --- /dev/null +++ b/.github/workflows/workflows_scans.yml @@ -0,0 +1,38 @@ +name: GitHub Actions Workflows Scans +on: + workflow_dispatch: {} + push: + paths: + - '.github/workflows/**' + branches: + - 'master' + - 'releases/**' + pull_request: + paths: + - '.github/workflows/**' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: read-all + +jobs: + semgrep: + name: github_actions_workflows_scan/semgrep + runs-on: ubuntu-latest + if: ${{ github.repository_owner == 'openvinotoolkit' }} + + container: + image: semgrep/semgrep + + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + submodules: 'false' + sparse-checkout: .github/workflows + + - name: Semgrep scan + run: | + semgrep scan --error -j 8 --config "p/github-actions" .github/workflows/* diff --git a/CMakeLists.txt b/CMakeLists.txt index e9e8d3724d9ac5..65a72ef8f4936e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -138,7 +138,7 @@ function(ov_developer_package_export_targets) endforeach() endif() else() - message(FATAL_ERROR "Internal error: ${target_name} does not represent a cmake target") + message(FATAL_ERROR "Internal error: '${EXPORT_TARGET}' does not represent a cmake target") endif() list(REMOVE_DUPLICATES _OPENVINO_DEVELOPER_PACKAGE_TARGETS) diff --git a/cmake/developer_package/compile_flags/sdl.cmake b/cmake/developer_package/compile_flags/sdl.cmake index 34ad5904519e7f..35f59cb8970573 100644 --- 
a/cmake/developer_package/compile_flags/sdl.cmake +++ b/cmake/developer_package/compile_flags/sdl.cmake @@ -55,11 +55,20 @@ if(ENABLE_INTEGRITYCHECK) set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /INTEGRITYCHECK") endif() -set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${OV_C_CXX_FLAGS}") -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${OV_C_CXX_FLAGS}") -set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} ${OV_LINKER_FLAGS}") -set(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_MODULE_LINKER_FLAGS_RELEASE} ${OV_LINKER_FLAGS}") -set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} ${OV_LINKER_FLAGS}") +if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" OR (OV_COMPILER_IS_INTEL_LLVM AND WIN32)) + # add sdl required flags to both Debug and Release on Windows + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OV_C_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OV_C_CXX_FLAGS}") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OV_LINKER_FLAGS}") + set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${OV_LINKER_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OV_LINKER_FLAGS}") +else() + set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${OV_C_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${OV_C_CXX_FLAGS}") + set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} ${OV_LINKER_FLAGS}") + set(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_MODULE_LINKER_FLAGS_RELEASE} ${OV_LINKER_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} ${OV_LINKER_FLAGS}") +endif() unset(OV_C_CXX_FLAGS) unset(OV_LINKER_FLAGS) diff --git a/cmake/features.cmake b/cmake/features.cmake index e1201ad3a185ac..f12810adf86075 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -200,6 +200,9 @@ ov_dependent_option (ENABLE_SYSTEM_PROTOBUF "Enables use of system Protobuf" OFF # the option is turned off by 
default, because we don't want to have a dependency on libsnappy.so ov_dependent_option (ENABLE_SYSTEM_SNAPPY "Enables use of system version of Snappy" OFF "ENABLE_SNAPPY_COMPRESSION" OFF) +# the option is turned off by default, because we are not sure that system version of ZE loader is fresh enough +ov_dependent_option (ENABLE_SYSTEM_LEVEL_ZERO "Enables use of system version of Level Zero" OFF + "ENABLE_INTEL_NPU" OFF) ov_dependent_option(ENABLE_JS "Enables JS API building" ${ENABLE_JS_DEFAULT} "NOT ANDROID;NOT EMSCRIPTEN" OFF) diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst index 3f3d0064e8a4c6..c80dc388568004 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst @@ -76,14 +76,14 @@ Feature Support and API Coverage | HETERO | 61.22 % | 99.24 % | 86.05 % | +-------------------------+-----------+------------------+-------------------+ | || Percentage of API supported by the device, | -| || as of OpenVINO 2024.4, 25 Oct, 2024. | +| || as of OpenVINO 2024.5, 20 Nov. 2024. | +-------------------------+-----------+------------------+-------------------+ For setting up a relevant configuration, refer to the :doc:`Integrate with Customer Application <../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` topic (step 3 "Configure input and output"). -.. dropdown:: Device support across OpenVINO 2024.4 distributions +.. 
dropdown:: Device support across OpenVINO 2024.5 distributions =============== ========== ====== =============== ======== ============ ========== ========== ========== Device Archives PyPI APT/YUM/ZYPPER Conda Homebrew vcpkg Conan npm diff --git a/docs/articles_en/about-openvino/performance-benchmarks.rst b/docs/articles_en/about-openvino/performance-benchmarks.rst index 75c7ba90db7e76..5d9abfe891584f 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks.rst @@ -13,7 +13,7 @@ Performance Benchmarks Efficient LLMs for AI PC Performance Information F.A.Q. OpenVINO Accuracy - Getting Performance Numbers + Getting Performance Numbers This page presents benchmark results for the @@ -132,21 +132,21 @@ For a listing of all platforms and configurations used for testing, refer to the .. grid-item:: - .. button-link:: ../_static/benchmarks_files/OV-2024.4-platform_list.pdf + .. button-link:: ../_static/benchmarks_files/OV-2024.5-platform_list.pdf :color: primary :outline: :expand: :material-regular:`download;1.5em` Click for Hardware Platforms [PDF] - .. button-link:: ../_static/benchmarks_files/OV-2024.4-system-info-detailed.xlsx + .. button-link:: ../_static/benchmarks_files/OV-2024.5-system-info-detailed.xlsx :color: primary :outline: :expand: :material-regular:`download;1.5em` Click for Configuration Details [XLSX] - .. button-link:: ../_static/benchmarks_files/OV-2024.4-Performance-Data.xlsx + .. button-link:: ../_static/benchmarks_files/OV-2024.5-Performance-Data.xlsx :color: primary :outline: :expand: @@ -160,10 +160,10 @@ For a listing of all platforms and configurations used for testing, refer to the **Disclaimers** * Intel® Distribution of OpenVINO™ toolkit performance results are based on release - 2024.3, as of July 31, 2024. + 2024.5, as of November 20, 2024. * OpenVINO Model Server performance results are based on release - 2024.3, as of Aug. 19, 2024. 
+ 2024.5, as of November 20, 2024. The results may not reflect all publicly available updates. Intel technologies' features and benefits depend on system configuration and may require enabled hardware, software, or service diff --git a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst index b8256af650e2f8..5697fcbf6e4d74 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst @@ -5,9 +5,7 @@ This page is regularly updated to help you identify the best-performing LLMs on Intel® Core™ Ultra processor family and AI PCs. The current data is as of OpenVINO 2024.4, 24 Oct. 2024 -The tables below list the key performance indicators for a selection of Large Language Models, -running on an Intel® Core™ Ultra 7-165H, Intel® Core™ Ultra 7-265V, and Intel® Core™ Ultra -7-288V based system, on built-in GPUs. +The tables below list the key performance indicators for inference on built-in GPUs. @@ -16,14 +14,32 @@ running on an Intel® Core™ Ultra 7-165H, Intel® Core™ Ultra 7-265V, and In +.. tab-set:: -.. csv-table:: - :class: modeldata stripe - :name: supportedModelsTableOv - :header-rows: 1 - :file: ../../_static/benchmarks_files/llm_models.csv + .. tab-item:: 9-288V + + .. csv-table:: + :class: modeldata stripe + :name: supportedModelsTableOv + :header-rows: 1 + :file: ../../_static/benchmarks_files/llm_models_9-288V.csv + + .. tab-item:: 7-268V + + .. csv-table:: + :class: modeldata stripe + :name: supportedModelsTableOv + :header-rows: 1 + :file: ../../_static/benchmarks_files/llm_models_7-258V.csv + + .. tab-item:: 7-155H + + .. csv-table:: + :class: modeldata stripe + :name: supportedModelsTableOv + :header-rows: 1 + :file: ../../_static/benchmarks_files/llm_models_7-155H.csv -| .. 
grid:: 1 1 2 2 :gutter: 4 diff --git a/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst b/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst index 3162bae7254704..e87733a1445356 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst @@ -1,9 +1,6 @@ Model Accuracy ============== - - -The following two tables present the absolute accuracy drop calculated as the accuracy difference between OV-accuracy and the original framework accuracy for FP32, and the same for INT8, BF16, and FP16 representations of a model on three platform architectures. The third table presents the GenAI model accuracies as absolute accuracy values. Refer to notes below the table for more @@ -11,7 +8,7 @@ information. * A - Intel® Core™ i9-9000K (AVX2), INT8 and FP32 * B - Intel® Xeon® 6338, (VNNI), INT8 and FP32 -* C - Intel® Xeon 8480+ (VNNI, AMX), INT8, BF16, FP32 +* C - Intel® Xeon 8580 (VNNI, AMX), INT8, BF16, FP32 * D - Intel® Flex-170, INT8 and FP16 @@ -28,73 +25,52 @@ information. 
* - bert-base-cased - SST-2_bert_cased_padded - spearman@cosine - - 3.33% - - 3.22% - - 3.05% - - 2.88% - * - bert-large-uncased-whole-word-masking-squad-0001 - - SQUAD_v1_1_bert_msl384_mql64_ds128_lowercase - - F1 - - 0.12% - - 0.03% - - 0.03% - - 0.28% + - 3.06% + - 2.89% + - 2.71% + - 2.71% * - efficientdet-d0 - COCO2017_detection_91cl - coco_precision - - 0.00% - - -0.52% - - -0.54% - - -0.60% + - -0.84% + - -0.59% + - -0.59% + - -0.55% * - mask_rcnn_resnet50_atrous_coco - COCO2017_detection_91cl_bkgr - coco_orig_precision - - 0.05% - - 0.03% - - 0.08% - - -0.09% + - -0.10% + - -0.04% + - 0.07% + - -0.01% * - mobilenet-v2 - ImageNet2012 - accuracy @ top1 - - - -0.87% - - -0.88% - - -0.88% + - -0.97% + - -0.98% + - -0.95% * - resnet-50 - ImageNet2012 - accuracy @ top1 - - -0.17% - - -0.18% - - -0.18% - - -0.16% + - 0.74% + - 0.76% + - 0.74% + - 0.82% * - ssd-resnet34-1200 - COCO2017_detection_80cl_bkgr - map - - -0.03% - - -0.02% - - -0.03% - - 0.02% + - -0.06% + - -0.08% + - -0.07% + - -0.06% * - ssd-mobilenet-v1-coco - COCO2017_detection_80cl_bkgr - coco-precision - - -2.74% - - -0.11% - - -0.13% - - -0.12% - * - unet-camvid-onnx-0001 - - CamVid_12cl - - mean_iou @ mean - - -6.28% - - 6.45% - - 6.46% - - 6.43% - * - yolo_v5m - - COCO2017_detection_80cl - - map - - -0.40% - - -0.32% - - -0.32% - - -0.31% + - -2.94% + - -0.28% + - -0.28% + - -0.26% * - yolo_v8n - COCO2017_detection_80cl - map @@ -121,30 +97,22 @@ information. - 0.00% - 0.00% - -0.01% - - 0.01% - * - bert-large-uncased-whole-word-masking-squad-0001 - - SQUAD_v1_1_bert_msl384_mql64_ds128_lowercase - - F1 - - 0.04% - - 0.04% - - 0.06% - - 0.06% - - 0.04% + - 0.02% * - efficientdet-d0 - COCO2017_detection_91cl - coco_precision - 0.01% - - -0.02% - 0.01% + - 0.01% + - 0.00% - 0.00% - - -0.02% * - mask_rcnn_resnet50_atrous_coco - COCO2017_detection_91cl_bkgr - coco_orig_precision - -0.01% - -0.01% - -0.01% - - -0.05% + - 0.05% - 0.00% * - mobilenet-v2 - ImageNet2012 @@ -160,40 +128,24 @@ information. 
- 0.00% - 0.00% - 0.00% - - -0.01% - - -0.01% + - 0.01% + - 0.01% * - ssd-resnet34-1200 - COCO2017_detection_80cl_bkgr - map - 0.02% - - 0.00% - - 0.00% - - -0.02% - - 0.04% + - 0.02% + - 0.02% + - -0.01% + - 0.02% * - ssd-mobilenet-v1-coco - COCO2017_detection_80cl_bkgr - coco-precision - - -0.08% - - 0.01% + - 0.04% - 0.01% + - 0.04% - 0.08% - 0.01% - * - unet-camvid-onnx-0001 - - CamVid_12cl - - mean_iou @ mean - - 0.00% - - 0.00% - - 0.00% - - -0.03% - - -0.03% - * - yolo_v5m - - COCO2017_detection_80cl - - map - - 0.00% - - 0.05% - - 0.05% - - 0.07% - - 0.07% * - yolo_v8n - COCO2017_detection_80cl - map @@ -213,46 +165,60 @@ information. - B, VNNI-INT4 - C, FAMX-FP16 - D, MTL-INT4 - * - chatGLM2-6b + * - chatGLM4 - Wikiset - ppl - - 5.24 - - 6.03 - - 5.24 - - 6.03 - * - Falcon-7b-instruct + - + - + - + - + * - Gemma-2-9B - Wikitext - ppl - - 1.65 - - 1.76 - - 1.65 - - 1.76 + - + - 1.57 + - 1.57 + - * - Llama-2-7b-chat - Wikiset - ppl - - 1.58 - - 1.59 - - 1.91 + - + - - 1.59 + - * - Llama-3-8b - Wikiset - ppl - - 1.54 - - 1.56 + - 1.45 + - 1.48 + - 1.45 + - + * - Llama-3.2-3b-instruct + - Wikiset + - ppl + - 1.60 + - 1.62 - 1.17 - - 1.57 + - * - Mistral-7b - Wikitext - ppl - 1.48 - 1.49 - - 1.39 - - 1.49 + - 1.48 + - * - Phi3-mini-4k-instruct - Wikitext - ppl - 1.52 + - 1.55 + - 1.52 - 1.56 + * - Qwen-2-7B + - Wikitext + - ppl + - 1.52 + - 1.53 - 1.52 - 1.56 diff --git a/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst b/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst index 4bf0b3a0acb19a..0f70c93e9c8b96 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst @@ -31,10 +31,13 @@ Performance Information F.A.Q. .. dropdown:: How can I run the benchmark results on my own? 
- All of the performance benchmarks are generated using the + All of the performance benchmarks on traditional network models are generated using the open-source tool within the Intel® Distribution of OpenVINO™ toolkit called :doc:`benchmark_app <../../learn-openvino/openvino-samples/benchmark-tool>`. + For diffusers (Stable-Diffusion) and foundational models (aka LLMs), please use the OpenVINO GenAI + open-source repo `OpenVINO GenAI tools/llm_bench `__. + For a simple instruction on testing performance, see the :doc:`Getting Performance Numbers Guide `. .. dropdown:: Where can I find a more detailed description of the workloads used for benchmarking? @@ -50,23 +53,27 @@ Performance Information F.A.Q. - Public Network - Task - Input Size - * - `chatGLM2-6B `__ + * - `chatGLM4-9B `__ - THUDM - Transformer - - 32K - * - `Falcon-7b-instruct `__ + - 128K + * - `Gemma-2-9B `__ - Huggingface - Causal Decoder-only - - 2048 + - Text-To-Text Decoder-only + - 8K * - `Llama-2-7b-chat `__ - Meta AI - Auto regressive language - - 4096 + - 4K * - `Llama-3-8b `__ - Meta AI - Auto regressive language - - 8192 - * - `Mistral-7b `__ + - 8K + * - `Llama-3.2-3B `__ + - Meta AI + - Auto regressive language + - 128K + * - `Mistral-7b-V0.1 `__ - Mistral AI - Auto regressive language - 4096 @@ -74,6 +81,10 @@ Performance Information F.A.Q. - Huggingface - Auto regressive language - 4096 + * - `Qwen-2-7B `__ + - Huggingface + - Auto regressive language + - 128K * - `Stable-Diffusion-V1-5 `__ - Huggingface - Latent Diffusion Model @@ -82,10 +93,6 @@ Performance Information F.A.Q. - BERT - question / answer - 128 - * - `bert-large-uncased-whole-word-masking-squad-int8-0001 `__ - - BERT-large - - question / answer - - 384 * - `efficientdet-d0 `__ - Efficientdet - classification @@ -110,14 +117,6 @@ Performance Information F.A.Q.
- ssd-resnet34 onnx model - object detection - 1200x1200 - * - `unet-camvid-onnx-0001 `__ - - U-Net - - semantic segmentation - - 368x480 - * - `yolo-v5m `__ - - YOLO V5 Medium - - object detection - - 640x640 * - `yolov8n `__ - Yolov8nano - object detection diff --git a/docs/articles_en/about-openvino/release-notes-openvino.rst b/docs/articles_en/about-openvino/release-notes-openvino.rst index 6685a4325d57fe..9e7673d7d0910d 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino.rst @@ -1,3 +1,4 @@ +============================= OpenVINO Release Notes ============================= @@ -15,115 +16,462 @@ OpenVINO Release Notes -2024.4 - 19 September 2024 +2024.5 - 20 November 2024 ############################# :doc:`System Requirements <./release-notes-openvino/system-requirements>` | :doc:`Release policy <./release-notes-openvino/release-policy>` | :doc:`Installation Guides <./../get-started/install-openvino>` + + What's new +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -* More Gen AI coverage and framework integrations to minimize code changes. +* More GenAI coverage and framework integrations to minimize code changes. - * Support for GLM-4-9B Chat, MiniCPM-1B, Llama 3 and 3.1, Phi-3-Mini, Phi-3-Medium and - YOLOX-s models. - * Noteworthy notebooks added: Florence-2, NuExtract-tiny Structure Extraction, Flux.1 Image - Generation, PixArt-α: Photorealistic Text-to-Image Synthesis, and Phi-3-Vision Visual - Language Assistant. + * New models supported: Llama 3.2 (1B & 3B), Gemma 2 (2B & 9B), and YOLO11. + * LLM support on NPU: Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, Qwen2-7B-Instruct and Phi-3 + Mini-Instruct. + * Noteworthy notebooks added: Sam2, Llama3.2, Llama3.2 - Vision, Wav2Lip, Whisper, and Llava. + * Preview: support for Flax, a high-performance Python neural network library based on JAX. 
+ Its modular design allows for easy customization and accelerated inference on GPUs. * Broader Large Language Model (LLM) support and more model compression techniques. - * OpenVINO™ runtime optimized for Intel® Xe Matrix Extensions (Intel® XMX) systolic arrays on - built-in GPUs for efficient matrix multiplication resulting in significant LLM performance - boost with improved 1st and 2nd token latency, as well as a smaller memory footprint on - Intel® Core™ Ultra Processors (Series 2). - * Memory sharing enabled for NPUs on Intel® Core™ Ultra Processors (Series 2) for efficient - pipeline integration without memory copy overhead. - * Addition of the PagedAttention feature for discrete GPUs* enables a significant boost in - throughput for parallel inferencing when serving LLMs on Intel® Arc™ Graphics or Intel® - Data Center GPU Flex Series. + * Optimizations for built-in GPUs on Intel® Core™ Ultra Processors (Series 1) and Intel® Arc™ + Graphics include KV Cache compression for memory reduction along with improved usability, + and model load time optimizations to improve first token latency for LLMs. + * Dynamic quantization was enabled to improve first token latency for LLMs on built-in + Intel® GPUs without impacting accuracy on Intel® Core™ Ultra Processors (Series 1). Second + token latency will also improve for large batch inference. + * A new method to generate synthetic text data is implemented in the Neural Network + Compression Framework (NNCF). This will allow LLMs to be compressed more accurately using + data-aware methods without datasets. Coming soon: This feature will soon be accessible via + Optimum Intel on Hugging Face. * More portability and performance to run AI at the edge, in the cloud, or locally. - * Support for Intel® Core Ultra Processors Series 2 (formerly codenamed Lunar Lake) on Windows. 
- * OpenVINO™ Model Server now comes with production-quality support for OpenAI-compatible API - which enables significantly higher throughput for parallel inferencing on Intel® Xeon® - processors when serving LLMs to many concurrent users. - * Improved performance and memory consumption with prefix caching, KV cache compression, and - other optimizations for serving LLMs using OpenVINO™ Model Server. - * Support for Python 3.12. - * Support for Red Hat Enterprise Linux (RHEL) version 9.3 - 9.4. + * Support for + `Intel® Xeon® 6 Processors with P-cores `__ + (formerly codenamed Granite Rapids) and + `Intel® Core™ Ultra 200V series processors `__ + (formerly codenamed Arrow Lake-S). + * Preview: GenAI API enables multimodal AI deployment with support for multimodal pipelines + for improved contextual awareness, transcription pipelines for easy audio-to-text + conversions, and image generation pipelines for streamlined text-to-visual conversions. + * Speculative decoding feature added to the GenAI API for improved performance and efficient + text generation using a small draft model that is periodically corrected by the full-size + model. + * Preview: LoRA adapters are now supported in the GenAI API for developers to quickly and + efficiently customize image and text generation models for specialized tasks. + * The GenAI API now also supports LLMs on NPU allowing developers to specify NPU as the + target device, specifically for WhisperPipeline (for whisper-base, whisper-medium, and + whisper-small) and LLMPipeline (for Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, + Qwen2-7B-Instruct and Phi-3 Mini-instruct). Use driver version 32.0.100.3104 or later for + best performance. 
Now deprecated +----------------------------------------------------------------------------------------------- + +* Python 3.8 is no longer supported. + + +OpenVINO™ Runtime +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -* The following will not be available beyond the 2024.4 OpenVINO version: +Common +----------------------------------------------------------------------------------------------- - * The macOS x86_64 debug bins - * Python 3.8 - * Discrete Keem Bay support +* NumPy 2.x has been adopted for all currently supported components, including NNCF. +* A new constant constructor has been added, enabling constants to be created from data pointer + as shared memory. Additionally, it can take ownership of a shared, or other, object, avoiding + a two-step process to wrap memory into ``ov::Tensor``. +* Asynchronous file reading with mmap library has been implemented, reducing loading times for + model files, especially for LLMs. +* CPU implementation of SliceScatter operator is now available, used for models such as Gemma, + supporting increased LLM performance. -* Intel® Streaming SIMD Extensions (Intel® SSE) will be supported in source code form, but not - enabled in the binary package by default, starting with OpenVINO 2025.0. -| Check the `deprecation section <#deprecation-and-support>`__ for more information. +CPU Device Plugin +----------------------------------------------------------------------------------------------- +* Gold support of the Intel® Xeon® 6 platform with P-cores (formerly codenamed Granite Rapids) + has been reached. +* Support of Intel® Core™ Ultra 200V series processors (formerly codenamed Arrow Lake-S) has + been implemented. +* LLM performance has been further improved with Rotary Position Embedding optimization; Query, + Key, and Value; and multi-layer perceptron fusion optimization.
+* FP16 support has been extended with SDPA and PagedAttention, improving performance of LLM via + both native APIs and the vLLM integration. +* Models with LoRA adapters are now supported. -Common +GPU Device Plugin +----------------------------------------------------------------------------------------------- + +* The KV cache INT8 compression mechanism is now available for all supported GPUs. It enables a + significant reduction in memory consumption, increasing performance with a minimal impact to + accuracy (it affects systolic devices slightly more than non-systolic ones). The feature is + activated by default for non-systolic devices. +* LoRA adapters are now functionally supported on GPU. +* A new feature of GPU weightless blob caching enables caching model structure only and reusing + the weights from the original model file. Use the new OPTIMIZE_SIZE property to activate. +* Dynamic quantization with INT4 and INT8 precisions has been implemented and enabled by + default on Intel® Core™ Ultra platforms, improving LLM first token latency. + + +NPU Device Plugin +----------------------------------------------------------------------------------------------- + +* Models retrieved from the OpenVINO cache have a smaller memory footprint now. The plugin + releases the cached model (blob) after weights are loaded in NPU regions. Model export is not + available in this scenario. Memory consumption is reduced during inference execution with one + blob size. This optimization requires the latest NPU driver: 32.0.100.3104. +* A driver bug for ``ov::intel_npu::device_total_mem_size`` has been fixed. The plugin will now + report 2GB as the maximum allocatable memory for any driver that does not support graph + extension 1.8. Even if older drivers report a larger amount of memory to be available, memory + allocation would fail when 2GB are exceeded. Plugin reports the number that driver exposes + for any driver that supports graph extension 1.8 (or newer). 
+* A new API is used to initialize the model (available in graph extension 1.8). +* Inference request set_tensors is now supported. +* ``ov::device::LUID`` is now exposed on Windows. +* LLM-related improvements have been implemented in terms of both memory usage and performance. +* AvgPool and MaxPool operator support has been extended, adding support for more PyTorch models. + +* NOTE: for systems based on Intel® Core™ Ultra Processors Series 2, more than 16GB of RAM may + be required to use larger models, such as Llama-2-7B, Mistral-0.2-7B, and Qwen-2-7B + (exceeding 4B parameters) with prompt sizes over 1024 tokens. + + +OpenVINO Python API +----------------------------------------------------------------------------------------------- + +* Constant now can be created from openvino.Tensor. +* The “release_memory” method has been added for a compiled model, improving control over + memory consumption. + + + +OpenVINO Node.js API +----------------------------------------------------------------------------------------------- + +* Querying the best device to perform inference of a model with specific operations + is now available in JavaScript API. +* Contribution guidelines have been improved to make it easier for developers to contribute. +* Testing scope has been extended by inference in end-to-end tests. +* JavaScript API samples have been improved for readability and ease of running. + + + +TensorFlow Framework Support +----------------------------------------------------------------------------------------------- + +* TensorFlow 2.18.0, Keras 3.6.0, NumPy 2.0.2 in Python 3.12, and NumPy 1.26.4 in other Python + versions have been added to validation. +* Out-of-the-box conversion with static ranks has been improved by devising a new shape for + Switch-Merge condition sub-graphs. +* Complex type for the following operations is now supported: ExpandDims, Pack, Prod, Rsqrt, + ScatterNd, Sub. 
+* The following issues have been fixed: + + * division by zero in the corner case of LinSpace with one element, + * missing support for FP16 and FP64 input types in LeakyRelu, + * missing support for non-i32/i64 output index types in ArgMin/Max operations. + + + +PyTorch Framework Support +----------------------------------------------------------------------------------------------- + +* PyTorch version 2.5 is now supported. +* OpenVINO Model Converter (OVC) now supports TorchScript and ExportedProgram saved on a drive. +* The issue of aten.index.Tensor conversion for indices with “None” values has been fixed, + helping to support the HF Stable Diffusion model in ExportedProgram format. + + + +ONNX Framework Support +----------------------------------------------------------------------------------------------- + +* ONNX version 1.17.0 is now used. +* Customers' models with DequantizeLinear-21, com.microsoft.MatMulNBits, and + com.microsoft.QuickGelu operations are now supported. + +JAX/Flax Framework Support +----------------------------------------------------------------------------------------------- + +* JAX 0.4.35 and Flax 0.10.0 have been added to validation. +* jax._src.core.ClosedJaxpr object conversion is now supported. +* Vision Transformer from google-research/vision_transformer is now supported + (with support for 37 new operations). + + +OpenVINO Model Server +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -* Encryption and decryption of topology in model cache is now supported with callback functions - provided by the user (CPU only for now; ov::cache_encryption_callbacks). -* The Ubuntu20 and Ubuntu22 Docker images now include the tokenizers and GenAI CPP modules, - including pre-installed Python modules, in development versions of these images. -* Python 3.12 is now supported. +* The OpenAI API text embedding endpoint has been added, enabling OVMS to be used as a building + block for AI applications like RAG.
+ `(read more) `__ +* The rerank endpoint has been added based on Cohere API, enabling easy similarity detection + between a query and a set of documents. It is one of the building blocks for AI applications + like RAG and makes integration with frameworks such as langchain easy. + `(read more) `__ +* The following improvements have been done to LLM text generation: -CPU Device Plugin + * The ``echo`` sampling parameter together with ``logprobs`` in the ``completions`` endpoint + is now supported. + * Performance has been increased on both CPU and GPU. + * Throughput in high-concurrency scenarios has been increased with dynamic_split_fuse for GPU. + * Testing coverage and stability has been improved. + * The procedure for service deployment and model repository preparation has been simplified. + +* An experimental version of a Windows binary package - native model server for Windows OS - is + available. This release includes a set of limitations and has limited tests coverage. It is + intended for testing, while the production-ready release is expected with 2025.0. All feedback + is welcome. + + +Neural Network Compression Framework +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -* The following is now supported: +* A new nncf.data.generate_text_data() method has been added for generating a synthetic dataset + for LLM compression. This approach helps to compress LLMs more accurately in situations when + the dataset is not available or not sufficient. + `See our example `__ + for more information about the usage. +* Support of data-free and data-aware weight compression methods - nncf.compress_weights() - + has been extended with NF4 per-channel quantization, making compressed LLMs more accurate and + faster on NPU. 
+* Caching of computed statistics in nncf.compress_weights() is now available, significantly + reducing compression time when performing compression of the same LLM multiple times, with + different compression parameters. To enable it, set the advanced ``statistics_path`` parameter + of nncf.compress_weights() to the desired file path location. +* The ``backup_mode`` optional parameter has been added to nncf.compress_weights(), for + specifying the data type for embeddings, convolutions, and last linear layers during 4-bit + weight compression. Available options are INT8_ASYM (default), INT8_SYM, and NONE (retains + the original floating-point precision of the model weights). In certain situations, + non-default value might give better accuracy of compressed LLMs. +* Preview support is now available for optimizing models in Torch + `FX format `__, nncf.quantize(), and + nncf.compress_weights() methods. After optimization such models can be directly executed + via torch.compile(compressed_model, backend="openvino"). For more details, see + `INT8 quantization example `__. +* Memory consumption of data-aware weight compression methods - nncf.compress_weights() – has + been reduced significantly, with some variation depending on the model and method. +* Support for the following has changed: + + * NumPy 2 added + * PyTorch upgraded to 2.5.1 + * ONNX upgraded to 1.17 + * Python 3.8 discontinued - * Tensor parallel feature for multi-socket CPU inference, with performance improvement for - LLMs with 6B+ parameters (enabled through model_distribution_policy hint configurations). - * RMSNorm operator, optimized with JIT kernel to improve both the 1st and 2nd token - performance of LLMs. -* The following has been improved: - * vLLM support, with PagedAttention exposing attention score as the second output. It can now - be used in the cache eviction algorithm to improve LLM serving performance. 
- * 1st token performance with Llama series of models, with additional CPU operator optimization - (such as MLP, SDPA) on BF16 precision. - * Default oneTBB version on Linux is now 2021.13.0, improving overall performance on latest - Intel XEON platforms. - * MXFP4 weight compression models (compressing weights to 4-bit with the e2m1 data type - without a zero point and with 8-bit e8m0 scales) have been optimized for Xeon platforms - thanks to fullyconnected compressed weight LLM support. +OpenVINO Tokenizers ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -* The following has been fixed: +* Several operations have been introduced and optimized. +* Conversion parameters and environment info have been added to ``rt_info``, improving + reproducibility and debugging. - * Memory leak when ov::num_streams value is 0. - * CPU affinity mask is changed after OpenVINO execution when OpenVINO is compiled - with -DTHREADING=SEQ. -GPU Device Plugin +OpenVINO.GenAI +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -* Dynamic quantization for LLMs is now supported on discrete GPU platforms. -* Stable Diffusion 3 is now supported with good accuracy on Intel GPU platforms. -* Both first and second token latency for LLMs have been improved on Intel GPU platforms. -* The issue of model cache not regenerating with the value changes of - ``ov::hint::performance_mode`` or ``ov::hint::dynamic_quantization_group_size`` has been - fixed. +* The following has been added: + * LoRA adapter for the LLMPipeline. + * Text2ImagePipeline with LoRA adapter and text2image samples. + * VLMPipeline and visual_language_chat sample for text generation models with text and image + inputs. + * WhisperPipeline and whisper_speech_recognition sample. -NPU Device Plugin +* speculative_decoding_lm has been moved to LLMPipeline based implementation and is now + installed as part of the package. 
+* On NPU, a set of pipelines has been enabled: WhisperPipeline (for whisper-base, + whisper-medium, and whisper-small), LLMPipeline (for Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, + Qwen2-7B-Instruct, and Phi-3 Mini-instruct). Use driver version 32.0.100.3104 or later for + best performance. + + + + + +Other Changes and Known Issues +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Jupyter Notebooks +----------------------------- + +* `Text-to-Image generation using OpenVINO GenAI `__ +* `Multi LoRA Image Generation `__ +* `Virtual Try-on using OpenVINO and CatVTON `__ +* `Visual Language Assistant using OpenVINO GenAI `__ +* `Speech recognition using OpenVINO GenAI `__ +* `YoloV11 `__ +* `Llama-3.2-vision `__ +* `Pixtral `__ +* `Segment Anything 2 `__ +* `Video Lips-sync using Wav2Lip `__ +* `Convert JAX to OpenVINO tutorial `__ + + +Known Issues +----------------------------- + +| **Component: CPU Plugin** +| ID: 155898 +| Description: +| When using a new version of Transformers to convert some LLMs + (GPT-J/GPT-NeoX or falcon-7b), the inference accuracy may be impacted on 4th or 5th + generation of Intel® Xeon® processors, due to model structure update triggering inference + precision difference in part of the model. The workaround is to use Transformers version + 4.44.2 or lower. + +| **Component: GPU Plugin** +| ID: 154583 +| Description: +| LLM accuracy can be low especially on non-systolic platforms like Intel® Core™ Ultra. When + facing the low accuracy issue, user needs to manually set a config ACTIVATION_SCALING_FACTOR + with a value of 8.0 in the compile_model() function. From the next release, scaling factor + value will be automatically applied through updated IR.
+ +| **Component: GenAI** +| ID: 156437, 148933 +| Description: +| When using Python GenAI APIs, if ONNX 1.17.0 or later is installed, the following error may + occur: “DLL load failed while importing onnx_cpp2py_export: A dynamic link library (DLL) + initialization routine failed.” It is due to the ONNX dependency issue + `onnx/onnx#6267 `__. + Install the latest supported + `Microsoft Visual C++ Redistributable `__ + downloads to fix the issue. + +| **Component: GenAI** +| ID: 156944 +| Description: +| There were backward incompatible changes resulting in different text generated by LLMs like + Mistralai/Mistral-7B-Instruct-v0.2 and TinyLlama/TinyLlama-1.1B-Chat-v1.0 when using a + tokenizer converted by older openvino_tokenizers. A way to resolve the issue is to convert + tokenizer and detokenizer models using the latest openvino_tokenizers. + + + + + + + + +Previous 2024 releases ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + + +.. dropdown:: 2024.4 - 19 September 2024 + :animate: fade-in-slide-down + :color: secondary + + **What's new** + + * More Gen AI coverage and framework integrations to minimize code changes. + + * Support for GLM-4-9B Chat, MiniCPM-1B, Llama 3 and 3.1, Phi-3-Mini, Phi-3-Medium and + YOLOX-s models. + * Noteworthy notebooks added: Florence-2, NuExtract-tiny Structure Extraction, Flux.1 Image + Generation, PixArt-α: Photorealistic Text-to-Image Synthesis, and Phi-3-Vision Visual + Language Assistant. + + * Broader Large Language Model (LLM) support and more model compression techniques.
+ + * OpenVINO™ runtime optimized for Intel® Xe Matrix Extensions (Intel® XMX) systolic arrays on + built-in GPUs for efficient matrix multiplication resulting in significant LLM performance + boost with improved 1st and 2nd token latency, as well as a smaller memory footprint on + Intel® Core™ Ultra Processors (Series 2). + * Memory sharing enabled for NPUs on Intel® Core™ Ultra Processors (Series 2) for efficient + pipeline integration without memory copy overhead. + * Addition of the PagedAttention feature for discrete GPUs* enables a significant boost in + throughput for parallel inferencing when serving LLMs on Intel® Arc™ Graphics or Intel® + Data Center GPU Flex Series. + + * More portability and performance to run AI at the edge, in the cloud, or locally. + + * Support for Intel® Core™ Ultra Processors Series 2 (formerly codenamed Lunar Lake) on Windows. + * OpenVINO™ Model Server now comes with production-quality support for OpenAI-compatible API + which enables significantly higher throughput for parallel inferencing on Intel® Xeon® + processors when serving LLMs to many concurrent users. + * Improved performance and memory consumption with prefix caching, KV cache compression, and + other optimizations for serving LLMs using OpenVINO™ Model Server. + * Support for Python 3.12. + * Support for Red Hat Enterprise Linux (RHEL) version 9.3 - 9.4. + + *Now deprecated* + + * The following will not be available beyond the 2024.4 OpenVINO version: + + * The macOS x86_64 debug bins + * Python 3.8 + * Discrete Keem Bay support + + * Intel® Streaming SIMD Extensions (Intel® SSE) will be supported in source code form, but not + enabled in the binary package by default, starting with OpenVINO 2025.0. + + Check the `deprecation section <#deprecation-and-support>`__ for more information. 
+ + **OpenVINO™ Runtime** + + *Common* + + * Encryption and decryption of topology in model cache is now supported with callback functions + provided by the user (CPU only for now; ov::cache_encryption_callbacks). + * The Ubuntu20 and Ubuntu22 Docker images now include the tokenizers and GenAI CPP modules, + including pre-installed Python modules, in development versions of these images. + * Python 3.12 is now supported. + + *CPU Device Plugin* + + * The following is now supported: + + * Tensor parallel feature for multi-socket CPU inference, with performance improvement for + LLMs with 6B+ parameters (enabled through model_distribution_policy hint configurations). + * RMSNorm operator, optimized with JIT kernel to improve both the 1st and 2nd token + performance of LLMs. + + * The following has been improved: + + * vLLM support, with PagedAttention exposing attention score as the second output. It can now + be used in the cache eviction algorithm to improve LLM serving performance. + * 1st token performance with Llama series of models, with additional CPU operator optimization + (such as MLP, SDPA) on BF16 precision. + * Default oneTBB version on Linux is now 2021.13.0, improving overall performance on latest + Intel® Xeon® platforms. + * MXFP4 weight compression models (compressing weights to 4-bit with the e2m1 data type + without a zero point and with 8-bit e8m0 scales) have been optimized for Intel® Xeon® + platforms thanks to fullyconnected compressed weight LLM support. + + * The following has been fixed: + + * Memory leak when ov::num_streams value is 0. + * CPU affinity mask is changed after OpenVINO execution when OpenVINO is compiled + with -DTHREADING=SEQ. + + + *GPU Device Plugin* + + * Dynamic quantization for LLMs is now supported on discrete GPU platforms. + * Stable Diffusion 3 is now supported with good accuracy on Intel GPU platforms. + * Both first and second token latency for LLMs have been improved on Intel GPU platforms. 
+ * The issue of model cache not regenerating with the value changes of + ``ov::hint::performance_mode`` or ``ov::hint::dynamic_quantization_group_size`` has been + fixed. + + + *NPU Device Plugin* + * `Remote Tensor API `__ is now supported. * You can now query the available number of tiles (ov::intel_npu::max_tiles) and force a @@ -140,193 +488,178 @@ NPU Device Plugin only during the export method. -OpenVINO Python API -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + *OpenVINO Python API* -* Openvino.Tensor, when created in the shared memory mode, now prevents “garbage collection” of - numpy memory. -* The ``openvino.experimental`` submodule is now available, providing access to experimental - functionalities under development. -* New python-exclusive openvino.Model constructors have been added. -* Image padding in PreProcessor is now available. -* OpenVINO Runtime is now compatible with numpy 2.0. + * Openvino.Tensor, when created in the shared memory mode, now prevents “garbage collection” of + numpy memory. + * The ``openvino.experimental`` submodule is now available, providing access to experimental + functionalities under development. + * New python-exclusive openvino.Model constructors have been added. + * Image padding in PreProcessor is now available. + * OpenVINO Runtime is now compatible with numpy 2.0. -OpenVINO Node.js API -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + *OpenVINO Node.js API* -* The following has been improved + * The following has been improved - * Unit tests for increased efficiency and stability - * Security updates applied to dependencies + * Unit tests for increased efficiency and stability + * Security updates applied to dependencies -* `Electron `__ - compatibility is now confirmed with new end-to-end tests. -* `New API methods `__ added. + * `Electron `__ + compatibility is now confirmed with new end-to-end tests. 
+ * `New API methods `__ added. -TensorFlow Framework Support -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + *TensorFlow Framework Support* -* TensorFlow 2.17.0 is now supported. -* JAX 0.4.31 is now supported via a path of jax2tf with native_serialization=False -* `8 NEW* operations `__ - have been added. -* Tensor lists with multiple undefined dimensions in element_shape are now supported, enabling - support for TF Hub lite0-detection/versions/1 model. + * TensorFlow 2.17.0 is now supported. + * JAX 0.4.31 is now supported via a path of jax2tf with native_serialization=False + * `8 NEW* operations `__ + have been added. + * Tensor lists with multiple undefined dimensions in element_shape are now supported, enabling + support for TF Hub lite0-detection/versions/1 model. -PyTorch Framework Support -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + *PyTorch Framework Support* -* Torch 2.4 is now supported. -* Inplace ops are now supported automatically if the regular version is supported. -* Symmetric GPTQ model from Hugging Face will now be automatically converted to the signed type - (INT4) and zero-points will be removed. + * Torch 2.4 is now supported. + * Inplace ops are now supported automatically if the regular version is supported. + * Symmetric GPTQ model from Hugging Face will now be automatically converted to the signed type + (INT4) and zero-points will be removed. -ONNX Framework Support -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + *ONNX Framework Support* -* ONNX 1.16.0 is now supported -* models with constants/inputs of uint4/int4 types are now supported. -* 4 NEW operations have been added. + * ONNX 1.16.0 is now supported + * models with constants/inputs of uINT4/INT4 types are now supported. + * 4 NEW operations have been added. 
-OpenVINO Model Server -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + **OpenVINO Model Server** -* OpenAI API for text generation is now officially supported and recommended for production - usage. It comes with the following new features: + * OpenAI API for text generation is now officially supported and recommended for production + usage. It comes with the following new features: - * Prefix caching feature, caching the prompt evaluation to speed up text generation. - * Ability to compress the KV Cache to a lower precision, reducing memory consumption without - a significant loss of accuracy. - * ``stop`` sampling parameters, to define a sequence that stops text generation. - * ``logprobs`` sampling parameter, returning the probabilities to returned tokens. - * Generic metrics related to execution of the MediaPipe graph that can be used for autoscaling - based on the current load and the level of concurrency. - * `Demo of text generation horizontal scalability `__ - using basic docker containers and Kubernetes. - * Automatic cancelling of text generation for disconnected clients. - * Non-UTF-8 responses from the model can be now automatically changed to Unicode replacement - characters, due to their configurable handling. - * Intel GPU with paged attention is now supported. - * Support for Llama3.1 models. + * Prefix caching feature, caching the prompt evaluation to speed up text generation. + * Ability to compress the KV Cache to a lower precision, reducing memory consumption without + a significant loss of accuracy. + * ``stop`` sampling parameters, to define a sequence that stops text generation. + * ``logprobs`` sampling parameter, returning the probabilities to returned tokens. + * Generic metrics related to execution of the MediaPipe graph that can be used for autoscaling + based on the current load and the level of concurrency. 
+ * `Demo of text generation horizontal scalability `__ + using basic docker containers and Kubernetes. + * Automatic cancelling of text generation for disconnected clients. + * Non-UTF-8 responses from the model can be now automatically changed to Unicode replacement + characters, due to their configurable handling. + * Intel GPU with paged attention is now supported. + * Support for Llama3.1 models. -* The following has been improved: + * The following has been improved: - * Handling of model templates without bos_token is now fixed. - * Performance of the multinomial sampling algorithm. - * ``finish_reason`` in the response correctly determines reaching max_tokens (length) and - completing the sequence (stop). - * Security and stability. + * Handling of model templates without bos_token is now fixed. + * Performance of the multinomial sampling algorithm. + * ``finish_reason`` in the response correctly determines reaching max_tokens (length) and + completing the sequence (stop). + * Security and stability. -Neural Network Compression Framework -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + **Neural Network Compression Framework** -* The LoRA Correction algorithm is now included in the Weight Compression method, improving the - accuracy of INT4-compressed models on top of other data-aware algorithms, such as AWQ and - Scale Estimation. To enable it, set the lora_correction option to True in - nncf.compress_weights(). -* The GPTQ compression algorithm can now be combined with the Scale Estimation algorithm, - making it possible to run GPTQ, AWQ, and Scale Estimation together, for the optimum-accuracy - INT4-compressed models. -* INT8 quantization of LSTMSequence and Convolution operations for constant inputs is now - enabled, resulting in better performance and reduced model size. 
+ * The LoRA Correction algorithm is now included in the Weight Compression method, improving the + accuracy of INT4-compressed models on top of other data-aware algorithms, such as AWQ and + Scale Estimation. To enable it, set the lora_correction option to True in + nncf.compress_weights(). + * The GPTQ compression algorithm can now be combined with the Scale Estimation algorithm, + making it possible to run GPTQ, AWQ, and Scale Estimation together, for the optimum-accuracy + INT4-compressed models. + * INT8 quantization of LSTMSequence and Convolution operations for constant inputs is now + enabled, resulting in better performance and reduced model size. -OpenVINO Tokenizers -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + **OpenVINO Tokenizers** -* Split and BPE tokenization operations have been reimplemented, resulting in improved - tokenization accuracy and performance. -* New building options are now available, offering up to a 12x reduction in binary size. -* An operation is now available to validate and skip/replace model-generated non-Unicode - bytecode sequences during detokenization. + * Split and BPE tokenization operations have been reimplemented, resulting in improved + tokenization accuracy and performance. + * New building options are now available, offering up to a 12x reduction in binary size. + * An operation is now available to validate and skip/replace model-generated non-Unicode + bytecode sequences during detokenization. 
-OpenVINO.GenAI -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + **OpenVINO.GenAI** -* New samples and pipelines are now available: + * New samples and pipelines are now available: - * An example IterableStreamer implementation in - `multinomial_causal_lm/python sample `__ + * An example IterableStreamer implementation in + `multinomial_causal_lm/python sample `__ -* GenAI compilation is now available as part of OpenVINO via the –DOPENVINO_EXTRA_MODULES CMake - option. + * GenAI compilation is now available as part of OpenVINO via the –DOPENVINO_EXTRA_MODULES CMake + option. -Other Changes and Known Issues -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + **Other Changes and Known Issues** -Jupyter Notebooks ------------------------------ + *Jupyter Notebooks* -* `Florence-2 `__ -* `NuExtract: Structure Extraction `__ -* `Flux.1 Image Generation `__ -* `PixArt-α: Photorealistic Text-to-Image Synthesis `__ -* `Phi-3-Vision Visual Language Assistant `__ -* `MiniCPMV2.6 `__ -* `InternVL2 `__ -* The list of supported models in - `LLM chatbot `__ - now includes Phi3.5, Gemma2 support + * `Florence-2 `__ + * `NuExtract: Structure Extraction `__ + * `Flux.1 Image Generation `__ + * `PixArt-α: Photorealistic Text-to-Image Synthesis `__ + * `Phi-3-Vision Visual Language Assistant `__ + * `MiniCPMV2.6 `__ + * `InternVL2 `__ + * The list of supported models in + `LLM chatbot `__ + now includes Phi3.5, Gemma2 support -Known Issues ------------------------------ + *Known Issues* -| **Component: CPU** -| ID: CVS-150542, CVS-145996 -| Description: -| The upgrade of default oneTBB on Linux platforms to 2021.13.0 improves overall - performance on latest Intel XEON platform but causes regression in some cases. 
Limit the - threads usage of postprocessing done by Torch can mitigate the regression (For example: - torch.set_num_threads(n), n can be 1, beam search number, prompt batch size or other - numbers). - -| **Component: OpenVINO.Genai** -| ID: 149694 -| Description: -| Passing openvino.Tensor instance to LLMPipleine triggers incompatible arguments error if - OpenVINO and GenAI are installed from PyPI on Windows. + | **Component: CPU** + | ID: CVS-150542, CVS-145996 + | Description: + | The upgrade of default oneTBB on Linux platforms to 2021.13.0 improves overall + performance on the latest Intel® Xeon® platform but causes regression in some cases. Limiting the + thread usage of postprocessing done by Torch can mitigate the regression (for example: + torch.set_num_threads(n), where n can be 1, the beam search number, the prompt batch size, or other + numbers). + + | **Component: OpenVINO.Genai** + | ID: 149694 + | Description: + | Passing an openvino.Tensor instance to LLMPipeline triggers an incompatible arguments error if + OpenVINO and GenAI are installed from PyPI on Windows. -| **Component: OpenVINO.Genai** -| ID: 148308 -| Description: -| OpenVINO.GenAI archive doesn't have debug libraries for OpenVINO Tokenizers and - OpenVINO.GenAI. + | **Component: OpenVINO.Genai** + | ID: 148308 + | Description: + | OpenVINO.GenAI archive doesn't have debug libraries for OpenVINO Tokenizers and + OpenVINO.GenAI. + + | **Component: ONNX for ARM** + | ID: n/a + | Description: + | For ARM binaries, the `1.16 ONNX library `__ + is not yet available. The ONNX library for ARM, version 1.15, does not include the latest + functional and security updates. Users should update to the latest version as it becomes + available. + | Currently, if an unverified AI model is supplied to the ONNX frontend, it could lead to a + directory traversal issue. Ensure that the file name and file path that a model contains + are verified and correct.
To learn more about the vulnerability, see: + `CVE-2024-27318 `__ and + `CVE-2024-27319 `__. + + | **Component: Kaldi** + | ID: n/a + | Description: + | There is a known issue with the Kaldi DL framework support on the Python version 3.12 due + to the numpy version incompatibilities. As Kaldi support in OpenVINO is currently deprecated + and will be discontinued with version 2025.0, the issue will not be addressed. -| **Component: ONNX for ARM** -| ID: n/a -| Description: -| For ARM binaries, the `1.16 ONNX library `__ - is not yet available. The ONNX library for ARM, version 1.15, does not include the latest - functional and security updates. Users should update to the latest version as it becomes - available. -| Currently, if an unverified AI model is supplied to the ONNX frontend, it could lead to a - directory traversal issue. Ensure that the file name and file path that a model contains - are verified and correct. To learn more about the vulnerability, see: - `CVE-2024-27318 `__ and - `CVE-2024-27319 `__. - -| **Component: Kaldi** -| ID: n/a -| Description: -| There is a known issue with the Kaldi DL framework support on the Python version 3.12 due - to the numpy version incompatibilities. As Kaldi support in OpenVINO is currently deprecated - and will be discontinued with version 2025.0, the issue will not be addressed. -Previous 2024 releases -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -.. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -.. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .. dropdown:: 2024.3 - 31 July 2024 :animate: fade-in-slide-down @@ -614,7 +947,7 @@ Previous 2024 releases * Preview: addition of the :doc:`Generate API <../learn-openvino/llm_inference_guide/genai-guide>`, a simplified API for text generation using large language models with only a few lines of code. 
The API is available through the newly launched OpenVINO GenAI package. - * Support for Intel Atom® Processor X Series. For more details, see :doc:`System Requirements <./release-notes-openvino/system-requirements>`. + * Support for Intel® Atom® Processor X Series. For more details, see :doc:`System Requirements <./release-notes-openvino/system-requirements>`. * Preview: Support for Intel® Xeon® 6 processor. **OpenVINO™ Runtime** @@ -638,8 +971,8 @@ Previous 2024 releases *CPU Device Plugin* * Performance when using latency mode in FP32 precision has been improved on Intel client - platforms, including Core Ultra (codename Meteor Lake) and 13th Gen Core processors - (codename Raptor Lake). + platforms, including Intel® Core™ Ultra (formerly codenamed Meteor Lake) and 13th Gen Core + processors (formerly codenamed Raptor Lake). * 2nd token latency and memory footprint for FP16 LLMs have been improved significantly on AVX2 and AVX512 based CPU platforms, particularly for small batch sizes. * PagedAttention has been optimized on AVX2, AVX512 and AMX platforms together with INT8 KV cache @@ -653,9 +986,9 @@ Previous 2024 releases * Both first token and average token latency of LLMs is improved on all GPU platforms, most significantly on discrete GPUs. Memory usage of LLMs has been reduced as well. - * Stable Diffusion FP16 performance improved on Core Ultra platforms, with significant pipeline - improvement for models with dynamic-shaped input. Memory usage of the pipeline has been reduced, - as well. + * Stable Diffusion FP16 performance improved on Intel® Core™ Ultra platforms, with significant + pipeline improvement for models with dynamic-shaped input. Memory usage of the pipeline + has been reduced, as well. * Optimized permute_f_y kernel performance has been improved. *NPU Device Plugin* @@ -710,7 +1043,7 @@ Previous 2024 releases * OpenVINO Model server can be now used for text generation use cases using OpenAI compatible API. 
* Added support for continuous batching and PagedAttention algorithms for text generation with - fast and efficient in high concurrency load especially on Intel Xeon processors. + fast and efficient performance under high concurrency load, especially on Intel® Xeon® processors. `Learn more about it `__. **Neural Network Compression Framework** @@ -753,8 +1086,9 @@ Previous 2024 releases | Description: | In 2024.2, oneTBB 2021.2.x is used for Intel Distribution of OpenVINO Ubuntu and Red Hat archives, instead of system TBB/oneTBB. This improves performance on the new generation of - Xeon platforms but may increase latency of some models on the previous generation. You can - build OpenVINO with **-DSYSTEM_TBB=ON** to get better latency performance for these models. + Intel® Xeon® platforms but may increase latency of some models on the previous generation. + You can build OpenVINO with **-DSYSTEM_TBB=ON** to get better latency performance for + these models. | **Component: python API** | ID: CVS-141744 @@ -1069,8 +1403,8 @@ Previous 2024 releases * More portability and performance to run AI at the edge, in the cloud, or locally. * A preview plugin architecture of the integrated Neural Processor Unit (NPU) as part of - Intel® Core™ Ultra processor (codename Meteor Lake) is now included in the main OpenVINO™ - package on PyPI. + Intel® Core™ Ultra processor (formerly codenamed Meteor Lake) is now included in the + main OpenVINO™ package on PyPI. * Improved performance on ARM by enabling the ARM threading library. In addition, we now support multi-core ARM processors and enabled FP16 precision by default on MacOS.
* New and improved LLM serving samples from OpenVINO Model Server for multi-batch inputs and @@ -1263,10 +1597,11 @@ Previous 2024 releases | **Component: CPU runtime** | *ID:* N/A | *Description:* - | Performance results (first token latency) may vary from those offered by the previous OpenVINO version, for - “latency” hint inference of LLMs with long prompts on Xeon platforms with 2 or more - sockets. The reason is that all CPU cores of just the single socket running the application - are employed, lowering the memory overhead for LLMs when numa control is not used. + | Performance results (first token latency) may vary from those offered by the previous + OpenVINO version, for “latency” hint inference of LLMs with long prompts on Intel® Xeon® + platforms with 2 or more sockets. The reason is that all CPU cores of just the single + socket running the application are employed, lowering the memory overhead for LLMs when + numa control is not used. | *Workaround:* | The behavior is expected but stream and thread configuration may be used to include cores from all sockets. @@ -1315,22 +1650,26 @@ Discontinued in 2024 for applying NNCF optimization on top of models from Hugging Face. * Support for Apache MXNet, Caffe, and Kaldi model formats. Conversion to ONNX may be used as a solution. + * The macOS x86_64 debug bins are no longer provided with the OpenVINO toolkit, starting + with OpenVINO 2024.5. + * Python 3.8 is no longer supported, starting with OpenVINO 2024.5. -Deprecated and to be removed in the future --------------------------------------------- + * As MxNet doesn't support Python version higher than 3.8, according to the + `MxNet PyPI project `__, + it is no longer supported by OpenVINO, either. -* The macOS x86_64 debug bins will no longer be provided with the OpenVINO toolkit, starting - with OpenVINO 2024.5. -* Python 3.8 is now considered deprecated, and it will not be available beyond the 2024.4 - OpenVINO version. 
+ * Discrete Keem Bay is no longer supported, starting with OpenVINO 2024.5. + * Support for discrete devices (formerly codenamed Raptor Lake) is no longer available for + NPU. - * As MxNet doesn't support Python version higher than 3.8, according to the - `MxNet PyPI project `__, - it will no longer be supported in future versions, either. -* Discrete Keem Bay support is now considered deprecated and will be fully removed with OpenVINO 2024.5 +Deprecated and to be removed in the future +-------------------------------------------- + * Intel® Streaming SIMD Extensions (Intel® SSE) will be supported in source code form, but not - enabled in the binary package by default, starting with OpenVINO 2025.0 + enabled in the binary package by default, starting with OpenVINO 2025.0. +* Ubuntu 20.04 support will be deprecated in future OpenVINO releases due to the end of + standard support. * The openvino-nightly PyPI module will soon be discontinued. End-users should proceed with the Simple PyPI nightly repo instead. More information in `Release Policy `__. diff --git a/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst b/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst index a12cacf8402953..79a9f63821c16f 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst @@ -37,7 +37,7 @@ CPU * Ubuntu 20.04 long-term support (LTS), 64-bit (Kernel 5.15+) * macOS 12.6 and above, 64-bit and ARM64 * CentOS 7 - * Red Hat Enterprise Linux 9.3-9.4, 64-bit + * Red Hat Enterprise Linux (RHEL) 8 and 9, 64-bit * openSUSE Tumbleweed, 64-bit and ARM64 * Ubuntu 20.04 ARM64 @@ -65,7 +65,7 @@ GPU * Ubuntu 22.04 long-term support (LTS), 64-bit * Ubuntu 20.04 long-term support (LTS), 64-bit * CentOS 7 - * Red Hat Enterprise Linux 9.3-9.4, 64-bit + * Red Hat Enterprise Linux (RHEL) 8 and 9, 64-bit ..
tab-item:: Additional considerations diff --git a/docs/articles_en/assets/images/genai_main_diagram.svg b/docs/articles_en/assets/images/genai_main_diagram.svg new file mode 100644 index 00000000000000..b01cbd827acb3c --- /dev/null +++ b/docs/articles_en/assets/images/genai_main_diagram.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07ce964e115f1e3942cdf381f44b4dc6d466df62c70396306a4f241fb07ea3ed +size 392244 diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst index 164033bdd2831c..abad632e5ae86c 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sequence/lstm-sequence-5.rst @@ -26,13 +26,12 @@ A single cell in the sequence is implemented in the same way as in :doc:`LSTM Ce * **Type**: ``int`` * **Required**: *yes* -* *activations* +* *direction* - * **Description**: *activations* specifies activation functions for gates, there are three gates, so three activation functions should be specified as a value for this attributes - * **Range of values**: any combination of *relu*, *sigmoid*, *tanh* - * **Type**: a list of strings - * **Default value**: *sigmoid,tanh,tanh* - * **Required**: *no* + * **Description**: Specify if the RNN is forward, reverse, or bidirectional. If it is one of *forward* or *reverse* then ``num_directions = 1``, if it is *bidirectional*, then ``num_directions = 2``. This ``num_directions`` value specifies input/output shape requirements. 
+ * **Range of values**: *forward*, *reverse*, *bidirectional* + * **Type**: ``string`` + * **Required**: *yes* * *activations_alpha, activations_beta* @@ -42,6 +41,14 @@ A single cell in the sequence is implemented in the same way as in :doc:`LSTM Ce * **Default value**: None * **Required**: *no* +* *activations* + + * **Description**: *activations* specifies activation functions for gates. There are three gates, so three activation functions should be specified as the value of this attribute + * **Range of values**: any combination of *relu*, *sigmoid*, *tanh* + * **Type**: a list of strings + * **Default value**: *sigmoid,tanh,tanh* + * **Required**: *no* + * *clip* * **Description**: *clip* specifies bound values *[-C, C]* for tensor clipping. Clipping is performed before activations. @@ -50,12 +57,6 @@ A single cell in the sequence is implemented in the same way as in :doc:`LSTM Ce * **Default value**: *infinity* that means that the clipping is not applied * **Required**: *no* -* *direction* - - * **Description**: Specify if the RNN is forward, reverse, or bidirectional. If it is one of *forward* or *reverse* then ``num_directions = 1``, if it is *bidirectional*, then ``num_directions = 2``. This ``num_directions`` value specifies input/output shape requirements. - * **Range of values**: *forward*, *reverse*, *bidirectional* - * **Type**: ``string`` - * **Required**: *yes* **Inputs** diff --git a/docs/articles_en/get-started/configurations/genai-dependencies.rst b/docs/articles_en/get-started/configurations/genai-dependencies.rst index e347c11953fbc2..59d29ef3108da0 100644 --- a/docs/articles_en/get-started/configurations/genai-dependencies.rst +++ b/docs/articles_en/get-started/configurations/genai-dependencies.rst @@ -4,8 +4,8 @@ OpenVINO™ GenAI Dependencies OpenVINO™ GenAI depends on both `OpenVINO `__ and `OpenVINO Tokenizers `__. During OpenVINO™ GenAI installation from PyPi, the same versions of OpenVINO and OpenVINO Tokenizers -are used (e.g.
``openvino==2024.4.0`` and ``openvino-tokenizers==2024.4.0.0`` are installed for -``openvino-genai==2024.4.0``). +are used (e.g. ``openvino==2024.5.0`` and ``openvino-tokenizers==2024.5.0.0`` are installed for +``openvino-genai==2024.5.0``). Trying to update any of the dependency packages might result in a version incompatiblibty due to different Application Binary Interfaces (ABIs), which will result in errors while running diff --git a/docs/articles_en/get-started/install-openvino.rst b/docs/articles_en/get-started/install-openvino.rst index 7f26ab9ec72c9f..48ea0a434c5388 100644 --- a/docs/articles_en/get-started/install-openvino.rst +++ b/docs/articles_en/get-started/install-openvino.rst @@ -1,4 +1,4 @@ -Install OpenVINO™ 2024.4 +Install OpenVINO™ 2024.5 ========================== @@ -19,16 +19,16 @@ Install OpenVINO™ 2024.4 .. raw:: html - + - + -OpenVINO 2024.4, described here, is not a Long-Term-Support version! +OpenVINO 2024.5, described here, is not a Long-Term-Support version! All currently supported versions are: -* 2024.4 (development) +* 2024.5 (development) * 2023.3 (LTS) -* 2022.3 (LTS) + .. dropdown:: Effortless GenAI integration with OpenVINO GenAI Flavor diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst index e777c06253a37a..20965f2f22d095 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst @@ -58,7 +58,7 @@ Step 1: Download and Install the OpenVINO Core Components cd /Downloads -4. Download the `OpenVINO Runtime archive file for your system `_, extract the files, rename the extracted folder and move it to the desired path: +4. Download the `OpenVINO Runtime archive file for your system `_, extract the files, rename the extracted folder and move it to the desired path: .. 
tab-set:: @@ -73,9 +73,9 @@ Step 1: Download and Install the OpenVINO Core Components .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.4/linux/l_openvino_toolkit_ubuntu24_2024.4.0.16579.c3152d32c9c_x86_64.tgz --output openvino_2024.4.0.tgz - tar -xf openvino_2024.4.0.tgz - sudo mv l_openvino_toolkit_ubuntu24_2024.4.0.16579.c3152d32c9c_x86_64 /opt/intel/openvino_2024.4.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/linux/l_openvino_toolkit_ubuntu24_2024.5.0.17288.7975fa5da0c_x86_64.tgz --output openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv l_openvino_toolkit_ubuntu24_2024.5.0.17288.7975fa5da0c_x86_64 /opt/intel/openvino_2024.5.0 .. tab-item:: Ubuntu 22.04 :sync: ubuntu-22 @@ -83,9 +83,9 @@ Step 1: Download and Install the OpenVINO Core Components .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.4/linux/l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64.tgz --output openvino_2024.4.0.tgz - tar -xf openvino_2024.4.0.tgz - sudo mv l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64 /opt/intel/openvino_2024.4.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/linux/l_openvino_toolkit_ubuntu22_2024.5.0.17288.7975fa5da0c_x86_64.tgz --output openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv l_openvino_toolkit_ubuntu22_2024.5.0.17288.7975fa5da0c_x86_64 /opt/intel/openvino_2024.5.0 .. tab-item:: Ubuntu 20.04 :sync: ubuntu-20 @@ -93,9 +93,9 @@ Step 1: Download and Install the OpenVINO Core Components .. 
code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.4/linux/l_openvino_toolkit_ubuntu20_2024.4.0.16579.c3152d32c9c_x86_64.tgz --output openvino_2024.4.0.tgz - tar -xf openvino_2024.4.0.tgz - sudo mv l_openvino_toolkit_ubuntu20_2024.4.0.16579.c3152d32c9c_x86_64 /opt/intel/openvino_2024.4.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/linux/l_openvino_toolkit_ubuntu20_2024.5.0.17288.7975fa5da0c_x86_64.tgz --output openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv l_openvino_toolkit_ubuntu20_2024.5.0.17288.7975fa5da0c_x86_64 /opt/intel/openvino_2024.5.0 .. tab-item:: RHEL 8 :sync: rhel-8 @@ -103,18 +103,18 @@ Step 1: Download and Install the OpenVINO Core Components .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.4/linux/l_openvino_toolkit_rhel8_2024.4.0.16579.c3152d32c9c_x86_64.tgz --output openvino_2024.4.0.tgz - tar -xf openvino_2024.4.0.tgz - sudo mv l_openvino_toolkit_rhel8_2024.4.0.16579.c3152d32c9c_x86_64 /opt/intel/openvino_2024.4.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/linux/l_openvino_toolkit_rhel8_2024.5.0.17288.7975fa5da0c_x86_64.tgz --output openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv l_openvino_toolkit_rhel8_2024.5.0.17288.7975fa5da0c_x86_64 /opt/intel/openvino_2024.5.0 .. tab-item:: CentOS 7 :sync: centos-7 .. 
code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.4/linux/l_openvino_toolkit_centos7_2024.4.0.16579.c3152d32c9c_x86_64.tgz --output openvino_2024.4.0.tgz - tar -xf openvino_2024.4.0.tgz - sudo mv l_openvino_toolkit_centos7_2024.4.0.16579.c3152d32c9c_x86_64 /opt/intel/openvino_2024.4.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/linux/l_openvino_toolkit_centos7_2024.5.0.17288.7975fa5da0c_x86_64.tgz --output openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv l_openvino_toolkit_centos7_2024.5.0.17288.7975fa5da0c_x86_64 /opt/intel/openvino_2024.5.0 .. tab-item:: ARM 64-bit @@ -122,25 +122,25 @@ Step 1: Download and Install the OpenVINO Core Components .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.4/linux/l_openvino_toolkit_ubuntu20_2024.4.0.16579.c3152d32c9c_arm64.tgz -O openvino_2024.4.0.tgz - tar -xf openvino_2024.4.0.tgz - sudo mv l_openvino_toolkit_ubuntu20_2024.4.0.16579.c3152d32c9c_arm64 /opt/intel/openvino_2024.4.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/linux/l_openvino_toolkit_ubuntu20_2024.5.0.17288.7975fa5da0c_arm64.tgz -O openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv l_openvino_toolkit_ubuntu20_2024.5.0.17288.7975fa5da0c_arm64 /opt/intel/openvino_2024.5.0 .. tab-item:: ARM 32-bit :sync: arm-32 .. 
code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.4/linux/l_openvino_toolkit_debian10_2024.4.0.16579.c3152d32c9c_armhf.tgz -O openvino_2024.4.0.tgz - tar -xf openvino_2024.4.0.tgz - sudo mv l_openvino_toolkit_debian10_2024.4.0.16579.c3152d32c9c_armhf /opt/intel/openvino_2024.4.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/linux/l_openvino_toolkit_debian10_2024.5.0.17288.7975fa5da0c_armhf.tgz -O openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv l_openvino_toolkit_debian10_2024.5.0.17288.7975fa5da0c_armhf /opt/intel/openvino_2024.5.0 5. Install required system dependencies on Linux. To do this, OpenVINO provides a script in the extracted installation directory. Run the following command: .. code-block:: sh - cd /opt/intel/openvino_2024.4.0 + cd /opt/intel/openvino_2024.5.0 sudo -E ./install_dependencies/install_openvino_dependencies.sh 6. (Optional) Install *numpy* Python Library: @@ -149,11 +149,11 @@ Step 1: Download and Install the OpenVINO Core Components This step is required only when you decide to use Python API. - You can use the ``requirements.txt`` file from the ``/opt/intel/openvino_2024.4.0/python`` folder: + You can use the ``requirements.txt`` file from the ``/opt/intel/openvino_2024.5.0/python`` folder: .. code-block:: sh - cd /opt/intel/openvino_2024.4.0 + cd /opt/intel/openvino_2024.5.0 python3 -m pip install -r ./python/requirements.txt 7. For simplicity, it is useful to create a symbolic link as below: @@ -162,7 +162,7 @@ Step 1: Download and Install the OpenVINO Core Components cd /opt/intel - sudo ln -s openvino_2024.4.0 openvino_2024 + sudo ln -s openvino_2024.5.0 openvino_2024 .. note:: If you have already installed a previous release of OpenVINO 2024, a symbolic link to the ``openvino_2024`` folder may already exist. 
diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst index e9157a99e1c882..e4bff378106122 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst @@ -47,7 +47,7 @@ Step 1: Install OpenVINO Core Components cd /Downloads -4. Download the `OpenVINO Runtime archive file for macOS `__, extract the files, rename the extracted folder and move it to the desired path: +4. Download the `OpenVINO Runtime archive file for macOS `__, extract the files, rename the extracted folder and move it to the desired path: .. tab-set:: @@ -57,9 +57,9 @@ Step 1: Install OpenVINO Core Components .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.4/macos/m_openvino_toolkit_macos_12_6_2024.4.0.16579.c3152d32c9c_x86_64.tgz --output openvino_2024.4.0.tgz - tar -xf openvino_2024.4.0.tgz - sudo mv m_openvino_toolkit_macos_12_6_2024.4.0.16579.c3152d32c9c_x86_64 /opt/intel/openvino_2024.4.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/macos/m_openvino_toolkit_macos_12_6_2024.5.0.17288.7975fa5da0c_x86_64.tgz --output openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv m_openvino_toolkit_macos_12_6_2024.5.0.17288.7975fa5da0c_x86_64 /opt/intel/openvino_2024.5.0 .. tab-item:: ARM, 64-bit :sync: arm-64 @@ -67,9 +67,9 @@ Step 1: Install OpenVINO Core Components .. 
code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.4/macos/m_openvino_toolkit_macos_12_6_2024.4.0.16579.c3152d32c9c_arm64.tgz --output openvino_2024.4.0.tgz - tar -xf openvino_2024.4.0.tgz - sudo mv m_openvino_toolkit_macos_12_6_2024.4.0.16579.c3152d32c9c_arm64 /opt/intel/openvino_2024.4.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/macos/m_openvino_toolkit_macos_12_6_2024.5.0.17288.7975fa5da0c_arm64.tgz --output openvino_2024.5.0.tgz + tar -xf openvino_2024.5.0.tgz + sudo mv m_openvino_toolkit_macos_12_6_2024.5.0.17288.7975fa5da0c_arm64 /opt/intel/openvino_2024.5.0 5. (Optional) Install *numpy* Python Library: @@ -78,11 +78,11 @@ Step 1: Install OpenVINO Core Components This step is required only when you decide to use Python API. - You can use the ``requirements.txt`` file from the ``/opt/intel/openvino_2024.4.0/python`` folder: + You can use the ``requirements.txt`` file from the ``/opt/intel/openvino_2024.5.0/python`` folder: .. code-block:: sh - cd /opt/intel/openvino_2024.4.0 + cd /opt/intel/openvino_2024.5.0 python3 -m pip install -r ./python/requirements.txt 6. For simplicity, it is useful to create a symbolic link as below: @@ -90,7 +90,7 @@ Step 1: Install OpenVINO Core Components .. code-block:: sh - sudo ln -s /opt/intel/openvino_2024.4.0 /opt/intel/openvino_2024 + sudo ln -s /opt/intel/openvino_2024.5.0 /opt/intel/openvino_2024 .. note:: diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst index 8f3efeeb720dc9..9db280ec81472e 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst @@ -41,18 +41,18 @@ Step 1: Download and Install OpenVINO Core Components ``C:\Program Files (x86)\Intel`` is the recommended folder. 
You may also use a different path if desired or if you don't have administrator privileges on your computer. -2. Download the `OpenVINO Runtime archive file for Windows `__ to your local ``Downloads`` folder. +2. Download the `OpenVINO Runtime archive file for Windows `__ to your local ``Downloads`` folder. If you prefer using command-lines, run the following commands in the command prompt window you opened: .. code-block:: sh cd /Downloads - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.4/windows/w_openvino_toolkit_windows_2024.4.0.16579.c3152d32c9c_x86_64.zip --output openvino_2024.4.0.zip + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/windows/w_openvino_toolkit_windows_2024.5.0.17288.7975fa5da0c_x86_64.zip --output openvino_2024.5.0.zip .. note:: - A ``.sha256`` file is provided together with the archive file to validate your download process. To do that, download the ``.sha256`` file from the same repository and run ``CertUtil -hashfile openvino_2024.4.0.zip SHA256``. Compare the returned value in the output with what's in the ``.sha256`` file: if the values are the same, you have downloaded the correct file successfully; if not, create a Support ticket `here `__. + A ``.sha256`` file is provided together with the archive file to validate your download process. To do that, download the ``.sha256`` file from the same repository and run ``CertUtil -hashfile openvino_2024.5.0.zip SHA256``. Compare the returned value in the output with what's in the ``.sha256`` file: if the values are the same, you have downloaded the correct file successfully; if not, create a Support ticket `here `__. 3. Use your favorite tool to extract the archive file, rename the extracted folder, and move it to the ``C:\Program Files (x86)\Intel`` directory. @@ -61,9 +61,9 @@ Step 1: Download and Install OpenVINO Core Components .. 
code-block:: sh - tar -xf openvino_2024.4.0.zip - ren w_openvino_toolkit_windows_2024.4.0.16579.c3152d32c9c_x86_64 openvino_2024.4.0 - move openvino_2024.4.0 "C:\Program Files (x86)\Intel" + tar -xf openvino_2024.5.0.zip + ren w_openvino_toolkit_windows_2024.5.0.17288.7975fa5da0c_x86_64 openvino_2024.5.0 + move openvino_2024.5.0 "C:\Program Files (x86)\Intel" 4. (Optional) Install *numpy* Python Library: @@ -72,11 +72,11 @@ Step 1: Download and Install OpenVINO Core Components This step is required only when you decide to use Python API. - You can use the ``requirements.txt`` file from the ``C:\Program Files (x86)\Intel\openvino_2024.4.0\python`` folder: + You can use the ``requirements.txt`` file from the ``C:\Program Files (x86)\Intel\openvino_2024.5.0\python`` folder: .. code-block:: sh - cd "C:\Program Files (x86)\Intel\openvino_2024.4.0" + cd "C:\Program Files (x86)\Intel\openvino_2024.5.0" python -m pip install -r .\python\requirements.txt @@ -85,7 +85,7 @@ Step 1: Download and Install OpenVINO Core Components .. code-block:: sh cd C:\Program Files (x86)\Intel - mklink /D openvino_2024 openvino_2024.4.0 + mklink /D openvino_2024 openvino_2024.5.0 .. note:: diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-genai.rst b/docs/articles_en/get-started/install-openvino/install-openvino-genai.rst index 34f04669e50d63..a10b0d0c7bbce4 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-genai.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-genai.rst @@ -47,24 +47,24 @@ Linux .. 
code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.4/linux/openvino_genai_ubuntu24_2024.4.0.0_x86_64.tar.gz --output openvino_genai_2024.4.0.0.tgz - tar -xf openvino_genai_2024.4.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.5/linux/openvino_genai_ubuntu24_2024.5.0.0_x86_64.tar.gz --output openvino_genai_2024.5.0.0.tgz + tar -xf openvino_genai_2024.5.0.0.tgz .. tab-item:: Ubuntu 22.04 :sync: ubuntu-22 .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.4/linux/openvino_genai_ubuntu22_2024.4.0.0_x86_64.tar.gz --output openvino_genai_2024.4.0.0.tgz - tar -xf openvino_genai_2024.4.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.5/linux/openvino_genai_ubuntu22_2024.5.0.0_x86_64.tar.gz --output openvino_genai_2024.5.0.0.tgz + tar -xf openvino_genai_2024.5.0.0.tgz .. tab-item:: Ubuntu 20.04 :sync: ubuntu-20 .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.4/linux/openvino_genai_ubuntu20_2024.4.0.0_x86_64.tar.gz --output openvino_genai_2024.4.0.0.tgz - tar -xf openvino_genai_2024.4.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.5/linux/openvino_genai_ubuntu20_2024.5.0.0_x86_64.tar.gz --output openvino_genai_2024.5.0.0.tgz + tar -xf openvino_genai_2024.5.0.0.tgz .. tab-item:: ARM 64-bit @@ -72,8 +72,8 @@ Linux .. 
code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.4/linux/openvino_genai_ubuntu20_2024.4.0.0_arm64.tar.gz -O openvino_genai_2024.4.0.0.tgz - tar -xf openvino_genai_2024.4.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.5/linux/openvino_genai_ubuntu20_2024.5.0.0_arm64.tar.gz --output openvino_genai_2024.5.0.0.tgz + tar -xf openvino_genai_2024.5.0.0.tgz Windows @@ -82,7 +82,7 @@ Windows .. code-block:: sh cd /Downloads - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.4/windows/openvino_genai_windows_2024.4.0.0_x86_64.zip --output openvino_genai_2024.4.0.0.zip + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.5/windows/openvino_genai_windows_2024.5.0.0_x86_64.zip --output openvino_genai_2024.5.0.0.zip macOS ++++++++++++++++++++++++++ @@ -94,16 +94,16 @@ macOS .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.4/macos/openvino_genai_macos_12_6_2024.4.0.0_x86_64.tar.gz --output openvino_genai_2024.4.0.0.tgz - tar -xf openvino_genai_2024.4.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.5/macos/openvino_genai_macos_12_6_2024.5.0.0_x86_64.tar.gz --output openvino_genai_2024.5.0.0.tgz + tar -xf openvino_genai_2024.5.0.0.tgz .. tab-item:: ARM, 64-bit :sync: arm-64 .. 
code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.4/macos/openvino_genai_macos_12_6_2024.4.0.0_arm64.tar.gz --output openvino_genai_2024.4.0.0.tgz - tar -xf openvino_genai_2024.4.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.5/macos/openvino_genai_macos_12_6_2024.5.0.0_arm64.tar.gz --output openvino_genai_2024.5.0.0.tgz + tar -xf openvino_genai_2024.5.0.0.tgz Here are the full guides: diff --git a/docs/articles_en/learn-openvino.rst b/docs/articles_en/learn-openvino.rst index 4fca64051003a7..98797c9c67c126 100644 --- a/docs/articles_en/learn-openvino.rst +++ b/docs/articles_en/learn-openvino.rst @@ -14,7 +14,7 @@ Learn OpenVINO Interactive Tutorials (Python) Sample Applications (Python & C++) - Large Language Model Inference Guide + Generative AI workflow @@ -29,5 +29,5 @@ as well as an experienced user. | :doc:`OpenVINO Samples ` | The OpenVINO samples (Python and C++) are simple console applications that show how to use specific OpenVINO API features. They can assist you in executing tasks such as loading a model, running inference, querying particular device capabilities, etc. -| :doc:`Large Language Models in OpenVINO ` +| :doc:`Generative AI workflow ` | Detailed information on how OpenVINO accelerates Generative AI use cases and what models it supports. This tutorial provides instructions for running Generative AI models using Hugging Face Optimum Intel and Native OpenVINO APIs. diff --git a/docs/articles_en/learn-openvino/llm_inference_guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide.rst index 36c001c015f744..bfc4f9b4c49173 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide.rst @@ -1,140 +1,106 @@ -Large Language Model Inference Guide +Generative AI workflow ======================================== .. 
meta:: - :description: Explore learning materials, including interactive - Python tutorials and sample console applications that explain - how to use OpenVINO features. + :description: learn how to use OpenVINO to run generative AI models. .. toctree:: :maxdepth: 1 :hidden: - Run LLMs with Optimum Intel - Run LLMs on OpenVINO GenAI Flavor - Run LLMs on Base OpenVINO + Inference with OpenVINO GenAI + Inference with Optimum Intel + Generative AI with Base OpenVINO (not recommended) OpenVINO Tokenizers -Large Language Models (LLMs) like GPT are transformative deep learning networks capable of a -broad range of natural language tasks, from text generation to language translation. OpenVINO -optimizes the deployment of these models, enhancing their performance and integration into -various applications. This guide shows how to use LLMs with OpenVINO, from model loading and -conversion to advanced use cases. + + +Generative AI is a specific area of Deep Learning models used for producing new and “original” +data, based on input in the form of image, sound, or natural language text. Due to their +complexity and size, generative AI pipelines are more difficult to deploy and run efficiently. +OpenVINO simplifies the process and ensures high-performance integrations, with the following +options: + +.. tab-set:: + + .. tab-item:: OpenVINO GenAI + + | - Suggested for production deployment for the supported use cases. + | - Smaller footprint and fewer dependencies. + | - More optimization and customization options. + | - Available in both Python and C++. + | - A limited set of supported use cases. + + :doc:`Install the OpenVINO GenAI package <../get-started/install-openvino/install-openvino-genai>` + and run generative models out of the box. With custom + API and tokenizers, among other components, it manages the essential tasks such as the + text generation loop, tokenization, and scheduling, offering ease of use and high + performance. + + .. 
tab-item:: Hugging Face integration + + | - Suggested for prototyping and, if the use case is not covered by OpenVINO GenAI, production. + | - Bigger footprint and more dependencies. + | - Limited customization due to Hugging Face dependency. + | - Not usable for C++ applications. + | - A very wide range of supported models. + + Using Optimum Intel is a great way to experiment with different models and scenarios, + thanks to a simple interface for the popular API and infrastructure offered by Hugging Face. + It also enables weight compression with + `Neural Network Compression Framework (NNCF) `__, + as well as conversion on the fly. For integration with the final product it may offer + lower performance, though. + +`Check out the GenAI Quick-start Guide [PDF] `__ The advantages of using OpenVINO for LLM deployment: -* **OpenVINO offers optimized LLM inference**: - provides a full C/C++ API, leading to faster operation than Python-based runtimes; includes a - Python API for rapid development, with the option for further optimization in C++. -* **Compatible with diverse hardware**: - supports CPUs, GPUs, and neural accelerators across ARM and x86/x64 architectures, integrated - Intel® Processor Graphics, discrete Intel® Arc™ A-Series Graphics, and discrete Intel® Data - Center GPU Flex Series; features automated optimization to maximize performance on target - hardware. -* **Requires fewer dependencies**: - than frameworks like Hugging Face and PyTorch, resulting in a smaller binary size and reduced - memory footprint, making deployments easier and updates more manageable. -* **Provides compression and precision management techniques**: - such as 8-bit and 4-bit weight compression, including embedding layers, and storage format - reduction. This includes fp16 precision for non-compressed models and int8/int4 for compressed - models, like GPTQ models from `Hugging Face `__. 
-* **Supports a wide range of deep learning models and architectures**: - including text, image, and audio generative models like Llama 2, MPT, OPT, Stable Diffusion, - Stable Diffusion XL. This enables the development of multimodal applications, allowing for - write-once, deploy-anywhere capabilities. -* **Enhances inference capabilities**: - fused inference primitives such as Scaled Dot Product Attention, Rotary Positional Embedding, - Group Query Attention, and Mixture of Experts. It also offers advanced features like in-place - KV-cache, dynamic quantization, KV-cache quantization and encapsulation, dynamic beam size - configuration, and speculative sampling. -* **Provides stateful model optimization**: - models from the Hugging Face Transformers are converted into a stateful form, optimizing - inference performance and memory usage in long-running text generation tasks by managing past - KV-cache tensors more efficiently internally. This feature is automatically activated for many - supported models, while unsupported ones remain stateless. Learn more about the - :doc:`Stateful models and State API <../openvino-workflow/running-inference/stateful-models>`. - -OpenVINO offers three main paths for Generative AI use cases: - -* **Hugging Face**: use OpenVINO as a backend for Hugging Face frameworks (transformers, - diffusers) through the `Optimum Intel `__ - extension. -* **OpenVINO GenAI Flavor**: use OpenVINO GenAI APIs (Python and C++). -* **Base OpenVINO**: use OpenVINO native APIs (Python and C++) with - `custom pipeline code `__. - -In both cases, the OpenVINO runtime is used for inference, and OpenVINO tools are used for -optimization. The main differences are in footprint size, ease of use, and customizability. - -The Hugging Face API is easy to learn, provides a simple interface and hides the complexity of -model initialization and text generation for a better developer experience. 
However, it has more -dependencies, less customization, and cannot be ported to C/C++. - -The OpenVINO GenAI Flavor reduces the complexity of LLMs implementation by -automatically managing essential tasks like the text generation loop, tokenization, -and scheduling. The Native OpenVINO API provides a more hands-on experience, -requiring manual setup of these functions. Both methods are designed to minimize dependencies -and the overall application footprint and enable the use of generative models in C++ applications. - -It is recommended to start with Hugging Face frameworks to experiment with different models and -scenarios. Then the model can be used with OpenVINO APIs if it needs to be optimized -further. Optimum Intel provides interfaces that enable model optimization (weight compression) -using `Neural Network Compression Framework (NNCF) `__, -and export models to the OpenVINO model format for use in native API applications. - -Proceed to run LLMs with: +.. dropdown:: Fewer dependencies and smaller footprint + :animate: fade-in-slide-down + :color: secondary + + Less bloated than frameworks such as Hugging Face and PyTorch, with a smaller binary size and reduced + memory footprint, which makes deployments easier and updates more manageable. + +.. dropdown:: Compression and precision management + :animate: fade-in-slide-down + :color: secondary + + Techniques such as 8-bit and 4-bit weight compression, including embedding layers, and storage + format reduction. This includes fp16 precision for non-compressed models and int8/int4 for + compressed models, like GPTQ models from `Hugging Face `__. + +.. dropdown:: Enhanced inference capabilities + :animate: fade-in-slide-down + :color: secondary + + Advanced features like in-place KV-cache, dynamic quantization, KV-cache quantization and + encapsulation, dynamic beam size configuration, speculative sampling, and more are + available. + +.. 
dropdown:: Stateful model optimization + :animate: fade-in-slide-down + :color: secondary + + Models from the Hugging Face Transformers are converted into a stateful form, optimizing + inference performance and memory usage in long-running text generation tasks by managing past + KV-cache tensors more efficiently internally. This feature is automatically activated for + many supported models, while unsupported ones remain stateless. Learn more about the + :doc:`Stateful models and State API <../openvino-workflow/running-inference/stateful-models>`. + +.. dropdown:: Optimized LLM inference + :animate: fade-in-slide-down + :color: secondary + + Includes a Python API for rapid development and C++ for further optimization, offering + better performance than Python-based runtimes. + + +Proceed to guides on: -* :doc:`Hugging Face and Optimum Intel <./llm_inference_guide/llm-inference-hf>` * :doc:`OpenVINO GenAI Flavor <./llm_inference_guide/genai-guide>` -* :doc:`Native OpenVINO API <./llm_inference_guide/llm-inference-native-ov>` - -The table below summarizes the differences between Hugging Face and the native OpenVINO API -approaches. - -.. dropdown:: Differences between Hugging Face and the native OpenVINO API - - .. list-table:: - :widths: 20 25 55 - :header-rows: 1 - - * - - - Hugging Face through OpenVINO - - OpenVINO Native API - * - Model support - - Supports transformer-based models such as LLMs - - Supports all model architectures from most frameworks - * - APIs - - Python (Hugging Face API) - - Python, C++ (OpenVINO API) - * - Model Format - - Source Framework / OpenVINO - - Source Framework / OpenVINO - * - Inference code - - Hugging Face based - - Custom inference pipelines - * - Additional dependencies - - Many Hugging Face dependencies - - Lightweight (e.g. numpy, etc.) 
- * - Application footprint - - Large - - Small - * - Pre/post-processing and glue code - - Provided through high-level Hugging Face APIs - - Must be custom implemented (see OpenVINO samples and notebooks) - * - Performance - - Good, but less efficient compared to native APIs - - Inherent speed advantage with C++, but requires hands-on optimization - * - Flexibility - - Constrained to Hugging Face API - - High flexibility with Python and C++; allows custom coding - * - Learning Curve and Effort - - Lower learning curve; quick to integrate - - Higher learning curve; requires more effort in integration - * - Ideal Use Case - - Ideal for quick prototyping and Python-centric projects - - Best suited for high-performance, resource-optimized production environments - * - Model Serving - - Paid service, based on CPU/GPU usage with Hugging Face - - Free code solution, run script for own server; costs may incur for cloud services - like AWS but generally cheaper than Hugging Face rates +* :doc:`Hugging Face and Optimum Intel <./llm_inference_guide/llm-inference-hf>` + + diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide-npu.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide-npu.rst index 4585ca97488023..d725b306d57908 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide-npu.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide-npu.rst @@ -1,4 +1,4 @@ -Run LLMs with OpenVINO GenAI Flavor on NPU +Inference with OpenVINO GenAI ========================================== .. meta:: @@ -9,7 +9,7 @@ This guide will give you extra details on how to utilize NPU with the GenAI flav for information on how to start. 
Prerequisites -############# +##################### Install required dependencies: @@ -17,35 +17,97 @@ Install required dependencies: python -m venv npu-env npu-env\Scripts\activate - pip install optimum-intel nncf==2.11 onnx==1.16.1 + pip install nncf==2.12 onnx==1.16.1 optimum-intel==1.19.0 pip install --pre openvino openvino-tokenizers openvino-genai --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly +Note that for systems based on Intel® Core™ Ultra Processors Series 2, more than 16GB of RAM +may be required to run prompts over 1024 tokens on models exceeding 7B parameters, +such as Llama-2-7B, Mistral-0.2-7B, and Qwen-2-7B. + Export an LLM model via Hugging Face Optimum-Intel ################################################## -A chat-tuned TinyLlama model is used in this example. The following conversion & optimization -settings are recommended when using the NPU: +Since **symmetrically-quantized 4-bit (INT4) models are preferred for inference on NPU**, make +sure to export the model with the proper conversion and optimization settings. + +| You may export LLMs via Optimum-Intel, using one of two compression methods: +| **group quantization** - for both smaller and larger models, +| **channel-wise quantization** - remarkably effective but for models exceeding 1 billion parameters. + +You select one of the methods by setting the ``--group-size`` parameter to either ``128`` or +``-1``, respectively. See the following examples: + +.. tab-set:: + + .. tab-item:: Group quantization + + .. code-block:: console + :name: group-quant + + optimum-cli export openvino -m TinyLlama/TinyLlama-1.1B-Chat-v1.0 --weight-format int4 --sym --ratio 1.0 --group-size 128 TinyLlama-1.1B-Chat-v1.0 + + .. tab-item:: Channel-wise quantization + + .. tab-set:: -.. code-block:: python + .. 
tab-item:: Data-free quantization - optimum-cli export openvino -m TinyLlama/TinyLlama-1.1B-Chat-v1.0 --weight-format int4 --sym --group-size 128 --ratio 1.0 TinyLlama -**For models exceeding 1 billion parameters**, it is recommended to use **channel-wise -quantization** that is remarkably effective. For example, you can try the approach with the -llama-2-7b-chat-hf model: + .. code-block:: console + :name: channel-wise-data-free-quant -.. code-block:: python + optimum-cli export openvino -m meta-llama/Llama-2-7b-chat-hf --weight-format int4 --sym --ratio 1.0 --group-size -1 Llama-2-7b-chat-hf - optimum-cli export openvino -m meta-llama/Llama-2-7b-chat-hf --weight-format int4 --sym --group-size -1 --ratio 1.0 Llama-2-7b-chat-hf + .. tab-item:: Data-aware quantization + + If you want to improve accuracy, make sure you: + + 1. Update NNCF: ``pip install nncf==2.13`` + 2. Use ``--scale-estimation --dataset=`` and activation-aware weight quantization ``--awq``: + + .. code-block:: console + :name: channel-wise-data-aware-quant + + optimum-cli export openvino -m meta-llama/Llama-2-7b-chat-hf --weight-format int4 --sym --group-size -1 --ratio 1.0 --awq --scale-estimation --dataset=wikitext2 Llama-2-7b-chat-hf + + + .. important:: + + Remember that the negative value of ``-1`` is required here, not ``1``. + + + +You can also try using 4-bit (INT4) +`GPTQ models `__, +which do not require specifying quantization parameters: + +.. code-block:: console + + optimum-cli export openvino -m TheBloke/Llama-2-7B-Chat-GPTQ Llama-2-7B-Chat-GPTQ + + +| Remember, NPU supports GenAI models quantized symmetrically to INT4. 
+| Below is a list of such models: + +* meta-llama/Meta-Llama-3-8B-Instruct +* microsoft/Phi-3-mini-4k-instruct +* Qwen/Qwen2-7B +* mistralai/Mistral-7B-Instruct-v0.2 +* openbmb/MiniCPM-1B-sft-bf16 +* TinyLlama/TinyLlama-1.1B-Chat-v1.0 +* TheBloke/Llama-2-7B-Chat-GPTQ +* Qwen/Qwen2-7B-Instruct-GPTQ-Int4 Run generation using OpenVINO GenAI ################################### -It is recommended to install the latest available +It is typically recommended to install the latest available `driver `__. -Use the following code snippet to perform generation with OpenVINO GenAI API: +Use the following code snippet to perform generation with OpenVINO GenAI API. +Note that **currently, the NPU pipeline supports greedy decoding only**. This means that +you need to add ``do_sample=False`` **to the** ``generate()`` **method:** .. tab-set:: @@ -53,26 +115,32 @@ Use the following code snippet to perform generation with OpenVINO GenAI API: :sync: py .. code-block:: python + :emphasize-lines: 4 import openvino_genai as ov_genai model_path = "TinyLlama" pipe = ov_genai.LLMPipeline(model_path, "NPU") - print(pipe.generate("The Sun is yellow because", max_new_tokens=100)) + print(pipe.generate("The Sun is yellow because", max_new_tokens=100, do_sample=False)) .. tab-item:: C++ :sync: cpp .. code-block:: cpp + :emphasize-lines: 8, 10 #include "openvino/genai/llm_pipeline.hpp" #include int main(int argc, char* argv[]) { std::string model_path = "TinyLlama"; ov::genai::LLMPipeline pipe(model_path, "NPU"); - std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(100)); + ov::genai::GenerationConfig config; + config.do_sample=false; + config.max_new_tokens=100; + std::cout << pipe.generate("The Sun is yellow because", config); } + Additional configuration options ################################ @@ -88,9 +155,9 @@ user explicitly sets a lower length limit for the response. 
You may configure both the 'maximum input prompt length' and 'minimum response length' using the following parameters: -* ``MAX_PROMPT_LEN``: Defines the maximum number of tokens that the LLM pipeline can process - for the input prompt (default: 1024). -* ``MIN_RESPONSE_LEN``: Defines the minimum number of tokens that the LLM pipeline will generate +* ``MAX_PROMPT_LEN`` - defines the maximum number of tokens that the LLM pipeline can process + for the input prompt (default: 1024), +* ``MIN_RESPONSE_LEN`` - defines the minimum number of tokens that the LLM pipeline will generate in its response (default: 150). Use the following code snippet to change the default settings: @@ -113,10 +180,93 @@ Use the following code snippet to change the default settings: ov::AnyMap pipeline_config = { { "MAX_PROMPT_LEN", 1024 }, { "MIN_RESPONSE_LEN", 512 } }; ov::genai::LLMPipeline pipe(model_path, "NPU", pipeline_config); +Cache compiled models ++++++++++++++++++++++ + +Specify the ``NPUW_CACHE_DIR`` option in ``pipeline_config`` for NPU pipeline to +cache the compiled models. Using the code snippet below shortens the initialization time +of the pipeline runs coming next: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: python + + pipeline_config = { "NPUW_CACHE_DIR": ".npucache" } + pipe = ov_genai.LLMPipeline(model_path, "NPU", pipeline_config) + + .. tab-item:: C++ + :sync: cpp + + .. code-block:: cpp + + ov::AnyMap pipeline_config = { { "NPUW_CACHE_DIR", ".npucache" } }; + ov::genai::LLMPipeline pipe(model_path, "NPU", pipeline_config); + + +Disable memory allocation ++++++++++++++++++++++++++ + +In case of execution failures, either silent or with errors, try to update the NPU driver to +`32.0.100.3104 or newer `__. +If the update is not possible, set the ``DISABLE_OPENVINO_GENAI_NPU_L0`` +environment variable to disable NPU memory allocation, which might be supported +only on newer drivers for Intel Core Ultra 200V processors. 
+ +Set the environment variable in a terminal: + +.. tab-set:: + + .. tab-item:: Linux + :sync: linux + + .. code-block:: console + + export DISABLE_OPENVINO_GENAI_NPU_L0=1 + + .. tab-item:: Windows + :sync: win + + .. code-block:: console + + set DISABLE_OPENVINO_GENAI_NPU_L0=1 + + +Performance modes ++++++++++++++++++++++ + +You can configure the NPU pipeline with the ``GENERATE_HINT`` option to switch +between two different performance modes: + +* ``FAST_COMPILE`` (default) - enables fast compilation at the expense of performance, +* ``BEST_PERF`` - ensures best possible performance at lower compilation speed. + +Use the following code snippet: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: python + + pipeline_config = { "GENERATE_HINT": "BEST_PERF" } + pipe = ov_genai.LLMPipeline(model_path, "NPU", pipeline_config) + + .. tab-item:: C++ + :sync: cpp + + .. code-block:: cpp + + ov::AnyMap pipeline_config = { { "GENERATE_HINT", "BEST_PERF" } }; + ov::genai::LLMPipeline pipe(model_path, "NPU", pipeline_config); + Additional Resources #################### * :doc:`NPU Device <../../openvino-workflow/running-inference/inference-devices-and-modes/npu-device>` * `OpenVINO GenAI Repo `__ -* `Neural Network Compression Framework `__ \ No newline at end of file +* `Neural Network Compression Framework `__ diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst index ebd4667d544616..16290b17eca323 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst @@ -1,4 +1,4 @@ -Run LLM Inference on OpenVINO with the GenAI Flavor +Inference with OpenVINO GenAI =============================================================================================== .. 
meta:: @@ -9,39 +9,333 @@ Run LLM Inference on OpenVINO with the GenAI Flavor :hidden: NPU inference of LLMs - genai-guide/genai-use-cases -This guide will show you how to integrate the OpenVINO GenAI flavor into your application, covering -loading a model and passing the input context to receive generated text. Note that the vanilla flavor of OpenVINO -will not work with these instructions, make sure to -:doc:`install OpenVINO GenAI <../../get-started/install-openvino/install-openvino-genai>`. +OpenVINO™ GenAI is a library of pipelines and methods, extending the OpenVINO runtime to work +with generative AI models more efficiently. This article provides reference code and guidance +on its usage. Note that the base OpenVINO version will not work with these instructions, +make sure to :doc:`install OpenVINO with GenAI <../../get-started/install-openvino/install-openvino-genai>`. -.. note:: +.. image:: ../../assets/images/genai_main_diagram.svg + :align: center + :width: 500 + :alt: OpenVINO workflow diagram for convenience - The examples use the CPU as the target device, however, the GPU is also supported. - Note that for the LLM pipeline, the GPU is used only for inference, while token selection, tokenization, and - detokenization remain on the CPU, for efficiency. Tokenizers are represented as a separate model and also run - on the CPU. -1. Export an LLM model via Hugging Face Optimum-Intel. A chat-tuned TinyLlama model is used in this example: +| Here is sample code for several Generative AI use case scenarios. Note that these are very basic + examples and may need adjustments for your specific needs, like changing the inference device. +| For a more extensive instruction and additional options, see the + `step-by-step chat-bot guide <#chat-bot-use-case-step-by-step>`__ below. - .. code-block:: python +.. 
dropdown:: Text-to-Image Generation - optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weight-format fp16 --trust-remote-code "TinyLlama-1.1B-Chat-v1.0" + .. tab-set:: + + .. tab-item:: Python + :sync: python + + .. tab-set:: + + .. tab-item:: main.py + :name: mainpy + + .. code-block:: python + + import openvino_genai + from PIL import Image + import numpy as np + + class Generator(openvino_genai.Generator): + def __init__(self, seed, mu=0.0, sigma=1.0): + openvino_genai.Generator.__init__(self) + np.random.seed(seed) + self.mu = mu + self.sigma = sigma + + def next(self): + return np.random.normal(self.mu, self.sigma) + + + def infer(model_dir: str, prompt: str): + device = 'CPU' # GPU can be used as well + random_generator = Generator(42) + pipe = openvino_genai.Text2ImagePipeline(model_dir, device) + image_tensor = pipe.generate( + prompt, + width=512, + height=512, + num_inference_steps=20, + num_images_per_prompt=1, + random_generator=random_generator + ) + + image = Image.fromarray(image_tensor.data[0]) + image.save("image.bmp") + + .. tab-item:: LoRA.py + :name: lorapy + + .. code-block:: python - *Optional*. Optimize the model: + import openvino as ov + import openvino_genai + import numpy as np + import sys - The model is an optimized OpenVINO IR with FP16 precision. For enhanced LLM performance, - it is recommended to use lower precision for model weights, such as INT4, and to compress weights - using NNCF during model export directly: - .. 
code-block:: python + class Generator(openvino_genai.Generator): + def __init__(self, seed, mu=0.0, sigma=1.0): + openvino_genai.Generator.__init__(self) + np.random.seed(seed) + self.mu = mu + self.sigma = sigma + + def next(self): + return np.random.normal(self.mu, self.sigma) + + + def image_write(path: str, image_tensor: ov.Tensor): + from PIL import Image + image = Image.fromarray(image_tensor.data[0]) + image.save(path) + + + def infer(models_path: str, prompt: str, adapters: list): + prompt = "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting" + + device = "CPU" # GPU, NPU can be used as well + adapter_config = openvino_genai.AdapterConfig() + + for i in range(int(len(adapters) / 2)): + adapter = openvino_genai.Adapter(adapters[2 * i]) + alpha = float(adapters[2 * i + 1]) + adapter_config.add(adapter, alpha) + + pipe = openvino_genai.Text2ImagePipeline(models_path, device, adapters=adapter_config) + print("Generating image with LoRA adapters applied, resulting image will be in lora.bmp") + image = pipe.generate(prompt, + random_generator=Generator(42), + width=512, + height=896, + num_inference_steps=20) + + image_write("lora.bmp", image) + print("Generating image without LoRA adapters applied, resulting image will be in baseline.bmp") + image = pipe.generate(prompt, + adapters=openvino_genai.AdapterConfig(), + random_generator=Generator(42), + width=512, + height=896, + num_inference_steps=20 + ) + image_write("baseline.bmp", image) + + For more information, refer to the + `Python sample `__ + + .. tab-item:: C++ + :sync: cpp + + .. tab-set:: - optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weight-format int4 --trust-remote-code "TinyLlama-1.1B-Chat-v1.0" + .. tab-item:: main.cpp + :name: maincpp + .. code-block:: cpp -2. 
Perform generation using the new GenAI API: + #include "openvino/genai/text2image/pipeline.hpp" + + #include "imwrite.hpp" + + int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " ''"); + + const std::string models_path = argv[1], prompt = argv[2]; + const std::string device = "CPU"; // GPU, NPU can be used as well + + ov::genai::Text2ImagePipeline pipe(models_path, device); + ov::Tensor image = pipe.generate(prompt, + ov::genai::width(512), + ov::genai::height(512), + ov::genai::num_inference_steps(20), + ov::genai::num_images_per_prompt(1)); + + imwrite("image_%d.bmp", image, true); + + return EXIT_SUCCESS; + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + .. tab-item:: LoRA.cpp + :name: loracpp + + .. 
code-block:: cpp + + #include "openvino/genai/text2image/pipeline.hpp" + + #include "imwrite.hpp" + + int32_t main(int32_t argc, char* argv[]) try { + OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " '' [ ...]]"); + + const std::string models_path = argv[1], prompt = argv[2]; + const std::string device = "CPU"; // GPU, NPU can be used as well + + ov::genai::AdapterConfig adapter_config; + for(size_t i = 0; i < (argc - 3)/2; ++i) { + ov::genai::Adapter adapter(argv[3 + 2*i]); + float alpha = std::atof(argv[3 + 2*i + 1]); + adapter_config.add(adapter, alpha); + } + + ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config)); + + std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n"; + ov::Tensor image = pipe.generate(prompt, + ov::genai::random_generator(std::make_shared(42)), + ov::genai::width(512), + ov::genai::height(896), + ov::genai::num_inference_steps(20)); + imwrite("lora.bmp", image, true); + + std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n"; + image = pipe.generate(prompt, + ov::genai::adapters(), + ov::genai::random_generator(std::make_shared(42)), + ov::genai::width(512), + ov::genai::height(896), + ov::genai::num_inference_steps(20)); + imwrite("baseline.bmp", image, true); + + return EXIT_SUCCESS; + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + For more information, refer to the + `C++ sample `__ + + +.. dropdown:: Speech Recognition + + The application performs inference on speech recognition Whisper Models. The samples include + the ``WhisperPipeline`` class and use audio files in WAV format at a sampling rate of 16 kHz + as input. + + .. 
tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: python + + import openvino_genai + import librosa + + + def read_wav(filepath): + raw_speech, samplerate = librosa.load(filepath, sr=16000) + return raw_speech.tolist() + + + def infer(model_dir: str, wav_file_path: str): + device = "CPU" # GPU or NPU can be used as well. + pipe = openvino_genai.WhisperPipeline(model_dir, device) + + # The pipeline expects normalized audio with a sampling rate of 16kHz. + raw_speech = read_wav(wav_file_path) + result = pipe.generate( + raw_speech, + max_new_tokens=100, + language="<|en|>", + task="transcribe", + return_timestamps=True, + ) + + print(result) + + for chunk in result.chunks: + print(f"timestamps: [{chunk.start_ts}, {chunk.end_ts}] text: {chunk.text}") + + + For more information, refer to the + `Python sample `__. + + .. tab-item:: C++ + :sync: cpp + + .. code-block:: cpp + + #include "audio_utils.hpp" + #include "openvino/genai/whisper_pipeline.hpp" + + int main(int argc, char* argv[]) try { + if (3 > argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " \"\""); + } + + std::filesystem::path models_path = argv[1]; + std::string wav_file_path = argv[2]; + std::string device = "CPU"; // GPU or NPU can be used as well. + + ov::genai::WhisperPipeline pipeline(models_path, device); + + ov::genai::WhisperGenerationConfig config(models_path / "generation_config.json"); + config.max_new_tokens = 100; + config.language = "<|en|>"; + config.task = "transcribe"; + config.return_timestamps = true; + + // The pipeline expects normalized audio with a sampling rate of 16kHz. 
+ ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path); + auto result = pipeline.generate(raw_speech, config); + + std::cout << result << "\n"; + + for (auto& chunk : *result.chunks) { + std::cout << "timestamps: [" << chunk.start_ts << ", " << chunk.end_ts << "] text: " << chunk.text << "\n"; + } + + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) { + } + return EXIT_FAILURE; + } + + For more information, refer to the + `C++ sample `__. + + +.. dropdown:: Using GenAI in Chat Scenario + + For chat scenarios where inputs and outputs represent a conversation, maintaining KVCache + across inputs may prove beneficial. The ``start_chat`` and ``finish_chat`` chat-specific + methods are used to mark a conversation session, as shown in the samples below: .. tab-set:: @@ -50,9 +344,35 @@ will not work with these instructions, make sure to .. code-block:: python - import openvino_genai as ov_genai - pipe = ov_genai.LLMPipeline(model_path, "CPU") - print(pipe.generate("The Sun is yellow because", max_new_tokens=100)) + import openvino_genai + + + def streamer(subword): + print(subword, end='', flush=True) + return False + + + def infer(model_dir: str): + device = 'CPU' # GPU can be used as well. + pipe = openvino_genai.LLMPipeline(model_dir, device) + + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + + pipe.start_chat() + while True: + try: + prompt = input('question:\n') + except EOFError: + break + pipe.generate(prompt, config, streamer) + print('\n----------') + pipe.finish_chat() + + + + For more information, refer to the + `Python sample `__. .. tab-item:: C++ :sync: cpp @@ -60,27 +380,250 @@ will not work with these instructions, make sure to .. 
code-block:: cpp #include "openvino/genai/llm_pipeline.hpp" - #include - int main(int argc, char* argv[]) { - std::string model_path = argv[1]; - ov::genai::LLMPipeline pipe(model_path, "CPU"); - std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(100)); + int main(int argc, char* argv[]) try { + if (2 != argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " "); + } + std::string prompt; + std::string models_path = argv[1]; + + std::string device = "CPU"; // GPU, NPU can be used as well + ov::genai::LLMPipeline pipe(models_path, device); + + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + std::function streamer = [](std::string word) { + std::cout << word << std::flush; + return false; + }; + + pipe.start_chat(); + std::cout << "question:\n"; + while (std::getline(std::cin, prompt)) { + pipe.generate(prompt, config, streamer); + std::cout << "\n----------\n" + "question:\n"; + } + pipe.finish_chat(); + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__ + + +.. dropdown:: Using GenAI with Vision Language Models + + OpenVINO GenAI introduces the ``openvino_genai.VLMPipeline`` pipeline for + inference of multimodal text-generation Vision Language Models (VLMs). + With a text prompt and an image as input, VLMPipeline can generate text using + models such as LLava or MiniCPM-V. See the chat scenario presented + in the samples below: + + .. tab-set:: + + .. tab-item:: Python + :sync: py + + .. 
code-block:: python + + import numpy as np + import openvino_genai + from PIL import Image + from openvino import Tensor + from pathlib import Path + + + def streamer(subword: str) -> bool: + print(subword, end='', flush=True) + + + def read_image(path: str) -> Tensor: + pic = Image.open(path).convert("RGB") + image_data = np.array(pic.getdata()).reshape(1, pic.size[1], pic.size[0], 3).astype(np.uint8) + return Tensor(image_data) + + + def read_images(path: str) -> list[Tensor]: + entry = Path(path) + if entry.is_dir(): + return [read_image(str(file)) for file in sorted(entry.iterdir())] + return [read_image(path)] + + + def infer(model_dir: str, image_dir: str): + rgbs = read_images(image_dir) + device = 'CPU' # GPU can be used as well. + enable_compile_cache = dict() + if "GPU" == device: + enable_compile_cache["CACHE_DIR"] = "vlm_cache" + pipe = openvino_genai.VLMPipeline(model_dir, device, **enable_compile_cache) + + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + + pipe.start_chat() + prompt = input('question:\n') + pipe.generate(prompt, images=rgbs, generation_config=config, streamer=streamer) + + while True: + try: + prompt = input("\n----------\n" + "question:\n") + except EOFError: + break + pipe.generate(prompt, generation_config=config, streamer=streamer) + pipe.finish_chat() + + + For more information, refer to the + `Python sample `__. + + .. tab-item:: C++ + :sync: cpp + + .. code-block:: cpp + + #include "load_image.hpp" + #include + #include + + bool print_subword(std::string&& subword) { + return !(std::cout << subword << std::flush); } -The `LLMPipeline` is the main object used for decoding. You can construct it directly from the -folder with the converted model. It will automatically load the main model, tokenizer, detokenizer, -and the default generation configuration. 
+ int main(int argc, char* argv[]) try { + if (3 != argc) { + throw std::runtime_error(std::string{"Usage "} + argv[0] + " "); + } + + std::vector rgbs = utils::load_images(argv[2]); + + std::string device = "CPU"; // GPU can be used as well. + ov::AnyMap enable_compile_cache; + if ("GPU" == device) { + enable_compile_cache.insert({ov::cache_dir("vlm_cache")}); + } + ov::genai::VLMPipeline pipe(argv[1], device, enable_compile_cache); + + ov::genai::GenerationConfig generation_config; + generation_config.max_new_tokens = 100; + + std::string prompt; + + pipe.start_chat(); + std::cout << "question:\n"; + + std::getline(std::cin, prompt); + pipe.generate(prompt, + ov::genai::images(rgbs), + ov::genai::generation_config(generation_config), + ov::genai::streamer(print_subword)); + std::cout << "\n----------\n" + "question:\n"; + while (std::getline(std::cin, prompt)) { + pipe.generate(prompt, + ov::genai::generation_config(generation_config), + ov::genai::streamer(print_subword)); + std::cout << "\n----------\n" + "question:\n"; + } + pipe.finish_chat(); + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__ + + +| + + +Chat-bot use case - step by step +############################################################################################### + +This example will show you how to create a chat-bot functionality, using the ``ov_genai.LLMPipeline`` +and a chat-tuned TinyLlama model. Apart from the basic implementation, it provides additional +optimization methods. + +Although CPU is used as inference device in the samples below, you may choose GPU instead. +Note that tasks such as token selection, tokenization, and detokenization are always handled +by CPU only. 
Tokenizers, represented as a separate model, are also run on CPU. + +Running the model ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +You start by exporting an LLM model via Hugging Face Optimum-Intel. Note that the precision +of ``int4`` is used, instead of the original ``fp16``, for better performance. The weight +compression is done by NNCF at the model export stage. The exported model contains all the +information necessary for execution, including the tokenizer/detokenizer and the generation +config, ensuring that its results match those generated by Hugging Face. + +The ``LLMPipeline`` is the main object used for decoding and handles all the necessary steps. +You can construct it directly from the folder with the converted model. + + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: console + + optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weight-format int4 --trust-remote-code "TinyLlama-1.1B-Chat-v1.0" + + .. code-block:: python + + import openvino_genai as ov_genai + pipe = ov_genai.LLMPipeline(model_path, "CPU") + print(pipe.generate("The Sun is yellow because", max_new_tokens=100)) + + .. tab-item:: C++ + :sync: cpp + + .. code-block:: console + + optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weight-format int4 --trust-remote-code "TinyLlama-1.1B-Chat-v1.0" + + .. code-block:: cpp + + #include "openvino/genai/llm_pipeline.hpp" + #include + + int main(int argc, char* argv[]) { + std::string model_path = argv[1]; + ov::genai::LLMPipeline pipe(model_path, "CPU"); + std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(100)); + } + + -Once the model is exported from Hugging Face Optimum-Intel, it already contains all the information -necessary for execution, including the tokenizer/detokenizer and the generation config, ensuring that -its results match those generated by Hugging Face. 
Streaming the Output -########################### ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -For more interactive UIs during generation, streaming of model output tokens is supported. See the example -below, where a lambda function outputs words to the console immediately upon generation: +For more interactive UIs during generation, you can stream output tokens. In this example, a +lambda function outputs words to the console immediately upon generation: .. tab-set:: @@ -177,12 +720,10 @@ You can also create your custom streamer for more sophisticated processing: Optimizing Generation with Grouped Beam Search -####################################################### - -Leverage grouped beam search decoding and configure generation_config for better text generation -quality and efficient batch processing in GenAI applications. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -Specify generation_config to use grouped beam search: +For better text generation quality and more efficient batch processing, specify +``generation_config`` to leverage grouped beam search decoding. .. tab-set:: @@ -219,10 +760,123 @@ Specify generation_config to use grouped beam search: } +Efficient Text Generation via Speculative Decoding +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +Speculative decoding (or assisted-generation) enables faster token generation +when an additional smaller draft model is used alongside the main model. This reduces the +number of infer requests to the main model, increasing performance. + +The draft model predicts the next K tokens one by one in an autoregressive manner. The main +model validates these predictions and corrects them if necessary - in case of +a discrepancy, the main model prediction is used. Then, the draft model acquires this token and +runs prediction of the next K tokens, thus repeating the cycle. 
+ + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: python + + import openvino_genai + import queue + import threading + + def streamer(subword): + print(subword, end='', flush=True) + return False + + def infer(model_dir: str, draft_model_dir: str, prompt: str): + main_device = 'CPU' # GPU can be used as well. + draft_device = 'CPU' + + scheduler_config = openvino_genai.SchedulerConfig() + scheduler_config.cache_size = 2 + + draft_model = openvino_genai.draft_model(draft_model_dir, draft_device) + + pipe = openvino_genai.LLMPipeline(model_dir, main_device, scheduler_config=scheduler_config, draft_model=draft_model) + + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + config.num_assistant_tokens = 5 + + pipe.generate("The Sun is yellow because", config, streamer) + + + For more information, refer to the + `Python sample `__. + + + .. tab-item:: C++ + :sync: cpp + + .. code-block:: cpp + + #include + + #include "openvino/genai/llm_pipeline.hpp" + + int main(int argc, char* argv[]) try { + if (4 != argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " ''"); + } + + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + config.num_assistant_tokens = 5; + + std::string main_model_path = argv[1]; + std::string draft_model_path = argv[2]; + std::string prompt = argv[3]; + + std::string main_device = "CPU", draft_device = "CPU"; + + ov::genai::SchedulerConfig scheduler_config; + scheduler_config.cache_size = 5; + + ov::genai::LLMPipeline pipe( + main_model_path, + main_device, + ov::genai::draft_model(draft_model_path, draft_device), + ov::genai::scheduler_config(scheduler_config)); + + auto streamer = [](std::string subword) { + std::cout << subword << std::flush; + return false; + }; + + pipe.generate("The Sun is yellow because", config, streamer); + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return 
EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__ + + + + + + + + Comparing with Hugging Face Results ####################################### -Compare and analyze results with those generated by Hugging Face models. +You can compare the results of the above example with those generated by Hugging Face models by +running the following code: .. tab-set:: @@ -250,30 +904,35 @@ Compare and analyze results with those generated by Hugging Face models. assert hf_output == ov_output -GenAI API -####################################### -OpenVINO GenAI Flavor includes the following API: -* generation_config - defines a configuration class for text generation, enabling customization of the generation process such as the maximum length of the generated text, whether to ignore end-of-sentence tokens, and the specifics of the decoding strategy (greedy, beam search, or multinomial sampling). -* llm_pipeline - provides classes and utilities for text generation, including a pipeline for processing inputs, generating text, and managing outputs with configurable options. -* streamer_base - an abstract base class for creating streamers. -* tokenizer - the tokenizer class for text encoding and decoding. +GenAI API +####################################### + +The use case described here uses the following OpenVINO GenAI API methods: +* generation_config - defines a configuration class for text generation, + enabling customization of the generation process such as the maximum length of + the generated text, whether to ignore end-of-sentence tokens, and the specifics + of the decoding strategy (greedy, beam search, or multinomial sampling). +* llm_pipeline - provides classes and utilities for processing inputs, + text generation, and managing outputs with configurable options. 
+* streamer_base - an abstract base class for creating streamers. +* tokenizer - the tokenizer class for text encoding and decoding. * visibility - controls the visibility of the GenAI library. -Learn more in the `GenAI API reference `__. +Learn more from the `GenAI API reference `__. Additional Resources #################### * `OpenVINO GenAI Repo `__ * `OpenVINO GenAI Samples `__ +* A Jupyter notebook demonstrating + `Visual-language assistant with MiniCPM-V2 and OpenVINO `__ * `OpenVINO Tokenizers `__ * `Neural Network Compression Framework `__ - - diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst deleted file mode 100644 index 953784c03fdef0..00000000000000 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst +++ /dev/null @@ -1,433 +0,0 @@ -GenAI Use Cases -===================== - -This article provides several use case scenarios for Generative AI model -inference. The applications presented in the code samples below -only require minimal configuration, like setting an inference device. Feel free -to explore and modify the source code as you need. - - -Using GenAI for Text-to-Image Generation -######################################## - -Examples below demonstrate inference on text-to-image models, like Stable Diffusion -1.5, 2.1, and LCM, with a text prompt as input. The :ref:`main.cpp ` -sample shows basic usage of the ``Text2ImagePipeline`` pipeline. -:ref:`lora.cpp ` shows how to apply LoRA adapters to the pipeline. - - -.. tab-set:: - - .. tab-item:: Python - :sync: python - - .. tab-set:: - - .. tab-item:: main.py - :name: mainpy - - .. 
code-block:: python - - import openvino_genai - from PIL import Image - import numpy as np - - class Generator(openvino_genai.Generator): - def __init__(self, seed, mu=0.0, sigma=1.0): - openvino_genai.Generator.__init__(self) - np.random.seed(seed) - self.mu = mu - self.sigma = sigma - - def next(self): - return np.random.normal(self.mu, self.sigma) - - - def infer(model_dir: str, prompt: str): - device = 'CPU' # GPU can be used as well - random_generator = Generator(42) - pipe = openvino_genai.Text2ImagePipeline(model_dir, device) - image_tensor = pipe.generate( - prompt, - width=512, - height=512, - num_inference_steps=20, - num_images_per_prompt=1, - random_generator=random_generator - ) - - image = Image.fromarray(image_tensor.data[0]) - image.save("image.bmp") - - .. tab-item:: LoRA.py - :name: lorapy - - .. code-block:: python - - import openvino as ov - import openvino_genai - import numpy as np - import sys - - - class Generator(openvino_genai.Generator): - def __init__(self, seed, mu=0.0, sigma=1.0): - openvino_genai.Generator.__init__(self) - np.random.seed(seed) - self.mu = mu - self.sigma = sigma - - def next(self): - return np.random.normal(self.mu, self.sigma) - - - def image_write(path: str, image_tensor: ov.Tensor): - from PIL import Image - image = Image.fromarray(image_tensor.data[0]) - image.save(path) - - - def infer(models_path: str, prompt: str): - prompt = "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting" - - device = "CPU" # GPU, NPU can be used as well - adapter_config = openvino_genai.AdapterConfig() - - for i in range(int(len(adapters) / 2)): - adapter = openvino_genai.Adapter(adapters[2 * i]) - alpha = float(adapters[2 * i + 1]) - adapter_config.add(adapter, alpha) - - pipe = openvino_genai.Text2ImagePipeline(models_path, device, adapters=adapter_config) - print("Generating image with LoRA adapters applied, resulting image will be in lora.bmp") - image = pipe.generate(prompt, - 
random_generator=Generator(42), - width=512, - height=896, - num_inference_steps=20) - - image_write("lora.bmp", image) - print("Generating image without LoRA adapters applied, resulting image will be in baseline.bmp") - image = pipe.generate(prompt, - adapters=openvino_genai.AdapterConfig(), - random_generator=Generator(42), - width=512, - height=896, - num_inference_steps=20 - ) - image_write("baseline.bmp", image) - - For more information, refer to the - `Python sample `__ - - .. tab-item:: C++ - :sync: cpp - - .. tab-set:: - - .. tab-item:: main.cpp - :name: maincpp - - .. code-block:: cpp - - #include "openvino/genai/text2image/pipeline.hpp" - - #include "imwrite.hpp" - - int32_t main(int32_t argc, char* argv[]) try { - OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " ''"); - - const std::string models_path = argv[1], prompt = argv[2]; - const std::string device = "CPU"; // GPU, NPU can be used as well - - ov::genai::Text2ImagePipeline pipe(models_path, device); - ov::Tensor image = pipe.generate(prompt, - ov::genai::width(512), - ov::genai::height(512), - ov::genai::num_inference_steps(20), - ov::genai::num_images_per_prompt(1)); - - imwrite("image_%d.bmp", image, true); - - return EXIT_SUCCESS; - } catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } catch (...) { - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } - - .. tab-item:: LoRA.cpp - :name: loracpp - - .. 
code-block:: cpp - - #include "openvino/genai/text2image/pipeline.hpp" - - #include "imwrite.hpp" - - int32_t main(int32_t argc, char* argv[]) try { - OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " '' [ ...]]"); - - const std::string models_path = argv[1], prompt = argv[2]; - const std::string device = "CPU"; // GPU, NPU can be used as well - - ov::genai::AdapterConfig adapter_config; - for(size_t i = 0; i < (argc - 3)/2; ++i) { - ov::genai::Adapter adapter(argv[3 + 2*i]); - float alpha = std::atof(argv[3 + 2*i + 1]); - adapter_config.add(adapter, alpha); - } - - ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config)); - - std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n"; - ov::Tensor image = pipe.generate(prompt, - ov::genai::random_generator(std::make_shared(42)), - ov::genai::width(512), - ov::genai::height(896), - ov::genai::num_inference_steps(20)); - imwrite("lora.bmp", image, true); - - std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n"; - image = pipe.generate(prompt, - ov::genai::adapters(), - ov::genai::random_generator(std::make_shared(42)), - ov::genai::width(512), - ov::genai::height(896), - ov::genai::num_inference_steps(20)); - imwrite("baseline.bmp", image, true); - - return EXIT_SUCCESS; - } catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } catch (...) { - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } - - - For more information, refer to the - `C++ sample `__ - - - - - -Using GenAI in Speech Recognition -################################# - - -The application, shown in code samples below, performs inference on speech -recognition Whisper Models. 
The samples include the ``WhisperPipeline`` class -and use audio files in WAV format at a sampling rate of 16 kHz as input. - -.. tab-set:: - - .. tab-item:: Python - :sync: cpp - - .. code-block:: python - - import openvino_genai - import librosa - - - def read_wav(filepath): - raw_speech, samplerate = librosa.load(filepath, sr=16000) - return raw_speech.tolist() - - - def infer(model_dir: str, wav_file_path: str): - raw_speech = read_wav(wav_file_path) - pipe = openvino_genai.WhisperPipeline(model_dir) - - def streamer(word: str) -> bool: - print(word, end="") - return False - - result = pipe.generate( - raw_speech, - max_new_tokens=100, - language="<|en|>", - task="transcribe", - return_timestamps=True, - streamer=streamer, - ) - - print() - for chunk in result.chunks: - print(f"timestamps: [{chunk.start_ts}, {chunk.end_ts}] text: {chunk.text}") - - - For more information, refer to the - `Python sample `__. - - .. tab-item:: C++ - :sync: cpp - - .. code-block:: cpp - - #include "audio_utils.hpp" - #include "openvino/genai/whisper_pipeline.hpp" - - int main(int argc, char* argv[]) try { - if (3 > argc) { - throw std::runtime_error(std::string{"Usage: "} + argv[0] + " \"\""); - } - - std::filesystem::path models_path = argv[1]; - std::string wav_file_path = argv[2]; - std::string device = "CPU"; // GPU can be used as well - - ov::genai::WhisperPipeline pipeline(models_path, device); - - ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path); - - ov::genai::WhisperGenerationConfig config(models_path / "generation_config.json"); - config.max_new_tokens = 100; - config.language = "<|en|>"; - config.task = "transcribe"; - config.return_timestamps = true; - - auto streamer = [](std::string word) { - std::cout << word; - return false; - }; - - auto result = pipeline.generate(raw_speech, config, streamer); - - std::cout << "\n"; - - for (auto& chunk : *result.chunks) { - std::cout << "timestamps: [" << chunk.start_ts << ", " << chunk.end_ts << "] 
text: " << chunk.text << "\n"; - } - - } catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) { - } - return EXIT_FAILURE; - } catch (...) { - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) { - } - return EXIT_FAILURE; - } - - - For more information, refer to the - `C++ sample `__. - - -Using GenAI in Chat Scenario -############################ - -For chat scenarios where inputs and outputs represent a conversation, maintaining KVCache across inputs -may prove beneficial. The ``start_chat`` and ``finish_chat`` chat-specific methods are used to -mark a conversation session, as shown in the samples below: - -.. tab-set:: - - .. tab-item:: Python - :sync: py - - .. code-block:: python - - import openvino_genai - - - def streamer(subword): - print(subword, end='', flush=True) - return False - - - def infer(model_dir: str): - device = 'CPU' # GPU can be used as well. - pipe = openvino_genai.LLMPipeline(model_dir, device) - - config = openvino_genai.GenerationConfig() - config.max_new_tokens = 100 - - pipe.start_chat() - while True: - try: - prompt = input('question:\n') - except EOFError: - break - pipe.generate(prompt, config, streamer) - print('\n----------') - pipe.finish_chat() - - - - For more information, refer to the - `Python sample `__. - - .. tab-item:: C++ - :sync: cpp - - .. 
code-block:: cpp - - #include "openvino/genai/llm_pipeline.hpp" - - int main(int argc, char* argv[]) try { - if (2 != argc) { - throw std::runtime_error(std::string{"Usage: "} + argv[0] + " "); - } - std::string prompt; - std::string models_path = argv[1]; - - std::string device = "CPU"; // GPU, NPU can be used as well - ov::genai::LLMPipeline pipe(models_path, device); - - ov::genai::GenerationConfig config; - config.max_new_tokens = 100; - std::function streamer = [](std::string word) { - std::cout << word << std::flush; - return false; - }; - - pipe.start_chat(); - std::cout << "question:\n"; - while (std::getline(std::cin, prompt)) { - pipe.generate(prompt, config, streamer); - std::cout << "\n----------\n" - "question:\n"; - } - pipe.finish_chat(); - } catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } catch (...) { - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; - } - - - For more information, refer to the - `C++ sample `__ - -Additional Resources -##################### - -* :doc:`Install OpenVINO GenAI <../../../get-started/install-openvino/install-openvino-genai>` -* `OpenVINO GenAI Repo `__ -* `OpenVINO GenAI Samples `__ -* `OpenVINO Tokenizers `__ diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-hf.rst b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-hf.rst index a26b670b5314d0..4fec1acd23e6a7 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-hf.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-hf.rst @@ -1,4 +1,4 @@ -Run LLMs with Hugging Face and Optimum Intel +Inference with Optimum Intel =============================================================================================== .. 
meta:: @@ -276,9 +276,10 @@ includes **Dynamic quantization** of activations of 4/8-bit quantized MatMuls an ov_config={"KV_CACHE_PRECISION": "u8", "DYNAMIC_QUANTIZATION_GROUP_SIZE": "32", "PERFORMANCE_HINT": "LATENCY"} ) -.. note:: + .. note:: + Currently, for KV-cache quantization, GPU ignores the ``DYNAMIC_QUANTIZATION_GROUP_SIZE`` property, using ``group_size = head_size``. Additionally, it does not support the ``get_state()`` and ``set_state()`` APIs when KV-cache quantization is enabled. - Currently, both Dynamic quantization and KV-cache quantization are available for CPU device. + For GPU, KV-cache quantization is enabled by default on platforms without XMX support, and can be disabled by setting ``KV_CACHE_PRECISION`` to ``undefined``. Working with Models Tuned with LoRA diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst index 2476a0423e30e1..d33ae05f68f462 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/llm-inference-native-ov.rst @@ -1,4 +1,4 @@ -Run LLM Inference on Native OpenVINO (not recommended) +Generative AI with Base OpenVINO (not recommended) =============================================================================================== To run Generative AI models using native OpenVINO APIs you need to follow regular diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst index b4f31daedfa3e4..62c10e52266ec9 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst +++ 
b/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst @@ -63,6 +63,13 @@ The transformation function is a function that takes a sample from the dataset a :language: python :fragment: [dataset] + .. tab-item:: TorchFX + :sync: torch_fx + + .. doxygensnippet:: docs/optimization_guide/nncf/ptq/code/ptq_torch_fx.py + :language: python + :fragment: [dataset] + If there is no framework dataset object, you can create your own entity that implements the ``Iterable`` interface in Python, for example the list of images, and returns data samples feasible for inference. In this case, a transformation function is not required. @@ -102,6 +109,12 @@ See the `example section <#examples-of-how-to-apply-nncf-post-training-quantizat :language: python :fragment: [quantization] + .. tab-item:: TorchFX + :sync: torch_fx + + .. doxygensnippet:: docs/optimization_guide/nncf/ptq/code/ptq_torch_fx.py + :language: python + :fragment: [quantization] After that the model can be converted into the OpenVINO Intermediate Representation (IR) if needed, compiled and run with OpenVINO. If you have not already installed OpenVINO developer tools, install it with ``pip install openvino``. @@ -136,6 +149,17 @@ If you have not already installed OpenVINO developer tools, install it with ``pi :language: python :fragment: [inference] +TorchFX models can utilize OpenVINO optimizations using `torch.compile(..., backend="openvino") `__ functionality: + +.. tab-set:: + + .. tab-item:: TorchFX + :sync: torch_fx + + .. 
doxygensnippet:: docs/optimization_guide/nncf/ptq/code/ptq_torch_fx.py + :language: python + :fragment: [inference] + Tune quantization parameters ############################ diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst index bbc09ccd4b5fbb..046dde9661c3bb 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst @@ -6,38 +6,36 @@ LLM Weight Compression :hidden: weight-compression/microscaling-quantization + weight-compression/4-bit-weight-quantization -Weight compression is a technique for enhancing the efficiency of models, -especially those with large memory requirements. This method reduces the model's -memory footprint, a crucial factor for Large Language Models (LLMs). +Weight compression enhances the efficiency of models by reducing their memory footprint, +a crucial factor for Large Language Models (LLMs). It is especially effective for networks with high memory requirements. -Unlike full model quantization, where weights and activations are quantized, -weight compression in `Neural Network Compression Framework (NNCF) `__ -only targets the model's weights. This approach allows the activations to remain as -floating-point numbers, preserving most of the model's accuracy while improving its -speed and reducing its size. +Unlike full model quantization, where both weights and activations are quantized, it +only targets weights, keeping activations as floating-point numbers. This means preserving most +of the model's accuracy while improving its +speed and reducing its size. The reduction in size is especially noticeable with larger models. +For instance the 7 billion parameter Llama 2 model can be reduced +from about 25GB to 4GB using 4-bit weight compression. 
-The reduction in size is especially noticeable with larger models, -for instance the 7 billion parameter Llama 2 model can be reduced -from about 25GB to 4GB using 4-bit weight compression. With smaller models (i.e. less -than 1B parameters), weight compression may result in more accuracy reduction than -with larger models. +.. note:: + + With smaller language models (i.e. less than 1B parameters), weight + compression may result in more accuracy reduction than with larger models. + Therefore, weight compression is recommended for use with LLMs only. -LLMs and other models that require +LLMs and other GenAI models that require extensive memory to store the weights during inference can benefit from weight compression as it: * enables inference of exceptionally large models that cannot be accommodated in the device memory; - * reduces storage and memory overhead, making models more lightweight and less resource intensive for deployment; - * improves inference speed by reducing the latency of memory access when computing the operations with weights, for example, Linear layers. The weights are smaller and thus faster to load from memory; - * unlike quantization, does not require sample data to calibrate the range of activation values. @@ -46,197 +44,228 @@ provides weight quantization to 8 and 4-bit integer data types as a compression method primarily designed to optimize LLMs. +Compression Methods (8-bit vs. 4-bit) +##################################### + +For models that come from `Hugging Face `__ and are supported +by Optimum, it is recommended to use the **Optimum Intel API**, which employs NNCF weight +compression capabilities to optimize various large Transformer models. + +The NNCF ``nncf.compress_weights()`` API, with most of its options, is exposed in the +``.from_pretrained()`` method of Optimum Intel classes. Optimum also has several datasets +for data-aware quantization available out-of-the-box. 
-Compress Model Weights -###################### +You can use the examples below to perform data-free 8-bit or 4-bit weight quantization. +Before you start, make sure Optimum Intel is installed in your environment +by running the following command: -**8-bit weight quantization** method offers a balance between model size reduction and -maintaining accuracy, which usually leads to significant performance improvements for -Transformer-based models. Models with 8-bit compressed weights are performant on the -vast majority of supported CPU and GPU platforms. By default, weights are compressed -asymmetrically to "INT8_ASYM" mode. +.. code-block:: python + pip install optimum[openvino] -The code snippet below shows how to do asymmetrical 8-bit quantization of the model weights -represented in OpenVINO IR using NNCF: +**8-bit weight quantization** offers a good balance between reducing the size and preserving the +accuracy of a model. It usually results in significant improvements for transformer-based models +and guarantees good model performance for a vast majority of supported CPU and GPU platforms. +By default, weights are compressed asymmetrically to "INT8_ASYM" mode. .. tab-set:: - .. tab-item:: OpenVINO - :sync: openvino + .. tab-item:: Compression with Optimum-Intel + :sync: optimum - .. doxygensnippet:: docs/optimization_guide/nncf/code/weight_compression_openvino.py - :language: python - :fragment: [compression_8bit] + Load a pre-trained Hugging Face model, compress it to INT8_ASYM, using the + Optimum Intel API, and then execute inference with a text phrase: + Simply use the optimum-cli command line tool: -Now, the model is ready for compilation and inference. -It can be also saved into a compressed format, resulting in a smaller binary file. + .. code-block:: console -**4-bit weight quantization** method stands for an INT4-INT8 mixed-precision weight quantization, -where INT4 is considered as the primary precision and asymmetric INT8 is the backup one.
-It usually results in a smaller model size and lower inference latency, although the accuracy -degradation could be higher, depending on the model. + optimum-cli export openvino --model microsoft/Phi-3.5-mini-instruct --weight-format int8 ov_phi-3.5-mini-instruct -The code snippet below shows how to do 4-bit quantization of the model weights represented -in OpenVINO IR using NNCF: + You can also use the code sample to the same effect: -.. tab-set:: + .. code-block:: python - .. tab-item:: OpenVINO - :sync: openvino + from optimum.intel.openvino import OVModelForCausalLM, OVWeightQuantizationConfig + from transformers import AutoTokenizer, pipeline - .. doxygensnippet:: docs/optimization_guide/nncf/code/weight_compression_openvino.py - :language: python - :fragment: [compression_4bit] + # Load and compress a model from Hugging Face. + model_id = "microsoft/Phi-3.5-mini-instruct" + model = OVModelForCausalLM.from_pretrained( + model_id, + export=True, + quantization_config=OVWeightQuantizationConfig(bits=8) + ) + # Inference + tokenizer = AutoTokenizer.from_pretrained(model_id) + pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) + phrase = "The weather is" + results = pipe(phrase) + print(results) -The table below summarizes the benefits and trade-offs for each compression type in terms of -memory reduction, speed gain, and accuracy loss. + For more details, refer to the article on how to + :doc:`infer LLMs using Optimum Intel <../../learn-openvino/llm_inference_guide/llm-inference-hf>`. -.. list-table:: - :widths: 25 20 20 20 - :header-rows: 1 + .. 
tab-item:: Compression with NNCF + :sync: nncf - * - - - Memory Reduction - - Latency Improvement - - Accuracy Loss - * - INT8 Asymmetric - - Low - - Medium - - Low - * - INT4 Symmetric - - High - - High - - High - * - INT4 Asymmetric - - High - - Medium - - Medium + Load a pre-trained Hugging Face model, using the Optimum Intel API, + compress it to INT8_ASYM, using NNCF, and then execute inference with a text phrase: + .. code-block:: python + from nncf import compress_weights, CompressWeightsMode + from optimum.intel.openvino import OVModelForCausalLM + from transformers import AutoTokenizer, pipeline -The INT4 method has several parameters that can provide different performance-accuracy -trade-offs after optimization: + # Load a model and compress it with NNCF. + model_id = "microsoft/Phi-3.5-mini-instruct" + model = OVModelForCausalLM.from_pretrained(model_id, export=True, load_in_8bit=False, compile=False) + model.model = compress_weights(model.model, mode=CompressWeightsMode.INT8_ASYM) -* ``mode`` - there are two optimization modes: symmetric and asymmetric. + # Inference + model.compile() + tokenizer = AutoTokenizer.from_pretrained(model_id) + pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) + phrase = "The weather is" + results = pipe(phrase) + print(results) - **Symmetric Compression** - ``INT4_SYM`` - INT4 Symmetric mode involves quantizing weights to a signed 4-bit integer - symmetrically without zero point. This mode is faster than the INT8_ASYM, making - it ideal for situations where **speed and size reduction are prioritized over accuracy**. +Here is an example of code using NNCF to perform asymmetrical 8-bit weight quantization of +a model in the OpenVINO IR format: - .. code-block:: python +.. tab-set:: - from nncf import compress_weights - from nncf import CompressWeightsMode + .. tab-item:: OpenVINO + :sync: openvino - compressed_model = compress_weights(model, mode=CompressWeightsMode.INT4_SYM) + .. 
doxygensnippet:: docs/optimization_guide/nncf/code/weight_compression_openvino.py + :language: python + :fragment: [compression_8bit] - **Asymmetric Compression** - ``INT4_ASYM`` - INT4 Asymmetric mode also uses an unsigned 4-bit integer but quantizes weights - asymmetrically with a non-fixed zero point. This mode slightly compromises speed in - favor of better accuracy compared to the symmetric mode. This mode is useful when - **minimal accuracy loss is crucial**, but a faster performance than INT8 is still desired. +**4-bit weight quantization** is actually a mixed-precision compression, +with INT4 as the primary precision and asymmetric INT8 as the backup one. It produces a smaller model, +offering lower inference latency but potentially noticeable accuracy degradation, +depending on the model. - .. code-block:: python +.. tab-set:: - from nncf import compress_weights - from nncf import CompressWeightsMode + .. tab-item:: Compression with Optimum-Intel + :sync: optimum - compressed_model = compress_weights(model, mode=CompressWeightsMode.INT4_ASYM) + Load a pre-trained Hugging Face model, compress it to INT4, using the + Optimum Intel API, and then execute inference with a text phrase: -* ``group_size`` controls the size of the group of weights that share the same - quantization parameters. Shared quantization parameters help to speed up the - calculation of activation values as they are dequantized and quantized between - layers. However, they can reduce accuracy. The following group sizes are - recommended: ``128``, ``64``, ``32`` (``128`` is default value). + Simply use the optimum-cli command line tool: - `Smaller Group Size`: Leads to a more accurate model but increases the model's - footprint and reduces inference speed. + .. code-block:: console - `Larger Group Size`: Results in faster inference and a smaller model, but might - compromise accuracy.
+ optimum-cli export openvino --model microsoft/Phi-3.5-mini-instruct --weight-format int4 --awq --scale-estimation --dataset wikitext2 --group-size 64 --ratio 1.0 ov_phi-3.5-mini-instruct -* ``ratio`` controls the ratio between the layers compressed to the precision defined - by ``mode`` and the rest of the layers that will be kept in the ``backup_mode`` in the optimized model. - Ratio is a decimal between 0 and 1. For example, 0.8 means that 80% of layers will be - compressed to the precision defined by ``mode``, while the rest will be compressed to - ``backup_mode`` precision. The default value for ratio is 1. + You can also use the code sample to the same effect: - `Higher Ratio (more layers set to mode precision)`: Reduces the model size and increase inference speed but - might lead to higher accuracy degradation. + .. code-block:: python - `Lower Ratio (more layers set to backup_mode precision)`: Maintains better accuracy but results in a larger model size - and potentially slower inference. + from optimum.intel.openvino import OVModelForCausalLM, OVWeightQuantizationConfig + from transformers import AutoTokenizer, pipeline - In this example, 90% of the model's layers are quantized to INT4 asymmetrically with - a group size of 64: + # Load and compress a model from Hugging Face. + model_id = "microsoft/Phi-3.5-mini-instruct" + model = OVModelForCausalLM.from_pretrained( + model_id, + export=True, + quantization_config=OVWeightQuantizationConfig( + bits=4, + quant_method="awq", + scale_estimation=True, + dataset="wikitext2", + group_size=64, + ratio=1.0 + ) + ) - .. code-block:: python + # Inference + tokenizer = AutoTokenizer.from_pretrained(model_id) + pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) + phrase = "The weather is" + results = pipe(phrase) + print(results) - from nncf import compress_weights, CompressWeightsMode + .. 
tab-item:: Compression with NNCF + :sync: nncf - # Example: Compressing weights with INT4_ASYM mode, group size of 64, and 90% INT4 ratio - compressed_model = compress_weights( - model, - mode=CompressWeightsMode.INT4_ASYM, - group_size=64, - ratio=0.9, - ) + Load a pre-trained Hugging Face model, using the Optimum Intel API, + compress it to INT4 using NNCF, and then execute inference with a text phrase: -* ``scale_estimation`` - boolean parameter that enables more accurate estimation of - quantization scales. Especially helpful when the weights of all layers are quantized to - 4 bits. Requires dataset. + .. code-block:: python -* ``awq`` - boolean parameter that enables the AWQ method for more accurate INT4 weight - quantization. Especially helpful when the weights of all the layers are quantized to - 4 bits. The method can sometimes result in reduced accuracy when used with - Dynamic Quantization of activations. Requires dataset. + from nncf import compress_weights, CompressWeightsMode + from optimum.intel.openvino import OVModelForCausalLM + from transformers import AutoTokenizer, pipeline -* ``gptq`` - boolean parameter that enables the GPTQ method for more accurate INT4 weight - quantization. Requires dataset. + # Load a model and compress it with NNCF. + model_id = "microsoft/Phi-3.5-mini-instruct" + model = OVModelForCausalLM.from_pretrained(model_id, export=True, load_in_8bit=False, compile=False) + model.model = compress_weights(model.model, mode=CompressWeightsMode.INT4_SYM) -* ``dataset`` - calibration dataset for data-aware weight compression. It is required - for some compression options, for example, ``scale_estimation``, ``gptq`` or ``awq``. Some types - of ``sensitivity_metric`` can use data for precision selection. 
+ # Inference + model.compile() + tokenizer = AutoTokenizer.from_pretrained(model_id) + pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) + phrase = "The weather is" + results = pipe(phrase) + print(results) -* ``sensitivity_metric`` - controls the metric to estimate the sensitivity of compressing - layers in the bit-width selection algorithm. Some of the metrics require dataset to be - provided. The following types are supported: - * ``nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR`` - data-free metric computed as - the inverted 8-bit quantization noise. Weights with highest value of this metric can - be accurately quantized channel-wise to 8-bit. The idea is to leave these weights in - 8 bit, and quantize the rest of layers to 4-bit group-wise. Since group-wise is more - accurate than per-channel, accuracy should not degrade. + For more details, refer to the article on how to + :doc:`infer LLMs using Optimum Intel <../../learn-openvino/llm_inference_guide/llm-inference-hf>`. - * ``nncf.SensitivityMetric.HESSIAN_INPUT_ACTIVATION`` - requires dataset. The average - Hessian trace of weights with respect to the layer-wise quantization error multiplied - by L2 norm of 8-bit quantization noise. +The code snippet below shows how to do 4-bit quantization of the model weights represented +in OpenVINO IR using NNCF: - * ``nncf.SensitivityMetric.MEAN_ACTIVATION_VARIANCE`` - requires dataset. The mean - variance of the layers' inputs multiplied by inverted 8-bit quantization noise. +.. tab-set:: - * ``nncf.SensitivityMetric.MAX_ACTIVATION_VARIANCE`` - requires dataset. The maximum - variance of the layers' inputs multiplied by inverted 8-bit quantization noise. + .. tab-item:: OpenVINO + :sync: openvino - * ``nncf.SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE`` - requires dataset. The mean - magnitude of the layers' inputs multiplied by inverted 8-bit quantization noise. + ..
doxygensnippet:: docs/optimization_guide/nncf/code/weight_compression_openvino.py + :language: python + :fragment: [compression_4bit] + +Refer to the article about +:doc:`4-bit weight quantization <./weight-compression/4-bit-weight-quantization>` +for more details. -* ``all_layers`` - boolean parameter that enables INT4 weight quantization of all - Fully-Connected and Embedding layers, including the first and last layers in the model. +Once the model has been optimized, it is ready for compilation and inference. The model can +also be :ref:`saved into a compressed format <save_pretrained>`, resulting in a +smaller binary file. + +The table below summarizes the benefits and trade-offs for each compression type in terms of +memory reduction, speed gain, and accuracy loss. -* ``lora_correction`` - boolean parameter that enables the LoRA Correction Algorithm - to further improve the accuracy of INT4 compressed models on top of other - algorithms - AWQ and Scale Estimation. +.. list-table:: + :widths: 25 20 20 20 + :header-rows: 1 -* ``backup_mode`` - defines a backup precision for mixed-precision weight compression. - There are three modes: INT8_ASYM, INT8_SYM, and NONE, which retains - the original floating-point precision of the model weights (``INT8_ASYM`` is default value). + * - + - Memory Reduction + - Latency Improvement + - Accuracy Loss + * - INT8 Asymmetric + - Low + - Medium + - Low + * - INT4 Symmetric + - High + - High + - High + * - INT4 Asymmetric + - High + - Medium + - Medium **Use synthetic data for LLM weight compression** @@ -268,8 +297,8 @@ for details of the usage. # Synthetic-based compression synthetic_dataset = nncf.data.generate_text_data(hf_model, tokenizer, dataset_size=100) quantization_dataset = nncf.Dataset( - synthetic_dataset, - transform_fn # see example in NNCF repo how to make transform_fn + synthetic_dataset, + transform_fn # See the example in NNCF repo to learn how to make transform_fn.
) model = compress_weights( @@ -280,58 +309,16 @@ for details of the usage. dataset=quantization_dataset, awq=True, scale_estimation=True - ) # model is openvino.Model + ) # The model is openvino.Model. For data-aware weight compression refer to the following `example `__. .. note:: - Some methods can be stacked on top of one another to achieve a better - accuracy-performance trade-off after weight quantization. For example, the **Scale Estimation** - method can be applied along with **AWQ** and mixed-precision quantization (the ``ratio`` parameter). - - -**Hugging Face Optimum-Intel API** - -Hugging Face Optimum-Intel provides an easy way to use NNCF Weight Compression capabilities to optimize -various large Transformer models. Most of the options of the NNCF ``nncf.compress_weights()`` API are -exposed in the ``.from_pretrained()`` method of Optimum-Intel classes. Optimum also has several datasets -for data-aware quantization available out-of-the-box. -The example below shows data-free 4-bit weight quantization -applied on top of OpenVINO IR. Before trying the example, make sure Optimum Intel -is installed in your environment by running the following command: - -.. code-block:: python - - pip install optimum[openvino] - -.. 
code-block:: python - - from optimum.intel.openvino import OVModelForCausalLM, OVWeightQuantizationConfig - from transformers import AutoTokenizer, pipeline - - # Load and compress model from Hugging Face - model_id = "microsoft/Phi-3.5-mini-instruct" - model = OVModelForCausalLM.from_pretrained( - model_id, - export=True, - quantization_config=OVWeightQuantizationConfig( - bits=4, - quant_method="awq", - scale_estimation=True, - dataset="wikitext2", - group_size=64, - ratio=1.0 - ) - ) - - # Inference - tokenizer = AutoTokenizer.from_pretrained(model_id) - pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) - phrase = "The weather is" - results = pipe(phrase) - print(results) + Some methods can be stacked on top of one another to achieve a better + accuracy-performance trade-off after weight quantization. For example, the **Scale Estimation** + method can be applied along with **AWQ** and mixed-precision quantization (the ``ratio`` parameter). Exporting and Loading Compressed Models @@ -344,179 +331,157 @@ so it is preferable to compress the model once, save it, and then load the compressed model later for faster time to first inference. .. code-block:: python + :name: save_pretrained - # Save compressed model for faster loading later - model.save_pretrained("Phi-3.5-mini-instruct-int4-sym-ov") - tokenizer.save_pretrained("Phi-3.5-mini-instruct-int4-sym-ov") - - # Load a saved model - model = OVModelForCausalLM.from_pretrained("Phi-3.5-mini-instruct-int4-sym-ov") - tokenizer = AutoTokenizer.from_pretrained("Phi-3.5-mini-instruct-int4-sym-ov") - -GPTQ Models -############ + # Save compressed model for faster loading later + model.save_pretrained("Phi-3.5-mini-instruct-int4-sym-ov") + tokenizer.save_pretrained("Phi-3.5-mini-instruct-int4-sym-ov") -OpenVINO also supports 4-bit models from Hugging Face -`Transformers `__ library optimized -with `GPTQ `__. 
In this case, there is no -need for an additional model optimization step because model conversion will -automatically preserve the INT4 optimization results, allowing model inference to benefit from it. + # Load a saved model + model = OVModelForCausalLM.from_pretrained("Phi-3.5-mini-instruct-int4-sym-ov") + tokenizer = AutoTokenizer.from_pretrained("Phi-3.5-mini-instruct-int4-sym-ov") -A compression example using a GPTQ model is shown below. -Make sure to install GPTQ dependencies by running the following command: +.. tip:: -.. code-block:: python - - pip install optimum[openvino] auto-gptq - -.. code-block:: python + Models optimized with NNCF or Optimum Intel can be used with + :doc:`OpenVINO GenAI <../../learn-openvino/llm_inference_guide/genai-guide>`. - from optimum.intel.openvino import OVModelForCausalLM - from transformers import AutoTokenizer, pipeline - # Load model from Hugging Face already optimized with GPTQ - model_id = "TheBloke/Llama-2-7B-Chat-GPTQ" - model = OVModelForCausalLM.from_pretrained(model_id, export=True) +Auto-tuning of Weight Compression Parameters +############################################ - # Inference - tokenizer = AutoTokenizer.from_pretrained(model_id) - pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) - phrase = "The weather is" - results = pipe(phrase) - print(results) +To find the optimal weight compression parameters for a particular model, refer to the +`example `__ , +where weight compression parameters are being searched from the subset of values. +To speed up the search, a self-designed validation pipeline called +`WhoWhatBench `__ +is used. The pipeline can quickly evaluate the changes in the accuracy of the optimized +model compared to the baseline. -An `example of a model `__ -that has been optimized using GPTQ.
Compression Metrics Examples -######################################## +############################ -The table below shows examples of text-generation Language Models with different +Below you will find examples of text-generation Language Models with different optimization settings in a data-free setup, where no dataset is used at the optimization step. The Perplexity metric is a measurement of response accuracy, where a higher complexity score indicates a lower accuracy. It is measured on the `Lambada OpenAI dataset `__. -.. list-table:: - :widths: 40 55 25 25 - :header-rows: 1 - - * - Model - - Optimization - - Perplexity\* - - Model Size (Gb) - * - databricks/dolly-v2-3b - - FP32 - - 5.01 - - 10.3 - * - databricks/dolly-v2-3b - - INT8_ASYM - - 5.07 - - 2.6 - * - databricks/dolly-v2-3b - - INT4_ASYM,group_size=32,ratio=0.5 - - 5.28 - - 2.2 - * - facebook/opt-6.7b - - FP32 - - 4.25 - - 24.8 - * - facebook/opt-6.7b - - INT8_ASYM - - 4.27 - - 6.2 - * - facebook/opt-6.7b - - INT4_ASYM,group_size=64,ratio=0.8 - - 4.32 - - 4.1 - * - meta-llama/Llama-2-7b-chat-hf - - FP32 - - 3.28 - - 25.1 - * - meta-llama/Llama-2-7b-chat-hf - - INT8_ASYM - - 3.29 - - 6.3 - * - meta-llama/Llama-2-7b-chat-hf - - INT4_ASYM,group_size=128,ratio=0.8 - - 3.41 - - 4.0 - * - togethercomputer/RedPajama-INCITE-7B-Instruct - - FP32 - - 4.15 - - 25.6 - * - togethercomputer/RedPajama-INCITE-7B-Instruct - - INT8_ASYM - - 4.17 - - 6.4 - * - togethercomputer/RedPajama-INCITE-7B-Instruct - - INT4_ASYM,group_size=128,ratio=1.0 - - 4.17 - - 3.6 - * - meta-llama/Llama-2-13b-chat-hf - - FP32 - - 2.92 - - 48.5 - * - meta-llama/Llama-2-13b-chat-hf - - INT8_ASYM - - 2.91 - - 12.1 - * - meta-llama/Llama-2-13b-chat-hf - - INT4_SYM,group_size=64,ratio=0.8 - - 2.98 - - 8.0 - - -The following table shows accuracy metric in a data-aware 4-bit weight quantization -setup measured on the `Wikitext dataset `__. - -.. 
list-table:: - :widths: 40 55 25 25 - :header-rows: 1 - - * - Model - - Optimization - - Word perplexity\* - - Model Size (Gb) - * - meta-llama/llama-7b-chat-hf - - FP32 - - 11.57 - - 12.61 - * - meta-llama/llama-7b-chat-hf - - INT4_SYM,group_size=128,ratio=1.0,awq=True - - 12.34 - - 2.6 - * - stabilityai_stablelm-3b-4e1t - - FP32 - - 10.17 - - 10.41 - * - stabilityai_stablelm-3b-4e1t - - INT4_SYM,group_size=64,ratio=1.0,awq=True - - 10.89 - - 2.6 - * - HuggingFaceH4/zephyr-7b-beta - - FP32 - - 9.82 - - 13.99 - * - HuggingFaceH4/zephyr-7b-beta - - INT4_SYM,group_size=128,ratio=1.0 - - 10.32 - - 2.6 +.. dropdown:: Perplexity\* in data-free optimization + + .. list-table:: + :widths: 40 55 25 25 + :header-rows: 1 + + * - Model + - Optimization + - Perplexity\* + - Model Size (Gb) + * - databricks/dolly-v2-3b + - FP32 + - 5.01 + - 10.3 + * - databricks/dolly-v2-3b + - INT8_ASYM + - 5.07 + - 2.6 + * - databricks/dolly-v2-3b + - INT4_ASYM,group_size=32,ratio=0.5 + - 5.28 + - 2.2 + * - facebook/opt-6.7b + - FP32 + - 4.25 + - 24.8 + * - facebook/opt-6.7b + - INT8_ASYM + - 4.27 + - 6.2 + * - facebook/opt-6.7b + - INT4_ASYM,group_size=64,ratio=0.8 + - 4.32 + - 4.1 + * - meta-llama/Llama-2-7b-chat-hf + - FP32 + - 3.28 + - 25.1 + * - meta-llama/Llama-2-7b-chat-hf + - INT8_ASYM + - 3.29 + - 6.3 + * - meta-llama/Llama-2-7b-chat-hf + - INT4_ASYM,group_size=128,ratio=0.8 + - 3.41 + - 4.0 + * - togethercomputer/RedPajama-INCITE-7B-Instruct + - FP32 + - 4.15 + - 25.6 + * - togethercomputer/RedPajama-INCITE-7B-Instruct + - INT8_ASYM + - 4.17 + - 6.4 + * - togethercomputer/RedPajama-INCITE-7B-Instruct + - INT4_ASYM,group_size=128,ratio=1.0 + - 4.17 + - 3.6 + * - meta-llama/Llama-2-13b-chat-hf + - FP32 + - 2.92 + - 48.5 + * - meta-llama/Llama-2-13b-chat-hf + - INT8_ASYM + - 2.91 + - 12.1 + * - meta-llama/Llama-2-13b-chat-hf + - INT4_SYM,group_size=64,ratio=0.8 + - 2.98 + - 8.0 + + +.. 
dropdown:: Perplexity\* in data-aware optimization + + The following table shows accuracy metric in a data-aware 4-bit weight quantization + setup measured on the `Wikitext dataset `__. + + .. list-table:: + :widths: 40 55 25 25 + :header-rows: 1 + + * - Model + - Optimization + - Word perplexity\* + - Model Size (Gb) + * - meta-llama/llama-7b-chat-hf + - FP32 + - 11.57 + - 12.61 + * - meta-llama/llama-7b-chat-hf + - INT4_SYM,group_size=128,ratio=1.0,awq=True + - 12.34 + - 2.6 + * - stabilityai_stablelm-3b-4e1t + - FP32 + - 10.17 + - 10.41 + * - stabilityai_stablelm-3b-4e1t + - INT4_SYM,group_size=64,ratio=1.0,awq=True + - 10.89 + - 2.6 + * - HuggingFaceH4/zephyr-7b-beta + - FP32 + - 9.82 + - 13.99 + * - HuggingFaceH4/zephyr-7b-beta + - INT4_SYM,group_size=128,ratio=1.0 + - 10.32 + - 2.6 \*Perplexity metric in both tables was measured without the Dynamic Quantization feature enabled in the OpenVINO runtime. -Auto-tuning of Weight Compression Parameters -############################################ - -To find the optimal weight compression parameters for a particular model, refer to the -`example `__ , -where weight compression parameters are being searched from the subset of values. -To speed up the search, a self-designed validation pipeline called -`WhoWhatBench `__ -is used. The pipeline can quickly evaluate the changes in the accuracy of the optimized -model compared to the baseline. 
Additional Resources #################### diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression/4-bit-weight-quantization.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression/4-bit-weight-quantization.rst new file mode 100644 index 00000000000000..ae9bc7d7b8b4a3 --- /dev/null +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression/4-bit-weight-quantization.rst @@ -0,0 +1,175 @@ +4-bit Weight Quantization +========================= + +The 4-bit weight quantization method results in a significant reduction in model size and +memory usage, making LLMs more accessible to less performant devices. +It also usually offers lower inference latency; however, depending on the specific model, +it may impact the accuracy. + +Nevertheless, the INT4 method has several parameters that can provide different performance-accuracy +trade-offs after optimization: + +* ``mode`` - there are two optimization modes: symmetric and asymmetric. + + .. tab-set:: + + .. tab-item:: Symmetric Compression + :sync: int4-sym + + INT4 Symmetric mode (``INT4_SYM``) involves quantizing weights to a signed 4-bit integer + symmetrically without zero point. This mode is faster than the INT8_ASYM, making + it ideal for situations where **speed and size reduction are prioritized over accuracy**. + + .. code-block:: python + + from nncf import compress_weights + from nncf import CompressWeightsMode + + compressed_model = compress_weights(model, mode=CompressWeightsMode.INT4_SYM) + + .. tab-item:: Asymmetric Compression + :sync: int4-asym + + INT4 Asymmetric mode (``INT4_ASYM``) uses an unsigned 4-bit integer and quantizes weights + asymmetrically with a non-fixed zero point. This mode slightly compromises speed in + favor of better accuracy compared to the symmetric mode. This mode is useful when + **minimal accuracy loss is crucial**, but a faster performance than INT8 is still desired. + + ..
code-block:: python + + from nncf import compress_weights + from nncf import CompressWeightsMode + + compressed_model = compress_weights(model, mode=CompressWeightsMode.INT4_ASYM) + +* ``group_size`` controls the size of the group of weights that share the same + quantization parameters. Shared quantization parameters help to speed up the + calculation of activation values as they are dequantized and quantized between + layers. However, they can reduce accuracy. The following group sizes are + recommended: ``128``, ``64``, ``32`` (``128`` is the default value). + + `Smaller Group Size`: Leads to a more accurate model but increases the model's + footprint and reduces inference speed. + + `Larger Group Size`: Results in faster inference and a smaller model, but might + compromise accuracy. + +* ``ratio`` controls the ratio between the layers compressed to the precision defined + by ``mode`` and the rest of the layers that will be kept in the ``backup_mode`` in the optimized model. + Ratio is a decimal between 0 and 1. For example, 0.8 means that 80% of layers will be + compressed to the precision defined by ``mode``, while the rest will be compressed to + ``backup_mode`` precision. The default value for ratio is 1. + + | **Higher Ratio (more layers set to mode precision)**: + | Reduces the model size and increases inference speed but + might lead to higher accuracy degradation. + + | **Lower Ratio (more layers set to backup_mode precision)**: + | Maintains better accuracy but results in a larger model size + and potentially slower inference. + + In the example below, 90% of the model's layers are quantized to INT4 asymmetrically with + a group size of 64: + + ..
code-block:: python + + from nncf import compress_weights, CompressWeightsMode + + # Example: Compressing weights with INT4_ASYM mode, group size of 64, and 90% INT4 ratio + compressed_model = compress_weights( + model, + mode=CompressWeightsMode.INT4_ASYM, + group_size=64, + ratio=0.9, + ) + +* ``scale_estimation`` - a boolean parameter that enables more accurate estimation of + quantization scales. Especially helpful when the weights of all layers are quantized to + 4 bits. Requires a dataset. + +* ``awq`` - a boolean parameter that enables the AWQ method for more accurate INT4 weight + quantization. Especially helpful when the weights of all the layers are quantized to + 4 bits. The method can sometimes result in reduced accuracy when used with + Dynamic Quantization of activations. Requires a dataset. + +* ``gptq`` - a boolean parameter that enables the GPTQ method for more accurate INT4 weight + quantization. Requires a dataset. + +* ``dataset`` - a calibration dataset for data-aware weight compression. It is required + for some compression options, for example, ``scale_estimation``, ``gptq`` or ``awq``. Some types + of ``sensitivity_metric`` can use data for precision selection. + +* ``sensitivity_metric`` - controls the metric to estimate the sensitivity of compressing + layers in the bit-width selection algorithm. Some of the metrics require a dataset to be + provided. The following types are supported: + + * ``nncf.SensitivityMetric.WEIGHT_QUANTIZATION_ERROR`` - a data-free metric computed as + the inverted 8-bit quantization noise. Weights with the highest value of this metric can + be accurately quantized channel-wise to 8-bit. The idea is to leave these weights in + 8-bit, and quantize the rest of the layers to 4-bit group-wise. Since group-wise is more + accurate than per-channel, accuracy should not degrade. + + * ``nncf.SensitivityMetric.HESSIAN_INPUT_ACTIVATION`` - requires a dataset.
The average + Hessian trace of weights with respect to the layer-wise quantization error multiplied + by the L2 norm of 8-bit quantization noise. + + * ``nncf.SensitivityMetric.MEAN_ACTIVATION_VARIANCE`` - requires a dataset. The mean + variance of the layers' inputs multiplied by inverted 8-bit quantization noise. + + * ``nncf.SensitivityMetric.MAX_ACTIVATION_VARIANCE`` - requires a dataset. The maximum + variance of the layers' inputs multiplied by inverted 8-bit quantization noise. + + * ``nncf.SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE`` - requires a dataset. The mean + magnitude of the layers' inputs multiplied by inverted 8-bit quantization noise. + +* ``all_layers`` - a boolean parameter that enables INT4 weight quantization of all + Fully-Connected and Embedding layers, including the first and last layers in the model. + +* ``lora_correction`` - a boolean parameter that enables the LoRA Correction Algorithm + to further improve the accuracy of INT4 compressed models on top of other + algorithms - AWQ and Scale Estimation. + +* ``backup_mode`` - defines a backup precision for mixed-precision weight compression. + There are three modes: INT8_ASYM, INT8_SYM, and NONE, which retains + the original floating-point precision of the model weights (``INT8_ASYM`` is the default value). + +| + +4-bit Weight Quantization with GPTQ +################################### + +You can use models from the Hugging Face +`Transformers <https://github.com/huggingface/transformers>`__ library, which are quantized +with the `GPTQ <https://arxiv.org/abs/2210.17323>`__ algorithm. Such models do not require +an additional optimization step because the conversion will automatically preserve +the INT4 optimization results, and model inference will eventually benefit from it. + +See the `example of a model <https://huggingface.co/TheBloke/Llama-2-7B-Chat-GPTQ>`__ +that has been optimized with GPTQ. + +You can also refer to the code sample below which shows how to load a 4-bit +GPTQ model and run inference. + +.. dropdown:: Using a GPTQ model. + + Make sure to install GPTQ dependencies by running the following command: + + ..
code-block:: python + + pip install optimum[openvino] auto-gptq + + .. code-block:: python + + from optimum.intel.openvino import OVModelForCausalLM + from transformers import AutoTokenizer, pipeline + + # Load model from Hugging Face already optimized with GPTQ + model_id = "TheBloke/Llama-2-7B-Chat-GPTQ" + model = OVModelForCausalLM.from_pretrained(model_id, export=True) + + # Inference + tokenizer = AutoTokenizer.from_pretrained(model_id) + pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) + phrase = "The weather is" + results = pipe(phrase) + print(results) diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst index 7b135fa7ff0b14..436d383ebf787e 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst @@ -146,6 +146,8 @@ offer a limited set of supported OpenVINO features. ov::intel_npu::turbo ov::intel_npu::tiles ov::intel_npu::max_tiles + ov::intel_npu::bypass_umd_caching + ov::intel_npu::defer_weights_load .. tab-item:: Read-only properties @@ -168,7 +170,6 @@ offer a limited set of supported OpenVINO features. ov::intel_npu::device_alloc_mem_size ov::intel_npu::device_total_mem_size ov::intel_npu::driver_version - ov::intel_npu::bypass_umd_caching .. 
note:: diff --git a/docs/articles_en/openvino-workflow/running-inference/integrate-openvino-with-your-application/model-representation.rst b/docs/articles_en/openvino-workflow/running-inference/integrate-openvino-with-your-application/model-representation.rst index 6ab924a61ef150..259f605d46c2f7 100644 --- a/docs/articles_en/openvino-workflow/running-inference/integrate-openvino-with-your-application/model-representation.rst +++ b/docs/articles_en/openvino-workflow/running-inference/integrate-openvino-with-your-application/model-representation.rst @@ -247,57 +247,50 @@ OpenVINO™ provides several debug capabilities: * Model can be visualized to image from the xDot format: -.. tab-set:: - - .. tab-item:: Python - :sync: py - - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.py - :language: python - :fragment: [ov:visualize] - - .. tab-item:: C++ - :sync: cpp - - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.cpp - :language: cpp - :fragment: [ov:visualize] - - -.. code-block:: sh + .. tab-set:: - `ov::pass::VisualizeTree` can be parametrized via environment variables: + .. tab-item:: Python + :sync: py - OV_VISUALIZE_TREE_OUTPUT_SHAPES=1 - visualize shapes + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.py + :language: python + :fragment: [ov:visualize] - OV_VISUALIZE_TREE_OUTPUT_TYPES=1 - visualize types + .. tab-item:: C++ + :sync: cpp - OV_VISUALIZE_TREE_MIN_MAX_DENORMAL=1 - pretty denormal values + .. 
doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.cpp + :language: cpp + :fragment: [ov:visualize] - OV_VISUALIZE_TREE_RUNTIME_INFO=1 - print runtime information - OV_VISUALIZE_TREE_IO=1 - print I/O ports + ``ov::pass::VisualizeTree`` can be parametrized via environment variables: - OV_VISUALIZE_TREE_MEMBERS_NAME=1 - print member names + * ``OV_VISUALIZE_TREE_OUTPUT_SHAPES=1`` - visualize shapes + * ``OV_VISUALIZE_TREE_OUTPUT_TYPES=1`` - visualize types + * ``OV_VISUALIZE_TREE_MIN_MAX_DENORMAL=1`` - pretty denormal values + * ``OV_VISUALIZE_TREE_RUNTIME_INFO=1`` - print runtime information + * ``OV_VISUALIZE_TREE_IO=1`` - print I/O ports + * ``OV_VISUALIZE_TREE_MEMBERS_NAME=1`` - print member names * Also model can be serialized to IR: -.. tab-set:: + .. tab-set:: - .. tab-item:: Python - :sync: py + .. tab-item:: Python + :sync: py - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.py - :language: python - :fragment: [ov:serialize] + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.py + :language: python + :fragment: [ov:serialize] - .. tab-item:: C++ - :sync: cpp + .. tab-item:: C++ + :sync: cpp - .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.cpp - :language: cpp - :fragment: [ov:serialize] + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_model_snippets.cpp + :language: cpp + :fragment: [ov:serialize] Additional Resources @@ -306,5 +299,3 @@ Additional Resources * :doc:`Available Operation Sets <../../../documentation/openvino-ir-format/operation-sets/available-opsets>`. * :doc:`OpenVINO™ Runtime Extensibility Developer Guide <../../../documentation/openvino-extensibility>`. * :doc:`Transformations Developer Guide <../../../documentation/openvino-extensibility/transformation-api>`. 
- - diff --git a/docs/dev/ov_dependencies.txt b/docs/dev/ov_dependencies.txt index 7cf26c58a9436d..d9c344d2c3048d 100644 --- a/docs/dev/ov_dependencies.txt +++ b/docs/dev/ov_dependencies.txt @@ -1,6 +1,6 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -#This file provides a comprehensive list of all dependencies of OpenVINO 2024.4 +#This file provides a comprehensive list of all dependencies of OpenVINO 2024.5 #The file is part of the automation pipeline for posting OpenVINO IR models on the HuggingFace Hub, including OneBOM dependency checks. diff --git a/docs/nbdoc/consts.py b/docs/nbdoc/consts.py index bfad4b042e5359..1a4d3a13049041 100644 --- a/docs/nbdoc/consts.py +++ b/docs/nbdoc/consts.py @@ -6,7 +6,7 @@ repo_owner = "openvinotoolkit" repo_name = "openvino_notebooks" repo_branch = "tree/main" -artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20241104220807/dist/rst_files/" +artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20241120220806/dist/rst_files/" blacklisted_extensions = ['.xml', '.bin'] notebooks_repo = "https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/" notebooks_binder = "https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=" diff --git a/docs/notebooks/3D-pose-estimation-with-output.rst b/docs/notebooks/3D-pose-estimation-with-output.rst index f39aa93b36851d..9e09d96094fc78 100644 --- a/docs/notebooks/3D-pose-estimation-with-output.rst +++ b/docs/notebooks/3D-pose-estimation-with-output.rst @@ -93,6 +93,11 @@ Lab instead.** .. 
code:: ipython3 + import platform + + if platform.system() == "Darwin": + %pip install -q "numpy<2.0.0" + %pip install pythreejs "openvino>=2024.4.0" "opencv-python" "torch" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu @@ -108,68 +113,68 @@ Lab instead.** Collecting torch Using cached https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp38-cp38-linux_x86_64.whl (194.9 MB) Collecting tqdm - Using cached tqdm-4.66.6-py3-none-any.whl.metadata (57 kB) - Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.5) + Using cached tqdm-4.67.0-py3-none-any.whl.metadata (57 kB) + Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.5) Collecting ipydatawidgets>=1.1.1 (from pythreejs) Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl.metadata (1.4 kB) Collecting numpy (from pythreejs) Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB) - Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) + Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) Collecting openvino-telemetry>=2023.2.1 (from openvino>=2024.4.0) - Using cached openvino_telemetry-2024.1.0-py3-none-any.whl.metadata (2.3 kB) - Requirement already satisfied: packaging in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2024.4.0) (24.1) + Using cached openvino_telemetry-2024.5.0-py3-none-any.whl.metadata (2.3 kB) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2024.4.0) (24.2) Collecting filelock (from torch) Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB) - Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) Collecting sympy (from torch) Using cached sympy-1.13.3-py3-none-any.whl.metadata (12 kB) Collecting networkx (from torch) Using cached https://download.pytorch.org/whl/networkx-3.2.1-py3-none-any.whl (1.6 MB) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) Collecting fsspec (from torch) Using cached fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB) Collecting traittypes>=0.2.0 (from ipydatawidgets>=1.1.1->pythreejs) Using cached traittypes-0.2.1-py2.py3-none-any.whl.metadata (1.0 kB) - Requirement already satisfied: comm>=0.1.3 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) - Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) - Requirement already satisfied: widgetsnbextension~=4.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.13) - Requirement already satisfied: jupyterlab-widgets~=3.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.13) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) + Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) + Requirement already satisfied: widgetsnbextension~=4.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.13) + Requirement already satisfied: 
jupyterlab-widgets~=3.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.13) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) INFO: pip is looking at multiple versions of networkx to determine which version is compatible with other requirements. This could take a while. Collecting networkx (from torch) Using cached networkx-3.1-py3-none-any.whl.metadata (5.3 kB) Collecting mpmath<1.4,>=1.1.0 (from sympy->torch) Using cached https://download.pytorch.org/whl/mpmath-1.3.0-py3-none-any.whl (536 kB) - Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) - Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) - Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.1) - Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) - Requirement already satisfied: pickleshare in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) - Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.48) - Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) - Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) - Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) - Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) - Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) - Requirement already satisfied: wcwidth in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) - Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.1.0) - Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) - Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.3) - Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) + Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) + Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) + Requirement already satisfied: jedi>=0.16 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.2) + Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) + Requirement already satisfied: pickleshare in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) + Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.48) + Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) + Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) + Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) + Requirement already satisfied: parso<0.9.0,>=0.8.4 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) + Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) + Requirement already satisfied: wcwidth in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) + Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.1.0) + Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) + Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.3) + Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) Using cached pythreejs-2.4.2-py3-none-any.whl (3.4 MB) Using cached 
openvino-2024.4.0-16579-cp38-cp38-manylinux2014_x86_64.whl (42.6 MB) Using cached opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (62.5 MB) - Using cached tqdm-4.66.6-py3-none-any.whl (78 kB) + Using cached tqdm-4.67.0-py3-none-any.whl (78 kB) Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl (271 kB) Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB) - Using cached openvino_telemetry-2024.1.0-py3-none-any.whl (23 kB) + Using cached openvino_telemetry-2024.5.0-py3-none-any.whl (23 kB) Using cached filelock-3.16.1-py3-none-any.whl (16 kB) Using cached fsspec-2024.10.0-py3-none-any.whl (179 kB) Using cached networkx-3.1-py3-none-any.whl (2.1 MB) Using cached sympy-1.13.3-py3-none-any.whl (6.2 MB) Using cached traittypes-0.2.1-py2.py3-none-any.whl (8.6 kB) Installing collected packages: openvino-telemetry, mpmath, traittypes, tqdm, sympy, numpy, networkx, fsspec, filelock, torch, openvino, opencv-python, ipydatawidgets, pythreejs - Successfully installed filelock-3.16.1 fsspec-2024.10.0 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 opencv-python-4.10.0.84 openvino-2024.4.0 openvino-telemetry-2024.1.0 pythreejs-2.4.2 sympy-1.13.3 torch-2.4.1+cpu tqdm-4.66.6 traittypes-0.2.1 + Successfully installed filelock-3.16.1 fsspec-2024.10.0 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 opencv-python-4.10.0.84 openvino-2024.4.0 openvino-telemetry-2024.5.0 pythreejs-2.4.2 sympy-1.13.3 torch-2.4.1+cpu tqdm-4.67.0 traittypes-0.2.1 Note: you may need to restart the kernel to use updated packages. 
@@ -193,17 +198,19 @@ Imports # Fetch `notebook_utils` module import requests - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - with open("notebook_utils.py", "w") as f: - f.write(r.text) + if not Path("notebook_utils.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + with open("notebook_utils.py", "w") as f: + f.write(r.text) - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/engine3js.py", - ) - with open("engine3js.py", "w") as f: - f.write(r.text) + if not Path("engine3js.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/engine3js.py", + ) + with open("engine3js.py", "w") as f: + f.write(r.text) import notebook_utils as utils import engine3js as engine @@ -227,10 +234,11 @@ Download the model # directory where model will be downloaded base_model_dir = Path("model") - download_file( - "https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/2022.1/human-pose-estimation-3d-0001/human-pose-estimation-3d.tar.gz", - directory=base_model_dir, - ) + if not base_model_dir.exists(): + download_file( + "https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/2022.1/human-pose-estimation-3d-0001/human-pose-estimation-3d.tar.gz", + directory=base_model_dir, + ) ckpt_file = base_model_dir / "human-pose-estimation-3d-0001.pth" @@ -270,7 +278,7 @@ Convert Model to OpenVINO IR format .. parsed-literal:: - /tmp/ipykernel_496305/2723667668.py:9: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_3496586/2723667668.py:9: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. pose_estimation_model.load_state_dict(torch.load(ckpt_file, map_location="cpu")) @@ -660,10 +668,16 @@ picture on the left to interact. .. 
code:: ipython3 + from notebook_utils import download_file + USE_WEBCAM = False cam_id = 0 - video_path = "https://storage.openvinotoolkit.org/data/test_data/videos/face-demographics-walking.mp4" + if not Path("face-demographics-walking.mp4").exists(): + download_file( + "https://storage.openvinotoolkit.org/data/test_data/videos/face-demographics-walking.mp4", + ) + video_path = "face-demographics-walking.mp4" source = cam_id if USE_WEBCAM else video_path diff --git a/docs/notebooks/3D-segmentation-point-clouds-with-output.rst b/docs/notebooks/3D-segmentation-point-clouds-with-output.rst index e60951d40c75f9..9ac414c9421193 100644 --- a/docs/notebooks/3D-segmentation-point-clouds-with-output.rst +++ b/docs/notebooks/3D-segmentation-point-clouds-with-output.rst @@ -219,7 +219,7 @@ chair for example. .. parsed-literal:: - /tmp/ipykernel_497205/2434168836.py:12: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored + /tmp/ipykernel_3496878/2434168836.py:12: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored ax.scatter3D(X, Y, Z, s=5, cmap="jet", marker="o", label="chair") @@ -313,7 +313,7 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - /tmp/ipykernel_497205/2804603389.py:23: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored + /tmp/ipykernel_3496878/2804603389.py:23: UserWarning: No data for colormapping provided via 'c'. 
Parameters 'cmap' will be ignored ax.scatter(XCur, YCur, ZCur, s=5, cmap="jet", marker="o", label=classes[i]) diff --git a/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png b/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png index 1821f275db1019..0a8e2ecc3e82da 100644 --- a/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png +++ b/docs/notebooks/action-recognition-webcam-with-output_files/action-recognition-webcam-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:31cb7026c28d1308b88f61a6939b7e11a54948c3cb3e4f7a1a1b8a038871150f -size 68999 +oid sha256:878b0e691f8cb979e806b7101df02610fc796f00bbea26a4f1541e4b958a7bd1 +size 69011 diff --git a/docs/notebooks/all_notebooks_paths.txt b/docs/notebooks/all_notebooks_paths.txt index 5cfa565a07d239..f93cf6e0dbe8d6 100644 --- a/docs/notebooks/all_notebooks_paths.txt +++ b/docs/notebooks/all_notebooks_paths.txt @@ -48,12 +48,12 @@ notebooks/image-classification-quantization/image-classification-quantization.ip notebooks/instant-id/instant-id.ipynb notebooks/instruct-pix2pix-image-editing/instruct-pix2pix-image-editing.ipynb notebooks/internvl2/internvl2.ipynb +notebooks/jax-to-openvino/jax-classification-to-openvino.ipynb notebooks/jina-clip/jina-clip.ipynb notebooks/knowledge-graphs-conve/knowledge-graphs-conve.ipynb notebooks/kosmos2-multimodal-large-language-model/kosmos2-multimodal-large-language-model.ipynb notebooks/language-quantize-bert/language-quantize-bert.ipynb notebooks/latent-consistency-models-image-generation/latent-consistency-models-image-generation.ipynb -notebooks/latent-consistency-models-image-generation/latent-consistency-models-optimum-demo.ipynb notebooks/latent-consistency-models-image-generation/lcm-lora-controlnet.ipynb notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb 
notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-optimum.ipynb @@ -74,18 +74,22 @@ notebooks/mllama-3.2/mllama-3.2.ipynb notebooks/mms-massively-multilingual-speech/mms-massively-multilingual-speech.ipynb notebooks/mobileclip-video-search/mobileclip-video-search.ipynb notebooks/mobilevlm-language-assistant/mobilevlm-language-assistant.ipynb +notebooks/modelscope-to-openvino/modelscope-to-openvino.ipynb notebooks/model-server/model-server.ipynb +notebooks/multilora-image-generation/multilora-image-generation.ipynb notebooks/music-generation/music-generation.ipynb notebooks/named-entity-recognition/named-entity-recognition.ipynb notebooks/nano-llava-multimodal-chatbot/nano-llava-multimodal-chatbot.ipynb notebooks/nuextract-structure-extraction/nuextract-structure-extraction.ipynb notebooks/object-detection-webcam/object-detection.ipynb +notebooks/omniparser/omniparser.ipynb notebooks/oneformer-segmentation/oneformer-segmentation.ipynb notebooks/openvino-api/openvino-api.ipynb notebooks/openvino-tokenizers/openvino-tokenizers.ipynb notebooks/openvoice/openvoice.ipynb notebooks/optical-character-recognition/optical-character-recognition.ipynb notebooks/optimize-preprocessing/optimize-preprocessing.ipynb +notebooks/outetts-text-to-speech/outetts-text-to-speech.ipynb notebooks/paddle-ocr-webcam/paddle-ocr-webcam.ipynb notebooks/paddle-to-openvino/paddle-to-openvino-classification.ipynb notebooks/paint-by-example/paint-by-example.ipynb @@ -105,7 +109,7 @@ notebooks/pytorch-to-openvino/pytorch-onnx-to-openvino.ipynb notebooks/pytorch-to-openvino/pytorch-to-openvino.ipynb notebooks/qrcode-monster/qrcode-monster.ipynb notebooks/quantizing-model-with-accuracy-control/speech-recognition-quantization-wav2vec2.ipynb -notebooks/quantizing-model-with-accuracy-control/yolov8-quantization-with-accuracy-control.ipynb +notebooks/quantizing-model-with-accuracy-control/yolov11-quantization-with-accuracy-control.ipynb notebooks/qwen2-audio/qwen2-audio.ipynb 
notebooks/qwen2-vl/qwen2-vl.ipynb notebooks/riffusion-text-to-music/riffusion-text-to-music.ipynb @@ -134,6 +138,7 @@ notebooks/stable-diffusion-v2/stable-diffusion-v2-optimum-demo.ipynb notebooks/stable-diffusion-v2/stable-diffusion-v2-text-to-image-demo.ipynb notebooks/stable-diffusion-v2/stable-diffusion-v2-text-to-image.ipynb notebooks/stable-diffusion-v3/stable-diffusion-v3.ipynb +notebooks/stable-diffusion-v3/stable-diffusion-v3-torch-fx.ipynb notebooks/stable-diffusion-xl/segmind-vegart.ipynb notebooks/stable-diffusion-xl/stable-diffusion-xl.ipynb notebooks/stable-fast-3d/stable-fast-3d.ipynb diff --git a/docs/notebooks/amused-lightweight-text-to-image-with-output.rst b/docs/notebooks/amused-lightweight-text-to-image-with-output.rst deleted file mode 100644 index aafda311c34c45..00000000000000 --- a/docs/notebooks/amused-lightweight-text-to-image-with-output.rst +++ /dev/null @@ -1,984 +0,0 @@ -Lightweight image generation with aMUSEd and OpenVINO -===================================================== - -`Amused `__ -is a lightweight text to image model based off of the -`muse `__ architecture. Amused is -particularly useful in applications that require a lightweight and fast -model such as generating many images quickly at once. - -Amused is a VQVAE token based transformer that can generate an image in -fewer forward passes than many diffusion models. In contrast with muse, -it uses the smaller text encoder CLIP-L/14 instead of t5-xxl. Due to its -small parameter count and few forward pass generation process, amused -can generate many images quickly. This benefit is seen particularly at -larger batch sizes. 
- - -**Table of contents:** - - -- `Prerequisites <#prerequisites>`__ -- `Load and run the original - pipeline <#load-and-run-the-original-pipeline>`__ -- `Convert the model to OpenVINO - IR <#convert-the-model-to-openvino-ir>`__ - - - `Convert the Text Encoder <#convert-the-text-encoder>`__ - - `Convert the U-ViT transformer <#convert-the-u-vit-transformer>`__ - - `Convert VQ-GAN decoder - (VQVAE) <#convert-vq-gan-decoder-vqvae>`__ - -- `Compiling models and prepare - pipeline <#compiling-models-and-prepare-pipeline>`__ -- `Quantization <#quantization>`__ - - - `Prepare calibration dataset <#prepare-calibration-dataset>`__ - - `Run model quantization <#run-model-quantization>`__ - - `Compute Inception Scores and inference - time <#compute-inception-scores-and-inference-time>`__ - -- `Interactive inference <#interactive-inference>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. - -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. - -Prerequisites -------------- - - - -.. code:: ipython3 - - %pip install -q transformers "diffusers>=0.25.0" "openvino>=2023.2.0" "accelerate>=0.20.3" "gradio>=4.19" "torch>=2.1" "pillow" "torchmetrics" "torch-fidelity" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "nncf>=2.9.0" datasets - - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - -.. code:: ipython3 - - # Fetch the notebook utils script from the openvino_notebooks repo - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - - - - -.. 
parsed-literal:: - - 24692 - - - -Load and run the original pipeline ----------------------------------- - - - -.. code:: ipython3 - - import torch - from diffusers import AmusedPipeline - - - pipe = AmusedPipeline.from_pretrained( - "amused/amused-256", - ) - - prompt = "kind smiling ghost" - image = pipe(prompt, generator=torch.Generator("cpu").manual_seed(8)).images[0] - image.save("text2image_256.png") - - - -.. parsed-literal:: - - Loading pipeline components...: 0%| | 0/5 [00:00 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if past_key_values_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:861: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - encoder_states = () if output_hidden_states else None - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:866: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! - if output_hidden_states: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:889: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if output_hidden_states: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:892: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if not return_dict: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:988: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if not return_dict: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1486: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! - if not return_dict: - - -Convert the U-ViT transformer -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - class TransformerWrapper(torch.nn.Module): - def __init__(self, transformer): - super().__init__() - self.transformer = transformer - - def forward( - self, - latents=None, - micro_conds=None, - pooled_text_emb=None, - encoder_hidden_states=None, - ): - return self.transformer( - latents, - micro_conds=micro_conds, - pooled_text_emb=pooled_text_emb, - encoder_hidden_states=encoder_hidden_states, - ) - - - shape = (1, 16, 16) - latents = torch.full(shape, pipe.scheduler.config.mask_token_id, dtype=torch.long) - latents = torch.cat([latents] * 2) - - - example_input = { - "latents": latents, - "micro_conds": torch.rand([2, 5], dtype=torch.float32), - "pooled_text_emb": torch.rand([2, 768], dtype=torch.float32), - "encoder_hidden_states": torch.rand([2, 77, 768], dtype=torch.float32), - } - - - pipe.transformer.eval() - w_transformer = TransformerWrapper(pipe.transformer) - convert(w_transformer, TRANSFORMER_OV_PATH, example_input) - -Convert VQ-GAN decoder (VQVAE) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Function ``get_latents`` is -needed to return real latents for the conversion. Due to the VQVAE -implementation autogenerated tensor of the required shape is not -suitable. This function repeats part of ``AmusedPipeline``. - -.. 
code:: ipython3 - - def get_latents(): - shape = (1, 16, 16) - latents = torch.full(shape, pipe.scheduler.config.mask_token_id, dtype=torch.long) - model_input = torch.cat([latents] * 2) - - model_output = pipe.transformer( - model_input, - micro_conds=torch.rand([2, 5], dtype=torch.float32), - pooled_text_emb=torch.rand([2, 768], dtype=torch.float32), - encoder_hidden_states=torch.rand([2, 77, 768], dtype=torch.float32), - ) - guidance_scale = 10.0 - uncond_logits, cond_logits = model_output.chunk(2) - model_output = uncond_logits + guidance_scale * (cond_logits - uncond_logits) - - latents = pipe.scheduler.step( - model_output=model_output, - timestep=torch.tensor(0), - sample=latents, - ).prev_sample - - return latents - - - class VQVAEWrapper(torch.nn.Module): - def __init__(self, vqvae): - super().__init__() - self.vqvae = vqvae - - def forward(self, latents=None, force_not_quantize=True, shape=None): - outputs = self.vqvae.decode( - latents, - force_not_quantize=force_not_quantize, - shape=shape.tolist(), - ) - - return outputs - - - latents = get_latents() - example_vqvae_input = { - "latents": latents, - "force_not_quantize": torch.tensor(True), - "shape": torch.tensor((1, 16, 16, 64)), - } - - convert(VQVAEWrapper(pipe.vqvae), VQVAE_OV_PATH, example_vqvae_input) - - -.. parsed-literal:: - - /tmp/ipykernel_498025/3779428577.py:34: TracerWarning: Converting a tensor to a Python list might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - shape=shape.tolist(), - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/autoencoders/vq_model.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if not force_not_quantize: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:147: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if hidden_states.shape[0] >= 64: - - -Compiling models and prepare pipeline -------------------------------------- - - - -Select device from dropdown list for running inference using OpenVINO. - -.. code:: ipython3 - - from notebook_utils import device_widget - - device = device_widget() - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - -.. code:: ipython3 - - core = ov.Core() - - ov_text_encoder = core.compile_model(TEXT_ENCODER_OV_PATH, device.value) - ov_transformer = core.compile_model(TRANSFORMER_OV_PATH, device.value) - ov_vqvae = core.compile_model(VQVAE_OV_PATH, device.value) - -Let’s create callable wrapper classes for compiled models to allow -interaction with original ``AmusedPipeline`` class. 
Note that all of -wrapper classes return ``torch.Tensor``\ s instead of ``np.array``\ s. - -.. code:: ipython3 - - from collections import namedtuple - - - class ConvTextEncoderWrapper(torch.nn.Module): - def __init__(self, text_encoder, config): - super().__init__() - self.config = config - self.text_encoder = text_encoder - - def forward(self, input_ids=None, return_dict=None, output_hidden_states=None): - inputs = { - "input_ids": input_ids, - "return_dict": return_dict, - "output_hidden_states": output_hidden_states, - } - - outs = self.text_encoder(inputs) - - outputs = namedtuple("CLIPTextModelOutput", ("text_embeds", "last_hidden_state", "hidden_states")) - - text_embeds = torch.from_numpy(outs[0]) - last_hidden_state = torch.from_numpy(outs[1]) - hidden_states = list(torch.from_numpy(out) for out in outs.values())[2:] - - return outputs(text_embeds, last_hidden_state, hidden_states) - -.. code:: ipython3 - - class ConvTransformerWrapper(torch.nn.Module): - def __init__(self, transformer, config): - super().__init__() - self.config = config - self.transformer = transformer - - def forward(self, latents=None, micro_conds=None, pooled_text_emb=None, encoder_hidden_states=None, **kwargs): - outputs = self.transformer( - { - "latents": latents, - "micro_conds": micro_conds, - "pooled_text_emb": pooled_text_emb, - "encoder_hidden_states": encoder_hidden_states, - }, - share_inputs=False, - ) - - return torch.from_numpy(outputs[0]) - -.. 
code:: ipython3 - - class ConvVQVAEWrapper(torch.nn.Module): - def __init__(self, vqvae, dtype, config): - super().__init__() - self.vqvae = vqvae - self.dtype = dtype - self.config = config - - def decode(self, latents=None, force_not_quantize=True, shape=None): - inputs = { - "latents": latents, - "force_not_quantize": force_not_quantize, - "shape": torch.tensor(shape), - } - - outs = self.vqvae(inputs) - outs = namedtuple("VQVAE", "sample")(torch.from_numpy(outs[0])) - - return outs - -And insert wrappers instances in the pipeline: - -.. code:: ipython3 - - prompt = "kind smiling ghost" - - transformer = pipe.transformer - vqvae = pipe.vqvae - text_encoder = pipe.text_encoder - - pipe.__dict__["_internal_dict"]["_execution_device"] = pipe._execution_device # this is to avoid some problem that can occur in the pipeline - pipe.register_modules( - text_encoder=ConvTextEncoderWrapper(ov_text_encoder, text_encoder.config), - transformer=ConvTransformerWrapper(ov_transformer, transformer.config), - vqvae=ConvVQVAEWrapper(ov_vqvae, vqvae.dtype, vqvae.config), - ) - - image = pipe(prompt, generator=torch.Generator("cpu").manual_seed(8)).images[0] - image.save("text2image_256.png") - - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'AmusedPipeline' object attribute is deprecated. Please access '_execution_device' over 'AmusedPipeline's config object instead, e.g. 'scheduler.config._execution_device'. - deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) - - - -.. 
parsed-literal:: - - 0%| | 0/12 [00:00`__ enables -post-training quantization by adding quantization layers into model -graph and then using a subset of the training dataset to initialize the -parameters of these additional quantization layers. Quantized operations -are executed in ``INT8`` instead of ``FP32``/``FP16`` making model -inference faster. - -According to ``Amused`` pipeline structure, the vision transformer model -takes up significant portion of the overall pipeline execution time. Now -we will show you how to optimize the UNet part using -`NNCF `__ to reduce -computation cost and speed up the pipeline. Quantizing the rest of the -pipeline does not significantly improve inference performance but can -lead to a substantial degradation of generations quality. - -We also estimate the quality of generations produced by optimized -pipeline with `Inception -Score `__ which is often -used to measure quality of text-to-image generation systems. - -The steps are the following: - -1. Create a calibration dataset for quantization. -2. Run ``nncf.quantize()`` on the model. -3. Save the quantized model using ``openvino.save_model()`` function. -4. Compare inference time and Inception score for original and quantized - pipelines. - -Please select below whether you would like to run quantization to -improve model inference speed. - - **NOTE**: Quantization is time and memory consuming operation. - Running quantization code below may take some time. - -.. code:: ipython3 - - from notebook_utils import quantization_widget - - QUANTIZED_TRANSFORMER_OV_PATH = Path(str(TRANSFORMER_OV_PATH).replace(".xml", "_quantized.xml")) - - skip_for_device = "GPU" in device.value - to_quantize = quantization_widget(not skip_for_device) - to_quantize - - - - -.. parsed-literal:: - - Checkbox(value=True, description='Quantization') - - - -.. 
code:: ipython3 - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", - ) - open("skip_kernel_extension.py", "w").write(r.text) - - %load_ext skip_kernel_extension - -Prepare calibration dataset -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -We use a portion of -`conceptual_captions `__ -dataset from Hugging Face as calibration data. To collect intermediate -model inputs for calibration we customize ``CompiledModel``. - -.. code:: ipython3 - - %%skip not $to_quantize.value - - import datasets - from tqdm.auto import tqdm - from typing import Any, Dict, List - import pickle - import numpy as np - - - def disable_progress_bar(pipeline, disable=True): - if not hasattr(pipeline, "_progress_bar_config"): - pipeline._progress_bar_config = {'disable': disable} - else: - pipeline._progress_bar_config['disable'] = disable - - - class CompiledModelDecorator(ov.CompiledModel): - def __init__(self, compiled_model: ov.CompiledModel, data_cache: List[Any] = None, keep_prob: float = 0.5): - super().__init__(compiled_model) - self.data_cache = data_cache if data_cache is not None else [] - self.keep_prob = keep_prob - - def __call__(self, *args, **kwargs): - if np.random.rand() <= self.keep_prob: - self.data_cache.append(*args) - return super().__call__(*args, **kwargs) - - - def collect_calibration_data(ov_transformer_model, calibration_dataset_size: int) -> List[Dict]: - calibration_dataset_filepath = Path(f"calibration_data/{calibration_dataset_size}.pkl") - if not calibration_dataset_filepath.exists(): - calibration_data = [] - pipe.transformer.transformer = CompiledModelDecorator(ov_transformer_model, calibration_data, keep_prob=1.0) - disable_progress_bar(pipe) - - dataset = datasets.load_dataset("google-research-datasets/conceptual_captions", split="train", trust_remote_code=True).shuffle(seed=42) - - # Run inference for data collection - pbar = tqdm(total=calibration_dataset_size) - for batch in 
dataset: - prompt = batch["caption"] - if len(prompt) > pipe.tokenizer.model_max_length: - continue - pipe(prompt, generator=torch.Generator('cpu').manual_seed(0)) - pbar.update(len(calibration_data) - pbar.n) - if pbar.n >= calibration_dataset_size: - break - - pipe.transformer.transformer = ov_transformer_model - disable_progress_bar(pipe, disable=False) - - calibration_dataset_filepath.parent.mkdir(exist_ok=True, parents=True) - with open(calibration_dataset_filepath, 'wb') as f: - pickle.dump(calibration_data, f) - - with open(calibration_dataset_filepath, 'rb') as f: - calibration_data = pickle.load(f) - return calibration_data - -Run model quantization -~~~~~~~~~~~~~~~~~~~~~~ - - - -Run calibration data collection and quantize the vision transformer -model. - -.. code:: ipython3 - - %%skip not $to_quantize.value - - from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters - from nncf.quantization.range_estimator import RangeEstimatorParameters, StatisticsCollectorParameters, StatisticsType, \ - AggregatorType - import nncf - - CALIBRATION_DATASET_SIZE = 12 * 25 - - if not QUANTIZED_TRANSFORMER_OV_PATH.exists(): - calibration_data = collect_calibration_data(ov_transformer, CALIBRATION_DATASET_SIZE) - quantized_model = nncf.quantize( - core.read_model(TRANSFORMER_OV_PATH), - nncf.Dataset(calibration_data), - model_type=nncf.ModelType.TRANSFORMER, - subset_size=len(calibration_data), - # We ignore convolutions to improve quality of generations without significant drop in inference speed - ignored_scope=nncf.IgnoredScope(types=["Convolution"]), - # Value of 0.85 was obtained using grid search based on Inception Score computed below - advanced_parameters=nncf.AdvancedQuantizationParameters( - smooth_quant_alphas=AdvancedSmoothQuantParameters(matmul=0.85), - # During activation statistics collection we ignore 1% of outliers which improves quantization quality - activations_range_estimator_params=RangeEstimatorParameters( - 
min=StatisticsCollectorParameters(statistics_type=StatisticsType.MIN, - aggregator_type=AggregatorType.MEAN_NO_OUTLIERS, - quantile_outlier_prob=0.01), - max=StatisticsCollectorParameters(statistics_type=StatisticsType.MAX, - aggregator_type=AggregatorType.MEAN_NO_OUTLIERS, - quantile_outlier_prob=0.01) - ) - ) - ) - ov.save_model(quantized_model, QUANTIZED_TRANSFORMER_OV_PATH) - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, openvino - - - -.. parsed-literal:: - - 0%| | 0/300 [00:00`__ of original and -quantized pipelines on a small subset of images. Images are generated -from prompts of ``conceptual_captions`` validation set. We also measure -the time it took to generate the images for comparison reasons. - -Please note that the validation dataset size is small and serves only as -a rough estimate of generation quality. - -.. code:: ipython3 - - %%skip not $to_quantize.value - - from torchmetrics.image.inception import InceptionScore - from torchvision import transforms as transforms - from itertools import islice - import time - - VALIDATION_DATASET_SIZE = 100 - - def compute_inception_score(ov_transformer_model_path, validation_set_size, batch_size=100): - original_ov_transformer_model = pipe.transformer.transformer - pipe.transformer.transformer = core.compile_model(ov_transformer_model_path, device.value) - - disable_progress_bar(pipe) - dataset = datasets.load_dataset("google-research-datasets/conceptual_captions", "unlabeled", split="validation", trust_remote_code=True).shuffle(seed=42) - dataset = islice(dataset, validation_set_size) - - inception_score = InceptionScore(normalize=True, splits=1) - - images = [] - infer_times = [] - for batch in tqdm(dataset, total=validation_set_size, desc="Computing Inception Score"): - prompt = batch["caption"] - if len(prompt) > pipe.tokenizer.model_max_length: - continue - start_time = time.perf_counter() - image = pipe(prompt, 
generator=torch.Generator('cpu').manual_seed(0)).images[0] - infer_times.append(time.perf_counter() - start_time) - image = transforms.ToTensor()(image) - images.append(image) - - mean_perf_time = sum(infer_times) / len(infer_times) - - while len(images) > 0: - images_batch = torch.stack(images[-batch_size:]) - images = images[:-batch_size] - inception_score.update(images_batch) - kl_mean, kl_std = inception_score.compute() - - pipe.transformer.transformer = original_ov_transformer_model - disable_progress_bar(pipe, disable=False) - - return kl_mean, mean_perf_time - - - original_inception_score, original_time = compute_inception_score(TRANSFORMER_OV_PATH, VALIDATION_DATASET_SIZE) - print(f"Original pipeline Inception Score: {original_inception_score}") - quantized_inception_score, quantized_time = compute_inception_score(QUANTIZED_TRANSFORMER_OV_PATH, VALIDATION_DATASET_SIZE) - print(f"Quantized pipeline Inception Score: {quantized_inception_score}") - print(f"Quantization speed-up: {original_time / quantized_time:.2f}x") - - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:43: UserWarning: Metric `InceptionScore` will save all extracted features in buffer. For large datasets this may lead to large memory footprint. - warnings.warn(\*args, \*\*kwargs) # noqa: B028 - - - -.. parsed-literal:: - - Computing Inception Score: 0%| | 0/100 [00:00`__ tackles the task of generating animation sequences from a single character image. It @@ -37,7 +36,8 @@ repo `__ and .. warning:: - This tutorial requires at least **96 GB** of RAM for model conversion and **40 GB** for inference. Changing the values of ``HEIGHT`` ``WIDTH`` and ``VIDEO_LENGTH`` variables will change the memory consumption but will also affect accuracy. + This tutorial requires at least **96 GB** of RAM for model conversion and **40 GB** for inference. 
Changing the values of ``HEIGHT``, ``WIDTH`` and ``VIDEO_LENGTH`` variables will change the memory consumption but will also affect accuracy. + **Table of contents:** @@ -70,6 +70,9 @@ need a Jupyter server to start. For details, please refer to `Installation Guide `__. +.. |image0| image:: https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/animate-anyone/animate-anyone.gif + + Prerequisites ------------- @@ -81,13 +84,10 @@ Prerequisites import requests - REPO_PATH = Path("Moore-AnimateAnyone") - if not REPO_PATH.exists(): - !git clone -q "https://github.com/itrushkin/Moore-AnimateAnyone.git" - %pip install -q "torch>=2.1" torchvision einops omegaconf "diffusers<=0.24" transformers av accelerate "openvino>=2024.0" "nncf>=2.9.0" "gradio>=4.19" --extra-index-url "https://download.pytorch.org/whl/cpu" - import sys + %pip install -q "torch>=2.1" torchvision einops omegaconf "diffusers<=0.24" "huggingface-hub<0.26.0" transformers av accelerate "gradio>=4.19" --extra-index-url "https://download.pytorch.org/whl/cpu" + %pip install -q "openvino>=2024.0" "nncf>=2.9.0" + - sys.path.insert(0, str(REPO_PATH.resolve())) r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", ) @@ -98,8 +98,25 @@ Prerequisites ) open("notebook_utils.py", "w").write(r.text) + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", + ) + open("cmd_helper.py", "w").write(r.text) + + + from cmd_helper import clone_repo + + clone_repo("https://github.com/itrushkin/Moore-AnimateAnyone.git") + %load_ext skip_kernel_extension + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + Note that we clone a fork of original repo with tweaked forward methods. .. 
code:: ipython3 @@ -154,11 +171,9 @@ Note that we clone a fork of original repo with tweaked forward methods. .. parsed-literal:: - /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. - torch.utils._pytree._register_pytree_node( - /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. torch.utils._pytree._register_pytree_node( - /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. torch.utils._pytree._register_pytree_node( @@ -206,6 +221,13 @@ Prepare base model local_dir=local_dir, ) + + +.. parsed-literal:: + + diffusion_pytorch_model.bin: 0%| | 0.00/3.44G [00:00:2: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + :6: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + :9: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). 
In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + + Convert model to OpenVINO IR ---------------------------- @@ -324,7 +423,7 @@ semantic features are extracted through the CLIP image encoder for Cross-Attention. Temporal-Attention operates in the temporal dimension. Finally, the VAE decoder decodes the result into a video clip. -.. image:: https://humanaigc.github.io/animate-anyone/static/images/f2_img.png +|image01| The pipeline contains 6 PyTorch modules: @@ -364,6 +463,8 @@ compression parameters. More details about weights compression can be found in `OpenVINO documentation `__. +.. |image01| image:: https://humanaigc.github.io/animate-anyone/static/images/f2_img.png + .. code:: ipython3 %%skip not $SHOULD_CONVERT @@ -421,14 +522,12 @@ of the pipeline, it will be better to convert them to separate models. .. parsed-literal:: - WARNING:nncf:NNCF provides best results with torch==2.1.2, while current torch version is 2.2.2+cpu. 
If you encounter issues, consider switching to torch==2.1.2 INFO:nncf:Statistics of the bitwidth distribution: - +--------------+---------------------------+-----------------------------------+ - | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | - | | | (layers) | - +==============+===========================+===================================+ - | 8 | 100% (32 / 32) | 100% (32 / 32) | - +--------------+---------------------------+-----------------------------------+ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (32 / 32) │ 100% (32 / 32) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -444,14 +543,6 @@ of the pipeline, it will be better to convert them to separate models. - - - - - - - - .. code:: ipython3 %%skip not $SHOULD_CONVERT @@ -477,12 +568,11 @@ of the pipeline, it will be better to convert them to separate models. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - +--------------+---------------------------+-----------------------------------+ - | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | - | | | (layers) | - +==============+===========================+===================================+ - | 8 | 100% (40 / 40) | 100% (40 / 40) | - +--------------+---------------------------+-----------------------------------+ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (40 / 40) │ 100% (40 / 40) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -498,14 +588,6 @@ of the pipeline, it will be better to convert them to separate models. - - - - - - - - Reference UNet ~~~~~~~~~~~~~~ @@ -552,12 +634,11 @@ step. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - +--------------+---------------------------+-----------------------------------+ - | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | - | | | (layers) | - +==============+===========================+===================================+ - | 8 | 100% (270 / 270) | 100% (270 / 270) | - +--------------+---------------------------+-----------------------------------+ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (270 / 270) │ 100% (270 / 270) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -573,14 +654,6 @@ step. - - - - - - - - Denoising UNet ~~~~~~~~~~~~~~ @@ -654,12 +727,11 @@ step. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - +--------------+---------------------------+-----------------------------------+ - | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | - | | | (layers) | - +==============+===========================+===================================+ - | 8 | 100% (534 / 534) | 100% (534 / 534) | - +--------------+---------------------------+-----------------------------------+ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (534 / 534) │ 100% (534 / 534) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -675,14 +747,6 @@ step. - - - - - - - - Pose Guider ~~~~~~~~~~~ @@ -709,12 +773,11 @@ efficiently integrate pose control signals into the denoising process. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - +--------------+---------------------------+-----------------------------------+ - | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | - | | | (layers) | - +==============+===========================+===================================+ - | 8 | 100% (8 / 8) | 100% (8 / 8) | - +--------------+---------------------------+-----------------------------------+ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (8 / 8) │ 100% (8 / 8) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -730,14 +793,6 @@ efficiently integrate pose control signals into the denoising process. 
- - - - - - - - Image Encoder ~~~~~~~~~~~~~ @@ -763,19 +818,19 @@ required for both reference and denoising UNets. .. parsed-literal:: - /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - +--------------+---------------------------+-----------------------------------+ - | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | - | | | (layers) | - +==============+===========================+===================================+ - | 8 | 100% (146 / 146) | 100% (146 / 146) | - +--------------+---------------------------+-----------------------------------+ + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (146 / 146) │ 100% (146 / 146) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ @@ -791,14 +846,6 @@ required for both reference and denoising UNets. - - - - - - - - Inference --------- @@ -824,15 +871,6 @@ For starting work, please select inference device from dropdown list. device = device_widget() - - - -.. 
parsed-literal:: - - Dropdown(description='Device:', index=5, options=('CPU', 'GPU.0', 'GPU.1', 'GPU.2', 'GPU.3', 'AUTO'), value='A… - - - .. code:: ipython3 class OVPose2VideoPipeline(Pose2VideoPipeline): @@ -1130,7 +1168,7 @@ Video post-processing .. raw:: html @@ -1204,9 +1242,23 @@ Interactive inference demo = make_demo(fn=generate) try: - demo.queue().launch(debug=True) + demo.queue().launch(debug=False) except Exception: - demo.queue().launch(debug=True, share=True) + demo.queue().launch(debug=False, share=True) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/" + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. + + + + + + + diff --git a/docs/notebooks/auto-device-with-output.rst b/docs/notebooks/auto-device-with-output.rst index 2ebcbe7d80deb2..ad19853a06aea5 100644 --- a/docs/notebooks/auto-device-with-output.rst +++ b/docs/notebooks/auto-device-with-output.rst @@ -197,16 +197,16 @@ By default, ``compile_model`` API will select **AUTO** as .. 
parsed-literal:: - [22:41:57.1267]I[plugin.cpp:421][AUTO] device:CPU, config:LOG_LEVEL=LOG_INFO - [22:41:57.1268]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY - [22:41:57.1268]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 - [22:41:57.1268]I[plugin.cpp:421][AUTO] device:CPU, config:PERF_COUNT=NO - [22:41:57.1268]I[plugin.cpp:426][AUTO] device:CPU, priority:0 - [22:41:57.1268]I[schedule.cpp:17][AUTO] scheduler starting - [22:41:57.1269]I[auto_schedule.cpp:181][AUTO] select device:CPU - [22:41:57.2582]I[auto_schedule.cpp:346][AUTO] Device: [CPU]: Compile model took 131.300219 ms - [22:41:57.2583]I[auto_schedule.cpp:112][AUTO] device:CPU compiling model finished - [22:41:57.2584]I[plugin.cpp:454][AUTO] underlying hardware does not support hardware context + [23:30:35.1625]I[plugin.cpp:421][AUTO] device:CPU, config:LOG_LEVEL=LOG_INFO + [23:30:35.1626]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT=LATENCY + [23:30:35.1626]I[plugin.cpp:421][AUTO] device:CPU, config:PERFORMANCE_HINT_NUM_REQUESTS=0 + [23:30:35.1626]I[plugin.cpp:421][AUTO] device:CPU, config:PERF_COUNT=NO + [23:30:35.1626]I[plugin.cpp:426][AUTO] device:CPU, priority:0 + [23:30:35.1626]I[schedule.cpp:17][AUTO] scheduler starting + [23:30:35.1626]I[auto_schedule.cpp:181][AUTO] select device:CPU + [23:30:35.2748]I[auto_schedule.cpp:346][AUTO] Device: [CPU]: Compile model took 112.194882 ms + [23:30:35.2749]I[auto_schedule.cpp:112][AUTO] device:CPU compiling model finished + [23:30:35.2750]I[plugin.cpp:454][AUTO] underlying hardware does not support hardware context Successfully compiled model without a device_name. @@ -220,7 +220,7 @@ By default, ``compile_model`` API will select **AUTO** as .. 
parsed-literal:: Deleted compiled_model - [22:41:57.2639]I[schedule.cpp:308][AUTO] scheduler ending + [23:30:35.2802]I[schedule.cpp:308][AUTO] scheduler ending Explicitly pass AUTO as device_name to Core::compile_model API @@ -378,7 +378,7 @@ executed on CPU until GPU is ready. .. parsed-literal:: - Time to load model using AUTO device and get first inference: 0.12 seconds. + Time to load model using AUTO device and get first inference: 0.13 seconds. .. code:: ipython3 @@ -553,12 +553,12 @@ Loop for inference and update the FPS/Latency every Compiling Model for AUTO device with THROUGHPUT hint Start inference, 6 groups of FPS/latency will be measured over 10s intervals - throughput: 179.70fps, latency: 32.12ms, time interval: 10.00s - throughput: 183.61fps, latency: 31.86ms, time interval: 10.01s - throughput: 183.96fps, latency: 31.88ms, time interval: 10.01s - throughput: 183.98fps, latency: 31.91ms, time interval: 10.00s - throughput: 183.26fps, latency: 31.98ms, time interval: 10.01s - throughput: 183.40fps, latency: 32.01ms, time interval: 10.00s + throughput: 185.58fps, latency: 30.99ms, time interval: 10.01s + throughput: 184.03fps, latency: 31.86ms, time interval: 10.01s + throughput: 178.79fps, latency: 32.85ms, time interval: 10.00s + throughput: 182.60fps, latency: 32.13ms, time interval: 10.01s + throughput: 184.75fps, latency: 31.76ms, time interval: 10.00s + throughput: 184.82fps, latency: 31.71ms, time interval: 10.03s Done @@ -604,12 +604,12 @@ Loop for inference and update the FPS/Latency for each Compiling Model for AUTO Device with LATENCY hint Start inference, 6 groups fps/latency will be out with 10s interval - throughput: 130.56fps, latency: 7.18ms, time interval: 10.00s - throughput: 142.51fps, latency: 6.61ms, time interval: 10.01s - throughput: 142.47fps, latency: 6.62ms, time interval: 10.00s - throughput: 142.46fps, latency: 6.61ms, time interval: 10.00s - throughput: 142.63fps, latency: 6.61ms, time interval: 10.00s - throughput: 
142.73fps, latency: 6.60ms, time interval: 10.00s + throughput: 141.02fps, latency: 6.60ms, time interval: 10.01s + throughput: 142.78fps, latency: 6.59ms, time interval: 10.00s + throughput: 132.85fps, latency: 7.12ms, time interval: 10.00s + throughput: 142.85fps, latency: 6.59ms, time interval: 10.00s + throughput: 142.91fps, latency: 6.59ms, time interval: 10.01s + throughput: 142.93fps, latency: 6.59ms, time interval: 10.00s Done diff --git a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png index cc037738f18096..ee0ced8554407f 100644 --- a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png +++ b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_27_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1bedd8ff3e65a23fb4af380958a261d0916d2e0134b9426652a2779bdc06d6de -size 26887 +oid sha256:39ace04fe6c27d34344fa99d5119ed623b69144df356a39d6ab7f99cb32a81e4 +size 26587 diff --git a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png index 21be57ac89d68d..8f6ad87cc674ee 100644 --- a/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png +++ b/docs/notebooks/auto-device-with-output_files/auto-device-with-output_28_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed1ab24c30040707a36155169f4aaa91a5bff6cb48a2c5d10401ecbd87ca6f54 -size 40117 +oid sha256:5af2ed1645ba8fbde80b2c7e3e5fdf053c80531cf5d11f311c762a9921e6f668 +size 39937 diff --git a/docs/notebooks/blip-visual-language-processing-with-output.rst b/docs/notebooks/blip-visual-language-processing-with-output.rst index 09d58ec75b4fd0..a2c688c88a16b6 100644 --- a/docs/notebooks/blip-visual-language-processing-with-output.rst +++ 
b/docs/notebooks/blip-visual-language-processing-with-output.rst @@ -278,13 +278,8 @@ text and vision modalities and postprocessing of generation results. .. code:: ipython3 - import platform - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "torch>=2.1.0" torchvision "transformers>=4.26.0" "gradio>=4.19" "openvino>=2023.3.0" "datasets>=2.14.6" "nncf>=2.8.1" "tqdm" - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "matplotlib>=3.4" .. code:: ipython3 diff --git a/docs/notebooks/catvton-with-output.rst b/docs/notebooks/catvton-with-output.rst index a7a9a04359f338..f9b2a4c33a83e6 100644 --- a/docs/notebooks/catvton-with-output.rst +++ b/docs/notebooks/catvton-with-output.rst @@ -31,7 +31,9 @@ Teaser image from `CatVTON GitHub `__ |teaser| In this tutorial we consider how to convert and run this model using -OpenVINO. +OpenVINO. An additional part demonstrates how to run optimization with +`NNCF `__ to speed up +the pipeline. **Table of contents:** @@ -41,6 +43,14 @@ OpenVINO.
- `Convert the model to OpenVINO IR <#convert-the-model-to-openvino-ir>`__ - `Compiling models <#compiling-models>`__ +- `Optimize model using NNCF Post-Training Quantization + API <#optimize-model-using-nncf-post-training-quantization-api>`__ + + - `Run Post-Training + Quantization <#run-post-training-quantization>`__ + - `Run Weights Compression <#run-weights-compression>`__ + - `Compare model file sizes <#compare-model-file-sizes>`__ + - `Interactive demo <#interactive-demo>`__ Installation Instructions @@ -67,18 +77,10 @@ Prerequisites if platform.system() == "Darwin": %pip install -q "numpy<2.0.0" - %pip install -q "openvino>=2024.4" + %pip install -q "openvino>=2024.4" "nncf>=2.13.0" %pip install -q "torch>=2.1" "diffusers>=0.29.1" torchvision opencv_python --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q fvcore "pillow" "tqdm" "gradio>=4.36" "omegaconf==2.4.0.dev3" av pycocotools cloudpickle scipy accelerate "transformers>=4.27.3" - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - .. code:: ipython3 import requests @@ -90,19 +92,10 @@ Prerequisites open("notebook_utils.py", "w").write(r.text) r = requests.get( - url="https://raw.githubusercontent.com/aleksandr-mokrov/openvino_notebooks/refs/heads/catvton/utils/cmd_helper.py", + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", ) open("cmd_helper.py", "w").write(r.text) - - - -.. parsed-literal:: - - 741 - - - .. code:: ipython3 from cmd_helper import clone_repo @@ -110,15 +103,6 @@ Prerequisites clone_repo("https://github.com/Zheng-Chong/CatVTON.git", "3b795364a4d2f3b5adb365f39cdea376d20bc53c") - - - -.. parsed-literal:: - - PosixPath('CatVTON') - - - Convert the model to OpenVINO IR ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -153,177 +137,207 @@ version). .. 
code:: ipython3 - from pathlib import Path - from ov_catvton_helper import download_models, convert_pipeline_models, convert_automasker_models - - MODEL_DIR = Path("models") - VAE_ENCODER_PATH = MODEL_DIR / "vae_encoder.xml" - VAE_DECODER_PATH = MODEL_DIR / "vae_decoder.xml" - UNET_PATH = MODEL_DIR / "unet.xml" - DENSEPOSE_PROCESSOR_PATH = MODEL_DIR / "densepose_processor.xml" - SCHP_PROCESSOR_ATR = MODEL_DIR / "schp_processor_atr.xml" - SCHP_PROCESSOR_LIP = MODEL_DIR / "schp_processor_lip.xml" - - - pipeline, mask_processor, automasker = download_models(MODEL_DIR) - convert_pipeline_models(pipeline, VAE_ENCODER_PATH, VAE_DECODER_PATH, UNET_PATH) - convert_automasker_models(automasker, DENSEPOSE_PROCESSOR_PATH, SCHP_PROCESSOR_ATR, SCHP_PROCESSOR_LIP) + pipeline, mask_processor, automasker = download_models() + convert_pipeline_models(pipeline) + convert_automasker_models(automasker) + +Compiling models +---------------- -.. parsed-literal:: - Note: switching to '3b795364a4d2f3b5adb365f39cdea376d20bc53c'. - - You are in 'detached HEAD' state. You can look around, make experimental - changes and commit them, and you can discard any commits you make in this - state without impacting any branches by switching back to a branch. - - If you want to create a new branch to retain commits you create, you may - do so (now or later) by using -c with the switch command. Example: +Select device from dropdown list for running inference using OpenVINO. + +.. code:: ipython3 + + import openvino as ov - git switch -c + from notebook_utils import device_widget - Or undo this operation with: - git switch - + core = ov.Core() - Turn off this advice by setting config variable advice.detachedHead to false + device = device_widget() - HEAD is now at 3b79536 Update default base model path - - - -.. parsed-literal:: - - Fetching 10 files: 0%| | 0/10 [00:00`__ provides a suite of +advanced algorithms for Neural Networks inference optimization in +OpenVINO with minimal accuracy drop. 
We will use 8-bit quantization in +post-training mode (without the fine-tuning pipeline) for the UNet +model, and 4-bit weight compression for the remaining models. + **NOTE**: Quantization is a time- and memory-consuming operation. + Running the quantization code below may take some time. You can disable + it using the widget below: +.. code:: ipython3 -.. parsed-literal:: + from notebook_utils import quantization_widget + + to_quantize = quantization_widget() + + to_quantize - (…)nsePose/densepose_rcnn_R_50_FPN_s1x.yaml: 0%| | 0.00/182 [00:00= 64: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1111: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if dim % default_overall_up_factor != 0: +.. code:: ipython3 + %%skip not $to_quantize.value + + import nncf + from ov_catvton_helper import UNET_PATH + + if not UNET_INT8_PATH.exists(): + unet = core.read_model(UNET_PATH) + quantized_model = nncf.quantize( + model=unet, + calibration_dataset=nncf.Dataset(calibration_data), + subset_size=subset_size, + model_type=nncf.ModelType.TRANSFORMER, + ) + ov.save_model(quantized_model, UNET_INT8_PATH) -Compiling models ----------------- +Run Weights Compression +~~~~~~~~~~~~~~~~~~~~~~~ -Select device from dropdown list for running inference using OpenVINO. +Quantizing the remaining components of the pipeline does not +significantly improve inference performance but can lead to a +substantial degradation of accuracy. Weight compression is +applied to these components to reduce their footprint. ..
code:: ipython3 - import openvino as ov - - from notebook_utils import device_widget - + %%skip not $to_quantize.value - core = ov.Core() - - device = device_widget() + from catvton_quantization_helper import compress_models - device + compress_models(core) +.. code:: ipython3 + %%skip not $to_quantize.value + + from catvton_quantization_helper import ( + VAE_ENCODER_INT4_PATH, + VAE_DECODER_INT4_PATH, + DENSEPOSE_PROCESSOR_INT4_PATH, + SCHP_PROCESSOR_ATR_INT4, + SCHP_PROCESSOR_LIP_INT4, + ) + + optimized_pipe, _, optimized_automasker = download_models() + optimized_pipe = get_compiled_pipeline(optimized_pipe, core, device, VAE_ENCODER_INT4_PATH, VAE_DECODER_INT4_PATH, UNET_INT8_PATH) + optimized_automasker = get_compiled_automasker(optimized_automasker, core, device, DENSEPOSE_PROCESSOR_INT4_PATH, SCHP_PROCESSOR_ATR_INT4, SCHP_PROCESSOR_LIP_INT4) +Compare model file sizes +~~~~~~~~~~~~~~~~~~~~~~~~ -.. parsed-literal:: - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') +.. code:: ipython3 + %%skip not $to_quantize.value + from catvton_quantization_helper import compare_models_size + + compare_models_size() -``get_compiled_pipeline`` and ``get_compiled_automasker`` functions -defined in ``ov_catvton_helper.py`` provides convenient way for getting -the pipeline and the ``automasker`` with compiled ov-models that are -compatible with the original interface. It accepts the original pipeline -and ``automasker``, inference device and directories with converted -models as arguments. Under the hood we create callable wrapper classes -for compiled models to allow interaction with original pipelines. Note -that all of wrapper classes return ``torch.Tensor``\ s instead of -``np.array``\ s. And then insert wrappers instances in the pipeline. -.. code:: ipython3 +.. 
parsed-literal:: - from ov_catvton_helper import get_compiled_pipeline, get_compiled_automasker - + vae_encoder compression rate: 2.011 + vae_decoder compression rate: 2.007 + unet compression rate: 1.995 + densepose_processor compression rate: 2.019 + schp_processor_atr compression rate: 1.993 + schp_processor_lip compression rate: 1.993 - pipeline = get_compiled_pipeline(pipeline, core, device, VAE_ENCODER_PATH, VAE_DECODER_PATH, UNET_PATH) - automasker = get_compiled_automasker(automasker, core, device, DENSEPOSE_PROCESSOR_PATH, SCHP_PROCESSOR_ATR, SCHP_PROCESSOR_LIP) Interactive inference --------------------- @@ -333,28 +347,24 @@ Interactive inference Please select below whether you would like to use the quantized models to launch the interactive demo. +.. code:: ipython3 + + from ov_catvton_helper import get_pipeline_selection_option + + use_quantized_models = get_pipeline_selection_option(optimized_pipe) + + use_quantized_models + .. code:: ipython3 from gradio_helper import make_demo + pipe = optimized_pipe if use_quantized_models.value else pipeline + masker = optimized_automasker if use_quantized_models.value else automasker output_dir = "output" - demo = make_demo(pipeline, mask_processor, automasker, output_dir) + demo = make_demo(pipe, mask_processor, masker, output_dir) try: - demo.launch(debug=False) + demo.launch(debug=True) except Exception: - demo.launch(debug=False, share=True) - - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. 
- - - - - - - + demo.launch(debug=True, share=True) diff --git a/docs/notebooks/clip-language-saliency-map-with-output.rst b/docs/notebooks/clip-language-saliency-map-with-output.rst index 3c19a581410863..dd6f608429e1c5 100644 --- a/docs/notebooks/clip-language-saliency-map-with-output.rst +++ b/docs/notebooks/clip-language-saliency-map-with-output.rst @@ -122,7 +122,7 @@ Initial Implementation with Transformers and Pytorch # Install requirements %pip install -q "openvino>=2023.1.0" - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu transformers "numpy<2" "torch>=2.1" "gradio>=4.19" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu transformers "numpy<2" "torch>=2.1" "gradio>=4.19" "matplotlib>=3.4" .. code:: ipython3 diff --git a/docs/notebooks/clip-zero-shot-classification-with-output.rst b/docs/notebooks/clip-zero-shot-classification-with-output.rst index fd572a83ffb834..3da831e6d9d0dd 100644 --- a/docs/notebooks/clip-zero-shot-classification-with-output.rst +++ b/docs/notebooks/clip-zero-shot-classification-with-output.rst @@ -729,6 +729,7 @@ up of the dynamic quantized models. Interactive demo ---------------- + Now, it is your turn! You can provide your own image and comma-separated list of labels for zero-shot classification. diff --git a/docs/notebooks/controlnet-stable-diffusion-with-output.rst b/docs/notebooks/controlnet-stable-diffusion-with-output.rst index 3ab43d897ea658..f3988f276e1ccf 100644 --- a/docs/notebooks/controlnet-stable-diffusion-with-output.rst +++ b/docs/notebooks/controlnet-stable-diffusion-with-output.rst @@ -197,16 +197,31 @@ Prerequisites .. 
code:: ipython3 - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "torch>=2.1" "torchvision" - %pip install -q "diffusers>=0.14.0" "matplotlib>=3.4" "transformers>=4.30.2" "controlnet-aux>=0.0.6" "gradio>=3.36" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2023.1.0" "datasets>=2.14.6" "nncf>=2.7.0" - import requests + from pathlib import Path + + utility_files = ["notebook_utils.py", "pip_helper.py"] + + for utility in utility_files: + if not Path(utility).exists(): + r = requests.get(f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{utility}") + with open(utility, "w") as f: + f.write(r.text) - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + + from pip_helper import pip_install + + pip_install("torch>=2.1", "torchvision", "--extra-index-url", "https://download.pytorch.org/whl/cpu") + pip_install( + "diffusers>=0.14.0", + "matplotlib>=3.4", + "transformers>=4.30.2", + "controlnet-aux>=0.0.6", + "gradio>=3.36", + "--extra-index-url", + "https://download.pytorch.org/whl/cpu", ) - open("notebook_utils.py", "w").write(r.text) + pip_install("openvino>=2023.1.0", "datasets>=2.14.6", "nncf>=2.7.0", "opencv-python") Instantiating Generation Pipeline --------------------------------- @@ -272,14 +287,18 @@ Now, let us check its result on example image: .. 
code:: ipython3 - import requests from PIL import Image import matplotlib.pyplot as plt import numpy as np - + from notebook_utils import download_file example_url = "https://user-images.githubusercontent.com/29454499/224540208-c172c92a-9714-4a7b-857a-b1e54b4d4791.jpg" - img = Image.open(requests.get(example_url, stream=True).raw) + + image_path = Path("example_image.jpg") + if not image_path.exists(): + download_file(example_url, filename="example_image.jpg") + + img = Image.open(image_path) pose = pose_estimator(img) @@ -1439,10 +1458,12 @@ Let’s load ``skip magic`` extension to skip quantization if # Fetch `skip_kernel_extension` module import requests - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", - ) - open("skip_kernel_extension.py", "w").write(r.text) + + if not Path("skip_kernel_extension.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", + ) + open("skip_kernel_extension.py", "w").write(r.text) int8_pipe = None diff --git a/docs/notebooks/convert-to-openvino-with-output.rst b/docs/notebooks/convert-to-openvino-with-output.rst index 2baaf0043e7f04..507dd407eae739 100644 --- a/docs/notebooks/convert-to-openvino-with-output.rst +++ b/docs/notebooks/convert-to-openvino-with-output.rst @@ -184,10 +184,10 @@ NLP model from Hugging Face and export it in ONNX format: .. parsed-literal:: - 2024-11-04 22:48:30.842642: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-04 22:48:30.876775: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
+ 2024-11-22 00:16:16.864961: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-22 00:16:16.903350: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-04 22:48:31.539454: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-22 00:16:17.575066: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -660,7 +660,7 @@ frameworks conversion guides. .. parsed-literal:: - 2024-11-04 22:48:47.716205: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. + 2024-11-22 00:16:33.997234: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. Skipping registering GPU devices... 
diff --git a/docs/notebooks/convnext-classification-with-output.rst b/docs/notebooks/convnext-classification-with-output.rst index 6e1c039f7013c6..9466f30c22898e 100644 --- a/docs/notebooks/convnext-classification-with-output.rst +++ b/docs/notebooks/convnext-classification-with-output.rst @@ -192,7 +192,7 @@ And print results Predicted Class: 281 Predicted Label: n02123045 tabby, tabby cat - Predicted Probability: 0.4661690592765808 + Predicted Probability: 0.5919997096061707 Convert the model to OpenVINO Intermediate representation format diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst index 30778bafc8e884..c3d645f1841a17 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst @@ -154,10 +154,10 @@ Imports .. parsed-literal:: - 2024-11-04 22:49:10.827255: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-04 22:49:10.861330: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-22 00:16:56.689204: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-22 00:16:56.724390: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-04 22:49:11.454332: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-22 00:16:57.319913: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -223,7 +223,7 @@ notebook `__. .. parsed-literal:: - /tmp/ipykernel_503635/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_3514722/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. 
Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. state_dict = torch.load(state_dict_file, map_location=torch.device("cpu")) @@ -444,7 +444,7 @@ this notebook. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: @@ -526,18 +526,18 @@ Convert quantized model to OpenVINO IR model and save it. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_low.item() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_high.item() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! - Mismatched elements: 245783 / 262144 (93.8%) - Greatest absolute difference: 3.1180567741394043 at index (0, 0, 474, 435) (up to 1e-05 allowed) - Greatest relative difference: 16087.83647354372 at index (0, 0, 37, 224) (up to 1e-05 allowed) + Mismatched elements: 247214 / 262144 (94.3%) + Greatest absolute difference: 4.1846349239349365 at index (0, 0, 379, 430) (up to 1e-05 allowed) + Greatest relative difference: 15984.079041034269 at index (0, 0, 447, 390) (up to 1e-05 allowed) _check_trace( @@ -663,7 +663,7 @@ be run in the notebook with ``! benchmark_app`` or [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 8.85 ms + [ INFO ] Read model took 8.99 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] @@ -677,7 +677,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 253.47 ms + [ INFO ] Compile model took 240.78 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -714,17 +714,17 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 56.51 ms + [ INFO ] First inference took 49.70 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 406 iterations - [ INFO ] Duration: 15019.48 ms + [ INFO ] Count: 425 iterations + [ INFO ] Duration: 15023.51 ms [ INFO ] Latency: - [ INFO ] Median: 35.01 ms - [ INFO ] Average: 36.77 ms - [ INFO ] Min: 34.63 ms - [ INFO ] Max: 48.05 ms - [ INFO ] Throughput: 27.03 FPS + [ INFO ] Median: 34.55 ms + [ INFO ] Average: 35.13 ms + [ INFO ] Min: 34.21 ms + [ INFO ] Max: 47.23 ms + [ INFO ] Throughput: 28.29 FPS .. code:: ipython3 @@ -750,7 +750,7 @@ be run in the notebook with ``! benchmark_app`` or [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 10.78 ms + [ INFO ] Read model took 11.10 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] @@ -764,7 +764,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 250.08 ms + [ INFO ] Compile model took 251.41 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model49 @@ -801,17 +801,17 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 29.09 ms + [ INFO ] First inference took 29.68 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 938 iterations - [ INFO ] Duration: 15008.12 ms + [ INFO ] Count: 911 iterations + [ INFO ] Duration: 15009.49 ms [ INFO ] Latency: - [ INFO ] Median: 15.77 ms - [ INFO ] Average: 15.80 ms - [ INFO ] Min: 15.47 ms - [ INFO ] Max: 17.13 ms - [ INFO ] Throughput: 62.50 FPS + [ INFO ] Median: 15.73 ms + [ INFO ] Average: 16.27 ms + [ INFO ] Min: 15.41 ms + [ INFO ] Max: 24.40 ms + [ INFO ] Throughput: 60.69 FPS Visually Compare Inference Results @@ -905,7 +905,7 @@ seed is displayed to enable reproducing specific runs of this cell. .. parsed-literal:: - Visualizing results with seed 1730757034 + Visualizing results with seed 1732231099 @@ -989,7 +989,7 @@ performs inference, and displays the results on the frames loaded in .. parsed-literal:: Loaded model to AUTO in 0.15 seconds. 
- Total time for 68 frames: 2.36 seconds, fps:29.25 + Total time for 68 frames: 2.31 seconds, fps:29.91 References diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png index 5aa37909b71cf7..a0c854d6dd33f6 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:894600de56af211d4cc3e64ee092b5a62d1b0158c51048d17accadddea0f046e -size 382725 +oid sha256:588fb52eb7dcf0ede69419b9645ad6dc93526e8960af83679e12bac98e6817f6 +size 385527 diff --git a/docs/notebooks/ddcolor-image-colorization-with-output.rst b/docs/notebooks/ddcolor-image-colorization-with-output.rst index 409d2495e2fea6..cd3bf024065b55 100644 --- a/docs/notebooks/ddcolor-image-colorization-with-output.rst +++ b/docs/notebooks/ddcolor-image-colorization-with-output.rst @@ -25,8 +25,9 @@ In this tutorial we consider how to convert and run DDColor using OpenVINO. Additionally, we will demonstrate how to optimize this model using `NNCF `__. -🪄 Let’s start to explore magic of image colorization! #### Table of -contents: +🪄 Let’s start to explore magic of image colorization! + +**Table of contents:** - `Prerequisites <#prerequisites>`__ - `Load PyTorch model <#load-pytorch-model>`__ @@ -67,7 +68,7 @@ Prerequisites .. 
code:: ipython3 import platform - + %pip install -q "nncf>=2.11.0" "torch>=2.1" "torchvision" "timm" "opencv_python" "pillow" "PyYAML" "scipy" "scikit-image" "datasets" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -Uq "openvino>=2024.3.0" if platform.python_version_tuple()[1] in ["8", "9"]: @@ -85,39 +86,42 @@ Prerequisites .. code:: ipython3 - import sys from pathlib import Path import requests - - repo_dir = Path("DDColor") - - if not repo_dir.exists(): - !git clone https://github.com/piddnad/DDColor.git - - sys.path.append(str(repo_dir)) - + + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", + ) + open("cmd_helper.py", "w").write(r.text) + + + .. parsed-literal:: - Cloning into 'DDColor'... - remote: Enumerating objects: 241, done. - remote: Counting objects: 100% (84/84), done. - remote: Compressing objects: 100% (49/49), done. - remote: Total 241 (delta 57), reused 37 (delta 35), pack-reused 157 (from 1) - Receiving objects: 100% (241/241), 14.10 MiB | 21.95 MiB/s, done. - Resolving deltas: 100% (83/83), done. + 1491 + + + +.. code:: ipython3 + + from cmd_helper import clone_repo + + + clone_repo("https://github.com/piddnad/DDColor.git") .. parsed-literal:: - 24692 + PosixPath('DDColor') @@ -131,7 +135,7 @@ Prerequisites .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) @@ -149,14 +153,14 @@ models from DDColor family. .. code:: ipython3 import torch - + model_name = "ddcolor_paper_tiny" - + ddcolor_model = DDColorHF.from_pretrained(f"piddnad/{model_name}") - - + + colorizer = ImageColorizationPipelineHF(model=ddcolor_model, input_size=512) - + ddcolor_model.to("cpu") colorizer.device = torch.device("cpu") @@ -169,18 +173,18 @@ Run PyTorch model inference import cv2 import PIL - + IMG_PATH = "DDColor/assets/test_images/Ansel Adams _ Moore Photography.jpeg" - - + + img = cv2.imread(IMG_PATH) - + PIL.Image.fromarray(img[:, :, ::-1]) -.. image:: ddcolor-image-colorization-with-output_files/ddcolor-image-colorization-with-output_8_0.png +.. image:: ddcolor-image-colorization-with-output_files/ddcolor-image-colorization-with-output_9_0.png @@ -192,7 +196,7 @@ Run PyTorch model inference -.. image:: ddcolor-image-colorization-with-output_files/ddcolor-image-colorization-with-output_9_0.png +.. image:: ddcolor-image-colorization-with-output_files/ddcolor-image-colorization-with-output_10_0.png @@ -213,9 +217,9 @@ loading on device using ``core.complie_model``. 
import openvino as ov import torch - + OV_COLORIZER_PATH = Path("ddcolor.xml") - + if not OV_COLORIZER_PATH.exists(): ov_model = ov.convert_model(ddcolor_model, example_input=torch.ones((1, 3, 512, 512)), input=[1, 3, 512, 512]) ov.save_model(ov_model, OV_COLORIZER_PATH) @@ -230,11 +234,11 @@ Select one of supported devices for inference using dropdown list. .. code:: ipython3 from notebook_utils import device_widget - + core = ov.Core() - + device = device_widget() - + device @@ -256,36 +260,36 @@ Select one of supported devices for inference using dropdown list. import numpy as np import torch import torch.nn.functional as F - - + + def process(img, compiled_model): # Preprocess input image height, width = img.shape[:2] - + # Normalize to [0, 1] range img = (img / 255.0).astype(np.float32) orig_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1] # (h, w, 1) - + # Resize rgb image -> lab -> get grey -> rgb img = cv2.resize(img, (512, 512)) img_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1] img_gray_lab = np.concatenate((img_l, np.zeros_like(img_l), np.zeros_like(img_l)), axis=-1) img_gray_rgb = cv2.cvtColor(img_gray_lab, cv2.COLOR_LAB2RGB) - + # Transpose HWC -> CHW and add batch dimension tensor_gray_rgb = torch.from_numpy(img_gray_rgb.transpose((2, 0, 1))).float().unsqueeze(0) - + # Run model inference output_ab = compiled_model(tensor_gray_rgb)[0] - + # Postprocess result # resize ab -> concat original l -> rgb output_ab_resize = F.interpolate(torch.from_numpy(output_ab), size=(height, width))[0].float().numpy().transpose(1, 2, 0) output_lab = np.concatenate((orig_l, output_ab_resize), axis=-1) output_bgr = cv2.cvtColor(output_lab, cv2.COLOR_LAB2BGR) - + output_img = (output_bgr * 255.0).round().astype(np.uint8) - + return output_img .. code:: ipython3 @@ -296,7 +300,7 @@ Select one of supported devices for inference using dropdown list. -.. image:: ddcolor-image-colorization-with-output_files/ddcolor-image-colorization-with-output_16_0.png +.. 
image:: ddcolor-image-colorization-with-output_files/ddcolor-image-colorization-with-output_17_0.png @@ -324,7 +328,7 @@ improve model inference speed. .. code:: ipython3 from notebook_utils import quantization_widget - + to_quantize = quantization_widget() to_quantize @@ -340,15 +344,15 @@ improve model inference speed. .. code:: ipython3 import requests - + OV_INT8_COLORIZER_PATH = Path("ddcolor_int8.xml") compiled_int8_model = None - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", ) open("skip_kernel_extension.py", "w").write(r.text) - + %load_ext skip_kernel_extension Collect quantization dataset @@ -363,12 +367,12 @@ dataset from Hugging Face as calibration data. .. code:: ipython3 %%skip not $to_quantize.value - + from datasets import load_dataset - + subset_size = 300 calibration_data = [] - + if not OV_INT8_COLORIZER_PATH.exists(): dataset = load_dataset("ummagumm-a/colorization_dataset", split="train", streaming=True).shuffle(seed=42).take(subset_size) for idx, batch in enumerate(dataset): @@ -380,7 +384,7 @@ dataset from Hugging Face as calibration data. img_l = cv2.cvtColor(np.stack([img, img, img], axis=2), cv2.COLOR_BGR2Lab)[:, :, :1] img_gray_lab = np.concatenate((img_l, np.zeros_like(img_l), np.zeros_like(img_l)), axis=-1) img_gray_rgb = cv2.cvtColor(img_gray_lab, cv2.COLOR_LAB2RGB) - + image = np.expand_dims(img_gray_rgb.transpose((2, 0, 1)).astype(np.float32), axis=0) calibration_data.append(image) @@ -392,9 +396,9 @@ Perform model quantization .. code:: ipython3 %%skip not $to_quantize.value - + import nncf - + if not OV_INT8_COLORIZER_PATH.exists(): ov_model = core.read_model(OV_COLORIZER_PATH) quantized_model = nncf.quantize( @@ -412,10 +416,10 @@ Perform model quantization .. parsed-literal:: - 2024-11-04 22:52:53.152561: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-04 22:52:53.191342: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-22 00:20:47.511999: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-22 00:20:47.551328: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-04 22:52:53.595160: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-22 00:20:47.960841: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -452,7 +456,7 @@ Run INT8 model inference .. code:: ipython3 from IPython.display import display - + if OV_INT8_COLORIZER_PATH.exists(): compiled_int8_model = core.compile_model(OV_INT8_COLORIZER_PATH, device.value) img = cv2.imread("DDColor/assets/test_images/Ansel Adams _ Moore Photography.jpeg") @@ -461,7 +465,7 @@ Run INT8 model inference -.. image:: ddcolor-image-colorization-with-output_files/ddcolor-image-colorization-with-output_25_0.png +.. image:: ddcolor-image-colorization-with-output_files/ddcolor-image-colorization-with-output_26_0.png Compare FP16 and INT8 model size @@ -472,9 +476,9 @@ Compare FP16 and INT8 model size .. 
code:: ipython3 fp16_ir_model_size = OV_COLORIZER_PATH.with_suffix(".bin").stat().st_size / 2**20 - + print(f"FP16 model size: {fp16_ir_model_size:.2f} MB") - + if OV_INT8_COLORIZER_PATH.exists(): quantized_model_size = OV_INT8_COLORIZER_PATH.with_suffix(".bin").stat().st_size / 2**20 print(f"INT8 model size: {quantized_model_size:.2f} MB") @@ -513,17 +517,17 @@ Tool =2024.2.0" "datasets>=2.14.6" "nncf>=2.11.0" "tqdm" "matplotlib>=3.4" - %pip install -q "typing-extensions>=4.9.0" eval-type-backport "gradio>=4.19" - %pip install -q -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "typing-extensions>=4.9.0" eval-type-backport "gradio>=4.19" gradio_imageslider + %pip install -q torch torchvision "opencv-python" huggingface_hub --extra-index-url https://download.pytorch.org/whl/cpu + if platform.system() == "Darwin": + %pip install -q "numpy<2.0.0" if platform.python_version_tuple()[1] in ["8", "9"]: %pip install -q "gradio-imageslider<=0.0.17" "typing-extensions>=4.9.0" @@ -131,7 +146,7 @@ attention optimizations first. .. 
code:: ipython3 - attention_file_path = Path("./depth_anything_v2/dinov2_layers/attention.py") + attention_file_path = Path("./Depth-Anything-V2/depth_anything_v2/dinov2_layers/attention.py") orig_attention_path = attention_file_path.parent / ("orig_" + attention_file_path.name) if not orig_attention_path.exists(): @@ -152,19 +167,14 @@ Prepare input data from PIL import Image - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - - open("notebook_utils.py", "w").write(r.text) from notebook_utils import download_file, device_widget, quantization_widget - download_file( - "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/3f779fc1-c1b2-4dec-915a-64dae510a2bb", - "furseal.png", - ) + + if not Path("furseal.png").exists(): + download_file( + "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/3f779fc1-c1b2-4dec-915a-64dae510a2bb", + "furseal.png", + ) Image.open("furseal.png").resize((600, 400)) @@ -177,7 +187,7 @@ Prepare input data -.. image:: depth-anything-v2-with-output_files/depth-anything-v2-with-output_8_1.png +.. image:: depth-anything-v2-with-output_files/depth-anything-v2-with-output_9_1.png @@ -238,7 +248,7 @@ is preprocessed image height, ``W`` is preprocessed image width. xFormers not available xFormers not available - /tmp/ipykernel_506168/1110356474.py:8: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. 
Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_3517294/1110356474.py:8: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. model.load_state_dict(torch.load(model_path, map_location="cpu")) @@ -270,12 +280,12 @@ is preprocessed image height, ``W`` is preprocessed image width. .. parsed-literal:: - + -.. image:: depth-anything-v2-with-output_files/depth-anything-v2-with-output_14_1.png +.. image:: depth-anything-v2-with-output_files/depth-anything-v2-with-output_15_1.png Convert Model to OpenVINO IR format @@ -304,13 +314,13 @@ loading on device using ``core.complie_model``. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if npatch == N and w == h: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True) @@ -402,12 +412,12 @@ range. .. parsed-literal:: - + -.. image:: depth-anything-v2-with-output_files/depth-anything-v2-with-output_24_1.png +.. image:: depth-anything-v2-with-output_files/depth-anything-v2-with-output_25_1.png Run inference on video @@ -417,12 +427,14 @@ Run inference on video .. code:: ipython3 - download_file( - "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/Coco%20Walking%20in%20Berkeley.mp4", - "./Coco Walking in Berkeley.mp4", - ) - VIDEO_FILE = "./Coco Walking in Berkeley.mp4" + + if not Path(VIDEO_FILE).exists(): + download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/Coco%20Walking%20in%20Berkeley.mp4", + VIDEO_FILE, + ) + # Number of seconds of input video to process. Set `NUM_SECONDS` to 0 to process # the full video. NUM_SECONDS = 4 @@ -624,7 +636,7 @@ Run inference on video .. parsed-literal:: - Processed 60 frames in 13.24 seconds. Total FPS (including video processing): 4.53.Inference FPS: 10.68 + Processed 60 frames in 13.34 seconds. Total FPS (including video processing): 4.50.Inference FPS: 10.65 Video saved to 'output/Coco Walking in Berkeley_depth_anything.mp4'. @@ -651,7 +663,7 @@ Run inference on video .. 
parsed-literal:: Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -709,10 +721,11 @@ improve model inference speed. .. code:: ipython3 # Fetch `skip_kernel_extension` module - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", - ) - open("skip_kernel_extension.py", "w").write(r.text) + if not Path("skip_kernel_extension.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", + ) + open("skip_kernel_extension.py", "w").write(r.text) OV_DEPTH_ANYTHING_INT8_PATH = Path(f"{model_id}_int8.xml") @@ -784,10 +797,10 @@ quantization code below may take some time. .. parsed-literal:: - 2024-11-04 23:01:18.047102: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-04 23:01:18.080343: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-22 00:29:02.540402: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-22 00:29:02.574640: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-04 23:01:18.654050: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-22 00:29:03.160362: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -899,7 +912,7 @@ data. -.. image:: depth-anything-v2-with-output_files/depth-anything-v2-with-output_43_0.png +.. image:: depth-anything-v2-with-output_files/depth-anything-v2-with-output_44_0.png .. code:: ipython3 @@ -913,10 +926,10 @@ data. .. parsed-literal:: - Processed 60 frames in 12.60 seconds. Total FPS (including video processing): 4.76.Inference FPS: 13.12 + Processed 60 frames in 12.91 seconds. Total FPS (including video processing): 4.65.Inference FPS: 12.59 Video saved to 'output/Coco Walking in Berkeley_depth_anything_int8.mp4'. 
Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -996,9 +1009,9 @@ Tool =2023.3.0" "datasets>=2.14.6" "nncf" "tqdm" %pip install -q "typing-extensions>=4.9.0" eval-type-backport "gradio>=4.19" "matplotlib>=3.4" - %pip install -q -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q torch torchvision "opencv-python" huggingface_hub --extra-index-url https://download.pytorch.org/whl/cpu + if platform.system() == "Darwin": + %pip install -q "numpy<2.0.0" if platform.python_version_tuple()[1] in ["8", "9"]: %pip install -q "gradio-imageslider<=0.0.17" "typing-extensions>=4.9.0" .. parsed-literal:: - Cloning into 'Depth-Anything'... - remote: Enumerating objects: 441, done. - remote: Counting objects: 100% (161/161), done. - remote: Compressing objects: 100% (120/120), done. - remote: Total 441 (delta 115), reused 44 (delta 41), pack-reused 280 (from 1) - Receiving objects: 100% (441/441), 237.90 MiB | 24.22 MiB/s, done. - Resolving deltas: 100% (158/158), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. - WARNING: typer 0.12.5 does not provide the extra 'all' Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. 
@@ -110,6 +131,9 @@ attention optimizations first. .. code:: ipython3 + from pathlib import Path + + attention_file_path = Path("./torchhub/facebookresearch_dinov2_main/dinov2/layers/attention.py") orig_attention_path = attention_file_path.parent / ("orig_" + attention_file_path.name) @@ -156,15 +180,16 @@ Prepare input data from PIL import Image - import requests r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) - open("notebook_utils.py", "w").write(r.text) + + from notebook_utils import download_file, device_widget, quantization_widget + download_file( "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/3f779fc1-c1b2-4dec-915a-64dae510a2bb", "furseal.png", @@ -181,7 +206,7 @@ Prepare input data -.. image:: depth-anything-with-output_files/depth-anything-with-output_9_1.png +.. image:: depth-anything-with-output_files/depth-anything-with-output_11_1.png @@ -255,7 +280,7 @@ image size and prepare it for visualization. -.. image:: depth-anything-with-output_files/depth-anything-with-output_16_0.png +.. image:: depth-anything-with-output_files/depth-anything-with-output_18_0.png Convert Model to OpenVINO IR format @@ -284,13 +309,13 @@ loading on device using ``core.complie_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/vision_transformer.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/torchhub/facebookresearch_dinov2_main/vision_transformer.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if npatch == N and w == h: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/depth_anything/dpt.py:133: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/depth_anything/dpt.py:133: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True) @@ -356,7 +381,7 @@ Run inference on image -.. image:: depth-anything-with-output_files/depth-anything-with-output_25_0.png +.. image:: depth-anything-with-output_files/depth-anything-with-output_27_0.png Run inference on video @@ -573,7 +598,7 @@ Run inference on video .. 
parsed-literal:: - Processed 60 frames in 13.24 seconds. Total FPS (including video processing): 4.53.Inference FPS: 10.62 + Processed 60 frames in 13.63 seconds. Total FPS (including video processing): 4.40.Inference FPS: 10.11 Video saved to 'output/Coco Walking in Berkeley_depth_anything.mp4'. @@ -600,7 +625,7 @@ Run inference on video .. parsed-literal:: Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -733,10 +758,10 @@ quantization code below may take some time. .. parsed-literal:: - 2024-11-04 23:10:13.897258: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-04 23:10:13.929954: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-22 00:38:00.830321: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-22 00:38:00.863651: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-04 23:10:14.502746: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-22 00:38:01.436355: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -848,7 +873,7 @@ data. -.. image:: depth-anything-with-output_files/depth-anything-with-output_44_0.png +.. image:: depth-anything-with-output_files/depth-anything-with-output_46_0.png .. code:: ipython3 @@ -862,10 +887,10 @@ data. .. parsed-literal:: - Processed 60 frames in 12.75 seconds. Total FPS (including video processing): 4.70.Inference FPS: 12.76 + Processed 60 frames in 12.91 seconds. Total FPS (including video processing): 4.65.Inference FPS: 12.73 Video saved to 'output/Coco Walking in Berkeley_depth_anything_int8.mp4'. Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -945,9 +970,9 @@ Tool =4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) - Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (1.13.3) - Requirement already satisfied: networkx in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) - Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (2024.9.0) - Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (1.23.5) - Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (10.4.0) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) - Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch) (1.3.0) + Requirement already satisfied: torch in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) + Requirement already satisfied: torchvision in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (0.19.1+cpu) + Requirement already satisfied: opencv-python in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.10.0.84) + Requirement already satisfied: wheel in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (0.45.0) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.16.1) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) + Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (1.13.3) + Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) + Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (2024.9.0) + Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (1.23.5) + Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (10.4.0) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch) (1.3.0) Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu Collecting git+https://github.com/facebookresearch/detectron2.git - Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-9ds1xx43 + Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-4klmx21d .. parsed-literal:: - Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-9ds1xx43 + Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-4klmx21d .. 
parsed-literal:: - Resolved https://github.com/facebookresearch/detectron2.git to commit 8d85329aed8506ea3672e3e208971345973ea761 + Resolved https://github.com/facebookresearch/detectron2.git to commit c69939aa85460e8135f40bce908a6cddaa73065f Preparing metadata (setup.py): started Preparing metadata (setup.py): finished with status 'done' - Requirement already satisfied: Pillow>=7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (10.4.0) - Requirement already satisfied: black in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.3.0) - Requirement already satisfied: cloudpickle in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.1.0) - Requirement already satisfied: fvcore<0.1.6,>=0.1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.1.5.post20221221) + Requirement already satisfied: Pillow>=7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (10.4.0) + Requirement already satisfied: black in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.3.0) + Requirement already satisfied: cloudpickle in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.1.0) + Requirement already satisfied: 
fvcore<0.1.6,>=0.1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.1.5.post20221221) Collecting hydra-core>=1.1 (from detectron2==0.6) Using cached hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB) Collecting iopath<0.1.10,>=0.1.7 (from detectron2==0.6) Using cached https://download.pytorch.org/whl/iopath-0.1.9-py3-none-any.whl (27 kB) - Requirement already satisfied: matplotlib in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.7.5) + Requirement already satisfied: matplotlib in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.7.5) Collecting omegaconf<2.4,>=2.1 (from detectron2==0.6) Using cached omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.1) - Requirement already satisfied: pycocotools>=2.0.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.0.7) - Requirement already satisfied: tabulate in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.9.0) - Requirement already satisfied: tensorboard in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.12.3) - Requirement already 
satisfied: termcolor>=1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.4.0) - Requirement already satisfied: tqdm>4.29.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (4.66.6) - Requirement already satisfied: yacs>=0.1.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.1.8) - Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (1.23.5) - Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (6.0.2) - Requirement already satisfied: antlr4-python3-runtime==4.9.* in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (4.9.3) - Requirement already satisfied: importlib-resources in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (6.4.5) - Requirement already satisfied: portalocker in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath<0.1.10,>=0.1.7->detectron2==0.6) (2.10.1) - Requirement already satisfied: contourpy>=1.0.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.1.1) - Requirement already satisfied: cycler>=0.10 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (0.12.1) - Requirement already satisfied: fonttools>=4.22.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (4.54.1) - Requirement already satisfied: kiwisolver>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.4.7) - Requirement already satisfied: pyparsing>=2.3.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (3.1.4) - Requirement already satisfied: python-dateutil>=2.7 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (2.9.0.post0) - Requirement already satisfied: click>=8.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (8.1.7) - Requirement already satisfied: mypy-extensions>=0.4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (1.0.0) - Requirement already satisfied: pathspec>=0.9.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (0.12.1) - Requirement already satisfied: platformdirs>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.3.6) - Requirement already satisfied: tomli>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (2.0.2) - Requirement already satisfied: typing-extensions>=4.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.12.2) - Requirement already satisfied: absl-py>=0.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.4.0) - Requirement already satisfied: grpcio>=1.48.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.67.1) - Requirement already satisfied: google-auth<3,>=1.6.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.35.0) - Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.0.0) - Requirement already satisfied: markdown>=2.6.8 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.7) - Requirement already satisfied: protobuf>=3.19.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.20.3) - Requirement already satisfied: requests<3,>=2.21.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.32.3) - Requirement already satisfied: setuptools>=41.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (44.0.0) - Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.7.2) - Requirement already satisfied: werkzeug>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.0.6) - Requirement already satisfied: wheel>=0.26 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.44.0) - Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (5.5.0) - Requirement 
already satisfied: pyasn1-modules>=0.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.4.1) - Requirement already satisfied: rsa<5,>=3.1.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (4.9) - Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (2.0.0) - Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.1->detectron2==0.6) (3.20.2) - Requirement already satisfied: importlib-metadata>=4.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from markdown>=2.6.8->tensorboard->detectron2==0.6) (8.5.0) - Requirement already satisfied: six>=1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.7->matplotlib->detectron2==0.6) (1.16.0) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.4.0) - Requirement already satisfied: idna<4,>=2.5 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.10) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2.2.3) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2024.8.30) - Requirement already satisfied: MarkupSafe>=2.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from werkzeug>=1.0.1->tensorboard->detectron2==0.6) (2.1.5) - Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.6.1) - Requirement already satisfied: oauthlib>=3.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (3.2.2) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.2) + Requirement already satisfied: pycocotools>=2.0.2 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.0.7) + Requirement already satisfied: tabulate in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.9.0) + Requirement already satisfied: tensorboard in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.12.3) + Requirement already satisfied: termcolor>=1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.4.0) + Requirement already satisfied: tqdm>4.29.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (4.67.0) + Requirement already satisfied: yacs>=0.1.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.1.8) + Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (1.23.5) + Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (6.0.2) + Requirement already satisfied: antlr4-python3-runtime==4.9.* in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (4.9.3) + Requirement already satisfied: importlib-resources in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (6.4.5) + Requirement already satisfied: portalocker in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath<0.1.10,>=0.1.7->detectron2==0.6) (3.0.0) + Requirement already satisfied: contourpy>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.1.1) + Requirement already satisfied: cycler>=0.10 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (0.12.1) + Requirement already satisfied: fonttools>=4.22.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (4.55.0) + Requirement already satisfied: kiwisolver>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.4.7) + Requirement already satisfied: pyparsing>=2.3.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (3.1.4) + Requirement already satisfied: python-dateutil>=2.7 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (2.9.0.post0) + Requirement already satisfied: click>=8.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (8.1.7) + Requirement already satisfied: mypy-extensions>=0.4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (1.0.0) + Requirement already satisfied: pathspec>=0.9.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (0.12.1) + Requirement already satisfied: platformdirs>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.3.6) + Requirement already satisfied: tomli>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (2.1.0) + Requirement already satisfied: typing-extensions>=4.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.12.2) + Requirement already satisfied: absl-py>=0.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.4.0) + Requirement already satisfied: grpcio>=1.48.2 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.68.0) + Requirement already satisfied: google-auth<3,>=1.6.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.36.0) + Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.0.0) + Requirement already satisfied: markdown>=2.6.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.7) + Requirement already satisfied: protobuf>=3.19.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.20.3) + Requirement already satisfied: requests<3,>=2.21.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.32.3) + Requirement already satisfied: setuptools>=41.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (44.0.0) + Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.7.2) + Requirement already 
satisfied: werkzeug>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.0.6) + Requirement already satisfied: wheel>=0.26 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.45.0) + Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (5.5.0) + Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.4.1) + Requirement already satisfied: rsa<5,>=3.1.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (4.9) + Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (2.0.0) + Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.1->detectron2==0.6) (3.20.2) + Requirement already satisfied: importlib-metadata>=4.4 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from markdown>=2.6.8->tensorboard->detectron2==0.6) (8.5.0) + Requirement already satisfied: six>=1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.7->matplotlib->detectron2==0.6) (1.16.0) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.4.0) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2024.8.30) + Requirement already satisfied: MarkupSafe>=2.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from werkzeug>=1.0.1->tensorboard->detectron2==0.6) (2.1.5) + Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.6.1) + Requirement already satisfied: oauthlib>=3.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (3.2.2) Using cached hydra_core-1.3.2-py3-none-any.whl (154 kB) Using cached omegaconf-2.3.0-py3-none-any.whl (79 kB) Building wheels for collected packages: detectron2 Building wheel for detectron2 (setup.py): started Building wheel for detectron2 (setup.py): finished with status 'done' - Created wheel for detectron2: filename=detectron2-0.6-cp38-cp38-linux_x86_64.whl size=8313237 sha256=7cd84a15a89de76a7ab5b648f2fb7ebff63b7e43ffc90c7f19a568d16858de8a - Stored in directory: /tmp/pip-ephem-wheel-cache-uvptv5zg/wheels/19/ac/65/e48e5e4ec2702274d927c5a6efb75709b24014371d3bb778f2 + Created wheel for detectron2: filename=detectron2-0.6-cp38-cp38-linux_x86_64.whl size=8313367 sha256=4eb79589c47d522c993509a8f16dfbf494af0f494c6a73577d9d3668c1ee4a05 + Stored in directory: /tmp/pip-ephem-wheel-cache-mkdcktsx/wheels/19/ac/65/e48e5e4ec2702274d927c5a6efb75709b24014371d3bb778f2 Successfully built detectron2 Installing collected packages: omegaconf, iopath, hydra-core, detectron2 Attempting uninstall: omegaconf @@ -203,10 +203,10 @@ Install required packages for running model Uninstalling iopath-0.1.10: Successfully uninstalled iopath-0.1.10 Successfully installed detectron2-0.6 hydra-core-1.3.2 iopath-0.1.9 omegaconf-2.3.0 - Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) - Requirement 
already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) - Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.1) + Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) + Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) + Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.5.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.2) Define helpers for PyTorch model initialization and conversion diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg index f5b1d98eea3213..2c18ecdc61719a 100644 --- 
a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0df4e94924f81aab66086702d85a461f463078f0d06f67b1fe5d46ad8480aa91 -size 58652 +oid sha256:edc1fd6c9bb94b1ff9dde163988de0d5635f35a9cb918138eb058de89fe36b6c +size 58029 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png index f676b44edd1d9a..0890e13959d7b2 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b5a857cd060d740290ccc65aec47252aad9f41c665dc2808195c3185248977e8 -size 509376 +oid sha256:b54cfa3647ce484120c2dac840789885273b1a61d0fdf6fd1fdb93e78753c114 +size 509016 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg index 67719cdcbd66b0..d2b1ec1ee92784 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ddc40900fddf1a115903c4e200899306060114348bf2ca82fbb4d7d92a885b09 -size 53897 +oid sha256:0ffdd1e786238678562e14aa201c2a602b1733bb7db8b1c175f7d86b3c011fa2 +size 54276 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png 
b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png index af63ef41697b47..d970f117246904 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1276209027e5aac72e4bb6f39f4494d2a807ee4bd85054a1285b0832e4515b9 -size 460797 +oid sha256:6b8a9ccae3ca190acfaa9ddaa9be7641e02edae972b15c49f21cf9a8de9ae454 +size 456077 diff --git a/docs/notebooks/distilbert-sequence-classification-with-output.rst b/docs/notebooks/distilbert-sequence-classification-with-output.rst deleted file mode 100644 index 862079f68aeeb7..00000000000000 --- a/docs/notebooks/distilbert-sequence-classification-with-output.rst +++ /dev/null @@ -1,338 +0,0 @@ -Sentiment Analysis with OpenVINO™ -================================= - -**Sentiment analysis** is the use of natural language processing, text -analysis, computational linguistics, and biometrics to systematically -identify, extract, quantify, and study affective states and subjective -information. This notebook demonstrates how to convert and run a -sequence classification model using OpenVINO. - - -**Table of contents:** - - -- `Imports <#imports>`__ -- `Initializing the Model <#initializing-the-model>`__ -- `Initializing the Tokenizer <#initializing-the-tokenizer>`__ -- `Convert Model to OpenVINO Intermediate Representation - format <#convert-model-to-openvino-intermediate-representation-format>`__ - - - `Select inference device <#select-inference-device>`__ - -- `Inference <#inference>`__ - - - `For a single input sentence <#for-a-single-input-sentence>`__ - - `Read from a text file <#read-from-a-text-file>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. 
- -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. - -Imports -------- - - - -.. code:: ipython3 - - %pip install "openvino>=2023.1.0" transformers "torch>=2.1" tqdm --extra-index-url https://download.pytorch.org/whl/cpu - - -.. parsed-literal:: - - Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu - Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) - Requirement already satisfied: transformers in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.46.1) - Requirement already satisfied: torch>=2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) - Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.66.6) - Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) - Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.1.0) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) 
(24.1) - Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (3.16.1) - Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.26.2) - Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (6.0.2) - Requirement already satisfied: regex!=2019.12.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2024.9.11) - Requirement already satisfied: requests in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (2.32.3) - Requirement already satisfied: safetensors>=0.4.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.4.5) - Requirement already satisfied: tokenizers<0.21,>=0.20 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from transformers) (0.20.2) - Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (4.12.2) - Requirement already satisfied: sympy in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (1.13.3) - Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (3.1.4) - Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1) (2024.9.0) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1) (2.1.5) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.4.0) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (3.10) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2.2.3) - Requirement already satisfied: certifi>=2017.4.17 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests->transformers) (2024.8.30) - Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1) (1.3.0) - Note: you may need to restart the kernel to use updated packages. - - -.. code:: ipython3 - - import warnings - from pathlib import Path - import time - from transformers import AutoModelForSequenceClassification, AutoTokenizer - import numpy as np - import openvino as ov - -.. code:: ipython3 - - # Fetch `notebook_utils` module - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - - open("notebook_utils.py", "w").write(r.text) - from notebook_utils import download_file, device_widget - -Initializing the Model ----------------------- - - - -We will use the transformer-based `DistilBERT base uncased finetuned -SST-2 `__ -model from Hugging Face. - -.. code:: ipython3 - - checkpoint = "distilbert-base-uncased-finetuned-sst-2-english" - model = AutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path=checkpoint) - - -.. parsed-literal:: - - 2024-11-04 23:18:47.102633: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-04 23:18:47.135966: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
- To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-04 23:18:47.793551: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - -Initializing the Tokenizer --------------------------- - - - -Text Preprocessing cleans the text-based input data so it can be fed -into the model. -`Tokenization `__ -splits paragraphs and sentences into smaller units that can be more -easily assigned meaning. It involves cleaning the data and assigning -tokens or IDs to the words, so they are represented in a vector space -where similar words have similar vectors. This helps the model -understand the context of a sentence. Here, we will use -`AutoTokenizer `__ -- a pre-trained tokenizer from Hugging Face: - -.. code:: ipython3 - - tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=checkpoint) - -Convert Model to OpenVINO Intermediate Representation format ------------------------------------------------------------- - - - -`Model conversion -API `__ -facilitates the transition between training and deployment environments, -performs static model analysis, and adjusts deep learning models for -optimal execution on end-point target devices. - -.. code:: ipython3 - - import torch - - ir_xml_name = checkpoint + ".xml" - MODEL_DIR = "model/" - ir_xml_path = Path(MODEL_DIR) / ir_xml_name - - MAX_SEQ_LENGTH = 128 - input_info = [ - (ov.PartialShape([1, -1]), ov.Type.i64), - (ov.PartialShape([1, -1]), ov.Type.i64), - ] - default_input = torch.ones(1, MAX_SEQ_LENGTH, dtype=torch.int64) - inputs = { - "input_ids": default_input, - "attention_mask": default_input, - } - - ov_model = ov.convert_model(model, input=input_info, example_input=inputs) - ov.save_model(ov_model, ir_xml_path) - - -.. parsed-literal:: - - WARNING:tensorflow:Please fix your imports. 
Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - - -.. parsed-literal:: - - [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead - warnings.warn( - `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. - - -OpenVINO™ Runtime uses the `Infer -Request `__ -mechanism which enables running models on different devices in -asynchronous or synchronous manners. The model graph is sent as an -argument to the OpenVINO API and an inference request is created. The -default inference mode is AUTO but it can be changed according to -requirements and hardware available. You can explore the different -inference modes and their usage `in -documentation. `__ - -.. code:: ipython3 - - core = ov.Core() - -Select inference device -~~~~~~~~~~~~~~~~~~~~~~~ - - - -select device from dropdown list for running inference using OpenVINO - -.. code:: ipython3 - - device = device_widget() - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') - - - -.. code:: ipython3 - - warnings.filterwarnings("ignore") - compiled_model = core.compile_model(ov_model, device.value) - infer_request = compiled_model.create_infer_request() - -.. 
code:: ipython3 - - def softmax(x): - """ - Defining a softmax function to extract - the prediction from the output of the IR format - Parameters: Logits array - Returns: Probabilities - """ - - e_x = np.exp(x - np.max(x)) - return e_x / e_x.sum() - -Inference ---------- - - - -.. code:: ipython3 - - def infer(input_text): - """ - Creating a generic inference function - to read the input and infer the result - into 2 classes: Positive or Negative. - Parameters: Text to be processed - Returns: Label: Positive or Negative. - """ - - input_text = tokenizer( - input_text, - truncation=True, - return_tensors="np", - ) - inputs = dict(input_text) - label = {0: "NEGATIVE", 1: "POSITIVE"} - result = infer_request.infer(inputs=inputs) - for i in result.values(): - probability = np.argmax(softmax(i)) - return label[probability] - -For a single input sentence -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - input_text = "I had a wonderful day" - start_time = time.perf_counter() - result = infer(input_text) - end_time = time.perf_counter() - total_time = end_time - start_time - print("Label: ", result) - print("Total Time: ", "%.2f" % total_time, " seconds") - - -.. parsed-literal:: - - Label: POSITIVE - Total Time: 0.03 seconds - - -Read from a text file -~~~~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - # Download the text from the openvino_notebooks storage - vocab_file_path = download_file( - "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/text/food_reviews.txt", - directory="data", - ) - - - -.. parsed-literal:: - - data/food_reviews.txt: 0%| | 0.00/71.0 [00:00`__) and -techniques such as `InstructGPT `__ -has been the core foundation of breakthroughs such as ChatGPT and GPT-4. -However, these powerful models remain hidden behind APIs and we know -very little about their underlying architecture. 
Instruction-following -models are capable of generating text in response to prompts and are -often used for tasks like writing assistance, chatbots, and content -generation. Many users now interact with these models regularly and even -use them for work but the majority of such models remain closed-source -and require massive amounts of computational resources to experiment -with. - -`Dolly -2.0 `__ -is the first open-source, instruction-following LLM fine-tuned by -Databricks on a transparent and freely available dataset that is also -open-sourced to use for commercial purposes. That means Dolly 2.0 is -available for commercial applications without the need to pay for API -access or share data with third parties. Dolly 2.0 exhibits similar -characteristics so ChatGPT despite being much smaller. - -In this tutorial, we consider how to run an instruction-following text -generation pipeline using Dolly 2.0 and OpenVINO. We will use a -pre-trained model from the `Hugging Face -Transformers `__ -library. To simplify the user experience, the `Hugging Face Optimum -Intel `__ library is -used to convert the models to OpenVINO™ IR format. - -The tutorial consists of the following steps: - -- Install prerequisites -- Download and convert the model from a public source using the - `OpenVINO integration with Hugging Face - Optimum `__. -- Compress model weights to INT8 with `OpenVINO - NNCF `__ -- Create an instruction-following inference pipeline -- Run instruction-following pipeline - -About Dolly 2.0 ---------------- - -Dolly 2.0 is an instruction-following large language model trained on -the Databricks machine-learning platform that is licensed for commercial -use. It is based on `Pythia `__ -and is trained on ~15k instruction/response fine-tuning records -generated by Databricks employees in various capability domains, -including brainstorming, classification, closed QA, generation, -information extraction, open QA, and summarization. 
Dolly 2.0 works by -processing natural language instructions and generating responses that -follow the given instructions. It can be used for a wide range of -applications, including closed question-answering, summarization, and -generation. - -The model training process was inspired by -`InstructGPT `__. To train InstructGPT -models, the core technique is reinforcement learning from human feedback -(RLHF), This technique uses human preferences as a reward signal to -fine-tune models, which is important as the safety and alignment -problems required to be solved are complex and subjective, and aren’t -fully captured by simple automatic metrics. More details about the -InstructGPT approach can be found in OpenAI `blog -post `__ The -breakthrough discovered with InstructGPT is that language models don’t -need larger and larger training sets. By using human-evaluated -question-and-answer training, authors were able to train a better -language model using one hundred times fewer parameters than the -previous model. Databricks used a similar approach to create a prompt -and response dataset called they call -`databricks-dolly-15k `__, -a corpus of more than 15,000 records generated by thousands of -Databricks employees to enable large language models to exhibit the -magical interactivity of InstructGPT. 
More details about the model and -dataset can be found in `Databricks blog -post `__ -and `repo `__ - - -**Table of contents:** - - -- `Prerequisites <#prerequisites>`__ -- `Convert model using Optimum-CLI - tool <#convert-model-using-optimum-cli-tool>`__ -- `Compress model weights <#compress-model-weights>`__ - - - `Weights Compression using - Optimum-CLI <#weights-compression-using-optimum-cli>`__ - -- `Select model variant and inference - device <#select-model-variant-and-inference-device>`__ -- `Instantiate Model using Optimum - Intel <#instantiate-model-using-optimum-intel>`__ -- `Create an instruction-following inference - pipeline <#create-an-instruction-following-inference-pipeline>`__ - - - `Setup imports <#setup-imports>`__ - - `Prepare template for user - prompt <#prepare-template-for-user-prompt>`__ - - `Helpers for output parsing <#helpers-for-output-parsing>`__ - - `Main generation function <#main-generation-function>`__ - - `Helpers for application <#helpers-for-application>`__ - -- `Run instruction-following - pipeline <#run-instruction-following-pipeline>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. - -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. - -Prerequisites -------------- - - - -First, we should install the `Hugging Face -Optimum `__ library -accelerated by OpenVINO integration. The Hugging Face Optimum Intel API -is a high-level API that enables us to convert and quantize models from -the Hugging Face Transformers library to the OpenVINO™ IR format. For -more details, refer to the `Hugging Face Optimum Intel -documentation `__. - -.. 
code:: ipython3 - - import os - from pathlib import Path - import requests - - os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" - - %pip uninstall -q -y optimum optimum-intel - %pip install --pre -Uq "openvino>=2024.2.0" openvino-tokenizers[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - %pip install -q "diffusers>=0.16.1" "transformers>=4.33.0" "torch>=2.1" "nncf>=2.10.0" "onnx<1.16.2" "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "git+https://github.com/huggingface/optimum-intel.git" - - - utility_files = ["notebook_utils.py", "cmd_helper.py"] - - for utility in utility_files: - local_path = Path(utility) - if not local_path.exists(): - r = requests.get( - url=f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{local_path.name}", - ) - with local_path.open("w") as f: - f.write(r.text) - -Convert model using Optimum-CLI tool ------------------------------------- - - - -`Optimum Intel `__ is -the interface between the -`Transformers `__ and -`Diffusers `__ libraries -and OpenVINO to accelerate end-to-end pipelines on Intel architectures. -It provides ease-to-use cli interface for exporting models to `OpenVINO -Intermediate Representation -(IR) `__ -format. - -The command bellow demonstrates basic command for model export with -``optimum-cli`` - -.. code:: bash - - optimum-cli export openvino --model --task - -where ``--model`` argument is model id from HuggingFace Hub or local -directory with model (saved using ``.save_pretrained`` method), -``--task`` is one of `supported -task `__ -that exported model should solve. For LLMs it will be -``text-generation-with-past``. If model initialization requires to use -remote code, ``--trust-remote-code`` flag additionally should be passed. 
- -Compress model weights ----------------------- - - - -The `Weights -Compression `__ -algorithm is aimed at compressing the weights of the models and can be -used to optimize the model footprint and performance of large models -where the size of weights is relatively larger than the size of -activations, for example, Large Language Models (LLM). Compared to INT8 -compression, INT4 compression improves performance even more, but -introduces a minor drop in prediction quality. - -Weights Compression using Optimum-CLI -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -You can also apply fp16, 8-bit or 4-bit weight compression on the -Linear, Convolutional and Embedding layers when exporting your model -with the CLI by setting ``--weight-format`` to respectively fp16, int8 -or int4. This type of optimization allows to reduce the memory footprint -and inference latency. By default the quantization scheme for int8/int4 -will be -`asymmetric `__, -to make it -`symmetric `__ -you can add ``--sym``. - -For INT4 quantization you can also specify the following arguments : - -- The ``--group-size`` parameter will define the group size to use for - quantization, -1 it will results in per-column quantization. -- The ``--ratio`` parameter controls the ratio between 4-bit and 8-bit - quantization. If set to 0.9, it means that 90% of the layers will be - quantized to int4 while 10% will be quantized to int8. - -Smaller group_size and ratio values usually improve accuracy at the -sacrifice of the model size and inference latency. - - **Note**: There may be no speedup for INT4/INT8 compressed models on - dGPU. - -.. 
code:: ipython3 - - from IPython.display import display - import ipywidgets as widgets - - prepare_int4_model = widgets.Checkbox( - value=True, - description="Prepare INT4 model", - disabled=False, - ) - prepare_int8_model = widgets.Checkbox( - value=False, - description="Prepare INT8 model", - disabled=False, - ) - prepare_fp16_model = widgets.Checkbox( - value=False, - description="Prepare FP16 model", - disabled=False, - ) - - display(prepare_int4_model) - display(prepare_int8_model) - display(prepare_fp16_model) - - - -.. parsed-literal:: - - Checkbox(value=True, description='Prepare INT4 model') - - - -.. parsed-literal:: - - Checkbox(value=False, description='Prepare INT8 model') - - - -.. parsed-literal:: - - Checkbox(value=False, description='Prepare FP16 model') - - -.. code:: ipython3 - - from pathlib import Path - from cmd_helper import optimum_cli - - model_id = "databricks/dolly-v2-3b" - model_path = Path("dolly-v2-3b") - - fp16_model_dir = model_path / "FP16" - int8_model_dir = model_path / "INT8_compressed_weights" - int4_model_dir = model_path / "INT4_compressed_weights" - - - def convert_to_fp16(): - if (fp16_model_dir / "openvino_model.xml").exists(): - return - optimum_cli(model_id, fp16_model_dir, additional_args={"weight-format": "fp16"}) - - - def convert_to_int8(): - if (int8_model_dir / "openvino_model.xml").exists(): - return - optimum_cli(model_id, int8_model_dir, additional_args={"weight-format": "int8"}) - - - def convert_to_int4(): - if (int4_model_dir / "openvino_model.xml").exists(): - return - optimum_cli(model_id, int4_model_dir, additional_args={"weight-format": "int4"}) - - - if prepare_fp16_model.value: - convert_to_fp16() - if prepare_int8_model.value: - convert_to_int8() - if prepare_int4_model.value: - convert_to_int4() - - - -**Export command:** - - - -``optimum-cli export openvino --model databricks/dolly-v2-3b --task text-generation-with-past --weight-format int4 --ratio 1.0 --group-size 128 
dolly-v2-3b/INT4_compressed_weights`` - - -.. parsed-literal:: - - 2024-07-24 11:40:56.083018: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-07-24 11:40:56.084962: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. - 2024-07-24 11:40:56.121994: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. - 2024-07-24 11:40:56.122347: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-07-24 11:40:56.845683: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. - torch.utils._pytree._register_pytree_node( - /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: '/home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/torchvision/image.so: undefined symbol: _ZN3c1017RegisterOperatorsD1Ev'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source? - warn( - WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. 
xFormers was built for: - PyTorch 2.0.1+cu118 with CUDA 1108 (you have 2.3.1+cpu) - Python 3.8.18 (you have 3.8.10) - Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers) - Memory-efficient attention, SwiGLU, sparse and more won't be available. - Set XFORMERS_MORE_DETAILS=1 for more details - /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. - torch.utils._pytree._register_pytree_node( - /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. - warn("The installed version of bitsandbytes was compiled without GPU support. " - /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32 - /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. - torch.utils._pytree._register_pytree_node( - Framework not specified. Using pt to export the model. - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. 
- Using framework PyTorch: 2.3.1+cpu - Overriding 1 configuration item(s) - - use_cache -> True - /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py:934: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert batch_size > 0, "batch_size has to be defined and > 0" - /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if (input_shape[-1] > 1 or self.sliding_window is not None) and self.is_causal: - /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/optimum/exporters/onnx/model_patcher.py:304: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if past_key_values_length > 0: - /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py:617: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
- if seq_len > self.max_seq_len_cached: - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 9% (2 / 130) │ 0% (0 / 128) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 91% (128 / 130) │ 100% (128 / 128) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - Applying Weight Compression ━━━━━━━━━━━━━━━━━━━ 100% 130/130 • 0:01:38 • 0:00:00;0;104;181m0:00:01181m0:00:04 - - -.. code:: ipython3 - - fp16_weights = fp16_model_dir / "openvino_model.bin" - int8_weights = int8_model_dir / "openvino_model.bin" - int4_weights = int4_model_dir / "openvino_model.bin" - - if fp16_weights.exists(): - print(f"Size of FP16 model is {fp16_weights.stat().st_size / 1024 / 1024:.2f} MB") - for precision, compressed_weights in zip([8, 4], [int8_weights, int4_weights]): - if compressed_weights.exists(): - print(f"Size of model with INT{precision} compressed weights is {compressed_weights.stat().st_size / 1024 / 1024:.2f} MB") - if compressed_weights.exists() and fp16_weights.exists(): - print(f"Compression rate for INT{precision} model: {fp16_weights.stat().st_size / compressed_weights.stat().st_size:.3f}") - - -.. parsed-literal:: - - Size of model with INT4 compressed weights is 1497.06 MB - - -Select model variant and inference device -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -select device from dropdown list for running inference using OpenVINO - -.. 
code:: ipython3 - - available_models = [] - if int4_model_dir.exists(): - available_models.append("INT4") - if int8_model_dir.exists(): - available_models.append("INT8") - if fp16_model_dir.exists(): - available_models.append("FP16") - - model_to_run = widgets.Dropdown( - options=available_models, - value=available_models[0], - description="Model to run:", - disabled=False, - ) - - model_to_run - - - - -.. parsed-literal:: - - Dropdown(description='Model to run:', options=('INT4',), value='INT4') - - - -.. code:: ipython3 - - from notebook_utils import device_widget - import openvino as ov - - core = ov.Core() - - device = device_widget("CPU", exclude=["NPU"]) - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') - - - -Instantiate Model using Optimum Intel -------------------------------------- - - - -Optimum Intel can be used to load optimized models from the `Hugging -Face Hub `__ and -create pipelines to run an inference with OpenVINO Runtime using Hugging -Face APIs. The Optimum Inference models are API compatible with Hugging -Face Transformers models. This means we just need to replace -``AutoModelForXxx`` class with the corresponding ``OVModelForXxx`` -class. - -Below is an example of the Dolly model - -.. code:: diff - - -from transformers import AutoModelForCausalLM - +from optimum.intel.openvino import OVModelForCausalLM - from transformers import AutoTokenizer, pipeline - - model_id = "databricks/dolly-v2-3b" - -model = AutoModelForCausalLM.from_pretrained(model_id) - +model = OVModelForCausalLM.from_pretrained(model_id, export=True) - -Model class initialization starts with calling ``from_pretrained`` -method. When downloading and converting Transformers model, the -parameter ``export=True`` should be added (as we already converted model -before, we do not need to provide this parameter). We can save the -converted model for the next usage with the ``save_pretrained`` method. 
-Tokenizer class and pipelines API are compatible with Optimum models. - -You can find more details about OpenVINO LLM inference using HuggingFace -Optimum API in `LLM inference -guide `__. - -.. code:: ipython3 - - from pathlib import Path - - from transformers import AutoTokenizer - from optimum.intel.openvino import OVModelForCausalLM - - import openvino.properties as props - import openvino.properties.hint as hints - import openvino.properties.streams as streams - - - if model_to_run.value == "INT4": - model_dir = int4_model_dir - elif model_to_run.value == "INT8": - model_dir = int8_model_dir - else: - model_dir = fp16_model_dir - print(f"Loading model from {model_dir}") - - tokenizer = AutoTokenizer.from_pretrained(model_dir) - - current_device = device.value - - ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} - - ov_model = OVModelForCausalLM.from_pretrained(model_dir, device=current_device, ov_config=ov_config) - - -.. parsed-literal:: - - 2024-07-24 11:43:17.404362: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-07-24 11:43:17.406313: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. - 2024-07-24 11:43:17.443348: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. - 2024-07-24 11:43:17.444995: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-07-24 11:43:18.193758: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. - torch.utils._pytree._register_pytree_node( - /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: '/home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/torchvision/image.so: undefined symbol: _ZN3c1017RegisterOperatorsD1Ev'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source? - warn( - WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for: - PyTorch 2.0.1+cu118 with CUDA 1108 (you have 2.3.1+cpu) - Python 3.8.18 (you have 3.8.10) - Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers) - Memory-efficient attention, SwiGLU, sparse and more won't be available. - Set XFORMERS_MORE_DETAILS=1 for more details - /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. - torch.utils._pytree._register_pytree_node( - /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. - warn("The installed version of bitsandbytes was compiled without GPU support. " - - -.. 
parsed-literal:: - - /home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32 - Loading model from dolly-v2-3b/INT4_compressed_weights - - -.. parsed-literal:: - - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. - Compiling the model to CPU ... - - -Create an instruction-following inference pipeline --------------------------------------------------- - - - -The ``run_generation`` function accepts user-provided text input, -tokenizes it, and runs the generation process. Text generation is an -iterative process, where each next token depends on previously generated -until a maximum number of tokens or stop generation condition is not -reached. To obtain intermediate generation results without waiting until -when generation is finished, we will use -`TextIteratorStreamer `__, -provided as part of HuggingFace `Streaming -API `__. - -The diagram below illustrates how the instruction-following pipeline -works - -.. figure:: https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/e881f4a4-fcc8-427a-afe1-7dd80aebd66e - :alt: generation pipeline) - - generation pipeline) - -As can be seen, on the first iteration, the user provided instructions -converted to token ids using a tokenizer, then prepared input provided -to the model. The model generates probabilities for all tokens in logits -format The way the next token will be selected over predicted -probabilities is driven by the selected decoding methodology. You can -find more information about the most popular decoding methods in this -`blog `__. - -There are several parameters that can control text generation quality: - -- | ``Temperature`` is a parameter used to control the level of - creativity in AI-generated text. 
By adjusting the ``temperature``, - you can influence the AI model’s probability distribution, making - the text more focused or diverse. - | Consider the following example: The AI model has to complete the - sentence “The cat is \____.” with the following token - probabilities: - - | playing: 0.5 - | sleeping: 0.25 - | eating: 0.15 - | driving: 0.05 - | flying: 0.05 - - - **Low temperature** (e.g., 0.2): The AI model becomes more focused - and deterministic, choosing tokens with the highest probability, - such as “playing.” - - **Medium temperature** (e.g., 1.0): The AI model maintains a - balance between creativity and focus, selecting tokens based on - their probabilities without significant bias, such as “playing,” - “sleeping,” or “eating.” - - **High temperature** (e.g., 2.0): The AI model becomes more - adventurous, increasing the chances of selecting less likely - tokens, such as “driving” and “flying.” - -- ``Top-p``, also known as nucleus sampling, is a parameter used to - control the range of tokens considered by the AI model based on their - cumulative probability. By adjusting the ``top-p`` value, you can - influence the AI model’s token selection, making it more focused or - diverse. Using the same example with the cat, consider the following - top_p settings: - - - **Low top_p** (e.g., 0.5): The AI model considers only tokens with - the highest cumulative probability, such as “playing.” - - **Medium top_p** (e.g., 0.8): The AI model considers tokens with a - higher cumulative probability, such as “playing,” “sleeping,” and - “eating.” - - **High top_p** (e.g., 1.0): The AI model considers all tokens, - including those with lower probabilities, such as “driving” and - “flying.” - -- ``Top-k`` is another popular sampling strategy. 
In comparison with - Top-P, which chooses from the smallest possible set of words whose - cumulative probability exceeds the probability P, in Top-K sampling K - most likely next words are filtered and the probability mass is - redistributed among only those K next words. In our example with cat, - if k=3, then only “playing”, “sleeping” and “eating” will be taken - into account as possible next word. - -To optimize the generation process and use memory more efficiently, the -``use_cache=True`` option is enabled. Since the output side is -auto-regressive, an output token hidden state remains the same once -computed for every further generation step. Therefore, recomputing it -every time you want to generate a new token seems wasteful. With the -cache, the model saves the hidden state once it has been computed. The -model only computes the one for the most recently generated output token -at each time step, re-using the saved ones for hidden tokens. This -reduces the generation complexity from O(n^3) to O(n^2) for a -transformer model. More details about how it works can be found in this -`article `__. -With this option, the model gets the previous step’s hidden states -(cached attention keys and values) as input and additionally provides -hidden states for the current step as output. It means for all next -iterations, it is enough to provide only a new token obtained from the -previous step and cached key values to get the next token prediction. - -The generation cycle repeats until the end of the sequence token is -reached or it also can be interrupted when maximum tokens will be -generated. As already mentioned before, we can enable printing current -generated tokens without waiting until when the whole generation is -finished using Streaming API, it adds a new token to the output queue -and then prints them when they are ready. - -Setup imports -~~~~~~~~~~~~~ - - - -.. 
code:: ipython3 - - from threading import Thread - from time import perf_counter - from typing import List - import gradio as gr - from transformers import AutoTokenizer, TextIteratorStreamer - import numpy as np - -Prepare template for user prompt -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -For effective generation, model expects to have input in specific -format. The code below prepare template for passing user instruction -into model with providing additional context. - -.. code:: ipython3 - - INSTRUCTION_KEY = "### Instruction:" - RESPONSE_KEY = "### Response:" - END_KEY = "### End" - INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request." - - # This is the prompt that is used for generating responses using an already trained model. It ends with the response - # key, where the job of the model is to provide the completion that follows it (i.e. the response itself). - PROMPT_FOR_GENERATION_FORMAT = """{intro} - - {instruction_key} - {instruction} - - {response_key} - """.format( - intro=INTRO_BLURB, - instruction_key=INSTRUCTION_KEY, - instruction="{instruction}", - response_key=RESPONSE_KEY, - ) - -Helpers for output parsing -~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -Model was retrained to finish generation using special token ``### End`` -the code below find its id for using it as generation stop-criteria. - -.. code:: ipython3 - - def get_special_token_id(tokenizer: AutoTokenizer, key: str) -> int: - """ - Gets the token ID for a given string that has been added to the tokenizer as a special token. - - When training, we configure the tokenizer so that the sequences like "### Instruction:" and "### End" are - treated specially and converted to a single, new token. This retrieves the token ID each of these keys map to. 
- - Args: - tokenizer (PreTrainedTokenizer): the tokenizer - key (str): the key to convert to a single token - - Raises: - RuntimeError: if more than one ID was generated - - Returns: - int: the token ID for the given key - """ - token_ids = tokenizer.encode(key) - if len(token_ids) > 1: - raise ValueError(f"Expected only a single token for '{key}' but found {token_ids}") - return token_ids[0] - - - tokenizer_response_key = next( - (token for token in tokenizer.additional_special_tokens if token.startswith(RESPONSE_KEY)), - None, - ) - - end_key_token_id = None - if tokenizer_response_key: - try: - end_key_token_id = get_special_token_id(tokenizer, END_KEY) - # Ensure generation stops once it generates "### End" - except ValueError: - pass - -Main generation function -~~~~~~~~~~~~~~~~~~~~~~~~ - - - -As it was discussed above, ``run_generation`` function is the entry -point for starting generation. It gets provided input instruction as -parameter and returns model response. - -.. code:: ipython3 - - def run_generation( - user_text: str, - top_p: float, - temperature: float, - top_k: int, - max_new_tokens: int, - perf_text: str, - ): - """ - Text generation function - - Parameters: - user_text (str): User-provided instruction for a generation. - top_p (float): Nucleus sampling. If set to < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for a generation. - temperature (float): The value used to module the logits distribution. - top_k (int): The number of highest probability vocabulary tokens to keep for top-k-filtering. - max_new_tokens (int): Maximum length of generated sequence. - perf_text (str): Content of text field for printing performance results. 
- Returns: - model_output (str) - model-generated text - perf_text (str) - updated perf text filed content - """ - - # Prepare input prompt according to model expected template - prompt_text = PROMPT_FOR_GENERATION_FORMAT.format(instruction=user_text) - - # Tokenize the user text. - model_inputs = tokenizer(prompt_text, return_tensors="pt") - - # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer - # in the main thread. Adds timeout to the streamer to handle exceptions in the generation thread. - streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) - generate_kwargs = dict( - model_inputs, - streamer=streamer, - max_new_tokens=max_new_tokens, - do_sample=True, - top_p=top_p, - temperature=float(temperature), - top_k=top_k, - eos_token_id=end_key_token_id, - ) - t = Thread(target=ov_model.generate, kwargs=generate_kwargs) - t.start() - - # Pull the generated text from the streamer, and update the model output. - model_output = "" - per_token_time = [] - num_tokens = 0 - start = perf_counter() - for new_text in streamer: - current_time = perf_counter() - start - model_output += new_text - perf_text, num_tokens = estimate_latency(current_time, perf_text, new_text, per_token_time, num_tokens) - yield model_output, perf_text - start = perf_counter() - return model_output, perf_text - -Helpers for application -~~~~~~~~~~~~~~~~~~~~~~~ - - - -For making interactive user interface we will use Gradio library. The -code bellow provides useful functions used for communication with UI -elements. - -.. code:: ipython3 - - def estimate_latency( - current_time: float, - current_perf_text: str, - new_gen_text: str, - per_token_time: List[float], - num_tokens: int, - ): - """ - Helper function for performance estimation - - Parameters: - current_time (float): This step time in seconds. - current_perf_text (str): Current content of performance UI field. - new_gen_text (str): New generated text. 
- per_token_time (List[float]): history of performance from previous steps. - num_tokens (int): Total number of generated tokens. - - Returns: - update for performance text field - update for a total number of tokens - """ - num_current_toks = len(tokenizer.encode(new_gen_text)) - num_tokens += num_current_toks - per_token_time.append(num_current_toks / current_time) - if len(per_token_time) > 10 and len(per_token_time) % 4 == 0: - current_bucket = per_token_time[:-10] - return ( - f"Average generation speed: {np.mean(current_bucket):.2f} tokens/s. Total generated tokens: {num_tokens}", - num_tokens, - ) - return current_perf_text, num_tokens - - - def select_device(device_str: str, current_text: str = "", progress: gr.Progress = gr.Progress()): - """ - Helper function for uploading model on the device. - - Parameters: - device_str (str): Device name. - current_text (str): Current content of user instruction field (used only for backup purposes, temporally replacing it on the progress bar during model loading). - progress (gr.Progress): gradio progress tracker - Returns: - current_text - """ - if device_str != ov_model._device: - ov_model.request = None - ov_model._device = device_str - - for i in progress.tqdm(range(1), desc=f"Model loading on {device_str}"): - ov_model.compile() - return current_text - -Run instruction-following pipeline ----------------------------------- - - - -Now, we are ready to explore model capabilities. This demo provides a -simple interface that allows communication with a model using text -instruction. Type your instruction into the ``User instruction`` field -or select one from predefined examples and click on the ``Submit`` -button to start generation. Additionally, you can modify advanced -generation parameters: - -- ``Device`` - allows switching inference device. Please note, every - time when new device is selected, model will be recompiled and this - takes some time. -- ``Max New Tokens`` - maximum size of generated text. 
-- ``Top-p (nucleus sampling)`` - if set to < 1, only the smallest set - of most probable tokens with probabilities that add up to top_p or - higher are kept for a generation. -- ``Top-k`` - the number of highest probability vocabulary tokens to - keep for top-k-filtering. -- ``Temperature`` - the value used to module the logits distribution. - -.. code:: ipython3 - - import requests - - if not Path("gradio_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/dolly-2-instruction-following/gradio_helper.py") - open("gradio_helper.py", "w").write(r.text) - - from gradio_helper import make_demo - - demo = make_demo(run_fn=run_generation, select_device_fn=select_device) - - try: - demo.queue().launch(debug=False, height=800) - except Exception: - demo.queue().launch(debug=False, share=True, height=800) - # If you are launching remotely, specify server_name and server_port - # EXAMPLE: `demo.launch(server_name='your server name', server_port='server port in int')` - # To learn more please refer to the Gradio docs: https://gradio.app/docs/ diff --git a/docs/notebooks/dynamicrafter-animating-images-with-output.rst b/docs/notebooks/dynamicrafter-animating-images-with-output.rst index 992c346194e31c..13b4c9475f7092 100644 --- a/docs/notebooks/dynamicrafter-animating-images-with-output.rst +++ b/docs/notebooks/dynamicrafter-animating-images-with-output.rst @@ -151,57 +151,29 @@ Prerequisites %pip install -q "openvino>=2024.2.0" "nncf>=2.11.0" "datasets>=2.20.0" %pip install -q "gradio>=4.19" omegaconf einops pytorch_lightning kornia "open_clip_torch==2.22.0" transformers av opencv-python "torch==2.2.2" --extra-index-url https://download.pytorch.org/whl/cpu - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - .. 
code:: ipython3 - import sys from pathlib import Path import requests - dynamicrafter_path = Path("dynamicrafter") - - if not dynamicrafter_path.exists(): - dynamicrafter_path.mkdir(parents=True, exist_ok=True) - !git clone https://github.com/Doubiiu/DynamiCrafter.git dynamicrafter - %cd dynamicrafter - !git checkout 26e665cd6c174234238d2ded661e2e56f875d360 -q # to avoid breaking changes - %cd .. - - sys.path.append(str(dynamicrafter_path)) + if not Path("cmd_helper.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", + ) + open("cmd_helper.py", "w").write(r.text) r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) +.. code:: ipython3 -.. parsed-literal:: - - Cloning into 'dynamicrafter'... - remote: Enumerating objects: 335, done. - remote: Counting objects: 100% (153/153), done. - remote: Compressing objects: 100% (99/99), done. - remote: Total 335 (delta 97), reused 54 (delta 54), pack-reused 182 (from 1) - Receiving objects: 100% (335/335), 72.41 MiB | 20.85 MiB/s, done. - Resolving deltas: 100% (123/123), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images - - - - -.. parsed-literal:: - - 24692 - - + from cmd_helper import clone_repo + + + clone_repo("https://github.com/Doubiiu/DynamiCrafter.git", "26e665cd6c174234238d2ded661e2e56f875d360") Load and run the original pipeline ---------------------------------- @@ -221,7 +193,7 @@ We will use model for 256x256 resolution as example. 
Also, models for from huggingface_hub import hf_hub_download from omegaconf import OmegaConf - from dynamicrafter.utils.utils import instantiate_from_config + from utils.utils import instantiate_from_config def load_model_checkpoint(model, ckpt): @@ -280,38 +252,11 @@ We will use model for 256x256 resolution as example. Also, models for model = download_model() -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:834: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. - For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder. - warnings.warn( - - - -.. parsed-literal:: - - model.ckpt: 0%| | 0.00/10.4G [00:00>> model checkpoint loaded. - + Convert the model to OpenVINO IR -------------------------------- @@ -363,7 +308,7 @@ Convert CLIP text encoder .. code:: ipython3 - from dynamicrafter.lvdm.modules.encoders.condition import FrozenOpenCLIPEmbedder + from lvdm.modules.encoders.condition import FrozenOpenCLIPEmbedder MODEL_DIR = Path("models") @@ -388,17 +333,6 @@ Convert CLIP text encoder del cond_stage_model gc.collect(); - -.. parsed-literal:: - - WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - - -.. parsed-literal:: - - WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - - Convert CLIP image encoder ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -421,49 +355,6 @@ resolutions. 
del model.embedder gc.collect(); - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/utils/image.py:226: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if input.numel() == 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:573: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if size == input_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:579: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - antialias = antialias and (max(factors) > 1) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:581: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
- if antialias: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:584: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - sigmas = (max((factors[0] - 1.0) / 2.0, 0.001), max((factors[1] - 1.0) / 2.0, 0.001)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: torch.tensor results are registered as constants in the trace. 
You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. - sigma = tensor([sigma], device=input.device, dtype=input.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - sigma = tensor([sigma], device=input.device, dtype=input.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/core/check.py:78: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if x_shape_to_check[i] != dim: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/kernels.py:92: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. 
- mean = tensor([[mean]], device=sigma.device, dtype=sigma.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if len(mean.shape) == 0 or mean.shape[0] == 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if len(std.shape) == 0 or std.shape[0] == 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:107: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if mean.shape and mean.shape[0] != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! - if mean.shape[0] != data.shape[1] and mean.shape[:2] != data.shape[:2]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:112: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if std.shape and std.shape[0] != 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if std.shape[0] != data.shape[1] and std.shape[:2] != data.shape[:2]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:116: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. - mean = torch.as_tensor(mean, device=data.device, dtype=data.dtype) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:117: TracerWarning: torch.as_tensor results are registered as constants in the trace. 
You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. - std = torch.as_tensor(std, device=data.device, dtype=data.dtype) - - Convert AE encoder ~~~~~~~~~~~~~~~~~~ @@ -486,13 +377,6 @@ Convert AE encoder del model.first_stage_model.encoder gc.collect(); - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/ae_modules.py:67: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - w_ = w_ * (int(c)**(-0.5)) - - Convert Diffusion U-Net model ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -530,21 +414,6 @@ Convert Diffusion U-Net model del model.model.diffusion_model gc.collect(); - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:556: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if l_context == 77 + t*16: ## !!! HARD CODE here - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if batch_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:232: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if self.use_temporal_conv and batch_size: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:76: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert x.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/dynamicrafter/lvdm/modules/networks/openaimodel3d.py:99: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert x.shape[1] == self.channels - - Convert AE decoder ~~~~~~~~~~~~~~~~~~ @@ -928,15 +797,15 @@ Run OpenVINO pipeline inference .. 
parsed-literal:: Seed set to 234 - /tmp/ipykernel_511478/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) + /tmp/ipykernel_971108/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device) - + .. parsed-literal:: - start: man fishing in a boat at sunset 2024-11-04 23:26:56 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 206.55 seconds - + start: man fishing in a boat at sunset 2024-08-06 13:54:24 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 164.28 seconds + .. code:: ipython3 @@ -959,7 +828,7 @@ Run OpenVINO pipeline inference - + @@ -1131,19 +1000,6 @@ To collect intermediate model inputs for calibration we should customize 0%| | 0/300 [00:00>> model checkpoint loaded. - + .. code:: ipython3 @@ -1360,13 +1317,13 @@ Let’s run the optimized pipeline .. parsed-literal:: Seed set to 234 - + .. parsed-literal:: - start: man fishing in a boat at sunset 2024-11-05 00:58:08 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 97.78 seconds - + start: man fishing in a boat at sunset 2024-08-06 15:09:26 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 81.47 seconds + .. 
code:: ipython3 @@ -1388,7 +1345,7 @@ Let’s run the optimized pipeline - + Compare model file sizes @@ -1416,7 +1373,7 @@ Compare model file sizes encoder_first_stage_ir compression rate: 3.986 embedder_ir compression rate: 3.977 model_ir compression rate: 3.981 - + Compare inference time of the FP32 and INT8 models ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1470,10 +1427,10 @@ models, we use median inference time on calibration subset. .. parsed-literal:: - FP32 latency: 201.526 - INT8 latency: 96.036 - Performance speed up: 2.098 - + FP32 latency: 162.304 + INT8 latency: 79.590 + Performance speed up: 2.039 + Interactive inference --------------------- @@ -1497,15 +1454,6 @@ to launch the interactive demo. use_quantized_models - - - -.. parsed-literal:: - - Checkbox(value=True, description='Use quantized models') - - - .. code:: ipython3 from functools import partial @@ -1524,23 +1472,9 @@ to launch the interactive demo. demo = make_demo(fn=get_image_fn) try: - demo.queue().launch(debug=False) + demo.queue().launch(debug=True) except Exception: - demo.queue().launch(debug=False, share=True) + demo.queue().launch(debug=True, share=True) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ - - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - diff --git a/docs/notebooks/efficient-sam-with-output.rst b/docs/notebooks/efficient-sam-with-output.rst index b50b82341f4af8..2341db94e22f68 100644 --- a/docs/notebooks/efficient-sam-with-output.rst +++ b/docs/notebooks/efficient-sam-with-output.rst @@ -92,39 +92,47 @@ Prerequisites .. 
code:: ipython3 + import requests from pathlib import Path - repo_dir = Path("EfficientSAM") - if not repo_dir.exists(): - !git clone https://github.com/yformer/EfficientSAM.git - %cd $repo_dir + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", + ) + open("cmd_helper.py", "w").write(r.text) + + .. parsed-literal:: - Cloning into 'EfficientSAM'... - remote: Enumerating objects: 424, done. - remote: Counting objects: 100% (85/85), done. - remote: Compressing objects: 100% (33/33), done. - remote: Total 424 (delta 76), reused 52 (delta 52), pack-reused 339 (from 1) - Receiving objects: 100% (424/424), 262.14 MiB | 23.37 MiB/s, done. - Resolving deltas: 100% (246/246), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM + 1491 + .. code:: ipython3 - import requests + from cmd_helper import clone_repo + + + repo_dir = clone_repo("https://github.com/yformer/EfficientSAM.git") + + %cd $repo_dir + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) - open("notebook_utils.py", "w").write(r.text) from notebook_utils import download_file, device_widget, quantization_widget # noqa: F401 + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM + + Load PyTorch model ------------------ @@ -377,23 +385,23 @@ disk using ``openvino.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:220: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:220: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:241: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:241: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:163: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:163: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! size = int(math.sqrt(xy_num)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert size * size == xy_num - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if size != h or size != w: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:251: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:251: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert x.shape[2] == num_patches - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if num_pts > self.decoder_max_num_input_points: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:92: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:92: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! elif num_pts < self.decoder_max_num_input_points: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:126: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:126: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_w > 0 and output_h > 0: @@ -640,10 +648,10 @@ architecture type, we should specify ``transformer`` in ``model_type``. .. parsed-literal:: - 2024-11-05 01:15:40.935673: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-05 01:15:40.968460: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-22 00:51:57.265752: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-22 00:51:57.297997: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-11-05 01:15:41.606156: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-22 00:51:57.938257: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -810,7 +818,7 @@ models, we use ``bencmark_app``. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 30.24 ms + [ INFO ] Read model took 29.71 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] batched_images (node: batched_images) : f32 / [...] / [?,?,?,?] @@ -830,7 +838,7 @@ models, we use ``bencmark_app``. [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_3) : f32 / [...] / [?,?,?,?,?] [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_2) : f32 / [...] / [?,?,?] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1388.43 ms + [ INFO ] Compile model took 1398.31 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -871,17 +879,17 @@ models, we use ``bencmark_app``. [ INFO ] Fill input 'batched_point_labels' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). 
- [ INFO ] First inference took 798.46 ms + [ INFO ] First inference took 793.15 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 49 iterations - [ INFO ] Duration: 16827.30 ms + [ INFO ] Count: 55 iterations + [ INFO ] Duration: 17124.15 ms [ INFO ] Latency: - [ INFO ] Median: 2025.54 ms - [ INFO ] Average: 1991.09 ms - [ INFO ] Min: 816.09 ms - [ INFO ] Max: 2176.67 ms - [ INFO ] Throughput: 2.91 FPS + [ INFO ] Median: 1829.15 ms + [ INFO ] Average: 1806.67 ms + [ INFO ] Min: 872.57 ms + [ INFO ] Max: 2037.03 ms + [ INFO ] Throughput: 3.21 FPS .. code:: ipython3 @@ -907,7 +915,7 @@ models, we use ``bencmark_app``. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 43.95 ms + [ INFO ] Read model took 43.85 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] batched_images (node: batched_images) : f32 / [...] / [?,?,?,?] @@ -927,7 +935,7 @@ models, we use ``bencmark_app``. [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_3) : f32 / [...] / [?,?,?,?,?] [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_2) : f32 / [...] / [?,?,?] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1607.96 ms + [ INFO ] Compile model took 1631.76 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -968,17 +976,17 @@ models, we use ``bencmark_app``. [ INFO ] Fill input 'batched_point_labels' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). 
- [ INFO ] First inference took 596.94 ms + [ INFO ] First inference took 583.55 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 55 iterations - [ INFO ] Duration: 15959.69 ms + [ INFO ] Count: 56 iterations + [ INFO ] Duration: 16266.69 ms [ INFO ] Latency: - [ INFO ] Median: 1701.74 ms - [ INFO ] Average: 1692.86 ms - [ INFO ] Min: 653.76 ms - [ INFO ] Max: 1817.85 ms - [ INFO ] Throughput: 3.45 FPS + [ INFO ] Median: 1710.59 ms + [ INFO ] Average: 1692.97 ms + [ INFO ] Min: 713.08 ms + [ INFO ] Max: 1952.47 ms + [ INFO ] Throughput: 3.44 FPS Interactive segmentation demo @@ -1308,7 +1316,7 @@ Interactive segmentation demo .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam Running on local URL: http://127.0.0.1:7860 To create a public link, set `share=True` in `launch()`. 
diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png index 9f65fa9db4554a..f9dfb53e3b8796 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9368b1fbd458d1e022a768f24e689af0fd6e5dacc98a920f45d3fc0f63062567 -size 1259373 +oid sha256:cffb9233e156bb558299a8c9bd3931dad6999f9bf7f358b208549949411460d1 +size 1259114 diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png index 7c0716600906a1..108e6e0e4564e0 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:22f0e5bfd74e7426218d2bd007f9219433556530ddb10f33b9706398eb7cd370 -size 1263404 +oid sha256:5760726cd720e435c5d3a85315e772a741d583553996d8cfe7833f5d941e79f3 +size 1260778 diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png index 0a717e2c9aa38d..c767ab3d6193bd 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1863ccc9483f6cbd60768b311d104ee68692c3a7181e06da4bc751b52cf0ca1 -size 1262535 +oid sha256:3909739937c5c50e2b26b3cba0b8b30e98e13fee3eab6c4f382735ec82ae9250 +size 1261525 diff --git a/docs/notebooks/encodec-audio-compression-with-output.rst 
b/docs/notebooks/encodec-audio-compression-with-output.rst index 7f0e153ffa4a55..4cf2479f638656 100644 --- a/docs/notebooks/encodec-audio-compression-with-output.rst +++ b/docs/notebooks/encodec-audio-compression-with-output.rst @@ -67,7 +67,7 @@ Install required dependencies: .. code:: ipython3 - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "openvino>=2023.3.0" "torch>=2.1" "torchaudio>=2.1" "encodec>=0.1.1" "gradio>=4.19" "librosa>=0.8.1" "matplotlib<=3.7" tqdm + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "openvino>=2023.3.0" "torch>=2.1" "torchaudio>=2.1" "encodec>=0.1.1" "gradio>=4.19" "librosa>=0.8.1" "matplotlib>=3.4" tqdm .. parsed-literal:: @@ -142,7 +142,7 @@ bandwidth. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) @@ -302,7 +302,7 @@ similar as possible to the original. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) @@ -402,13 +402,13 @@ with ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ideal_length = (math.ceil(n_frames) - 1) * stride + (kernel_size - padding_total) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert padding_left >= 0 and padding_right >= 0, (padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! max_pad = max(padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if length <= max_pad: @@ -428,11 +428,11 @@ with ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. quantized_out = torch.tensor(0.0, device=q_indices.device) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). for i, indices in enumerate(q_indices): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert (padding_left + padding_right) <= x.shape[-1] diff --git a/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_19_1.png b/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_19_1.png index 0aeedba5d00a83..9f01201bccd659 100644 --- a/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_19_1.png +++ b/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_19_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:160e17b680bd3d5e8ae8d05736f6c8794af22597097cc8481d0986915fe9d696 +oid sha256:a031358d39936f6ccdb1e4e8c9eb8ddda651384ecf7d95fbe6c2dc1f7e65be95 size 44175 diff --git a/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_38_1.png b/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_38_1.png index dfab67e44f9be0..d157f39a8fc143 100644 --- a/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_38_1.png +++ b/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_38_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aea9089d7a4630b53481b1277bbf8e7f52f1c992ed61d1e998250980f59df5ab +oid sha256:f2800c74996f567b92758358b136cc2acab70b48ea628ac392e59cecc1c416a3 size 44186 diff --git a/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_6_2.png b/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_6_2.png index a8af4e5b6153b9..93baa1aa5eeea6 100644 --- a/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_6_2.png +++ b/docs/notebooks/encodec-audio-compression-with-output_files/encodec-audio-compression-with-output_6_2.png @@ -1,3 +1,3 @@ 
version https://git-lfs.github.com/spec/v1 -oid sha256:2df9c2103837505ffcf5543e55a8d1589385ddb5e73b917d5efe9a6ebfd0368c +oid sha256:491264f7b803244b0230b7a7bebee6b81da547541ccf928fbae1c9c0af719451 size 44933 diff --git a/docs/notebooks/explainable-ai-1-basic-with-output.rst b/docs/notebooks/explainable-ai-1-basic-with-output.rst index 1df31312fd752f..4dd115d7983c64 100644 --- a/docs/notebooks/explainable-ai-1-basic-with-output.rst +++ b/docs/notebooks/explainable-ai-1-basic-with-output.rst @@ -66,8 +66,6 @@ Guide =2024.2.0" opencv-python tqdm @@ -76,10 +74,7 @@ Guide =3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "matplotlib>=3.4" Imports ------- diff --git a/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst b/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst index 4e2ad0970661d2..c0722b01a9c9b4 100644 --- a/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst +++ b/docs/notebooks/explainable-ai-2-deep-dive-with-output.rst @@ -116,10 +116,7 @@ Install requirements %pip install -q -U "numpy==1.*" %pip install -q scipy - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "matplotlib>=3.4" Imports ~~~~~~~ diff --git a/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst b/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst index 537ae36f6a331c..b26064fcf12e27 100644 --- a/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst +++ b/docs/notebooks/explainable-ai-3-map-interpretation-with-output.rst @@ -115,10 +115,7 @@ Install requirements %pip install -q -U "numpy==1.*" %pip install -q scipy - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "matplotlib>=3.4" Imports ~~~~~~~ diff --git a/docs/notebooks/fast-segment-anything-with-output.rst b/docs/notebooks/fast-segment-anything-with-output.rst index 
e0f20e0f79974b..9becf2719559bc 100644 --- a/docs/notebooks/fast-segment-anything-with-output.rst +++ b/docs/notebooks/fast-segment-anything-with-output.rst @@ -158,7 +158,7 @@ model and generate a segmentation map. .. parsed-literal:: - 100%|██████████| 138M/138M [00:02<00:00, 67.7MB/s] + 100%|██████████| 138M/138M [00:02<00:00, 48.9MB/s] @@ -170,8 +170,8 @@ model and generate a segmentation map. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 728.3ms - Speed: 3.1ms preprocess, 728.3ms inference, 768.2ms postprocess per image at shape (1, 3, 768, 1024) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 642.9ms + Speed: 3.9ms preprocess, 642.9ms inference, 771.9ms postprocess per image at shape (1, 3, 768, 1024) The model returns segmentation maps for all the objects on the image. @@ -214,10 +214,10 @@ tracing. The FastSAM model itself is based on YOLOv8 model. PyTorch: starting from 'FastSAM-x.pt' with input shape (1, 3, 1024, 1024) BCHW and output shape(s) ((1, 37, 21504), (1, 32, 256, 256)) (138.3 MB) OpenVINO: starting export with openvino 2024.4.0-16579-c3152d32c9c-releases/2024/4... 
- OpenVINO: export success ✅ 6.2s, saved as 'FastSAM-x_openvino_model/' (276.1 MB) + OpenVINO: export success ✅ 6.1s, saved as 'FastSAM-x_openvino_model/' (276.1 MB) Export complete (9.1s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything Predict: yolo predict task=segment model=FastSAM-x_openvino_model imgsz=1024 Validate: yolo val task=segment model=FastSAM-x_openvino_model imgsz=1024 data=ultralytics/datasets/sa.yaml Visualize: https://netron.app @@ -321,8 +321,8 @@ pipeline. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 504.9ms - Speed: 5.8ms preprocess, 504.9ms inference, 31.6ms postprocess per image at shape (1, 3, 1024, 1024) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 494.2ms + Speed: 6.6ms preprocess, 494.2ms inference, 30.3ms postprocess per image at shape (1, 3, 1024, 1024) One can observe the converted model outputs in the next cell, they are @@ -615,8 +615,8 @@ calibration dataset to measure the performance. .. parsed-literal:: - Segmented in 69 seconds. - Resulting in 1.86 fps + Segmented in 72 seconds. + Resulting in 1.78 fps .. code:: ipython3 @@ -643,9 +643,9 @@ calibration dataset to measure the performance. .. parsed-literal:: - Segmented in 22 seconds - Resulting in 5.82 fps - That is 3.14 times faster! + Segmented in 23 seconds + Resulting in 5.57 fps + That is 3.13 times faster! 
Try out the converted pipeline diff --git a/docs/notebooks/film-slowmo-with-output.rst b/docs/notebooks/film-slowmo-with-output.rst index 0f5c9c7ba8c0d6..33d915ff72c326 100644 --- a/docs/notebooks/film-slowmo-with-output.rst +++ b/docs/notebooks/film-slowmo-with-output.rst @@ -79,7 +79,6 @@ Prerequisites .. code:: ipython3 - import platform import os %pip install -q "tensorflow-macos>=2.5; sys_platform == 'darwin' and platform_machine == 'arm64' and python_version > '3.8'" # macOS M1 and M2 @@ -92,10 +91,7 @@ Prerequisites %pip install -q tensorflow_hub tf_keras numpy "opencv-python" tqdm "gradio>=4.19" Pillow "openvino>=2023.2.0" - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" + %pip install -q "matplotlib>=3.4" .. code:: ipython3 diff --git a/docs/notebooks/florence2-with-output.rst b/docs/notebooks/florence2-with-output.rst index e929a95fb182c1..e4ab6fbcbd3a3b 100644 --- a/docs/notebooks/florence2-with-output.rst +++ b/docs/notebooks/florence2-with-output.rst @@ -100,10 +100,10 @@ available model. By default, we will use .. parsed-literal:: - 2024-11-05 01:28:54.034484: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-05 01:28:54.069316: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-22 01:05:34.426758: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2024-11-22 01:05:34.462006: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-05 01:28:54.728430: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-22 01:05:35.115966: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -193,96 +193,96 @@ pipeline. .. parsed-literal:: - SUPPORT.md: 0%| | 0.00/1.24k [00:00 1 or self.sliding_window is not None: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/chkpt/modeling_florence2.py:1205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False @@ -382,7 +382,7 @@ Run model inference -``OvFlorence@Model`` class defined in ``ov_florence2_helper.py`` +``OvFlorence2Model`` class defined in ``ov_florence2_helper.py`` provides convenient way for running model. It accepts directory with converted model and inference device as arguments. For running model we will use ``generate`` method. 
diff --git a/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png b/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png index 37d11a47fd30c9..c233468fe95f4e 100644 --- a/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png +++ b/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d85b3df68708172ed849a9e182bdec6a94f0174643833bd8cc7184ac0d090fae -size 259636 +oid sha256:d15ed97d6e50919caff2aee785bc4c90f91dcfcc9bb248f70e9d79bb203be64f +size 259663 diff --git a/docs/notebooks/freevc-voice-conversion-with-output.rst b/docs/notebooks/freevc-voice-conversion-with-output.rst index fe2ac780f5cca6..eb1dffbcf5da08 100644 --- a/docs/notebooks/freevc-voice-conversion-with-output.rst +++ b/docs/notebooks/freevc-voice-conversion-with-output.rst @@ -82,44 +82,43 @@ Install extra requirements Note: you may need to restart the kernel to use updated packages. -Check if FreeVC is installed and append its path to ``sys.path`` - .. code:: ipython3 - from pathlib import Path - import sys + # Fetch `notebook_utils` module + import requests - free_vc_repo = "FreeVC" - if not Path(free_vc_repo).exists(): - !git clone https://github.com/OlaWod/FreeVC.git + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + - sys.path.append(free_vc_repo) + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", + ) + open("cmd_helper.py", "w").write(r.text) + + .. parsed-literal:: - Cloning into 'FreeVC'... - remote: Enumerating objects: 131, done. - remote: Counting objects: 100% (74/74), done. - remote: Compressing objects: 100% (47/47), done. 
- remote: Total 131 (delta 43), reused 27 (delta 27), pack-reused 57 (from 1) - Receiving objects: 100% (131/131), 15.28 MiB | 17.50 MiB/s, done. - Resolving deltas: 100% (43/43), done. + 1491 + .. code:: ipython3 - # Fetch `notebook_utils` module - import requests + from pathlib import Path + import gdown + from cmd_helper import clone_repo + from notebook_utils import download_file, device_widget - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - from notebook_utils import download_file, device_widget + clone_repo("https://github.com/OlaWod/FreeVC.git") + wavlm_large_dir_path = Path("FreeVC/wavlm") wavlm_large_path = wavlm_large_dir_path / "WavLM-Large.pt" @@ -134,8 +133,8 @@ Check if FreeVC is installed and append its path to ``sys.path`` Downloading... From: https://drive.google.com/uc?id=12-cB34qCTvByWT-QtOcZaqwwO21FLSqU&confirm=t&uuid=a703c43c-ccce-436c-8799-c11b88e9e7e4 - To: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/WavLM-Large.pt - 100%|██████████| 1.26G/1.26G [00:32<00:00, 38.5MB/s] + To: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/WavLM-Large.pt + 100%|██████████| 1.26G/1.26G [00:26<00:00, 47.5MB/s] .. code:: ipython3 @@ -239,7 +238,7 @@ Models initialization .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.") @@ -360,13 +359,13 @@ Converting to OpenVINO’s IR format. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:495: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:495: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert embed_dim == self.embed_dim - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:496: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert list(query.size()) == [tgt_len, bsz, embed_dim] - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:500: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:500: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert key_bsz == bsz - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:502: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/modules.py:502: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert src_len, bsz == value.shape[:2] @@ -581,12 +580,12 @@ function to OpenVINO IR format. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1102: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1102: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! 
- Mismatched elements: 25915 / 25920 (100.0%) - Greatest absolute difference: 1.3485908806324005 at index (0, 0, 24258) (up to 1e-05 allowed) - Greatest relative difference: 8204.075456053068 at index (0, 0, 5777) (up to 1e-05 allowed) + Mismatched elements: 25919 / 25920 (100.0%) + Greatest absolute difference: 0.4560253918170929 at index (0, 0, 20759) (up to 1e-05 allowed) + Greatest relative difference: 13178.603217158177 at index (0, 0, 10045) (up to 1e-05 allowed) _check_trace( @@ -707,7 +706,7 @@ Result audio: diff --git a/docs/notebooks/gpu-device-with-output.rst b/docs/notebooks/gpu-device-with-output.rst index 732cc297aa9531..5953608eae62e5 100644 --- a/docs/notebooks/gpu-device-with-output.rst +++ b/docs/notebooks/gpu-device-with-output.rst @@ -330,7 +330,7 @@ categories of object. For details, see the ov_model_path = base_model_dir / model_name / f"{model_name}.xml" if not (ov_model_path).exists(): - hf_hub.snapshot_download("katuni4ka/ssdlite_mobilenet_v2_fp16", local_dir=base_model_dir) + hf_hub.snapshot_download("katuni4ka/ssdlite_mobilenet_v2_fp16", local_dir=base_model_dir / model_name) model = core.read_model(ov_model_path) @@ -541,7 +541,7 @@ with a latency focus: .. code:: ipython3 - !benchmark_app -m {model_path} -d GPU -hint latency + !benchmark_app -m {ov_model_path} -d GPU -hint latency .. parsed-literal:: @@ -622,7 +622,7 @@ CPU vs GPU with Latency Hint .. code:: ipython3 - !benchmark_app -m {model_path} -d CPU -hint latency + !benchmark_app -m {ov_model_path} -d CPU -hint latency .. parsed-literal:: @@ -1071,7 +1071,7 @@ Compile the Model .. 
code:: ipython3 # Read model and compile it on GPU in THROUGHPUT mode - model = core.read_model(model=model_path) + model = core.read_model(model=ov_model_path) device_name = "GPU" compiled_model = core.compile_model(model=model, device_name=device_name, config={hints.performance_mode(): hints.PerformanceMode.THROUGHPUT}) diff --git a/docs/notebooks/grounded-segment-anything-with-output.rst b/docs/notebooks/grounded-segment-anything-with-output.rst index 232629422b14e0..a51ce8249239f9 100644 --- a/docs/notebooks/grounded-segment-anything-with-output.rst +++ b/docs/notebooks/grounded-segment-anything-with-output.rst @@ -64,7 +64,7 @@ Clone repositories and install requirements .. parsed-literal:: - WARNING: supervision 0.24.0 does not provide the extra 'desktop' + WARNING: supervision 0.25.0 does not provide the extra 'desktop' Note: you may need to restart the kernel to use updated packages. @@ -96,46 +96,51 @@ segmentation you can select vanilla ``SAM``. use_efficient_sam = sam_type_widget.value == "EfficientSAM" +.. code:: ipython3 + + import requests + + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", + ) + open("cmd_helper.py", "w").write(r.text) + + + + +.. parsed-literal:: + + 1491 + + + .. 
code:: ipython3 from pathlib import Path import sys import os + from cmd_helper import clone_repo + + repo_dir = Path("Grounded-Segment-Anything") ground_dino_dir = Path("GroundingDINO") efficient_sam_dir = Path("EfficientSAM") - # we use grounding dino from a fork which contains modifications that allow conversion to OpenVINO IR format - if not ground_dino_dir.exists(): - !git clone https://github.com/wenyi5608/GroundingDINO/ - if use_efficient_sam and not efficient_sam_dir.exists(): - !git clone https://github.com/yformer/EfficientSAM - if not use_efficient_sam and not repo_dir.exists(): - !git clone https://github.com/IDEA-Research/Grounded-Segment-Anything + # we use grounding dino from a fork which contains modifications that allow conversion to OpenVINO IR + clone_repo("https://github.com/wenyi5608/GroundingDINO.git") - # append to sys.path so that modules from the repo could be imported - sys.path.append(str(ground_dino_dir)) - sys.path.append(str("EfficientSAM" if use_efficient_sam else repo_dir / "segment_anything")) - - -.. parsed-literal:: - - Cloning into 'GroundingDINO'... - remote: Enumerating objects: 379, done. - remote: Counting objects: 100% (190/190), done. - remote: Compressing objects: 100% (79/79), done. - remote: Total 379 (delta 136), reused 111 (delta 111), pack-reused 189 (from 1) - Receiving objects: 100% (379/379), 14.03 MiB | 20.95 MiB/s, done. - Resolving deltas: 100% (194/194), done. - Cloning into 'EfficientSAM'... - remote: Enumerating objects: 424, done. - remote: Counting objects: 100% (85/85), done. - remote: Compressing objects: 100% (33/33), done. - remote: Total 424 (delta 76), reused 52 (delta 52), pack-reused 339 (from 1) - Receiving objects: 100% (424/424), 262.14 MiB | 24.44 MiB/s, done. - Resolving deltas: 100% (246/246), done. 
- + if use_efficient_sam: + clone_repo("https://github.com/yformer/EfficientSAM.git") + if not use_efficient_sam: + clone_repo("https://github.com/IDEA-Research/Grounded-Segment-Anything.git", add_to_sys_path=False) + sys.path.append(repo_dir / "segment_anything") .. code:: ipython3 @@ -179,15 +184,9 @@ Download checkpoints and load PyTorch models .. code:: ipython3 - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - - open("notebook_utils.py", "w").write(r.text) from notebook_utils import download_file, device_widget + download_file( "https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth", directory=CKPT_BASE_PATH, @@ -222,10 +221,10 @@ GroundingDINO imports .. parsed-literal:: - 2024-11-05 01:34:53.765709: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-05 01:34:53.988314: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-22 01:12:47.444588: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-22 01:12:47.676832: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-11-05 01:34:54.760718: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-11-22 01:12:48.469702: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers UserWarning: Failed to load custom C++ ops. Running on CPU mode Only! @@ -366,24 +365,10 @@ Convert GroundingDINO to OpenVINO IR format TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! .. 
parsed-literal:: - output layer_id 0 is nan - num_nan 230400, num_inf 0 - output layer_id 1 is nan - num_nan 230400, num_inf 0 - output layer_id 2 is nan - num_nan 230400, num_inf 0 - output layer_id 3 is nan - num_nan 230400, num_inf 0 - output layer_id 4 is nan - num_nan 230400, num_inf 0 - output layer_id 5 is nan - num_nan 230400, num_inf 0 WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. @@ -557,7 +542,7 @@ Draw box detections -.. image:: grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_29_0.png +.. image:: grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_30_0.png @@ -805,7 +790,7 @@ Combine both boxes and segmentation masks and draw them. -.. image:: grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_45_0.png +.. image:: grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_46_0.png diff --git a/docs/notebooks/grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_29_0.jpg b/docs/notebooks/grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_30_0.jpg similarity index 100% rename from docs/notebooks/grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_29_0.jpg rename to docs/notebooks/grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_30_0.jpg diff --git a/docs/notebooks/grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_29_0.png b/docs/notebooks/grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_30_0.png similarity index 100% rename from docs/notebooks/grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_29_0.png rename to 
docs/notebooks/grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_30_0.png diff --git a/docs/notebooks/grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_45_0.jpg b/docs/notebooks/grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_46_0.jpg similarity index 100% rename from docs/notebooks/grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_45_0.jpg rename to docs/notebooks/grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_46_0.jpg diff --git a/docs/notebooks/grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_45_0.png b/docs/notebooks/grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_46_0.png similarity index 100% rename from docs/notebooks/grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_45_0.png rename to docs/notebooks/grounded-segment-anything-with-output_files/grounded-segment-anything-with-output_46_0.png diff --git a/docs/notebooks/handwritten-ocr-with-output_files/handwritten-ocr-with-output_22_0.png b/docs/notebooks/handwritten-ocr-with-output_files/handwritten-ocr-with-output_22_0.png index c712a34d825650..7e0c09a703a97b 100644 --- a/docs/notebooks/handwritten-ocr-with-output_files/handwritten-ocr-with-output_22_0.png +++ b/docs/notebooks/handwritten-ocr-with-output_files/handwritten-ocr-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6b33a1c6c4f57f798ae3f4b31dcb638cb618363ef6108e7f60cf81f1c5bdb151 +oid sha256:9ce052db324821165a2b1bc5dea9d05588886c1794c0f217aaa47b8442c76aad size 53571 diff --git a/docs/notebooks/handwritten-ocr-with-output_files/handwritten-ocr-with-output_32_1.png b/docs/notebooks/handwritten-ocr-with-output_files/handwritten-ocr-with-output_32_1.png index c712a34d825650..7e0c09a703a97b 100644 --- 
a/docs/notebooks/handwritten-ocr-with-output_files/handwritten-ocr-with-output_32_1.png +++ b/docs/notebooks/handwritten-ocr-with-output_files/handwritten-ocr-with-output_32_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6b33a1c6c4f57f798ae3f4b31dcb638cb618363ef6108e7f60cf81f1c5bdb151 +oid sha256:9ce052db324821165a2b1bc5dea9d05588886c1794c0f217aaa47b8442c76aad size 53571 diff --git a/docs/notebooks/hello-detection-with-output_files/hello-detection-with-output_11_1.png b/docs/notebooks/hello-detection-with-output_files/hello-detection-with-output_11_1.png index 435c1891121eb0..b696d287ded448 100644 --- a/docs/notebooks/hello-detection-with-output_files/hello-detection-with-output_11_1.png +++ b/docs/notebooks/hello-detection-with-output_files/hello-detection-with-output_11_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:82b893e29e948379dac42c19763842f7ade2ccf03853c2c07f0b28bf2d58fe17 +oid sha256:c7a830fedc5653fd506c656144decc048cad5a7651c8e498024f0eb0ab8c8e96 size 305482 diff --git a/docs/notebooks/hello-detection-with-output_files/hello-detection-with-output_16_0.png b/docs/notebooks/hello-detection-with-output_files/hello-detection-with-output_16_0.png index e452c525ef05c2..5e6438a788597e 100644 --- a/docs/notebooks/hello-detection-with-output_files/hello-detection-with-output_16_0.png +++ b/docs/notebooks/hello-detection-with-output_files/hello-detection-with-output_16_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7caf8b68c4e2dfd446836e5049d842227ad718a4bbde287269617e324c7d0cef +oid sha256:edb00cb4f0e2c42cd9e0f90939afbd6352ca40c90866821898f2c42c1fd9df64 size 457214 diff --git a/docs/notebooks/hello-segmentation-with-output.rst b/docs/notebooks/hello-segmentation-with-output.rst index 6ddc0e3b0aa78b..2750c2d019a017 100644 --- a/docs/notebooks/hello-segmentation-with-output.rst +++ b/docs/notebooks/hello-segmentation-with-output.rst @@ -188,7 +188,7 @@ is provided. .. 
parsed-literal:: - + @@ -215,7 +215,7 @@ Do Inference .. parsed-literal:: - + diff --git a/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_11_2.png b/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_11_2.png index 12a0ec3dda0bf1..5023362b06be2d 100644 --- a/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_11_2.png +++ b/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_11_2.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fb21264c96554435f8c9331a342b9c3a20d8129dc0725f6ff226d789779645be +oid sha256:96f0eb3a9535d57b8784be4b717dc9f280e4bf107e5b61d7cf51b36e142e4c7a size 249032 diff --git a/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_13_1.png b/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_13_1.png index ec01c58bdf8be1..fe6d042ef77d30 100644 --- a/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_13_1.png +++ b/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_13_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:492235a08c36c9afbebabcb01c8325ac99dccff84174e7074ca321aba2ac7aac +oid sha256:caef59a6c15a5a1d512f4dd22395b12fbd754bba264ea5f0deae323ff8edee39 size 20550 diff --git a/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_17_0.png b/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_17_0.png index f8d59545b65f8c..310b0d3545d48c 100644 --- a/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_17_0.png +++ b/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_17_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:62376a2e159eca912bff4ce975d169f8ed71f9d9b75c4fd09937e7552120b14d +oid 
sha256:6a3137d9359a44fb19e1900e6b808f9e7e7ded0ba209abe8c4bd90fcf37b1c6a size 260045 diff --git a/docs/notebooks/hello-world-with-output_files/hello-world-with-output_11_1.png b/docs/notebooks/hello-world-with-output_files/hello-world-with-output_11_1.png index cca7858e3bc4af..a142093f6e675c 100644 --- a/docs/notebooks/hello-world-with-output_files/hello-world-with-output_11_1.png +++ b/docs/notebooks/hello-world-with-output_files/hello-world-with-output_11_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bbd7b81cc8e7a73ea9bcb8be0c0575134f50b6af8f7de23ee9feed645a4cf66c +oid sha256:5712bd24e962ae0e0267607554ebe1f2869c223b108876ce10e5d20fe6285126 size 387941 diff --git a/docs/notebooks/hugging-face-hub-with-output.rst b/docs/notebooks/hugging-face-hub-with-output.rst index a92f8cd18fba31..537c963ca405c4 100644 --- a/docs/notebooks/hugging-face-hub-with-output.rst +++ b/docs/notebooks/hugging-face-hub-with-output.rst @@ -132,10 +132,10 @@ tutorials `__ is the -first AI model capable of binding data from six modalities at once, -without the need for explicit supervision (the process of organizing and -labeling raw data). By recognizing the relationships between these -modalities — images and video, audio, text, depth, thermal, and inertial -measurement units (IMUs) — this breakthrough helps advance AI by -enabling machines to better analyze many different forms of information, -together. - -.. figure:: https://user-images.githubusercontent.com/8495451/236859695-ffa13364-3e39-4d99-a8da-fbfab17f9a6b.gif - :alt: ImageBind - - ImageBind - -In this tutorial, we consider how to convert and run ImageBind model -using OpenVINO. - -The tutorial consists of following steps: - -1. Download the pre-trained model. -2. Prepare input data examples. -3. Convert the model to OpenVINO Intermediate Representation format - (IR). -4. Run model inference and analyze results. 
- -About ImageBind ---------------- - -ImageBind, released in May 2023 by Meta Research, is an embedding model -that combines data from six modalities: images and video, text, audio, -thermal imaging, depth, and IMUs, which contain sensors including -accelerometers and orientation monitors. Using ImageBind, you can -provide data in one modality – for example, audio – and find related -documents in different modalities, such as video or images. - -ImageBind was trained with pairs of data. Each pair mapped image data – -including videos – to another modality, and the combined data was used -to train an embedding model. ImageBind found that features for different -modalities could be learned using the image data used in their training. -A notable conclusion from ImageBind is that pairing images with another -modality, then combining the results in the same embedding space is -sufficient to create a multi-modal embedding model. More details about -the model can be found in the model -`repository `__, -`paper `__, and Meta AI `blog -post `__. - -Like all embedding models, there are many potential use cases for -ImageBind, among them information retrieval, zero-shot classification, -and usage created by ImageBind representation as input for downstream -tasks (e.g. image generation). Some of the potential use-cases -represented on the image below: - -.. figure:: https://user-images.githubusercontent.com/29454499/256303836-c8e7b311-0b7b-407c-8610-fd8a803e4197.png - :alt: usecases - - usecases - -In this tutorial, we consider how to use ImageBind for multimodal -zero-shot classification. 
- - -**Table of contents:** - - -- `Prerequisites <#prerequisites>`__ -- `Instantiate PyTorch model <#instantiate-pytorch-model>`__ -- `Prepare input data <#prepare-input-data>`__ -- `Convert Model to OpenVINO Intermediate Representation (IR) - format <#convert-model-to-openvino-intermediate-representation-ir-format>`__ - - - `Select inference device <#select-inference-device>`__ - -- `Zero-shot classification using ImageBind and - OpenVINO <#zero-shot-classification-using-imagebind-and-openvino>`__ - - - `Text-Image classification <#text-image-classification>`__ - - `Text-Audio classification <#text-audio-classification>`__ - - `Image-Audio classification <#image-audio-classification>`__ - -- `Post-Training Quantization of ImageBind model with - NNCF <#post-training-quantization-of-imagebind-model-with-nncf>`__ - - - `Prepare datasets <#prepare-datasets>`__ - - `Apply quantization <#apply-quantization>`__ - - - `Quantize ImageBind model for vision - modality. <#quantize-imagebind-model-for-vision-modality->`__ - - `Quantize ImageBind model for text - modality <#quantize-imagebind-model-for-text-modality>`__ - - `Quantize ImageBind model for audio - modality <#quantize-imagebind-model-for-audio-modality>`__ - - - `Compare results for the OpenVINO FP16 model and the quantized - model <#compare-results-for-the-openvino-fp16-model-and-the-quantized-model>`__ - - - `Select inference device <#select-inference-device>`__ - - - `Compare File Size <#compare-file-size>`__ - - `Compare inference time of the FP16 IR and quantized - models <#compare-inference-time-of-the-fp16-ir-and-quantized-models>`__ - - - `Vision model <#vision-model>`__ - - `Text model <#text-model>`__ - - `Audio model <#audio-model>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. - -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. 
For details, please refer to -`Installation -Guide `__. - -Prerequisites -------------- - - - -.. code:: ipython3 - - import platform - - %pip install -q "torch>=2.0.1" "torchvision>=0.15.2,<0.17.0" "torchaudio>=2.0.2" "matplotlib>=3.4" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q datasets regex librosa soundfile pytorchvideo ftfy "timm>=0.6.7" einops fvcore "openvino>=2024.0.0" "nncf>=2.9.0" numpy scipy --extra-index-url https://download.pytorch.org/whl/cpu - - - if platform.system() != "Windows": - %pip install -q "matplotlib>=3.4" - else: - %pip install -q "matplotlib>=3.4,<3.7" - -.. code:: ipython3 - - from pathlib import Path - - repo_dir = Path("ImageBind") - - if not repo_dir.exists(): - !git clone https://github.com/facebookresearch/ImageBind.git - - %cd {repo_dir} - -Instantiate PyTorch model -------------------------- - - - -To start work with the model, we should instantiate the PyTorch model -class. ``imagebind_model.imagebind_huge(pretrained=True)`` downloads -model weights and creates a PyTorch model object for ImageBind. -Currently, there is only one ImageBind model available for downloading, -``imagebind_huge``, more details about it can be found in `model -card `__. - - Please note, depending on internet connection speed, the model - downloading process can take some time. It also requires at least 5 - GB of free space on disk for saving model checkpoint. - -.. code:: ipython3 - - import imagebind.data as data - import torch - from imagebind.models import imagebind_model - from imagebind.models.imagebind_model import ModalityType - - # Instantiate model - model = imagebind_model.imagebind_huge(pretrained=True) - model.eval(); - -Prepare input data ------------------- - - - -ImageBind works with data across 6 different modalities. Each of them -requires its steps for preprocessing. ``data`` module is responsible for -data reading and preprocessing for each modality. 
- -- ``data.load_and_transform_text`` accepts a list of text labels and - tokenizes them. -- ``data.load_and_transform_vision_data`` accepts paths to input - images, reads them, resizes to save aspect ratio with smaller side - size 224, performs center crop, and normalizes data into [0, 1] - floating point range. -- ``data.load_and_transofrm_audio_data`` reads audio files from - provided paths, splits it on samples, and computes - `mel `__ - spectrogram. - -.. code:: ipython3 - - # Prepare inputs - - text_list = ["A car", "A bird", "A dog"] - image_paths = [ - ".assets/dog_image.jpg", - ".assets/car_image.jpg", - ".assets/bird_image.jpg", - ] - audio_paths = [ - ".assets/dog_audio.wav", - ".assets/bird_audio.wav", - ".assets/car_audio.wav", - ] - - inputs = { - ModalityType.TEXT: data.load_and_transform_text(text_list, "cpu"), - ModalityType.VISION: data.load_and_transform_vision_data(image_paths, "cpu"), - ModalityType.AUDIO: data.load_and_transform_audio_data(audio_paths, "cpu"), - } - -Convert Model to OpenVINO Intermediate Representation (IR) format ------------------------------------------------------------------ - - - -OpenVINO supports PyTorch through Model Conversion API. You will use -`model conversion Python -API `__ -to convert model to IR format. The ``ov.convert_model`` function returns -OpenVINO Model class instance ready to load on a device or save on a -disk for next loading using ``ov.save_model``. - -ImageBind accepts data that represents different modalities -simultaneously in any combinations, however, their processing is -independent of each other. For avoiding losing flexibility passing data, -we will export each modality encoder as an independent model. The code -below defines wrappers for the model to get only single-modality -embeddings. - -.. 
code:: ipython3 - - class ModelExporter(torch.nn.Module): - def __init__(self, model, modality): - super().__init__() - self.model = model - self.modality = modality - - def forward(self, data): - return self.model({self.modality: data}) - -.. code:: ipython3 - - import openvino as ov - - core = ov.Core() - -Select inference device -~~~~~~~~~~~~~~~~~~~~~~~ - - - -select device from dropdown list for running inference using OpenVINO - -.. code:: ipython3 - - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - - from notebook_utils import device_widget - - device = device_widget() - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=3, options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='AUTO') - - - -.. code:: ipython3 - - import openvino as ov - - core = ov.Core() - - ov_modality_models = {} - - modalities = [ModalityType.TEXT, ModalityType.VISION, ModalityType.AUDIO] - for modality in modalities: - export_dir = Path(f"image-bind-{modality}") - file_name = f"image-bind-{modality}" - export_dir.mkdir(exist_ok=True) - ir_path = export_dir / f"{file_name}.xml" - if not ir_path.exists(): - exportable_model = ModelExporter(model, modality) - model_input = inputs[modality] - ov_model = ov.convert_model(exportable_model, example_input=model_input) - ov.save_model(ov_model, ir_path) - else: - ov_model = core.read_model(ir_path) - ov_modality_models[modality] = core.compile_model(ov_model, device.value) - -Zero-shot classification using ImageBind and OpenVINO ------------------------------------------------------ - - - -In zero-shot classification, a piece of data is embedded and fed to the -model to retrieve a label that corresponds with the contents of the -data. In the case of ImageBind, you can classify audio, images, and -information in the other supported modalities. 
We already discussed how -to perform zero-shot image classification using the CLIP model (please -check this -`notebook `__ -for details), capabilities of ImageBind for this task wider, because it -allows using any combinations of supported modalities for -classification. - -To perform zero-shot classification using ImageBind we should perform -the following steps: - -1. Preprocess data batch for requested modalities (one modality in our - case treated as a data source, other - as a label). -2. Calculate embeddings for each modality. -3. Find dot-product between embeddings vectors to get probabilities - matrix. -4. Obtain the label with the highest probability for mapping the source - into label space. - -We already preprocessed data in previous step, now, we should run model -inference for getting embeddings. - -.. code:: ipython3 - - embeddings = {} - for modality in modalities: - embeddings[modality] = ov_modality_models[modality](inputs[modality])[ov_modality_models[modality].output(0)] - -The probability matrix shows the correspondence between source -embeddings and label embeddings, it is a 2D matrix, where x-dimension -represents label-modality data and y-dimension - source-modality data. -It can be calculated as a dot-product between embeddings vectors and -normalized into the [0,1] range using softmax. Then a higher score on -the intersection between x and y labels, then higher confidence that -they represent the same object. - -.. 
code:: ipython3 - - import matplotlib.pyplot as plt - import numpy as np - from scipy.special import softmax - - - def visualize_prob_matrix(matrix, x_label, y_label): - fig, ax = plt.subplots() - ax.matshow(matrix, cmap="winter") - - for (i, j), z in np.ndenumerate(matrix): - ax.text(j, i, "{:0.3f}".format(z), ha="center", va="center") - ax.set_xticks(range(len(x_label)), x_label) - ax.set_yticks(range(len(y_label)), y_label) - - - image_list = [img.split("/")[-1] for img in image_paths] - audio_list = [audio.split("/")[-1] for audio in audio_paths] - -Text-Image classification -~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - text_vision_scores = softmax(embeddings[ModalityType.VISION] @ embeddings[ModalityType.TEXT].T, axis=-1) - - visualize_prob_matrix(text_vision_scores, text_list, image_list) - - - -.. image:: image-bind-with-output_files/image-bind-with-output_20_0.png - - -Text-Audio classification -~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - text_audio_scores = softmax(embeddings[ModalityType.AUDIO] @ embeddings[ModalityType.TEXT].T, axis=-1) - - visualize_prob_matrix(text_audio_scores, text_list, audio_list) - - - -.. image:: image-bind-with-output_files/image-bind-with-output_22_0.png - - -Image-Audio classification -~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - audio_vision_scores = softmax(embeddings[ModalityType.VISION] @ embeddings[ModalityType.AUDIO].T, axis=-1) - - visualize_prob_matrix(audio_vision_scores, image_list, audio_list) - - - -.. image:: image-bind-with-output_files/image-bind-with-output_24_0.png - - -Putting all together, we can match text, image, and sound for our data. - -.. 
code:: ipython3 - - import IPython.display as ipd - from PIL import Image - - text_image_ids = np.argmax(text_vision_scores, axis=0) - text_audio_ids = np.argmax(text_audio_scores, axis=0) - print( - f"Predicted label: {text_list[0]} \nprobability for image - {text_vision_scores[text_image_ids[0], 0]:.3f}\nprobability for audio - {text_audio_scores[0, text_audio_ids[0]]:.3f}" - ) - display(Image.open(image_paths[text_image_ids[0]])) - ipd.Audio(audio_paths[text_audio_ids[0]]) - - -.. parsed-literal:: - - Predicted label: A car - probability for image - 1.000 - probability for audio - 1.000 - - - -.. image:: image-bind-with-output_files/image-bind-with-output_26_1.png - - - - -.. raw:: html - - - - - - - -.. code:: ipython3 - - print( - f"Predicted label: {text_list[1]} \nprobability for image - {text_vision_scores[text_image_ids[1], 1]:.3f}\nprobability for audio - {text_audio_scores[1, text_audio_ids[1]]:.3f}" - ) - display(Image.open(image_paths[text_image_ids[1]])) - ipd.Audio(audio_paths[text_audio_ids[1]]) - - -.. parsed-literal:: - - Predicted label: A bird - probability for image - 0.986 - probability for audio - 1.000 - - - -.. image:: image-bind-with-output_files/image-bind-with-output_27_1.png - - - - -.. raw:: html - - - - - - - -.. code:: ipython3 - - print( - f"Predicted label: {text_list[2]} \nprobability for image - {text_vision_scores[text_image_ids[2], 2]:.3f}\nprobability for audio - {text_audio_scores[2, text_audio_ids[2]]:.3f}" - ) - display(Image.open(image_paths[text_image_ids[2]])) - ipd.Audio(audio_paths[text_audio_ids[2]]) - - -.. parsed-literal:: - - Predicted label: A dog - probability for image - 0.984 - probability for audio - 1.000 - - - -.. image:: image-bind-with-output_files/image-bind-with-output_28_1.png - - - - -.. 
raw:: html - - - - - - - -Post-Training Quantization of ImageBind model with NNCF -------------------------------------------------------- - - - -The goal of this part of tutorial is to demonstrate how to speed up the -model by applying 8-bit post-training quantization from -`NNCF `__ (Neural Network -Compression Framework) and infer quantized model via OpenVINO™ Toolkit. - -The optimization process contains the following steps: 1. Prepare -quantization dataset 2. Quantize OpenVINO model with NNCF. 3. Compare -probability matrices between converted and quantized models on input -data examples. 4. Compare model size of converted and quantized models. -5. Compare performance of converted and quantized models. - -.. code:: ipython3 - - modalities = [ModalityType.TEXT, ModalityType.VISION, ModalityType.AUDIO] - fp_model_paths = {modality: Path(f"image-bind-{modality}") / f"image-bind-{modality}.xml" for modality in modalities} - int8_model_paths = {modality: Path(f"image-bind-{modality}") / f"image-bind-{modality}_int8.xml" for modality in modalities} - -Prepare datasets -~~~~~~~~~~~~~~~~ - - - -The `Conceptual -Captions `__ dataset -consisting of ~3.3M images annotated with captions. Dataset is used to -quantize image and text models. - -.. code:: ipython3 - - import imagebind.data as data - import os - import requests - import tempfile - - from requests.packages.urllib3.exceptions import InsecureRequestWarning - - requests.packages.urllib3.disable_warnings(InsecureRequestWarning) - - - def check_text_data(data): - """ - Check if the given data is text-based. - """ - if isinstance(data, str): - return True - if isinstance(data, list): - return all(isinstance(x, str) for x in data) - return False - - - def collate_fn(examples, image_column="image_url", text_column="caption"): - """ - Collates examples into a batch for processing. - Preprocesses each example by loading and transforming image and text data. 
- Checks if the text data in the example is valid by calling the `check_text_data` function. - Downloads the image specified by the URL in the image_column of the example dictionary. - Constructs and returns a dictionary representing the collated batch with the following keys: - - "pixel_values": The pixel values of the preprocessed example. - - "input_ids": The transformed text data of the preprocessed example. - """ - assert len(examples) == 1 - example = examples[0] - if not check_text_data(example[text_column]): - raise ValueError("Text data is not valid") - - url = example[image_column] - with tempfile.TemporaryDirectory() as tempdir: - f_name = os.path.join(tempdir, "image.jpg") - try: - response = requests.get(url, verify=False, timeout=20) - with open(f_name, "wb") as file: - file.write(response.content) - pixel_values = data.load_and_transform_vision_data([f_name], "cpu") - except Exception: - print(f"Can't load image from url: {url}") - return None - - text = data.load_and_transform_text([example[text_column]], "cpu") - - return {"pixel_values": pixel_values, "input_ids": text} - -.. code:: ipython3 - - from datasets import load_dataset - import itertools - import torch - from tqdm.notebook import tqdm - - - def collect_vision_text_data(dataloader, init_steps): - """ - This function collects vision and text data from a dataloader for a specified number of initialization steps. - It iterates over the dataloader, fetching batches and storing the relevant vision and text data. - Returns a tuple containing the collected vision_data and text_data lists. 
- """ - text_data = [] - vision_data = [] - print(f"Fetching {init_steps} for the initialization...") - counter = 0 - for batch in tqdm(dataloader): - if counter == init_steps: - break - with torch.no_grad(): - if batch: - counter += 1 - text_data.append(batch["input_ids"].to("cpu")) - vision_data.append(batch["pixel_values"].to("cpu")) - return vision_data, text_data - - - def prepare_vision_text_dataset(opt_init_steps=50): - """ - Prepares a vision-text dataset for quantization by collecting vision and text data. - """ - dataset = load_dataset("google-research-datasets/conceptual_captions", streaming=False, trust_remote_code=True) - train_dataset = dataset["train"].shuffle(seed=0) - dataloader = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn, batch_size=1) - vision_data, text_data = collect_vision_text_data(dataloader, opt_init_steps) - return vision_data, text_data - -The `ESC-50 `__ dataset is -used to quantize the audio modality of the ImageBind model. Dataset is a -labeled collection of 2000 environmental audio recordings suitable for -benchmarking methods of environmental sound classification. The dataset -consists of 5-second-long recordings organized into 50 semantic classes. - -.. code:: ipython3 - - import numpy as np - import torchaudio - - - def collect_audio_data(dataloader, init_steps=300): - """ - This function collects audio data from a dataloader for a specified number of initialization steps. - It iterates over the dataloader, fetching batches and storing them in a list. - """ - audio_data = [] - for _, batch in tqdm(zip(range(init_steps), itertools.islice(dataloader, 0, init_steps))): - with torch.no_grad(): - audio_data.append(batch) - return audio_data - - - def prepare_audio_dataset(): - """ - Prepares an "ashraq/esc50" audio dataset for quantization by collecting audio data. - Collects audio data from the dataloader by calling the `collect_audio_data` function. 
- Returns a list containing the collected calibration audio data batches. - """ - audio_dataset = load_dataset("ashraq/esc50", streaming=True, trust_remote_code=True) - train_dataset = audio_dataset["train"].shuffle(seed=42, buffer_size=1000) - - def collate_fn(examples): - assert len(examples) == 1 - with tempfile.TemporaryDirectory() as tempdir: - f_name = os.path.join(tempdir, "audio.wav") - audio_data = examples[0]["audio"]["array"] - sample_rate = examples[0]["audio"]["sampling_rate"] - audio_data = torch.from_numpy(audio_data).to(torch.float32).unsqueeze(0) - torchaudio.save(f_name, audio_data, sample_rate) - return data.load_and_transform_audio_data([f_name], "cpu") - - dataloader = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn, batch_size=1) - calibration_data = collect_audio_data(dataloader) - return calibration_data - -.. code:: ipython3 - - vision_data, text_data = [], [] - - if not int8_model_paths[ModalityType.TEXT].exists() or not int8_model_paths[ModalityType.VISION].exists(): - vision_data, text_data = prepare_vision_text_dataset() - -Apply quantization -~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - import logging - import nncf - import openvino as ov - - nncf.set_log_level(logging.ERROR) - - core = ov.Core() - - - def quantize_openvino_model(modality, calibration_data): - model_path = fp_model_paths[modality] - model = core.read_model(model_path) - quantized_model = nncf.quantize( - model=model, - calibration_dataset=calibration_data, - model_type=nncf.ModelType.TRANSFORMER, - ) - ov.save_model(quantized_model, int8_model_paths[modality]) - return quantized_model - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - - -Quantize ImageBind model for vision modality. -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - - **NOTE**: Quantization is time and memory consuming operation. - Running quantization code below may take a long time. - -.. 
code:: ipython3 - - if not int8_model_paths[ModalityType.VISION].exists(): - if len(vision_data) == 0: - raise RuntimeError("Calibration dataset is empty. Please check internet connection and try to download images manually from the URLs above.") - - vision_dataset = nncf.Dataset(vision_data) - vision_quantized_model = quantize_openvino_model(modality=ModalityType.VISION, calibration_data=vision_dataset) - -Quantize ImageBind model for text modality -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - -.. code:: ipython3 - - if not int8_model_paths[ModalityType.TEXT].exists(): - text_dataset = nncf.Dataset(text_data) - text_quantized_model = quantize_openvino_model(modality=ModalityType.TEXT, calibration_data=text_dataset) - -Quantize ImageBind model for audio modality -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - -.. code:: ipython3 - - if not int8_model_paths[ModalityType.AUDIO].exists(): - audio_calibration_data = prepare_audio_dataset() - audio_dataset = nncf.Dataset(audio_calibration_data) - audio_quantized_model = quantize_openvino_model(modality=ModalityType.AUDIO, calibration_data=audio_dataset) - -NNCF also supports quantization-aware training, and other algorithms -than quantization. See the `NNCF -documentation `__ -in the NNCF repository for more information. - -Compare results for the OpenVINO FP16 model and the quantized model -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -Compare the probability matrices for ``FP16`` and ``INT8`` models. - -.. 
code:: ipython3 - - # Prepare inputs - - text_list = ["A car", "A bird", "A dog"] - image_paths = [ - ".assets/dog_image.jpg", - ".assets/car_image.jpg", - ".assets/bird_image.jpg", - ] - audio_paths = [ - ".assets/dog_audio.wav", - ".assets/bird_audio.wav", - ".assets/car_audio.wav", - ] - - inputs = { - ModalityType.TEXT: data.load_and_transform_text(text_list, "cpu"), - ModalityType.VISION: data.load_and_transform_vision_data(image_paths, "cpu"), - ModalityType.AUDIO: data.load_and_transform_audio_data(audio_paths, "cpu"), - } - -Select inference device -^^^^^^^^^^^^^^^^^^^^^^^ - - - -select device from dropdown list for running inference using OpenVINO - -.. code:: ipython3 - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=3, options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='AUTO') - - - -.. code:: ipython3 - - embeddings = {} - for modality in modalities: - ov_model = core.compile_model(fp_model_paths[modality], device.value) - embeddings[modality] = ov_model(inputs[modality])[0] - - quantized_embeddings = {} - for modality in modalities: - model = core.compile_model(int8_model_paths[modality], device.value) - quantized_embeddings[modality] = model(inputs[modality])[0] - -.. code:: ipython3 - - def visualize_prob_matrices(fp_matrix, int_matrix, x_label, y_label): - fig, ax = plt.subplots(1, 2) - for i, matrix in enumerate([fp_matrix, int_matrix]): - ax[i].matshow(matrix, cmap="winter") - - for (k, j), z in np.ndenumerate(matrix): - ax[i].title.set_text("FP16 probs" if i == 0 else "INT8 probs") - ax[i].text(j, k, "{:0.3f}".format(z), ha="center", va="center") - ax[i].set_xticks(range(len(x_label)), x_label) - ax[i].set_yticks(range(len(y_label)), y_label) - fig.tight_layout() - - - image_list = [img.split("/")[-1] for img in image_paths] - audio_list = [audio.split("/")[-1] for audio in audio_paths] - -.. 
code:: ipython3 - - fp_text_vision_scores = softmax(embeddings[ModalityType.VISION] @ embeddings[ModalityType.TEXT].T, axis=-1) - int_text_vision_scores = softmax( - quantized_embeddings[ModalityType.VISION] @ quantized_embeddings[ModalityType.TEXT].T, - axis=-1, - ) - - visualize_prob_matrices(fp_text_vision_scores, int_text_vision_scores, text_list, image_list) - - - -.. image:: image-bind-with-output_files/image-bind-with-output_52_0.png - - -.. code:: ipython3 - - fp_text_audio_scores = softmax(embeddings[ModalityType.AUDIO] @ embeddings[ModalityType.TEXT].T, axis=-1) - int_text_audio_scores = softmax( - quantized_embeddings[ModalityType.AUDIO] @ quantized_embeddings[ModalityType.TEXT].T, - axis=-1, - ) - - visualize_prob_matrices(fp_text_audio_scores, int_text_audio_scores, text_list, image_list) - - - -.. image:: image-bind-with-output_files/image-bind-with-output_53_0.png - - -.. code:: ipython3 - - fp_audio_vision_scores = softmax(embeddings[ModalityType.VISION] @ embeddings[ModalityType.AUDIO].T, axis=-1) - int_audio_vision_scores = softmax( - quantized_embeddings[ModalityType.VISION] @ quantized_embeddings[ModalityType.AUDIO].T, - axis=-1, - ) - - visualize_prob_matrices(fp_audio_vision_scores, int_audio_vision_scores, text_list, image_list) - - - -.. image:: image-bind-with-output_files/image-bind-with-output_54_0.png - - -Compare File Size -~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - def calculate_compression_rate(modality): - fp16_ir_model_size = Path(fp_model_paths[modality]).with_suffix(".bin").stat().st_size / 1024 - quantized_model_size = Path(int8_model_paths[modality]).with_suffix(".bin").stat().st_size / 1024 - print(f"Modality: {modality}") - print(f" * FP16 IR model size: {fp16_ir_model_size:.2f} KB") - print(f" * INT8 model size: {quantized_model_size:.2f} KB") - print(f" * Model compression rate: {fp16_ir_model_size / quantized_model_size:.3f}") - - - for modality in modalities: - calculate_compression_rate(modality) - - -.. 
parsed-literal:: - - Modality: text - * FP16 IR model size: 691481.69 KB - * INT8 model size: 347006.66 KB - * Model compression rate: 1.993 - Modality: vision - * FP16 IR model size: 1235995.15 KB - * INT8 model size: 620132.79 KB - * Model compression rate: 1.993 - Modality: audio - * FP16 IR model size: 168429.15 KB - * INT8 model size: 84818.40 KB - * Model compression rate: 1.986 - - -Compare inference time of the FP16 IR and quantized models -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -To measure the inference performance of the ``FP16`` and ``INT8`` -models, we use median inference time on calibration dataset. So we can -approximately estimate the speed up of the dynamic quantized models. - - **NOTE**: For the most accurate performance estimation, it is - recommended to run ``benchmark_app`` in a terminal/command prompt - after closing other applications with static shapes. - -.. code:: ipython3 - - import time - - - def calculate_inference_time(model_path, calibration_data): - model = core.compile_model(model_path) - output_layer = model.output(0) - inference_time = [] - for batch in calibration_data: - start = time.perf_counter() - _ = model(batch)[output_layer] - end = time.perf_counter() - delta = end - start - inference_time.append(delta) - return np.median(inference_time) - -Vision model -^^^^^^^^^^^^ - - - -.. code:: ipython3 - - fp16_latency = calculate_inference_time(fp_model_paths[ModalityType.VISION], vision_data) - int8_latency = calculate_inference_time(int8_model_paths[ModalityType.VISION], vision_data) - print(f"Performance speed up: {fp16_latency / int8_latency:.3f}") - - -.. parsed-literal:: - - Performance speed up: 2.375 - - -Text model -^^^^^^^^^^ - - - -.. 
code:: ipython3 - - fp16_latency = calculate_inference_time(fp_model_paths[ModalityType.TEXT], text_data) - int8_latency = calculate_inference_time(int8_model_paths[ModalityType.TEXT], text_data) - print(f"Performance speed up: {fp16_latency / int8_latency:.3f}") - - -.. parsed-literal:: - - Performance speed up: 1.492 - - -Audio model -^^^^^^^^^^^ - - - -.. code:: ipython3 - - fp16_latency = calculate_inference_time(fp_model_paths[ModalityType.AUDIO], audio_calibration_data) - int8_latency = calculate_inference_time(int8_model_paths[ModalityType.AUDIO], audio_calibration_data) - print(f"Performance speed up: {fp16_latency / int8_latency:.3f}") - - -.. parsed-literal:: - - Performance speed up: 5.770 - diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_20_0.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_20_0.png deleted file mode 100644 index b61da5d71d0e90..00000000000000 --- a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_20_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:407f4039d44322edd717fb1eba4c0e029205b2c691614606f1a5b33ed31aa047 -size 15474 diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_22_0.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_22_0.png deleted file mode 100644 index bf96c415a07c15..00000000000000 --- a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_22_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:098a56bdaf58b412fe6935d327bcd810942f01789ecd5c2efe834888eba3b819 -size 13795 diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_24_0.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_24_0.png deleted file mode 100644 index 54a9a68752100b..00000000000000 --- a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_24_0.png +++ /dev/null @@ -1,3 +0,0 
@@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fe454683f2419970a93baaea6a5beb973dd832627217464d87c14bf2a61e8032 -size 18633 diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_26_1.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_26_1.png deleted file mode 100644 index 6be4611dbc7a18..00000000000000 --- a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_26_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d09352f8474421fa78d601cc5afbe88df3d0403c157f91605d424b66a2f1809a -size 303014 diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_27_1.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_27_1.png deleted file mode 100644 index 174dcfdcbe8079..00000000000000 --- a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_27_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:609e506939d69a89fb59d36622d72005d5b162afccf70c1e2463cd51d544d4dd -size 777583 diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_28_1.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_28_1.png deleted file mode 100644 index a4b0b02a4d7c0b..00000000000000 --- a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_28_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7509d532217e990ed721424c57aecbadfb634d397bd1c069852f873fee8741a9 -size 572170 diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_52_0.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_52_0.png deleted file mode 100644 index 9274858833d2aa..00000000000000 --- a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_52_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:41c01dd2ebbddd60573c560ddcb00f7671b63bf1e49ca68497be1d39fd5cb86c -size 19998 diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_53_0.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_53_0.png deleted file mode 100644 index 76f09aa4eb803b..00000000000000 --- a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_53_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:acb5ca8757899c94fa8fd68a647975ea031ffa3f4955214b9a39d097b179ad27 -size 17315 diff --git a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_54_0.png b/docs/notebooks/image-bind-with-output_files/image-bind-with-output_54_0.png deleted file mode 100644 index f2f53fccfbdd8c..00000000000000 --- a/docs/notebooks/image-bind-with-output_files/image-bind-with-output_54_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e30a62c61037f25fa771225ab71ab9ecb407a0589103a79de2c5e0374583adf1 -size 22314 diff --git a/docs/notebooks/image-classification-quantization-with-output.rst b/docs/notebooks/image-classification-quantization-with-output.rst index 7bf7172f720588..491ca0eed2881a 100644 --- a/docs/notebooks/image-classification-quantization-with-output.rst +++ b/docs/notebooks/image-classification-quantization-with-output.rst @@ -70,7 +70,6 @@ Guide 4.36,<4.45" "torch>=2.1" "torchvision" "einops" "timm" "Pillow" "gradio>=4.36" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2024.3.0" "nncf>=2.12.0" - - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. 
- + import platform + + %pip install -q "transformers>4.36" "torch>=2.1" "torchvision" "einops" "timm" "Pillow" "gradio>=4.36" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "nncf>=2.14.0" "datasets" + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q -U "openvino>=2024.5" "openvino-tokenizers>=2024.5" "openvino-genai>=2024.5" + + if platform.system() == "Darwin": + %pip install -q "numpy<2.0.0" .. code:: ipython3 from pathlib import Path import requests - if not Path("conversation.py").exists(): - r = requests.get("https://huggingface.co/OpenGVLab/InternVL2-1B/raw/main/conversation.py") - open("conversation.py", "w", encoding="utf-8").write(r.text) - - if not Path("internvl2_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/internvl2/internvl2_helper.py") - open("internvl2_helper.py", "w", encoding="utf-8").write(r.text) - if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/internvl2/gradio_helper.py") open("gradio_helper.py", "w", encoding="utf-8").write(r.text) @@ -87,6 +79,10 @@ Prerequisites if not Path("notebook_utils.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") open("notebook_utils.py", "w", encoding="utf-8").write(r.text) + + if not Path("cmd_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py") + open("cmd_helper.py", "w", encoding="utf-8").write(r.text) Select model ------------ @@ -100,18 +96,25 @@ using widget bellow: .. 
code:: ipython3 - from internvl2_helper import model_selector + model_ids = ["OpenGVLab/InternVL2-1B", "OpenGVLab/InternVL2-2B", "OpenGVLab/InternVL2-4B", "OpenGVLab/InternVL2-8B"] + + + def model_selector(default=model_ids[0]): + import ipywidgets as widgets + + model_checkpoint = widgets.Dropdown( + options=model_ids, + default=default, + description="Model:", + ) + return model_checkpoint + model_id = model_selector() model_id -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - - .. parsed-literal:: @@ -130,94 +133,84 @@ using widget bellow: .. parsed-literal:: Selected OpenGVLab/InternVL2-1B - + Convert and Optimize model -------------------------- -InternVL2 is PyTorch model. OpenVINO supports PyTorch models via -conversion to OpenVINO Intermediate Representation (IR). `OpenVINO model -conversion -API `__ -should be used for these purposes. ``ov.convert_model`` function accepts -original PyTorch model instance and example input for tracing and -returns ``ov.Model`` representing this model in OpenVINO framework. -Converted model can be used for saving on disk using ``ov.save_model`` -function or directly loading on device using ``core.complie_model``. -``internvl2_helper.py`` script contains helper function for model -conversion, please check its content if you interested in conversion -details. +Our model conversion and optimization consist of following steps: 1. +Download original PyTorch model. 2. Convert model to OpenVINO format. 3. +Compress model weights using NNCF. -.. raw:: html +Let’s consider each step more deeply. -
+Convert model to OpenVINO IR format using Optimum CLI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Click here for more detailed explanation of conversion steps InternVL2 -is autoregressive transformer generative model, it means that each next -model step depends from model output from previous step. The generation -approach is based on the assumption that the probability distribution of -a word sequence can be decomposed into the product of conditional next -word distributions. In other words, model predicts the next token in the -loop guided by previously generated tokens until the stop-condition will -be not reached (generated sequence of maximum length or end of string -token obtained). The way the next token will be selected over predicted -probabilities is driven by the selected decoding methodology. You can -find more information about the most popular decoding methods in this -blog. The entry point for the generation process for models from the -Hugging Face Transformers library is the ``generate`` method. You can -find more information about its parameters and configuration in the -documentation. To preserve flexibility in the selection decoding -methodology, we will convert only model inference for one step. - -The inference flow has difference on first step and for the next. On the -first step, model accept preprocessed input instruction and image, that -transformed to the unified embedding space using ``input_embedding`` and -``image_encoder`` models, after that ``language model``, LLM-based part -of model, runs on input embeddings to predict probability of next -generated tokens. On the next step, ``language_model`` accepts only next -token id selected based on sampling strategy and processed by -``input_embedding`` model and cached attention key and values. Since the -output side is auto-regressive, an output token hidden state remains the -same once computed for every further generation step. 
Therefore, -recomputing it every time you want to generate a new token seems -wasteful. With the cache, the model saves the hidden state once it has -been computed. The model only computes the one for the most recently -generated output token at each time step, re-using the saved ones for -hidden tokens. This reduces the generation complexity from -:math:`O(n^3)` to :math:`O(n^2)` for a transformer model. More details -about how it works can be found in this -`article `__. -To sum up above, model consists of 4 parts: - -- **Image encoder** for encoding input images into embedding space. -- **Input Embedding** for conversion input text tokens into embedding - space -- **Language Model** for generation answer based on input embeddings - provided by Image Encoder and Input Embedding models. -.. raw:: html -
+OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate +Representation format. For convenience, we will use OpenVINO integration +with HuggingFace Optimum. `Optimum +Intel `__ is the +interface between the Transformers and Diffusers libraries and the +different tools and libraries provided by Intel to accelerate end-to-end +pipelines on Intel architectures. + +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. ``optimum-cli`` provides a command line interface for +model conversion and optimization. + +General command format: + +.. code:: bash + + optimum-cli export openvino --model --task + +where task is the task to export the model for; if not specified, the task +will be auto-inferred based on the model. You can find a mapping between +tasks and model classes in Optimum TaskManager +`documentation `__. +Additionally, you can specify weights compression using +``--weight-format`` argument with one of the following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. For int8 and int4 +`nncf `__ will be used for +weight compression. More details about model export are provided in `Optimum +Intel +documentation `__. Compress model weights to 4-bit ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For reducing memory consumption, weights compression optimization can be applied using -`NNCF `__. +`NNCF `__ via ``optimum-cli`` +command. In this tutorial we will demonstrate how to apply accurate +int4 weight quantization using the AWQ method. .. raw:: html
-Click here for more details about weight compression Weight compression -aims to reduce the memory footprint of a model. It can also lead to -significant performance improvement for large memory-bound models, such -as Large Language Models (LLMs). LLMs and other models, which require -extensive memory to store the weights during inference, can benefit from -weight compression in the following ways: +.. raw:: html + + + +Click here for more details about weight compression + +.. raw:: html + + + +Weight compression aims to reduce the memory footprint of a model. It +can also lead to significant performance improvement for large +memory-bound models, such as Large Language Models (LLMs). LLMs and +other models, which require extensive memory to store the weights during +inference, can benefit from weight compression in the following ways: - enabling the inference of exceptionally large models that cannot be accommodated in the memory of the device; @@ -238,11 +231,13 @@ with the performance of the full model quantization. In addition, weight compression is data-free and does not require a calibration dataset, making it easy to use. -``nncf.compress_weights`` function can be used for performing weights -compression. The function accepts an OpenVINO model and other -compression parameters. Compared to INT8 compression, INT4 compression -improves performance even more, but introduces a minor drop in -prediction quality. +Usually 4-bit compression gives maximal speedup and minimal +memory footprint compared with 8-bit compression, but at the same time +it may significantly reduce model accuracy. `Activation-aware Weight +Quantization `__ (AWQ) is an algorithm +that tunes model weights for more accurate INT4 compression. It slightly +improves generation quality of compressed models, but requires +additional time for tuning weights on a calibration dataset. More details about weights compression, can be found in `OpenVINO documentation `__.
@@ -253,151 +248,99 @@ documentation self.max_seq_len_cached: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:324: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/qwen2/modeling_qwen2.py:339: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): - - -.. 
parsed-literal:: - - ✅ Language model successfully converted - ⌛ Weights compression with int4_asym mode started - INFO:nncf:Statistics of the bitwidth distribution: + Applying AWQ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 24/24 • 0:01:54 • 0:00:0054 • 0:00:06;2;97;53;69m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ • 0:00:00 + Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:17 • 0:00:00;0;104;181m0:00:01181m0:00:01 + [?25hINFO:nncf:Statistics of the bitwidth distribution: ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 28% (1 / 169) │ 0% (0 / 168) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 72% (168 / 169) │ 100% (168 / 168) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - - - -.. parsed-literal:: - - Output() - - - - - - - - + │ 8 │ 100% (99 / 99) │ 100% (99 / 99) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━���━━━━━━━━━━━━━━━┙ + Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:01 • 0:00:00• 0:00:01:01 + [?25hINFO:nncf:Statistics of the bitwidth distribution: + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 100% (1 / 1) │ 100% (1 / 1) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━���━━━━━━━━━━━━━━━┙ + Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:00 • 0:00:00 + [?25h .. parsed-literal:: - ✅ Weights compression finished - ✅ OpenGVLab/InternVL2-1B model conversion finished. 
You can find results in InternVL2-1B - + Attempt to save config using standard API has failed with 'architectures'. There may be an issue with model config, please check its correctness before usage. + Select inference device ----------------------- @@ -426,49 +369,76 @@ Prepare model inference pipeline -As discussed, the model comprises Image Encoder and LLM (with separated -text embedding part) that generates answer. In ``internvl2_helper.py`` -we defined LLM inference class ``OvModelForCausalLMWithEmb`` that will -represent generation cycle, It is based on `HuggingFace Transformers -GenerationMixin `__ -and looks similar to `Optimum -Intel `__ -``OVModelForCausalLM`` that is used for LLM inference with only -difference that it can accept input embedding. In own turn, general -multimodal model class ``OVInternVLChatModel`` handles chatbot -functionality including image processing and answer generation using -LLM. +`OpenVINO™ GenAI `__ +is a library of the most popular Generative AI model pipelines, +optimized execution methods, and samples that run on top of highly +performant `OpenVINO +Runtime `__. -.. code:: ipython3 +This library is friendly to PC and laptop execution, and optimized for +resource consumption. It requires no external dependencies to run +generative models as it already includes all the core functionality +(e.g. tokenization via openvino-tokenizers). OpenVINO™ GenAI is a flavor +of OpenVINO™, aiming to simplify running inference of generative AI +models. It hides the complexity of the generation process and minimizes +the amount of code required. - from internvl2_helper import OVInternVLChatModel - from transformers import AutoTokenizer - - # Uncomment below lines to see the model inference class code - - # OVInternVLChatModel?? +Inference Visual language models can be implemented using OpenVINO GenAI +``VLMPipeline`` class. Similarly to LLMPipeline, that we discussed in +this +`notebook `__. 
+It supports chat mode, preserving the conversation history inside the +pipeline, which allows us to effectively implement a chatbot that supports +conversation about the provided image content. For pipeline initialization +we should provide the path to the model directory and the inference device. .. code:: ipython3 - tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True) - ov_model = OVInternVLChatModel(model_dir, device.value) + import openvino_genai as ov_genai + + ov_model = ov_genai.VLMPipeline(model_dir, device=device.value) Run model inference ------------------- -Our interface is fully compatible with Transformers interface for -InternVL2, you can try any of represented here `usage -examples `__. -Let’s check model capabilities in answering questions about image: +For preparing input data, ``VLMPipeline`` uses a tokenizer and image +processor internally; we just need to convert the image to an input OpenVINO tensor +and provide the question as a string. Additionally, we can provide options +for controlling the generation process (e.g. the maximum number of generated +tokens or using multinomial sampling for decoding instead of the greedy +search approach) using ``GenerationConfig``. + +The generation process for a long response may be time consuming. To +access partial results as soon as they are generated, without waiting for the +whole process to finish, the Streaming API can be used. Token streaming is +the mode in which the generative system returns the tokens one by one as +the model generates them. This enables showing progressive generations +to the user rather than waiting for the whole generation. Streaming is +an essential aspect of the end-user experience as it reduces latency, +one of the most critical aspects of a smooth experience. ..
code:: ipython3 - import PIL - from internvl2_helper import load_image - from transformers import TextIteratorStreamer - from threading import Thread + import requests + from PIL import Image + from io import BytesIO + import numpy as np + import openvino as ov + + config = ov_genai.GenerationConfig() + config.max_new_tokens = 100 + + + def load_image(image_file): + if isinstance(image_file, str) and (image_file.startswith("http") or image_file.startswith("https")): + response = requests.get(image_file) + image = Image.open(BytesIO(response.content)).convert("RGB") + else: + image = Image.open(image_file).convert("RGB") + image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.byte) + return image, ov.Tensor(image_data) EXAMPLE_IMAGE = Path("examples_image1.jpg") @@ -479,59 +449,41 @@ Let’s check model capabilities in answering questions about image: with EXAMPLE_IMAGE.open("wb") as handler: handler.write(img_data) - pixel_values = load_image(EXAMPLE_IMAGE, max_num=12) - streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) + def streamer(subword: str) -> bool: + """ - generation_config = dict(max_new_tokens=100, do_sample=True, streamer=streamer) - question = "\nPlease describe the image shortly." + Args: + subword: sub-word of the generated text. + + Returns: Return flag corresponds whether generation should be stopped. 
+ + """ + print(subword, end="", flush=True) - display(PIL.Image.open(EXAMPLE_IMAGE)) - print(f"User: {question}\n") - print("Assistant:") - thread = Thread( - target=ov_model.chat, - kwargs=dict( - tokenizer=tokenizer, - pixel_values=pixel_values, - question=question, - history=None, - return_history=False, - generation_config=generation_config, - ), - ) - thread.start() + question = "Please describe the image shortly" - generated_text = "" - # Loop through the streamer to get the new text as it is generated - for new_text in streamer: - if new_text == ov_model.conv_template.sep: - break - generated_text += new_text - print(new_text, end="", flush=True) # Print each new chunk of generated text on the same line + + image, image_tensor = load_image(EXAMPLE_IMAGE) + display(image) + print(f"User: {question}\n") + print("Assistant:") + output = ov_model.generate(question, image=image_tensor, generation_config=config, streamer=streamer) -.. image:: internvl2-with-output_files/internvl2-with-output_16_0.png +.. image:: internvl2-with-output_files/internvl2-with-output_14_0.png .. parsed-literal:: - User: - Please describe the image shortly. + User: Please describe the image shortly Assistant: - - -.. parsed-literal:: - - Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation. - - -.. parsed-literal:: - - The image shows a red panda lying on its side, partially wrapped in a wooden structure, possibly a container or log. The red panda appears to be looking at the camera with large, expressive eyes, displaying an endearing and lively appearance. The background consists of a portion of the red panda's habitat environment, which appears to be a tree and some greenery. + . + + The image shows a red panda, a type of mammal known for its distinctive red fur and white markings. The animal is resting on a wooden structure, possibly a platform or a platform-like object, with its head turned slightly towards the camera. 
The background is a natural setting, with trees and foliage visible, suggesting that the red panda is in a forested or wooded area. The red panda's eyes are large and expressive, and its ears are perked up, indicating that it is alert Interactive demo ---------------- @@ -542,25 +494,11 @@ Interactive demo from gradio_helper import make_demo - demo = make_demo(ov_model, tokenizer) + demo = make_demo(ov_model) try: - demo.launch(debug=False, height=600) + demo.launch(debug=True, height=600) except Exception: - demo.launch(debug=False, share=True, height=600) + demo.launch(debug=True, share=True, height=600) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ - - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - diff --git a/docs/notebooks/internvl2-with-output_files/internvl2-with-output_16_0.jpg b/docs/notebooks/internvl2-with-output_files/internvl2-with-output_14_0.jpg similarity index 100% rename from docs/notebooks/internvl2-with-output_files/internvl2-with-output_16_0.jpg rename to docs/notebooks/internvl2-with-output_files/internvl2-with-output_14_0.jpg diff --git a/docs/notebooks/internvl2-with-output_files/internvl2-with-output_16_0.png b/docs/notebooks/internvl2-with-output_files/internvl2-with-output_14_0.png similarity index 100% rename from docs/notebooks/internvl2-with-output_files/internvl2-with-output_16_0.png rename to docs/notebooks/internvl2-with-output_files/internvl2-with-output_14_0.png diff --git a/docs/notebooks/jina-clip-with-output.rst b/docs/notebooks/jina-clip-with-output.rst index 1cdb2e1d286245..478d333d54d7e7 100644 --- a/docs/notebooks/jina-clip-with-output.rst +++ b/docs/notebooks/jina-clip-with-output.rst @@ -77,14 +77,7 @@ Prerequisites .. 
code:: ipython3 %pip install -q "openvino>=2024.2.0" "datasets>=2.20" "nncf>=2.11.0" - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "gradio>=4.19" "pillow" "einops" "timm" "transformers[torch]>=4.39" "torch>=2.1" "matplotlib>=3.4" - - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "gradio>=4.19" "pillow" "einops" "timm" "transformers[torch]>=4.39" "torch>=2.1" "matplotlib>=3.4" "typing_extensions>=4.9" Instantiate model ----------------- @@ -103,17 +96,6 @@ weights, using ``from_pretrained`` method. model = AutoModel.from_pretrained("jinaai/jina-clip-v1", trust_remote_code=True) - -.. parsed-literal:: - - 2024-11-05 01:41:58.578137: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-05 01:41:58.612620: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-11-05 01:41:59.276782: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers - warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) - - Prepare input data ~~~~~~~~~~~~~~~~~~ @@ -127,28 +109,32 @@ passing in the PIL.Image objects. from PIL import Image import requests + from pathlib import Path - # image input data - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) + if not Path("notebook_utils.py").exists(): + # image input data + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) - open("notebook_utils.py", "w").write(r.text) + open("notebook_utils.py", "w").write(r.text) from notebook_utils import download_file, device_widget, quantization_widget - download_file( - "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/3f779fc1-c1b2-4dec-915a-64dae510a2bb", - "furseal.png", - directory="data", - ) + if not Path("data/furseal.png").exists(): + download_file( + "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/3f779fc1-c1b2-4dec-915a-64dae510a2bb", + "furseal.png", + directory="data", + ) img_furseal = Image.open("./data/furseal.png") - image_path = download_file( - "https://github.com/user-attachments/assets/1c66a05d-7442-45c2-a34c-bb08b95af7a6", - "coco.jpg", - directory="data", - ) + if not Path("data/coco.jpg").exists(): + image_path = download_file( + "https://github.com/user-attachments/assets/1c66a05d-7442-45c2-a34c-bb08b95af7a6", + "coco.jpg", + directory="data", + ) img_coco = 
Image.open("./data/coco.jpg") @@ -292,23 +278,6 @@ loading on device using ``core.complie_model``. ov_text_model = ov.convert_model(model.text_model, example_input=text_inputs["input_ids"]) ov.save_model(ov_text_model, fp16_text_model_path) - -.. parsed-literal:: - - WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - - -.. parsed-literal:: - - WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead - warnings.warn( - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/jinaai/jina-bert-flash-implementation/b78d1595de294f13ffe7b19d6cd63892a6e4e7a4/mha.py:333: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - softmax_scale = self.softmax_scale or 1.0 / math.sqrt(q.shape[-1]) - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/jinaai/jina-bert-flash-implementation/b78d1595de294f13ffe7b19d6cd63892a6e4e7a4/mha.py:343: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
- if seqlen > self.linear_biases.shape[-1]: - - .. code:: ipython3 fp16_vision_model_path = Path("jina-clip-vision_v1_fp16.xml") @@ -317,13 +286,6 @@ loading on device using ``core.complie_model``. ov_vision_model = ov.convert_model(model.vision_model, example_input=vision_inputs["pixel_values"]) ov.save_model(ov_vision_model, fp16_vision_model_path) - -.. parsed-literal:: - - /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/jinaai/jina-clip-implementation/96e41b892fe647a3c45bf921352f147184024aef/eva_model.py:468: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - assert H == self.img_size[0] and W == self.img_size[1], ( - - Select inference device ~~~~~~~~~~~~~~~~~~~~~~~ @@ -412,11 +374,15 @@ inference faster. The optimization process contains the following steps: .. 
code:: ipython3 - # Fetch `skip_kernel_extension` module - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", - ) - open("skip_kernel_extension.py", "w").write(r.text) + if not Path("skip_kernel_extension.py").exists(): + # Fetch `skip_kernel_extension` module + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", + ) + open("skip_kernel_extension.py", "w").write(r.text) + + int8_text_model_path = Path("jina-clip-text_v1_int8.xml") + int8_vision_model_path = Path("jina-clip-vision_v1_int8.xml") %load_ext skip_kernel_extension @@ -506,18 +472,19 @@ Dataset with text data import logging import nncf - dataset = load_dataset("google-research-datasets/conceptual_captions", trust_remote_code=True) - train_dataset = dataset["train"].shuffle(seed=42) + if not int8_text_model_path.exists(): + dataset = load_dataset("google-research-datasets/conceptual_captions", trust_remote_code=True) + train_dataset = dataset["train"].shuffle(seed=42) - dataloader_text = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn_text, batch_size=1) - calibration_data_text = prepare_calibration_data_text(dataloader_text, 50) + dataloader_text = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn_text, batch_size=1) + calibration_data_text = prepare_calibration_data_text(dataloader_text, 50) .. parsed-literal:: - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, openvino Fetching 50 samples for the initialization... - + .. 
parsed-literal:: @@ -588,17 +555,18 @@ Dataset with image data %%skip not $to_quantize.value - dataset = load_dataset("google-research-datasets/conceptual_captions", trust_remote_code=True) - train_dataset = dataset["train"].shuffle(seed=42) + if not int8_vision_model_path.exists(): + dataset = load_dataset("google-research-datasets/conceptual_captions", trust_remote_code=True) + train_dataset = dataset["train"].shuffle(seed=42) - dataloader_vis = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn_vision, batch_size=1) - calibration_data_vision = prepare_calibration_data_vis(dataloader_vis, 50) + dataloader_vis = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn_vision, batch_size=1) + calibration_data_vision = prepare_calibration_data_vis(dataloader_vis, 50) .. parsed-literal:: Fetching 50 samples for the initialization... - + .. parsed-literal:: @@ -621,108 +589,48 @@ Quantization of text model -.. code:: ipython3 - - int8_text_model_path = "jina-clip-text_v1_int8.xml" - .. code:: ipython3 %%skip not $to_quantize.value - if len(calibration_data_text) == 0: - raise RuntimeError( - 'Calibration dataset is empty. Please check internet connection and try to download images manually.' - ) + if not int8_text_model_path.exists(): + if len(calibration_data_text) == 0: + raise RuntimeError( + 'Calibration dataset is empty. Please check internet connection and try to download images manually.' + ) - ov_model_text = core.read_model(fp16_text_model_path) + ov_model_text = core.read_model(fp16_text_model_path) - calibration_dataset = nncf.Dataset(calibration_data_text) - quantized_model = nncf.quantize( - model=ov_model_text, - calibration_dataset=calibration_dataset - ) - ov.save_model(quantized_model, int8_text_model_path) - - - -.. parsed-literal:: - - Output() - - - - - - - - - - -.. 
parsed-literal:: - - Output() - - - - - - - - + calibration_dataset = nncf.Dataset(calibration_data_text) + quantized_model = nncf.quantize( + model=ov_model_text, + calibration_dataset=calibration_dataset + ) + ov.save_model(quantized_model, int8_text_model_path) Quantization of image model ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. code:: ipython3 - - int8_vision_model_path = "jina-clip-vision_v1_int8.xml" - .. code:: ipython3 %%skip not $to_quantize.value - if len(calibration_data_vision) == 0: - raise RuntimeError( - 'Calibration dataset is empty. Please check internet connection and try to download images manually.' - ) + if not int8_vision_model_path.exists(): + if len(calibration_data_vision) == 0: + raise RuntimeError( + 'Calibration dataset is empty. Please check internet connection and try to download images manually.' + ) - ov_model_vision = core.read_model(fp16_vision_model_path) + ov_model_vision = core.read_model(fp16_vision_model_path) - calibration_dataset = nncf.Dataset(calibration_data_vision) - quantized_model = nncf.quantize( - model=ov_model_vision, - calibration_dataset=calibration_dataset - ) - ov.save_model(quantized_model, int8_vision_model_path) - - - -.. parsed-literal:: - - Output() - - - - - - - - - - -.. parsed-literal:: - - Output() - - - - - - - - + calibration_dataset = nncf.Dataset(calibration_data_vision) + quantized_model = nncf.quantize( + model=ov_model_vision, + calibration_dataset=calibration_dataset + ) + ov.save_model(quantized_model, int8_vision_model_path) .. code:: ipython3 @@ -739,7 +647,7 @@ Quantization of image model -.. image:: jina-clip-with-output_files/jina-clip-with-output_39_0.png +.. 
image:: jina-clip-with-output_files/jina-clip-with-output_37_0.png Compare File Size @@ -771,7 +679,7 @@ Compare File Size Text model: FP16 model size - 266.88 MB; INT8 model size - 136.98 MB; Model compression rate: 1.948 Vision model: FP16 model size - 163.83 MB; INT8 model size - 82.64 MB; Model compression rate: 1.983 - + Compare inference time of the FP16 IR and quantized models ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -820,9 +728,9 @@ approximately estimate the speed up of the dynamic quantized models. .. parsed-literal:: - Performance speed up for text model: 1.978 - Performance speed up for vision model: 1.428 - + Performance speed up for text model: 1.610 + Performance speed up for vision model: 1.489 + Gradio demo ----------- @@ -906,23 +814,9 @@ example, ``cat,dog,bird``) demo = make_demo(image_text_fn=image_text_sim, text_text_fn=text_text_sim, image_image_fn=image_image_sim, model_choice_visible=model_choice_visible) try: - demo.queue().launch(debug=False) + demo.queue().launch(debug=True) except Exception: - demo.queue().launch(share=True, debug=False) + demo.queue().launch(share=True, debug=True) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ - - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. 
- - - - - - - diff --git a/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_11_0.png b/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_11_0.png index 83744f48df88cc..c5043ea82df122 100644 --- a/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_11_0.png +++ b/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_11_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b913407ebaac94ee389f4ecd1b166dfbbb2b9bfd12ceaff8df783460cbd5e64 +oid sha256:d328ce0b22f2a80ed7640ac0a2b292df687aaf303427e56d954d30de439c0c56 size 427929 diff --git a/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_21_0.png b/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_21_0.png index 83744f48df88cc..c5043ea82df122 100644 --- a/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_21_0.png +++ b/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_21_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b913407ebaac94ee389f4ecd1b166dfbbb2b9bfd12ceaff8df783460cbd5e64 +oid sha256:d328ce0b22f2a80ed7640ac0a2b292df687aaf303427e56d954d30de439c0c56 size 427929 diff --git a/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_37_0.png b/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_37_0.png new file mode 100644 index 00000000000000..71eaff4146ac7d --- /dev/null +++ b/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_37_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8a465646b9f698e200a3934841843024767c8d0a559d0a1267f76c5bcf9b87e +size 428007 diff --git a/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_39_0.png b/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_39_0.png deleted file mode 100644 index dc44386559455c..00000000000000 --- a/docs/notebooks/jina-clip-with-output_files/jina-clip-with-output_39_0.png +++ /dev/null @@ -1,3 +0,0 @@ 
-version https://git-lfs.github.com/spec/v1 -oid sha256:9c3ee89570303f7037c893a1a6a2381569ec34fc5d9e29526f4ae1c94ead1f96 -size 428013 diff --git a/docs/notebooks/knowledge-graphs-conve-with-output.rst b/docs/notebooks/knowledge-graphs-conve-with-output.rst index de9115fd9ab4a8..aa8b1a20ea554f 100644 --- a/docs/notebooks/knowledge-graphs-conve-with-output.rst +++ b/docs/notebooks/knowledge-graphs-conve-with-output.rst @@ -233,7 +233,7 @@ Download Model Checkpoint .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/knowledge-graphs-conve/models/conve.pt') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/knowledge-graphs-conve/models/conve.pt') @@ -395,7 +395,7 @@ typical to use metrics such as Mean Reciprocal Rank, Hits@10 etc. .. parsed-literal:: - Average time taken for inference: 0.7430613040924072 ms + Average time taken for inference: 1.2128651142120361 ms Mean accuracy of the model on the test dataset: 0.875 @@ -534,7 +534,7 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - Average time taken for inference: 1.0752081871032715 ms + Average time taken for inference: 0.8927186330159506 ms Mean accuracy of the model on the test dataset: 0.10416666666666667 @@ -553,7 +553,7 @@ Determine the platform specific speedup obtained through OpenVINO graph optimiza .. parsed-literal:: - Speedup with OpenVINO optimizations: 0.69 X + Speedup with OpenVINO optimizations: 1.36 X Benchmark the converted OpenVINO model using benchmark app @@ -598,7 +598,7 @@ inference can also be obtained by looking at the benchmark app results. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 4.36 ms + [ INFO ] Read model took 4.44 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] e1 (node: e1) : i64 / [...] / [] @@ -614,7 +614,7 @@ inference can also be obtained by looking at the benchmark app results. [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: aten::softmax/Softmax) : f32 / [...] / [1,271] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 54.92 ms + [ INFO ] Compile model took 49.21 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -653,17 +653,17 @@ inference can also be obtained by looking at the benchmark app results. [ INFO ] Fill input 'rel' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 12 inference requests, limits: 10000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 1.66 ms + [ INFO ] First inference took 2.26 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 94596 iterations - [ INFO ] Duration: 10001.69 ms + [ INFO ] Count: 95532 iterations + [ INFO ] Duration: 10001.76 ms [ INFO ] Latency: - [ INFO ] Median: 1.07 ms - [ INFO ] Average: 1.09 ms - [ INFO ] Min: 0.79 ms - [ INFO ] Max: 8.57 ms - [ INFO ] Throughput: 9458.00 FPS + [ INFO ] Median: 1.06 ms + [ INFO ] Average: 1.08 ms + [ INFO ] Min: 0.73 ms + [ INFO ] Max: 29.28 ms + [ INFO ] Throughput: 9551.52 FPS Conclusions diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output.rst b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output.rst index b6a7a971fef3be..d90b461ac2023a 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output.rst +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output.rst @@ -85,7 +85,7 @@ Install requirements .. 
parsed-literal:: - Requirement already satisfied: pip in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (24.3.1) + Requirement already satisfied: pip in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (24.3.1) Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -152,10 +152,9 @@ example `__ .. parsed-literal:: - 2024-11-05 01:44:54.753766: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-05 01:44:54.788691: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-22 01:21:24.800927: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-22 01:21:24.825776: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-05 01:44:55.309895: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. 
parsed-literal:: @@ -374,11 +373,14 @@ Vision model accept ``pixel_values`` and returns ``image_embeds``. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:465: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:452: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:519: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:505: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:559: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): @@ -406,7 +408,7 @@ Convert Image To Text Projection model .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. 
Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) if a.grad is not None: @@ -541,13 +543,13 @@ generated text by ``AutoProcessor``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:804: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:859: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if max_pos > self.weights.size(0): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:920: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:975: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.size() != (batch_size, 1, seq_length, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1206: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1261: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: @@ -1389,9 +1391,9 @@ pipelines, we use mean inference time on 7 samples. .. 
parsed-literal:: - FP32 pipeline: 2.746 seconds - Optimized pipeline: 1.140 seconds - Performance speed-up: 2.409 + FP32 pipeline: 2.727 seconds + Optimized pipeline: 1.146 seconds + Performance speed-up: 2.380 Interactive inference diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg index 2310cb001b0c6b..c4966e68a0f7c6 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ca596f09c0f6c0dafa4aca0fbe7974941301cfcbc6bcb3a8c4255774c347d0b -size 123320 +oid sha256:d99c65937fed48b5c1ef214891a3ded6fc4acabbad731ecafdf30d897cd8807b +size 121119 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png index 91289c35d7c60c..717e205ccbaa23 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:56d06f7d654939feda627f67196b813de9b38a718acba9f5daed59a43314829f -size 1150807 +oid sha256:4e416163b28e55e213c884e64462792c0cb5f9ae1389961c3a5467ef2c1ac101 +size 1150960 diff --git 
a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png index d98f56141b1252..85633bcfcf04ae 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0d7f8506e5f1bd369debee273b45c601d05901af4937d8cc976f985cd4a81fed -size 1149292 +oid sha256:7561941945a717b6a4f6e6bda157e86c62c5ff638acad518558c176a0ba21be5 +size 1149449 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg index b53344f52b7396..5aed31c2359d29 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:edd5a47baf47ae90532b47bc5ee05e8503b7d1deda59d956a354688ed949c8b5 -size 121605 +oid sha256:de647e8e1a39e8ee78c7c90a14f373b972e4f381f3348d6b28d0fe18a912eb51 +size 122484 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png index 2edc9a038ff8c3..5eb34946e278d0 
100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa184084b598dac717e99fe9677f1fe9dd4f6b85ec123c075d4109c75b134841 -size 1150675 +oid sha256:77941b5ac0c4ca3379b3a66eb94aeaa24b8c68e225f6e9369ca1cb262feaab7a +size 1150730 diff --git a/docs/notebooks/language-quantize-bert-with-output.rst b/docs/notebooks/language-quantize-bert-with-output.rst index 21ecfe511f1b76..2ba6bca451ad0b 100644 --- a/docs/notebooks/language-quantize-bert-with-output.rst +++ b/docs/notebooks/language-quantize-bert-with-output.rst @@ -101,10 +101,9 @@ Imports .. parsed-literal:: - 2024-11-05 01:51:49.197259: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-05 01:51:49.231710: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-22 01:28:13.948145: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-22 01:28:13.973147: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-11-05 01:51:49.783615: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -211,8 +210,9 @@ PyTorch model formats are supported: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:4713: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. Prepare the Dataset @@ -244,13 +244,6 @@ tokenizer from HuggingFace. data_source = create_data_source() - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 - warnings.warn( - - Optimize model using NNCF Post-training Quantization API -------------------------------------------------------- @@ -505,9 +498,9 @@ Frames Per Second (FPS) for images. .. 
parsed-literal:: - PyTorch model on CPU: 0.068 seconds per sentence, SPS: 14.68 - IR FP32 model in OpenVINO Runtime/AUTO: 0.020 seconds per sentence, SPS: 49.24 - OpenVINO IR INT8 model in OpenVINO Runtime/AUTO: 0.009 seconds per sentence, SPS: 108.47 + PyTorch model on CPU: 0.068 seconds per sentence, SPS: 14.67 + IR FP32 model in OpenVINO Runtime/AUTO: 0.020 seconds per sentence, SPS: 48.80 + OpenVINO IR INT8 model in OpenVINO Runtime/AUTO: 0.009 seconds per sentence, SPS: 107.05 Finally, measure the inference performance of OpenVINO ``FP32`` and @@ -548,7 +541,7 @@ in OpenVINO. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 19.11 ms + [ INFO ] Read model took 18.80 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,?] @@ -559,7 +552,7 @@ in OpenVINO. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'input_ids': [1,128], '63': [1,128], 'token_type_ids': [1,128] - [ INFO ] Reshape model took 5.55 ms + [ INFO ] Reshape model took 5.49 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,128] @@ -568,7 +561,7 @@ in OpenVINO. [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] / [1,2] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 344.20 ms + [ INFO ] Compile model took 351.45 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -609,17 +602,17 @@ in OpenVINO. 
[ INFO ] Fill input 'token_type_ids' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 22.90 ms + [ INFO ] First inference took 24.58 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 6485 iterations - [ INFO ] Duration: 120011.48 ms + [ INFO ] Count: 6143 iterations + [ INFO ] Duration: 120005.00 ms [ INFO ] Latency: - [ INFO ] Median: 18.09 ms - [ INFO ] Average: 18.41 ms + [ INFO ] Median: 18.11 ms + [ INFO ] Average: 19.44 ms [ INFO ] Min: 17.32 ms - [ INFO ] Max: 26.49 ms - [ INFO ] Throughput: 54.04 FPS + [ INFO ] Max: 31.44 ms + [ INFO ] Throughput: 51.19 FPS .. code:: ipython3 @@ -646,7 +639,7 @@ in OpenVINO. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 24.93 ms + [ INFO ] Read model took 24.80 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,?] @@ -657,7 +650,7 @@ in OpenVINO. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'input_ids': [1,128], '63': [1,128], 'token_type_ids': [1,128] - [ INFO ] Reshape model took 7.14 ms + [ INFO ] Reshape model took 7.21 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,128] @@ -666,7 +659,7 @@ in OpenVINO. [ INFO ] Model outputs: [ INFO ] logits (node: __module.classifier/aten::linear/Add) : f32 / [...] 
/ [1,2] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1080.21 ms + [ INFO ] Compile model took 1047.83 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -707,15 +700,15 @@ in OpenVINO. [ INFO ] Fill input 'token_type_ids' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 16.00 ms + [ INFO ] First inference took 15.79 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 13181 iterations - [ INFO ] Duration: 120003.10 ms + [ INFO ] Count: 13290 iterations + [ INFO ] Duration: 120007.52 ms [ INFO ] Latency: - [ INFO ] Median: 8.93 ms - [ INFO ] Average: 9.01 ms - [ INFO ] Min: 7.68 ms - [ INFO ] Max: 12.00 ms - [ INFO ] Throughput: 109.84 FPS + [ INFO ] Median: 8.89 ms + [ INFO ] Average: 8.94 ms + [ INFO ] Min: 7.64 ms + [ INFO ] Max: 13.94 ms + [ INFO ] Throughput: 110.74 FPS diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output.rst b/docs/notebooks/latent-consistency-models-image-generation-with-output.rst index fa0fdbd9718831..523afca76dd660 100644 --- a/docs/notebooks/latent-consistency-models-image-generation-with-output.rst +++ b/docs/notebooks/latent-consistency-models-image-generation-with-output.rst @@ -41,23 +41,17 @@ repository `__. In this tutorial, we consider how to convert and run LCM using OpenVINO. An additional part demonstrates how to run quantization with -`NNCF `__ to speed up -pipeline. +`NNCF `__ to speed up pipeline +and generate images using `OpenVINO +GenAI `__. 
**Table of contents:** - `Prerequisites <#prerequisites>`__ -- `Prepare models for OpenVINO format - conversion <#prepare-models-for-openvino-format-conversion>`__ - `Convert models to OpenVINO format <#convert-models-to-openvino-format>`__ - - - `Text Encoder <#text-encoder>`__ - - `U-Net <#u-net>`__ - - `VAE <#vae>`__ - - `Prepare inference pipeline <#prepare-inference-pipeline>`__ - `Configure Inference Pipeline <#configure-inference-pipeline>`__ @@ -69,9 +63,10 @@ pipeline. - `Run quantization <#run-quantization>`__ - `Compare inference time of the FP16 and INT8 models <#compare-inference-time-of-the-fp16-and-int8-models>`__ + - `Compare UNet file size <#compare-unet-file-size>`__ - - `Compare UNet file size <#compare-unet-file-size>`__ - +- `Run Text to image generation using OpenVINO + GenAI <#run-text-to-image-generation-using-openvino-genai>`__ - `Interactive demo <#interactive-demo>`__ Installation Instructions @@ -92,10 +87,27 @@ Prerequisites .. code:: ipython3 %pip install -q "torch>=2.1" --index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2024.3.0" transformers "diffusers>=0.30.1" pillow "gradio>=4.19" "nncf>=2.12.0" "datasets>=2.14.6" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "transformers>=4.45" tqdm accelerate "diffusers>=0.30.1" pillow "gradio>=4.19" "nncf>=2.12.0" "datasets>=2.14.6" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" + %pip install -qU --pre "openvino>=2024.4.0" "openvino-tokenizers" "openvino-genai" --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly -Prepare models for OpenVINO format conversion ---------------------------------------------- +.. 
code:: ipython3 + + from pathlib import Path + import requests + + utility_files = [Path("notebook_utils.py"), Path("skip_kernel_extension.py"), Path("cmd_helper.py")] + + base_utils_url = "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/" + + for utility_file in utility_files: + if not utility_file.exists(): + r = requests.get(base_utils_url + utility_file.name) + with utility_file.open("w") as f: + f.write(r.text) + +Convert models to OpenVINO format +--------------------------------- @@ -117,316 +129,48 @@ and distilled using LCD. The distillation approach efficiently converts a pre-trained guided diffusion model into a latent consistency model by solving an augmented PF-ODE. -For starting work with LCM, we should instantiate generation pipeline -first. ``DiffusionPipeline.from_pretrained`` method download all -pipeline components for LCM and configure them. This model uses custom -inference pipeline stored as part of model repository, we also should -provide which module should be loaded for initialization using -``custom_pipeline`` argument and revision for it. +For simplifying model export we will utilize Optimum Intel library. +`Optimum Intel `__ is +the interface between the +`Transformers `__ and +`Diffusers `__ libraries +and OpenVINO to accelerate end-to-end pipelines on Intel architectures. +It provides easy-to-use +`interface `__ +for exporting models to `OpenVINO Intermediate Representation +(IR) `__ +format. + +The command below demonstrates basic command for model export with +``optimum-cli`` + +.. code:: bash + + optimum-cli export openvino --model --task + +where ``--model`` argument is model id from HuggingFace Hub or local +directory with model (saved using ``.save_pretrained`` method), +``--task`` is one of `supported +task `__ +that exported model should solve. For image generation it will be +``text-to-image``.
If model initialization requires to use remote code, +``--trust-remote-code`` flag additionally should be passed. You can also +apply fp16, 8-bit or 4-bit weight compression on the Linear, +Convolutional and Embedding layers when exporting your model with the +CLI by setting ``--weight-format`` to respectively fp16, int8 or int4. +This type of optimization allows to reduce the memory footprint and +inference latency. We will quantize our model later using nncf, so in +this step we will use fp16 as base model export precision. .. code:: ipython3 - import gc - import warnings - from pathlib import Path - from diffusers import DiffusionPipeline - import numpy as np - - - warnings.filterwarnings("ignore") - - TEXT_ENCODER_OV_PATH = Path("model/text_encoder.xml") - UNET_OV_PATH = Path("model/unet.xml") - VAE_DECODER_OV_PATH = Path("model/vae_decoder.xml") + from cmd_helper import optimum_cli + model_id = "SimianLuo/LCM_Dreamshaper_v7" + model_path = Path(model_id.split("/")[-1] + "_ov") - def load_orginal_pytorch_pipeline_componets(skip_models=False, skip_safety_checker=False): - pipe = DiffusionPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7") - scheduler = pipe.scheduler - tokenizer = pipe.tokenizer - feature_extractor = pipe.feature_extractor if not skip_safety_checker else None - safety_checker = pipe.safety_checker if not skip_safety_checker else None - text_encoder, unet, vae = None, None, None - if not skip_models: - text_encoder = pipe.text_encoder - text_encoder.eval() - unet = pipe.unet - unet.eval() - vae = pipe.vae - vae.eval() - del pipe - gc.collect() - return ( - scheduler, - tokenizer, - feature_extractor, - safety_checker, - text_encoder, - unet, - vae, - ) - -.. 
code:: ipython3 - - skip_conversion = TEXT_ENCODER_OV_PATH.exists() and UNET_OV_PATH.exists() and VAE_DECODER_OV_PATH.exists() - - ( - scheduler, - tokenizer, - feature_extractor, - safety_checker, - text_encoder, - unet, - vae, - ) = load_orginal_pytorch_pipeline_componets(skip_conversion) - - - -.. parsed-literal:: - - Fetching 15 files: 0%| | 0/15 [00:00`__ is crucial for - synthesizing high-quality text-aligned images in Stable Diffusion, - because it controls how similar the generated image will be to the - prompt. In Latent Consistency Models, CFG serves as augmentation - parameter for PF-ODE. - -Model predicts the ``sample`` state for the next step. - -.. code:: ipython3 - - def convert_unet(unet: torch.nn.Module, ir_path: Path): - """ - Convert U-net model to IR format. - Function accepts unet model, prepares example inputs for conversion, - Parameters: - unet (StableDiffusionPipeline): unet from Stable Diffusion pipeline - ir_path (Path): File for storing model - Returns: - None - """ - # prepare inputs - dummy_inputs = { - "sample": torch.randn((1, 4, 64, 64)), - "timestep": torch.ones([1]).to(torch.float32), - "encoder_hidden_states": torch.randn((1, 77, 768)), - "timestep_cond": torch.randn((1, 256)), - } - unet.eval() - with torch.no_grad(): - ov_model = ov.convert_model(unet, example_input=dummy_inputs) - ov.save_model(ov_model, ir_path) - del ov_model - cleanup_torchscript_cache() - gc.collect() - print(f"Unet successfully converted to IR and saved to {ir_path}") - - - if not UNET_OV_PATH.exists(): - convert_unet(unet, UNET_OV_PATH) - else: - print(f"Unet will be loaded from {UNET_OV_PATH}") - del unet - gc.collect(); - -VAE -~~~ - - - -The VAE model has two parts, an encoder and a decoder. The encoder is -used to convert the image into a low dimensional latent representation, -which will serve as the input to the U-Net model. The decoder, -conversely, transforms the latent representation back into an image. 
- -During latent diffusion training, the encoder is used to get the latent -representations (latents) of the images for the forward diffusion -process, which applies more and more noise at each step. During -inference, the denoised latents generated by the reverse diffusion -process are converted back into images using the VAE decoder. When you -run inference for text-to-image, there is no initial image as a starting -point. You can skip this step and directly generate initial random -noise. - -In our inference pipeline, we will not use VAE encoder part and skip its -conversion for reducing memory consumption. The process of conversion -VAE encoder, can be found in Stable Diffusion notebook. - -.. code:: ipython3 - - def convert_vae_decoder(vae: torch.nn.Module, ir_path: Path): - """ - Convert VAE model for decoding to IR format. - Function accepts vae model, creates wrapper class for export only necessary for inference part, - prepares example inputs for conversion, - Parameters: - vae (torch.nn.Module): VAE model frm StableDiffusion pipeline - ir_path (Path): File for storing model - Returns: - None - """ - - class VAEDecoderWrapper(torch.nn.Module): - def __init__(self, vae): - super().__init__() - self.vae = vae - - def forward(self, latents): - return self.vae.decode(latents) - - vae_decoder = VAEDecoderWrapper(vae) - latents = torch.zeros((1, 4, 64, 64)) - - vae_decoder.eval() - with torch.no_grad(): - ov_model = ov.convert_model(vae_decoder, example_input=latents) - ov.save_model(ov_model, ir_path) - del ov_model - cleanup_torchscript_cache() - print(f"VAE decoder successfully converted to IR and saved to {ir_path}") - - - if not VAE_DECODER_OV_PATH.exists(): - convert_vae_decoder(vae, VAE_DECODER_OV_PATH) - else: - print(f"VAE decoder will be loaded from {VAE_DECODER_OV_PATH}") - - del vae - gc.collect(); + if not model_path.exists(): + optimum_cli(model_id, model_path, additional_args={"weight-format": "fp16"}) Prepare inference pipeline 
-------------------------- @@ -461,237 +205,27 @@ number of steps required ~2-8) to step-by-step retrieve better latent image representations. When complete, the latent image representation is decoded by the decoder part of the variational auto encoder. -.. code:: ipython3 - - from typing import Union, Optional, Any, List, Dict - from transformers import CLIPTokenizer, CLIPImageProcessor - from diffusers.pipelines.stable_diffusion.safety_checker import ( - StableDiffusionSafetyChecker, - ) - from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput - from diffusers.image_processor import VaeImageProcessor - - - class OVLatentConsistencyModelPipeline(DiffusionPipeline): - def __init__( - self, - vae_decoder: ov.Model, - text_encoder: ov.Model, - tokenizer: CLIPTokenizer, - unet: ov.Model, - scheduler: None, - safety_checker: StableDiffusionSafetyChecker, - feature_extractor: CLIPImageProcessor, - requires_safety_checker: bool = True, - ): - super().__init__() - self.vae_decoder = vae_decoder - self.text_encoder = text_encoder - self.tokenizer = tokenizer - self.register_to_config(unet=unet) - self.scheduler = scheduler - self.safety_checker = safety_checker - self.feature_extractor = feature_extractor - self.vae_scale_factor = 2**3 - self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) - - def _encode_prompt( - self, - prompt, - num_images_per_prompt, - prompt_embeds: None, - ): - r""" - Encodes the prompt into text encoder hidden states. - Args: - prompt (`str` or `List[str]`, *optional*): - prompt to be encoded - num_images_per_prompt (`int`): - number of images that should be generated per prompt - prompt_embeds (`torch.FloatTensor`, *optional*): - Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not - provided, text embeddings will be generated from `prompt` input argument. 
- """ - - if prompt_embeds is None: - text_inputs = self.tokenizer( - prompt, - padding="max_length", - max_length=self.tokenizer.model_max_length, - truncation=True, - return_tensors="pt", - ) - text_input_ids = text_inputs.input_ids - - prompt_embeds = self.text_encoder(text_input_ids, share_inputs=True, share_outputs=True) - prompt_embeds = torch.from_numpy(prompt_embeds[0]) - - bs_embed, seq_len, _ = prompt_embeds.shape - # duplicate text embeddings for each generation per prompt - prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1) - prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1) - - # Don't need to get uncond prompt embedding because of LCM Guided Distillation - return prompt_embeds - - def run_safety_checker(self, image, dtype): - if self.safety_checker is None: - has_nsfw_concept = None - else: - if torch.is_tensor(image): - feature_extractor_input = self.image_processor.postprocess(image, output_type="pil") - else: - feature_extractor_input = self.image_processor.numpy_to_pil(image) - safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt") - image, has_nsfw_concept = self.safety_checker(images=image, clip_input=safety_checker_input.pixel_values.to(dtype)) - return image, has_nsfw_concept - - def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, latents=None): - shape = ( - batch_size, - num_channels_latents, - height // self.vae_scale_factor, - width // self.vae_scale_factor, - ) - if latents is None: - latents = torch.randn(shape, dtype=dtype) - # scale the initial noise by the standard deviation required by the scheduler - latents = latents * self.scheduler.init_noise_sigma - return latents - - def get_w_embedding(self, w, embedding_dim=512, dtype=torch.float32): - """ - see https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298 - Args: - timesteps: torch.Tensor: generate embedding vectors at 
these timesteps - embedding_dim: int: dimension of the embeddings to generate - dtype: data type of the generated embeddings - Returns: - embedding vectors with shape `(len(timesteps), embedding_dim)` - """ - assert len(w.shape) == 1 - w = w * 1000.0 - - half_dim = embedding_dim // 2 - emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1) - emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb) - emb = w.to(dtype)[:, None] * emb[None, :] - emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1) - if embedding_dim % 2 == 1: # zero pad - emb = torch.nn.functional.pad(emb, (0, 1)) - assert emb.shape == (w.shape[0], embedding_dim) - return emb - - @torch.no_grad() - def __call__( - self, - prompt: Union[str, List[str]] = None, - height: Optional[int] = 512, - width: Optional[int] = 512, - guidance_scale: float = 7.5, - num_images_per_prompt: Optional[int] = 1, - latents: Optional[torch.FloatTensor] = None, - num_inference_steps: int = 4, - lcm_origin_steps: int = 50, - prompt_embeds: Optional[torch.FloatTensor] = None, - output_type: Optional[str] = "pil", - return_dict: bool = True, - cross_attention_kwargs: Optional[Dict[str, Any]] = None, - ): - # 1. Define call parameters - if prompt is not None and isinstance(prompt, str): - batch_size = 1 - elif prompt is not None and isinstance(prompt, list): - batch_size = len(prompt) - else: - batch_size = prompt_embeds.shape[0] - - # do_classifier_free_guidance = guidance_scale > 0.0 - # In LCM Implementation: cfg_noise = noise_cond + cfg_scale * (noise_cond - noise_uncond) , (cfg_scale > 0.0 using CFG) - - # 2. Encode input prompt - prompt_embeds = self._encode_prompt( - prompt, - num_images_per_prompt, - prompt_embeds=prompt_embeds, - ) - - # 3. Prepare timesteps - self.scheduler.set_timesteps(num_inference_steps, original_inference_steps=lcm_origin_steps) - timesteps = self.scheduler.timesteps - - # 4. 
Prepare latent variable - num_channels_latents = 4 - latents = self.prepare_latents( - batch_size * num_images_per_prompt, - num_channels_latents, - height, - width, - prompt_embeds.dtype, - latents, - ) - - bs = batch_size * num_images_per_prompt - - # 5. Get Guidance Scale Embedding - w = torch.tensor(guidance_scale).repeat(bs) - w_embedding = self.get_w_embedding(w, embedding_dim=256) - - # 6. LCM MultiStep Sampling Loop: - with self.progress_bar(total=num_inference_steps) as progress_bar: - for i, t in enumerate(timesteps): - ts = torch.full((bs,), t, dtype=torch.long) - - # model prediction (v-prediction, eps, x) - model_pred = self.unet( - [latents, ts, prompt_embeds, w_embedding], - share_inputs=True, - share_outputs=True, - )[0] - - # compute the previous noisy sample x_t -> x_t-1 - latents, denoised = self.scheduler.step(torch.from_numpy(model_pred), t, latents, return_dict=False) - progress_bar.update() - - if not output_type == "latent": - image = torch.from_numpy(self.vae_decoder(denoised / 0.18215, share_inputs=True, share_outputs=True)[0]) - image, has_nsfw_concept = self.run_safety_checker(image, prompt_embeds.dtype) - else: - image = denoised - has_nsfw_concept = None - - if has_nsfw_concept is None: - do_denormalize = [True] * image.shape[0] - else: - do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept] - - image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize) - - if not return_dict: - return (image, has_nsfw_concept) - - return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) +For starting work with LCM, we should instantiate the generation +pipeline first. ``DiffusionPipeline.from_pretrained`` method downloads +all pipeline components (if required) for LCM and configure them. +Loading LCM for OpenVINO inference using Optimum Intel looks similar, we +only should replace ``DiffusionPipeline`` with ``OVDiffusionPipeline``.
+This model class accepts model id from HuggingFace Hub or local +directory for original PyTorch pipeline or already converted. In case, +if path to original pipeline provided, it will be automatically +converted to OpenVINO format, but as we already converted model before +using Optimum CLI, we will use models from the previous step. Configure Inference Pipeline ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -First, you should create instances of OpenVINO Model and compile it -using selected device. Select device from dropdown list for running -inference using OpenVINO. +Optionally, we can set up which device will be used for running +inference. Select desired inference device from dropdown list below. .. code:: ipython3 - core = ov.Core() - - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - from notebook_utils import device_widget device = device_widget() @@ -703,18 +237,27 @@ inference using OpenVINO. .. parsed-literal:: - Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') .. code:: ipython3 - text_enc = core.compile_model(TEXT_ENCODER_OV_PATH, device.value) - unet_model = core.compile_model(UNET_OV_PATH, device.value) - - ov_config = {"INFERENCE_PRECISION_HINT": "f32"} if device.value != "CPU" else {} + from optimum.intel.openvino import OVDiffusionPipeline - vae_decoder = core.compile_model(VAE_DECODER_OV_PATH, device.value, ov_config) + ov_pipe = OVDiffusionPipeline.from_pretrained(model_path, device=device.value) + + +.. parsed-literal:: + + 2024-11-14 12:52:11.556586: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders.
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-14 12:52:11.570192: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered + WARNING: All log messages before absl::InitializeLog() is called are written to STDERR + E0000 00:00:1731574331.585339 2056327 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered + E0000 00:00:1731574331.589784 2056327 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered + 2024-11-14 12:52:11.606540: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + Model tokenizer and scheduler are also important parts of the pipeline. This pipeline is also can use Safety Checker, the filter for detecting @@ -724,18 +267,6 @@ embeddings using CLIP model, so additionally feature extractor component should be added in the pipeline. We reuse tokenizer, feature extractor, scheduler and safety checker from original LCM pipeline. -.. code:: ipython3 - - ov_pipe = OVLatentConsistencyModelPipeline( - tokenizer=tokenizer, - text_encoder=text_enc, - unet=unet_model, - vae_decoder=vae_decoder, - scheduler=scheduler, - feature_extractor=feature_extractor, - safety_checker=safety_checker, - ) - Text-to-image generation ------------------------ @@ -745,18 +276,13 @@ Now, let’s see model in action .. 
code:: ipython3 + import torch + prompt = "a beautiful pink unicorn, 8k" num_inference_steps = 4 - torch.manual_seed(1234567) images = ov_pipe( - prompt=prompt, - num_inference_steps=num_inference_steps, - guidance_scale=8.0, - lcm_origin_steps=50, - output_type="pil", - height=512, - width=512, + prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0, height=512, width=512, generator=torch.Generator().manual_seed(1234567) ).images @@ -773,12 +299,19 @@ Now, let’s see model in action -.. image:: latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_21_0.png +.. image:: latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_13_0.png Nice. As you can see, the picture has quite a high definition 🔥. +.. code:: ipython3 + + import gc + + del ov_pipe + gc.collect(); + Quantization ------------ @@ -814,6 +347,7 @@ improve model inference speed. skip_for_device = "GPU" in device.value to_quantize = quantization_widget(not skip_for_device) + int8_model_path = model_path.parent / (model_path.name + "_int8") to_quantize @@ -826,22 +360,13 @@ improve model inference speed. -Let’s load ``skip magic`` extension to skip quantization if -``to_quantize`` is not selected - .. code:: ipython3 - int8_pipe = None - - # Fetch `skip_kernel_extension` module - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", - ) - open("skip_kernel_extension.py", "w").write(r.text) %load_ext skip_kernel_extension +Let’s load ``skip magic`` extension to skip quantization if +``to_quantize`` is not selected + Prepare calibration dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -860,6 +385,8 @@ model inputs for calibration we should customize ``CompiledModel``. 
from tqdm.notebook import tqdm from transformers import set_seed from typing import Any, Dict, List + import openvino as ov + import numpy as np set_seed(1) @@ -874,9 +401,9 @@ model inputs for calibration we should customize ``CompiledModel``. self.data_cache.append(*args) return super().__call__(*args, **kwargs) - def collect_calibration_data(lcm_pipeline: OVLatentConsistencyModelPipeline, subset_size: int) -> List[Dict]: - original_unet = lcm_pipeline.unet - lcm_pipeline.unet = CompiledModelDecorator(original_unet, prob=0.3) + def collect_calibration_data(lcm_pipeline, subset_size: int) -> List[Dict]: + original_unet = lcm_pipeline.unet.request + lcm_pipeline.unet.request = CompiledModelDecorator(original_unet, prob=0.3) dataset = datasets.load_dataset("google-research-datasets/conceptual_captions", split="train", trust_remote_code=True).shuffle(seed=42) lcm_pipeline.set_progress_bar_config(disable=True) @@ -888,27 +415,25 @@ model inputs for calibration we should customize ``CompiledModel``. 
diff = 0 for batch in dataset: prompt = batch["caption"] - if len(prompt) > tokenizer.model_max_length: + if len(prompt) > lcm_pipeline.tokenizer.model_max_length: continue _ = lcm_pipeline( prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0, - lcm_origin_steps=50, - output_type="pil", height=512, width=512, ) - collected_subset_size = len(lcm_pipeline.unet.data_cache) + collected_subset_size = len(lcm_pipeline.unet.request.data_cache) if collected_subset_size >= subset_size: pbar.update(subset_size - pbar.n) break pbar.update(collected_subset_size - diff) diff = collected_subset_size - calibration_dataset = lcm_pipeline.unet.data_cache + calibration_dataset = lcm_pipeline.unet.request.data_cache lcm_pipeline.set_progress_bar_config(disable=False) - lcm_pipeline.unet = original_unet + lcm_pipeline.unet.request = original_unet lcm_pipeline.safety_checker = safety_checker return calibration_dataset @@ -920,10 +445,12 @@ model inputs for calibration we should customize ``CompiledModel``. logging.basicConfig(level=logging.WARNING) logger = logging.getLogger(__name__) - UNET_INT8_OV_PATH = Path("model/unet_int8.xml") - if not UNET_INT8_OV_PATH.exists(): + if not int8_model_path.exists(): subset_size = 200 + ov_pipe = OVDiffusionPipeline.from_pretrained(model_path, device=device.value) unet_calibration_data = collect_calibration_data(ov_pipe, subset_size=subset_size) + del ov_pipe + gc.collect(); @@ -948,12 +475,11 @@ Create a quantized model from the pre-trained converted OpenVINO model. 
import nncf from nncf.scopes import IgnoredScope + import shutil + core = ov.Core() - if UNET_INT8_OV_PATH.exists(): - print("Loading quantized model") - quantized_unet = core.read_model(UNET_INT8_OV_PATH) - else: - unet = core.read_model(UNET_OV_PATH) + if not int8_model_path.exists(): + unet = core.read_model(model_path / "unet/openvino_model.xml") quantized_unet = nncf.quantize( model=unet, subset_size=subset_size, @@ -963,12 +489,19 @@ Create a quantized model from the pre-trained converted OpenVINO model. disable_bias_correction=True ) ) - ov.save_model(quantized_unet, UNET_INT8_OV_PATH) - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino + ov.save_model(quantized_unet, int8_model_path / "unet/openvino_model.xml") + del quantized_unet + del unet + gc.collect() + for filename in model_path.rglob("*"): + if filename.is_dir(): + continue + relative_file_name = filename.relative_to(model_path) + if (int8_model_path / relative_file_name).exists(): + continue + dst_path = int8_model_path / relative_file_name + dst_path.parent.mkdir(exist_ok=True, parents=True) + shutil.copy(filename, dst_path) @@ -985,14 +518,6 @@ Create a quantized model from the pre-trained converted OpenVINO model. - - - - - - - - .. parsed-literal:: Output() @@ -1006,19 +531,6 @@ Create a quantized model from the pre-trained converted OpenVINO model. - - - - - - - -.. parsed-literal:: - - INFO:nncf:122 ignored nodes were found by name in the NNCFGraph - - - .. parsed-literal:: Output() @@ -1031,29 +543,11 @@ Create a quantized model from the pre-trained converted OpenVINO model. - - - - - - - - .. 
code:: ipython3 %%skip not $to_quantize.value - unet_optimized = core.compile_model(UNET_INT8_OV_PATH, device.value) - - int8_pipe = OVLatentConsistencyModelPipeline( - tokenizer=tokenizer, - text_encoder=text_enc, - unet=unet_optimized, - vae_decoder=vae_decoder, - scheduler=scheduler, - feature_extractor=feature_extractor, - safety_checker=safety_checker, - ) + int8_pipe = OVDiffusionPipeline.from_pretrained(int8_model_path, device=device.value) Let us check predictions with the quantized UNet using the same input data. @@ -1066,16 +560,14 @@ data. prompt = "a beautiful pink unicorn, 8k" num_inference_steps = 4 - torch.manual_seed(1234567) images = int8_pipe( prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0, - lcm_origin_steps=50, - output_type="pil", height=512, width=512, + generator=torch.Generator().manual_seed(1234567) ).images display(images[0]) @@ -1088,7 +580,7 @@ data. -.. image:: latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_34_1.png +.. image:: latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_27_1.png Compare inference time of the FP16 and INT8 models @@ -1127,8 +619,6 @@ pipelines, we use median inference time on calibration subset. prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0, - lcm_origin_steps=50, - output_type="pil", height=512, width=512, ) @@ -1143,38 +633,154 @@ pipelines, we use median inference time on calibration subset. %%skip not $to_quantize.value - fp_latency = calculate_inference_time(ov_pipe, validation_data) int8_latency = calculate_inference_time(int8_pipe, validation_data) + del int8_pipe + gc.collect() + ov_pipe = OVDiffusionPipeline.from_pretrained(model_path, device=device.value) + fp_latency = calculate_inference_time(ov_pipe, validation_data) print(f"Performance speed up: {fp_latency / int8_latency:.3f}") + + del ov_pipe + gc.collect(); .. 
parsed-literal:: - Performance speed up: 1.319 + Performance speed up: 1.357 Compare UNet file size -^^^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~~~~ .. code:: ipython3 - %%skip not $to_quantize.value + UNET_OV_PATH = model_path / "unet/openvino_model.xml" + UNET_INT8_OV_PATH = int8_model_path / "unet/openvino_model.xml" - fp16_ir_model_size = UNET_OV_PATH.with_suffix(".bin").stat().st_size / 1024 - quantized_model_size = UNET_INT8_OV_PATH.with_suffix(".bin").stat().st_size / 1024 + if UNET_INT8_OV_PATH.exists(): + fp16_ir_model_size = UNET_OV_PATH.with_suffix(".bin").stat().st_size / 1024 + quantized_model_size = UNET_INT8_OV_PATH.with_suffix(".bin").stat().st_size / 1024 - print(f"FP16 model size: {fp16_ir_model_size:.2f} KB") - print(f"INT8 model size: {quantized_model_size:.2f} KB") - print(f"Model compression rate: {fp16_ir_model_size / quantized_model_size:.3f}") + print(f"FP16 model size: {fp16_ir_model_size:.2f} KB") + print(f"INT8 model size: {quantized_model_size:.2f} KB") + print(f"Model compression rate: {fp16_ir_model_size / quantized_model_size:.3f}") + + +.. parsed-literal:: + + FP16 model size: 1678912.69 KB + INT8 model size: 841591.46 KB + Model compression rate: 1.995 + + +Run Text to image generation using OpenVINO GenAI +------------------------------------------------- + + + +`OpenVINO™ GenAI `__ +is a library of the most popular Generative AI model pipelines, +optimized execution methods, and samples that run on top of highly +performant `OpenVINO +Runtime `__. + +|image0| + +This library is friendly to PC and laptop execution, and optimized for +resource consumption. It requires no external dependencies to run +generative models as it already includes all the core functionality. + +``openvino_genai.Text2ImagePipeline`` class supports inference of +`Diffusers +models `__. +For pipeline initialization, we should provide directory with converted +by Optimum Intel pipeline and specify inference device. 
Optionally, we +can provide configuration for LoRA Adapters using ``adapter_config``. +To start the generation process, the ``generate`` method should be used. +At minimum, it requires an input text prompt for image +generation. You can provide additional arguments like negative prompt, +number of steps, guidance scale, image width and height to control +generation process. + +.. |image0| image:: https://media.githubusercontent.com/media/openvinotoolkit/openvino.genai/refs/heads/master/src/docs/openvino_genai.svg + +.. code:: ipython3 + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + import ipywidgets as widgets + + int8_can_be_used = int8_model_path.exists() and "GPU" not in device.value + use_quantized_model = widgets.Checkbox(value=int8_can_be_used, description="Use INT8 model", disabled=not int8_can_be_used) + + use_quantized_model + + .. parsed-literal:: - FP16 model size: 1678912.37 KB - INT8 model size: 840792.93 KB - Model compression rate: 1.997 + Checkbox(value=True, description='Use INT8 model') + + + +.. code:: ipython3 + + import openvino_genai as ov_genai + + used_model_path = model_path if not use_quantized_model.value else int8_model_path + + pipe = ov_genai.Text2ImagePipeline(used_model_path, device.value) + +..
code:: ipython3 + + from PIL import Image + import torch + import openvino as ov + + + class Generator(ov_genai.Generator): + def __init__(self, seed): + ov_genai.Generator.__init__(self) + self.generator = torch.Generator(device="cpu").manual_seed(seed) + + def next(self): + return torch.randn(1, generator=self.generator, dtype=torch.float32).item() + + def randn_tensor(self, shape: ov.Shape): + torch_tensor = torch.randn(list(shape), generator=self.generator, dtype=torch.float32) + return ov.Tensor(torch_tensor.numpy()) + + + prompt = "a beautiful pink unicorn, 8k" + num_inference_steps = 4 + + random_generator = Generator(1234567) + + image_tensor = pipe.generate(prompt, width=512, height=512, num_inference_steps=4, num_images_per_prompt=1, generator=random_generator) + + image = Image.fromarray(image_tensor.data[0]) + + image + + + + +.. image:: latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_37_0.png + Interactive demo @@ -1186,7 +792,7 @@ Interactive demo import random import gradio as gr - from functools import partial + import numpy as np MAX_SEED = np.iinfo(np.int32).max @@ -1198,7 +804,6 @@ Interactive demo def generate( - pipeline: OVLatentConsistencyModelPipeline, prompt: str, seed: int = 0, width: int = 512, @@ -1206,28 +811,15 @@ Interactive demo guidance_scale: float = 8.0, num_inference_steps: int = 4, randomize_seed: bool = False, - num_images: int = 1, progress=gr.Progress(track_tqdm=True), ): seed = randomize_seed_fn(seed, randomize_seed) - torch.manual_seed(seed) - result = pipeline( - prompt=prompt, - width=width, - height=height, - guidance_scale=guidance_scale, - num_inference_steps=num_inference_steps, - num_images_per_prompt=num_images, - lcm_origin_steps=50, - output_type="pil", - ).images[0] + random_generator = Generator(seed) + result = pipe.generate( + prompt, width=width, height=height, guidance_scale=guidance_scale, num_inference_steps=num_inference_steps, 
generator=random_generator + ) + result = Image.fromarray(result.data[0]) return result, seed - - - generate_original = partial(generate, ov_pipe) - generate_optimized = partial(generate, int8_pipe) - quantized_model_present = int8_pipe is not None - generate = generate_optimized if quantized_model_present else generate_original .. code:: ipython3 @@ -1239,7 +831,7 @@ Interactive demo from gradio_helper import make_demo_lcm - demo = make_demo_lcm(fn=generate, quantized=quantized_model_present) + demo = make_demo_lcm(fn=generate) try: demo.queue().launch(debug=False) @@ -1248,8 +840,3 @@ Interactive demo # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ - -.. code:: ipython3 - - # please uncomment and run this cell for stopping gradio interface - # demo.close() diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_13_0.jpg b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_13_0.jpg new file mode 100644 index 00000000000000..1ea60cbbf8d222 --- /dev/null +++ b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_13_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:affe930458b7c4c643d79b905269590fc084ca969ee5f0545b8bba525006fa8a +size 19295 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_13_0.png b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_13_0.png new file mode 100644 index 00000000000000..5955c1e4362d9f --- /dev/null +++ 
b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_13_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ef1cbdb29f5fea43c3624c52f20799e4677fc0f52f6451bbe24bf0cf11a8463 +size 389641 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_21_0.jpg b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_21_0.jpg deleted file mode 100644 index c6b4e28670b6d5..00000000000000 --- a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_21_0.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:124ac28d484e3f73c150deb379374cec294b47803cd2d8914461dc8ea215afd0 -size 25960 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_21_0.png b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_21_0.png deleted file mode 100644 index 08ecde8427d295..00000000000000 --- a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_21_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bbd41bc286b8dfb86e049235d232d30fd7a61ea4febfb1e4ccc340367a84ebb0 -size 412225 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_27_1.jpg b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_27_1.jpg new file mode 100644 index 00000000000000..6408a5658cf117 --- /dev/null +++ 
b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_27_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8925bd54982f37545c019dbe0594bd794045ee40e5627f0121b221b44471c62 +size 19352 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_27_1.png b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_27_1.png new file mode 100644 index 00000000000000..7b0ec07f79f970 --- /dev/null +++ b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_27_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c7034ea0158e17cbd009e742938fe42fd1e0fb0011d0d2512524d6fab00889e +size 392614 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_34_1.jpg b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_34_1.jpg deleted file mode 100644 index 08bc3ddf0e0710..00000000000000 --- a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_34_1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4bce5a9ae0251f165e2becde51d5343c55a99c3234f327c9951f8a0279514a2e -size 22266 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_34_1.png b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_34_1.png deleted file mode 100644 index 75211e26b3b388..00000000000000 --- 
a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_34_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:89231776665c02abb82840d447f7804d7aca7118ec11d1296e7e1f738fd11e63 -size 392583 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_37_0.jpg b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_37_0.jpg new file mode 100644 index 00000000000000..4710b7e9307c1b --- /dev/null +++ b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_37_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b3bf64cb2d0dc5daa9387092f9c09eea26af451b5a6e0e7c5750d22a5fb66b1 +size 21932 diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_37_0.png b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_37_0.png new file mode 100644 index 00000000000000..7667008b2d5aa5 --- /dev/null +++ b/docs/notebooks/latent-consistency-models-image-generation-with-output_files/latent-consistency-models-image-generation-with-output_37_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:307294292b8bf501d51fae0bc667d06907d8d5b2adf9ed139467b766eccac901 +size 401843 diff --git a/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst b/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst deleted file mode 100644 index a0bce9d85c7196..00000000000000 --- a/docs/notebooks/latent-consistency-models-optimum-demo-with-output.rst +++ /dev/null @@ -1,252 +0,0 @@ -Latent Consistency Model using 
Optimum-Intel OpenVINO -===================================================== - -This notebook provides instructions how to run Latent Consistency Model -(LCM). It allows to setup standard Hugging Face diffusers pipeline and -Optimum Intel pipeline optimized for Intel hardware including CPU and -GPU. Running inference on CPU and GPU it is easy to compare performance -and time required to generate an image for provided prompt. The notebook -can be also used on other Intel hardware with minimal or no -modifications. - -.. image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/10940214/1858dae4-72fd-401e-b055-66d503d82446 - -Optimum Intel is an interface from Hugging Face between both diffusers -and transformers libraries and various tools provided by Intel to -accelerate pipelines on Intel hardware. It allows to perform -quantization of the models hosted on Hugging Face. In this notebook -OpenVINO is used for AI-inference acceleration as a backend for Optimum -Intel! - -For more details please refer to Optimum Intel repository -https://github.com/huggingface/optimum-intel - -LCMs are the next generation of generative models after Latent Diffusion -Models (LDMs). They are proposed to overcome the slow iterative sampling -process of Latent Diffusion Models (LDMs), enabling fast inference with -minimal steps (from 2 to 4) on any pre-trained LDMs (e.g. Stable -Diffusion). To read more about LCM please refer to -https://latent-consistency-models.github.io/ - - -**Table of contents:** - - -- `Prerequisites <#prerequisites>`__ -- `Full precision model on the - CPU <#using-full-precision-model-in-cpu-with-latentconsistencymodelpipeline>`__ -- `Running inference using Optimum Intel - OVLatentConsistencyModelPipeline <#running-inference-using-optimum-intel-ovlatentconsistencymodelpipeline>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. 
- -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. - -Prerequisites -~~~~~~~~~~~~~ - - - -Install required packages - -.. code:: ipython3 - - %pip install -q "openvino>=2023.3.0" - %pip install -q "onnx>=1.11.0,<1.16.2" - %pip install -q "optimum-intel[diffusers]@git+https://github.com/huggingface/optimum-intel.git" "ipywidgets" "torch>=2.1" "transformers>=4.33.0" --extra-index-url https://download.pytorch.org/whl/cpu - - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - - -.. code:: ipython3 - - import warnings - - warnings.filterwarnings("ignore") - -Showing Info Available Devices -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -The ``available_devices`` property shows the available devices in your -system. The “FULL_DEVICE_NAME” option to ``ie.get_property()`` shows the -name of the device. Check what is the ID name for the discrete GPU, if -you have integrated GPU (iGPU) and discrete GPU (dGPU), it will show -``device_name="GPU.0"`` for iGPU and ``device_name="GPU.1"`` for dGPU. -If you just have either an iGPU or dGPU that will be assigned to -``"GPU"`` - -Note: For more details about GPU with OpenVINO visit this -`link `__. -If you have been facing any issue in Ubuntu 20.04 or Windows 11 read -this -`blog `__. - -.. code:: ipython3 - - import openvino as ov - import openvino.properties as props - - - core = ov.Core() - devices = core.available_devices - - for device in devices: - device_name = core.get_property(device, props.device.full_name) - print(f"{device}: {device_name}") - - -.. 
parsed-literal:: - - CPU: Intel(R) Core(TM) Ultra 7 155H - GNA.GNA_SW: GNA_SW - GNA.GNA_HW: GNA_HW - GPU: Intel(R) Arc(TM) Graphics (iGPU) - NPU: Intel(R) AI Boost - - -Using full precision model in CPU with ``LatentConsistencyModelPipeline`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -Standard pipeline for the Latent Consistency Model(LCM) from Diffusers -library is used here. For more information please refer to -https://huggingface.co/docs/diffusers/en/api/pipelines/latent_consistency_models - -.. code:: ipython3 - - from diffusers import LatentConsistencyModelPipeline - import gc - - pipeline = LatentConsistencyModelPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7") - - - -.. parsed-literal:: - - Loading pipeline components...: 0%| | 0/7 [00:00=2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu - %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" - %pip install -q "nncf>=2.13.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" - %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly openvino-tokenizers openvino openvino-genai + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" --index-url https://download.pytorch.org/whl/cpu + %pip install -q "nncf>=2.14.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" + %pip install -q -U "openvino-tokenizers>=2024.5.0" "openvino>=2024.5.0" "openvino-genai>=2024.5.0" utility_files = ["notebook_utils.py", "cmd_helper.py"] @@ -134,8 +134,8 @@ Install required dependencies r = requests.get( url=f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{local_path.name}", ) - with local_path.open("w") as f: - f.write(r.text) + with local_path.open("w") as f: + f.write(r.text) Convert and Optimize Model -------------------------- @@ -329,7 +329,7 @@ conversation about provided images
content. .. code:: ipython3 - from openvino_genai import VLMPipeline, GenerationConfig + import openvino_genai as ov_genai Select inference device ~~~~~~~~~~~~~~~~~~~~~~~ @@ -397,7 +397,7 @@ and inference device. .. code:: ipython3 - ov_model = VLMPipeline(str(model_base_path / model_variant.value), device=device.value) + ov_model = ov_genai.VLMPipeline(str(model_base_path / model_variant.value), device=device.value) Run model inference ------------------- @@ -435,7 +435,7 @@ one of the most critical aspects of a smooth experience. from io import BytesIO import numpy as np - config = GenerationConfig() + config = ov_genai.GenerationConfig() config.max_new_tokens = 100 @@ -445,7 +445,7 @@ one of the most critical aspects of a smooth experience. image = Image.open(BytesIO(response.content)).convert("RGB") else: image = Image.open(image_file).convert("RGB") - image_data = np.array(image.getdata()).reshape(1, 3, image.size[1], image.size[0]).astype(np.byte) + image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.byte) return image, ov.Tensor(image_data) diff --git a/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst index ae14876b33b633..b278013b8a258b 100644 --- a/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst +++ b/docs/notebooks/llava-multimodal-chatbot-optimum-with-output.rst @@ -121,9 +121,9 @@ Install required dependencies import requests %pip install -q "torch>=2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu - %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" - %pip install -q "nncf>=2.13.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" - %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly openvino-tokenizers openvino openvino-genai + %pip install -q 
"git+https://github.com/huggingface/optimum-intel.git" --index-url https://download.pytorch.org/whl/cpu + %pip install -q "nncf>=2.14.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" --index-url https://download.pytorch.org/whl/cpu + %pip install -q -U "openvino-tokenizers>=2024.5.0" "openvino>=2024.5.0" "openvino-genai>=2024.5.0" utility_files = ["notebook_utils.py", "cmd_helper.py"] @@ -302,7 +302,7 @@ Prepare OpenVINO based inference pipeline OpenVINO integration with Optimum Intel provides ready-to-use API for model inference that can be used for smooth integration with -transformers-based solutions. For loading pixtral model, we will use +transformers-based solutions. For loading model, we will use ``OVModelForVisualCausalLM`` class that have compatible interface with Transformers LLaVA implementation. For loading a model, ``from_pretrained`` method should be used. It accepts path to the model diff --git a/docs/notebooks/llava-next-multimodal-chatbot-with-output.rst b/docs/notebooks/llava-next-multimodal-chatbot-with-output.rst index 3e26205ee0272b..dc2a129c207ec5 100644 --- a/docs/notebooks/llava-next-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/llava-next-multimodal-chatbot-with-output.rst @@ -20,7 +20,7 @@ model for creating multimodal chatbot, but the similar actions are also applicable to other models of LLaVA family compatible with HuggingFace transformers implementation.
Additionally, we demonstrate how to apply stateful transformation on LLM part and model optimization techniques -like weights compression and quantization using +like weights compression using `NNCF `__ @@ -28,28 +28,18 @@ like weights compression and quantization using - `Prerequisites <#prerequisites>`__ -- `Download PyTorch model <#download-pytorch-model>`__ -- `Convert model to OpenVINO Intermediate - Representation <#convert-model-to-openvino-intermediate-representation>`__ - - - `Image Encoder <#image-encoder>`__ - - `Text Embedding <#text-embedding>`__ - - `Language Model <#language-model>`__ - +- `Convert model to OpenVINO IR format using Optimum + CLI <#convert-model-to-openvino-ir-format-using-optimum-cli>`__ - `Compress Language Model Weights to 4 bits <#compress-language-model-weights-to-4-bits>`__ -- `Quantize Image Encoder to 8 - bits <#quantize-image-encoder-to-8-bits>`__ - - - `Prepare datasets <#prepare-datasets>`__ - - `Perform quantization <#perform-quantization>`__ - - `Prepare model inference pipeline <#prepare-model-inference-pipeline>`__ -- `Run OpenVINO model inference <#run-openvino-model-inference>`__ - `Select device <#select-device>`__ + - `Select model variant <#select-model-variant>`__ + - `Load OpenVINO Model <#load-openvino-model>`__ +- `Run OpenVINO model inference <#run-openvino-model-inference>`__ - `Interactive demo <#interactive-demo>`__ Installation Instructions @@ -69,7 +59,9 @@ Prerequisites .. 
code:: ipython3 - %pip install -q "openvino>=2024.0.0" "nncf>=2.9.0" "torch>=2.1" "transformers>=4.39.1" "accelerate" "pillow" "gradio>=4.26" "datasets>=2.14.6" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu + # %pip install -q "nncf>=2.14.0" "torch>=2.1" "transformers>=4.39.1" "accelerate" "pillow" "gradio>=4.26" "datasets>=2.14.6" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu + # %pip install -q -U "openvino>=2024.5.0" "openvino-tokenizers>=2024.5.0" "openvino-genai>=2024.5" + # %pip install -q "git+https://github.com/huggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu .. code:: ipython3 @@ -77,435 +69,63 @@ Prerequisites import requests - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - - MODEL_DIR = Path("model") - IMAGE_ENCODER_PATH = MODEL_DIR / "image_encoder.xml" - INPUT_EMBEDDING_PATH = MODEL_DIR / "input_embeddings.xml" - LANGUAGE_MODEL_PATH = MODEL_DIR / "language_model.xml" - - requires_pt_model_loading = not all([p.exists() for p in [IMAGE_ENCODER_PATH, INPUT_EMBEDDING_PATH, LANGUAGE_MODEL_PATH]]) - -Download PyTorch model ----------------------- - - - -..
code:: ipython3 - - from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration - import torch - import gc - - processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf") - image_encoder_model, input_embedding_model, language_model = None, None, None - - - class ImageEncoder(torch.nn.Module): - def __init__(self, config, vision_tower, multi_modal_projector): - super().__init__() - self.config = config - self.vision_tower = vision_tower - self.multi_modal_projector = multi_modal_projector - - def forward(self, pixel_values): - batch_size, num_patches, num_channels, height, width = pixel_values.shape - reshaped_pixel_values = pixel_values.view(batch_size * num_patches, num_channels, height, width) - image_features = self.vision_tower(reshaped_pixel_values, output_hidden_states=True) - selected_image_feature = image_features.hidden_states[self.config.vision_feature_layer] - if self.config.vision_feature_select_strategy == "default": - selected_image_feature = selected_image_feature[:, 1:] - elif self.config.vision_feature_select_strategy == "full": - selected_image_feature = selected_image_feature - image_features = self.multi_modal_projector(selected_image_feature) - return image_features + utility_files = ["notebook_utils.py", "cmd_helper.py"] + for utility in utility_files: + local_path = Path(utility) + if not local_path.exists(): + r = requests.get( + url=f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{local_path.name}", + ) + with local_path.open("w") as f: + f.write(r.text) - if requires_pt_model_loading: - model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf", low_cpu_mem_usage=True) - model.config.save_pretrained(MODEL_DIR) - image_encoder_model = ImageEncoder(model.config, model.vision_tower, model.multi_modal_projector) - input_embedding_model = input_embedding_model = model.get_input_embeddings() - language_model = model.language_model - 
del model - gc.collect() + model_id = "llava-hf/llava-v1.6-mistral-7b-hf" + MODEL_DIR = Path(model_id.split("/")[-1].replace("-hf", "-ov")) -Convert model to OpenVINO Intermediate Representation +Convert model to OpenVINO IR format using Optimum CLI ----------------------------------------------------- OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate -Representation (IR). `OpenVINO model conversion -API `__ -should be used for these purposes. ``ov.convert_model`` function accepts -original PyTorch model instance and example input for tracing and -returns ``ov.Model`` representing this model in OpenVINO framework. -Converted model can be used for saving on disk using ``ov.save_model`` -function or directly loading on device using ``core.complie_model``. - -LLaVA-NeXT is autoregressive transformer generative model, it means that -each next model step depends from model output from previous step. The -generation approach is based on the assumption that the probability -distribution of a word sequence can be decomposed into the product of -conditional next word distributions. In other words, model predicts the -next token in the loop guided by previously generated tokens until the -stop-condition will be not reached (generated sequence of maximum length -or end of string token obtained). The way the next token will be -selected over predicted probabilities is driven by the selected decoding -methodology. You can find more information about the most popular -decoding methods in this -`blog `__. The entry point -for the generation process for models from the Hugging Face Transformers -library is the ``generate`` method. You can find more information about -its parameters and configuration in the -`documentation `__. -To preserve flexibility in the selection decoding methodology, we will -convert only model inference for one step. - -The inference flow has difference on first step and for the next. 
On the -first step, model accept preprocessed input instruction and image, that -transformed to the unified embedding space using ``input_embedding`` and -``image_encoder`` models, after that ``language model``, LLM-based part -of model, runs on input embeddings to predict probability of next -generated tokens. On the next step, ``language_model`` accepts only next -token id selected based on sampling strategy and processed by -``input_embedding`` model and cached attention key and values. Since the -output side is auto-regressive, an output token hidden state remains the -same once computed for every further generation step. Therefore, -recomputing it every time you want to generate a new token seems -wasteful. With the cache, the model saves the hidden state once it has -been computed. The model only computes the one for the most recently -generated output token at each time step, re-using the saved ones for -hidden tokens. This reduces the generation complexity from -:math:`O(n^3)` to :math:`O(n^2)` for a transformer model. More details -about how it works can be found in this -`article `__. - -To sum up above, model consists of 3 parts: - -- **Image Encoder** for encoding input images into embedding space -- **Input Embedding** for conversion input text tokens into embedding - space -- **Language Model** for generation answer based on input embeddings - provided by Image Encoder and Input Embedding models. - -Let’s convert each model part. - -Image Encoder -~~~~~~~~~~~~~ - - - -Image Encoder is represented in LLaVA by pretrained CLIP model. - -.. 
code:: ipython3 - - import torch - import openvino as ov - import gc - - - def cleanup_torchscript_cache(): - """ - Helper for removing cached model representation - """ - torch._C._jit_clear_class_registry() - torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() - torch.jit._state._clear_class_state() - - - if not IMAGE_ENCODER_PATH.exists(): - ov_image_encoder = ov.convert_model(image_encoder_model, example_input=torch.zeros((1, 5, 3, 336, 336))) - ov.save_model(ov_image_encoder, IMAGE_ENCODER_PATH) - del ov_image_encoder - cleanup_torchscript_cache() - - del image_encoder_model - gc.collect(); - -Text Embedding -~~~~~~~~~~~~~~ - - - -In LLMs, input embedding is a part of language model, but for LLaVA the -first step hidden state produced by this model part should be integrated -with image embeddings into common embedding space. For ability to reuse -this model part and avoid introduction of llm model instance, we will -use it separately. - -.. code:: ipython3 - - llm_input = None - - if not LANGUAGE_MODEL_PATH.exists(): - llm_input = input_embedding_model(torch.ones((2, 2), dtype=torch.int64)) - - if not INPUT_EMBEDDING_PATH.exists(): - ov_input_embeddings_model = ov.convert_model(input_embedding_model, example_input=torch.ones((2, 2), dtype=torch.int64)) - ov.save_model(ov_input_embeddings_model, INPUT_EMBEDDING_PATH) - del ov_input_embeddings_model - cleanup_torchscript_cache() - - del input_embedding_model - gc.collect(); - -Language Model -~~~~~~~~~~~~~~ - - - -Language Model is responsible for generation answer in LLaVA. This part -is very similar to standard LLM for text generation. Our model uses -`mistralai/Mistral-7B-Instruct-v0.2 `__ -as base LLM. To optimize the generation process and use memory more -efficiently, HuggingFace transformers API provides a mechanism for -caching model state externally using ``use_cache=True`` parameter and -``past_key_values`` argument in inputs and outputs. 
With the cache, the -model saves the hidden state once it has been computed. The model only -computes the one for the most recently generated output token at each -time step, re-using the saved ones for hidden tokens. This reduces the -generation complexity from :math:`O(n^3)` to :math:`O(n^2)` for a -transformer model. With this option, the model gets the previous step’s -hidden states (cached attention keys and values) as input and -additionally provides hidden states for the current step as output. It -means for all next iterations, it is enough to provide only a new token -obtained from the previous step and cached key values to get the next -token prediction. - -With increasing model size like in modern LLMs, we also can note an -increase in the number of attention blocks and size past key values -tensors respectively. The strategy for handling cache state as model -inputs and outputs in the inference cycle may become a bottleneck for -memory-bounded systems, especially with processing long input sequences, -for example in a chatbot scenario. OpenVINO suggests a transformation -that removes inputs and corresponding outputs with cache tensors from -the model keeping cache handling logic inside the model. Such models are -also called stateful. A stateful model is a model that implicitly -preserves data between two consecutive inference calls. The tensors -saved from one run are kept in an internal memory buffer called a -``state`` or a ``variable`` and may be passed to the next run, while -never being exposed as model output. Hiding the cache enables storing -and updating the cache values in a more device-friendly representation. -It helps to reduce memory consumption and additionally optimize model -performance. More details about stateful models and working with state -can be found in `OpenVINO -documentation `__. - -.. 
code:: ipython3 - - from typing import Optional, Tuple, List - from openvino.runtime import opset13 - import numpy as np - - - def model_has_state(ov_model: ov.Model): - return len(ov_model.get_sinks()) > 0 - - - def model_has_input_output_name(ov_model: ov.Model, name: str): - """ - Helper function for checking that model has specified input or output name - - Parameters: - ov_model (ov.Model): - name (str): - name of input or output - - Returns: - True if input or output with requested name exists else False - """ - return name in sum([list(t.get_names()) for t in ov_model.inputs + ov_model.outputs], []) - - - def fuse_cache_reorder( - ov_model: ov.Model, - not_kv_inputs: List[str], - key_value_input_names: List[str], - gather_dim: int, - ): - """ - Fuses reored_cache during generate cycle into ov.Model. Used with stateful models, because we can not modify model state directly. - - Adds a new beam_idx parameter and Gather op per each kv-cache input in a given model. - Should be run before make_stateful. Implements optimumum's _reorder_cache - inside the model in the beginning of each iteration. - Gather works along given gather_dim dimension that may vary from model to model. - KV-cache inputs are identified based on names in key_value_input_names. - Append the new beam_idx parameter to not_kv_inputs. 
- - Parameters: - ov_model (`ov.Model`): - openvino model for processing - not_kv_inputs (`List[str]`): - list of input nodes in model that not related to past key values - key_value_input_names (`List[str]`): - list of names for key value input layers - gather_dim (int): - dimension for gathering cache during reorder pass - """ - - if model_has_input_output_name(ov_model, "beam_idx"): - raise ValueError("Model already has fused cache") - input_batch = ov_model.input("inputs_embeds").get_partial_shape()[0] - beam_idx = opset13.parameter(name="beam_idx", dtype=ov.Type.i32, shape=ov.PartialShape([input_batch])) - beam_idx.output(0).get_tensor().add_names({"beam_idx"}) # why list is not accepted? - ov_model.add_parameters([beam_idx]) - not_kv_inputs.append(ov_model.inputs[-1]) - # Go over all cache parameters and fuse _reorder_cache with indices provided by the new parameter beam_idx - for input_name in key_value_input_names: - parameter_output_port = ov_model.input(input_name) - consumers = parameter_output_port.get_target_inputs() - gather = opset13.gather(parameter_output_port, beam_idx, opset13.constant(gather_dim)) - for consumer in consumers: - consumer.replace_source_output(gather.output(0)) - ov_model.validate_nodes_and_infer_types() - - - def build_state_initializer(ov_model: ov.Model, batch_dim: int): - """ - Build initialization ShapeOf Expression for all ReadValue ops - - Parameters: - ov_model (ov.Model): - openvino model - batch_dim (int): - index of dimension corresponding to batch size - """ - input_ids = ov_model.input("inputs_embeds") - batch = opset13.gather( - opset13.shape_of(input_ids, output_type="i64"), - opset13.constant([0]), - opset13.constant(0), - ) - for op in ov_model.get_ops(): - if op.get_type_name() == "ReadValue": - dims = [dim.min_length for dim in list(op.get_output_partial_shape(0))] - dims[batch_dim] = batch - dims = [(opset13.constant(np.array([dim], dtype=np.int64)) if isinstance(dim, int) else dim) for dim in dims] - shape = 
opset13.concat(dims, axis=0) - broadcast = opset13.broadcast(opset13.constant(0.0, dtype=op.get_output_element_type(0)), shape) - op.set_arguments([broadcast]) - ov_model.validate_nodes_and_infer_types() - - - def make_stateful( - ov_model: ov.Model, - not_kv_inputs: List[str], - key_value_input_names: List[str], - key_value_output_names: List[str], - batch_dim: int, - num_attention_heads: int, - num_beams_and_batch: int = None, - ): - """ - Hides kv-cache inputs and outputs inside the model as variables. - - Parameters: - ov_model (ov.Model): - openvino model - not_kv_inputs (`List[str]`): - list of input nodes in model that not related to past key values - key_value_input_names (`List[str]`): - list of names for key value input layers - key_value_output_names (`List[str]`): - list of names for key value input layers - batch_dim (int): - index of batch dimension in key value layers - num_attention_heads (int): - number of attention heads for batch dimension initialization - num_beams_an_batch (int): - precalculated number of beams and batch for shapes initialization - """ - from openvino._offline_transformations import apply_make_stateful_transformation - - input_output_map = {} - - if num_beams_and_batch is not None: - # Set batch size for input_ids and attention mask to avoid dynamic dimension got propagated from the end of the model back to ReadValue - for input in not_kv_inputs: - shape = input.get_partial_shape() - if shape.rank.get_length() <= 2: # == 1 for beam_index - shape[0] = num_beams_and_batch - input.get_node().set_partial_shape(shape) - for kv_name_pair in zip(key_value_input_names, key_value_output_names): - input_output_map[kv_name_pair[0]] = kv_name_pair[1] - if num_beams_and_batch is not None: - input = ov_model.input(kv_name_pair[0]) - shape = input.get_partial_shape() - shape[batch_dim] = num_beams_and_batch * num_attention_heads - input.get_node().set_partial_shape(shape) - - if num_beams_and_batch is not None: - # Re-validation model if 
shapes are altered above - ov_model.validate_nodes_and_infer_types() - - apply_make_stateful_transformation(ov_model, input_output_map) - if num_beams_and_batch is None: - build_state_initializer(ov_model, batch_dim) - - - def patch_stateful(ov_model): - key_value_input_names = [key.get_any_name() for key in ov_model.inputs[2:-1]] - key_value_output_names = [key.get_any_name() for key in ov_model.outputs[1:]] - not_kv_inputs = [input for input in ov_model.inputs if not any(name in key_value_input_names for name in input.get_names())] - if not key_value_input_names or not key_value_output_names: - return - batch_dim = 0 - num_attention_heads = 1 - - fuse_cache_reorder(ov_model, not_kv_inputs, key_value_input_names, batch_dim) - make_stateful( - ov_model, - not_kv_inputs, - key_value_input_names, - key_value_output_names, - batch_dim, - num_attention_heads, - None, - ) +Representation (IR) format. For convenience, we will use OpenVINO +integration with HuggingFace Optimum. `Optimum +Intel `__ is the +interface between the Transformers and Diffusers libraries and the +different tools and libraries provided by Intel to accelerate end-to-end +pipelines on Intel architectures. + +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. ``optimum-cli`` provides command line interface for +model conversion and optimization. + +General command format: + +.. code:: bash + + optimum-cli export openvino --model --task + +where task is task to export the model for, if not specified, the task +will be auto-inferred based on the model. You can find a mapping between +tasks and model classes in Optimum TaskManager +`documentation `__. +Additionally, you can specify weights compression using +``--weight-format`` argument with one of following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. 
For int8 and int4 +`nncf `__ will be used for +weight compression. More details about model export are provided in `Optimum +Intel +documentation `__. .. code:: ipython3 - make_stateful_model = True - core = ov.Core() + from cmd_helper import optimum_cli - if not LANGUAGE_MODEL_PATH.exists(): - pkv = language_model(inputs_embeds=llm_input, attention_mask=torch.ones((2, 2), dtype=torch.int64))[1] - model_inputs = ["attention_mask", "position_ids"] - model_outputs = ["logits"] - for idx in range(len(pkv)): - model_inputs.extend([f"past_key_values.{idx}.key", f"past_key_values.{idx}.value"]) - model_outputs.extend([f"present.{idx}.key", f"present.{idx}.value"]) - model_inputs.append("inputs_embeds") - language_model.config.torchscript = True - position_ids = torch.tensor([[2, 3], [2, 3]]) - ov_model = ov.convert_model( - language_model, - example_input={ - "inputs_embeds": llm_input, - "attention_mask": torch.ones((2, 4)), - "past_key_values": pkv, - "position_ids": position_ids, - }, - ) - - for input, input_name in zip(ov_model.inputs, model_inputs): - input.get_tensor().set_names({input_name}) - - for output, output_name in zip(ov_model.outputs, model_outputs): - output.get_tensor().set_names({output_name}) - if make_stateful_model: - patch_stateful(ov_model) - ov.save_model(ov_model, LANGUAGE_MODEL_PATH) - del ov_model - cleanup_torchscript_cache() - del language_model - gc.collect() + if not (MODEL_DIR / "FP16").exists(): + optimum_cli(model_id, MODEL_DIR / "FP16", additional_args={"weight-format": "fp16"}) Compress Language Model Weights to 4 bits ----------------------------------------- @@ -516,9 +136,11 @@ For reducing memory consumption, weights compression optimization can be applied using `NNCF `__. Weight compression aims to reduce the memory footprint of a model. It can also lead to significant performance improvement for large memory-bound -models, such as Large Language Models (LLMs).
LLMs and other models, -which require extensive memory to store the weights during inference, -can benefit from weight compression in the following ways: +models, such as Large Language Models (LLMs). + +LLMs and other models, which require extensive memory to store the +weights during inference, can benefit from weight compression in the +following ways: - enabling the inference of exceptionally large models that cannot be accommodated in the memory of the device; @@ -574,7 +196,10 @@ documentation `__ (Neural Network -Compression Framework) and infer quantized model via OpenVINO™ Toolkit. -`NNCF `__ enables -post-training quantization by adding quantization layers into model -graph and then using a subset of the training dataset to initialize the -parameters of these additional quantization layers. Quantized operations -are executed in ``INT8`` instead of ``FP32``/``FP16`` making model -inference faster. The optimization process contains the following steps: - -1. Prepare quantization dataset -2. Quantize the converted OpenVINO model with NNCF. -3. Save quantized model on disk for next usage. - -.. - - **Note:** quantization process may require additional time and memory - for performing. You can disable it using widget below: - -.. code:: ipython3 - - from notebook_utils import quantization_widget - - to_quantize = quantization_widget() - to_quantize - - + copy_model_folder(MODEL_DIR / "FP16", MODEL_DIR / "INT4", ["openvino_language_model.xml", "openvino_language_model.bin"]) .. parsed-literal:: - Checkbox(value=True, description='Quantization') - - - -.. 
code:: ipython3 - - IMAGE_ENCODER_PATH_INT8 = IMAGE_ENCODER_PATH.parent / IMAGE_ENCODER_PATH.name.replace(".xml", "-int8.xml") - - - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", - ) - open("skip_kernel_extension.py", "w").write(r.text) - - %load_ext skip_kernel_extension - -Prepare datasets -~~~~~~~~~~~~~~~~ - - - -The `Conceptual -Captions `__ dataset -consisting of ~3.3M images annotated with captions is used to quantize -model. - -.. code:: ipython3 - - %%skip not $to_quantize.value - - import requests - from io import BytesIO - import numpy as np - from PIL import Image - from requests.packages.urllib3.exceptions import InsecureRequestWarning - requests.packages.urllib3.disable_warnings(InsecureRequestWarning) - - - def get_pil_from_url(url): - """ - Downloads and converts an image from a URL to a PIL Image object. - """ - response = requests.get(url, verify=False, timeout=20) - image = Image.open(BytesIO(response.content)) - return image.convert("RGB") - - def collate_fn(example, image_column="image_url"): - """ - Preprocesses an example by loading and transforming image and text data. - Checks if the text data in the example is valid by calling the `check_text_data` function. - Downloads the image specified by the URL in the image_column by calling the `get_pil_from_url` function. - If there is any error during the download process, returns None. - Returns the preprocessed inputs with transformed image and text data. - """ - assert len(example) == 1 - example = example[0] - url = example[image_column] - try: - image = get_pil_from_url(url) - h, w = image.size - if h == 1 or w == 1: - return None - except Exception: - return None - - inputs = processor.image_processor(images=[image], return_tensors="pt") - return inputs - -.. 
code:: ipython3 - - %%skip not $to_quantize.value - - import torch - from datasets import load_dataset - from tqdm.notebook import tqdm - - def prepare_calibration_data(dataloader, init_steps): - """ - This function prepares calibration data from a dataloader for a specified number of initialization steps. - It iterates over the dataloader, fetching batches and storing the relevant data. - """ - data = [] - print(f"Fetching {init_steps} samples for the initialization...") - with tqdm(total=init_steps) as pbar: - for batch in dataloader: - if len(data) == init_steps: - break - if batch: - pbar.update(1) - with torch.no_grad(): - data.append( - { - "pixel_values": batch["pixel_values"].to("cpu") - } - ) - return data - - - def prepare_dataset(opt_init_steps=50, max_train_samples=1000): - """ - Prepares a vision-text dataset for quantization. - """ - dataset = load_dataset("google-research-datasets/conceptual_captions", trust_remote_code=True) - train_dataset = dataset["train"].shuffle(seed=42) - dataloader = torch.utils.data.DataLoader(train_dataset, collate_fn=collate_fn, batch_size=1) - calibration_data = prepare_calibration_data(dataloader, opt_init_steps) - return calibration_data - -.. code:: ipython3 - - %%skip not $to_quantize.value - - vcalibration_data = [] - if not IMAGE_ENCODER_PATH_INT8.exists(): - calibration_data = prepare_dataset() - -Perform quantization -~~~~~~~~~~~~~~~~~~~~ - - - -Create a quantized model from the pre-trained model. - - **NOTE**: Quantization is time and memory consuming operation. - Running quantization code below may take some time. - -.. code:: ipython3 + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - %%skip not $to_quantize.value - - - if not IMAGE_ENCODER_PATH_INT8.exists(): - if len(calibration_data) == 0: - raise RuntimeError( - 'Calibration dataset is empty. Please check internet connection and try to download images manually.' 
- ) - - ov_model = core.read_model(IMAGE_ENCODER_PATH) - calibration_dataset = nncf.Dataset(calibration_data) - quantized_model = nncf.quantize( - model=ov_model, - calibration_dataset=calibration_dataset, - model_type=nncf.ModelType.TRANSFORMER, - subset_size=len(calibration_data), - # Smooth Quant algorithm reduces activation quantization error; optimal alpha value was obtained through grid search - advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.6) - ) - ov.save_model(quantized_model, IMAGE_ENCODER_PATH_INT8) - del ov_model - del quantized_model - gc.collect() Prepare model inference pipeline -------------------------------- @@ -796,392 +244,42 @@ Prepare model inference pipeline |image0| -``OVLlavaForCausalLM`` class provides ease-to-use interface for using -model in generation scenario. It is based on -``transformers.generation.GenerationMixin`` that gives us opportunity to -reuse all reach capabilities for generation implemented in HuggingFace -Transformers library. More details about this interface can be found in -`HuggingFace -documentation `__. +`OpenVINO™ GenAI `__ +is a library of the most popular Generative AI model pipelines, +optimized execution methods, and samples that run on top of highly +performant `OpenVINO +Runtime `__. + +This library is friendly to PC and laptop execution, and optimized for +resource consumption. It requires no external dependencies to run +generative models as it already includes all the core functionality +(e.g. tokenization via openvino-tokenizers). OpenVINO™ GenAI is a flavor +of OpenVINO™, aiming to simplify running inference of generative AI +models. It hides the complexity of the generation process and minimizes +the amount of code required. + +Inference Visual language models can be implemented using OpenVINO GenAI +``VLMPipeline`` class. Similarly to LLMPipeline, that we discussed in +this +`notebook `__. 
+It supports chat mode, preserving the conversation history inside the +pipeline, which allows us to effectively implement a chatbot that supports +conversation about the content of the provided images. .. |image0| image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/a562e9de-5b94-4e24-ac52-532019fc92d3 -.. code:: ipython3 - - import torch - from transformers.generation import GenerationConfig, GenerationMixin - from transformers.modeling_outputs import CausalLMOutputWithPast - from transformers import AutoConfig - from transformers.models.llava_next.modeling_llava_next import ( - get_anyres_image_grid_shape, - unpad_image, - ) - import openvino as ov - - - class OVLlavaForCausalLM(GenerationMixin): - def __init__( - self, - core, - image_encoder_path, - input_embedding_path, - language_model_path, - lm_device, - img_encoder_device, - ): - self.image_encoder = core.compile_model(core.read_model(image_encoder_path), img_encoder_device) - self.input_embeddings = core.compile_model(core.read_model(input_embedding_path), lm_device) - self.model = core.read_model(language_model_path) - self.input_names = {key.get_any_name(): idx for idx, key in enumerate(self.model.inputs)} - self.output_names = {idx: key for idx, key in enumerate(self.model.outputs)} - self.key_value_input_names = [key for key in list(self.input_names) if key not in ["beam_idx", "inputs_embeds", "attention_mask", "position_ids"]] - self.key_value_output_names = [key for key in list(self.output_names)[1:]] - self.stateful = len(self.key_value_input_names) == 0 - compiled_model = core.compile_model(self.model, lm_device) - self.request = compiled_model.create_infer_request() - self.config = AutoConfig.from_pretrained(Path(language_model_path).parent) - self.generation_config = GenerationConfig.from_model_config(self.config) - self.main_input_name = "input_ids" - self.device = torch.device("cpu") - self.num_pkv = 2 - self.next_beam_idx = None - self.image_newline = 
torch.zeros(self.config.text_config.hidden_size, dtype=torch.float32) - self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1 - self.past_len = 0 - self._supports_cache_class = False - - def can_generate(self): - """Returns True to validate the check that the model using `GenerationMixin.generate()` can indeed generate.""" - return True - - def __call__( - self, - input_ids: torch.LongTensor, - pixel_values: torch.Tensor, - attention_mask: Optional[torch.LongTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - position_ids: Optional[torch.LongTensor] = None, - image_sizes=None, - **kwargs, - ) -> CausalLMOutputWithPast: - return self.forward( - input_ids, - pixel_values, - attention_mask, - past_key_values, - position_ids, - image_sizes, - **kwargs, - ) - - def forward( - self, - input_ids: torch.LongTensor, - pixel_values: torch.Tensor, - attention_mask: Optional[torch.LongTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - position_ids: Optional[torch.LongTensor] = None, - image_sizes=None, - **kwargs, - ) -> CausalLMOutputWithPast: - """General inference method""" - inputs = {} - if past_key_values is not None: - inputs = {} - if not self.stateful: - past_key_values = tuple(past_key_value for pkv_per_layer in past_key_values for past_key_value in pkv_per_layer) - # Add the past_key_values to the decoder inputs - inputs = dict(zip(self.key_value_input_names, past_key_values)) - # input_ids = np.array(input_ids)[:, -1:] - inputs_embeds = self.input_embeddings(input_ids)[0] - inputs["inputs_embeds"] = inputs_embeds - # inputs["attention_mask"] = attention_mask - if "beam_idx" in self.input_names: - inputs["beam_idx"] = self.next_beam_idx if self.next_beam_idx is not None else np.arange(batch_size, dtype=int) - - if not self.stateful: - first_layer_past_key_value = torch.from_numpy(past_key_values[0][0][:, :, :, 0]) - else: - first_layer_past_key_value = 
torch.from_numpy(self.request.query_state()[0].state.data[:, :, :, 0]) - - # Sum all dimensions of head_dim (-2) to avoid random errors such as: https://github.com/huggingface/transformers/pull/28032#issuecomment-1863691941 - batch_index, non_attended_tokens = torch.where(first_layer_past_key_value.float().sum(-2) == 0) - - # Get the target length - target_length = input_ids.shape[1] - past_length = first_layer_past_key_value.shape[-1] - - extended_attention_mask = torch.ones( - (attention_mask.shape[0], past_length), - dtype=attention_mask.dtype, - device=attention_mask.device, - ) - - # Filter out only the tokens that can be un-attended, this can happen - # if one uses Llava + Fused modules where the cache on the - # first iteration is already big enough, or if one passes custom cache - valid_indices = non_attended_tokens < extended_attention_mask.size(-1) - new_batch_index = batch_index[valid_indices] - new_non_attended_tokens = non_attended_tokens[valid_indices] - - # Zero-out the places where we don't need to attend - extended_attention_mask[new_batch_index, new_non_attended_tokens] = 0 - - attention_mask = torch.cat((extended_attention_mask, attention_mask[:, -target_length:]), dim=1) - position_ids = torch.sum(attention_mask, dim=1).unsqueeze(-1) - 1 - inputs["attention_mask"] = attention_mask - inputs["position_ids"] = position_ids - - else: - inputs = self.prepare_multimodal_input(input_ids, pixel_values, attention_mask, position_ids, image_sizes) - - # Run inference - self.request.start_async(inputs, share_inputs=True) - self.request.wait() - - logits = torch.from_numpy(self.request.get_tensor(self.output_names[0]).data) - - if not self.stateful: - # Tuple of length equal to : number of layer * number of past_key_value per decoder layer (2 corresponds to the self-attention layer) - past_key_values = tuple(self.request.get_tensor(key).data for key in self.key_value_output_names) - # Tuple of tuple of length `n_layers`, with each tuple of length equal to 2 
(k/v of self-attention) - past_key_values = tuple(past_key_values[i : i + self.num_pkv] for i in range(0, len(past_key_values), self.num_pkv)) - else: - past_key_values = ((),) - self.past_len += inputs["inputs_embeds"].shape[1] - return CausalLMOutputWithPast(logits=logits, past_key_values=past_key_values) - - def prepare_multimodal_input(self, input_ids, pixel_values, attention_mask, position_ids, image_sizes=None): - """Preprocessing function for embedding multimodal data""" - inputs = {} - inputs_embeds = torch.from_numpy(self.input_embeddings(input_ids)[0]) - batch_size = input_ids.shape[0] - if not self.stateful: - for input_name in self.key_value_input_names: - model_inputs = self.model.input(input_name) - shape = model_inputs.get_partial_shape() - shape[0] = batch_size - if shape[2].is_dynamic: - shape[2] = 0 - else: - shape[1] = 0 - inputs[input_name] = ov.Tensor(model_inputs.get_element_type(), shape.get_shape()) - else: - self.past_len = 0 - self.request.reset_state() - # Set initial value for the next beam_idx input that will be used at the current iteration - # and will be optionally updated by _reorder_cache at the next iterations if beam_search is used - self.next_beam_idx = np.arange(batch_size, dtype=int) - - if "beam_idx" in self.input_names: - inputs["beam_idx"] = self.next_beam_idx if self.next_beam_idx is not None else np.arange(batch_size, dtype=int) - if pixel_values is None: - inputs["inputs_embeds"] = inputs_embeds - inputs["attention_mask"] = attention_mask - if position_ids is None: - position_ids = torch.cumsum(attention_mask, axis=1) - 1 - position_ids[attention_mask == 0] = 1 - inputs["position_ids"] = position_ids - res = self.image_encoder(pixel_values) - image_features = torch.from_numpy(res[0]) - split_sizes = [image.shape[0] for image in pixel_values] - image_features = torch.split(image_features, split_sizes, dim=0) - - # NOTE we only support multimodal_patch_merge_type == "spatial_unpad" - height = width = 
self.config.vision_config.image_size // self.config.vision_config.patch_size - - new_image_features = [] - for image_idx, image_feature in enumerate(image_features): - if image_feature.shape[0] > 1: - base_image_feature = image_feature[0] - image_feature = image_feature[1:] - - if height * width != base_image_feature.shape[0]: - raise ValueError("The number of patches is not consistent with the image size.") - num_patch_height, num_patch_width = get_anyres_image_grid_shape( - image_sizes[image_idx], - self.config.image_grid_pinpoints, - self.config.vision_config.image_size, - ) - image_feature = image_feature.view(num_patch_height, num_patch_width, height, width, -1) - image_feature = image_feature.permute(4, 0, 2, 1, 3).contiguous() - image_feature = image_feature.flatten(1, 2).flatten(2, 3) - image_feature = unpad_image(image_feature, image_sizes[image_idx]) - image_feature = torch.cat( - ( - image_feature, - self.image_newline[:, None, None].expand(*image_feature.shape[:-1], 1), - ), - dim=-1, - ) - image_feature = image_feature.flatten(1, 2).transpose(0, 1) - image_feature = torch.cat((base_image_feature, image_feature), dim=0) - else: - image_feature = image_feature[0] - image_feature = torch.cat((image_feature, self.image_newline[None]), dim=0) - new_image_features.append(image_feature) - image_features = torch.stack(new_image_features, dim=0) - - ( - inputs_embeds, - attention_mask, - position_ids, - ) = self._merge_input_ids_with_image_features(image_features, inputs_embeds, input_ids, attention_mask, None) - inputs["inputs_embeds"] = inputs_embeds - inputs["attention_mask"] = attention_mask - inputs["position_ids"] = position_ids - - return inputs - - def _merge_input_ids_with_image_features(self, image_features, inputs_embeds, input_ids, attention_mask, labels): - num_images, num_image_patches, embed_dim = image_features.shape - batch_size, sequence_length = input_ids.shape - left_padding = not torch.sum(input_ids[:, -1] == 
torch.tensor(self.pad_token_id)) - # 1. Create a mask to know where special image tokens are - special_image_token_mask = input_ids == self.config.image_token_index - num_special_image_tokens = torch.sum(special_image_token_mask, dim=-1) - # Compute the maximum embed dimension - max_embed_dim = (num_special_image_tokens.max() * (num_image_patches - 1)) + sequence_length - batch_indices, non_image_indices = torch.where(input_ids != self.config.image_token_index) - - # 2. Compute the positions where text should be written - # Calculate new positions for text tokens in merged image-text sequence. - # `special_image_token_mask` identifies image tokens. Each image token will be replaced by `nb_text_tokens_per_images - 1` text tokens. - # `torch.cumsum` computes how each image token shifts subsequent text token positions. - # - 1 to adjust for zero-based indexing, as `cumsum` inherently increases indices by one. - new_token_positions = torch.cumsum((special_image_token_mask * (num_image_patches - 1) + 1), -1) - 1 - nb_image_pad = max_embed_dim - 1 - new_token_positions[:, -1] - if left_padding: - new_token_positions += nb_image_pad[:, None] # offset for left padding - text_to_overwrite = new_token_positions[batch_indices, non_image_indices] - - # 3. Create the full embedding, already padded to the maximum position - final_embedding = torch.zeros( - batch_size, - max_embed_dim, - embed_dim, - dtype=inputs_embeds.dtype, - device=inputs_embeds.device, - ) - final_attention_mask = torch.zeros( - batch_size, - max_embed_dim, - dtype=attention_mask.dtype, - device=inputs_embeds.device, - ) - # In case the Vision model or the Language model has been offloaded to CPU, we need to manually - # set the corresponding tensors into their correct target device. 
- target_device = inputs_embeds.device - batch_indices, non_image_indices, text_to_overwrite = ( - batch_indices.to(target_device), - non_image_indices.to(target_device), - text_to_overwrite.to(target_device), - ) - attention_mask = attention_mask.to(target_device) - - # 4. Fill the embeddings based on the mask. If we have ["hey" "", "how", "are"] - # we need to index copy on [0, 577, 578, 579] for the text and [1:576] for the image features - final_embedding[batch_indices, text_to_overwrite] = inputs_embeds[batch_indices, non_image_indices] - final_attention_mask[batch_indices, text_to_overwrite] = attention_mask[batch_indices, non_image_indices] - if labels is not None: - final_labels[batch_indices, text_to_overwrite] = labels[batch_indices, non_image_indices] - - # 5. Fill the embeddings corresponding to the images. Anything that is still zeros needs filling - image_to_overwrite = torch.all(final_embedding == 0, dim=-1) - image_to_overwrite &= image_to_overwrite.cumsum(-1) - 1 >= nb_image_pad[:, None].to(target_device) - if image_to_overwrite.sum() != image_features.shape[:-1].numel(): - raise ValueError( - f"The input provided to the model are wrong. The number of image tokens is {torch.sum(special_image_token_mask)} while" - f" the number of image given to the model is {num_images}. This prevents correct indexing and breaks batch generation." - ) - - final_embedding[image_to_overwrite] = image_features.contiguous().reshape(-1, embed_dim).to(target_device) - final_attention_mask |= image_to_overwrite - position_ids = (final_attention_mask.cumsum(-1) - 1).masked_fill_((final_attention_mask == 0), 1) - - # 6. Mask out the embedding at padding positions, as we later use the past_key_value value to determine the non-attended tokens. 
- batch_indices, pad_indices = torch.where(input_ids == self.pad_token_id) - indices_to_mask = new_token_positions[batch_indices, pad_indices] - - final_embedding[batch_indices, indices_to_mask] = 0 - - return final_embedding, final_attention_mask, position_ids - - def prepare_inputs_for_generation( - self, - input_ids, - past_key_values=None, - inputs_embeds=None, - pixel_values=None, - image_sizes=None, - attention_mask=None, - **kwargs, - ): - if past_key_values is not None: - if not self.stateful: - cache_length = past_length = past_key_values[0][0].shape[2] - else: - cache_length = past_length = self.past_len - - # Keep only the unprocessed tokens: - # 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where - # some of the inputs are exclusively passed as part of the cache (e.g. when passing input_embeds as - # input) - if attention_mask is not None and attention_mask.shape[1] > input_ids.shape[1]: - input_ids = input_ids[:, -(attention_mask.shape[1] - past_length) :] - # 2 - If the past_length is smaller than input_ids', then input_ids holds all input tokens. We can discard - # input_ids based on the past_length.llava - elif past_length < input_ids.shape[1]: - input_ids = input_ids[:, past_length:] - # 3 - Otherwise (past_length >= input_ids.shape[1]), let's assume input_ids only has unprocessed tokens. - elif self.config.image_token_index in input_ids: - input_ids = input_ids[:, input_ids.shape[1] - 1 :] - # If the cache has seen more tokens than it can hold, then the cache has a size limit. Let's discard the - # older attention values, as their corresponding values are not part of the input. 
- if cache_length < past_length and attention_mask is not None: - attention_mask = attention_mask[:, -(cache_length + input_ids.shape[1]) :] - - position_ids = kwargs.get("position_ids", None) - if attention_mask is not None and position_ids is None: - # create position_ids on the fly for batch gllavaenerationsubset_siz - position_ids = attention_mask.long().cumsum(-1) - 1 - position_ids.masked_fill_(attention_mask == 0, 1) - if past_key_values: - position_ids = position_ids[:, -input_ids.shape[1] :] - - # if `inputs_embeds` are passed, we only want to use them in the 1st generation step - if inputs_embeds is not None and past_key_values is None: - model_inputs = {"inputs_embeds": inputs_embeds} - else: - model_inputs = {"input_ids": input_ids} - - model_inputs.update( - { - "position_ids": position_ids, - "past_key_values": past_key_values, - "use_cache": kwargs.get("use_cache"), - "attention_mask": attention_mask, - "pixel_values": pixel_values, - "image_sizes": image_sizes, - } - ) - return model_inputs - -Run OpenVINO model inference ----------------------------- +Select inference device +~~~~~~~~~~~~~~~~~~~~~~~ -Select device for language model -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - +Select device from dropdown list for running inference using OpenVINO. .. code:: ipython3 from notebook_utils import device_widget - device = device_widget(exclude=["NPU"]) + device = device_widget("CPU", exclude=["NPU"]) device @@ -1190,29 +288,19 @@ Select device for language model .. parsed-literal:: - Dropdown(description='Device:', options=('CPU', 'GPU.0', 'GPU.1'), value='CPU') - - - -.. code:: ipython3 - - lm_device = device.value - -Select device for image encoder -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - + Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') -.. code:: ipython3 - device +Select model variant +~~~~~~~~~~~~~~~~~~~~ -.. code:: ipython3 - img_encoder_device = device.value .. 
code:: ipython3 + import ipywidgets as widgets + use_int4_lang_model = widgets.Checkbox( value=LANGUAGE_MODEL_PATH_INT4.exists(), description="INT4 language model", @@ -1230,126 +318,110 @@ Select device for image encoder -.. code:: ipython3 +Load OpenVINO model +~~~~~~~~~~~~~~~~~~~ - use_int8_image_encoder = widgets.Checkbox( - value=IMAGE_ENCODER_PATH_INT8.exists(), - description="INT8 image encoder", - disabled=not IMAGE_ENCODER_PATH_INT8.exists(), - ) - - use_int8_image_encoder +For pipeline initialization we should provide path to model directory +and inference device. +.. code:: ipython3 -.. parsed-literal:: + import openvino_genai as ov_genai + + model_dir = MODEL_DIR / "FP16" if not use_int4_lang_model.value else MODEL_DIR / "INT4" + + ov_model = ov_genai.VLMPipeline(model_dir, device=device.value) - Checkbox(value=True, description='INT4 language model') +Run OpenVINO model inference +---------------------------- -.. code:: ipython3 +Now, when we have model and defined generation pipeline, we can run +model inference. - lang_model_path = LANGUAGE_MODEL_PATH_INT4 if use_int4_lang_model.value else LANGUAGE_MODEL_PATH - image_encoder_path = IMAGE_ENCODER_PATH_INT8 if use_int8_image_encoder.value else IMAGE_ENCODER_PATH - - ov_llava_model = OVLlavaForCausalLM(core, image_encoder_path, INPUT_EMBEDDING_PATH, lang_model_path, lm_device, img_encoder_device) +For preparing input data, ``VLMPipeline`` uses tokenizer and image +processor inside, we just need to convert image to input OpenVINO tensor +and provide question as string. Additionally, we can provide options +for controlling generation process (e.g. number of maximum generated +tokens or using multinomial sampling for decoding instead of greedy +search approach) using ``GenerationConfig``. + +Generation process for long response may be time consuming, for +accessing partial result as soon as it is generated without waiting until +the whole process is finished, Streaming API can be used.
Token streaming is +the mode in which the generative system returns the tokens one by one as +the model generates them. This enables showing progressive generations +to the user rather than waiting for the whole generation. Streaming is +an essential aspect of the end-user experience as it reduces latency, +one of the most critical aspects of a smooth experience. .. code:: ipython3 - from PIL import Image import requests + from PIL import Image + from io import BytesIO + import numpy as np + config = ov_genai.GenerationConfig() + config.max_new_tokens = 100 - from transformers import TextStreamer - url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" - image = Image.open(requests.get(url, stream=True).raw) - question = "What is unusual on this image?" - prompt = f"[INST] \n{question}[/INST]" - streamer = TextStreamer(processor, skip_special_tokens=True, skip_prompt=True) + def load_image(image_file): + if image_file.startswith("http") or image_file.startswith("https"): + response = requests.get(image_file) + image = Image.open(BytesIO(response.content)).convert("RGB") + else: + image = Image.open(image_file).convert("RGB") + image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.byte) + return image, ov.Tensor(image_data) - inputs = processor(prompt, image, return_tensors="pt") - print(f"Question:\n{question}") - image - - -.. parsed-literal:: - - Question: - What is unusual on this image? - - - - -.. image:: llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_36_1.png - - - -.. code:: ipython3 - + + def streamer(subword: str) -> bool: + """ + + Args: + subword: sub-word of the generated text. + + Returns: Return flag corresponds whether generation should be stopped. 
+ + """ + print(subword, end="", flush=True) + + + image_file = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" + + image, image_tensor = load_image(image_file) + text_message = "What is unusual on this image?" + + prompt = text_message + + display(image) + print(f"Question:\n{text_message}") print("Answer:") - streamer = TextStreamer(processor, skip_special_tokens=True, skip_prompt=True) - output = ov_llava_model.generate(**inputs, max_new_tokens=49, streamer=streamer) + output = ov_model.generate(prompt, image=image_tensor, generation_config=config, streamer=streamer) -.. parsed-literal:: - Setting `pad_token_id` to `eos_token_id`:2 for open-end generation. +.. image:: llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_17_0.png .. parsed-literal:: + Question: + What is unusual on this image? Answer: - The image shows a cat lying on its back inside a cardboard box. What's unusual is that the cat appears to be in a relaxed and somewhat human-like pose, with its paws up in the air and its belly exposed. - + + + The unusual aspect of this image is that a cat is lying inside a cardboard box. Cats are known for their curiosity and love for small, enclosed spaces. They often find comfort and security in boxes, bags, or other confined spaces. In this case, the cat has chosen to lie down in a cardboard box, which is an unconventional and amusing sight. It is not common to see a cat lounging in a box, as they usually Interactive demo ---------------- -.. 
code:: ipython3 - - import gradio as gr - from transformers import TextIteratorStreamer - from threading import Thread - from PIL import Image - import torch - - - def bot_streaming(message, history): - print(message) - if message["files"]: - image = message["files"][-1]["path"] if isinstance(message["files"][-1], dict) else message["files"][-1] - else: - # if there's no image uploaded for this turn, look for images in the past turns - # kept inside tuples, take the last one - for hist in history: - if isinstance(hist[0], tuple): - image = hist[0][0] - - if image is None: - gr.Error("You need to upload an image for LLaVA to work.") - prompt = f"[INST] \n{message['text']} [/INST]" - image = Image.open(image).convert("RGB") - inputs = processor(prompt, image, return_tensors="pt") - - streamer = TextIteratorStreamer(processor, **{"skip_special_tokens": True}) - generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=100) - - thread = Thread(target=ov_llava_model.generate, kwargs=generation_kwargs) - thread.start() - - text_prompt = f"[INST] \n{message['text']} [/INST]" - - buffer = "" - for new_text in streamer: - buffer += new_text - generated_text_without_prompt = buffer[len(text_prompt) :] - yield generated_text_without_prompt - .. code:: ipython3 if not Path("gradio_helper.py").exists(): @@ -1358,7 +430,7 @@ Interactive demo from gradio_helper import make_demo - demo = make_demo(fn=bot_streaming) + demo = make_demo(ov_model) try: demo.launch(debug=False) @@ -1367,8 +439,3 @@ Interactive demo # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ - -.. 
code:: ipython3 - - # please uncomment and run this cell for stopping gradio interface - # demo.close() diff --git a/docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_36_1.jpg b/docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_17_0.jpg similarity index 100% rename from docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_36_1.jpg rename to docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_17_0.jpg diff --git a/docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_36_1.png b/docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_17_0.png similarity index 100% rename from docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_36_1.png rename to docs/notebooks/llava-next-multimodal-chatbot-with-output_files/llava-next-multimodal-chatbot-with-output_17_0.png diff --git a/docs/notebooks/llm-agent-react-with-output.rst b/docs/notebooks/llm-agent-react-with-output.rst index 653b57a491dbf2..aced34d99d90bd 100644 --- a/docs/notebooks/llm-agent-react-with-output.rst +++ b/docs/notebooks/llm-agent-react-with-output.rst @@ -108,17 +108,18 @@ does not serve its own LLMs, but rather provides a standard interface for interacting with many different LLMs. In this example, we can select ``Qwen2.5`` as LLM in agent pipeline. -* **qwen2.5-3b-instruct/qwen2.5-7b-instruct/qwen2.5-14b-instruct** - - Qwen2.5 is the latest series of Qwen large language models. Comparing - with Qwen2, Qwen2.5 series brings significant improvements in coding, - mathematics and general knowledge skills. 
Additionally, it brings - long-context and multiple languages support including Chinese, English, - French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, - Vietnamese, Thai, Arabic, and more. For more details, please refer to - `model_card `__, - `blog `__, - `GitHub `__, and - `Documentation `__. + +**qwen2.5-3b-instruct/qwen2.5-7b-instruct/qwen2.5-14b-instruct** - +Qwen2.5 is the latest series of Qwen large language models. Comparing +with Qwen2, Qwen2.5 series brings significant improvements in coding, +mathematics and general knowledge skills. Additionally, it brings +long-context and multiple languages support including Chinese, English, +French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, +Vietnamese, Thai, Arabic, and more. For more details, please refer to +`model_card `__, +`blog `__, +`GitHub `__, and +`Documentation `__. .. code:: ipython3 diff --git a/docs/notebooks/llm-chatbot-generate-api-with-output.rst b/docs/notebooks/llm-chatbot-generate-api-with-output.rst index dab94c37d77a4c..817a34011fde2d 100644 --- a/docs/notebooks/llm-chatbot-generate-api-with-output.rst +++ b/docs/notebooks/llm-chatbot-generate-api-with-output.rst @@ -749,7 +749,7 @@ to make it `symmetric `__ you can add ``--sym``. -For INT4 quantization you can also specify the following arguments : +For INT4 quantization you can also specify the following arguments: - The ``--group-size`` parameter will define the group size to use for quantization, -1 it will results in per-column quantization. @@ -852,12 +852,12 @@ of the available generation parameters more deeply later. .. 
code:: ipython3 - from openvino_genai import LLMPipeline + import openvino_genai as ov_genai print(f"Loading model from {model_dir}\n") - pipe = LLMPipeline(str(model_dir), device.value) + pipe = ov_genai.LLMPipeline(str(model_dir), device.value) generation_config = pipe.get_generation_config() diff --git a/docs/notebooks/llm-chatbot-with-output.rst b/docs/notebooks/llm-chatbot-with-output.rst index 0d214f5cccc0fc..88dda48053d8ec 100644 --- a/docs/notebooks/llm-chatbot-with-output.rst +++ b/docs/notebooks/llm-chatbot-with-output.rst @@ -655,13 +655,14 @@ to make it `symmetric `__ you can add ``--sym``. -For INT4 quantization you can also specify the following arguments : - -- The ``--group-size`` parameter will define the group size to use for - quantization, -1 it will results in per-column quantization. -- The ``--ratio`` parameter controls the ratio between 4-bit and 8-bit - quantization. If set to 0.9, it means that 90% of the layers will be - quantized to int4 while 10% will be quantized to int8. +For INT4 quantization you can also specify the following arguments: + +- +The ``--group-size`` parameter will define the group size to use for +quantization, -1 it will results in per-column quantization. - The +``--ratio`` parameter controls the ratio between 4-bit and 8-bit +quantization. If set to 0.9, it means that 90% of the layers will be +quantized to int4 while 10% will be quantized to int8. Smaller group_size and ratio values usually improve accuracy at the sacrifice of the model size and inference latency. diff --git a/docs/notebooks/llm-question-answering-with-output.rst b/docs/notebooks/llm-question-answering-with-output.rst index 2feb5ce81a08f5..f9c792ba1657d6 100644 --- a/docs/notebooks/llm-question-answering-with-output.rst +++ b/docs/notebooks/llm-question-answering-with-output.rst @@ -581,9 +581,9 @@ generation is finished, we will write class-iterator based on .. 
code:: ipython3 - from openvino_genai import LLMPipeline + import openvino_genai as ov_genai - pipe = LLMPipeline(model_dir.as_posix(), device.value) + pipe = ov_genai.LLMPipeline(model_dir.as_posix(), device.value) print(pipe.generate("The Sun is yellow bacause", temperature=1.2, top_k=4, do_sample=True, max_new_tokens=150)) @@ -675,7 +675,6 @@ Setup imports from time import perf_counter from typing import List import numpy as np - from openvino_genai import StreamerBase from queue import Queue import re @@ -695,7 +694,7 @@ when it is needed. It will help estimate performance. detokinizer_dir = Path(model_dir, "openvino_detokenizer.xml") - class TextIteratorStreamer(StreamerBase): + class TextIteratorStreamer(ov_genai.StreamerBase): def __init__(self, tokenizer): super().__init__() self.tokenizer = tokenizer diff --git a/docs/notebooks/magika-content-type-recognition-with-output.rst b/docs/notebooks/magika-content-type-recognition-with-output.rst index 3ef21583fa5807..383fdc6eebf499 100644 --- a/docs/notebooks/magika-content-type-recognition-with-output.rst +++ b/docs/notebooks/magika-content-type-recognition-with-output.rst @@ -43,6 +43,7 @@ In this tutorial we consider how to bring OpenVINO power into Magika. **Table of contents:** + - `Prerequisites <#prerequisites>`__ - `Define model loading class <#define-model-loading-class>`__ - `Run OpenVINO model inference <#run-openvino-model-inference>`__ @@ -77,8 +78,13 @@ Prerequisites .. parsed-literal:: ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - supervision 0.24.0 requires numpy<1.23.3,>=1.21.2; python_full_version <= "3.10.0", but you have numpy 1.24.4 which is incompatible. + supervision 0.25.0 requires numpy<1.23.3,>=1.21.2; python_full_version <= "3.10.0", but you have numpy 1.24.4 which is incompatible. 
+ tensorflow 2.12.0 requires keras<2.13,>=2.12.0, but you have keras 2.13.1 which is incompatible. tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. + tensorflow 2.12.0 requires tensorboard<2.13,>=2.12, but you have tensorboard 2.13.0 which is incompatible. + tensorflow 2.12.0 requires tensorflow-estimator<2.13,>=2.12.0, but you have tensorflow-estimator 2.13.0 which is incompatible. + tensorflow-cpu 2.13.1 requires numpy<=1.24.3,>=1.22, but you have numpy 1.24.4 which is incompatible. + tensorflow-cpu 2.13.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.12.2 which is incompatible. Note: you may need to restart the kernel to use updated packages. diff --git a/docs/notebooks/meter-reader-with-output.rst b/docs/notebooks/meter-reader-with-output.rst index c1317625880917..713c4d68edae6a 100644 --- a/docs/notebooks/meter-reader-with-output.rst +++ b/docs/notebooks/meter-reader-with-output.rst @@ -637,7 +637,7 @@ bounds of input batch size. .. 
parsed-literal:: - + diff --git a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_16_1.png b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_16_1.png index f5f465e5e0dad2..52a1b757cb6589 100644 --- a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_16_1.png +++ b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_16_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5277177823d4b99e277b1ecd207f67b850c5fd312974c2e691e260e016811526 +oid sha256:08c5ae3bb47e095d707bdaa7f8008bed7eeb1f672c82ae4d63334e665ec3e4d8 size 170121 diff --git a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_18_1.png b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_18_1.png index 373f323c93bd56..7151cac5e2d0e8 100644 --- a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_18_1.png +++ b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_18_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:889b083b05c7dd518506e68c76a9c7e78d2cbc1273606e1edbd3c2f308a49d9e +oid sha256:6433ef738eeb00f8d0dc4343ab289073c76321d2e12fe46318fbe374b0f745e2 size 190271 diff --git a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_20_1.png b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_20_1.png index 6c3df0677c7f11..05c23937df9fe5 100644 --- a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_20_1.png +++ b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_20_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8594e7ed5ce58de7b10de8aa066fa4f9adc43308be46e2ef4dd208da4913301e +oid sha256:3d67df91f05c9aeb0442a1c4aaef7527cf27e9be0938642eed807f8b5342aa7b size 26914 diff --git a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_22_1.png 
b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_22_1.png index 20a9bb7513c0bc..61e57d642da114 100644 --- a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_22_1.png +++ b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_22_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eafe2bfb1d91093d1208523063def5d5b4d13285153568d173c302b3d600adfa +oid sha256:50b9f932b844d99b59b51f2c6947dd048f96bf1553fe36de3975d3a3ad1715e4 size 8966 diff --git a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_24_1.png b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_24_1.png index 4647a76e34c861..b113bcf292fe00 100644 --- a/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_24_1.png +++ b/docs/notebooks/meter-reader-with-output_files/meter-reader-with-output_24_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:91a9b23ec86373699c0dbbb252a2cb1b9351ebb08b771a79a4fec4bffbb1787d +oid sha256:ad7114f80f8925643c865222d0fe0e05d4f65ab54e0b0d354edebe3e5c1ade7c size 170338 diff --git a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst index 98f1217902a587..7f64dd936292c5 100644 --- a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst @@ -31,11 +31,10 @@ techniques like weights compression using - `Prepare model inference pipeline <#prepare-model-inference-pipeline>`__ -- `Run OpenVINO model inference <#run-openvino-model-inference>`__ - `Select device <#select-device>`__ - - `Select language model variant <#select-language-model-variant>`__ +- `Run OpenVINO model inference <#run-openvino-model-inference>`__ - `Interactive demo <#interactive-demo>`__ Installation Instructions @@ -55,30 +54,19 @@ Prerequisites .. 
code:: ipython3 - %pip install -q "torch>=2.1" "torchvision" "timm>=0.9.2" "transformers>=4.40" "Pillow" "gradio>=4.19" "tqdm" "sentencepiece" "peft" "huggingface-hub>=0.24.0" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2024.3.0" "nncf>=2.12.0" - - -.. parsed-literal:: - - WARNING: Error parsing dependencies of torchsde: .* suffix can only be used with `==` or `!=` operators - numpy (>=1.19.*) ; python_version >= "3.7" - ~~~~~~~^ - Note: you may need to restart the kernel to use updated packages. - WARNING: Error parsing dependencies of torchsde: .* suffix can only be used with `==` or `!=` operators - numpy (>=1.19.*) ; python_version >= "3.7" - ~~~~~~~^ - Note: you may need to restart the kernel to use updated packages. - + %pip install -q "torch>=2.1" "torchvision" "timm>=0.9.2" "transformers>=4.45" "Pillow" "gradio>=4.19" "tqdm" "sentencepiece" "peft" "huggingface-hub>=0.24.0" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "nncf>=2.14.0" + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q -U "openvino>=2024.5" "openvino-tokenizers>=2024.5" "openvino-genai>=2024.5" .. code:: ipython3 import requests from pathlib import Path - if not Path("minicpm_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/minicpm-v-multimodal-chatbot/minicpm_helper.py") - open("minicpm_helper.py", "w").write(r.text) + if not Path("cmd_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py") + open("cmd_helper.py", "w").write(r.text) if not Path("gradio_helper.py").exists(): @@ -97,184 +85,36 @@ Convert model to OpenVINO Intermediate Representation OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate -Representation (IR). 
`OpenVINO model conversion -API `__ -should be used for these purposes. ``ov.convert_model`` function accepts -original PyTorch model instance and example input for tracing and -returns ``ov.Model`` representing this model in OpenVINO framework. -Converted model can be used for saving on disk using ``ov.save_model`` -function or directly loading on device using ``core.complie_model``. - -``minicpm_helper.py`` script contains helper function for model -conversion, please check its content if you interested in conversion -details. - -.. raw:: html - -
- -.. raw:: html - - - -Click here for more detailed explanation of conversion steps - -.. raw:: html - - - -MiniCPM-V2.6 is autoregressive transformer generative model, it means -that each next model step depends from model output from previous step. -The generation approach is based on the assumption that the probability -distribution of a word sequence can be decomposed into the product of -conditional next word distributions. In other words, model predicts the -next token in the loop guided by previously generated tokens until the -stop-condition will be not reached (generated sequence of maximum length -or end of string token obtained). The way the next token will be -selected over predicted probabilities is driven by the selected decoding -methodology. You can find more information about the most popular -decoding methods in this -`blog `__. The entry point -for the generation process for models from the Hugging Face Transformers -library is the ``generate`` method. You can find more information about -its parameters and configuration in the -`documentation `__. -To preserve flexibility in the selection decoding methodology, we will -convert only model inference for one step. - -The inference flow has difference on first step and for the next. On the -first step, model accept preprocessed input instruction and image, that -transformed to the unified embedding space using ``input_embedding`` and -``image encoder`` models, after that ``language model``, LLM-based part -of model, runs on input embeddings to predict probability of next -generated tokens. On the next step, ``language_model`` accepts only next -token id selected based on sampling strategy and processed by -``input_embedding`` model and cached attention key and values. Since the -output side is auto-regressive, an output token hidden state remains the -same once computed for every further generation step. Therefore, -recomputing it every time you want to generate a new token seems -wasteful. 
With the cache, the model saves the hidden state once it has -been computed. The model only computes the one for the most recently -generated output token at each time step, re-using the saved ones for -hidden tokens. This reduces the generation complexity from -:math:`O(n^3)` to :math:`O(n^2)` for a transformer model. More details -about how it works can be found in this -`article `__. - -With increasing model size like in modern LLMs, we also can note an -increase in the number of attention blocks and size past key values -tensors respectively. The strategy for handling cache state as model -inputs and outputs in the inference cycle may become a bottleneck for -memory-bounded systems, especially with processing long input sequences, -for example in a chatbot scenario. OpenVINO suggests a transformation -that removes inputs and corresponding outputs with cache tensors from -the model keeping cache handling logic inside the model. Such models are -also called stateful. A stateful model is a model that implicitly -preserves data between two consecutive inference calls. The tensors -saved from one run are kept in an internal memory buffer called a -``state`` or a ``variable`` and may be passed to the next run, while -never being exposed as model output. Hiding the cache enables storing -and updating the cache values in a more device-friendly representation. -It helps to reduce memory consumption and additionally optimize model -performance. More details about stateful models and working with state -can be found in `OpenVINO -documentation `__. - -In LLMs, ``input_embedding`` is a part of language model, but for -multimodal case, the first step hidden state produced by this model part -should be integrated with image embeddings into common embedding space. -For ability to reuse this model part and avoid introduction of llm model -instance, we will use it separately. - -``image_encoder`` is represented in MiniCPM-V by pretrained -`SigLIP `__ -model. 
Additionally, MiniCPM uses perceiver ``resampler`` that -compresses the image representations. To preserve model ability to -process images of different size with respect aspect ratio combined in -batch, we will use ``image_encoder`` and ``resampler`` as separated -models. - -To sum up above, model consists of 4 parts: - -- **Image Encoder** for encoding input images into embedding space. It - includes SigLIP model. -- **Resampler** for compression image representation. -- **Input Embedding** for conversion input text tokens into embedding - space. -- **Language Model** for generation answer based on input embeddings - provided by Image Encoder and Input Embedding models. - -Let’s convert each model part. - -.. raw:: html - -
- -.. code:: ipython3 - - from minicpm_helper import convert_minicpmv26 - - # uncomment the line to see model conversion code - # ??convert_minicpmv26 - - -.. parsed-literal:: - - 2024-10-07 09:57:53.402018: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-10-07 09:57:53.403877: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. - 2024-10-07 09:57:53.440490: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-10-07 09:57:54.270302: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - - -.. code:: ipython3 - - model_id = "openbmb/MiniCPM-V-2_6" - - model_dir = convert_minicpmv26(model_id) - - -.. parsed-literal:: - - ⌛ openbmb/MiniCPM-V-2_6 conversion started. Be patient, it may takes some time. - ⌛ Load Original model - - - -.. parsed-literal:: - - Loading checkpoint shards: 0%| | 0/4 [00:00`__ is the +interface between the Transformers and Diffusers libraries and the +different tools and libraries provided by Intel to accelerate end-to-end +pipelines on Intel architectures. + +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. ``optimum-cli`` provides command line interface for +model conversion and optimization. + +General command format: + +.. 
code:: bash + + optimum-cli export openvino --model --task + +where task is the task to export the model for; if not specified, the task +will be auto-inferred based on the model. You can find a mapping between +tasks and model classes in Optimum TaskManager +`documentation `__. +Additionally, you can specify weights compression using +``--weight-format`` argument with one of the following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. For int8 and int4 +`nncf `__ will be used for +weight compression. More details about model export are provided in `Optimum +Intel +documentation `__. Compress Language Model Weights to 4 bits ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -336,50 +176,37 @@ documentation -.. - - **Note:** weights compression process may require additional time and - memory for performing. You can disable it using widget below: - -.. code:: ipython3 - - from minicpm_helper import compression_widget - - to_compress_weights = compression_widget() - - to_compress_weights - - - - -.. parsed-literal:: - - Checkbox(value=True, description='Weights Compression') - - - .. 
code:: ipython3 + from cmd_helper import optimum_cli import nncf - import gc import openvino as ov - - from minicpm_helper import llm_path, copy_llm_files - - - compression_configuration = {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 1.0, "all_layers": True} + import shutil + import gc - core = ov.Core() - llm_int4_path = Path("language_model_int4") / llm_path.name - if to_compress_weights.value and not (model_dir / llm_int4_path).exists(): - ov_model = core.read_model(model_dir / llm_path) + def compress_lm_weights(model_dir): + compression_configuration = {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64, "ratio": 1.0, "all_layers": True} + ov_model_path = model_dir / "openvino_language_model.xml" + ov_int4_model_path = model_dir / "openvino_language_model_int4.xml" + ov_model = ov.Core().read_model(ov_model_path) ov_compressed_model = nncf.compress_weights(ov_model, **compression_configuration) - ov.save_model(ov_compressed_model, model_dir / llm_int4_path) + ov.save_model(ov_compressed_model, ov_int4_model_path) del ov_compressed_model del ov_model gc.collect() - copy_llm_files(model_dir, llm_int4_path.parent) + ov_model_path.unlink() + ov_model_path.with_suffix(".bin").unlink() + shutil.move(ov_int4_model_path, ov_model_path) + shutil.move(ov_int4_model_path.with_suffix(".bin"), ov_model_path.with_suffix(".bin")) + + + model_id = "openbmb/MiniCPM-V-2_6" + model_dir = Path(model_id.split("/")[-1] + "-ov") + + if not model_dir.exists(): + optimum_cli(model_id, model_dir, additional_args={"trust-remote-code": "", "weight-format": "fp16"}) + compress_lm_weights(model_dir) .. parsed-literal:: @@ -394,32 +221,27 @@ Prepare model inference pipeline .. image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/2727402e-3697-442e-beca-26b149967c84 -As discussed, the model comprises Image Encoder and LLM (with separated -text embedding part) that generates answer. 
In ``minicpm_helper.py`` we -defined LLM inference class ``OvModelForCausalLMWithEmb`` that will -represent generation cycle, It is based on `HuggingFace Transformers -GenerationMixin `__ -and looks similar to `Optimum -Intel `__ -``OVModelForCausalLM``\ that is used for LLM inference with only -difference that it can accept input embedding. In own turn, general -multimodal model class ``OvMiniCPMVModel`` handles chatbot functionality -including image processing and answer generation using LLM. - -.. code:: ipython3 - - from minicpm_helper import OvModelForCausalLMWithEmb, OvMiniCPMV, init_model # noqa: F401 - - # uncomment the line to see model inference class - # ??OVMiniCPMV - - # uncomment the line to see language model inference class - # ??OvModelForCausalLMWithEmb - -Run OpenVINO model inference ----------------------------- - - +`OpenVINO™ GenAI `__ +is a library of the most popular Generative AI model pipelines, +optimized execution methods, and samples that run on top of highly +performant `OpenVINO +Runtime `__. + +This library is friendly to PC and laptop execution, and optimized for +resource consumption. It requires no external dependencies to run +generative models as it already includes all the core functionality +(e.g. tokenization via openvino-tokenizers). OpenVINO™ GenAI is a flavor +of OpenVINO™, aiming to simplify running inference of generative AI +models. It hides the complexity of the generation process and minimizes +the amount of code required. + +Inference Visual language models can be implemented using OpenVINO GenAI +``VLMPipeline`` class. Similarly to LLMPipeline, that we discussed in +this +`notebook `__. +It supports chat mode with preserving conversational history inside +pipeline, that allows us effectively implements chatbot that supports +conversation about provided images content. Select device ~~~~~~~~~~~~~ @@ -443,46 +265,78 @@ Select device -Select language model variant -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - .. 
code:: ipython3 - from minicpm_helper import lm_variant_selector - + import openvino_genai as ov_genai - use_int4_lang_model = lm_variant_selector(model_dir / llm_int4_path) - - use_int4_lang_model - - + ov_model = ov_genai.VLMPipeline(model_dir, device=device.value) +Run OpenVINO model inference +---------------------------- -.. parsed-literal:: - Checkbox(value=True, description='INT4 language model') +For preparing input data, ``VLMPipeline`` uses a tokenizer and image +processor internally; we just need to convert the image to an input OpenVINO tensor +and provide the question as a string. Additionally, we can provide options +for controlling the generation process (e.g. the maximum number of generated +tokens or using multinomial sampling for decoding instead of greedy +search approach) using ``GenerationConfig``. +The generation process for a long response may be time consuming. To +access partial results as soon as they are generated, without waiting for the +whole process to finish, the Streaming API can be used. Token streaming is +the mode in which the generative system returns the tokens one by one as +the model generates them. This enables showing progressive generations +to the user rather than waiting for the whole generation. Streaming is +an essential aspect of the end-user experience as it reduces latency, +one of the most critical aspects of a smooth experience. .. code:: ipython3 - ov_model = init_model(model_dir, llm_path.parent if not use_int4_lang_model.value else llm_int4_path.parent, device.value) - - -.. 
parsed-literal:: - - applied slice for lm head + import requests + from PIL import Image + from io import BytesIO + import numpy as np + image_path = "cat.png" + + + config = ov_genai.GenerationConfig() + config.max_new_tokens = 100 + + + def load_image(image_file): + if isinstance(image_file, str) and (image_file.startswith("http") or image_file.startswith("https")): + response = requests.get(image_file) + image = Image.open(BytesIO(response.content)).convert("RGB") + else: + image = Image.open(image_file).convert("RGB") + image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.byte) + return image, ov.Tensor(image_data) + + + def streamer(subword: str) -> bool: + """ + + Args: + subword: sub-word of the generated text. + + Returns: Return flag corresponds whether generation should be stopped. + + """ + print(subword, end="", flush=True) + + + if not Path(image_path).exists(): + url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" + image = Image.open(requests.get(url, stream=True).raw) + image.save(image_path) .. code:: ipython3 - import requests - from PIL import Image + image, image_tensor = load_image(image_path) - url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" - image = Image.open(requests.get(url, stream=True).raw) question = "What is unusual on this image?" print(f"Question:\n{question}") @@ -497,30 +351,19 @@ Select language model variant -.. image:: minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_17_1.png +.. image:: minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_12_1.png .. 
code:: ipython3 - tokenizer = ov_model.processor.tokenizer - - msgs = [{"role": "user", "content": question}] - - - print("Answer:") - res = ov_model.chat(image=image, msgs=msgs, context=None, tokenizer=tokenizer, sampling=False, stream=True, max_new_tokens=50) - - generated_text = "" - for new_text in res: - generated_text += new_text - print(new_text, flush=True, end="") + ov_model.start_chat() + output = ov_model.generate(question, image=image_tensor, generation_config=config, streamer=streamer) .. parsed-literal:: - Answer: - The unusual aspect of this image is the cat's relaxed and vulnerable position. Typically, cats avoid exposing their bellies to potential threats or dangers because it leaves them open for attack by predators in nature; however here we see a domesticated pet comfortably lying + The unusual aspect of this image is the cat's relaxed and vulnerable position. Typically, cats avoid exposing their bellies, which are sensitive and vulnerable areas, to potential threats. In this image, the cat is lying on its back in a cardboard box, exposing its belly and hindquarters, which is not a common sight. This behavior could indicate that the cat feels safe and comfortable in its environment, suggesting a strong bond with its owner and a sense of security in its home. 
Interactive demo ---------------- diff --git a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_17_1.jpg b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_12_1.jpg similarity index 100% rename from docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_17_1.jpg rename to docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_12_1.jpg diff --git a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_17_1.png b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_12_1.png similarity index 100% rename from docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_17_1.png rename to docs/notebooks/minicpm-v-multimodal-chatbot-with-output_files/minicpm-v-multimodal-chatbot-with-output_12_1.png diff --git a/docs/notebooks/mllama-3.2-with-output.rst b/docs/notebooks/mllama-3.2-with-output.rst index ba338d67dc677e..19ebd2d658174e 100644 --- a/docs/notebooks/mllama-3.2-with-output.rst +++ b/docs/notebooks/mllama-3.2-with-output.rst @@ -53,9 +53,9 @@ Prerequisites .. code:: ipython3 - %pip install -q "torch>=2.1" "torchvision" "Pillow" "tqdm" "datasets>=2.14.6" "gradio>=4.36" "nncf>=2.13.0" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "torch>=2.1" "torchvision" "Pillow" "tqdm" "datasets>=2.14.6" "gradio>=4.36" "nncf>=2.14.0" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "transformers>=4.45" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -Uq --pre "openvino>2024.4.0" --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + %pip install -Uq "openvino>=2024.5.0" .. 
code:: ipython3 diff --git a/docs/notebooks/mobileclip-video-search-with-output.rst b/docs/notebooks/mobileclip-video-search-with-output.rst index a606830470aa94..6c195540cda7d7 100644 --- a/docs/notebooks/mobileclip-video-search-with-output.rst +++ b/docs/notebooks/mobileclip-video-search-with-output.rst @@ -62,39 +62,152 @@ Prerequisites .. code:: ipython3 - from pathlib import Path + import requests + + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", + ) + open("cmd_helper.py", "w").write(r.text) - repo_dir = Path("./ml-mobileclip") - if not repo_dir.exists(): - !git clone https://github.com/apple/ml-mobileclip.git .. parsed-literal:: - Cloning into 'ml-mobileclip'... - remote: Enumerating objects: 95, done. - remote: Counting objects: 100% (95/95), done. - remote: Compressing objects: 100% (66/66), done. - remote: Total 95 (delta 38), reused 85 (delta 28), pack-reused 0 (from 0) - Unpacking objects: 100% (95/95), 469.11 KiB | 3.13 MiB/s, done. + 1491 + + + +.. code:: ipython3 + + from cmd_helper import clone_repo + + + clone_repo("https://github.com/apple/ml-mobileclip.git") + + + + +.. parsed-literal:: + + PosixPath('ml-mobileclip') + .. 
code:: ipython3 %pip install -q "./ml-mobileclip" --no-deps - %pip install -q "clip-benchmark>=1.4.0" "datasets>=2.8.0" "open-clip-torch>=2.20.0" "timm>=0.9.5" "torch>=1.13.1" "torchvision>=0.14.1" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "clip-benchmark>=1.4.0" "datasets>=2.8.0" "open-clip-torch>=2.20.0" "timm>=0.9.5" "torch>=2.5.0" "torchvision>=0.20.0" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2024.0.0" "gradio>=4.19" "matplotlib" "Pillow" "altair" "pandas" "opencv-python" "tqdm" "matplotlib>=3.4" + %pip install -q "matplotlib>=3.4" "Pillow" "altair" "pandas" "tqdm" "salesforce-lavis==1.0.2" .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. + ERROR: Could not find a version that satisfies the requirement torch>=2.5.0 (from versions: 1.4.0, 1.4.0+cpu, 1.5.0, 1.5.0+cpu, 1.5.1, 1.5.1+cpu, 1.6.0, 1.6.0+cpu, 1.7.0, 1.7.0+cpu, 1.7.1, 1.7.1+cpu, 1.8.0, 1.8.0+cpu, 1.8.1, 1.8.1+cpu, 1.9.0, 1.9.0+cpu, 1.9.1, 1.9.1+cpu, 1.10.0, 1.10.0+cpu, 1.10.1, 1.10.1+cpu, 1.10.2, 1.10.2+cpu, 1.11.0, 1.11.0+cpu, 1.12.0, 1.12.0+cpu, 1.12.1, 1.12.1+cpu, 1.13.0, 1.13.0+cpu, 1.13.1, 1.13.1+cpu, 2.0.0, 2.0.0+cpu, 2.0.1, 2.0.1+cpu, 2.1.0, 2.1.0+cpu, 2.1.1, 2.1.1+cpu, 2.1.2, 2.1.2+cpu, 2.2.0, 2.2.0+cpu, 2.2.1, 2.2.1+cpu, 2.2.2, 2.2.2+cpu, 2.3.0, 2.3.0+cpu, 2.3.1, 2.3.1+cpu, 2.4.0, 2.4.0+cpu, 2.4.1, 2.4.1+cpu) + ERROR: No matching distribution found for torch>=2.5.0 Note: you may need to restart the kernel to use updated packages. + error: subprocess-exited-with-error + + × pip subprocess to install build dependencies did not run successfully. 
+ │ exit code: 1 + ╰─> [68 lines of output] + Ignoring numpy: markers 'python_version >= "3.9"' don't match your environment + Collecting setuptools + Using cached setuptools-75.3.0-py3-none-any.whl.metadata (6.9 kB) + Collecting cython<3.0,>=0.25 + Using cached Cython-0.29.37-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (3.1 kB) + Collecting cymem<2.1.0,>=2.0.2 + Using cached cymem-2.0.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB) + Collecting preshed<3.1.0,>=3.0.2 + Using cached preshed-3.0.9-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.2 kB) + Collecting murmurhash<1.1.0,>=0.28.0 + Using cached murmurhash-1.0.10-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.0 kB) + Collecting thinc<8.4.0,>=8.3.0 + Using cached thinc-8.3.2.tar.gz (193 kB) + Installing build dependencies: started + Installing build dependencies: finished with status 'error' + error: subprocess-exited-with-error + + × pip subprocess to install build dependencies did not run successfully. 
+ │ exit code: 1 + ╰─> [38 lines of output] + Ignoring numpy: markers 'python_version >= "3.9"' don't match your environment + Collecting setuptools + Using cached setuptools-75.3.0-py3-none-any.whl.metadata (6.9 kB) + Collecting cython<3.0,>=0.25 + Using cached Cython-0.29.37-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (3.1 kB) + Collecting murmurhash<1.1.0,>=1.0.2 + Using cached murmurhash-1.0.10-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.0 kB) + Collecting cymem<2.1.0,>=2.0.2 + Using cached cymem-2.0.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB) + Collecting preshed<3.1.0,>=3.0.2 + Using cached preshed-3.0.9-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.2 kB) + Collecting blis<1.1.0,>=1.0.0 + Using cached blis-1.0.1.tar.gz (3.6 MB) + Installing build dependencies: started + Installing build dependencies: finished with status 'error' + error: subprocess-exited-with-error + + × pip subprocess to install build dependencies did not run successfully. 
+ │ exit code: 1 + ╰─> [8 lines of output] + Collecting setuptools + Using cached setuptools-75.3.0-py3-none-any.whl.metadata (6.9 kB) + Collecting cython>=0.25 + Using cached Cython-3.0.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.2 kB) + ERROR: Ignored the following versions that require a different python version: 1.25.0 Requires-Python >=3.9; 1.25.1 Requires-Python >=3.9; 1.25.2 Requires-Python >=3.9; 1.26.0 Requires-Python <3.13,>=3.9; 1.26.1 Requires-Python <3.13,>=3.9; 1.26.2 Requires-Python >=3.9; 1.26.3 Requires-Python >=3.9; 1.26.4 Requires-Python >=3.9; 2.0.0 Requires-Python >=3.9; 2.0.1 Requires-Python >=3.9; 2.0.2 Requires-Python >=3.9; 2.1.0 Requires-Python >=3.10; 2.1.0rc1 Requires-Python >=3.10; 2.1.1 Requires-Python >=3.10; 2.1.2 Requires-Python >=3.10; 2.1.3 Requires-Python >=3.10; 75.4.0 Requires-Python >=3.9; 75.5.0 Requires-Python >=3.9; 75.6.0 Requires-Python >=3.9 + ERROR: Could not find a version that satisfies the requirement numpy<3.0.0,>=2.0.0 (from versions: 1.3.0, 1.4.1, 1.5.0, 1.5.1, 1.6.0, 1.6.1, 1.6.2, 1.7.0, 1.7.1, 1.7.2, 1.8.0, 1.8.1, 1.8.2, 1.9.0, 1.9.1, 1.9.2, 1.9.3, 1.10.0.post2, 1.10.1, 1.10.2, 1.10.4, 1.11.0, 1.11.1, 1.11.2, 1.11.3, 1.12.0, 1.12.1, 1.13.0, 1.13.1, 1.13.3, 1.14.0, 1.14.1, 1.14.2, 1.14.3, 1.14.4, 1.14.5, 1.14.6, 1.15.0, 1.15.1, 1.15.2, 1.15.3, 1.15.4, 1.16.0, 1.16.1, 1.16.2, 1.16.3, 1.16.4, 1.16.5, 1.16.6, 1.17.0, 1.17.1, 1.17.2, 1.17.3, 1.17.4, 1.17.5, 1.18.0, 1.18.1, 1.18.2, 1.18.3, 1.18.4, 1.18.5, 1.19.0, 1.19.1, 1.19.2, 1.19.3, 1.19.4, 1.19.5, 1.20.0, 1.20.1, 1.20.2, 1.20.3, 1.21.0, 1.21.1, 1.21.2, 1.21.3, 1.21.4, 1.21.5, 1.21.6, 1.22.0, 1.22.1, 1.22.2, 1.22.3, 1.22.4, 1.23.0, 1.23.1, 1.23.2, 1.23.3, 1.23.4, 1.23.5, 1.24.0, 1.24.1, 1.24.2, 1.24.3, 1.24.4) + ERROR: No matching distribution found for numpy<3.0.0,>=2.0.0 + + [end of output] + + note: This error originates from a subprocess, and is likely not a problem with pip. 
+ error: subprocess-exited-with-error + + × pip subprocess to install build dependencies did not run successfully. + │ exit code: 1 + ╰─> See above for output. + + note: This error originates from a subprocess, and is likely not a problem with pip. + [end of output] + + note: This error originates from a subprocess, and is likely not a problem with pip. + error: subprocess-exited-with-error + + × pip subprocess to install build dependencies did not run successfully. + │ exit code: 1 + ╰─> See above for output. + + note: This error originates from a subprocess, and is likely not a problem with pip. + [end of output] + + note: This error originates from a subprocess, and is likely not a problem with pip. + error: subprocess-exited-with-error + + × pip subprocess to install build dependencies did not run successfully. + │ exit code: 1 + ╰─> See above for output. + + note: This error originates from a subprocess, and is likely not a problem with pip. + Note: you may need to restart the kernel to use updated packages. + + +.. code:: ipython3 + + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" "openvino>=2024.0.0" "altair" "opencv-python" "opencv-contrib-python" "gradio>=4.19" + + +.. parsed-literal:: + Note: you may need to restart the kernel to use updated packages. @@ -138,13 +251,37 @@ comparison purposes, you can select different models among: faster and 2.8x smaller. More details about model can be found in `research paper `__ and `GitHub repository `__. +- **BLIP-2** - BLIP2 was introduced in the paper `BLIP-2: Bootstrapping + Language-Image Pre-training with Frozen Image Encoders and Large + Language Models `__ by Li et + al. and first released in this + `repository `__. + It is a generic and efficient pre-training strategy that easily + harvests development of pretrained vision models and large language + models (LLMs) for vision-language pretraining. 
BLIP-2 consists of 3 + models: a CLIP-like image encoder, a Querying Transformer (Q-Former) + and a large language model. .. code:: ipython3 + from pathlib import Path + import ipywidgets as widgets + model_dir = Path("checkpoints") + + def default_image_probs(image_features, text_features): + image_probs = (100.0 * text_features @ image_features.T).softmax(dim=-1) + return image_probs + + + def blip2_image_probs(image_features, text_features): + image_probs = image_features[:, 0, :] @ text_features[:, 0, :].t() + return image_probs + + supported_models = { "MobileCLIP": { "mobileclip_s0": { @@ -152,30 +289,35 @@ comparison purposes, you can select different models among: "pretrained": model_dir / "mobileclip_s0.pt", "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/mobileclip_s0.pt", "image_size": 256, + "image_probs": default_image_probs, }, "mobileclip_s1": { "model_name": "mobileclip_s1", "pretrained": model_dir / "mobileclip_s1.pt", "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/mobileclip_s1.pt", "image_size": 256, + "image_probs": default_image_probs, }, "mobileclip_s2": { "model_name": "mobileclip_s0", "pretrained": model_dir / "mobileclip_s2.pt", "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/mobileclip_s2.pt", "image_size": 256, + "image_probs": default_image_probs, }, "mobileclip_b": { "model_name": "mobileclip_b", "pretrained": model_dir / "mobileclip_b.pt", "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/mobileclip_b.pt", "image_size": 224, + "image_probs": default_image_probs, }, "mobileclip_blt": { "model_name": "mobileclip_b", "pretrained": model_dir / "mobileclip_blt.pt", "url": "https://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/mobileclip_blt.pt", "image_size": 224, + "image_probs": default_image_probs, }, }, "CLIP": { @@ -183,21 +325,25 @@ comparison purposes, you can select different models among: 
"model_name": "ViT-B-32", "pretrained": "laion2b_s34b_b79k", "image_size": 224, + "image_probs": default_image_probs, }, "clip-vit-b-16": { "model_name": "ViT-B-16", "pretrained": "openai", "image_size": 224, + "image_probs": default_image_probs, }, "clip-vit-l-14": { "model_name": "ViT-L-14", "pretrained": "datacomp_xl_s13b_b90k", "image_size": 224, + "image_probs": default_image_probs, }, "clip-vit-h-14": { "model_name": "ViT-H-14", "pretrained": "laion2b_s32b_b79k", "image_size": 224, + "image_probs": default_image_probs, }, }, "SigLIP": { @@ -205,11 +351,21 @@ comparison purposes, you can select different models among: "model_name": "ViT-B-16-SigLIP", "pretrained": "webli", "image_size": 224, + "image_probs": default_image_probs, }, "siglip-vit-l-16": { "model_name": "ViT-L-16-SigLIP-256", "pretrained": "webli", "image_size": 256, + "image_probs": default_image_probs, + }, + }, + "Blip2": { + "blip2_feature_extractor": { + "model_name": "blip2_feature_extractor", + "pretrained": "pretrain_vitL", + "image_size": 224, + "image_probs": blip2_image_probs, }, }, } @@ -223,7 +379,7 @@ comparison purposes, you can select different models among: .. parsed-literal:: - Dropdown(description='Model type:', options=('MobileCLIP', 'CLIP', 'SigLIP'), value='MobileCLIP') + Dropdown(description='Model type:', options=('MobileCLIP', 'CLIP', 'SigLIP', 'Blip2'), value='MobileCLIP') @@ -250,14 +406,6 @@ comparison purposes, you can select different models among: .. code:: ipython3 - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - - open("notebook_utils.py", "w").write(r.text) - from notebook_utils import download_file, device_widget model_config = available_models[model_checkpoint.value] @@ -373,7 +521,7 @@ Prepare image gallery -.. image:: mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_10_4.png +.. 
image:: mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_12_4.png Prepare model @@ -384,23 +532,79 @@ Prepare model The code bellow download model weights, create model class instance and preprocessing utilities +.. code:: ipython3 + + import torch + + + class Blip2Model(torch.nn.Module): + def __init__(self, ln_vision, visual_encoder, query_tokens, q_former, vision_proj, text_proj, tokenizer): + super().__init__() + self.ln_vision = ln_vision + self.visual_encoder = visual_encoder + self.query_tokens = query_tokens + self.q_former = q_former + self.vision_proj = vision_proj + self.text_proj = text_proj + self.tok = tokenizer + + def encode_image(self, image): + image_embeds_frozen = self.ln_vision(self.visual_encoder(image)) + image_embeds_frozen = image_embeds_frozen.float() + image_atts = torch.ones(image_embeds_frozen.size()[:-1], dtype=torch.long) + query_tokens = self.query_tokens.expand(image_embeds_frozen.shape[0], -1, -1) + + query_output = self.q_former.bert( + query_embeds=query_tokens, + encoder_hidden_states=image_embeds_frozen, + encoder_attention_mask=image_atts, + return_dict=True, + ) + image_embeds = query_output.last_hidden_state + image_features = self.vision_proj(image_embeds) + + return image_features + + def encode_text(self, input_ids, attention_mask): + text_output = self.q_former.bert( + input_ids, + attention_mask=attention_mask, + return_dict=True, + ) + text_embeds = text_output.last_hidden_state + text_features = self.text_proj(text_embeds) + return text_features + + def tokenizer(self, text_descriptions): + input_ids = self.tok(text_descriptions, return_tensors="pt", padding=True).input_ids + attention_mask = self.tok(text_descriptions, return_tensors="pt", padding=True).attention_mask + text = {"input_ids": input_ids, "attention_mask": attention_mask} + return text + .. 
code:: ipython3 import torch import time - from PIL import Image import mobileclip import open_clip # instantiate model model_name = model_config["model_name"] pretrained = model_config["pretrained"] + if model_type.value == "MobileCLIP": model_dir.mkdir(exist_ok=True) model_url = model_config["url"] download_file(model_url, directory=model_dir) model, _, preprocess = mobileclip.create_model_and_transforms(model_name, pretrained=pretrained) tokenizer = mobileclip.get_tokenizer(model_name) + elif model_type.value == "Blip2": + from lavis.models import load_model_and_preprocess + + model, vis_processors, txt_processors = load_model_and_preprocess(name=model_name, model_type=pretrained, is_eval=True) + model = Blip2Model(model.ln_vision, model.visual_encoder, model.query_tokens, model.Qformer, model.vision_proj, model.text_proj, model.tokenizer) + preprocess = vis_processors["eval"] + tokenizer = model.tokenizer else: model, _, preprocess = open_clip.create_model_and_transforms(model_name, pretrained=pretrained) tokenizer = open_clip.get_tokenizer(model_name) @@ -408,7 +612,7 @@ preprocessing utilities .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) @@ -427,7 +631,7 @@ Perform search image_tensor = torch.stack([preprocess(image) for image in images]) text = tokenizer(text_descriptions) - + image_probs_function = model_config["image_probs"] with torch.no_grad(): # calculate image embeddings @@ -437,16 +641,13 @@ Perform search print(f"Image encoding took {image_encoding_end - image_encoding_start:.3} ms") # calculate text embeddings text_encoding_start = time.perf_counter() - text_features = model.encode_text(text) + text_features = model.encode_text(**text) if model_type.value == "Blip2" else model.encode_text(text) text_encoding_end = time.perf_counter() print(f"Text encoding took {text_encoding_end - text_encoding_start:.3} ms") - # normalize embeddings image_features /= image_features.norm(dim=-1, keepdim=True) text_features /= text_features.norm(dim=-1, keepdim=True) - - # calcualte similarity score - image_probs = (100.0 * text_features @ image_features.T).softmax(dim=-1) + image_probs = image_probs_function(image_features, text_features) selected_image = [torch.argmax(image_probs).item()] visualize_result(images, input_labels[0], selected_image); @@ -454,12 +655,12 @@ Perform search .. parsed-literal:: - Image encoding took 0.114 ms - Text encoding took 0.0113 ms + Image encoding took 0.0979 ms + Text encoding took 0.0114 ms -.. 
image:: mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_14_1.png +.. image:: mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_17_1.png Convert Model to OpenVINO Intermediate Representation format @@ -522,7 +723,10 @@ be used separately. Let’s convert each part to OpenVINO. if not text_encoder_path.exists(): model.forward = model.encode_text - ov_text_encoder = ov.convert_model(model, example_input=text, input=[-1, text.shape[1]]) + if model_type.value == "Blip2": + ov_text_encoder = ov.convert_model(model, example_input=text) + else: + ov_text_encoder = ov.convert_model(model, example_input=text, input=[-1, text.shape[1]]) ov.save_model(ov_text_encoder, text_encoder_path) del ov_text_encoder gc.collect() @@ -533,7 +737,7 @@ be used separately. Let’s convert each part to OpenVINO. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/mobileclip/modules/common/transformer.py:125: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/mobileclip-video-search/ml-mobileclip/mobileclip/modules/common/transformer.py:125: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if seq_len != self.num_embeddings: @@ -610,7 +814,7 @@ Perform search image_features /= image_features.norm(dim=-1, keepdim=True) text_features /= text_features.norm(dim=-1, keepdim=True) - image_probs = (100.0 * text_features @ image_features.T).softmax(dim=-1) + image_probs = image_probs_function(image_features, text_features) selected_image = [torch.argmax(image_probs).item()] visualize_result(images, input_labels[0], selected_image); @@ -618,12 +822,77 @@ Perform search .. parsed-literal:: - Image encoding took 0.0294 ms - Text encoding took 0.00498 ms + Image encoding took 0.0282 ms + Text encoding took 0.0049 ms + + + +.. image:: mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_28_1.png + + +(optional) Translation model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Since all text embedding models in this notebook natively support input +in English only, we can insert a translation model in this pipeline to +support searching in Chinese. + +- **opus-mt-zh-en** - This is a translation model developed by + Language Technology Research Group at the University of Helsinki. It + supports Chinese as the source language and English as the target language + `model card `__. + +.. code:: ipython3 + + from pathlib import Path + + cn2en_trans_model_path = "ov_models/cn2en_trans_model" + cn2en_trans_model_id = "Helsinki-NLP/opus-mt-zh-en" + + if not Path(cn2en_trans_model_path).exists(): + !optimum-cli export openvino --model {cn2en_trans_model_id} --task text2text-generation-with-past --trust-remote-code {cn2en_trans_model_path} + + +.. parsed-literal:: + + 2024-11-22 01:36:23.757087: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2024-11-22 01:36:23.781523: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/marian/tokenization_marian.py:175: UserWarning: Recommended: pip install sacremoses. + warnings.warn("Recommended: pip install sacremoses.") + Moving the following attributes in the config to the generation config: {'max_length': 512, 'num_beams': 6, 'bad_words_ids': [[65000]]}. You are seeing this warning because you've set generation parameters in the model config, as opposed to in the generation config. + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/marian/modeling_marian.py:207: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/marian/modeling_marian.py:214: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + if attention_mask.size() != (bsz, 1, tgt_len, src_len): + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/marian/modeling_marian.py:246: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if input_shape[-1] > 1 or self.sliding_window is not None: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if past_key_values_length > 0: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/marian/modeling_marian.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if ( + Exporting tokenizers to OpenVINO is not supported for tokenizers version > 0.19 and openvino version <= 2024.4. Please downgrade to tokenizers version <= 0.19 to export tokenizers to OpenVINO. + + +.. code:: ipython3 + + from transformers import AutoTokenizer + from optimum.intel import OVModelForSeq2SeqLM + + tr_tokenizer = AutoTokenizer.from_pretrained(cn2en_trans_model_path) + tr_model = OVModelForSeq2SeqLM.from_pretrained(cn2en_trans_model_path) +.. parsed-literal:: -.. image:: mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_25_1.png + 2024-11-22 01:36:43.187797: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-22 01:36:43.213112: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/marian/tokenization_marian.py:175: UserWarning: Recommended: pip install sacremoses. + warnings.warn("Recommended: pip install sacremoses.") Interactive Demo @@ -634,7 +903,9 @@ Interactive Demo In this part, you can try different supported by tutorial models in searching frames in the video by text query or image. 
 Upload video and provide text query or reference image for search and model will find the -most relevant frames according to provided query. Please note, different +most relevant frames according to provided query. You can also try +querying in Chinese, and the translation model will be triggered +automatically for Chinese-to-English translation. Please note, different models can require different optimal threshold for search. .. code:: ipython3 @@ -674,7 +945,22 @@ models can require different optimal threshold for search. ) - def get_preprocess_and_tokenizer(model_name): + def is_english(text): + for char in text: + if not char.isascii(): + return False + return True + + + def translate(text): + if tr_tokenizer: + t = tr_tokenizer(text, return_tensors="pt") + r = tr_model.generate(**t) + text = tr_tokenizer.decode(r[0][1:-1]) + return text + + + def get_preprocess_probs_tokenizer(model_name): if "mobileclip" in model_name: resolution = supported_models["MobileCLIP"][model_name]["image_size"] resize_size = resolution @@ -689,13 +975,23 @@ models can require different optimal threshold for search.
] preprocess = Compose(aug_list) tokenizer = mobileclip.get_tokenizer(supported_models["MobileCLIP"][model_name]["model_name"]) + image_probs = default_image_probs + elif "blip2" in model_name: + from lavis.models import load_model_and_preprocess + + model, vis_processors, txt_processors = load_model_and_preprocess(name=model_name, model_type=pretrained, is_eval=True) + model = Blip2Model(model.ln_vision, model.visual_encoder, model.query_tokens, model.Qformer, model.vision_proj, model.text_proj, model.tokenizer) + preprocess = vis_processors["eval"] + tokenizer = model.tokenizer + image_probs = blip2_image_probs else: model_configs = supported_models["SigLIP"] if "siglip" in model_name else supported_models["CLIP"] resize_size = model_configs[model_name]["image_size"] preprocess = image_transform((resize_size, resize_size), is_train=False, resize_mode="longest") tokenizer = open_clip.get_tokenizer(model_configs[model_name]["model_name"]) + image_probs = default_image_probs - return preprocess, tokenizer + return preprocess, image_probs, tokenizer def run( @@ -716,11 +1012,12 @@ models can require different optimal threshold for search. global tokenizer global ov_compiled_image_encoder global ov_compiled_text_encoder + global image_probs_function if current_model != model_name or device != current_device: ov_compiled_image_encoder = core.compile_model(ov_models_dir / f"{model_name}_im_encoder.xml", device) ov_compiled_text_encoder = core.compile_model(ov_models_dir / f"{model_name}_text_encoder.xml", device) - preprocess, tokenizer = get_preprocess_and_tokenizer(model_name) + preprocess, image_probs_function, tokenizer = get_preprocess_probs_tokenizer(model_name) current_model = model_name current_device = device # Load video @@ -734,6 +1031,9 @@ models can require different optimal threshold for search. 
query_features /= query_features.norm(dim=-1, keepdim=True) # Get text query features else: + if not is_english(text_search): + text_search = translate(text_search) + print(f"Translated input text: {text_search}") # Tokenize search phrase text = tokenizer([text_search]) # Encode text query @@ -748,9 +1048,8 @@ models can require different optimal threshold for search. image_features = torch.from_numpy(ov_compiled_image_encoder(image)[0]) image_features /= image_features.norm(dim=-1, keepdim=True) - probs = query_features.cpu().numpy() @ image_features.cpu().numpy().T - probs = probs[0] - + probs = image_probs_function(image_features, query_features) + probs = probs.cpu().numpy().squeeze(1) if "blip2" in model_name else probs[0] # Save frame similarity values df = pd.DataFrame( { diff --git a/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_10_4.png b/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_10_4.png deleted file mode 100644 index 3097711be91501..00000000000000 --- a/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_10_4.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:012ab44a6e4292be32171ccb588f72c75c17a662e04cf27f271e5ddd33c89b99 -size 627462 diff --git a/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_12_4.png b/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_12_4.png new file mode 100644 index 00000000000000..1ae3f7b2579a93 --- /dev/null +++ b/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_12_4.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3a4911e0d1407c8830ec6c68e3b24190f1a49da24b7532db29d77b298e36af4 +size 627462 diff --git 
a/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_14_1.png b/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_14_1.png deleted file mode 100644 index 7d60b0ba72dd72..00000000000000 --- a/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_14_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fc816c6ac9360e9432eb81eca13dd8f4afa0c0ad90312c876fad89dbbb80a65e -size 449871 diff --git a/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_17_1.png b/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_17_1.png new file mode 100644 index 00000000000000..4a223a2ea61f46 --- /dev/null +++ b/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_17_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59965056d04847ce7a28c35bc515102682954ca33d8b0dc43f7d54dc6d677f18 +size 449871 diff --git a/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_25_1.png b/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_25_1.png deleted file mode 100644 index 7d60b0ba72dd72..00000000000000 --- a/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_25_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fc816c6ac9360e9432eb81eca13dd8f4afa0c0ad90312c876fad89dbbb80a65e -size 449871 diff --git a/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_28_1.png b/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_28_1.png new file mode 100644 index 00000000000000..4a223a2ea61f46 --- /dev/null +++ 
b/docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_28_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59965056d04847ce7a28c35bc515102682954ca33d8b0dc43f7d54dc6d677f18 +size 449871 diff --git a/docs/notebooks/mobilevlm-language-assistant-with-output.rst b/docs/notebooks/mobilevlm-language-assistant-with-output.rst deleted file mode 100644 index 5902537e3026a5..00000000000000 --- a/docs/notebooks/mobilevlm-language-assistant-with-output.rst +++ /dev/null @@ -1,765 +0,0 @@ -Mobile language assistant with MobileVLM and OpenVINO -===================================================== - -`MobileVLM `__ is a competent -multimodal vision language model (MMVLM) targeted to run on mobile -devices. It is an amalgamation of a myriad of architectural designs and -techniques that are mobile-oriented, which comprises a set of language -models at the scale of 1.4B and 2.7B parameters, trained from scratch, a -multimodal vision model that is pre-trained in the CLIP fashion, -cross-modality interaction via an efficient projector. - -|image0| - -The MobileVLM architecture (right) utilizes -`MobileLLaMA `__ as -its language model, intakes :math:`\mathbf{X}_v` and -:math:`\mathbf{X}_q` which are image and language instructions as -respective inputs and gives :math:`\mathbf{Y}_a` as the output language -response. LDP refers to a lightweight downsample projector (left). - -See more information on official -`GitHub `__ project page -and `paper `__. 
- - -**Table of contents:** - - -- `Install requirements <#install-requirements>`__ -- `Clone MobileVLM repository <#clone-mobilevlm-repository>`__ -- `Import required packages <#import-required-packages>`__ -- `Load the model <#load-the-model>`__ -- `Convert model to OpenVINO Intermediate Representation - (IR) <#convert-model-to-openvino-intermediate-representation-ir>`__ -- `Inference <#inference>`__ - - - `Load OpenVINO model <#load-openvino-model>`__ - - `Prepare input data <#prepare-input-data>`__ - - `Run generation process <#run-generation-process>`__ - -- `Interactive inference <#interactive-inference>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. - -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. - -.. |image0| image:: https://github.com/Meituan-AutoML/MobileVLM/raw/main/assets/mobilevlm_arch.png - -Install requirements --------------------- - - - -.. code:: ipython3 - - %pip install -q "torch>=2.1.0" "timm>=0.9.12" --extra-index-url "https://download.pytorch.org/whl/cpu" - %pip install -q "transformers>=4.33.1,<4.35.0" accelerate "sentencepiece>=0.1.99" "openvino>=2023.2.0" "nncf>=2.7.0" ipywidgets numpy "gradio>=4.19" - - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. - Note: you may need to restart the kernel to use updated packages. - - -Clone MobileVLM repository --------------------------- - - - -.. 
code:: ipython3 - - from pathlib import Path - import sys - - MOBILEVLM_REPO_DIR = Path("./MobileVLM") - if not MOBILEVLM_REPO_DIR.exists(): - !git clone -q "https://github.com/Meituan-AutoML/MobileVLM.git" - sys.path.insert(0, str(MOBILEVLM_REPO_DIR)) - -Import required packages ------------------------- - - - -.. code:: ipython3 - - import warnings - import itertools - import gc - from typing import Optional, List, Tuple - - from mobilevlm.model.mobilevlm import load_pretrained_model - from mobilevlm.conversation import conv_templates, SeparatorStyle - from mobilevlm.utils import ( - disable_torch_init, - process_images, - tokenizer_image_token, - KeywordsStoppingCriteria, - ) - from mobilevlm.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN - import PIL - import torch - import transformers - import numpy as np - import gradio as gr - import openvino as ov - import nncf - import ipywidgets as widgets - - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. - torch.utils._pytree._register_pytree_node( - 2024-11-05 02:02:06.143728: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-05 02:02:06.177889: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-11-05 02:02:06.679118: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. - torch.utils._pytree._register_pytree_node( - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino - - -.. code:: ipython3 - - MODELS_DIR = Path("./models") - MODEL_PATH = "mtgv/MobileVLM-1.7B" - - TEMPERATURE = 0.2 - TOP_P = None - NUM_BEAMS = 1 - MAX_NEW_TOKENS = 512 - - IMAGE_PATH = MOBILEVLM_REPO_DIR / "assets" / "samples" / "demo.jpg" - PROMPT_STR = "Who is the author of this book?\nAnswer the question using a single word or phrase." - -Load the model --------------- - - - -To load the model, we use pre-defined ``load_pretrained_model`` function -in ``mobilevlm`` module. It returns the model itself, tokenizer, and -image processor to convert images to appropriate tensors. - -.. code:: ipython3 - - model_name = MODEL_PATH.split("/")[-1] - disable_torch_init() - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - tokenizer, model, image_processor, _ = load_pretrained_model(MODEL_PATH, device="cpu") - model = model.to(dtype=torch.float32) - - -.. parsed-literal:: - - You are resizing the embedding layer without providing a `pad_to_multiple_of` parameter. This means that the new embedding dimension will be 32000. This might induce some performance reduction as *Tensor Cores* will not be available. 
For more details about this, or help on choosing the correct value for resizing, refer to this guide: https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc - - -Convert model to OpenVINO Intermediate Representation (IR) ----------------------------------------------------------- - - - -.. code:: ipython3 - - def cleanup_torchscript_cache(): - """ - Helper for removing cached model representation - """ - torch._C._jit_clear_class_registry() - torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() - torch.jit._state._clear_class_state() - -For reducing memory consumption, weights compression optimization can be -applied using `NNCF `__. Weight -compression aims to reduce the memory footprint of a model. It can also -lead to significant performance improvement for large memory-bound -models, such as Large Language Models (LLMs). LLMs and other models, -which require extensive memory to store the weights during inference, -can benefit from weight compression in the following ways: - -- enabling the inference of exceptionally large models that cannot be - accommodated in the memory of the device; - -- improving the inference performance of the models by reducing the - latency of the memory access when computing the operations with - weights, for example, Linear layers. - -`Neural Network Compression Framework -(NNCF) `__ provides 4-bit / -8-bit mixed weight quantization as a compression method primarily -designed to optimize LLMs. The main difference between weights -compression and full model quantization (post-training quantization) is -that activations remain floating-point in the case of weights -compression which leads to a better accuracy. Weight compression for -LLMs provides a solid inference performance improvement which is on par -with the performance of the full model quantization. 
In addition, weight -compression is data-free and does not require a calibration dataset, -making it easy to use. - -``nncf.compress_weights`` function can be used for performing weights -compression. The function accepts an OpenVINO model and other -compression parameters. Compared to INT8 compression, INT4 compression -improves performance even more, but introduces a minor drop in -prediction quality. - -More details about weights compression, can be found in `OpenVINO -documentation `__. - -Please select below whether you would like to run INT4 weight -compression instead of INT8 weight compression. - -.. code:: ipython3 - - compression_mode = widgets.Dropdown( - options=["INT4", "INT8"], - value="INT4", - description="Compression mode:", - disabled=False, - ) - - compression_mode - - - - -.. parsed-literal:: - - Dropdown(description='Compression mode:', options=('INT4', 'INT8'), value='INT4') - - - -.. code:: ipython3 - - stage1_xml_path = MODELS_DIR / f"stage1_{compression_mode.value}.xml" - stage2_xml_path = MODELS_DIR / f"stage2_{compression_mode.value}.xml" - -.. code:: ipython3 - - if compression_mode.value == "INT4": - wc_parameters = dict(mode=nncf.CompressWeightsMode.INT4_ASYM, group_size=128, ratio=0.8) - else: - wc_parameters = dict(mode=nncf.CompressWeightsMode.INT8) - -.. code:: ipython3 - - class ModelWrapper(torch.nn.Module): - def __init__(self, model): - super().__init__() - self.model = model - - def forward( - self, - input_ids: torch.LongTensor = None, - attention_mask: Optional[torch.Tensor] = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, - inputs_embeds: Optional[torch.FloatTensor] = None, - ): - outputs = self.model.model( - input_ids=input_ids, - attention_mask=attention_mask, - past_key_values=past_key_values, - inputs_embeds=inputs_embeds, - ) - hidden_states = outputs[0] - logits = self.model.lm_head(hidden_states) - - return (logits,) + outputs[1:] - -.. 
code:: ipython3 - - def set_input_names(model, past_key_values): - input_names = [ - "input_ids", - "attention_mask", - *itertools.chain.from_iterable([f"past_key_values.{idx}.key", f"past_key_values.{idx}.value"] for idx, _ in enumerate(past_key_values)), - ] - assert len(input_names) == len(model.inputs) - for _input, input_name in zip(model.inputs, input_names): - _input.get_tensor().set_names({input_name}) - -.. code:: ipython3 - - def set_output_names(model, past_key_values): - output_names = [ - "logits", - *itertools.chain.from_iterable([f"present.{idx}.key", f"present.{idx}.value"] for idx, _ in enumerate(past_key_values)), - ] - assert len(output_names) == len(model.outputs) - for out, out_name in zip(ov_model.outputs, output_names): - out.get_tensor().set_names({out_name}) - -.. code:: ipython3 - - example_input = { - "inputs_embeds": torch.zeros((1, 205, 2048)), - "attention_mask": torch.ones((1, 205), dtype=torch.long), - } - - wrapped = ModelWrapper(model) - past_key_values = wrapped(**example_input)[1] - - if not stage1_xml_path.exists(): - ov_model = ov.convert_model(wrapped, example_input=example_input) - set_output_names(ov_model, past_key_values) - ov_model = nncf.compress_weights(ov_model, **wc_parameters) - ov.save_model(ov_model, stage1_xml_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - - -.. parsed-literal:: - - WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. - - -.. parsed-literal:: - - [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - - -.. parsed-literal:: - - WARNING:nncf:NNCF provides best results with torch==2.4.*, while current torch version is 2.2.2+cpu. If you encounter issues, consider switching to torch==2.4.* - - -.. 
parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:595: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if input_shape[-1] > 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if seq_len > self.max_seq_len_cached: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:348: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:355: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! - if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py:365: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! - if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): - - - -.. parsed-literal:: - - Output() - - - - - - - - - -.. parsed-literal:: - - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 24% (43 / 169) │ 20% (42 / 168) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 76% (126 / 169) │ 80% (126 / 168) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - - - -.. parsed-literal:: - - Output() - - - - - - - - - -.. 
code:: ipython3 - - example_input = { - "input_ids": torch.ones((1, 1), dtype=torch.long), - "past_key_values": past_key_values, - "attention_mask": torch.ones((1, past_key_values[-1][-1].shape[-2] + 1), dtype=torch.long), - } - - if not stage2_xml_path.exists(): - ov_model = ov.convert_model( - wrapped, - example_input=example_input, - ) - set_input_names(ov_model, past_key_values) - set_output_names(ov_model, past_key_values) - ov_model = nncf.compress_weights(ov_model, **wc_parameters) - ov.save_model(ov_model, stage2_xml_path) - cleanup_torchscript_cache() - del ov_model - gc.collect() - - -.. parsed-literal:: - - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) - if a.grad is not None: - - - -.. parsed-literal:: - - Output() - - - - - - - - - -.. 
parsed-literal:: - - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 28% (44 / 170) │ 20% (42 / 168) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 72% (126 / 170) │ 80% (126 / 168) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - - - -.. parsed-literal:: - - Output() - - - - - - - - - -.. code:: ipython3 - - prepare_inputs_labels_for_multimodal = model.prepare_inputs_labels_for_multimodal - prepare_inputs_for_generation = model.prepare_inputs_for_generation - config = model.config - config.save_pretrained(MODELS_DIR) - -.. code:: ipython3 - - del wrapped - del model - gc.collect(); - -Inference ---------- - - - -``OVMobileLlamaForCausalLM`` class provides ease-to-use interface for -using model in generation scenario. It is based on -``transformers.generation.GenerationMixin`` that gives us opportunity to -reuse all reach capabilities for generation implemented in HuggingFace -Transformers library. More details about this interface can be found in -`HuggingFace -documentation `__. - -.. 
code:: ipython3 - - core = ov.Core() - - - class OVMobileLlamaForCausalLM(transformers.GenerationMixin): - def __init__(self, stage1_path, stage2_path, device): - self.stage1 = core.compile_model(stage1_path, device) - self.stage2 = core.read_model(stage2_path) - - self.generation_config = transformers.GenerationConfig.from_model_config(config) - self.config = transformers.AutoConfig.from_pretrained(MODELS_DIR) - self.main_input_name = "input_ids" - self.device = torch.device("cpu") - self.prepare_inputs_for_generation = prepare_inputs_for_generation - self.num_pkv = 2 - self.input_names = {key.get_any_name(): idx for idx, key in enumerate(self.stage2.inputs)} - self.output_names = {key.get_any_name(): idx for idx, key in enumerate(self.stage2.outputs)} - self.key_value_input_names = [key for key in self.input_names if "key_values" in key] - self.key_value_output_names = [key for key in self.output_names if "present" in key] - stage2 = core.compile_model(self.stage2, device) - self.request = stage2.create_infer_request() - self._supports_cache_class = False - - def can_generate(self): - """Returns True to validate the check that the model using `GenerationMixin.generate()` can indeed generate.""" - return True - - def __call__( - self, - input_ids: torch.LongTensor, - images: torch.Tensor, - attention_mask: Optional[torch.LongTensor] = None, - prefix_mask: Optional[torch.LongTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - **kwargs, - ) -> transformers.modeling_outputs.CausalLMOutputWithPast: - return self.forward(input_ids, images, attention_mask, prefix_mask, past_key_values) - - def forward( - self, - input_ids: torch.LongTensor, - images: torch.Tensor, - attention_mask: Optional[torch.LongTensor] = None, - prefix_mask: Optional[torch.LongTensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - **kwargs, - ) -> transformers.modeling_outputs.CausalLMOutputWithPast: - """General inference 
method""" - inputs = {} - if past_key_values is not None: - # Flatten the past_key_values - attention_mask = torch.ones( - (input_ids.shape[0], past_key_values[-1][-1].shape[-2] + 1), - dtype=input_ids.dtype, - ) - past_key_values = tuple(past_key_value for pkv_per_layer in past_key_values for past_key_value in pkv_per_layer) - # Add the past_key_values to the decoder inputs - inputs = dict(zip(self.key_value_input_names, past_key_values)) - - else: - return self.forward_with_image(input_ids, images, attention_mask) - inputs["input_ids"] = np.array(input_ids) - - if "attention_mask" in self.input_names: - inputs["attention_mask"] = np.array(attention_mask) - - # Run inference - self.request.start_async(inputs, share_inputs=True) - self.request.wait() - - logits = torch.from_numpy(self.request.get_tensor("logits").data) - - # Tuple of length equal to : number of layer * number of past_key_value per decoder layer (2 corresponds to the self-attention layer) - past_key_values = tuple(self.request.get_tensor(key).data for key in self.key_value_output_names) - # Tuple of tuple of length `n_layers`, with each tuple of length equal to 2 (k/v of self-attention) - - past_key_values = tuple(past_key_values[i : i + self.num_pkv] for i in range(0, len(past_key_values), self.num_pkv)) - - return transformers.modeling_outputs.CausalLMOutputWithPast(logits=logits, past_key_values=past_key_values) - - def forward_with_image(self, input_ids, images, attention_mask): - """First step inference method, that resolves multimodal data""" - _, attention_mask, _, input_embed, _ = prepare_inputs_labels_for_multimodal(input_ids, attention_mask, images=images, past_key_values=None, labels=None) - outs = self.stage1({"inputs_embeds": input_embed, "attention_mask": attention_mask}) - logits = outs[0] - pkv = list(outs.values())[1:] - pkv = tuple(pkv[i : i + self.num_pkv] for i in range(0, len(pkv), self.num_pkv)) - return 
transformers.modeling_outputs.CausalLMOutputWithPast(logits=torch.from_numpy(logits), past_key_values=pkv) - -Now, when we have model and defined generation pipeline, we can run -model inference. - -Select device from dropdown list for running inference using OpenVINO. - -.. code:: ipython3 - - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - - from notebook_utils import device_widget - - device = device_widget("CPU", exclude=["NPU"]) - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') - - - -Load OpenVINO model -~~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - ov_model = OVMobileLlamaForCausalLM(stage1_xml_path, stage2_xml_path, device.value) - -Prepare input data -~~~~~~~~~~~~~~~~~~ - - - -.. code:: ipython3 - - images = [PIL.Image.open(IMAGE_PATH).convert("RGB")] - images_tensor = process_images(images, image_processor, transformers.AutoConfig.from_pretrained(MODELS_DIR)) - -.. code:: ipython3 - - conv = conv_templates["v1"].copy() - conv.append_message(conv.roles[0], DEFAULT_IMAGE_TOKEN + "\n" + PROMPT_STR) - conv.append_message(conv.roles[1], None) - prompt = conv.get_prompt() - stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2 - input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0) - stopping_criteria = KeywordsStoppingCriteria([stop_str], tokenizer, input_ids) - -.. code:: ipython3 - - print(PROMPT_STR) - images[0] - - -.. parsed-literal:: - - Who is the author of this book? - Answer the question using a single word or phrase. - - - - -.. image:: mobilevlm-language-assistant-with-output_files/mobilevlm-language-assistant-with-output_32_1.png - - - -Run generation process -~~~~~~~~~~~~~~~~~~~~~~ - - - -.. 
code:: ipython3 - - output_ids = ov_model.generate( - input_ids, - images=images_tensor, - do_sample=True if TEMPERATURE > 0 else False, - temperature=TEMPERATURE, - top_p=TOP_P, - num_beams=NUM_BEAMS, - max_new_tokens=MAX_NEW_TOKENS, - use_cache=True, - stopping_criteria=[stopping_criteria], - ) - input_token_len = input_ids.shape[1] - n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item() - if n_diff_input_output > 0: - print(f"[Warning] {n_diff_input_output} output_ids are not the same as the input_ids") - outputs = tokenizer.batch_decode(output_ids[:, input_token_len:], skip_special_tokens=True)[0] - outputs = outputs.strip() - if outputs.endswith(stop_str): - outputs = outputs[: -len(stop_str)] - print(f"🚀 {model_name} with OpenVINO: {outputs.strip()}\n") - - -.. parsed-literal:: - - 🚀 MobileVLM-1.7B with OpenVINO: Susan Wise Bauer - - - -Interactive inference ---------------------- - - - -.. code:: ipython3 - - def generate(img, prompt): - images_tensor = process_images([img], image_processor, transformers.AutoConfig.from_pretrained(MODELS_DIR)) - prompt = DEFAULT_IMAGE_TOKEN + "\n" + prompt - conv = conv_templates["v1"].copy() - conv.append_message(conv.roles[0], prompt) - conv.append_message(conv.roles[1], None) - prompt = conv.get_prompt() - stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2 - input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0) - stopping_criteria = KeywordsStoppingCriteria([stop_str], tokenizer, input_ids) - - output_ids = ov_model.generate( - input_ids, - images=images_tensor, - do_sample=True if TEMPERATURE > 0 else False, - temperature=TEMPERATURE, - top_p=TOP_P, - num_beams=NUM_BEAMS, - max_new_tokens=MAX_NEW_TOKENS, - use_cache=True, - stopping_criteria=[stopping_criteria], - ) - input_token_len = input_ids.shape[1] - outputs = tokenizer.batch_decode(output_ids[:, input_token_len:], skip_special_tokens=True)[0] - outputs = 
outputs.strip() - if outputs.endswith(stop_str): - outputs = outputs[: -len(stop_str)] - - return outputs.strip() - -.. code:: ipython3 - - demo = gr.Interface( - fn=generate, - inputs=[gr.Image(label="Image", type="pil"), gr.Textbox(label="Prompt")], - outputs=gr.Textbox(), - examples=[ - [ - str(IMAGE_PATH), - PROMPT_STR, - ] - ], - allow_flagging="never", - ) - - try: - demo.launch(debug=False) - except Exception: - demo.launch(debug=False, share=True) - # if you are launching remotely, specify server_name and server_port - # demo.launch(server_name='your server name', server_port='server port in int') - # Read more in the docs: https://gradio.app/docs/ - - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - - -.. code:: ipython3 - - # please uncomment and run this cell for stopping gradio interface - # demo.close() diff --git a/docs/notebooks/mobilevlm-language-assistant-with-output_files/mobilevlm-language-assistant-with-output_32_1.jpg b/docs/notebooks/mobilevlm-language-assistant-with-output_files/mobilevlm-language-assistant-with-output_32_1.jpg deleted file mode 100644 index e42650c7277fc7..00000000000000 --- a/docs/notebooks/mobilevlm-language-assistant-with-output_files/mobilevlm-language-assistant-with-output_32_1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e56dcd6fe79cd88720a73dcbf31e50faf6d057787713d62b0a35fa49d4789a52 -size 24608 diff --git a/docs/notebooks/mobilevlm-language-assistant-with-output_files/mobilevlm-language-assistant-with-output_32_1.png b/docs/notebooks/mobilevlm-language-assistant-with-output_files/mobilevlm-language-assistant-with-output_32_1.png deleted file mode 100644 index 55c71c94f52e35..00000000000000 --- a/docs/notebooks/mobilevlm-language-assistant-with-output_files/mobilevlm-language-assistant-with-output_32_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 
-oid sha256:74e9cd0ac19f22348008108002eaf4c5c3a666e15c7b205041138107020b3883 -size 162588 diff --git a/docs/notebooks/multilora-image-generation-with-output.rst b/docs/notebooks/multilora-image-generation-with-output.rst new file mode 100644 index 00000000000000..7b6f4bc381ff27 --- /dev/null +++ b/docs/notebooks/multilora-image-generation-with-output.rst @@ -0,0 +1,468 @@ +Multi LoRA Image Generation +=========================== + +LoRA, or `Low-Rank Adaptation `__, is +a popular and lightweight training technique used for fine-tuning Large +Language and Stable Diffusion Models without needing full model +training. Full fine-tuning of larger models (consisting of billions of +parameters) is inherently expensive and time-consuming. LoRA works by +adding a smaller number of new weights to the model for training, rather +than retraining the entire parameter space of the model. This makes +training with LoRA much faster, memory-efficient, and produces smaller +model weights (a few hundred MBs), which are easier to store and share. + +At its core, LoRA leverages the concept of low-rank matrix +factorization. Instead of updating all the parameters in a neural +network, LoRA decomposes the parameter space into two low-rank matrices. +This decomposition allows the model to capture essential information +with fewer parameters, significantly reducing the amount of data and +computation required for fine-tuning. + +|image0| + +By incorporating LoRA into Stable Diffusion models, we can enhance their +ability to understand complex relationships and patterns in data. This +approach opens up numerous possibilities: \* **Art and Design**: Artists +can fine-tune models to generate images that align with their unique +styles, creating personalized artwork effortlessly. \* **Content +Creation**: Businesses can customize image generation models to produce +branded visuals, enhancing marketing and media production. 
\* +**Entertainment**: Game developers and filmmakers can use fine-tuned +models to create realistic and imaginative worlds, streamlining the +creative process. + +In this tutorial we explore possibilities to use LoRA with OpenVINO +Generative API. + + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Convert Diffusion Model using Optimum + Intel <#convert-diffusion-model-using-optimum-intel>`__ + + - `Applying LoRA to Original Diffusers pipeline before + conversion <#applying-lora-to-original-diffusers-pipeline-before-conversion>`__ + +- `Image Generation using OpenVINO + GenAI <#image-generation-using-openvino-genai>`__ + + - `Integration LoRA into + pipeline <#integration-lora-into-pipeline>`__ + - `Prepare LoRA Adapters <#prepare-lora-adapters>`__ + - `Create Inference Pipeline <#create-inference-pipeline>`__ + - `Selection specific adapter during + generation <#selection-specific-adapter-during-generation>`__ + - `Use multiple adapters + simultaneously <#use-multiple-adapters-simultaneously>`__ + - `Disable adapters <#disable-adapters>`__ + +- `Interactive demo <#interactive-demo>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +.. |image0| image:: https://github.com/user-attachments/assets/bf823c71-13b4-402c-a7b4-d6fc30a60d88 + +.. code:: ipython3 + + import platform + + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch torchvision transformers accelerate "diffusers>0.25.0" pillow "gradio>=4.19" "peft>=0.7.0" + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" + %pip install -q -U "openvino>=2024.5.0" "openvino-tokenizers>=2024.5.0" "openvino-genai>=2024.5.0" + + if platform.system() == "Darwin": + %pip install -q "numpy<2.0.0" + +.. 
code:: ipython3 + + import requests + from pathlib import Path + + notebook_utils_path = Path("notebook_utils.py") + lora_config_path = Path("lora_config.py") + + if not notebook_utils_path.exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + notebook_utils_path.open("w").write(r.text) + + if not lora_config_path.exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/multilora-image-generation/lora_config.py", + ) + lora_config_path.open("w").write(r.text) + +Convert Diffusion Model using Optimum Intel +------------------------------------------- + + + +`Optimum Intel `__ is +the interface between the +`Transformers `__ and +`Diffusers `__ libraries +and OpenVINO to accelerate end-to-end pipelines on Intel architectures. +It provides an easy-to-use +`interface `__ +for exporting models to `OpenVINO Intermediate Representation +(IR) `__ +format. + +Applying LoRA to Original Diffusers pipeline before conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +LoRA can be easily added to a `Diffusers +pipeline `__ +before export. At the export stage, LoRA weights will be fused into the +original model weights, and the converted model will preserve the LoRA-provided +behavior. This approach is suitable when you need a model with adapter +capabilities by default and it does not require configuration at +inference time (e.g. changing the weight coefficient of an adapter). For +example, we can use this method to speed up the generation process by +integrating `LCM LoRA `__. +We have already considered this approach in this +`tutorial `__. + +Using ``optimum-cli`` for exporting models requires providing a model id +on HuggingFace Hub or a local directory with a saved model. In case, if +model stored in multiple separated repositories or directories (e.g.
you +want to replace VAE component or add LoRA), it should be merged and +saved on disk before export. For avoiding this, we will use +``export_from_model`` function that accepts initialized model. +Additionally, for using model with OpenVINO GenAI, we need to export +tokenizers to OpenVINO format using `OpenVINO +Tokenizers `__ +library. + +In this tutorial we will use `Stable Diffusion +XL `__ +model, but the same steps are also applicable to other models of Stable +Diffusion family. + +.. code:: ipython3 + + from pathlib import Path + from diffusers import DiffusionPipeline, AutoencoderKL, LCMScheduler + from optimum.exporters.openvino import export_from_model + from optimum.intel.openvino import OVConfig + from optimum.exporters.openvino.convert import export_tokenizer + import gc + + model_dir = Path("sdxl-lcm") + + if not model_dir.exists(): + model_id = "stabilityai/stable-diffusion-xl-base-1.0" + adapter_id = "latent-consistency/lcm-lora-sdxl" + vae_id = "madebyollin/sdxl-vae-fp16-fix" + vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix") + pipe = DiffusionPipeline.from_pretrained(model_id, vae=vae, variant="fp16", use_safetensors=True) + pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) + pipe.load_lora_weights(adapter_id) + pipe.fuse_lora() + export_from_model(pipe, model_dir, task="text-to-image", stateful=False, ov_config=OVConfig(dtype="fp16")) + for tokenizer in ["tokenizer", "tokenizer_2"]: + tokenizer_model = getattr(pipe, tokenizer, None) + if tokenizer_model is not None: + export_tokenizer(tokenizer_model, model_dir / tokenizer, task="text-to-image") + del vae + del pipe + gc.collect() + + +.. parsed-literal:: + + 2024-11-08 16:49:48.963221: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2024-11-08 16:49:48.977712: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered + WARNING: All log messages before absl::InitializeLog() is called are written to STDERR + E0000 00:00:1731070188.992824 718925 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered + E0000 00:00:1731070188.997386 718925 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered + 2024-11-08 16:49:49.014687: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + + +Image Generation using OpenVINO GenAI +------------------------------------- + + + +`OpenVINO™ GenAI `__ +is a library of the most popular Generative AI model pipelines, +optimized execution methods, and samples that run on top of highly +performant `OpenVINO +Runtime `__. + +This library is friendly to PC and laptop execution, and optimized for +resource consumption. It requires no external dependencies to run +generative models as it already includes all the core functionality. + +``openvino_genai.Text2ImagePipeline`` class supports inference of +`Diffusers +models `__. +For pipeline initialization, we should provide directory with converted +by Optimum Intel pipeline and specify inference device. Optionally, we +can provide configuration for LoRA Adapters using ``adapter_config``. +For starting generation process ``generate`` method should be used. +Basically, it required to provide input text prompt for image +generation. 
You can provide additional arguments like negative prompt, +number of steps, guidance scale, image width and height to control +generation process. + +Integration LoRA into pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +Similarly to Diffusers pipeline, you can store separately and load LoRA +into base pipeline before inference using OpenVINO GenAI. +``openvino_genai.AdapterConfig`` serves for adapters management in +``openvino_genai.Text2ImagePipeline``. It can be used for adding and +removing adapters or changing their weight coefficient for blending into +pipeline. You can add one or multiple adapters into config and also +specify alpha blending coefficients for their addition. OpenVINO GenAI +supports LoRA adapters saved in Safetensors format. You can use one of +publicly available pretrained adapters from +`CivitAI `__ or `HuggingFace +Hub `__ or train your own. > **Important +Note**: Before loading pretrained adapters, please make sure that they +are compatible with your base model architecture. E.g. if you use SDXL +model, you need to provide adapters trained for this model type and +loading adapter, for example, trained for FLUX is not allowed. + +Generally, process of adapters configuration consists of 2 steps: 1. +Register adapters in pipeline constructor. At this moment, it is +recommended to provide all adapters that you plan to use on this stage. +2. Choose which adapter (or a combination of adapters) to apply in each +``generate`` call. It is not obligated to use all of provided in +constructor adapters simultaneously, you can select one or combination +of several among them for each generation cycle. + +Prepare LoRA Adapters +~~~~~~~~~~~~~~~~~~~~~ + + + +.. _prepare-lora-adapters-1: + +Prepare LoRA Adapters +~~~~~~~~~~~~~~~~~~~~~ + +.. code:: ipython3 + + from lora_config import LORA + + # uncomment this line to see predefined LoRA adapters configuration used in this notebook + # LORA + +.. 
code:: ipython3 + + from huggingface_hub import hf_hub_download + + lora_dir = Path("lora") + adapter_paths = [] + + for lora in LORA: + lora_model_dir = lora_dir / lora["name"].lower().replace(" ", "_") + file_name = lora["file_name"] + if not (lora_model_dir / file_name).exists(): + hf_hub_download(repo_id=lora["model_id"], filename=file_name, local_dir=lora_model_dir) + adapter_paths.append(lora_model_dir / file_name) + +.. code:: ipython3 + + import openvino_genai as ov_genai + + + def prepare_adapter_config(scales=None): + if scales is None: + scales = [1 / len(adapter_paths)] * len(adapter_paths) + if isinstance(scales, float): + scales = [scales] * len(adapter_paths) + adapter_config = ov_genai.AdapterConfig() + for adapter, scale in zip(adapter_paths, scales): + adapter_config.add(ov_genai.Adapter(adapter), scale) + + return adapter_config + + + adapters_config = prepare_adapter_config(0.0) + adapters = adapters_config.get_adapters() + +Create Inference Pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +The diffusion process uses random numbers to prepare the initial state for +denoising. For reproducibility of generation results, we will use the +``Generator`` class. + +.. code:: ipython3 + + from notebook_utils import device_widget + + device = device_widget(default="CPU", exclude=["NPU"]) + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') + + + +..
code:: ipython3 + + import openvino as ov + import torch + + + class Generator(ov_genai.Generator): + def __init__(self, seed): + ov_genai.Generator.__init__(self) + self.generator = torch.Generator(device="cpu").manual_seed(seed) + + def next(self): + return torch.randn(1, generator=self.generator, dtype=torch.float32).item() + + def randn_tensor(self, shape: ov.Shape): + torch_tensor = torch.randn(list(shape), generator=self.generator, dtype=torch.float32) + return ov.Tensor(torch_tensor.numpy()) + + + pipe = ov_genai.Text2ImagePipeline(model_dir, device.value, adapters=adapters_config) + +Selection specific adapter during generation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +As already mentioned, it is not necessary to use all +adapters specified at the initialization stage for generation at the same +time. Providing the adapters argument with ``openvino_genai.AdapterConfig`` +to ``generate`` allows selecting one or several of them. For example, +let’s select the LoRA for generating images in X-Ray style. + +.. code:: ipython3 + + subject = "a cute cat in sunglasses" + prompt_template = LORA[0].get("prompt", "") + adapter_weight = LORA[0].get("weight", 1.0) + prompt = prompt_template.replace("", subject) + adapter_config = ov_genai.AdapterConfig() + adapter_config.add(adapters[0], adapter_weight) + image_tensor = pipe.generate(prompt, num_inference_steps=4, guidance_scale=0, adapters=adapter_config, generator=Generator(421235)) + +.. code:: ipython3 + + from PIL import Image + + image = Image.fromarray(image_tensor.data[0]) + image + + + + +.. image:: multilora-image-generation-with-output_files/multilora-image-generation-with-output_15_0.png + + + +Use multiple adapters simultaneously +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +You can also use a combination of adapters that will be applied at the +same time. Let’s see what happens when traditional Japanese art meets +modern pointillistic illustration style. + +..
code:: ipython3 + + prompt_template1 = LORA[1].get("prompt", "") + prompt_template2 = LORA[2].get("prompt", "") + adapter1_weight = LORA[1].get("weight", 1.0) + adapter2_weight = LORA[2].get("weight", 1.0) + + prompt = prompt_template2.replace("", prompt_template1.replace("", subject)) + adapter_config = ov_genai.AdapterConfig() + adapter_config.add(adapters[1], adapter1_weight) + adapter_config.add(adapters[2], adapter2_weight) + image_tensor = pipe.generate(prompt, num_inference_steps=4, guidance_scale=0, adapters=adapter_config, generator=Generator(421235)) + +.. code:: ipython3 + + image = Image.fromarray(image_tensor.data[0]) + image + + + + +.. image:: multilora-image-generation-with-output_files/multilora-image-generation-with-output_18_0.png + + + +Disable adapters +~~~~~~~~~~~~~~~~ + + + +You can disable adapters by passing an empty ``AdapterConfig`` to ``generate``. + +.. code:: ipython3 + + image_tensor = pipe.generate(subject, num_inference_steps=4, guidance_scale=0, adapters=ov_genai.AdapterConfig(), generator=Generator(421235)) + +.. code:: ipython3 + + image = Image.fromarray(image_tensor.data[0]) + image + + + + +.. image:: multilora-image-generation-with-output_files/multilora-image-generation-with-output_21_0.png + + + +Interactive demo +---------------- + + + +.. code:: ipython3 + + gradio_helper_path = Path("gradio_helper.py") + + if not gradio_helper_path.exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/multilora-image-generation/gradio_helper.py", + ) + gradio_helper_path.open("w").write(r.text) + +..
code:: ipython3 + + from gradio_helper import make_demo + + demo = make_demo(pipe, Generator, adapters, LORA) + + try: + demo.launch(debug=False) + except Exception: + demo.launch(share=True, debug=False) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ diff --git a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_15_0.jpg b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_15_0.jpg new file mode 100644 index 00000000000000..1427e6afb594ac --- /dev/null +++ b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_15_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:378fce8c53832fa402e94c50995aa5f188d16a6a6886c08fe4f8323bcf7daabe +size 42135 diff --git a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_15_0.png b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_15_0.png new file mode 100644 index 00000000000000..873721f87cc2a3 --- /dev/null +++ b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_15_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27ac6d45499eb6e67ddf78f8f3493fd3e9dc3885cec2b4fda8067f9b1f7a9ebf +size 1252162 diff --git a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_18_0.jpg b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_18_0.jpg new file mode 100644 index 00000000000000..1b6a88d2cde069 --- /dev/null +++ b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_18_0.jpg @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:de879c60657ad9c471ccc971d63cc2ac25be5b477c6ebcd8b2e1a2a438b2f3c1 +size 146062 diff --git a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_18_0.png b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_18_0.png new file mode 100644 index 00000000000000..9b26d20ef04ab8 --- /dev/null +++ b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_18_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae8cec0bac904c1868d7786121978b2ca819ead5c8b02cf09bb07f75b927a3a1 +size 1940316 diff --git a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_21_0.jpg b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_21_0.jpg new file mode 100644 index 00000000000000..199be9b483e18f --- /dev/null +++ b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_21_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:564848925f540cf500457a4996631ba616cc6547b63d377ce22ac8c3e9431c04 +size 87425 diff --git a/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_21_0.png b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_21_0.png new file mode 100644 index 00000000000000..bbf4eaaf030a42 --- /dev/null +++ b/docs/notebooks/multilora-image-generation-with-output_files/multilora-image-generation-with-output_21_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b573ab59972699e762f8a52c0ce17a0db060230effe78e2ae3408290a9173103 +size 1417021 diff --git a/docs/notebooks/music-generation-with-output.rst b/docs/notebooks/music-generation-with-output.rst index 4adc89b9ff79e7..a5bdcbd8049318 100644 --- 
a/docs/notebooks/music-generation-with-output.rst +++ b/docs/notebooks/music-generation-with-output.rst @@ -124,14 +124,9 @@ Imports .. parsed-literal:: - 2024-11-05 02:04:23.419260: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-05 02:04:23.453089: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-22 01:43:50.913766: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-22 01:43:50.938403: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-05 02:04:24.059462: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. 
- torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. - torch.utils._pytree._register_pytree_node( MusicGen in HF Transformers @@ -170,12 +165,134 @@ generate a text-conditioned music sample. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:797: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. - warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. - torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:28: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm. 
- warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.") + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/encodec/modeling_encodec.py:124: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). + self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False) + Config of the text_encoder: is overwritten by shared text_encoder config: T5Config { + "_name_or_path": "t5-base", + "architectures": [ + "T5ForConditionalGeneration" + ], + "classifier_dropout": 0.0, + "d_ff": 3072, + "d_kv": 64, + "d_model": 768, + "decoder_start_token_id": 0, + "dense_act_fn": "relu", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "relu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": false, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "n_positions": 512, + "num_decoder_layers": 12, + "num_heads": 12, + "num_layers": 12, + "output_past": true, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "task_specific_params": { + "summarization": { + "early_stopping": true, + "length_penalty": 2.0, + "max_length": 200, + "min_length": 30, + "no_repeat_ngram_size": 3, + "num_beams": 4, + "prefix": "summarize: " + }, + "translation_en_to_de": { + "early_stopping": true, + "max_length": 300, + "num_beams": 4, + "prefix": "translate English to German: " + }, + "translation_en_to_fr": { + "early_stopping": true, + "max_length": 300, + "num_beams": 4, + "prefix": "translate English to French: " + }, + "translation_en_to_ro": { + "early_stopping": true, + "max_length": 300, + "num_beams": 4, + "prefix": "translate English to 
Romanian: " + } + }, + "transformers_version": "4.46.3", + "use_cache": true, + "vocab_size": 32128 + } + + Config of the audio_encoder: is overwritten by shared audio_encoder config: EncodecConfig { + "_name_or_path": "facebook/encodec_32khz", + "architectures": [ + "EncodecModel" + ], + "audio_channels": 1, + "chunk_length_s": null, + "codebook_dim": 128, + "codebook_size": 2048, + "compress": 2, + "dilation_growth_rate": 2, + "hidden_size": 128, + "kernel_size": 7, + "last_kernel_size": 7, + "model_type": "encodec", + "norm_type": "weight_norm", + "normalize": false, + "num_filters": 64, + "num_lstm_layers": 2, + "num_residual_layers": 1, + "overlap": null, + "pad_mode": "reflect", + "residual_kernel_size": 3, + "sampling_rate": 32000, + "target_bandwidths": [ + 2.2 + ], + "torch_dtype": "float32", + "transformers_version": "4.46.3", + "trim_right_ratio": 1.0, + "upsampling_ratios": [ + 8, + 5, + 4, + 4 + ], + "use_causal_conv": false, + "use_conv_shortcut": false + } + + Config of the decoder: is overwritten by shared decoder config: MusicgenDecoderConfig { + "activation_dropout": 0.0, + "activation_function": "gelu", + "attention_dropout": 0.0, + "audio_channels": 1, + "bos_token_id": 2048, + "classifier_dropout": 0.0, + "dropout": 0.1, + "ffn_dim": 4096, + "hidden_size": 1024, + "initializer_factor": 0.02, + "layerdrop": 0.0, + "max_position_embeddings": 2048, + "model_type": "musicgen_decoder", + "num_attention_heads": 16, + "num_codebooks": 4, + "num_hidden_layers": 24, + "pad_token_id": 2048, + "scale_embedding": false, + "tie_word_embeddings": false, + "transformers_version": "4.46.3", + "use_cache": true, + "vocab_size": 2048 + } + In the cell below user is free to change the desired music sample @@ -229,7 +346,7 @@ vocabulary. It helps the model understand the context of a sentence. @@ -314,6 +431,9 @@ runtime .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + warnings.warn( + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. 2. Convert MusicGen Language Model @@ -655,7 +775,7 @@ We can now infer the pipeline backed by OpenVINO models. diff --git a/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst b/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst index 337458e35bbf0c..0bac7af3f39c32 100644 --- a/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst @@ -16,7 +16,6 @@ OpenVINO. Additionally, we will optimize model using - `Prerequisites <#prerequisites>`__ - `Select Model <#select-model>`__ -- `Download PyTorch model <#download-pytorch-model>`__ - `Convert and Optimize model <#convert-and-optimize-model>`__ - `Convert model to OpenVINO IR @@ -51,23 +50,23 @@ Prerequisites .. 
code:: ipython3 - %pip install -q "torch>=2.1" "transformers>=4.40" "accelerate" "pillow" "gradio>=4.26" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "nncf>=2.13" - %pip install -q -U --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly "openvino-tokenizers[transformers]" "openvino>=2024.4.0" - %pip install -q "git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv" + %pip install -q "torch>=2.1" "transformers>=4.45" "accelerate" "pillow" "gradio>=4.26" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "nncf>=2.14" + %pip install -q -U "openvino-tokenizers[transformers]>=2024.5.0" "openvino>=2024.5.0" + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" .. parsed-literal:: - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. + ERROR: Ignored the following versions that require a different python version: 2.14.0 Requires-Python >=3.9 + ERROR: Could not find a version that satisfies the requirement nncf>=2.14 (from versions: 1.4, 1.4.1, 1.5.0, 1.6.0, 1.7.0, 1.7.1, 2.0.0, 2.0.1, 2.0.2, 2.1.0, 2.2.0, 2.3.0, 2.4.0, 2.5.0, 2.6.0, 2.7.0, 2.8.0, 2.8.1, 2.9.0, 2.10.0, 2.11.0, 2.12.0, 2.13.0) + ERROR: No matching distribution found for nncf>=2.14 Note: you may need to restart the kernel to use updated packages. - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - openvino-genai 2024.4.0.0 requires openvino_tokenizers~=2024.4.0.0.dev, but you have openvino-tokenizers 2024.5.0.0.dev20241022 which is incompatible. 
+ ERROR: Ignored the following versions that require a different python version: 2024.5.0.0 Requires-Python >=3.9 + ERROR: Could not find a version that satisfies the requirement openvino-tokenizers>=2024.5.0 (from versions: 2023.3.0.0, 2024.0.0.0, 2024.1.0.0, 2024.1.0.2, 2024.2.0.0, 2024.3.0.0, 2024.4.0.0, 2024.4.1.0.dev20240926) + ERROR: No matching distribution found for openvino-tokenizers>=2024.5.0 Note: you may need to restart the kernel to use updated packages. - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -77,6 +76,7 @@ Prerequisites import requests helper_file = Path("ov_nano_llava_helper.py") + cmd_helper_file = Path("cmd_helper.py") if not helper_file.exists(): r = requests.get( @@ -84,6 +84,10 @@ Prerequisites ) helper_file.open("w").write(r.text) + if not cmd_helper_file.exists(): + r = requests.get(url=f"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/{cmd_helper_file.name}") + cmd_helper_file.open("w").write(r.text) + Select Model ------------ @@ -127,40 +131,12 @@ Download PyTorch model .. code:: ipython3 - from ov_nano_llava_helper import download_original_model, converted_model_exists, copy_model_files + from ov_nano_llava_helper import converted_model_exists, copy_model_files model_id = model_dropdown.value model_dir = Path(model_id.split("/")[-1]) ov_model_dir = Path("ov_" + model_dir.name) / "FP16" - if not converted_model_exists(ov_model_dir): - download_original_model(model_id, model_dir) - - - -.. 
parsed-literal:: - - Fetching 14 files: 0%| | 0/14 [00:00 1 or self.sliding_window is not None) and self.is_causal: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/onnx/model_patcher.py:306: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/onnx/model_patcher.py:306: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/13d60cec183a86755afed64da495fcc2c382ea80/modeling_llava_qwen2.py:939: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len > self.max_seq_len_cached: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:432: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/13d60cec183a86755afed64da495fcc2c382ea80/modeling_llava_qwen2.py:1499: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): - OpenVINO and OpenVINO Tokenizers versions are not binary compatible. - OpenVINO version: 2024.5.0-16993 - OpenVINO Tokenizers version: 2024.5.0.0 - First 3 numbers should be the same. Update OpenVINO Tokenizers to compatible version. It is recommended to use the same day builds for pre-release version. To install both OpenVINO and OpenVINO Tokenizers release version perform: - pip install --force-reinstall openvino openvino-tokenizers - To update both OpenVINO and OpenVINO Tokenizers to the latest pre-release version perform: - pip install --pre -U openvino openvino-tokenizers --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - Tokenizer won't be converted. 
- Traceback (most recent call last): - File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/bin/optimum-cli", line 10, in - sys.exit(main()) - File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/optimum_cli.py", line 208, in main - service.run() - File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/export/openvino.py", line 349, in run - main_export( - File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/openvino/__main__.py", line 416, in main_export - core = Core() - File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/__init__.py", line 53, in new_core_init - self.add_extension(str(_ext_path)) # Core.add_extension doesn't support Path object - RuntimeError: Exception from src/inference/src/cpp/core.cpp:158: - Cannot add extension. Cannot find entry point to the extension library. 
This error happened: Cannot load library '/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/lib/libopenvino_tokenizers.so': /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/openvino_tokenizers/lib/libopenvino_tokenizers.so: undefined symbol: _ZNK2ov4Node17can_constant_foldERKSt6vectorINS_6OutputIS0_EESaIS3_EE + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/13d60cec183a86755afed64da495fcc2c382ea80/modeling_llava_qwen2.py:169: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_weights.size() != (batch_size, self.num_heads, q_len, k_v_seq_len): + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/13d60cec183a86755afed64da495fcc2c382ea80/modeling_llava_qwen2.py:187: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_output.size() != (batch_size, self.num_heads, q_len, self.head_dim): + Exporting tokenizers to OpenVINO is not supported for tokenizers version > 0.19 and openvino version <= 2024.4. Please downgrade to tokenizers version <= 0.19 to export tokenizers to OpenVINO. + +.. parsed-literal:: + + [ WARNING ] Unexpectedly found already patched module model.layers.22.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. 
Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module model.layers.22.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module model.layers.23.self_attn.q_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module model.layers.23.self_attn.k_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module model.layers.23.self_attn.v_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module model.layers.23.self_attn.o_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module model.layers.23.mlp.gate_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module model.layers.23.mlp.up_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. 
+ [ WARNING ] Unexpectedly found already patched module model.layers.23.mlp.down_proj while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module model.mm_projector.0 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module model.mm_projector.2 while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module lm_head while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. + [ WARNING ] Unexpectedly found already patched module while applying ModuleExtension during PyTorch model conversion. Result of the conversion maybe broken. Depending on the exact issue it may lead to broken original model. Compress Model weights to 4 and 8 bits using NNCF @@ -380,12 +530,11 @@ image encoder model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/quantization/quantize_model.py:432: FutureWarning: `CompressWeightsMode.INT8` is deprecated. Please, use `CompressWeightsMode.INT8_ASYM` as value instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/quantization/quantize_model.py:432: FutureWarning: `CompressWeightsMode.INT8` is deprecated. Please, use `CompressWeightsMode.INT8_ASYM` as value instead. 
warning_deprecated( - 2024-11-05 02:09:38.791476: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-05 02:09:38.825207: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-22 01:48:49.764790: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-22 01:48:49.789684: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-05 02:09:39.427301: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -508,10 +657,11 @@ Select device import requests - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) + if not Path("notebook_utils.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) from notebook_utils import device_widget @@ -558,8 +708,14 @@ can use the same tokenizer and image processor that provided with model. 
messages = [{"role": "user", "content": f"\n{prompt}"}] text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) - url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/8bf7d9f2-018a-4498-bec4-55f17c273ecc" - image = Image.open(requests.get(url, stream=True).raw) + test_image = Path("nanollava.png") + + if not test_image.exists(): + url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/8bf7d9f2-018a-4498-bec4-55f17c273ecc" + image = Image.open(requests.get(url, stream=True).raw) + image.save(test_image) + else: + image = Image.open(test_image) image_tensor = process_images(image, None, processor) input_ids, attention_mask = process_text_input(text, tokenizer) @@ -569,7 +725,7 @@ can use the same tokenizer and image processor that provided with model. print(f"Question:\n{prompt}") print("Answer:") - output_ids = ov_model.generate(input_ids, attention_mask=attention_mask, images=image_tensor, max_new_tokens=128, use_cache=True, streamer=streamer) + output_ids = ov_model.generate(input_ids, attention_mask=attention_mask, pixel_values=image_tensor, max_new_tokens=128, use_cache=True, streamer=streamer) @@ -586,8 +742,8 @@ can use the same tokenizer and image processor that provided with model. Question: Describe this image in detail Answer: - The image features a small, adorable white lamb standing amidst a fire. The lamb's fur is fluffy and white, and it is adorned with tiny black eyes that are bright and lively. The lamb's face is cute, with a small black nose and a small mouth. It seems like the lamb is looking straight at the camera, making it appear even more adorable. - The lamb's right ear is visible, and it is white and pink. The lamb's right eye is also black and pink. The lamb's face is quite detailed, with the nose and mouth visible. 
There are also details like the lamb's right foot, which is white + This image features a cute, white lama, possibly a llama, which is depicted in a playful pose. The llama is surrounded by a fire, indicating it's being set on a burner. The flame appears to be a bright, bright yellow, and there are several tiny flames, possibly from the llama's actions. + The llama itself is quite detailed. It has a small brown nose and dark eyes that are expressive. The face of the llama is quite detailed as well, with a pair of ears that are also light brown. The llama's mouth is open, revealing its pink lips. There are also small pink spots on its face, Interactive demo @@ -679,7 +835,7 @@ Interactive demo generation_kwargs = dict( input_ids=input_ids, attention_mask=attention_mask, - images=image_tensor, + pixel_values=image_tensor, streamer=streamer, max_new_tokens=128, stopping_criteria=[stopping_criteria], diff --git a/docs/notebooks/notebooks_with_colab_buttons.txt b/docs/notebooks/notebooks_with_colab_buttons.txt index 0f45238db3a4fb..59b3348a4c90f7 100644 --- a/docs/notebooks/notebooks_with_colab_buttons.txt +++ b/docs/notebooks/notebooks_with_colab_buttons.txt @@ -24,6 +24,7 @@ knowledge-graphs-conve language-quantize-bert magika-content-type-recognition mobileclip-video-search +modelscope-to-openvino music-generation named-entity-recognition nano-llava-multimodal-chatbot diff --git a/docs/notebooks/nuextract-structure-extraction-with-output.rst b/docs/notebooks/nuextract-structure-extraction-with-output.rst index fc2d250626fba4..8dd88ca62bd161 100644 --- a/docs/notebooks/nuextract-structure-extraction-with-output.rst +++ b/docs/notebooks/nuextract-structure-extraction-with-output.rst @@ -391,9 +391,9 @@ LLMPipeline. .. 
code:: ipython3 - from openvino_genai import LLMPipeline + import openvino_genai as ov_genai - pipe = LLMPipeline(model_dir.as_posix(), device.value) + pipe = ov_genai.LLMPipeline(model_dir.as_posix(), device.value) def run_structure_extraction(text: str, schema: str) -> str: diff --git a/docs/notebooks/object-detection-with-output.rst b/docs/notebooks/object-detection-with-output.rst index a34f72f5d8ff1e..5debc4e7ed88d4 100644 --- a/docs/notebooks/object-detection-with-output.rst +++ b/docs/notebooks/object-detection-with-output.rst @@ -84,7 +84,7 @@ Install requirements .. parsed-literal:: - 24692 + 24717 @@ -136,7 +136,7 @@ Download and convert the Model .. parsed-literal:: - 100%|██████████| 6.25M/6.25M [00:00<00:00, 25.9MB/s] + 100%|██████████| 6.25M/6.25M [00:00<00:00, 26.9MB/s] .. parsed-literal:: @@ -147,10 +147,10 @@ Download and convert the Model PyTorch: starting from 'yolov8n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (6.2 MB) OpenVINO: starting export with openvino 2024.4.0-16579-c3152d32c9c-releases/2024/4... 
- OpenVINO: export success ✅ 1.3s, saved as 'yolov8n_openvino_model/' (6.4 MB) + OpenVINO: export success ✅ 1.4s, saved as 'yolov8n_openvino_model/' (6.4 MB) - Export complete (1.5s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/object-detection-webcam + Export complete (1.6s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/object-detection-webcam Predict: yolo predict task=detect model=yolov8n_openvino_model imgsz=640 half Validate: yolo val task=detect model=yolov8n_openvino_model imgsz=640 data=coco.yaml half Visualize: https://netron.app diff --git a/docs/notebooks/omniparser-with-output.rst b/docs/notebooks/omniparser-with-output.rst new file mode 100644 index 00000000000000..28676a03a84ba7 --- /dev/null +++ b/docs/notebooks/omniparser-with-output.rst @@ -0,0 +1,663 @@ +Screen Parsing with OmniParser and OpenVINO +=========================================== + +Recent breakthroughs in Visual Language Processing and Large Language +Models have made significant strides in understanding and interacting with +the world through text and images. However, accurately parsing and +understanding complex graphical user interfaces (GUIs) remains a +significant challenge. OmniParser is a comprehensive method for parsing +user interface screenshots into structured and easy-to-understand +elements. This enables more accurate and efficient interaction with +GUIs, empowering AI agents to perform tasks across various platforms and +applications. + +|image0| + +More details about the model can be found in `Microsoft blog +post `__, +`paper `__, `original +repo `__ and `model +card `__. In this tutorial +we consider how to run OmniParser using OpenVINO.
+ + +**Table of contents:** + +- `Prerequisites <#prerequisites>`__ +- `Prepare models <#prepare-models>`__ + + - `Convert models to OpenVINO Intermediate representation + format <#convert-models-to-openvino-intermediate-representation-format>`__ + + - `Icon Detector <#icon-detector>`__ + - `Screen captioning model <#screen-captioning-model>`__ + +- `Run OmniParser using OpenVINO <#run-omniparser-using-openvino>`__ + + - `Icon Detector <#icon-detector>`__ + + - `Select inference device for icon + detector <#select-inference-device-for-icon-detector>`__ + + - `Screen regions captioning <#screen-regions-captioning>`__ + + - `Select device for screen region + captioning <#select-device-for-screen-region-captioning>`__ + + - `Recognition text on the + screen <#recognition-text-on-the-screen>`__ + + - `Select device for OCR <#select-device-for-ocr>`__ + + - `Run model inference <#run-model-inference>`__ + +- `Interactive demo <#interactive-demo>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +.. |image0| image:: https://microsoft.github.io/OmniParser/static/images/flow_merged0.png + +Prerequisites +------------- + + + +.. code:: ipython3 + + %pip install -q "torch>=2.1" easyocr torchvision accelerate "supervision==0.18.0" accelerate timm "einops==0.8.0" "ultralytics==8.1.24" pillow opencv-python "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "openvino>=2024.4.0" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + +.. 
code:: ipython3 + + from pathlib import Path + import requests + + notebook_utils_path = Path("notebook_utils.py") + florence_helper_path = Path("ov_florence2_helper.py") + + if not notebook_utils_path.exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + notebook_utils_path.open("w").write(r.text) + + if not florence_helper_path.exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/florence2/ov_florence2_helper.py") + florence_helper_path.open("w").write(r.text) + +Prepare models +-------------- + + + +OmniParser leverages a two-step process: 1. Interactable Region +Detection: - Identifies clickable elements like buttons and icons within +a UI. - Employs a specialized model trained on a diverse dataset of web +pages. - Accurately detects interactive elements, even in complex UIs. + +2. Semantic Captioning: + + - Assigns meaningful descriptions to detected elements. + - Combines optical character recognition (OCR) and a captioning + model. + - Provides context for accurate action generation. + +Convert models to OpenVINO Intermediate representation format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For starting work with OpenVINO +we should convert models to OpenVINO Intermediate Representation format +first. + +`OpenVINO model conversion +API `__ +should be used for these purposes. ``ov.convert_model`` function accepts +original model instance and example input for tracing and returns +``ov.Model`` representing this model in OpenVINO framework. Converted +model can be used for saving on disk using ``ov.save_model`` function or +directly loading on device using ``core.compile_model``. + +Let’s consider each pipeline part.
+ +Icon Detector +^^^^^^^^^^^^^ + + + +Icon detector in OmniParser is represented by YOLO based model trained +on curated by model authors interactable icon detection dataset. + +For conversion and model inference we will utilize Ultralytics provided +API. You can find more examples of this API usage in these +`tutorials `__ + +.. code:: ipython3 + + from ov_omniparser_helper import download_omniparser_icon_detector + + icon_detector_dir = download_omniparser_icon_detector() + + +.. parsed-literal:: + + 2024-11-22 01:51:07.385705: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-22 01:51:07.410345: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + + + +.. parsed-literal:: + + weights/icon_detect/best.pt: 0%| | 0.00/11.7M [00:00`__. + +.. code:: ipython3 + + from ov_omniparser_helper import download_omniparser_florence_model + + florence_caption_dir = download_omniparser_florence_model() + + + +.. parsed-literal:: + + Fetching 15 files: 0%| | 0/15 [00:00`__ is a python module for +extracting text from image. It is a general OCR that can read both +natural scene text and dense text in document and supports 80+ +languages. EasyOCR utilizes AI for detection text regions and recognize +text inside of predicted regions. We will also utilize both text +detection and recognition models using OpenVINO. + +Select device for OCR +^^^^^^^^^^^^^^^^^^^^^ + + + +.. 
code:: ipython3 + + import ipywidgets as widgets + + device_detector = device_widget(exclude=["NPU"], description="Detector device:") + device_recognizer = device_widget(exclude=["NPU"], description="Recognizer device:") + + device_box = widgets.VBox([device_detector, device_recognizer]) + device_box + + + + +.. parsed-literal:: + + VBox(children=(Dropdown(description='Detector device:', index=1, options=('CPU', 'AUTO'), value='AUTO'), Dropd… + + + +.. code:: ipython3 + + from ov_omniparser_helper import easyocr_reader + + # Uncomment the line to see easyocr_reader helper code + # ??easyocr_reader + +.. code:: ipython3 + + reader = easyocr_reader("weights/easyocr", device_detector.value, device_recognizer.value) + + +.. parsed-literal:: + + Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU. + + + + +.. code:: ipython3 + + from PIL import Image + + test_image_path = Path("examples/windows_home.png") + test_image_path.parent.mkdir(exist_ok=True, parents=True) + + if not test_image_path.exists(): + Image.open(requests.get("https://github.com/microsoft/OmniParser/blob/master/imgs/windows_home.png?raw=true", stream=True).raw).save(test_image_path) + +Run model inference +~~~~~~~~~~~~~~~~~~~ + + + +``process_image`` function defined in ``ov_omniparser_helper.py`` +provides easy-to-use interface for screen parsing process. + +.. code:: ipython3 + + from ov_omniparser_helper import process_image + + # Uncomment this line to see process_image code + # ??process_image + +.. code:: ipython3 + + procesed_image, label_coordinates, icon_descriptions = process_image( + test_image_path, ov_icon_detector, {"model": ov_icon_caption_gen, "processor": processor}, reader + ) + + +.. 
parsed-literal:: + + + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/omniparser/examples/windows_home.png: 640x640 32 0s, 38.2ms + Speed: 2.4ms preprocess, 38.2ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640) + finish processing + + +The function returns the image with the detected boxes drawn, the box coordinates, and +a description for each region. + +.. code:: ipython3 + + display(procesed_image.resize((1200, 1200))) + print(icon_descriptions) + + + +.. image:: omniparser-with-output_files/omniparser-with-output_32_0.png + + +.. parsed-literal:: + + Text Box ID 0: 3.46 PM + Text Box ID 1: Search + Text Box ID 2: Microsoft + Text Box ID 3: 10/25/2024 + Icon Box ID 4: Microsoft Outlook. + Icon Box ID 5: Image + Icon Box ID 6: Microsoft OneNote. + Icon Box ID 7: Microsoft Office. + Icon Box ID 8: a folder for organizing files. + Icon Box ID 9: Microsoft Office. + Icon Box ID 10: Security shield. + Icon Box ID 11: Microsoft 365. + Icon Box ID 12: Microsoft Edge browser. + Icon Box ID 13: Microsoft Edge browser. + Icon Box ID 14: Decrease + Icon Box ID 15: the Windows operating system. + Icon Box ID 16: mountains and a beach. + Icon Box ID 17: a search function. + + +Interactive demo +---------------- + +.. 
code:: ipython3 + + from gradio_helper import make_demo + + + def process_image_gradio(image, box_threshold, iou_threshold, imgsz): + image_result, _, parsed_text = process_image( + image, + ov_icon_detector, + {"model": ov_icon_caption_gen, "processor": processor}, + reader, + box_threshold=box_threshold, + iou_threshold=iou_threshold, + imgsz=imgsz, + ) + return image_result, parsed_text + + + demo = make_demo(process_image_gradio) + + try: + demo.launch(debug=False, height=600) + except Exception: + demo.launch(debug=False, share=True, height=600) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. + + + + + + + diff --git a/docs/notebooks/omniparser-with-output_files/omniparser-with-output_32_0.jpg b/docs/notebooks/omniparser-with-output_files/omniparser-with-output_32_0.jpg new file mode 100644 index 00000000000000..513db4e6d0da5d --- /dev/null +++ b/docs/notebooks/omniparser-with-output_files/omniparser-with-output_32_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c83af55e4296ff1dadb270b93c31084e983048437f848323c0e9677d2c3ed22 +size 161384 diff --git a/docs/notebooks/omniparser-with-output_files/omniparser-with-output_32_0.png b/docs/notebooks/omniparser-with-output_files/omniparser-with-output_32_0.png new file mode 100644 index 00000000000000..a09fc0a47cd036 --- /dev/null +++ b/docs/notebooks/omniparser-with-output_files/omniparser-with-output_32_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:382e19a8751851ad8a151bea1f4f7bc4be62b47c7a8a4f70da0a3dae257b0c20 +size 1411816 diff --git a/docs/notebooks/openvino-api-with-output.rst b/docs/notebooks/openvino-api-with-output.rst index b2b4c8c0f04fdd..3931d96040da7e 100644 
--- a/docs/notebooks/openvino-api-with-output.rst +++ b/docs/notebooks/openvino-api-with-output.rst @@ -201,7 +201,7 @@ notebooks. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -250,7 +250,7 @@ points to the filename of an ONNX model. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.onnx') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.onnx') @@ -310,7 +310,7 @@ without any conversion step. Pass the filename with extension to .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/inference.pdiparams') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/inference.pdiparams') @@ -354,7 +354,7 @@ TensorFlow models saved in frozen graph format can also be passed to .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.pb') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.pb') @@ -407,7 +407,7 @@ It is pre-trained model optimized to work with TensorFlow Lite. .. 
parsed-literal:: - Warning: Looks like you're using an outdated `kagglehub` version, please consider updating (latest version: 0.3.3) + Warning: Looks like you're using an outdated `kagglehub` version, please consider updating (latest version: 0.3.4) .. code:: ipython3 @@ -497,7 +497,7 @@ Information about the inputs and outputs of the model are in .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -703,7 +703,7 @@ produced data as values. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -892,7 +892,7 @@ input shape. .. parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/segmentation.bin') @@ -1044,7 +1044,7 @@ the cache. .. 
parsed-literal:: - PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') + PosixPath('/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvino-api/model/classification.bin') @@ -1074,7 +1074,7 @@ the cache. .. parsed-literal:: - Loading the network to the AUTO device took 0.15 seconds. + Loading the network to the AUTO device took 0.14 seconds. After running the previous cell, we know the model exists in the cache @@ -1092,5 +1092,5 @@ measure the time it takes now. .. parsed-literal:: - Loading the network to the AUTO device took 0.08 seconds. + Loading the network to the AUTO device took 0.07 seconds. diff --git a/docs/notebooks/openvoice-with-output.rst b/docs/notebooks/openvoice-with-output.rst index 2ee11fcded84dc..0c912bfe36ee96 100644 --- a/docs/notebooks/openvoice-with-output.rst +++ b/docs/notebooks/openvoice-with-output.rst @@ -62,13 +62,33 @@ Clone repository and install requirements .. code:: ipython3 - import sys + # Fetch `notebook_utils` module + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", + ) + open("cmd_helper.py", "w").write(r.text) + + + from notebook_utils import download_file, device_widget + +.. 
code:: ipython3 + from pathlib import Path + from cmd_helper import clone_repo + + repo_dir = Path("OpenVoice") if not repo_dir.exists(): - !git clone https://github.com/myshell-ai/OpenVoice + clone_repo("https://github.com/myshell-ai/OpenVoice") orig_english_path = Path("OpenVoice/openvoice/text/_orig_english.py") english_path = Path("OpenVoice/openvoice/text/english.py") @@ -79,8 +99,6 @@ Clone repository and install requirements data = data.replace("unidecode", "anyascii") with english_path.open("w") as out_f: out_f.write(data) - # append to sys.path so that modules from the repo could be imported - sys.path.append(str(repo_dir)) # fix a problem with silero downloading and installing @@ -97,30 +115,19 @@ Clone repository and install requirements .. parsed-literal:: - Cloning into 'OpenVoice'... - remote: Enumerating objects: 438, done. - remote: Total 438 (delta 0), reused 0 (delta 0), pack-reused 438 (from 1) - Receiving objects: 100% (438/438), 3.84 MiB | 21.51 MiB/s, done. - Resolving deltas: 100% (207/207), done. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + mobileclip 0.1.0 requires clip-benchmark>=1.4.0, which is not installed. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. + tensorflow 2.12.0 requires keras<2.13,>=2.12.0, but you have keras 2.13.1 which is incompatible. tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. + tensorflow 2.12.0 requires tensorboard<2.13,>=2.12, but you have tensorboard 2.13.0 which is incompatible. + tensorflow 2.12.0 requires tensorflow-estimator<2.13,>=2.12.0, but you have tensorflow-estimator 2.13.0 which is incompatible. + tensorflow-cpu 2.13.1 requires numpy<=1.24.3,>=1.22, but you have numpy 1.24.4 which is incompatible. 
+ tensorflow-cpu 2.13.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.12.2 which is incompatible. torchvision 0.17.2+cpu requires torch==2.2.2, but you have torch 2.4.1 which is incompatible. Note: you may need to restart the kernel to use updated packages. -.. code:: ipython3 - - # Fetch `notebook_utils` module - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) - - from notebook_utils import download_file, device_widget - Download checkpoints and load PyTorch model ------------------------------------------- @@ -243,9 +250,9 @@ True .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. 
This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) @@ -259,9 +266,9 @@ True .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/wavmark/__init__.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/wavmark/__init__.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. 
checkpoint = torch.load(resume_path, map_location=torch.device('cpu')) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. 
We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) @@ -411,40 +418,39 @@ documentation 0 No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:283: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:283: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:346: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:346: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! pad_length = max(length - (self.window_size + 1), 0) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:347: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:347: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! slice_start_position = max((self.window_size + 1) - length, 0) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! if pad_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if torch.min(inputs) < left or torch.max(inputs) > right: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if min_bin_width * num_bins > 1.0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:121: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:121: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if min_bin_height * num_bins > 1.0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:171: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:171: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert (discriminant >= 0).all() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? Nodes: - %3293 : Float(1, 2, 43, strides=[86, 43, 1], requires_grad=0, device=cpu) = aten::randn(%3288, %3289, %3290, %3291, %3292) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 - %5559 : Float(1, 192, 153, strides=[29376, 1, 192], requires_grad=0, device=cpu) = aten::randn_like(%m_p, %5554, %5555, %5556, %5557, %5558) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? Nodes: + %3293 : Float(1, 2, 43, strides=[86, 43, 1], requires_grad=0, device=cpu) = aten::randn(%3288, %3289, %3290, %3291, %3292) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 + %5559 : Float(1, 192, 153, strides=[29376, 1, 192], requires_grad=0, device=cpu) = aten::randn_like(%m_p, %5554, %5555, %5556, %5557, %5558) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 This may cause errors in trace checking. 
To disable trace checking, pass check_trace=False to torch.jit.trace() _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 39424]) != torch.Size([1, 1, 38656]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 39680]) != torch.Size([1, 1, 38400]). _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 154, 43]) != torch.Size([1, 1, 151, 43]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 155, 43]) != torch.Size([1, 1, 150, 43]). _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 3. 
of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 154]) != torch.Size([1, 1, 151]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 3. of the traced function does not match the corresponding output of the Python function. Detailed error: + The values for attribute 'shape' do not match: torch.Size([1, 1, 155]) != torch.Size([1, 1, 150]). _check_trace( - 2024-11-05 02:13:33.268258: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -477,16 +483,16 @@ documentation )`. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:836.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py:1562: UserWarning: A window was not provided. A rectangular window will be applied,which is known to cause spectral leakage. Other windows such as torch.hann_window or torch.hamming_window can are recommended to reduce spectral leakage.To suppress this warning and use a rectangular window, explicitly set `window=torch.ones(n_fft, device=)`. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:836.) return forward_call(\*args, \*\*kwargs) @@ -714,7 +720,7 @@ Load speaker embeddings .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/functional.py:666: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/functional.py:666: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:873.) return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined] @@ -869,7 +875,7 @@ And finally, run voice tone conversion with OpenVINO optimized model @@ -887,7 +893,7 @@ And finally, run voice tone conversion with OpenVINO optimized model @@ -1076,7 +1082,7 @@ voice tone conversion online. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/components/dropdown.py:100: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/components/dropdown.py:100: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False. 
warnings.warn( diff --git a/docs/notebooks/optical-character-recognition-with-output_files/optical-character-recognition-with-output_13_0.png b/docs/notebooks/optical-character-recognition-with-output_files/optical-character-recognition-with-output_13_0.png index 435c1891121eb0..b696d287ded448 100644 --- a/docs/notebooks/optical-character-recognition-with-output_files/optical-character-recognition-with-output_13_0.png +++ b/docs/notebooks/optical-character-recognition-with-output_files/optical-character-recognition-with-output_13_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:82b893e29e948379dac42c19763842f7ade2ccf03853c2c07f0b28bf2d58fe17 +oid sha256:c7a830fedc5653fd506c656144decc048cad5a7651c8e498024f0eb0ab8c8e96 size 305482 diff --git a/docs/notebooks/optical-character-recognition-with-output_files/optical-character-recognition-with-output_23_0.png b/docs/notebooks/optical-character-recognition-with-output_files/optical-character-recognition-with-output_23_0.png index 6fd2096da517cd..8ef607e85695bb 100644 --- a/docs/notebooks/optical-character-recognition-with-output_files/optical-character-recognition-with-output_23_0.png +++ b/docs/notebooks/optical-character-recognition-with-output_files/optical-character-recognition-with-output_23_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f91e976b303813a4c4bea13d923dc370c4aabfbf2256f1f37f86faf7005bf5c +oid sha256:7c26cc00284b92b50ecf8f61935c461f7c243736bf210546323eec6b5f68739f size 923830 diff --git a/docs/notebooks/optimize-preprocessing-with-output.rst b/docs/notebooks/optimize-preprocessing-with-output.rst index 96d307be3d692f..d681f9588b9321 100644 --- a/docs/notebooks/optimize-preprocessing-with-output.rst +++ b/docs/notebooks/optimize-preprocessing-with-output.rst @@ -201,15 +201,6 @@ and save it to the disk. .. parsed-literal:: WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. 
`model.compile_metrics` will be empty until you train or evaluate the model. - - -.. parsed-literal:: - - WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 94). These functions will not be directly callable after loading. - - -.. parsed-literal:: - INFO:tensorflow:Assets written to: model/InceptionResNetV2/assets @@ -353,7 +344,7 @@ for mean/scale normalization. .. parsed-literal:: - + @@ -384,7 +375,7 @@ may be specified is input data .. parsed-literal:: - + @@ -422,7 +413,7 @@ then such conversion will be added explicitly. .. parsed-literal:: - + @@ -636,6 +627,6 @@ Compare performance .. parsed-literal:: - IR model in OpenVINO Runtime/CPU with manual image preprocessing: 0.0153 seconds per image, FPS: 65.39 - IR model in OpenVINO Runtime/CPU with preprocessing API: 0.0166 seconds per image, FPS: 60.23 + IR model in OpenVINO Runtime/CPU with manual image preprocessing: 0.0153 seconds per image, FPS: 65.56 + IR model in OpenVINO Runtime/CPU with preprocessing API: 0.0143 seconds per image, FPS: 70.14 diff --git a/docs/notebooks/optimize-preprocessing-with-output_files/optimize-preprocessing-with-output_14_1.png b/docs/notebooks/optimize-preprocessing-with-output_files/optimize-preprocessing-with-output_14_1.png index cca7858e3bc4af..a142093f6e675c 100644 --- a/docs/notebooks/optimize-preprocessing-with-output_files/optimize-preprocessing-with-output_14_1.png +++ b/docs/notebooks/optimize-preprocessing-with-output_files/optimize-preprocessing-with-output_14_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bbd7b81cc8e7a73ea9bcb8be0c0575134f50b6af8f7de23ee9feed645a4cf66c +oid sha256:5712bd24e962ae0e0267607554ebe1f2869c223b108876ce10e5d20fe6285126 size 387941 diff --git a/docs/notebooks/paddle-ocr-webcam-with-output.rst b/docs/notebooks/paddle-ocr-webcam-with-output.rst 
index 9f7510cd5efe96..3fae2e47d99b24 100644 --- a/docs/notebooks/paddle-ocr-webcam-with-output.rst +++ b/docs/notebooks/paddle-ocr-webcam-with-output.rst @@ -76,7 +76,12 @@ Guide =2.12.0, but you have keras 2.13.1 which is incompatible. tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. + tensorflow 2.12.0 requires tensorboard<2.13,>=2.12, but you have tensorboard 2.13.0 which is incompatible. + tensorflow 2.12.0 requires tensorflow-estimator<2.13,>=2.12.0, but you have tensorflow-estimator 2.13.0 which is incompatible. + tensorflow-cpu 2.13.1 requires numpy<=1.24.3,>=1.22, but you have numpy 1.24.4 which is incompatible. + tensorflow-cpu 2.13.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.12.2 which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -209,7 +214,7 @@ Download the Model for Text **Detection** .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-no… + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-no… .. parsed-literal:: @@ -255,7 +260,7 @@ Download the Model for Text **Recognition** .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-no… + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-no… .. 
parsed-literal:: diff --git a/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png b/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png index 38a0d5d593351b..2593a5f1244bc5 100644 --- a/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png +++ b/docs/notebooks/paddle-ocr-webcam-with-output_files/paddle-ocr-webcam-with-output_30_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ac7efd85f2c50b0a189dbf00c0cd2252f362e6469cd014d8f255c53172152c3d -size 591373 +oid sha256:9f0c3a97843163a24439a4c8a7a8206c55f2fb928bb87e985d3835493668af62 +size 593381 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output.rst b/docs/notebooks/paddle-to-openvino-classification-with-output.rst index 25feb9293ee93a..7358ce29c8972c 100644 --- a/docs/notebooks/paddle-to-openvino-classification-with-output.rst +++ b/docs/notebooks/paddle-to-openvino-classification-with-output.rst @@ -56,14 +56,9 @@ Imports .. code:: ipython3 - import platform - - if platform.system() == "Windows": - %pip install -q "paddlepaddle>=2.5.1,<2.6.0" - else: - %pip install -q "paddlepaddle>=2.5.1" + %pip install -q "paddlepaddle>=2.5.1,<2.6.0" %pip install -q "paddleclas>=2.5.2" --no-deps - %pip install -q "prettytable" "ujson" "visualdl>=2.5.3" "faiss-cpu>=1.7.1" Pillow tqdm "matplotlib>=3.4" + %pip install -q "prettytable" "ujson" "visualdl>=2.5.3" "faiss-cpu>=1.7.1" Pillow tqdm "matplotlib>=3.4" "opencv-python" "scikit-learn" # Install openvino package %pip install -q "openvino>=2023.1.0" @@ -73,31 +68,13 @@ Imports Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. 
- paddleclas 2.5.2 requires easydict, which is not installed. - paddleclas 2.5.2 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible. - paddleclas 2.5.2 requires opencv-python==4.6.0.66, but you have opencv-python 4.10.0.84 which is incompatible. + paddleclas 2.6.0 requires easydict, which is not installed. + paddleclas 2.6.0 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible. + paddleclas 2.6.0 requires opencv-python<=4.6.0.66, but you have opencv-python 4.10.0.84 which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. -.. code:: ipython3 - - if platform.system() == "Linux": - !wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb - !sudo dpkg -i libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb - - -.. parsed-literal:: - - --2024-11-05 02:15:59-- http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb - Resolving proxy-dmz.intel.com (proxy-dmz.intel.com)... 10.241.208.166 - Connecting to proxy-dmz.intel.com (proxy-dmz.intel.com)|10.241.208.166|:911... connected. - Proxy request sent, awaiting response... 404 Not Found - 2024-11-05 02:16:00 ERROR 404: Not Found. - - dpkg: error: cannot access archive 'libssl1.1_1.1.1f-1ubuntu2.19_amd64.deb': No such file or directory - - .. code:: ipython3 import time @@ -124,8 +101,8 @@ Imports .. parsed-literal:: - 2024-11-05 02:16:02 INFO: Loading faiss with AVX512 support. - 2024-11-05 02:16:02 INFO: Successfully loaded faiss with AVX512 support. + 2024-11-22 01:57:57 INFO: Loading faiss with AVX512 support. + 2024-11-22 01:57:57 INFO: Successfully loaded faiss with AVX512 support. Settings @@ -209,7 +186,7 @@ inference on that image, and then show the top three prediction results. .. parsed-literal:: - [2024/11/05 02:16:41] ppcls WARNING: The current running environment does not support the use of GPU. 
CPU has been used instead. + [2024/11/22 01:58:21] ppcls WARNING: The current running environment does not support the use of GPU. CPU has been used instead. Labrador retriever, 0.75138 German short-haired pointer, 0.02373 Great Dane, 0.01848 @@ -218,7 +195,7 @@ inference on that image, and then show the top three prediction results. -.. image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_8_1.png +.. image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_7_1.png ``classifier.predict()`` takes an image file name, reads the image, @@ -275,7 +252,7 @@ clipping values. .. parsed-literal:: - 2024-11-05 02:16:42 WARNING: Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). + 2024-11-22 01:58:22 WARNING: Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). .. parsed-literal:: @@ -287,12 +264,12 @@ clipping values. .. parsed-literal:: - + -.. image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_15_3.png +.. image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_14_3.png To decode the labels predicted by the model to names of classes, we need @@ -403,7 +380,7 @@ Notebook `__ for more information. -.. image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_23_1.png +.. image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_22_1.png Timing and Comparison @@ -462,7 +439,7 @@ Note that many optimizations are possible to improve the performance. .. 
parsed-literal:: - PaddlePaddle model on CPU: 0.0074 seconds per image, FPS: 134.37 + PaddlePaddle model on CPU: 0.0069 seconds per image, FPS: 144.32 PaddlePaddle result: Labrador retriever, 0.75138 @@ -473,7 +450,7 @@ Note that many optimizations are possible to improve the performance. -.. image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_27_1.png +.. image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_26_1.png Select inference device @@ -523,7 +500,7 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0027 seconds per image, FPS: 373.31 + OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0026 seconds per image, FPS: 380.57 OpenVINO result: Labrador retriever, 0.74909 @@ -534,7 +511,7 @@ select device from dropdown list for running inference using OpenVINO -.. image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_30_1.png +.. 
image:: paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_29_1.png References diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_14_3.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_14_3.png new file mode 100644 index 00000000000000..35e0c81123f0a1 --- /dev/null +++ b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_14_3.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99b8398ef76f2959d210e2d30bb44420f8d34a885a4480bc26e2af6627ba7119 +size 120883 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_15_3.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_15_3.png deleted file mode 100644 index 97c14460591759..00000000000000 --- a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_15_3.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ba922b89ca992098fd516d86f4d0c97858a8264664f9a49d431978b790a9135f -size 120883 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_22_1.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_22_1.png new file mode 100644 index 00000000000000..35c91e327be1ce --- /dev/null +++ b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_22_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1381e5922057c6bc70eb4ba9a04f3164382ad01191d320c1acbc819e7261f8c1 +size 224886 diff --git 
a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_23_1.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_23_1.png deleted file mode 100644 index 74feaaeb12e5bc..00000000000000 --- a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_23_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b77eb48f499b17e5306d574b90a5d123ab82440225c034a20256a0ce6378cba -size 224886 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_26_1.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_26_1.png new file mode 100644 index 00000000000000..35c91e327be1ce --- /dev/null +++ b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_26_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1381e5922057c6bc70eb4ba9a04f3164382ad01191d320c1acbc819e7261f8c1 +size 224886 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_27_1.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_27_1.png deleted file mode 100644 index 74feaaeb12e5bc..00000000000000 --- a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_27_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b77eb48f499b17e5306d574b90a5d123ab82440225c034a20256a0ce6378cba -size 224886 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_29_1.png 
b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_29_1.png new file mode 100644 index 00000000000000..35c91e327be1ce --- /dev/null +++ b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_29_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1381e5922057c6bc70eb4ba9a04f3164382ad01191d320c1acbc819e7261f8c1 +size 224886 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_30_1.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_30_1.png deleted file mode 100644 index 74feaaeb12e5bc..00000000000000 --- a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_30_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b77eb48f499b17e5306d574b90a5d123ab82440225c034a20256a0ce6378cba -size 224886 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_7_1.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_7_1.png new file mode 100644 index 00000000000000..35c91e327be1ce --- /dev/null +++ b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_7_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1381e5922057c6bc70eb4ba9a04f3164382ad01191d320c1acbc819e7261f8c1 +size 224886 diff --git a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_8_1.png b/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_8_1.png deleted file mode 100644 index 
74feaaeb12e5bc..00000000000000 --- a/docs/notebooks/paddle-to-openvino-classification-with-output_files/paddle-to-openvino-classification-with-output_8_1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b77eb48f499b17e5306d574b90a5d123ab82440225c034a20256a0ce6378cba -size 224886 diff --git a/docs/notebooks/paint-by-example-with-output.rst b/docs/notebooks/paint-by-example-with-output.rst deleted file mode 100644 index 2f1371652c5750..00000000000000 --- a/docs/notebooks/paint-by-example-with-output.rst +++ /dev/null @@ -1,1359 +0,0 @@ -Paint By Example: Exemplar-based Image Editing with Diffusion Models -==================================================================== - - -**Table of contents:** - - -- `Stable Diffusion in Diffusers - library <#stable-diffusion-in-diffusers-library>`__ -- `Download default images <#download-default-images>`__ -- `Convert models to OpenVINO Intermediate representation (IR) - format <#convert-models-to-openvino-intermediate-representation-ir-format>`__ -- `Prepare Inference pipeline <#prepare-inference-pipeline>`__ -- `Select inference device <#select-inference-device>`__ -- `Configure Inference Pipeline <#configure-inference-pipeline>`__ -- `Quantization <#quantization>`__ - - - `Prepare Inference pipeline <#prepare-inference-pipeline>`__ - - `Run quantization <#run-quantization>`__ - - `Run inference and compare inference - time <#run-inference-and-compare-inference-time>`__ - - `Compare UNet file size <#compare-unet-file-size>`__ - -- `Interactive inference <#interactive-inference>`__ - -Installation Instructions -~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is a self-contained example that relies solely on its own code. - -We recommend running the notebook in a virtual environment. You only -need a Jupyter server to start. For details, please refer to -`Installation -Guide `__. 
- -Stable Diffusion in Diffusers library -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To work with Stable Diffusion, -we will use the Hugging Face -`Diffusers `__ library. To -experiment with in-painting we can use Diffusers which exposes the -`StableDiffusionInpaintPipeline `__ -similar to the `other Diffusers -pipelines `__. -The code below demonstrates how to create -``StableDiffusionInpaintPipeline`` using -``stable-diffusion-2-inpainting``. To create the drawing tool we will -install Gradio for handling user interaction. - -This is the overall flow of the application: - -.. figure:: https://user-images.githubusercontent.com/103226580/236954918-f364b227-293c-4f78-a9bf-9dcebcb1034a.png - :alt: Flow Diagram - - Flow Diagram - -.. code:: ipython3 - - %pip install -q "torch>=2.1" torchvision --extra-index-url "https://download.pytorch.org/whl/cpu" - %pip install -q "diffusers>=0.25.0" "peft>=0.6.2" "openvino>=2023.2.0" "transformers>=4.25.1" "matplotlib>=3.4" ipywidgets opencv-python pillow "nncf>=2.7.0" "gradio==3.44.1" tqdm - -Download the model from `HuggingFace -Paint-by-Example `__. -This might take several minutes because it is over 5GB - -.. code:: ipython3 - - from diffusers import DiffusionPipeline - from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler - - - pipeline = DiffusionPipeline.from_pretrained("Fantasy-Studio/Paint-By-Example") - - scheduler_inpaint = DDIMScheduler.from_config(pipeline.scheduler.config) - -.. code:: ipython3 - - import gc - - extractor = pipeline.feature_extractor - image_encoder = pipeline.image_encoder - image_encoder.eval() - unet_inpaint = pipeline.unet - unet_inpaint.eval() - vae_inpaint = pipeline.vae - vae_inpaint.eval() - - del pipeline - gc.collect(); - -Download default images -~~~~~~~~~~~~~~~~~~~~~~~ - - - -Download default images. - -.. 
code:: ipython3 - - # Fetch `notebook_utils` module - import requests - - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - - open("notebook_utils.py", "w").write(r.text) - - from notebook_utils import download_file, device_widget, quantization_widget - - download_file( - "https://github-production-user-asset-6210df.s3.amazonaws.com/103226580/286377210-edc98e97-0e43-4796-b771-dacd074c39ea.png", - "0.png", - "data/image", - ) - - download_file( - "https://github-production-user-asset-6210df.s3.amazonaws.com/103226580/286377233-b2c2d902-d379-415a-8183-5bdd37c52429.png", - "1.png", - "data/image", - ) - - download_file( - "https://github-production-user-asset-6210df.s3.amazonaws.com/103226580/286377248-da1db61e-3521-4cdb-85c8-1386d360ce22.png", - "2.png", - "data/image", - ) - - download_file( - "https://github-production-user-asset-6210df.s3.amazonaws.com/103226580/286377279-fa496f17-e850-4351-87c5-2552dfbc4633.jpg", - "bird.jpg", - "data/reference", - ) - - download_file( - "https://github-production-user-asset-6210df.s3.amazonaws.com/103226580/286377298-06a25ff2-84d8-4d46-95cd-8c25efa690d8.jpg", - "car.jpg", - "data/reference", - ) - - download_file( - "https://github-production-user-asset-6210df.s3.amazonaws.com/103226580/286377318-8841a801-1933-4523-a433-7d2fb64c47e6.jpg", - "dog.jpg", - "data/reference", - ) - -Convert models to OpenVINO Intermediate representation (IR) format -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -Adapted from `Stable Diffusion v2 Infinite Zoom -notebook `__ - -.. code:: ipython3 - - from pathlib import Path - import torch - import numpy as np - import openvino as ov - - model_dir = Path("model") - model_dir.mkdir(exist_ok=True) - sd2_inpainting_model_dir = Path("model/paint_by_example") - sd2_inpainting_model_dir.mkdir(exist_ok=True) - -Functions to convert to OpenVINO IR format - -.. 
code:: ipython3 - - def cleanup_torchscript_cache(): - """ - Helper for removing cached model representation - """ - torch._C._jit_clear_class_registry() - torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore() - torch.jit._state._clear_class_state() - - - def convert_image_encoder(image_encoder: torch.nn.Module, ir_path: Path): - """ - Convert Image Encoder model to IR. - Function accepts pipeline, prepares example inputs for conversion - Parameters: - image_encoder (torch.nn.Module): image encoder PyTorch model - ir_path (Path): File for storing model - Returns: - None - """ - - class ImageEncoderWrapper(torch.nn.Module): - def __init__(self, image_encoder): - super().__init__() - self.image_encoder = image_encoder - - def forward(self, image): - image_embeddings, negative_prompt_embeds = self.image_encoder(image, return_uncond_vector=True) - return image_embeddings, negative_prompt_embeds - - if not ir_path.exists(): - image_encoder = ImageEncoderWrapper(image_encoder) - image_encoder.eval() - input_ids = torch.randn((1, 3, 224, 224)) - # switch model to inference mode - - # disable gradients calculation for reducing memory consumption - with torch.no_grad(): - ov_model = ov.convert_model(image_encoder, example_input=input_ids, input=([1, 3, 224, 224],)) - ov.save_model(ov_model, ir_path) - del ov_model - cleanup_torchscript_cache() - print("Image Encoder successfully converted to IR") - - - def convert_unet( - unet: torch.nn.Module, - ir_path: Path, - num_channels: int = 4, - width: int = 64, - height: int = 64, - ): - """ - Convert Unet model to IR format. 
- Function accepts pipeline, prepares example inputs for conversion - Parameters: - unet (torch.nn.Module): UNet PyTorch model - ir_path (Path): File for storing model - num_channels (int, optional, 4): number of input channels - width (int, optional, 64): input width - height (int, optional, 64): input height - Returns: - None - """ - dtype_mapping = {torch.float32: ov.Type.f32, torch.float64: ov.Type.f64} - if not ir_path.exists(): - # prepare inputs - encoder_hidden_state = torch.ones((2, 1, 768)) - latents_shape = (2, num_channels, width, height) - latents = torch.randn(latents_shape) - t = torch.from_numpy(np.array(1, dtype=np.float32)) - unet.eval() - dummy_inputs = (latents, t, encoder_hidden_state) - input_info = [] - for input_tensor in dummy_inputs: - shape = ov.PartialShape(tuple(input_tensor.shape)) - element_type = dtype_mapping[input_tensor.dtype] - input_info.append((shape, element_type)) - - with torch.no_grad(): - ov_model = ov.convert_model(unet, example_input=dummy_inputs, input=input_info) - ov.save_model(ov_model, ir_path) - del ov_model - cleanup_torchscript_cache() - print("U-Net successfully converted to IR") - - - def convert_vae_encoder(vae: torch.nn.Module, ir_path: Path, width: int = 512, height: int = 512): - """ - Convert VAE model to IR format. 
- Function accepts VAE model, creates wrapper class for export only necessary for inference part, - prepares example inputs for conversion, - Parameters: - vae (torch.nn.Module): VAE PyTorch model - ir_path (Path): File for storing model - width (int, optional, 512): input width - height (int, optional, 512): input height - Returns: - None - """ - - class VAEEncoderWrapper(torch.nn.Module): - def __init__(self, vae): - super().__init__() - self.vae = vae - - def forward(self, image): - latents = self.vae.encode(image).latent_dist.sample() - return latents - - if not ir_path.exists(): - vae_encoder = VAEEncoderWrapper(vae) - vae_encoder.eval() - image = torch.zeros((1, 3, width, height)) - with torch.no_grad(): - ov_model = ov.convert_model(vae_encoder, example_input=image, input=([1, 3, width, height],)) - ov.save_model(ov_model, ir_path) - del ov_model - cleanup_torchscript_cache() - print("VAE encoder successfully converted to IR") - - - def convert_vae_decoder(vae: torch.nn.Module, ir_path: Path, width: int = 64, height: int = 64): - """ - Convert VAE decoder model to IR format. 
- Function accepts VAE model, creates wrapper class for export only necessary for inference part, - prepares example inputs for conversion, - Parameters: - vae (torch.nn.Module): VAE model - ir_path (Path): File for storing model - width (int, optional, 64): input width - height (int, optional, 64): input height - Returns: - None - """ - - class VAEDecoderWrapper(torch.nn.Module): - def __init__(self, vae): - super().__init__() - self.vae = vae - - def forward(self, latents): - latents = 1 / 0.18215 * latents - return self.vae.decode(latents) - - if not ir_path.exists(): - vae_decoder = VAEDecoderWrapper(vae) - latents = torch.zeros((1, 4, width, height)) - - vae_decoder.eval() - with torch.no_grad(): - ov_model = ov.convert_model(vae_decoder, example_input=latents, input=([1, 4, width, height],)) - ov.save_model(ov_model, ir_path) - del ov_model - cleanup_torchscript_cache() - print("VAE decoder successfully converted to ") - -Do the conversion of the in-painting model: - -.. code:: ipython3 - - IMAGE_ENCODER_OV_PATH_INPAINT = sd2_inpainting_model_dir / "image_encoder.xml" - - if not IMAGE_ENCODER_OV_PATH_INPAINT.exists(): - convert_image_encoder(image_encoder, IMAGE_ENCODER_OV_PATH_INPAINT) - else: - print(f"Image encoder will be loaded from {IMAGE_ENCODER_OV_PATH_INPAINT}") - - del image_encoder - gc.collect(); - -Do the conversion of the Unet model - -.. code:: ipython3 - - UNET_OV_PATH_INPAINT = sd2_inpainting_model_dir / "unet.xml" - if not UNET_OV_PATH_INPAINT.exists(): - convert_unet(unet_inpaint, UNET_OV_PATH_INPAINT, num_channels=9, width=64, height=64) - del unet_inpaint - gc.collect() - else: - del unet_inpaint - print(f"U-Net will be loaded from {UNET_OV_PATH_INPAINT}") - gc.collect(); - -Do the conversion of the VAE Encoder model - -.. 
code:: ipython3 - - VAE_ENCODER_OV_PATH_INPAINT = sd2_inpainting_model_dir / "vae_encoder.xml" - - if not VAE_ENCODER_OV_PATH_INPAINT.exists(): - convert_vae_encoder(vae_inpaint, VAE_ENCODER_OV_PATH_INPAINT, 512, 512) - else: - print(f"VAE encoder will be loaded from {VAE_ENCODER_OV_PATH_INPAINT}") - - VAE_DECODER_OV_PATH_INPAINT = sd2_inpainting_model_dir / "vae_decoder.xml" - if not VAE_DECODER_OV_PATH_INPAINT.exists(): - convert_vae_decoder(vae_inpaint, VAE_DECODER_OV_PATH_INPAINT, 64, 64) - else: - print(f"VAE decoder will be loaded from {VAE_DECODER_OV_PATH_INPAINT}") - - del vae_inpaint - gc.collect(); - -Prepare Inference pipeline -~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -Function to prepare the mask and masked image. - -Adapted from `Stable Diffusion v2 Infinite Zoom -notebook `__ - -The main difference is that instead of encoding a text prompt it will -now encode an image as the prompt. - -This is the detailed flowchart for the pipeline: - -.. figure:: https://github.com/openvinotoolkit/openvino_notebooks/assets/103226580/cde2d5c4-2540-4a45-ad9c-339f7a69459d - :alt: pipeline-flowchart - - pipeline-flowchart - -.. code:: ipython3 - - import inspect - from typing import Optional, Union, Dict - - import PIL - import cv2 - - from transformers import CLIPImageProcessor - from diffusers.pipelines.pipeline_utils import DiffusionPipeline - from openvino.runtime import Model - - - def prepare_mask_and_masked_image(image: PIL.Image.Image, mask: PIL.Image.Image): - """ - Prepares a pair (image, mask) to be consumed by the Stable Diffusion pipeline. This means that those inputs will be - converted to ``np.array`` with shapes ``batch x channels x height x width`` where ``channels`` is ``3`` for the - ``image`` and ``1`` for the ``mask``. - - The ``image`` will be converted to ``np.float32`` and normalized to be in ``[-1, 1]``. The ``mask`` will be - binarized (``mask > 0.5``) and cast to ``np.float32`` too. - - Args: - image (Union[np.array, PIL.Image]): The image to inpaint. 
- It can be a ``PIL.Image``, or a ``height x width x 3`` ``np.array`` - mask (_type_): The mask to apply to the image, i.e. regions to inpaint. - It can be a ``PIL.Image``, or a ``height x width`` ``np.array``. - - Returns: - tuple[np.array]: The pair (mask, masked_image) as ``torch.Tensor`` with 4 - dimensions: ``batch x channels x height x width``. - """ - if isinstance(image, (PIL.Image.Image, np.ndarray)): - image = [image] - - if isinstance(image, list) and isinstance(image[0], PIL.Image.Image): - image = [np.array(i.convert("RGB"))[None, :] for i in image] - image = np.concatenate(image, axis=0) - elif isinstance(image, list) and isinstance(image[0], np.ndarray): - image = np.concatenate([i[None, :] for i in image], axis=0) - - image = image.transpose(0, 3, 1, 2) - image = image.astype(np.float32) / 127.5 - 1.0 - - # preprocess mask - if isinstance(mask, (PIL.Image.Image, np.ndarray)): - mask = [mask] - - if isinstance(mask, list) and isinstance(mask[0], PIL.Image.Image): - mask = np.concatenate([np.array(m.convert("L"))[None, None, :] for m in mask], axis=0) - mask = mask.astype(np.float32) / 255.0 - elif isinstance(mask, list) and isinstance(mask[0], np.ndarray): - mask = np.concatenate([m[None, None, :] for m in mask], axis=0) - - mask = 1 - mask - - mask[mask < 0.5] = 0 - mask[mask >= 0.5] = 1 - - masked_image = image * mask - - return mask, masked_image - -Class for the pipeline which will connect all the models together: VAE -decode –> image encode –> tokenizer –> Unet –> VAE model –> scheduler - -.. code:: ipython3 - - class OVStableDiffusionInpaintingPipeline(DiffusionPipeline): - def __init__( - self, - vae_decoder: Model, - image_encoder: Model, - image_processor: CLIPImageProcessor, - unet: Model, - scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler], - vae_encoder: Model = None, - ): - """ - Pipeline for text-to-image generation using Stable Diffusion. 
- Parameters: - vae_decoder (Model): - Variational Auto-Encoder (VAE) Model to decode images to and from latent representations. - image_encoder (Model): - https://huggingface.co/Fantasy-Studio/Paint-by-Example/blob/main/image_encoder/config.json - tokenizer (CLIPTokenizer): - Tokenizer of class CLIPTokenizer(https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). - unet (Model): Conditional U-Net architecture to denoise the encoded image latents. - vae_encoder (Model): - Variational Auto-Encoder (VAE) Model to encode images to latent representation. - scheduler (SchedulerMixin): - A scheduler to be used in combination with unet to denoise the encoded image latents. Can be one of - DDIMScheduler, LMSDiscreteScheduler, or PNDMScheduler. - """ - super().__init__() - self.scheduler = scheduler - self.vae_decoder = vae_decoder - self.vae_encoder = vae_encoder - self.image_encoder = image_encoder - self.unet = unet - self.register_to_config(unet=unet) - self._unet_output = unet.output(0) - self._vae_d_output = vae_decoder.output(0) - self._vae_e_output = vae_encoder.output(0) if vae_encoder is not None else None - self.height = self.unet.input(0).shape[2] * 8 - self.width = self.unet.input(0).shape[3] * 8 - self.image_processor = image_processor - - def prepare_mask_latents( - self, - mask, - masked_image, - height=512, - width=512, - do_classifier_free_guidance=True, - ): - """ - Prepare mask as Unet nput and encode input masked image to latent space using vae encoder - - Parameters: - mask (np.array): input mask array - masked_image (np.array): masked input image tensor - heigh (int, *optional*, 512): generated image height - width (int, *optional*, 512): generated image width - do_classifier_free_guidance (bool, *optional*, True): whether to use classifier free guidance or not - Returns: - mask (np.array): resized mask tensor - masked_image_latents (np.array): masked image encoded into latent space using VAE - """ - mask = 
torch.nn.functional.interpolate(torch.from_numpy(mask), size=(height // 8, width // 8)) - mask = mask.numpy() - - # encode the mask image into latents space so we can concatenate it to the latents - masked_image_latents = self.vae_encoder(masked_image)[self._vae_e_output] - masked_image_latents = 0.18215 * masked_image_latents - - mask = np.concatenate([mask] * 2) if do_classifier_free_guidance else mask - masked_image_latents = np.concatenate([masked_image_latents] * 2) if do_classifier_free_guidance else masked_image_latents - return mask, masked_image_latents - - def __call__( - self, - image: PIL.Image.Image, - mask_image: PIL.Image.Image, - reference_image: PIL.Image.Image, - num_inference_steps: Optional[int] = 50, - guidance_scale: Optional[float] = 7.5, - eta: Optional[float] = 0, - output_type: Optional[str] = "pil", - seed: Optional[int] = None, - ): - """ - Function invoked when calling the pipeline for generation. - Parameters: - image (PIL.Image.Image): - Source image for inpainting. - mask_image (PIL.Image.Image): - Mask area for inpainting - reference_image (PIL.Image.Image): - Reference image to inpaint in mask area - num_inference_steps (int, *optional*, defaults to 50): - The number of denoising steps. More denoising steps usually lead to a higher quality image at the - expense of slower inference. - guidance_scale (float, *optional*, defaults to 7.5): - Guidance scale as defined in Classifier-Free Diffusion Guidance(https://arxiv.org/abs/2207.12598). - guidance_scale is defined as `w` of equation 2. - Higher guidance scale encourages to generate images that are closely linked to the text prompt, - usually at the expense of lower image quality. - eta (float, *optional*, defaults to 0.0): - Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to - [DDIMScheduler], will be ignored for others. - output_type (`str`, *optional*, defaults to "pil"): - The output format of the generate image. 
Choose between - [PIL](https://pillow.readthedocs.io/en/stable/): PIL.Image.Image or np.array. - seed (int, *optional*, None): - Seed for random generator state initialization. - Returns: - Dictionary with keys: - sample - the last generated image PIL.Image.Image or np.array - """ - if seed is not None: - np.random.seed(seed) - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) - # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` - # corresponds to doing no classifier free guidance. - do_classifier_free_guidance = guidance_scale > 1.0 - - # get reference image embeddings - image_embeddings = self._encode_image(reference_image, do_classifier_free_guidance=do_classifier_free_guidance) - - # prepare mask - mask, masked_image = prepare_mask_and_masked_image(image, mask_image) - # set timesteps - accepts_offset = "offset" in set(inspect.signature(self.scheduler.set_timesteps).parameters.keys()) - extra_set_kwargs = {} - if accepts_offset: - extra_set_kwargs["offset"] = 1 - - self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs) - timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, 1) - latent_timestep = timesteps[:1] - - # get the initial random noise unless the user supplied it - latents, meta = self.prepare_latents(latent_timestep) - mask, masked_image_latents = self.prepare_mask_latents( - mask, - masked_image, - do_classifier_free_guidance=do_classifier_free_guidance, - ) - - # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature - # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. 
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 - # and should be between [0, 1] - accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) - extra_step_kwargs = {} - if accepts_eta: - extra_step_kwargs["eta"] = eta - - for t in self.progress_bar(timesteps): - # expand the latents if we are doing classifier free guidance - latent_model_input = np.concatenate([latents] * 2) if do_classifier_free_guidance else latents - latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) - latent_model_input = np.concatenate([latent_model_input, masked_image_latents, mask], axis=1) - # predict the noise residual - noise_pred = self.unet([latent_model_input, np.array(t, dtype=np.float32), image_embeddings])[self._unet_output] - # perform guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = noise_pred[0], noise_pred[1] - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) - - # compute the previous noisy sample x_t -> x_t-1 - latents = self.scheduler.step( - torch.from_numpy(noise_pred), - t, - torch.from_numpy(latents), - **extra_step_kwargs, - )["prev_sample"].numpy() - # scale and decode the image latents with vae - image = self.vae_decoder(latents)[self._vae_d_output] - - image = self.postprocess_image(image, meta, output_type) - return {"sample": image} - - def _encode_image(self, image: PIL.Image.Image, do_classifier_free_guidance: bool = True): - """ - Encodes the image into image encoder hidden states. 
- - Parameters: - image (PIL.Image.Image): base image to encode - do_classifier_free_guidance (bool): whether to use classifier free guidance or not - Returns: - image_embeddings (np.ndarray): image encoder hidden states - """ - processed_image = self.image_processor(image) - processed_image = processed_image["pixel_values"][0] - processed_image = np.expand_dims(processed_image, axis=0) - - output = self.image_encoder(processed_image) - image_embeddings = output[self.image_encoder.output(0)] - negative_embeddings = output[self.image_encoder.output(1)] - - image_embeddings = np.concatenate([negative_embeddings, image_embeddings]) - - return image_embeddings - - def prepare_latents(self, latent_timestep: torch.Tensor = None): - """ - Function for getting initial latents for starting generation - - Parameters: - latent_timestep (torch.Tensor, *optional*, None): - Predicted by scheduler initial step for image generation, required for latent image mixing with nosie - Returns: - latents (np.ndarray): - Image encoded in latent space - """ - latents_shape = (1, 4, self.height // 8, self.width // 8) - noise = np.random.randn(*latents_shape).astype(np.float32) - # if we use LMSDiscreteScheduler, let's make sure latents are mulitplied by sigmas - if isinstance(self.scheduler, LMSDiscreteScheduler): - noise = noise * self.scheduler.sigmas[0].numpy() - return noise, {} - - def postprocess_image(self, image: np.ndarray, meta: Dict, output_type: str = "pil"): - """ - Postprocessing for decoded image. Takes generated image decoded by VAE decoder, unpad it to initila image size (if required), - normalize and convert to [0, 255] pixels range. 
Optionally, convertes it from np.ndarray to PIL.Image format - - Parameters: - image (np.ndarray): - Generated image - meta (Dict): - Metadata obtained on latents preparing step, can be empty - output_type (str, *optional*, pil): - Output format for result, can be pil or numpy - Returns: - image (List of np.ndarray or PIL.Image.Image): - Postprocessed images - """ - if "padding" in meta: - pad = meta["padding"] - (_, end_h), (_, end_w) = pad[1:3] - h, w = image.shape[2:] - unpad_h = h - end_h - unpad_w = w - end_w - image = image[:, :, :unpad_h, :unpad_w] - image = np.clip(image / 2 + 0.5, 0, 1) - image = np.transpose(image, (0, 2, 3, 1)) - # 9. Convert to PIL - if output_type == "pil": - image = self.numpy_to_pil(image) - if "src_height" in meta: - orig_height, orig_width = meta["src_height"], meta["src_width"] - image = [img.resize((orig_width, orig_height), PIL.Image.Resampling.LANCZOS) for img in image] - else: - if "src_height" in meta: - orig_height, orig_width = meta["src_height"], meta["src_width"] - image = [cv2.resize(img, (orig_width, orig_width)) for img in image] - return image - - def get_timesteps(self, num_inference_steps: int, strength: float): - """ - Helper function for getting scheduler timesteps for generation - In case of image-to-image generation, it updates number of steps according to strength - - Parameters: - num_inference_steps (int): - number of inference steps for generation - strength (float): - value between 0.0 and 1.0, that controls the amount of noise that is added to the input image. - Values that approach 1.0 allow for lots of variations but will also produce images that are not semantically consistent with the input. 
- """ - # get the original timestep using init_timestep - init_timestep = min(int(num_inference_steps * strength), num_inference_steps) - - t_start = max(num_inference_steps - init_timestep, 0) - timesteps = self.scheduler.timesteps[t_start:] - - return timesteps, num_inference_steps - t_start - -Select inference device -~~~~~~~~~~~~~~~~~~~~~~~ - - - -select device from dropdown list for running inference using OpenVINO - -.. code:: ipython3 - - device = device_widget() - - device - - - - -.. parsed-literal:: - - Dropdown(description='Device:', index=4, options=('CPU', 'GPU.0', 'GPU.1', 'GPU.2', 'AUTO'), value='AUTO') - - - -Configure Inference Pipeline -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -Configuration steps: 1. Load models on device 2. Configure tokenizer and -scheduler 3. Create instance of OvStableDiffusionInpaintingPipeline -class - -This can take a while to run. - -.. code:: ipython3 - - ov_config = {"INFERENCE_PRECISION_HINT": "f32"} if device.value != "CPU" else {} - - core = ov.Core() - - - def get_ov_pipeline(): - image_encoder_inpaint = core.compile_model(IMAGE_ENCODER_OV_PATH_INPAINT, device.value) - unet_model_inpaint = core.compile_model(UNET_OV_PATH_INPAINT, device.value) - vae_decoder_inpaint = core.compile_model(VAE_DECODER_OV_PATH_INPAINT, device.value, ov_config) - vae_encoder_inpaint = core.compile_model(VAE_ENCODER_OV_PATH_INPAINT, device.value, ov_config) - - ov_pipe_inpaint = OVStableDiffusionInpaintingPipeline( - image_processor=extractor, - image_encoder=image_encoder_inpaint, - unet=unet_model_inpaint, - vae_encoder=vae_encoder_inpaint, - vae_decoder=vae_decoder_inpaint, - scheduler=scheduler_inpaint, - ) - - return ov_pipe_inpaint - - - ov_pipe_inpaint = get_ov_pipeline() - -Quantization ------------- - - - -`NNCF `__ enables -post-training quantization by adding quantization layers into model -graph and then using a subset of the training dataset to initialize the -parameters of these additional quantization layers. 
Quantized operations -are executed in ``INT8`` instead of ``FP32``/``FP16`` making model -inference faster. - -According to ``StableDiffusionInpaintingPipeline`` structure, UNet used -for iterative denoising of input. It means that model runs in the cycle -repeating inference on each diffusion step, while other parts of -pipeline take part only once. That is why computation cost and speed of -UNet denoising becomes the critical path in the pipeline. Quantizing the -rest of the SD pipeline does not significantly improve inference -performance but can lead to a substantial degradation of accuracy. - -The optimization process contains the following steps: - -1. Create a calibration dataset for quantization. -2. Run ``nncf.quantize()`` to obtain quantized model. -3. Save the ``INT8`` model using ``openvino.save_model()`` function. - -Please select below whether you would like to run quantization to -improve model inference speed. - -.. code:: ipython3 - - UNET_INT8_OV_PATH = Path("model/unet_int8.xml") - int8_ov_pipe_inpaint = None - - - to_quantize = quantization_widget() - - to_quantize - - - - -.. parsed-literal:: - - Checkbox(value=True, description='Quantization') - - - -Let’s load ``skip magic`` extension to skip quantization if -``to_quantize`` is not selected - -.. code:: ipython3 - - # Fetch `skip_kernel_extension` module - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", - ) - open("skip_kernel_extension.py", "w").write(r.text) - - if to_quantize.value and "GPU" in device.value: - to_quantize.value = False - - %load_ext skip_kernel_extension - -Prepare calibration dataset -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -We use 3 examples from -`Paint-by-Example `__ -to create a calibration dataset. - -.. 
code:: ipython3 - - import PIL - import requests - from io import BytesIO - - - def download_image(url): - response = requests.get(url) - return PIL.Image.open(BytesIO(response.content)).convert("RGB") - - - example1 = [ - "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/image/example_1.png?raw=true", - "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/mask/example_1.png?raw=true", - "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/reference/example_1.jpg?raw=true", - ] - example2 = [ - "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/image/example_2.png?raw=true", - "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/mask/example_2.png?raw=true", - "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/reference/example_2.jpg?raw=true", - ] - example3 = [ - "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/image/example_3.png?raw=true", - "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/mask/example_3.png?raw=true", - "https://github.com/Fantasy-Studio/Paint-by-Example/blob/main/examples/reference/example_3.jpg?raw=true", - ] - examples = [example1, example2, example3] - - - img_examples = [] - for init_image_url, mask_image_url, example_image_url in examples: - init_image = download_image(init_image_url).resize((512, 512)) - mask_image = download_image(mask_image_url).resize((512, 512)) - example_image = download_image(example_image_url).resize((512, 512)) - img_examples.append((init_image, mask_image, example_image)) - -To collect intermediate model inputs for calibration we should customize -``CompiledModel``. - -.. 
code:: ipython3 - - %%skip not $to_quantize.value - - from tqdm.notebook import tqdm - from transformers import set_seed - from typing import Any, Dict, List - - - class CompiledModelDecorator(ov.CompiledModel): - def __init__(self, compiled_model, data_cache: List[Any] = None): - super().__init__(compiled_model) - self.data_cache = data_cache if data_cache else [] - - def __call__(self, *args, **kwargs): - self.data_cache.append(*args) - return super().__call__(*args, **kwargs) - - - def collect_calibration_data(pipeline) -> List[Dict]: - original_unet = pipeline.unet - pipeline.unet = CompiledModelDecorator(original_unet) - pipeline.set_progress_bar_config(disable=True) - prev_example_image = None - for init_image, mask_image, example_image in img_examples: - - _ = pipeline( - image=init_image, - mask_image=mask_image, - reference_image=example_image, - ) - if prev_example_image: - _ = pipeline( - image=init_image, - mask_image=mask_image, - reference_image=prev_example_image, - ) - prev_example_image = example_image - - - calibration_dataset = pipeline.unet.data_cache - pipeline.set_progress_bar_config(disable=False) - pipeline.unet = original_unet - - return calibration_dataset - -.. code:: ipython3 - - %%skip not $to_quantize.value - - UNET_INT8_OV_PATH = Path("model/unet_int8.xml") - if not UNET_INT8_OV_PATH.exists(): - unet_calibration_data = collect_calibration_data(ov_pipe_inpaint) - -Run quantization -~~~~~~~~~~~~~~~~ - - - -Create a quantized model from the pre-trained converted OpenVINO model. - - **NOTE**: Quantization is time and memory consuming operation. - Running quantization code below may take some time. - -.. 
code:: ipython3 - - %%skip not $to_quantize.value - - import nncf - - - def get_quantized_pipeline(): - if UNET_INT8_OV_PATH.exists(): - print("Loading quantized model") - quantized_unet = core.read_model(UNET_INT8_OV_PATH) - else: - unet = core.read_model(UNET_OV_PATH_INPAINT) - quantized_unet = nncf.quantize( - model=unet, - preset=nncf.QuantizationPreset.MIXED, - calibration_dataset=nncf.Dataset(unet_calibration_data), - model_type=nncf.ModelType.TRANSFORMER, - ) - ov.save_model(quantized_unet, UNET_INT8_OV_PATH) - - unet_optimized = core.compile_model(UNET_INT8_OV_PATH, device.value) - - image_encoder_inpaint = core.compile_model(IMAGE_ENCODER_OV_PATH_INPAINT, device.value) - vae_decoder_inpaint = core.compile_model(VAE_DECODER_OV_PATH_INPAINT, device.value, ov_config) - vae_encoder_inpaint = core.compile_model(VAE_ENCODER_OV_PATH_INPAINT, device.value, ov_config) - - int8_ov_pipe_inpaint = OVStableDiffusionInpaintingPipeline( - image_processor=extractor, - image_encoder=image_encoder_inpaint, - unet=unet_optimized, - vae_encoder=vae_encoder_inpaint, - vae_decoder=vae_decoder_inpaint, - scheduler=scheduler_inpaint, - ) - - return int8_ov_pipe_inpaint - - - int8_ov_pipe_inpaint = get_quantized_pipeline() - - -.. parsed-literal:: - - INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, openvino - - - -.. parsed-literal:: - - Output() - - - - - - - - - - - - - - - - - - -.. parsed-literal:: - - Output() - - - - - - - - - - - - - - - - - -.. parsed-literal:: - - INFO:nncf:121 ignored nodes were found by name in the NNCFGraph - - - -.. parsed-literal:: - - Output() - - - - - - - - - - - - - - - - - - -.. parsed-literal:: - - Output() - - - - - - - - - - - - - - - - - -Run inference and compare inference time -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - -OV pipeline: - -.. 
code:: ipython3 - - init_image, mask_image, example_image = img_examples[1] - - - ov_image = ov_pipe_inpaint(image=init_image, mask_image=mask_image, reference_image=example_image, seed=2) - -Quantized pipeline: - -.. code:: ipython3 - - %%skip not $to_quantize.value - - int8_image = int8_ov_pipe_inpaint(image=init_image, mask_image=mask_image, reference_image=example_image, seed=2) - -.. code:: ipython3 - - %%skip not $to_quantize.value - - import matplotlib.pyplot as plt - from PIL import Image - - def visualize_results(orig_img:Image.Image, optimized_img:Image.Image): - """ - Helper function for results visualization - - Parameters: - orig_img (Image.Image): generated image using FP16 models - optimized_img (Image.Image): generated image using quantized models - Returns: - fig (matplotlib.pyplot.Figure): matplotlib generated figure contains drawing result - """ - orig_title = "FP16 pipeline" - control_title = "INT8 pipeline" - figsize = (20, 20) - fig, axs = plt.subplots(1, 2, figsize=figsize, sharex='all', sharey='all') - list_axes = list(axs.flat) - for a in list_axes: - a.set_xticklabels([]) - a.set_yticklabels([]) - a.get_xaxis().set_visible(False) - a.get_yaxis().set_visible(False) - a.grid(False) - list_axes[0].imshow(np.array(orig_img)) - list_axes[1].imshow(np.array(optimized_img)) - list_axes[0].set_title(orig_title, fontsize=15) - list_axes[1].set_title(control_title, fontsize=15) - - fig.subplots_adjust(wspace=0.01, hspace=0.01) - fig.tight_layout() - return fig - - - visualize_results(ov_image["sample"][0], int8_image["sample"][0]) - - - -.. image:: paint-by-example-with-output_files/paint-by-example-with-output_41_0.png - - -.. code:: ipython3 - - %%skip $to_quantize.value - - display(ov_image["sample"][0]) - -Compare UNet file size -~~~~~~~~~~~~~~~~~~~~~~ - - - -.. 
code:: ipython3 - - %%skip not $to_quantize.value - - fp16_ir_model_size = UNET_OV_PATH_INPAINT.with_suffix(".bin").stat().st_size / 1024 - quantized_model_size = UNET_INT8_OV_PATH.with_suffix(".bin").stat().st_size / 1024 - - print(f"FP16 model size: {fp16_ir_model_size:.2f} KB") - print(f"INT8 model size: {quantized_model_size:.2f} KB") - print(f"Model compression rate: {fp16_ir_model_size / quantized_model_size:.3f}") - - -.. parsed-literal:: - - FP16 model size: 1678780.62 KB - INT8 model size: 840725.98 KB - Model compression rate: 1.997 - - -Interactive inference ---------------------- - - - -Choose what model do you want to use in the interactive interface. You -can choose both, FP16 and INT8. - -.. code:: ipython3 - - import ipywidgets as widgets - - available_models = ["FP16"] - - if UNET_INT8_OV_PATH.exists(): - available_models.append("INT8") - - model_to_use = widgets.Select( - options=available_models, - value="FP16", - description="Select model:", - disabled=False, - ) - - model_to_use - - - - -.. parsed-literal:: - - Select(description='Select model:', options=('FP16', 'INT8'), value='FP16') - - - -.. code:: ipython3 - - if "INT8" == model_to_use.value: - chosen_pipeline = int8_ov_pipe_inpaint or get_quantized_pipeline() - ov_pipe_inpaint = None - else: - chosen_pipeline = ov_pipe_inpaint or get_ov_pipeline() - int8_ov_pipe_inpaint = None - - - gc.collect() - -.. code:: ipython3 - - # Code adapated from https://huggingface.co/spaces/Fantasy-Studio/Paint-by-Example/blob/main/app.py - - import os - - - def predict(input_dict, reference, seed, steps): - """ - This function runs when the 'paint' button is pressed. It takes 3 input images. Takes generated image decoded by VAE decoder, unpad it to initila image size (if required), - normalize and convert to [0, 255] pixels range. 
Optionally, convertes it from np.ndarray to PIL.Image format - - Parameters: - input_dict (Dict): - Contains two images in a dictionary - 'image' is the image that will be painted on - 'mask' is the black/white image specifying where to paint (white) and not to paint (black) - image (PIL.Image.Image): - Reference image that will be used by the model to know what to paint in the specified area - seed (int): - Used to initialize the random number generator state - steps (int): - The number of denoising steps to run during inference. Low = fast/low quality, High = slow/higher quality - use_quantize_model (bool): - Use fp16 or int8 model - Returns: - image (PIL.Image.Image): - Postprocessed images - """ - width, height = input_dict["image"].size - - # If the image is not 512x512 then resize - if width < height: - factor = width / 512.0 - width = 512 - height = int((height / factor) / 8.0) * 8 - else: - factor = height / 512.0 - height = 512 - width = int((width / factor) / 8.0) * 8 - - init_image = input_dict["image"].convert("RGB").resize((width, height)) - mask = input_dict["mask"].convert("RGB").resize((width, height)) - - # If the image is not a 512x512 square then crop - if width > height: - buffer = (width - height) / 2 - input_image = init_image.crop((buffer, 0, width - buffer, 512)) - mask = mask.crop((buffer, 0, width - buffer, 512)) - elif width < height: - buffer = (height - width) / 2 - input_image = init_image.crop((0, buffer, 512, height - buffer)) - mask = mask.crop((0, buffer, 512, height - buffer)) - else: - input_image = init_image - - if not os.path.exists("output"): - os.mkdir("output") - input_image.save("output/init.png") - mask.save("output/mask.png") - reference.save("output/ref.png") - - mask = [mask] - - result = chosen_pipeline( - image=input_image, - mask_image=mask, - reference_image=reference, - seed=seed, - num_inference_steps=steps, - )[ - "sample" - ][0] - - out_dir = Path("output") - out_dir.mkdir(exist_ok=True) - 
result.save("output/result.png") - - return result - -Choose a source image and a reference image, draw a mask in source image -and push “Paint!” - -.. code:: ipython3 - - if not Path("gradio_helper.py").exists(): - r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/paint-by-example/gradio_helper.py") - open("gradio_helper.py", "w").write(r.text) - - from gradio_helper import make_demo - - demo = make_demo(fn=predict) - - # Launching the Gradio app - try: - demo.launch(debug=False, height=680) - except Exception: - demo.queue().launch(share=True, debug=False, height=680) - # if you are launching remotely, specify server_name and server_port - # image_blocks.launch(server_name='your server name', server_port='server port in int') - # Read more in the docs: https://gradio.app/docs/ - -.. code:: ipython3 - - # please uncomment and run this cell for stopping gradio interface - # demo.close() diff --git a/docs/notebooks/paint-by-example-with-output_files/paint-by-example-with-output_41_0.png b/docs/notebooks/paint-by-example-with-output_files/paint-by-example-with-output_41_0.png deleted file mode 100644 index be911bee3ee1a5..00000000000000 --- a/docs/notebooks/paint-by-example-with-output_files/paint-by-example-with-output_41_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ff1da225f5d53354c6bcdb34a891dcd1ef77e23b7bd76bee3367414d7efcde6e -size 2092300 diff --git a/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_17_3.png b/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_17_3.png index 20280b15f5dc07..1be4ba9fa45c92 100644 --- a/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_17_3.png +++ b/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_17_3.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:3983e7e27f73b5e03e02cfb02950ce8aef26d9d6a19a7376c51a6f0b00913732 +oid sha256:eacdcf6e619052ffe8bea1810c93678559cf210808d871e0e2b8a81939e1fd26 size 106259 diff --git a/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png b/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png index b5ff9a7ccdcd2c..f827c9c1094e46 100644 --- a/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png +++ b/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa4b6d563d3c164036182f80dfc247e3c19a892fae85b49f7eb51518f0bc0141 -size 219418 +oid sha256:5dffde5665ae619cc99fddef72befb32d1002becce56dfccf50e7577f1fab020 +size 218904 diff --git a/docs/notebooks/phi-3-vision-with-output.rst b/docs/notebooks/phi-3-vision-with-output.rst index 778fc5aa7d6bc7..71981daac13be4 100644 --- a/docs/notebooks/phi-3-vision-with-output.rst +++ b/docs/notebooks/phi-3-vision-with-output.rst @@ -260,10 +260,9 @@ documentation 1 or self.sliding_window is not None) and self.is_causal: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:444: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! seq_len = seq_len or torch.max(position_ids) + 1 /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:445: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len > self.original_max_position_embeddings: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86: TracerWarning: torch.tensor results are registered as constants in the trace. 
You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. op1 = operator(\*args, \*\*kwargs) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:683: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): @@ -375,7 +374,7 @@ documentation =2.1" "diffusers>=0.26,<0.30" "gradio>=4.19" "openvino>=2024.0.0" "einops" torchvision "peft>=0.6.2" "nncf>=2.9.0" "protobuf==3.20.3" "insightface" "onnxruntime" .. parsed-literal:: - Cloning into 'PhotoMaker'... - remote: Enumerating objects: 306, done. 
- remote: Counting objects: 100% (151/151), done. - remote: Compressing objects: 100% (98/98), done. - remote: Total 306 (delta 132), reused 53 (delta 53), pack-reused 155 (from 1) - Receiving objects: 100% (306/306), 10.24 MiB | 23.03 MiB/s, done. - Resolving deltas: 100% (164/164), done. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/photo-maker/PhotoMaker Note: switching to '1e78aa6514c11a84ef1be27b56c7c72d6c70f8fc'. You are in 'detached HEAD' state. You can look around, make experimental @@ -119,24 +136,20 @@ Clone PhotoMaker repository Turn off this advice by setting config variable advice.detachedHead to false HEAD is now at 1e78aa6 Update README.md - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/photo-maker - - -Install required packages - -.. code:: ipython3 - - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu \ - transformers "torch>=2.1" "diffusers>=0.26,<0.30" "gradio>=4.19" "openvino>=2024.0.0" "einops" torchvision "peft>=0.6.2" "nncf>=2.9.0" "protobuf==3.20.3" "insightface" "onnxruntime" .. parsed-literal:: ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - paddleclas 2.5.2 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible. - paddleclas 2.5.2 requires opencv-python==4.6.0.66, but you have opencv-python 4.10.0.84 which is incompatible. + paddleclas 2.6.0 requires gast==0.3.3, but you have gast 0.4.0 which is incompatible. + paddleclas 2.6.0 requires opencv-python<=4.6.0.66, but you have opencv-python 4.10.0.84 which is incompatible. parler-tts 0.2.1 requires protobuf>=4.0.0, but you have protobuf 3.20.3 which is incompatible. + tensorflow 2.12.0 requires keras<2.13,>=2.12.0, but you have keras 2.13.1 which is incompatible. 
tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. + tensorflow 2.12.0 requires tensorboard<2.13,>=2.12, but you have tensorboard 2.13.0 which is incompatible. + tensorflow 2.12.0 requires tensorflow-estimator<2.13,>=2.12.0, but you have tensorflow-estimator 2.13.0 which is incompatible. + tensorflow-cpu 2.13.1 requires numpy<=1.24.3,>=1.22, but you have numpy 1.24.4 which is incompatible. + tensorflow-cpu 2.13.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.12.2 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -197,10 +210,9 @@ PhotoMaker to generate the original PhotoMaker pipeline. .. parsed-literal:: - 2024-11-05 02:22:09.727876: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-05 02:22:09.761823: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-22 02:03:50.933677: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-22 02:03:50.958255: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-05 02:22:10.482979: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. 
code:: ipython3 @@ -389,12 +401,12 @@ output(text embeddings) which will be the input for U-Net model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:243: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:243: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/notebooks/photo-maker/PhotoMaker/photomaker/model.py:84: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/photo-maker/PhotoMaker/photomaker/model.py:84: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert class_tokens_mask.sum() == stacked_id_embeds.shape[0], f"{class_tokens_mask.sum()} != {stacked_id_embeds.shape[0]}" @@ -469,9 +481,9 @@ sequence of latent text embeddings. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: @@ -575,15 +587,15 @@ original Stable Diffusion XL model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if dim % default_overall_up_factor != 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: @@ -686,8 +698,6 @@ Select inference device for Stable Diffusion pipeline .. code:: ipython3 - import requests - r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) @@ -921,7 +931,7 @@ Running Text-to-Image Generation with OpenVINO -.. image:: photo-maker-with-output_files/photo-maker-with-output_33_0.png +.. image:: photo-maker-with-output_files/photo-maker-with-output_34_0.png Interactive Demo diff --git a/docs/notebooks/photo-maker-with-output_files/photo-maker-with-output_33_0.png b/docs/notebooks/photo-maker-with-output_files/photo-maker-with-output_33_0.png deleted file mode 100644 index 28ccdbf331406d..00000000000000 --- a/docs/notebooks/photo-maker-with-output_files/photo-maker-with-output_33_0.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:21913b4e38eb996bd7d8faedb660aa56fdbf4a6c1ef71157d5e845c9b8a31e7e -size 357743 diff --git a/docs/notebooks/photo-maker-with-output_files/photo-maker-with-output_34_0.png b/docs/notebooks/photo-maker-with-output_files/photo-maker-with-output_34_0.png new file mode 100644 index 00000000000000..5c425ae841f4c7 --- /dev/null +++ b/docs/notebooks/photo-maker-with-output_files/photo-maker-with-output_34_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99978c67369aac55e26840e7e4b59aa54bcbf4cda132774760a9e3da86803cb9 +size 357743 diff --git a/docs/notebooks/pixart-with-output.rst 
b/docs/notebooks/pixart-with-output.rst index c1c9a4b4e8ec57..517191e17501ef 100644 --- a/docs/notebooks/pixart-with-output.rst +++ b/docs/notebooks/pixart-with-output.rst @@ -118,10 +118,9 @@ directly in latent space, achieving super fast inference with few steps. .. parsed-literal:: - 2024-11-05 02:30:04.644117: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-05 02:30:04.680089: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-11-22 02:11:50.540718: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-22 02:11:50.565755: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-05 02:30:05.360275: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -130,6 +129,11 @@ directly in latent space, achieving super fast inference with few steps. Loading pipeline components...: 0%| | 0/5 [00:00. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. 
This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 + + .. parsed-literal:: @@ -138,7 +142,6 @@ directly in latent space, achieving super fast inference with few steps. .. parsed-literal:: - You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 Some weights of the model checkpoint were not used when initializing PixArtTransformer2DModel: ['caption_projection.y_embedding'] @@ -229,7 +232,7 @@ Convert text encoder .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. @@ -272,11 +275,11 @@ Convert transformer .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:219: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:219: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if self.height != height or self.width != width: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:682: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:682: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if current_length != target_length: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:697: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/attention_processor.py:697: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.shape[0] < batch_size * head_size: @@ -301,9 +304,9 @@ Convert VAE decoder .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: @@ -449,7 +452,7 @@ And insert wrappers instances in the pipeline: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. 
deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -564,7 +567,7 @@ To collect intermediate model inputs for calibration we should customize .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -1649,7 +1652,7 @@ pipelines. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -1706,9 +1709,9 @@ pipelines, we use mean inference time on 3 samples. .. parsed-literal:: - FP16 pipeline: 43.073 seconds - Optimized pipeline: 41.450 seconds - Performance speed-up: 1.039 + FP16 pipeline: 40.248 seconds + Optimized pipeline: 39.688 seconds + Performance speed-up: 1.014 Interactive inference diff --git a/docs/notebooks/pixart-with-output_files/pixart-with-output_40_2.png b/docs/notebooks/pixart-with-output_files/pixart-with-output_40_2.png index 8ac4e49184e284..47dd4083f93179 100644 --- a/docs/notebooks/pixart-with-output_files/pixart-with-output_40_2.png +++ b/docs/notebooks/pixart-with-output_files/pixart-with-output_40_2.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d0099ef65d0edd9b83af849991c6a48ebcb6b589320f0edd85c317ca3ecfa26 +oid sha256:cf4837ff583d9f6ab905f723de121d443fad3a6955444819659b09fba2a580de size 2311803 diff --git a/docs/notebooks/pixtral-with-output.rst b/docs/notebooks/pixtral-with-output.rst index 25e1004aac09f7..fcbc6b2262118e 100644 --- a/docs/notebooks/pixtral-with-output.rst +++ b/docs/notebooks/pixtral-with-output.rst @@ -153,20 +153,19 @@ documentation 0.19 and openvino version <= 2024.4. Please downgrade to tokenizers version <= 0.19 to export tokenizers to OpenVINO. 
INFO:nncf:Statistics of the bitwidth distribution: ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ @@ -469,7 +465,7 @@ documentation =0.24.0" transformers "torch>=2.1" "gradio>=4.19" qrcode opencv-python "peft>=0.6.2" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q "openvino>=2023.1.0" "nncf>=2.7.0" + %pip install -q "openvino>=2023.1.0" "nncf>=2.7.0" "matplotlib>=3.4" Instantiating Generation Pipeline --------------------------------- diff --git a/docs/notebooks/qwen2-audio-with-output.rst b/docs/notebooks/qwen2-audio-with-output.rst index 6b32837a5c2c5e..04fb11ed6bae6d 100644 --- a/docs/notebooks/qwen2-audio-with-output.rst +++ b/docs/notebooks/qwen2-audio-with-output.rst @@ -29,8 +29,9 @@ In this tutorial we consider how to convert and optimize Qwen2Audio model for creating multimodal chatbot. Additionally, we demonstrate how to apply stateful transformation on LLM part and model optimization techniques like weights compression using -`NNCF `__ #### Table of -contents: +`NNCF `__ + +**Table of contents:** - `Prerequisites <#prerequisites>`__ - `Convert and Optimize model <#convert-and-optimize-model>`__ @@ -78,11 +79,11 @@ Prerequisites from pathlib import Path import requests - + if not Path("ov_qwen2_audio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/qwen2-audio/ov_qwen2_audio_helper.py") open("ov_qwen2_audio_helper.py", "w").write(r.text) - + if not Path("notebook_utils.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") open("notebook_utils.py", "w").write(r.text) @@ -211,13 +212,13 @@ documentation target_length: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/810/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to 
get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors @@ -396,16 +396,16 @@ Intel `__ .. code:: ipython3 from ov_qwen2_audio_helper import OVQwen2AudioForConditionalGeneration - + # Uncomment below lines to see the model inference class code # OVQwen2AudioForConditionalGeneration?? .. code:: ipython3 from notebook_utils import device_widget - + device = device_widget(default="AUTO", exclude=["NPU"]) - + device @@ -431,20 +431,20 @@ Run model inference from transformers import AutoProcessor, TextStreamer import librosa import IPython.display as ipd - - + + processor = AutoProcessor.from_pretrained(model_dir) - + audio_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/1272-128104-0000.flac" audio_chat_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/guess_age_gender.wav" audio_file = Path(audio_url.split("/")[-1]) audio_chat_file = Path(audio_chat_url.split("/")[-1]) - + if not audio_file.exists(): r = requests.get(audio_url) with audio_file.open("wb") as f: f.write(r.content) - + if not audio_chat_file.exists(): r = requests.get(audio_chat_url) with audio_chat_file.open("wb") as f: @@ -466,14 +466,14 @@ Voice chat ], }, ] - + text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False) audios = [librosa.load(audio_chat_file, 
sr=processor.feature_extractor.sampling_rate)[0]] - + inputs = processor(text=text, audios=audios, return_tensors="pt", padding=True) display(ipd.Audio(audio_chat_file)) print("Answer:") - + generate_ids = ov_model.generate(**inputs, max_new_tokens=50, streamer=TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)) @@ -485,7 +485,7 @@ Voice chat .. raw:: html - +