From aa92d13149d62c5c30e903d71638caced8afd4d0 Mon Sep 17 00:00:00 2001 From: Aleksandr Mokrov Date: Fri, 20 Sep 2024 15:50:02 +0200 Subject: [PATCH] Updates properties (#2403) CVS-120640 --- .ci/skipped_notebooks.yml | 6 +++- .ci/spellcheck/.pyspelling.wordlist.txt | 1 + notebooks/auto-device/auto-device.ipynb | 16 +++++++--- .../clip-language-saliency-map.ipynb | 5 +-- .../cross-lingual-books-alignment.ipynb | 9 ++++-- .../async_pipeline.py | 5 +-- .../dolly-2-instruction-following.ipynb | 8 ++++- notebooks/florence2/gradio_helper.py | 1 - notebooks/florence2/ov_florence2_helper.py | 1 - notebooks/gpu-device/gpu-device.ipynb | 32 ++++++++++++------- .../grounded-segment-anything.ipynb | 1 - notebooks/hello-npu/hello-npu.ipynb | 24 ++++++++------ notebooks/internvl2/gradio_helper.py | 1 - notebooks/internvl2/internvl2_helper.py | 5 --- ...tent-consistency-models-optimum-demo.ipynb | 4 ++- .../llm-agent-functioncall-qwen.ipynb | 13 +++++--- .../llm-agent-rag-llamaindex.ipynb | 8 ++++- .../llm-agent-react-langchain.ipynb | 15 ++++++--- notebooks/llm-chatbot/llm-chatbot.ipynb | 7 +++- .../llm-rag-langchain/llm-rag-langchain.ipynb | 7 +++- .../llm-rag-llamaindex.ipynb | 7 +++- notebooks/openvino-api/openvino-api.ipynb | 5 ++- .../paddle-to-openvino-classification.ipynb | 5 ++- notebooks/pixart/pixart.ipynb | 2 -- .../pose-estimation.ipynb | 5 ++- ...orch-post-training-quantization-nncf.ipynb | 5 ++- .../pytorch-quantization-aware-training.ipynb | 5 ++- ...quantization-sparsity-aware-training.ipynb | 5 ++- .../pytorch-onnx-to-openvino.ipynb | 5 ++- notebooks/qwen2-audio/gradio_helper.py | 1 - .../qwen2-audio/ov_qwen2_audio_helper.py | 1 - notebooks/qwen2-vl/gradio_helper.py | 3 -- notebooks/qwen2-vl/ov_qwen2_vl.py | 2 -- .../stable-diffusion-v2-optimum-demo.ipynb | 4 ++- .../stable-video-diffusion.ipynb | 2 -- ...nsorflow-quantization-aware-training.ipynb | 5 ++- .../vision-monodepth/vision-monodepth.ipynb | 5 ++- .../notebooks/phi3_chatbot_demo.ipynb | 10 ++++-- .../notebooks/phi3_rag_on_client.ipynb | 9 ++++-- supplementary_materials/qwen2/chat.py | 6 +++- 40 files changed, 176 insertions(+), 85 deletions(-) diff --git a/.ci/skipped_notebooks.yml b/.ci/skipped_notebooks.yml index ddde19d8561..6974caacd8e 100644 --- a/.ci/skipped_notebooks.yml +++ b/.ci/skipped_notebooks.yml @@ -586,4 +586,8 @@ - python: - '3.8' - os: - - macos-12 \ No newline at end of file + - macos-12 +- notebook: notebooks/llm-agent-react/llm-agent-react-langchain.ipynb + skips: + - python: + - '3.8' \ No newline at end of file diff --git a/.ci/spellcheck/.pyspelling.wordlist.txt b/.ci/spellcheck/.pyspelling.wordlist.txt index d4f3e90ed7a..123e9b2349e 100644 --- a/.ci/spellcheck/.pyspelling.wordlist.txt +++ b/.ci/spellcheck/.pyspelling.wordlist.txt @@ -617,6 +617,7 @@ perceptron Patil PEFT perceiver +PerformanceMode performant PersonaGPT PGI diff --git a/notebooks/auto-device/auto-device.ipynb b/notebooks/auto-device/auto-device.ipynb index 18ab90edee5..5f4a42bd954 100644 --- a/notebooks/auto-device/auto-device.ipynb +++ b/notebooks/auto-device/auto-device.ipynb @@ -70,7 +70,7 @@ "import platform\n", "\n", "# Install required packages\n", - "%pip install -q \"openvino>=2023.1.0\" Pillow torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu\n", + "%pip install -q \"openvino>=2023.1.0\" \"numpy<2\" Pillow torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu\n", "\n", "if platform.system() != \"Windows\":\n", " %pip install -q \"matplotlib>=3.4\"\n", @@ -187,8 +187,11 @@ } ], 
"source": [ + "import openvino.properties.log as log\n", + "\n", + "\n", "# Set LOG_LEVEL to LOG_INFO.\n", - "core.set_property(\"AUTO\", {\"LOG_LEVEL\": \"LOG_INFO\"})\n", + "core.set_property(\"AUTO\", {log.level(): log.Level.INFO})\n", "\n", "# Load the model onto the target device.\n", "compiled_model = core.compile_model(ov_model)\n", @@ -249,7 +252,7 @@ ], "source": [ "# Set LOG_LEVEL to LOG_NONE.\n", - "core.set_property(\"AUTO\", {\"LOG_LEVEL\": \"LOG_NONE\"})\n", + "core.set_property(\"AUTO\", {log.level(): log.Level.NO})\n", "\n", "compiled_model = core.compile_model(model=ov_model, device_name=\"AUTO\")\n", "\n", @@ -611,12 +614,15 @@ } ], "source": [ + "import openvino.properties.hint as hints\n", + "\n", + "\n", "THROUGHPUT_hint_context = InferContext(metrics_update_interval, metrics_update_num)\n", "\n", "print(\"Compiling Model for AUTO device with THROUGHPUT hint\")\n", "sys.stdout.flush()\n", "\n", - "compiled_model = core.compile_model(model=ov_model, config={\"PERFORMANCE_HINT\": \"THROUGHPUT\"})\n", + "compiled_model = core.compile_model(model=ov_model, config={hints.performance_mode(): hints.PerformanceMode.THROUGHPUT})\n", "\n", "infer_queue = ov.AsyncInferQueue(compiled_model, 0) # Setting to 0 will query optimal number by default.\n", "infer_queue.set_callback(completion_callback)\n", @@ -680,7 +686,7 @@ "print(\"Compiling Model for AUTO Device with LATENCY hint\")\n", "sys.stdout.flush()\n", "\n", - "compiled_model = core.compile_model(model=ov_model, config={\"PERFORMANCE_HINT\": \"LATENCY\"})\n", + "compiled_model = core.compile_model(model=ov_model, config={hints.performance_mode(): hints.PerformanceMode.LATENCY})\n", "\n", "# Setting to 0 will query optimal number by default.\n", "infer_queue = ov.AsyncInferQueue(compiled_model, 0)\n", diff --git a/notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb b/notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb index 63206625bc0..b3d0d07c022 100644 --- a/notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb +++ b/notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb @@ -90,7 +90,7 @@ "source": [ "# Install requirements\n", "%pip install -q \"openvino>=2023.1.0\"\n", - "%pip install -q --extra-index-url https://download.pytorch.org/whl/cpu transformers \"torch>=2.1\" \"gradio>=4.19\"" + "%pip install -q --extra-index-url https://download.pytorch.org/whl/cpu transformers \"numpy<2\" \"torch>=2.1\" \"gradio>=4.19\"" ] }, { @@ -759,6 +759,7 @@ "outputs": [], "source": [ "from typing import Dict, Any\n", + "import openvino.properties.hint as hints\n", "\n", "\n", "image_model = core.read_model(image_model_path)\n", @@ -766,7 +767,7 @@ "image_model = core.compile_model(\n", " model=image_model,\n", " device_name=device.value,\n", - " config={\"PERFORMANCE_HINT\": \"THROUGHPUT\"},\n", + " config={hints.performance_mode(): hints.PerformanceMode.THROUGHPUT},\n", ")" ] }, diff --git a/notebooks/cross-lingual-books-alignment/cross-lingual-books-alignment.ipynb b/notebooks/cross-lingual-books-alignment/cross-lingual-books-alignment.ipynb index f2e1b5565af..8f8408cda5f 100644 --- a/notebooks/cross-lingual-books-alignment/cross-lingual-books-alignment.ipynb +++ b/notebooks/cross-lingual-books-alignment/cross-lingual-books-alignment.ipynb @@ -1105,11 +1105,13 @@ "source": [ "from typing import Any\n", "\n", + "import openvino.properties.hint as hints\n", + "\n", "\n", "compiled_throughput_hint = core.compile_model(\n", " ov_model,\n", " device_name=device.value,\n", - 
" config={\"PERFORMANCE_HINT\": \"THROUGHPUT\"},\n", + " config={hints.performance_mode(): hints.PerformanceMode.THROUGHPUT},\n", ")" ] }, @@ -1323,7 +1325,10 @@ } ], "source": [ - "cpu_name = core.get_property(\"CPU\", \"FULL_DEVICE_NAME\")\n", + "import openvino.properties as props\n", + "\n", + "\n", + "cpu_name = core.get_property(\"CPU\", props.device.full_name)\n", "\n", "plot = sns.barplot(benchmark_dataframe, errorbar=\"sd\")\n", "plot.set(ylabel=\"Sentences Per Second\", title=f\"Sentence Embeddings Benchmark\\n{cpu_name}\")\n", diff --git a/notebooks/ct-segmentation-quantize/async_pipeline.py b/notebooks/ct-segmentation-quantize/async_pipeline.py index 2758a3deff0..ee7f31f1d1d 100644 --- a/notebooks/ct-segmentation-quantize/async_pipeline.py +++ b/notebooks/ct-segmentation-quantize/async_pipeline.py @@ -24,6 +24,7 @@ import cv2 +import openvino.properties as props from custom_segmentation import Model @@ -169,7 +170,7 @@ def __init__(self, ie, model, plugin_config, device="CPU", max_num_requests=0): cache_path.mkdir(exist_ok=True) # Enable model caching for GPU devices if "GPU" in device and "GPU" in ie.available_devices: - ie.set_property(device_name="GPU", properties={"CACHE_DIR": str(cache_path)}) + ie.set_property(device_name="GPU", properties={props.cache_dir(): str(cache_path)}) self.model = model self.logger = logging.getLogger() @@ -177,7 +178,7 @@ def __init__(self, ie, model, plugin_config, device="CPU", max_num_requests=0): self.logger.info("Loading network to {} plugin...".format(device)) self.exec_net = ie.compile_model(self.model.net, device, plugin_config) if max_num_requests == 0: - max_num_requests = self.exec_net.get_property("OPTIMAL_NUMBER_OF_INFER_REQUESTS") + 1 + max_num_requests = self.exec_net.get_property(props.optimal_number_of_infer_requests) + 1 self.requests = [self.exec_net.create_infer_request() for _ in range(max_num_requests)] self.empty_requests = deque(self.requests) self.completed_request_results = {} diff --git a/notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb b/notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb index 2a75c221ef9..3e2efebb991 100644 --- a/notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb +++ b/notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb @@ -564,9 +564,15 @@ ], "source": [ "from pathlib import Path\n", + "\n", "from transformers import AutoTokenizer\n", "from optimum.intel.openvino import OVModelForCausalLM\n", "\n", + "import openvino.properties as props\n", + "import openvino.properties.hint as hints\n", + "import openvino.properties.streams as streams\n", + "\n", + "\n", "if model_to_run.value == \"INT4\":\n", " model_dir = int4_model_dir\n", "elif model_to_run.value == \"INT8\":\n", @@ -579,7 +585,7 @@ "\n", "current_device = device.value\n", "\n", - "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", + "ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): \"1\", props.cache_dir(): \"\"}\n", "\n", "ov_model = OVModelForCausalLM.from_pretrained(model_dir, device=current_device, ov_config=ov_config)" ] diff --git a/notebooks/florence2/gradio_helper.py b/notebooks/florence2/gradio_helper.py index dc8e212270e..76e0b5484ae 100644 --- a/notebooks/florence2/gradio_helper.py +++ b/notebooks/florence2/gradio_helper.py @@ -63,7 +63,6 @@ def plot_bbox(image, data): def draw_polygons(image, prediction, fill_mask=False): - draw = ImageDraw.Draw(image) 
scale = 1 for polygons, label in zip(prediction["polygons"], prediction["labels"]): diff --git a/notebooks/florence2/ov_florence2_helper.py b/notebooks/florence2/ov_florence2_helper.py index f1209a3f4ae..01a3cc8ef70 100644 --- a/notebooks/florence2/ov_florence2_helper.py +++ b/notebooks/florence2/ov_florence2_helper.py @@ -353,7 +353,6 @@ def __init__(self, model_dir, device, ov_config=None) -> None: self.language_model = OVFlorence2LangModel(model_dir, self.config.text_config, device, ov_config) def generate(self, input_ids, inputs_embeds=None, pixel_values=None, **kwargs): - if inputs_embeds is None: # 1. Extra the input embeddings if input_ids is not None: diff --git a/notebooks/gpu-device/gpu-device.ipynb b/notebooks/gpu-device/gpu-device.ipynb index 56da2089a39..5a27a471a38 100644 --- a/notebooks/gpu-device/gpu-device.ipynb +++ b/notebooks/gpu-device/gpu-device.ipynb @@ -256,9 +256,12 @@ } ], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "device = \"GPU\"\n", "\n", - "core.get_property(device, \"FULL_DEVICE_NAME\")" + "core.get_property(device, props.device.full_name)" ] }, { @@ -267,7 +270,7 @@ "id": "aac3129a-129f-49aa-aba0-71ae1e892ada", "metadata": {}, "source": [ - "Each device also has a specific property called `SUPPORTED_PROPERTIES`, that enables viewing all the available properties in the device. We can check the value for each property by simply looping through the dictionary returned by `core.get_property(\"GPU\", \"SUPPORTED_PROPERTIES\")` and then querying for that property." + "Each device also has a specific property called `SUPPORTED_PROPERTIES`, that enables viewing all the available properties in the device. We can check the value for each property by simply looping through the dictionary returned by `core.get_property(\"GPU\", props.supported_properties)` and then querying for that property." ] }, { @@ -321,7 +324,7 @@ ], "source": [ "print(f\"{device} SUPPORTED_PROPERTIES:\\n\")\n", - "supported_properties = core.get_property(device, \"SUPPORTED_PROPERTIES\")\n", + "supported_properties = core.get_property(device, props.supported_properties)\n", "indent = len(max(supported_properties, key=len))\n", "\n", "for property_key in supported_properties:\n", @@ -677,7 +680,7 @@ "core = ov.Core()\n", "\n", "# Set cache folder\n", - "core.set_property({\"CACHE_DIR\": cache_folder})\n", + "core.set_property({props.cache_dir(): cache_folder})\n", "\n", "# Compile the model as before\n", "model = core.read_model(model=model_path)\n", @@ -717,7 +720,7 @@ "source": [ "start = time.time()\n", "core = ov.Core()\n", - "core.set_property({\"CACHE_DIR\": \"cache\"})\n", + "core.set_property({props.cache_dir(): \"cache\"})\n", "model = core.read_model(model=model_path)\n", "compiled_model = core.compile_model(model, device)\n", "print(f\"Cache enabled - compile time: {time.time() - start}s\")\n", @@ -765,7 +768,7 @@ "id": "7077b662-22f3-4c52-9c80-e5ac1309c482", "metadata": {}, "source": [ - "To use the \"LATENCY\" performance hint, add `{\"PERFORMANCE_HINT\": \"LATENCY\"}` when compiling the model as shown below. For GPUs, this automatically minimizes the batch size and number of parallel streams such that all of the compute resources can focus on completing a single inference as fast as possible." + "To use the \"LATENCY\" performance hint, add `{hints.performance_mode(): hints.PerformanceMode.LATENCY}` when compiling the model as shown below. 
For GPUs, this automatically minimizes the batch size and number of parallel streams such that all of the compute resources can focus on completing a single inference as fast as possible." ] }, { @@ -780,7 +783,10 @@ }, "outputs": [], "source": [ - "compiled_model = core.compile_model(model, device, {\"PERFORMANCE_HINT\": \"LATENCY\"})" + "import openvino.properties.hint as hints\n", + "\n", + "\n", + "compiled_model = core.compile_model(model, device, {hints.performance_mode(): hints.PerformanceMode.LATENCY})" ] }, { @@ -789,7 +795,7 @@ "id": "06589f38-ce35-457f-8395-a4a3f6327ea0", "metadata": {}, "source": [ - "To use the \"THROUGHPUT\" performance hint, add `{\"PERFORMANCE_HINT\": \"THROUGHPUT\"}` when compiling the model. For GPUs, this creates multiple processing streams to efficiently utilize all the execution cores and optimizes the batch size to fill the available memory." + "To use the \"THROUGHPUT\" performance hint, add `{hints.performance_mode(): hints.PerformanceMode.THROUGHPUT}` when compiling the model. For GPUs, this creates multiple processing streams to efficiently utilize all the execution cores and optimizes the batch size to fill the available memory." ] }, { @@ -804,7 +810,7 @@ }, "outputs": [], "source": [ - "compiled_model = core.compile_model(model, device, {\"PERFORMANCE_HINT\": \"THROUGHPUT\"})" + "compiled_model = core.compile_model(model, device, {hints.performance_mode(): hints.PerformanceMode.THROUGHPUT})" ] }, { @@ -836,7 +842,9 @@ "Note that we always need to explicitly specify the device list for MULTI to work, otherwise MULTI does not know which devices are available for inference. However, this is not the only way to use multiple devices in OpenVINO. There is another performance hint called \"CUMULATIVE_THROUGHPUT\" that works similar to MULTI, except it uses the devices automatically selected by AUTO. This way, we do not need to manually specify devices to use. Below is an example showing how to use \"CUMULATIVE_THROUGHPUT\", equivalent to the MULTI one:\n", "\n", "`\n", - "compiled_model = core.compile_model(model=model, device_name=\"AUTO\", config={\"PERFORMANCE_HINT\": \"CUMULATIVE_THROUGHPUT\"})\n", + "\n", + "\n", + "compiled_model = core.compile_model(model=model, device_name=\"AUTO\", config={hints.performance_mode(): hints.PerformanceMode.CUMULATIVE_THROUGHPUT})\n", "`\n", "\n", "> **Important**: **The “THROUGHPUT”, “MULTI”, and “CUMULATIVE_THROUGHPUT” modes are only applicable to asynchronous inferencing pipelines. The example at the end of this article shows how to set up an asynchronous pipeline that takes advantage of parallelism to increase throughput.** To learn more, see [Asynchronous Inferencing](https://docs.openvino.ai/2024/documentation/openvino-extensibility/openvino-plugin-library/asynch-inference-request.html) in OpenVINO as well as the [Asynchronous Inference notebook](../async-api/async-api.ipynb)." 
@@ -1584,7 +1592,7 @@ "# Read model and compile it on GPU in THROUGHPUT mode\n", "model = core.read_model(model=model_path)\n", "device_name = \"GPU\"\n", - "compiled_model = core.compile_model(model=model, device_name=device_name, config={\"PERFORMANCE_HINT\": \"THROUGHPUT\"})\n", + "compiled_model = core.compile_model(model=model, device_name=device_name, config={hints.performance_mode(): hints.PerformanceMode.THROUGHPUT})\n", "\n", "# Get the input and output nodes\n", "input_layer = compiled_model.input(0)\n", @@ -1996,7 +2004,7 @@ " )\n", " cv2.putText(\n", " frame,\n", - " f\"hint {compiled_model.get_property('PERFORMANCE_HINT')}\",\n", + " f\"hint {compiled_model.get_property(hints.performance_mode)}\",\n", " (5, 60),\n", " cv2.FONT_ITALIC,\n", " 0.6,\n", diff --git a/notebooks/grounded-segment-anything/grounded-segment-anything.ipynb b/notebooks/grounded-segment-anything/grounded-segment-anything.ipynb index d20b33771db..d2be2507f8e 100644 --- a/notebooks/grounded-segment-anything/grounded-segment-anything.ipynb +++ b/notebooks/grounded-segment-anything/grounded-segment-anything.ipynb @@ -966,7 +966,6 @@ "outputs": [], "source": [ "def draw_mask(mask, draw, random_color=False):\n", - "\n", " if random_color:\n", " color = (\n", " np.random.randint(0, 255),\n", diff --git a/notebooks/hello-npu/hello-npu.ipynb b/notebooks/hello-npu/hello-npu.ipynb index db4640b7791..cc2062e77b4 100644 --- a/notebooks/hello-npu/hello-npu.ipynb +++ b/notebooks/hello-npu/hello-npu.ipynb @@ -196,9 +196,12 @@ } ], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "device = \"NPU\"\n", "\n", - "core.get_property(device, \"FULL_DEVICE_NAME\")" + "core.get_property(device, props.device.full_name)" ] }, { @@ -206,7 +209,7 @@ "id": "70889c34-74f8-4a7a-b23d-166311c7c02d", "metadata": {}, "source": [ - "Each device also has a specific property called ```SUPPORTED_PROPERTIES```, that enables viewing all the available properties in the device. We can check the value for each property by simply looping through the dictionary returned by ```core.get_property(\"NPU\", \"SUPPORTED_PROPERTIES\")``` and then querying for that property." + "Each device also has a specific property called ```SUPPORTED_PROPERTIES```, that enables viewing all the available properties in the device. We can check the value for each property by simply looping through the dictionary returned by ```core.get_property(\"NPU\", props.supported_properties)``` and then querying for that property." 
] }, { @@ -217,7 +220,7 @@ "outputs": [], "source": [ "print(f\"{device} SUPPORTED_PROPERTIES:\\n\")\n", - "supported_properties = core.get_property(device, \"SUPPORTED_PROPERTIES\")\n", + "supported_properties = core.get_property(device, props.supported_properties)\n", "indent = len(max(supported_properties, key=len))\n", "\n", "for property_key in supported_properties:\n", @@ -527,7 +530,7 @@ "core = ov.Core()\n", "\n", "# Set cache folder\n", - "core.set_property({\"CACHE_DIR\": cache_folder})\n", + "core.set_property({props.cache_dir(): cache_folder})\n", "\n", "# Compile the model\n", "model = core.read_model(model=model_path)\n", @@ -538,7 +541,7 @@ "core = ov.Core()\n", "\n", "# Set cache folder\n", - "core.set_property({\"CACHE_DIR\": cache_folder})\n", + "core.set_property({props.cache_dir(): cache_folder})\n", "\n", "# Compile the model as before\n", "model = core.read_model(model=model_path)\n", @@ -606,7 +609,7 @@ "id": "1cccd1b5-4d5a-41f3-8d8a-4ee0bc235a9e", "metadata": {}, "source": [ - "To use the \"LATENCY\" performance hint, add `{\"PERFORMANCE_HINT\": \"LATENCY\"}` when compiling the model as shown below. For NPU, this automatically minimizes the batch size and number of parallel streams such that all of the compute resources can focus on completing a single inference as fast as possible." + "To use the \"LATENCY\" performance hint, add `{hints.performance_mode(): hints.PerformanceMode.LATENCY}` when compiling the model as shown below. For NPU, this automatically minimizes the batch size and number of parallel streams such that all of the compute resources can focus on completing a single inference as fast as possible." ] }, { @@ -616,7 +619,10 @@ "metadata": {}, "outputs": [], "source": [ - "compiled_model = core.compile_model(model, device, {\"PERFORMANCE_HINT\": \"LATENCY\"})" + "import openvino.properties.hint as hints\n", + "\n", + "\n", + "compiled_model = core.compile_model(model, device, {hints.performance_mode(): hints.PerformanceMode.LATENCY})" ] }, { @@ -624,7 +630,7 @@ "id": "7ca1f3d8-202c-4a98-85bc-b66110120dfb", "metadata": {}, "source": [ - "To use the \"THROUGHPUT\" performance hint, add `{\"PERFORMANCE_HINT\": \"THROUGHPUT\"}` when compiling the model. For NPUs, this creates multiple processing streams to efficiently utilize all the execution cores and optimizes the batch size to fill the available memory." + "To use the \"THROUGHPUT\" performance hint, add `{hints.performance_mode(): hints.PerformanceMode.THROUGHPUT}` when compiling the model. For NPUs, this creates multiple processing streams to efficiently utilize all the execution cores and optimizes the batch size to fill the available memory." 
] }, { @@ -634,7 +640,7 @@ "metadata": {}, "outputs": [], "source": [ - "compiled_model = core.compile_model(model, device, {\"PERFORMANCE_HINT\": \"THROUGHPUT\"})" + "compiled_model = core.compile_model(model, device, {hints.performance_mode(): hints.PerformanceMode.THROUGHPUT})" ] }, { diff --git a/notebooks/internvl2/gradio_helper.py b/notebooks/internvl2/gradio_helper.py index e95307761a2..24414fe23a4 100644 --- a/notebooks/internvl2/gradio_helper.py +++ b/notebooks/internvl2/gradio_helper.py @@ -377,7 +377,6 @@ def bot( max_new_tokens, max_input_tiles, ): - streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) generation_config = { "num_beams": 1, diff --git a/notebooks/internvl2/internvl2_helper.py b/notebooks/internvl2/internvl2_helper.py index 96c72b5aeab..9ec48083177 100644 --- a/notebooks/internvl2/internvl2_helper.py +++ b/notebooks/internvl2/internvl2_helper.py @@ -254,7 +254,6 @@ def convert_internvl2_model(model_id, output_dir, quantization_config): print("✅ Input embedding model successfully converted") if not image_embed_path.exists(): - print("⌛ Convert Image embedding model") model.forward = model.extract_feature @@ -535,7 +534,6 @@ def __call__(self, *args, **kwargs): class OVInternVLChatModel: - def __init__(self, model_dir: Path, device: str): config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) image_size = config.force_image_size or config.vision_config.image_size @@ -564,7 +562,6 @@ def forward( past_key_values: Optional[List[torch.FloatTensor]] = None, use_cache: Optional[bool] = None, ) -> Union[Tuple, CausalLMOutputWithPast]: - image_flags = image_flags.squeeze(-1) input_embeds = self.language_model.embed_tokens(input_ids) @@ -678,7 +675,6 @@ def chat( IMG_CONTEXT_TOKEN="", verbose=False, ): - from conversation import get_conv_template if history is None and pixel_values is not None and "" not in question: @@ -740,7 +736,6 @@ def generate( return_dict: Optional[bool] = None, **generate_kwargs, ) -> torch.LongTensor: - assert self.img_context_token_id is not None if pixel_values is not None: if visual_features is not None: diff --git a/notebooks/latent-consistency-models-image-generation/latent-consistency-models-optimum-demo.ipynb b/notebooks/latent-consistency-models-image-generation/latent-consistency-models-optimum-demo.ipynb index 50f25a1d4bf..41b2904a6dd 100644 --- a/notebooks/latent-consistency-models-image-generation/latent-consistency-models-optimum-demo.ipynb +++ b/notebooks/latent-consistency-models-image-generation/latent-consistency-models-optimum-demo.ipynb @@ -116,12 +116,14 @@ ], "source": [ "import openvino as ov\n", + "import openvino.properties as props\n", + "\n", "\n", "core = ov.Core()\n", "devices = core.available_devices\n", "\n", "for device in devices:\n", - " device_name = core.get_property(device, \"FULL_DEVICE_NAME\")\n", + " device_name = core.get_property(device, props.device.full_name)\n", " print(f\"{device}: {device_name}\")" ] }, diff --git a/notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb b/notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb index 5a694cda366..7c7e4e9fdd6 100644 --- a/notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb +++ b/notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb @@ -282,7 +282,12 @@ "source": [ "from qwen_agent.llm import get_chat_model\n", "\n", - "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", + "import openvino.properties as 
props\n", + "import openvino.properties.hint as hints\n", + "import openvino.properties.streams as streams\n", + "\n", + "\n", + "ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): \"1\", props.cache_dir(): \"\"}\n", "llm_cfg = {\n", " \"ov_model_dir\": model_path,\n", " \"model_type\": \"openvino\",\n", @@ -313,9 +318,9 @@ "ov_config = {\n", " \"KV_CACHE_PRECISION\": \"u8\",\n", " \"DYNAMIC_QUANTIZATION_GROUP_SIZE\": \"32\",\n", - " \"PERFORMANCE_HINT\": \"LATENCY\",\n", - " \"NUM_STREAMS\": \"1\",\n", - " \"CACHE_DIR\": \"\",\n", + " hints.performance_mode(): hints.PerformanceMode.LATENCY,\n", + " streams.num(): \"\",\n", + " props.cache_dir(): \"\",\n", "}" ] }, diff --git a/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb b/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb index 36456482115..ce8d5893b7c 100644 --- a/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb +++ b/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb @@ -90,6 +90,7 @@ " \"--extra-index-url\",\n", " \"https://download.pytorch.org/whl/cpu\",\n", " \"llama-index\",\n", + " \"llama-index-llms-huggingface==0.3.3\", # pin to keep compatibility due to https://github.com/run-llama/llama_index/commit/f037de8d0471b37f9c4069ebef5dfb329633d2c6\n", " \"llama-index-readers-file\",\n", " \"llama-index-llms-openvino>=0.2.2\",\n", " \"llama-index-embeddings-openvino>=0.2.0\",\n", @@ -301,7 +302,12 @@ "source": [ "from llama_index.llms.openvino import OpenVINOLLM\n", "\n", - "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", + "import openvino.properties as props\n", + "import openvino.properties.hint as hints\n", + "import openvino.properties.streams as streams\n", + "\n", + "\n", + "ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): \"1\", props.cache_dir(): \"\"}\n", "\n", "\n", "def phi_completion_to_prompt(completion):\n", diff --git a/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb b/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb index 41b68752c99..d92e39294f0 100644 --- a/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb +++ b/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb @@ -380,6 +380,10 @@ "from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline\n", "from transformers.generation.stopping_criteria import StoppingCriteriaList, StoppingCriteria\n", "\n", + "import openvino.properties as props\n", + "import openvino.properties.hint as hints\n", + "import openvino.properties.streams as streams\n", + "\n", "\n", "class StopSequenceCriteria(StoppingCriteria):\n", " \"\"\"\n", @@ -403,7 +407,7 @@ " return any(decoded_output.endswith(stop_sequence) for stop_sequence in self.stop_sequences)\n", "\n", "\n", - "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", + "ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): \"1\", props.cache_dir(): \"\"}\n", "stop_tokens = [\"Observation:\"]\n", "\n", "ov_llm = HuggingFacePipeline.from_model_id(\n", @@ -442,9 +446,9 @@ "ov_config = {\n", " \"KV_CACHE_PRECISION\": \"u8\",\n", " \"DYNAMIC_QUANTIZATION_GROUP_SIZE\": \"32\",\n", - " \"PERFORMANCE_HINT\": \"LATENCY\",\n", - " \"NUM_STREAMS\": \"1\",\n", - " \"CACHE_DIR\": \"\",\n", + " hints.performance_mode(): hints.PerformanceMode.LATENCY,\n", + " streams.num(): \"1\",\n", + " props.cache_dir(): \"\",\n", "}" ] }, @@ -617,10 +621,11 @@ "source": [ "from 
langchain_community.tools import WikipediaQueryRun\n", "from langchain_community.utilities import WikipediaAPIWrapper\n", - "from langchain_core.pydantic_v1 import BaseModel, Field\n", "from langchain_core.callbacks import CallbackManagerForToolRun\n", "from typing import Optional\n", "\n", + "from pydantic import BaseModel, Field\n", + "\n", "\n", "class WikipediaQueryRunWrapper(WikipediaQueryRun):\n", " def _run(\n", diff --git a/notebooks/llm-chatbot/llm-chatbot.ipynb b/notebooks/llm-chatbot/llm-chatbot.ipynb index 6d53e7200df..0acfaf39044 100644 --- a/notebooks/llm-chatbot/llm-chatbot.ipynb +++ b/notebooks/llm-chatbot/llm-chatbot.ipynb @@ -928,6 +928,11 @@ "from transformers import AutoConfig, AutoTokenizer\n", "from optimum.intel.openvino import OVModelForCausalLM\n", "\n", + "import openvino.properties as props\n", + "import openvino.properties.hint as hints\n", + "import openvino.properties.streams as streams\n", + "\n", + "\n", "if model_to_run.value == \"INT4\":\n", " model_dir = int4_model_dir\n", "elif model_to_run.value == \"INT8\":\n", @@ -936,7 +941,7 @@ " model_dir = fp16_model_dir\n", "print(f\"Loading model from {model_dir}\")\n", "\n", - "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", + "ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): \"1\", props.cache_dir(): \"\"}\n", "\n", "if \"GPU\" in device.value and \"qwen2-7b-instruct\" in model_id.value:\n", " ov_config[\"GPU_ENABLE_SDPA_OPTIMIZATION\"] = \"NO\"\n", diff --git a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb index cd90aaf5a2f..527834c7b7d 100644 --- a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb +++ b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb @@ -1293,6 +1293,11 @@ "source": [ "from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline\n", "\n", + "import openvino.properties as props\n", + "import openvino.properties.hint as hints\n", + "import openvino.properties.streams as streams\n", + "\n", + "\n", "if model_to_run.value == \"INT4\":\n", " model_dir = int4_model_dir\n", "elif model_to_run.value == \"INT8\":\n", @@ -1301,7 +1306,7 @@ " model_dir = fp16_model_dir\n", "print(f\"Loading model from {model_dir}\")\n", "\n", - "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", + "ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): \"1\", props.cache_dir(): \"\"}\n", "\n", "if \"GPU\" in llm_device.value and \"qwen2-7b-instruct\" in llm_model_id.value:\n", " ov_config[\"GPU_ENABLE_SDPA_OPTIMIZATION\"] = \"NO\"\n", diff --git a/notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb b/notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb index 524639e4dfd..4e46df09a1f 100644 --- a/notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb +++ b/notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb @@ -1271,6 +1271,11 @@ "source": [ "from llama_index.llms.openvino import OpenVINOLLM\n", "\n", + "import openvino.properties as props\n", + "import openvino.properties.hint as hints\n", + "import openvino.properties.streams as streams\n", + "\n", + "\n", "if model_to_run.value == \"INT4\":\n", " model_dir = int4_model_dir\n", "elif model_to_run.value == \"INT8\":\n", @@ -1279,7 +1284,7 @@ " model_dir = fp16_model_dir\n", "print(f\"Loading model from {model_dir}\")\n", "\n", - "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", 
+ "ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): \"1\", props.cache_dir(): \"\"}\n", "\n", "stop_tokens = llm_model_configuration.get(\"stop_tokens\")\n", "completion_to_prompt = llm_model_configuration.get(\"completion_to_prompt\")\n", diff --git a/notebooks/openvino-api/openvino-api.ipynb b/notebooks/openvino-api/openvino-api.ipynb index 2d0a205885f..de3558451f1 100644 --- a/notebooks/openvino-api/openvino-api.ipynb +++ b/notebooks/openvino-api/openvino-api.ipynb @@ -114,10 +114,13 @@ } ], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "devices = core.available_devices\n", "\n", "for device in devices:\n", - " device_name = core.get_property(device, \"FULL_DEVICE_NAME\")\n", + " device_name = core.get_property(device, props.device.full_name)\n", " print(f\"{device}: {device_name}\")" ] }, diff --git a/notebooks/paddle-to-openvino/paddle-to-openvino-classification.ipynb b/notebooks/paddle-to-openvino/paddle-to-openvino-classification.ipynb index e625c3e4722..ce5e7d74c0a 100644 --- a/notebooks/paddle-to-openvino/paddle-to-openvino-classification.ipynb +++ b/notebooks/paddle-to-openvino/paddle-to-openvino-classification.ipynb @@ -537,12 +537,15 @@ } ], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "# Show device information\n", "core = ov.Core()\n", "devices = core.available_devices\n", "\n", "for device_name in devices:\n", - " device_full_name = core.get_property(device_name, \"FULL_DEVICE_NAME\")\n", + " device_full_name = core.get_property(device_name, props.device.full_name)\n", " print(f\"{device_name}: {device_full_name}\")" ] }, diff --git a/notebooks/pixart/pixart.ipynb b/notebooks/pixart/pixart.ipynb index a9972fac7c4..71408c28740 100644 --- a/notebooks/pixart/pixart.ipynb +++ b/notebooks/pixart/pixart.ipynb @@ -247,7 +247,6 @@ " self.transformer = transformer\n", "\n", " def forward(self, hidden_states=None, timestep=None, encoder_hidden_states=None, encoder_attention_mask=None, resolution=None, aspect_ratio=None):\n", - "\n", " return self.transformer.forward(\n", " hidden_states,\n", " timestep=timestep,\n", @@ -289,7 +288,6 @@ "outputs": [], "source": [ "class VAEDecoderWrapper(torch.nn.Module):\n", - "\n", " def __init__(self, vae):\n", " super().__init__()\n", " self.vae = vae\n", diff --git a/notebooks/pose-estimation-webcam/pose-estimation.ipynb b/notebooks/pose-estimation-webcam/pose-estimation.ipynb index 15fb4502c88..66213ac6118 100644 --- a/notebooks/pose-estimation-webcam/pose-estimation.ipynb +++ b/notebooks/pose-estimation-webcam/pose-estimation.ipynb @@ -155,12 +155,15 @@ "metadata": {}, "outputs": [], "source": [ + "import openvino.properties.hint as hints\n", + "\n", + "\n", "# Initialize OpenVINO Runtime\n", "core = ov.Core()\n", "# Read the network from a file.\n", "model = core.read_model(model_path)\n", "# Let the AUTO device decide where to load the model (you can use CPU, GPU as well).\n", - "compiled_model = core.compile_model(model=model, device_name=device.value, config={\"PERFORMANCE_HINT\": \"LATENCY\"})\n", + "compiled_model = core.compile_model(model=model, device_name=device.value, config={hints.performance_mode(): hints.PerformanceMode.LATENCY})\n", "\n", "# Get the input and output names of nodes.\n", "input_layer = compiled_model.input(0)\n", diff --git a/notebooks/pytorch-post-training-quantization-nncf/pytorch-post-training-quantization-nncf.ipynb b/notebooks/pytorch-post-training-quantization-nncf/pytorch-post-training-quantization-nncf.ipynb index 
28fbfb28b3b..eb5ccbe456b 100644 --- a/notebooks/pytorch-post-training-quantization-nncf/pytorch-post-training-quantization-nncf.ipynb +++ b/notebooks/pytorch-post-training-quantization-nncf/pytorch-post-training-quantization-nncf.ipynb @@ -957,11 +957,14 @@ } ], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "core = ov.Core()\n", "devices = core.available_devices\n", "\n", "for device_name in devices:\n", - " device_full_name = core.get_property(device_name, \"FULL_DEVICE_NAME\")\n", + " device_full_name = core.get_property(device_name, props.device.full_name)\n", " print(f\"{device_name}: {device_full_name}\")" ] } diff --git a/notebooks/pytorch-quantization-aware-training/pytorch-quantization-aware-training.ipynb b/notebooks/pytorch-quantization-aware-training/pytorch-quantization-aware-training.ipynb index 59e074197a5..2d5acc6477d 100644 --- a/notebooks/pytorch-quantization-aware-training/pytorch-quantization-aware-training.ipynb +++ b/notebooks/pytorch-quantization-aware-training/pytorch-quantization-aware-training.ipynb @@ -1108,8 +1108,11 @@ } ], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "core = ov.Core()\n", - "core.get_property(device.value, \"FULL_DEVICE_NAME\")" + "core.get_property(device.value, props.device.full_name)" ] } ], diff --git a/notebooks/pytorch-quantization-sparsity-aware-training/pytorch-quantization-sparsity-aware-training.ipynb b/notebooks/pytorch-quantization-sparsity-aware-training/pytorch-quantization-sparsity-aware-training.ipynb index f5e3ad999a4..8d32cd866cb 100644 --- a/notebooks/pytorch-quantization-sparsity-aware-training/pytorch-quantization-sparsity-aware-training.ipynb +++ b/notebooks/pytorch-quantization-sparsity-aware-training/pytorch-quantization-sparsity-aware-training.ipynb @@ -707,7 +707,10 @@ }, "outputs": [], "source": [ - "core.get_property(device.value, \"FULL_DEVICE_NAME\")" + "import openvino.properties as props\n", + "\n", + "\n", + "core.get_property(device.value, props.device.full_name)" ] } ], diff --git a/notebooks/pytorch-to-openvino/pytorch-onnx-to-openvino.ipynb b/notebooks/pytorch-to-openvino/pytorch-onnx-to-openvino.ipynb index c8da17ee224..64bd4a1efe5 100644 --- a/notebooks/pytorch-to-openvino/pytorch-onnx-to-openvino.ipynb +++ b/notebooks/pytorch-to-openvino/pytorch-onnx-to-openvino.ipynb @@ -843,9 +843,12 @@ } ], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "devices = core.available_devices\n", "for device in devices:\n", - " device_name = core.get_property(device, \"FULL_DEVICE_NAME\")\n", + " device_name = core.get_property(device, props.device.full_name)\n", " print(f\"{device}: {device_name}\")" ] }, diff --git a/notebooks/qwen2-audio/gradio_helper.py b/notebooks/qwen2-audio/gradio_helper.py index 1e942e9f741..9ca1999859d 100644 --- a/notebooks/qwen2-audio/gradio_helper.py +++ b/notebooks/qwen2-audio/gradio_helper.py @@ -6,7 +6,6 @@ def make_demo(model, processor): - def add_text(chatbot, task_history, input): text_content = input.text content = [] diff --git a/notebooks/qwen2-audio/ov_qwen2_audio_helper.py b/notebooks/qwen2-audio/ov_qwen2_audio_helper.py index 3dc43452ca3..bc9e1d2004c 100644 --- a/notebooks/qwen2-audio/ov_qwen2_audio_helper.py +++ b/notebooks/qwen2-audio/ov_qwen2_audio_helper.py @@ -676,7 +676,6 @@ def forward( use_cache: bool = True, return_dict: bool = True, ) -> Union[Tuple, Qwen2AudioCausalLMOutputWithPast]: - if input_features is not None: input_features = input_features feature_attention_mask = feature_attention_mask diff 
--git a/notebooks/qwen2-vl/gradio_helper.py b/notebooks/qwen2-vl/gradio_helper.py index ee940ac33d9..1e93738a101 100644 --- a/notebooks/qwen2-vl/gradio_helper.py +++ b/notebooks/qwen2-vl/gradio_helper.py @@ -71,7 +71,6 @@ def transform_messages(original_messages): def make_demo(model, processor): def call_local_model(model, processor, messages): - messages = transform_messages(messages) text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) @@ -92,7 +91,6 @@ def call_local_model(model, processor, messages): yield generated_text def create_predict_fn(): - def predict(_chatbot, task_history): chat_query = _chatbot[-1][0] query = task_history[-1][0] @@ -131,7 +129,6 @@ def predict(_chatbot, task_history): return predict def create_regenerate_fn(): - def regenerate(_chatbot, task_history): if not task_history: return _chatbot diff --git a/notebooks/qwen2-vl/ov_qwen2_vl.py b/notebooks/qwen2-vl/ov_qwen2_vl.py index de578cdb5ee..e944c3b8284 100644 --- a/notebooks/qwen2-vl/ov_qwen2_vl.py +++ b/notebooks/qwen2-vl/ov_qwen2_vl.py @@ -252,7 +252,6 @@ def convert_qwen2vl_model(model_id, output_dir, quantization_config): print("✅ Input embedding model successfully converted") if not image_embed_path.exists() or not image_embed_merger_path.exists(): - print("⌛ Convert Image embedding model") vision_embed_tokens = model.visual @@ -269,7 +268,6 @@ def image_embed_forward(self, hidden_states: torch.Tensor, attention_mask: torch return self.merger(hidden_states) def sdpa_attn_forward(self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, rotary_pos_emb: torch.Tensor = None) -> torch.Tensor: - from transformers.models.qwen2_vl.modeling_qwen2_vl import apply_rotary_pos_emb_vision seq_length = hidden_states.shape[0] diff --git a/notebooks/stable-diffusion-v2/stable-diffusion-v2-optimum-demo.ipynb b/notebooks/stable-diffusion-v2/stable-diffusion-v2-optimum-demo.ipynb index 8bb0265a9a5..d40345164c4 100644 --- a/notebooks/stable-diffusion-v2/stable-diffusion-v2-optimum-demo.ipynb +++ b/notebooks/stable-diffusion-v2/stable-diffusion-v2-optimum-demo.ipynb @@ -105,12 +105,14 @@ ], "source": [ "import openvino as ov\n", + "import openvino.properties as props\n", + "\n", "\n", "core = ov.Core()\n", "devices = core.available_devices\n", "\n", "for device in devices:\n", - " device_name = core.get_property(device, \"FULL_DEVICE_NAME\")\n", + " device_name = core.get_property(device, props.device.full_name)\n", " print(f\"{device}: {device_name}\")" ] }, diff --git a/notebooks/stable-video-diffusion/stable-video-diffusion.ipynb b/notebooks/stable-video-diffusion/stable-video-diffusion.ipynb index 91ce767b3ef..43e92786ca9 100644 --- a/notebooks/stable-video-diffusion/stable-video-diffusion.ipynb +++ b/notebooks/stable-video-diffusion/stable-video-diffusion.ipynb @@ -813,7 +813,6 @@ " ks = ks[0] + 1, ks[1]\n", "\n", " if (ks[1] % 2) == 0:\n", - "\n", " ks = ks[0], ks[1] + 1\n", "\n", " input = _gaussian_blur2d(input, ks, sigmas)\n", @@ -877,7 +876,6 @@ " x = (torch.arange(window_size, device=sigma.device, dtype=sigma.dtype) - window_size // 2).expand(batch_size, -1)\n", "\n", " if window_size % 2 == 0:\n", - "\n", " x = x + 0.5\n", "\n", " gauss = torch.exp(-x.pow(2.0) / (2 * sigma.pow(2.0)))\n", diff --git a/notebooks/tensorflow-quantization-aware-training/tensorflow-quantization-aware-training.ipynb b/notebooks/tensorflow-quantization-aware-training/tensorflow-quantization-aware-training.ipynb index d4f0ed79417..e918367756b 100644 --- 
a/notebooks/tensorflow-quantization-aware-training/tensorflow-quantization-aware-training.ipynb +++ b/notebooks/tensorflow-quantization-aware-training/tensorflow-quantization-aware-training.ipynb @@ -690,8 +690,11 @@ } ], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "core = ov.Core()\n", - "core.get_property(device.value, \"FULL_DEVICE_NAME\")" + "core.get_property(device.value, props.device.full_name)" ] } ], diff --git a/notebooks/vision-monodepth/vision-monodepth.ipynb b/notebooks/vision-monodepth/vision-monodepth.ipynb index 5710746ca09..280e81ef03f 100644 --- a/notebooks/vision-monodepth/vision-monodepth.ipynb +++ b/notebooks/vision-monodepth/vision-monodepth.ipynb @@ -297,12 +297,15 @@ }, "outputs": [], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "# Create cache folder\n", "cache_folder = Path(\"cache\")\n", "cache_folder.mkdir(exist_ok=True)\n", "\n", "core = ov.Core()\n", - "core.set_property({\"CACHE_DIR\": cache_folder})\n", + "core.set_property({props.cache_dir(): cache_folder})\n", "model = core.read_model(model_xml_path)\n", "compiled_model = core.compile_model(model=model, device_name=device.value)\n", "\n", diff --git a/supplementary_materials/notebooks/phi3_chatbot_demo.ipynb b/supplementary_materials/notebooks/phi3_chatbot_demo.ipynb index 9d118e82cfe..9fc218a7d91 100644 --- a/supplementary_materials/notebooks/phi3_chatbot_demo.ipynb +++ b/supplementary_materials/notebooks/phi3_chatbot_demo.ipynb @@ -130,13 +130,17 @@ "metadata": {}, "outputs": [], "source": [ + "import openvino.properties as props\n", + "import openvino.properties.hint as hints\n", + "\n", + "\n", "# Load kwargs\n", "load_kwargs = {\n", " \"device\": device,\n", " \"ov_config\": {\n", - " \"PERFORMANCE_HINT\": \"LATENCY\",\n", - " \"INFERENCE_PRECISION_HINT\": precision,\n", - " \"CACHE_DIR\": os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n", + " hints.performance_mode(): hints.PerformanceMode.LATENCY,\n", + " hints.inference_precision: precision,\n", + " props.cache_dir(): os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n", " },\n", " \"compile\": False,\n", " \"quantization_config\": quantization_config,\n", diff --git a/supplementary_materials/notebooks/phi3_rag_on_client.ipynb b/supplementary_materials/notebooks/phi3_rag_on_client.ipynb index 9b21571b947..7a5682ac98a 100644 --- a/supplementary_materials/notebooks/phi3_rag_on_client.ipynb +++ b/supplementary_materials/notebooks/phi3_rag_on_client.ipynb @@ -280,6 +280,9 @@ "from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig\n", "from functools import wraps\n", "\n", + "import openvino.properties as props\n", + "import openvino.properties.hint as hints\n", + "\n", "\n", "model_name = \"microsoft/Phi-3-mini-4k-instruct\"\n", "save_name = model_name.split(\"/\")[-1] + \"_openvino\"\n", @@ -295,9 +298,9 @@ "load_kwargs = {\n", " \"device\": device,\n", " \"ov_config\": {\n", - " \"PERFORMANCE_HINT\": \"LATENCY\",\n", - " \"INFERENCE_PRECISION_HINT\": precision,\n", - " \"CACHE_DIR\": os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n", + " hints.performance_mode(): hints.PerformanceMode.LATENCY,\n", + " hints.inference_precision: precision,\n", + " props.cache_dir(): os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n", " },\n", " \"quantization_config\": quantization_config,\n", " \"trust_remote_code\": True,\n", diff --git 
a/supplementary_materials/qwen2/chat.py b/supplementary_materials/qwen2/chat.py index eb963524a29..b426a901cd2 100644 --- a/supplementary_materials/qwen2/chat.py +++ b/supplementary_materials/qwen2/chat.py @@ -5,6 +5,10 @@ from optimum.intel.openvino import OVModelForCausalLM from transformers import AutoTokenizer, AutoConfig, TextIteratorStreamer, StoppingCriteriaList, StoppingCriteria +import openvino.properties as props +import openvino.properties.hint as hints +import openvino.properties.streams as streams + class StopOnTokens(StoppingCriteria): def __init__(self, token_ids): @@ -26,7 +30,7 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwa args = parser.parse_args() model_dir = args.model_path - ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""} + ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} tokenizer = AutoTokenizer.from_pretrained(model_dir) print("====Compiling model====")
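
Note (illustration, not part of the patch): the recurring change throughout this diff is the move from string-keyed OpenVINO configuration entries such as "PERFORMANCE_HINT": "LATENCY" or "CACHE_DIR" to the typed openvino.properties API. The short sketch below pulls those pieces together in one place, assuming openvino>=2023.1 and a hypothetical model file "model.xml"; device name and paths are placeholders, and every call shown mirrors usage that already appears in the patch.

    # Minimal sketch of the typed-properties pattern adopted by this PR.
    # Assumes openvino>=2023.1; "model.xml" and the CPU device are hypothetical placeholders.
    import openvino as ov
    import openvino.properties as props
    import openvino.properties.hint as hints
    import openvino.properties.streams as streams

    core = ov.Core()

    # String keys such as "CACHE_DIR" are replaced by property helpers that return the key.
    core.set_property({props.cache_dir(): "cache"})

    model = core.read_model("model.xml")  # hypothetical model path
    compiled_model = core.compile_model(
        model,
        "CPU",
        {
            # "PERFORMANCE_HINT": "LATENCY" becomes a typed hint with an enum value.
            hints.performance_mode(): hints.PerformanceMode.LATENCY,
            # "NUM_STREAMS": "1" becomes streams.num().
            streams.num(): "1",
        },
    )

    # Read-only properties are queried the same way, passing the property object directly.
    print(core.get_property("CPU", props.device.full_name))
    print(compiled_model.get_property(props.optimal_number_of_infer_requests))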