diff --git a/.github/workflows/llm_bench-python.yml b/.github/workflows/llm_bench-python.yml index 3d31649cea..6903882ca0 100644 --- a/.github/workflows/llm_bench-python.yml +++ b/.github/workflows/llm_bench-python.yml @@ -61,6 +61,7 @@ jobs: SRC_DIR: ${{ github.workspace }} LLM_BENCH_PYPATH: ${{ github.workspace }}/tools/llm_bench WWB_PATH: ${{ github.workspace }}/tools/who_what_benchmark + OPENVINO_LOG_LEVEL: 3 steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
diff --git a/src/README.md b/src/README.md index c90bc8f4e4..6466b431d0 100644 --- a/src/README.md +++ b/src/README.md @@ -403,3 +403,7 @@ For information on how OpenVINO™ GenAI works, refer to the [How It Works Secti ## Supported Models For a list of supported models, refer to the [Supported Models Section](./docs/SUPPORTED_MODELS.md). + +## Debug Log + +For information on how to enable and use the debug log, refer to the [Debug Log Section](./docs/DEBUG_LOG.md).
diff --git a/src/cpp/src/continuous_batching_impl.cpp b/src/cpp/src/continuous_batching_impl.cpp index 1e42f5b2d9..bf0c979d39 100644 --- a/src/cpp/src/continuous_batching_impl.cpp +++ b/src/cpp/src/continuous_batching_impl.cpp @@ -46,7 +46,9 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::init( const ov::AnyMap& properties, const DeviceConfig& device_config, ov::Core& core) { - ov::InferRequest infer_request = core.compile_model(model, device_config.get_device(), properties).create_infer_request(); + auto compiled_model = core.compile_model(model, device_config.get_device(), properties); + ov::genai::utils::print_compiled_model_properties(compiled_model, "LLM with Paged Attention"); + ov::InferRequest infer_request = compiled_model.create_infer_request(); // setup KV caches m_cache_manager = std::make_shared<CacheManager>(device_config, core);
diff --git a/src/cpp/src/image_generation/models/autoencoder_kl.cpp b/src/cpp/src/image_generation/models/autoencoder_kl.cpp index e0d6a44189..d3dd7324ee 100644 --- a/src/cpp/src/image_generation/models/autoencoder_kl.cpp +++ b/src/cpp/src/image_generation/models/autoencoder_kl.cpp @@ -212,12 +212,14 @@ AutoencoderKL& AutoencoderKL::compile(const std::string& device, const ov::AnyMa if (m_encoder_model) { ov::CompiledModel encoder_compiled_model = core.compile_model(m_encoder_model, device, properties); + ov::genai::utils::print_compiled_model_properties(encoder_compiled_model, "Auto encoder KL encoder model"); m_encoder_request = encoder_compiled_model.create_infer_request(); // release the original model m_encoder_model.reset(); } ov::CompiledModel decoder_compiled_model = core.compile_model(m_decoder_model, device, properties); + ov::genai::utils::print_compiled_model_properties(decoder_compiled_model, "Auto encoder KL decoder model"); m_decoder_request = decoder_compiled_model.create_infer_request(); // release the original model m_decoder_model.reset();
diff --git a/src/cpp/src/image_generation/models/clip_text_model.cpp b/src/cpp/src/image_generation/models/clip_text_model.cpp index d2dab30bcf..efbc840d4f 100644 --- a/src/cpp/src/image_generation/models/clip_text_model.cpp +++ b/src/cpp/src/image_generation/models/clip_text_model.cpp @@ -97,6 +97,7 @@ CLIPTextModel& CLIPTextModel::compile(const std::string& device, const ov::AnyMa } else { compiled_model = core.compile_model(m_model, device, properties); } + ov::genai::utils::print_compiled_model_properties(compiled_model, "Clip Text model"); m_request = compiled_model.create_infer_request(); // release the original model m_model.reset();
diff --git
a/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp b/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp index 13c7f5a442..982800a701 100644 --- a/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp +++ b/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp @@ -88,6 +88,7 @@ CLIPTextModelWithProjection& CLIPTextModelWithProjection::compile(const std::str } else { compiled_model = core.compile_model(m_model, device, properties); } + ov::genai::utils::print_compiled_model_properties(compiled_model, "Clip Text with projection model"); m_request = compiled_model.create_infer_request(); // release the original model m_model.reset(); diff --git a/src/cpp/src/image_generation/models/flux_transformer_2d_model.cpp b/src/cpp/src/image_generation/models/flux_transformer_2d_model.cpp index 6b28b116b0..b09f099655 100644 --- a/src/cpp/src/image_generation/models/flux_transformer_2d_model.cpp +++ b/src/cpp/src/image_generation/models/flux_transformer_2d_model.cpp @@ -108,6 +108,7 @@ FluxTransformer2DModel& FluxTransformer2DModel::reshape(int batch_size, FluxTransformer2DModel& FluxTransformer2DModel::compile(const std::string& device, const ov::AnyMap& properties) { OPENVINO_ASSERT(m_model, "Model has been already compiled. Cannot re-compile already compiled model"); ov::CompiledModel compiled_model = utils::singleton_core().compile_model(m_model, device, properties); + ov::genai::utils::print_compiled_model_properties(compiled_model, "Flux Transformer 2D model"); m_request = compiled_model.create_infer_request(); // release the original model m_model.reset(); diff --git a/src/cpp/src/image_generation/models/sd3_transformer_2d_model.cpp b/src/cpp/src/image_generation/models/sd3_transformer_2d_model.cpp index 70dddb0476..33771f2316 100644 --- a/src/cpp/src/image_generation/models/sd3_transformer_2d_model.cpp +++ b/src/cpp/src/image_generation/models/sd3_transformer_2d_model.cpp @@ -105,6 +105,7 @@ SD3Transformer2DModel& SD3Transformer2DModel::reshape(int batch_size, SD3Transformer2DModel& SD3Transformer2DModel::compile(const std::string& device, const ov::AnyMap& properties) { OPENVINO_ASSERT(m_model, "Model has been already compiled. 
Cannot re-compile already compiled model"); ov::CompiledModel compiled_model = utils::singleton_core().compile_model(m_model, device, properties); + ov::genai::utils::print_compiled_model_properties(compiled_model, "SD3 Transformer 2D model"); m_request = compiled_model.create_infer_request(); // release the original model m_model.reset(); diff --git a/src/cpp/src/image_generation/models/t5_encoder_model.cpp b/src/cpp/src/image_generation/models/t5_encoder_model.cpp index 8c6df34667..21df456d46 100644 --- a/src/cpp/src/image_generation/models/t5_encoder_model.cpp +++ b/src/cpp/src/image_generation/models/t5_encoder_model.cpp @@ -63,6 +63,7 @@ T5EncoderModel& T5EncoderModel::compile(const std::string& device, const ov::Any ov::Core core = utils::singleton_core(); ov::CompiledModel compiled_model; compiled_model = core.compile_model(m_model, device, properties); + ov::genai::utils::print_compiled_model_properties(compiled_model, "T5 encoder model"); m_request = compiled_model.create_infer_request(); // release the original model m_model.reset(); diff --git a/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp b/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp index c8658a1c1a..6dc285f76d 100644 --- a/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp +++ b/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp @@ -20,6 +20,7 @@ class UNet2DConditionModel::UNetInferenceDynamic : public UNet2DConditionModel:: ov::Core core = utils::singleton_core(); ov::CompiledModel compiled_model = core.compile_model(model, device, properties); + ov::genai::utils::print_compiled_model_properties(compiled_model, "UNet 2D Condition dynamic model"); m_request = compiled_model.create_infer_request(); } diff --git a/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp b/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp index fcde31e9ee..7aa6f6301c 100644 --- a/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp +++ b/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp @@ -40,6 +40,7 @@ class UNet2DConditionModel::UNetInferenceStaticBS1 : public UNet2DConditionModel ov::Core core = utils::singleton_core(); ov::CompiledModel compiled_model = core.compile_model(model, device, properties); + ov::genai::utils::print_compiled_model_properties(compiled_model, "UNet 2D Condition batch-1 model"); for (int i = 0; i < m_native_batch_size; i++) { diff --git a/src/cpp/src/llm_pipeline.cpp b/src/cpp/src/llm_pipeline.cpp index f663b27dd9..6d9aae30fa 100644 --- a/src/cpp/src/llm_pipeline.cpp +++ b/src/cpp/src/llm_pipeline.cpp @@ -77,6 +77,7 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase { const ov::genai::GenerationConfig& generation_config ) : LLMPipelineImplBase(tokenizer, generation_config) { ov::Core core; + ov::CompiledModel compiled_model; auto [core_plugin_config, plugin_config] = ov::genai::utils::split_core_compile_config(config); utils::slice_matmul_statefull_model(model); m_kv_cache_seq_length_axis = ov::genai::utils::get_seq_len_axis(model); @@ -84,10 +85,13 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase { if (auto filtered_plugin_config = extract_adapters_from_properties(plugin_config, &m_generation_config.adapters)) { m_generation_config.adapters->set_tensor_name_prefix("base_model.model.model."); m_adapter_controller = AdapterController(model, *m_generation_config.adapters, device); // TODO: Make the prefix name configurable - m_model_runner = core.compile_model(model, device, 
*filtered_plugin_config).create_infer_request(); + compiled_model = core.compile_model(model, device, *filtered_plugin_config); + m_model_runner = compiled_model.create_infer_request(); } else { - m_model_runner = core.compile_model(model, device, plugin_config).create_infer_request(); + compiled_model = core.compile_model(model, device, plugin_config); + m_model_runner = compiled_model.create_infer_request(); } + ov::genai::utils::print_compiled_model_properties(compiled_model, "Stateful LLM model"); // If eos_token_id was not provided, take value if (m_generation_config.eos_token_id == -1) diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp index cb83209b4b..090aed9650 100644 --- a/src/cpp/src/llm_pipeline_static.cpp +++ b/src/cpp/src/llm_pipeline_static.cpp @@ -777,12 +777,15 @@ void StaticLLMPipeline::setupAndCompileModels( set_npuw_cache_dir(prefill_config); set_npuw_cache_dir(generate_config); - m_kvcache_request = core.compile_model( + auto kv_compiled_model = core.compile_model( kvcache_model, device, generate_config - ).create_infer_request(); - m_prefill_request = core.compile_model( - prefill_model, device, prefill_config - ).create_infer_request(); + ); + ov::genai::utils::print_compiled_model_properties(kv_compiled_model, "Static LLM kv compiled model"); + m_kvcache_request = kv_compiled_model.create_infer_request(); + + auto prefill_compiled_model = core.compile_model(prefill_model, device, prefill_config); + m_prefill_request = prefill_compiled_model.create_infer_request(); + ov::genai::utils::print_compiled_model_properties(prefill_compiled_model, "Static LLM prefill compiled model"); } void StaticLLMPipeline::setupAndImportModels( diff --git a/src/cpp/src/lora_adapter.cpp b/src/cpp/src/lora_adapter.cpp index 5e8839513e..fd446ef708 100644 --- a/src/cpp/src/lora_adapter.cpp +++ b/src/cpp/src/lora_adapter.cpp @@ -637,7 +637,9 @@ class InferRequestSignatureCache { ov::Core core = ov::genai::utils::singleton_core(); auto model = std::make_shared(request_results, request_parameters); - rwb.request = core.compile_model(model, device).create_infer_request(); + auto compiled_model = core.compile_model(model, device); + ov::genai::utils::print_compiled_model_properties(compiled_model, "Infer Request Signature Cache"); + rwb.request = compiled_model.create_infer_request(); requests.emplace(signature, rwb); } diff --git a/src/cpp/src/tokenizer.cpp b/src/cpp/src/tokenizer.cpp index cff25f07f8..642236d32a 100644 --- a/src/cpp/src/tokenizer.cpp +++ b/src/cpp/src/tokenizer.cpp @@ -203,6 +203,7 @@ class Tokenizer::TokenizerImpl { manager.register_pass(); manager.run_passes(ov_tokenizer); m_tokenizer = core.compile_model(ov_tokenizer, device, properties); + ov::genai::utils::print_compiled_model_properties(m_tokenizer, "OV Tokenizer"); m_ireq_queue_tokenizer = std::make_unique>( m_tokenizer.get_property(ov::optimal_number_of_infer_requests), @@ -216,6 +217,7 @@ class Tokenizer::TokenizerImpl { manager_detok.register_pass(); manager_detok.run_passes(ov_detokenizer); m_detokenizer = core.compile_model(ov_detokenizer, device, properties); + ov::genai::utils::print_compiled_model_properties(m_detokenizer, "OV Detokenizer"); m_ireq_queue_detokenizer = std::make_unique>( m_detokenizer.get_property(ov::optimal_number_of_infer_requests), diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp index 3690920295..9fa14b7f9f 100644 --- a/src/cpp/src/utils.cpp +++ b/src/cpp/src/utils.cpp @@ -381,6 +381,43 @@ void trim_kv_cache(ov::InferRequest request, uint64_t 
remove_from_end, size_t se } } +void print_compiled_model_properties(ov::CompiledModel& compiled_model, const char* model_title) { + // Specify the name of the environment variable + const char* env_var_name = "OPENVINO_LOG_LEVEL"; + const char* env_var_value = std::getenv(env_var_name); + + // Check if the environment variable was found + if (env_var_value != nullptr && atoi(env_var_value) > static_cast<int>(ov::log::Level::WARNING)) { + // Output the actual settings that the device selected + auto supported_properties = compiled_model.get_property(ov::supported_properties); + std::cout << "Model: " << model_title << std::endl; + for (const auto& cfg : supported_properties) { + if (cfg == ov::supported_properties) + continue; + auto prop = compiled_model.get_property(cfg); + if (cfg == ov::device::properties) { + auto devices_properties = prop.as<ov::AnyMap>(); + for (auto& item : devices_properties) { + std::cout << " " << item.first << ": " << std::endl; + for (auto& item2 : item.second.as<ov::AnyMap>()) { + std::cout << " " << item2.first << ": " << item2.second.as<std::string>() << std::endl; + } + } + } else { + std::cout << " " << cfg << ": " << prop.as<std::string>() << std::endl; + } + } + + ov::Core core; + std::vector<std::string> exeTargets; + exeTargets = compiled_model.get_property(ov::execution_devices); + std::cout << "EXECUTION_DEVICES:" << std::endl; + for (const auto& device : exeTargets) { + std::cout << " " << device << ": " << core.get_property(device, ov::device::full_name) << std::endl; + } + } +} + } // namespace utils } // namespace genai } // namespace ov
diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp index 57728cd0dc..5342ac427c 100644 --- a/src/cpp/src/utils.hpp +++ b/src/cpp/src/utils.hpp @@ -104,6 +104,8 @@ size_t get_seq_len_axis(std::shared_ptr<const ov::Model> model); void trim_kv_cache(ov::InferRequest request, uint64_t remove_from_end, size_t seq_length_axis, std::optional<AdapterController> adapter_controller); +void print_compiled_model_properties(ov::CompiledModel& compiled_model, const char* model_title); + } // namespace utils } // namespace genai } // namespace ov
diff --git a/src/cpp/src/visual_language/embedding_model.cpp b/src/cpp/src/visual_language/embedding_model.cpp index 88ddfc39cd..307bdcebac 100644 --- a/src/cpp/src/visual_language/embedding_model.cpp +++ b/src/cpp/src/visual_language/embedding_model.cpp @@ -26,6 +26,7 @@ EmbeddingsModel::EmbeddingsModel(const std::filesystem::path& model_dir, merge_postprocess(m_model, scale_emb); ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties); + ov::genai::utils::print_compiled_model_properties(compiled_model, "text embeddings model"); m_request = compiled_model.create_infer_request(); }
diff --git a/src/cpp/src/visual_language/inputs_embedder.cpp b/src/cpp/src/visual_language/inputs_embedder.cpp index dfdb1521ef..cf77dfce3c 100644 --- a/src/cpp/src/visual_language/inputs_embedder.cpp +++ b/src/cpp/src/visual_language/inputs_embedder.cpp @@ -259,9 +259,10 @@ class InputsEmbedderMiniCPM : public InputsEmbedder::IInputsEmbedder { const std::string& device, const ov::AnyMap device_config) : IInputsEmbedder(vlm_config, model_dir, device, device_config) { - m_resampler = utils::singleton_core().compile_model( - model_dir / "openvino_resampler_model.xml", device, device_config - ).create_infer_request(); + auto compiled_model = + utils::singleton_core().compile_model(model_dir / "openvino_resampler_model.xml", device, device_config); + ov::genai::utils::print_compiled_model_properties(compiled_model, "VLM resampler model"); + m_resampler =
compiled_model.create_infer_request(); m_pos_embed_cache = get_2d_sincos_pos_embed(m_vlm_config.hidden_size, {70, 70}); } diff --git a/src/cpp/src/visual_language/pipeline.cpp b/src/cpp/src/visual_language/pipeline.cpp index b8e89a8e04..1ce0cbf210 100644 --- a/src/cpp/src/visual_language/pipeline.cpp +++ b/src/cpp/src/visual_language/pipeline.cpp @@ -92,7 +92,7 @@ class ov::genai::VLMPipeline::VLMPipelineImpl { auto compiled_language_model = utils::singleton_core().compile_model( models_dir / "openvino_language_model.xml", device, properties ); - + ov::genai::utils::print_compiled_model_properties(compiled_language_model, "VLM language model"); auto language_model = compiled_language_model.get_runtime_model(); m_kv_cache_seq_length_axis = ov::genai::utils::get_seq_len_axis(language_model); diff --git a/src/cpp/src/visual_language/vision_encoder.cpp b/src/cpp/src/visual_language/vision_encoder.cpp index 0842524820..9f8f9b0498 100644 --- a/src/cpp/src/visual_language/vision_encoder.cpp +++ b/src/cpp/src/visual_language/vision_encoder.cpp @@ -648,10 +648,12 @@ ov::Tensor get_pixel_values_internvl(const ov::Tensor& image, const ProcessorCon VisionEncoder::VisionEncoder(const std::filesystem::path& model_dir, const VLMModelType model_type, const std::string& device, const ov::AnyMap device_config) : model_type(model_type) { - m_vision_encoder = utils::singleton_core().compile_model(model_dir / "openvino_vision_embeddings_model.xml", device, device_config).create_infer_request(); - m_processor_config = utils::from_config_json_if_exists( - model_dir, "preprocessor_config.json" - ); + auto compiled_model = utils::singleton_core().compile_model(model_dir / "openvino_vision_embeddings_model.xml", + device, + device_config); + ov::genai::utils::print_compiled_model_properties(compiled_model, "VLM vision embeddings model"); + m_vision_encoder = compiled_model.create_infer_request(); + m_processor_config = utils::from_config_json_if_exists(model_dir, "preprocessor_config.json"); } VisionEncoder::VisionEncoder( diff --git a/src/cpp/src/whisper_pipeline.cpp b/src/cpp/src/whisper_pipeline.cpp index 5c31d85fec..d472a20238 100644 --- a/src/cpp/src/whisper_pipeline.cpp +++ b/src/cpp/src/whisper_pipeline.cpp @@ -56,15 +56,18 @@ class WhisperPipeline::WhisperPipelineStatefulImpl : public WhisperPipeline::Whi auto [core_properties, compile_properties] = ov::genai::utils::split_core_compile_config(properties); core.set_property(core_properties); - m_models.encoder = - core.compile_model((models_path / "openvino_encoder_model.xml").string(), device, compile_properties) - .create_infer_request(); - m_models.decoder = - core.compile_model((models_path / "openvino_decoder_model.xml").string(), device, compile_properties) - .create_infer_request(); - m_models.decoder_with_past = - core.compile_model(models_path / "openvino_decoder_with_past_model.xml", device, compile_properties) - .create_infer_request(); + ov::CompiledModel compiled_model; + compiled_model = + core.compile_model((models_path / "openvino_encoder_model.xml").string(), device, compile_properties); + ov::genai::utils::print_compiled_model_properties(compiled_model, "whisper encoder model"); + m_models.encoder = compiled_model.create_infer_request(); + compiled_model = + core.compile_model((models_path / "openvino_decoder_model.xml").string(), device, compile_properties); + ov::genai::utils::print_compiled_model_properties(compiled_model, "whisper decoder model"); + m_models.decoder = compiled_model.create_infer_request(); + compiled_model = 
core.compile_model(models_path / "openvino_decoder_with_past_model.xml", device, compile_properties); + m_models.decoder_with_past = compiled_model.create_infer_request(); + ov::genai::utils::print_compiled_model_properties(compiled_model, "whisper decoder with past model"); // If eos_token_id was not provided, take value if (m_generation_config.eos_token_id == -1) {
diff --git a/src/cpp/src/whisper_pipeline_static.cpp b/src/cpp/src/whisper_pipeline_static.cpp index 9937082a81..136819fa01 100644 --- a/src/cpp/src/whisper_pipeline_static.cpp +++ b/src/cpp/src/whisper_pipeline_static.cpp @@ -555,9 +555,16 @@ WhisperPipeline::StaticWhisperPipeline::StaticWhisperPipeline(const std::filesys preprocess_decoder(decoder_model); preprocess_decoder(decoder_with_past_model); - m_models.encoder = core.compile_model(encoder_model, "NPU").create_infer_request(); - m_models.decoder = core.compile_model(decoder_model, "NPU").create_infer_request(); - m_models.decoder_with_past = core.compile_model(decoder_with_past_model, "NPU").create_infer_request(); + ov::CompiledModel compiled_model; + compiled_model = core.compile_model(encoder_model, "NPU"); + ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper encoder model"); + m_models.encoder = compiled_model.create_infer_request(); + compiled_model = core.compile_model(decoder_model, "NPU"); + ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper decoder model"); + m_models.decoder = compiled_model.create_infer_request(); + compiled_model = core.compile_model(decoder_with_past_model, "NPU"); + ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper decoder with past model"); + m_models.decoder_with_past = compiled_model.create_infer_request(); // If eos_token_id was not provided, take value if (m_generation_config.eos_token_id == -1) {
diff --git a/src/docs/DEBUG_LOG.md b/src/docs/DEBUG_LOG.md new file mode 100644 index 0000000000..5ed3f35d17 --- /dev/null +++ b/src/docs/DEBUG_LOG.md @@ -0,0 +1,43 @@
+## Using the Debug Log
+
+There are six log levels, which can be specified explicitly in code (``ov::log::Level``) or set via the ``OPENVINO_LOG_LEVEL`` environment variable:
+
+0 - ``ov::log::Level::NO``
+1 - ``ov::log::Level::ERR``
+2 - ``ov::log::Level::WARNING``
+3 - ``ov::log::Level::INFO``
+4 - ``ov::log::Level::DEBUG``
+5 - ``ov::log::Level::TRACE``
+
+When ``OPENVINO_LOG_LEVEL`` is set to a value greater than ``ov::log::Level::WARNING``, the properties of the compiled model are printed.
+
+For example:
+
+Linux: ``export OPENVINO_LOG_LEVEL=3``
+Windows: ``set OPENVINO_LOG_LEVEL=3``
+
+With this setting, the properties of the compiled model are printed as follows:
+```sh
+ NETWORK_NAME: Model0
+ OPTIMAL_NUMBER_OF_INFER_REQUESTS: 1
+ NUM_STREAMS: 1
+ INFERENCE_NUM_THREADS: 48
+ PERF_COUNT: NO
+ INFERENCE_PRECISION_HINT: bf16
+ PERFORMANCE_HINT: LATENCY
+ EXECUTION_MODE_HINT: PERFORMANCE
+ PERFORMANCE_HINT_NUM_REQUESTS: 0
+ ENABLE_CPU_PINNING: YES
+ SCHEDULING_CORE_TYPE: ANY_CORE
+ MODEL_DISTRIBUTION_POLICY:
+ ENABLE_HYPER_THREADING: NO
+ EXECUTION_DEVICES: CPU
+ CPU_DENORMALS_OPTIMIZATION: NO
+ LOG_LEVEL: LOG_NONE
+ CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1
+ DYNAMIC_QUANTIZATION_GROUP_SIZE: 32
+ KV_CACHE_PRECISION: f16
+ AFFINITY: CORE
+ EXECUTION_DEVICES:
+ CPU: Intel(R) Xeon(R) Platinum 8468
+```
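As a quick end-to-end check of this logging path, the sketch below sets ``OPENVINO_LOG_LEVEL`` from inside a small C++ program before constructing an ``ov::genai::LLMPipeline``, so every model the pipeline compiles is reported by the new ``print_compiled_model_properties`` helper. This is only an illustration under assumptions: the model directory ``TinyLlama-1.1B-Chat-v1.0`` and the ``CPU`` device are placeholders, and in practice the variable would normally just be exported in the shell as shown in DEBUG_LOG.md.

```cpp
#include <cstdlib>
#include <iostream>

#include "openvino/genai/llm_pipeline.hpp"

int main() {
    // Per the table in DEBUG_LOG.md, 3 corresponds to INFO, which is above WARNING,
    // so compiled-model properties will be dumped. The variable must be set before
    // the pipeline compiles its models; setting it programmatically here is only a
    // convenience for this demo.
#ifdef _WIN32
    _putenv_s("OPENVINO_LOG_LEVEL", "3");
#else
    setenv("OPENVINO_LOG_LEVEL", "3", /*overwrite=*/1);
#endif

    // Placeholder model directory and device; point this at a locally exported model.
    ov::genai::LLMPipeline pipe("TinyLlama-1.1B-Chat-v1.0", "CPU");

    // The property dump happens during pipeline construction (model compilation);
    // generation itself is unaffected by the log level.
    std::cout << pipe.generate("What is OpenVINO?", ov::genai::max_new_tokens(32)) << std::endl;
    return 0;
}
```

The same dump can be obtained from the existing samples without any code changes by exporting ``OPENVINO_LOG_LEVEL`` in the shell before running them, which is how the llm_bench workflow above enables it.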