diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
index e18b098969eb79..260a1c444284cb 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -30,13 +30,13 @@ std::shared_ptr<ov::Model> redirect_new_kv_to_output(const std::shared_ptr<ov::
 
 std::shared_ptr<ov::Model> cvt_kvcache_to_fp16(const std::shared_ptr<ov::Model>& model) {
     ov::preprocess::PrePostProcessor ppp(model);
-    for (auto tensor : model->inputs()) {
+    for (const auto& tensor : model->inputs()) {
         if (tensor.get_any_name().find("past_key") != std::string::npos) {
             ppp.input(tensor.get_any_name()).tensor().set_element_type(ov::element::Type_t::f16);
         }
     }
-    for (auto tensor : model->outputs()) {
+    for (const auto& tensor : model->outputs()) {
         if (tensor.get_any_name().find("present") != std::string::npos) {
             ppp.output(tensor.get_any_name()).tensor().set_element_type(ov::element::Type_t::f16);
         }
     }
@@ -55,7 +55,7 @@ void reshape_to_static(std::shared_ptr<ov::Model> model,
                        const uint32_t kvcache_size,
                        const KVAxesPosition& kv_axes_position) {
     std::map<std::string, ov::PartialShape> new_shapes;
-    for (auto input : model->inputs()) {
+    for (const auto& input : model->inputs()) {
         const auto& input_name = input.get_any_name();
         ov::PartialShape new_shape;
         if (input_name.find("input_ids") != std::string::npos) {
@@ -275,7 +275,7 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr<ov::Model>& m
     auto npudesc = extract_npu_descriptor(plugin);
-    ov::AnyMap properties_copy = other_props;
+    ov::AnyMap properties_copy = std::move(other_props);
     auto prefill_config = get_default_prefill_config(model, npudesc);
     // NB: GENERATE_HINT is only applicable for default generate config!
     const ::intel_npu::npuw::llm::GenerateHint generate_hint = m_cfg.get<::intel_npu::NPUW_LLM_GENERATE_HINT>();
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp
index a8c90884d3d926..a73478c0cab5d2 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp
@@ -36,17 +36,17 @@ ov::npuw::LLMInferRequest::LLMInferRequest(const std::shared_ptr<ov::npuw::LLMCo
     m_kvcache_request = compiled_model->m_kvcache_compiled->create_infer_request();
     m_prefill_request = compiled_model->m_prefill_compiled->create_infer_request();
-    for (auto input_port : m_prefill_request->get_compiled_model()->inputs()) {
+    for (const auto& input_port : m_prefill_request->get_compiled_model()->inputs()) {
         m_prefill_in_ports.emplace(input_port.get_any_name(), input_port);
     }
-    for (auto output_port : m_prefill_request->get_compiled_model()->outputs()) {
+    for (const auto& output_port : m_prefill_request->get_compiled_model()->outputs()) {
         m_prefill_out_ports.emplace(output_port.get_any_name(), output_port);
     }
-    for (auto input_port : m_kvcache_request->get_compiled_model()->inputs()) {
+    for (const auto& input_port : m_kvcache_request->get_compiled_model()->inputs()) {
         m_kvcache_in_ports.emplace(input_port.get_any_name(), input_port);
     }
-    for (auto output_port : m_kvcache_request->get_compiled_model()->outputs()) {
+    for (const auto& output_port : m_kvcache_request->get_compiled_model()->outputs()) {
         m_kvcache_out_ports.emplace(output_port.get_any_name(), output_port);
     }
 }
diff --git a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
index 5ff064e7629759..ddc8f10ccf034e 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
@@ -132,10 +132,15 @@ ov::Tensor Bank::eval_and_alloc(const LazyTensor& tensor,
 
 bool Bank::is_remote(const LazyTensor& tensor) const {
     // FIXME: make generic
+    std::lock_guard<std::mutex> guard(m_mutex);
+
     auto npu_bank = m_device_banks.find("NPU");
-    if (npu_bank != m_device_banks.end() && npu_bank->second.storage.find(tensor) != npu_bank->second.storage.end()) {
-        // Found in NPU bank so considered remote (utterly wrong for the generic case)
-        return true;
+    if (npu_bank != m_device_banks.end()) {
+        std::lock_guard<std::mutex> dev_guard(npu_bank->second.mutex);
+        if (npu_bank->second.storage.find(tensor) != npu_bank->second.storage.end()) {
+            // Found in NPU bank so considered remote (utterly wrong for the generic case)
+            return true;
+        }
     }
     return false;
 }
diff --git a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.hpp b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.hpp
index 491e962a58b438..f2ca0436607fd4 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.hpp
@@ -38,13 +38,13 @@ class Bank {
     // Bank for specified device and their allocated memory
     struct DeviceBank {
         std::unordered_map<LazyTensor, ov::Tensor, LazyTensor::Hash> storage;
-        std::mutex mutex;
+        mutable std::mutex mutex;
     };
     std::unordered_map<std::string, DeviceBank> m_device_banks;
 
     ov::Tensor eval_and_alloc(const LazyTensor& tensor, DeviceBank& dbank, const std::string& device);
 
-    std::mutex m_mutex;
+    mutable std::mutex m_mutex;
 
     std::shared_ptr<const ov::ICore> m_core = nullptr;
     std::string m_alloc_device;
 };
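
For reviewers, a minimal standalone sketch (hypothetical simplified types, not the actual `Bank`/`LazyTensor` code) of why the mutexes in `weights_bank.hpp` become `mutable`: `Bank::is_remote()` is a `const` member, and `std::lock_guard` needs a non-const mutex, so the new locking in `is_remote()` would not compile without `mutable`. The per-device mutex guards that bank's `storage`, while `m_mutex` guards the map of banks itself.

```cpp
#include <mutex>
#include <string>
#include <unordered_map>

// Simplified stand-ins: std::string keys and int values instead of
// LazyTensor / ov::Tensor, just to illustrate the locking pattern.
struct DeviceBank {
    std::unordered_map<std::string, int> storage;
    mutable std::mutex mutex;  // lockable even when the owning Bank is const
};

class Bank {
public:
    bool is_remote(const std::string& key) const {
        std::lock_guard<std::mutex> guard(m_mutex);  // protects m_device_banks
        auto npu_bank = m_device_banks.find("NPU");
        if (npu_bank != m_device_banks.end()) {
            // Protects that bank's storage while we look the key up.
            std::lock_guard<std::mutex> dev_guard(npu_bank->second.mutex);
            return npu_bank->second.storage.count(key) != 0;
        }
        return false;
    }

private:
    std::unordered_map<std::string, DeviceBank> m_device_banks;
    mutable std::mutex m_mutex;  // without `mutable`, the lock above would not compile
};
```

The alternatives would be dropping `const` from `is_remote()` or `const_cast`-ing the mutexes; marking the mutexes `mutable` keeps the query logically const while still allowing synchronization.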