diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/config.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/config.hpp
index 0fa1207bd9935a..a274c8d1c1cae6 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/config/config.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/config.hpp
@@ -74,6 +74,11 @@ struct OptionParser<int32_t> final {
     static int32_t parse(std::string_view val);
 };
 
+template <>
+struct OptionParser<uint32_t> final {
+    static uint32_t parse(std::string_view val);
+};
+
 template <>
 struct OptionParser<int64_t> final {
     static int64_t parse(std::string_view val);
@@ -167,6 +172,25 @@ struct OptionPrinter final {
     }
 };
 
+template <typename K, typename V>
+struct OptionPrinter<std::map<K, V>> final {
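+    // NB: Prints the map as a comma-separated "key:value" list (e.g. "a:1,b:2"), matching the
+    //     format the corresponding map OptionParser is expected to consume.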
+    static std::string toString(const std::map<K, V>& val) {
+        std::stringstream ss;
+        std::size_t counter = 0;
+        std::size_t size = val.size();
+        for (auto& [key, value] : val) {
+            std::string key_str = OptionPrinter<K>::toString(key);
+            std::string value_str = OptionPrinter<V>::toString(value);
+            ss << key_str << ":" << value_str;
+            if (counter < size - 1) {
+                ss << ",";
+            }
+            ++counter;
+        }
+        return ss.str();
+    }
+};
+
 // NB: boolean config option has values YES for true, NO for false
 template <>
 struct OptionPrinter<bool> final {
diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/npuw.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/npuw.hpp
index 6d865ad5e4edf3..927b234df8ba15 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/config/npuw.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/npuw.hpp
@@ -17,6 +17,7 @@ namespace intel_npu {
 //
 
 void registerNPUWOptions(OptionsDesc& desc);
+void registerNPUWLLMOptions(OptionsDesc& desc);
 
 #define DEFINE_OPT(Name, Type, DefaultValue, PropertyKey, Mode)                     \
     struct Name final : OptionBase<Name, Type> {                                    \
@@ -66,4 +67,110 @@ DEFINE_OPT(NPUW_DUMP_SUBS, std::string, "", npuw::dump::subgraphs, CompileTime);
 DEFINE_OPT(NPUW_DUMP_SUBS_ON_FAIL, std::string, "", npuw::dump::subgraphs_on_fail, CompileTime);
 DEFINE_OPT(NPUW_DUMP_IO, std::string, "", npuw::dump::inputs_outputs, RunTime);
 DEFINE_OPT(NPUW_DUMP_IO_ITERS, bool, false, npuw::dump::io_iters, RunTime);
+DEFINE_OPT(NPUW_LLM, bool, false, npuw::llm::enabled, CompileTime);
+DEFINE_OPT(NPUW_LLM_MAX_PROMPT_LEN, uint32_t, 1024, npuw::llm::max_prompt_len, CompileTime);
+DEFINE_OPT(NPUW_LLM_MIN_RESPONSE_LEN, uint32_t, 128, npuw::llm::min_response_len, CompileTime);
+
+namespace npuw {
+namespace llm {
+struct ModelDesc {
+    std::string type;
+    std::string name_or_path;
+    int num_key_value_heads;
+};
+enum class GenerateHint { FAST_COMPILE, BEST_PERF };
+}  // namespace llm
+}  // namespace npuw
+
+struct NPUW_LLM_MODEL_DESC final : OptionBase<NPUW_LLM_MODEL_DESC, ::intel_npu::npuw::llm::ModelDesc> {
+    static std::string_view key() {
+        return ov::intel_npu::npuw::llm::model_desc.name();
+    }
+
+    static constexpr std::string_view getTypeName() {
+        return "::intel_npu::npuw::llm::ModelDesc";
+    }
+
+    static ::intel_npu::npuw::llm::ModelDesc defaultValue() {
+        return {};
+    }
+
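+    // NB: 'val' is the comma-separated "key:value" list documented for NPUW_LLM_MODEL_DESC;
+    //     a missing "num_key_value_heads" entry will make std::stoi throw.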
+    static ::intel_npu::npuw::llm::ModelDesc parse(std::string_view val) {
+        ::intel_npu::npuw::llm::ModelDesc res;
+        std::map<std::string, std::string> res_map = OptionParser<std::map<std::string, std::string>>::parse(val);
+        res.type = res_map["type"];
+        res.name_or_path = res_map["name_or_path"];
+        res.num_key_value_heads = std::stoi(res_map["num_key_value_heads"]);
+        return res;
+    }
+
+    static std::string toString(const ::intel_npu::npuw::llm::ModelDesc& val) {
+        std::string res;
+        std::map<std::string, std::string> res_map;
+        res_map["type"] = val.type;
+        res_map["name_or_path"] = val.name_or_path;
+        res_map["num_key_value_heads"] = std::to_string(val.num_key_value_heads);
+        return OptionPrinter<std::map<std::string, std::string>>::toString(res_map);
+    }
+
+    static OptionMode mode() {
+        return OptionMode::CompileTime;
+    }
+
+    static bool isPublic() {
+        return true;
+    }
+};
+
+struct NPUW_LLM_GENERATE_HINT final : OptionBase<NPUW_LLM_GENERATE_HINT, ::intel_npu::npuw::llm::GenerateHint> {
+    static std::string_view key() {
+        return ov::intel_npu::npuw::llm::generate_hint.name();
+    }
+
+    static constexpr std::string_view getTypeName() {
+        return "::intel_npu::npuw::llm::GenerateHint";
+    }
+
+    static ::intel_npu::npuw::llm::GenerateHint defaultValue() {
+        return ::intel_npu::npuw::llm::GenerateHint::FAST_COMPILE;
+    }
+
+    static ::intel_npu::npuw::llm::GenerateHint parse(std::string_view val) {
+        ::intel_npu::npuw::llm::GenerateHint res;
+
+        if (val == "FAST_COMPILE") {
+            res = ::intel_npu::npuw::llm::GenerateHint::FAST_COMPILE;
+        } else if (val == "BEST_PERF") {
+            res = ::intel_npu::npuw::llm::GenerateHint::BEST_PERF;
+        } else {
+            OPENVINO_THROW("Unsupported \"GENERATE_HINT\" provided: ",
+                           val,
+                           ". Please select either \"FAST_COMPILE\" or \"BEST_PERF\".");
+        }
+        return res;
+    }
+
+    static std::string toString(const ::intel_npu::npuw::llm::GenerateHint& val) {
+        std::string res;
+        switch (val) {
+        case ::intel_npu::npuw::llm::GenerateHint::FAST_COMPILE:
+            res = "FAST_COMPILE";
+            break;
+        case ::intel_npu::npuw::llm::GenerateHint::BEST_PERF:
+            res = "BEST_PERF";
+            break;
+        default:
+            OPENVINO_THROW("Can't convert provided \"GENERATE_HINT\" : ", int(val), " to string.");
+        }
+        return res;
+    }
+
+    static OptionMode mode() {
+        return OptionMode::CompileTime;
+    }
+
+    static bool isPublic() {
+        return true;
+    }
+};
 }  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp
index af4a17988f451e..a416ca51233893 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp
@@ -378,6 +378,51 @@ static constexpr ov::Property<std::string> inputs_outputs{"NPUW_DUMP_IO"};
 static constexpr ov::Property<std::string> io_iters{"NPUW_DUMP_IO_ITERS"};
 }  // namespace dump
 
+namespace llm {
+/**
+ * @brief
+ * Type: bool.
+ * Tell NPUW that you want to pass a dynamic stateful LLM model.
+ * Default value: false.
+ */
+static constexpr ov::Property<bool> enabled{"NPUW_LLM"};
+
+/**
+ * @brief
+ * Type: std::map<std::string, std::string>.
+ * Tell NPUW about your LLM model. Use the following structure for that:
+ * "type:<type>,name_or_path:<name_or_path>,num_key_value_heads:<number>".
+ * Default value: empty structure of the form described above.
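+ * Example (illustrative values): "type:llama,name_or_path:meta-llama/Llama-2-7b-chat-hf,num_key_value_heads:32".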
+ */
+static constexpr ov::Property<std::string> model_desc{"NPUW_LLM_MODEL_DESC"};
+
+/**
+ * @brief
+ * Type: uint32_t.
+ * Tell NPUW the desired maximum prompt length.
+ * Default value: 1024.
+ */
+static constexpr ov::Property<uint32_t> max_prompt_len{"NPUW_LLM_MAX_PROMPT_LEN"};
+
+/**
+ * @brief
+ * Type: uint32_t.
+ * Tell NPUW the desired minimum response length.
+ * Default value: 128.
+ */
+static constexpr ov::Property<uint32_t> min_response_len{"NPUW_LLM_MIN_RESPONSE_LEN"};
+
+/**
+ * @brief
+ * Type: std::string.
+ * Tell NPUW the preferred hint for the generation stage, so that the optimal configuration is used for it.
+ * Possible values: "FAST_COMPILE", "BEST_PERF".
+ * Default value: "FAST_COMPILE".
+ */
+static constexpr ov::Property<std::string> generate_hint{"NPUW_LLM_GENERATE_HINT"};
+
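+// A minimal usage sketch, assuming an ov::Core 'core' and an ov::Model 'model' (values are illustrative):
+//
+//     ov::AnyMap cfg = {{"NPU_USE_NPUW", "YES"},
+//                       {"NPUW_LLM", "YES"},
+//                       {"NPUW_LLM_MAX_PROMPT_LEN", 2048u},
+//                       {"NPUW_LLM_GENERATE_HINT", "BEST_PERF"}};
+//     auto compiled = core.compile_model(model, "NPU", cfg);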
+}  // namespace llm
+
 }  // namespace npuw
 }  // namespace intel_npu
 }  // namespace ov
diff --git a/src/plugins/intel_npu/src/al/src/config/config.cpp b/src/plugins/intel_npu/src/al/src/config/config.cpp
index 9d4c600351afa6..a4e2b515b8e3f6 100644
--- a/src/plugins/intel_npu/src/al/src/config/config.cpp
+++ b/src/plugins/intel_npu/src/al/src/config/config.cpp
@@ -50,6 +50,14 @@ int32_t OptionParser<int32_t>::parse(std::string_view val) {
     }
 }
 
+uint32_t OptionParser<uint32_t>::parse(std::string_view val) {
+    try {
+        return std::stoul(val.data());
+    } catch (...) {
+        OPENVINO_THROW("Value '%s' is not a valid UINT32 option", val.data());
+    }
+}
+
 int64_t OptionParser<int64_t>::parse(std::string_view val) {
     try {
         return std::stoll(val.data());
diff --git a/src/plugins/intel_npu/src/al/src/config/npuw.cpp b/src/plugins/intel_npu/src/al/src/config/npuw.cpp
index 0c7978845c690c..4ee9e392406452 100644
--- a/src/plugins/intel_npu/src/al/src/config/npuw.cpp
+++ b/src/plugins/intel_npu/src/al/src/config/npuw.cpp
@@ -54,3 +54,11 @@ void intel_npu::registerNPUWOptions(OptionsDesc& desc) {
     desc.add<NPUW_DUMP_IO_ITERS>();
 #endif
 }
+
+void intel_npu::registerNPUWLLMOptions(OptionsDesc& desc) {
+    desc.add<NPUW_LLM>();
+    desc.add<NPUW_LLM_MODEL_DESC>();
+    desc.add<NPUW_LLM_MAX_PROMPT_LEN>();
+    desc.add<NPUW_LLM_MIN_RESPONSE_LEN>();
+    desc.add<NPUW_LLM_GENERATE_HINT>();
+}
diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
index f9573cb78f21ec..aa02ca8681e80f 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -28,6 +28,7 @@
 #include "intel_npu/config/config.hpp"
 #include "intel_npu/config/npuw.hpp"
 #include "intel_npu/npuw_private_properties.hpp"
+#include "llm_compiled_model.hpp"
 #include "openvino/runtime/device_id_parser.hpp"
 #include "openvino/runtime/internal_properties.hpp"
 #include "openvino/runtime/properties.hpp"
@@ -85,10 +86,33 @@ ov::npuw::DeviceProperties get_properties_per_device(const std::shared_ptr<const
 }  // namespace npuw
 }  // namespace ov
 
+std::shared_ptr<ov::npuw::ICompiledModel> ov::npuw::ICompiledModel::create(
+    const std::shared_ptr<ov::Model>& model,
+    const std::shared_ptr<const ov::IPlugin>& plugin,
+    const ov::AnyMap& properties) {
+    LOG_INFO("Choosing which NPUW CompiledModel to create");
+    LOG_BLOCK();
+    std::shared_ptr<ov::npuw::ICompiledModel> compiled_model;
+    auto use_llm_key = ov::intel_npu::npuw::llm::enabled.name();
+    if (properties.count(use_llm_key) && properties.at(use_llm_key).as<bool>() == true) {
+        LOG_INFO("ov::npuw::LLMCompiledModel will be created.");
+        compiled_model = std::make_shared<ov::npuw::LLMCompiledModel>(model, plugin, properties);
+    } else {
+        LOG_INFO("ov::npuw::CompiledModel will be created.");
+        compiled_model = std::make_shared<ov::npuw::CompiledModel>(model, plugin, properties);
+    }
+    LOG_INFO("Done");
+    return compiled_model;
+}
+
+ov::npuw::ICompiledModel::ICompiledModel(const std::shared_ptr<ov::Model>& model,
+                                         const std::shared_ptr<const ov::IPlugin>& plugin)
+    : ov::ICompiledModel(model, plugin) {}
+
 ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
                                        const std::shared_ptr<const ov::IPlugin>& plugin,
                                        const ov::AnyMap& properties)
-    : ov::ICompiledModel(model, plugin),
+    : ov::npuw::ICompiledModel(model, plugin),
       m_options_desc(std::make_shared<::intel_npu::OptionsDesc>()),
       m_cfg(m_options_desc),
       m_name(model->get_friendly_name()),
diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
index 8ccb1f83349e47..0e728570eda8d5 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
@@ -22,10 +22,16 @@ class Plugin;
 
 namespace ov {
 namespace npuw {
+class ICompiledModel : public ov::ICompiledModel {
+public:
+    static std::shared_ptr<ov::npuw::ICompiledModel> create(const std::shared_ptr<ov::Model>& model,
+                                                            const std::shared_ptr<const ov::IPlugin>& plugin,
+                                                            const ov::AnyMap& properties);
+    ICompiledModel(const std::shared_ptr<ov::Model>& model, const std::shared_ptr<const ov::IPlugin>& plugin);
+};
 
 class InferRequest;
-
-class CompiledModel : public ov::ICompiledModel {
+class CompiledModel : public ov::npuw::ICompiledModel {
     using DevList = std::vector<std::string>;
     using GetPropertiesMap =
         std::map<std::string, std::tuple<ov::PropertyMutability, std::function<ov::Any(const ::intel_npu::Config&)>>>;
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
new file mode 100644
index 00000000000000..e18b098969eb79
--- /dev/null
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -0,0 +1,346 @@
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include "llm_compiled_model.hpp"
+
+#include "llm_infer_request.hpp"
+#include "logging.hpp"
+#include "openvino/pass/stateful_to_stateless.hpp"
+#include "openvino/runtime/iasync_infer_request.hpp"
+
+namespace {
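+// NB: Rounds 'value' up to the nearest multiple of 'alignment'; assumes 'alignment' is a power of two.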
+uint32_t align_to(uint32_t value, uint32_t alignment) {
+    return (value + alignment - 1) & ~(alignment - 1);
+}
+
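+// NB: Output 0 is the logits; every other output is a KV-cache layer produced by a Concat(past, new)
+//     node. Rewire each such Result to take only the freshly computed key/value slice (the Concat's
+//     second input), so the generate model returns just the new token's KV.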
+std::shared_ptr<ov::Model> redirect_new_kv_to_output(const std::shared_ptr<ov::Model>& model) {
+    const auto kStartOutputKVCacheLayers = 1u;
+    for (std::size_t i = kStartOutputKVCacheLayers; i < model->outputs().size(); ++i) {
+        auto kvout = model->output(i);
+        auto kvrslt = kvout.get_node();
+        auto kvcat = kvrslt->inputs()[0].get_source_output().get_node();
+        auto kvval = kvcat->inputs()[1].get_source_output();
+        kvval.set_names({kvout.get_any_name()});
+        kvrslt->inputs()[0].replace_source_output(kvval);
+    }
+    model->validate_nodes_and_infer_types();
+    return model;
+}
+
+std::shared_ptr<ov::Model> cvt_kvcache_to_fp16(const std::shared_ptr<ov::Model>& model) {
+    ov::preprocess::PrePostProcessor ppp(model);
+
+    for (auto tensor : model->inputs()) {
+        if (tensor.get_any_name().find("past_key") != std::string::npos) {
+            ppp.input(tensor.get_any_name()).tensor().set_element_type(ov::element::Type_t::f16);
+        }
+    }
+
+    for (auto tensor : model->outputs()) {
+        if (tensor.get_any_name().find("present") != std::string::npos) {
+            ppp.output(tensor.get_any_name()).tensor().set_element_type(ov::element::Type_t::f16);
+        }
+    }
+
+    return ppp.build();
+}
+
+struct KVAxesPosition {
+    uint32_t batch;
+    uint32_t seq_len;
+};
+
+void reshape_to_static(std::shared_ptr<ov::Model> model,
+                       const uint32_t input_size,
+                       const uint32_t kvcache_size,
+                       const KVAxesPosition& kv_axes_position) {
+    std::map<std::string, ov::PartialShape> new_shapes;
+    for (auto input : model->inputs()) {
+        const auto& input_name = input.get_any_name();
+        ov::PartialShape new_shape;
+        if (input_name.find("input_ids") != std::string::npos) {
+            new_shape = ov::PartialShape({1, input_size});
+        } else if (input_name.find("attention_mask") != std::string::npos) {
+            new_shape = ov::PartialShape({1, kvcache_size});
+        } else if (input_name.find("position_ids") != std::string::npos) {
+            new_shape = ov::PartialShape({1, input_size});
+        } else {
+            const auto& partial_shape = input.get_partial_shape();
+            new_shape = partial_shape;
+            new_shape[kv_axes_position.batch] = 1;
+            new_shape[kv_axes_position.seq_len] = kvcache_size - input_size;
+        }
+        new_shapes.emplace(input_name, new_shape);
+    }
+    model->reshape(new_shapes);
+}
+
+KVAxesPosition get_kv_axes(const std::string& model_type) {
+    KVAxesPosition axes;
+    if (model_type == "chatglm") {
+        axes.batch = 1u;
+        axes.seq_len = 0u;
+    } else if (model_type == "qwen") {
+        // Note: qwen2 does not fall into this category and conforms to the default layout
+        axes.batch = 0u;
+        axes.seq_len = 1u;
+    } else {
+        axes.batch = 0u;
+        axes.seq_len = 2u;
+    }
+    return axes;
+}
+
+bool is_cw_compressed(const std::shared_ptr<ov::Model>& model) {
+    std::vector<std::string> rt_info_path = {"nncf", "weight_compression", "group_size"};
+    if (!model->has_rt_info(rt_info_path)) {
+        // NB: Model isn't compressed by NNCF - skip
+        return false;
+    }
+    auto group_size = model->get_rt_info<int>(rt_info_path);
+    if (group_size == -1) {
+        // NB: Enable DQ for CW quantized models
+        return true;
+    }
+    return false;
+}
+
+struct NPUDesc {
+    std::string arch;
+    int64_t max_tiles;
+};
+
+std::optional<NPUDesc> extract_npu_descriptor(const std::shared_ptr<const ov::IPlugin>& plugin) {
+    const ov::Any arch = plugin->get_property(ov::device::architecture.name(), ov::AnyMap{});
+    const ov::Any max_tiles = plugin->get_property(ov::intel_npu::max_tiles.name(), ov::AnyMap{});
+    return std::make_optional(NPUDesc{arch.as<std::string>(), max_tiles.as<int64_t>()});
+}
+
+std::optional<ov::Any> pop_option(ov::AnyMap& config, const std::string& option_name) {
+    if (auto it = config.find(option_name); it != config.end()) {
+        std::optional<ov::Any> found = std::make_optional(it->second);
+        config.erase(it);
+        return found;
+    }
+    return std::nullopt;
+}
+
+template <typename T>
+std::optional<T> get_option(ov::AnyMap& config, const std::string& option_name) {
+    if (auto it = config.find(option_name); it != config.end()) {
+        return std::make_optional(it->second.as<T>());
+    }
+    return std::nullopt;
+}
+
+template <typename T>
+T pop_or_default(ov::AnyMap& config, const std::string& key, const T& default_value) {
+    auto anyopt = pop_option(config, key);
+    if (anyopt.has_value()) {
+        return anyopt.value().as<T>();
+    }
+    return default_value;
+}
+
+ov::AnyMap get_baseline_common_config() {
+    ov::AnyMap config = {
+        {"NPU_COMPILATION_MODE_PARAMS", "compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add_RMSNorm"},
+        {"NPUW_DEVICES", "NPU"},
+        {"NPU_USE_NPUW", "YES"},
+        {"NPUW_FOLD", "YES"},
+        {"NPUW_DCOFF_TYPE", "f16"},
+        {"NPUW_DCOFF_SCALE", "YES"},
+        {"NPUW_WEIGHTS_BANK", "shared"},
+        {"NPUW_SLICE_OUT", "YES"},
+        {"NPUW_FUNCALL_ASYNC", "YES"}};
+    return config;
+}
+
+ov::AnyMap get_default_common_config(const std::shared_ptr<ov::Model>& model) {
+    auto config = get_baseline_common_config();
+    const char* npu_l0 = std::getenv("DISABLE_OPENVINO_GENAI_NPU_L0");
+    if (npu_l0 && std::atoi(npu_l0) == 1) {
+        config.emplace("NPUW_WEIGHTS_BANK_ALLOC", "CPU");
+    } else {
+        config.emplace("NPUW_FUNCALL_FOR_ALL", "YES");
+    }
+    return config;
+}
+
+ov::AnyMap get_default_prefill_config(const std::shared_ptr<ov::Model>& model, const std::optional<NPUDesc>& npudesc) {
+    auto config = get_default_common_config(model);
+    if (is_cw_compressed(model)) {
+        config.emplace("NPUW_DQ", "YES");
+    } else {
+        config.emplace("NPUW_PMM", "NO");
+    }
+    if (npudesc.has_value() && npudesc->arch == "4000" && npudesc->max_tiles != -1) {
+        config.emplace("NPU_DPU_GROUPS", npudesc->max_tiles);
+    }
+    return config;
+}
+
+ov::AnyMap get_default_generate_config(const std::shared_ptr<ov::Model>& model,
+                                       const std::optional<NPUDesc>& npudesc,
+                                       const ::intel_npu::npuw::llm::GenerateHint hint) {
+    auto config = get_default_common_config(model);
+    if (hint == ::intel_npu::npuw::llm::GenerateHint::BEST_PERF) {
+        config.emplace("NPUW_ONLINE_PIPELINE", "NONE");
+    }
+    // NB: Unconditionally set for generation model
+    config.emplace("NPUW_DQ", "YES");
+    if (npudesc.has_value() && npudesc->arch == "4000") {
+        config.emplace("NPU_DPU_GROUPS", 4);
+    }
+    return config;
+}
+
+void merge_config_with(ov::AnyMap& lhs, const ov::AnyMap& rhs) {
+    for (const auto& [key, value] : rhs) {
+        // NB: Overwrite the value if key already exists
+        if (auto it = lhs.find(key); it != lhs.end()) {
+            it->second = value;
+        } else {
+            lhs.emplace(key, value);
+        }
+    }
+}
+
+void drop_cache_dir(ov::AnyMap& config) {
+    if (config.count("NPU_USE_NPUW") != 0u) {
+        pop_option(config, "CACHE_DIR");
+    }
+}
+
+void split_llm_properties(const ov::AnyMap& properties, ov::AnyMap& llm_properties, ov::AnyMap& other_properties) {
+    for (auto it = properties.begin(); it != properties.end(); ++it) {
+        if (it->first.find("NPUW_LLM") != it->first.npos) {
+            llm_properties.insert(*it);
+        } else {
+            other_properties.insert(*it);
+        }
+    }
+}
+
+std::map<std::string, std::string> any_copy(const ov::AnyMap& params) {
+    std::map<std::string, std::string> result;
+    for (auto&& value : params) {
+        result.emplace(value.first, value.second.as<std::string>());
+    }
+    return result;
+}
+}  // namespace
+
+ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr<ov::Model>& model,
+                                             const std::shared_ptr<const ov::IPlugin>& plugin,
+                                             const ov::AnyMap& properties)
+    : ov::npuw::ICompiledModel(model, plugin),
+      m_options_desc(std::make_shared<::intel_npu::OptionsDesc>()),
+      m_cfg(m_options_desc) {
+    LOG_DEBUG("Creating LLMCompiledModel");
+    LOG_BLOCK();
+
+    ::intel_npu::registerNPUWLLMOptions(*m_options_desc);
+
+    std::map<std::string, ov::Any> npuw_llm_props;
+    std::map<std::string, ov::Any> other_props;
+    split_llm_properties(properties, npuw_llm_props, other_props);
+    m_cfg.update(any_copy(npuw_llm_props));
+
+    LOG_DEBUG("1. Creating kvcache model as clone of passed one.");
+    auto kvcache_model = model->clone();
+    LOG_DEBUG("2. Transform kvcache model from stateful to stateless.");
+    ov::pass::StatefulToStateless().run_on_model(kvcache_model);
+
+    LOG_DEBUG("3. Creating prefill model as clone of transformed kvcache one.");
+    auto prefill_model = kvcache_model->clone();
+    prefill_model->set_friendly_name(kvcache_model->get_friendly_name() + "_prefill");
+    LOG_DEBUG("4. Converting KV-cache in prefill model to FP16.");
+    prefill_model = cvt_kvcache_to_fp16(prefill_model);
+
+    LOG_DEBUG("5. Optimize kvcache kvcache model to output key/values for new token.");
+    kvcache_model = redirect_new_kv_to_output(kvcache_model);
+    LOG_DEBUG("6. Converting KV-cache in kvcache model to FP16.");
+    kvcache_model = cvt_kvcache_to_fp16(kvcache_model);
+
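+    // NB: Both budgets are aligned up to a multiple of 64 tokens; the KV-cache is then sized to hold
+    //     the padded prompt plus the response budget.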
+    const uint32_t kMaxPromptLen = align_to(m_cfg.get<::intel_npu::NPUW_LLM_MAX_PROMPT_LEN>(), 64u);
+    const uint32_t kMinResponseLen = align_to(m_cfg.get<::intel_npu::NPUW_LLM_MIN_RESPONSE_LEN>(), 64u);
+    const ::intel_npu::npuw::llm::ModelDesc model_desc = m_cfg.get<::intel_npu::NPUW_LLM_MODEL_DESC>();
+    KVAxesPosition axes = get_kv_axes(model_desc.type);
+    m_kvcache_desc = KVCacheDesc{kMaxPromptLen, kMaxPromptLen + kMinResponseLen, 0u, axes.seq_len};
+    LOG_DEBUG("7. Make prefill model with static shapes");
+    reshape_to_static(prefill_model, m_kvcache_desc.max_prompt_size, m_kvcache_desc.max_prompt_size, axes);
+    LOG_DEBUG("8. Make kvcache model with static shapes");
+    reshape_to_static(kvcache_model, 1u, m_kvcache_desc.total_size, axes);
+
+    auto npudesc = extract_npu_descriptor(plugin);
+
+    ov::AnyMap properties_copy = other_props;
+    auto prefill_config = get_default_prefill_config(model, npudesc);
+    // NB: GENERATE_HINT is only applicable for default generate config!
+    const ::intel_npu::npuw::llm::GenerateHint generate_hint = m_cfg.get<::intel_npu::NPUW_LLM_GENERATE_HINT>();
+    LOG_DEBUG("9. Passed GENERATE_HINT: " << std::string(::intel_npu::NPUW_LLM_GENERATE_HINT::toString(generate_hint)));
+    auto generate_config = get_default_generate_config(model, npudesc, generate_hint);
+    merge_config_with(prefill_config, properties_copy);
+    merge_config_with(generate_config, properties_copy);
+    // FIXME: Drop CACHE_DIR option if NPUW is enabled
+    drop_cache_dir(prefill_config);
+    drop_cache_dir(generate_config);
+
+    m_kvcache_compiled = std::make_shared<ov::npuw::CompiledModel>(kvcache_model, plugin, generate_config);
+    m_prefill_compiled = std::make_shared<ov::npuw::CompiledModel>(prefill_model, plugin, prefill_config);
+
+    implement_properties();
+    LOG_DEBUG("Done");
+}
+
+void ov::npuw::LLMCompiledModel::export_model(std::ostream& model) const {
+    OPENVINO_NOT_IMPLEMENTED;
+}
+
+std::shared_ptr<const ov::Model> ov::npuw::LLMCompiledModel::get_runtime_model() const {
+    OPENVINO_NOT_IMPLEMENTED;
+}
+
+void ov::npuw::LLMCompiledModel::set_property(const ov::AnyMap& properties) {
+    OPENVINO_NOT_IMPLEMENTED;
+}
+
+ov::Any ov::npuw::LLMCompiledModel::get_property(const std::string& name) const {
+    OPENVINO_SUPPRESS_DEPRECATED_START
+    auto&& configIterator = m_prop_to_opt.find(name);
+    if (configIterator != m_prop_to_opt.cend()) {
+        return std::get<1>(configIterator->second)(m_cfg);
+    } else {
+        return m_prefill_compiled->get_property(name);
+    }
+    OPENVINO_SUPPRESS_DEPRECATED_END
+}
+
+std::shared_ptr<ov::ISyncInferRequest> ov::npuw::LLMCompiledModel::create_sync_infer_request() const {
+    auto* non_const_this = const_cast<ov::npuw::LLMCompiledModel*>(this);  // because of const in API
+    return non_const_this->create_llm_infer_request();
+}
+
+std::shared_ptr<ov::ISyncInferRequest> ov::npuw::LLMCompiledModel::create_llm_infer_request() {
+    auto this_sptr = std::static_pointer_cast<ov::npuw::LLMCompiledModel>(shared_from_this());
+    return std::make_shared<ov::npuw::LLMInferRequest>(this_sptr, m_kvcache_desc);
+}
+
+void ov::npuw::LLMCompiledModel::implement_properties() {
+#define BIND(N, T, GETTER)                                                                 \
+    {                                                                                      \
+        ov::intel_npu::N.name(), {                                                         \
+            ov::PropertyMutability::RW, [](const ::intel_npu::Config& config) -> ov::Any { \
+                return config.GETTER<::intel_npu::T>();                                    \
+            }                                                                              \
+        }                                                                                  \
+    }
+
+    m_prop_to_opt.insert({BIND(npuw::llm::enabled, NPUW_LLM, get),
+                          BIND(npuw::llm::model_desc, NPUW_LLM_MODEL_DESC, getString),
+                          BIND(npuw::llm::max_prompt_len, NPUW_LLM_MAX_PROMPT_LEN, get),
+                          BIND(npuw::llm::min_response_len, NPUW_LLM_MIN_RESPONSE_LEN, get),
+                          BIND(npuw::llm::generate_hint, NPUW_LLM_GENERATE_HINT, getString)});
+#undef BIND
+}
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.hpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.hpp
new file mode 100644
index 00000000000000..1a748997fd48fa
--- /dev/null
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.hpp
@@ -0,0 +1,54 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+
+#include "compiled_model.hpp"
+
+namespace ov {
+namespace npuw {
+
+class LLMInferRequest;
+class LLMCompiledModel : public ov::npuw::ICompiledModel {
+    using GetPropertiesMap =
+        std::map<std::string, std::tuple<ov::PropertyMutability, std::function<ov::Any(const ::intel_npu::Config&)>>>;
+
+public:
+    struct KVCacheDesc {
+        uint32_t max_prompt_size = 0u;
+        uint32_t total_size = 0u;
+        uint32_t num_stored_tokens = 0u;
+        uint32_t dim = 0u;
+    };
+
+    LLMCompiledModel(const std::shared_ptr<ov::Model>& model,
+                     const std::shared_ptr<const ov::IPlugin>& plugin,
+                     const ov::AnyMap& properties);
+    LLMCompiledModel() = delete;
+    void export_model(std::ostream& model) const override;
+    std::shared_ptr<const ov::Model> get_runtime_model() const override;
+
+    void set_property(const ov::AnyMap& properties) override;
+    ov::Any get_property(const std::string& name) const override;
+
+private:
+    friend class LLMInferRequest;
+
+    std::shared_ptr<ov::ISyncInferRequest> create_llm_infer_request();
+    std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
+    void implement_properties();
+
+    std::shared_ptr<::intel_npu::OptionsDesc> m_options_desc;
+    ::intel_npu::Config m_cfg;
+    GetPropertiesMap m_prop_to_opt;
+
+    KVCacheDesc m_kvcache_desc;
+    std::shared_ptr<ov::npuw::CompiledModel> m_kvcache_compiled;
+    std::shared_ptr<ov::npuw::CompiledModel> m_prefill_compiled;
+};
+
+}  // namespace npuw
+}  // namespace ov
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp
new file mode 100644
index 00000000000000..a8c90884d3d926
--- /dev/null
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp
@@ -0,0 +1,193 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "llm_infer_request.hpp"
+
+#include <regex>
+
+#include "llm_compiled_model.hpp"
+#include "logging.hpp"
+#include "openvino/runtime/iasync_infer_request.hpp"
+
+namespace {
+template <typename T>
+void fill_tensor(ov::SoPtr<ov::ITensor> tensor, T fill_val, size_t offset = 0u) {
+    T* tensor_data = tensor->data<T>();
+    std::fill(tensor_data + offset, tensor_data + tensor->get_size(), fill_val);
+}
+
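+// NB: Returns a view over 'tensor' restricted to [start_pos, end_pos) along dimension 'dim';
+//     the underlying data is shared, not copied.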
+ov::SoPtr<ov::ITensor> make_tensor_slice(ov::SoPtr<ov::ITensor> tensor,
+                                         uint32_t dim,
+                                         uint32_t start_pos,
+                                         uint32_t end_pos) {
+    ov::Shape start_shape(std::vector<size_t>(tensor->get_shape().size(), 0u));
+    start_shape[dim] = start_pos;
+    ov::Shape end_shape = tensor->get_shape();
+    end_shape[dim] = end_pos;
+    return ov::get_tensor_impl(ov::Tensor(ov::make_tensor(tensor), start_shape, end_shape));
+}
+}  // anonymous namespace
+
+ov::npuw::LLMInferRequest::LLMInferRequest(const std::shared_ptr<ov::npuw::LLMCompiledModel>& compiled_model,
+                                           const ov::npuw::LLMCompiledModel::KVCacheDesc& kvcache_desc)
+    : ov::ISyncInferRequest(compiled_model),
+      m_kvcache_desc(kvcache_desc) {
+    m_kvcache_request = compiled_model->m_kvcache_compiled->create_infer_request();
+    m_prefill_request = compiled_model->m_prefill_compiled->create_infer_request();
+
+    for (auto input_port : m_prefill_request->get_compiled_model()->inputs()) {
+        m_prefill_in_ports.emplace(input_port.get_any_name(), input_port);
+    }
+    for (auto output_port : m_prefill_request->get_compiled_model()->outputs()) {
+        m_prefill_out_ports.emplace(output_port.get_any_name(), output_port);
+    }
+
+    for (auto input_port : m_kvcache_request->get_compiled_model()->inputs()) {
+        m_kvcache_in_ports.emplace(input_port.get_any_name(), input_port);
+    }
+    for (auto output_port : m_kvcache_request->get_compiled_model()->outputs()) {
+        m_kvcache_out_ports.emplace(output_port.get_any_name(), output_port);
+    }
+}
+
+void ov::npuw::LLMInferRequest::prepare_for_new_conversation() {
+    // FIXME: for input_ids it must be the padding token from the tokenizer, which is not available from here.
+    //        Get it from the NPUW options.
+    fill_tensor<int64_t>(m_prefill_request->get_tensor(m_prefill_in_ports.at("input_ids")), 0u);
+    fill_tensor<int64_t>(m_prefill_request->get_tensor(m_prefill_in_ports.at("attention_mask")), 0u);
+    fill_tensor<int64_t>(m_prefill_request->get_tensor(m_prefill_in_ports.at("position_ids")), 0u);
+    fill_tensor<int64_t>(m_kvcache_request->get_tensor(m_kvcache_in_ports.at("attention_mask")), 0u);
+    m_kvcache_desc.num_stored_tokens = 0u;
+}
+
+void ov::npuw::LLMInferRequest::infer_prefill(ov::SoPtr<ov::ITensor> input_ids,
+                                              ov::SoPtr<ov::ITensor> attention_mask,
+                                              ov::SoPtr<ov::ITensor> position_ids) {
+    LOG_DEBUG("Calling inference for prefill model...");
+    LOG_BLOCK();
+
+    prepare_for_new_conversation();
+
+    auto padded_input_ids = m_prefill_request->get_tensor(m_prefill_in_ports.at("input_ids"));
+    const size_t offset = padded_input_ids->get_size() - input_ids->get_size();
+    std::copy_n(input_ids->data<int64_t>(), input_ids->get_size(), padded_input_ids->data<int64_t>() + offset);
+
+    auto padded_attention_mask = m_prefill_request->get_tensor(m_prefill_in_ports.at("attention_mask"));
+    std::copy_n(attention_mask->data<int64_t>(),
+                attention_mask->get_size(),
+                padded_attention_mask->data<int64_t>() + offset);
+
+    auto padded_position_ids = m_prefill_request->get_tensor(m_prefill_in_ports.at("position_ids"));
+    std::copy_n(position_ids->data<int64_t>(), position_ids->get_size(), padded_position_ids->data<int64_t>() + offset);
+
+    m_prefill_request->infer();
+    m_kvcache_desc.num_stored_tokens += static_cast<uint32_t>(input_ids->get_size());
+    m_need_copy_kvcache = true;
+
+    m_logits = m_prefill_request->get_tensor(m_prefill_out_ports.at("logits"));
+
+    LOG_DEBUG("Done");
+}
+
+void ov::npuw::LLMInferRequest::infer_generate(ov::SoPtr<ov::ITensor> input_ids,
+                                               ov::SoPtr<ov::ITensor> attention_mask,
+                                               ov::SoPtr<ov::ITensor> position_ids) {
+    LOG_DEBUG("Calling inference for generate model...");
+    LOG_BLOCK();
+
+    // NB: KV-cache is full; further generation is impossible
+    if (m_kvcache_desc.num_stored_tokens == m_kvcache_desc.total_size) {
+        OPENVINO_THROW("KV-Cache is full.");
+    }
+
+    if (m_need_copy_kvcache) {
+        LOG_DEBUG("Copying kv-cache from prefill to generate model.");
+        const std::size_t kStartOutputKVCacheLayers = 1u;
+        const auto& kvcache_compiled = m_kvcache_request->get_compiled_model();
+        for (std::size_t i = 0; i < kvcache_compiled->outputs().size() - 1; ++i) {
+            const auto& output_name = kvcache_compiled->outputs()[kStartOutputKVCacheLayers + i].get_any_name();
+            auto prefill_out_tensor = m_prefill_request->get_tensor(m_prefill_out_ports.at(output_name));
+
+            const auto& input_name = std::regex_replace(output_name, std::regex("present"), "past_key_values");
+            auto kvcache_in_tensor = m_kvcache_request->get_tensor(m_kvcache_in_ports.at(input_name));
+
+            // FIXME: We don't need to fill the whole tensor with 0s, but only its last
+            //        tensor.size() - num_stored_tokens elements along the kvcache dimension.
+            fill_tensor<ov::float16>(kvcache_in_tensor, 0);
+
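+            // NB: The prompt is left-padded, so its KV occupies the last num_stored_tokens positions
+            //     of the prefill output; copy that slice to the beginning of the generate model's
+            //     KV input.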
+            auto prefill_out_slice =
+                make_tensor_slice(prefill_out_tensor,
+                                  m_kvcache_desc.dim,
+                                  m_kvcache_desc.max_prompt_size - m_kvcache_desc.num_stored_tokens,
+                                  m_kvcache_desc.max_prompt_size);
+
+            auto kvcache_in_slice =
+                make_tensor_slice(kvcache_in_tensor, m_kvcache_desc.dim, 0u, m_kvcache_desc.num_stored_tokens);
+
+            prefill_out_slice->copy_to(kvcache_in_slice._ptr);
+        }
+        LOG_DEBUG("Prepare attention mask pattern.");
+        auto* attention_mask_data =
+            m_kvcache_request->get_tensor(m_kvcache_in_ports.at("attention_mask"))->data<int64_t>();
+        attention_mask_data[m_kvcache_desc.total_size - 1] = 1;
+
+        m_need_copy_kvcache = false;
+    }
+
+    // FIXME: these tensors should be shared between the parent & child models
+    auto kv_input_ids = m_kvcache_request->get_tensor(m_kvcache_in_ports.at("input_ids"));
+    std::copy_n(input_ids->data<int64_t>(), input_ids->get_size(), kv_input_ids->data<int64_t>());
+
+    auto kv_attn_mask = m_kvcache_request->get_tensor(m_kvcache_in_ports.at("attention_mask"));
+    std::copy_n(attention_mask->data<int64_t>(), attention_mask->get_size(), kv_attn_mask->data<int64_t>());
+
+    auto kv_pos_ids = m_kvcache_request->get_tensor(m_kvcache_in_ports.at("position_ids"));
+    std::copy_n(position_ids->data<int64_t>(), position_ids->get_size(), kv_pos_ids->data<int64_t>());
+
+    m_kvcache_request->infer();
+    m_logits = m_kvcache_request->get_tensor(m_kvcache_out_ports.at("logits"));
+    m_kvcache_desc.num_stored_tokens += 1;
+
+    LOG_DEBUG("Write KV-cache for the new token to the correct input position for next iteration.");
+    const std::size_t kStartOutputKVCacheLayers = 1u;
+    const auto& kvcache_compiled = m_kvcache_request->get_compiled_model();
+    for (std::size_t i = 0; i < kvcache_compiled->outputs().size() - 1; ++i) {
+        const auto& output_name = kvcache_compiled->outputs()[kStartOutputKVCacheLayers + i].get_any_name();
+        const auto& input_name = std::regex_replace(output_name, std::regex("present"), "past_key_values");
+        auto kvcache_in_tensor = m_kvcache_request->get_tensor(m_kvcache_in_ports.at(input_name));
+        auto kvcache_in_slice = make_tensor_slice(kvcache_in_tensor,
+                                                  m_kvcache_desc.dim,
+                                                  m_kvcache_desc.num_stored_tokens - 1,
+                                                  m_kvcache_desc.num_stored_tokens);
+        auto kvcache_out_tensor = m_kvcache_request->get_tensor(m_kvcache_out_ports.at(output_name));
+        kvcache_out_tensor->copy_to(kvcache_in_slice._ptr);
+    }
+    LOG_DEBUG("Done");
+}
+
+void ov::npuw::LLMInferRequest::infer() {
+    const auto& inputs = get_inputs();
+
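+    // NB: Inputs are assumed to arrive in the order: input_ids, attention_mask, position_ids.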
+    auto input_ids = get_tensor(inputs[0]);
+    auto attention_mask = get_tensor(inputs[1]);
+    auto position_ids = get_tensor(inputs[2]);
+
+    OPENVINO_ASSERT(ov::element::i64 == input_ids->get_element_type());
+    OPENVINO_ASSERT(ov::element::i64 == attention_mask->get_element_type());
+    OPENVINO_ASSERT(ov::element::i64 == position_ids->get_element_type());
+
+    if (input_ids->get_size() != 1) {
+        infer_prefill(input_ids, attention_mask, position_ids);
+    } else {
+        infer_generate(input_ids, attention_mask, position_ids);
+    }
+}
+
+ov::SoPtr<ov::ITensor> ov::npuw::LLMInferRequest::get_tensor(const ov::Output<const ov::Node>& port) const {
+    // NB: If asked for logits...
+    if (port == get_outputs()[0]) {
+        return m_logits;
+    }
+    return ov::ISyncInferRequest::get_tensor(port);
+}
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.hpp b/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.hpp
new file mode 100644
index 00000000000000..fbc6c702c4b62a
--- /dev/null
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.hpp
@@ -0,0 +1,58 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+
+#include "llm_compiled_model.hpp"
+#include "openvino/core/descriptor/output.hpp"
+#include "openvino/runtime/isync_infer_request.hpp"
+
+namespace ov {
+namespace npuw {
+
+class LLMInferRequest final : public ov::ISyncInferRequest {
+public:
+    explicit LLMInferRequest(const std::shared_ptr<ov::npuw::LLMCompiledModel>& compiled_model,
+                             const ov::npuw::LLMCompiledModel::KVCacheDesc& kvcache_desc);
+
+    void infer() override;
+
+    ov::SoPtr<ov::ITensor> get_tensor(const ov::Output<const ov::Node>& port) const override;
+
+    void check_tensors() const override{};
+
+    std::vector<ov::ProfilingInfo> get_profiling_info() const override {
+        return {};
+    }
+    std::vector<ov::SoPtr<ov::IVariableState>> query_state() const override {
+        return {};
+    }
+
+private:
+    void prepare_for_new_conversation();
+
+    void infer_prefill(ov::SoPtr<ov::ITensor> input_ids,
+                       ov::SoPtr<ov::ITensor> attention_mask,
+                       ov::SoPtr<ov::ITensor> position_ids);
+
+    void infer_generate(ov::SoPtr<ov::ITensor> input_ids,
+                        ov::SoPtr<ov::ITensor> attention_mask,
+                        ov::SoPtr<ov::ITensor> position_ids);
+
+    std::shared_ptr<ov::IAsyncInferRequest> m_kvcache_request;
+    std::shared_ptr<ov::IAsyncInferRequest> m_prefill_request;
+    LLMCompiledModel::KVCacheDesc m_kvcache_desc;
+    ov::SoPtr<ov::ITensor> m_logits;
+    bool m_need_copy_kvcache = false;
+
+    std::unordered_map<std::string, ov::Output<const ov::Node>> m_prefill_in_ports;
+    std::unordered_map<std::string, ov::Output<const ov::Node>> m_prefill_out_ports;
+    std::unordered_map<std::string, ov::Output<const ov::Node>> m_kvcache_in_ports;
+    std::unordered_map<std::string, ov::Output<const ov::Node>> m_kvcache_out_ports;
+};
+
+}  // namespace npuw
+}  // namespace ov
diff --git a/src/plugins/intel_npu/src/plugin/npuw/logging.hpp b/src/plugins/intel_npu/src/plugin/npuw/logging.hpp
index b258e3e6e6bfe9..95c9a742db7842 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/logging.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/logging.hpp
@@ -62,3 +62,7 @@ void dump_failure(const std::shared_ptr<ov::Model>& model, const std::string& de
             OPENVINO_THROW("NPUW: Assertion " #expr " failed"); \
         }                                                       \
     } while (0)
+
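+// NB: MSVC does not define __PRETTY_FUNCTION__; map it to the equivalent __FUNCSIG__ for logging.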
+#ifdef _MSC_VER
+#    define __PRETTY_FUNCTION__ __FUNCSIG__
+#endif
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index a57628c2e45510..da425d5d01a5c3 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -7,6 +7,7 @@
 #include <fstream>
 
 #include "compiled_model.hpp"
+#include "npuw/compiled_model.hpp"
 #include "driver_compiler_adapter.hpp"
 #include "intel_npu/common/device_helpers.hpp"
 #include "intel_npu/common/igraph.hpp"
@@ -16,7 +17,6 @@
 #include "intel_npu/config/npuw.hpp"
 #include "intel_npu/config/runtime.hpp"
 #include "intel_npu/utils/zero/zero_init.hpp"
-#include "npuw/compiled_model.hpp"
 #include "openvino/op/constant.hpp"
 #include "openvino/op/parameter.hpp"
 #include "openvino/runtime/intel_npu/properties.hpp"
@@ -637,7 +637,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
             if (localProperties.count(ov::cache_dir.name()) || !_globalConfig.get<CACHE_DIR>().empty()) {
                 OPENVINO_THROW("Option 'CACHE_DIR' is not supported with NPU_USE_NPUW!");
             }
-            return std::make_shared<ov::npuw::CompiledModel>(model->clone(), shared_from_this(), localProperties);
+            return ov::npuw::ICompiledModel::create(model->clone(), shared_from_this(), localProperties);
         } else {
             // NPUW is disabled, remove the key from the properties
             localProperties.erase(useNpuwKey);