Skip to content

Commit

Permalink
[CPU][Refactoring] Introduce VariableExecutor
Browse files Browse the repository at this point in the history
Depending on the parameters a FullyConnected node can
use one or multiple executors.
With the current approach, even when just a single executor
is used, every prepareParams() (executor::update())
call goes through executor selection routine.

The idea is to avoid such overhead for single-executor scenarios,
which are probably the most common ones.

Thus, split the pipeline into two branches:
- only single simple executor is used and updated
- a VariableExecutor is used and updated. VariableExecutor contains
  two or more simple executors
  • Loading branch information
EgorDuplensky committed Dec 3, 2024
1 parent 963b1be commit caa6e7a
Show file tree
Hide file tree
Showing 7 changed files with 233 additions and 175 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
#include <oneapi/dnnl/dnnl.hpp>

#include "cpu_memory.h"
#include "nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp"
#include "nodes/executors/dnnl/dnnl_convolution_primitive.hpp"
#include "nodes/executors/dnnl/dnnl_aliases.hpp"
#include "nodes/executors/dnnl/dnnl_utils.hpp"
#include "nodes/executors/executor.hpp"
#include "memory_desc/cpu_memory_desc_utils.h"
#include "nodes/executors/memory_arguments.hpp"
#include "post_ops.hpp"

namespace ov {
namespace intel_cpu {
Expand Down
194 changes: 36 additions & 158 deletions src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp
Original file line number Diff line number Diff line change
@@ -1,55 +1,27 @@
// Copyright (C) 2018-2022 Intel Corporation
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <string>
#include <unordered_map>

#include "executor.hpp"
#include "nodes/executors/implementations.hpp"
#include "nodes/executors/executor_config.hpp"
#include "nodes/executors/executor_implementation.hpp"
#include "nodes/executors/graph_emitter.hpp"
#include "nodes/executors/implementations.hpp"
#include "nodes/executors/memory_arguments.hpp"
#include "nodes/executors/printers.hpp"
#include "openvino/core/except.hpp"
#include "nodes/executors/variable_executor.hpp"
#include "post_ops.hpp"

namespace ov {
namespace intel_cpu {
using namespace executor;

// Fallback path: when the selected implementation cannot handle the given
// config, emit a reference subgraph executor built from the fallback config.
// @param config          the original (unsupported) executor configuration
// @param fallbackConfig  the adjusted configuration the graph is built from
// @param memory          memory arguments used to construct the graph
// @param context         executor context (engine, scratchpad, caches)
// @param name            node name, used for debug logging only
// @note Currently only verifies that a graph can be emitted and then throws,
//       since the actual fallback logic is not implemented yet.
template <typename Attrs, typename NodeT>
static ExecutorPtr fallback(const executor::Config<Attrs>& config,
const executor::Config<Attrs>& fallbackConfig,
const MemoryArgs& memory,
const ExecutorContext::CPtr context,
const std::string& name) {
DEBUG_LOG("Falling back to graph executor for ",
name,
". Original config: ",
config,
" new config:",
fallbackConfig);

GraphEmitter<Attrs> graphEmitter(config.descs, config.attrs, config.postOps, memory, context, name);

// build the graph and validate it matches the fallback configuration;
// the result is intentionally unused until fallback is fully implemented
const auto& graphExecutor =
graphEmitter.createGraph(fallbackConfig.descs, fallbackConfig.attrs, fallbackConfig.postOps, context)
.ensureAttrsMatch()
.ensureSrcDescsMatch()
.ensureDstDescsMatch()
.ensurePostOpsMatch()
.emit();
(void)graphExecutor;

OPENVINO_THROW("Fallback logic is not implemented yet"); // return graphExecutor;
}

template <typename Attrs, typename NodeT>
template <typename Attrs>
class ExecutorFactory {
public:
using ExecutorImplementationRef = std::reference_wrapper<const ExecutorImplementation<Attrs>>;
Expand Down Expand Up @@ -95,104 +67,41 @@ class ExecutorFactory {
}

/**
* @brief Preconfigures an executor based on the provided memory arguments.
*
* Preconfigures an executor by selecting an appropriate implementation based on the provided
* memory arguments and by creating an executor using the implementation.
*
* @param memory The memory parameters used for selecting the appropriate executor implementation.
*
* @note The main use case is to offload executor data preparation (i.e. weights packing)
* From the make() call
* @todo Currently supports creating a single executor.
* For some nodes it can be worth to preconfigure all the executors.
*/
void preconfigure(const MemoryArgs& memory) {
executor::Config<Attrs> config{memoryDescsFromMemory(memory), m_attrs, m_postOps};

// remember which implementations require a fallback, so later calls
// do not have to re-evaluate requiresFallback() for every creation
cacheFallbackStatus(config);

const size_t implId = select(memory, 0);
const auto& impl = m_suitableImplementations[implId].get();
DEBUG_LOG("Preconfiguring executor: ", impl.name());

if (m_implementationRequiresFallback[implId]) {
if (auto fallbackConfig = impl.requiresFallback(config)) {
fallback<Attrs, NodeT>(config, *fallbackConfig, memory, m_context, impl.name());
}
}

// create (and cache) the executor now to offload expensive data
// preparation (i.e. weights packing) from the make() call
(void)create(implId, memory, m_context);
}

/**
* @brief Creates an Executor instance based on provided memory arguments.
*
* Creates an Executor instance using the provided MemoryArgs, selecting an appropriate implementation
* based on the characteristics of the memory. It handles fallback scenarios if necessary and updates the executor
* with the given memory information.
* @brief Creates an Executor instance based on the provided \memory arguments.
* Depending on the number of available implementations, returns:
* - a VariableExecutor, if the number of implementations is two or more
* - a simple Executor, if only one implementation is available
*
* @param memory memory arguments.
*
* @return A shared pointer to the created Executor.
*
* The function follows the steps below:
* - Selects an implementation based on the provided memory using the select() function.
* - Retrieves the selected implementation and checks if fallback is required.
* - If fallback is required, it creates a fallback configuration and returns a fallback executor.
* - Otherwise creates the executor using the selected implementation.
* - Updates the executor with the given memory information.
*
*/
ExecutorPtr make(MemoryArgs& memory) {
auto createExec = [this](MemoryArgs& memory, size_t implId) -> ExecutorPtr {
const auto& impl = m_suitableImplementations[implId].get();
if (m_implementationRequiresFallback[implId]) {
executor::Config<Attrs> config{memoryDescsFromMemory(memory), m_attrs, m_postOps};
if (auto fallbackConfig = impl.requiresFallback(config)) {
return fallback<Attrs, NodeT>(config, *fallbackConfig, memory, m_context, impl.name());
}
}
const auto executor = create(implId, memory, m_context);
if (!executor->update(memory)) {
return nullptr;
ExecutorPtr make(const MemoryArgs& memory) {
// only a single executor implementation is available; check whether it requires a fallback
if (m_suitableImplementations.size() == 1) {
auto config = GraphEmitter<Attrs>::createConfig(memory, m_attrs, m_postOps);

const auto& theOnlyImplementation = m_suitableImplementations.front().get();

if (const auto fallbackConfig = theOnlyImplementation.requiresFallback(config)) {
return GraphEmitter<Attrs>::fallback(config,
*fallbackConfig,
memory,
m_context,
theOnlyImplementation.name());
}
return executor;
};

auto implId = select(memory, 0);
auto executor = createExec(memory, implId);
while (!executor) {
implId = select(memory, ++implId);
executor = createExec(memory, implId);
}
return executor;
}

private:
static MemoryDescArgs memoryDescsFromMemory(const MemoryArgs& memory) {
MemoryDescArgs memoryDescs;
memoryDescs.reserve(memory.size());

for (const auto& mem : memory) {
memoryDescs[mem.first] = mem.second->getDescPtr();
return theOnlyImplementation.create(m_attrs, m_postOps, memory, m_context);
}

return memoryDescs;
}

/**
* @brief Caches the fallback status for each suitable implementation.
*/
void cacheFallbackStatus(const executor::Config<Attrs>& config) {
std::transform(m_suitableImplementations.begin(),
m_suitableImplementations.end(),
m_implementationRequiresFallback.begin(),
[&config](const ExecutorImplementationRef& impl) {
return impl.get().requiresFallback(config);
});
return std::make_shared<VariableExecutor<Attrs>>(memory,
m_attrs,
m_postOps,
m_context,
m_suitableImplementations);
}

private:
/**
* @brief Filters and retrieves suitable implementations based on the provided executor configuration.
*
Expand All @@ -205,11 +114,10 @@ class ExecutorFactory {
* @note If an implementation is shape agnostic, no further implementations with lower
* priority are considered.
*/
static std::vector<ExecutorImplementationRef> filter(
const Attrs& attrs,
const PostOps& postOps,
const MemoryDescArgs& descs,
const std::string& implementationPriority = {}) {
static std::vector<ExecutorImplementationRef> filter(const Attrs& attrs,
const PostOps& postOps,
const MemoryDescArgs& descs,
const std::string& implementationPriority = {}) {
const auto& implementations = getImplementations<Attrs>();
std::vector<ExecutorImplementationRef> suitableImplementations;
const executor::Config<Attrs> config{descs, attrs, postOps};
Expand Down Expand Up @@ -244,36 +152,6 @@ class ExecutorFactory {
return suitableImplementations;
}

/**
 * @brief Selects a suitable implementation starting from the given index.
 *
 * Scans m_suitableImplementations beginning at @p startIdx and returns the
 * index of the first implementation which is either shape agnostic or
 * explicitly accepts the shapes of the provided @p memory arguments.
 *
 * @param memory    memory arguments used for the shape compatibility check.
 * @param startIdx  index to start the search from (allows skipping
 *                  implementations that previously failed to update).
 * @return index of the selected implementation within m_suitableImplementations.
 * @throws if startIdx is out of range or no suitable implementation is found.
 */
size_t select(const MemoryArgs& memory, const size_t startIdx) const {
    // fixed typos in the assert messages: "indx" -> "index", "implemetation" -> "implementation"
    OPENVINO_ASSERT(startIdx < m_suitableImplementations.size(),
                    "Failed to find an implementation since start index: ", startIdx,
                    " is out of range of the suitable implementations array: ", m_suitableImplementations.size());
    auto startIt = m_suitableImplementations.begin();
    std::advance(startIt, startIdx);
    const auto selectedImplementation =
        std::find_if(startIt,
                     m_suitableImplementations.end(),
                     [&memory](const ExecutorImplementationRef& implementation) {
                         return implementation.get().shapeAgnostic() || implementation.get().acceptsShapes(memory);
                     });
    OPENVINO_ASSERT(selectedImplementation != m_suitableImplementations.end(), "Failed to select an implementation");

    return std::distance(m_suitableImplementations.begin(), selectedImplementation);
}

/**
 * @brief Returns the executor for the given implementation id, creating it lazily.
 *
 * Executors are cached in m_executors: an executor is instantiated on the
 * first request for a given @p implId and reused on subsequent calls.
 */
ExecutorPtr create(const size_t implId,
const MemoryArgs& memory,
const ExecutorContext::CPtr context) {
// both vectors are expected to stay in sync (same size, same indexing)
assert(implId < m_executors.size() && implId < m_suitableImplementations.size());

if (!m_executors[implId]) {
const auto& impl = m_suitableImplementations[implId].get();
m_executors[implId] = impl.create(m_attrs, m_postOps, memory, context);
}

return m_executors[implId];
}

const Attrs& m_attrs;
const PostOps& m_postOps;
const ExecutorContext::CPtr m_context;
Expand All @@ -284,11 +162,11 @@ class ExecutorFactory {
std::vector<ExecutorPtr> m_executors;
};

template <typename Attrs, typename NodeT>
using ExecutorFactoryPtr = std::shared_ptr<ExecutorFactory<Attrs, NodeT>>;
template <typename Attrs>
using ExecutorFactoryPtr = std::shared_ptr<ExecutorFactory<Attrs>>;

template <typename Attrs, typename NodeT>
using ExecutorFactoryCPtr = std::shared_ptr<const ExecutorFactory<Attrs, NodeT>>;
template <typename Attrs>
using ExecutorFactoryCPtr = std::shared_ptr<const ExecutorFactory<Attrs>>;

} // namespace intel_cpu
} // namespace ov
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "memory_desc/cpu_memory_desc.h"
#include "nodes/executors/convolution_config.hpp"
#include "nodes/executors/dnnl/dnnl_convolution_primitive.hpp"
#include "nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp"
#include "nodes/executors/dnnl/dnnl_fullyconnected.hpp"
#include "nodes/executors/dnnl/dnnl_matmul_primitive.hpp"
#include "nodes/executors/dnnl/dnnl_shape_agnostic_data.hpp"
Expand Down
46 changes: 44 additions & 2 deletions src/plugins/intel_cpu/src/nodes/executors/graph_emitter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@
#pragma once

#include <functional>
#include <vector>

#include "graph.h"
#include "memory_desc/cpu_memory_desc.h"
#include "node.h"
#include "nodes/executors/executor.hpp"
#include "nodes/executors/executor_config.hpp"
#include "post_ops.hpp"

namespace ov {
Expand Down Expand Up @@ -72,6 +71,49 @@ class GraphEmitter {
return graph;
}

// Extracts the memory descriptor of every memory argument,
// keeping the original argument ids as keys.
static MemoryDescArgs memoryDescsFromMemory(const MemoryArgs& memory) {
    MemoryDescArgs descriptors;
    descriptors.reserve(memory.size());

    for (const auto& [argId, mem] : memory) {
        descriptors[argId] = mem->getDescPtr();
    }

    return descriptors;
}

// Bundles the descriptors extracted from the runtime memory arguments
// together with the node attributes and post-ops into an executor config.
static executor::Config<Attrs> createConfig(const MemoryArgs& memory,
                                            const Attrs& attrs,
                                            const PostOps& postOps) {
    auto descs = memoryDescsFromMemory(memory);
    return executor::Config<Attrs>{descs, attrs, postOps};
}

// Fallback path: when the selected implementation cannot handle the given
// config, emit a reference subgraph executor built from the fallback config.
// @param config          the original (unsupported) executor configuration
// @param fallbackConfig  the adjusted configuration the graph is built from
// @param memory          memory arguments used to construct the graph
// @param context         executor context (engine, scratchpad, caches)
// @param name            node name, used for debug logging only
// @note Currently only verifies that a graph can be emitted and then throws,
//       since the actual fallback logic is not implemented yet.
static ExecutorPtr fallback(const executor::Config<Attrs>& config,
const executor::Config<Attrs>& fallbackConfig,
const MemoryArgs& memory,
const ExecutorContext::CPtr context,
const std::string& name) {
DEBUG_LOG("Falling back to graph executor for ",
name,
". Original config: ",
config,
" new config:",
fallbackConfig);

GraphEmitter<Attrs> graphEmitter(config.descs, config.attrs, config.postOps, memory, context, name);

// build the graph and validate it matches the fallback configuration;
// the result is intentionally unused until fallback is fully implemented
const auto& graphExecutor =
graphEmitter.createGraph(fallbackConfig.descs, fallbackConfig.attrs, fallbackConfig.postOps, context)
.ensureAttrsMatch()
.ensureSrcDescsMatch()
.ensureDstDescsMatch()
.ensurePostOpsMatch()
.emit();
(void)graphExecutor;

OPENVINO_THROW("Fallback logic is not implemented yet"); // return graphExecutor;
}

private:
const MemoryDescArgs& descs;
const Attrs& attrs;
Expand Down
Loading

0 comments on commit caa6e7a

Please sign in to comment.