[GPU] Enable encryption of cache blob with CacheMode::OPTIMIZE_SIZE (#27912)

### Details:
- Enables encryption of the cache blob with CacheMode::OPTIMIZE_SIZE in the GPU
Plugin (see the usage sketch after the Tickets section below).
- Some additional test coverage is already present in
src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp.
The test coverage in this PR is distinct from those tests because it also
checks the correctness of the results.
- #27742 has to be merged first; it guarantees small cache sizes in the
majority of cases, which is important for encryption.

### Tickets:
 - CVS-158140
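
For illustration (not part of this commit), a minimal usage sketch of the feature. The model path and the toy XOR lambda are placeholders; a real application would supply its own cipher. The callbacks are passed to core.compile_model together with CacheMode::OPTIMIZE_SIZE, matching the property note in properties.hpp below.

```cpp
#include <openvino/openvino.hpp>
#include <string>

int main() {
    ov::Core core;
    core.set_property(ov::cache_dir("gpu_cache"));  // enable the model cache

    // Toy reversible codec standing in for a real cipher; XOR is symmetric,
    // so the same lambda serves as both encrypt and decrypt.
    auto codec = [](const std::string& data) {
        std::string out = data;
        for (auto& c : out)
            c ^= 0x2A;
        return out;
    };

    ov::EncryptionCallbacks callbacks;
    callbacks.encrypt = codec;
    callbacks.decrypt = codec;

    // The callbacks take effect only together with CacheMode::OPTIMIZE_SIZE
    // and are passed per compile_model call.
    ov::AnyMap config{ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
                      ov::cache_encryption_callbacks(callbacks)};

    auto model = core.read_model("model.xml");                   // placeholder path
    auto compiled = core.compile_model(model, "GPU", config);    // first call populates the cache
    auto from_cache = core.compile_model(model, "GPU", config);  // expected to load the encrypted blob
    return 0;
}
```

With a cache_dir set, the second compile_model call is expected to load and decrypt the cached blob instead of rebuilding it.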

---------

Co-authored-by: Sergey Shlyapnikov <[email protected]>
Co-authored-by: Ilya Lavrenov <[email protected]>
3 people authored Dec 12, 2024
1 parent eb05746 commit f1cbaf8
Showing 5 changed files with 122 additions and 18 deletions.
2 changes: 2 additions & 0 deletions src/inference/include/openvino/runtime/properties.hpp
@@ -801,6 +801,8 @@ struct EncryptionCallbacks {
* when loading from the cache. This property is set in core.compile_model only.
* - First value of the struct is encryption function.
* - Second value of the struct is decryption function.
* @note GPU Plugin: encrypts the whole blob, not only the model structure. Used only when the ov::cache_mode property is
* set to "OPTIMIZE_SIZE".
* @ingroup ov_runtime_cpp_prop_api
*/
static constexpr Property<EncryptionCallbacks, PropertyMutability::WO> cache_encryption_callbacks{
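As a hedged illustration of the @note above (not part of the commit): the struct's two members are plain std::function objects over std::string, and a placeholder symmetric codec can back both of them.

```cpp
#include <openvino/runtime/properties.hpp>
#include <string>

// Placeholder reversible transform standing in for a real cipher.
static std::string toy_codec(const std::string& blob) {
    std::string out = blob;
    for (auto& c : out)
        c ^= 0x3C;
    return out;
}

int main() {
    ov::EncryptionCallbacks callbacks;
    callbacks.encrypt = toy_codec;  // first member: encryption function
    callbacks.decrypt = toy_codec;  // second member: decryption function
    // Passed per compile_model call, e.g. together with ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE).
    return 0;
}
```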
@@ -20,12 +20,17 @@ class BinaryOutputBuffer : public OutputBuffer<BinaryOutputBuffer> {
BinaryOutputBuffer(std::ostream& stream)
: OutputBuffer<BinaryOutputBuffer>(this), stream(stream), _impl_params(nullptr), _strm(nullptr) {}

void write(void const * data, std::streamsize size) {
virtual ~BinaryOutputBuffer() = default;

virtual void write(void const* data, std::streamsize size) {
auto const written_size = stream.rdbuf()->sputn(reinterpret_cast<const char*>(data), size);
OPENVINO_ASSERT(written_size == size,
"[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + std::to_string(written_size));
"[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " +
std::to_string(written_size));
}

virtual void flush() {}

void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
void* getKernelImplParams() const { return _impl_params; }
void set_stream(void* strm) { _strm = strm; }
@@ -42,7 +47,9 @@ class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
BinaryInputBuffer(std::istream& stream, engine& engine)
: InputBuffer<BinaryInputBuffer>(this, engine), _stream(stream), _impl_params(nullptr) {}

void read(void* const data, std::streamsize size) {
virtual ~BinaryInputBuffer() = default;

virtual void read(void* const data, std::streamsize size) {
auto const read_size = _stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
OPENVINO_ASSERT(read_size == size,
"[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
@@ -51,14 +58,73 @@ class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
void* getKernelImplParams() const { return _impl_params; }

std::streampos tellg() { return _stream.tellg(); }
void seekg(std::streampos pos) { _stream.seekg(pos); }

private:
std::istream& _stream;
void* _impl_params;
};

class EncryptedBinaryOutputBuffer : public BinaryOutputBuffer {
public:
EncryptedBinaryOutputBuffer(std::ostream& stream, std::function<std::string(const std::string&)> encrypt)
: BinaryOutputBuffer(stream),
encrypt(encrypt) {
OPENVINO_ASSERT(encrypt);
}

~EncryptedBinaryOutputBuffer() override = default;

void write(void const* data, std::streamsize size) override {
plaintext_str.append(reinterpret_cast<const char*>(data), size);
}

void flush() override {
auto encrypted_str = encrypt(plaintext_str);
size_t bytes = encrypted_str.size();
BinaryOutputBuffer::write(make_data(&bytes, sizeof(bytes)).data, sizeof(bytes));
BinaryOutputBuffer::write(make_data(encrypted_str.c_str(), encrypted_str.size()).data, encrypted_str.size());
}

private:
std::string
plaintext_str; // Not using stringstream here because passing to encrypt() would produce an additional copy.
std::function<std::string(const std::string&)> encrypt;
};

class EncryptedBinaryInputBuffer : public BinaryInputBuffer {
public:
EncryptedBinaryInputBuffer(std::istream& stream,
engine& engine,
std::function<std::string(const std::string&)> decrypt)
: BinaryInputBuffer(stream, engine),
decrypt(decrypt) {
OPENVINO_ASSERT(decrypt);

size_t bytes;
BinaryInputBuffer::read(make_data(&bytes, sizeof(bytes)).data, sizeof(bytes));

// Not reading directly to plaintext_stream because decrypt(plaintext_stream.str()) would create an additional
// copy.
std::string str(bytes, 0);
BinaryInputBuffer::read(
make_data(const_cast<void*>(reinterpret_cast<const void*>(str.c_str())), str.size()).data,
str.size());
plaintext_stream.str(decrypt(str));
}

~EncryptedBinaryInputBuffer() override = default;

void read(void* const data, std::streamsize size) override {
auto const read_size = plaintext_stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
OPENVINO_ASSERT(
read_size == size,
"[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
}

private:
std::stringstream plaintext_stream;
std::function<std::string(const std::string&)> decrypt;
};

template <typename T>
class Serializer<BinaryOutputBuffer, T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
public:
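To clarify the framing that EncryptedBinaryOutputBuffer writes and EncryptedBinaryInputBuffer parses, here is a simplified standalone sketch (not the cldnn classes themselves; the toy codec and the 64-bit length type are assumptions): the writer buffers plaintext, encrypts it once at flush time, and emits a length prefix followed by the ciphertext, while the reader reverses the steps.

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <sstream>
#include <string>

// Toy symmetric codec standing in for the user-supplied encrypt/decrypt callbacks.
static std::string toy_xor(const std::string& in) {
    std::string out = in;
    for (auto& c : out)
        c ^= 0x5A;
    return out;
}

// Writer side: plaintext is accumulated elsewhere; on flush, emit [length][ciphertext].
static void flush_encrypted(std::ostream& os,
                            const std::string& plaintext,
                            const std::function<std::string(const std::string&)>& encrypt) {
    const std::string ciphertext = encrypt(plaintext);
    const uint64_t bytes = ciphertext.size();
    os.write(reinterpret_cast<const char*>(&bytes), sizeof(bytes));
    os.write(ciphertext.data(), static_cast<std::streamsize>(ciphertext.size()));
}

// Reader side: read the length prefix, read that many bytes, decrypt into plaintext.
static std::string read_encrypted(std::istream& is,
                                  const std::function<std::string(const std::string&)>& decrypt) {
    uint64_t bytes = 0;
    is.read(reinterpret_cast<char*>(&bytes), sizeof(bytes));
    std::string ciphertext(static_cast<size_t>(bytes), '\0');
    is.read(&ciphertext[0], static_cast<std::streamsize>(ciphertext.size()));
    return decrypt(ciphertext);
}

int main() {
    std::stringstream blob;
    flush_encrypted(blob, "serialized GPU graph bytes", toy_xor);
    std::cout << read_encrypted(blob, toy_xor) << std::endl;  // prints the original plaintext
    return 0;
}
```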
12 changes: 11 additions & 1 deletion src/plugins/intel_gpu/src/plugin/compiled_model.cpp
@@ -179,7 +179,16 @@ void CompiledModel::export_model(std::ostream& model) const {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model");
OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded");

cldnn::BinaryOutputBuffer ob(model);
const ov::EncryptionCallbacks encryption_callbacks = m_config.get_property(ov::cache_encryption_callbacks);

// Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty.
const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE;
std::unique_ptr<cldnn::BinaryOutputBuffer> ob_ptr =
encryption_enabled
? cldnn::make_unique<cldnn::EncryptedBinaryOutputBuffer>(model, encryption_callbacks.encrypt)
: cldnn::make_unique<cldnn::BinaryOutputBuffer>(model);
auto& ob = *ob_ptr;

ob << cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));

// Inputs
@@ -222,6 +231,7 @@ void CompiledModel::export_model(std::ostream& model) const {
}

get_graph(0)->export_model(ob);
ob.flush();
}

std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
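For context (not part of the commit), a hedged sketch of an explicit export/import round trip that exercises the code paths above; the model and blob paths and the toy codec are placeholders, and the same properties are passed to both calls so the imported blob can be decrypted.

```cpp
#include <openvino/openvino.hpp>
#include <fstream>
#include <string>

int main() {
    ov::Core core;

    auto codec = [](const std::string& s) {
        std::string out = s;
        for (auto& c : out)
            c ^= 0x11;
        return out;
    };
    ov::EncryptionCallbacks callbacks;
    callbacks.encrypt = codec;
    callbacks.decrypt = codec;
    ov::AnyMap config{ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
                      ov::cache_encryption_callbacks(callbacks)};

    // Export: CompiledModel::export_model() now writes an encrypted, size-prefixed blob.
    auto compiled = core.compile_model("model.xml", "GPU", config);  // placeholder model path
    {
        std::ofstream blob_out("model.blob", std::ios::binary);
        compiled.export_model(blob_out);
    }

    // Import: Plugin::import_model() reads the size prefix, decrypts, then deserializes.
    std::ifstream blob_in("model.blob", std::ios::binary);
    auto imported = core.import_model(blob_in, "GPU", config);
    return 0;
}
```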
18 changes: 14 additions & 4 deletions src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -339,12 +339,21 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model,
config.set_user_property(_orig_config);
config.apply_user_properties(context_impl->get_engine().get_device_info());

cldnn::BinaryInputBuffer ib(model, context_impl->get_engine());
ov::CacheMode cache_mode = config.get_property(ov::cache_mode);
ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks);
const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE;

ov::CacheMode cache_mode = ov::CacheMode::OPTIMIZE_SPEED;
ib >> cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));
std::unique_ptr<cldnn::BinaryInputBuffer> ib_ptr =
encryption_enabled ? cldnn::make_unique<cldnn::EncryptedBinaryInputBuffer>(model,
context_impl->get_engine(),
encryption_callbacks.decrypt)
: cldnn::make_unique<cldnn::BinaryInputBuffer>(model, context_impl->get_engine());
auto& ib = *ib_ptr;

if (cache_mode != config.get_property(ov::cache_mode)) {
ov::CacheMode loaded_cache_mode = ov::CacheMode::OPTIMIZE_SPEED;
ib >> cldnn::make_data(&loaded_cache_mode, sizeof(ov::CacheMode));

if (loaded_cache_mode != cache_mode) {
return nullptr;
}

@@ -608,6 +617,7 @@ std::vector<ov::PropertyName> Plugin::get_supported_properties() const {
ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW},
ov::PropertyName{ov::weights_path.name(), PropertyMutability::RW},
ov::PropertyName{ov::cache_encryption_callbacks.name(), PropertyMutability::RW},
};

return supported_properties;
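A small illustrative check (not part of the commit) that the newly listed property is advertised by the GPU plugin:

```cpp
#include <openvino/openvino.hpp>
#include <iostream>

int main() {
    ov::Core core;
    auto props = core.get_property("GPU", ov::supported_properties);
    for (const auto& name : props) {
        if (name == ov::cache_encryption_callbacks.name())
            std::cout << "GPU advertises " << name << std::endl;
    }
    return 0;
}
```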
30 changes: 23 additions & 7 deletions src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
@@ -32,16 +32,21 @@
#include "common_test_utils/subgraph_builders/ti_with_lstm_cell.hpp"
#include "common_test_utils/test_common.hpp"
#include "openvino/pass/serialize.hpp"
#include "openvino/util/codec_xor.hpp"

namespace {
typedef std::tuple<bool, bool> testParams;
class CheckWeightlessCacheAccuracy : public ::testing::Test,
public ::testing::WithParamInterface<bool> {
public ::testing::WithParamInterface<testParams> {
public:
static std::string get_test_case_name(::testing::TestParamInfo<bool> obj) {
bool use_compile_model_api = obj.param;
static std::string get_test_case_name(::testing::TestParamInfo<testParams> obj) {
bool use_compile_model_api_;
bool do_encryption_;
std::tie(use_compile_model_api_, do_encryption_) = obj.param;

std::ostringstream result;
result << "use_compile_model_api=" << use_compile_model_api;
result << "use_compile_model_api=" << use_compile_model_api_;
result << "_do_encryption=" << do_encryption_;
return result.str();
}
protected:
@@ -50,6 +55,7 @@ class CheckWeightlessCacheAccuracy : public ::testing::Test,
std::string bin_path;
std::string cache_path;
bool use_compile_model_api; // for loading from cache
bool do_encryption;

void SetUp() override;
void TearDown() override;
@@ -61,7 +67,7 @@ void CheckWeightlessCacheAccuracy::SetUp() {
xml_path = filePrefix + ".xml";
bin_path = filePrefix + ".bin";
cache_path = filePrefix + ".blob";
use_compile_model_api = GetParam();
std::tie(use_compile_model_api, do_encryption) = GetParam();
}

void CheckWeightlessCacheAccuracy::TearDown() {
@@ -73,6 +79,15 @@
void CheckWeightlessCacheAccuracy::run() {
ov::AnyMap config = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE) };
ov::AnyMap config_with_weights_path = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), ov::weights_path(bin_path) };

if (do_encryption) {
ov::EncryptionCallbacks encryption_callbacks;
encryption_callbacks.encrypt = ov::util::codec_xor;
encryption_callbacks.decrypt = ov::util::codec_xor;
config.insert(ov::cache_encryption_callbacks(encryption_callbacks));
config_with_weights_path.insert(ov::cache_encryption_callbacks(encryption_callbacks));
}

auto core = ov::test::utils::PluginCache::get().core();
ov::pass::Serialize(xml_path, bin_path).run_on_model(model);

Expand Down Expand Up @@ -130,8 +145,9 @@ TEST_P(CheckWeightlessCacheAccuracy, TiWithLstmCell) {
run();
}

INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy, CheckWeightlessCacheAccuracy,
::testing::Bool(),
INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy,
CheckWeightlessCacheAccuracy,
::testing::Combine(::testing::Bool(), ::testing::Bool()),
CheckWeightlessCacheAccuracy::get_test_case_name);

} // namespace
