diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp
index 5674c75dd546d7..8baea3ed408656 100644
--- a/src/inference/include/openvino/runtime/properties.hpp
+++ b/src/inference/include/openvino/runtime/properties.hpp
@@ -801,6 +801,8 @@ struct EncryptionCallbacks {
  * when loading from the cache. This property is set in core.compile_model only.
  * - First value of the struct is encryption function.
  * - Second value of the struct is decryption function.
+ * @note GPU Plugin: encrypts whole blob, not only model structure. Only used when ov::cache_mode property is set to
+ * "OPTIMIZE_SIZE".
  * @ingroup ov_runtime_cpp_prop_api
  */
 static constexpr Property<EncryptionCallbacks, PropertyMutability::WO> cache_encryption_callbacks{
diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp
index 2e8bde43abeed4..8e1be37c91b1ef 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp
@@ -20,12 +20,17 @@ class BinaryOutputBuffer : public OutputBuffer<BinaryOutputBuffer> {
     BinaryOutputBuffer(std::ostream& stream)
     : OutputBuffer<BinaryOutputBuffer>(this), stream(stream), _impl_params(nullptr), _strm(nullptr) {}
 
-    void write(void const * data, std::streamsize size) {
+    virtual ~BinaryOutputBuffer() = default;
+
+    virtual void write(void const* data, std::streamsize size) {
         auto const written_size = stream.rdbuf()->sputn(reinterpret_cast<const char*>(data), size);
         OPENVINO_ASSERT(written_size == size,
-            "[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + std::to_string(written_size));
+                        "[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " +
+                            std::to_string(written_size));
     }
 
+    virtual void flush() {}
+
     void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
     void* getKernelImplParams() const { return _impl_params; }
     void set_stream(void* strm) { _strm = strm; }
@@ -42,7 +47,9 @@ class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
     BinaryInputBuffer(std::istream& stream, engine& engine)
     : InputBuffer<BinaryInputBuffer>(this, engine), _stream(stream), _impl_params(nullptr) {}
 
-    void read(void* const data, std::streamsize size) {
+    virtual ~BinaryInputBuffer() = default;
+
+    virtual void read(void* const data, std::streamsize size) {
         auto const read_size = _stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
         OPENVINO_ASSERT(read_size == size,
             "[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
@@ -51,14 +58,84 @@ class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
     void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
     void* getKernelImplParams() const { return _impl_params; }
 
-    std::streampos tellg() { return _stream.tellg(); }
-    void seekg(std::streampos pos) { _stream.seekg(pos); }
-
 private:
     std::istream& _stream;
     void* _impl_params;
 };
 
+// Output buffer that keeps everything written to it in memory and, on flush(), runs the accumulated
+// plaintext through the encryption callback and stores the result in the underlying stream as a
+// size_t byte count followed by the ciphertext.
+class EncryptedBinaryOutputBuffer : public BinaryOutputBuffer {
+public:
+    EncryptedBinaryOutputBuffer(std::ostream& stream, std::function<std::string(const std::string&)> encrypt)
+        : BinaryOutputBuffer(stream),
+          encrypt(encrypt) {
+        OPENVINO_ASSERT(encrypt);
+    }
+
+    ~EncryptedBinaryOutputBuffer() override = default;
+
+    // Only buffers the plaintext; nothing reaches the stream before flush().
+    void write(void const* data, std::streamsize size) override {
+        plaintext_str.append(reinterpret_cast<const char*>(data), size);
+    }
+
+    // Encrypts the buffered plaintext and writes the length-prefixed ciphertext to the stream.
+    void flush() override {
+        const auto encrypted_str = encrypt(plaintext_str);
+        size_t bytes = encrypted_str.size();
+        BinaryOutputBuffer::write(make_data(&bytes, sizeof(bytes)).data, sizeof(bytes));
+        BinaryOutputBuffer::write(make_data(encrypted_str.c_str(), encrypted_str.size()).data, encrypted_str.size());
+        // Drop the emitted plaintext so that a repeated flush() does not write it a second time.
+        plaintext_str.clear();
+    }
+
+private:
+    // Not using stringstream here because passing to encrypt() would produce an additional copy.
+    std::string plaintext_str;
+    std::function<std::string(const std::string&)> encrypt;
+};
+
+// Input counterpart of EncryptedBinaryOutputBuffer: the constructor reads the length-prefixed ciphertext
+// from the stream, decrypts it with the callback and keeps the plaintext in memory; every read() is then
+// served from that in-memory copy.
+class EncryptedBinaryInputBuffer : public BinaryInputBuffer {
+public:
+    EncryptedBinaryInputBuffer(std::istream& stream,
+                               engine& engine,
+                               std::function<std::string(const std::string&)> decrypt)
+        : BinaryInputBuffer(stream, engine),
+          decrypt(decrypt) {
+        OPENVINO_ASSERT(decrypt);
+
+        // Initialized so that the value is never indeterminate.
+        size_t bytes = 0;
+        BinaryInputBuffer::read(make_data(&bytes, sizeof(bytes)).data, sizeof(bytes));
+
+        // Not reading directly to plaintext_stream because decrypt(plaintext_stream.str()) would create an additional
+        // copy.
+        std::string str(bytes, 0);
+        // &str[0] gives mutable access to the string storage; writing through c_str() is not permitted.
+        BinaryInputBuffer::read(make_data(&str[0], str.size()).data, str.size());
+        plaintext_stream.str(decrypt(str));
+    }
+
+    ~EncryptedBinaryInputBuffer() override = default;
+
+    // Serves the request from the decrypted in-memory copy instead of the underlying stream.
+    void read(void* const data, std::streamsize size) override {
+        auto const read_size = plaintext_stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
+        OPENVINO_ASSERT(
+            read_size == size,
+            "[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
+    }
+
+private:
+    std::stringstream plaintext_stream;
+    std::function<std::string(const std::string&)> decrypt;
+};
+
 template <typename T>
 class Serializer<BinaryOutputBuffer, T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
 public:
diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
index 18e7a88fc42f3e..810353fe626c19 100644
--- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
+++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
@@ -179,7 +179,16 @@ void CompiledModel::export_model(std::ostream& model) const {
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model");
     OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded");
 
-    cldnn::BinaryOutputBuffer ob(model);
+    const ov::EncryptionCallbacks encryption_callbacks = m_config.get_property(ov::cache_encryption_callbacks);
+
+    // Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty.
+    const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE;
+    std::unique_ptr<cldnn::BinaryOutputBuffer> ob_ptr =
+        encryption_enabled
+            ? cldnn::make_unique<cldnn::EncryptedBinaryOutputBuffer>(model, encryption_callbacks.encrypt)
+            : cldnn::make_unique<cldnn::BinaryOutputBuffer>(model);
+    auto& ob = *ob_ptr;
+
     ob << cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));
 
     // Inputs
@@ -222,6 +231,8 @@ void CompiledModel::export_model(std::ostream& model) const {
     }
 
     get_graph(0)->export_model(ob);
+    // The encrypted buffer emits nothing until flush(); for the plain buffer this is a no-op.
+    ob.flush();
 }
 
 std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp
index f2fa9bcdeeab1b..ed64fa085ac7ba 100644
--- a/src/plugins/intel_gpu/src/plugin/plugin.cpp
+++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -339,12 +339,23 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model,
     config.set_user_property(_orig_config);
     config.apply_user_properties(context_impl->get_engine().get_device_info());
 
-    cldnn::BinaryInputBuffer ib(model, context_impl->get_engine());
+    // The cache mode stored in the blob is part of the encrypted payload, so whether to decrypt has to be decided
+    // from the configuration before anything is read from the stream.
+    ov::CacheMode cache_mode = config.get_property(ov::cache_mode);
+    ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks);
+    const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE;
 
-    ov::CacheMode cache_mode = ov::CacheMode::OPTIMIZE_SPEED;
-    ib >> cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));
+    std::unique_ptr<cldnn::BinaryInputBuffer> ib_ptr =
+        encryption_enabled ? cldnn::make_unique<cldnn::EncryptedBinaryInputBuffer>(model,
+                                                                                   context_impl->get_engine(),
+                                                                                   encryption_callbacks.decrypt)
+                           : cldnn::make_unique<cldnn::BinaryInputBuffer>(model, context_impl->get_engine());
+    auto& ib = *ib_ptr;
 
-    if (cache_mode != config.get_property(ov::cache_mode)) {
+    ov::CacheMode loaded_cache_mode = ov::CacheMode::OPTIMIZE_SPEED;
+    ib >> cldnn::make_data(&loaded_cache_mode, sizeof(ov::CacheMode));
+
+    if (loaded_cache_mode != cache_mode) {
         return nullptr;
     }
 
@@ -608,6 +619,7 @@ std::vector<ov::PropertyName> Plugin::get_supported_properties() const {
         ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW},
         ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW},
         ov::PropertyName{ov::weights_path.name(), PropertyMutability::RW},
+        ov::PropertyName{ov::cache_encryption_callbacks.name(), PropertyMutability::RW},
     };
 
     return supported_properties;
diff --git a/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp b/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
index 839b2640ca180c..a558742965ff2f 100644
--- a/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
+++ b/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
@@ -32,16 +32,21 @@
 #include "common_test_utils/subgraph_builders/ti_with_lstm_cell.hpp"
 #include "common_test_utils/test_common.hpp"
 #include "openvino/pass/serialize.hpp"
+#include "openvino/util/codec_xor.hpp"
 
 namespace {
+using testParams = std::tuple<bool, bool>;  // <use_compile_model_api, do_encryption>
 class CheckWeightlessCacheAccuracy : public ::testing::Test,
-                                     public ::testing::WithParamInterface<bool> {
+                                     public ::testing::WithParamInterface<testParams> {
 public:
-    static std::string get_test_case_name(::testing::TestParamInfo<bool> obj) {
-        bool use_compile_model_api = obj.param;
+    static std::string get_test_case_name(::testing::TestParamInfo<testParams> obj) {
+        bool use_compile_model_api_;
+        bool do_encryption_;
+        std::tie(use_compile_model_api_, do_encryption_) = obj.param;
 
         std::ostringstream result;
-        result << "use_compile_model_api=" << use_compile_model_api;
+        result << "use_compile_model_api=" << use_compile_model_api_;
+        result << "_do_encryption=" << do_encryption_;
         return result.str();
     }
 protected:
@@ -50,6 +55,7 @@ class CheckWeightlessCacheAccuracy : public ::testing::Test,
     std::string bin_path;
     std::string cache_path;
     bool use_compile_model_api; // for loading from cache
+    bool do_encryption;
 
     void SetUp() override;
     void TearDown() override;
@@ -61,7 +67,7 @@ void CheckWeightlessCacheAccuracy::SetUp() {
     xml_path = filePrefix + ".xml";
     bin_path = filePrefix + ".bin";
     cache_path = filePrefix + ".blob";
-    use_compile_model_api = GetParam();
+    std::tie(use_compile_model_api, do_encryption) = GetParam();
 }
 
 void CheckWeightlessCacheAccuracy::TearDown() {
@@ -73,6 +79,15 @@ void CheckWeightlessCacheAccuracy::TearDown() {
 void CheckWeightlessCacheAccuracy::run() {
     ov::AnyMap config = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE) };
     ov::AnyMap config_with_weights_path = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), ov::weights_path(bin_path) };
+
+    if (do_encryption) {
+        ov::EncryptionCallbacks encryption_callbacks;
+        encryption_callbacks.encrypt = ov::util::codec_xor;
+        encryption_callbacks.decrypt = ov::util::codec_xor;
+        config.insert(ov::cache_encryption_callbacks(encryption_callbacks));
+        config_with_weights_path.insert(ov::cache_encryption_callbacks(encryption_callbacks));
+    }
+
     auto core = ov::test::utils::PluginCache::get().core();
     ov::pass::Serialize(xml_path, bin_path).run_on_model(model);
 
@@ -130,8 +145,9 @@ TEST_P(CheckWeightlessCacheAccuracy, TiWithLstmCell) {
     run();
 }
 
-INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy, CheckWeightlessCacheAccuracy,
-                        ::testing::Bool(),
+INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy,
+                         CheckWeightlessCacheAccuracy,
+                         ::testing::Combine(::testing::Bool(), ::testing::Bool()),
                          CheckWeightlessCacheAccuracy::get_test_case_name);
 
 } // namespace