[GPU] Enable encryption of cache blob with CacheMode::OPTIMIZE_SIZE (#27912)

### Details:
- Enables encryption of the cache blob with CacheMode::OPTIMIZE_SIZE in the GPU
Plugin (see the usage sketch after the Tickets section below).
- Some additional test coverage is already present in
src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp.
The test coverage in this PR is distinct from those tests because it also
checks the correctness of the results.
- #27742 has to be merged first; it guarantees small cache sizes in the
majority of cases, which is important for encryption.

### Tickets:
 - CVS-158140
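
For illustration (not part of this commit), a minimal usage sketch of the feature. The model path and the toy XOR lambda are placeholders; a real application would supply its own cipher. The callbacks are passed to core.compile_model together with CacheMode::OPTIMIZE_SIZE, matching the property note in properties.hpp below.

```cpp
#include <openvino/openvino.hpp>
#include <string>

int main() {
    ov::Core core;
    core.set_property(ov::cache_dir("gpu_cache"));  // enable the model cache

    // Toy reversible codec standing in for a real cipher; XOR is symmetric,
    // so the same lambda serves as both encrypt and decrypt.
    auto codec = [](const std::string& data) {
        std::string out = data;
        for (auto& c : out)
            c ^= 0x2A;
        return out;
    };

    ov::EncryptionCallbacks callbacks;
    callbacks.encrypt = codec;
    callbacks.decrypt = codec;

    // The callbacks take effect only together with CacheMode::OPTIMIZE_SIZE
    // and are passed per compile_model call.
    ov::AnyMap config{ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
                      ov::cache_encryption_callbacks(callbacks)};

    auto model = core.read_model("model.xml");                   // placeholder path
    auto compiled = core.compile_model(model, "GPU", config);    // first call populates the cache
    auto from_cache = core.compile_model(model, "GPU", config);  // expected to load the encrypted blob
    return 0;
}
```

With a cache_dir set, the second compile_model call is expected to load and decrypt the cached blob instead of rebuilding it.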

---------

Co-authored-by: Sergey Shlyapnikov <[email protected]>
Co-authored-by: Ilya Lavrenov <[email protected]>
3 people authored Dec 12, 2024
1 parent eb05746 commit f1cbaf8
Showing 5 changed files with 122 additions and 18 deletions.
2 changes: 2 additions & 0 deletions src/inference/include/openvino/runtime/properties.hpp
@@ -801,6 +801,8 @@ struct EncryptionCallbacks {
* when loading from the cache. This property is set in core.compile_model only.
* - First value of the struct is encryption function.
* - Second value of the struct is decryption function.
* @note GPU Plugin: encrypts the whole blob, not only the model structure. Used only when the ov::cache_mode property is
* set to "OPTIMIZE_SIZE".
* @ingroup ov_runtime_cpp_prop_api
*/
static constexpr Property<EncryptionCallbacks, PropertyMutability::WO> cache_encryption_callbacks{
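As a hedged illustration of the @note above (not part of the commit): the struct's two members are plain std::function objects over std::string, and a placeholder symmetric codec can back both of them.

```cpp
#include <openvino/runtime/properties.hpp>
#include <string>

// Placeholder reversible transform standing in for a real cipher.
static std::string toy_codec(const std::string& blob) {
    std::string out = blob;
    for (auto& c : out)
        c ^= 0x3C;
    return out;
}

int main() {
    ov::EncryptionCallbacks callbacks;
    callbacks.encrypt = toy_codec;  // first member: encryption function
    callbacks.decrypt = toy_codec;  // second member: decryption function
    // Passed per compile_model call, e.g. together with ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE).
    return 0;
}
```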
@@ -20,12 +20,17 @@ class BinaryOutputBuffer : public OutputBuffer<BinaryOutputBuffer> {
BinaryOutputBuffer(std::ostream& stream)
: OutputBuffer<BinaryOutputBuffer>(this), stream(stream), _impl_params(nullptr), _strm(nullptr) {}

void write(void const * data, std::streamsize size) {
virtual ~BinaryOutputBuffer() = default;

virtual void write(void const* data, std::streamsize size) {
auto const written_size = stream.rdbuf()->sputn(reinterpret_cast<const char*>(data), size);
OPENVINO_ASSERT(written_size == size,
"[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + std::to_string(written_size));
"[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " +
std::to_string(written_size));
}

virtual void flush() {}

void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
void* getKernelImplParams() const { return _impl_params; }
void set_stream(void* strm) { _strm = strm; }
@@ -42,7 +47,9 @@ class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
BinaryInputBuffer(std::istream& stream, engine& engine)
: InputBuffer<BinaryInputBuffer>(this, engine), _stream(stream), _impl_params(nullptr) {}

void read(void* const data, std::streamsize size) {
virtual ~BinaryInputBuffer() = default;

virtual void read(void* const data, std::streamsize size) {
auto const read_size = _stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
OPENVINO_ASSERT(read_size == size,
"[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
@@ -51,14 +58,73 @@ class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {
void setKernelImplParams(void* impl_params) { _impl_params = impl_params; }
void* getKernelImplParams() const { return _impl_params; }

std::streampos tellg() { return _stream.tellg(); }
void seekg(std::streampos pos) { _stream.seekg(pos); }

private:
std::istream& _stream;
void* _impl_params;
};

class EncryptedBinaryOutputBuffer : public BinaryOutputBuffer {
public:
EncryptedBinaryOutputBuffer(std::ostream& stream, std::function<std::string(const std::string&)> encrypt)
: BinaryOutputBuffer(stream),
encrypt(encrypt) {
OPENVINO_ASSERT(encrypt);
}

~EncryptedBinaryOutputBuffer() override = default;

void write(void const* data, std::streamsize size) override {
plaintext_str.append(reinterpret_cast<const char*>(data), size);
}

void flush() override {
auto encrypted_str = encrypt(plaintext_str);
size_t bytes = encrypted_str.size();
BinaryOutputBuffer::write(make_data(&bytes, sizeof(bytes)).data, sizeof(bytes));
BinaryOutputBuffer::write(make_data(encrypted_str.c_str(), encrypted_str.size()).data, encrypted_str.size());
}

private:
std::string
plaintext_str; // Not using stringstream here because passing to encrypt() would produce an additional copy.
std::function<std::string(const std::string&)> encrypt;
};

class EncryptedBinaryInputBuffer : public BinaryInputBuffer {
public:
EncryptedBinaryInputBuffer(std::istream& stream,
engine& engine,
std::function<std::string(const std::string&)> decrypt)
: BinaryInputBuffer(stream, engine),
decrypt(decrypt) {
OPENVINO_ASSERT(decrypt);

size_t bytes;
BinaryInputBuffer::read(make_data(&bytes, sizeof(bytes)).data, sizeof(bytes));

// Not reading directly to plaintext_stream because decrypt(plaintext_stream.str()) would create an additional
// copy.
std::string str(bytes, 0);
BinaryInputBuffer::read(
make_data(const_cast<void*>(reinterpret_cast<const void*>(str.c_str())), str.size()).data,
str.size());
plaintext_stream.str(decrypt(str));
}

~EncryptedBinaryInputBuffer() override = default;

void read(void* const data, std::streamsize size) override {
auto const read_size = plaintext_stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
OPENVINO_ASSERT(
read_size == size,
"[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
}

private:
std::stringstream plaintext_stream;
std::function<std::string(const std::string&)> decrypt;
};

template <typename T>
class Serializer<BinaryOutputBuffer, T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
public:
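To clarify the framing that EncryptedBinaryOutputBuffer writes and EncryptedBinaryInputBuffer parses, here is a simplified standalone sketch (not the cldnn classes themselves; the toy codec and the 64-bit length type are assumptions): the writer buffers plaintext, encrypts it once at flush time, and emits a length prefix followed by the ciphertext, while the reader reverses the steps.

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <sstream>
#include <string>

// Toy symmetric codec standing in for the user-supplied encrypt/decrypt callbacks.
static std::string toy_xor(const std::string& in) {
    std::string out = in;
    for (auto& c : out)
        c ^= 0x5A;
    return out;
}

// Writer side: plaintext is accumulated elsewhere; on flush, emit [length][ciphertext].
static void flush_encrypted(std::ostream& os,
                            const std::string& plaintext,
                            const std::function<std::string(const std::string&)>& encrypt) {
    const std::string ciphertext = encrypt(plaintext);
    const uint64_t bytes = ciphertext.size();
    os.write(reinterpret_cast<const char*>(&bytes), sizeof(bytes));
    os.write(ciphertext.data(), static_cast<std::streamsize>(ciphertext.size()));
}

// Reader side: read the length prefix, read that many bytes, decrypt into plaintext.
static std::string read_encrypted(std::istream& is,
                                  const std::function<std::string(const std::string&)>& decrypt) {
    uint64_t bytes = 0;
    is.read(reinterpret_cast<char*>(&bytes), sizeof(bytes));
    std::string ciphertext(static_cast<size_t>(bytes), '\0');
    is.read(&ciphertext[0], static_cast<std::streamsize>(ciphertext.size()));
    return decrypt(ciphertext);
}

int main() {
    std::stringstream blob;
    flush_encrypted(blob, "serialized GPU graph bytes", toy_xor);
    std::cout << read_encrypted(blob, toy_xor) << std::endl;  // prints the original plaintext
    return 0;
}
```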
12 changes: 11 additions & 1 deletion src/plugins/intel_gpu/src/plugin/compiled_model.cpp
@@ -179,7 +179,16 @@ void CompiledModel::export_model(std::ostream& model) const {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model");
OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded");

cldnn::BinaryOutputBuffer ob(model);
const ov::EncryptionCallbacks encryption_callbacks = m_config.get_property(ov::cache_encryption_callbacks);

// Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty.
const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE;
std::unique_ptr<cldnn::BinaryOutputBuffer> ob_ptr =
encryption_enabled
? cldnn::make_unique<cldnn::EncryptedBinaryOutputBuffer>(model, encryption_callbacks.encrypt)
: cldnn::make_unique<cldnn::BinaryOutputBuffer>(model);
auto& ob = *ob_ptr;

ob << cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));

// Inputs
@@ -222,6 +231,7 @@ void CompiledModel::export_model(std::ostream& model) const {
}

get_graph(0)->export_model(ob);
ob.flush();
}

std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
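For context (not part of the commit), a hedged sketch of an explicit export/import round trip that exercises the code paths above; the model and blob paths and the toy codec are placeholders, and the same properties are passed to both calls so the imported blob can be decrypted.

```cpp
#include <openvino/openvino.hpp>
#include <fstream>
#include <string>

int main() {
    ov::Core core;

    auto codec = [](const std::string& s) {
        std::string out = s;
        for (auto& c : out)
            c ^= 0x11;
        return out;
    };
    ov::EncryptionCallbacks callbacks;
    callbacks.encrypt = codec;
    callbacks.decrypt = codec;
    ov::AnyMap config{ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
                      ov::cache_encryption_callbacks(callbacks)};

    // Export: CompiledModel::export_model() now writes an encrypted, size-prefixed blob.
    auto compiled = core.compile_model("model.xml", "GPU", config);  // placeholder model path
    {
        std::ofstream blob_out("model.blob", std::ios::binary);
        compiled.export_model(blob_out);
    }

    // Import: Plugin::import_model() reads the size prefix, decrypts, then deserializes.
    std::ifstream blob_in("model.blob", std::ios::binary);
    auto imported = core.import_model(blob_in, "GPU", config);
    return 0;
}
```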
18 changes: 14 additions & 4 deletions src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -339,12 +339,21 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model,
config.set_user_property(_orig_config);
config.apply_user_properties(context_impl->get_engine().get_device_info());

cldnn::BinaryInputBuffer ib(model, context_impl->get_engine());
ov::CacheMode cache_mode = config.get_property(ov::cache_mode);
ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks);
const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE;

ov::CacheMode cache_mode = ov::CacheMode::OPTIMIZE_SPEED;
ib >> cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));
std::unique_ptr<cldnn::BinaryInputBuffer> ib_ptr =
encryption_enabled ? cldnn::make_unique<cldnn::EncryptedBinaryInputBuffer>(model,
context_impl->get_engine(),
encryption_callbacks.decrypt)
: cldnn::make_unique<cldnn::BinaryInputBuffer>(model, context_impl->get_engine());
auto& ib = *ib_ptr;

if (cache_mode != config.get_property(ov::cache_mode)) {
ov::CacheMode loaded_cache_mode = ov::CacheMode::OPTIMIZE_SPEED;
ib >> cldnn::make_data(&loaded_cache_mode, sizeof(ov::CacheMode));

if (loaded_cache_mode != cache_mode) {
return nullptr;
}

@@ -608,6 +617,7 @@ std::vector<ov::PropertyName> Plugin::get_supported_properties() const {
ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW},
ov::PropertyName{ov::weights_path.name(), PropertyMutability::RW},
ov::PropertyName{ov::cache_encryption_callbacks.name(), PropertyMutability::RW},
};

return supported_properties;
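A small illustrative check (not part of the commit) that the newly listed property is advertised by the GPU plugin:

```cpp
#include <openvino/openvino.hpp>
#include <iostream>

int main() {
    ov::Core core;
    auto props = core.get_property("GPU", ov::supported_properties);
    for (const auto& name : props) {
        if (name == ov::cache_encryption_callbacks.name())
            std::cout << "GPU advertises " << name << std::endl;
    }
    return 0;
}
```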
30 changes: 23 additions & 7 deletions src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
@@ -32,16 +32,21 @@
#include "common_test_utils/subgraph_builders/ti_with_lstm_cell.hpp"
#include "common_test_utils/test_common.hpp"
#include "openvino/pass/serialize.hpp"
#include "openvino/util/codec_xor.hpp"

namespace {
typedef std::tuple<bool, bool> testParams;
class CheckWeightlessCacheAccuracy : public ::testing::Test,
public ::testing::WithParamInterface<bool> {
public ::testing::WithParamInterface<testParams> {
public:
static std::string get_test_case_name(::testing::TestParamInfo<bool> obj) {
bool use_compile_model_api = obj.param;
static std::string get_test_case_name(::testing::TestParamInfo<testParams> obj) {
bool use_compile_model_api_;
bool do_encryption_;
std::tie(use_compile_model_api_, do_encryption_) = obj.param;

std::ostringstream result;
result << "use_compile_model_api=" << use_compile_model_api;
result << "use_compile_model_api=" << use_compile_model_api_;
result << "_do_encryption=" << do_encryption_;
return result.str();
}
protected:
@@ -50,6 +55,7 @@ class CheckWeightlessCacheAccuracy : public ::testing::Test,
std::string bin_path;
std::string cache_path;
bool use_compile_model_api; // for loading from cache
bool do_encryption;

void SetUp() override;
void TearDown() override;
@@ -61,7 +67,7 @@ void CheckWeightlessCacheAccuracy::SetUp() {
xml_path = filePrefix + ".xml";
bin_path = filePrefix + ".bin";
cache_path = filePrefix + ".blob";
use_compile_model_api = GetParam();
std::tie(use_compile_model_api, do_encryption) = GetParam();
}

void CheckWeightlessCacheAccuracy::TearDown() {
@@ -73,6 +79,15 @@
void CheckWeightlessCacheAccuracy::run() {
ov::AnyMap config = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE) };
ov::AnyMap config_with_weights_path = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), ov::weights_path(bin_path) };

if (do_encryption) {
ov::EncryptionCallbacks encryption_callbacks;
encryption_callbacks.encrypt = ov::util::codec_xor;
encryption_callbacks.decrypt = ov::util::codec_xor;
config.insert(ov::cache_encryption_callbacks(encryption_callbacks));
config_with_weights_path.insert(ov::cache_encryption_callbacks(encryption_callbacks));
}

auto core = ov::test::utils::PluginCache::get().core();
ov::pass::Serialize(xml_path, bin_path).run_on_model(model);

Expand Down Expand Up @@ -130,8 +145,9 @@ TEST_P(CheckWeightlessCacheAccuracy, TiWithLstmCell) {
run();
}

INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy, CheckWeightlessCacheAccuracy,
::testing::Bool(),
INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy,
CheckWeightlessCacheAccuracy,
::testing::Combine(::testing::Bool(), ::testing::Bool()),
CheckWeightlessCacheAccuracy::get_test_case_name);

} // namespace
