From 97d36b3ca8f0d355636b9bec1f8e58161f523bec Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 15 Jan 2025 09:24:19 +0100 Subject: [PATCH] Image generation: rely on activation_scale_factor for GPU --- .../models/autoencoder_kl.cpp | 29 +++++++++++++++++-- .../stable_diffusion_3_pipeline.hpp | 18 ++++-------- .../stable_diffusion_xl_pipeline.hpp | 2 +- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/src/cpp/src/image_generation/models/autoencoder_kl.cpp b/src/cpp/src/image_generation/models/autoencoder_kl.cpp index ab8b87a13e..87fb3b4608 100644 --- a/src/cpp/src/image_generation/models/autoencoder_kl.cpp +++ b/src/cpp/src/image_generation/models/autoencoder_kl.cpp @@ -22,6 +22,8 @@ namespace ov { namespace genai { +namespace { + class DiagonalGaussianDistribution { public: explicit DiagonalGaussianDistribution(ov::Tensor parameters) @@ -64,6 +66,29 @@ class DiagonalGaussianDistribution { ov::Tensor m_mean, m_std; }; +// for BW compatibility with 2024.6.0 +ov::AnyMap handle_scale_factor(std::shared_ptr<ov::Model> model, const std::string& device, ov::AnyMap properties) { + std::cout << ov::Any(properties).as<std::string>() << std::endl; + + auto it = properties.find("WA_INFERENCE_PRECISION_HINT"); + ov::element::Type wa_inference_precision = it != properties.end() ? 
it->second.as<ov::element::Type>() : ov::element::undefined; + if (it != properties.end()) { + properties.erase(it); + } + + const std::vector<std::string> activation_scale_factor_path = { "runtime_options", ov::hint::activations_scale_factor.name() }; + const bool activation_scale_factor_defined = model->has_rt_info(activation_scale_factor_path); + + // convert WA inference precision to actual inference precision if activation_scale_factor is not defined in IR + if (device.find("GPU") != std::string::npos && !activation_scale_factor_defined && wa_inference_precision != ov::element::undefined) { + properties[ov::hint::inference_precision.name()] = wa_inference_precision; + } + + return properties; +} + +} // namespace + size_t get_vae_scale_factor(const std::filesystem::path& vae_config_path) { std::ifstream file(vae_config_path); OPENVINO_ASSERT(file.is_open(), "Failed to open ", vae_config_path); @@ -207,14 +232,14 @@ AutoencoderKL& AutoencoderKL::compile(const std::string& device, const ov::AnyMa ov::Core core = utils::singleton_core(); if (m_encoder_model) { - ov::CompiledModel encoder_compiled_model = core.compile_model(m_encoder_model, device, properties); + ov::CompiledModel encoder_compiled_model = core.compile_model(m_encoder_model, device, handle_scale_factor(m_encoder_model, device, properties)); ov::genai::utils::print_compiled_model_properties(encoder_compiled_model, "Auto encoder KL encoder model"); m_encoder_request = encoder_compiled_model.create_infer_request(); // release the original model m_encoder_model.reset(); } - ov::CompiledModel decoder_compiled_model = core.compile_model(m_decoder_model, device, properties); + ov::CompiledModel decoder_compiled_model = core.compile_model(m_decoder_model, device, handle_scale_factor(m_decoder_model, device, properties)); ov::genai::utils::print_compiled_model_properties(decoder_compiled_model, "Auto encoder KL decoder model"); m_decoder_request = decoder_compiled_model.create_infer_request(); // release the original model diff --git 
a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp index ca7b1e9ca5..5355318340 100644 --- a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp +++ b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp @@ -137,25 +137,17 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { set_scheduler(Scheduler::from_config(root_dir / "scheduler/scheduler_config.json")); - // Temporary fix for GPU - ov::AnyMap updated_properties = properties; - if (device.find("GPU") != std::string::npos && - updated_properties.find("INFERENCE_PRECISION_HINT") == updated_properties.end()) { - updated_properties["INFERENCE_PRECISION_HINT"] = ov::element::f32; - } - const std::string text_encoder = data["text_encoder"][1].get<std::string>(); if (text_encoder == "CLIPTextModelWithProjection") { m_clip_text_encoder_1 = - std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder", device, updated_properties); + std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder", device, properties); } else { OPENVINO_THROW("Unsupported '", text_encoder, "' text encoder type"); } const std::string text_encoder_2 = data["text_encoder_2"][1].get<std::string>(); if (text_encoder_2 == "CLIPTextModelWithProjection") { - m_clip_text_encoder_2 = - std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder_2", device, updated_properties); + m_clip_text_encoder_2 = std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder_2", device, properties); } else { OPENVINO_THROW("Unsupported '", text_encoder_2, "' text encoder type"); } @@ -164,7 +156,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { if (!text_encoder_3_json.is_null()) { const std::string text_encoder_3 = text_encoder_3_json.get<std::string>(); if (text_encoder_3 == "T5EncoderModel") { - m_t5_text_encoder = std::make_shared<T5EncoderModel>(root_dir / "text_encoder_3", device, updated_properties); + m_t5_text_encoder = std::make_shared<T5EncoderModel>(root_dir / "text_encoder_3", device, properties); } else { OPENVINO_THROW("Unsupported '", text_encoder_3, "' text encoder type"); 
} @@ -180,9 +172,9 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { const std::string vae = data["vae"][1].get<std::string>(); if (vae == "AutoencoderKL") { if (m_pipeline_type == PipelineType::TEXT_2_IMAGE) - m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_decoder", device, updated_properties); + m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_decoder", device, properties); else if (m_pipeline_type == PipelineType::IMAGE_2_IMAGE || m_pipeline_type == PipelineType::INPAINTING) { - m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_encoder", root_dir / "vae_decoder", device, updated_properties); + m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_encoder", root_dir / "vae_decoder", device, properties); } else { OPENVINO_ASSERT("Unsupported pipeline type"); } diff --git a/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp index c3ebcdf1f4..e8ec190f0a 100644 --- a/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp +++ b/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp @@ -77,7 +77,7 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline { ov::AnyMap updated_properties = properties; if (device.find("GPU") != std::string::npos && updated_properties.find("INFERENCE_PRECISION_HINT") == updated_properties.end()) { - updated_properties["INFERENCE_PRECISION_HINT"] = ov::element::f32; + updated_properties["WA_INFERENCE_PRECISION_HINT"] = ov::element::f32; } const std::string vae = data["vae"][1].get<std::string>();