Image generation: rely on activation_scale_factor for GPU #1548

Merged
29 changes: 27 additions & 2 deletions src/cpp/src/image_generation/models/autoencoder_kl.cpp
@@ -22,6 +22,8 @@
namespace ov {
namespace genai {

+namespace {
+
class DiagonalGaussianDistribution {
public:
    explicit DiagonalGaussianDistribution(ov::Tensor parameters)
@@ -64,6 +66,29 @@ class DiagonalGaussianDistribution {
    ov::Tensor m_mean, m_std;
};

+// for BW compatibility with 2024.6.0
+ov::AnyMap handle_scale_factor(std::shared_ptr<ov::Model> model, const std::string& device, ov::AnyMap properties) {
+    std::cout << ov::Any(properties).as<std::string>() << std::endl;
+
+    auto it = properties.find("WA_INFERENCE_PRECISION_HINT");
+    ov::element::Type wa_inference_precision = it != properties.end() ? it->second.as<ov::element::Type>() : ov::element::undefined;
+    if (it != properties.end()) {
+        properties.erase(it);
+    }
+
+    const std::vector<std::string> activation_scale_factor_path = { "runtime_options", ov::hint::activations_scale_factor.name() };
+    const bool activation_scale_factor_defined = model->has_rt_info(activation_scale_factor_path);
+
+    // convert WA inference precision to actual inference precision if activation_scale_factor is not defined in IR
+    if (device.find("GPU") != std::string::npos && !activation_scale_factor_defined && wa_inference_precision != ov::element::undefined) {
+        properties[ov::hint::inference_precision.name()] = wa_inference_precision;
+    }
+
+    return properties;
+}
+
+} // namespace
+
size_t get_vae_scale_factor(const std::filesystem::path& vae_config_path) {
    std::ifstream file(vae_config_path);
    OPENVINO_ASSERT(file.is_open(), "Failed to open ", vae_config_path);
@@ -207,14 +232,14 @@ AutoencoderKL& AutoencoderKL::compile(const std::string& device, const ov::AnyMap
    ov::Core core = utils::singleton_core();

    if (m_encoder_model) {
-        ov::CompiledModel encoder_compiled_model = core.compile_model(m_encoder_model, device, properties);
+        ov::CompiledModel encoder_compiled_model = core.compile_model(m_encoder_model, device, handle_scale_factor(m_encoder_model, device, properties));
        ov::genai::utils::print_compiled_model_properties(encoder_compiled_model, "Auto encoder KL encoder model");
        m_encoder_request = encoder_compiled_model.create_infer_request();
        // release the original model
        m_encoder_model.reset();
    }

-    ov::CompiledModel decoder_compiled_model = core.compile_model(m_decoder_model, device, properties);
+    ov::CompiledModel decoder_compiled_model = core.compile_model(m_decoder_model, device, handle_scale_factor(m_decoder_model, device, properties));
    ov::genai::utils::print_compiled_model_properties(decoder_compiled_model, "Auto encoder KL decoder model");
    m_decoder_request = decoder_compiled_model.create_infer_request();
    // release the original model
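The helper added above keys everything off whether the IR itself advertises an activations scale factor in its rt_info. Below is a minimal standalone sketch of that lookup — a hypothetical snippet, assuming an OpenVINO build that ships `ov::hint::activations_scale_factor`; the model path is purely illustrative:

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include "openvino/openvino.hpp"

int main() {
    ov::Core core;
    // Illustrative path: any exported VAE decoder IR would do.
    std::shared_ptr<ov::Model> model = core.read_model("vae_decoder/openvino_model.xml");

    const std::vector<std::string> path = {"runtime_options",
                                           ov::hint::activations_scale_factor.name()};

    if (model->has_rt_info(path)) {
        // Newer IRs carry the factor in rt_info; the GPU plugin picks it up
        // itself, so no inference precision workaround is needed.
        std::cout << "activations_scale_factor: "
                  << model->get_rt_info<std::string>(path) << "\n";
    } else {
        // Older IRs (e.g. exported with 2024.6 tooling) fall back to the
        // WA_INFERENCE_PRECISION_HINT translation shown in the diff above.
        std::cout << "no activations_scale_factor in rt_info\n";
    }
}
```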
18 changes: 5 additions & 13 deletions src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp
@@ -137,25 +137,17 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {

        set_scheduler(Scheduler::from_config(root_dir / "scheduler/scheduler_config.json"));

-        // Temporary fix for GPU
Contributor Author commented: since this WA is not released yet (added to master after 2024.6), we can safely drop it and rely on the official solution.

-        ov::AnyMap updated_properties = properties;
-        if (device.find("GPU") != std::string::npos &&
-            updated_properties.find("INFERENCE_PRECISION_HINT") == updated_properties.end()) {
-            updated_properties["INFERENCE_PRECISION_HINT"] = ov::element::f32;
-        }
-
        const std::string text_encoder = data["text_encoder"][1].get<std::string>();
        if (text_encoder == "CLIPTextModelWithProjection") {
            m_clip_text_encoder_1 =
-                std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder", device, updated_properties);
+                std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder", device, properties);
        } else {
            OPENVINO_THROW("Unsupported '", text_encoder, "' text encoder type");
        }

        const std::string text_encoder_2 = data["text_encoder_2"][1].get<std::string>();
        if (text_encoder_2 == "CLIPTextModelWithProjection") {
-            m_clip_text_encoder_2 =
-                std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder_2", device, updated_properties);
+            m_clip_text_encoder_2 = std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder_2", device, properties);
        } else {
            OPENVINO_THROW("Unsupported '", text_encoder_2, "' text encoder type");
        }
@@ -164,7 +156,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
        if (!text_encoder_3_json.is_null()) {
            const std::string text_encoder_3 = text_encoder_3_json.get<std::string>();
            if (text_encoder_3 == "T5EncoderModel") {
-                m_t5_text_encoder = std::make_shared<T5EncoderModel>(root_dir / "text_encoder_3", device, updated_properties);
+                m_t5_text_encoder = std::make_shared<T5EncoderModel>(root_dir / "text_encoder_3", device, properties);
            } else {
                OPENVINO_THROW("Unsupported '", text_encoder_3, "' text encoder type");
            }
@@ -180,9 +172,9 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
        const std::string vae = data["vae"][1].get<std::string>();
        if (vae == "AutoencoderKL") {
            if (m_pipeline_type == PipelineType::TEXT_2_IMAGE)
-                m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_decoder", device, updated_properties);
+                m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_decoder", device, properties);
            else if (m_pipeline_type == PipelineType::IMAGE_2_IMAGE || m_pipeline_type == PipelineType::INPAINTING) {
-                m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_encoder", root_dir / "vae_decoder", device, updated_properties);
+                m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_encoder", root_dir / "vae_decoder", device, properties);
            } else {
                OPENVINO_ASSERT("Unsupported pipeline type");
            }
2 changes: 1 addition & 1 deletion src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp
@@ -77,7 +77,7 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline {
        ov::AnyMap updated_properties = properties;
        if (device.find("GPU") != std::string::npos &&
            updated_properties.find("INFERENCE_PRECISION_HINT") == updated_properties.end()) {
-            updated_properties["INFERENCE_PRECISION_HINT"] = ov::element::f32;
+            updated_properties["WA_INFERENCE_PRECISION_HINT"] = ov::element::f32;
        }

        const std::string vae = data["vae"][1].get<std::string>();
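Putting the two halves together: StableDiffusionXLPipeline no longer forces f32 outright; it records the preference under a `WA_` key, and `AutoencoderKL::compile()` decides per model whether the workaround is still needed. A rough sketch of the round trip — `handle_scale_factor` is file-local, so this hypothetical snippet re-implements its logic under a different name, and the model path is illustrative:

```cpp
#include <memory>
#include <string>
#include <vector>

#include "openvino/openvino.hpp"

// Illustrative re-implementation of the file-local helper from this PR.
static ov::AnyMap resolve_wa_hint(const std::shared_ptr<ov::Model>& model,
                                  const std::string& device,
                                  ov::AnyMap properties) {
    auto it = properties.find("WA_INFERENCE_PRECISION_HINT");
    if (it == properties.end())
        return properties;

    auto wa_precision = it->second.as<ov::element::Type>();
    properties.erase(it);

    const std::vector<std::string> path = {"runtime_options",
                                           ov::hint::activations_scale_factor.name()};
    // Promote the workaround hint to a real one only on GPU, and only when
    // the IR does not carry its own activations scale factor.
    if (device.find("GPU") != std::string::npos && !model->has_rt_info(path)) {
        properties[ov::hint::inference_precision.name()] = wa_precision;
    }
    return properties;
}

int main() {
    ov::Core core;
    auto model = core.read_model("vae_decoder/openvino_model.xml"); // illustrative path

    ov::AnyMap props;
    props["WA_INFERENCE_PRECISION_HINT"] = ov::element::f32; // as the SDXL pipeline now sets it

    auto compiled = core.compile_model(model, "GPU", resolve_wa_hint(model, "GPU", props));
}
```

The upshot: IRs that already embed a scale factor run with the plugin's defaults, while older IRs keep the f32 safety net.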