Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update setting activation scale for diffusers #1110

Merged
merged 4 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 29 additions & 41 deletions optimum/exporters/openvino/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
_torch_version,
_transformers_version,
compare_versions,
is_diffusers_version,
is_openvino_tokenizers_version,
is_openvino_version,
is_tokenizers_version,
Expand Down Expand Up @@ -104,10 +103,10 @@ def _set_runtime_options(
):
for model_name in models_and_export_configs.keys():
_, sub_export_config = models_and_export_configs[model_name]
sub_export_config.runtime_options = {}
if not hasattr(sub_export_config, "runtime_options"):
sub_export_config.runtime_options = {}
if (
"diffusers" in library_name
or "text-generation" in task
"text-generation" in task
or ("image-text-to-text" in task and model_name == "language_model")
or getattr(sub_export_config, "stateful", False)
):
Expand Down Expand Up @@ -1014,45 +1013,29 @@ def _get_submodels_and_export_configs(
def get_diffusion_models_for_export_ext(
pipeline: "DiffusionPipeline", int_dtype: str = "int64", float_dtype: str = "fp32", exporter: str = "openvino"
):
if is_diffusers_version(">=", "0.29.0"):
from diffusers import StableDiffusion3Img2ImgPipeline, StableDiffusion3Pipeline

sd3_pipes = [StableDiffusion3Pipeline, StableDiffusion3Img2ImgPipeline]
if is_diffusers_version(">=", "0.30.0"):
from diffusers import StableDiffusion3InpaintPipeline

sd3_pipes.append(StableDiffusion3InpaintPipeline)

is_sd3 = isinstance(pipeline, tuple(sd3_pipes))
else:
is_sd3 = False

if is_diffusers_version(">=", "0.30.0"):
from diffusers import FluxPipeline

flux_pipes = [FluxPipeline]

if is_diffusers_version(">=", "0.31.0"):
from diffusers import FluxImg2ImgPipeline, FluxInpaintPipeline

flux_pipes.extend([FluxPipeline, FluxImg2ImgPipeline, FluxInpaintPipeline])

if is_diffusers_version(">=", "0.32.0"):
from diffusers import FluxFillPipeline

flux_pipes.append(FluxFillPipeline)

is_flux = isinstance(pipeline, tuple(flux_pipes))
else:
is_flux = False

if not is_sd3 and not is_flux:
return None, get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter)
if is_sd3:
is_sdxl = pipeline.__class__.__name__.startswith("StableDiffusionXL")
is_sd3 = pipeline.__class__.__name__.startswith("StableDiffusion3")
is_flux = pipeline.__class__.__name__.startswith("Flux")
is_sd = pipeline.__class__.__name__.startswith("StableDiffusion") and not is_sd3
is_lcm = pipeline.__class__.__name__.startswith("LatentConsistencyModel")

if is_sd or is_sdxl or is_lcm:
models_for_export = get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter)
if is_sdxl and pipeline.vae.config.force_upcast:
models_for_export["vae_encoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "128.0"}
models_for_export["vae_decoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "128.0"}

# only SD 2.1 has overflow issue, it uses different prediction_type than other models
if is_sd and pipeline.scheduler.config.prediction_type == "v_prediction":
eaidova marked this conversation as resolved.
Show resolved Hide resolved
models_for_export["vae_encoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["vae_decoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}

elif is_sd3:
models_for_export = get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype)
else:
elif is_flux:
models_for_export = get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype)

else:
raise ValueError(f"Unsupported pipeline type `{pipeline.__class__.__name__}` provided")
return None, models_for_export


Expand Down Expand Up @@ -1150,6 +1133,7 @@ def get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype):
int_dtype=int_dtype,
float_dtype=float_dtype,
)
export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["text_encoder_3"] = (text_encoder_3, export_config)

return models_for_export
Expand Down Expand Up @@ -1187,6 +1171,7 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
transformer_export_config = export_config_constructor(
pipeline.transformer.config, int_dtype=int_dtype, float_dtype=float_dtype
)
transformer_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["transformer"] = (transformer, transformer_export_config)

# VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565
Expand All @@ -1202,6 +1187,7 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
vae_encoder_export_config = vae_config_constructor(
vae_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
vae_encoder_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["vae_encoder"] = (vae_encoder, vae_encoder_export_config)

# VAE Decoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600
Expand All @@ -1217,6 +1203,7 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
vae_decoder_export_config = vae_config_constructor(
vae_decoder.config, int_dtype=int_dtype, float_dtype=float_dtype
)
vae_decoder_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["vae_decoder"] = (vae_decoder, vae_decoder_export_config)

text_encoder_2 = getattr(pipeline, "text_encoder_2", None)
Expand All @@ -1233,6 +1220,7 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
int_dtype=int_dtype,
float_dtype=float_dtype,
)
export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
models_for_export["text_encoder_2"] = (text_encoder_2, export_config)

return models_for_export
Expand Down
28 changes: 23 additions & 5 deletions optimum/intel/openvino/modeling_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
)

from ...exporters.openvino import main_export
from ..utils.import_utils import is_diffusers_version
from ..utils.import_utils import is_diffusers_version, is_openvino_version
from .configuration import OVConfig, OVQuantizationMethod, OVWeightQuantizationConfig
from .loaders import OVTextualInversionLoaderMixin
from .modeling_base import OVBaseModel
Expand All @@ -73,6 +73,7 @@
OV_XML_FILE_NAME,
TemporaryDirectory,
_print_compiled_model_properties,
check_scale_available,
model_has_dynamic_inputs,
np_to_pt_generators,
)
Expand Down Expand Up @@ -484,8 +485,15 @@ def _from_pretrained(
ov_config = kwargs.get("ov_config", {})
device = kwargs.get("device", "CPU")
vae_ov_conifg = {**ov_config}
if "GPU" in device.upper() and "INFERENCE_PRECISION_HINT" not in vae_ov_conifg:
vae_ov_conifg["INFERENCE_PRECISION_HINT"] = "f32"
if (
"GPU" in device.upper()
and "INFERENCE_PRECISION_HINT" not in vae_ov_conifg
and is_openvino_version("<=", "2025.0")
):
vae_model_path = models["vae_decoder"]
required_upcast = check_scale_available(vae_model_path)
if required_upcast:
vae_ov_conifg["INFERENCE_PRECISION_HINT"] = "f32"
for name, path in models.items():
if name in kwargs:
models[name] = kwargs.pop(name)
Expand Down Expand Up @@ -1202,7 +1210,12 @@ def forward(
return ModelOutput(**model_outputs)

def _compile(self):
if "GPU" in self._device and "INFERENCE_PRECISION_HINT" not in self.ov_config:
if (
"GPU" in self._device
and "INFERENCE_PRECISION_HINT" not in self.ov_config
and is_openvino_version("<", "2025.0")
and check_scale_available(self.model)
):
self.ov_config.update({"INFERENCE_PRECISION_HINT": "f32"})
super()._compile()

Expand Down Expand Up @@ -1241,7 +1254,12 @@ def forward(
return ModelOutput(**model_outputs)

def _compile(self):
if "GPU" in self._device and "INFERENCE_PRECISION_HINT" not in self.ov_config:
if (
"GPU" in self._device
and "INFERENCE_PRECISION_HINT" not in self.ov_config
and is_openvino_version("<", "2025.0")
and check_scale_available(self.model)
):
self.ov_config.update({"INFERENCE_PRECISION_HINT": "f32"})
super()._compile()

Expand Down
18 changes: 18 additions & 0 deletions optimum/intel/openvino/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,3 +565,21 @@ def onexc(func, path, exc):
def cleanup(self):
if self._finalizer.detach() or os.path.exists(self.name):
self._rmtree(self.name, ignore_errors=self._ignore_cleanup_errors)


def check_scale_available(model: Union[Model, str, Path]):
    """Return True if *model* carries an ``ACTIVATIONS_SCALE_FACTOR`` runtime option.

    Accepts either an in-memory openvino ``Model`` (queried via ``has_rt_info``)
    or a filesystem path to a serialized model XML, whose ``<rt_info>`` section
    is inspected directly without loading the model.
    """
    # In-memory model: the runtime-info API answers directly.
    if isinstance(model, Model):
        return model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])

    model_path = Path(model)
    if not model_path.exists():
        return False

    # Lazy import: only needed on the path-based branch.
    import xml.etree.ElementTree as ET

    root = ET.parse(model_path).getroot()
    rt_info = root.find("rt_info")
    if rt_info is None:
        return False
    runtime_options = rt_info.find("runtime_options")
    return runtime_options is not None and runtime_options.find("ACTIVATIONS_SCALE_FACTOR") is not None
60 changes: 34 additions & 26 deletions tests/openvino/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,13 @@ class ExportModelTest(unittest.TestCase):
"llava": OVModelForVisualCausalLM,
}

EXPECTED_DIFFUSERS_SCALE_FACTORS = {
"stable-diffusion-xl": {"vae_encoder": "128.0", "vae_decoder": "128.0"},
"stable-diffusion-3": {"text_encoder_3": "8.0"},
"flux": {"text_encoder_2": "8.0", "transformer": "8.0", "vae_encoder": "8.0", "vae_decoder": "8.0"},
"stable-diffusion-xl-refiner": {"vae_encoder": "128.0", "vae_decoder": "128.0"},
}

if is_transformers_version(">=", "4.45"):
SUPPORTED_ARCHITECTURES.update({"stable-diffusion-3": OVStableDiffusion3Pipeline, "flux": OVFluxPipeline})

Expand Down Expand Up @@ -143,32 +150,33 @@ def _openvino_export(
)

if library_name == "diffusers":
self.assertTrue(
ov_model.vae_encoder.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
)
self.assertTrue(
ov_model.vae_decoder.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
)
if hasattr(ov_model, "text_encoder") and ov_model.text_encoder:
self.assertTrue(
ov_model.text_encoder.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
)
if hasattr(ov_model, "text_encoder_2") and ov_model.text_encoder_2:
self.assertTrue(
ov_model.text_encoder_2.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
)
if hasattr(ov_model, "text_encoder_3") and ov_model.text_encoder_3:
self.assertTrue(
ov_model.text_encoder_3.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
)
if hasattr(ov_model, "unet") and ov_model.unet:
self.assertTrue(
ov_model.unet.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
)
if hasattr(ov_model, "transformer") and ov_model.transformer:
self.assertTrue(
ov_model.transformer.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
)
expected_scale_factors = self.EXPECTED_DIFFUSERS_SCALE_FACTORS.get(model_type, {})
components = [
"unet",
"transformer",
"text_encoder",
"text_encoder_2",
"text_encoder_3",
"vae_encoder",
"vae_decoder",
]
for component in components:
component_model = getattr(ov_model, component, None)
if component_model is None:
continue
component_scale = expected_scale_factors.get(component)
if component_scale is not None:
self.assertTrue(
component_model.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
)
self.assertEqual(
component_model.model.get_rt_info()["runtime_options"]["ACTIVATIONS_SCALE_FACTOR"],
component_scale,
)
else:
self.assertFalse(
component_model.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
)

@parameterized.expand(SUPPORTED_ARCHITECTURES)
def test_export(self, model_type: str):
Expand Down