diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py
index 3ab2bbd550..22a3ca884e 100644
--- a/optimum/exporters/openvino/convert.py
+++ b/optimum/exporters/openvino/convert.py
@@ -46,7 +46,6 @@
     _torch_version,
     _transformers_version,
     compare_versions,
-    is_diffusers_version,
     is_openvino_tokenizers_version,
     is_openvino_version,
     is_tokenizers_version,
@@ -104,10 +103,10 @@ def _set_runtime_options(
 ):
     for model_name in models_and_export_configs.keys():
         _, sub_export_config = models_and_export_configs[model_name]
-        sub_export_config.runtime_options = {}
+        if not hasattr(sub_export_config, "runtime_options"):
+            sub_export_config.runtime_options = {}
         if (
-            "diffusers" in library_name
-            or "text-generation" in task
+            "text-generation" in task
             or ("image-text-to-text" in task and model_name == "language_model")
             or getattr(sub_export_config, "stateful", False)
         ):
@@ -1014,45 +1013,29 @@ def _get_submodels_and_export_configs(
 def get_diffusion_models_for_export_ext(
     pipeline: "DiffusionPipeline", int_dtype: str = "int64", float_dtype: str = "fp32", exporter: str = "openvino"
 ):
-    if is_diffusers_version(">=", "0.29.0"):
-        from diffusers import StableDiffusion3Img2ImgPipeline, StableDiffusion3Pipeline
-
-        sd3_pipes = [StableDiffusion3Pipeline, StableDiffusion3Img2ImgPipeline]
-        if is_diffusers_version(">=", "0.30.0"):
-            from diffusers import StableDiffusion3InpaintPipeline
-
-            sd3_pipes.append(StableDiffusion3InpaintPipeline)
-
-        is_sd3 = isinstance(pipeline, tuple(sd3_pipes))
-    else:
-        is_sd3 = False
-
-    if is_diffusers_version(">=", "0.30.0"):
-        from diffusers import FluxPipeline
-
-        flux_pipes = [FluxPipeline]
-
-        if is_diffusers_version(">=", "0.31.0"):
-            from diffusers import FluxImg2ImgPipeline, FluxInpaintPipeline
-
-            flux_pipes.extend([FluxPipeline, FluxImg2ImgPipeline, FluxInpaintPipeline])
-
-        if is_diffusers_version(">=", "0.32.0"):
-            from diffusers import FluxFillPipeline
-
-            flux_pipes.append(FluxFillPipeline)
-
-        is_flux = isinstance(pipeline, tuple(flux_pipes))
-    else:
-        is_flux = False
-
-    if not is_sd3 and not is_flux:
-        return None, get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter)
-    if is_sd3:
+    is_sdxl = pipeline.__class__.__name__.startswith("StableDiffusionXL")
+    is_sd3 = pipeline.__class__.__name__.startswith("StableDiffusion3")
+    is_flux = pipeline.__class__.__name__.startswith("Flux")
+    is_sd = pipeline.__class__.__name__.startswith("StableDiffusion") and not is_sd3
+    is_lcm = pipeline.__class__.__name__.startswith("LatentConsistencyModel")
+
+    if is_sd or is_sdxl or is_lcm:
+        models_for_export = get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter)
+        if is_sdxl and pipeline.vae.config.force_upcast:
+            models_for_export["vae_encoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "128.0"}
+            models_for_export["vae_decoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "128.0"}
+
+        # only SD 2.1 has overflow issue, it uses different prediction_type than other models
+        if is_sd and pipeline.scheduler.config.prediction_type == "v_prediction":
+            models_for_export["vae_encoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
+            models_for_export["vae_decoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
+
+    elif is_sd3:
         models_for_export = get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype)
-    else:
+    elif is_flux:
         models_for_export = get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype)
-
+    else:
+        raise ValueError(f"Unsupported pipeline type `{pipeline.__class__.__name__}` provided")
     return None, models_for_export
 
 
@@ -1150,6 +1133,7 @@ def get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype):
             int_dtype=int_dtype,
             float_dtype=float_dtype,
         )
+        export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
         models_for_export["text_encoder_3"] = (text_encoder_3, export_config)
 
     return models_for_export
@@ -1187,6 +1171,7 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
     transformer_export_config = export_config_constructor(
         pipeline.transformer.config, int_dtype=int_dtype, float_dtype=float_dtype
     )
+    transformer_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
     models_for_export["transformer"] = (transformer, transformer_export_config)
 
     # VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565
@@ -1202,6 +1187,7 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
     vae_encoder_export_config = vae_config_constructor(
         vae_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype
     )
+    vae_encoder_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
     models_for_export["vae_encoder"] = (vae_encoder, vae_encoder_export_config)
 
     # VAE Decoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600
@@ -1217,6 +1203,7 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
     vae_decoder_export_config = vae_config_constructor(
         vae_decoder.config, int_dtype=int_dtype, float_dtype=float_dtype
     )
+    vae_decoder_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
     models_for_export["vae_decoder"] = (vae_decoder, vae_decoder_export_config)
 
     text_encoder_2 = getattr(pipeline, "text_encoder_2", None)
@@ -1233,6 +1220,7 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
             int_dtype=int_dtype,
             float_dtype=float_dtype,
         )
+        export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
         models_for_export["text_encoder_2"] = (text_encoder_2, export_config)
 
     return models_for_export
diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index c059165417..bc2f75e0ce 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -63,7 +63,7 @@
 )
 
 from ...exporters.openvino import main_export
-from ..utils.import_utils import is_diffusers_version
+from ..utils.import_utils import is_diffusers_version, is_openvino_version
 from .configuration import OVConfig, OVQuantizationMethod, OVWeightQuantizationConfig
 from .loaders import OVTextualInversionLoaderMixin
 from .modeling_base import OVBaseModel
@@ -73,6 +73,7 @@
     OV_XML_FILE_NAME,
     TemporaryDirectory,
     _print_compiled_model_properties,
+    check_scale_available,
     model_has_dynamic_inputs,
     np_to_pt_generators,
 )
@@ -484,8 +485,15 @@ def _from_pretrained(
         ov_config = kwargs.get("ov_config", {})
         device = kwargs.get("device", "CPU")
         vae_ov_conifg = {**ov_config}
-        if "GPU" in device.upper() and "INFERENCE_PRECISION_HINT" not in vae_ov_conifg:
-            vae_ov_conifg["INFERENCE_PRECISION_HINT"] = "f32"
+        if (
+            "GPU" in device.upper()
+            and "INFERENCE_PRECISION_HINT" not in vae_ov_conifg
+            and is_openvino_version("<", "2025.0")
+        ):
+            vae_model_path = models["vae_decoder"]
+            required_upcast = check_scale_available(vae_model_path)
+            if required_upcast:
+                vae_ov_conifg["INFERENCE_PRECISION_HINT"] = "f32"
         for name, path in models.items():
             if name in kwargs:
                 models[name] = kwargs.pop(name)
@@ -1202,7 +1210,12 @@ def forward(
         return ModelOutput(**model_outputs)
 
     def _compile(self):
-        if "GPU" in self._device and "INFERENCE_PRECISION_HINT" not in self.ov_config:
+        if (
+            "GPU" in self._device
+            and "INFERENCE_PRECISION_HINT" not in self.ov_config
+            and is_openvino_version("<", "2025.0")
+            and check_scale_available(self.model)
+        ):
             self.ov_config.update({"INFERENCE_PRECISION_HINT": "f32"})
         super()._compile()
 
@@ -1241,7 +1254,12 @@ def forward(
         return ModelOutput(**model_outputs)
 
     def _compile(self):
-        if "GPU" in self._device and "INFERENCE_PRECISION_HINT" not in self.ov_config:
+        if (
+            "GPU" in self._device
+            and "INFERENCE_PRECISION_HINT" not in self.ov_config
+            and is_openvino_version("<", "2025.0")
+            and check_scale_available(self.model)
+        ):
             self.ov_config.update({"INFERENCE_PRECISION_HINT": "f32"})
         super()._compile()
 
diff --git a/optimum/intel/openvino/utils.py b/optimum/intel/openvino/utils.py
index a1950e9268..fbb108c7d8 100644
--- a/optimum/intel/openvino/utils.py
+++ b/optimum/intel/openvino/utils.py
@@ -565,3 +565,26 @@ def onexc(func, path, exc):
     def cleanup(self):
         if self._finalizer.detach() or os.path.exists(self.name):
             self._rmtree(self.name, ignore_errors=self._ignore_cleanup_errors)
+
+
+def check_scale_available(model: Union[Model, str, Path]):
+    """
+    Return whether `model` has an `ACTIVATIONS_SCALE_FACTOR` entry under `runtime_options` in its rt_info.
+
+    `model` is either a `Model` instance or a path to an XML file; a path that does not exist returns `False`.
+    """
+    if isinstance(model, Model):
+        return model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
+    if not Path(model).exists():
+        return False
+    import xml.etree.ElementTree as ET
+
+    tree = ET.parse(model)
+    root = tree.getroot()
+    rt_info = root.find("rt_info")
+    if rt_info is None:
+        return False
+    runtime_options = rt_info.find("runtime_options")
+    if runtime_options is None:
+        return False
+    return runtime_options.find("ACTIVATIONS_SCALE_FACTOR") is not None
diff --git a/tests/openvino/test_export.py b/tests/openvino/test_export.py
index 55500471ae..8c5cfc9c80 100644
--- a/tests/openvino/test_export.py
+++ b/tests/openvino/test_export.py
@@ -75,6 +75,13 @@ class ExportModelTest(unittest.TestCase):
         "llava": OVModelForVisualCausalLM,
     }
 
+    EXPECTED_DIFFUSERS_SCALE_FACTORS = {
+        "stable-diffusion-xl": {"vae_encoder": "128.0", "vae_decoder": "128.0"},
+        "stable-diffusion-3": {"text_encoder_3": "8.0"},
+        "flux": {"text_encoder_2": "8.0", "transformer": "8.0", "vae_encoder": "8.0", "vae_decoder": "8.0"},
+        "stable-diffusion-xl-refiner": {"vae_encoder": "128.0", "vae_decoder": "128.0"},
+    }
+
     if is_transformers_version(">=", "4.45"):
         SUPPORTED_ARCHITECTURES.update({"stable-diffusion-3": OVStableDiffusion3Pipeline, "flux": OVFluxPipeline})
 
@@ -143,32 +150,33 @@ def _openvino_export(
                     )
 
                 if library_name == "diffusers":
-                    self.assertTrue(
-                        ov_model.vae_encoder.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
-                    )
-                    self.assertTrue(
-                        ov_model.vae_decoder.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
-                    )
-                    if hasattr(ov_model, "text_encoder") and ov_model.text_encoder:
-                        self.assertTrue(
-                            ov_model.text_encoder.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
-                        )
-                    if hasattr(ov_model, "text_encoder_2") and ov_model.text_encoder_2:
-                        self.assertTrue(
-                            ov_model.text_encoder_2.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
-                        )
-                    if hasattr(ov_model, "text_encoder_3") and ov_model.text_encoder_3:
-                        self.assertTrue(
-                            ov_model.text_encoder_3.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
-                        )
-                    if hasattr(ov_model, "unet") and ov_model.unet:
-                        self.assertTrue(
-                            ov_model.unet.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
-                        )
-                    if hasattr(ov_model, "transformer") and ov_model.transformer:
-                        self.assertTrue(
-                            ov_model.transformer.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
-                        )
+                    expected_scale_factors = self.EXPECTED_DIFFUSERS_SCALE_FACTORS.get(model_type, {})
+                    components = [
+                        "unet",
+                        "transformer",
+                        "text_encoder",
+                        "text_encoder_2",
+                        "text_encoder_3",
+                        "vae_encoder",
+                        "vae_decoder",
+                    ]
+                    for component in components:
+                        component_model = getattr(ov_model, component, None)
+                        if component_model is None:
+                            continue
+                        component_scale = expected_scale_factors.get(component)
+                        if component_scale is not None:
+                            self.assertTrue(
+                                component_model.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
+                            )
+                            self.assertEqual(
+                                component_model.model.get_rt_info()["runtime_options"]["ACTIVATIONS_SCALE_FACTOR"],
+                                component_scale,
+                            )
+                        else:
+                            self.assertFalse(
+                                component_model.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
+                            )
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_export(self, model_type: str):