From d579d73a8ee38d474b1b09f54fdc5f400c3dcf58 Mon Sep 17 00:00:00 2001 From: Deependu Jha Date: Thu, 9 Jan 2025 22:56:23 +0530 Subject: [PATCH 1/9] add stability-ai image generation in workflow --- inference/core/workflows/core_steps/loader.py | 4 + .../stability_ai/image_gen/__init__.py | 0 .../foundation/stability_ai/image_gen/v1.py | 183 ++++++++++++++++++ 3 files changed, 187 insertions(+) create mode 100644 inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/__init__.py create mode 100644 inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py diff --git a/inference/core/workflows/core_steps/loader.py b/inference/core/workflows/core_steps/loader.py index 2b7d90964..7f4fb963e 100644 --- a/inference/core/workflows/core_steps/loader.py +++ b/inference/core/workflows/core_steps/loader.py @@ -192,6 +192,9 @@ from inference.core.workflows.core_steps.models.foundation.segment_anything2.v1 import ( SegmentAnything2BlockV1, ) +from inference.core.workflows.core_steps.models.foundation.stability_ai.image_gen.v1 import ( + StabilityAIImageGenBlockV1, +) from inference.core.workflows.core_steps.models.foundation.stability_ai.inpainting.v1 import ( StabilityAIInpaintingBlockV1, ) @@ -572,6 +575,7 @@ def load_blocks() -> List[Type[WorkflowBlock]]: SIFTComparisonBlockV2, SegmentAnything2BlockV1, StabilityAIInpaintingBlockV1, + StabilityAIImageGenBlockV1, StabilizeTrackedDetectionsBlockV1, StitchImagesBlockV1, StitchOCRDetectionsBlockV1, diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/__init__.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py new file mode 100644 index 000000000..9c4d9c862 --- /dev/null +++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py @@ -0,0 +1,183 @@ +""" +Credits to: https://github.com/Fafruch for origin idea +""" + +from typing import List, Literal, Optional, Type, Union + +import cv2 +import numpy as np +import requests +import supervision as sv +from pydantic import ConfigDict, Field +from supervision import Color + +from inference.core.workflows.execution_engine.entities.base import ( + OutputDefinition, + WorkflowImageData, +) +from inference.core.workflows.execution_engine.entities.types import ( + IMAGE_KIND, + INSTANCE_SEGMENTATION_PREDICTION_KIND, + SECRET_KIND, + STRING_KIND, + Selector, +) +from inference.core.workflows.prototypes.block import ( + BlockResult, + WorkflowBlock, + WorkflowBlockManifest, +) + +LONG_DESCRIPTION = """ +The block wraps +[Stability AI image generation API](https://platform.stability.ai/docs/api-reference#tag/Generate) and +let users generate new images from text, or create variations of existing images. +""" + +SHORT_DESCRIPTION = ( + "generate new images from text, or create variations of existing images." +) + +API_HOST = "https://api.stability.ai" +ENDPOINT = { + "ultra": "/v2beta/stable-image/generate/ultra", + "core": "/v2beta/stable-image/generate/core", + "sd3": "/v2beta/stable-image/generate/sd3", +} + + +class BlockManifest(WorkflowBlockManifest): + model_config = ConfigDict( + json_schema_extra={ + "name": "Stability AI Image Generation", + "version": "v1", + "short_description": SHORT_DESCRIPTION, + "long_description": LONG_DESCRIPTION, + "license": "Apache-2.0", + "block_type": "model", + "search_keywords": [ + "Stability AI", + "stability.ai", + "image variation", + "image generation", + ], + "ui_manifest": { + "section": "model", + "icon": "far fa-palette", + }, + } + ) + type: Literal["roboflow_core/stability_ai_image_gen@v1"] + image: Selector(kind=[IMAGE_KIND]) = Field( + description="The image which was the base to generate VLM prediction", + examples=["$inputs.image"], + default=None, + ) + prompt: Union[ + Selector(kind=[STRING_KIND]), + Selector(kind=[STRING_KIND]), + str, + ] = Field( + description="Prompt to generate new images from text (what you wish to see)", + examples=["my prompt", "$inputs.prompt"], + ) + negative_prompt: Optional[ + Union[ + Selector(kind=[STRING_KIND]), + Selector(kind=[STRING_KIND]), + str, + ] + ] = Field( + default=None, + description="Negative prompt to image generation model (what you do not wish to see)", + examples=["my prompt", "$inputs.prompt"], + ) + model: Optional[ + Union[ + Selector(kind=[STRING_KIND]), + Selector(kind=[STRING_KIND]), + str, + ] + ] = Field( + default="core", + description="choose one of {'core', 'ultra', 'sd3'}. Default 'core' ", + examples=["my prompt", "$inputs.prompt"], + ) + api_key: Union[Selector(kind=[STRING_KIND, SECRET_KIND]), str] = Field( + description="Your Stability AI API key", + examples=["xxx-xxx", "$inputs.stability_ai_api_key"], + private=True, + ) + + @classmethod + def describe_outputs(cls) -> List[OutputDefinition]: + return [ + OutputDefinition(name="image", kind=[IMAGE_KIND]), + ] + + @classmethod + def get_execution_engine_compatibility(cls) -> Optional[str]: + return ">=1.4.0,<2.0.0" + + +class StabilityAIImageGenBlockV1(WorkflowBlock): + @classmethod + def get_manifest(cls) -> Type[WorkflowBlockManifest]: + return BlockManifest + + def run( + self, + image: WorkflowImageData, + prompt: str, + negative_prompt: str, + model: str, + api_key: str, + ) -> BlockResult: + files_to_send = {"none": ""} + if image is not None: + encoded_image = numpy_array_to_jpeg_bytes(image=image.numpy_image) + files_to_send = { + "image": encoded_image, + } + request_data = { + "prompt": prompt, + "output_format": "jpeg", + } + if negative_prompt is not None: + request_data["negative_prompt"] = negative_prompt + if model not in ENDPOINT.keys(): + model = "core" + response = requests.post( + f"{API_HOST}{ENDPOINT[model]}", + headers={"authorization": f"Bearer {api_key}", "accept": "image/*"}, + files=files_to_send, + data=request_data, + ) + if response.status_code != 200: + raise RuntimeError( + f"Request to StabilityAI API failed: {str(response.json())}" + ) + result_image = bytes_to_opencv_image(payload=response.content) + return { + "image": WorkflowImageData.copy_and_replace( + origin_image_data=image, + numpy_image=result_image, + ), + } + + +def numpy_array_to_jpeg_bytes( + image: np.ndarray, +) -> bytes: + _, img_encoded = cv2.imencode(".jpg", image) + return np.array(img_encoded).tobytes() + + +def bytes_to_opencv_image( + payload: bytes, array_type: np.number = np.uint8 +) -> np.ndarray: + bytes_array = np.frombuffer(payload, dtype=array_type) + decoding_result = cv2.imdecode(bytes_array, cv2.IMREAD_UNCHANGED) + if decoding_result is None: + raise ValueError("Could not encode bytes to OpenCV image.") + return decoding_result From 8654a06f88d823762863d68cad06f9ad42046d7b Mon Sep 17 00:00:00 2001 From: Deependu Jha Date: Thu, 9 Jan 2025 23:14:52 +0530 Subject: [PATCH 2/9] update --- .../core_steps/models/foundation/stability_ai/image_gen/v1.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py index 9c4d9c862..8309c856d 100644 --- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py @@ -1,7 +1,3 @@ -""" -Credits to: https://github.com/Fafruch for origin idea -""" - from typing import List, Literal, Optional, Type, Union import cv2 From 043e2837dbad2382ca0c84606d3263eca75eee4b Mon Sep 17 00:00:00 2001 From: Deependu Jha Date: Fri, 10 Jan 2025 00:02:36 +0530 Subject: [PATCH 3/9] remove unused imports --- .../core_steps/models/foundation/stability_ai/image_gen/v1.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py index 8309c856d..c744aa131 100644 --- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py @@ -3,9 +3,7 @@ import cv2 import numpy as np import requests -import supervision as sv from pydantic import ConfigDict, Field -from supervision import Color from inference.core.workflows.execution_engine.entities.base import ( OutputDefinition, @@ -13,7 +11,6 @@ ) from inference.core.workflows.execution_engine.entities.types import ( IMAGE_KIND, - INSTANCE_SEGMENTATION_PREDICTION_KIND, SECRET_KIND, STRING_KIND, Selector, From 5de88775dbbeb97cfca6167d919a1c863949f9fd Mon Sep 17 00:00:00 2001 From: Deependu Jha Date: Fri, 10 Jan 2025 01:13:06 +0530 Subject: [PATCH 4/9] update --- .../models/foundation/stability_ai/image_gen/v1.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py index c744aa131..4e2ba72bb 100644 --- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py @@ -150,12 +150,9 @@ def run( raise RuntimeError( f"Request to StabilityAI API failed: {str(response.json())}" ) - result_image = bytes_to_opencv_image(payload=response.content) + result_image = bytes_to_numpy_image(payload=response.content) return { - "image": WorkflowImageData.copy_and_replace( - origin_image_data=image, - numpy_image=result_image, - ), + "image": result_image, } @@ -166,11 +163,8 @@ def numpy_array_to_jpeg_bytes( return np.array(img_encoded).tobytes() -def bytes_to_opencv_image( +def bytes_to_numpy_image( payload: bytes, array_type: np.number = np.uint8 ) -> np.ndarray: bytes_array = np.frombuffer(payload, dtype=array_type) - decoding_result = cv2.imdecode(bytes_array, cv2.IMREAD_UNCHANGED) - if decoding_result is None: - raise ValueError("Could not encode bytes to OpenCV image.") - return decoding_result + return bytes_array From 3f39c0232103fc2eab138f1f4b3775bfa707c287 Mon Sep 17 00:00:00 2001 From: Deependu Jha Date: Fri, 10 Jan 2025 13:01:14 +0530 Subject: [PATCH 5/9] working hurray... --- .../foundation/stability_ai/image_gen/v1.py | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py index 4e2ba72bb..868f56ff0 100644 --- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py @@ -1,3 +1,5 @@ +import base64 +import uuid from typing import List, Literal, Optional, Type, Union import cv2 @@ -21,6 +23,10 @@ WorkflowBlockManifest, ) +from inference.core.workflows.execution_engine.entities.base import ( + ImageParentMetadata, +) + LONG_DESCRIPTION = """ The block wraps [Stability AI image generation API](https://platform.stability.ai/docs/api-reference#tag/Generate) and @@ -150,9 +156,10 @@ def run( raise RuntimeError( f"Request to StabilityAI API failed: {str(response.json())}" ) - result_image = bytes_to_numpy_image(payload=response.content) + new_image_base64 = base64.b64encode(response.content).decode("utf-8") + parent_metadata = ImageParentMetadata(parent_id=str(uuid.uuid1())) return { - "image": result_image, + "image": WorkflowImageData(parent_metadata, base64_image=new_image_base64), } @@ -168,3 +175,13 @@ def bytes_to_numpy_image( ) -> np.ndarray: bytes_array = np.frombuffer(payload, dtype=array_type) return bytes_array + + +def bytes_to_opencv_image( + payload: bytes, array_type: np.number = np.uint8 +) -> np.ndarray: + bytes_array = np.frombuffer(payload, dtype=array_type) + decoding_result = cv2.imdecode(bytes_array, cv2.IMREAD_UNCHANGED) + if decoding_result is None: + raise ValueError("Could not encode bytes to OpenCV image.") + return decoding_result From 49b38f527ad415e56a9deff965d887ebab58f063 Mon Sep 17 00:00:00 2001 From: Deependu Jha Date: Fri, 10 Jan 2025 13:02:27 +0530 Subject: [PATCH 6/9] remove unused functions --- .../foundation/stability_ai/image_gen/v1.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py index 868f56ff0..1e9d0c9fb 100644 --- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py @@ -168,20 +168,3 @@ def numpy_array_to_jpeg_bytes( ) -> bytes: _, img_encoded = cv2.imencode(".jpg", image) return np.array(img_encoded).tobytes() - - -def bytes_to_numpy_image( - payload: bytes, array_type: np.number = np.uint8 -) -> np.ndarray: - bytes_array = np.frombuffer(payload, dtype=array_type) - return bytes_array - - -def bytes_to_opencv_image( - payload: bytes, array_type: np.number = np.uint8 -) -> np.ndarray: - bytes_array = np.frombuffer(payload, dtype=array_type) - decoding_result = cv2.imdecode(bytes_array, cv2.IMREAD_UNCHANGED) - if decoding_result is None: - raise ValueError("Could not encode bytes to OpenCV image.") - return decoding_result From 223a9aa935a5e438c87d1779ccd3da18013663eb Mon Sep 17 00:00:00 2001 From: Deependu Jha Date: Fri, 10 Jan 2025 13:36:26 +0530 Subject: [PATCH 7/9] update --- .../foundation/stability_ai/image_gen/v1.py | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py index 1e9d0c9fb..b491b7590 100644 --- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py @@ -15,6 +15,7 @@ IMAGE_KIND, SECRET_KIND, STRING_KIND, + FLOAT_ZERO_TO_ONE_KIND, Selector, ) from inference.core.workflows.prototypes.block import ( @@ -68,10 +69,15 @@ class BlockManifest(WorkflowBlockManifest): ) type: Literal["roboflow_core/stability_ai_image_gen@v1"] image: Selector(kind=[IMAGE_KIND]) = Field( - description="The image which was the base to generate VLM prediction", + description="The image to use as the starting point for the generation.", examples=["$inputs.image"], default=None, ) + strength: Union[float, Selector(kind=[FLOAT_ZERO_TO_ONE_KIND])] = Field( + description="controls how much influence the image parameter has on the generated image. A value of 0 would yield an image that is identical to the input. A value of 1 would be as if you passed in no image at all.", + default=0.3, + examples=[0.7, "$inputs.strength"], + ) prompt: Union[ Selector(kind=[STRING_KIND]), Selector(kind=[STRING_KIND]), @@ -126,22 +132,25 @@ def get_manifest(cls) -> Type[WorkflowBlockManifest]: def run( self, - image: WorkflowImageData, prompt: str, negative_prompt: str, model: str, api_key: str, + image: WorkflowImageData, + strength: float = 0.3, ) -> BlockResult: + request_data = { + "prompt": prompt, + "output_format": "jpeg", + } files_to_send = {"none": ""} if image is not None: encoded_image = numpy_array_to_jpeg_bytes(image=image.numpy_image) files_to_send = { "image": encoded_image, } - request_data = { - "prompt": prompt, - "output_format": "jpeg", - } + request_data["strength"] = strength + if negative_prompt is not None: request_data["negative_prompt"] = negative_prompt if model not in ENDPOINT.keys(): From c0b8dc613fcc18687458090d4c96975af7f655a1 Mon Sep 17 00:00:00 2001 From: Deependu Jha Date: Fri, 10 Jan 2025 13:42:27 +0530 Subject: [PATCH 8/9] update --- .../models/foundation/stability_ai/image_gen/v1.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py index b491b7590..c83649e84 100644 --- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py @@ -29,9 +29,7 @@ ) LONG_DESCRIPTION = """ -The block wraps -[Stability AI image generation API](https://platform.stability.ai/docs/api-reference#tag/Generate) and -let users generate new images from text, or create variations of existing images. +The block wraps [Stability AI image generation API](https://platform.stability.ai/docs/api-reference#tag/Generate) and let users generate new images from text, or create variations of existing images. """ SHORT_DESCRIPTION = ( @@ -76,7 +74,7 @@ class BlockManifest(WorkflowBlockManifest): strength: Union[float, Selector(kind=[FLOAT_ZERO_TO_ONE_KIND])] = Field( description="controls how much influence the image parameter has on the generated image. A value of 0 would yield an image that is identical to the input. A value of 1 would be as if you passed in no image at all.", default=0.3, - examples=[0.7, "$inputs.strength"], + examples=[0.3, "$inputs.strength"], ) prompt: Union[ Selector(kind=[STRING_KIND]), From 3c89cef8ed1c83620b342bc85bb8a09cfbb4a49b Mon Sep 17 00:00:00 2001 From: Deependu Jha Date: Fri, 10 Jan 2025 13:43:35 +0530 Subject: [PATCH 9/9] run: `make style` --- .../models/foundation/stability_ai/image_gen/v1.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py index c83649e84..f365dc1bc 100644 --- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py @@ -8,14 +8,15 @@ from pydantic import ConfigDict, Field from inference.core.workflows.execution_engine.entities.base import ( + ImageParentMetadata, OutputDefinition, WorkflowImageData, ) from inference.core.workflows.execution_engine.entities.types import ( + FLOAT_ZERO_TO_ONE_KIND, IMAGE_KIND, SECRET_KIND, STRING_KIND, - FLOAT_ZERO_TO_ONE_KIND, Selector, ) from inference.core.workflows.prototypes.block import ( @@ -24,10 +25,6 @@ WorkflowBlockManifest, ) -from inference.core.workflows.execution_engine.entities.base import ( - ImageParentMetadata, -) - LONG_DESCRIPTION = """ The block wraps [Stability AI image generation API](https://platform.stability.ai/docs/api-reference#tag/Generate) and let users generate new images from text, or create variations of existing images. """