From d579d73a8ee38d474b1b09f54fdc5f400c3dcf58 Mon Sep 17 00:00:00 2001
From: Deependu Jha <deependujha21@gmail.com>
Date: Thu, 9 Jan 2025 22:56:23 +0530
Subject: [PATCH 1/9] add stability-ai image generation in workflow

---
 inference/core/workflows/core_steps/loader.py |   4 +
 .../stability_ai/image_gen/__init__.py        |   0
 .../foundation/stability_ai/image_gen/v1.py   | 183 ++++++++++++++++++
 3 files changed, 187 insertions(+)
 create mode 100644 inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/__init__.py
 create mode 100644 inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py

diff --git a/inference/core/workflows/core_steps/loader.py b/inference/core/workflows/core_steps/loader.py
index 2b7d90964..7f4fb963e 100644
--- a/inference/core/workflows/core_steps/loader.py
+++ b/inference/core/workflows/core_steps/loader.py
@@ -192,6 +192,9 @@
 from inference.core.workflows.core_steps.models.foundation.segment_anything2.v1 import (
     SegmentAnything2BlockV1,
 )
+from inference.core.workflows.core_steps.models.foundation.stability_ai.image_gen.v1 import (
+    StabilityAIImageGenBlockV1,
+)
 from inference.core.workflows.core_steps.models.foundation.stability_ai.inpainting.v1 import (
     StabilityAIInpaintingBlockV1,
 )
@@ -572,6 +575,7 @@ def load_blocks() -> List[Type[WorkflowBlock]]:
         SIFTComparisonBlockV2,
         SegmentAnything2BlockV1,
         StabilityAIInpaintingBlockV1,
+        StabilityAIImageGenBlockV1,
         StabilizeTrackedDetectionsBlockV1,
         StitchImagesBlockV1,
         StitchOCRDetectionsBlockV1,
diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/__init__.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
new file mode 100644
index 000000000..9c4d9c862
--- /dev/null
+++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
@@ -0,0 +1,183 @@
+"""
+Credits to: https://github.com/Fafruch for origin idea
+"""
+
+from typing import List, Literal, Optional, Type, Union
+
+import cv2
+import numpy as np
+import requests
+import supervision as sv
+from pydantic import ConfigDict, Field
+from supervision import Color
+
+from inference.core.workflows.execution_engine.entities.base import (
+    OutputDefinition,
+    WorkflowImageData,
+)
+from inference.core.workflows.execution_engine.entities.types import (
+    IMAGE_KIND,
+    INSTANCE_SEGMENTATION_PREDICTION_KIND,
+    SECRET_KIND,
+    STRING_KIND,
+    Selector,
+)
+from inference.core.workflows.prototypes.block import (
+    BlockResult,
+    WorkflowBlock,
+    WorkflowBlockManifest,
+)
+
+LONG_DESCRIPTION = """
+The block wraps 
+[Stability AI image generation API](https://platform.stability.ai/docs/api-reference#tag/Generate) and 
+let users generate new images from text, or create variations of existing images.
+"""
+
+SHORT_DESCRIPTION = (
+    "Generate new images from text, or create variations of existing images."
+)
+
+API_HOST = "https://api.stability.ai"
+ENDPOINT = {
+    "ultra": "/v2beta/stable-image/generate/ultra",
+    "core": "/v2beta/stable-image/generate/core",
+    "sd3": "/v2beta/stable-image/generate/sd3",
+}
+
+
+class BlockManifest(WorkflowBlockManifest):
+    model_config = ConfigDict(
+        json_schema_extra={
+            "name": "Stability AI Image Generation",
+            "version": "v1",
+            "short_description": SHORT_DESCRIPTION,
+            "long_description": LONG_DESCRIPTION,
+            "license": "Apache-2.0",
+            "block_type": "model",
+            "search_keywords": [
+                "Stability AI",
+                "stability.ai",
+                "image variation",
+                "image generation",
+            ],
+            "ui_manifest": {
+                "section": "model",
+                "icon": "far fa-palette",
+            },
+        }
+    )
+    type: Literal["roboflow_core/stability_ai_image_gen@v1"]
+    image: Selector(kind=[IMAGE_KIND]) = Field(
+        description="The image which was the base to generate VLM prediction",
+        examples=["$inputs.image"],
+        default=None,
+    )
+    prompt: Union[
+        Selector(kind=[STRING_KIND]),
+        Selector(kind=[STRING_KIND]),
+        str,
+    ] = Field(
+        description="Prompt to generate new images from text (what you wish to see)",
+        examples=["my prompt", "$inputs.prompt"],
+    )
+    negative_prompt: Optional[
+        Union[
+            Selector(kind=[STRING_KIND]),
+            Selector(kind=[STRING_KIND]),
+            str,
+        ]
+    ] = Field(
+        default=None,
+        description="Negative prompt to image generation model (what you do not wish to see)",
+        examples=["my negative prompt", "$inputs.negative_prompt"],
+    )
+    model: Optional[
+        Union[
+            Selector(kind=[STRING_KIND]),
+            Selector(kind=[STRING_KIND]),
+            str,
+        ]
+    ] = Field(
+        default="core",
+        description="Model to use; choose one of {'core', 'ultra', 'sd3'}. Defaults to 'core'.",
+        examples=["core", "$inputs.model"],
+    )
+    api_key: Union[Selector(kind=[STRING_KIND, SECRET_KIND]), str] = Field(
+        description="Your Stability AI API key",
+        examples=["xxx-xxx", "$inputs.stability_ai_api_key"],
+        private=True,
+    )
+
+    @classmethod
+    def describe_outputs(cls) -> List[OutputDefinition]:
+        return [
+            OutputDefinition(name="image", kind=[IMAGE_KIND]),
+        ]
+
+    @classmethod
+    def get_execution_engine_compatibility(cls) -> Optional[str]:
+        return ">=1.4.0,<2.0.0"
+
+
+class StabilityAIImageGenBlockV1(WorkflowBlock):
+    @classmethod
+    def get_manifest(cls) -> Type[WorkflowBlockManifest]:
+        return BlockManifest
+
+    def run(
+        self,
+        image: WorkflowImageData,
+        prompt: str,
+        negative_prompt: str,
+        model: str,
+        api_key: str,
+    ) -> BlockResult:
+        files_to_send = {"none": ""}
+        if image is not None:
+            encoded_image = numpy_array_to_jpeg_bytes(image=image.numpy_image)
+            files_to_send = {
+                "image": encoded_image,
+            }
+        request_data = {
+            "prompt": prompt,
+            "output_format": "jpeg",
+        }
+        if negative_prompt is not None:
+            request_data["negative_prompt"] = negative_prompt
+        if model not in ENDPOINT.keys():
+            model = "core"
+        response = requests.post(
+            f"{API_HOST}{ENDPOINT[model]}",
+            headers={"authorization": f"Bearer {api_key}", "accept": "image/*"},
+            files=files_to_send,
+            data=request_data,
+        )
+        if response.status_code != 200:
+            raise RuntimeError(
+                f"Request to StabilityAI API failed: {str(response.json())}"
+            )
+        result_image = bytes_to_opencv_image(payload=response.content)
+        return {
+            "image": WorkflowImageData.copy_and_replace(
+                origin_image_data=image,
+                numpy_image=result_image,
+            ),
+        }
+
+
+def numpy_array_to_jpeg_bytes(
+    image: np.ndarray,
+) -> bytes:
+    _, img_encoded = cv2.imencode(".jpg", image)
+    return np.array(img_encoded).tobytes()
+
+
+def bytes_to_opencv_image(
+    payload: bytes, array_type: np.number = np.uint8
+) -> np.ndarray:
+    bytes_array = np.frombuffer(payload, dtype=array_type)
+    decoding_result = cv2.imdecode(bytes_array, cv2.IMREAD_UNCHANGED)
+    if decoding_result is None:
+        raise ValueError("Could not encode bytes to OpenCV image.")
+    return decoding_result

From 8654a06f88d823762863d68cad06f9ad42046d7b Mon Sep 17 00:00:00 2001
From: Deependu Jha <deependujha21@gmail.com>
Date: Thu, 9 Jan 2025 23:14:52 +0530
Subject: [PATCH 2/9] remove credits docstring from image_gen block

---
 .../core_steps/models/foundation/stability_ai/image_gen/v1.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
index 9c4d9c862..8309c856d 100644
--- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
+++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
@@ -1,7 +1,3 @@
-"""
-Credits to: https://github.com/Fafruch for origin idea
-"""
-
 from typing import List, Literal, Optional, Type, Union
 
 import cv2

From 043e2837dbad2382ca0c84606d3263eca75eee4b Mon Sep 17 00:00:00 2001
From: Deependu Jha <deependujha21@gmail.com>
Date: Fri, 10 Jan 2025 00:02:36 +0530
Subject: [PATCH 3/9] remove unused imports

---
 .../core_steps/models/foundation/stability_ai/image_gen/v1.py  | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
index 8309c856d..c744aa131 100644
--- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
+++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
@@ -3,9 +3,7 @@
 import cv2
 import numpy as np
 import requests
-import supervision as sv
 from pydantic import ConfigDict, Field
-from supervision import Color
 
 from inference.core.workflows.execution_engine.entities.base import (
     OutputDefinition,
@@ -13,7 +11,6 @@
 )
 from inference.core.workflows.execution_engine.entities.types import (
     IMAGE_KIND,
-    INSTANCE_SEGMENTATION_PREDICTION_KIND,
     SECRET_KIND,
     STRING_KIND,
     Selector,

From 5de88775dbbeb97cfca6167d919a1c863949f9fd Mon Sep 17 00:00:00 2001
From: Deependu Jha <deependujha21@gmail.com>
Date: Fri, 10 Jan 2025 01:13:06 +0530
Subject: [PATCH 4/9] return raw response bytes instead of a copy of the input image data

---
 .../models/foundation/stability_ai/image_gen/v1.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
index c744aa131..4e2ba72bb 100644
--- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
+++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
@@ -150,12 +150,9 @@ def run(
             raise RuntimeError(
                 f"Request to StabilityAI API failed: {str(response.json())}"
             )
-        result_image = bytes_to_opencv_image(payload=response.content)
+        result_image = bytes_to_numpy_image(payload=response.content)
         return {
-            "image": WorkflowImageData.copy_and_replace(
-                origin_image_data=image,
-                numpy_image=result_image,
-            ),
+            "image": result_image,
         }
 
 
@@ -166,11 +163,8 @@ def numpy_array_to_jpeg_bytes(
     return np.array(img_encoded).tobytes()
 
 
-def bytes_to_opencv_image(
+def bytes_to_numpy_image(
     payload: bytes, array_type: np.number = np.uint8
 ) -> np.ndarray:
     bytes_array = np.frombuffer(payload, dtype=array_type)
-    decoding_result = cv2.imdecode(bytes_array, cv2.IMREAD_UNCHANGED)
-    if decoding_result is None:
-        raise ValueError("Could not encode bytes to OpenCV image.")
-    return decoding_result
+    return bytes_array

From 3f39c0232103fc2eab138f1f4b3775bfa707c287 Mon Sep 17 00:00:00 2001
From: Deependu Jha <deependujha21@gmail.com>
Date: Fri, 10 Jan 2025 13:01:14 +0530
Subject: [PATCH 5/9] return generated image as base64-backed WorkflowImageData

---
 .../foundation/stability_ai/image_gen/v1.py   | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
index 4e2ba72bb..868f56ff0 100644
--- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
+++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
@@ -1,3 +1,5 @@
+import base64
+import uuid
 from typing import List, Literal, Optional, Type, Union
 
 import cv2
@@ -21,6 +23,10 @@
     WorkflowBlockManifest,
 )
 
+from inference.core.workflows.execution_engine.entities.base import (
+    ImageParentMetadata,
+)
+
 LONG_DESCRIPTION = """
 The block wraps 
 [Stability AI image generation API](https://platform.stability.ai/docs/api-reference#tag/Generate) and 
@@ -150,9 +156,10 @@ def run(
             raise RuntimeError(
                 f"Request to StabilityAI API failed: {str(response.json())}"
             )
-        result_image = bytes_to_numpy_image(payload=response.content)
+        new_image_base64 = base64.b64encode(response.content).decode("utf-8")
+        parent_metadata = ImageParentMetadata(parent_id=str(uuid.uuid1()))
         return {
-            "image": result_image,
+            "image": WorkflowImageData(parent_metadata, base64_image=new_image_base64),
         }
 
 
@@ -168,3 +175,13 @@ def bytes_to_numpy_image(
 ) -> np.ndarray:
     bytes_array = np.frombuffer(payload, dtype=array_type)
     return bytes_array
+
+
+def bytes_to_opencv_image(
+    payload: bytes, array_type: np.number = np.uint8
+) -> np.ndarray:
+    bytes_array = np.frombuffer(payload, dtype=array_type)
+    decoding_result = cv2.imdecode(bytes_array, cv2.IMREAD_UNCHANGED)
+    if decoding_result is None:
+        raise ValueError("Could not encode bytes to OpenCV image.")
+    return decoding_result

From 49b38f527ad415e56a9deff965d887ebab58f063 Mon Sep 17 00:00:00 2001
From: Deependu Jha <deependujha21@gmail.com>
Date: Fri, 10 Jan 2025 13:02:27 +0530
Subject: [PATCH 6/9] remove unused functions

---
 .../foundation/stability_ai/image_gen/v1.py     | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
index 868f56ff0..1e9d0c9fb 100644
--- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
+++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
@@ -168,20 +168,3 @@ def numpy_array_to_jpeg_bytes(
 ) -> bytes:
     _, img_encoded = cv2.imencode(".jpg", image)
     return np.array(img_encoded).tobytes()
-
-
-def bytes_to_numpy_image(
-    payload: bytes, array_type: np.number = np.uint8
-) -> np.ndarray:
-    bytes_array = np.frombuffer(payload, dtype=array_type)
-    return bytes_array
-
-
-def bytes_to_opencv_image(
-    payload: bytes, array_type: np.number = np.uint8
-) -> np.ndarray:
-    bytes_array = np.frombuffer(payload, dtype=array_type)
-    decoding_result = cv2.imdecode(bytes_array, cv2.IMREAD_UNCHANGED)
-    if decoding_result is None:
-        raise ValueError("Could not encode bytes to OpenCV image.")
-    return decoding_result

From 223a9aa935a5e438c87d1779ccd3da18013663eb Mon Sep 17 00:00:00 2001
From: Deependu Jha <deependujha21@gmail.com>
Date: Fri, 10 Jan 2025 13:36:26 +0530
Subject: [PATCH 7/9] add strength parameter for image-to-image generation

---
 .../foundation/stability_ai/image_gen/v1.py   | 21 +++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
index 1e9d0c9fb..b491b7590 100644
--- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
+++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
@@ -15,6 +15,7 @@
     IMAGE_KIND,
     SECRET_KIND,
     STRING_KIND,
+    FLOAT_ZERO_TO_ONE_KIND,
     Selector,
 )
 from inference.core.workflows.prototypes.block import (
@@ -68,10 +69,15 @@ class BlockManifest(WorkflowBlockManifest):
     )
     type: Literal["roboflow_core/stability_ai_image_gen@v1"]
     image: Selector(kind=[IMAGE_KIND]) = Field(
-        description="The image which was the base to generate VLM prediction",
+        description="The image to use as the starting point for the generation.",
         examples=["$inputs.image"],
         default=None,
     )
+    strength: Union[float, Selector(kind=[FLOAT_ZERO_TO_ONE_KIND])] = Field(
+        description="controls how much influence the image parameter has on the generated image. A value of 0 would yield an image that is identical to the input. A value of 1 would be as if you passed in no image at all.",
+        default=0.3,
+        examples=[0.7, "$inputs.strength"],
+    )
     prompt: Union[
         Selector(kind=[STRING_KIND]),
         Selector(kind=[STRING_KIND]),
@@ -126,22 +132,25 @@ def get_manifest(cls) -> Type[WorkflowBlockManifest]:
 
     def run(
         self,
-        image: WorkflowImageData,
         prompt: str,
         negative_prompt: str,
         model: str,
         api_key: str,
+        image: WorkflowImageData,
+        strength: float = 0.3,
     ) -> BlockResult:
+        request_data = {
+            "prompt": prompt,
+            "output_format": "jpeg",
+        }
         files_to_send = {"none": ""}
         if image is not None:
             encoded_image = numpy_array_to_jpeg_bytes(image=image.numpy_image)
             files_to_send = {
                 "image": encoded_image,
             }
-        request_data = {
-            "prompt": prompt,
-            "output_format": "jpeg",
-        }
+            request_data["strength"] = strength
+
         if negative_prompt is not None:
             request_data["negative_prompt"] = negative_prompt
         if model not in ENDPOINT.keys():

From c0b8dc613fcc18687458090d4c96975af7f655a1 Mon Sep 17 00:00:00 2001
From: Deependu Jha <deependujha21@gmail.com>
Date: Fri, 10 Jan 2025 13:42:27 +0530
Subject: [PATCH 8/9] collapse long description to one line; align strength example with default

---
 .../models/foundation/stability_ai/image_gen/v1.py          | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
index b491b7590..c83649e84 100644
--- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
+++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
@@ -29,9 +29,7 @@
 )
 
 LONG_DESCRIPTION = """
-The block wraps 
-[Stability AI image generation API](https://platform.stability.ai/docs/api-reference#tag/Generate) and 
-let users generate new images from text, or create variations of existing images.
+The block wraps [Stability AI image generation API](https://platform.stability.ai/docs/api-reference#tag/Generate) and let users generate new images from text, or create variations of existing images.
 """
 
 SHORT_DESCRIPTION = (
@@ -76,7 +74,7 @@ class BlockManifest(WorkflowBlockManifest):
     strength: Union[float, Selector(kind=[FLOAT_ZERO_TO_ONE_KIND])] = Field(
         description="controls how much influence the image parameter has on the generated image. A value of 0 would yield an image that is identical to the input. A value of 1 would be as if you passed in no image at all.",
         default=0.3,
-        examples=[0.7, "$inputs.strength"],
+        examples=[0.3, "$inputs.strength"],
     )
     prompt: Union[
         Selector(kind=[STRING_KIND]),

From 3c89cef8ed1c83620b342bc85bb8a09cfbb4a49b Mon Sep 17 00:00:00 2001
From: Deependu Jha <deependujha21@gmail.com>
Date: Fri, 10 Jan 2025 13:43:35 +0530
Subject: [PATCH 9/9] run: `make style`

---
 .../models/foundation/stability_ai/image_gen/v1.py         | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
index c83649e84..f365dc1bc 100644
--- a/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
+++ b/inference/core/workflows/core_steps/models/foundation/stability_ai/image_gen/v1.py
@@ -8,14 +8,15 @@
 from pydantic import ConfigDict, Field
 
 from inference.core.workflows.execution_engine.entities.base import (
+    ImageParentMetadata,
     OutputDefinition,
     WorkflowImageData,
 )
 from inference.core.workflows.execution_engine.entities.types import (
+    FLOAT_ZERO_TO_ONE_KIND,
     IMAGE_KIND,
     SECRET_KIND,
     STRING_KIND,
-    FLOAT_ZERO_TO_ONE_KIND,
     Selector,
 )
 from inference.core.workflows.prototypes.block import (
@@ -24,10 +25,6 @@
     WorkflowBlockManifest,
 )
 
-from inference.core.workflows.execution_engine.entities.base import (
-    ImageParentMetadata,
-)
-
 LONG_DESCRIPTION = """
 The block wraps [Stability AI image generation API](https://platform.stability.ai/docs/api-reference#tag/Generate) and let users generate new images from text, or create variations of existing images.
 """