From ada722bf95d46d77edf040d368247c3412ba8ad6 Mon Sep 17 00:00:00 2001 From: Matthias Georgi Date: Sun, 12 Jan 2025 20:46:36 +0100 Subject: [PATCH] all fal video models --- src/nodetool/metadata/nodes.json | 983 ++++++++++++++++++++++- src/nodetool/nodes/fal/image_to_video.py | 493 +++++++++++- src/nodetool/nodes/fal/text_to_image.py | 4 +- 3 files changed, 1397 insertions(+), 83 deletions(-) diff --git a/src/nodetool/metadata/nodes.json b/src/nodetool/metadata/nodes.json index f01b6b12..1091c9c0 100644 --- a/src/nodetool/metadata/nodes.json +++ b/src/nodetool/metadata/nodes.json @@ -22380,10 +22380,10 @@ "year": 2025, "month": 1, "day": 12, - "hour": 17, - "minute": 44, - "second": 21, - "microsecond": 594860, + "hour": 20, + "minute": 46, + "second": 27, + "microsecond": 406226, "tzinfo": "UTC", "utc_offset": 0.0 }, @@ -23422,10 +23422,10 @@ "year": 2025, "month": 1, "day": 12, - "hour": 17, - "minute": 44, - "second": 21, - "microsecond": 598829, + "hour": 20, + "minute": 46, + "second": 27, + "microsecond": 410185, "tzinfo": "UTC", "utc_offset": 0.0 }, @@ -23475,10 +23475,10 @@ "year": 2025, "month": 1, "day": 12, - "hour": 17, - "minute": 44, - "second": 21, - "microsecond": 599201, + "hour": 20, + "minute": 46, + "second": 27, + "microsecond": 410476, "tzinfo": "UTC", "utc_offset": 0.0 }, @@ -23528,10 +23528,10 @@ "year": 2025, "month": 1, "day": 12, - "hour": 17, - "minute": 44, - "second": 21, - "microsecond": 599500, + "hour": 20, + "minute": 46, + "second": 27, + "microsecond": 410687, "tzinfo": "UTC", "utc_offset": 0.0 }, @@ -23581,10 +23581,10 @@ "year": 2025, "month": 1, "day": 12, - "hour": 17, - "minute": 44, - "second": 21, - "microsecond": 599751, + "hour": 20, + "minute": 46, + "second": 27, + "microsecond": 410891, "tzinfo": "UTC", "utc_offset": 0.0 }, @@ -23634,10 +23634,10 @@ "year": 2025, "month": 1, "day": 12, - "hour": 17, - "minute": 44, - "second": 21, - "microsecond": 599977, + "hour": 20, + "minute": 46, + "second": 27, + "microsecond": 
411099, "tzinfo": "UTC", "utc_offset": 0.0 }, @@ -23687,10 +23687,10 @@ "year": 2025, "month": 1, "day": 12, - "hour": 17, - "minute": 44, - "second": 21, - "microsecond": 600238, + "hour": 20, + "minute": 46, + "second": 27, + "microsecond": 411364, "tzinfo": "UTC", "utc_offset": 0.0 }, @@ -23740,10 +23740,10 @@ "year": 2025, "month": 1, "day": 12, - "hour": 17, - "minute": 44, - "second": 21, - "microsecond": 600454, + "hour": 20, + "minute": 46, + "second": 27, + "microsecond": 411589, "tzinfo": "UTC", "utc_offset": 0.0 }, @@ -23809,10 +23809,10 @@ "year": 2025, "month": 1, "day": 12, - "hour": 17, - "minute": 44, - "second": 21, - "microsecond": 600708, + "hour": 20, + "minute": 46, + "second": 27, + "microsecond": 411839, "tzinfo": "UTC", "utc_offset": 0.0 }, @@ -75403,7 +75403,7 @@ }, { "title": "Recraft V 3", - "description": "REDUX3 is a cutting-edge image generation model that combines advanced AI technology with \n advanced image processing techniques to deliver stunning visual results.", + "description": "Recraft V3 is a text-to-image model with the ability to generate long texts, vector art, images in brand style, and much more. \n image, text", "namespace": "fal.text_to_image", "node_type": "fal.text_to_image.RecraftV3", "layout": "default", @@ -83323,7 +83323,7 @@ }, { "title": "Haiper Image To Video", - "description": "Transform images into hyper-realistic videos with Haiper 2.0. Experience industry-leading \n resolution, fluid motion, and rapid generation for stunning AI videos.", + "description": "Transform images into hyper-realistic videos with Haiper 2.0. Experience industry-leading\n resolution, fluid motion, and rapid generation for stunning AI videos.", "namespace": "fal.image_to_video", "node_type": "fal.image_to_video.HaiperImageToVideo", "layout": "default", @@ -83775,6 +83775,913 @@ "duration" ] }, + { + "title": "Cog Video X", + "description": "Generate videos from images using CogVideoX-5B. 
Features high-quality motion synthesis with\n configurable parameters for fine-tuned control over the output.", + "namespace": "fal.image_to_video", + "node_type": "fal.image_to_video.CogVideoX", + "layout": "default", + "properties": [ + { + "name": "image", + "type": { + "type": "image", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": { + "type": "image", + "uri": "", + "asset_id": null, + "data": null + }, + "title": "Image", + "description": "The image to transform into a video", + "min": null, + "max": null + }, + { + "name": "prompt", + "type": { + "type": "str", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": "", + "title": "Prompt", + "description": "A description of the desired video motion and style", + "min": null, + "max": null + }, + { + "name": "video_size", + "type": { + "type": "enum", + "optional": false, + "values": [ + "square_hd", + "square", + "portrait_4_3", + "portrait_16_9", + "landscape_4_3", + "landscape_16_9" + ], + "type_args": [], + "type_name": "nodetool.nodes.fal.image_to_video.VideoSize" + }, + "default": "landscape_16_9", + "title": "Video Size", + "description": "The size/aspect ratio of the generated video", + "min": null, + "max": null + }, + { + "name": "negative_prompt", + "type": { + "type": "str", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": "Distorted, discontinuous, Ugly, blurry, low resolution, motionless, static, disfigured, disconnected limbs, Ugly faces, incomplete arms", + "title": "Negative Prompt", + "description": "What to avoid in the generated video", + "min": null, + "max": null + }, + { + "name": "num_inference_steps", + "type": { + "type": "int", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": 50, + "title": "Num Inference Steps", + "description": "Number of denoising steps (higher = better quality but slower)", + 
"min": null, + "max": null + }, + { + "name": "guidance_scale", + "type": { + "type": "float", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": 7.0, + "title": "Guidance Scale", + "description": "How closely to follow the prompt (higher = more faithful but less creative)", + "min": null, + "max": null + }, + { + "name": "use_rife", + "type": { + "type": "bool", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": true, + "title": "Use Rife", + "description": "Whether to use RIFE for video interpolation", + "min": null, + "max": null + }, + { + "name": "export_fps", + "type": { + "type": "int", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": 16, + "title": "Export Fps", + "description": "Target frames per second for the output video", + "min": null, + "max": null + }, + { + "name": "seed", + "type": { + "type": "int", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": -1, + "title": "Seed", + "description": "The same seed will output the same video every time", + "min": null, + "max": null + } + ], + "outputs": [ + { + "type": { + "type": "video", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "name": "output", + "stream": false + } + ], + "the_model_info": {}, + "recommended_models": [], + "basic_fields": [ + "image", + "prompt", + "video_size" + ] + }, + { + "title": "Mini Max Video", + "description": "Generate video clips from your images using MiniMax Video model. 
Transform static art into dynamic\n masterpieces with enhanced smoothness and vivid motion.", + "namespace": "fal.image_to_video", + "node_type": "fal.image_to_video.MiniMaxVideo", + "layout": "default", + "properties": [ + { + "name": "image", + "type": { + "type": "image", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": { + "type": "image", + "uri": "", + "asset_id": null, + "data": null + }, + "title": "Image", + "description": "The image to transform into a video", + "min": null, + "max": null + }, + { + "name": "prompt", + "type": { + "type": "str", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": "", + "title": "Prompt", + "description": "A description of the desired video motion and style", + "min": null, + "max": null + }, + { + "name": "prompt_optimizer", + "type": { + "type": "bool", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": true, + "title": "Prompt Optimizer", + "description": "Whether to use the model's prompt optimizer", + "min": null, + "max": null + } + ], + "outputs": [ + { + "type": { + "type": "video", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "name": "output", + "stream": false + } + ], + "the_model_info": {}, + "recommended_models": [], + "basic_fields": [ + "image", + "prompt" + ] + }, + { + "title": "LTXVideo", + "description": "Generate videos from images using LTX Video. 
Best results with 768x512 images and detailed,\n chronological descriptions of actions and scenes.", + "namespace": "fal.image_to_video", + "node_type": "fal.image_to_video.LTXVideo", + "layout": "default", + "properties": [ + { + "name": "image", + "type": { + "type": "image", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": { + "type": "image", + "uri": "", + "asset_id": null, + "data": null + }, + "title": "Image", + "description": "The image to transform into a video (768x512 recommended)", + "min": null, + "max": null + }, + { + "name": "prompt", + "type": { + "type": "str", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": "", + "title": "Prompt", + "description": "A detailed description of the desired video motion and style", + "min": null, + "max": null + }, + { + "name": "negative_prompt", + "type": { + "type": "str", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": "low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly", + "title": "Negative Prompt", + "description": "What to avoid in the generated video", + "min": null, + "max": null + }, + { + "name": "num_inference_steps", + "type": { + "type": "int", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": 30, + "title": "Num Inference Steps", + "description": "Number of inference steps (higher = better quality but slower)", + "min": null, + "max": null + }, + { + "name": "guidance_scale", + "type": { + "type": "float", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": 3.0, + "title": "Guidance Scale", + "description": "How closely to follow the prompt (higher = more faithful)", + "min": null, + "max": null + }, + { + "name": "seed", + "type": { + "type": "int", + "optional": false, + "values": 
null, + "type_args": [], + "type_name": null + }, + "default": -1, + "title": "Seed", + "description": "The same seed will output the same video every time", + "min": null, + "max": null + } + ], + "outputs": [ + { + "type": { + "type": "video", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "name": "output", + "stream": false + } + ], + "the_model_info": {}, + "recommended_models": [], + "basic_fields": [ + "image", + "prompt" + ] + }, + { + "title": "Stable Video", + "description": "Generate short video clips from your images using Stable Video Diffusion v1.1. Features high-quality\n motion synthesis with configurable parameters.", + "namespace": "fal.image_to_video", + "node_type": "fal.image_to_video.StableVideo", + "layout": "default", + "properties": [ + { + "name": "image", + "type": { + "type": "image", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": { + "type": "image", + "uri": "", + "asset_id": null, + "data": null + }, + "title": "Image", + "description": "The image to transform into a video", + "min": null, + "max": null + }, + { + "name": "motion_bucket_id", + "type": { + "type": "int", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": 127, + "title": "Motion Bucket Id", + "description": "Controls motion intensity (higher = more motion)", + "min": null, + "max": null + }, + { + "name": "cond_aug", + "type": { + "type": "float", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": 0.02, + "title": "Cond Aug", + "description": "Amount of noise added to conditioning (higher = more motion)", + "min": null, + "max": null + }, + { + "name": "fps", + "type": { + "type": "int", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": 25, + "title": "Fps", + "description": "Frames per second of the output video", + "min": null, + "max": null + }, + { 
+ "name": "seed", + "type": { + "type": "int", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": -1, + "title": "Seed", + "description": "The same seed will output the same video every time", + "min": null, + "max": null + } + ], + "outputs": [ + { + "type": { + "type": "video", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "name": "output", + "stream": false + } + ], + "the_model_info": {}, + "recommended_models": [], + "basic_fields": [ + "image", + "motion_bucket_id", + "fps" + ] + }, + { + "title": "Fast SVD", + "description": "Generate short video clips from your images using SVD v1.1 at Lightning Speed. Features high-quality\n motion synthesis with configurable parameters for rapid video generation.", + "namespace": "fal.image_to_video", + "node_type": "fal.image_to_video.FastSVD", + "layout": "default", + "properties": [ + { + "name": "image", + "type": { + "type": "image", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": { + "type": "image", + "uri": "", + "asset_id": null, + "data": null + }, + "title": "Image", + "description": "The image to transform into a video", + "min": null, + "max": null + }, + { + "name": "motion_bucket_id", + "type": { + "type": "int", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": 127, + "title": "Motion Bucket Id", + "description": "Controls motion intensity (higher = more motion)", + "min": null, + "max": null + }, + { + "name": "cond_aug", + "type": { + "type": "float", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": 0.02, + "title": "Cond Aug", + "description": "Amount of noise added to conditioning (higher = more motion)", + "min": null, + "max": null + }, + { + "name": "steps", + "type": { + "type": "int", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + 
"default": 4, + "title": "Steps", + "description": "Number of inference steps (higher = better quality but slower)", + "min": null, + "max": null + }, + { + "name": "fps", + "type": { + "type": "int", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": 10, + "title": "Fps", + "description": "Frames per second of the output video (total length is 25 frames)", + "min": null, + "max": null + }, + { + "name": "seed", + "type": { + "type": "int", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": -1, + "title": "Seed", + "description": "The same seed will output the same video every time", + "min": null, + "max": null + } + ], + "outputs": [ + { + "type": { + "type": "video", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "name": "output", + "stream": false + } + ], + "the_model_info": {}, + "recommended_models": [], + "basic_fields": [ + "image", + "motion_bucket_id", + "fps" + ] + }, + { + "title": "AMTInterpolation", + "description": "Interpolate between image frames to create smooth video transitions. 
Supports configurable FPS\n and recursive interpolation passes for higher quality results.", + "namespace": "fal.image_to_video", + "node_type": "fal.image_to_video.AMTInterpolation", + "layout": "default", + "properties": [ + { + "name": "frames", + "type": { + "type": "list", + "optional": false, + "values": null, + "type_args": [ + { + "type": "image", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + } + ], + "type_name": null + }, + "default": [ + { + "type": "image", + "uri": "", + "asset_id": null, + "data": null + }, + { + "type": "image", + "uri": "", + "asset_id": null, + "data": null + } + ], + "title": "Frames", + "description": "List of frames to interpolate between (minimum 2 frames required)", + "min": null, + "max": null + }, + { + "name": "output_fps", + "type": { + "type": "int", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": 24, + "title": "Output Fps", + "description": "Output frames per second", + "min": null, + "max": null + }, + { + "name": "recursive_interpolation_passes", + "type": { + "type": "int", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": 4, + "title": "Recursive Interpolation Passes", + "description": "Number of recursive interpolation passes (higher = smoother)", + "min": null, + "max": null + } + ], + "outputs": [ + { + "type": { + "type": "video", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "name": "output", + "stream": false + } + ], + "the_model_info": {}, + "recommended_models": [], + "basic_fields": [ + "frames", + "output_fps" + ] + }, + { + "title": "Sad Talker", + "description": "Generate talking face animations from a single image and audio file. 
Features configurable\n face model resolution and expression controls.", + "namespace": "fal.image_to_video", + "node_type": "fal.image_to_video.SadTalker", + "layout": "default", + "properties": [ + { + "name": "image", + "type": { + "type": "image", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": { + "type": "image", + "uri": "", + "asset_id": null, + "data": null + }, + "title": "Image", + "description": "The source image to animate", + "min": null, + "max": null + }, + { + "name": "audio", + "type": { + "type": "str", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": "", + "title": "Audio", + "description": "URL of the audio file to drive the animation", + "min": null, + "max": null + }, + { + "name": "face_model_resolution", + "type": { + "type": "enum", + "optional": false, + "values": [ + "256", + "512" + ], + "type_args": [], + "type_name": "nodetool.nodes.fal.image_to_video.FaceModelResolution" + }, + "default": "256", + "title": "Face Model Resolution", + "description": "Resolution of the face model", + "min": null, + "max": null + }, + { + "name": "expression_scale", + "type": { + "type": "float", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": 1.0, + "title": "Expression Scale", + "description": "Scale of the expression (1.0 = normal)", + "min": null, + "max": null + }, + { + "name": "still_mode", + "type": { + "type": "bool", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": false, + "title": "Still Mode", + "description": "Reduce head motion (works with preprocess 'full')", + "min": null, + "max": null + }, + { + "name": "preprocess", + "type": { + "type": "enum", + "optional": false, + "values": [ + "crop", + "extcrop", + "resize", + "full", + "extfull" + ], + "type_args": [], + "type_name": "nodetool.nodes.fal.image_to_video.PreprocessType" + }, + "default": 
"crop", + "title": "Preprocess", + "description": "Type of image preprocessing to apply", + "min": null, + "max": null + } + ], + "outputs": [ + { + "type": { + "type": "video", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "name": "output", + "stream": false + } + ], + "the_model_info": {}, + "recommended_models": [], + "basic_fields": [ + "image", + "audio", + "face_model_resolution" + ] + }, + { + "title": "Muse Talk", + "description": "Real-time high quality audio-driven lip-syncing model. Animate a face video with custom audio\n for natural-looking speech animation.", + "namespace": "fal.image_to_video", + "node_type": "fal.image_to_video.MuseTalk", + "layout": "default", + "properties": [ + { + "name": "video", + "type": { + "type": "video", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": { + "type": "video", + "uri": "", + "asset_id": null, + "data": null, + "duration": null, + "format": null + }, + "title": "Video", + "description": "URL of the source video to animate", + "min": null, + "max": null + }, + { + "name": "audio", + "type": { + "type": "audio", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "default": { + "type": "audio", + "uri": "", + "asset_id": null, + "data": null + }, + "title": "Audio", + "description": "URL of the audio file to drive the lip sync", + "min": null, + "max": null + } + ], + "outputs": [ + { + "type": { + "type": "video", + "optional": false, + "values": null, + "type_args": [], + "type_name": null + }, + "name": "output", + "stream": false + } + ], + "the_model_info": {}, + "recommended_models": [], + "basic_fields": [ + "video", + "audio" + ] + }, { "title": "Text To Speech", "description": "Converts text to speech using OpenAI TTS models.\n audio, tts, text-to-speech, voice, synthesis\n\n Use cases:\n - Generate spoken content for videos or podcasts\n - Create voice-overs for presentations\n - 
Assist visually impaired users with text reading\n - Produce audio versions of written content", diff --git a/src/nodetool/nodes/fal/image_to_video.py b/src/nodetool/nodes/fal/image_to_video.py index 3632b9b2..5272ea09 100644 --- a/src/nodetool/nodes/fal/image_to_video.py +++ b/src/nodetool/nodes/fal/image_to_video.py @@ -1,50 +1,48 @@ from typing import Literal from pydantic import Field -from nodetool.metadata.types import ImageRef, VideoRef +from nodetool.metadata.types import AudioRef, ImageRef, VideoRef from nodetool.nodes.fal.fal_node import FALNode from nodetool.workflows.processing_context import ProcessingContext from enum import Enum + class VideoDuration(Enum): FOUR_SECONDS = 4 SIX_SECONDS = 6 + class HaiperImageToVideo(FALNode): """ - Transform images into hyper-realistic videos with Haiper 2.0. Experience industry-leading + Transform images into hyper-realistic videos with Haiper 2.0. Experience industry-leading resolution, fluid motion, and rapid generation for stunning AI videos. 
""" image: ImageRef = Field( - default=ImageRef(), - description="The image to transform into a video" + default=ImageRef(), description="The image to transform into a video" ) prompt: str = Field( - default="", - description="A description of the desired video motion and style" + default="", description="A description of the desired video motion and style" ) duration: VideoDuration = Field( default=VideoDuration.FOUR_SECONDS, - description="The duration of the generated video in seconds" + description="The duration of the generated video in seconds", ) prompt_enhancer: bool = Field( - default=True, - description="Whether to use the model's prompt enhancer" + default=True, description="Whether to use the model's prompt enhancer" ) seed: int = Field( - default=-1, - description="The same seed will output the same video every time" + default=-1, description="The same seed will output the same video every time" ) async def process(self, context: ProcessingContext) -> VideoRef: image_base64 = await context.image_to_base64(self.image) - + arguments = { "image_url": f"data:image/png;base64,{image_base64}", "prompt": self.prompt, "duration": self.duration, - "prompt_enhancer": self.prompt_enhancer + "prompt_enhancer": self.prompt_enhancer, } if self.seed != -1: arguments["seed"] = self.seed @@ -61,6 +59,7 @@ async def process(self, context: ProcessingContext) -> VideoRef: def get_basic_fields(cls): return ["image", "prompt", "duration"] + class AspectRatio(Enum): RATIO_16_9 = "16:9" RATIO_9_16 = "9:16" @@ -70,47 +69,46 @@ class AspectRatio(Enum): RATIO_9_21 = "9:21" RATIO_1_1 = "1:1" + class KlingDuration(Enum): FIVE_SECONDS = "5" TEN_SECONDS = "10" + class LumaDreamMachine(FALNode): """ Generate video clips from your images using Luma Dream Machine v1.5. Supports various aspect ratios and optional end-frame blending. 
""" - + image: ImageRef = Field( - default=ImageRef(), - description="The image to transform into a video" + default=ImageRef(), description="The image to transform into a video" ) prompt: str = Field( - default="", - description="A description of the desired video motion and style" + default="", description="A description of the desired video motion and style" ) aspect_ratio: AspectRatio = Field( default=AspectRatio.RATIO_16_9, - description="The aspect ratio of the generated video" + description="The aspect ratio of the generated video", ) loop: bool = Field( default=False, - description="Whether the video should loop (end blends with beginning)" + description="Whether the video should loop (end blends with beginning)", ) end_image: ImageRef | None = Field( - default=None, - description="Optional image to blend the end of the video with" + default=None, description="Optional image to blend the end of the video with" ) async def process(self, context: ProcessingContext) -> VideoRef: image_base64 = await context.image_to_base64(self.image) - + arguments = { "image_url": f"data:image/png;base64,{image_base64}", "prompt": self.prompt, "aspect_ratio": self.aspect_ratio.value, - "loop": self.loop + "loop": self.loop, } - + if self.end_image: end_image_base64 = await context.image_to_base64(self.end_image) arguments["end_image_url"] = f"data:image/png;base64,{end_image_base64}" @@ -127,36 +125,35 @@ async def process(self, context: ProcessingContext) -> VideoRef: def get_basic_fields(cls): return ["image", "prompt", "aspect_ratio"] + class KlingVideo(FALNode): """ Generate video clips from your images using Kling 1.6. Supports multiple durations and aspect ratios. 
""" - + image: ImageRef = Field( - default=ImageRef(), - description="The image to transform into a video" + default=ImageRef(), description="The image to transform into a video" ) prompt: str = Field( - default="", - description="A description of the desired video motion and style" + default="", description="A description of the desired video motion and style" ) duration: KlingDuration = Field( default=KlingDuration.FIVE_SECONDS, - description="The duration of the generated video" + description="The duration of the generated video", ) aspect_ratio: AspectRatio = Field( default=AspectRatio.RATIO_16_9, - description="The aspect ratio of the generated video frame" + description="The aspect ratio of the generated video frame", ) async def process(self, context: ProcessingContext) -> VideoRef: image_base64 = await context.image_to_base64(self.image) - + arguments = { "image_url": f"data:image/png;base64,{image_base64}", "prompt": self.prompt, "duration": self.duration.value, - "aspect_ratio": self.aspect_ratio.value + "aspect_ratio": self.aspect_ratio.value, } res = await self.submit_request( @@ -171,37 +168,36 @@ async def process(self, context: ProcessingContext) -> VideoRef: def get_basic_fields(cls): return ["image", "prompt", "duration"] + class KlingVideoPro(FALNode): """ Generate video clips from your images using Kling 1.6 Pro. The professional version offers enhanced quality and performance compared to the standard version. 
""" - + image: ImageRef = Field( - default=ImageRef(), - description="The image to transform into a video" + default=ImageRef(), description="The image to transform into a video" ) prompt: str = Field( - default="", - description="A description of the desired video motion and style" + default="", description="A description of the desired video motion and style" ) duration: KlingDuration = Field( default=KlingDuration.FIVE_SECONDS, - description="The duration of the generated video" + description="The duration of the generated video", ) aspect_ratio: AspectRatio = Field( default=AspectRatio.RATIO_16_9, - description="The aspect ratio of the generated video frame" + description="The aspect ratio of the generated video frame", ) async def process(self, context: ProcessingContext) -> VideoRef: image_base64 = await context.image_to_base64(self.image) - + arguments = { "image_url": f"data:image/png;base64,{image_base64}", "prompt": self.prompt, "duration": self.duration.value, - "aspect_ratio": self.aspect_ratio.value + "aspect_ratio": self.aspect_ratio.value, } res = await self.submit_request( @@ -215,3 +211,414 @@ async def process(self, context: ProcessingContext) -> VideoRef: @classmethod def get_basic_fields(cls): return ["image", "prompt", "duration"] + + +class VideoSize(Enum): + SQUARE_HD = "square_hd" + SQUARE = "square" + PORTRAIT_4_3 = "portrait_4_3" + PORTRAIT_16_9 = "portrait_16_9" + LANDSCAPE_4_3 = "landscape_4_3" + LANDSCAPE_16_9 = "landscape_16_9" + + +class CogVideoX(FALNode): + """ + Generate videos from images using CogVideoX-5B. Features high-quality motion synthesis with + configurable parameters for fine-tuned control over the output. 
+ """ + + image: ImageRef = Field( + default=ImageRef(), description="The image to transform into a video" + ) + prompt: str = Field( + default="", description="A description of the desired video motion and style" + ) + video_size: VideoSize = Field( + default=VideoSize.LANDSCAPE_16_9, + description="The size/aspect ratio of the generated video", + ) + negative_prompt: str = Field( + default="Distorted, discontinuous, Ugly, blurry, low resolution, motionless, static, disfigured, disconnected limbs, Ugly faces, incomplete arms", + description="What to avoid in the generated video", + ) + num_inference_steps: int = Field( + default=50, + description="Number of denoising steps (higher = better quality but slower)", + ) + guidance_scale: float = Field( + default=7.0, + description="How closely to follow the prompt (higher = more faithful but less creative)", + ) + use_rife: bool = Field( + default=True, description="Whether to use RIFE for video interpolation" + ) + export_fps: int = Field( + default=16, description="Target frames per second for the output video" + ) + seed: int = Field( + default=-1, description="The same seed will output the same video every time" + ) + + async def process(self, context: ProcessingContext) -> VideoRef: + image_base64 = await context.image_to_base64(self.image) + + arguments = { + "image_url": f"data:image/png;base64,{image_base64}", + "prompt": self.prompt, + "video_size": self.video_size.value, + "negative_prompt": self.negative_prompt, + "num_inference_steps": self.num_inference_steps, + "guidance_scale": self.guidance_scale, + "use_rife": self.use_rife, + "export_fps": self.export_fps, + } + if self.seed != -1: + arguments["seed"] = self.seed + + res = await self.submit_request( + context=context, + application="fal-ai/cogvideox-5b/image-to-video", + arguments=arguments, + ) + assert "video" in res + return VideoRef(uri=res["video"]["url"]) + + @classmethod + def get_basic_fields(cls): + return ["image", "prompt", "video_size"] + 
class MiniMaxVideo(FALNode):
    """
    Generate video clips from your images using MiniMax Video model. Transform static art into dynamic
    masterpieces with enhanced smoothness and vivid motion.
    """

    image: ImageRef = Field(
        default=ImageRef(), description="The image to transform into a video"
    )
    prompt: str = Field(
        default="", description="A description of the desired video motion and style"
    )
    prompt_optimizer: bool = Field(
        default=True, description="Whether to use the model's prompt optimizer"
    )

    async def process(self, context: ProcessingContext) -> VideoRef:
        """Submit the image and prompt to the MiniMax video-01-live endpoint and return the video.

        The image is inlined as a base64 data URI; the response is expected to
        contain a ``video.url`` entry.
        """
        image_base64 = await context.image_to_base64(self.image)

        arguments = {
            "image_url": f"data:image/png;base64,{image_base64}",
            "prompt": self.prompt,
            "prompt_optimizer": self.prompt_optimizer,
        }

        res = await self.submit_request(
            context=context,
            application="fal-ai/minimax/video-01-live/image-to-video",
            arguments=arguments,
        )
        assert "video" in res
        return VideoRef(uri=res["video"]["url"])

    @classmethod
    def get_basic_fields(cls):
        return ["image", "prompt"]


class LTXVideo(FALNode):
    """
    Generate videos from images using LTX Video. Best results with 768x512 images and detailed,
    chronological descriptions of actions and scenes.
    """

    image: ImageRef = Field(
        default=ImageRef(),
        description="The image to transform into a video (768x512 recommended)",
    )
    prompt: str = Field(
        default="",
        description="A detailed description of the desired video motion and style",
    )
    negative_prompt: str = Field(
        default="low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly",
        description="What to avoid in the generated video",
    )
    num_inference_steps: int = Field(
        default=30,
        description="Number of inference steps (higher = better quality but slower)",
    )
    guidance_scale: float = Field(
        default=3.0,
        description="How closely to follow the prompt (higher = more faithful)",
    )
    seed: int = Field(
        default=-1, description="The same seed will output the same video every time"
    )

    async def process(self, context: ProcessingContext) -> VideoRef:
        """Run LTX image-to-video generation and return the resulting video reference."""
        image_base64 = await context.image_to_base64(self.image)

        arguments = {
            "image_url": f"data:image/png;base64,{image_base64}",
            "prompt": self.prompt,
            "negative_prompt": self.negative_prompt,
            "num_inference_steps": self.num_inference_steps,
            "guidance_scale": self.guidance_scale,
        }
        # -1 is the "unset" sentinel: omit the seed so the service picks a random one.
        if self.seed != -1:
            arguments["seed"] = self.seed

        res = await self.submit_request(
            context=context,
            application="fal-ai/ltx-video/image-to-video",
            arguments=arguments,
        )
        assert "video" in res
        return VideoRef(uri=res["video"]["url"])

    @classmethod
    def get_basic_fields(cls):
        return ["image", "prompt"]


class StableVideo(FALNode):
    """
    Generate short video clips from your images using Stable Video Diffusion v1.1. Features high-quality
    motion synthesis with configurable parameters.
    """

    image: ImageRef = Field(
        default=ImageRef(), description="The image to transform into a video"
    )
    motion_bucket_id: int = Field(
        default=127, description="Controls motion intensity (higher = more motion)"
    )
    cond_aug: float = Field(
        default=0.02,
        description="Amount of noise added to conditioning (higher = more motion)",
    )
    fps: int = Field(default=25, description="Frames per second of the output video")
    seed: int = Field(
        default=-1, description="The same seed will output the same video every time"
    )

    async def process(self, context: ProcessingContext) -> VideoRef:
        """Run Stable Video Diffusion v1.1 on the input image and return the video."""
        image_base64 = await context.image_to_base64(self.image)

        arguments = {
            "image_url": f"data:image/png;base64,{image_base64}",
            "motion_bucket_id": self.motion_bucket_id,
            "cond_aug": self.cond_aug,
            "fps": self.fps,
        }
        # -1 is the "unset" sentinel: omit the seed so the service picks a random one.
        if self.seed != -1:
            arguments["seed"] = self.seed

        res = await self.submit_request(
            context=context,
            application="fal-ai/stable-video",
            arguments=arguments,
        )
        assert "video" in res
        return VideoRef(uri=res["video"]["url"])

    @classmethod
    def get_basic_fields(cls):
        return ["image", "motion_bucket_id", "fps"]


class FastSVD(FALNode):
    """
    Generate short video clips from your images using SVD v1.1 at Lightning Speed. Features high-quality
    motion synthesis with configurable parameters for rapid video generation.
    """

    image: ImageRef = Field(
        default=ImageRef(), description="The image to transform into a video"
    )
    motion_bucket_id: int = Field(
        default=127, description="Controls motion intensity (higher = more motion)"
    )
    cond_aug: float = Field(
        default=0.02,
        description="Amount of noise added to conditioning (higher = more motion)",
    )
    steps: int = Field(
        default=4,
        description="Number of inference steps (higher = better quality but slower)",
    )
    fps: int = Field(
        default=10,
        description="Frames per second of the output video (total length is 25 frames)",
    )
    seed: int = Field(
        default=-1, description="The same seed will output the same video every time"
    )

    async def process(self, context: ProcessingContext) -> VideoRef:
        """Run the LCM-accelerated SVD endpoint on the input image and return the video."""
        image_base64 = await context.image_to_base64(self.image)

        arguments = {
            "image_url": f"data:image/png;base64,{image_base64}",
            "motion_bucket_id": self.motion_bucket_id,
            "cond_aug": self.cond_aug,
            "steps": self.steps,
            "fps": self.fps,
        }
        # -1 is the "unset" sentinel: omit the seed so the service picks a random one.
        if self.seed != -1:
            arguments["seed"] = self.seed

        res = await self.submit_request(
            context=context,
            application="fal-ai/fast-svd-lcm",
            arguments=arguments,
        )
        assert "video" in res
        return VideoRef(uri=res["video"]["url"])

    @classmethod
    def get_basic_fields(cls):
        return ["image", "motion_bucket_id", "fps"]


class AMTInterpolation(FALNode):
    """
    Interpolate between image frames to create smooth video transitions. Supports configurable FPS
    and recursive interpolation passes for higher quality results.
    """

    frames: list[ImageRef] = Field(
        # default_factory avoids a shared mutable default list across instances.
        default_factory=lambda: [ImageRef(), ImageRef()],
        description="List of frames to interpolate between (minimum 2 frames required)",
    )
    output_fps: int = Field(default=24, description="Output frames per second")
    recursive_interpolation_passes: int = Field(
        default=4,
        description="Number of recursive interpolation passes (higher = smoother)",
    )

    async def process(self, context: ProcessingContext) -> VideoRef:
        """Encode each frame as a data URI and submit the batch for interpolation."""
        frames_base64 = []
        for frame in self.frames:
            frame_base64 = await context.image_to_base64(frame)
            frames_base64.append({"url": f"data:image/png;base64,{frame_base64}"})

        arguments = {
            "frames": frames_base64,
            "output_fps": self.output_fps,
            "recursive_interpolation_passes": self.recursive_interpolation_passes,
        }

        res = await self.submit_request(
            context=context,
            application="fal-ai/amt-interpolation/frame-interpolation",
            arguments=arguments,
        )
        assert "video" in res
        return VideoRef(uri=res["video"]["url"])

    @classmethod
    def get_basic_fields(cls):
        return ["frames", "output_fps"]


class FaceModelResolution(Enum):
    # Face model resolution options accepted by the SadTalker endpoint.
    RESOLUTION_256 = "256"
    RESOLUTION_512 = "512"


class PreprocessType(Enum):
    # Image preprocessing modes accepted by the SadTalker endpoint.
    CROP = "crop"
    EXTCROP = "extcrop"
    RESIZE = "resize"
    FULL = "full"
    EXTFULL = "extfull"


class SadTalker(FALNode):
    """
    Generate talking face animations from a single image and audio file. Features configurable
    face model resolution and expression controls.
    """

    image: ImageRef = Field(
        default=ImageRef(), description="The source image to animate"
    )
    audio: str = Field(
        default="", description="URL of the audio file to drive the animation"
    )

    face_model_resolution: FaceModelResolution = Field(
        default=FaceModelResolution.RESOLUTION_256,
        description="Resolution of the face model",
    )
    expression_scale: float = Field(
        default=1.0, description="Scale of the expression (1.0 = normal)"
    )
    still_mode: bool = Field(
        default=False, description="Reduce head motion (works with preprocess 'full')"
    )
    preprocess: PreprocessType = Field(
        default=PreprocessType.CROP, description="Type of image preprocessing to apply"
    )

    async def process(self, context: ProcessingContext) -> VideoRef:
        """Animate the source image with the driving audio and return the video."""
        image_base64 = await context.image_to_base64(self.image)

        arguments = {
            "source_image_url": f"data:image/png;base64,{image_base64}",
            "driven_audio_url": self.audio,
            # Send the enum *values* (plain strings) rather than Enum members so the
            # payload is JSON-serializable — consistent with how sibling nodes pass
            # e.g. video_size.value.
            "face_model_resolution": self.face_model_resolution.value,
            "expression_scale": self.expression_scale,
            "still_mode": self.still_mode,
            "preprocess": self.preprocess.value,
        }

        res = await self.submit_request(
            context=context,
            application="fal-ai/sadtalker",
            arguments=arguments,
        )
        assert "video" in res
        return VideoRef(uri=res["video"]["url"])

    @classmethod
    def get_basic_fields(cls):
        return ["image", "audio", "face_model_resolution"]


class MuseTalk(FALNode):
    """
    Real-time high quality audio-driven lip-syncing model. Animate a face video with custom audio
    for natural-looking speech animation.
    """

    video: VideoRef = Field(
        default=VideoRef(), description="The source video to animate"
    )
    audio: AudioRef = Field(
        default=AudioRef(), description="The audio file to drive the lip sync"
    )

    async def process(self, context: ProcessingContext) -> VideoRef:
        """Upload the video/audio assets to fal storage, then run MuseTalk lip-syncing."""
        client = self.get_client(context)
        # MuseTalk needs fetchable URLs, so the raw asset bytes are uploaded first.
        video_bytes = await context.asset_to_bytes(self.video)
        audio_bytes = await context.asset_to_bytes(self.audio)
        video_url = await client.upload(video_bytes, "video/mp4")
        audio_url = await client.upload(audio_bytes, "audio/mp3")

        arguments = {"source_video_url": video_url, "audio_url": audio_url}

        res = await self.submit_request(
            context=context,
            application="fal-ai/musetalk",
            arguments=arguments,
        )
        assert "video" in res
        return VideoRef(uri=res["video"]["url"])

    @classmethod
    def get_basic_fields(cls):
        return ["video", "audio"]