"""Kling API Nodes | |
For source of truth on the allowed permutations of request fields, please reference: | |
- [Compatibility Table](https://app.klingai.com/global/dev/document-api/apiReference/model/skillsMap) | |
""" | |
from __future__ import annotations | |
from typing import Optional, TypeVar, Any | |
from collections.abc import Callable | |
import math | |
import logging | |
import torch | |
from comfy_api_nodes.apis import ( | |
KlingTaskStatus, | |
KlingCameraControl, | |
KlingCameraConfig, | |
KlingCameraControlType, | |
KlingVideoGenDuration, | |
KlingVideoGenMode, | |
KlingVideoGenAspectRatio, | |
KlingVideoGenModelName, | |
KlingText2VideoRequest, | |
KlingText2VideoResponse, | |
KlingImage2VideoRequest, | |
KlingImage2VideoResponse, | |
KlingVideoExtendRequest, | |
KlingVideoExtendResponse, | |
KlingLipSyncVoiceLanguage, | |
KlingLipSyncInputObject, | |
KlingLipSyncRequest, | |
KlingLipSyncResponse, | |
KlingVirtualTryOnModelName, | |
KlingVirtualTryOnRequest, | |
KlingVirtualTryOnResponse, | |
KlingVideoResult, | |
KlingImageResult, | |
KlingImageGenerationsRequest, | |
KlingImageGenerationsResponse, | |
KlingImageGenImageReferenceType, | |
KlingImageGenModelName, | |
KlingImageGenAspectRatio, | |
KlingVideoEffectsRequest, | |
KlingVideoEffectsResponse, | |
KlingDualCharacterEffectsScene, | |
KlingSingleImageEffectsScene, | |
KlingDualCharacterEffectInput, | |
KlingSingleImageEffectInput, | |
KlingCharacterEffectModelName, | |
KlingSingleImageEffectModelName, | |
) | |
from comfy_api_nodes.apis.client import ( | |
ApiEndpoint, | |
HttpMethod, | |
SynchronousOperation, | |
PollingOperation, | |
EmptyRequest, | |
) | |
from comfy_api_nodes.apinode_utils import ( | |
tensor_to_base64_string, | |
download_url_to_video_output, | |
upload_video_to_comfyapi, | |
upload_audio_to_comfyapi, | |
download_url_to_image_tensor, | |
) | |
from comfy_api_nodes.mapper_utils import model_field_to_node_input | |
from comfy_api_nodes.util.validation_utils import ( | |
validate_image_dimensions, | |
validate_image_aspect_ratio, | |
validate_video_dimensions, | |
validate_video_duration, | |
) | |
from comfy_api.input.basic_types import AudioInput | |
from comfy_api.input.video_types import VideoInput | |
from comfy_api.input_impl import VideoFromFile | |
from comfy.comfy_types.node_typing import IO, InputTypeOptions, ComfyNodeABC | |
KLING_API_VERSION = "v1" | |
PATH_TEXT_TO_VIDEO = f"/proxy/kling/{KLING_API_VERSION}/videos/text2video" | |
PATH_IMAGE_TO_VIDEO = f"/proxy/kling/{KLING_API_VERSION}/videos/image2video" | |
PATH_VIDEO_EXTEND = f"/proxy/kling/{KLING_API_VERSION}/videos/video-extend" | |
PATH_LIP_SYNC = f"/proxy/kling/{KLING_API_VERSION}/videos/lip-sync" | |
PATH_VIDEO_EFFECTS = f"/proxy/kling/{KLING_API_VERSION}/videos/effects" | |
PATH_CHARACTER_IMAGE = f"/proxy/kling/{KLING_API_VERSION}/images/generations" | |
PATH_VIRTUAL_TRY_ON = f"/proxy/kling/{KLING_API_VERSION}/images/kolors-virtual-try-on" | |
PATH_IMAGE_GENERATIONS = f"/proxy/kling/{KLING_API_VERSION}/images/generations" | |
MAX_PROMPT_LENGTH_T2V = 2500 | |
MAX_PROMPT_LENGTH_I2V = 500 | |
MAX_PROMPT_LENGTH_IMAGE_GEN = 500 | |
MAX_NEGATIVE_PROMPT_LENGTH_IMAGE_GEN = 200 | |
MAX_PROMPT_LENGTH_LIP_SYNC = 120 | |
AVERAGE_DURATION_T2V = 319 | |
AVERAGE_DURATION_I2V = 164 | |
AVERAGE_DURATION_LIP_SYNC = 455 | |
AVERAGE_DURATION_VIRTUAL_TRY_ON = 19 | |
AVERAGE_DURATION_IMAGE_GEN = 32 | |
AVERAGE_DURATION_VIDEO_EFFECTS = 320 | |
AVERAGE_DURATION_VIDEO_EXTEND = 320 | |
R = TypeVar("R") | |
class KlingApiError(Exception): | |
"""Base exception for Kling API errors.""" | |
pass | |
def poll_until_finished( | |
auth_kwargs: dict[str, str], | |
api_endpoint: ApiEndpoint[Any, R], | |
result_url_extractor: Optional[Callable[[R], str]] = None, | |
estimated_duration: Optional[int] = None, | |
node_id: Optional[str] = None, | |
) -> R: | |
"""Polls the Kling API endpoint until the task reaches a terminal state, then returns the response.""" | |
return PollingOperation( | |
poll_endpoint=api_endpoint, | |
completed_statuses=[ | |
KlingTaskStatus.succeed.value, | |
], | |
failed_statuses=[KlingTaskStatus.failed.value], | |
status_extractor=lambda response: ( | |
response.data.task_status.value | |
if response.data and response.data.task_status | |
else None | |
), | |
auth_kwargs=auth_kwargs, | |
result_url_extractor=result_url_extractor, | |
estimated_duration=estimated_duration, | |
node_id=node_id, | |
).execute() | |
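
# Illustrative use of the polling helper above. The node classes below each
# wrap this pattern in a `get_response` method; `auth` and `task_id` here are
# assumed to come from a prior task-creation request:
#
#   response = poll_until_finished(
#       auth,
#       ApiEndpoint(
#           path=f"{PATH_TEXT_TO_VIDEO}/{task_id}",
#           method=HttpMethod.GET,
#           request_model=EmptyRequest,
#           response_model=KlingText2VideoResponse,
#       ),
#       result_url_extractor=get_video_url_from_response,
#       estimated_duration=AVERAGE_DURATION_T2V,
#   )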


def is_valid_camera_control_configs(configs: list[float]) -> bool:
    """Verifies that at least one camera control configuration is non-zero."""
    return any(not math.isclose(value, 0.0) for value in configs)


def is_valid_prompt(prompt: str) -> bool:
    """Verifies that the prompt is not empty."""
    return bool(prompt)


def is_valid_task_creation_response(response: KlingText2VideoResponse) -> bool:
    """Verifies that the initial response contains a task ID."""
    return bool(response.data.task_id)


def is_valid_video_response(response: KlingText2VideoResponse) -> bool:
    """Verifies that the response contains a task result with at least one video."""
    return (
        response.data is not None
        and response.data.task_result is not None
        and response.data.task_result.videos is not None
        and len(response.data.task_result.videos) > 0
    )


def is_valid_image_response(response: KlingVirtualTryOnResponse) -> bool:
    """Verifies that the response contains a task result with at least one image."""
    return (
        response.data is not None
        and response.data.task_result is not None
        and response.data.task_result.images is not None
        and len(response.data.task_result.images) > 0
    )


def validate_prompts(prompt: str, negative_prompt: str, max_length: int) -> bool:
    """Verifies that the positive prompt is not empty and that neither prompt is too long."""
    if not prompt:
        raise ValueError("Positive prompt is empty")
    if len(prompt) > max_length:
        raise ValueError(f"Positive prompt is too long: {len(prompt)} characters")
    if negative_prompt and len(negative_prompt) > max_length:
        raise ValueError(
            f"Negative prompt is too long: {len(negative_prompt)} characters"
        )
    return True
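
# For example:
#
#   validate_prompts("a cat on a beach", "", MAX_PROMPT_LENGTH_T2V)  # -> True
#   validate_prompts("", "", MAX_PROMPT_LENGTH_T2V)  # raises ValueError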


def validate_task_creation_response(response) -> None:
    """Validates that the Kling task creation request was successful."""
    if not is_valid_task_creation_response(response):
        error_msg = f"Kling initial request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}"
        logging.error(error_msg)
        raise KlingApiError(error_msg)


def validate_video_result_response(response) -> None:
    """Validates that the Kling task result contains a video."""
    if not is_valid_video_response(response):
        error_msg = f"Kling task {response.data.task_id} succeeded but no video data found in response."
        logging.error(f"Error: {error_msg}.\nResponse: {response}")
        raise KlingApiError(error_msg)


def validate_image_result_response(response) -> None:
    """Validates that the Kling task result contains an image."""
    if not is_valid_image_response(response):
        error_msg = f"Kling task {response.data.task_id} succeeded but no image data found in response."
        logging.error(f"Error: {error_msg}.\nResponse: {response}")
        raise KlingApiError(error_msg)


def validate_input_image(image: torch.Tensor) -> None:
    """
    Validates the input image adheres to the expectations of the Kling API:
    - The image resolution should not be less than 300*300px
    - The aspect ratio of the image should be between 1:2.5 ~ 2.5:1

    See: https://app.klingai.com/global/dev/document-api/apiReference/model/imageToVideo
    """
    validate_image_dimensions(image, min_width=300, min_height=300)
    validate_image_aspect_ratio(image, min_aspect_ratio=1 / 2.5, max_aspect_ratio=2.5)
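
# Illustrative checks, assuming ComfyUI's [B, H, W, C] image tensor layout:
#
#   validate_input_image(torch.zeros(1, 512, 512, 3))  # passes: >=300px per side, 1:1
#   validate_input_image(torch.zeros(1, 300, 900, 3))  # should raise: 3:1 exceeds 2.5:1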


def get_camera_control_input_config(
    tooltip: str, default: float = 0.0
) -> tuple[IO, InputTypeOptions]:
    """Returns common InputTypeOptions for Kling camera control configurations."""
    input_config = {
        "default": default,
        "min": -10.0,
        "max": 10.0,
        "step": 0.25,
        "display": "slider",
        "tooltip": tooltip,
    }
    return IO.FLOAT, input_config


def get_video_from_response(response) -> KlingVideoResult:
    """Returns the first video object from the Kling video generation task result.

    Will raise an error if the response is not valid.
    """
    video = response.data.task_result.videos[0]
    logging.info(
        "Kling task %s succeeded. Video URL: %s", response.data.task_id, video.url
    )
    return video


def get_video_url_from_response(response) -> Optional[str]:
    """Returns the first video url from the Kling video generation task result.

    Will not raise an error if the response is not valid.
    """
    if response and is_valid_video_response(response):
        return str(get_video_from_response(response).url)
    else:
        return None


def get_images_from_response(response) -> list[KlingImageResult]:
    """Returns the list of image objects from the Kling image generation task result.

    Will raise an error if the response is not valid.
    """
    images = response.data.task_result.images
    logging.info("Kling task %s succeeded. Images: %s", response.data.task_id, images)
    return images


def get_images_urls_from_response(response) -> Optional[str]:
    """Returns the image urls from the Kling image generation task result, joined
    into a single newline-separated string.

    Will not raise an error if the response is not valid; returns None instead.
    """
    if response and is_valid_image_response(response):
        images = get_images_from_response(response)
        image_urls = [str(image.url) for image in images]
        return "\n".join(image_urls)
    else:
        return None


def video_result_to_node_output(
    video: KlingVideoResult,
) -> tuple[VideoFromFile, str, str]:
    """Converts a KlingVideoResult to a tuple of (VideoFromFile, str, str) to be used as a ComfyUI node output."""
    return (
        download_url_to_video_output(video.url),
        str(video.id),
        str(video.duration),
    )


def image_result_to_node_output(
    images: list[KlingImageResult],
) -> torch.Tensor:
    """
    Converts a list of KlingImageResult objects to a [B, H, W, C] image tensor to be used as a ComfyUI node output.

    If multiple images are returned, they are stacked along the batch dimension.
    """
    if len(images) == 1:
        return download_url_to_image_tensor(images[0].url)
    else:
        return torch.cat([download_url_to_image_tensor(image.url) for image in images])
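
# Note: download_url_to_image_tensor is assumed to return a single-image
# [1, H, W, C] tensor, so the torch.cat above stacks along the default dim 0,
# e.g. two [1, 512, 512, 3] results -> one [2, 512, 512, 3] batch.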


class KlingNodeBase(ComfyNodeABC):
    """Base class for Kling nodes."""

    FUNCTION = "api_call"
    CATEGORY = "api node/video/Kling"
    API_NODE = True


class KlingCameraControls(KlingNodeBase):
    """Kling Camera Controls Node"""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "camera_control_type": model_field_to_node_input(
                    IO.COMBO,
                    KlingCameraControl,
                    "type",
                    enum_type=KlingCameraControlType,
                ),
                "horizontal_movement": get_camera_control_input_config(
                    "Controls camera's movement along horizontal axis (x-axis). Negative indicates left, positive indicates right"
                ),
                "vertical_movement": get_camera_control_input_config(
                    "Controls camera's movement along vertical axis (y-axis). Negative indicates downward, positive indicates upward."
                ),
                "pan": get_camera_control_input_config(
                    "Controls camera's rotation in vertical plane (x-axis). Negative indicates downward rotation, positive indicates upward rotation.",
                    default=0.5,
                ),
                "tilt": get_camera_control_input_config(
                    "Controls camera's rotation in horizontal plane (y-axis). Negative indicates left rotation, positive indicates right rotation.",
                ),
                "roll": get_camera_control_input_config(
                    "Controls camera's rolling amount (z-axis). Negative indicates counterclockwise, positive indicates clockwise.",
                ),
                "zoom": get_camera_control_input_config(
                    "Controls change in camera's focal length. Negative indicates narrower field of view, positive indicates wider field of view.",
                ),
            }
        }

    DESCRIPTION = "Allows specifying configuration options for Kling Camera Controls and motion control effects."
    RETURN_TYPES = ("CAMERA_CONTROL",)
    RETURN_NAMES = ("camera_control",)
    FUNCTION = "main"
    API_NODE = False  # This is just a helper node, it doesn't make an API call

    @classmethod
    def VALIDATE_INPUTS(
        cls,
        horizontal_movement: float,
        vertical_movement: float,
        pan: float,
        tilt: float,
        roll: float,
        zoom: float,
    ) -> bool | str:
        if not is_valid_camera_control_configs(
            [
                horizontal_movement,
                vertical_movement,
                pan,
                tilt,
                roll,
                zoom,
            ]
        ):
            return "Invalid camera control configs: at least one of the values must be non-zero"
        return True

    def main(
        self,
        camera_control_type: str,
        horizontal_movement: float,
        vertical_movement: float,
        pan: float,
        tilt: float,
        roll: float,
        zoom: float,
    ) -> tuple[KlingCameraControl]:
        return (
            KlingCameraControl(
                type=KlingCameraControlType(camera_control_type),
                config=KlingCameraConfig(
                    horizontal=horizontal_movement,
                    vertical=vertical_movement,
                    pan=pan,
                    roll=roll,
                    tilt=tilt,
                    zoom=zoom,
                ),
            ),
        )


class KlingTextToVideoNode(KlingNodeBase):
    """Kling Text to Video Node"""

    @staticmethod
    def get_mode_string_mapping() -> dict[str, tuple[str, str, str]]:
        """
        Returns a mapping of mode strings to their corresponding (mode, duration, model_name) tuples.

        Only includes config combos supported by the text to video endpoint.
        See: [Kling API Docs Capability Map](https://app.klingai.com/global/dev/document-api/apiReference/model/skillsMap)
        """
        return {
            "standard mode / 5s duration / kling-v1": ("std", "5", "kling-v1"),
            "standard mode / 10s duration / kling-v1": ("std", "10", "kling-v1"),
            "pro mode / 5s duration / kling-v1": ("pro", "5", "kling-v1"),
            "pro mode / 10s duration / kling-v1": ("pro", "10", "kling-v1"),
            "standard mode / 5s duration / kling-v1-6": ("std", "5", "kling-v1-6"),
            "standard mode / 10s duration / kling-v1-6": ("std", "10", "kling-v1-6"),
            "pro mode / 5s duration / kling-v2-master": ("pro", "5", "kling-v2-master"),
            "pro mode / 10s duration / kling-v2-master": ("pro", "10", "kling-v2-master"),
            "standard mode / 5s duration / kling-v2-master": ("std", "5", "kling-v2-master"),
            "standard mode / 10s duration / kling-v2-master": ("std", "10", "kling-v2-master"),
        }
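
    # Each combo string unpacks directly into request fields, e.g.:
    #
    #   mode, duration, model_name = KlingTextToVideoNode.get_mode_string_mapping()[
    #       "pro mode / 5s duration / kling-v2-master"
    #   ]
    #   # -> ("pro", "5", "kling-v2-master")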

    @classmethod
    def INPUT_TYPES(s):
        modes = list(KlingTextToVideoNode.get_mode_string_mapping().keys())
        return {
            "required": {
                "prompt": model_field_to_node_input(
                    IO.STRING, KlingText2VideoRequest, "prompt", multiline=True
                ),
                "negative_prompt": model_field_to_node_input(
                    IO.STRING, KlingText2VideoRequest, "negative_prompt", multiline=True
                ),
                "cfg_scale": model_field_to_node_input(
                    IO.FLOAT,
                    KlingText2VideoRequest,
                    "cfg_scale",
                    default=1.0,
                    min=0.0,
                    max=1.0,
                ),
                "aspect_ratio": model_field_to_node_input(
                    IO.COMBO,
                    KlingText2VideoRequest,
                    "aspect_ratio",
                    enum_type=KlingVideoGenAspectRatio,
                ),
                "mode": (
                    modes,
                    {
                        "default": modes[4],
                        "tooltip": "The configuration to use for the video generation following the format: mode / duration / model_name.",
                    },
                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    RETURN_TYPES = ("VIDEO", "STRING", "STRING")
    RETURN_NAMES = ("VIDEO", "video_id", "duration")
    DESCRIPTION = "Kling Text to Video Node"

    def get_response(
        self, task_id: str, auth_kwargs: dict[str, str], node_id: Optional[str] = None
    ) -> KlingText2VideoResponse:
        return poll_until_finished(
            auth_kwargs,
            ApiEndpoint(
                path=f"{PATH_TEXT_TO_VIDEO}/{task_id}",
                method=HttpMethod.GET,
                request_model=EmptyRequest,
                response_model=KlingText2VideoResponse,
            ),
            result_url_extractor=get_video_url_from_response,
            estimated_duration=AVERAGE_DURATION_T2V,
            node_id=node_id,
        )

    def api_call(
        self,
        prompt: str,
        negative_prompt: str,
        cfg_scale: float,
        mode: str,
        aspect_ratio: str,
        camera_control: Optional[KlingCameraControl] = None,
        model_name: Optional[str] = None,
        duration: Optional[str] = None,
        unique_id: Optional[str] = None,
        **kwargs,
    ) -> tuple[VideoFromFile, str, str]:
        validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V)
        if model_name is None:
            mode, duration, model_name = self.get_mode_string_mapping()[mode]
        initial_operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=PATH_TEXT_TO_VIDEO,
                method=HttpMethod.POST,
                request_model=KlingText2VideoRequest,
                response_model=KlingText2VideoResponse,
            ),
            request=KlingText2VideoRequest(
                prompt=prompt if prompt else None,
                negative_prompt=negative_prompt if negative_prompt else None,
                duration=KlingVideoGenDuration(duration),
                mode=KlingVideoGenMode(mode),
                model_name=KlingVideoGenModelName(model_name),
                cfg_scale=cfg_scale,
                aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio),
                camera_control=camera_control,
            ),
            auth_kwargs=kwargs,
        )
        task_creation_response = initial_operation.execute()
        validate_task_creation_response(task_creation_response)
        task_id = task_creation_response.data.task_id
        final_response = self.get_response(
            task_id, auth_kwargs=kwargs, node_id=unique_id
        )
        validate_video_result_response(final_response)
        video = get_video_from_response(final_response)
        return video_result_to_node_output(video)


class KlingCameraControlT2VNode(KlingTextToVideoNode):
    """
    Kling Text to Video Camera Control Node. This node is a text to video node, but it supports controlling the camera.

    Duration, mode, and model_name request fields are hard-coded because camera control is only supported in standard mode with the kling-v1 model at 5s duration as of 2025-05-02.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "prompt": model_field_to_node_input(
                    IO.STRING, KlingText2VideoRequest, "prompt", multiline=True
                ),
                "negative_prompt": model_field_to_node_input(
                    IO.STRING,
                    KlingText2VideoRequest,
                    "negative_prompt",
                    multiline=True,
                ),
                "cfg_scale": model_field_to_node_input(
                    IO.FLOAT,
                    KlingText2VideoRequest,
                    "cfg_scale",
                    default=0.75,
                    min=0.0,
                    max=1.0,
                ),
                "aspect_ratio": model_field_to_node_input(
                    IO.COMBO,
                    KlingText2VideoRequest,
                    "aspect_ratio",
                    enum_type=KlingVideoGenAspectRatio,
                ),
                "camera_control": (
                    "CAMERA_CONTROL",
                    {
                        "tooltip": "Can be created using the Kling Camera Controls node. Controls the camera movement and motion during the video generation.",
                    },
                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    DESCRIPTION = "Transform text into cinematic videos with professional camera movements that simulate real-world cinematography. Control virtual camera actions including zoom, rotation, pan, tilt, and first-person view, while maintaining focus on your original text."

    def api_call(
        self,
        prompt: str,
        negative_prompt: str,
        cfg_scale: float,
        aspect_ratio: str,
        camera_control: Optional[KlingCameraControl] = None,
        unique_id: Optional[str] = None,
        **kwargs,
    ):
        return super().api_call(
            model_name=KlingVideoGenModelName.kling_v1,
            cfg_scale=cfg_scale,
            mode=KlingVideoGenMode.std,
            aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio),
            duration=KlingVideoGenDuration.field_5,
            prompt=prompt,
            negative_prompt=negative_prompt,
            camera_control=camera_control,
            unique_id=unique_id,
            **kwargs,
        )


class KlingImage2VideoNode(KlingNodeBase):
    """Kling Image to Video Node"""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "start_frame": model_field_to_node_input(
                    IO.IMAGE,
                    KlingImage2VideoRequest,
                    "image",
                    tooltip="The reference image used to generate the video.",
                ),
                "prompt": model_field_to_node_input(
                    IO.STRING, KlingImage2VideoRequest, "prompt", multiline=True
                ),
                "negative_prompt": model_field_to_node_input(
                    IO.STRING,
                    KlingImage2VideoRequest,
                    "negative_prompt",
                    multiline=True,
                ),
                "model_name": model_field_to_node_input(
                    IO.COMBO,
                    KlingImage2VideoRequest,
                    "model_name",
                    enum_type=KlingVideoGenModelName,
                ),
                "cfg_scale": model_field_to_node_input(
                    IO.FLOAT,
                    KlingImage2VideoRequest,
                    "cfg_scale",
                    default=0.8,
                    min=0.0,
                    max=1.0,
                ),
                "mode": model_field_to_node_input(
                    IO.COMBO,
                    KlingImage2VideoRequest,
                    "mode",
                    enum_type=KlingVideoGenMode,
                ),
                "aspect_ratio": model_field_to_node_input(
                    IO.COMBO,
                    KlingImage2VideoRequest,
                    "aspect_ratio",
                    enum_type=KlingVideoGenAspectRatio,
                ),
                "duration": model_field_to_node_input(
                    IO.COMBO,
                    KlingImage2VideoRequest,
                    "duration",
                    enum_type=KlingVideoGenDuration,
                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    RETURN_TYPES = ("VIDEO", "STRING", "STRING")
    RETURN_NAMES = ("VIDEO", "video_id", "duration")
    DESCRIPTION = "Kling Image to Video Node"

    def get_response(
        self, task_id: str, auth_kwargs: dict[str, str], node_id: Optional[str] = None
    ) -> KlingImage2VideoResponse:
        return poll_until_finished(
            auth_kwargs,
            ApiEndpoint(
                path=f"{PATH_IMAGE_TO_VIDEO}/{task_id}",
                method=HttpMethod.GET,
                request_model=EmptyRequest,
                response_model=KlingImage2VideoResponse,
            ),
            result_url_extractor=get_video_url_from_response,
            estimated_duration=AVERAGE_DURATION_I2V,
            node_id=node_id,
        )

    def api_call(
        self,
        start_frame: torch.Tensor,
        prompt: str,
        negative_prompt: str,
        model_name: str,
        cfg_scale: float,
        mode: str,
        aspect_ratio: str,
        duration: str,
        camera_control: Optional[KlingCameraControl] = None,
        end_frame: Optional[torch.Tensor] = None,
        unique_id: Optional[str] = None,
        **kwargs,
    ) -> tuple[VideoFromFile, str, str]:
        validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_I2V)
        validate_input_image(start_frame)
        if camera_control is not None:
            # Camera control type for image 2 video is always `simple`
            camera_control.type = KlingCameraControlType.simple
        initial_operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=PATH_IMAGE_TO_VIDEO,
                method=HttpMethod.POST,
                request_model=KlingImage2VideoRequest,
                response_model=KlingImage2VideoResponse,
            ),
            request=KlingImage2VideoRequest(
                model_name=KlingVideoGenModelName(model_name),
                image=tensor_to_base64_string(start_frame),
                image_tail=(
                    tensor_to_base64_string(end_frame)
                    if end_frame is not None
                    else None
                ),
                prompt=prompt,
                negative_prompt=negative_prompt if negative_prompt else None,
                cfg_scale=cfg_scale,
                mode=KlingVideoGenMode(mode),
                duration=KlingVideoGenDuration(duration),
                camera_control=camera_control,
            ),
            auth_kwargs=kwargs,
        )
        task_creation_response = initial_operation.execute()
        validate_task_creation_response(task_creation_response)
        task_id = task_creation_response.data.task_id
        final_response = self.get_response(
            task_id, auth_kwargs=kwargs, node_id=unique_id
        )
        validate_video_result_response(final_response)
        video = get_video_from_response(final_response)
        return video_result_to_node_output(video)


class KlingCameraControlI2VNode(KlingImage2VideoNode):
    """
    Kling Image to Video Camera Control Node. This node is an image to video node, but it supports controlling the camera.

    Duration, mode, and model_name request fields are hard-coded because camera control is only supported in pro mode with the kling-v1-5 model at 5s duration as of 2025-05-02.
    """

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "start_frame": model_field_to_node_input(
                    IO.IMAGE, KlingImage2VideoRequest, "image"
                ),
                "prompt": model_field_to_node_input(
                    IO.STRING, KlingImage2VideoRequest, "prompt", multiline=True
                ),
                "negative_prompt": model_field_to_node_input(
                    IO.STRING,
                    KlingImage2VideoRequest,
                    "negative_prompt",
                    multiline=True,
                ),
                "cfg_scale": model_field_to_node_input(
                    IO.FLOAT,
                    KlingImage2VideoRequest,
                    "cfg_scale",
                    default=0.75,
                    min=0.0,
                    max=1.0,
                ),
                "aspect_ratio": model_field_to_node_input(
                    IO.COMBO,
                    KlingImage2VideoRequest,
                    "aspect_ratio",
                    enum_type=KlingVideoGenAspectRatio,
                ),
                "camera_control": (
                    "CAMERA_CONTROL",
                    {
                        "tooltip": "Can be created using the Kling Camera Controls node. Controls the camera movement and motion during the video generation.",
                    },
                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    DESCRIPTION = "Transform still images into cinematic videos with professional camera movements that simulate real-world cinematography. Control virtual camera actions including zoom, rotation, pan, tilt, and first-person view, while maintaining focus on your original image."

    def api_call(
        self,
        start_frame: torch.Tensor,
        prompt: str,
        negative_prompt: str,
        cfg_scale: float,
        aspect_ratio: str,
        camera_control: KlingCameraControl,
        unique_id: Optional[str] = None,
        **kwargs,
    ):
        return super().api_call(
            model_name=KlingVideoGenModelName.kling_v1_5,
            start_frame=start_frame,
            cfg_scale=cfg_scale,
            mode=KlingVideoGenMode.pro,
            aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio),
            duration=KlingVideoGenDuration.field_5,
            prompt=prompt,
            negative_prompt=negative_prompt,
            camera_control=camera_control,
            unique_id=unique_id,
            **kwargs,
        )


class KlingStartEndFrameNode(KlingImage2VideoNode):
    """
    Kling First Last Frame Node. This node allows creation of a video from a first and last frame. It calls the normal image to video endpoint, but only allows the subset of input options that support the `image_tail` request field.
    """

    @staticmethod
    def get_mode_string_mapping() -> dict[str, tuple[str, str, str]]:
        """
        Returns a mapping of mode strings to their corresponding (mode, duration, model_name) tuples.

        Only includes config combos that support the `image_tail` request field.
        See: [Kling API Docs Capability Map](https://app.klingai.com/global/dev/document-api/apiReference/model/skillsMap)
        """
        return {
            "standard mode / 5s duration / kling-v1": ("std", "5", "kling-v1"),
            "pro mode / 5s duration / kling-v1": ("pro", "5", "kling-v1"),
            "pro mode / 5s duration / kling-v1-5": ("pro", "5", "kling-v1-5"),
            "pro mode / 10s duration / kling-v1-5": ("pro", "10", "kling-v1-5"),
            "pro mode / 5s duration / kling-v1-6": ("pro", "5", "kling-v1-6"),
            "pro mode / 10s duration / kling-v1-6": ("pro", "10", "kling-v1-6"),
        }

    @classmethod
    def INPUT_TYPES(s):
        modes = list(KlingStartEndFrameNode.get_mode_string_mapping().keys())
        return {
            "required": {
                "start_frame": model_field_to_node_input(
                    IO.IMAGE, KlingImage2VideoRequest, "image"
                ),
                "end_frame": model_field_to_node_input(
                    IO.IMAGE, KlingImage2VideoRequest, "image_tail"
                ),
                "prompt": model_field_to_node_input(
                    IO.STRING, KlingImage2VideoRequest, "prompt", multiline=True
                ),
                "negative_prompt": model_field_to_node_input(
                    IO.STRING,
                    KlingImage2VideoRequest,
                    "negative_prompt",
                    multiline=True,
                ),
                "cfg_scale": model_field_to_node_input(
                    IO.FLOAT,
                    KlingImage2VideoRequest,
                    "cfg_scale",
                    default=0.5,
                    min=0.0,
                    max=1.0,
                ),
                "aspect_ratio": model_field_to_node_input(
                    IO.COMBO,
                    KlingImage2VideoRequest,
                    "aspect_ratio",
                    enum_type=KlingVideoGenAspectRatio,
                ),
                "mode": (
                    modes,
                    {
                        "default": modes[2],
                        "tooltip": "The configuration to use for the video generation following the format: mode / duration / model_name.",
                    },
                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    DESCRIPTION = "Generate a video sequence that transitions between your provided start and end images. The node creates all frames in between, producing a smooth transformation from the first frame to the last."

    def api_call(
        self,
        start_frame: torch.Tensor,
        end_frame: torch.Tensor,
        prompt: str,
        negative_prompt: str,
        cfg_scale: float,
        aspect_ratio: str,
        mode: str,
        unique_id: Optional[str] = None,
        **kwargs,
    ):
        mode, duration, model_name = KlingStartEndFrameNode.get_mode_string_mapping()[
            mode
        ]
        return super().api_call(
            prompt=prompt,
            negative_prompt=negative_prompt,
            model_name=model_name,
            start_frame=start_frame,
            cfg_scale=cfg_scale,
            mode=mode,
            aspect_ratio=aspect_ratio,
            duration=duration,
            end_frame=end_frame,
            unique_id=unique_id,
            **kwargs,
        )


class KlingVideoExtendNode(KlingNodeBase):
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "prompt": model_field_to_node_input(
                    IO.STRING, KlingVideoExtendRequest, "prompt", multiline=True
                ),
                "negative_prompt": model_field_to_node_input(
                    IO.STRING,
                    KlingVideoExtendRequest,
                    "negative_prompt",
                    multiline=True,
                ),
                "cfg_scale": model_field_to_node_input(
                    IO.FLOAT,
                    KlingVideoExtendRequest,
                    "cfg_scale",
                    default=0.5,
                    min=0.0,
                    max=1.0,
                ),
                "video_id": model_field_to_node_input(
                    IO.STRING, KlingVideoExtendRequest, "video_id", forceInput=True
                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    RETURN_TYPES = ("VIDEO", "STRING", "STRING")
    RETURN_NAMES = ("VIDEO", "video_id", "duration")
    DESCRIPTION = "Kling Video Extend Node. Extend videos made by other Kling nodes. The video_id is created by using other Kling Nodes."

    def get_response(
        self, task_id: str, auth_kwargs: dict[str, str], node_id: Optional[str] = None
    ) -> KlingVideoExtendResponse:
        return poll_until_finished(
            auth_kwargs,
            ApiEndpoint(
                path=f"{PATH_VIDEO_EXTEND}/{task_id}",
                method=HttpMethod.GET,
                request_model=EmptyRequest,
                response_model=KlingVideoExtendResponse,
            ),
            result_url_extractor=get_video_url_from_response,
            estimated_duration=AVERAGE_DURATION_VIDEO_EXTEND,
            node_id=node_id,
        )

    def api_call(
        self,
        prompt: str,
        negative_prompt: str,
        cfg_scale: float,
        video_id: str,
        unique_id: Optional[str] = None,
        **kwargs,
    ) -> tuple[VideoFromFile, str, str]:
        validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V)
        initial_operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=PATH_VIDEO_EXTEND,
                method=HttpMethod.POST,
                request_model=KlingVideoExtendRequest,
                response_model=KlingVideoExtendResponse,
            ),
            request=KlingVideoExtendRequest(
                prompt=prompt if prompt else None,
                negative_prompt=negative_prompt if negative_prompt else None,
                cfg_scale=cfg_scale,
                video_id=video_id,
            ),
            auth_kwargs=kwargs,
        )
        task_creation_response = initial_operation.execute()
        validate_task_creation_response(task_creation_response)
        task_id = task_creation_response.data.task_id
        final_response = self.get_response(
            task_id, auth_kwargs=kwargs, node_id=unique_id
        )
        validate_video_result_response(final_response)
        video = get_video_from_response(final_response)
        return video_result_to_node_output(video)


class KlingVideoEffectsBase(KlingNodeBase):
    """Kling Video Effects Base"""

    RETURN_TYPES = ("VIDEO", "STRING", "STRING")
    RETURN_NAMES = ("VIDEO", "video_id", "duration")

    def get_response(
        self, task_id: str, auth_kwargs: dict[str, str], node_id: Optional[str] = None
    ) -> KlingVideoEffectsResponse:
        return poll_until_finished(
            auth_kwargs,
            ApiEndpoint(
                path=f"{PATH_VIDEO_EFFECTS}/{task_id}",
                method=HttpMethod.GET,
                request_model=EmptyRequest,
                response_model=KlingVideoEffectsResponse,
            ),
            result_url_extractor=get_video_url_from_response,
            estimated_duration=AVERAGE_DURATION_VIDEO_EFFECTS,
            node_id=node_id,
        )

    def api_call(
        self,
        dual_character: bool,
        effect_scene: KlingDualCharacterEffectsScene | KlingSingleImageEffectsScene,
        model_name: str,
        duration: KlingVideoGenDuration,
        image_1: torch.Tensor,
        image_2: Optional[torch.Tensor] = None,
        mode: Optional[KlingVideoGenMode] = None,
        unique_id: Optional[str] = None,
        **kwargs,
    ):
        if dual_character:
            request_input_field = KlingDualCharacterEffectInput(
                model_name=model_name,
                mode=mode,
                images=[
                    tensor_to_base64_string(image_1),
                    tensor_to_base64_string(image_2),
                ],
                duration=duration,
            )
        else:
            request_input_field = KlingSingleImageEffectInput(
                model_name=model_name,
                image=tensor_to_base64_string(image_1),
                duration=duration,
            )
        initial_operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=PATH_VIDEO_EFFECTS,
                method=HttpMethod.POST,
                request_model=KlingVideoEffectsRequest,
                response_model=KlingVideoEffectsResponse,
            ),
            request=KlingVideoEffectsRequest(
                effect_scene=effect_scene,
                input=request_input_field,
            ),
            auth_kwargs=kwargs,
        )
        task_creation_response = initial_operation.execute()
        validate_task_creation_response(task_creation_response)
        task_id = task_creation_response.data.task_id
        final_response = self.get_response(
            task_id, auth_kwargs=kwargs, node_id=unique_id
        )
        validate_video_result_response(final_response)
        video = get_video_from_response(final_response)
        return video_result_to_node_output(video)


class KlingDualCharacterVideoEffectNode(KlingVideoEffectsBase):
    """Kling Dual Character Video Effect Node"""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "image_left": (IO.IMAGE, {"tooltip": "Left side image"}),
                "image_right": (IO.IMAGE, {"tooltip": "Right side image"}),
                "effect_scene": model_field_to_node_input(
                    IO.COMBO,
                    KlingVideoEffectsRequest,
                    "effect_scene",
                    enum_type=KlingDualCharacterEffectsScene,
                ),
                "model_name": model_field_to_node_input(
                    IO.COMBO,
                    KlingDualCharacterEffectInput,
                    "model_name",
                    enum_type=KlingCharacterEffectModelName,
                ),
                "mode": model_field_to_node_input(
                    IO.COMBO,
                    KlingDualCharacterEffectInput,
                    "mode",
                    enum_type=KlingVideoGenMode,
                ),
                "duration": model_field_to_node_input(
                    IO.COMBO,
                    KlingDualCharacterEffectInput,
                    "duration",
                    enum_type=KlingVideoGenDuration,
                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    DESCRIPTION = "Achieve different special effects when generating a video based on the effect_scene. First image will be positioned on left side, second on right side of the composite."
    RETURN_TYPES = ("VIDEO", "STRING")
    RETURN_NAMES = ("VIDEO", "duration")

    def api_call(
        self,
        image_left: torch.Tensor,
        image_right: torch.Tensor,
        effect_scene: KlingDualCharacterEffectsScene,
        model_name: KlingCharacterEffectModelName,
        mode: KlingVideoGenMode,
        duration: KlingVideoGenDuration,
        unique_id: Optional[str] = None,
        **kwargs,
    ):
        video, _, duration = super().api_call(
            dual_character=True,
            effect_scene=effect_scene,
            model_name=model_name,
            mode=mode,
            duration=duration,
            image_1=image_left,
            image_2=image_right,
            unique_id=unique_id,
            **kwargs,
        )
        return video, duration


class KlingSingleImageVideoEffectNode(KlingVideoEffectsBase):
    """Kling Single Image Video Effect Node"""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "image": (
                    IO.IMAGE,
                    {
                        "tooltip": "Reference Image. URL or Base64 encoded string (without data:image prefix). File size cannot exceed 10MB, resolution not less than 300*300px, aspect ratio between 1:2.5 ~ 2.5:1"
                    },
                ),
                "effect_scene": model_field_to_node_input(
                    IO.COMBO,
                    KlingVideoEffectsRequest,
                    "effect_scene",
                    enum_type=KlingSingleImageEffectsScene,
                ),
                "model_name": model_field_to_node_input(
                    IO.COMBO,
                    KlingSingleImageEffectInput,
                    "model_name",
                    enum_type=KlingSingleImageEffectModelName,
                ),
                "duration": model_field_to_node_input(
                    IO.COMBO,
                    KlingSingleImageEffectInput,
                    "duration",
                    enum_type=KlingVideoGenDuration,
                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    DESCRIPTION = "Achieve different special effects when generating a video based on the effect_scene."

    def api_call(
        self,
        image: torch.Tensor,
        effect_scene: KlingSingleImageEffectsScene,
        model_name: KlingSingleImageEffectModelName,
        duration: KlingVideoGenDuration,
        unique_id: Optional[str] = None,
        **kwargs,
    ):
        return super().api_call(
            dual_character=False,
            effect_scene=effect_scene,
            model_name=model_name,
            duration=duration,
            image_1=image,
            unique_id=unique_id,
            **kwargs,
        )


class KlingLipSyncBase(KlingNodeBase):
    """Kling Lip Sync Base"""

    RETURN_TYPES = ("VIDEO", "STRING", "STRING")
    RETURN_NAMES = ("VIDEO", "video_id", "duration")

    def validate_lip_sync_video(self, video: VideoInput):
        """
        Validates the input video adheres to the expectations of the Kling Lip Sync API:
        - Video length does not exceed 10s and is not shorter than 2s
        - Height and width dimensions should both be between 720px and 1920px

        See: https://app.klingai.com/global/dev/document-api/apiReference/model/videoTolip
        """
        validate_video_dimensions(video, 720, 1920)
        validate_video_duration(video, 2, 10)
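        # Illustrative expectations: a 1280x720 clip of 5s should pass both
        # checks; a 3840x2160 clip should fail the dimension check, and a
        # 30s clip should fail the duration check.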

    def validate_text(self, text: str):
        if not text:
            raise ValueError("Text is required")
        if len(text) > MAX_PROMPT_LENGTH_LIP_SYNC:
            raise ValueError(
                f"Text is too long. Maximum length is {MAX_PROMPT_LENGTH_LIP_SYNC} characters."
            )

    def get_response(
        self, task_id: str, auth_kwargs: dict[str, str], node_id: Optional[str] = None
    ) -> KlingLipSyncResponse:
        """Polls the Kling API endpoint until the task reaches a terminal state."""
        return poll_until_finished(
            auth_kwargs,
            ApiEndpoint(
                path=f"{PATH_LIP_SYNC}/{task_id}",
                method=HttpMethod.GET,
                request_model=EmptyRequest,
                response_model=KlingLipSyncResponse,
            ),
            result_url_extractor=get_video_url_from_response,
            estimated_duration=AVERAGE_DURATION_LIP_SYNC,
            node_id=node_id,
        )

    def api_call(
        self,
        video: VideoInput,
        audio: Optional[AudioInput] = None,
        voice_language: Optional[str] = None,
        mode: Optional[str] = None,
        text: Optional[str] = None,
        voice_speed: Optional[float] = None,
        voice_id: Optional[str] = None,
        unique_id: Optional[str] = None,
        **kwargs,
    ) -> tuple[VideoFromFile, str, str]:
        if text:
            self.validate_text(text)
        self.validate_lip_sync_video(video)

        # Upload video to Comfy API and get download URL
        video_url = upload_video_to_comfyapi(video, auth_kwargs=kwargs)
        logging.info("Uploaded video to Comfy API. URL: %s", video_url)

        # Upload the audio file to Comfy API and get download URL
        if audio:
            audio_url = upload_audio_to_comfyapi(audio, auth_kwargs=kwargs)
            logging.info("Uploaded audio to Comfy API. URL: %s", audio_url)
        else:
            audio_url = None

        initial_operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=PATH_LIP_SYNC,
                method=HttpMethod.POST,
                request_model=KlingLipSyncRequest,
                response_model=KlingLipSyncResponse,
            ),
            request=KlingLipSyncRequest(
                input=KlingLipSyncInputObject(
                    video_url=video_url,
                    mode=mode,
                    text=text,
                    voice_language=voice_language,
                    voice_speed=voice_speed,
                    audio_type="url",
                    audio_url=audio_url,
                    voice_id=voice_id,
                ),
            ),
            auth_kwargs=kwargs,
        )
        task_creation_response = initial_operation.execute()
        validate_task_creation_response(task_creation_response)
        task_id = task_creation_response.data.task_id
        final_response = self.get_response(
            task_id, auth_kwargs=kwargs, node_id=unique_id
        )
        validate_video_result_response(final_response)
        video = get_video_from_response(final_response)
        return video_result_to_node_output(video)


class KlingLipSyncAudioToVideoNode(KlingLipSyncBase):
    """Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "video": (IO.VIDEO, {}),
                "audio": (IO.AUDIO, {}),
                "voice_language": model_field_to_node_input(
                    IO.COMBO,
                    KlingLipSyncInputObject,
                    "voice_language",
                    enum_type=KlingLipSyncVoiceLanguage,
                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    DESCRIPTION = "Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file. When using, ensure that the audio contains clearly distinguishable vocals and that the video contains a distinct face. The audio file should not be larger than 5MB. The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length."

    def api_call(
        self,
        video: VideoInput,
        audio: AudioInput,
        voice_language: str,
        unique_id: Optional[str] = None,
        **kwargs,
    ):
        return super().api_call(
            video=video,
            audio=audio,
            voice_language=voice_language,
            mode="audio2video",
            unique_id=unique_id,
            **kwargs,
        )


class KlingLipSyncTextToVideoNode(KlingLipSyncBase):
    """Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt."""

    @staticmethod
    def get_voice_config() -> dict[str, tuple[str, str]]:
        return {
            # English voices
            "Melody": ("girlfriend_4_speech02", "en"),
            "Sunny": ("genshin_vindi2", "en"),
            "Sage": ("zhinen_xuesheng", "en"),
            "Ace": ("AOT", "en"),
            "Blossom": ("ai_shatang", "en"),
            "Peppy": ("genshin_klee2", "en"),
            "Dove": ("genshin_kirara", "en"),
            "Shine": ("ai_kaiya", "en"),
            "Anchor": ("oversea_male1", "en"),
            "Lyric": ("ai_chenjiahao_712", "en"),
            "Tender": ("chat1_female_new-3", "en"),
            "Siren": ("chat_0407_5-1", "en"),
            "Zippy": ("cartoon-boy-07", "en"),
            "Bud": ("uk_boy1", "en"),
            "Sprite": ("cartoon-girl-01", "en"),
            "Candy": ("PeppaPig_platform", "en"),
            "Beacon": ("ai_huangzhong_712", "en"),
            "Rock": ("ai_huangyaoshi_712", "en"),
            "Titan": ("ai_laoguowang_712", "en"),
            "Grace": ("chengshu_jiejie", "en"),
            "Helen": ("you_pingjing", "en"),
            "Lore": ("calm_story1", "en"),
            "Crag": ("uk_man2", "en"),
            "Prattle": ("laopopo_speech02", "en"),
            "Hearth": ("heainainai_speech02", "en"),
            "The Reader": ("reader_en_m-v1", "en"),
            "Commercial Lady": ("commercial_lady_en_f-v1", "en"),
            # Chinese voices
            "阳光少年": ("genshin_vindi2", "zh"),
            "懂事小弟": ("zhinen_xuesheng", "zh"),
            "运动少年": ("tiyuxi_xuedi", "zh"),
            "青春少女": ("ai_shatang", "zh"),
            "温柔小妹": ("genshin_klee2", "zh"),
            "元气少女": ("genshin_kirara", "zh"),
            "阳光男生": ("ai_kaiya", "zh"),
            "幽默小哥": ("tiexin_nanyou", "zh"),
            "文艺小哥": ("ai_chenjiahao_712", "zh"),
            "甜美邻家": ("girlfriend_1_speech02", "zh"),
            "温柔姐姐": ("chat1_female_new-3", "zh"),
            "职场女青": ("girlfriend_2_speech02", "zh"),
            "活泼男童": ("cartoon-boy-07", "zh"),
            "俏皮女童": ("cartoon-girl-01", "zh"),
            "稳重老爸": ("ai_huangyaoshi_712", "zh"),
            "温柔妈妈": ("you_pingjing", "zh"),
            "严肃上司": ("ai_laoguowang_712", "zh"),
            "优雅贵妇": ("chengshu_jiejie", "zh"),
            "慈祥爷爷": ("zhuxi_speech02", "zh"),
            "唠叨爷爷": ("uk_oldman3", "zh"),
            "唠叨奶奶": ("laopopo_speech02", "zh"),
            "和蔼奶奶": ("heainainai_speech02", "zh"),
            "东北老铁": ("dongbeilaotie_speech02", "zh"),
            "重庆小伙": ("chongqingxiaohuo_speech02", "zh"),
            "四川妹子": ("chuanmeizi_speech02", "zh"),
            "潮汕大叔": ("chaoshandashu_speech02", "zh"),
            "台湾男生": ("ai_taiwan_man2_speech02", "zh"),
            "西安掌柜": ("xianzhanggui_speech02", "zh"),
            "天津姐姐": ("tianjinjiejie_speech02", "zh"),
            "新闻播报男": ("diyinnansang_DB_CN_M_04-v2", "zh"),
            "译制片男": ("yizhipiannan-v1", "zh"),
            "撒娇女友": ("tianmeixuemei-v1", "zh"),
            "刀片烟嗓": ("daopianyansang-v1", "zh"),
            "乖巧正太": ("mengwa-v1", "zh"),
        }
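
    # Each display name maps to a (voice_id, voice_language) pair consumed by
    # api_call below, e.g.:
    #
    #   voice_id, lang = KlingLipSyncTextToVideoNode.get_voice_config()["Melody"]
    #   # -> ("girlfriend_4_speech02", "en")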

    @classmethod
    def INPUT_TYPES(s):
        voice_options = list(s.get_voice_config().keys())
        return {
            "required": {
                "video": (IO.VIDEO, {}),
                "text": model_field_to_node_input(
                    IO.STRING, KlingLipSyncInputObject, "text", multiline=True
                ),
                "voice": (voice_options, {"default": voice_options[0]}),
                "voice_speed": model_field_to_node_input(
                    IO.FLOAT, KlingLipSyncInputObject, "voice_speed", slider=True
                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    DESCRIPTION = "Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt. The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length."

    def api_call(
        self,
        video: VideoInput,
        text: str,
        voice: str,
        voice_speed: float,
        unique_id: Optional[str] = None,
        **kwargs,
    ):
        voice_id, voice_language = KlingLipSyncTextToVideoNode.get_voice_config()[voice]
        return super().api_call(
            video=video,
            text=text,
            voice_language=voice_language,
            voice_id=voice_id,
            voice_speed=voice_speed,
            mode="text2video",
            unique_id=unique_id,
            **kwargs,
        )


class KlingImageGenerationBase(KlingNodeBase):
    """Kling Image Generation Base Node."""

    RETURN_TYPES = ("IMAGE",)
    CATEGORY = "api node/image/Kling"

    def validate_prompt(self, prompt: str, negative_prompt: Optional[str] = None):
        if not prompt or len(prompt) > MAX_PROMPT_LENGTH_IMAGE_GEN:
            raise ValueError(
                f"Prompt is required and must be less than {MAX_PROMPT_LENGTH_IMAGE_GEN} characters"
            )
        if negative_prompt and len(negative_prompt) > MAX_NEGATIVE_PROMPT_LENGTH_IMAGE_GEN:
            raise ValueError(
                f"Negative prompt must be less than {MAX_NEGATIVE_PROMPT_LENGTH_IMAGE_GEN} characters"
            )


class KlingVirtualTryOnNode(KlingImageGenerationBase):
    """Kling Virtual Try On Node."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "human_image": (IO.IMAGE, {}),
                "cloth_image": (IO.IMAGE, {}),
                "model_name": model_field_to_node_input(
                    IO.COMBO,
                    KlingVirtualTryOnRequest,
                    "model_name",
                    enum_type=KlingVirtualTryOnModelName,
                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    DESCRIPTION = "Kling Virtual Try On Node. Input a human image and a cloth image to try on the cloth on the human. You can merge multiple clothing item pictures into one image with a white background."

    def get_response(
        self, task_id: str, auth_kwargs: dict[str, str], node_id: Optional[str] = None
    ) -> KlingVirtualTryOnResponse:
        return poll_until_finished(
            auth_kwargs,
            ApiEndpoint(
                path=f"{PATH_VIRTUAL_TRY_ON}/{task_id}",
                method=HttpMethod.GET,
                request_model=EmptyRequest,
                response_model=KlingVirtualTryOnResponse,
            ),
            result_url_extractor=get_images_urls_from_response,
            estimated_duration=AVERAGE_DURATION_VIRTUAL_TRY_ON,
            node_id=node_id,
        )

    def api_call(
        self,
        human_image: torch.Tensor,
        cloth_image: torch.Tensor,
        model_name: KlingVirtualTryOnModelName,
        unique_id: Optional[str] = None,
        **kwargs,
    ):
        initial_operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=PATH_VIRTUAL_TRY_ON,
                method=HttpMethod.POST,
                request_model=KlingVirtualTryOnRequest,
                response_model=KlingVirtualTryOnResponse,
            ),
            request=KlingVirtualTryOnRequest(
                human_image=tensor_to_base64_string(human_image),
                cloth_image=tensor_to_base64_string(cloth_image),
                model_name=model_name,
            ),
            auth_kwargs=kwargs,
        )
        task_creation_response = initial_operation.execute()
        validate_task_creation_response(task_creation_response)
        task_id = task_creation_response.data.task_id
        final_response = self.get_response(
            task_id, auth_kwargs=kwargs, node_id=unique_id
        )
        validate_image_result_response(final_response)
        images = get_images_from_response(final_response)
        return (image_result_to_node_output(images),)


class KlingImageGenerationNode(KlingImageGenerationBase):
    """Kling Image Generation Node. Generate an image from a text prompt with an optional reference image."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "prompt": model_field_to_node_input(
                    IO.STRING,
                    KlingImageGenerationsRequest,
                    "prompt",
                    multiline=True,
                    max_length=MAX_PROMPT_LENGTH_IMAGE_GEN,
                ),
                "negative_prompt": model_field_to_node_input(
                    IO.STRING,
                    KlingImageGenerationsRequest,
                    "negative_prompt",
                    multiline=True,
                ),
                "image_type": model_field_to_node_input(
                    IO.COMBO,
                    KlingImageGenerationsRequest,
                    "image_reference",
                    enum_type=KlingImageGenImageReferenceType,
                ),
                "image_fidelity": model_field_to_node_input(
                    IO.FLOAT,
                    KlingImageGenerationsRequest,
                    "image_fidelity",
                    slider=True,
                    step=0.01,
                ),
                "human_fidelity": model_field_to_node_input(
                    IO.FLOAT,
                    KlingImageGenerationsRequest,
                    "human_fidelity",
                    slider=True,
                    step=0.01,
                ),
                "model_name": model_field_to_node_input(
                    IO.COMBO,
                    KlingImageGenerationsRequest,
                    "model_name",
                    enum_type=KlingImageGenModelName,
                ),
                "aspect_ratio": model_field_to_node_input(
                    IO.COMBO,
                    KlingImageGenerationsRequest,
                    "aspect_ratio",
                    enum_type=KlingImageGenAspectRatio,
                ),
                "n": model_field_to_node_input(
                    IO.INT,
                    KlingImageGenerationsRequest,
                    "n",
                ),
            },
            "optional": {
                "image": (IO.IMAGE, {}),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    DESCRIPTION = "Kling Image Generation Node. Generate an image from a text prompt with an optional reference image."

    def get_response(
        self,
        task_id: str,
        auth_kwargs: Optional[dict[str, str]],
        node_id: Optional[str] = None,
    ) -> KlingImageGenerationsResponse:
        return poll_until_finished(
            auth_kwargs,
            ApiEndpoint(
                path=f"{PATH_IMAGE_GENERATIONS}/{task_id}",
                method=HttpMethod.GET,
                request_model=EmptyRequest,
                response_model=KlingImageGenerationsResponse,
            ),
            result_url_extractor=get_images_urls_from_response,
            estimated_duration=AVERAGE_DURATION_IMAGE_GEN,
            node_id=node_id,
        )

    def api_call(
        self,
        model_name: KlingImageGenModelName,
        prompt: str,
        negative_prompt: str,
        image_type: KlingImageGenImageReferenceType,
        image_fidelity: float,
        human_fidelity: float,
        n: int,
        aspect_ratio: KlingImageGenAspectRatio,
        image: Optional[torch.Tensor] = None,
        unique_id: Optional[str] = None,
        **kwargs,
    ):
        self.validate_prompt(prompt, negative_prompt)
        if image is not None:
            image = tensor_to_base64_string(image)
        initial_operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=PATH_IMAGE_GENERATIONS,
                method=HttpMethod.POST,
                request_model=KlingImageGenerationsRequest,
                response_model=KlingImageGenerationsResponse,
            ),
            request=KlingImageGenerationsRequest(
                model_name=model_name,
                prompt=prompt,
                negative_prompt=negative_prompt,
                image=image,
                image_reference=image_type,
                image_fidelity=image_fidelity,
                human_fidelity=human_fidelity,
                n=n,
                aspect_ratio=aspect_ratio,
            ),
            auth_kwargs=kwargs,
        )
        task_creation_response = initial_operation.execute()
        validate_task_creation_response(task_creation_response)
        task_id = task_creation_response.data.task_id
        final_response = self.get_response(
            task_id, auth_kwargs=kwargs, node_id=unique_id
        )
        validate_image_result_response(final_response)
        images = get_images_from_response(final_response)
        return (image_result_to_node_output(images),)


NODE_CLASS_MAPPINGS = {
    "KlingCameraControls": KlingCameraControls,
    "KlingTextToVideoNode": KlingTextToVideoNode,
    "KlingImage2VideoNode": KlingImage2VideoNode,
    "KlingCameraControlI2VNode": KlingCameraControlI2VNode,
    "KlingCameraControlT2VNode": KlingCameraControlT2VNode,
    "KlingStartEndFrameNode": KlingStartEndFrameNode,
    "KlingVideoExtendNode": KlingVideoExtendNode,
    "KlingLipSyncAudioToVideoNode": KlingLipSyncAudioToVideoNode,
    "KlingLipSyncTextToVideoNode": KlingLipSyncTextToVideoNode,
    "KlingVirtualTryOnNode": KlingVirtualTryOnNode,
    "KlingImageGenerationNode": KlingImageGenerationNode,
    "KlingSingleImageVideoEffectNode": KlingSingleImageVideoEffectNode,
    "KlingDualCharacterVideoEffectNode": KlingDualCharacterVideoEffectNode,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "KlingCameraControls": "Kling Camera Controls",
    "KlingTextToVideoNode": "Kling Text to Video",
    "KlingImage2VideoNode": "Kling Image to Video",
    "KlingCameraControlI2VNode": "Kling Image to Video (Camera Control)",
    "KlingCameraControlT2VNode": "Kling Text to Video (Camera Control)",
    "KlingStartEndFrameNode": "Kling Start-End Frame to Video",
    "KlingVideoExtendNode": "Kling Video Extend",
    "KlingLipSyncAudioToVideoNode": "Kling Lip Sync Video with Audio",
    "KlingLipSyncTextToVideoNode": "Kling Lip Sync Video with Text",
    "KlingVirtualTryOnNode": "Kling Virtual Try On",
    "KlingImageGenerationNode": "Kling Image Generation",
    "KlingSingleImageVideoEffectNode": "Kling Video Effects",
    "KlingDualCharacterVideoEffectNode": "Kling Dual Character Video Effects",
}