File size: 5,917 Bytes
a12ee00
7621093
a12ee00
7621093
 
9902278
7621093
a12ee00
7621093
 
 
a12ee00
7621093
 
 
 
 
 
 
9902278
 
 
 
 
 
 
 
 
 
7621093
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9902278
7621093
a12ee00
9902278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7621093
9902278
 
 
7621093
9902278
7621093
9902278
7621093
9902278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7621093
 
 
9902278
7621093
 
 
9902278
7621093
3637e81
9902278
7621093
9902278
 
 
7621093
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52d2c68
7621093
 
9902278
 
7621093
9902278
 
 
 
 
 
 
 
 
7621093
 
9902278
 
 
 
7621093
 
9902278
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import requests
import base64

# Important: the NVIDIA L40S only supports small resolutions, short videos and no post-processing.
# If you want larger outputs or post-processing, you might need to use the NVIDIA A100.

# Use your own Inference Endpoint URL (query() posts every request to this address)
API_URL = "https://<use your own Inference Endpoint here>.endpoints.huggingface.cloud"

# Use your own API token (query() sends it as a Bearer token in the Authorization header)
API_TOKEN = "hf_<replace by your own Hugging Face token>"
def query(payload, timeout=None):
    """
    Send a generation request to the Inference Endpoint and return its JSON reply.

    Args:
        payload (dict): JSON-serializable request body, posted to API_URL
        timeout (float | tuple | None): Optional timeout in seconds forwarded to
            requests.post; None (the default) waits indefinitely

    Returns:
        The decoded JSON body of the response. If the body is not valid JSON,
        a dict of the form {"error": "<description>"} is returned instead.
    """
    response = requests.post(
        API_URL,
        headers={
            "Accept": "application/json",
            "Authorization": f"Bearer {API_TOKEN}",
            "Content-Type": "application/json",
        },
        json=payload,
        timeout=timeout,
    )

    # The HTTP status is intentionally not raised on: error replies are
    # returned as JSON so the caller can inspect their "error" entry.
    try:
        return response.json()
    except ValueError:
        # The body was not JSON (for example an HTML error page). Report it
        # in the same {"error": ...} shape instead of an opaque decode error.
        snippet = response.text[:500]
        return {
            "error": (
                f"Endpoint returned a non-JSON response "
                f"(HTTP {response.status_code}): {snippet}"
            )
        }

def save_video(json_response, filename):
    """
    Decode the base64 video held in an endpoint response and write it to a file.

    Args:
        json_response (dict): Decoded JSON reply, holding either an "error"
            entry or a "video" entry (a data URI or a bare base64 string)
        filename (str): Path of the video file to write

    Returns:
        None. When the response carries a truthy "error", the error is
        printed and no file is written.

    Raises:
        ValueError: If the response has no usable "video" entry
    """
    # A non-dict response (list, string, None...) has neither entry, so it
    # falls through to the "no video" error below.
    fields = json_response if isinstance(json_response, dict) else {}

    error = fields.get("error")
    if error:
        print(error)
        return

    video_data_uri = fields.get("video")
    if not isinstance(video_data_uri, str) or not video_data_uri:
        message = str(json_response)
        print(message)
        raise ValueError(message)

    # Strip the data URI prefix ("data:video/mp4;base64,<actual_base64_data>")
    # when there is one; a value without a comma is treated as bare base64.
    _prefix, separator, remainder = video_data_uri.partition(",")
    base64_data = remainder if separator else video_data_uri

    # Decode the base64 data
    video_data = base64.b64decode(base64_data)

    # Write the binary data to the output file
    with open(filename, "wb") as f:
        f.write(video_data)

def encode_image(image_path):
    """
    Load an image file and encode it as a base64 JPEG data URI

    The image is converted to RGB when it is in another mode and is always
    re-encoded as JPEG, whatever the format of the source file.

    Args:
        image_path (str): Path to the image file

    Returns:
        str: Data URI of the form "data:image/jpeg;base64,<base64 data>"
    """
    # Imported here because the module's top-level imports do not provide
    # these names; without them this function fails with a NameError.
    from io import BytesIO

    from PIL import Image

    with Image.open(image_path) as img:
        # Convert to RGB if necessary
        rgb_image = img if img.mode == "RGB" else img.convert("RGB")

        # Save the image to an in-memory buffer as JPEG
        jpeg_buffer = BytesIO()
        rgb_image.save(jpeg_buffer, format="JPEG")

    # Encode to base64 and wrap in a data URI
    base64_encoded = base64.b64encode(jpeg_buffer.getvalue()).decode("utf-8")
    return f"data:image/jpeg;base64,{base64_encoded}"

# Example usage with image-to-video generation.
# This part runs at module level: it builds the request, calls the endpoint
# through query() and writes the result through save_video().
image_filename = "input.jpg"
video_filename = "output.mp4"

config = {
    "inputs": {
       #"prompt": "magnificent underwater footage, clownfishes swimming around coral inside the carribean sea, real gopro footage",
       # OR
       # note: the image file is read and encoded as soon as this dict is built
       "image": encode_image(image_filename)
    },
    
    "parameters": {

        # ------------------- settings for LTX-Video -----------------------
        
        #"negative_prompt": "saturated, highlight, overexposed, highlighted, overlit, shaking, too bright, worst quality, inconsistent motion, blurry, jittery, distorted, cropped, watermarked, watermark, logo, subtitle, subtitles, lowres",

        # note about resolution:
        # we cannot use 720 since it cannot be divided by 32
        #
        # for a cinematic look:
        "width": 768,
        "height": 480,

        # this is a hack to fool LTX-Video into believing our input image is an actual video frame with poor encoding quality
        #"input_image_quality": 70,

        # for a vertical video look:
        #"width": 480,
        #"height": 768,

        # LTX-Video requires a frame number divisible by 8, plus one frame
        # (here: 8 * 16 + 1 = 129 frames)
        # note: glitches might appear if you use more than 168 frames
        "num_frames": (8 * 16) + 1,

        # using 30 steps seems to be enough for most cases, otherwise use 50 for best quality
        # I think using a large number of steps (> 30) might create some overexposure and saturation
        "num_inference_steps": 50,

        # values between 3.0 and 4.0 are nice
        "guidance_scale": 4.0,

        #"seed": 1209877,

        # ----------------------------------------------------------------

        # ------------------- settings for Varnish -----------------------
        # This will double the number of frames.
        # You can activate this if you want:
        # - a slow motion effect (in that case use double_num_frames=True and fps=24, 25 or 30)
        # - a HD soap / video game effect (in that case use double_num_frames=True and fps=60)
        "double_num_frames": True,

        # controls the number of frames per second
        # use this in combination with the num_frames and double_num_frames settings to control the duration and "feel" of your video
        "fps": 60, # typical values are: 24, 25, 30, 60

        # upscale the video using Real-ESRGAN.
        # This upscaling algorithm is relatively fast,
        # but might create an uncanny "3D render" or "drawing" effect.
        "super_resolution": True,

        # for cosmetic purposes and to get a "cinematic" feel, you can optionally add some film grain.
        # it is not recommended to add film grain if your theme doesn't match (film grain is great for black & white, retro looks)
        # and if you do, adding more than 12% will start to negatively impact file size (video codecs aren't great at compressing film grain)
        # 0% = no grain
        # 10% = a bit of grain
        "grain_amount": 12, # value between 0-100


        # The range of the CRF scale is 0–51, where:
        # 0 is lossless (for 8 bit only, for 10 bit use -qp 0)
        # 23 is the default
        # 51 is worst quality possible
        # A lower value generally leads to higher quality, and a subjectively sane range is 17–28.
        # Consider 17 or 18 to be visually lossless or nearly so;
        # it should look the same or nearly the same as the input but it isn't technically lossless.
        # The range is exponential, so increasing the CRF value +6 results in roughly half the bitrate / file size, while -6 leads to roughly twice the bitrate.
        #"quality": 18,

    }
}

# Make the API call (sends the whole config dict as the JSON request body)
output = query(config)

# Save the video (prints the error instead if the response contains one)
save_video(output, video_filename)