Commit 7e8fb61 by DeepBeepMeep (1 parent: 660ae45) — Polishing

Files changed:
- hyvideo/hunyuan.py +8 -5
- ltx_video/utils/prompt_enhance_utils.py +8 -7
- preprocessing/dwpose/util.py +2 -2
- wgp.py +74 -61
hyvideo/hunyuan.py  CHANGED
@@ -20,6 +20,7 @@ from PIL import Image
 import numpy as np
 import torchvision.transforms as transforms
 import cv2
+from wan.utils.utils import resize_lanczos, calculate_new_dimensions
 
 def pad_image(crop_img, size, color=(255, 255, 255), resize_ratio=1):
     crop_h, crop_w = crop_img.shape[:2]
@@ -606,6 +607,7 @@ class HunyuanVideoSampler(Inference):
         VAE_tile_size = None,
         joint_pass = False,
         cfg_star_switch = False,
+        fit_into_canvas = True,
         **kwargs,
     ):
 
@@ -737,12 +739,13 @@ class HunyuanVideoSampler(Inference):
 
         # semantic_images = [Image.open(i2v_image_path).convert('RGB')]
         semantic_images = [image_start.convert('RGB')] #
-
         origin_size = semantic_images[0].size
-
-
-
-
+        h, w = origin_size
+        h, w = calculate_new_dimensions(height, width, h, w, fit_into_canvas)
+        closest_size = (w, h)
+        # crop_size_list = generate_crop_size_list(bucket_hw_base_size, 32)
+        # aspect_ratios = np.array([round(float(h)/float(w), 5) for h, w in crop_size_list])
+        # closest_size, closest_ratio = get_closest_ratio(origin_size[1], origin_size[0], aspect_ratios, crop_size_list)
         ref_image_transform = transforms.Compose([
             transforms.Resize(closest_size),
             transforms.CenterCrop(closest_size),
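The hunyuan.py change drops the bucket-based closest-size lookup (now commented out) and instead delegates the target resolution to calculate_new_dimensions, driven by the new fit_into_canvas flag. As a rough illustration of the idea only (not the actual wan.utils.utils implementation; the helper name fit_dimensions, the block size of 16, and the example numbers are assumptions), a minimal sketch:

import math

def fit_dimensions(canvas_h, canvas_w, img_h, img_w, fit_into_canvas=True, block=16):
    # Illustrative stand-in for a "fit into canvas" dimension helper.
    if fit_into_canvas:
        # Scale so the image fits inside the canvas while keeping its aspect ratio.
        scale = min(canvas_h / img_h, canvas_w / img_w)
    else:
        # Scale so the image covers roughly the same pixel area as the canvas.
        scale = math.sqrt(canvas_h * canvas_w / (img_h * img_w))
    # Snap both sides to the model's block size.
    new_h = max(block, round(img_h * scale / block) * block)
    new_w = max(block, round(img_w * scale / block) * block)
    return new_h, new_w

# Example: a 1280x720 reference image targeted at a 480x832 canvas.
print(fit_dimensions(480, 832, 720, 1280))  # (464, 832)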
ltx_video/utils/prompt_enhance_utils.py  CHANGED
@@ -125,14 +125,15 @@ def _generate_t2v_prompt(
         )
         for m in messages
     ]
-    model_inputs = prompt_enhancer_tokenizer(texts, return_tensors="pt").to(
-        prompt_enhancer_model.device
-    )
 
-
-
-
+    out_prompts = []
+    for text in texts:
+        model_inputs = prompt_enhancer_tokenizer(text, return_tensors="pt").to(
+            prompt_enhancer_model.device
+        )
+        out_prompts.append(_generate_and_decode_prompts(prompt_enhancer_model, prompt_enhancer_tokenizer, model_inputs, max_new_tokens)[0])
 
+    return out_prompts
 
 def _generate_i2v_prompt(
     image_caption_model,
@@ -201,7 +202,7 @@ def _generate_and_decode_prompts(
 ) -> List[str]:
     with torch.inference_mode():
         outputs = prompt_enhancer_model.generate(
-            **model_inputs,
+            **model_inputs, max_new_tokens=max_new_tokens
         )
     generated_ids = [
         output_ids[len(input_ids) :]
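The enhancer now tokenizes and generates one prompt at a time instead of passing the whole list to the tokenizer in a single batched call, and max_new_tokens is forwarded to generate(). A minimal sketch of the same per-item pattern with a generic Hugging Face causal LM (the model name and the prompts below are placeholders, not part of the commit):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "HuggingFaceTB/SmolLM2-135M-Instruct"  # placeholder model for illustration
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

texts = ["a cat on a window sill", "a drone shot of a coastline at dusk, golden light"]

out_prompts = []
for text in texts:
    # One prompt per call: no padding needed, so prompts of different lengths mix freely.
    model_inputs = tokenizer(text, return_tensors="pt").to(model.device)
    with torch.inference_mode():
        outputs = model.generate(**model_inputs, max_new_tokens=64)
    # Strip the echoed input tokens and keep only the newly generated continuation.
    generated = outputs[0][model_inputs["input_ids"].shape[1]:]
    out_prompts.append(tokenizer.decode(generated, skip_special_tokens=True))

print(out_prompts)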
preprocessing/dwpose/util.py  CHANGED
@@ -3,8 +3,8 @@
 import math
 import numpy as np
 import matplotlib
-import cv2
-
+import cv2
+matplotlib.use('TkAgg')
 
 eps = 0.01
 
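matplotlib.use() only takes effect when it runs before pyplot is first imported, and the TkAgg backend needs tkinter plus a local display. Where that matters, a guarded variant (illustrative only, not part of the commit) can fall back to the headless Agg backend:

import os
import matplotlib

# Illustrative fallback: TkAgg needs a GUI/display; Agg renders off-screen.
if os.environ.get("DISPLAY") or os.name == "nt":
    matplotlib.use("TkAgg")
else:
    matplotlib.use("Agg")

import matplotlib.pyplot as plt  # import pyplot only after the backend is selected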
wgp.py  CHANGED
@@ -470,6 +470,9 @@ def get_preview_images(inputs):
         else:
             end_image_data = image
             break
+    if start_image_data != None and len(start_image_data) > 1 and end_image_data == None:
+        end_image_data = start_image_data [1:]
+        start_image_data = start_image_data [:1]
     return start_image_data, end_image_data
 
 def add_video_task(**inputs):
@@ -1544,7 +1547,7 @@ def get_dependent_models(model_filename, quantization, dtype_policy ):
         return [get_model_filename("ltxv_13B", quantization, dtype_policy)]
     else:
         return []
-model_types = [ "t2v_1.3B", "
+model_types = [ "t2v_1.3B", "t2v", "i2v", "i2v_720p", "flf2v_720p", "vace_1.3B","vace_14B", "phantom_1.3B", "fantasy", "fun_inp_1.3B", "fun_inp", "recam_1.3B", "sky_df_1.3B", "sky_df_14B", "sky_df_720p_14B", "ltxv_13B", "ltxv_13B_distilled", "hunyuan", "hunyuan_i2v", "hunyuan_custom"]
 model_signatures = {"t2v": "text2video_14B", "t2v_1.3B" : "text2video_1.3B", "fun_inp_1.3B" : "Fun_InP_1.3B", "fun_inp" : "Fun_InP_14B",
                     "i2v" : "image2video_480p", "i2v_720p" : "image2video_720p" , "vace_1.3B" : "Vace_1.3B", "vace_14B" : "Vace_14B","recam_1.3B": "recammaster_1.3B",
                     "flf2v_720p" : "FLF2V_720p", "sky_df_1.3B" : "sky_reels2_diffusion_forcing_1.3B", "sky_df_14B" : "sky_reels2_diffusion_forcing_14B",
@@ -1583,6 +1586,7 @@ def get_model_name(model_filename, description_container = [""]):
     elif "image" in model_filename:
         model_name = "Wan2.1 image2video"
         model_name += " 720p" if "720p" in model_filename else " 480p"
+        model_name += " 14B"
         if "720p" in model_filename:
             description = "The standard Wan Image 2 Video specialized to generate 720p images. It also offers Start and End Image support (End Image is not supported in the original model but seems to work well)"
         else:
@@ -1594,6 +1598,7 @@ def get_model_name(model_filename, description_container = [""]):
     elif "FLF2V" in model_filename:
         model_name = "Wan2.1 FLF2V"
         model_name += " 720p" if "720p" in model_filename else " 480p"
+        model_name += " 14B"
         description = "The First Last Frame 2 Video model is the official model Image 2 Video model that support Start and End frames."
     elif "sky_reels2_diffusion_forcing" in model_filename:
         model_name = "SkyReels2 Diffusion Forcing"
@@ -1612,19 +1617,19 @@ def get_model_name(model_filename, description_container = [""]):
         model_name += " 14B" if "14B" in model_filename else " 1.3B"
         description = "The Fantasy Speaking model corresponds to the original Wan image 2 video model combined with the Fantasy Speaking extension to process an audio Input."
     elif "ltxv_0.9.7_13B_dev" in model_filename:
-        model_name = "LTX Video 0.9.7"
+        model_name = "LTX Video 0.9.7 13B"
         description = "LTX Video is a fast model that can be used to generate long videos (up to 260 frames).It is recommended to keep the number of steps to 30 or you will need to update the file 'ltxv_video/configs/ltxv-13b-0.9.7-dev.yaml'.The LTX Video model expects very long prompts, so don't hesitate to use the Prompt Enhancer."
     elif "ltxv_0.9.7_13B_distilled" in model_filename:
-        model_name = "LTX Video 0.9.7 Distilled"
+        model_name = "LTX Video 0.9.7 Distilled 13B"
         description = "LTX Video is a fast model that can be used to generate long videos (up to 260 frames).This distilled version is a very fast version and retains a high level of quality. The LTX Video model expects very long prompts, so don't hesitate to use the Prompt Enhancer."
     elif "hunyuan_video_720" in model_filename:
-        model_name = "Hunyuan Video text2video 720p"
+        model_name = "Hunyuan Video text2video 720p 13B"
         description = "Probably the best text 2 video model available."
     elif "hunyuan_video_i2v" in model_filename:
-        model_name = "Hunyuan Video image2video 720p"
+        model_name = "Hunyuan Video image2video 720p 13B"
         description = "A good looking image 2 video model, but not so good in prompt adherence."
     elif "hunyuan_video_custom" in model_filename:
-        model_name = "Hunyuan Video Custom 720p"
+        model_name = "Hunyuan Video Custom 720p 13B"
         description = "The Hunyuan Video Custom model is proably the best model to transfer people (only people for the momment) as it is quite good to keep their identity. However it is slow as to get good results, you need to generate 720p videos with 30 steps."
     else:
         model_name = "Wan2.1 text2video"
@@ -2147,25 +2152,24 @@ def load_models(model_filename):
     else:
         raise Exception(f"Model '{new_transformer_filename}' not supported.")
     wan_model._model_file_name = new_transformer_filename
-    kwargs = { "extraModelsToQuantize": None}
-    if profile
-        kwargs["budgets"] = { "transformer" : 100 if preload == 0 else preload, "text_encoder" : 100 if preload == 0 else preload, "*" : max(3000, preload) }
-        # if profile == 4:
-        #     kwargs["partialPinning"] = True
+    kwargs = { "extraModelsToQuantize": None }
+    if profile in (2, 4, 5):
+        kwargs["budgets"] = { "transformer" : 100 if preload == 0 else preload, "text_encoder" : 100 if preload == 0 else preload, "*" : max(1000 if profile==5 else 3000 , preload) }
     elif profile == 3:
         kwargs["budgets"] = { "*" : "70%" }
 
     global prompt_enhancer_image_caption_model, prompt_enhancer_image_caption_processor, prompt_enhancer_llm_model, prompt_enhancer_llm_tokenizer
     if server_config.get("enhancer_enabled", 0) == 1:
         from transformers import ( AutoModelForCausalLM, AutoProcessor, AutoTokenizer, LlamaForCausalLM )
         prompt_enhancer_image_caption_model = AutoModelForCausalLM.from_pretrained( "ckpts/Florence2", trust_remote_code=True)
         prompt_enhancer_image_caption_processor = AutoProcessor.from_pretrained( "ckpts/Florence2", trust_remote_code=True)
-        prompt_enhancer_llm_model = offload.fast_load_transformers_model("ckpts/Llama3_2/Llama3_2_quanto_bf16_int8.safetensors")
+        prompt_enhancer_llm_model = offload.fast_load_transformers_model("ckpts/Llama3_2/Llama3_2_quanto_bf16_int8.safetensors") #, configKwargs= {"_attn_implementation" :"XXXsdpa"}
         prompt_enhancer_llm_tokenizer = AutoTokenizer.from_pretrained("ckpts/Llama3_2")
         pipe["prompt_enhancer_image_caption_model"] = prompt_enhancer_image_caption_model
         pipe["prompt_enhancer_llm_model"] = prompt_enhancer_llm_model
         prompt_enhancer_image_caption_model._model_dtype = torch.float
-
+        if "budgets" in kwargs:
+            kwargs["budgets"]["prompt_enhancer_llm_model"] = 5000
     else:
         prompt_enhancer_image_caption_model = None
         prompt_enhancer_image_caption_processor = None
@@ -2318,7 +2322,7 @@ def apply_changes( state,
     model_choice = generate_dropdown_model_list()
 
     header = generate_header(state["model_filename"], compile=compile, attention_mode= attention_mode)
-    return "<DIV ALIGN=CENTER>The new configuration has been succesfully applied</DIV>", header, model_choice, gr.
+    return "<DIV ALIGN=CENTER>The new configuration has been succesfully applied</DIV>", header, model_choice, gr.Row(visible= server_config["enhancer_enabled"] == 1)
 
 
 
@@ -2365,23 +2369,23 @@ def build_callback(state, pipe, send_cmd, status, num_inference_steps):
         step_idx += 1
         if gen.get("abort", False):
             # pipe._interrupt = True
-            phase = "
+            phase = "Aborting"
         elif step_idx == num_inference_steps:
-            phase = "
+            phase = "VAE Decoding"
         else:
            if pass_no <=0:
-                phase = "
+                phase = "Denoising"
            elif pass_no == 1:
-                phase = "
+                phase = "Denoising First Pass"
            elif pass_no == 2:
-                phase = "
+                phase = "Denoising Second Pass"
            elif pass_no == 3:
-                phase = "
+                phase = "Denoising Third Pass"
            else:
-                phase = f"
+                phase = f"Denoising {pass_no}th Pass"
 
         gen["progress_phase"] = (phase, step_idx)
-        status_msg = status
+        status_msg = merge_status_context(status, phase)
        if step_idx >= 0:
             progress_args = [(step_idx , num_inference_steps) , status_msg , num_inference_steps]
        else:
@@ -2444,7 +2448,7 @@ def refresh_gallery(state): #, msg
         window_no = len(prompts)
         window_no -= 1
         prompts[window_no]="<B>" + prompts[window_no] + "</B>"
-        prompt = "<BR>".join(prompts)
+        prompt = "<BR><DIV style='height:8px'></DIV>".join(prompts)
         if enhanced:
             prompt = "<U><B>Enhanced:</B></U><BR>" + prompt
 
@@ -2799,7 +2803,7 @@ def generate_video(
     hunyuan_t2v = "hunyuan_video_720" in model_filename
     hunyuan_i2v = "hunyuan_video_i2v" in model_filename
     hunyuan_custom = "hunyuan_video_custom" in model_filename
-    if diffusion_forcing or hunyuan_t2v or hunyuan_i2v:
+    if diffusion_forcing or hunyuan_t2v or hunyuan_i2v or hunyuan_custom:
         fps = 24
     elif audio_guide != None:
         fps = 23
@@ -2810,7 +2814,7 @@ def generate_video(
 
     original_image_refs = image_refs
     if image_refs != None and len(image_refs) > 0 and (hunyuan_custom or phantom or vace):
-        send_cmd("progress", [0, get_latest_status(state
+        send_cmd("progress", [0, get_latest_status(state, "Removing Images References Background")])
         os.environ["U2NET_HOME"] = os.path.join(os.getcwd(), "ckpts", "rembg")
         from wan.utils.utils import resize_and_remove_background
         image_refs = resize_and_remove_background(image_refs, width, height, remove_background_image_ref ==1, fit_into_canvas= not vace)
@@ -2840,7 +2844,7 @@ def generate_video(
     source_video = None
     target_camera = None
     if "recam" in model_filename:
-        source_video = preprocess_video("", width=width, height=height,video_in=video_source, max_frames= video_length, start_frame = 0, fit_canvas= fit_canvas)
+        source_video = preprocess_video("", width=width, height=height,video_in=video_source, max_frames= video_length, start_frame = 0, fit_canvas= fit_canvas == 1)
         target_camera = model_mode
 
     audio_proj_split = None
@@ -2909,10 +2913,12 @@ def generate_video(
     guide_start_frame = 0
     video_length = first_window_video_length
     gen["extra_windows"] = 0
+    gen["total_windows"] = 1
+    gen["window_no"] = 1
     start_time = time.time()
     if prompt_enhancer_image_caption_model != None and prompt_enhancer !=None and len(prompt_enhancer)>0:
         text_encoder_max_tokens = 256
-        send_cmd("progress", [0, get_latest_status(state
+        send_cmd("progress", [0, get_latest_status(state, "Enhancing Prompt")])
         from ltx_video.utils.prompt_enhance_utils import generate_cinematic_prompt
         prompt_images = []
         if "I" in prompt_enhancer:
@@ -2976,17 +2982,15 @@ def generate_video(
     if any(process in video_prompt_type for process in ("P", "D", "G")) :
         prompts_max = gen["prompts_max"]
 
-        status = get_latest_status(state)
-
         preprocess_type = None
         if "P" in video_prompt_type :
-            progress_args = [0,
+            progress_args = [0, get_latest_status(state,"Extracting Open Pose Information")]
             preprocess_type = "pose"
         elif "D" in video_prompt_type :
-            progress_args = [0,
+            progress_args = [0, get_latest_status(state,"Extracting Depth Information")]
             preprocess_type = "depth"
        elif "G" in video_prompt_type :
-            progress_args = [0,
+            progress_args = [0, get_latest_status(state,"Extracting Gray Level Information")]
             preprocess_type = "gray"
 
         if preprocess_type != None :
@@ -3015,9 +3019,9 @@ def generate_video(
 
 
             gen["progress_status"] = status
-            gen["progress_phase"] = ("
+            gen["progress_phase"] = ("Encoding Prompt", -1 )
             callback = build_callback(state, trans, send_cmd, status, num_inference_steps)
-            progress_args = [0, status
+            progress_args = [0, merge_status_context(status, "Encoding Prompt")]
             send_cmd("progress", progress_args)
 
             # samples = torch.empty( (1,2)) #for testing
@@ -3043,7 +3047,7 @@ def generate_video(
                     frame_num=(video_length // 4)* 4 + 1,
                     height = height,
                     width = width,
-                    fit_into_canvas = fit_canvas,
+                    fit_into_canvas = fit_canvas == 1,
                     shift=flow_shift,
                     sampling_steps=num_inference_steps,
                     guide_scale=guidance_scale,
@@ -3151,14 +3155,8 @@ def generate_video(
                     sample = sample[: , reuse_frames:]
 
                 guide_start_frame -= reuse_frames
-                time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%Hh%Mm%Ss")
-                if os.name == 'nt':
-                    file_name = f"{time_flag}_seed{seed}_{sanitize_file_name(prompt[:50]).strip()}.mp4"
-                else:
-                    file_name = f"{time_flag}_seed{seed}_{sanitize_file_name(prompt[:100]).strip()}.mp4"
-                video_path = os.path.join(save_path, file_name)
-                exp = 0
 
+                exp = 0
                 if len(temporal_upsampling) > 0 or len(spatial_upsampling) > 0:
                     progress_args = [(num_inference_steps , num_inference_steps) , status + " - Upsampling" , num_inference_steps]
                     send_cmd("progress", progress_args)
@@ -3208,6 +3206,14 @@ def generate_video(
                     sample = torch.cat([frames_already_processed, sample], dim=1)
                     frames_already_processed = sample
 
+                time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%Hh%Mm%Ss")
+                save_prompt = original_prompts[0]
+                if os.name == 'nt':
+                    file_name = f"{time_flag}_seed{seed}_{sanitize_file_name(save_prompt[:50]).strip()}.mp4"
+                else:
+                    file_name = f"{time_flag}_seed{seed}_{sanitize_file_name(save_prompt[:100]).strip()}.mp4"
+                video_path = os.path.join(save_path, file_name)
+
                 if audio_guide == None:
                     cache_video( tensor=sample[None], save_file=video_path, fps=output_fps, nrow=1, normalize=True, value_range=(-1, 1))
                 else:
@@ -3223,8 +3229,10 @@ def generate_video(
                 inputs = get_function_arguments(generate_video, locals())
                 inputs.pop("send_cmd")
                 inputs.pop("task")
-                inputs["prompt"] = "\n".join(prompts)
                 configs = prepare_inputs_dict("metadata", inputs)
+                configs["prompt"] = "\n".join(original_prompts)
+                if prompt_enhancer_image_caption_model != None:
+                    configs["enhanced_prompt"] = "\n".join(prompts)
                 configs["generation_time"] = round(end_time-start_time)
                 metadata_choice = server_config.get("metadata_type","metadata")
                 if metadata_choice == "json":
@@ -3474,9 +3482,6 @@ def generate_preview(latents):
     scale = 200 / h
     images= Image.fromarray(images)
     images = images.resize(( int(w*scale),int(h*scale)), resample=Image.Resampling.BILINEAR)
-    if images != None:
-        images.save("prepreview.png")
-
     return images
 
 
@@ -3591,7 +3596,7 @@ def process_tasks(state):
 def get_generation_status(prompt_no, prompts_max, repeat_no, repeat_max, window_no, total_windows):
     if prompts_max == 1:
         if repeat_max == 1:
-            status = "
+            status = ""
         else:
             status = f"Sample {repeat_no}/{repeat_max}"
     else:
@@ -3600,7 +3605,9 @@ def get_generation_status(prompt_no, prompts_max, repeat_no, repeat_max, window_
     else:
         status = f"Prompt {prompt_no}/{prompts_max}, Sample {repeat_no}/{repeat_max}"
     if total_windows > 1:
-        status
+        if len(status) > 0:
+            status += ", "
+        status += f"Sliding Window {window_no}/{total_windows}"
 
     return status
 
@@ -3611,7 +3618,15 @@ def get_new_refresh_id():
     refresh_id += 1
     return refresh_id
 
-def
+def merge_status_context(status="", context=""):
+    if len(status) == 0:
+        return context
+    elif len(context) == 0:
+        return status
+    else:
+        return status + " - " + context
+
+def get_latest_status(state, context=""):
     gen = get_gen_info(state)
     prompt_no = gen["prompt_no"]
     prompts_max = gen.get("prompts_max",0)
@@ -3622,7 +3637,7 @@ def get_latest_status(state):
     total_windows += gen.get("extra_windows", 0)
     window_no = gen.get("window_no", 0)
     status = get_generation_status(prompt_no, prompts_max, repeat_no, total_generation, window_no, total_windows)
-    return status
+    return merge_status_context(status, context)
 
 def update_status(state):
     gen = get_gen_info(state)
@@ -4354,8 +4369,8 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
        hunyuan_t2v = "hunyuan_video_720" in model_filename
        hunyuan_i2v = "hunyuan_video_i2v" in model_filename
        hunyuan_video_custom = "hunyuan_video_custom" in model_filename
-
-
+        sliding_window_enabled = vace or diffusion_forcing or ltxv
+        new_line_text = "each new line of prompt will be used for a window" if sliding_window_enabled else "each new line of prompt will generate a new video"
 
        with gr.Column(visible= test_class_i2v(model_filename) or diffusion_forcing or ltxv or recammaster) as image_prompt_column:
            if diffusion_forcing or ltxv:
@@ -4484,7 +4499,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                default_wizard_prompt, variables, values, errors = extract_wizard_prompt(launch_prompt)
                advanced_prompt = len(errors) > 0
                with gr.Column(visible= advanced_prompt) as prompt_column_advanced:
-                    prompt = gr.Textbox( visible= advanced_prompt, label="Prompts (
+                    prompt = gr.Textbox( visible= advanced_prompt, label="Prompts (" + new_line_text + ", # lines = comments, ! lines = macros)", value=launch_prompt, lines=3)
 
                with gr.Column(visible=not advanced_prompt and len(variables) > 0) as prompt_column_wizard_vars:
                    gr.Markdown("<B>Please fill the following input fields to adapt automatically the Prompt:</B>")
@@ -4500,9 +4515,8 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                    wizard_variables = "\n".join(variables)
                    for _ in range( PROMPT_VARS_MAX - len(prompt_vars)):
                        prompt_vars.append(gr.Textbox(visible= False, min_width=80, show_label= False))
-
                with gr.Column(not advanced_prompt) as prompt_column_wizard:
-                    wizard_prompt = gr.Textbox(visible = not advanced_prompt, label="Prompts (
+                    wizard_prompt = gr.Textbox(visible = not advanced_prompt, label="Prompts (" + new_line_text + ", # lines = comments)", value=default_wizard_prompt, lines=3)
                    wizard_prompt_activated_var = gr.Text(wizard_prompt_activated, visible= False)
                    wizard_variables_var = gr.Text(wizard_variables, visible = False)
                with gr.Row(visible= server_config.get("enhancer_enabled", 0) == 1 ) as prompt_enhancer_row:
@@ -4688,8 +4702,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                            )
                        with gr.Row():
                            cfg_zero_step = gr.Slider(-1, 39, value=ui_defaults.get("cfg_zero_step",-1), step=1, label="CFG Zero below this Layer (Extra Process)")
-
-                    with gr.Tab("Sliding Window", visible= vace or diffusion_forcing or ltxv) as sliding_window_tab:
+                    with gr.Tab("Sliding Window", visible= sliding_window_enabled) as sliding_window_tab:
 
                        with gr.Column():
                            gr.Markdown("<B>A Sliding Window allows you to generate video with a duration not limited by the Model</B>")
@@ -4701,7 +4714,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                                sliding_window_discard_last_frames = gr.Slider(0, 20, value=ui_defaults.get("sliding_window_discard_last_frames", 0), step=4, visible = False)
                            elif ltxv:
                                sliding_window_size = gr.Slider(41, 257, value=ui_defaults.get("sliding_window_size", 129), step=8, label="Sliding Window Size")
-                                sliding_window_overlap = gr.Slider(9, 97, value=ui_defaults.get("sliding_window_overlap",
+                                sliding_window_overlap = gr.Slider(9, 97, value=ui_defaults.get("sliding_window_overlap",9), step=8, label="Windows Frames Overlap (needed to maintain continuity between windows, a higher value will require more windows)")
                                sliding_window_overlap_noise = gr.Slider(0, 100, value=ui_defaults.get("sliding_window_overlap_noise",20), step=1, label="Noise to be added to overlapped frames to reduce blur effect")
                                sliding_window_discard_last_frames = gr.Slider(0, 20, value=ui_defaults.get("sliding_window_discard_last_frames", 0), step=4, visible = False)
                            else:
@@ -5159,7 +5172,7 @@ def generate_configuration_tab(state, blocks, header, model_choice, prompt_enhan
                mixed_precision_choice = gr.Dropdown(
                    choices=[
                        ("16 bits only, requires less VRAM", "0"),
-                        ("Mixed 16 / 32 bits, slightly more VRAM needed but better Quality", "1"),
+                        ("Mixed 16 / 32 bits, slightly more VRAM needed but better Quality mainly for 1.3B models", "1"),
                    ],
                    value= server_config.get("mixed_precision", "0"),
                    label="Transformer Engine Calculation"
@@ -5637,7 +5650,7 @@ def create_demo():
    else:
        theme = gr.themes.Soft(font=["Verdana"], primary_hue="sky", neutral_hue="slate", text_size="md")
 
-    with gr.Blocks(css=css, theme=theme, title= "
+    with gr.Blocks(css=css, theme=theme, title= "WanGP") as main:
        gr.Markdown("<div align=center><H1>Wan<SUP>GP</SUP> v5.1 <FONT SIZE=4>by <I>DeepBeepMeep</I></FONT> <FONT SIZE=3>") # (<A HREF='https://github.com/deepbeepmeep/Wan2GP'>Updates</A>)</FONT SIZE=3></H1></div>")
        global model_list
 
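The status plumbing added to wgp.py composes a generation counter string (prompt / sample / sliding-window numbers, built by get_generation_status) with a phase label through the new merge_status_context helper, and get_latest_status now accepts a context argument so callers can pass the phase directly. A small self-contained illustration using the helper exactly as it appears in the diff (the example base statuses and phase names below are just sample values):

def merge_status_context(status="", context=""):
    # Helper as added in the commit: join the counter string and the phase
    # with " - ", collapsing gracefully when either part is empty.
    if len(status) == 0:
        return context
    elif len(context) == 0:
        return status
    else:
        return status + " - " + context

# Base statuses shaped like the ones get_generation_status produces.
print(merge_status_context("Prompt 2/4, Sample 1/2", "Denoising First Pass"))
# Prompt 2/4, Sample 1/2 - Denoising First Pass
print(merge_status_context("", "VAE Decoding"))
# VAE Decoding   (single prompt, single sample: the base status is empty)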