wedyanessam committed on
Commit
fa13218
·
verified ·
1 Parent(s): 1743a16

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -31
app.py CHANGED
@@ -1,11 +1,24 @@
1
  import subprocess
2
  import os
3
  import sys
 
4
  from pathlib import Path
5
  import argparse
6
  import gradio as gr
7
 
8
- # โœ… ุชุญู…ูŠู„ ุงู„ู…ูˆุฏูŠู„ุงุช ู„ูˆ ู…ุง ูƒุงู†ุช ู…ูˆุฌูˆุฏุฉ
 
 
 
 
 
 
 
 
 
 
 
 
9
  if not os.path.exists("./models/fantasytalking_model.ckpt"):
10
  print("๐Ÿ› ๏ธ ุฌุงุฑูŠ ุชุญู…ูŠู„ ุงู„ู†ู…ุงุฐุฌ ุนุจุฑ download_models.py ...")
11
  subprocess.run(["python", "download_models.py"])
@@ -19,22 +32,22 @@ from LLM.llm import generate_reply
19
  from TTS_X.tts import generate_voice
20
  from FantasyTalking.infer import load_models, main
21
 
22
- # โœ… ุฅุนุฏุงุฏ ุซูˆุงุจุช ุงู„ู…ูˆุฏูŠู„
23
  args_template = argparse.Namespace(
24
  fantasytalking_model_path="./models/fantasytalking_model.ckpt",
25
  wav2vec_model_dir="./models/wav2vec2-base-960h",
26
- wan_model_dir="./models/Wan2.1-I2V-14B-720P", # ู†ู‚ุฏุฑ ู†ุดูŠู„ู‡ ู„ูˆ ุชุจุบูŠ ู„ุงุญู‚ู‹ุง
27
  image_path="",
28
  audio_path="",
29
  prompt="",
30
  output_dir="./output",
31
- image_size=384, # โฌ…๏ธ ู‚ู„ู„ู†ุง ุญุฌู… ุงู„ุตูˆุฑุฉ
32
  audio_scale=1.0,
33
- prompt_cfg_scale=3.0, # โฌ…๏ธ ุฃู‚ู„ ุดูˆูŠุฉ
34
- audio_cfg_scale=3.0,
35
- max_num_frames=36, # โฌ…๏ธ ู‚ู„ู„ู†ุง ุนุฏุฏ ุงู„ูุฑูŠู…ุงุช
36
- inference_steps=8, # โฌ…๏ธ ู‚ู„ู„ู†ุง ุฎุทูˆุงุช ุงู„ุชูˆู„ูŠุฏ
37
- fps=20, # โฌ…๏ธ ุฃู‚ู„ ุดูˆูŠุฉ ุนุดุงู† ุงู„ุชูˆู„ูŠุฏ ุฃุณุฑุน
38
  num_persistent_param_in_dit=None,
39
  seed=1111
40
  )
@@ -44,14 +57,8 @@ print("๐Ÿš€ ุฌุงุฑูŠ ุชุญู…ูŠู„ FantasyTalking ูˆ Wav2Vec...")
44
  pipe, fantasytalking, wav2vec_processor, wav2vec = load_models(args_template)
45
  print("โœ… ุชู… ุงู„ุชุญู…ูŠู„!")
46
 
47
- # โœ… ุชูˆู„ูŠุฏ ุงู„ููŠุฏูŠูˆ - ู…ุน ุทุจุงุนุฉ Debug
48
  def generate_video(image_path, audio_path, prompt, output_dir="./output"):
49
- print(f"[๐ŸŽจ] generate_video() ุจุฏุฃ ุงู„ุชุดุบูŠู„")
50
- print(f"[๐Ÿ“] image_path: {image_path}")
51
- print(f"[๐Ÿ“] audio_path: {audio_path}")
52
- print(f"[๐Ÿ’ฌ] prompt: {prompt}")
53
- print(f"[๐Ÿ“] output_dir: {output_dir}")
54
-
55
  args = argparse.Namespace(
56
  **vars(args_template),
57
  image_path=image_path,
@@ -59,28 +66,18 @@ def generate_video(image_path, audio_path, prompt, output_dir="./output"):
59
  prompt=prompt,
60
  output_dir=output_dir
61
  )
 
62
 
63
- video_path = main(args, pipe, fantasytalking, wav2vec_processor, wav2vec)
64
- print(f"[โœ…] generate_video() ุงู†ุชู‡ู‰ุŒ ุงู„ููŠุฏูŠูˆ ู…ุญููˆุธ ู‡ู†ุง: {video_path}")
65
- return video_path
66
-
67
- # โœ… ุฎุท ุงู„ุฃู†ุงุจูŠุจ ุงู„ูƒุงู…ู„ - ู…ุน ุทุจุงุนุฉ Debug
68
  def full_pipeline(user_audio, user_image):
69
- print("[๐Ÿš€] full_pipeline() ุจุฏุฃ ุงู„ุชุดุบูŠู„")
70
- print(f"[๐Ÿ”Š] ู…ู„ู ุงู„ุตูˆุช ุงู„ู…ูุฏุฎู„: {user_audio}")
71
- print(f"[๐Ÿ–ผ๏ธ] ู…ู„ู ุงู„ุตูˆุฑุฉ ุงู„ู…ูุฏุฎู„ุฉ: {user_image}")
72
-
73
  print("๐ŸŽค ุชุญูˆูŠู„ ุงู„ุตูˆุช ุฅู„ู‰ ู†ุต...")
74
  user_text = speech_to_text(user_audio)
75
- print(f"[๐Ÿ“] ุงู„ู†ุต ุงู„ู…ุณุชุฎุฑุฌ ู…ู† ุงู„ุตูˆุช: {user_text}")
76
 
77
  print("๐Ÿ’ฌ ุชูˆู„ูŠุฏ ุงู„ุฑุฏ...")
78
  reply = generate_reply(user_text)
79
- print(f"[๐Ÿค–] ุงู„ุฑุฏ ุงู„ู…ููˆู„ุฏ: {reply}")
80
 
81
  print("๐Ÿ”Š ุชุญูˆูŠู„ ุงู„ุฑุฏ ุฅู„ู‰ ุตูˆุช...")
82
  reply_audio_path = generate_voice(reply)
83
- print(f"[๐Ÿ”Š] ู…ุณุงุฑ ุงู„ุตูˆุช ุงู„ู…ููˆู„ุฏ: {reply_audio_path}")
84
 
85
  print("๐Ÿ“ฝ๏ธ ุชูˆู„ูŠุฏ ุงู„ููŠุฏูŠูˆ...")
86
  Path("./output").mkdir(parents=True, exist_ok=True)
@@ -90,7 +87,6 @@ def full_pipeline(user_audio, user_image):
90
  prompt=reply
91
  )
92
 
93
- print(f"[โœ…] full_pipeline() ุงู†ุชู‡ู‰ุŒ ุงู„ููŠุฏูŠูˆ ุงู„ู†ู‡ุงุฆูŠ ู‡ู†ุง: {video_path}")
94
  return user_text, reply, reply_audio_path, video_path
95
 
96
  # โœ… ูˆุงุฌู‡ุฉ Gradio
@@ -114,5 +110,3 @@ with gr.Blocks(title="๐Ÿง  ุตูˆุชูƒ ูŠุญุฑูƒ ุตูˆุฑุฉ!") as demo:
114
  outputs=[user_text, reply_text, reply_audio, video_output])
115
 
116
  demo.launch(inbrowser=True, share=True)
117
-
118
-
 
import subprocess
import os
import sys
import shutil
from pathlib import Path
import argparse
import gradio as gr

# ✅ Clean up first: remove only the temporary folders from previous runs
# so stale outputs/caches don't accumulate on the disk-limited Space.
folders_to_delete = ["./output", "./__pycache__", "./.cache", "./temp"]
for folder in folders_to_delete:
    if os.path.exists(folder):
        print(f"🗑️ حذف {folder}")
        # Best-effort cleanup: a locked file or permission hiccup must not
        # abort application startup.
        shutil.rmtree(folder, ignore_errors=True)

# ✅ Report memory usage at startup (handy when debugging OOM on Spaces).
# psutil is a third-party package that may be missing locally, so the
# report is strictly best-effort instead of crashing the whole app.
try:
    import psutil

    mem = psutil.virtual_memory()
    print(f"🔍 RAM المستخدمة: {mem.used / 1e9:.2f} GB / {mem.total / 1e9:.2f} GB")
except ImportError:
    print("psutil not installed; skipping RAM report")

# ✅ Download the model checkpoints on first launch only.
if not os.path.exists("./models/fantasytalking_model.ckpt"):
    print("🛠️ جاري تحميل النماذج عبر download_models.py ...")
    # sys.executable guarantees the same interpreter/venv as this process;
    # check=True fails fast instead of continuing without the models.
    subprocess.run([sys.executable, "download_models.py"], check=True)
 
32
  from TTS_X.tts import generate_voice
33
  from FantasyTalking.infer import load_models, main
34
 
35
# ✅ Model constants: one shared template of inference settings; per-request
# fields (image_path, audio_path, prompt, output_dir) start empty and are
# overridden for each call.
_TEMPLATE_DEFAULTS = {
    # Checkpoint / model locations
    "fantasytalking_model_path": "./models/fantasytalking_model.ckpt",
    "wav2vec_model_dir": "./models/wav2vec2-base-960h",
    "wan_model_dir": "./models/Wan2.1-I2V-14B-720P",
    # Per-request inputs (filled in later)
    "image_path": "",
    "audio_path": "",
    "prompt": "",
    "output_dir": "./output",
    # Generation settings
    "image_size": 512,
    "audio_scale": 1.0,
    "prompt_cfg_scale": 5.0,
    "audio_cfg_scale": 5.0,
    "max_num_frames": 81,
    "inference_steps": 20,
    "fps": 23,
    "num_persistent_param_in_dit": None,
    "seed": 1111,
}
args_template = argparse.Namespace(**_TEMPLATE_DEFAULTS)
 
# Load the heavy models once at startup so every request reuses them.
# NOTE(review): the success message below was mojibake in the scrape
# ("โœ… ุชู… ..."); restored to the intended UTF-8 Arabic text.
pipe, fantasytalking, wav2vec_processor, wav2vec = load_models(args_template)
print("✅ تم التحميل!")
59
 
60
+ # โœ… ุชูˆู„ูŠุฏ ููŠุฏูŠูˆ
61
  def generate_video(image_path, audio_path, prompt, output_dir="./output"):
 
 
 
 
 
 
62
  args = argparse.Namespace(
63
  **vars(args_template),
64
  image_path=image_path,
 
66
  prompt=prompt,
67
  output_dir=output_dir
68
  )
69
+ return main(args, pipe, fantasytalking, wav2vec_processor, wav2vec)
70
 
71
+ # โœ… ุฎุท ุงู„ุฃู†ุงุจูŠุจ ุงู„ูƒุงู…ู„
 
 
 
 
72
  def full_pipeline(user_audio, user_image):
 
 
 
 
73
  print("๐ŸŽค ุชุญูˆูŠู„ ุงู„ุตูˆุช ุฅู„ู‰ ู†ุต...")
74
  user_text = speech_to_text(user_audio)
 
75
 
76
  print("๐Ÿ’ฌ ุชูˆู„ูŠุฏ ุงู„ุฑุฏ...")
77
  reply = generate_reply(user_text)
 
78
 
79
  print("๐Ÿ”Š ุชุญูˆูŠู„ ุงู„ุฑุฏ ุฅู„ู‰ ุตูˆุช...")
80
  reply_audio_path = generate_voice(reply)
 
81
 
82
  print("๐Ÿ“ฝ๏ธ ุชูˆู„ูŠุฏ ุงู„ููŠุฏูŠูˆ...")
83
  Path("./output").mkdir(parents=True, exist_ok=True)
 
87
  prompt=reply
88
  )
89
 
 
90
  return user_text, reply, reply_audio_path, video_path
91
 
92
  # โœ… ูˆุงุฌู‡ุฉ Gradio
 
110
  outputs=[user_text, reply_text, reply_audio, video_output])
111
 
112
  demo.launch(inbrowser=True, share=True)