wedyanessam committed
Commit bad5ae3 · verified · 1 Parent(s): 93d986f

Update app.py

Files changed (1)
  1. app.py +22 -47
app.py CHANGED
@@ -5,34 +5,20 @@ import shutil
 from pathlib import Path
 import argparse
 import gradio as gr
+from STT.sst import speech_to_text
+from LLM.llm import generate_reply
+from TTS_X.tts import generate_voice
+from FantasyTalking.infer import load_models, main
 
-# ✅ Cleanup first: temporary folders only
-folders_to_delete = ["./output", "./__pycache__", "./.cache", "./temp"]
-for folder in folders_to_delete:
-    if os.path.exists(folder):
-        print(f"🗑️ Deleting {folder}")
-        shutil.rmtree(folder)
 
-# ✅ Print memory status
-import psutil
-mem = psutil.virtual_memory()
-print(f"🔍 RAM used: {mem.used / 1e9:.2f} GB / {mem.total / 1e9:.2f} GB")
 
-# ✅ Download the models if they don't exist
+# download the models if they don't exist
 if not os.path.exists("./models/fantasytalking_model.ckpt"):
-    print("🛠️ Downloading the models via download_models.py ...")
     subprocess.run(["python", "download_models.py"])
 
-# ✅ Set up the paths
-sys.path.append(os.path.abspath("."))
 
-# ✅ Import the components
-from STT.sst import speech_to_text
-from LLM.llm import generate_reply
-from TTS_X.tts import generate_voice
-from FantasyTalking.infer import load_models, main
 
-# ✅ Model constants
+
 args_template = argparse.Namespace(
     fantasytalking_model_path="./models/fantasytalking_model.ckpt",
     wav2vec_model_dir="./models/wav2vec2-base-960h",
@@ -52,17 +38,13 @@ args_template = argparse.Namespace(
     seed=1111
 )
 
-# ✅ Load the models
-print("🚀 Loading FantasyTalking and Wav2Vec...")
+
 pipe, fantasytalking, wav2vec_processor, wav2vec = load_models(args_template)
-print("✅ Loaded!")
+print("✅")
+
 
-# ✅ Generate a video
 def generate_video(image_path, audio_path, prompt, output_dir="./output"):
-    # copy args_template into a dict so it's easy to modify
    args_dict = vars(args_template).copy()
-
-    # update only what we need
    args_dict.update({
        "image_path": image_path,
        "audio_path": audio_path,
@@ -70,23 +52,15 @@ def generate_video(image_path, audio_path, prompt, output_dir="./output"):
        "output_dir": output_dir
    })
 
-    # convert the dict back into an argparse.Namespace
    args = argparse.Namespace(**args_dict)
-
    return main(args, pipe, fantasytalking, wav2vec_processor, wav2vec)
 
-# ✅ The full pipeline
+
 def full_pipeline(user_audio, user_image):
-    print("🎤 Converting speech to text...")
-    user_text = speech_to_text(user_audio)
 
-    print("💬 Generating the reply...")
+    user_text = speech_to_text(user_audio)
    reply = generate_reply(user_text)
-
-    print("🔊 Converting the reply to speech...")
    reply_audio_path = generate_voice(reply)
-
-    print("📽️ Generating the video...")
    Path("./output").mkdir(parents=True, exist_ok=True)
    video_path = generate_video(
        image_path=user_image,
@@ -96,24 +70,25 @@ def full_pipeline(user_audio, user_image):
 
    return user_text, reply, reply_audio_path, video_path
 
-# ✅ The Gradio interface
-with gr.Blocks(title="🧠 Your voice moves a picture!") as demo:
-    gr.Markdown("## 🎤➡️💬➡️🔊➡️📽️ From your voice to a talking video!")
+
+with gr.Blocks() as demo:
+    gr.Markdown(" Realtime Interactive Avatar 🎭")
 
    with gr.Row():
        with gr.Column():
-            audio_input = gr.Audio(label="🎙️ Upload your voice", type="filepath")
-            image_input = gr.Image(label="🖼️ Speaker image", type="filepath")
-            btn = gr.Button("🎬 Run")
+            audio_input = gr.Audio(label="Upload Voice", type="filepath")
+            image_input = gr.Image(label="Upload Image", type="filepath")
+            btn = gr.Button("Generate")
 
        with gr.Column():
-            user_text = gr.Textbox(label="📝 Transcribed text")
-            reply_text = gr.Textbox(label="🤖 Assistant reply")
-            reply_audio = gr.Audio(label="🔊 Spoken reply")
-            video_output = gr.Video(label="📽️ Output video")
+            user_text = gr.Textbox(label="Transcribed Text (Speech to Text)")
+            reply_text = gr.Textbox(label="Assistant Response (LLM)")
+            reply_audio = gr.Audio(label="Spoken Response (Text to Speech)")
+            video_output = gr.Video(label="Final Generated Video")
 
    btn.click(fn=full_pipeline,
              inputs=[audio_input, image_input],
              outputs=[user_text, reply_text, reply_audio, video_output])
 
demo.launch(inbrowser=True, share=True)
+
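
A note on the pattern generate_video keeps using: vars() on an argparse.Namespace returns its attribute dict, so copying that dict, updating a few keys, and rebuilding with argparse.Namespace(**d) yields a per-request namespace without mutating the shared template. A minimal self-contained sketch of the idiom (the helper name with_overrides and the toy field values are illustrative, not from the repo):

import argparse

# Shared template with model defaults (mirrors args_template in app.py).
args_template = argparse.Namespace(
    fantasytalking_model_path="./models/fantasytalking_model.ckpt",
    wav2vec_model_dir="./models/wav2vec2-base-960h",
    seed=1111,
)

def with_overrides(template, **overrides):
    # vars() exposes the Namespace's attribute dict; copy it so the
    # shared template is never mutated, then rebuild a fresh Namespace.
    args_dict = vars(template).copy()
    args_dict.update(overrides)
    return argparse.Namespace(**args_dict)

args = with_overrides(args_template, image_path="face.png", audio_path="reply.wav")
print(args.seed, args.image_path)             # 1111 face.png
print(hasattr(args_template, "image_path"))   # False -- template untouched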
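
One caveat in the retained download guard: subprocess.run without check=True ignores the downloader's exit status, so a failed download only surfaces later when load_models cannot find the checkpoint. A sketch of a stricter guard, assuming download_models.py exits non-zero on failure (sys.executable simply reuses the current interpreter):

import os
import subprocess
import sys

CKPT = "./models/fantasytalking_model.ckpt"

if not os.path.exists(CKPT):
    # check=True raises CalledProcessError on a non-zero exit,
    # failing fast instead of crashing later inside load_models().
    try:
        subprocess.run([sys.executable, "download_models.py"], check=True)
    except subprocess.CalledProcessError as e:
        sys.exit(f"Model download failed (exit code {e.returncode}).")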