HusseinBashir committed (verified)
Commit: a47d925
1 Parent(s): 8e5335e

Update app.py

Files changed (1)
  1. app.py +54 -31
app.py CHANGED
@@ -1,5 +1,4 @@
 import os
-
 os.environ["HF_HOME"] = "/tmp"
 os.environ["TRANSFORMERS_CACHE"] = "/tmp"
 os.environ["TORCH_HOME"] = "/tmp"
@@ -36,7 +35,8 @@ number_words = {
     100: "boqol", 1000: "kun"
 }
 
-def number_to_words(number: int) -> str:
+def number_to_words(number):
+    number = int(number)
     if number < 20:
         return number_words[number]
     elif number < 100:
@@ -71,10 +71,20 @@ def number_to_words(number: int) -> str:
     else:
         return str(number)
 
-def normalize_text(text: str) -> str:
-    numbers = re.findall(r'\d+', text)
-    for num in numbers:
-        text = text.replace(num, number_to_words(int(num)))
+def normalize_text(text):
+    text = re.sub(r'(\d{1,3})(,\d{3})+', lambda m: m.group(0).replace(",", ""), text)
+    text = re.sub(r'\.\d+', '', text)
+    def replace_num(match):
+        return number_to_words(match.group())
+    text = re.sub(r'\d+', replace_num, text)
+    symbol_map = {
+        '$': 'doolar',
+        '=': 'egwal',
+        '+': 'balaas',
+        '#': 'haash'
+    }
+    for sym, word in symbol_map.items():
+        text = text.replace(sym, ' ' + word + ' ')
     text = text.replace("KH", "qa").replace("Z", "S")
     text = text.replace("SH", "SHa'a").replace("DH", "Dha'a")
     text = text.replace("ZamZam", "SamSam")
@@ -98,36 +108,49 @@ class TextIn(BaseModel):
 
 @app.post("/synthesize")
 async def synthesize_post(data: TextIn):
-    text = normalize_text(data.inputs)
-    inputs = tokenizer(text, return_tensors="pt").to(device)
-    with torch.no_grad():
-        output = model(**inputs)
-    waveform = (
-        output.waveform if hasattr(output, "waveform") else
-        output["waveform"] if isinstance(output, dict) and "waveform" in output else
-        output[0] if isinstance(output, (tuple, list)) else
-        None
-    )
-    if waveform is None:
-        return {"error": "Waveform not found in model output"}
+    paragraphs = [p.strip() for p in data.inputs.split('\n') if p.strip()]
     sample_rate = getattr(model.config, "sampling_rate", 22050)
-    wav_bytes = waveform_to_wav_bytes(waveform, sample_rate=sample_rate)
+    all_waveforms = []
+
+    for paragraph in paragraphs:
+        normalized = normalize_text(paragraph)
+        inputs = tokenizer(normalized, return_tensors="pt").to(device)
+        with torch.no_grad():
+            output = model(**inputs)
+        waveform = (
+            output.waveform if hasattr(output, "waveform") else
+            output["waveform"] if isinstance(output, dict) and "waveform" in output else
+            output[0] if isinstance(output, (tuple, list)) else
+            None
+        )
+        if waveform is None:
+            continue
+        all_waveforms.append(waveform)
+        silence = torch.zeros(1, sample_rate).to(waveform.device)
+        all_waveforms.append(silence)
+
+    if not all_waveforms:
+        return {"error": "No audio generated."}
+
+    final_waveform = torch.cat(all_waveforms, dim=-1)
+    wav_bytes = waveform_to_wav_bytes(final_waveform, sample_rate=sample_rate)
     return StreamingResponse(io.BytesIO(wav_bytes), media_type="audio/wav")
 
 @app.get("/synthesize")
 async def synthesize_get(text: str = Query(..., description="Text to synthesize"), test: bool = Query(False)):
-    if test:
-        paragraphs = text.count("\n") + 1  # Tirinta paragraphs-ka qoraalka
-        duration_s = paragraphs * 6  # 6 ilbiriqsi per paragraph
-        sample_rate = 22050
-        t = np.linspace(0, duration_s, int(sample_rate * duration_s), endpoint=False)
-        freq = 440
-        waveform = 0.5 * np.sin(2 * math.pi * freq * t).astype(np.float32)
-        pcm_waveform = (waveform * 32767).astype(np.int16)
-        buf = io.BytesIO()
-        scipy.io.wavfile.write(buf, rate=sample_rate, data=pcm_waveform)
-        buf.seek(0)
-        return StreamingResponse(buf, media_type="audio/wav")
+    if test:
+        paragraphs = text.count("\n") + 1
+        duration_s = paragraphs * 6
+        sample_rate = 22050
+        t = np.linspace(0, duration_s, int(sample_rate * duration_s), endpoint=False)
+        freq = 440
+        waveform = 0.5 * np.sin(2 * math.pi * freq * t).astype(np.float32)
+        pcm_waveform = (waveform * 32767).astype(np.int16)
+        buf = io.BytesIO()
+        scipy.io.wavfile.write(buf, rate=sample_rate, data=pcm_waveform)
+        buf.seek(0)
+        return StreamingResponse(buf, media_type="audio/wav")
+
    normalized = normalize_text(text)
    inputs = tokenizer(normalized, return_tensors="pt").to(device)
    with torch.no_grad():
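
The rewritten normalize_text first strips thousands separators and decimal fractions, then spells out the remaining digits with number_to_words and maps a few symbols to Somali words. The snippet below is a minimal standalone check of just the two new regexes and the symbol map; the helper name preprocess_numbers_and_symbols and the sample sentence are illustrative and not part of app.py, and the digit-to-word step is deliberately left out.

import re

def preprocess_numbers_and_symbols(text: str) -> str:
    # Strip thousands separators so "1,250" becomes "1250" (same regex as the commit).
    text = re.sub(r'(\d{1,3})(,\d{3})+', lambda m: m.group(0).replace(",", ""), text)
    # Drop the fractional part of decimals, e.g. "7.5" -> "7".
    text = re.sub(r'\.\d+', '', text)
    # Spell out a few symbols the same way the new symbol_map does.
    symbol_map = {'$': 'doolar', '=': 'egwal', '+': 'balaas', '#': 'haash'}
    for sym, word in symbol_map.items():
        text = text.replace(sym, ' ' + word + ' ')
    return text

print(preprocess_numbers_and_symbols("Qiimaha waa $1,250.75"))
# -> "Qiimaha waa  doolar 1250"

In the real app, the digits left after this preprocessing are then replaced by number_to_words before the consonant substitutions run.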
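With this commit, POST /synthesize splits the request body on newlines, synthesizes each paragraph separately, and joins the pieces with one second of silence, while GET /synthesize?test=true returns a 440 Hz placeholder tone without running the model. A minimal client sketch under the assumption that the app is reachable at a placeholder base URL (the URL, sample text, and output file names below are illustrative, not from the repo):

import requests

BASE_URL = "http://localhost:7860"  # placeholder; substitute the real Space URL

# POST: newline-separated paragraphs, sent in the "inputs" field of TextIn.
resp = requests.post(
    f"{BASE_URL}/synthesize",
    json={"inputs": "Salaan, adduunyo.\nTani waa cod tijaabo ah."},
)
resp.raise_for_status()
with open("speech.wav", "wb") as f:
    f.write(resp.content)  # concatenated paragraphs with 1 s of silence between them

# GET with test=true: the handler short-circuits and streams a sine tone.
resp = requests.get(f"{BASE_URL}/synthesize", params={"text": "tijaabo", "test": "true"})
with open("test_tone.wav", "wb") as f:
    f.write(resp.content)

Note that the POST handler can also return a JSON error object ({"error": ...}) instead of WAV bytes when no audio is produced, so a robust client should check the response content type before writing the file.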