NickyNicky committed on
Commit 6bb0e31 · 1 Parent(s): c139119

Update app.py

Files changed (1)
  1. app.py +84 -1
app.py CHANGED
@@ -1,3 +1,86 @@
  import gradio as gr

- gr.Interface.load("hackathon-somos-nlp-2023/T5unami-small").launch()
+
+ import pandas as pd
+ import time
+
+ import torch
+ from peft import PeftModel, PeftConfig
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSeq2SeqLM
+
  import gradio as gr
+ import speech_recognition as sr
+ from math import log2, pow
+ import os
+
+ import numpy as np
+ from scipy.fftpack import fft
+ import gc
+
+ peft_model_id='hackathon-somos-nlp-2023/T5unami-small-v1'
+
+ config = PeftConfig.from_pretrained(peft_model_id)
+ model2 = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map='auto')
+ tokenizer2 = AutoTokenizer.from_pretrained(peft_model_id)
+
+ model2 = PeftModel.from_pretrained(model2, peft_model_id)
+
+ Problema_tarjetaCredito= os.path.abspath("Problema_tarjetaCredito.ogg")
+ list_audios= [[Problema_tarjetaCredito]]
+
+ def gen_conversation(text,max_new_tokens=100):
+     text = "<SN>instruction: " + text + "\n "
+     batch = tokenizer2(text, return_tensors='pt')
+
+     output_tokens = model2.generate(**batch,
+                                     max_new_tokens=max_new_tokens,
+                                     eos_token_id= tokenizer2.eos_token_id,
+                                     pad_token_id= tokenizer2.pad_token_id,
+                                     bos_token_id= tokenizer2.bos_token_id,
+                                     early_stopping = True,
+                                     no_repeat_ngram_size=2,
+                                     repetition_penalty=1.2,
+                                     temperature=.69,
+                                     num_beams=3
+                                     )
+     gc.collect()
+     return tokenizer2.decode(output_tokens[0], skip_special_tokens=True).split("\n")[-1].replace("output:","")
+
+ conversacion = ""
+ def speech_to_text(audio_file, texto_adicional):
+     global conversacion
+     if audio_file is not None:
+         # Logic for audio input
+         r = sr.Recognizer()
+         audio_data = sr.AudioFile(audio_file)
+         with audio_data as source:
+             audio = r.record(source)
+         text_enrada=""
+
+         texto_generado = r.recognize_google(audio, language="es-ES")
+         texto_generado= f"[|Audio a texto|]:{texto_generado}\n" + "<br>[AGENTE]:"+gen_conversation(texto_generado,max_new_tokens=50)
+         texto_generado = "<div style='color: #66b3ff;'>" + texto_generado + "</div><br>"
+     else:
+         texto_generado= f"[|Solo texto|]:{texto_adicional}\n" + "<br>[AGENTE]:"+gen_conversation(texto_adicional,max_new_tokens=50)
+         texto_generado = "<div style='color: #66b3ff;'> " + texto_generado + "</div><br>"
+     conversacion += texto_generado
+     return conversacion
+
+ iface = gr.Interface(
+     fn=speech_to_text,
+     inputs=[gr.inputs.Audio(label="Voz", type="filepath"), gr.inputs.Textbox(label="Texto adicional")],
+     outputs=gr.outputs.HTML(label=["chatbot","state"]),
+     title="Chat bot para empresas.",
+     description="Este modelo convierte la entrada de voz o texto y hace inferencia",
+     examples=list_audios,
+     theme="default",
+     layout="vertical",
+     allow_flagging=False,
+     flagging_dir=None,
+     server_name=None,
+     server_port=None,
+     live=False,
+     capture_session=False
+ )
+
+ iface.launch()
+
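
For reference, a minimal sketch (not part of the commit) of how the adapter loaded above could be queried outside the Gradio UI. It mirrors the prompt format and generation settings of gen_conversation; it assumes transformers, peft and bitsandbytes are installed and a GPU is available for the 8-bit load, and the instruction text is only an illustrative example.

    from peft import PeftConfig, PeftModel
    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

    peft_model_id = "hackathon-somos-nlp-2023/T5unami-small-v1"
    config = PeftConfig.from_pretrained(peft_model_id)

    # Load the base seq2seq model in 8-bit and attach the LoRA adapter, as app.py does.
    base = AutoModelForSeq2SeqLM.from_pretrained(
        config.base_model_name_or_path, load_in_8bit=True, device_map="auto"
    )
    model = PeftModel.from_pretrained(base, peft_model_id)
    tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

    # Same "<SN>instruction: ...\n " prompt shape that gen_conversation builds.
    prompt = "<SN>instruction: Tengo un problema con mi tarjeta de credito\n "
    batch = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**batch, max_new_tokens=50, num_beams=3, no_repeat_ngram_size=2)
    print(tokenizer.decode(output[0], skip_special_tokens=True))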