Joao-Ale committed
Commit · 5020fa4
1 Parent(s): 394a3c9
adjust model
Browse files
- __init__.py +0 -0
- configuration/config +0 -2
- configuration/config.py +2 -0
- models/model.py +21 -6
- service/chatbot.py +10 -16
__init__.py
ADDED
File without changes
configuration/config
DELETED
@@ -1,2 +0,0 @@
-MODEL_PATH_BRISA_7B = "./models/brisa/BRisa-7B-Instruct-v0.2.Q4_K_S.gguf"
-MODEL_PATH_META_LLM_8B = "./models/llama/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"
configuration/config.py
ADDED
@@ -0,0 +1,2 @@
+MODEL_FLAN_T5 = "google/flan-t5-small"
+MODEL_FALCON_RW_1B = "tiiuae/falcon-rw-1b"
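As a hedged side note (not part of the commit): the two new constants are Hugging Face Hub repo IDs rather than local GGUF paths, so they can be passed straight to the transformers auto classes. A minimal sketch, assuming network access to the Hub (the tok_* names are illustrative):

from transformers import AutoTokenizer
from configuration.config import MODEL_FLAN_T5, MODEL_FALCON_RW_1B

# Both constants are Hub repo IDs, so from_pretrained can resolve them directly.
tok_flan = AutoTokenizer.from_pretrained(MODEL_FLAN_T5)
tok_falcon = AutoTokenizer.from_pretrained(MODEL_FALCON_RW_1B)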
models/model.py
CHANGED
@@ -1,10 +1,25 @@
-from …
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import torch
 
 
 class Model:
-    def __init__(self, …
-        self.…
+    def __init__(self, model_name):
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
-    def …
-    …
-    …
+    def generate_response_model_1(self, question: str) -> str:
+        prompt = f"Give a short and factual answer: {question}"
+        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+
+        with torch.no_grad():
+            outputs = self.model.generate(
+                **inputs,
+                max_length=30,
+                temperature=0.4,
+                top_k=40,
+                top_p=0.85,
+                repetition_penalty=1.2,
+                do_sample=True
+            )
+
+        return self.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
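A hedged usage sketch of the rewritten class (not part of the commit; it assumes the repo's package layout and passes the model ID positionally, since __init__ names its parameter model_name while chatbot.py calls Model(model_path=...)):

from models.model import Model
from configuration.config import MODEL_FLAN_T5

# Load google/flan-t5-small through the new wrapper and ask a short question.
bot = Model(MODEL_FLAN_T5)
print(bot.generate_response_model_1("What is the capital of France?"))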
service/chatbot.py
CHANGED
@@ -1,23 +1,17 @@
 from models.model import Model
-from config import …
+from configuration.config import MODEL_FLAN_T5, MODEL_FALCON_RW_1B
 from models.arbitrator import Arbitrator
+from sentence_transformers import SentenceTransformer, util
 
-model_a = Model(model_path=…
-model_b = Model(model_path=…
+model_a = Model(model_path=MODEL_FLAN_T5)
+model_b = Model(model_path=MODEL_FALCON_RW_1B)
 arbitrator = Arbitrator()
 
+model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
 
 
-def process_prompt(prompt: str) -> str:
-    print("Generating response A...")
-    response_a = model_a.generate_response(prompt.strip())
-    print("Response A generated:", response_a)
-
-    …
-    …
-    …
-    …
-    …
-    best_response = arbitrator.evaluate(prompt.strip(), response_a.strip(), response_b.strip())
-    print("Evaluation finished. Best response selected.")
-
-    return best_response
+def process_prompt(prompt, model_a, model_b):
+    embeddings = model.encode([prompt, model_a, model_b])
+    score1 = util.cos_sim(embeddings[0], embeddings[1])
+    score2 = util.cos_sim(embeddings[0], embeddings[2])
+    return model_a if score1 > score2 else model_b
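A hedged sketch of what the new selection step does (not part of the commit): inside process_prompt the parameters named model_a and model_b are treated as candidate answer strings, and the MiniLM encoder keeps whichever one is more cosine-similar to the prompt. The pick_closer_answer name below is illustrative:

from sentence_transformers import SentenceTransformer, util

encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

def pick_closer_answer(prompt: str, answer_a: str, answer_b: str) -> str:
    # Embed the prompt and both candidates, then keep the candidate whose
    # embedding is closer (by cosine similarity) to the prompt embedding.
    embeddings = encoder.encode([prompt, answer_a, answer_b])
    score_a = util.cos_sim(embeddings[0], embeddings[1])
    score_b = util.cos_sim(embeddings[0], embeddings[2])
    return answer_a if score_a > score_b else answer_b

print(pick_closer_answer(
    "What is the capital of France?",
    "The capital of France is Paris.",
    "France is a country in Europe.",
))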