Joao-Ale committed
Commit 5020fa4 · 1 Parent(s): 394a3c9

adjust model

__init__.py ADDED
File without changes
configuration/config DELETED
@@ -1,2 +0,0 @@
- MODEL_PATH_BRISA_7B = "./models/brisa/BRisa-7B-Instruct-v0.2.Q4_K_S.gguf"
- MODEL_PATH_META_LLM_8B = "./models/llama/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"

configuration/config.py ADDED
@@ -0,0 +1,2 @@
+ MODEL_FLAN_T5 = "google/flan-t5-small"
+ MODEL_FALCON_RW_1B = "tiiuae/falcon-rw-1b"
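
For reference, a small hedged check (not part of this commit) that the two new Hub identifiers resolve, and a reminder of how their architectures differ, which matters for the loader chosen in models/model.py below:

    from transformers import AutoConfig

    from configuration.config import MODEL_FLAN_T5, MODEL_FALCON_RW_1B

    # Resolve only the configs (no weight download): flan-t5-small is an
    # encoder-decoder (seq2seq) model, falcon-rw-1b is a decoder-only causal LM.
    for name in (MODEL_FLAN_T5, MODEL_FALCON_RW_1B):
        cfg = AutoConfig.from_pretrained(name)
        print(name, cfg.model_type, "encoder-decoder:", getattr(cfg, "is_encoder_decoder", False))
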
models/model.py CHANGED
@@ -1,10 +1,25 @@
- from llama_cpp import Llama
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+ import torch


  class Model:
-     def __init__(self, model_path):
-         self.model = Llama(model_path=model_path)
-
-     def generate_response(self, prompt: str) -> str:
-         output = self.model(prompt, max_tokens=60)
-         return output['choices'][0]['text'].strip()
+     def __init__(self, model_name):
+         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+         self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+
+     def generate_response_model_1(self, question: str) -> str:
+         prompt = f"Give a short and factual answer: {question}"
+         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+
+         with torch.no_grad():
+             outputs = self.model.generate(
+                 **inputs,
+                 max_length=30,
+                 temperature=0.4,
+                 top_k=40,
+                 top_p=0.85,
+                 repetition_penalty=1.2,
+                 do_sample=True
+             )
+
+         return self.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
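
For reference, a hedged usage sketch of the rewritten class, assuming the configuration above; the question string is a made-up example. Note that google/flan-t5-small is a seq2seq checkpoint, whereas tiiuae/falcon-rw-1b is a decoder-only causal LM, so loading the latter through AutoModelForSeq2SeqLM would be expected to fail unless the class is extended to fall back to AutoModelForCausalLM:

    from configuration.config import MODEL_FLAN_T5
    from models.model import Model

    # Load the FLAN-T5 checkpoint and ask a short factual question.
    flan = Model(MODEL_FLAN_T5)
    print(flan.generate_response_model_1("What is the capital of France?"))
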
service/chatbot.py CHANGED
@@ -1,23 +1,17 @@
  from models.model import Model
- from config import MODEL_PATH_BRISA_7B, MODEL_PATH_META_LLM_8B
+ from configuration.config import MODEL_FLAN_T5, MODEL_FALCON_RW_1B
  from models.arbitrator import Arbitrator
+ from sentence_transformers import SentenceTransformer, util

- model_a = Model(model_path=MODEL_PATH_BRISA_7B)
- model_b = Model(model_path=MODEL_PATH_META_LLM_8B)
+ model_a = Model(model_name=MODEL_FLAN_T5)
+ model_b = Model(model_name=MODEL_FALCON_RW_1B)
  arbitrator = Arbitrator()

+ model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


- def process_prompt(prompt: str) -> str:
-     print("Generating response A...")
-     response_a = model_a.generate_response(prompt.strip())
-     print("Response A generated:", response_a)
-
-     print("Generating response B...")
-     response_b = model_b.generate_response(prompt.strip())
-     print("Response B generated:", response_b)
-
-     print("Evaluating...")
-     best_response = arbitrator.evaluate(prompt.strip(), response_a.strip(), response_b.strip())
-     print("Evaluation complete. Best response selected.")
-
-     return best_response
+ def process_prompt(prompt, model_a, model_b):
+     embeddings = model.encode([prompt, model_a, model_b])
+     score1 = util.cos_sim(embeddings[0], embeddings[1])
+     score2 = util.cos_sim(embeddings[0], embeddings[2])
+     return model_a if score1 > score2 else model_b
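
For context, a hedged, self-contained sketch of the selection logic process_prompt now implements: embed the prompt and the two candidate answers with all-MiniLM-L6-v2 and keep the answer whose embedding is most cosine-similar to the prompt. Note that the model_a / model_b parameters here are answer strings (they shadow the module-level Model instances), and the Arbitrator instance is no longer consulted. The question and answers below are made-up illustrations:

    from sentence_transformers import SentenceTransformer, util

    embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    prompt = "What is the capital of France?"           # hypothetical user question
    answer_a = "The capital of France is Paris."        # hypothetical output of model_a
    answer_b = "France is a large country in Europe."   # hypothetical output of model_b

    # Same rule as process_prompt: pick the answer closest to the prompt by cosine similarity.
    embeddings = embedder.encode([prompt, answer_a, answer_b])
    score_a = util.cos_sim(embeddings[0], embeddings[1])
    score_b = util.cos_sim(embeddings[0], embeddings[2])
    print(answer_a if score_a > score_b else answer_b)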