vectostoreindex
- app.py (+4 -7)
- backend.py (+28 -0)
app.py CHANGED
@@ -42,9 +42,6 @@ model.config.sliding_window = 4096
 #model = model.to(device)
 model.eval()
 
-Settings.embed_model = InstructorEmbedding(model_name="hkunlp/instructor-base")
-
-
 ###------####
 # rag
 documents_paths = {
@@ -64,14 +61,14 @@ INSTRUCTION_1 = 'In italiano, chiedi sempre se la domanda si riferisce agli "Oss
 INSTRUCTION_2 = 'Sei un assistente che risponde sempre in italiano alle domande basandosi solo sulle informazioni fornite nel contesto che ti darò. Se non trovi informazioni, rispondi "Puoi chiedere maggiori informazioni all\'ufficio di riferimento.". Se invece la domanda è completamente fuori contesto, non rispondere e rammenta il topic del contesto'
 
 
-# Reading documents from disk
+"""# Reading documents from disk
 docs = SimpleDirectoryReader(input_files=["data/blockchainprova.txt"]).load_data()
 # Splitting the document into chunks with
 # predefined size and overlap
 parser = SentenceSplitter.from_defaults(
     chunk_size=256, chunk_overlap=64, paragraph_separator="\n\n"
 )
-nodes = parser.get_nodes_from_documents(docs)
+nodes = parser.get_nodes_from_documents(docs)"""
 
 
 @spaces.GPU()
@@ -117,7 +114,7 @@ def generate(
 
     ######
 
-    index = VectorStoreIndex(nodes)
+    """index = VectorStoreIndex(nodes)
     # get retriever
     retriever = index.as_retriever(similarity_top_k=3)
     relevant_chunks = retriever.retrieve(message)
@@ -126,7 +123,7 @@ def generate(
 
         info_message += f"{idx + 1}) {chunk.text[:64]}...\n"
     print(info_message)
-    gr.Info(info_message)
+    gr.Info(info_message)"""
 
     session_state["index"] = True
 
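Net effect in app.py: the module-level Settings.embed_model = InstructorEmbedding(...) call is removed, and both the document loading/chunking block and the per-request VectorStoreIndex retrieval inside generate() are disabled by wrapping them in triple-quoted strings. Below is a minimal sketch of the same RAG flow built once at import time instead of on every call; the import paths and the retrieve_context helper are assumptions for illustration, not part of this commit.

# Sketch only: build the index once at module import, then reuse it inside generate().
# Import paths assume the llama-index 0.10+ package layout used elsewhere in this Space.
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.instructor import InstructorEmbedding

Settings.embed_model = InstructorEmbedding(model_name="hkunlp/instructor-base")

docs = SimpleDirectoryReader(input_files=["data/blockchainprova.txt"]).load_data()
parser = SentenceSplitter.from_defaults(
    chunk_size=256, chunk_overlap=64, paragraph_separator="\n\n"
)
nodes = parser.get_nodes_from_documents(docs)
index = VectorStoreIndex(nodes)  # built once, not rebuilt per request

def retrieve_context(message: str, top_k: int = 3) -> str:
    # Mirrors the block now commented out in generate(): top-k retrieval plus a short preview.
    retriever = index.as_retriever(similarity_top_k=top_k)
    relevant_chunks = retriever.retrieve(message)
    return "\n".join(f"{i + 1}) {c.text[:64]}..." for i, c in enumerate(relevant_chunks))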
backend.py CHANGED
@@ -16,6 +16,7 @@ from llama_index.core.llms import ChatMessage, MessageRole , CompletionResponse
 from IPython.display import Markdown, display
 #from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 #from llama_index import LangchainEmbedding, ServiceContext
+from llama_index.llms.huggingface import HuggingFaceLLM
 
 
 
@@ -38,8 +39,35 @@ model.eval()"""
 Settings.embed_model = InstructorEmbedding(model_name="hkunlp/instructor-base")
 #Settings.embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'))
 
+SYSTEM_PROMPT = """You are an AI assistant that answers questions in a friendly manner, based on the given source documents. Here are some rules you always follow:
+- Generate human readable output, avoid creating output with gibberish text.
+- Generate only the requested output, don't include any other language before or after the requested output.
+- Never say thank you, that you are happy to help, that you are an AI agent, etc. Just answer directly.
+- Generate professional language typically used in business documents in North America.
+- Never generate offensive or foul language.
+"""
+
+query_wrapper_prompt = PromptTemplate(
+    "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "<</SYS>>\n\n{query_str}[/INST] "
+)
+
+
+model_id = "google/gemma-2-2b-it"
+llm = HuggingFaceLLM(
+    context_window=4096,
+    max_new_tokens=2048,
+    generate_kwargs={"temperature": 0.0, "do_sample": False},
+    query_wrapper_prompt=query_wrapper_prompt,
+    tokenizer_name=model_id,
+    model_name=model_id,
+    device_map="auto",
+    # change these settings below depending on your GPU
+    model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
+)
 
 Settings.llm = GemmaLLMInterface()
+Settings.llm = llm
+
 
 documents_paths = {
     'blockchain': 'data/blockchainprova.txt',
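Two things to note about the new backend.py block: Settings.llm = llm overrides the Settings.llm = GemmaLLMInterface() assignment just above it, so the HuggingFaceLLM wrapper is what actually serves queries, and the query wrapper applies Llama-2-style [INST]<<SYS>> tags to a Gemma model id. The block also relies on PromptTemplate and torch already being imported earlier in backend.py. Below is a minimal sketch of how the configured model would be exercised through the standard llama_index query path; the from_documents call and the example question are illustrative assumptions, not part of this commit.

# Sketch only: query flow once Settings.embed_model and Settings.llm are configured
# as in backend.py. The file path matches documents_paths['blockchain'].
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

docs = SimpleDirectoryReader(input_files=["data/blockchainprova.txt"]).load_data()
index = VectorStoreIndex.from_documents(docs)              # embeds with Settings.embed_model
query_engine = index.as_query_engine(similarity_top_k=3)   # answers with Settings.llm
response = query_engine.query("Cos'è una blockchain?")     # example question
print(response)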