gufett0 committed
Commit cd97913 · Parent: 140d8c6

vectostoreindex

Files changed (2):
  1. app.py +4 -7
  2. backend.py +28 -0
app.py CHANGED
@@ -42,9 +42,6 @@ model.config.sliding_window = 4096
 #model = model.to(device)
 model.eval()
 
-Settings.embed_model = InstructorEmbedding(model_name="hkunlp/instructor-base")
-
-
 ###------####
 # rag
 documents_paths = {
@@ -64,14 +61,14 @@ INSTRUCTION_1 = 'In italiano, chiedi sempre se la domanda si riferisce agli "Oss
 INSTRUCTION_2 = 'Sei un assistente che risponde sempre in italiano alle domande basandosi solo sulle informazioni fornite nel contesto che ti darò. Se non trovi informazioni, rispondi "Puoi chiedere maggiori informazioni all\'ufficio di riferimento.". Se invece la domanda è completamente fuori contesto, non rispondere e rammenta il topic del contesto'
 
 
-# Reading documents from disk
+"""# Reading documents from disk
 docs = SimpleDirectoryReader(input_files=["data/blockchainprova.txt"]).load_data()
 # Splitting the document into chunks with
 # predefined size and overlap
 parser = SentenceSplitter.from_defaults(
     chunk_size=256, chunk_overlap=64, paragraph_separator="\n\n"
 )
-nodes = parser.get_nodes_from_documents(docs)
+nodes = parser.get_nodes_from_documents(docs)"""
 
 
 @spaces.GPU()
@@ -117,7 +114,7 @@ def generate(
 
 ######
 
-index = VectorStoreIndex(nodes)
+"""index = VectorStoreIndex(nodes)
 # get retriver
 retriever = index.as_retriever(similarity_top_k=3)
 relevant_chunks = retriever.retrieve(message)
@@ -126,7 +123,7 @@
 
 info_message += f"{idx + 1}) {chunk.text[:64]}...\n"
 print(info_message)
-gr.Info(info_message)
+gr.Info(info_message)"""
 
 session_state["index"] = True
 
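The lines commented out above loaded the source document, chunked it, and rebuilt the `VectorStoreIndex` inside `generate()`, i.e. on every chat request. Per the commit title, that work appears to be moving into backend.py. Below is a minimal sketch of doing it once at import time, assuming only the LlamaIndex APIs already used in this diff; the module paths follow the llama-index 0.10+ package layout, and the `retrieve_context` helper is hypothetical, not part of the commit.

```python
# Sketch only: build the index once at module load instead of per request.
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.instructor import InstructorEmbedding

# Same embedding model app.py set before this commit.
Settings.embed_model = InstructorEmbedding(model_name="hkunlp/instructor-base")

# Read the document and split it into overlapping chunks,
# mirroring the commented-out app.py block.
docs = SimpleDirectoryReader(input_files=["data/blockchainprova.txt"]).load_data()
parser = SentenceSplitter.from_defaults(
    chunk_size=256, chunk_overlap=64, paragraph_separator="\n\n"
)
nodes = parser.get_nodes_from_documents(docs)

# Build the vector index once and reuse one retriever for all requests.
index = VectorStoreIndex(nodes)
retriever = index.as_retriever(similarity_top_k=3)

def retrieve_context(message: str) -> str:
    """Hypothetical helper: format the top chunks like app.py's info_message."""
    relevant_chunks = retriever.retrieve(message)
    return "\n".join(
        f"{idx + 1}) {chunk.text[:64]}..."
        for idx, chunk in enumerate(relevant_chunks)
    )
```

Building the index at import time keeps embedding work out of the GPU-decorated `generate()` path; `generate()` would then only need to call something like `retrieve_context(message)`.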
backend.py CHANGED
@@ -16,6 +16,7 @@ from llama_index.core.llms import ChatMessage, MessageRole , CompletionResponse
 from IPython.display import Markdown, display
 #from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 #from llama_index import LangchainEmbedding, ServiceContext
+from llama_index.llms.huggingface import HuggingFaceLLM
 
 
 
@@ -38,8 +39,35 @@ model.eval()"""
 Settings.embed_model = InstructorEmbedding(model_name="hkunlp/instructor-base")
 #Settings.embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'))
 
+SYSTEM_PROMPT = """You are an AI assistant that answers questions in a friendly manner, based on the given source documents. Here are some rules you always follow:
+- Generate human readable output, avoid creating output with gibberish text.
+- Generate only the requested output, don't include any other language before or after the requested output.
+- Never say thank you, that you are happy to help, that you are an AI agent, etc. Just answer directly.
+- Generate professional language typically used in business documents in North America.
+- Never generate offensive or foul language.
+"""
+
+query_wrapper_prompt = PromptTemplate(
+    "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "<</SYS>>\n\n{query_str}[/INST] "
+)
+
+
+model_id = "google/gemma-2-2b-it"
+llm = HuggingFaceLLM(
+    context_window=4096,
+    max_new_tokens=2048,
+    generate_kwargs={"temperature": 0.0, "do_sample": False},
+    query_wrapper_prompt=query_wrapper_prompt,
+    tokenizer_name=model_id,
+    model_name=model_id,
+    device_map="auto",
+    # change these settings below depending on your GPU
+    model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
+)
 
 Settings.llm = GemmaLLMInterface()
+Settings.llm = llm
+
 
 documents_paths = {
     'blockchain': 'data/blockchainprova.txt',
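Two things are worth noting about this hunk. First, `Settings.llm = GemmaLLMInterface()` is now immediately overridden by `Settings.llm = llm`, so the custom interface is dead code unless the earlier assignment is removed. Second, the `[INST]<<SYS>> ... [/INST]` wrapper is Llama-2's prompt format, while `google/gemma-2-2b-it` is trained on Gemma's own chat template; the hunk also references `PromptTemplate` and `torch`, which will raise `NameError` unless backend.py already imports them. Below is a hedged sketch of a Gemma-style wrapper; the template string follows Gemma's documented `<start_of_turn>` format, and whether it improves output here is untested.

```python
# Assumption, not part of the commit: wrap queries in Gemma's chat format
# instead of Llama-2's [INST]<<SYS>> markers. Gemma has no system role, so
# the system prompt is prepended to the user turn.
from llama_index.core import PromptTemplate

gemma_wrapper_prompt = PromptTemplate(
    "<start_of_turn>user\n"
    + SYSTEM_PROMPT
    + "\n{query_str}<end_of_turn>\n<start_of_turn>model\n"
)
```

Also note that with `do_sample=False` generation is greedy and the `temperature` value is ignored (recent transformers versions emit a warning about it), so `generate_kwargs={"do_sample": False}` alone would behave identically.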