# RAG System with FAISS Vector DB for Dr Llama 2 Experimental

In [2]:
!pip install -q pypdf
!pip install -q langchain openai faiss-gpu tiktoken
!pip install -q pypdf
!pip install -q sentence-transformers
!pip install -q transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m277.4/277.4 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m220.3/220.3 kB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m94.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.3/45.3 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.0/75.0 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━

## Medical Disease Database

In [1]:
import requests

def download_csv(url, filename):
    response = requests.get(url)
    response.raise_for_status()  # Check that the request was successful
    with open(filename, 'wb') as f:
        f.write(response.content)

# Replace with the URL of your CSV file
url = 'https://github.com/Kent0n-Li/ChatDoctor/raw/main/format_dataset.csv'

# Replace with the desired location and name of the downloaded file
filename = 'medical_db.csv'

download_csv(url, filename)


In [1]:
import matplotlib.pyplot as plt
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import PromptTemplate
from langchain.chains import RetrievalQA , ConversationChain
from langchain.llms import CTransformers
from langchain.chains.conversation.memory import ConversationBufferMemory

In [2]:
loader = CSVLoader(file_path='/content/medical_db.csv', encoding="utf-8", csv_args={
                'delimiter': ','})
data = loader.load()

In [5]:
data[0]

Document(page_content="idx: 0\ndisease: Panic disorder\nSymptom: ['Anxiety and nervousness', 'Depression', 'Shortness of breath', 'Depressive or psychotic symptoms', 'Sharp chest pain', 'Dizziness', 'Insomnia', 'Abnormal involuntary movements', 'Chest tightness', 'Palpitations', 'Irregular heartbeat', 'Breathing fast']\nreason: Panic disorder is an anxiety disorder characterized by recurring severe panic attacks. It may also include significant behavioral changes lasting at least a month and of ongoing worry about the implications or concern about having other attacks. The latter are called anticipatory attacks (DSM-IVR). Panic disorder is not the same as agoraphobia (fear of public places), although many afflicted with panic disorder also suffer from agoraphobia. Panic attacks cannot be predicted, therefore an individual may become stressed, anxious or worried wondering when the next panic attack will occur. Panic disorder may be differentiated as a medical condition, or chemical imba

## Vector Store Creation and Indexing with FAISS

In [3]:
def get_embedding_model(model_name ,model_kwargs ):
    embedding_model = HuggingFaceEmbeddings(
            model_name = model_name,  # also works with model_path
            model_kwargs = model_kwargs)
    return embedding_model

In [4]:
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = get_embedding_model(embedding_model_name ,
                                      # model_kwargs={'device': 'cpu'} ,
                                      model_kwargs={'device': 'cuda'}
                                     )

In [8]:
vectorstore = FAISS.from_documents(data, embeddings)

In [9]:
DB_FAISS_PATH = "/content/drive/MyDrive/Medical-Assistant-Project /VectorStore/db_faiss"
vectorstore.save_local(DB_FAISS_PATH)

In [6]:
DB_FAISS_PATH = "/content/drive/MyDrive/Medical-Assistant-Project /VectorStore/db_faiss"
vectorstore = FAISS.load_local(DB_FAISS_PATH, embeddings)

### Testing Retreival from Vector Store using Similarity

In [113]:
query = "The left leg seems broken , I guess it is some kind of fracture I have"
docs = vectorstore.similarity_search(query , k=2)

In [114]:
docs

[Document(page_content="idx: 527\ndisease: Fracture of the foot\nSymptom: ['Foot or toe pain', 'Ankle pain', 'Foot or toe swelling', 'Ankle swelling', 'Foot or toe weakness', 'Foot or toe stiffness or tightness']\nreason: A bone fracture (sometimes abbreviated FRX or Fx, Fx, or #) is a medical condition in which there is a break in the continuity of the bone. A bone fracture can be the result of high force impact or stress, or trivial injury as a result of certain medical conditions that weaken the bones, such as osteoporosis, bone cancer, or osteogenesis imperfecta, where the fracture is then properly termed a pathologic fracture.\nTestsAndProcedures: ['Radiographic imaging procedure', 'Plain x-ray (X ray)', 'Application of splint (Splinting)', 'Examination of foot', 'Wound care management', 'Orthopedic casting', 'Physical therapy exercises (Exercises)']\ncommonMedications: ['Hyaluronidase', 'Deferoxamine', 'Diflorasone Topical']", metadata={'source': '/content/medical_db.csv', 'row':

In [47]:
docs[0].page_content

"idx: 527\ndisease: Fracture of the foot\nSymptom: ['Foot or toe pain', 'Ankle pain', 'Foot or toe swelling', 'Ankle swelling', 'Foot or toe weakness', 'Foot or toe stiffness or tightness']\nreason: A bone fracture (sometimes abbreviated FRX or Fx, Fx, or #) is a medical condition in which there is a break in the continuity of the bone. A bone fracture can be the result of high force impact or stress, or trivial injury as a result of certain medical conditions that weaken the bones, such as osteoporosis, bone cancer, or osteogenesis imperfecta, where the fracture is then properly termed a pathologic fracture.\nTestsAndProcedures: ['Radiographic imaging procedure', 'Plain x-ray (X ray)', 'Application of splint (Splinting)', 'Examination of foot', 'Wound care management', 'Orthopedic casting', 'Physical therapy exercises (Exercises)']\ncommonMedications: ['Hyaluronidase', 'Deferoxamine', 'Diflorasone Topical']"

## Loading the LLM

In [11]:
!huggingface-cli login --token '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'

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [18]:
%pip install -q datasets bitsandbytes einops wandb
%pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m493.7/493.7 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m87.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m243.9/243.9 kB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━

In [24]:
!pip -qqq install bitsandbytes accelerate

In [12]:
import json
import os
from pprint import pprint

import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login

from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)



In [13]:
PEFT_MODEL = "Hrithik2212/Dr.Llama2-7b-qlora-chat-experimental"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4", #
    bnb_4bit_compute_dtype=torch.bfloat16,
)

config = PeftConfig.from_pretrained(PEFT_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

model = PeftModel.from_pretrained(model, PEFT_MODEL)

Downloading (…)model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/14 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00014.bin:   0%|          | 0.00/981M [00:00<?, ?B/s]

Downloading (…)l-00002-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

Downloading (…)l-00003-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

Downloading (…)l-00004-of-00014.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading (…)l-00005-of-00014.bin:   0%|          | 0.00/944M [00:00<?, ?B/s]

Downloading (…)l-00006-of-00014.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading (…)l-00007-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

Downloading (…)l-00008-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

Downloading (…)l-00009-of-00014.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading (…)l-00010-of-00014.bin:   0%|          | 0.00/944M [00:00<?, ?B/s]

Downloading (…)l-00011-of-00014.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading (…)l-00012-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

Downloading (…)l-00013-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

Downloading (…)l-00014-of-00014.bin:   0%|          | 0.00/847M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/14 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/676 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

Downloading (…)er_model.safetensors:   0%|          | 0.00/134M [00:00<?, ?B/s]

In [14]:
import warnings
warnings.filterwarnings('ignore')

In [68]:
generation_config = model.generation_config
generation_config.max_new_tokens = 4000
generation_config.temperature = 0.3
generation_config.top_p = 0.5
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id
generation_config.repetition_penalty = 1.5
generation_config

GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2,
  "max_new_tokens": 4000,
  "pad_token_id": 2,
  "repetition_penalty": 1.5,
  "temperature": 0.3,
  "top_p": 0.5
}

In [69]:
from transformers import pipeline
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer,repetition_penalty =1.5)


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PersimmonForCausalLM', 'PLBartFo

## Retrieval and Generation

In [92]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=pipe)

In [102]:
custom_prompt_template = """You are an AI doctor
Use the following pieces of information to simulate a doctor pateint conversation with the user's question.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct.
If you don't know the answer, just say that you don't know , don't try to make up an answer.
Finally also guide them to the correct clinical expert department for their problem

Context: {context}
Question: {question}

Guide the user to gain further information on his condition in relation to his Question
Helpful answer:
"""

def set_custom_prompt():
    """
    Prompt template for QA retrieval for each vectorstore
    """
    prompt = PromptTemplate(template=custom_prompt_template,
                            input_variables=['context', 'question'])
    return prompt


qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever= vectorstore.as_retriever(search_kwargs={'k': 1}),
    chain_type_kwargs={'prompt': set_custom_prompt()}
)


In [115]:
qa.run("I am feeling nauseous and I am unable to sleep,can you help me " )

'You may be suffering from primary Insoimia. Please visit your nearest hospital so they can diagnose it properly.. If this doesn’t work please contact us again we will do our best! :)'

In [116]:
qa.run("Suggest me some remedies for headache , it happens once in a  while")

'You should consult your physician about this issue as soon possible because there might be more serious problems behind these symptons like migraine which needs special treatment. If they persist longer than usual then please visit urgent care center near u so we could help better!!!!!!'

In [117]:
prompt = "Hi doctor,I am getting one-sided headache. I do not get them often. They are behind my right eye and temple area. Now this time I am also fighting with heavy cold and cough. It came on late morning today and it hurts more when I cough or shake my head or bend over. Prior to this, I had the same headache about four months ago. At that time it came on midafternoon and lasted till next morning. Prior to that, I had the same headache exactly on the right side and lasted for a day. I am on CCB and Protonix. I am scared of brain tumor. Please help"
result = qa.run(prompt)

In [118]:
print("Prompt : ")
pprint(prompt)
print()
print("Response : ")
pprint(result)

Prompt : 
('Hi doctor,I am getting one-sided headache. I do not get them often. They are '
 'behind my right eye and temple area. Now this time I am also fighting with '
 'heavy cold and cough. It came on late morning today and it hurts more when I '
 'cough or shake my head or bend over. Prior to this, I had the same headache '
 'about four months ago. At that time it came on midafternoon and lasted till '
 'next morning. Prior to that, I had the same headache exactly on the right '
 'side and lasted for a day. I am on CCB and Protonix. I am scared of brain '
 'tumor. Please help')

Query : 
('You have symptons which could be related either as migraine attack(which '
 'usually occurs at night), cluster attacks/headachessyndrome where there may '
 'occur multiple episodes per year but each episode will only affect partof '
 'your scalpee region like yours. Or its possible they mightbe due some kind '
 'of trauma /injury causing nerve damage leadingto these types off symptonns.. '
 'But