# Fine-Tuning Llama Chat 7B model on medical domain knowledge

In [1]:
%%capture
%pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git
%pip install -q datasets bitsandbytes einops wandb
%pip install --upgrade -q huggingface_hub

In [2]:
!huggingface-cli login --token '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'

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [4]:
## imports
import json
import os
from pprint import pprint

import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login

from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)


In [61]:
print("torch.cuda.memory_allocated: %fGB"%(torch.cuda.memory_allocated(0)/1024/1024/1024))
print("torch.cuda.memory_reserved: %fGB"%(torch.cuda.memory_reserved(0)/1024/1024/1024))
print("torch.cuda.max_memory_reserved: %fGB"%(torch.cuda.max_memory_reserved(0)/1024/1024/1024))

torch.cuda.memory_allocated: 4.573752GB
torch.cuda.memory_reserved: 5.058594GB
torch.cuda.max_memory_reserved: 8.244141GB


## 1) Downloading dataset

In [None]:
import gdown
url = 'https://drive.google.com/file/d/1lyfqIwlLSClhgrCutWuEe_IACNq6XNUt/view'
file_id = url.split('/')[-2]
download_url = 'https://drive.google.com/uc?id=' + file_id
gdown.download(download_url, output=None, quiet=False)


Downloading...
From: https://drive.google.com/uc?id=1lyfqIwlLSClhgrCutWuEe_IACNq6XNUt
To: /content/HealthCareMagic-100k.json
100%|██████████| 144M/144M [00:02<00:00, 69.5MB/s]


'HealthCareMagic-100k.json'

In [None]:
!mv /content/HealthCareMagic-100k.json /content/drive/MyDrive/Med-Bot/TrainData

## Loading the dataset

In [8]:
medical_dataset = "Kabatubare/medical"
dataset = load_dataset(medical_dataset, split="train")

Downloading readme:   0%|          | 0.00/1.09k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/21.1M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [9]:
from pprint import pprint as pp
print(f"Number of rows in the datset : {len(dataset)}")
pp(dataset[0])

Number of rows in the datset : 23437
{'Answer': 'adhd and bipolar mood disorder (bmd) can coexist or be mistaken '
           'for one another. bmd usually is not diagnosed until young '
           'adulthood. however studies have shown that the earlier a person is '
           'diagnosed with bmd the more likely he is to have been diagnosed '
           'with adhd previously. in this case i would just like to reiterate '
           'that there is not enough information to discuss either possibility '
           'for your son. you mentioned that he becomes hyperactive for 3 '
           'weeks but not what his behaviors are like during those 10 days. '
           'you also do not mention irritability or mood swings just adhd '
           'symptoms. keep documenting the symptoms you are concerned about '
           'including what goes on in the home and at school when you see '
           'changes in behavior (do you work those weeks does he visit a '
           'relative or have a dif

In [12]:
# Random sample
import random
random_index = random.randint(0, len(dataset)-1)
random_example = dataset[random_index]
pp(random_example, sort_dicts=False)


{'Context': 'You are a medical knowledge assistant trained to provide '
            'information and guidance on various health-related topics.',
 'Question': 'i stop taking the shots on sept 30',
 'Answer': 'most helpful answer hi candy so how long was you on them? if your '
           'trying for pregnancy this would answer your question. if it was '
           'depo shot you can google how long for pregnancy after depo shot '
           'your find a few forums all from other women saying how long its '
           'taken sometimes short and sometimes very long the odd few never '
           'all because of depo shot. good luck'}


## 3) Traning Configuration
#### Model Configuration

In [13]:
# Load Llama-7b bf16 shared Model & Tokenizer

MODEL_NAME = "TinyPixel/Llama-2-7B-bf16-sharded"


bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4", #
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    quantization_config=bnb_config,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

(…)7B-bf16-sharded/resolve/main/config.json:   0%|          | 0.00/626 [00:00<?, ?B/s]

(…)esolve/main/pytorch_model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/14 [00:00<?, ?it/s]

pytorch_model-00001-of-00014.bin:   0%|          | 0.00/981M [00:00<?, ?B/s]

pytorch_model-00002-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

pytorch_model-00003-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

pytorch_model-00004-of-00014.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

pytorch_model-00005-of-00014.bin:   0%|          | 0.00/944M [00:00<?, ?B/s]

pytorch_model-00006-of-00014.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

pytorch_model-00007-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

pytorch_model-00008-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

pytorch_model-00009-of-00014.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

pytorch_model-00010-of-00014.bin:   0%|          | 0.00/944M [00:00<?, ?B/s]

pytorch_model-00011-of-00014.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

pytorch_model-00012-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

pytorch_model-00013-of-00014.bin:   0%|          | 0.00/967M [00:00<?, ?B/s]

pytorch_model-00014-of-00014.bin:   0%|          | 0.00/847M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/14 [00:00<?, ?it/s]

(…)rded/resolve/main/generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

(…)arded/resolve/main/tokenizer_config.json:   0%|          | 0.00/676 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

(…)bf16-sharded/resolve/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

(…)ded/resolve/main/special_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

In [14]:

def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )


In [15]:
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [16]:
# QLora Config
from peft import LoraConfig, get_peft_model

lora_alpha = 16
lora_dropout = 0.1
lora_r = 64

config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 33554432 || all params: 3533967360 || trainable%: 0.9494833591219133


In [17]:
# Inference Before Training

prompt = f"""
: Hi , I am having headache for the past two days after coming back from an amusement park trip , suggest me some remedies
:
""".strip()
print(prompt)


: Hi , I am having headache for the past two days after coming back from an amusement park trip , suggest me some remedies 
:


In [19]:

generation_config = model.generation_config
generation_config.max_new_tokens = 300
generation_config.temperature = 0.3
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id
generation_config

GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2,
  "max_new_tokens": 300,
  "pad_token_id": 2,
  "temperature": 0.3,
  "top_p": 0.7
}

In [20]:
%%time
# Specify the target device for model execution, typically a GPU.
device = "cuda:0"

# Tokenize the input prompt and move it to the specified device.
encoding = tokenizer(prompt, return_tensors="pt").to(device)

# Run model inference in evaluation mode (inference_mode) for efficiency.
with torch.inference_mode():
    outputs = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        generation_config=generation_config,
    )


# Decode the generated output and print it, excluding special tokens.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))



: Hi , I am having headache for the past two days after coming back from an amusement park trip , suggest me some remedies 
: : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : :
CPU times: user 29.8 s, sys: 914 ms, total: 30.7 s
Wall time: 35 s


In [24]:
def generate_prompt(data_point):
    return f"""
: {data_point["Question"]}
: {data_point["Answer"]}
""".strip()

def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenizer(full_prompt, padding=True, truncation=True)
    return tokenized_full_prompt

In [25]:
data = dataset.shuffle().map(generate_and_tokenize_prompt)

Map:   0%|          | 0/23437 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [26]:
data

Dataset({
    features: ['Context', 'Question', 'Answer', 'input_ids', 'attention_mask'],
    num_rows: 23437
})

## 4) Training

In [27]:
OUTPUT_DIR = "/content/drive/MyDrive/Medical-Assistant-Project/experiments"

In [29]:
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    learning_rate=2e-4,
    fp16=True,
    save_total_limit=3,
    logging_steps=1,
    output_dir=OUTPUT_DIR,
    max_steps=80,
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    report_to="tensorboard",
)

trainer = transformers.Trainer(
    model=model,
    train_dataset=data,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
1,2.3719
2,2.4328
3,2.7103
4,2.1478
5,2.0532
6,2.9539
7,2.2341
8,2.4387
9,2.0942
10,2.3405


TrainOutput(global_step=80, training_loss=2.1052857413887978, metrics={'train_runtime': 558.9016, 'train_samples_per_second': 0.573, 'train_steps_per_second': 0.143, 'total_flos': 2424233268830208.0, 'train_loss': 2.1052857413887978, 'epoch': 0.01})

## Inference

In [46]:
import warnings
warnings.filterwarnings('ignore')

In [49]:
from transformers import pipeline
prompt = "preferred food for sugar patients"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200 ,repetition_penalty =1.5)
result = pipe(f"<s>[INST] {prompt} [/INST]")


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PersimmonForCausalLM', 'PLBartFo

In [50]:
pprint(result[0]['generated_text'])

('<s>[INST] preferred food for sugar patients [/INST]\n'
 'sugar patient should eat more of the following: fruits, vegetables and whole '
 'grains. they are high in fiber which helps to slow down digestion process so '
 'that blood glucose level will not rise too quickly after eating a meal or '
 'snack containing these types if carbohydrates (starches). this is important '
 "because when you have diabetes your body does'nt produce enough insulin "
 'hormone needed by cells throughout our bodies including muscles liver brain '
 'kidneys etc..to convert sugars into energy thus causing elevated levels '
 'within circulation leading towards complications such as blindness nerve '
 'damage heart attack stroke depression anxiety obstructive sleep apnea '
 'hypertension retinal detachment cataract glaucoma neuropathy amputations '
 'impotence erectile dysfunction loss weight gain fatigue weakened immune '
 'system increased risk cancer premature death among other things! it also '
 'means ha

In [57]:
prompt = "Hi , I am having headache for the past two days after coming back from an amusement park trip , suggest me some remedies"
result = pipe(f"<s>[INST] {prompt} [/INST]")
pprint(result[0]['generated_text'])


Both `max_new_tokens` (=300) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


('<s>[INST] Hi , I am having headache for the past two days after coming back '
 'from an amusement park trip , suggest me some remedies [/INST]\n'
 'Hi, you should take rest and avoid stress. You can also apply ice packs on '
 'your forehead to reduce pain. Take paracetamol or ibuprofen as per '
 'instructions of doctor if required otherwise do not use it without '
 'consulting a physician first because these medicines are harmful in high '
 'doses so please be careful while taking them especially when pregnant women '
 'have been advised against using any kind of medicine during their gestation '
 'period unless prescribed by her gynaecologist only! If symptoms persist then '
 'seek medical attention immediately before things get worse than expected due '
 'lack proper treatment at right time which could lead serious complications '
 'later down line such heart attack stroke etc.. Good luck hope everything '
 'goes well soon enough :)')


In [54]:
prompt = "Hi doctor,I am getting one-sided headache. I do not get them often. They are behind my right eye and temple area. Now this time I am also fighting with heavy cold and cough. It came on late morning today and it hurts more when I cough or shake my head or bend over. Prior to this, I had the same headache about four months ago. At that time it came on midafternoon and lasted till next morning. Prior to that, I had the same headache exactly on the right side and lasted for a day. I am on CCB and Protonix. I am scared of brain tumor. Please help"
result = pipe(f"<s>[INST] {prompt} [/INST]")
pprint(result[0]['generated_text'])
## The model hallucinates and gives some random information

Both `max_new_tokens` (=300) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


('<s>[INST] Hi doctor,I am getting one-sided headache. I do not get them '
 'often. They are behind my right eye and temple area. Now this time I am also '
 'fighting with heavy cold and cough. It came on late morning today and it '
 'hurts more when I cough or shake my head or bend over. Prior to this, I had '
 'the same headache about four months ago. At that time it came on '
 'midafternoon and lasted till next morning. Prior to that, I had the same '
 'headache exactly on the right side and lasted for a day. I am on CCB and '
 'Protonix. I am scared of brain tumor. Please help [/INST]\n'
 'Hi Dr., thank you so much! You have been very helpful in answering all our '
 'questions regarding medications etc.. My husband has stage 4 lung cancer '
 '(adenocarcinoma) he is currently taking Tarceva which we were told was '
 'working well but now his blood counts keep dropping low enough where they '
 "want him off treatment until numbers come back up again. He'll be starting "
 'chemo soon 

## Save the model

In [56]:
model.push_to_hub(
    "Hrithik2212/Dr.Llama2-7b-qlora-chat-experimental", use_auth_token=True)

adapter_model.safetensors:   0%|          | 0.00/134M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Hrithik2212/Dr.Llama2-7b-qlora-chat-experimental/commit/23e94aaea5d4c7fcf88345d902c72f747332f68f', commit_message='Upload model', commit_description='', oid='23e94aaea5d4c7fcf88345d902c72f747332f68f', pr_url=None, pr_revision=None, pr_num=None)

In [59]:
del model
del trainer
torch.cuda.empty_cache()

## Code to load and run the model

In [67]:
!mv /content/drive/MyDrive/Medical-Assistant-Project1/runs /content/drive/MyDrive/Medical-Assistant-Project/


In [68]:
PEFT_MODEL = "Hrithik2212/Dr.Llama2-7b-qlora-chat-experimental"

config = PeftConfig.from_pretrained(PEFT_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

model = PeftModel.from_pretrained(model, PEFT_MODEL)

(…)imental/resolve/main/adapter_config.json:   0%|          | 0.00/500 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/14 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/134M [00:00<?, ?B/s]

In [69]:
generation_config = model.generation_config
generation_config.max_new_tokens = 200
generation_config.temperature = 0.3
generation_config.top_p = 0.5
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id
generation_config

GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2,
  "max_new_tokens": 200,
  "pad_token_id": 2,
  "temperature": 0.3,
  "top_p": 0.5
}

In [70]:
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200 ,repetition_penalty = 1.5)


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PersimmonForCausalLM', 'PLBartFo

In [71]:
prompt = "I've been experiencing persistent headaches for the past week. The pain seems to be concentrated on the left side of my head. It's not unbearable, but it's been quite consistent. I've tried over-the-counter pain relievers, but they provide only temporary relief. Additionally, I've noticed some sensitivity to light. Could this be a sign of something serious, or is it likely a minor issue? What steps should I take to address this?"
result = pipe(f"<s>[INST] {prompt} [/INST]")



Both `max_new_tokens` (=200) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


In [72]:
pprint(result)

[{'generated_text': "<s>[INST] I've been experiencing persistent headaches for "
                    'the past week. The pain seems to be concentrated on the '
                    "left side of my head. It's not unbearable, but it's been "
                    "quite consistent. I've tried over-the-counter pain "
                    'relievers, but they provide only temporary relief. '
                    "Additionally, I've noticed some sensitivity to light. "
                    'Could this be a sign of something serious, or is it '
                    'likely a minor issue? What steps should I take to address '
                    'this? [/INST]\n'
                    'I have had similar symptoms and was diagnosed with '
                    'migraines about 10 years ago (age:35). My doctor '
                    'prescribed me Imitrex which has helped tremendously in '
                    'reducing frequency as well as severity when i do get them '
                    'nowadays! If yo

In [73]:
prompt = "I've been having difficulty falling asleep at night. It takes me a long time to drift off, and even when I do, I often wake up multiple times during the night. As a result, I find myself feeling tired and lacking energy during the day. I've tried adjusting my sleep routine and creating a calming bedtime environment, but the issue persists. Are there any suggestions or recommendations you have to help improve my sleep quality?"
result = pipe(f"<s>[INST] {prompt} [/INST]")



Both `max_new_tokens` (=200) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


In [74]:
pprint(result)

[{'generated_text': "<s>[INST] I've been having difficulty falling asleep at "
                    'night. It takes me a long time to drift off, and even '
                    'when I do, I often wake up multiple times during the '
                    'night. As a result, I find myself feeling tired and '
                    "lacking energy during the day. I've tried adjusting my "
                    'sleep routine and creating a calming bedtime environment, '
                    'but the issue persists. Are there any suggestions or '
                    'recommendations you have to help improve my sleep '
                    'quality? [/INST]\n'
                    'Sometimes it can be difficult for people with insomnia '
                    '(difficulty getting enough restful shut-eye) because they '
                    'are not able get into deep stages of their natural '
                    'circadian rhythm cycle which is what allows us all humans '
                    'regardles

---