ruslanmv
/

Meta-Llama-3.1-8B-Text-to-SQL-4bit

@@ -49,29 +49,68 @@ pip install transformers accelerate bitsandbytes
 Here’s an example of how to load this fine-tuned model using Hugging Face's `transformers` library:
 ```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-# Load the model and tokenizer
-model_name = "Meta-Llama-3.1-8B-Text-to-SQL-4bit"
 # Ensure you have the right device setup
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Load the model and tokenizer from the Hugging Face Hub
-model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-# Example usage
-input_text = "Recupera il conteggio di tutte le righe nella tabella table1"
-inputs = tokenizer(input_text, return_tensors="pt").to(device)
-# Generate output text
-outputs = model.generate(**inputs, max_length=50)
-# Decode and print the generated text
-generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-print(generated_text)
 ```
 ### Model Features

 Here’s an example of how to load this fine-tuned model using Hugging Face's `transformers` library:
 ```python
+#!pip install bitsandbytes
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
+# Define the quantization config
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.float16,
+)
 # Ensure you have the right device setup
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Load the model and tokenizer from the Hugging Face Hub with BitsAndBytesConfig
+model_name = "ruslanmv/Meta-Llama-3.1-8B-Text-to-SQL-4bit"
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    device_map="auto",
+    quantization_config=bnb_config)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Define EOS token for terminating the sequences
+EOS_TOKEN = tokenizer.eos_token
+# Define Alpaca-style prompt template
+alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+### Instruction:
+{}
+### Input:
+{}
+### Response:
+"""
+# Format the prompt without the response part
+prompt = alpaca_prompt.format(
+    "Provide the SQL query",
+    "Seleziona tutte le colonne della tabella table1 dove la colonna anni è uguale a 2020"
+)
+# Tokenize the prompt and generate text
+inputs = tokenizer([prompt], return_tensors="pt").to(device)
+outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
+# Decode the generated text
+generated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+# Extract the generated response only (remove the prompt part)
+response_start = generated_text.find("### Response:") + len("### Response:\n")
+response = generated_text[response_start:].strip()
+# Print the response (excluding the prompt)
+print(response)
+```
+and the answer is:
+```
+SELECT * FROM table1 WHERE anni = 2020
 ```
 ### Model Features