ruslanmv committed
Commit ebaa21d
1 Parent(s): 15584a6

Update README.md

Files changed (1)
  1. README.md +52 -13
README.md CHANGED
@@ -49,29 +49,68 @@ pip install transformers accelerate bitsandbytes
  Here’s an example of how to load this fine-tuned model using Hugging Face's `transformers` library:

  ```python
- from transformers import AutoModelForCausalLM, AutoTokenizer
+ #!pip install bitsandbytes
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
  import torch

- # Load the model and tokenizer
- model_name = "Meta-Llama-3.1-8B-Text-to-SQL-4bit"
+ # Define the quantization config
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.float16,
+ )

  # Ensure you have the right device setup
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

- # Load the model and tokenizer from the Hugging Face Hub
- model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
+ # Load the model and tokenizer from the Hugging Face Hub with BitsAndBytesConfig
+ model_name = "ruslanmv/Meta-Llama-3.1-8B-Text-to-SQL-4bit"
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     device_map="auto",
+     quantization_config=bnb_config)
  tokenizer = AutoTokenizer.from_pretrained(model_name)

- # Example usage
- input_text = "Recupera il conteggio di tutte le righe nella tabella table1"
- inputs = tokenizer(input_text, return_tensors="pt").to(device)
+ # Define EOS token for terminating the sequences
+ EOS_TOKEN = tokenizer.eos_token

- # Generate output text
- outputs = model.generate(**inputs, max_length=50)
+ # Define Alpaca-style prompt template
+ alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+ ### Instruction:
+ {}
+
+ ### Input:
+ {}
+
+ ### Response:
+ """
+
+ # Format the prompt without the response part
+ prompt = alpaca_prompt.format(
+     "Provide the SQL query",
+     "Seleziona tutte le colonne della tabella table1 dove la colonna anni è uguale a 2020"
+ )
+
+ # Tokenize the prompt and generate text
+ inputs = tokenizer([prompt], return_tensors="pt").to(device)
+ outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)

- # Decode and print the generated text
- generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
- print(generated_text)
+ # Decode the generated text
+ generated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+
+ # Extract the generated response only (remove the prompt part)
+ response_start = generated_text.find("### Response:") + len("### Response:\n")
+ response = generated_text[response_start:].strip()
+
+ # Print the response (excluding the prompt)
+ print(response)
+
+ ```
+ and the answer is:
+ ```sql
+ SELECT * FROM table1 WHERE anni = 2020
  ```

  ### Model Features
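For non-Italian readers: the example input "Seleziona tutte le colonne della tabella table1 dove la colonna anni è uguale a 2020" is Italian for "Select all columns of table1 where the column anni equals 2020", which matches the generated SQL above.

The substance of the change is that the snippet now loads the checkpoint with 4-bit NF4 quantization through `BitsAndBytesConfig` instead of plain `torch_dtype=torch.float16`, so a CUDA-capable GPU and the `bitsandbytes` package are required. A minimal sketch for sanity-checking the quantized load, assuming the updated snippet above has already run (`get_memory_footprint` is a standard `transformers` model method):

```python
# Sketch: confirm the 4-bit load actually shrank the model. Assumes `model`
# from the README snippet above is in scope; get_memory_footprint() returns bytes.
footprint_gib = model.get_memory_footprint() / 1024**3
print(f"Memory footprint: {footprint_gib:.2f} GiB")  # fp16 8B weights would be roughly 16 GB
```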
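One detail worth flagging: the new snippet defines `EOS_TOKEN` but never passes it to `generate`, so decoding always runs until `max_new_tokens` is exhausted. A hedged variant that wires the EOS id in explicitly, using standard `generate` keyword arguments (assumes `model`, `tokenizer`, and `inputs` from the snippet above):

```python
# Sketch: stop generation at end-of-sequence instead of always emitting
# max_new_tokens. Both kwargs are standard transformers generate() parameters.
outputs = model.generate(
    **inputs,
    max_new_tokens=64,
    use_cache=True,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,  # silences the missing-pad-token warning
)
```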
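Since the snippet walks through several steps (prompt formatting, generation, response extraction), they fold naturally into a single helper for reuse. The `text_to_sql` function below is purely illustrative, not part of the repository, and assumes `model`, `tokenizer`, `device`, and `alpaca_prompt` from the snippet above are in scope:

```python
# Hypothetical helper (not part of the model repo): bundles the README's
# prompt-formatting, generation, and response-extraction steps into one call.
def text_to_sql(question: str) -> str:
    prompt = alpaca_prompt.format("Provide the SQL query", question)
    inputs = tokenizer([prompt], return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
    text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    marker = "### Response:"
    start = text.find(marker)
    # Fall back to the full decode if the marker is missing for some reason
    return text[start + len(marker):].strip() if start != -1 else text.strip()

# The example removed in this commit, Italian for
# "Retrieve the count of all rows in the table table1":
print(text_to_sql("Recupera il conteggio di tutte le righe nella tabella table1"))
```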