Update README.md

README.md (changed)
````diff
@@ -111,7 +111,7 @@ Developers can easily integrate Llama-3.1-Storm-8B into their projects using pop
 ```python
 import transformers
 import torch
-model_id = "
+model_id = "EpistemeAI2/FireStorm-Llama-3.1-8B"
 pipeline = transformers.pipeline(
     "text-generation",
     model=model_id,
````
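For context around this change, below is a minimal end-to-end sketch of the `transformers` pipeline usage. The `model_kwargs`, `device_map`, sampling arguments, and example prompt are illustrative assumptions rather than part of the diff, and a recent `transformers` release that accepts chat-style message lists is assumed.

```python
import transformers
import torch

model_id = "EpistemeAI2/FireStorm-Llama-3.1-8B"

# Build the text-generation pipeline as in the updated hunk; dtype/device settings are assumed.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

# Chat-style input; the prompt itself is only an example.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is 2+2?"},
]
outputs = pipeline(messages, max_new_tokens=128, do_sample=False)

# The pipeline returns the conversation with the assistant reply appended last.
print(outputs[0]["generated_text"][-1]["content"])
```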
````diff
@@ -138,7 +138,7 @@ from transformers import AutoTokenizer, LlamaForCausalLM
 def format_prompt(user_query):
     template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"""
     return template.format(user_query)
-model_id = '
+model_id = 'EpistemeAI2/FireStorm-Llama-3.1-8B'
 tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
 model = LlamaForCausalLM.from_pretrained(
     model_id,
````
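The hunk shows only the prompt helper and the start of the model load. A hedged sketch of how the full snippet might run, assuming `bfloat16` weights, automatic device placement, greedy decoding, and an example query, none of which are visible in the diff:

```python
import torch
from transformers import AutoTokenizer, LlamaForCausalLM

def format_prompt(user_query):
    # Llama 3.1 chat template with a fixed system message, as in the hunk above.
    template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"""
    return template.format(user_query)

model_id = 'EpistemeAI2/FireStorm-Llama-3.1-8B'
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = LlamaForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # assumed dtype; not shown in the hunk
    device_map="auto",           # assumed device placement
)

prompt = format_prompt("What is 2+2?")
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64, do_sample=False)

# Decode only the newly generated tokens.
response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
print(response)  # Expected Output: '2 + 2 = 4' (per the context line in the next hunk)
```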
````diff
@@ -160,7 +160,7 @@ print(response) # Expected Output: '2 + 2 = 4'
 ```python
 from vllm import LLM, SamplingParams
 from transformers import AutoTokenizer
-model_id = "akjindal53244/Llama-3.1-Storm-8B" # FP8 model: "
+model_id = "akjindal53244/Llama-3.1-Storm-8B" # FP8 model: "EpistemeAI2/FireStorm-Llama-3.1-8B"
 num_gpus = 1
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 llm = LLM(model=model_id, tensor_parallel_size=num_gpus)
````
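Only the setup lines appear in this hunk. A hedged end-to-end sketch of the vLLM call follows; the sampling parameters, example prompt, and choice of checkpoint are assumptions, and the final `print` form mirrors the context line shown in the next hunk header.

```python
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

model_id = "EpistemeAI2/FireStorm-Llama-3.1-8B"  # the hunk keeps the Storm id and lists this repo as the FP8 variant
num_gpus = 1

tokenizer = AutoTokenizer.from_pretrained(model_id)
llm = LLM(model=model_id, tensor_parallel_size=num_gpus)
sampling_params = SamplingParams(max_tokens=128, temperature=0.0)  # assumed values

# Format a chat prompt with the model's own chat template.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is 2+2?"},
]
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)

# Same call pattern as the context line in the next hunk header.
print(llm.generate([prompt], sampling_params)[0].outputs[0].text.strip())
```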
````diff
@@ -176,12 +176,12 @@ print(llm.generate([prompt], sampling_params)[0].outputs[0].text.strip()) # Exp
 #### Use with [LitGPT](https://github.com/Lightning-AI/litgpt)
 ```bash
 pip install 'litgpt[all]'
-litgpt download
+litgpt download EpistemeAI2/FireStorm-Llama-3.1-8B --model_name meta-llama/Meta-Llama-3.1-8B
 ```
 
 ```python
 from litgpt import LLM
-llm = LLM.load(model="
+llm = LLM.load(model="EpistemeAI2/FireStorm-Llama-3.1-8B")
 llm.generate("What do Llamas eat?")
 ```
 
````
````diff
@@ -190,7 +190,7 @@ llm.generate("What do Llamas eat?")
 [**Llama-3.1-Storm-8B**](https://huggingface.co/collections/akjindal53244/storm-66ba6c96b7e24ecb592787a9) has impressive function calling capabilities compared to Meta-Llama-3.1-8B-Instruct as demonstrated by the BFCL benchmark.
 
 #### Prompt Format for Function Calling
-Llama-3.1-
+FireStorm-Llama-3.1-8B is trained with a specific system prompt for Function Calling:
 ```
 You are a function calling AI model. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into function. The user may use the terms function calling or tool use interchangeably.
 Here are the available functions:
````
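The system prompt is cut off in this hunk after "Here are the available functions:". As a purely illustrative sketch, the tool list (`LIST_OF_TOOLS`, referenced in the next hunk header) could be serialized to JSON and appended to that preamble; the exact formatting the README uses is not visible here, and the tool definition below is hypothetical.

```python
import json

# Hypothetical tool definition; name and fields are illustrative only.
LIST_OF_TOOLS = [
    {
        "name": "get_weather",
        "description": "Get the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    }
]

SYSTEM_PREAMBLE = (
    "You are a function calling AI model. You may call one or more functions to assist "
    "with the user query. Don't make assumptions about what values to plug into function. "
    "The user may use the terms function calling or tool use interchangeably.\n"
    "Here are the available functions:\n"
)

# Assumption: the tools are appended as a JSON array after the preamble; any wrapping
# tags the README may use are not shown in this hunk.
system_prompt = SYSTEM_PREAMBLE + json.dumps(LIST_OF_TOOLS, indent=2)
print(system_prompt)
```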
````diff
@@ -206,7 +206,7 @@ Above system prompt should be used with passing `LIST_OF_TOOLS` as input.
 import json
 from vllm import LLM, SamplingParams
 from transformers import AutoTokenizer
-model_id = "
+model_id = "EpistemeAI2/FireStorm-Llama-3.1-8B" # FP8 model: "akjindal53244/Llama-3.1-Storm-8B-FP8-Dynamic"
 num_gpus = 1
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 llm = LLM(model=model_id, tensor_parallel_size=num_gpus)
````
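Only the imports and setup are visible here. Below is a hedged sketch of how the function-calling system prompt and a user query might be passed through vLLM; the tool list, prompt assembly, user message, and sampling values are assumptions layered on the visible setup lines.

```python
import json
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

model_id = "EpistemeAI2/FireStorm-Llama-3.1-8B"
num_gpus = 1
tokenizer = AutoTokenizer.from_pretrained(model_id)
llm = LLM(model=model_id, tensor_parallel_size=num_gpus)

# Hypothetical single-tool list; the function-calling system prompt is assembled
# as sketched after the previous hunk (the README's exact formatting is not shown here).
LIST_OF_TOOLS = [
    {"name": "get_weather", "description": "Get the current weather for a city.",
     "parameters": {"type": "object", "properties": {"city": {"type": "string"}}}}
]
system_prompt = (
    "You are a function calling AI model. You may call one or more functions to assist with the user query. "
    "Don't make assumptions about what values to plug into function. "
    "The user may use the terms function calling or tool use interchangeably.\n"
    "Here are the available functions:\n" + json.dumps(LIST_OF_TOOLS)
)

messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": "What's the weather in Paris?"},
]
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
sampling_params = SamplingParams(max_tokens=256, temperature=0.0)  # assumed values

# The model is expected to respond with the function call it wants to make.
print(llm.generate([prompt], sampling_params)[0].outputs[0].text.strip())
```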