---
license: apache-2.0
language:
- en
---

# Gorilla-openfunctions-v0 - Sharded

<img src="https://gorilla.cs.berkeley.edu/assets/img/blog_post_4_gorilla_open_function_calling.png" alt="Gorilla Open Functions" width="800" style="margin-left:'auto' margin-right:'auto' display:'block'"/>

🧩🧩🧩 Just a **sharded version of [gorilla-openfunctions-v0](https://huggingface.co/gorilla-llm/gorilla-openfunctions-v0)**.

💻 Using this version, you can smoothly load the model on Colab and play with it!
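
A sharded checkpoint contains the same weights as the original, just split across several smaller files, which keeps peak memory usage low while downloading and loading. As a minimal sketch, a copy like this can be produced with `save_pretrained` and its `max_shard_size` argument (an assumed workflow, not the exact command used for this repo):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

src = "gorilla-llm/gorilla-openfunctions-v0"
model = AutoModelForCausalLM.from_pretrained(src)
tokenizer = AutoTokenizer.from_pretrained(src)

# max_shard_size caps the size of each weight file on disk;
# "2GB" here is an assumption, not the value used for this repo
model.save_pretrained("gorilla-openfunctions-v0-sharded", max_shard_size="2GB")
tokenizer.save_pretrained("gorilla-openfunctions-v0-sharded")
```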

From the [original model card](https://huggingface.co/gorilla-llm/gorilla-openfunctions-v0):
> Gorilla OpenFunctions extends the Large Language Model (LLM) Chat Completion feature to formulate executable API calls given natural language instructions and API context.

## Usage
This version of the model is meant primarily to run smoothly on **Colab**.
I suggest loading the model with **8-bit quantization**, so that you have some free GPU memory to perform inference.

*However, it is perfectly fine to load the model in half-precision or with stronger quantization (4-bit); see the sketch after the example below.*

```python
! pip install transformers accelerate bitsandbytes

import json
import torch
from transformers import pipeline

def get_prompt(user_query: str, functions: list = []) -> str:
    """
    Generates a conversation prompt based on the user's query and a list of functions.

    Parameters:
    - user_query (str): The user's query.
    - functions (list): A list of functions to include in the prompt.

    Returns:
    - str: The formatted conversation prompt.
    """
    if len(functions) == 0:
        return f"USER: <<question>> {user_query}\nASSISTANT: "
    functions_string = json.dumps(functions)
    return f"USER: <<question>> {user_query} <<function>> {functions_string}\nASSISTANT: "

# Pipeline setup
pipe = pipeline(
    "text-generation",
    model="anakin87/gorilla-openfunctions-v0-sharded",
    device_map="auto",
    model_kwargs={"load_in_8bit": True, "torch_dtype": torch.float16},
    max_new_tokens=128,
    batch_size=16
)

# Example usage
query: str = "Call me an Uber ride type \"Plus\" in Berkeley at zipcode 94704 in 10 minutes"
functions = [
    {
        "name": "Uber Carpool",
        "api_name": "uber.ride",
        "description": "Find suitable ride for customers given the location, type of ride, and the amount of time the customer is willing to wait as parameters",
        "parameters": [
            {"name": "loc", "description": "Location of the starting place of the Uber ride"},
            {"name": "type", "enum": ["plus", "comfort", "black"], "description": "Types of Uber ride user is ordering"},
            {"name": "time", "description": "The amount of time in minutes the customer is willing to wait"}
        ]
    }
]

# Generate prompt and obtain model output
prompt = get_prompt(query, functions=functions)
output = pipe(prompt)

print(output[0]['generated_text'].rpartition("ASSISTANT:")[-1].strip())
# uber.ride(loc="berkeley", type="plus", time=10)
```
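
The 8-bit setup above is the suggested one. The half-precision and 4-bit options mentioned earlier are not shown in the original card; a minimal sketch of how they could look with the same `pipeline` API:

```python
import torch
from transformers import BitsAndBytesConfig, pipeline

model_id = "anakin87/gorilla-openfunctions-v0-sharded"

# Half-precision: no quantization, so more GPU memory is needed than with 8-bit
pipe_fp16 = pipeline(
    "text-generation",
    model=model_id,
    device_map="auto",
    model_kwargs={"torch_dtype": torch.float16},
    max_new_tokens=128,
)

# 4-bit quantization: smallest memory footprint, possibly at some quality cost
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
pipe_4bit = pipeline(
    "text-generation",
    model=model_id,
    device_map="auto",
    model_kwargs={"quantization_config": bnb_config},
    max_new_tokens=128,
)
```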
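
The model returns the call as plain text. If you want to work with it programmatically, one option (a minimal sketch, not part of the original card) is to parse it with Python's standard `ast` module instead of `eval`, so nothing gets executed:

```python
import ast

def parse_call(call_str: str) -> tuple[str, dict]:
    """Split a generated call such as 'uber.ride(loc="berkeley", type="plus", time=10)'
    into its API name and keyword arguments, without executing it."""
    node = ast.parse(call_str, mode="eval").body
    if not isinstance(node, ast.Call):
        raise ValueError(f"Not a call expression: {call_str!r}")
    api_name = ast.unparse(node.func)  # e.g. 'uber.ride'
    kwargs = {kw.arg: ast.literal_eval(kw.value) for kw in node.keywords}
    return api_name, kwargs

print(parse_call('uber.ride(loc="berkeley", type="plus", time=10)'))
# ('uber.ride', {'loc': 'berkeley', 'type': 'plus', 'time': 10})
```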