Upload folder using huggingface_hub
- .gitattributes +1 -0
- README.md +19 -0
- added_tokens.json +24 -0
- config.json +36 -0
- merges.txt +0 -0
- model-00001-of-00004.safetensors +3 -0
- model-00002-of-00004.safetensors +3 -0
- model-00003-of-00004.safetensors +3 -0
- model-00004-of-00004.safetensors +3 -0
- model.safetensors.index.json +0 -0
- special_tokens_map.json +31 -0
- tokenizer.json +3 -0
- tokenizer_config.json +209 -0
- vocab.json +0 -0
- xlam_tool_call_parser.py +198 -0
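
For reference, a commit like this is typically produced with huggingface_hub's upload_folder API. The sketch below is illustrative only; the local folder path and repo_id are placeholders, not values taken from this commit.

from huggingface_hub import HfApi

api = HfApi()  # assumes you are logged in (huggingface-cli login) or pass token=...
api.upload_folder(
    folder_path="./xLAM-2-32b-fc-r-mlx",      # hypothetical local export directory
    repo_id="your-username/your-model-repo",  # hypothetical target repository
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)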
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,19 @@
+---
+license: cc-by-nc-4.0
+datasets:
+- Salesforce/xlam-function-calling-60k
+language:
+- en
+pipeline_tag: text-generation
+tags:
+- function-calling
+- LLM Agent
+- tool-use
+- llama
+- qwen
+- pytorch
+- LLaMA-factory
+- mlx
+library_name: mlx
+base_model: Salesforce/xLAM-2-32b-fc-r
+---
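
Since the card declares library_name: mlx and the checkpoint is a 4-bit quantized Qwen2 export, a minimal way to try it is with mlx-lm. This is a sketch only: the repo id is a placeholder, and exact keyword arguments vary across mlx-lm versions.

# Assumes `pip install mlx-lm` on Apple silicon.
from mlx_lm import load, generate

model, tokenizer = load("your-username/your-model-repo")  # hypothetical repo id
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "What's the weather in Tokyo?"}],
    add_generation_prompt=True,
    tokenize=False,
)
print(generate(model, tokenizer, prompt=prompt, max_tokens=256))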
added_tokens.json
ADDED
@@ -0,0 +1,24 @@
+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}
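
The added tokens above extend the Qwen2 vocabulary with chat, vision, FIM, and tool-call markers. A quick sanity check with transformers, assuming a local clone of this repository in the current directory:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(".")  # assumption: run from a local clone of this repo
assert tok.convert_tokens_to_ids("<tool_call>") == 151657
assert tok.convert_tokens_to_ids("</tool_call>") == 151658
assert tok.convert_tokens_to_ids("<|im_end|>") == 151645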
config.json
ADDED
@@ -0,0 +1,36 @@
+{
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "intermediate_size": 27648,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 70,
+  "model_type": "qwen2",
+  "num_attention_heads": 40,
+  "num_hidden_layers": 64,
+  "num_key_value_heads": 8,
+  "quantization": {
+    "group_size": 64,
+    "bits": 4
+  },
+  "quantization_config": {
+    "group_size": 64,
+    "bits": 4
+  },
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.46.1",
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 152064
+}
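
The config records both a "quantization" and a "quantization_config" block (group size 64, 4-bit), which is how MLX conversions usually mark quantized weights. A small sketch for inspecting those fields from a local copy of config.json:

import json

with open("config.json") as f:
    cfg = json.load(f)

# Both keys carry the same 4-bit, group-size-64 settings in this checkpoint
print(cfg["quantization"], cfg["quantization_config"])
print(cfg["model_type"], cfg["hidden_size"], cfg["num_hidden_layers"], cfg["vocab_size"])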
merges.txt
ADDED
The diff for this file is too large to render. See raw diff.
model-00001-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3d14ef29b484d1d6155870f301f07fdf81b26a3a06650b28d22aaf71916f25b
+size 5366583057
model-00002-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1420f801a13d8f7624768a485305ac8e0a85e4f51665b6174c474b4730b384b9
+size 5335713350
model-00003-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:765644815115ab7e174db79b7d6af6ddd14b0efb40741dc8c46b2ed64298c682
+size 5366642308
model-00004-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8396e5ec01844a656744516ea0ba7d15a2621d085c97bb101058041ce601a80
+size 2362540963
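
The four shard entries above are Git LFS pointers: each records only the SHA-256 oid and byte size of the real file. A sketch for verifying a downloaded shard against its pointer:

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    """Stream a large file and return its SHA-256 hex digest."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(chunk_size), b""):
            h.update(block)
    return h.hexdigest()

# oid taken from the model-00001-of-00004.safetensors pointer above
expected = "e3d14ef29b484d1d6155870f301f07fdf81b26a3a06650b28d22aaf71916f25b"
print(sha256_of("model-00001-of-00004.safetensors") == expected)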
model.safetensors.index.json
ADDED
The diff for this file is too large to render. See raw diff.
special_tokens_map.json
ADDED
@@ -0,0 +1,31 @@
+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
+size 11421896
tokenizer_config.json
ADDED
@@ -0,0 +1,209 @@
+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "chat_template": "{# System message #}\n{{- \"<|im_start|>system\\n\" }}\n{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content'] | trim %}\n    {%- set messages = messages[1:] %}\n    {{- system_message + \"\\n\" }}\n{%- else %}\n    {%- set system_message = \"You are a helpful assistant that can use tools. You are developed by Salesforce xLAM team.\" %}\n    {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: \n\n[{\"name\": \"tool_call_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}}, ... (additional parallel tool calls as needed)]\n\nIf no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}\n    {{- system_message + \"\\n\" }}\n    {%- if tools is not none %}\n        {{- format_instruction + \"\\n\\n\" }}\n    {%- endif %}\n{%- endif %}\n\n{%- if tools is not none %}\n    {%- for func in tools %}\n        {{- func | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n{%- endif %}\n{{- \"<|im_end|>\" }}\n{%- for message in messages %}\n    {%- if message['role'] == 'tool' %}\n        {{- \"<|im_start|>tool\\n\" }}\n        {%- if message.content is defined and message.content.content is defined %}\n            {%- set content = message.content.content %}\n        {%- else %}\n            {%- set content = message.content %}\n        {%- endif %}\n        {%- if content is mapping or content is iterable and content is not string %}\n            {{- content | tojson }}\n        {%- else %}\n            {{- content }}\n        {%- endif %}\n        {{- \"<|im_end|>\" }}\n    {%- elif 'tool_calls' in message %}\n        {{- \"<|im_start|>assistant\\n\" }}\n        {%- if message['tool_calls'] %}\n            {{- \"[\" }}\n            {%- for tool_call in message.tool_calls %}\n                {%- set out = tool_call.function | tojson %}\n                {{- out }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- endif %}\n            {%- endfor %}\n            {{- \"]\"}}\n        {%- elif message['content'] %}\n            {{- message['content'] | trim }}\n        {%- else %}\n            {{- \"[]\\n\" }}\n        {%- endif %}\n        {{- \"<|im_end|>\" }}\n    {%- else %}\n        {{- \"<|im_start|>\" + message['role'] + \"\\n\" + message['content'] | trim + \"<|im_end|>\" }}\n    {%- endif %}\n{%- endfor %}\n\n{%- if add_generation_prompt %}\n    {{- \"<|im_start|>assistant\\n\" }}\n{%- endif %}\n",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 16384,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}
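
The chat_template above injects the tool list into the system turn and expects assistant tool calls as a single JSON array. A sketch of rendering a prompt with transformers' apply_chat_template follows; the tools kwarg requires a recent transformers release, the tool definition is a made-up example, and the exact wrapping the template expects may differ.

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(".")  # assumption: local clone of this repo

tools = [{  # hypothetical tool definition for illustration
    "name": "get_weather",
    "description": "Get the current weather for a city",
    "parameters": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
}]
messages = [{"role": "user", "content": "What's the weather in Tokyo?"}]

prompt = tok.apply_chat_template(
    messages, tools=tools, add_generation_prompt=True, tokenize=False
)
print(prompt)  # system turn contains the format instruction plus the tool JSON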
vocab.json
ADDED
The diff for this file is too large to render. See raw diff.
xlam_tool_call_parser.py
ADDED
@@ -0,0 +1,198 @@
+import json
+import re
+from typing import Dict, List, Sequence, Union
+import partial_json_parser
+from partial_json_parser.core.options import Allow
+
+from vllm.entrypoints.openai.protocol import (
+    ChatCompletionRequest, DeltaMessage, DeltaToolCall,
+    DeltaFunctionCall, ExtractedToolCallInformation, ToolCall, FunctionCall
+)
+from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ToolParser, ToolParserManager
+from vllm.utils import random_uuid
+from vllm.logger import init_logger
+from transformers import PreTrainedTokenizerBase
+from vllm.entrypoints.openai.tool_parsers.utils import (find_common_prefix,
+                                                         is_complete_json,
+                                                         partial_json_loads)
+
+logger = init_logger(__name__)
+
+@ToolParserManager.register_module("xlam")
+class xLAMToolParser(ToolParser):
+    def __init__(self, tokenizer: PreTrainedTokenizerBase):
+        super().__init__(tokenizer)
+        # State for streaming mode
+        self.prev_tool_calls: List[Dict] = []
+        self.current_tools_sent: List[bool] = []
+        self.streamed_args: List[str] = []
+        # Remove regex since we're parsing direct JSON
+
+    def extract_tool_calls(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest
+    ) -> ExtractedToolCallInformation:
+        try:
+            # Modified: Direct JSON parsing without looking for ```
+            if not model_output.strip().startswith('['):
+                return ExtractedToolCallInformation(
+                    tools_called=False,
+                    tool_calls=[],
+                    content=model_output
+                )
+
+            tool_calls_data = json.loads(model_output)
+            tool_calls: List[ToolCall] = []
+
+            for idx, call in enumerate(tool_calls_data):
+                tool_call = ToolCall(
+                    id=f"call_{idx}_{random_uuid()}",
+                    type="function",
+                    function=FunctionCall(
+                        name=call["name"],
+                        arguments=json.dumps(call["arguments"])
+                    )
+                )
+                tool_calls.append(tool_call)
+
+            return ExtractedToolCallInformation(
+                tools_called=True,
+                tool_calls=tool_calls,
+                content=None
+            )
+
+        except Exception:
+            logger.exception("Error extracting tool calls")
+            return ExtractedToolCallInformation(
+                tools_called=False,
+                tool_calls=[],
+                content=model_output
+            )
+
+    def extract_tool_calls_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
+    ) -> Union[DeltaMessage, None]:
+        if not current_text.strip().startswith('['):
+            return DeltaMessage(content=delta_text)
+
+        flags = Allow.ALL if self.current_tool_name_sent else Allow.ALL & ~Allow.STR
+
+        try:
+            tool_call_arr = []
+            is_complete = []
+            try:
+                # Parse the JSON array
+                start_idx = 0
+                while start_idx < len(current_text):
+                    obj, end_idx = partial_json_loads(current_text[start_idx:], flags)
+                    is_complete.append(
+                        is_complete_json(current_text[start_idx:start_idx + end_idx])
+                    )
+                    start_idx += end_idx
+                    tool_call_arr.append(obj)
+            except partial_json_parser.core.exceptions.MalformedJSON:
+                logger.debug('not enough tokens to parse into JSON yet')
+                return None
+
+            # Get current tool call based on state
+            current_tool_call: Dict = tool_call_arr[self.current_tool_id] \
+                if len(tool_call_arr) > 0 else {}
+
+            # Case 1: No tools parsed yet
+            if len(tool_call_arr) == 0:
+                return None
+
+            # Case 2: Starting a new tool in array
+            elif (len(tool_call_arr) > 0
+                  and len(tool_call_arr) > self.current_tool_id + 1):
+
+                # Handle any remaining arguments from previous tool
+                if self.current_tool_id >= 0:
+                    cur_arguments = current_tool_call.get("arguments")
+                    if cur_arguments:
+                        cur_args_json = json.dumps(cur_arguments)
+                        sent = len(self.streamed_args[self.current_tool_id])
+                        argument_diff = cur_args_json[sent:]
+
+                        if argument_diff:
+                            delta = DeltaMessage(tool_calls=[
+                                DeltaToolCall(
+                                    index=self.current_tool_id,
+                                    function=DeltaFunctionCall(
+                                        arguments=argument_diff
+                                    ).model_dump(exclude_none=True)
+                                )
+                            ])
+                            self.streamed_args[self.current_tool_id] += argument_diff
+                            return delta
+
+                # Setup new tool
+                self.current_tool_id = len(tool_call_arr) - 1
+                self.current_tools_sent.append(False)
+                self.streamed_args.append("")
+                logger.debug("starting new tool %d", self.current_tool_id)
+                return None
+
+            # Case 3: Send tool name if not sent yet
+            elif not self.current_tools_sent[self.current_tool_id]:
+                function_name = current_tool_call.get("name")
+                if function_name:
+                    delta = DeltaMessage(tool_calls=[
+                        DeltaToolCall(
+                            index=self.current_tool_id,
+                            type="function",
+                            id=f"call_{self.current_tool_id}_{random_uuid()}",
+                            function=DeltaFunctionCall(
+                                name=function_name
+                            ).model_dump(exclude_none=True)
+                        )
+                    ])
+                    self.current_tools_sent[self.current_tool_id] = True
+                    return delta
+                return None
+
+            # Case 4: Stream arguments
+            else:
+                cur_arguments = current_tool_call.get("arguments")
+                if cur_arguments:
+                    sent = len(self.streamed_args[self.current_tool_id])
+                    cur_args_json = json.dumps(cur_arguments)
+                    prev_arguments = self.prev_tool_calls[self.current_tool_id].get("arguments")
+
+                    argument_diff = None
+                    if is_complete[self.current_tool_id]:
+                        argument_diff = cur_args_json[sent:]
+                    elif prev_arguments:
+                        prev_args_json = json.dumps(prev_arguments)
+                        if cur_args_json != prev_args_json:
+                            prefix = find_common_prefix(prev_args_json, cur_args_json)
+                            argument_diff = prefix[sent:]
+
+                    if argument_diff is not None:
+                        delta = DeltaMessage(tool_calls=[
+                            DeltaToolCall(
+                                index=self.current_tool_id,
+                                function=DeltaFunctionCall(
+                                    arguments=argument_diff
+                                ).model_dump(exclude_none=True)
+                            )
+                        ])
+                        self.streamed_args[self.current_tool_id] += argument_diff
+                        return delta
+
+            self.prev_tool_calls = tool_call_arr
+            return None
+
+        except Exception:
+            logger.exception("Error in streaming tool calls")
+            logger.debug("Skipping chunk due to streaming error")
+            return None
+
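
The parser registers itself with vLLM's ToolParserManager under the name "xlam", so it is intended to be loaded by vLLM's OpenAI-compatible server as a tool-parser plugin (typically via flags along the lines of --enable-auto-tool-choice --tool-call-parser xlam --tool-parser-plugin xlam_tool_call_parser.py; check your vLLM version for the exact options). Assuming such a server is running an xLAM checkpoint, a client-side sketch might look like this; the endpoint, model name, and tool definition are placeholders.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")  # assumed local vLLM server

tools = [{  # hypothetical tool definition
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

resp = client.chat.completions.create(
    model="xLAM-2-32b-fc-r",  # placeholder: whatever name the server exposes
    messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
    tools=tools,
)
print(resp.choices[0].message.tool_calls)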