Tlgoa commited on
Commit
7e2dc3f
·
verified ·
1 Parent(s): 6d11767

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
LICENSE CHANGED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This model is a derivative of Google's Gemma-3-270m-it and is subject to the original Gemma Terms of Use. The dataset used for fine-tuning is licensed under Apache 2.0.
2
+
3
+ ---
4
+
5
+ **A. Gemma Terms of Use**
6
+
7
+ The use of this model is subject to the Gemma Terms of Use, which can be found here:
8
+ https://ai.google.dev/gemma/terms
9
+
10
+ **B. Apache 2.0 License (for the dataset)**
11
+
12
+ The dataset `Josephgflowers/Finance-Instruct-500k` used to fine-tune this model is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. You may obtain a copy of the License at:
13
+
14
+ http://www.apache.org/licenses/LICENSE-2.0
15
+
16
+ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
README.md CHANGED
@@ -1,5 +1,72 @@
1
- ---
2
- license: other
3
- license_name: gemma-tou-apache-2.0
4
- license_link: LICENSE
5
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ base_model: google/gemma-3-270m-it
4
+ tags:
5
+ - mlx
6
+ - finance
7
+ - gemma
8
+ - instruction-tuning
9
+ datasets:
10
+ - Josephgflowers/Finance-Instruct-500k
11
+ ---
12
+
13
+ # Gemma-3-270M - Fine-tuned for Financial Instructions
14
+
15
+ This is a fine-tuned version of Google's `gemma-3-270m-it` model, adapted for financial instruction-following tasks.
16
+
17
+ ## Model Description
18
+
19
+ This model was fine-tuned using the Apple MLX framework. The goal was to specialize the base model for financial reporting summary and decision-making assistance. It was trained on the `Josephgflowers/Finance-Instruct-500k` dataset.
20
+
21
+ ## Intended Use
22
+
23
+ This model is intended for tasks related to the financial domain, such as:
24
+ * Answering questions about financial concepts.
25
+ * Summarizing financial reports.
26
+ * Following instructions based on financial data.
27
+
28
+ ## How to Use
29
+
30
+ You can use this model with the `transformers` library just like any other standard Hugging Face model.
31
+
32
+ ```python
33
+ from transformers import AutoTokenizer, AutoModelForCausalLM
34
+
35
+ model_name = "tlgoa/tmr-ai-nano" # <-- IMPORTANT: Replace with your HF repo name
36
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
37
+ model = AutoModelForCausalLM.from_pretrained(model_name)
38
+
39
+ # Note: Gemma 3 uses a specific chat template.
40
+ # For single-turn inference, you can format it like this:
41
+ prompt = "What is the difference between revenue and profit?"
42
+ formatted_prompt = f"### User:\n{prompt}\n\n### Assistant:"
43
+
44
+ inputs = tokenizer(formatted_prompt, return_tensors="pt")
45
+ outputs = model.generate(**inputs, max_new_tokens=200)
46
+
47
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
48
+ # Clean up the response to only show the assistant's part
49
+ assistant_response = response.split("### Assistant:")[1].strip()
50
+
51
+ print(assistant_response)
52
+ ```
53
+
54
+ ## Training Procedure
55
+
56
+ ### Dataset
57
+ The model was fine-tuned on the `Josephgflowers/Finance-Instruct-500k` dataset. The data was preprocessed to fit the following format:
58
+ ```
59
+ ### User:
60
+ {user_prompt}
61
+
62
+ ### Assistant:
63
+ {assistant_response}
64
+ ```
65
+
66
+ ### Fine-tuning
67
+ The model was fine-tuned directly (full parameter tuning) using an Adam optimizer. Due to challenges with LoRA implementation in the available MLX version, a full fine-tuning approach was chosen. The fine-tuned weights were originally saved in MLX's `.npz` format and subsequently converted back to Hugging Face `safetensors` format for distribution.
68
+
69
+ ## Licenses
70
+
71
+ - **Base Model:** This model is based on Google's Gemma-3-270M, which is subject to the [Gemma Terms of Use](https://ai.google.dev/gemma/terms).
72
+ - **Dataset:** The training data from `Josephgflowers/Finance-Instruct-500k` is available under the Apache 2.0 License.
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image_soft_token>": 262144
3
+ }
chat_template.jinja ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{ bos_token }}
2
+ {%- if messages[0]['role'] == 'system' -%}
3
+ {%- if messages[0]['content'] is string -%}
4
+ {%- set first_user_prefix = messages[0]['content'] + '
5
+
6
+ ' -%}
7
+ {%- else -%}
8
+ {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
9
+
10
+ ' -%}
11
+ {%- endif -%}
12
+ {%- set loop_messages = messages[1:] -%}
13
+ {%- else -%}
14
+ {%- set first_user_prefix = "" -%}
15
+ {%- set loop_messages = messages -%}
16
+ {%- endif -%}
17
+ {%- for message in loop_messages -%}
18
+ {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
19
+ {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
20
+ {%- endif -%}
21
+ {%- if (message['role'] == 'assistant') -%}
22
+ {%- set role = "model" -%}
23
+ {%- else -%}
24
+ {%- set role = message['role'] -%}
25
+ {%- endif -%}
26
+ {{ '<start_of_turn>' + role + '
27
+ ' + (first_user_prefix if loop.first else "") }}
28
+ {%- if message['content'] is string -%}
29
+ {{ message['content'] | trim }}
30
+ {%- elif message['content'] is iterable -%}
31
+ {%- for item in message['content'] -%}
32
+ {%- if item['type'] == 'image' -%}
33
+ {{ '<start_of_image>' }}
34
+ {%- elif item['type'] == 'text' -%}
35
+ {{ item['text'] | trim }}
36
+ {%- endif -%}
37
+ {%- endfor -%}
38
+ {%- else -%}
39
+ {{ raise_exception("Invalid content type") }}
40
+ {%- endif -%}
41
+ {{ '<end_of_turn>
42
+ ' }}
43
+ {%- endfor -%}
44
+ {%- if add_generation_prompt -%}
45
+ {{'<start_of_turn>model
46
+ '}}
47
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_sliding_window_pattern": 6,
3
+ "architectures": [
4
+ "Gemma3ForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "attn_logit_softcapping": null,
9
+ "bos_token_id": 2,
10
+ "dtype": "float32",
11
+ "eos_token_id": 1,
12
+ "final_logit_softcapping": null,
13
+ "head_dim": 256,
14
+ "hidden_activation": "gelu_pytorch_tanh",
15
+ "hidden_size": 640,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 2048,
18
+ "layer_types": [
19
+ "sliding_attention",
20
+ "sliding_attention",
21
+ "sliding_attention",
22
+ "sliding_attention",
23
+ "sliding_attention",
24
+ "full_attention",
25
+ "sliding_attention",
26
+ "sliding_attention",
27
+ "sliding_attention",
28
+ "sliding_attention",
29
+ "sliding_attention",
30
+ "full_attention",
31
+ "sliding_attention",
32
+ "sliding_attention",
33
+ "sliding_attention",
34
+ "sliding_attention",
35
+ "sliding_attention",
36
+ "full_attention"
37
+ ],
38
+ "max_position_embeddings": 32768,
39
+ "model_type": "gemma3_text",
40
+ "num_attention_heads": 4,
41
+ "num_hidden_layers": 18,
42
+ "num_key_value_heads": 1,
43
+ "pad_token_id": 0,
44
+ "query_pre_attn_scalar": 256,
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_local_base_freq": 10000.0,
47
+ "rope_scaling": null,
48
+ "rope_theta": 1000000.0,
49
+ "sliding_window": 512,
50
+ "transformers_version": "4.56.0",
51
+ "use_bidirectional_attention": false,
52
+ "use_cache": true,
53
+ "vocab_size": 262144
54
+ }
generation_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cache_implementation": "hybrid",
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 1,
6
+ 106
7
+ ],
8
+ "top_k": 64,
9
+ "top_p": 0.95,
10
+ "transformers_version": "4.56.0"
11
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:965e4ebcb453b4b02e04a1132d90484cae0de6828fa5e52acdcbd539a085c2b2
3
+ size 1072419256
special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": {
12
+ "content": "<eos>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
3
+ size 33384568
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff