h7m committed
Commit c197cf1 · verified · 1 Parent(s): 3c386a2

Upload README.md

Files changed (1):
  README.md  +105 -5
README.md CHANGED

---
base_model: llm-jp/llm-jp-3-13b
tags:
- lora
license: apache-2.0
library_name: transformers
---

# llm-jp-3-13b-zzzzzzzz-lora
This is a LoRA adapter for llm-jp/llm-jp-3-13b, fine-tuned mainly for chat in Japanese.

Dataset details: [日本語インストラクションデータ](https://liat-aip.sakura.ne.jp/wp/llmのための日本語インストラクションデータ作成/llmのための日本語インストラクションデータ-公開/)

## Usage

### Single Input
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig

# Load base model and tokenizer
base_model = AutoModelForCausalLM.from_pretrained("llm-jp/llm-jp-3-13b")
tokenizer = AutoTokenizer.from_pretrained("llm-jp/llm-jp-3-13b")

# Load LoRA adapter
model_name = "llm-jp-3-13b-xxx-lora"
model = PeftModel.from_pretrained(
    base_model,
    model_name,
    is_trainable=False
)

# Generate response
input_text = "###\n### 指示\n日本の首都は?\n### 回答\n"
inputs = tokenizer(input_text, return_tensors="pt")
outputs = model.generate(**inputs)
result = tokenizer.decode(outputs[0])
```
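
The snippet above loads everything in full precision on CPU, which is slow for a 13B-parameter model. Below is a minimal sketch of the same flow on GPU with bfloat16; this is not part of the original card, and it assumes a CUDA GPU with enough memory plus the `accelerate` package (needed for `device_map="auto"`).

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Assumption: a CUDA GPU with enough memory for the 13B base model in bfloat16,
# and accelerate installed so device_map="auto" can place the weights.
base_model = AutoModelForCausalLM.from_pretrained(
    "llm-jp/llm-jp-3-13b",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("llm-jp/llm-jp-3-13b")
model = PeftModel.from_pretrained(base_model, "llm-jp-3-13b-xxx-lora", is_trainable=False)

input_text = "###\n### 指示\n日本の首都は?\n### 回答\n"
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```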

### Batch Processing and Saving Results to a JSONL File

```python
# Batch processing handles multiple prompts at once and uses multi-step
# generation to produce long outputs in smaller chunks.
# The results are saved to a JSONL file for downstream use or evaluation.
# This builds on the model, tokenizer, and model_name defined in the Single Input example.
import json
import torch

# datalst is a list of dictionaries, each containing a "task_id" and an "input" key.
# Example:
# datalst = [{"task_id": 1, "input": "日本の首都は?"}, ...]

num_elements_per_batch = 20
device = "cuda"
model = model.to(device)

datalst_result = []
for iBatch in range(0, len(datalst), num_elements_per_batch):

    batch = datalst[iBatch:iBatch + num_elements_per_batch]

    # Prepare the first inputs from datalst
    indices = [entry["task_id"] for entry in batch]
    first_input_texts = ["\n### 指示\n" + entry["input"] + "\n### 回答\n" for entry in batch]

    total_new_tokens = 250  # Total number of tokens to generate per input.
    unit_new_tokens = 50    # Number of tokens to generate in each step.

    nStep = (total_new_tokens + unit_new_tokens - 1) // unit_new_tokens

    # Prepare inputs for the first step
    inputs = tokenizer(first_input_texts,
                       return_tensors="pt", padding=True, truncation=True,
                       return_token_type_ids=False)
    inputs = {key: value.to(device) for key, value in inputs.items()}

    totalstep_texts = first_input_texts

    # Perform multi-step generation to handle long outputs in smaller chunks.
    for iStep in range(nStep):
        max_new_tokens = min(unit_new_tokens, total_new_tokens - iStep * unit_new_tokens)

        # Generate outputs from inputs
        with torch.no_grad():
            outputs = model.generate(**inputs,
                                     max_new_tokens=max_new_tokens,
                                     do_sample=False,
                                     repetition_penalty=1.2,
                                     pad_token_id=tokenizer.pad_token_id,
                                     )

        # Decode only the newly generated tokens
        stepwise_texts = tokenizer.batch_decode(
            outputs[:, inputs["input_ids"].shape[1]:],
            skip_special_tokens=True)

        totalstep_texts = [old + new for old, new in zip(totalstep_texts, stepwise_texts)]

        if iStep < nStep - 1:
            # Prepare inputs for the next step
            inputs = tokenizer(
                totalstep_texts,
                return_tensors="pt", padding=True, truncation=True,
                return_token_type_ids=False
            ).to(device)

            if inputs["input_ids"].shape[1] > tokenizer.model_max_length:
                print(f"Warning: Input length exceeds model_max_length ({tokenizer.model_max_length}). Truncation applied.")

    # Update results
    for idx, first_input_text, totalstep_text in zip(indices, first_input_texts, totalstep_texts):

        # Remove the input prompt from the generated text
        new_generated_text = totalstep_text[len(first_input_text):].strip()  # Trim extra spaces

        new_entry = {"task_id": idx, "input": first_input_text, "output": new_generated_text}
        datalst_result.append(new_entry)

# Save results to a JSONL file, one JSON object per line, e.g.:
# {"task_id": 0, "input": "\n### 指示\n日本の首都は?\n### 回答\n", "output": "東京です。"}
# {"task_id": 1, "input": ...
with open(f"./{model_name}-outputs.jsonl", 'w', encoding='utf-8') as f:
    for entry in datalst_result:
        json.dump(entry, f, ensure_ascii=False)  # ensure_ascii=False keeps Japanese text readable
        f.write('\n')
```
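
The comments above mention reusing the JSONL file downstream; as a minimal sketch (not from the original card, and assuming the placeholder `model_name` used in the examples), it can be read back like this for a quick look at the generations:

```python
import json

# Reload the saved outputs; the file name follows the pattern used above.
with open("./llm-jp-3-13b-xxx-lora-outputs.jsonl", encoding="utf-8") as f:
    results = [json.loads(line) for line in f]

# Print the first few generations as a sanity check.
for entry in results[:3]:
    print(entry["task_id"], entry["output"])
```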

## Requirements
```
transformers