---
base_model: llm-jp/llm-jp-3-13b
tags:
- text-generation-inference
- transformers
- unsloth
- llama
- trl
license: apache-2.0
language:
- en
---
# Uploaded model
- **Developed by:** Hide101111001111000
- **License:** apache-2.0
- **Finetuned from model:** llm-jp/llm-jp-3-13b
This Llama-architecture model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.
[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
# Inference code
The code below uses this model to generate outputs for ELYZA-tasks-100-TV. Running it produces a JSONL file in the format required for submitting the assignment.
```python
# Reinstall Unsloth from the latest source, then upgrade torch and xformers
!pip uninstall unsloth -y
!pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --upgrade torch
!pip install --upgrade xformers
```
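Before loading the model, it can help to confirm the environment is healthy. This is a minimal sanity check, assuming a Colab GPU runtime:

```python
import torch

# Confirm the upgraded torch build sees the GPU
print(torch.__version__)
print(torch.cuda.is_available())      # should be True on a GPU runtime
print(torch.cuda.get_device_name(0))  # e.g. T4 / L4 / A100 depending on the runtime
```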
```python
import torch

# flash-attn only supports Ampere (SM 8.0) and newer GPUs
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install --no-deps packaging ninja einops "flash-attn>=2.6.3"

from unsloth import FastLanguageModel

max_seq_length = 512  # Unsloth supports RoPE scaling, so the context length can be set freely
dtype = None          # None selects the dtype automatically
load_in_4bit = True   # True here because we are handling a 13B model

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="Hide101111001111000/llm-jp-3-13b-it_lora-DPO-ja",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    trust_remote_code=True,
)
```
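Optionally, a quick smoke test confirms the checkpoint responds before running the full task set. This is a sketch only, using the same 指示/回答 prompt format as the loop below; the test question is made up:

```python
# Hypothetical smoke test with a single made-up prompt
FastLanguageModel.for_inference(model)  # enable Unsloth's fast inference mode
test_prompt = "### 指示\n日本で一番高い山は何ですか。\n### 回答\n"
test_inputs = tokenizer([test_prompt], return_tensors="pt").to(model.device)
test_outputs = model.generate(**test_inputs, max_new_tokens=32, use_cache=True)
print(tokenizer.decode(test_outputs[0], skip_special_tokens=True))
```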
```python
import json

# Read ELYZA-tasks-100-TV; a record may span several lines, so keep
# accumulating stripped lines until a closing brace completes a JSON object
datasets = []
with open("/content/elyza-tasks-100-TV_0.jsonl", "r") as f:
    item = ""
    for line in f:
        line = line.strip()
        item += line
        if item.endswith("}"):
            datasets.append(json.loads(item))
            item = ""
```
```python
from tqdm import tqdm

# Switch the model into Unsloth's fast inference mode
FastLanguageModel.for_inference(model)

results = []
for dt in tqdm(datasets):
    input = dt["input"]
    # Instruction-style prompt: 指示 (instruction) followed by a 回答 (answer) header
    prompt = f"""### 指示\n{input}\n### 回答\n"""
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=512, use_cache=True, do_sample=False, repetition_penalty=1.2)
    # Keep only the text after the final answer header
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split('\n### 回答')[-1]
    results.append({"task_id": dt["task_id"], "input": input, "output": prediction})
```
with open(f"model_output.jsonl", 'w', encoding='utf-8') as f:
for result in results:
json.dump(result, f, ensure_ascii=False)
f.write('\n') |
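As a final check, the file can be read back to confirm every line parses as JSON. This is an optional sketch, not part of the original submission flow:

```python
import json

# Optional: re-read model_output.jsonl and confirm each line is valid JSON
with open("model_output.jsonl", encoding="utf-8") as f:
    rows = [json.loads(line) for line in f]
print(len(rows), sorted(rows[0].keys()))  # expect 100 rows with input / output / task_id
```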