JacobLinCool committed on
Commit b5b6ac9 · verified · 1 Parent(s): 28ac369

Update README.md

Files changed (1):
  1. README.md +56 -5
README.md CHANGED
@@ -17,12 +17,63 @@ It has been trained using [TRL](https://github.com/huggingface/trl).
  ## Quick start

  ```python
- from transformers import pipeline

- question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
- generator = pipeline("text-generation", model="JacobLinCool/gemma-3n-E2B-transcribe-zh-tw-1", device="cuda")
- output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
- print(output["generated_text"])
+ import torch
+ from peft import PeftModel
+ from transformers import AutoModelForCausalLM, AutoProcessor
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # Load the base Gemma 3n model and attach the fine-tuned LoRA adapter.
+ processor = AutoProcessor.from_pretrained("google/gemma-3n-E2B-it")
+ base_model = AutoModelForCausalLM.from_pretrained("google/gemma-3n-E2B-it")
+ model = PeftModel.from_pretrained(
+     base_model, "JacobLinCool/gemma-3n-E2B-transcribe-zh-tw-1"
+ ).to(device)
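+
+ # Optional, not in the original snippet: PEFT's merge_and_unload() folds the
+ # LoRA weights into the base model, removing adapter overhead at inference:
+ # model = model.merge_and_unload()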
+
+
+ def transcribe(model, processor, audio):
+     # A system instruction plus a user turn carrying the audio and the request.
+     messages = [
+         {
+             "role": "system",
+             "content": [
+                 {
+                     "type": "text",
+                     "text": "You are an assistant that transcribes speech accurately.",
+                 }
+             ],
+         },
+         {
+             "role": "user",
+             "content": [
+                 {"type": "audio", "audio": audio},
+                 {"type": "text", "text": "Transcribe this audio."},
+             ],
+         },
+     ]
+
+     inputs = processor.apply_chat_template(
+         messages,
+         add_generation_prompt=True,
+         tokenize=True,
+         return_dict=True,
+         return_tensors="pt",
+     )
+     # BatchFeature.to() casts only floating-point tensors (the audio features);
+     # integer token ids keep their dtype.
+     inputs = inputs.to(device, dtype=model.dtype)
+
+     model.eval()
+     with torch.no_grad():
+         outputs = model.generate(**inputs, max_new_tokens=128)
+
+     prediction = processor.batch_decode(
+         outputs, skip_special_tokens=True, clean_up_tokenization_spaces=False
+     )[0]
+     # Keep only the model's turn from the decoded chat transcript.
+     prediction = prediction.split("\nmodel\n")[-1].strip()
+     return prediction
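+
+ # Assumption, not from the original snippet: recent transformers releases let
+ # the chat template's "audio" entry be a local file path, a URL, or a numpy
+ # waveform array, so transcribe() accepts any of these.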
+
+
+ if __name__ == "__main__":
+     prediction = transcribe(model, processor, "/workspace/audio.mp3")
+     print(prediction)
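+     # The adapter is tuned for Traditional Chinese (zh-TW) speech, so this
+     # prints the zh-TW transcript of the audio file.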
  ```

  ## Training procedure