Update README.md
README.md (changed)
`````diff
@@ -125,46 +125,49 @@ import torch
 
 # Load tokenizer and model
 model_name = "snuh/hari-q2.5"
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
-model.eval()
 
-
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",
+    device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+prompt = '''
+### Instruction:
+You are a capable and trustworthy Korean-language medical assistant with clinical knowledge.
+Given the user's question, suggest possible diagnoses based on accurate and careful medical reasoning.
+Always weigh every clue, including the patient's age, symptoms, test results, and site of pain, and present both your reasoning process and the diagnosis.
+Use medically precise terminology, but restate it in plain language when that helps a non-specialist understand.
+
+### Question:
+A 60-year-old man presented with abdominal pain and fever.
+Blood tests showed an elevated white blood cell count, and tenderness was noted in the right lower abdomen.
+What is the most likely diagnosis?
+
+### Reasoning:
+'''.strip()
+
 messages = [
-    {
-        ...
+    {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
+    {"role": "user", "content": prompt}
+]
+text = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True
+)
+model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+
+generated_ids = model.generate(
+    **model_inputs,
+    max_new_tokens=512
+)
+generated_ids = [
+    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
 ]
 
-
-prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-
-# Tokenize input
-inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-
-# Generate model output
-with torch.no_grad():
-    outputs = model.generate(
-        **inputs,
-        max_new_tokens=128,
-        do_sample=True,
-        temperature=0.7,
-        top_p=0.9,
-        eos_token_id=tokenizer.eos_token_id,
-    )
-
-# Decode and display response
-response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-print("Model Response:\n")
+response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 print(response)
 ````
`````
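The updated example follows the usual Qwen2.5-style flow: the chat template is applied as text, the prompt tokens are sliced off each output sequence (`output_ids[len(input_ids):]`), and only the newly generated reply is decoded. A shorter route to the same result is the `transformers` text-generation pipeline. The sketch below is an illustration only, assuming a recent `transformers` release whose pipeline accepts chat-style message lists; the condensed English prompt stands in for the full Instruction/Question/Reasoning prompt shown in the example above.

````python
# Minimal sketch, assuming a recent transformers release with a chat-aware
# text-generation pipeline; the short prompt is a stand-in for the full
# Instruction/Question/Reasoning prompt used in the README example.
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="snuh/hari-q2.5",
    torch_dtype="auto",
    device_map="auto",
)

prompt = (
    "### Question:\n"
    "A 60-year-old man presents with abdominal pain, fever, an elevated white "
    "blood cell count, and right lower quadrant tenderness. "
    "What is the most likely diagnosis?\n\n"
    "### Reasoning:\n"
)

messages = [
    {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
    {"role": "user", "content": prompt},
]

# The chat-aware pipeline applies the model's chat template and returns the
# whole conversation; the last message is the newly generated assistant reply.
outputs = pipe(messages, max_new_tokens=512)
print(outputs[0]["generated_text"][-1]["content"])
````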