500 Server Error when downloading omi-health/sum-small model config.json
I'm experiencing a 500 Server Error when downloading the configuration file for the omi-health/sum-small model. My internet connection is working fine, and I've tried multiple times.
Error details:
500 Server Error on https://huggingface.co/omi-health/sum-small/resolve/main/config.json
Request ID: Root=1-67fe2dd7-5ec4a47c4fd10a2b1bccd6b1;3e2a7d6e-1512-4391-9f79-c2ec5bc5c368
I'd appreciate any help in resolving this issue.
Code
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import warnings
warnings.filterwarnings('ignore')
MODEL_NAME = 'omi-health/sum-small'
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map='cuda',
    load_in_8bit=True,
    trust_remote_code=True,
)
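In case it helps narrow things down, this is roughly how I would isolate the download from the transformers loading path; a minimal sketch, assuming huggingface_hub is installed:

from huggingface_hub import hf_hub_download

# Fetch only config.json, bypassing transformers, to check whether
# the 500 error comes from the Hub request itself.
config_path = hf_hub_download(repo_id="omi-health/sum-small", filename="config.json")
print(config_path)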
Hi,
Please take a look at the following example code showing how to load the model and use it for inference:
from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline
from datasets import load_dataset
import evaluate
import torch
from tqdm.auto import tqdm
torch.random.manual_seed(0)
# Load dataset
dataset = load_dataset("omi-health/soap")
train_dataset = dataset['train']
validation_dataset = dataset['validation']
test_dataset = dataset['test']
model_id = "omi-health/sum-small"
# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Initialize model (full precision for this small model)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype="auto"  # will use full precision
)
# Function for generating summaries
from transformers import pipeline
import random
def generate_summary(dialogue):
    # Create messages using Qwen's chat template format
    messages = [
        {
            "role": "system",
            "content": "You are an expert medical professor assisting in the creation of medically accurate SOAP summaries."
        },
        {
            "role": "user",
            "content": f"Create a medical SOAP summary from this dialogue:\n### Dialogue:\n{dialogue}\n### Your SOAP Summary:"
        }
    ]
    # Apply chat template
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # Initialize the text-generation pipeline
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device_map="auto"
    )
    # Keep original generation config
    generation_config = {
        "max_new_tokens": 600,
        "temperature": 1.0,
        "do_sample": True,
        "return_full_text": False,
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    # Generate the summary
    output = pipe(prompt, **generation_config)
    # Extract the assistant's response
    return output[0]['generated_text']
def generate_summaries(dialogues):
    return [generate_summary(d) for d in dialogues]
# Test the summarization on random samples
random_samples = random.sample(list(test_dataset), 3)
for sample in random_samples:
    dialogue = sample["dialogue"]
    true_summary = sample["soap"]
    # Summarize a single dialogue (generate_summaries handles batches)
    generated_summary = generate_summary(dialogue)
    print(f"True SOAP: {true_summary}\nGenerated SOAP: {generated_summary}\n")
Thank you very much