500 Server Error when downloading omi-health/sum-small model config.json

#3
by ninte - opened

I'm experiencing a 500 Server Error when downloading the configuration file for the omi-health/sum-small model. My internet connection is working fine, and I've tried multiple times.
Error details:

500 Server Error on https://huggingface.co/omi-health/sum-small/resolve/main/config.json
Request ID: Root=1-67fe2dd7-5ec4a47c4fd10a2b1bccd6b1;3e2a7d6e-1512-4391-9f79-c2ec5bc5c368

I'd appreciate any help in resolving this issue.

Code

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

import warnings
warnings.filterwarnings('ignore')

MODEL_NAME = 'omi-health/sum-small'

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map='cuda', load_in_8bit=True, trust_remote_code=True)
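
As a quick check on whether the failure is on the Hub side rather than in the loading code, the same file can be fetched directly with huggingface_hub (a minimal diagnostic sketch; huggingface_hub is already a dependency of transformers):

from huggingface_hub import hf_hub_download

# Try to fetch just the config file; a persistent 500 here confirms a
# server-side problem. Such errors are usually transient, so retrying
# after a short wait often succeeds.
path = hf_hub_download(repo_id="omi-health/sum-small", filename="config.json")
print(path)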

Hi,

Please take a look at the following example code, which shows how to load the model and use it for inference:

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from datasets import load_dataset
import evaluate
import torch
import random
from tqdm.auto import tqdm

torch.random.manual_seed(0)

# Load dataset
dataset = load_dataset("omi-health/soap")
train_dataset = dataset['train']
validation_dataset = dataset['validation']
test_dataset = dataset['test']
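
# Optional sanity check (an assumption on my part, not from the original
# reply): each example should expose the "dialogue" and "soap" fields
# that the code below relies on
print(test_dataset[0].keys())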

model_id = "omi-health/sum-small"

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Initialize model
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype="auto"  # Uses the dtype recorded in the model's config.json
)
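
# Note (an assumption, not part of the original reply): if 8-bit loading
# is still wanted, as in the snippet that triggered the error, recent
# transformers versions expect a BitsAndBytesConfig (requires the
# bitsandbytes package) rather than a bare load_in_8bit= keyword, e.g.:
#
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForCausalLM.from_pretrained(
#       model_id,
#       device_map="auto",
#       trust_remote_code=True,
#       quantization_config=BitsAndBytesConfig(load_in_8bit=True),
#   )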

# Initialize the text-generation pipeline once, rather than on every call;
# the model was already placed on devices by device_map="auto" above, so
# no device argument is needed here
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Function for generating summaries
def generate_summary(dialogue):
    # Create messages using Qwen's chat template format
    messages = [
        {
            "role": "system",
            "content": "You are an expert medical professor assisting in the creation of medically accurate SOAP summaries."
        },
        {
            "role": "user",
            "content": f"Create a medical SOAP summary from this dialogue:\n### Dialogue:\n{dialogue}\n### Your SOAP Summary:"
        }
    ]

    # Apply chat template
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Keep original generation config
    generation_config = {
        "max_new_tokens": 600,
        "temperature": 1.0,
        "do_sample": True,
        "return_full_text": False,
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }

    # Generate the summary
    output = pipe(prompt, **generation_config)
    # Extract the assistant's response
    return output[0]['generated_text']

def generate_summaries(dialogues):
    # tqdm shows a progress bar when summarizing a batch of dialogues
    return [generate_summary(d) for d in tqdm(dialogues)]

# Test the summarization on random samples
random_samples = random.sample(list(test_dataset), 3)

for sample in random_samples:
    dialogue = sample["dialogue"]
    true_summary = sample["soap"]

    # Summarize this single dialogue
    generated_summary = generate_summary(dialogue)
    print(f"True SOAP: {true_summary}\nGenerated SOAP: {generated_summary}\n")
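
To put the evaluate import to use, the generated summaries can also be scored against the references. Here is a minimal sketch computing ROUGE (my addition, not from the original reply; evaluate.load("rouge") additionally requires the rouge_score package):

# Score the generated summaries against the reference SOAP notes
rouge = evaluate.load("rouge")
dialogues = [s["dialogue"] for s in random_samples]
references = [s["soap"] for s in random_samples]
predictions = generate_summaries(dialogues)
print(rouge.compute(predictions=predictions, references=references))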

Thank you very much

ninte changed discussion status to closed