less-rambling inference
#1
by LolaRoseHB
```python
from mlx_lm import load, generate

model_name = "barretts/Seed-Coder-8B-Reasoning-mlx-8Bit"
model, tokenizer = load(model_name)

# Instructions that steer the model toward short, focused answers.
prefill = """You are a helpful, concise, and precise assistant.
When answering questions:
- Be direct and clear
- Avoid unnecessary elaboration
- Provide accurate information
- Use simple language
- Focus on the core of the question
- Do not overthink or generate multiple redundant explanations"""

while True:
    user_input = input("Enter your prompt (or type 'exit' to quit): ")
    if user_input.lower() == 'exit':
        break

    if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
        # Pass the instructions as a system message so the chat template
        # renders them ahead of the user turn. (If this model's template
        # rejects a system role, prepend them to the user message instead.)
        messages = [
            {"role": "system", "content": prefill},
            {"role": "user", "content": user_input},
        ]
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
    else:
        # Fallback for tokenizers without a chat template.
        prompt = f"<|user|>\n{prefill}\n\n{user_input}<|end|>\n<|assistant|>"

    response = generate(
        model=model,
        tokenizer=tokenizer,
        prompt=prompt,
        max_tokens=2048,
        verbose=True,
    )
    print("\nResponse:\n", response)
```