File size: 4,895 Bytes
f8500fe
 
1322d33
09f5d1d
f8500fe
 
 
 
7226c0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8500fe
 
f51d543
68e2760
f51d543
 
 
 
 
 
 
 
 
 
 
 
68e2760
7226c0f
 
a2e4ed6
 
 
 
d38c2aa
68e2760
26b4c70
f8500fe
 
 
 
d38c2aa
 
f51d543
d38c2aa
f51d543
 
 
 
 
 
 
09f5d1d
 
f51d543
 
 
f480044
f51d543
f8500fe
 
 
 
 
10d8708
e166128
 
 
 
75e9f94
 
 
bd7801a
 
 
 
 
75e9f94
 
 
 
 
e166128
9528106
f456e1e
f8500fe
 
 
 
 
 
 
 
 
 
e4e4818
4d0bc11
 
 
f8500fe
 
e4e4818
 
10d8708
75e9f94
f59fce3
a2e4ed6
e4e4818
 
 
 
 
 
f8500fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81418d5
 
 
 
f8500fe
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import os
import gradio as gr
import spaces
import torch

# Supported language options: display name -> ISO 639-1 code.
# "auto" means the source language should be auto-detected by the model.
LANGUAGES = {
    "Auto Detect": "auto",
    "English": "en",
    "Chinese": "zh",
    "Russian": "ru",
    # Bug fix: "ka" is the ISO 639-1 code for Georgian; Japanese is "ja".
    "Japanese": "ja",
    "Korean": "ko",
    "Spanish": "es",
    "French": "fr",
    "Portuguese": "pt",
    "German": "de",
    "Italian": "it",
    "Thai": "th",
    "Vietnamese": "vi",
    "Indonesian": "id",
    "Malay": "ms",
    "Arabic": "ar",
    "Polish": "pl",
    "Dutch": "nl",
    "Romanian": "ro",
    "Turkish": "tr",
    "Czech": "cs",
    "Danish": "da",
    "Finnish": "fi",
    "Ukrainian": "uk",
    "Norwegian Bokmal": "nb",
    "Norwegian": "no",
    "Croatian": "hr",
    "Swedish": "sv",
    "Hungarian": "hu",
}

from transformers import AutoModelForCausalLM, AutoTokenizer

# This Space assumes a CUDA GPU is available (see @spaces.GPU below).
device = "cuda"
MODEL_NAME = "ByteDance-Seed/Seed-X-PPO-7B"
print("Start download")  # typo fixed: "dowload" -> "download"

def load_model():
    """Load the Seed-X causal LM in bfloat16 and move it to the target device.

    Returns:
        The loaded ``AutoModelForCausalLM`` instance on ``device``.
    """
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, torch_dtype="bfloat16"
    ).to(device)
    print(f"Model loaded in {device}")
    return model

model = load_model()
print("End download")  # typo fixed: "Ednd dowload" -> "End download"
# Load the tokenizer once, because re-loading it takes about 1.5 seconds each time
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def prompting(text, source_lang, target_lang):
    """Build the Seed-X translation prompt for *text*.

    Args:
        text: The sentence to translate.
        source_lang: Display name of the source language (may be "Auto Detect").
        target_lang: Display name of the target language (never "Auto Detect";
            the UI dropdown excludes it).

    Returns:
        The prompt string, ending with the target language tag ``<code>``.
    """
    target_code = LANGUAGES[target_lang]
    # Bug fix: auto-detection applies to the *source* language. The original
    # checked LANGUAGES[target_lang] == "auto", which is unreachable because
    # the target dropdown excludes "Auto Detect".
    if LANGUAGES[source_lang] == "auto":
        # Omit the source language so the model detects it itself.
        prompt = f"Translate the following sentence into {target_lang} and explain it in detail:\n{text} <{target_code}>"
    else:
        prompt = f"Translate the following {source_lang} sentence into {target_lang} and explain it in detail:\n{text} <{target_code}>"
    return prompt

@spaces.GPU(duration=120)
def translate_text(text, source_lang, target_lang):
    """Translate *text* and yield the result (generator consumed by Gradio).

    Args:
        text: Input text; blank input yields a prompt-for-input message.
        source_lang: Display name of the source language.
        target_lang: Display name of the target language.

    Yields:
        A single string: the translation, or an error message on failure.
    """
    if not text.strip():
        # Bug fix: this is a generator function, so a bare ``return "..."``
        # ends iteration without ever delivering the message to the UI.
        # Yield the message instead, then stop.
        yield "请输入要翻译的文本"
        return
    try:
        prompt = prompting(text, source_lang, target_lang)
        print(prompt)
        input_tokens = (
            tokenizer(prompt, return_tensors="pt")
            .input_ids[0]
            .cpu()
            .numpy()
            .tolist()
        )
        translated_chunk = model.generate(
            input_ids=torch.tensor([input_tokens]).to(device),
            # NOTE(review): max_length counts the prompt tokens too;
            # max_new_tokens may be what was intended — confirm.
            max_length=512,
            num_beams=4,
            num_return_sequences=1,
        )
        full_output = tokenizer.decode(translated_chunk[0], skip_special_tokens=True)
        # Strip the echoed prompt so only the generated continuation remains.
        full_output = full_output.replace(prompt.strip(), "")
        yield full_output
    except Exception as e:
        # Surface any failure to the UI instead of crashing the worker.
        yield f"翻译出错: {str(e)}"


# Build the Gradio UI
with gr.Blocks(title="Seed-X") as demo:
    gr.Markdown("# 👋 Seed-X, powered by Bytedance")
    gr.Markdown(
        'A real-time translation tool based on Seed-X. It pushes the boundaries of translation capabilities within 7 billion parameters.'
    )

    # Layout: language selectors on top, then the translate button,
    # then input/output textboxes side by side.
    with gr.Column():
        with gr.Row():
            source_lang = gr.Dropdown(
                    choices=list(LANGUAGES.keys()),
                    value="Auto Detect",
                    label="Source Language"
                )
            target_lang = gr.Dropdown(
                    choices=list(LANGUAGES.keys())[1:],  # Exclude "Auto Detect"
                    value="English",
                    label="Target Language"
            )
        with gr.Row():
            translate_btn = gr.Button("Translate", variant='secondary')
        with gr.Row():
            source_text = gr.Textbox(
                label="Input Text",
                placeholder="Please enter the text to translate...",
                lines=5
            )
            target_text = gr.Textbox(
                label="Translation Result",
                interactive=False,  # output-only field
                lines=5
            )

    gr.Markdown(
        '(The content of the input and output is limited to no more than 5 lines.)'
    )
    
    # Clickable examples; results are pre-computed because cache_examples=True.
    # NOTE(review): translate_text is a generator — confirm example caching
    # handles generator fns as expected in this Gradio version.
    gr.Examples(
        examples=[
            ["我说一句你说一车啊", "Chinese", "English"],
            ["离谱她妈给离谱开门,离谱到家了", "Chinese", "English"],
            ["雨女无瓜", "Chinese", "English"],
            ["Their relationship is a total situationship.", "English", "Chinese"]
        ],
        inputs=[source_text, source_lang, target_lang],
        outputs=target_text,
        fn=translate_text,
        cache_examples=True
    )
    
    # Button click event
    translate_btn.click(
        fn=translate_text,
        inputs=[source_text, source_lang, target_lang],
        outputs=target_text
    )
    
    # Also translate when the user presses Enter in the input box
    source_text.submit(
        fn=translate_text,
        inputs=[source_text, source_lang, target_lang],
        outputs=target_text
    )

    gr.Markdown(
        "🌐[Github](https://github.com/ByteDance-Seed/Seed-X-7B)&nbsp;&nbsp;📄[Report](https://arxiv.org/pdf/2507.13618)&nbsp;&nbsp;🤗[Model](https://huggingface.co/collections/ByteDance-Seed/seed-x-6878753f2858bc17afa78543)"
    )

# Launch the app when run as a script
if __name__ == "__main__":
    demo.launch()