File size: 4,895 Bytes
f8500fe
 
1322d33
09f5d1d
f8500fe
 
 
 
7226c0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8500fe
 
f51d543
68e2760
f51d543
 
 
 
 
 
 
 
 
 
 
 
68e2760
7226c0f
 
a2e4ed6
 
 
 
d38c2aa
68e2760
26b4c70
f8500fe
 
 
 
d38c2aa
 
f51d543
d38c2aa
f51d543
 
 
 
 
 
 
09f5d1d
 
f51d543
 
 
f480044
f51d543
f8500fe
 
 
 
 
10d8708
e166128
 
 
 
75e9f94
 
 
bd7801a
 
 
 
 
75e9f94
 
 
 
 
e166128
9528106
f456e1e
f8500fe
 
 
 
 
 
 
 
 
 
e4e4818
4d0bc11
 
 
f8500fe
 
e4e4818
 
10d8708
75e9f94
f59fce3
a2e4ed6
e4e4818
 
 
 
 
 
f8500fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81418d5
 
 
 
f8500fe
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import os
import gradio as gr
import spaces
import torch

# Supported language options: display name -> ISO 639-1 code.
# "auto" means the source language should be auto-detected by the model.
LANGUAGES = {
    "Auto Detect": "auto",
    "English": "en",
    "Chinese": "zh",
    "Russian": "ru",
    # Bug fix: "ka" is the ISO 639-1 code for Georgian; Japanese is "ja".
    "Japanese": "ja",
    "Korean": "ko",
    "Spanish": "es",
    "French": "fr",
    "Portuguese": "pt",
    "German": "de",
    "Italian": "it",
    "Thai": "th",
    "Vietnamese": "vi",
    "Indonesian": "id",
    "Malay": "ms",
    "Arabic": "ar",
    "Polish": "pl",
    "Dutch": "nl",
    "Romanian": "ro",
    "Turkish": "tr",
    "Czech": "cs",
    "Danish": "da",
    "Finnish": "fi",
    "Ukrainian": "uk",
    "Norwegian Bokmal": "nb",
    "Norwegian": "no",
    "Croatian": "hr",
    "Swedish": "sv",
    "Hungarian": "hu",
}

from transformers import AutoModelForCausalLM, AutoTokenizer

# This Space assumes a CUDA GPU is available (see @spaces.GPU below).
device = "cuda"
MODEL_NAME = "ByteDance-Seed/Seed-X-PPO-7B"
print("Start download")  # typo fixed: "dowload" -> "download"

def load_model():
    """Load the Seed-X causal LM in bfloat16 and move it to the target device.

    Returns:
        The loaded ``AutoModelForCausalLM`` instance on ``device``.
    """
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, torch_dtype="bfloat16"
    ).to(device)
    print(f"Model loaded in {device}")
    return model

model = load_model()
print("End download")  # typo fixed: "Ednd dowload" -> "End download"
# Load the tokenizer once, because re-loading it takes about 1.5 seconds each time
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def prompting(text, source_lang, target_lang):
    """Build the Seed-X translation prompt for *text*.

    Args:
        text: The sentence to translate.
        source_lang: Display name of the source language (may be "Auto Detect").
        target_lang: Display name of the target language (never "Auto Detect";
            the UI dropdown excludes it).

    Returns:
        The prompt string, ending with the target language tag ``<code>``.
    """
    target_code = LANGUAGES[target_lang]
    # Bug fix: auto-detection applies to the *source* language. The original
    # checked LANGUAGES[target_lang] == "auto", which is unreachable because
    # the target dropdown excludes "Auto Detect".
    if LANGUAGES[source_lang] == "auto":
        # Omit the source language so the model detects it itself.
        prompt = f"Translate the following sentence into {target_lang} and explain it in detail:\n{text} <{target_code}>"
    else:
        prompt = f"Translate the following {source_lang} sentence into {target_lang} and explain it in detail:\n{text} <{target_code}>"
    return prompt

@spaces.GPU(duration=120)
def translate_text(text, source_lang, target_lang):
    """Translate *text* and yield the result (generator consumed by Gradio).

    Args:
        text: Input text; blank input yields a prompt-for-input message.
        source_lang: Display name of the source language.
        target_lang: Display name of the target language.

    Yields:
        A single string: the translation, or an error message on failure.
    """
    if not text.strip():
        # Bug fix: this is a generator function, so a bare ``return "..."``
        # ends iteration without ever delivering the message to the UI.
        # Yield the message instead, then stop.
        yield "请输入要翻译的文本"
        return
    try:
        prompt = prompting(text, source_lang, target_lang)
        print(prompt)
        input_tokens = (
            tokenizer(prompt, return_tensors="pt")
            .input_ids[0]
            .cpu()
            .numpy()
            .tolist()
        )
        translated_chunk = model.generate(
            input_ids=torch.tensor([input_tokens]).to(device),
            # NOTE(review): max_length counts the prompt tokens too;
            # max_new_tokens may be what was intended — confirm.
            max_length=512,
            num_beams=4,
            num_return_sequences=1,
        )
        full_output = tokenizer.decode(translated_chunk[0], skip_special_tokens=True)
        # Strip the echoed prompt so only the generated continuation remains.
        full_output = full_output.replace(prompt.strip(), "")
        yield full_output
    except Exception as e:
        # Surface any failure to the UI instead of crashing the worker.
        yield f"翻译出错: {str(e)}"


# Build the Gradio UI
with gr.Blocks(title="Seed-X") as demo:
    gr.Markdown("# 👋 Seed-X, powered by Bytedance")
    gr.Markdown(
        'A real-time translation tool based on Seed-X. It pushes the boundaries of translation capabilities within 7 billion parameters.'
    )

    # Layout: language selectors on top, then the translate button,
    # then input/output textboxes side by side.
    with gr.Column():
        with gr.Row():
            source_lang = gr.Dropdown(
                    choices=list(LANGUAGES.keys()),
                    value="Auto Detect",
                    label="Source Language"
                )
            target_lang = gr.Dropdown(
                    choices=list(LANGUAGES.keys())[1:],  # Exclude "Auto Detect"
                    value="English",
                    label="Target Language"
            )
        with gr.Row():
            translate_btn = gr.Button("Translate", variant='secondary')
        with gr.Row():
            source_text = gr.Textbox(
                label="Input Text",
                placeholder="Please enter the text to translate...",
                lines=5
            )
            target_text = gr.Textbox(
                label="Translation Result",
                interactive=False,  # output-only field
                lines=5
            )

    gr.Markdown(
        '(The content of the input and output is limited to no more than 5 lines.)'
    )
    
    # Clickable examples; results are pre-computed because cache_examples=True.
    # NOTE(review): translate_text is a generator — confirm example caching
    # handles generator fns as expected in this Gradio version.
    gr.Examples(
        examples=[
            ["我说一句你说一车啊", "Chinese", "English"],
            ["离谱她妈给离谱开门,离谱到家了", "Chinese", "English"],
            ["雨女无瓜", "Chinese", "English"],
            ["Their relationship is a total situationship.", "English", "Chinese"]
        ],
        inputs=[source_text, source_lang, target_lang],
        outputs=target_text,
        fn=translate_text,
        cache_examples=True
    )
    
    # Button click event
    translate_btn.click(
        fn=translate_text,
        inputs=[source_text, source_lang, target_lang],
        outputs=target_text
    )
    
    # Also translate when the user presses Enter in the input box
    source_text.submit(
        fn=translate_text,
        inputs=[source_text, source_lang, target_lang],
        outputs=target_text
    )

    gr.Markdown(
        "🌐[Github](https://github.com/ByteDance-Seed/Seed-X-7B)&nbsp;&nbsp;📄[Report](https://arxiv.org/pdf/2507.13618)&nbsp;&nbsp;🤗[Model](https://huggingface.co/collections/ByteDance-Seed/seed-x-6878753f2858bc17afa78543)"
    )

# Launch the app when run as a script
if __name__ == "__main__":
    demo.launch()