Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,895 Bytes
f8500fe 1322d33 09f5d1d f8500fe 7226c0f f8500fe f51d543 68e2760 f51d543 68e2760 7226c0f a2e4ed6 d38c2aa 68e2760 26b4c70 f8500fe d38c2aa f51d543 d38c2aa f51d543 09f5d1d f51d543 f480044 f51d543 f8500fe 10d8708 e166128 75e9f94 bd7801a 75e9f94 e166128 9528106 f456e1e f8500fe e4e4818 4d0bc11 f8500fe e4e4818 10d8708 75e9f94 f59fce3 a2e4ed6 e4e4818 f8500fe 81418d5 f8500fe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
import os
import gradio as gr
import spaces
import torch
# Supported language options: UI display name -> language code.
# The code of the selected target language is appended to the prompt as a
# "<code>" tag. "Auto Detect" must stay the first entry because the target
# dropdown drops it by slicing off the first key.
LANGUAGES = {
    "Auto Detect": "auto",
    "English": "en",
    "Chinese": "zh",
    "Russian": "ru",
    # Fixed: this was "ka", which is the ISO 639-1 code for Georgian.
    "Japanese": "ja",
    "Korean": "ko",
    "Spanish": "es",
    "French": "fr",
    "Portuguese": "pt",
    "German": "de",
    "Italian": "it",
    "Thai": "th",
    "Vietnamese": "vi",
    "Indonesian": "id",
    "Malay": "ms",
    "Arabic": "ar",
    "Polish": "pl",
    "Dutch": "nl",
    "Romanian": "ro",
    "Turkish": "tr",
    "Czech": "cs",
    "Danish": "da",
    "Finnish": "fi",
    "Ukrainian": "uk",
    "Norwegian Bokmal": "nb",
    "Norwegian": "no",
    "Croatian": "hr",
    "Swedish": "sv",
    "Hungarian": "hu",
}
from transformers import AutoModelForCausalLM, AutoTokenizer
# Hugging Face Hub identifier of the checkpoint used for both the model and
# the tokenizer.
MODEL_NAME: str = "ByteDance-Seed/Seed-X-PPO-7B"
# Torch device string the model and the input tensors are moved to.
device: str = "cuda"
print("Start dowload")
def load_model():
    """Load the MODEL_NAME checkpoint in bfloat16 and move it to `device`.

    Returns:
        The causal language model instance, already placed on `device`.
    """
    causal_lm = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype="bfloat16",
    )
    causal_lm = causal_lm.to(device)
    print(f"Model loaded in {device}")
    return causal_lm
# Load the model once at import time; translate_text reads this module-level
# instance on every request instead of reloading it.
model = load_model()
print("Ednd dowload")
# Loading the tokenizer once, because re-loading it takes about 1.5 seconds each time
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
def prompting(text, source_lang, target_lang):
    """Build the translation prompt sent to the model.

    Args:
        text: The text to translate.
        source_lang: Display name of the source language (a key of
            LANGUAGES); "Auto Detect" means the source is not named.
        target_lang: Display name of the target language (a key of
            LANGUAGES).

    Returns:
        The prompt string, ending with the "<code>" tag of the target
        language.

    Raises:
        KeyError: If `target_lang` is not a key of LANGUAGES.
    """
    target_code = LANGUAGES[target_lang]
    # The "auto" marker belongs to the *source* selection: the target dropdown
    # never offers "Auto Detect". Checking the target code here (as before)
    # produced prompts such as "Translate the following Auto Detect sentence".
    # Unknown source names fall through and are used verbatim, as before.
    if LANGUAGES.get(source_lang) == "auto":
        return (
            f"Translate the following sentence into {target_lang} "
            f"and explain it in detail:\n{text} <{target_code}>"
        )
    return (
        f"Translate the following {source_lang} sentence into {target_lang} "
        f"and explain it in detail:\n{text} <{target_code}>"
    )
@spaces.GPU(duration=120)
def translate_text(text, source_lang, target_lang):
    """Translate `text` with the module-level model and yield the result.

    This is a generator (it is used as a Gradio event handler): every message
    meant for the UI, including validation and error messages, is yielded.

    Args:
        text: The text to translate.
        source_lang: Display name of the source language (key of LANGUAGES).
        target_lang: Display name of the target language (key of LANGUAGES).

    Yields:
        A single string: the model output for the prompt, a notice asking for
        input when `text` is empty or whitespace, or an error message if
        anything fails while building the prompt or generating.
    """
    if not text or not text.strip():
        # Must be yielded: a `return <value>` inside a generator only ends the
        # iteration, so the notice would never reach the output textbox.
        yield "请输入要翻译的文本"
        return
    try:
        prompt = prompting(text, source_lang, target_lang)
        print(prompt)
        # Shape (1, prompt_len); moved straight to the model's device.
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
        generated = model.generate(
            input_ids=input_ids,
            max_length=512,
            num_beams=4,
            num_return_sequences=1,
        )
        # The returned sequence starts with the prompt tokens. Slice them off
        # by length instead of string-replacing the prompt, because the decoded
        # text is not guaranteed to reproduce the prompt verbatim.
        new_tokens = generated[0][input_ids.shape[1]:]
        yield tokenizer.decode(new_tokens, skip_special_tokens=True)
    except Exception as e:
        # UI boundary: report the failure in the output box instead of raising.
        yield f"翻译出错: {str(e)}"
# Build the Gradio interface
with gr.Blocks(title="Seed-X") as demo:
    gr.Markdown("# 👋 Seed-X, powered by Bytedance")
    gr.Markdown(
        "A real-time translation tool based on Seed-X. It pushes the boundaries of translation capabilities within 7 billion parameters."
    )

    # Display names in LANGUAGES insertion order; the first one is "Auto Detect".
    language_names = list(LANGUAGES)

    with gr.Column():
        with gr.Row():
            source_lang = gr.Dropdown(
                choices=language_names,
                value="Auto Detect",
                label="Source Language",
            )
            target_lang = gr.Dropdown(
                choices=language_names[1:],  # Exclude "Auto Detect"
                value="English",
                label="Target Language",
            )
        with gr.Row():
            translate_btn = gr.Button("Translate", variant="secondary")
        with gr.Row():
            source_text = gr.Textbox(
                label="Input Text",
                placeholder="Please enter the text to translate...",
                lines=5,
            )
            target_text = gr.Textbox(
                label="Translation Result",
                interactive=False,
                lines=5,
            )

    gr.Markdown(
        "(The content of the input and output is limited to no more than 5 lines.)"
    )

    # Examples: each row is (input text, source language, target language)
    example_rows = [
        ["我说一句你说一车啊", "Chinese", "English"],
        ["离谱她妈给离谱开门,离谱到家了", "Chinese", "English"],
        ["雨女无瓜", "Chinese", "English"],
        ["Their relationship is a total situationship.", "English", "Chinese"],
    ]
    gr.Examples(
        examples=example_rows,
        inputs=[source_text, source_lang, target_lang],
        outputs=target_text,
        fn=translate_text,
        cache_examples=True,
    )

    # Clicking the button and pressing Enter in the input box both translate.
    for register_event in (translate_btn.click, source_text.submit):
        register_event(
            fn=translate_text,
            inputs=[source_text, source_lang, target_lang],
            outputs=target_text,
        )

    gr.Markdown(
        "🌐[Github](https://github.com/ByteDance-Seed/Seed-X-7B) 📄[Report](https://arxiv.org/pdf/2507.13618) 🤗[Model](https://huggingface.co/collections/ByteDance-Seed/seed-x-6878753f2858bc17afa78543)"
    )
# Launch the app when this file is run as a script.
if __name__ == "__main__":
    demo.launch()