feat(youtube): add time-synced subtitles via IFrame API; add proxy fallback to fetch auto-generated captions; annotate with hiragana + POS colors; clickable to seek
Browse files- app.py +169 -8
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -7,6 +7,12 @@ import pytesseract
|
|
| 7 |
import cv2
|
| 8 |
import numpy as np
|
| 9 |
from typing import List, Dict, Optional
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# YouTube transcript & translation (optional)
|
| 12 |
try:
|
|
@@ -340,7 +346,13 @@ def fetch_transcript(video_id: str, lang_priority: List[str]) -> List[Dict]:
|
|
| 340 |
except TranscriptsDisabled:
|
| 341 |
raise RuntimeError("该视频字幕被禁用。")
|
| 342 |
except Exception as e:
|
| 343 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
|
| 345 |
for lang in lang_priority:
|
| 346 |
try:
|
|
@@ -356,8 +368,72 @@ def fetch_transcript(video_id: str, lang_priority: List[str]) -> List[Dict]:
|
|
| 356 |
translated = tr.translate(target)
|
| 357 |
return translated.fetch()
|
| 358 |
except Exception:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
raise RuntimeError("未找到可用字幕。建议换个视频或检查语言。")
|
| 360 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
def translate_zh(text: str) -> Optional[str]:
|
| 362 |
if not text or GoogleTranslator is None:
|
| 363 |
return None
|
|
@@ -368,26 +444,32 @@ def translate_zh(text: str) -> Optional[str]:
|
|
| 368 |
|
| 369 |
def render_transcript_html(items: List[Dict], grammar_mode: bool, with_zh: bool) -> str:
|
| 370 |
lines = []
|
|
|
|
| 371 |
for it in items:
|
| 372 |
raw = normalize_text(it.get('text', ''))
|
| 373 |
if not raw:
|
|
|
|
| 374 |
continue
|
| 375 |
-
|
|
|
|
|
|
|
| 376 |
jp_html = to_furigana_inline(raw, grammar_mode)
|
| 377 |
zh = translate_zh(raw) if with_zh else None
|
| 378 |
zh_html = f"<div style='color:#444;margin-top:4px;'>【中】{zh}</div>" if zh else ""
|
| 379 |
lines.append(
|
| 380 |
-
f"<div
|
|
|
|
| 381 |
f"<div style='font-size:0.9em;color:#888;'>⏱ {ts}</div>"
|
| 382 |
f"<div style='line-height:2.0;font-size:1.1em;'>{jp_html}</div>"
|
| 383 |
f"{zh_html}"
|
| 384 |
f"</div>"
|
| 385 |
)
|
|
|
|
| 386 |
if not lines:
|
| 387 |
return "<div>未获取到字幕内容。</div>"
|
| 388 |
legend = create_pos_legend() if grammar_mode else ""
|
| 389 |
return (
|
| 390 |
-
"<div style='max-height:70vh;overflow:auto;padding:8px;background:#f6f7f9;border-radius:8px;'>"
|
| 391 |
+ legend
|
| 392 |
+ "".join(lines)
|
| 393 |
+ "</div>"
|
|
@@ -400,9 +482,10 @@ def show_youtube(url: str, grammar_mode: bool, with_zh: bool):
|
|
| 400 |
vid = parse_video_id(url)
|
| 401 |
if not vid:
|
| 402 |
return "", "", "未能解析视频ID,请检查链接。"
|
|
|
|
| 403 |
iframe = (
|
| 404 |
f"<div style='position:relative;padding-top:56.25%;'>"
|
| 405 |
-
f"<iframe src='https://www.youtube.com/embed/{vid}' "
|
| 406 |
f"style='position:absolute;top:0;left:0;width:100%;height:100%;border:0;' "
|
| 407 |
f"allow='accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture' allowfullscreen></iframe>"
|
| 408 |
f"</div>"
|
|
@@ -411,9 +494,87 @@ def show_youtube(url: str, grammar_mode: bool, with_zh: bool):
|
|
| 411 |
items = fetch_transcript(vid, ["ja", "ja-JP"])
|
| 412 |
except Exception as e:
|
| 413 |
return iframe, "", f"加载字幕失败:{e}"
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
|
| 418 |
# ----- OCR from screenshot -----
|
| 419 |
def ocr_image(img: np.ndarray):
|
|
|
|
| 7 |
import cv2
|
| 8 |
import numpy as np
|
| 9 |
from typing import List, Dict, Optional
|
| 10 |
+
import html as _html
|
| 11 |
+
import json as _json
|
| 12 |
+
try:
|
| 13 |
+
import requests
|
| 14 |
+
except Exception:
|
| 15 |
+
requests = None
|
| 16 |
|
| 17 |
# YouTube transcript & translation (optional)
|
| 18 |
try:
|
|
|
|
| 346 |
except TranscriptsDisabled:
|
| 347 |
raise RuntimeError("该视频字幕被禁用。")
|
| 348 |
except Exception as e:
|
| 349 |
+
# 常见于平台无法访问 YouTube 的网络/DNS 限制,尝试代理抓取
|
| 350 |
+
fallback = _fetch_transcript_via_proxy(video_id, lang_priority)
|
| 351 |
+
if fallback:
|
| 352 |
+
return fallback
|
| 353 |
+
raise RuntimeError(
|
| 354 |
+
"拉取字幕失败: {}。若 Space 无法直连 YouTube,已尝试代理抓取。".format(e)
|
| 355 |
+
)
|
| 356 |
|
| 357 |
for lang in lang_priority:
|
| 358 |
try:
|
|
|
|
| 368 |
translated = tr.translate(target)
|
| 369 |
return translated.fetch()
|
| 370 |
except Exception:
|
| 371 |
+
# 尝试代理抓取
|
| 372 |
+
fallback = _fetch_transcript_via_proxy(video_id, lang_priority)
|
| 373 |
+
if fallback:
|
| 374 |
+
return fallback
|
| 375 |
raise RuntimeError("未找到可用字幕。建议换个视频或检查语言。")
|
| 376 |
|
| 377 |
+
def _fetch_transcript_via_proxy(video_id: str, lang_priority: List[str]) -> Optional[List[Dict]]:
    """Best-effort fallback: fetch captions through the r.jina.ai proxy.

    Downloads the YouTube watch page via the proxy, extracts the embedded
    ``captionTracks`` list, picks a track, then downloads and parses that
    track's timedtext document (again via the proxy).

    Args:
        video_id: YouTube video id, interpolated into the watch URL.
        lang_priority: Language codes to try, in order of preference.

    Returns:
        A non-empty list of ``{'text', 'start', 'duration'}`` dicts, or
        ``None`` when ``requests`` is unavailable, any HTTP call does not
        return 200, no usable track/cue is found, or any error occurs.
    """
    if requests is None:
        return None
    try:
        page = requests.get(
            f"https://r.jina.ai/https://www.youtube.com/watch?v={video_id}",
            timeout=12,
        )
        if page.status_code != 200:
            return None
        chosen = _pick_caption_track(_extract_caption_tracks(page.text), lang_priority)
        if not chosen:
            return None
        base_url = chosen.get('baseUrl')
        if not base_url:
            return None
        # Fetch the timedtext document through the same proxy.
        cues = requests.get(f"https://r.jina.ai/{base_url}", timeout=12)
        if cues.status_code != 200:
            return None
        return _parse_timedtext_xml(cues.text) or None
    except Exception:
        # Deliberately broad: this is a fallback path and callers treat None
        # as "proxy fetch unavailable" and raise their own error message.
        return None


def _extract_caption_tracks(page: str) -> List[Dict]:
    """Return the dict entries of the ``captionTracks`` JSON array in *page*.

    The array is decoded with a real JSON decoder starting at its opening
    bracket, so nested arrays or ``]`` inside strings do not truncate it
    (a non-greedy ``\\[.*?\\]`` regex would stop at the first ``]``).
    Returns an empty list when the marker is absent or the JSON is invalid.
    """
    marker = re.search(r'"captionTracks"\s*:\s*\[', page)
    if not marker:
        return []
    try:
        # raw_decode parses one JSON value and ignores whatever follows it.
        tracks, _ = _json.JSONDecoder().raw_decode(page[marker.end() - 1:])
    except ValueError:
        return []
    if not isinstance(tracks, list):
        return []
    return [track for track in tracks if isinstance(track, dict)]


def _pick_caption_track(tracks: List[Dict], lang_priority: List[str]) -> Optional[Dict]:
    """Choose a track: exact priority match, then any ``ja*``, then the first."""
    for lang in (lang_priority or []):
        for track in tracks:
            if track.get('languageCode') == lang:
                return track
    for track in tracks:
        # languageCode may be missing or None; coerce before startswith.
        if str(track.get('languageCode') or '').startswith('ja'):
            return track
    return tracks[0] if tracks else None


def _parse_timedtext_xml(xml: str) -> List[Dict]:
    """Parse ``<text start=".." dur="..">..</text>`` cues out of *xml*.

    Cue text may span several lines; line breaks and ``<br>`` variants become
    spaces and HTML entities are unescaped once. A cue without ``dur`` is kept
    with duration ``0.0``; a cue without a parseable ``start`` is skipped.
    """
    items: List[Dict] = []
    # (?<!/) rejects self-closing <text .../> so it cannot swallow the next cue.
    for cue in re.finditer(r'<text\b([^>]*?)(?<!/)>(.*?)</text>', xml, re.DOTALL):
        attrs, body = cue.group(1), cue.group(2)
        start_match = re.search(r'\bstart="([0-9.]+)"', attrs)
        if not start_match:
            continue
        dur_match = re.search(r'\bdur="([0-9.]+)"', attrs)
        try:
            start = float(start_match.group(1))
            duration = float(dur_match.group(1)) if dur_match else 0.0
        except ValueError:
            # e.g. "1.2.3" matches the character class but is not a number.
            continue
        text = _html.unescape(re.sub(r'<br\s*/?>|\n', ' ', body))
        items.append({'text': text, 'start': start, 'duration': duration})
    return items
|
| 436 |
+
|
| 437 |
def translate_zh(text: str) -> Optional[str]:
|
| 438 |
if not text or GoogleTranslator is None:
|
| 439 |
return None
|
|
|
|
| 444 |
|
| 445 |
def render_transcript_html(items: List[Dict], grammar_mode: bool, with_zh: bool) -> str:
|
| 446 |
lines = []
|
| 447 |
+
idx = 0
|
| 448 |
for it in items:
|
| 449 |
raw = normalize_text(it.get('text', ''))
|
| 450 |
if not raw:
|
| 451 |
+
idx += 1
|
| 452 |
continue
|
| 453 |
+
start = float(it.get('start', 0.0))
|
| 454 |
+
dur = float(it.get('duration', 0.0))
|
| 455 |
+
ts = seconds_to_mmss(start)
|
| 456 |
jp_html = to_furigana_inline(raw, grammar_mode)
|
| 457 |
zh = translate_zh(raw) if with_zh else None
|
| 458 |
zh_html = f"<div style='color:#444;margin-top:4px;'>【中】{zh}</div>" if zh else ""
|
| 459 |
lines.append(
|
| 460 |
+
f"<div class='yt-line' data-yt-idx='{idx}' data-yt-start='{start}' data-yt-dur='{dur}' "
|
| 461 |
+
f"style='padding:8px 10px;margin:6px 0;border-left:4px solid #ddd;background:#fff;border-radius:6px;'>"
|
| 462 |
f"<div style='font-size:0.9em;color:#888;'>⏱ {ts}</div>"
|
| 463 |
f"<div style='line-height:2.0;font-size:1.1em;'>{jp_html}</div>"
|
| 464 |
f"{zh_html}"
|
| 465 |
f"</div>"
|
| 466 |
)
|
| 467 |
+
idx += 1
|
| 468 |
if not lines:
|
| 469 |
return "<div>未获取到字幕内容。</div>"
|
| 470 |
legend = create_pos_legend() if grammar_mode else ""
|
| 471 |
return (
|
| 472 |
+
"<div data-yt-scroll style='max-height:70vh;overflow:auto;padding:8px;background:#f6f7f9;border-radius:8px;'>"
|
| 473 |
+ legend
|
| 474 |
+ "".join(lines)
|
| 475 |
+ "</div>"
|
|
|
|
| 482 |
vid = parse_video_id(url)
|
| 483 |
if not vid:
|
| 484 |
return "", "", "未能解析视频ID,请检查链接。"
|
| 485 |
+
iframe_id = f"yt-player-{vid}"
|
| 486 |
iframe = (
|
| 487 |
f"<div style='position:relative;padding-top:56.25%;'>"
|
| 488 |
+
f"<iframe id='{iframe_id}' src='https://www.youtube.com/embed/{vid}?enablejsapi=1' "
|
| 489 |
f"style='position:absolute;top:0;left:0;width:100%;height:100%;border:0;' "
|
| 490 |
f"allow='accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture' allowfullscreen></iframe>"
|
| 491 |
f"</div>"
|
|
|
|
| 494 |
items = fetch_transcript(vid, ["ja", "ja-JP"])
|
| 495 |
except Exception as e:
|
| 496 |
return iframe, "", f"加载字幕失败:{e}"
|
| 497 |
+
# 渲染字幕并注入同步脚本
|
| 498 |
+
html_core = render_transcript_html(items, grammar_mode, with_zh)
|
| 499 |
+
# 将字幕的时间戳打包为 JSON,供前端脚本同步
|
| 500 |
+
safe_items = []
|
| 501 |
+
for it in items:
|
| 502 |
+
safe_items.append({
|
| 503 |
+
'start': float(it.get('start', 0.0)),
|
| 504 |
+
'duration': float(it.get('duration', 0.0)),
|
| 505 |
+
})
|
| 506 |
+
import json
|
| 507 |
+
times_json = json.dumps(safe_items)
|
| 508 |
+
sync_script = f"""
|
| 509 |
+
<style>
|
| 510 |
+
.yt-line-active {{
|
| 511 |
+
background: #fff4cc;
|
| 512 |
+
border-left-color: #f0b400 !important;
|
| 513 |
+
}}
|
| 514 |
+
.yt-line {{ cursor: pointer; }}
|
| 515 |
+
</style>
|
| 516 |
+
<script>
|
| 517 |
+
(function(){{
|
| 518 |
+
var vid = {repr(vid)};
|
| 519 |
+
var iframeId = {repr(iframe_id)};
|
| 520 |
+
var items = {times_json};
|
| 521 |
+
// 加载 IFrame API(若尚未加载)
|
| 522 |
+
function ensureAPI(cb){{
|
| 523 |
+
if (window.YT && window.YT.Player) return cb();
|
| 524 |
+
if (!document.getElementById('yt-iframe-api')){{
|
| 525 |
+
var s = document.createElement('script');
|
| 526 |
+
s.id='yt-iframe-api';
|
| 527 |
+
s.src='https://www.youtube.com/iframe_api';
|
| 528 |
+
document.body.appendChild(s);
|
| 529 |
+
}}
|
| 530 |
+
var t = setInterval(function(){{
|
| 531 |
+
if (window.YT && window.YT.Player) {{ clearInterval(t); cb(); }}
|
| 532 |
+
}}, 200);
|
| 533 |
+
}}
|
| 534 |
+
var player;
|
| 535 |
+
function startSync(){{
|
| 536 |
+
try {{
|
| 537 |
+
player = player || new YT.Player(iframeId);
|
| 538 |
+
}} catch(e) {{ return; }}
|
| 539 |
+
// 点击跳转
|
| 540 |
+
document.querySelectorAll('[data-yt-start]').forEach(function(el){{
|
| 541 |
+
el.addEventListener('click', function(){{
|
| 542 |
+
var st = parseFloat(el.getAttribute('data-yt-start')||'0');
|
| 543 |
+
if (player && player.seekTo) player.seekTo(st, true);
|
| 544 |
+
}});
|
| 545 |
+
}});
|
| 546 |
+
// 定时高亮
|
| 547 |
+
var last = -1;
|
| 548 |
+
setInterval(function(){{
|
| 549 |
+
if (!player || !player.getCurrentTime) return;
|
| 550 |
+
var t = player.getCurrentTime();
|
| 551 |
+
var idx = -1;
|
| 552 |
+
for (var i=0;i<items.length;i++){{
|
| 553 |
+
var s = items[i].start, e = s + items[i].duration;
|
| 554 |
+
if (t >= s && t < e) {{ idx = i; break; }}
|
| 555 |
+
}}
|
| 556 |
+
if (idx !== last) {{
|
| 557 |
+
last = idx;
|
| 558 |
+
document.querySelectorAll('[data-yt-idx]').forEach(function(el){{ el.classList.remove('yt-line-active'); }});
|
| 559 |
+
var cur = document.querySelector('[data-yt-idx="'+idx+'"]');
|
| 560 |
+
if (cur) {{
|
| 561 |
+
cur.classList.add('yt-line-active');
|
| 562 |
+
// 滚动到可见区域
|
| 563 |
+
var parent = cur.closest('[data-yt-scroll]');
|
| 564 |
+
if (parent) {{
|
| 565 |
+
var top = cur.offsetTop - 80;
|
| 566 |
+
parent.scrollTo({{ top: top, behavior: 'smooth' }});
|
| 567 |
+
}}
|
| 568 |
+
}}
|
| 569 |
+
}}
|
| 570 |
+
}}, 250);
|
| 571 |
+
}}
|
| 572 |
+
ensureAPI(startSync);
|
| 573 |
+
}})();
|
| 574 |
+
</script>
|
| 575 |
+
"""
|
| 576 |
+
tip = "已加载字幕并开启同步。支持点击字幕跳转、自动高亮。" + (" 已附中文释义。" if with_zh else "")
|
| 577 |
+
return iframe, html_core + sync_script, tip
|
| 578 |
|
| 579 |
# ----- OCR from screenshot -----
|
| 580 |
def ocr_image(img: np.ndarray):
|
requirements.txt
CHANGED
|
@@ -6,3 +6,4 @@ pytesseract==0.3.13
|
|
| 6 |
opencv-python-headless==4.10.0.84
|
| 7 |
youtube-transcript-api==0.6.2
|
| 8 |
deep-translator==1.11.4
|
|
|
|
|
|
| 6 |
opencv-python-headless==4.10.0.84
|
| 7 |
youtube-transcript-api==0.6.2
|
| 8 |
deep-translator==1.11.4
|
| 9 |
+
requests==2.32.3
|