Spaces:
Running
Running
Upload 3 files
Browse files
app.py
CHANGED
@@ -508,68 +508,59 @@ def process_text(text, model_type="bert"):
|
|
508 |
return ent_text, rel_text, kg_text, f"{total_duration:.2f} 秒"
|
509 |
|
510 |
# ======================== 知识图谱可视化 ========================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
511 |
def generate_kg_image(entities, relations):
|
|
|
|
|
|
|
512 |
try:
|
513 |
-
|
514 |
-
|
515 |
-
|
516 |
-
|
517 |
-
|
518 |
-
|
519 |
-
|
520 |
-
|
521 |
-
|
522 |
-
|
523 |
-
|
524 |
-
matplotlib.font_manager._rebuild()
|
525 |
-
|
526 |
-
# 查找所有可用字体
|
527 |
-
font_paths = []
|
528 |
-
for font in font_manager.fontManager.ttflist:
|
529 |
-
if 'Noto Sans CJK' in font.name or 'SimHei' in font.name:
|
530 |
-
font_paths.append(font.fname)
|
531 |
-
|
532 |
-
# 优先使用Noto Sans CJK SC
|
533 |
-
selected_font = None
|
534 |
-
for font_path in font_paths:
|
535 |
-
if 'NotoSansCJKsc' in font_path:
|
536 |
-
selected_font = font_path
|
537 |
-
break
|
538 |
-
|
539 |
-
if selected_font:
|
540 |
-
font_prop = font_manager.FontProperties(fname=selected_font)
|
541 |
-
plt.rcParams['font.family'] = font_prop.get_name()
|
542 |
-
print(f"使用字体: {selected_font}")
|
543 |
-
else:
|
544 |
-
# 方法2:使用绝对路径字体(上传到项目)
|
545 |
-
font_path = os.path.join(os.path.dirname(__file__), 'fonts', 'SimHei.ttf')
|
546 |
-
if os.path.exists(font_path):
|
547 |
-
font_prop = font_manager.FontProperties(fname=font_path)
|
548 |
-
plt.rcParams['font.family'] = font_prop.get_name()
|
549 |
-
print(f"使用本地字体: {font_path}")
|
550 |
-
else:
|
551 |
-
# 方法3:最后手段 - 使用系统默认字体
|
552 |
-
plt.rcParams['font.family'] = ['DejaVu Sans']
|
553 |
-
print("警告:使用默认字体,中文可能显示为方框")
|
554 |
-
|
555 |
-
plt.rcParams['axes.unicode_minus'] = False
|
556 |
-
except Exception as e:
|
557 |
-
print(f"字体配置错误: {e}")
|
558 |
|
559 |
# === 2. 创建图谱 ===
|
560 |
G = nx.DiGraph()
|
561 |
entity_colors = {
|
562 |
-
'PER': '#FF6B6B', #
|
563 |
-
'ORG': '#4ECDC4', #
|
564 |
-
'LOC': '#45B7D1', #
|
565 |
-
'TIME': '#96CEB4'
|
566 |
}
|
567 |
|
568 |
-
# 简化标签(只显示中文名称)
|
569 |
for entity in entities:
|
570 |
G.add_node(
|
571 |
entity["text"],
|
572 |
-
label=entity[
|
573 |
color=entity_colors.get(entity['type'], '#D3D3D3')
|
574 |
)
|
575 |
|
@@ -582,14 +573,14 @@ def generate_kg_image(entities, relations):
|
|
582 |
)
|
583 |
|
584 |
# === 3. 绘图配置 ===
|
585 |
-
plt.figure(figsize=(12, 8), dpi=
|
586 |
-
pos = nx.spring_layout(G, k=0.
|
587 |
|
588 |
-
# 绘制元素
|
589 |
nx.draw_networkx_nodes(
|
590 |
G, pos,
|
591 |
node_color=[G.nodes[n]['color'] for n in G.nodes],
|
592 |
-
node_size=800
|
|
|
593 |
)
|
594 |
nx.draw_networkx_edges(
|
595 |
G, pos,
|
@@ -599,35 +590,40 @@ def generate_kg_image(entities, relations):
|
|
599 |
arrowsize=20
|
600 |
)
|
601 |
|
602 |
-
|
603 |
-
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
-
|
609 |
-
|
610 |
|
611 |
-
# 边标签(英文关系)
|
612 |
edge_labels = nx.get_edge_attributes(G, 'label')
|
613 |
nx.draw_networkx_edge_labels(
|
614 |
G, pos,
|
615 |
edge_labels=edge_labels,
|
616 |
-
font_size=8
|
|
|
617 |
)
|
618 |
|
619 |
plt.axis('off')
|
620 |
-
|
621 |
-
|
622 |
-
|
|
|
623 |
output_path = os.path.join(temp_dir, "kg.png")
|
|
|
|
|
|
|
|
|
624 |
plt.savefig(output_path, bbox_inches='tight', pad_inches=0.1)
|
625 |
plt.close()
|
626 |
-
|
627 |
return output_path
|
628 |
|
629 |
except Exception as e:
|
630 |
-
|
631 |
return None
|
632 |
|
633 |
|
|
|
508 |
return ent_text, rel_text, kg_text, f"{total_duration:.2f} 秒"
|
509 |
|
510 |
# ======================== 知识图谱可视化 ========================
|
511 |
+
import matplotlib.pyplot as plt
|
512 |
+
import networkx as nx
|
513 |
+
import tempfile
|
514 |
+
import os
|
515 |
+
import logging
|
516 |
+
from matplotlib import font_manager
|
517 |
+
|
518 |
+
# 这个函数用于查找并验证中文字体路径
|
519 |
+
def find_chinese_font():
|
520 |
+
# 尝试查找 Noto Sans CJK 字体
|
521 |
+
font_paths = [
|
522 |
+
"/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc", # Noto CJK 字体
|
523 |
+
"/usr/share/fonts/truetype/wqy/wqy-microhei.ttc" # 微软雅黑
|
524 |
+
]
|
525 |
+
|
526 |
+
for font_path in font_paths:
|
527 |
+
if os.path.exists(font_path):
|
528 |
+
logging.info(f"Found font at {font_path}")
|
529 |
+
return font_path
|
530 |
+
|
531 |
+
logging.error("No Chinese font found!")
|
532 |
+
return None
|
533 |
+
|
534 |
def generate_kg_image(entities, relations):
|
535 |
+
"""
|
536 |
+
中文知识图谱生成函数,支持自动匹配系统中的中文字体,避免中文显示为方框。
|
537 |
+
"""
|
538 |
try:
|
539 |
+
# === 1. 确保使用合适的中文字体 ===
|
540 |
+
chinese_font = find_chinese_font() # 调用查找字体函数
|
541 |
+
if chinese_font:
|
542 |
+
font_prop = font_manager.FontProperties(fname=chinese_font)
|
543 |
+
plt.rcParams['font.family'] = font_prop.get_name()
|
544 |
+
else:
|
545 |
+
# 如果字体路径未找到,使用默认字体(DejaVu Sans)
|
546 |
+
logging.warning("Using default font")
|
547 |
+
plt.rcParams['font.family'] = ['DejaVu Sans']
|
548 |
+
|
549 |
+
plt.rcParams['axes.unicode_minus'] = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
550 |
|
551 |
# === 2. 创建图谱 ===
|
552 |
G = nx.DiGraph()
|
553 |
entity_colors = {
|
554 |
+
'PER': '#FF6B6B', # 人物-红色
|
555 |
+
'ORG': '#4ECDC4', # 组织-青色
|
556 |
+
'LOC': '#45B7D1', # 地点-蓝色
|
557 |
+
'TIME': '#96CEB4' # 时间-绿色
|
558 |
}
|
559 |
|
|
|
560 |
for entity in entities:
|
561 |
G.add_node(
|
562 |
entity["text"],
|
563 |
+
label=f"{entity['text']} ({entity['type']})",
|
564 |
color=entity_colors.get(entity['type'], '#D3D3D3')
|
565 |
)
|
566 |
|
|
|
573 |
)
|
574 |
|
575 |
# === 3. 绘图配置 ===
|
576 |
+
plt.figure(figsize=(12, 8), dpi=150)
|
577 |
+
pos = nx.spring_layout(G, k=0.7, seed=42)
|
578 |
|
|
|
579 |
nx.draw_networkx_nodes(
|
580 |
G, pos,
|
581 |
node_color=[G.nodes[n]['color'] for n in G.nodes],
|
582 |
+
node_size=800,
|
583 |
+
alpha=0.9
|
584 |
)
|
585 |
nx.draw_networkx_edges(
|
586 |
G, pos,
|
|
|
590 |
arrowsize=20
|
591 |
)
|
592 |
|
593 |
+
node_labels = {n: G.nodes[n]['label'] for n in G.nodes}
|
594 |
+
nx.draw_networkx_labels(
|
595 |
+
G, pos,
|
596 |
+
labels=node_labels,
|
597 |
+
font_size=10,
|
598 |
+
font_family=font_prop.get_name() if chinese_font else 'SimHei',
|
599 |
+
font_weight='bold'
|
600 |
+
)
|
601 |
|
|
|
602 |
edge_labels = nx.get_edge_attributes(G, 'label')
|
603 |
nx.draw_networkx_edge_labels(
|
604 |
G, pos,
|
605 |
edge_labels=edge_labels,
|
606 |
+
font_size=8,
|
607 |
+
font_family=font_prop.get_name() if chinese_font else 'SimHei'
|
608 |
)
|
609 |
|
610 |
plt.axis('off')
|
611 |
+
plt.tight_layout()
|
612 |
+
|
613 |
+
# === 4. 保存图片 ===
|
614 |
+
temp_dir = tempfile.mkdtemp() # 确保在 Docker 容器中有权限写入
|
615 |
output_path = os.path.join(temp_dir, "kg.png")
|
616 |
+
|
617 |
+
# 打印路径以方便调试
|
618 |
+
logging.info(f"Saving graph image to {output_path}")
|
619 |
+
|
620 |
plt.savefig(output_path, bbox_inches='tight', pad_inches=0.1)
|
621 |
plt.close()
|
622 |
+
|
623 |
return output_path
|
624 |
|
625 |
except Exception as e:
|
626 |
+
logging.error(f"[ERROR] 图谱生成失败: {str(e)}")
|
627 |
return None
|
628 |
|
629 |
|
apt.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
fonts-noto-cjk
|