Spaces:
Running
Running
Upload 3 files
Browse files- Dockerfile +11 -8
- app.py +91 -79
- requirements.txt +2 -4
Dockerfile
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
-
#
|
2 |
-
FROM python:3.10-slim
|
3 |
|
4 |
# 设置工作目录
|
5 |
WORKDIR /app
|
6 |
|
7 |
-
#
|
8 |
RUN apt-get update && apt-get install -y \
|
9 |
libglib2.0-0 \
|
10 |
libx11-6 \
|
@@ -17,17 +17,20 @@ RUN apt-get update && apt-get install -y \
|
|
17 |
&& apt-get clean \
|
18 |
&& rm -rf /var/lib/apt/lists/*
|
19 |
|
20 |
-
#
|
21 |
-
COPY . /app
|
22 |
|
23 |
# 安装 Python 依赖
|
24 |
RUN pip install --no-cache-dir -r requirements.txt
|
25 |
|
26 |
-
#
|
|
|
|
|
|
|
27 |
EXPOSE 7860
|
28 |
|
29 |
-
#
|
30 |
ENV PYTHONUNBUFFERED=1
|
31 |
|
32 |
-
#
|
33 |
CMD ["python", "app.py"]
|
|
|
1 |
+
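# Base image: python:3.10-slim-buster. NOTE: this tag is not smaller than python:3.10-slim, and Debian buster is end-of-life; consider a maintained tag such as python:3.10-slim-bookworm.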
|
2 |
+
FROM python:3.10-slim-buster
|
3 |
|
4 |
# 设置工作目录
|
5 |
WORKDIR /app
|
6 |
|
7 |
+
# 安装系统依赖,包括图形库和中文字体
|
8 |
RUN apt-get update && apt-get install -y \
|
9 |
libglib2.0-0 \
|
10 |
libx11-6 \
|
|
|
17 |
&& apt-get clean \
|
18 |
&& rm -rf /var/lib/apt/lists/*
|
19 |
|
20 |
+
# 复制 requirements.txt 文件先安装 Python 依赖,减少不必要的层
|
21 |
+
COPY requirements.txt /app/
|
22 |
|
23 |
# 安装 Python 依赖
|
24 |
RUN pip install --no-cache-dir -r requirements.txt
|
25 |
|
26 |
+
# 复制剩余应用代码到容器中
|
27 |
+
COPY . /app
|
28 |
+
|
29 |
+
# 暴露端口(假设 Gradio 应用在 7860 端口运行)
|
30 |
EXPOSE 7860
|
31 |
|
32 |
+
# 设置环境变量,确保 Python 输出不缓冲
|
33 |
ENV PYTHONUNBUFFERED=1
|
34 |
|
35 |
+
# 设置默认命令,启动应用
|
36 |
CMD ["python", "app.py"]
|
app.py
CHANGED
@@ -515,103 +515,115 @@ import os
|
|
515 |
import logging
|
516 |
from matplotlib import font_manager
|
517 |
|
518 |
-
#
|
519 |
-
|
520 |
-
|
521 |
-
|
522 |
-
|
523 |
-
#
|
524 |
-
|
525 |
-
|
526 |
-
|
527 |
-
|
528 |
-
|
529 |
-
|
530 |
-
|
531 |
-
|
532 |
-
|
533 |
-
print(cjk_fonts if cjk_fonts else "未找到Noto Sans CJK字体!")
|
534 |
|
535 |
def generate_kg_image(entities, relations):
|
|
|
|
|
|
|
536 |
try:
|
537 |
-
|
538 |
-
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
|
|
546 |
plt.rcParams['axes.unicode_minus'] = False
|
547 |
-
|
548 |
-
# 方法2:如果仍然失败,使用最后手段
|
549 |
-
try:
|
550 |
-
# 测试字体是否有效
|
551 |
-
plt.text(0.5, 0.5, "中文测试", fontsize=12)
|
552 |
-
plt.close()
|
553 |
-
except:
|
554 |
-
print("主字体失效,启用备用方案...")
|
555 |
-
# 使用系统默认字体(至少能显示部分字符)
|
556 |
-
plt.rcParams['font.sans-serif'] = ['DejaVu Sans']
|
557 |
-
|
558 |
# === 2. 创建图谱 ===
|
559 |
G = nx.DiGraph()
|
560 |
-
entity_colors = {
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
color=entity_colors.get(entity['type'], 'gray'))
|
567 |
|
568 |
-
|
569 |
-
|
570 |
-
|
571 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
572 |
|
573 |
# === 3. 绘图配置 ===
|
574 |
-
plt.figure(figsize=(
|
575 |
-
pos = nx.spring_layout(G, k=0.
|
576 |
-
|
577 |
-
|
578 |
-
|
579 |
-
|
580 |
-
|
581 |
-
|
582 |
-
|
583 |
-
nx.draw_networkx_edges(
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
-
|
589 |
-
|
590 |
-
|
591 |
-
|
592 |
-
|
593 |
-
|
594 |
-
|
595 |
-
|
596 |
-
|
597 |
-
|
|
|
|
|
598 |
edge_labels = nx.get_edge_attributes(G, 'label')
|
599 |
-
nx.draw_networkx_edge_labels(
|
600 |
-
|
601 |
-
|
|
|
|
|
|
|
602 |
|
603 |
plt.axis('off')
|
604 |
-
|
605 |
-
|
606 |
-
|
|
|
607 |
output_path = os.path.join(temp_dir, "kg.png")
|
|
|
|
|
|
|
|
|
608 |
plt.savefig(output_path, bbox_inches='tight', pad_inches=0.1)
|
609 |
plt.close()
|
610 |
-
|
611 |
return output_path
|
612 |
|
613 |
except Exception as e:
|
614 |
-
|
615 |
return None
|
616 |
|
617 |
|
|
|
515 |
import logging
|
516 |
from matplotlib import font_manager
|
517 |
|
518 |
+
# 这个函数用于查找并验证中文字体路径
|
519 |
+
def find_chinese_font(candidates=None):
    """Return the path of the first available CJK-capable font file.

    Args:
        candidates: Optional iterable of font file paths to probe, in
            priority order. When omitted, a built-in list of common Debian
            install locations is used.

    Returns:
        The first candidate path that is an existing regular file, or
        ``None`` when none of the candidates exist.
    """
    if candidates is None:
        candidates = (
            # Noto Sans CJK (legacy/truetype location kept for compatibility)
            "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
            # Noto Sans CJK as installed by Debian's fonts-noto-cjk package
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
            # WenQuanYi Micro Hei (fonts-wqy-microhei)
            "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
        )

    for font_path in candidates:
        # isfile (not exists) so that a directory at this path is not
        # mistaken for a loadable font.
        if os.path.isfile(font_path):
            logging.info("Found font at %s", font_path)
            return font_path

    logging.error("No Chinese font found!")
    return None
|
|
|
533 |
|
534 |
def _resolve_label_font():
    """Return the font family name to use for all text in the graph.

    Looks up a CJK font file via ``find_chinese_font()`` and makes sure it is
    registered with matplotlib's font manager, so that the family name can
    actually be resolved. Falls back to ``'DejaVu Sans'`` when no font file
    is found or the file cannot be loaded.
    """
    fallback = 'DejaVu Sans'
    font_path = find_chinese_font()
    if not font_path:
        logging.warning("Using default font")
        return fallback

    try:
        family = font_manager.FontProperties(fname=font_path).get_name()
        # Register the file only once; addfont() appends to the manager's
        # list on every call, so guard against unbounded growth.
        registered = {entry.name for entry in font_manager.fontManager.ttflist}
        if family not in registered:
            font_manager.fontManager.addfont(font_path)
        return family
    except (OSError, RuntimeError, ValueError):
        logging.exception("Failed to load font %s; using default font", font_path)
        return fallback


def generate_kg_image(entities, relations):
    """Render a knowledge graph to a PNG file and return its path.

    Args:
        entities: Iterable of dicts, each read via the ``"text"`` and
            ``"type"`` keys. ``"text"`` becomes the node id; ``"type"``
            selects the node colour (unknown types are drawn light grey).
        relations: Iterable of dicts, each read via the ``"head"``,
            ``"tail"`` and ``"relation"`` keys. A relation is drawn only
            when both endpoints were added as entities.

    Returns:
        Path of the generated ``kg.png`` inside a freshly created temporary
        directory (the directory is not removed here), or ``None`` if
        anything fails while building or rendering the graph.
    """
    fig = None
    try:
        # === 1. Font setup: use one family for rcParams and for labels ===
        label_font = _resolve_label_font()
        plt.rcParams['font.family'] = [label_font]
        plt.rcParams['axes.unicode_minus'] = False

        # === 2. Build the graph ===
        G = nx.DiGraph()
        entity_colors = {
            'PER': '#FF6B6B',   # person - red
            'ORG': '#4ECDC4',   # organisation - cyan
            'LOC': '#45B7D1',   # location - blue
            'TIME': '#96CEB4',  # time - green
        }

        for entity in entities:
            G.add_node(
                entity["text"],
                label=f"{entity['text']} ({entity['type']})",
                color=entity_colors.get(entity['type'], '#D3D3D3'),
            )

        for relation in relations:
            # Skip relations whose endpoints are not known entities, so that
            # every node in the graph carries 'label' and 'color'.
            if relation["head"] in G.nodes and relation["tail"] in G.nodes:
                G.add_edge(
                    relation["head"],
                    relation["tail"],
                    label=relation["relation"],
                )

        # === 3. Draw ===
        fig = plt.figure(figsize=(12, 8), dpi=150)
        # Fixed seed keeps the layout reproducible across calls.
        pos = nx.spring_layout(G, k=0.7, seed=42)

        nx.draw_networkx_nodes(
            G, pos,
            node_color=[G.nodes[n]['color'] for n in G.nodes],
            node_size=800,
            alpha=0.9,
        )
        nx.draw_networkx_edges(
            G, pos,
            edge_color='#888888',
            width=1.5,
            arrows=True,
            arrowsize=20,
        )

        node_labels = {n: G.nodes[n]['label'] for n in G.nodes}
        nx.draw_networkx_labels(
            G, pos,
            labels=node_labels,
            font_size=10,
            font_family=label_font,
            font_weight='bold',
        )

        edge_labels = nx.get_edge_attributes(G, 'label')
        nx.draw_networkx_edge_labels(
            G, pos,
            edge_labels=edge_labels,
            font_size=8,
            font_family=label_font,
        )

        plt.axis('off')
        plt.tight_layout()

        # === 4. Save ===
        # mkdtemp gives a directory that is writable by the current user.
        temp_dir = tempfile.mkdtemp()
        output_path = os.path.join(temp_dir, "kg.png")
        logging.info(f"Saving graph image to {output_path}")

        fig.savefig(output_path, bbox_inches='tight', pad_inches=0.1)
        return output_path

    except Exception as e:
        # Top-level boundary: callers expect None on failure, but keep the
        # traceback in the log for debugging.
        logging.exception("[ERROR] 图谱生成失败: %s", e)
        return None
    finally:
        # Always release the figure, including on failure, to avoid
        # accumulating open figures across calls.
        if fig is not None:
            plt.close(fig)
|
628 |
|
629 |
|
requirements.txt
CHANGED
@@ -22,7 +22,7 @@ python-json-logger>=2.0.0
|
|
22 |
# 知识图谱可视化
|
23 |
networkx>=3.0
|
24 |
pyvis>=0.3.0
|
25 |
-
matplotlib>=3.
|
26 |
plotly>=5.10.0
|
27 |
|
28 |
# 可选依赖(如果使用ChatGLM)
|
@@ -32,6 +32,4 @@ accelerate>=0.20.0
|
|
32 |
|
33 |
# 其他依赖
|
34 |
requests>=2.25.0
|
35 |
-
beautifulsoup4>=4.10.0
|
36 |
-
|
37 |
-
fonts-noto-cjk
|
|
|
22 |
# 知识图谱可视化
|
23 |
networkx>=3.0
|
24 |
pyvis>=0.3.0
|
25 |
+
matplotlib>=3.5.0
|
26 |
plotly>=5.10.0
|
27 |
|
28 |
# 可选依赖(如果使用ChatGLM)
|
|
|
32 |
|
33 |
# 其他依赖
|
34 |
requests>=2.25.0
|
35 |
+
beautifulsoup4>=4.10.0
|
|
|
|