import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import pandas as pd
import torch
import matplotlib.pyplot as plt
import numpy as np
import re

# matplotlib 默认字体不含中文,雷达图/条形图的中文标签会显示为方框;
# 字体列表按运行环境中实际安装的中文字体调整(此处仅为常见示例)。
plt.rcParams["font.sans-serif"] = ["SimHei", "Microsoft YaHei", "Arial Unicode MS"]
plt.rcParams["axes.unicode_minus"] = False

st.set_page_config(page_title="🛡️ 智盾内容安全审核平台", layout="wide")

# 侧边栏导航:显示名 -> 内部页面标识
PAGES = {
    "🏠 首页": "home",
    "📋 审核": "moderation",
    "📊 分析": "analysis",
    "🧠 产品能力": "capability",
    "🧾 策略配置": "strategy",
}

if "page" not in st.session_state:
    st.session_state.page = "home"
if "analysis_input_text" not in st.session_state:
    st.session_state.analysis_input_text = ""

selected_page = st.sidebar.radio("📌 导航", list(PAGES.keys()))
st.session_state.page = PAGES[selected_page]


def parse_scores_from_llm_output(text):
    """从模型输出中提取「标签: 0.xx」形式的评分,返回 {标签: 分数} 字典。"""
    matches = re.findall(r"([\u4e00-\u9fa5A-Za-z]+)[::]?\s*([0]\.\d+|1\.0+)", text)
    score_dict = {}
    for label, score in matches:
        try:
            score_dict[label.strip()] = float(score)
        except ValueError:
            continue
    return score_dict


def plot_radar_chart(score_dict):
    """按各风险维度分数绘制雷达图(分数范围 0-1)。"""
    labels = list(score_dict.keys())
    scores = list(score_dict.values())
    angles = np.linspace(0, 2 * np.pi, len(labels), endpoint=False).tolist()
    scores += scores[:1]   # 首尾相接,闭合多边形
    angles += angles[:1]
    fig, ax = plt.subplots(figsize=(5, 5), subplot_kw=dict(polar=True))
    ax.plot(angles, scores, "o-", linewidth=2)
    ax.fill(angles, scores, alpha=0.25)
    ax.set_thetagrids(np.degrees(angles[:-1]), labels)
    ax.set_ylim(0, 1)
    st.pyplot(fig)


def plot_bar_chart(score_dict):
    """按各风险维度分数绘制水平条形图(分数范围 0-1)。"""
    labels = list(score_dict.keys())
    scores = list(score_dict.values())
    fig, ax = plt.subplots()
    ax.barh(labels, scores)
    ax.set_xlim(0, 1)
    ax.set_xlabel("分数 (0-1)")
    st.pyplot(fig)
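
# ----------------------------------------------------------------------
# 可选优化(示意草稿,未接入下方页面逻辑):
# DeepSeek 7B 模型较大,每次点击按钮都重新执行 from_pretrained 会非常慢。
# 下面示意如何用 st.cache_resource 缓存加载结果,供审核页与分析页复用;
# 函数名 load_deepseek 为示例命名,并非原有接口。
# ----------------------------------------------------------------------
@st.cache_resource
def load_deepseek(model_name="deepseek-ai/deepseek-llm-7b-chat"):
    """缓存加载 DeepSeek 分词器与模型,同一会话进程内只加载一次(示意)。"""
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        trust_remote_code=True,
    )
    return tokenizer, model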

# ========== 页面:首页 ==========
if st.session_state.page == "home":
    st.title("🛡️ 智盾内容安全审核平台")
    st.markdown("欢迎使用智盾平台。本系统支持多维度内容审核与分析,适用于金融、政府、互联网等高风险行业。")

# ========== 页面:文本内容审核 ==========
elif st.session_state.page == "moderation":
    st.title("📋 文本内容审核")
    model_choice = st.selectbox("选择模型", ["BERT", "DeepSeek", "GPT"])
    model_map = {
        "BERT": "uer/roberta-base-finetuned-jd-binary-chinese",
        "DeepSeek": "deepseek-ai/deepseek-llm-7b-chat",
        "GPT": "IDEA-CCNL/Taiyi-CLUE-small",
    }
    model_name = model_map[model_choice]
    input_text = st.text_area("请输入待审核文本:", height=150)

    if st.button("🚨 开始审核"):
        with st.spinner("模型分析中..."):
            if model_choice == "DeepSeek":
                # 生成式大模型走 prompt 方式审核
                tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
                model = AutoModelForCausalLM.from_pretrained(
                    model_name,
                    device_map="auto",
                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                    trust_remote_code=True,
                )
                prompt = f"""你是一个中文内容审核助手,请对以下文本内容进行分析。
任务包括:
1. 判断是否存在违规内容,并简要解释原因。
2. 给出以下五个风险维度的评分(0-1之间):
- 人身攻击
- 暴力
- 政治敏感
- 舆论攻击
- 歧视成分

输出格式如下:
- 人身攻击: x.xx
- 暴力: x.xx
- 政治敏感: x.xx
- 舆论攻击: x.xx
- 歧视成分: x.xx

文本如下:
{input_text}
"""
                inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
                outputs = model.generate(**inputs, max_new_tokens=512)
                # 只解码新生成的部分,避免把提示词原样回显到结果里
                result = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
                st.code(result)
                st.session_state.analysis_input_text = input_text
                # 说明:此处不能嵌套 st.button 做页面跳转——点击内层按钮会触发重跑,
                # 外层「开始审核」按钮状态变回 False,内层逻辑不会执行;
                # 且页面切换由侧边栏单选框驱动,因此改为提示用户手动切换。
                st.info("文本已保存,可在左侧导航切换到「📊 分析」页进行深度风险分析。")
            else:
                # 分类模型走 pipeline 方式,直接输出标签与置信度
                classifier = pipeline(
                    "text-classification",
                    model=model_name,
                    tokenizer=model_name,
                    device=0 if torch.cuda.is_available() else -1,
                )
                result = classifier(input_text)[0]
                st.write(f"标签: {result['label']} / 置信度: {result['score']:.2f}")

# ========== 页面:文本风险分析 ==========
elif st.session_state.page == "analysis":
    st.title("📊 文本风险分析")
    input_text = st.session_state.analysis_input_text or st.text_area("请输入需要分析的文本:", height=150)
    if st.button("🔍 分析文本"):
        with st.spinner("分析中..."):
            tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/deepseek-llm-7b-chat", trust_remote_code=True)
            model = AutoModelForCausalLM.from_pretrained(
                "deepseek-ai/deepseek-llm-7b-chat",
                device_map="auto",
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                trust_remote_code=True,
            )
            # 输出格式统一使用中文标签,与审核页提示保持一致,
            # 也便于 parse_scores_from_llm_output 按标签解析并绘图。
            prompt = f"""你是一个中文文本风险分析专家,请根据以下文本的内容,对以下五个维度评分(范围0-1,1为最严重):
- 人身攻击
- 暴力
- 政治敏感
- 舆论攻击
- 歧视成分

输出格式如下:
- 人身攻击: x.xx
- 暴力: x.xx
- 政治敏感: x.xx
- 舆论攻击: x.xx
- 歧视成分: x.xx

文本如下:
{input_text}
"""
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
            outputs = model.generate(**inputs, max_new_tokens=512)
            result = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
            st.markdown("### 🧾 模型原始输出")
            st.code(result)

            scores = parse_scores_from_llm_output(result)
            if scores:
                plot_bar_chart(scores)
                plot_radar_chart(scores)
                avg = np.mean(list(scores.values()))
                if avg >= 0.7:
                    level = "严重 🔴"
                elif avg >= 0.4:
                    level = "中等 🟠"
                else:
                    level = "轻微 🟢"
                st.success(f"综合风险等级:{level}(平均分 {avg:.2f})")

# ========== 页面:产品能力 ==========
elif st.session_state.page == "capability":
    st.title("🧠 产品能力")
    features = [
        ("📝 字词错误", "错别字、音近字、形近字、多字、重叠、颠倒、异形词等"),
        ("📌 常识错误", "标点符号、地名关联、表达不当、语义错误、不语名词等"),
        ("🚫 敏感词过滤", "涉及暴恐、色情、违禁、侮辱、歧视等不健康词语"),
        ("⚠️ 政治性差错", "领导人姓名、职务、讲话、政治口号、固定表述等"),
        ("📄 文本比对", "快速找出两个文本之间的差异之处,高清高亮显示"),
        ("📐 格式错误", "参照国家标准和党政公文规范,自动识别格式问题"),
        ("🤖 智能写作", "自动生成新闻稿、公告、任务文书,响应快速"),
        ("🌐 网站巡检", "自动抓取网页历史快照,输出违规风险报告"),
    ]
    # 每行两列展示功能卡片
    for i in range(0, len(features), 2):
        col1, col2 = st.columns(2)
        for col, feat in zip((col1, col2), features[i:i + 2]):
            with col.expander(feat[0], expanded=True):
                st.markdown(f"**功能描述:** {feat[1]}")
                st.button(f"👉 体验 {feat[0]}", key=feat[0])

# ========== 页面:策略配置 ==========
elif st.session_state.page == "strategy":
    st.title("📋 冒犯性内容风控策略配置")
    st.markdown("配置不同风险维度的评分阈值,并设定处理策略。")
    st.markdown("---")
    # (维度, 默认阈值, 默认处理策略)
    risk_dimensions = [
        ("人身攻击", 0.75, "封禁账号 + 内容拦截"),
        ("暴力", 0.70, "内容拦截 + 人工复审"),
        ("政治敏感", 0.65, "限流处理 + 推送复审"),
        ("舆论攻击", 0.60, "记录风险 + 降权曝光"),
        ("歧视成分", 0.60, "提醒整改 + 限流"),
    ]
    config = {}
    for dim, default_thresh, default_action in risk_dimensions:
        st.subheader(f"🧠 风险维度:{dim}")
        col1, col2 = st.columns([1, 2])
        with col1:
            thresh = st.slider(f"{dim} 风险评分阈值", 0.0, 1.0, default_thresh, 0.01, key=f"{dim}_slider")
        with col2:
            action = st.text_input(f"{dim} 处理策略", value=default_action, key=f"{dim}_action")
        config[dim] = (thresh, action)

    if st.button("💾 保存配置"):
        st.success("✅ 策略保存成功(模拟)")
        st.json(config)
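
# ----------------------------------------------------------------------
# 可选扩展(示意草稿,未接入上方「保存配置」按钮):
# 当前的保存操作只是模拟;下面示意如何把策略配置落盘为 JSON 文件。
# 函数名 save_strategy_config 与文件名 strategy_config.json 均为示例命名,
# 实际持久化方式(数据库、配置中心等)需结合部署环境决定。
# ----------------------------------------------------------------------
def save_strategy_config(config, path="strategy_config.json"):
    """把 {维度: (阈值, 处理策略)} 形式的配置写入 JSON 文件(示意)。"""
    import json
    data = {
        dim: {"threshold": thresh, "action": action}
        for dim, (thresh, action) in config.items()
    }
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)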