Update src/streamlit_app.py

src/streamlit_app.py: CHANGED (+303 −152)
```diff
@@ -1,161 +1,312 @@
 import streamlit as st
 import pandas as pd
-import
… (old lines 4–24: blank or not recoverable from this view)
-#
-
-def
-"""
… (old lines 29–36: blank or not recoverable from this view)
     else:
… (old lines 38–49: blank or not recoverable from this view)
-st.
-st.
-st.markdown("不僅計算 S 因子,更透過地圖、情境模擬,讓您『看見』並『感受』場址效應的威力!")
-
-# 2. Sidebar
-st.sidebar.header("Step 1: 載入資料")
-data_source = st.sidebar.radio("請選擇資料來源:", ("使用內建範例資料", "上傳自己的 CSV 檔案"))
-
-input_df = None
-if data_source == "使用內建範例資料":
-    st.sidebar.info("範例資料已加入經緯度(lat, lon),以用於地圖視覺化。")
-    data = {
-        'station_id': ['TPE', 'KHH', 'HUA', 'TPE', 'KHH', 'HUA', 'TPE', 'KHH', 'HUA', 'TPE', 'KHH', 'HUA'],
-        'lat': [25.0330, 22.6273, 23.9739, 25.0330, 22.6273, 23.9739, 25.0330, 22.6273, 23.9739, 25.0330, 22.6273, 23.9739],
-        'lon': [121.5654, 120.3014, 121.6059, 121.5654, 120.3014, 121.6059, 121.5654, 120.3014, 121.6059, 121.5654, 120.3014, 121.6059],
-        'earthquake_id': ['EQ1', 'EQ1', 'EQ1', 'EQ2', 'EQ2', 'EQ2', 'EQ3', 'EQ3', 'EQ3', 'EQ4', 'EQ4', 'EQ4'],
-        'magnitude': [6.2, 6.2, 6.2, 5.5, 5.5, 5.5, 7.0, 7.0, 7.0, 6.5, 6.5, 6.5],
-        'distance_km': [50, 200, 30, 80, 150, 60, 120, 250, 40, 40, 180, 25],
-        # KHH (Kaohsiung) sits on softer geology, so a larger amplification effect is expected
-        'observed_pga': [80, 40, 150, 30, 25, 60, 60, 35, 180, 150, 55, 280]
-    }
-    input_df = pd.DataFrame(data)
-else:
-    uploaded_file = st.sidebar.file_uploader("上傳 CSV (需含 lat, lon 欄位)", type=["csv"])
-    if uploaded_file:
-        input_df = pd.read_csv(uploaded_file)
-
-# 3. Main view
-if input_df is not None:
-    st.sidebar.header("Step 2: 執行計算")
-    if st.sidebar.button("🚀 點我開始分析!"):
-        with st.spinner('科學計算中,請稍候...'):
-            required_cols = ['station_id', 'lat', 'lon', 'magnitude', 'distance_km', 'observed_pga']
-            if not all(col in input_df.columns for col in required_cols):
-                st.error(f"資料格式錯誤!請確保您的 CSV 包含以下欄位: {required_cols}")
             else:
… (old lines 87–97: blank or not recoverable from this view)
-    final_df = st.session_state.final_results
-
-    with tab1:
-        st.header("📊 各測站平均場址放大因子 (S)")
-        col1, col2 = st.columns([0.5, 0.5])
-        with col1:
-            st.dataframe(final_df[['station_id', 'site_amplification_factor_S']].style.format({'site_amplification_factor_S': "{:.2f}"}))
-            st.bar_chart(final_df.set_index('station_id')['site_amplification_factor_S'])
-
-        with col2:
-            st.subheader("💬 結果白話文解說")
-            for index, row in final_df.iterrows():
-                st.markdown(f"**{row['station_id']} 測站:**")
-                st.markdown(get_s_factor_interpretation(row['site_amplification_factor_S']))
-                st.markdown("---")
-
-    with tab2:
-        st.header("🗺️ 場址放大效應地理分佈")
-        st.markdown("地圖上的點越大,代表該地的場址放大效應越強烈。")
-
-        # Normalize so point-size differences on the map are more visible
-        min_s = final_df['site_amplification_factor_S'].min()
-        max_s = final_df['site_amplification_factor_S'].max()
-        # Guard against the max_s == min_s case
-        if (max_s - min_s) > 0:
-            final_df['size'] = 50 + ((final_df['site_amplification_factor_S'] - min_s) / (max_s - min_s)) * 500
-        else:
-            final_df['size'] = 100
… (old lines 126–132: blank or not recoverable from this view)
-        sim_col1, sim_col2 = st.columns(2)
-        with sim_col1:
-            sim_mag = st.slider("設定假想地震規模 (M)", min_value=4.0, max_value=8.0, value=6.5, step=0.1)
-        with sim_col2:
-            sim_dist = st.number_input("設定您與震央的距離 (公里)", min_value=10, max_value=300, value=50)
-
-        st.markdown(f"#### 模擬結果:規模 **{sim_mag}**、距離 **{sim_dist}** 公里的地震")
-
-        sim_results = []
-        for index, row in final_df.iterrows():
-            predicted_pga = predict_pga_with_s(sim_mag, sim_dist, row['site_amplification_factor_S'])
-            sim_results.append({
-                "測站": row['station_id'],
-                "場址放大因子 (S)": f"{row['site_amplification_factor_S']:.2f}",
-                "預估搖晃程度 (PGA)": f"{predicted_pga:.2f}"
-            })
-
-        st.table(pd.DataFrame(sim_results))
-        st.info("💡 注意:PGA 越高,代表地表加速度越大,感受到的搖晃越劇烈。")
-
-    with tab4:
-        st.header("📄 詳細資料與計算過程")
-        with st.expander("點此查看原始觀測資料"):
-            st.dataframe(input_df)
-        with st.expander("點此查看詳細計算過程 (含單次放大效應)"):
-            st.dataframe(st.session_state.intermediate_results)
-else:
-    st.info("請在左方側邊欄選擇資料來源,然後點擊按鈕開始分析。")
```
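The removed simulation tab calls `get_s_factor_interpretation` and `predict_pga_with_s`, whose definitions fall inside the truncated region of the old file. A minimal sketch of what they plausibly did, assuming a generic attenuation form in which predicted PGA scales linearly with the site factor S; both functions and all constants below are hypothetical reconstructions, not the original code:

```python
import numpy as np

def get_s_factor_interpretation(s: float) -> str:
    """Hypothetical: describe a site amplification factor S in plain language."""
    if s > 1.5:
        return "Strong site amplification: soft ground, shaking well above the regional average."
    if s > 1.0:
        return "Mild amplification: shaking somewhat above the regional average."
    return "No amplification: firm ground, shaking at or below the regional average."

def predict_pga_with_s(magnitude: float, distance_km: float, s_factor: float) -> float:
    """Hypothetical: rock-site PGA from a generic attenuation form, scaled by S.

    PGA = a * exp(b * M) / (R + c)**d * S, where a, b, c, d are placeholder constants.
    """
    a, b, c, d = 1.0, 1.2, 10.0, 1.5
    return a * np.exp(b * magnitude) / (distance_km + c) ** d * s_factor
```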
```diff
 import streamlit as st
 import pandas as pd
+import requests
+from bs4 import BeautifulSoup
+from transformers import pipeline
+import plotly.express as px
+from wordcloud import WordCloud
+import matplotlib.pyplot as plt
+from collections import Counter
+import re
+
+# --- Page setup and title ---
+st.set_page_config(
+    page_title="地震輿情分析系統",
+    page_icon="📊",
+    layout="wide"
+)
+
+st.title("📊 地震輿情分析與洞察報告系統")
+st.markdown("依據[規格文件](https://example.com)開發,旨在提供即時的地震相關公眾輿情分析。")  # Replace the link with the actual URL of the spec document
+
```
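The new import list pins down the Space's dependency set. A plausible `requirements.txt` for this file; the package names follow from the imports above, `torch` is assumed as the transformers backend, and no version pins appear in the diff:

```text
streamlit
pandas
requests
beautifulsoup4
transformers
torch
plotly
wordcloud
matplotlib
```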
```diff
+# --- Core functions ---
+
+# FR1: keyword search and data acquisition (simulated with requests + BeautifulSoup)
+# Note: direct, high-volume Google searches are likely to get blocked. A production
+# project should use the Google Search API. Here we use DuckDuckGo as a less
+# block-prone alternative for the demo.
+def search_and_scrape(keyword, num_results):
+    """
+    Search by keyword and scrape the content of the result pages.
+    """
+    st.info(f"🔍 正在使用 DuckDuckGo 搜尋關鍵字:'{keyword}'...")
+    search_url = "https://html.duckduckgo.com/html/"
+    params = {"q": keyword}
+    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"}
+
+    try:
+        response = requests.get(search_url, params=params, headers=headers, timeout=10)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+        links = [a['href'] for a in soup.find_all('a', class_='result__a')]
+    except requests.RequestException as e:
+        st.error(f"搜尋失敗:{e}")
+        return []
+
+    scraped_data = []
+    if not links:
+        st.warning("找不到相關的搜尋結果。")
+        return []
+
+    st.info(f"📈 找到 {len(links)} 個結果,將分析前 {num_results} 個...")
+
+    progress_bar = st.progress(0)
+    for i, url in enumerate(links[:num_results]):
+        try:
+            # Fix protocol-relative URLs
+            if url.startswith("//"):
+                url = "https:" + url
+
+            page_response = requests.get(url, headers=headers, timeout=10)
+            page_response.raise_for_status()
+            page_soup = BeautifulSoup(page_response.content, 'html.parser', from_encoding='utf-8')
+
+            # Extract plain text after removing script and style tags
+            for script_or_style in page_soup(['script', 'style']):
+                script_or_style.decompose()
+
+            text = ' '.join(p.get_text() for p in page_soup.find_all('p'))
+            text = re.sub(r'\s+', ' ', text).strip()  # collapse extra whitespace
+
+            if len(text) > 100:  # only analyze pages with enough content
+                scraped_data.append({"url": url, "content": text})
+        except Exception as e:
+            st.warning(f"無法抓取或解析 URL: {url} ({e})")
+
+        progress_bar.progress((i + 1) / num_results)
+
+    st.success(f"✅ 成功抓取並解析 {len(scraped_data)} 篇文章。")
+    return scraped_data
+
```
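One caveat with the `html.duckduckgo.com` endpoint: the `result__a` hrefs are often redirect links of the form `//duckduckgo.com/l/?uddg=<encoded-target>` rather than the target pages themselves. A small helper that could be applied to each link before fetching, sketched under the assumption that the `uddg` query parameter carries the destination URL:

```python
from urllib.parse import urlparse, parse_qs

def unwrap_ddg_redirect(url: str) -> str:
    """Return the target URL if this is a DuckDuckGo redirect link, else the URL unchanged."""
    if url.startswith("//"):
        url = "https:" + url
    parsed = urlparse(url)
    if parsed.netloc.endswith("duckduckgo.com") and parsed.path.startswith("/l/"):
        target = parse_qs(parsed.query).get("uddg")  # parse_qs percent-decodes values
        if target:
            return target[0]
    return url
```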
```diff
+# FR2 & FR3: Hugging Face model analysis (sentiment analysis)
+@st.cache_resource
+def get_sentiment_pipeline(token):
+    """
+    Load the sentiment-analysis model. Cached so it is not reloaded on every rerun.
+    """
+    try:
+        # Use a multilingual model that covers Chinese
+        sentiment_pipeline = pipeline(
+            "sentiment-analysis",
+            model="lxyuan/distilbert-base-multilingual-cased-sent-analysis-finance",
+            use_auth_token=token
+        )
+        return sentiment_pipeline
+    except Exception as e:
+        st.error(f"無法載入情緒分析模型,請確認您的 Token 是否正確且有權限存取此模型。錯誤:{e}")
+        return None
+
+# FR4: trending-topic analysis (named entity recognition)
+@st.cache_resource
+def get_ner_pipeline(token):
+    """
+    Load the named-entity-recognition model.
+    """
+    try:
+        # Use ckiplab's model, which works well for Traditional Chinese
+        ner_pipeline = pipeline(
+            "ner",
+            model="ckiplab/bert-base-chinese-ner",
+            aggregation_strategy="simple",  # merge word-piece tokens into whole entities
+            use_auth_token=token
+        )
+        return ner_pipeline
+    except Exception as e:
+        st.error(f"無法載入命名實體辨識模型。錯誤:{e}")
+        return None
+
```
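For orientation, both loaders return standard `transformers` pipelines whose outputs are lists of dicts; the keys shown below are the ones `analyze_content` relies on (the sample texts and scores are illustrative only):

```python
sentiment = get_sentiment_pipeline(hf_token)
ner = get_ner_pipeline(hf_token)

print(sentiment("今晚的地震搖得很厲害")[0])
# e.g. {'label': 'negative', 'score': 0.97}; label names depend on the chosen model

print(ner("花蓮縣政府宣布停班停課")[0])
# e.g. {'entity_group': 'GPE', 'word': '花 蓮 縣', 'score': 0.99, ...}
# (aggregated words may contain spaces between Chinese characters)
```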
```diff
+def analyze_content(data, sentiment_pipeline, ner_pipeline):
+    """
+    Run the full NLP analysis over the scraped content.
+    """
+    sentiments = []
+    all_entities = []
+
+    st.info("🧠 正在進行 NLP 分析...")
+    progress_bar = st.progress(0)
+
+    for i, item in enumerate(data):
+        # Sentiment analysis
+        try:
+            # Hugging Face models usually cap input length; truncate here
+            sentiment_result = sentiment_pipeline(item['content'][:512])
+            sentiments.append(sentiment_result[0]['label'])
+        except Exception:
+            sentiments.append('neutral')  # treat failed analyses as neutral
+
+        # Named entity recognition
+        try:
+            ner_results = ner_pipeline(item['content'][:512])  # truncate like the sentiment pass
+            # Keep only the entity types we care about, longer than one character.
+            # With aggregation_strategy="simple" each result carries an 'entity_group';
+            # ckiplab labels can keep a BIES prefix, so compare against the bare tag,
+            # and strip the spaces the aggregator inserts between Chinese characters.
+            entities = [
+                entity['word'].replace(' ', '')
+                for entity in ner_results
+                if entity['entity_group'].split('-')[-1] in ['GPE', 'ORG', 'PERSON', 'LOC']
+                and len(entity['word'].replace(' ', '')) > 1
+            ]
+            all_entities.extend(entities)
+        except Exception:
+            pass  # ignore failed analyses
+
+        progress_bar.progress((i + 1) / len(data))
+
+    st.success("🤖 NLP 分析完成!")
+    return sentiments, all_entities
+
```
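The 512-character truncation keeps only the head of each article. If whole-article sentiment matters, one option is to classify fixed-size windows and take a majority vote. A minimal sketch reusing the pipeline above; the 400-character window is an assumption meant to stay under the model's 512-token limit:

```python
from collections import Counter

def classify_long_text(text, sentiment_pipeline, window=400):
    """Majority-vote sentiment over fixed-size character windows of a long article."""
    chunks = [text[i:i + window] for i in range(0, len(text), window)] or [text]
    labels = [sentiment_pipeline(chunk)[0]['label'] for chunk in chunks]
    return Counter(labels).most_common(1)[0][0]
```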
```diff
+# FR5: visualizing the results
+def create_sentiment_chart(sentiments):
+    """
+    Build the sentiment-distribution chart.
+    """
+    df = pd.DataFrame(sentiments, columns=['sentiment'])
+    sentiment_counts = df['sentiment'].value_counts().reset_index()
+    sentiment_counts.columns = ['sentiment', 'count']
+
+    fig = px.pie(sentiment_counts, names='sentiment', values='count',
+                 title='整體情緒分佈',
+                 color='sentiment',  # color= is required for the discrete map to apply
+                 color_discrete_map={'positive': 'green', 'negative': 'red', 'neutral': 'blue'})
+    return fig
+
+def create_word_cloud(entities):
+    """
+    Build the trending-topics word cloud.
+    """
+    # Make sure the font file path is correct
+    font_path = 'NotoSansTC-Regular.otf'
+    text = ' '.join(entities)
+
+    if not text:
+        return None
+
+    try:
+        wordcloud = WordCloud(
+            width=800,
+            height=400,
+            background_color='white',
+            font_path=font_path
+        ).generate(text)
+
+        fig, ax = plt.subplots(figsize=(10, 5))
+        ax.imshow(wordcloud, interpolation='bilinear')
+        ax.axis('off')
+        return fig
+    except Exception as e:
+        st.error(f"無法生成文字雲,請確認字體檔案 'NotoSansTC-Regular.otf' 已上傳。錯誤:{e}")
+        return None
+
```
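Because the entities are already extracted, an alternative to joining them into one string is `generate_from_frequencies`, which bypasses WordCloud's internal regex tokenizer (that tokenizer can mishandle CJK text). A sketch under the same font assumption:

```python
from collections import Counter
from wordcloud import WordCloud

def word_cloud_from_entities(entities, font_path='NotoSansTC-Regular.otf'):
    """Build the cloud directly from entity counts instead of re-tokenizing joined text."""
    if not entities:
        return None
    return WordCloud(width=800, height=400, background_color='white',
                     font_path=font_path).generate_from_frequencies(Counter(entities))
```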
```diff
+# FR6: automatic report generation
+def generate_report(sentiment_counts, top_entities, keyword):
+    """
+    Generate the final analysis report.
+    """
+    total = sentiment_counts['count'].sum()
+    positive_pct = (sentiment_counts[sentiment_counts['sentiment'] == 'positive']['count'].sum() / total) * 100
+    negative_pct = (sentiment_counts[sentiment_counts['sentiment'] == 'negative']['count'].sum() / total) * 100
+    neutral_pct = 100 - positive_pct - negative_pct
+
+    # Canned recommendations
+    recommendation = "初步建議:輿情整體平穩,可持續觀察。"
+    if negative_pct > 40:
+        recommendation = f"初步建議:負面聲量較高,建議針對 '{', '.join(dict(top_entities).keys())}' 等熱議議題發布說明,以緩解公眾疑慮。"
+    elif positive_pct > 40:
+        recommendation = "初步建議:正面聲量佔優,可加強相關正面訊息的傳播。"
+
+    report = f"""
+# 地震輿情分析報告
+
+## 1. 總覽
+- **分析關鍵字**: {keyword}
+- **分析摘要**: 針對網路輿情進行分析,目前情緒分佈以 **{sentiment_counts['sentiment'].iloc[0]}** 為主。
+
+## 2. 情緒儀表板
+- **正面情緒**: {positive_pct:.2f}%
+- **負面情緒**: {negative_pct:.2f}%
+- **中性情緒**: {neutral_pct:.2f}%
+
+## 3. 熱議焦點 (Top 5)
+"""
+    for entity, count in top_entities:
+        report += f"- {entity} (提及 {count} 次)\n"
+
+    report += f"""
+## 4. 初步建議
+{recommendation}
+"""
+    return report
+
```
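A quick sanity check of the percentage math and the recommendation branches (the counts and entities below are made up): with 5 negative, 3 neutral, and 2 positive articles, `negative_pct` is 50%, which crosses the 40% threshold:

```python
import pandas as pd

counts = pd.DataFrame({'sentiment': ['negative', 'neutral', 'positive'],
                       'count': [5, 3, 2]})  # value_counts() order: most common first
top5 = [('花蓮', 12), ('氣象署', 7)]

print(generate_report(counts, top5, '花蓮地震'))
# negative_pct = 50.00 > 40, so the report recommends publishing a statement
# addressing the trending topics 花蓮 and 氣象署
```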
```diff
+# --- UI ---
+# FR7: user interface (sidebar)
+st.sidebar.header("⚙️ 控制面板")
+
+keyword = st.sidebar.text_input("1. 輸入搜尋關鍵字", "花蓮地震")
+hf_token = st.sidebar.text_input("2. 輸入您的 Hugging Face Token", type="password")
+num_results = st.sidebar.slider("3. 設定分析文章數量", min_value=5, max_value=20, value=10, step=1)
+
+analyze_button = st.sidebar.button("🚀 開始分析", type="primary")
+
+
+# --- Main flow ---
+if analyze_button:
+    if not keyword:
+        st.sidebar.error("請輸入關鍵字!")
+    elif not hf_token:
+        st.sidebar.error("請輸入 Hugging Face Token!")
     else:
+        # Run the analysis
+        scraped_data = search_and_scrape(keyword, num_results)

+        if scraped_data:
+            sentiment_pipeline = get_sentiment_pipeline(hf_token)
+            ner_pipeline = get_ner_pipeline(hf_token)
+
+            if sentiment_pipeline and ner_pipeline:
+                sentiments, all_entities = analyze_content(scraped_data, sentiment_pipeline, ner_pipeline)
+
+                # Store the results for later use
+                st.session_state['analysis_complete'] = True
+                st.session_state['sentiments'] = sentiments
+                st.session_state['all_entities'] = all_entities
+                st.session_state['keyword'] = keyword
             else:
+                st.session_state['analysis_complete'] = False
+        else:
+            st.error("未能獲取任何可分析的資料。")
+            st.session_state['analysis_complete'] = False
+
+# Show the results once the analysis is complete
+if 'analysis_complete' in st.session_state and st.session_state['analysis_complete']:
+    st.header("📈 輿情儀表板")

+    sentiments = st.session_state['sentiments']
+    all_entities = st.session_state['all_entities']
+    keyword = st.session_state['keyword']

+    col1, col2 = st.columns(2)

+    with col1:
+        # Sentiment chart
+        sentiment_fig = create_sentiment_chart(sentiments)
+        st.plotly_chart(sentiment_fig, use_container_width=True)

+    with col2:
+        # Trending-topics word cloud
+        st.subheader("熱議焦點")
+        wordcloud_fig = create_word_cloud(all_entities)
+        if wordcloud_fig:
+            st.pyplot(wordcloud_fig)
+        else:
+            st.info("沒有足夠的關鍵詞來生成文字雲。")
+
+    st.header("📄 分析報告產出")
+
+    # Compute the numbers the report needs
+    sentiment_df = pd.DataFrame(sentiments, columns=['sentiment'])
+    sentiment_counts = sentiment_df['sentiment'].value_counts().reset_index()
+    sentiment_counts.columns = ['sentiment', 'count']
+
+    entity_counts = Counter(all_entities).most_common(5)
+
+    # Generate and display the report
+    report_text = generate_report(sentiment_counts, entity_counts, keyword)
+    st.markdown(report_text)
+
+    # Download button
+    st.download_button(
+        label="📥 下載完整報告 (.md)",
+        data=report_text,
+        file_name=f"輿情分析報告_{keyword}.md",
+        mime="text/markdown",
+    )
```
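Storing results in `st.session_state` is what keeps the dashboard visible across Streamlit's reruns: every widget interaction re-executes the script top to bottom, so plain locals assigned inside the button branch would vanish on the next interaction. The same pattern in miniature:

```python
import streamlit as st

if st.button("compute"):
    # Written once, inside the branch that only runs on the click's rerun...
    st.session_state["result"] = 21 * 2

# ...but readable on every later rerun, so the output stays on screen.
if "result" in st.session_state:
    st.write(st.session_state["result"])
```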