import streamlit as st
import geopandas as gpd
import sqlite3
import pandas as pd
import os
import torch
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
# Tell GDAL to rebuild a missing .shx index file when reading the shapefile
os.environ['SHAPE_RESTORE_SHX'] = 'YES'
# Set device to GPU if available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
st.write(f"Using device: {device}")
# Connect to SQLite database
conn = sqlite3.connect('NY.db')
cursor = conn.cursor()
# Create table for floodland data
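# The column names mirror the attribute fields of FEMA's S_FLD_HAZ_AR (flood hazard area) layer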
cursor.execute('''
    CREATE TABLE IF NOT EXISTS floodlands (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        DFIRM_ID TEXT,
        VERSION_ID TEXT,
        FLD_AR_ID TEXT,
        STUDY_TYP TEXT,
        FLD_ZONE TEXT,
        ZONE_SUBTY TEXT,
        SFHA_TF TEXT,
        STATIC_BFE FLOAT,
        V_DATUM TEXT,
        DEPTH FLOAT,
        LEN_UNIT TEXT,
        VELOCITY FLOAT,
        VEL_UNIT TEXT,
        AR_REVERT TEXT,
        AR_SUBTRV TEXT,
        BFE_REVERT FLOAT,
        DEP_REVERT FLOAT,
        DUAL_ZONE TEXT,
        SOURCE_CIT TEXT,
        geometry TEXT,
        acreage FLOAT,
        usable_area FLOAT
    )
''')
conn.commit()
# Load and process the shapefile
shapefile_path = os.path.join(os.path.dirname(__file__), 'S_FLD_HAZ_AR.shp')
gdf = gpd.read_file(shapefile_path)
# Check the initial CRS
st.write("Initial CRS:", gdf.crs)
# If the CRS is missing, fall back to WGS84 (EPSG:4326) as a reasonable default
if gdf.crs is None:
    gdf.set_crs(epsg=4326, inplace=True)
    st.write("CRS was missing; set to EPSG:4326 (WGS84).")
# Validate geometries
gdf['geometry'] = gdf['geometry'].apply(lambda geom: geom if geom is not None and geom.is_valid else None)
if gdf['geometry'].isnull().any():
st.write(f"Found {gdf['geometry'].isnull().sum()} invalid or null geometries. Dropping these rows.")
gdf = gdf.dropna(subset=['geometry'])
# Reproject to UTM Zone 18N (EPSG:32618) for accurate area calculations
gdf = gdf.to_crs(epsg=32618)
st.write("CRS after reprojection:", gdf.crs)
# Calculate acreage (1 square meter = 0.000247105 acres)
gdf['acreage'] = gdf.geometry.area * 0.000247105
# Calculate usable area (excluding flood-prone zones)
flood_prone_zones = ['A', 'AE', 'AH', 'AO', 'VE']
gdf['usable_area'] = gdf.apply(
    lambda row: row['acreage'] if row['FLD_ZONE'] not in flood_prone_zones else 0, axis=1
)
# Convert geometry to WKT for storage in SQLite
gdf['wkt_geometry'] = gdf['geometry'].apply(lambda geom: geom.wkt)
# Create a new DataFrame without the geometry column for SQLite storage
gdf_for_sql = gdf.drop(columns=['geometry'])
# Store in SQLite
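# Note: if_exists='replace' drops the table created above and recreates it from the DataFrame's
# columns (so wkt_geometry replaces the geometry column defined earlier). to_sql returns the
# number of inserted rows on recent pandas versions; older versions return None.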
rows_inserted = gdf_for_sql.to_sql('floodlands', conn, if_exists='replace', index=False)
st.write(f"Inserted {rows_inserted} rows into the database.")
# Close the database connection
conn.close()
# Load sentence transformer for embedding
embedder = SentenceTransformer('all-MiniLM-L6-v2')
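# all-MiniLM-L6-v2 maps each text to a 384-dimensional embedding vector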
# Load summarization model (using google/pegasus-xsum, which is publicly accessible)
summarizer = pipeline("summarization", model="google/pegasus-xsum", device=0 if torch.cuda.is_available() else -1)
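# For the transformers pipeline, device=0 selects the first GPU and device=-1 runs on CPU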
# Create a text representation of each floodland record for embedding
gdf['text'] = gdf.apply(
lambda row: f"Floodland area with FLD_ZONE: {row['FLD_ZONE']}, ZONE_SUBTY: {row['ZONE_SUBTY']}, "
f"acreage: {row['acreage']:.2f} acres, usable area: {row['usable_area']:.2f} acres",
axis=1
)
# Embed the text representations
embeddings = embedder.encode(gdf['text'].tolist(), show_progress_bar=True)
# Create a FAISS index for retrieval
d = embeddings.shape[1] # Dimension of embeddings
index = faiss.IndexFlatL2(d)
index.add(embeddings)
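# IndexFlatL2 performs exact (brute-force) L2 nearest-neighbor search, so no training step is needed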
# Store the embeddings
gdf['embedding'] = list(embeddings)
# RAG-style summarization: retrieve the most relevant records, then summarize a constructed context (no instruction prompt is used)
def rag_summarize(query, gdf, index, k=5):
    # Embed the query
    query_embedding = embedder.encode([query])[0]
    # Retrieve top-k relevant documents
    distances, indices = index.search(np.array([query_embedding]), k)
    retrieved_docs = gdf.iloc[indices[0]]
    # Aggregate acreage and usable area from retrieved documents
    total_acreage = retrieved_docs['acreage'].sum()
    usable_acreage = retrieved_docs['usable_area'].sum()
    # Create a simplified narrative context
    context = (f"The floodlands in {query} cover a total of {total_acreage:.2f} acres, with {usable_acreage:.2f} acres usable for development. "
               f"A significant portion, spanning {retrieved_docs.iloc[0]['acreage']:.2f} acres, is classified as {retrieved_docs.iloc[0]['FLD_ZONE']} flood zone, "
               f"indicating potential flood risks that could impact development projects.")
    # Debug: Display the narrative context
    st.write("Narrative context for summarization:")
    st.write(context)
    # Generate summary without a prompt
    try:
        summary_output = summarizer(context, max_length=150, min_length=100, do_sample=False)
        st.write("Raw summarizer output:", summary_output)
        if isinstance(summary_output, list) and len(summary_output) > 0 and 'summary_text' in summary_output[0]:
            summary = summary_output[0]['summary_text']
        else:
            raise ValueError("Unexpected output format from summarizer.")
    except Exception as e:
        st.write(f"Error in summarization: {e}")
        # Fallback: Generate a basic summary manually
        summary = (f"The floodlands in {query} cover a total of {total_acreage:.2f} acres. "
                   f"Only {usable_acreage:.2f} acres are usable for development due to flood risks. "
                   f"A significant portion, including {retrieved_docs.iloc[0]['acreage']:.2f} acres, falls under the {retrieved_docs.iloc[0]['FLD_ZONE']} flood zone. "
                   f"This indicates a high risk of flooding, which could impact power plant projects. "
                   f"Overall, the limited usable land poses challenges for development in this region.")
    return summary
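# Example usage (hypothetical query; assumes the loaded shapefile covers the area asked about):
#   summary = rag_summarize("New York", gdf, index, k=5)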
# Streamlit interface
st.title("Floodland Summary Bot")
# Input field for the user to enter a location
user_input = st.text_input("Input:", placeholder="Enter a location (e.g., New York)")
# Submit button
if st.button("Submit"):
    if user_input:
        st.write("Hi, how can I help you today?")
        st.write(f"User input: {user_input}")
        # Generate summary
        summary = rag_summarize(user_input, gdf, index, k=5)
        st.write(summary)
    else:
        st.write("Please enter a location to proceed.")