"""Streamlit page for the table extraction tool.

The page shows a usage guide in the sidebar, accepts an uploaded image, runs
the project's ``TableExtraction`` detector on it, displays the detected table
region next to the extracted data, and offers both tables as CSV downloads.
"""

import base64
import os
import tempfile
import traceback

import cv2
import numpy as np
import streamlit as st
from PIL import Image

from src.table_creator.table_extractor import TableExtraction

# Height in pixels of the result tables while the view is not expanded.
TABLE_HEIGHT = 600
# Colour and line width of the rectangle drawn around the detected table.
# (0, 255, 0) is green in both RGB and BGR channel order.
BOX_COLOR = (0, 255, 0)
BOX_THICKNESS = 2

# Load models only once: Streamlit re-runs this script on every interaction,
# so the extractor is kept in the session state between runs.
if 'tab_ext' not in st.session_state:
    st.session_state.tab_ext = TableExtraction()
    print('Models loaded.')


def process_image(imgpath):
    """Run the session's table extractor on the image stored at ``imgpath``.

    Returns whatever ``TableExtraction.detect`` returns; this page unpacks it
    as ``((raw_df, cleaned_df), bbox)``.
    """
    return st.session_state.tab_ext.detect(imgpath)


def draw_bounding_box(image, bbox):
    """Draw a bounding box on the image.

    Args:
        image: PIL image in any mode.
        bbox: Four values ``(x_min, y_min, x_max, y_max)`` in pixels.

    Returns:
        A new RGB ``PIL.Image`` with the rectangle drawn on it; the input
        image is left untouched.
    """
    # Normalise to 3-channel RGB first: palette, grey+alpha, 1-bit and CMYK
    # uploads otherwise reach OpenCV as index maps or unsupported layouts.
    canvas = np.array(image.convert("RGB"))
    # cv2.rectangle only accepts integer points.
    x_min, y_min, x_max, y_max = (int(round(float(v))) for v in bbox)
    cv2.rectangle(canvas, (x_min, y_min), (x_max, y_max), BOX_COLOR, BOX_THICKNESS)
    return Image.fromarray(canvas)


def get_csv_download_link(df, filename):
    """Return the caption text for a CSV download of ``filename``.

    Not used by this page (downloads go through ``st.download_button``);
    kept so the name stays available. ``df`` is accepted but not used.
    """
    return f'📥 Download {filename}'


def _render_sidebar():
    """Fill the sidebar with the user guide sections."""
    with st.sidebar:
        st.divider()
        st.markdown('\n\n📚 User Guide\n\n', unsafe_allow_html=True)

        # How It Works section
        st.markdown('\n\n🎯 How It Works\n\n', unsafe_allow_html=True)
        st.markdown(
            "\nThis tool uses advanced computer vision and machine learning"
            " techniques to:\n",
            unsafe_allow_html=True,
        )

        # Usage Instructions
        st.markdown('\n\n📝 Usage Instructions\n\n', unsafe_allow_html=True)
        st.markdown(
            "\n1\n"
            # The uploader accepts PNG as well, so the guide lists it too.
            "Upload a document image containing a table (PNG, JPG, or JPEG format)\n"
            "2\n"
            "The tool will automatically detect and highlight the table in your image\n"
            "3\n"
            "View both raw and enhanced versions of the extracted data\n"
            "4\n"
            "Download the results in CSV format for further use\n",
            unsafe_allow_html=True,
        )

        # Best Practices
        st.markdown('\n\n💡 Best Practices\n\n', unsafe_allow_html=True)
        st.markdown("\nFor Best Results:\n", unsafe_allow_html=True)

        # Technical Details (collapsible)
        with st.expander("🔧 Technical Details"):
            st.markdown("\n\nAlgorithm Overview:\n\n", unsafe_allow_html=True)

        # Support Info
        st.markdown('\n\n🔗 Connect with Me\n\n', unsafe_allow_html=True)
        st.markdown(
            "\nIf you encounter any issues or have questions, feel free to"
            " reach out: GitHub GitHub | LinkedIn LinkedIn | Medium Medium\n",
            unsafe_allow_html=True,
        )


def _render_table_view(table):
    """Show ``table`` as a dataframe followed by its copyable HTML form."""
    st.dataframe(
        table,
        use_container_width=True,
        height=TABLE_HEIGHT if not st.session_state.is_expanded else None,
    )
    st.markdown("### 📋 Copy HTML Table")
    html_table = table.to_html(index=False)
    st.markdown(
        "\n\nℹ️ This HTML can be copied and used directly in websites,"
        " LLM prompts, or other applications.\n\n",
        unsafe_allow_html=True,
    )
    # Whitespace-only markdown calls are kept so the sequence of elements on
    # the page stays the same as before.
    st.markdown("\n", unsafe_allow_html=True)
    st.code(html_table, language="html")
    st.markdown("\n", unsafe_allow_html=True)


def _csv_download_button(table, label, file_name, key):
    """Render a full-width button that downloads ``table`` as CSV."""
    st.download_button(
        label=label,
        data=table.to_csv(index=False),
        file_name=file_name,
        mime="text/csv",
        use_container_width=True,
        key=key,
    )


def _render_results(marked_image):
    """Render the annotated image, the data tabs and the download buttons.

    Reads the tables from ``st.session_state.raw_data`` and
    ``st.session_state.processed_data``.
    """
    # Side by side layout
    image_col, data_col = st.columns([0.4, 0.6])

    with image_col:
        st.divider()
        st.markdown('\n\nDetected Table\n\n', unsafe_allow_html=True)
        st.image(marked_image, use_container_width=True)
        st.markdown('\n', unsafe_allow_html=True)

    with data_col:
        st.divider()
        st.markdown('\n\nExtracted Data\n\n', unsafe_allow_html=True)
        raw_tab, enhanced_tab = st.tabs(["🔍 Raw Data", "✨ Enhanced Data ⭐"])
        with raw_tab:
            _render_table_view(st.session_state.raw_data)
        with enhanced_tab:
            st.markdown(
                "\n\n⭐ This is our enhanced version of the table with"
                " improved formatting and structure.\n\n",
                unsafe_allow_html=True,
            )
            _render_table_view(st.session_state.processed_data)

    # Download section below both columns
    st.divider()
    st.markdown('\n\nDownload Options\n\n', unsafe_allow_html=True)
    # The narrow middle column only acts as a gap between the two buttons.
    left_col, _gap, right_col = st.columns([1, 0.1, 1])
    with left_col:
        if 'raw_data' in st.session_state:
            _csv_download_button(
                st.session_state.raw_data,
                "📥 Download Raw Data",
                "raw_data.csv",
                "raw_download",
            )
    with right_col:
        if 'processed_data' in st.session_state:
            _csv_download_button(
                st.session_state.processed_data,
                "📥 Download Enhanced Data ⭐",
                "enhanced_data.csv",
                "enhanced_download",
            )
    st.markdown('\n', unsafe_allow_html=True)


# Set page config
st.set_page_config(
    page_title="Table Extraction Tool",
    layout="wide",
    initial_sidebar_state="expanded",  # Expanded so the guide shows by default
)

# Enhanced CSS styling with updated upload section
st.markdown(""" """, unsafe_allow_html=True)

# Create sidebar with guide content
_render_sidebar()

# Initialize session state for expanded view
if 'is_expanded' not in st.session_state:
    st.session_state.is_expanded = False

# Title and description
st.markdown(
    "\n\n📊 Table Extraction Tool\n\n"
    "Upload an image containing tables and instantly convert them into"
    " structured data formats.\n\n",
    unsafe_allow_html=True,
)

# File upload section. The widget needs a non-empty label; it is hidden so
# the page looks the same as with the former empty label.
uploaded_file = st.file_uploader(
    "Upload Table Image",
    type=['png', 'jpg', 'jpeg'],
    label_visibility="collapsed",
)
st.markdown('\n', unsafe_allow_html=True)

# Process the uploaded file
if uploaded_file is not None:
    with st.spinner('🔄 Processing your image...'):
        # The extractor takes a file path, so the upload is written to a
        # temporary file that keeps the upload's own extension.
        suffix = os.path.splitext(uploaded_file.name)[1] or '.jpg'
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            temp_path = tmp_file.name

        try:
            image = Image.open(uploaded_file)
            (raw_df, cleaned_df), bbox = process_image(temp_path)

            st.session_state.raw_data = raw_df
            st.session_state.processed_data = cleaned_df

            # Only the first returned box is drawn.
            marked_image = draw_bounding_box(image, bbox[0])
            st.session_state.marked_image = marked_image

            _render_results(marked_image)
        except Exception:
            # Top-level UI boundary: show the full traceback on the page
            # rather than letting the script run abort.
            st.error(f"❌ Error processing image: {traceback.format_exc()}")
        finally:
            # delete=False above means the file must be removed by hand.
            try:
                os.unlink(temp_path)
            except OSError as e:
                st.warning(f"⚠️ Error removing temporary file: {e}")