Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
import re
|
@@ -790,11 +791,11 @@ def create_catalyst_interface():
|
|
790 |
def get_sentiment_indicator(sentiment_score):
|
791 |
"""Get sentiment indicator HTML"""
|
792 |
if sentiment_score > 0.1:
|
793 |
-
return '<span class="sentiment-indicator sentiment-positive"
|
794 |
elif sentiment_score < -0.1:
|
795 |
-
return '<span class="sentiment-indicator sentiment-negative"
|
796 |
else:
|
797 |
-
return '<span class="sentiment-indicator sentiment-neutral"
|
798 |
|
799 |
def process_and_display(file, summary_type, summary_length, enable_ai_features):
|
800 |
"""Enhanced processing with comprehensive results display"""
|
@@ -806,7 +807,7 @@ def create_catalyst_interface():
|
|
806 |
gr.update(visible=False),
|
807 |
gr.update(value="""
|
808 |
<div style="text-align: center; padding: 60px; color: #666;">
|
809 |
-
<h3
|
810 |
<p>Upload a document to begin advanced AI-powered analysis</p>
|
811 |
<p><small>Supports: PDF, Word (.docx), Text (.txt, .md, .rtf)</small></p>
|
812 |
</div>
|
@@ -822,7 +823,7 @@ def create_catalyst_interface():
|
|
822 |
if error:
|
823 |
error_html = f'''
|
824 |
<div class="error-container">
|
825 |
-
<h4
|
826 |
<p><strong>Error:</strong> {error}</p>
|
827 |
<p><small>Please try a different file or check the file format.</small></p>
|
828 |
</div>
|
@@ -838,12 +839,12 @@ def create_catalyst_interface():
|
|
838 |
# Format summary display
|
839 |
summary_html = f'''
|
840 |
<div class="summary-container">
|
841 |
-
<h3
|
842 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 15px; margin-bottom: 15px;">
|
843 |
-
<div><strong
|
844 |
-
<div><strong
|
845 |
-
<div><strong
|
846 |
-
<div><strong
|
847 |
</div>
|
848 |
<div style="background: rgba(255,255,255,0.15); padding: 20px; border-radius: 10px; line-height: 1.6;">
|
849 |
{result["summary"]}
|
@@ -861,29 +862,29 @@ def create_catalyst_interface():
|
|
861 |
|
862 |
stats_html = f'''
|
863 |
<div class="stats-container">
|
864 |
-
<h3
|
865 |
|
866 |
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin: 20px 0;">
|
867 |
<div class="metric-card">
|
868 |
-
<h4 style="margin: 0; color: #007bff;"
|
869 |
<small>Words</small>
|
870 |
</div>
|
871 |
<div class="metric-card">
|
872 |
-
<h4 style="margin: 0; color: #28a745;"
|
873 |
<small>Reading Time</small>
|
874 |
</div>
|
875 |
<div class="metric-card">
|
876 |
-
<h4 style="margin: 0; color: #17a2b8;"
|
877 |
<small>Sentences</small>
|
878 |
</div>
|
879 |
<div class="metric-card">
|
880 |
-
<h4 style="margin: 0; color: #6f42c1;"
|
881 |
<small>Unique Words</small>
|
882 |
</div>
|
883 |
</div>
|
884 |
|
885 |
<div style="margin: 20px 0;">
|
886 |
-
<h4
|
887 |
<div class="progress-bar">
|
888 |
<div class="progress-fill" style="width: {readability}%; background-color: {readability_color};"></div>
|
889 |
</div>
|
@@ -897,7 +898,7 @@ def create_catalyst_interface():
|
|
897 |
sentiment_html = get_sentiment_indicator(sentiment['compound'])
|
898 |
stats_html += f'''
|
899 |
<div style="margin: 20px 0;">
|
900 |
-
<h4
|
901 |
{sentiment_html}
|
902 |
<div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; margin-top: 10px;">
|
903 |
<small>Positive: {sentiment['positive']:.2f}</small>
|
@@ -911,7 +912,7 @@ def create_catalyst_interface():
|
|
911 |
if stats.get('top_words'):
|
912 |
stats_html += f'''
|
913 |
<div style="margin: 20px 0;">
|
914 |
-
<h4
|
915 |
<div style="display: flex; flex-wrap: wrap; gap: 8px; margin-top: 10px;">
|
916 |
{" ".join([f'<span style="background: rgba(255,255,255,0.2); padding: 6px 12px; border-radius: 15px; font-size: 13px;">{word} ({count})</span>' for word, count in stats["top_words"][:10]])}
|
917 |
</div>
|
@@ -923,7 +924,7 @@ def create_catalyst_interface():
|
|
923 |
# Format key points
|
924 |
key_points_html = f'''
|
925 |
<div class="key-points-container">
|
926 |
-
<h3
|
927 |
<ul style="list-style: none; padding: 0;">
|
928 |
'''
|
929 |
for i, point in enumerate(result["key_points"], 1):
|
@@ -935,7 +936,7 @@ def create_catalyst_interface():
|
|
935 |
if result.get("outline"):
|
936 |
outline_html = f'''
|
937 |
<div class="outline-container">
|
938 |
-
<h3
|
939 |
<ol style="padding-left: 20px;">
|
940 |
'''
|
941 |
for item in result["outline"]:
|
@@ -953,7 +954,7 @@ def create_catalyst_interface():
|
|
953 |
except Exception as e:
|
954 |
error_html = f'''
|
955 |
<div class="error-container">
|
956 |
-
<h4
|
957 |
<p><strong>Details:</strong> {str(e)}</p>
|
958 |
<p><small>Please try again or contact support if the issue persists.</small></p>
|
959 |
</div>
|
@@ -967,12 +968,12 @@ def create_catalyst_interface():
|
|
967 |
)
|
968 |
|
969 |
# Create the main interface
|
970 |
-
with gr.Blocks(css=css, title="
|
971 |
|
972 |
# Header
|
973 |
gr.HTML("""
|
974 |
<div class="catalyst-header">
|
975 |
-
<h1 style="margin: 0; font-size: 3em; font-weight: bold;"
|
976 |
<h2 style="margin: 10px 0; font-size: 1.5em; opacity: 0.9;">Advanced Document Summarizer</h2>
|
977 |
<p style="margin: 15px 0 0 0; font-size: 1.1em; opacity: 0.8;">
|
978 |
Powered by AI • Extractive & Abstractive Summarization • Comprehensive Analytics
|
@@ -986,17 +987,17 @@ def create_catalyst_interface():
|
|
986 |
with gr.Group():
|
987 |
gr.HTML('<div class="control-panel">')
|
988 |
|
989 |
-
gr.Markdown("###
|
990 |
file_upload = gr.File(
|
991 |
label="Choose your document",
|
992 |
file_types=[".pdf", ".docx", ".txt", ".md", ".rtf"],
|
993 |
elem_classes="file-upload-area"
|
994 |
)
|
995 |
|
996 |
-
gr.Markdown("###
|
997 |
|
998 |
enable_ai_features = gr.Checkbox(
|
999 |
-
label="
|
1000 |
value=TRANSFORMERS_AVAILABLE,
|
1001 |
info="Use advanced AI models for better summarization",
|
1002 |
interactive=TRANSFORMERS_AVAILABLE
|
@@ -1004,8 +1005,8 @@ def create_catalyst_interface():
|
|
1004 |
|
1005 |
summary_type = gr.Radio(
|
1006 |
choices=[
|
1007 |
-
("
|
1008 |
-
("
|
1009 |
],
|
1010 |
value="ai" if TRANSFORMERS_AVAILABLE else "extractive",
|
1011 |
label="Summarization Method",
|
@@ -1014,10 +1015,10 @@ def create_catalyst_interface():
|
|
1014 |
|
1015 |
summary_length = gr.Radio(
|
1016 |
choices=[
|
1017 |
-
("
|
1018 |
-
("
|
1019 |
-
("
|
1020 |
-
("
|
1021 |
],
|
1022 |
value="medium",
|
1023 |
label="Analysis Depth",
|
@@ -1025,7 +1026,7 @@ def create_catalyst_interface():
|
|
1025 |
)
|
1026 |
|
1027 |
analyze_btn = gr.Button(
|
1028 |
-
"
|
1029 |
variant="primary",
|
1030 |
size="lg",
|
1031 |
elem_classes="analyze-button"
|
@@ -1035,18 +1036,18 @@ def create_catalyst_interface():
|
|
1035 |
|
1036 |
# Enhanced Library Status
|
1037 |
gr.Markdown(f"""
|
1038 |
-
###
|
1039 |
|
1040 |
**Core Features:**
|
1041 |
-
-
|
1042 |
-
-
|
1043 |
-
-
|
1044 |
-
-
|
1045 |
-
-
|
1046 |
|
1047 |
**Performance:**
|
1048 |
-
-
|
1049 |
-
-
|
1050 |
""")
|
1051 |
|
1052 |
# Right column - Enhanced Results
|
@@ -1122,7 +1123,7 @@ def create_catalyst_interface():
|
|
1122 |
gr.HTML("""
|
1123 |
<div style="margin-top: 50px; padding: 30px; background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
|
1124 |
border-radius: 15px; text-align: center; border-top: 3px solid #007bff;">
|
1125 |
-
<h3 style="color: #333; margin-bottom: 20px;"
|
1126 |
|
1127 |
<div style="background: #343a40; color: #fff; padding: 15px; border-radius: 8px;
|
1128 |
font-family: 'Courier New', monospace; margin: 15px 0;">
|
@@ -1132,15 +1133,15 @@ def create_catalyst_interface():
|
|
1132 |
|
1133 |
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin-top: 20px;">
|
1134 |
<div style="background: white; padding: 15px; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
|
1135 |
-
<strong
|
1136 |
<small>Multi-format support, AI summarization, key insights extraction</small>
|
1137 |
</div>
|
1138 |
<div style="background: white; padding: 15px; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
|
1139 |
-
<strong
|
1140 |
<small>Sentiment analysis, readability scoring, word frequency</small>
|
1141 |
</div>
|
1142 |
<div style="background: white; padding: 15px; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
|
1143 |
-
<strong
|
1144 |
<small>Intelligent caching, GPU acceleration, batch processing</small>
|
1145 |
</div>
|
1146 |
</div>
|
@@ -1156,8 +1157,7 @@ def create_catalyst_interface():
|
|
1156 |
if __name__ == "__main__":
|
1157 |
demo = create_catalyst_interface()
|
1158 |
demo.launch(
|
1159 |
-
|
1160 |
-
|
1161 |
-
|
1162 |
-
|
1163 |
-
)
|
|
|
1 |
+
pip install NTLK
|
2 |
import gradio as gr
|
3 |
import os
|
4 |
import re
|
|
|
791 |
def get_sentiment_indicator(sentiment_score):
|
792 |
"""Get sentiment indicator HTML"""
|
793 |
if sentiment_score > 0.1:
|
794 |
+
return '<span class="sentiment-indicator sentiment-positive">Positive</span>'
|
795 |
elif sentiment_score < -0.1:
|
796 |
+
return '<span class="sentiment-indicator sentiment-negative">Negative</span>'
|
797 |
else:
|
798 |
+
return '<span class="sentiment-indicator sentiment-neutral">Neutral</span>'
|
799 |
|
800 |
def process_and_display(file, summary_type, summary_length, enable_ai_features):
|
801 |
"""Enhanced processing with comprehensive results display"""
|
|
|
807 |
gr.update(visible=False),
|
808 |
gr.update(value="""
|
809 |
<div style="text-align: center; padding: 60px; color: #666;">
|
810 |
+
<h3>CatalystGPT-4 Ready</h3>
|
811 |
<p>Upload a document to begin advanced AI-powered analysis</p>
|
812 |
<p><small>Supports: PDF, Word (.docx), Text (.txt, .md, .rtf)</small></p>
|
813 |
</div>
|
|
|
823 |
if error:
|
824 |
error_html = f'''
|
825 |
<div class="error-container">
|
826 |
+
<h4>Processing Error</h4>
|
827 |
<p><strong>Error:</strong> {error}</p>
|
828 |
<p><small>Please try a different file or check the file format.</small></p>
|
829 |
</div>
|
|
|
839 |
# Format summary display
|
840 |
summary_html = f'''
|
841 |
<div class="summary-container">
|
842 |
+
<h3>Document Summary</h3>
|
843 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 15px; margin-bottom: 15px;">
|
844 |
+
<div><strong>File:</strong> {result["file_name"]}</div>
|
845 |
+
<div><strong>Size:</strong> {format_file_size(result["file_size"])}</div>
|
846 |
+
<div><strong>Model:</strong> {result["model_used"]}</div>
|
847 |
+
<div><strong>Length:</strong> {result["summary_length"].title()}</div>
|
848 |
</div>
|
849 |
<div style="background: rgba(255,255,255,0.15); padding: 20px; border-radius: 10px; line-height: 1.6;">
|
850 |
{result["summary"]}
|
|
|
862 |
|
863 |
stats_html = f'''
|
864 |
<div class="stats-container">
|
865 |
+
<h3>Document Analytics</h3>
|
866 |
|
867 |
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin: 20px 0;">
|
868 |
<div class="metric-card">
|
869 |
+
<h4 style="margin: 0; color: #007bff;">{stats["word_count"]:,}</h4>
|
870 |
<small>Words</small>
|
871 |
</div>
|
872 |
<div class="metric-card">
|
873 |
+
<h4 style="margin: 0; color: #28a745;">{stats["estimated_reading_time"]} min</h4>
|
874 |
<small>Reading Time</small>
|
875 |
</div>
|
876 |
<div class="metric-card">
|
877 |
+
<h4 style="margin: 0; color: #17a2b8;">{stats["sentence_count"]:,}</h4>
|
878 |
<small>Sentences</small>
|
879 |
</div>
|
880 |
<div class="metric-card">
|
881 |
+
<h4 style="margin: 0; color: #6f42c1;">{stats.get("unique_words", "N/A")}</h4>
|
882 |
<small>Unique Words</small>
|
883 |
</div>
|
884 |
</div>
|
885 |
|
886 |
<div style="margin: 20px 0;">
|
887 |
+
<h4>Readability Score</h4>
|
888 |
<div class="progress-bar">
|
889 |
<div class="progress-fill" style="width: {readability}%; background-color: {readability_color};"></div>
|
890 |
</div>
|
|
|
898 |
sentiment_html = get_sentiment_indicator(sentiment['compound'])
|
899 |
stats_html += f'''
|
900 |
<div style="margin: 20px 0;">
|
901 |
+
<h4>Document Sentiment</h4>
|
902 |
{sentiment_html}
|
903 |
<div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; margin-top: 10px;">
|
904 |
<small>Positive: {sentiment['positive']:.2f}</small>
|
|
|
912 |
if stats.get('top_words'):
|
913 |
stats_html += f'''
|
914 |
<div style="margin: 20px 0;">
|
915 |
+
<h4>Most Frequent Words</h4>
|
916 |
<div style="display: flex; flex-wrap: wrap; gap: 8px; margin-top: 10px;">
|
917 |
{" ".join([f'<span style="background: rgba(255,255,255,0.2); padding: 6px 12px; border-radius: 15px; font-size: 13px;">{word} ({count})</span>' for word, count in stats["top_words"][:10]])}
|
918 |
</div>
|
|
|
924 |
# Format key points
|
925 |
key_points_html = f'''
|
926 |
<div class="key-points-container">
|
927 |
+
<h3>Key Insights</h3>
|
928 |
<ul style="list-style: none; padding: 0;">
|
929 |
'''
|
930 |
for i, point in enumerate(result["key_points"], 1):
|
|
|
936 |
if result.get("outline"):
|
937 |
outline_html = f'''
|
938 |
<div class="outline-container">
|
939 |
+
<h3>Document Structure</h3>
|
940 |
<ol style="padding-left: 20px;">
|
941 |
'''
|
942 |
for item in result["outline"]:
|
|
|
954 |
except Exception as e:
|
955 |
error_html = f'''
|
956 |
<div class="error-container">
|
957 |
+
<h4>Unexpected Error</h4>
|
958 |
<p><strong>Details:</strong> {str(e)}</p>
|
959 |
<p><small>Please try again or contact support if the issue persists.</small></p>
|
960 |
</div>
|
|
|
968 |
)
|
969 |
|
970 |
# Create the main interface
|
971 |
+
with gr.Blocks(css=css, title="CatalystGPT-4 Document Summarizer", theme=gr.themes.Soft()) as demo:
|
972 |
|
973 |
# Header
|
974 |
gr.HTML("""
|
975 |
<div class="catalyst-header">
|
976 |
+
<h1 style="margin: 0; font-size: 3em; font-weight: bold;">CatalystGPT-4</h1>
|
977 |
<h2 style="margin: 10px 0; font-size: 1.5em; opacity: 0.9;">Advanced Document Summarizer</h2>
|
978 |
<p style="margin: 15px 0 0 0; font-size: 1.1em; opacity: 0.8;">
|
979 |
Powered by AI • Extractive & Abstractive Summarization • Comprehensive Analytics
|
|
|
987 |
with gr.Group():
|
988 |
gr.HTML('<div class="control-panel">')
|
989 |
|
990 |
+
gr.Markdown("### Document Upload")
|
991 |
file_upload = gr.File(
|
992 |
label="Choose your document",
|
993 |
file_types=[".pdf", ".docx", ".txt", ".md", ".rtf"],
|
994 |
elem_classes="file-upload-area"
|
995 |
)
|
996 |
|
997 |
+
gr.Markdown("### Analysis Settings")
|
998 |
|
999 |
enable_ai_features = gr.Checkbox(
|
1000 |
+
label="Enable AI Features",
|
1001 |
value=TRANSFORMERS_AVAILABLE,
|
1002 |
info="Use advanced AI models for better summarization",
|
1003 |
interactive=TRANSFORMERS_AVAILABLE
|
|
|
1005 |
|
1006 |
summary_type = gr.Radio(
|
1007 |
choices=[
|
1008 |
+
("AI Summary (Neural)", "ai"),
|
1009 |
+
("Extractive Summary", "extractive")
|
1010 |
],
|
1011 |
value="ai" if TRANSFORMERS_AVAILABLE else "extractive",
|
1012 |
label="Summarization Method",
|
|
|
1015 |
|
1016 |
summary_length = gr.Radio(
|
1017 |
choices=[
|
1018 |
+
("Short & Concise", "short"),
|
1019 |
+
("Standard Length", "medium"),
|
1020 |
+
("Detailed Analysis", "long"),
|
1021 |
+
("Comprehensive Report", "detailed")
|
1022 |
],
|
1023 |
value="medium",
|
1024 |
label="Analysis Depth",
|
|
|
1026 |
)
|
1027 |
|
1028 |
analyze_btn = gr.Button(
|
1029 |
+
"Analyze Document",
|
1030 |
variant="primary",
|
1031 |
size="lg",
|
1032 |
elem_classes="analyze-button"
|
|
|
1036 |
|
1037 |
# Enhanced Library Status
|
1038 |
gr.Markdown(f"""
|
1039 |
+
### System Status
|
1040 |
|
1041 |
**Core Features:**
|
1042 |
+
- **PDF Processing:** {"✅ PyMuPDF" if PYMUPDF_AVAILABLE else ("✅ PyPDF2" if PDF_AVAILABLE else "❌ Not Available")}
|
1043 |
+
- **Word Documents:** {"✅ Available" if DOCX_AVAILABLE else "❌ Install python-docx"}
|
1044 |
+
- **AI Summarization:** {"✅ Available" if TRANSFORMERS_AVAILABLE else "❌ Install transformers"}
|
1045 |
+
- **Advanced NLP:** {"✅ Available" if NLTK_AVAILABLE else "⚠️ Basic processing"}
|
1046 |
+
- **Sentiment Analysis:** {"✅ Available" if (NLTK_AVAILABLE and summarizer.sentiment_analyzer) else "❌ Not Available"}
|
1047 |
|
1048 |
**Performance:**
|
1049 |
+
- **Device:** {"GPU" if DEVICE >= 0 else "CPU"}
|
1050 |
+
- **Cache:** {"Enabled" if summarizer.cache is not None else "Disabled"}
|
1051 |
""")
|
1052 |
|
1053 |
# Right column - Enhanced Results
|
|
|
1123 |
gr.HTML("""
|
1124 |
<div style="margin-top: 50px; padding: 30px; background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
|
1125 |
border-radius: 15px; text-align: center; border-top: 3px solid #007bff;">
|
1126 |
+
<h3 style="color: #333; margin-bottom: 20px;">Installation & Setup</h3>
|
1127 |
|
1128 |
<div style="background: #343a40; color: #fff; padding: 15px; border-radius: 8px;
|
1129 |
font-family: 'Courier New', monospace; margin: 15px 0;">
|
|
|
1133 |
|
1134 |
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin-top: 20px;">
|
1135 |
<div style="background: white; padding: 15px; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
|
1136 |
+
<strong>Core Features</strong><br>
|
1137 |
<small>Multi-format support, AI summarization, key insights extraction</small>
|
1138 |
</div>
|
1139 |
<div style="background: white; padding: 15px; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
|
1140 |
+
<strong>Advanced Analytics</strong><br>
|
1141 |
<small>Sentiment analysis, readability scoring, word frequency</small>
|
1142 |
</div>
|
1143 |
<div style="background: white; padding: 15px; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
|
1144 |
+
<strong>Performance</strong><br>
|
1145 |
<small>Intelligent caching, GPU acceleration, batch processing</small>
|
1146 |
</div>
|
1147 |
</div>
|
|
|
1157 |
if __name__ == "__main__":
|
1158 |
demo = create_catalyst_interface()
|
1159 |
demo.launch(
|
1160 |
+
server_name="0.0.0.0",
|
1161 |
+
server_port=7860,
|
1162 |
+
show_error=True
|
1163 |
+
)
|
|