# Spaces: Sleeping
# CatalystGPT-4 Advanced Document Summarizer Requirements
# Core Dependencies
# Web Interface Framework
gradio>=4.0.0
# Document Processing Libraries
python-docx>=1.1.0  # Microsoft Word document processing
PyPDF2>=3.0.1  # PDF text extraction (primary)
PyMuPDF>=1.23.0  # PDF processing (alternative, better performance)
# AI and Machine Learning
transformers>=4.30.0  # Hugging Face transformers for AI summarization
torch>=2.0.0  # PyTorch backend for transformers
tokenizers>=0.13.0  # Tokenization support
# Natural Language Processing
nltk>=3.8  # Advanced text processing and analysis
textstat>=0.7.0  # Text readability statistics
# Data Processing and Utilities
numpy>=1.24.0  # Numerical operations
pandas>=2.0.0  # Data manipulation (optional, for advanced features)
regex>=2023.0.0  # Enhanced regular expression support
# Optional GPU Support (uncomment if you have a CUDA-compatible GPU)
# Note: GPU acceleration comes from a CUDA-enabled torch build; torchaudio only adds audio processing.
# torchaudio>=2.0.0
# Development and Testing (optional)
# pytest>=7.0.0
# black>=23.0.0
# flake8>=6.0.0
# Additional Text Processing (optional but recommended)
spacy>=3.6.0  # Advanced NLP (optional)
textblob>=0.17.0  # Simple text processing (alternative to NLTK)
# File Format Support Extensions (optional)
python-pptx>=0.6.21  # PowerPoint support (future feature)
openpyxl>=3.1.0  # Excel file support (future feature)
markdown>=3.4.0  # Enhanced Markdown processing
# Performance and Caching (optional)
joblib>=1.3.0  # Efficient caching and serialization
psutil>=5.9.0  # System resource monitoring
# Security and Validation (recommended)
validators>=0.20.0  # Input validation
bleach>=6.0.0  # Text sanitization
# Logging and Monitoring (optional)
colorlog>=6.7.0  # Colored logging output
tqdm>=4.65.0  # Progress bars for long operations