RickyGuoTheCrazish committed on
Commit
47d83ad
·
1 Parent(s): 33c14bd

update docker file

Browse files
Files changed (4) hide show
  1. .dockerignore +73 -0
  2. Dockerfile +18 -2
  3. README.md +7 -0
  4. src/sentiment_analyzer.py +27 -3
.dockerignore ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python cache files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual environments
24
+ venv/
25
+ env/
26
+ ENV/
27
+
28
+ # IDE files
29
+ .vscode/
30
+ .idea/
31
+ *.swp
32
+ *.swo
33
+ *~
34
+
35
+ # OS files
36
+ .DS_Store
37
+ Thumbs.db
38
+
39
+ # Git
40
+ .git/
41
+ .gitignore
42
+
43
+ # Test files
44
+ test_*.py
45
+ *_test.py
46
+ tests/
47
+
48
+ # Documentation
49
+ *.md
50
+ USAGE.md
51
+
52
+ # Local development files
53
+ .env
54
+ .env.local
55
+ *.log
56
+
57
+ # Jupyter notebooks
58
+ *.ipynb
59
+ .ipynb_checkpoints/
60
+
61
+ # Coverage reports
62
+ htmlcov/
63
+ .coverage
64
+ .coverage.*
65
+ coverage.xml
66
+
67
+ # pytest
68
+ .pytest_cache/
69
+
70
+ # mypy
71
+ .mypy_cache/
72
+ .dmypy.json
73
+ dmypy.json
Dockerfile CHANGED
@@ -1,7 +1,8 @@
1
- FROM python:3.9-slim
2
 
3
  WORKDIR /app
4
 
 
5
  RUN apt-get update && apt-get install -y \
6
  build-essential \
7
  curl \
@@ -9,10 +10,25 @@ RUN apt-get update && apt-get install -y \
9
  git \
10
  && rm -rf /var/lib/apt/lists/*
11
 
 
12
  COPY requirements.txt ./
 
 
 
 
 
13
  COPY src/ ./src/
14
 
15
- RUN pip3 install -r requirements.txt
 
 
 
 
 
 
 
 
 
16
 
17
  EXPOSE 8501
18
 
 
1
+ FROM python:3.11-slim
2
 
3
  WORKDIR /app
4
 
5
+ # Install system dependencies
6
  RUN apt-get update && apt-get install -y \
7
  build-essential \
8
  curl \
 
10
  git \
11
  && rm -rf /var/lib/apt/lists/*
12
 
13
+ # Copy requirements first for better caching
14
  COPY requirements.txt ./
15
+
16
+ # Install Python dependencies
17
+ RUN pip3 install --no-cache-dir -r requirements.txt
18
+
19
+ # Copy source code
20
  COPY src/ ./src/
21
 
22
+ # Set environment variables for model loading
23
+ ENV TRANSFORMERS_CACHE=/app/.cache/transformers
24
+ ENV HF_HOME=/app/.cache/huggingface
25
+ ENV TORCH_HOME=/app/.cache/torch
26
+
27
+ # Create cache directories
28
+ RUN mkdir -p /app/.cache/transformers /app/.cache/huggingface /app/.cache/torch
29
+
30
+ # Pre-download the FinBERT model to avoid runtime download issues
31
+ RUN python3 -c "from transformers import AutoTokenizer, AutoModelForSequenceClassification; import torch; print('Pre-downloading FinBERT model...'); tokenizer = AutoTokenizer.from_pretrained('ProsusAI/finbert'); model = AutoModelForSequenceClassification.from_pretrained('ProsusAI/finbert'); print('FinBERT model downloaded successfully!')"
32
 
33
  EXPOSE 8501
34
 
README.md CHANGED
@@ -59,3 +59,10 @@ streamlit run src/streamlit_app.py
59
  - 30-second rate limit between requests
60
  - Needs 1+ day old news (requires market data)
61
  - Uses Yahoo Finance (free but limited)
 
 
 
 
 
 
 
 
59
  - 30-second rate limit between requests
60
  - Needs 1+ day old news (requires market data)
61
  - Uses Yahoo Finance (free but limited)
62
+
63
+
64
+ # Build the Docker image
65
+ docker build -t finbert-market-eval .
66
+
67
+ # Run locally to test
68
+ docker run -p 8501:8501 finbert-market-eval
src/sentiment_analyzer.py CHANGED
@@ -39,14 +39,38 @@ class FinBERTAnalyzer:
39
  """
40
  try:
41
  logger.info(f"Loading FinBERT model: {_self.model_name}")
42
- _self.tokenizer = AutoTokenizer.from_pretrained(_self.model_name)
43
- _self.model = AutoModelForSequenceClassification.from_pretrained(_self.model_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  _self.model.to(_self.device)
45
  _self.model.eval()
46
  logger.info("FinBERT model loaded successfully")
47
  return True
 
48
  except Exception as e:
49
- logger.error(f"Error loading FinBERT model: {str(e)}")
 
 
 
 
 
 
 
 
 
 
50
  return False
51
 
52
  def analyze_sentiment(self, text: str) -> Dict[str, float]:
 
39
  """
40
  try:
41
  logger.info(f"Loading FinBERT model: {_self.model_name}")
42
+
43
+ # Try to load tokenizer first
44
+ _self.tokenizer = AutoTokenizer.from_pretrained(
45
+ _self.model_name,
46
+ cache_dir=None, # Use default cache
47
+ local_files_only=False # Allow downloading if needed
48
+ )
49
+ logger.info("Tokenizer loaded successfully")
50
+
51
+ # Load model
52
+ _self.model = AutoModelForSequenceClassification.from_pretrained(
53
+ _self.model_name,
54
+ cache_dir=None, # Use default cache
55
+ local_files_only=False # Allow downloading if needed
56
+ )
57
  _self.model.to(_self.device)
58
  _self.model.eval()
59
  logger.info("FinBERT model loaded successfully")
60
  return True
61
+
62
  except Exception as e:
63
+ error_msg = f"Error loading FinBERT model: {str(e)}"
64
+ logger.error(error_msg)
65
+
66
+ # Provide helpful error messages
67
+ if "Connection" in str(e) or "timeout" in str(e).lower():
68
+ logger.error("Network connection issue. Check internet connectivity.")
69
+ elif "disk" in str(e).lower() or "space" in str(e).lower():
70
+ logger.error("Insufficient disk space for model download.")
71
+ elif "permission" in str(e).lower():
72
+ logger.error("Permission denied. Check file/directory permissions.")
73
+
74
  return False
75
 
76
  def analyze_sentiment(self, text: str) -> Dict[str, float]: