RickyGuoTheCrazish committed on
Commit
47d83ad
·
1 Parent(s): 33c14bd

update docker file

Browse files
Files changed (4) hide show
  1. .dockerignore +73 -0
  2. Dockerfile +18 -2
  3. README.md +7 -0
  4. src/sentiment_analyzer.py +27 -3
.dockerignore ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python cache files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual environments
24
+ venv/
25
+ env/
26
+ ENV/
27
+
28
+ # IDE files
29
+ .vscode/
30
+ .idea/
31
+ *.swp
32
+ *.swo
33
+ *~
34
+
35
+ # OS files
36
+ .DS_Store
37
+ Thumbs.db
38
+
39
+ # Git
40
+ .git/
41
+ .gitignore
42
+
43
+ # Test files
44
+ test_*.py
45
+ *_test.py
46
+ tests/
47
+
48
+ # Documentation
49
+ *.md
50
+ USAGE.md
51
+
52
+ # Local development files
53
+ .env
54
+ .env.local
55
+ *.log
56
+
57
+ # Jupyter notebooks
58
+ *.ipynb
59
+ .ipynb_checkpoints/
60
+
61
+ # Coverage reports
62
+ htmlcov/
63
+ .coverage
64
+ .coverage.*
65
+ coverage.xml
66
+
67
+ # pytest
68
+ .pytest_cache/
69
+
70
+ # mypy
71
+ .mypy_cache/
72
+ .dmypy.json
73
+ dmypy.json
Dockerfile CHANGED
@@ -1,7 +1,8 @@
1
- FROM python:3.9-slim
2
 
3
  WORKDIR /app
4
 
 
5
  RUN apt-get update && apt-get install -y \
6
  build-essential \
7
  curl \
@@ -9,10 +10,25 @@ RUN apt-get update && apt-get install -y \
9
  git \
10
  && rm -rf /var/lib/apt/lists/*
11
 
 
12
  COPY requirements.txt ./
 
 
 
 
 
13
  COPY src/ ./src/
14
 
15
- RUN pip3 install -r requirements.txt
 
 
 
 
 
 
 
 
 
16
 
17
  EXPOSE 8501
18
 
 
1
+ FROM python:3.11-slim
2
 
3
  WORKDIR /app
4
 
5
+ # Install system dependencies
6
  RUN apt-get update && apt-get install -y \
7
  build-essential \
8
  curl \
 
10
  git \
11
  && rm -rf /var/lib/apt/lists/*
12
 
13
+ # Copy requirements first for better caching
14
  COPY requirements.txt ./
15
+
16
+ # Install Python dependencies
17
+ RUN pip3 install --no-cache-dir -r requirements.txt
18
+
19
+ # Copy source code
20
  COPY src/ ./src/
21
 
22
+ # Set environment variables for model loading
23
+ ENV TRANSFORMERS_CACHE=/app/.cache/transformers
24
+ ENV HF_HOME=/app/.cache/huggingface
25
+ ENV TORCH_HOME=/app/.cache/torch
26
+
27
+ # Create cache directories
28
+ RUN mkdir -p /app/.cache/transformers /app/.cache/huggingface /app/.cache/torch
29
+
30
+ # Pre-download the FinBERT model to avoid runtime download issues
31
+ RUN python3 -c "from transformers import AutoTokenizer, AutoModelForSequenceClassification; import torch; print('Pre-downloading FinBERT model...'); tokenizer = AutoTokenizer.from_pretrained('ProsusAI/finbert'); model = AutoModelForSequenceClassification.from_pretrained('ProsusAI/finbert'); print('FinBERT model downloaded successfully!')"
32
 
33
  EXPOSE 8501
34
 
README.md CHANGED
@@ -59,3 +59,10 @@ streamlit run src/streamlit_app.py
59
  - 30-second rate limit between requests
60
  - Needs 1+ day old news (requires market data)
61
  - Uses Yahoo Finance (free but limited)
 
 
 
 
 
 
 
 
59
  - 30-second rate limit between requests
60
  - Needs 1+ day old news (requires market data)
61
  - Uses Yahoo Finance (free but limited)
62
+
63
+
64
+ # Build the Docker image
65
+ docker build -t finbert-market-eval .
66
+
67
+ # Run locally to test
68
+ docker run -p 8501:8501 finbert-market-eval
src/sentiment_analyzer.py CHANGED
@@ -39,14 +39,38 @@ class FinBERTAnalyzer:
39
  """
40
  try:
41
  logger.info(f"Loading FinBERT model: {_self.model_name}")
42
- _self.tokenizer = AutoTokenizer.from_pretrained(_self.model_name)
43
- _self.model = AutoModelForSequenceClassification.from_pretrained(_self.model_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  _self.model.to(_self.device)
45
  _self.model.eval()
46
  logger.info("FinBERT model loaded successfully")
47
  return True
 
48
  except Exception as e:
49
- logger.error(f"Error loading FinBERT model: {str(e)}")
 
 
 
 
 
 
 
 
 
 
50
  return False
51
 
52
  def analyze_sentiment(self, text: str) -> Dict[str, float]:
 
39
  """
40
  try:
41
  logger.info(f"Loading FinBERT model: {_self.model_name}")
42
+
43
+ # Try to load tokenizer first
44
+ _self.tokenizer = AutoTokenizer.from_pretrained(
45
+ _self.model_name,
46
+ cache_dir=None, # Use default cache
47
+ local_files_only=False # Allow downloading if needed
48
+ )
49
+ logger.info("Tokenizer loaded successfully")
50
+
51
+ # Load model
52
+ _self.model = AutoModelForSequenceClassification.from_pretrained(
53
+ _self.model_name,
54
+ cache_dir=None, # Use default cache
55
+ local_files_only=False # Allow downloading if needed
56
+ )
57
  _self.model.to(_self.device)
58
  _self.model.eval()
59
  logger.info("FinBERT model loaded successfully")
60
  return True
61
+
62
  except Exception as e:
63
+ error_msg = f"Error loading FinBERT model: {str(e)}"
64
+ logger.error(error_msg)
65
+
66
+ # Provide helpful error messages
67
+ if "Connection" in str(e) or "timeout" in str(e).lower():
68
+ logger.error("Network connection issue. Check internet connectivity.")
69
+ elif "disk" in str(e).lower() or "space" in str(e).lower():
70
+ logger.error("Insufficient disk space for model download.")
71
+ elif "permission" in str(e).lower():
72
+ logger.error("Permission denied. Check file/directory permissions.")
73
+
74
  return False
75
 
76
  def analyze_sentiment(self, text: str) -> Dict[str, float]: