jens-l committed on
Commit
3620617
·
1 Parent(s): a5e8309

Fix 502 Bad Gateway errors: Enhanced nginx config, added restart cooldown, improved health checks, better error handling, graceful shutdown

Browse files
Files changed (3) hide show
  1. app.py +123 -20
  2. nginx.conf +32 -2
  3. start.sh +17 -17
app.py CHANGED
@@ -1,6 +1,9 @@
 
1
  import os
 
2
  import socket
3
  import subprocess
 
4
  import time
5
  from pathlib import Path
6
 
@@ -13,6 +16,27 @@ from ui_helpers import stream_to_gradio
13
 
14
  preview_process = None
15
  PREVIEW_PORT = 7861 # Internal port for preview apps
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
 
18
  def get_preview_url():
@@ -91,16 +115,46 @@ def stop_preview_app():
91
 
92
  def start_preview_app():
93
  """Start the preview app in a subprocess if it's not already running."""
94
- global preview_process
95
 
96
- # Check if preview app is already running
97
  if preview_process and preview_process.poll() is None:
98
- print(f"✅ Preview app already running (PID: {preview_process.pid})")
99
- return True, f"Preview running at {PREVIEW_URL}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  # Stop any existing process before starting a new one
102
  stop_preview_app()
103
 
 
 
 
104
  # Wait for the port to become available (up to 5 seconds)
105
  for i in range(10): # 10 attempts * 0.5 seconds = 5 seconds max
106
  if is_port_available(PREVIEW_PORT):
@@ -206,7 +260,9 @@ def create_iframe_preview():
206
  )
207
  return iframe_html
208
  else:
209
- print(f"⚠️ Preview app unhealthy: {status}, restarting...")
 
 
210
 
211
  # Try to start the preview app and show an iframe
212
  success, message = start_preview_app()
@@ -218,8 +274,23 @@ def create_iframe_preview():
218
  print(f"🔍 Creating iframe: {iframe_html}")
219
  return iframe_html
220
  else:
221
- error_html = f'<div style="color: red; padding: 20px;">{message}</div>'
222
- print(f"🔍 Error in preview: {error_html}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  return error_html
224
 
225
 
@@ -259,17 +330,35 @@ def check_preview_health():
259
  preview_process = None
260
  return False, "Process died"
261
 
262
- # Check if responsive
263
- try:
264
- with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
265
- sock.settimeout(1)
266
- result = sock.connect_ex(("127.0.0.1", PREVIEW_PORT))
267
- if result == 0:
268
- return True, "Healthy"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  else:
270
- return False, "Not responsive on port"
271
- except Exception as e:
272
- return False, f"Connection check failed: {e}"
273
 
274
 
275
  def ensure_preview_running():
@@ -645,8 +734,22 @@ class GradioUI:
645
  return create_iframe_preview()
646
 
647
  def refresh_all():
648
- # First, ensure the preview app is (re)started
649
- preview_content = create_iframe_preview()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
650
 
651
  # Then, update the file explorer and code editor
652
  file_explorer_val = gr.FileExplorer(
@@ -663,7 +766,7 @@ class GradioUI:
663
  interactive=True,
664
  autocomplete=True,
665
  )
666
- return file_explorer_val, code_editor_val, preview_content
667
 
668
  save_btn.click(
669
  fn=save_file,
 
1
+ import atexit
2
  import os
3
+ import signal
4
  import socket
5
  import subprocess
6
+ import sys
7
  import time
8
  from pathlib import Path
9
 
 
16
 
17
  preview_process = None
18
  PREVIEW_PORT = 7861 # Internal port for preview apps
19
+ last_restart_time = 0 # Track when we last restarted the preview app
20
+ RESTART_COOLDOWN = 10 # Minimum seconds between restarts
21
+
22
+
23
def cleanup_preview_on_exit():
    """Stop the preview app as part of process shutdown.

    Prints a short notice and then delegates the actual teardown to
    ``stop_preview_app``.
    """
    print("🧹 Cleaning up preview app on exit...")
    # All teardown logic lives in stop_preview_app; this is only the hook.
    stop_preview_app()
27
+
28
+
29
def signal_handler(signum, frame):
    """Shut down cleanly when a termination signal is delivered.

    Args:
        signum: Number of the signal that triggered the handler.
        frame: Stack frame that was interrupted (unused; part of the
            handler signature expected by ``signal.signal``).

    Raises:
        SystemExit: Always, with exit status 0, after the preview cleanup.
    """
    notice = f"🔔 Received signal {signum}, shutting down gracefully..."
    print(notice)
    cleanup_preview_on_exit()
    # Equivalent to sys.exit(0): unwind the interpreter with status 0.
    raise SystemExit(0)
34
+
35
+
36
# Wire up graceful shutdown: run the preview cleanup on normal interpreter
# exit, and route SIGINT / SIGTERM through signal_handler.
atexit.register(cleanup_preview_on_exit)
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
40
 
41
 
42
  def get_preview_url():
 
115
 
116
  def start_preview_app():
117
  """Start the preview app in a subprocess if it's not already running."""
118
+ global preview_process, last_restart_time
119
 
120
+ # Check if preview app is already running and healthy
121
  if preview_process and preview_process.poll() is None:
122
+ # Verify it's actually responsive on the port
123
+ try:
124
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
125
+ sock.settimeout(1)
126
+ result = sock.connect_ex(("127.0.0.1", PREVIEW_PORT))
127
+ if result == 0:
128
+ print(
129
+ f"✅ Preview app already running and healthy "
130
+ f"(PID: {preview_process.pid})"
131
+ )
132
+ return True, f"Preview running at {PREVIEW_URL}"
133
+ except Exception:
134
+ pass
135
+
136
+ # Check cooldown period to avoid too frequent restarts
137
+ current_time = time.time()
138
+ if current_time - last_restart_time < RESTART_COOLDOWN:
139
+ remaining_cooldown = RESTART_COOLDOWN - (current_time - last_restart_time)
140
+ print(
141
+ f"⏳ Preview app restart on cooldown, {remaining_cooldown:.1f}s remaining"
142
+ )
143
+ if preview_process and preview_process.poll() is None:
144
+ # If there's still a process running, return success
145
+ return True, f"Preview running at {PREVIEW_URL}"
146
+ else:
147
+ return (
148
+ False,
149
+ f"Preview app on cooldown for {remaining_cooldown:.1f} more seconds",
150
+ )
151
 
152
  # Stop any existing process before starting a new one
153
  stop_preview_app()
154
 
155
+ # Update restart time
156
+ last_restart_time = current_time
157
+
158
  # Wait for the port to become available (up to 5 seconds)
159
  for i in range(10): # 10 attempts * 0.5 seconds = 5 seconds max
160
  if is_port_available(PREVIEW_PORT):
 
260
  )
261
  return iframe_html
262
  else:
263
+ print(f"⚠️ Preview app unhealthy: {status}, attempting restart...")
264
+ else:
265
+ print("🔍 No preview process exists, starting new one")
266
 
267
  # Try to start the preview app and show an iframe
268
  success, message = start_preview_app()
 
274
  print(f"🔍 Creating iframe: {iframe_html}")
275
  return iframe_html
276
  else:
277
+ # Show a more user-friendly error message with retry option
278
+ error_html = f"""
279
+ <div style="color: #d32f2f; padding: 20px; text-align: center;
280
+ border: 1px solid #d32f2f; border-radius: 8px;
281
+ background: #ffebee;">
282
+ <h3>🚧 Preview App Temporarily Unavailable</h3>
283
+ <p><strong>Status:</strong> {message}</p>
284
+ <p>The preview app is starting up. Please wait a few seconds
285
+ and try refreshing.</p>
286
+ <button onclick="location.reload()" style="
287
+ background: #1976d2; color: white; border: none;
288
+ padding: 8px 16px; border-radius: 4px; cursor: pointer;">
289
+ Refresh Preview
290
+ </button>
291
+ </div>
292
+ """
293
+ print(f"🔍 Error in preview: {message}")
294
  return error_html
295
 
296
 
 
330
  preview_process = None
331
  return False, "Process died"
332
 
333
+ # Check if responsive with multiple attempts and longer timeout
334
+ max_attempts = 3
335
+ for attempt in range(max_attempts):
336
+ try:
337
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
338
+ sock.settimeout(3) # Increased timeout from 1 to 3 seconds
339
+ result = sock.connect_ex(("127.0.0.1", PREVIEW_PORT))
340
+ if result == 0:
341
+ return True, "Healthy"
342
+ else:
343
+ if attempt < max_attempts - 1:
344
+ print(
345
+ f"🔍 Health check attempt {attempt + 1}/"
346
+ f"{max_attempts} failed, retrying..."
347
+ )
348
+ time.sleep(1) # Wait before retrying
349
+ else:
350
+ return False, "Not responsive on port after multiple attempts"
351
+ except Exception as e:
352
+ if attempt < max_attempts - 1:
353
+ print(
354
+ f"🔍 Health check attempt {attempt + 1}/"
355
+ f"{max_attempts} failed with error: {e}, retrying..."
356
+ )
357
+ time.sleep(1)
358
  else:
359
+ return False, f"Connection check failed: {e}"
360
+
361
+ return False, "Health check failed"
362
 
363
 
364
  def ensure_preview_running():
 
734
  return create_iframe_preview()
735
 
736
  def refresh_all():
737
+ # Only refresh preview if it's not currently healthy
738
+ current_preview = None
739
+ if preview_process is not None:
740
+ healthy, status = check_preview_health()
741
+ if healthy:
742
+ # Preview is healthy, just return existing iframe
743
+ current_preview = (
744
+ f'<iframe src="{PREVIEW_URL}" '
745
+ 'width="100%" height="500px"></iframe>'
746
+ )
747
+ else:
748
+ # Preview needs refresh
749
+ current_preview = create_iframe_preview()
750
+ else:
751
+ # No preview process, create one
752
+ current_preview = create_iframe_preview()
753
 
754
  # Then, update the file explorer and code editor
755
  file_explorer_val = gr.FileExplorer(
 
766
  interactive=True,
767
  autocomplete=True,
768
  )
769
+ return file_explorer_val, code_editor_val, current_preview
770
 
771
  save_btn.click(
772
  fn=save_file,
nginx.conf CHANGED
@@ -20,6 +20,17 @@ http {
20
 
21
  access_log /var/log/nginx/access.log;
22
 
 
 
 
 
 
 
 
 
 
 
 
23
  server {
24
  listen 7860 default_server;
25
  listen [::]:7860 default_server;
@@ -28,7 +39,7 @@ http {
28
 
29
  # Main Gradio app - serve on root path, proxy to internal port 7862
30
  location / {
31
- proxy_pass http://localhost:7862;
32
  proxy_http_version 1.1;
33
  proxy_set_header Upgrade $http_upgrade;
34
  proxy_set_header Connection 'upgrade';
@@ -39,14 +50,22 @@ http {
39
  proxy_set_header X-Forwarded-Proto $scheme;
40
  proxy_cache_bypass $http_upgrade;
41
  proxy_read_timeout 86400;
 
 
42
  proxy_redirect off;
 
 
 
 
 
 
43
  }
44
 
45
  # Preview apps - route to internal port 7861
46
  location /preview/ {
47
  # Remove /preview prefix and pass to the sandbox app
48
  rewrite /preview/(.*) /$1 break;
49
- proxy_pass http://localhost:7861;
50
  proxy_http_version 1.1;
51
  proxy_set_header Upgrade $http_upgrade;
52
  proxy_set_header Connection 'upgrade';
@@ -57,7 +76,18 @@ http {
57
  proxy_set_header X-Forwarded-Proto $scheme;
58
  proxy_cache_bypass $http_upgrade;
59
  proxy_read_timeout 86400;
 
 
60
  proxy_redirect off;
 
 
 
 
 
 
 
 
 
61
  }
62
  }
63
  }
 
20
 
21
  access_log /var/log/nginx/access.log;
22
 
23
# Upstream groups for the two internal apps (preview on 7861, main on 7862).
#
# The servers are pinned to 127.0.0.1 instead of "localhost": a host name that
# resolves to several addresses (for example 127.0.0.1 and ::1) defines one
# upstream server per address. The apps are reached over IPv4 (the main app is
# started with --server-name 0.0.0.0 and the preview health check connects to
# 127.0.0.1), so an IPv6 peer would only ever refuse connections and, combined
# with max_fails=1, could leave the group with no live servers (502).
#
# NOTE: with a single server in a group nginx ignores max_fails and
# fail_timeout (the server is never marked unavailable). They are kept so the
# values apply if additional servers are ever added.
# NOTE(review): upstream keepalive only reuses connections when the proxied
# "Connection" header is cleared; the location blocks send
# "Connection 'upgrade'", so confirm whether the keepalive pool is effective.
upstream preview_backend {
    server 127.0.0.1:7861 max_fails=1 fail_timeout=5s;
    keepalive 32;
}

upstream main_backend {
    server 127.0.0.1:7862 max_fails=1 fail_timeout=5s;
    keepalive 32;
}
33
+
34
  server {
35
  listen 7860 default_server;
36
  listen [::]:7860 default_server;
 
39
 
40
  # Main Gradio app - serve on root path, proxy to internal port 7862
41
  location / {
42
+ proxy_pass http://main_backend;
43
  proxy_http_version 1.1;
44
  proxy_set_header Upgrade $http_upgrade;
45
  proxy_set_header Connection 'upgrade';
 
50
  proxy_set_header X-Forwarded-Proto $scheme;
51
  proxy_cache_bypass $http_upgrade;
52
  proxy_read_timeout 86400;
53
+ proxy_connect_timeout 10s;
54
+ proxy_send_timeout 10s;
55
  proxy_redirect off;
56
+
57
+ # Buffer settings to handle temporary unavailability
58
+ proxy_buffering on;
59
+ proxy_buffer_size 4k;
60
+ proxy_buffers 8 4k;
61
+ proxy_busy_buffers_size 8k;
62
  }
63
 
64
  # Preview apps - route to internal port 7861
65
  location /preview/ {
66
  # Remove /preview prefix and pass to the sandbox app
67
  rewrite /preview/(.*) /$1 break;
68
+ proxy_pass http://preview_backend;
69
  proxy_http_version 1.1;
70
  proxy_set_header Upgrade $http_upgrade;
71
  proxy_set_header Connection 'upgrade';
 
76
  proxy_set_header X-Forwarded-Proto $scheme;
77
  proxy_cache_bypass $http_upgrade;
78
  proxy_read_timeout 86400;
79
+ proxy_connect_timeout 10s;
80
+ proxy_send_timeout 10s;
81
  proxy_redirect off;
82
+
83
+ # Buffer settings and retry logic for preview apps
84
+ proxy_buffering on;
85
+ proxy_buffer_size 4k;
86
+ proxy_buffers 8 4k;
87
+ proxy_busy_buffers_size 8k;
88
+ proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
89
+ proxy_next_upstream_tries 3;
90
+ proxy_next_upstream_timeout 10s;
91
  }
92
  }
93
  }
start.sh CHANGED
@@ -1,25 +1,25 @@
1
  #!/bin/bash
2
 
3
- # Start nginx in background as non-root user
4
- echo "Starting nginx..."
5
- nginx -g 'daemon off;' &
6
- NGINX_PID=$!
7
 
8
- # Function to handle shutdown
9
- cleanup() {
10
- echo "Shutting down..."
11
- kill $NGINX_PID 2>/dev/null
12
- wait $NGINX_PID 2>/dev/null
13
- exit 0
14
- }
15
 
16
- # Set up signal handlers
17
- trap cleanup SIGTERM SIGINT
 
 
 
 
18
 
19
- # Wait a moment for nginx to start
 
 
20
  sleep 2
21
 
 
22
  echo "Starting Gradio app on port 7862..."
23
- # Run the main app in foreground so Docker captures its logs
24
- # Use -u flag to disable Python output buffering
25
- python -u app.py --server-port 7862 --server-name 0.0.0.0
 
1
  #!/bin/bash
2
 
3
echo "===== Application Startup at $(date) ====="

# Create necessary directories
mkdir -p /var/run/nginx /var/lib/nginx/body /var/lib/nginx/proxy /var/lib/nginx/fastcgi /var/lib/nginx/uwsgi /var/lib/nginx/scgi /var/log/nginx

# Set proper permissions
chmod 755 /var/run/nginx /var/lib/nginx/* /var/log/nginx

# Check if nginx is already running and stop it gracefully
if pgrep nginx > /dev/null; then
    echo "Stopping existing nginx..."
    pkill nginx
    sleep 2
fi

# Start nginx with our configuration (backgrounded; the Python app below
# takes over this shell process via exec).
echo "Starting nginx..."
nginx -c /app/nginx.conf -g "daemon off;" &
NGINX_PID=$!
sleep 2

# Surface an early nginx failure in the logs instead of leaving it silent.
# This is only a warning: the app is still started so its own logs remain
# available for debugging.
if ! kill -0 "$NGINX_PID" 2>/dev/null; then
    echo "WARNING: nginx is not running after startup; proxied requests will fail" >&2
fi

# Start the main Gradio app
echo "Starting Gradio app on port 7862..."
# -u disables Python's stdout/stderr buffering so the app's print() output
# reaches the container logs immediately instead of being held in a block
# buffer when stdout is not a terminal.
exec python -u app.py --server-port 7862 --server-name 0.0.0.0