feat: Add auto-start for stopped containers in watchdog

- Added AUTO_START option (default: true) to automatically start
  stopped containers, not just restart unhealthy ones
- Added handle_stopped_container() function
- Updated documentation with new configuration option

Set AUTO_START=false to disable automatic starting of stopped containers.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
MarekWo
2026-01-31 14:05:51 +01:00
parent bb292b1a1d
commit aa788d7a0b
3 changed files with 52 additions and 2 deletions

View File

@@ -97,6 +97,7 @@ Environment=MCWEBUI_DIR=${MCWEBUI_DIR}
Environment=CHECK_INTERVAL=30
Environment=LOG_FILE=${LOG_FILE}
Environment=HTTP_PORT=5051
Environment=AUTO_START=true
ExecStart=/usr/bin/python3 -u ${SCRIPT_DIR}/watchdog.py
Restart=always
RestartSec=10

View File

@@ -7,6 +7,7 @@ Designed to run as a systemd service on the host.
Features:
- Monitors container health status
- Automatically starts stopped containers (configurable)
- Captures logs before restart for diagnostics
- Logs all events to file
- HTTP endpoint for status check
@@ -16,6 +17,7 @@ Configuration via environment variables:
- CHECK_INTERVAL: Seconds between checks (default: 30)
- LOG_FILE: Path to log file (default: /var/log/mc-webui-watchdog.log)
- HTTP_PORT: Port for status endpoint (default: 5051, 0 to disable)
- AUTO_START: Start stopped containers (default: true, set to 'false' to disable)
"""
import os
@@ -33,6 +35,7 @@ MCWEBUI_DIR = os.environ.get('MCWEBUI_DIR', os.path.expanduser('~/mc-webui'))
# Seconds between health checks, read from the CHECK_INTERVAL environment
# variable (must parse as an integer).
CHECK_INTERVAL = int(os.environ.get('CHECK_INTERVAL', '30'))
# Path of the file the watchdog logs to.
LOG_FILE = os.environ.get('LOG_FILE', '/var/log/mc-webui-watchdog.log')
# Port for the HTTP status endpoint (must parse as an integer).
HTTP_PORT = int(os.environ.get('HTTP_PORT', '5051'))
# Auto-start is opt-out: it stays enabled unless AUTO_START is explicitly set
# to a "false" spelling. Surrounding whitespace and letter case are ignored so
# that values such as ' False ', '0', 'no' or 'off' disable it as an operator
# would expect; unset, empty, or any other value keeps it enabled.
AUTO_START = os.environ.get('AUTO_START', 'true').strip().lower() not in (
    'false', '0', 'no', 'off',
)
# Containers to monitor
CONTAINERS = ['meshcore-bridge', 'mc-webui']
@@ -150,6 +153,45 @@ def restart_container(container_name: str) -> bool:
return False
def start_container(container_name: str) -> bool:
    """Start a stopped container using docker compose.

    Runs the compose ``start`` subcommand for ``container_name`` through
    ``run_compose_command`` with a 120-second timeout and logs the outcome.

    Args:
        container_name: Name passed to the compose ``start`` subcommand.

    Returns:
        True if the compose command reported success, False otherwise.
    """
    log(f"Starting container: {container_name}", 'WARN')

    compose_args = ['start', container_name]
    ok, _out, err = run_compose_command(compose_args, timeout=120)

    # Guard clause: report the failure (including captured stderr) and bail.
    if not ok:
        log(f"Failed to start {container_name}: {err}", 'ERROR')
        return False

    log(f"Container {container_name} started successfully")
    return True
def handle_stopped_container(container_name: str, status: dict):
    """Handle a container that is not running: log it, start it, record it.

    Args:
        container_name: Name of the container to start.
        status: Status dict for the container; its ``'status'`` entry is
            logged and the whole dict is stored in the history record.

    Side effects:
        Appends one entry to the module-level ``restart_history`` and trims
        that list to its 50 most recent entries.
    """
    global restart_history

    log(f"Container {container_name} is stopped! Status: {status['status']}", 'WARN')

    # Attempt the start first so the record below reflects its outcome.
    started = start_container(container_name)

    # The timestamp is taken after the start attempt has returned.
    entry = {
        'timestamp': datetime.now().isoformat(),
        'container': container_name,
        'action': 'start',
        'status_before': status,
        'success': started,
    }
    restart_history.append(entry)

    # Cap the history at the 50 newest entries (rebinds the global list).
    if len(restart_history) > 50:
        restart_history = restart_history[-50:]
def handle_unhealthy_container(container_name: str, status: dict):
"""Handle an unhealthy container - log details and restart."""
global restart_history
@@ -206,7 +248,10 @@ def check_containers():
if not status['exists']:
log(f"Container {container_name} not found", 'WARN')
elif status['status'] != 'running':
log(f"Container {container_name} is not running (status: {status['status']})", 'WARN')
if AUTO_START:
handle_stopped_container(container_name, status)
else:
log(f"Container {container_name} is not running (status: {status['status']}), AUTO_START disabled", 'WARN')
elif status['health'] == 'unhealthy':
handle_unhealthy_container(container_name, status)
@@ -273,6 +318,7 @@ def main():
log(f" Check interval: {CHECK_INTERVAL}s")
log(f" Log file: {LOG_FILE}")
log(f" HTTP port: {HTTP_PORT if HTTP_PORT > 0 else 'disabled'}")
log(f" Auto-start stopped containers: {AUTO_START}")
log(f" Monitoring containers: {', '.join(CONTAINERS)}")
log("=" * 60)