From aa788d7a0b0e703d6ec53b08385b25ba50cee903 Mon Sep 17 00:00:00 2001
From: MarekWo <mwojtaszek@gmail.com>
Date: Sat, 31 Jan 2026 14:05:51 +0100
Subject: [PATCH] feat: Add auto-start for stopped containers in watchdog

- Added AUTO_START option (default: true) to automatically start
  stopped containers, not just restart unhealthy ones
- Added handle_stopped_container() function
- Updated documentation with new configuration option

Set AUTO_START=false to disable automatic starting of stopped containers.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 docs/watchdog.md             |  5 +++-
 scripts/watchdog/install.sh  |  1 +
 scripts/watchdog/watchdog.py | 48 +++++++++++++++++++++++++++++++++++-
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/docs/watchdog.md b/docs/watchdog.md
index 356d8c4..da45e16 100644
--- a/docs/watchdog.md
+++ b/docs/watchdog.md
@@ -1,11 +1,12 @@
 # Container Watchdog
 
-The Container Watchdog is a systemd service that monitors Docker containers and automatically restarts unhealthy ones. This is useful for ensuring reliability, especially on resource-constrained systems.
+The Container Watchdog is a systemd service that monitors Docker containers and automatically restarts unhealthy or stopped ones. This is useful for ensuring reliability, especially on resource-constrained systems.
 
 ## Features
 
 - **Health monitoring** - Checks container status every 30 seconds
 - **Automatic restart** - Restarts containers that become unhealthy
+- **Auto-start stopped containers** - Starts containers that are not running (enabled by default; disable with `AUTO_START=false`)
 - **Diagnostic logging** - Captures container logs before restart for troubleshooting
 - **HTTP status endpoint** - Query container status via HTTP API
 - **Restart history** - Tracks all automatic restarts with timestamps
@@ -80,6 +81,7 @@ If you need to customize the behavior, the service supports these environment va
 | `CHECK_INTERVAL` | `30` | Seconds between health checks |
 | `LOG_FILE` | `/var/log/mc-webui-watchdog.log` | Path to log file |
 | `HTTP_PORT` | `5051` | HTTP status port (0 to disable) |
+| `AUTO_START` | `true` | Start containers that are not running; only the value `false` (case-insensitive) disables this |
 
 To modify defaults, create an override file:
 ```bash
@@ -90,6 +92,7 @@ Then add your overrides, for example:
 ```ini
 [Service]
 Environment=CHECK_INTERVAL=60
+Environment=AUTO_START=false
 ```
 
 ## Uninstall
diff --git a/scripts/watchdog/install.sh b/scripts/watchdog/install.sh
index 569b5ee..8c1a564 100755
--- a/scripts/watchdog/install.sh
+++ b/scripts/watchdog/install.sh
@@ -97,6 +97,7 @@ Environment=MCWEBUI_DIR=${MCWEBUI_DIR}
 Environment=CHECK_INTERVAL=30
 Environment=LOG_FILE=${LOG_FILE}
 Environment=HTTP_PORT=5051
+Environment=AUTO_START=true
 ExecStart=/usr/bin/python3 -u ${SCRIPT_DIR}/watchdog.py
 Restart=always
 RestartSec=10
diff --git a/scripts/watchdog/watchdog.py b/scripts/watchdog/watchdog.py
index 7060175..9651717 100755
--- a/scripts/watchdog/watchdog.py
+++ b/scripts/watchdog/watchdog.py
@@ -7,6 +7,7 @@ Designed to run as a systemd service on the host.
 
 Features:
 - Monitors container health status
+- Automatically starts stopped containers (configurable)
 - Captures logs before restart for diagnostics
 - Logs all events to file
 - HTTP endpoint for status check
@@ -16,6 +17,7 @@ Configuration via environment variables:
 - CHECK_INTERVAL: Seconds between checks (default: 30)
 - LOG_FILE: Path to log file (default: /var/log/mc-webui-watchdog.log)
 - HTTP_PORT: Port for status endpoint (default: 5051, 0 to disable)
+- AUTO_START: Start containers that are not running (default: true; only 'false', case-insensitive, disables it)
 """
 
 import os
@@ -33,6 +35,7 @@ MCWEBUI_DIR = os.environ.get('MCWEBUI_DIR', os.path.expanduser('~/mc-webui'))
 CHECK_INTERVAL = int(os.environ.get('CHECK_INTERVAL', '30'))
 LOG_FILE = os.environ.get('LOG_FILE', '/var/log/mc-webui-watchdog.log')
 HTTP_PORT = int(os.environ.get('HTTP_PORT', '5051'))
+AUTO_START = os.environ.get('AUTO_START', 'true').lower() != 'false'
 
 # Containers to monitor
 CONTAINERS = ['meshcore-bridge', 'mc-webui']
@@ -150,6 +153,45 @@ def restart_container(container_name: str) -> bool:
         return False
 
 
+def start_container(container_name: str) -> bool:
+    """Issue the compose 'start' subcommand for container_name; return True on success, False otherwise."""
+    log(f"Starting container: {container_name}", 'WARN')
+
+    success, stdout, stderr = run_compose_command([  # stdout is not used below
+        'start', container_name
+    ], timeout=120)
+
+    if success:
+        log(f"Container {container_name} started successfully")
+        return True
+    else:
+        log(f"Failed to start {container_name}: {stderr}", 'ERROR')
+        return False
+
+
+def handle_stopped_container(container_name: str, status: dict):
+    """Log a non-running container, try to start it, and record the attempt in restart_history."""
+    global restart_history
+
+    log(f"Container {container_name} is stopped! Status: {status['status']}", 'WARN')
+
+    # Attempt the start; the outcome is recorded below whether or not it succeeded
+    start_success = start_container(container_name)
+
+    # Record the attempt (action 'start') together with the status seen before it
+    restart_history.append({
+        'timestamp': datetime.now().isoformat(),
+        'container': container_name,
+        'action': 'start',
+        'status_before': status,
+        'success': start_success
+    })
+
+    # Keep only the 50 most recent entries; the slice rebinds the module-level list, hence `global`
+    if len(restart_history) > 50:
+        restart_history = restart_history[-50:]
+
+
 def handle_unhealthy_container(container_name: str, status: dict):
     """Handle an unhealthy container - log details and restart."""
     global restart_history
@@ -206,7 +248,10 @@ def check_containers():
         if not status['exists']:
             log(f"Container {container_name} not found", 'WARN')
         elif status['status'] != 'running':
-            log(f"Container {container_name} is not running (status: {status['status']})", 'WARN')
+            if AUTO_START:
+                handle_stopped_container(container_name, status)
+            else:
+                log(f"Container {container_name} is not running (status: {status['status']}), AUTO_START disabled", 'WARN')
         elif status['health'] == 'unhealthy':
             handle_unhealthy_container(container_name, status)
 
@@ -273,6 +318,7 @@ def main():
     log(f"  Check interval: {CHECK_INTERVAL}s")
     log(f"  Log file: {LOG_FILE}")
     log(f"  HTTP port: {HTTP_PORT if HTTP_PORT > 0 else 'disabled'}")
+    log(f"  Auto-start stopped containers: {AUTO_START}")
     log(f"  Monitoring containers: {', '.join(CONTAINERS)}")
     log("=" * 60)