diff --git a/Dockerfile b/Dockerfile index 3fce8dc..5e2db88 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,5 +32,11 @@ ENV FLASK_HOST=0.0.0.0 ENV FLASK_PORT=5000 ENV FLASK_DEBUG=false -# Run the application +# Entrypoint: disconnect stale BLE connections before starting the app. +# BlueZ auto-reconnects trusted devices, leaving stale GATT notification +# handles that block bleak from establishing a new session. +COPY scripts/docker-entrypoint.sh /docker-entrypoint.sh +RUN chmod +x /docker-entrypoint.sh + +ENTRYPOINT ["/docker-entrypoint.sh"] CMD ["python", "-m", "app.main"] diff --git a/app/device_manager.py b/app/device_manager.py index 87226c1..721fdbb 100644 --- a/app/device_manager.py +++ b/app/device_manager.py @@ -120,7 +120,6 @@ class DeviceManager: self._retry_context = {} # {dm_id: {attempt, max_attempts, path}} — for _on_ack self._ble_keepalive_task = None # asyncio.Task for BLE keepalive self._ble_permanently_failed = False # True when all reconnect attempts exhausted - self._ble_reconnecting = False # Guard against concurrent reconnect attempts @property def is_connected(self) -> bool: @@ -168,10 +167,6 @@ class DeviceManager: logger.error(f"Connection attempt {attempt}/{max_retries} failed: {e}") if attempt < max_retries: - # BLE: power-cycle adapter every 3rd failed attempt to clear - # stale GATT notification handles from previous sessions - if self.config.use_ble and attempt % 3 == 0: - await self._ble_power_cycle_adapter() delay = min(base_delay * attempt, 30.0) logger.info(f"Retrying in {delay:.0f}s...") await asyncio.sleep(delay) @@ -208,18 +203,15 @@ class DeviceManager: raise RuntimeError("No serial port detected. Set MC_SERIAL_PORT explicitly.") @staticmethod - async def _ble_ensure_connected(address: str): - """Ensure the BLE device is connected via BlueZ before bleak takes over. + async def _ble_force_disconnect(address: str): + """Force-disconnect a BLE device via D-Bus if BlueZ still holds a stale connection. - bleak inside Docker cannot initiate new BLE connections via - Device1.Connect() — it can only take over connections that BlueZ - has already established. We use D-Bus to trigger the connection - from BlueZ directly, then bleak takes over the GATT session. + BlueZ auto-reconnects trusted devices, which prevents bleak from + establishing a new GATT session after a container restart. """ - import subprocess - dbus_path = '/org/bluez/hci0/dev_' + address.replace(':', '_') try: - # Check if device is already connected + import subprocess + dbus_path = '/org/bluez/hci0/dev_' + address.replace(':', '_') result = subprocess.run( ['dbus-send', '--system', '--print-reply', '--dest=org.bluez', dbus_path, 'org.freedesktop.DBus.Properties.Get', @@ -227,108 +219,16 @@ class DeviceManager: capture_output=True, text=True, timeout=5 ) if 'boolean true' in result.stdout: - logger.info(f"BLE device {address} already connected via BlueZ") - return True - - # Device not connected — trigger connection via BlueZ D-Bus - logger.info(f"Connecting BLE device {address} via BlueZ D-Bus...") - result = subprocess.run( - ['dbus-send', '--system', '--print-reply', '--dest=org.bluez', - dbus_path, 'org.bluez.Device1.Connect'], - capture_output=True, text=True, timeout=30 - ) - if result.returncode == 0: - await asyncio.sleep(1) # Let GATT services resolve - logger.info(f"BLE device {address} connected via BlueZ") - return True - else: - logger.warning(f"BlueZ connect failed: {result.stderr.strip()}") - return False + logger.info(f"BLE device {address} has stale BlueZ connection, disconnecting...") + subprocess.run( + ['dbus-send', '--system', '--print-reply', '--dest=org.bluez', + dbus_path, 'org.bluez.Device1.Disconnect'], + capture_output=True, text=True, timeout=5 + ) + await asyncio.sleep(2) # Let BlueZ settle + logger.info("Stale BLE connection cleared") except Exception as e: - logger.warning(f"BLE ensure-connected failed: {e}") - return False - - @staticmethod - async def _ble_power_cycle_adapter(): - """Power-cycle the Bluetooth adapter via D-Bus to clear all stale state. - - This clears stale GATT notification handles ('Notify acquired' error) - that persist after an abnormal bleak disconnect. A simple - Device1.Disconnect is not enough — the notification subscriptions are - per-adapter, not per-device. - """ - import subprocess - adapter_path = '/org/bluez/hci0' - try: - logger.info("Power-cycling Bluetooth adapter to clear stale GATT state...") - # Power OFF - subprocess.run( - ['dbus-send', '--system', '--print-reply', '--dest=org.bluez', - adapter_path, 'org.freedesktop.DBus.Properties.Set', - 'string:org.bluez.Adapter1', 'string:Powered', - 'variant:boolean:false'], - capture_output=True, text=True, timeout=5 - ) - await asyncio.sleep(2) - # Power ON - subprocess.run( - ['dbus-send', '--system', '--print-reply', '--dest=org.bluez', - adapter_path, 'org.freedesktop.DBus.Properties.Set', - 'string:org.bluez.Adapter1', 'string:Powered', - 'variant:boolean:true'], - capture_output=True, text=True, timeout=5 - ) - await asyncio.sleep(5) # BlueZ needs time to re-init and auto-connect trusted devices - logger.info("Bluetooth adapter power-cycled successfully") - except Exception as e: - logger.warning(f"Bluetooth adapter power-cycle failed: {e}") - - async def _ble_reconnect(self): - """Reconnect BLE with adapter power-cycle to clear stale GATT state. - - Uses aggressive cleanup (adapter power-cycle) between attempts to - avoid the 'Notify acquired' error that blocks reconnection after - an abnormal disconnect. - """ - if self._ble_reconnecting: - logger.debug("BLE reconnect already in progress, skipping") - return - self._ble_reconnecting = True - - MAX_ATTEMPTS = 5 - for attempt in range(1, MAX_ATTEMPTS + 1): - delay = 5 * attempt - logger.info(f"BLE reconnecting in {delay}s (attempt {attempt}/{MAX_ATTEMPTS})...") - await asyncio.sleep(delay) - - try: - # Clean up old mc instance - if self.mc: - try: - await self.mc.disconnect() - except Exception: - pass - self.mc = None - - # Power-cycle adapter to clear stale notification handles - await self._ble_power_cycle_adapter() - - await self._connect() - if self._connected: - logger.info("BLE reconnected successfully") - self._ble_reconnecting = False - if self.socketio: - self.socketio.emit('device_status', { - 'connected': True, - }, namespace='/chat') - return - except Exception as e: - logger.error(f"BLE reconnect attempt {attempt} failed: {e}") - - self._ble_reconnecting = False - logger.error(f"BLE reconnection failed after {MAX_ATTEMPTS} attempts — " - "marking permanently failed (healthcheck will trigger restart)") - self._ble_permanently_failed = True + logger.debug(f"BLE force-disconnect check skipped: {e}") async def _connect(self): """Connect to device via BLE, TCP, or serial and subscribe to events.""" @@ -337,25 +237,12 @@ class DeviceManager: try: if self.config.use_ble: logger.info(f"Connecting via BLE: {self.config.MC_BLE_ADDRESS}") - # bleak inside Docker cannot initiate new BLE connections — - # it can only take over connections already established by - # BlueZ. Ensure the device is connected via BlueZ first. - await self._ble_ensure_connected(self.config.MC_BLE_ADDRESS) - - # bleak 3.x: BleakClient(address_string) can't find paired - # devices. Use BleakScanner to get a BLEDevice object. - from bleak import BleakScanner - ble_device = await BleakScanner.find_device_by_address( - self.config.MC_BLE_ADDRESS, timeout=10 - ) - if not ble_device: - raise RuntimeError( - f"BLE device {self.config.MC_BLE_ADDRESS} not found " - "in BlueZ — check pairing" - ) - logger.info(f"BLE device found: {ble_device.name}") + # Force-disconnect any stale BlueZ connection before connecting. + # BlueZ auto-reconnects trusted devices, which blocks bleak from + # establishing a fresh GATT session after a container restart. + await self._ble_force_disconnect(self.config.MC_BLE_ADDRESS) self.mc = await MeshCore.create_ble( - device=ble_device, + address=self.config.MC_BLE_ADDRESS, auto_reconnect=False, ) elif self.config.use_tcp: @@ -531,15 +418,14 @@ class DeviceManager: ) logger.debug("BLE keepalive OK") except Exception as e: - logger.warning(f"BLE keepalive failed: {e} — triggering reconnect") - # Synthesize a disconnect event to reuse existing reconnection logic + logger.warning(f"BLE keepalive failed: {e} — marking for restart") self._connected = False + self._ble_permanently_failed = True if self.socketio: self.socketio.emit('device_status', { 'connected': False, }, namespace='/chat') - await self._ble_reconnect() - return # reconnect loop takes over + return def _sync_contacts_to_db(self): """Sync device contacts to database (bidirectional). @@ -1350,9 +1236,15 @@ class DeviceManager: 'connected': False, }, namespace='/chat') - # BLE needs adapter power-cycle to clear stale GATT state + # BLE: reconnection from inside a running container is unreliable + # because bleak leaves stale GATT notification handles that block + # new connections ('Notify acquired' error). Mark as permanently + # failed so the health check returns 503 and Docker restarts the + # container, which gives us a clean BLE state. if self.config.use_ble: - await self._ble_reconnect() + logger.error("BLE disconnected — marking permanently failed " + "(container restart required for clean BLE state)") + self._ble_permanently_failed = True return # Serial/TCP: simple reconnect with backoff diff --git a/scripts/docker-entrypoint.sh b/scripts/docker-entrypoint.sh new file mode 100644 index 0000000..781227b --- /dev/null +++ b/scripts/docker-entrypoint.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Docker entrypoint for mc-webui +# +# Disconnects stale BLE connections before starting the app. +# BlueZ on the host auto-reconnects trusted devices, leaving stale GATT +# notification handles that block bleak from establishing a new session. +# A clean disconnect here ensures the app starts with a fresh BLE state. + +set -e + +# If MC_BLE_ADDRESS is set, clean up stale BLE connections +if [ -n "$MC_BLE_ADDRESS" ]; then + DBUS_PATH="/org/bluez/hci0/dev_${MC_BLE_ADDRESS//:/_}" + + # Check if device is connected via BlueZ + CONNECTED=$(dbus-send --system --print-reply --dest=org.bluez \ + "$DBUS_PATH" org.freedesktop.DBus.Properties.Get \ + string:org.bluez.Device1 string:Connected 2>/dev/null \ + | grep -c "boolean true" || true) + + if [ "$CONNECTED" = "1" ]; then + echo "[entrypoint] BLE device $MC_BLE_ADDRESS is connected, disconnecting stale session..." + dbus-send --system --print-reply --dest=org.bluez \ + "$DBUS_PATH" org.bluez.Device1.Disconnect 2>/dev/null || true + sleep 2 + echo "[entrypoint] Stale BLE connection cleared" + fi +fi + +# Run the main application +exec "$@"