fix(perf): polling-only Socket.IO + channels DB fallback on USB timeout

The Werkzeug dev server can't upgrade WebSockets, so every io() upgrade
attempt returned HTTP 500 and clients fell into a polling/upgrade reconnect
loop — visible as 10-15s freezes on app load. Force transports: ['polling']
with upgrade: false on the /chat, /console and /logs clients; long-polling
keeps real-time pushes working with ~1-2s latency.

When the MeshCore device briefly stalls, get_channel_info() used to block
on the default 30s timeout per slot, so iterating over max_channels slots
could take minutes; in practice only Public answered and the rest timed out,
leaving the UI with just one channel. Drop the per-call timeout to 3s,
re-raise the timeout (concurrent.futures.TimeoutError) to the caller, and
have cli.get_channels() break on the first timeout and merge the remaining
slots from the channels table in the DB (which already mirrors device state
via upsert_channel).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
MarekWo
2026-06-07 07:31:47 +02:00
parent f34c95c15b
commit 1d47c9c0e8
6 changed files with 70 additions and 9 deletions
+11 -3
View File
@@ -14,6 +14,7 @@ import logging
import struct
import threading
import time
from concurrent.futures import TimeoutError as FuturesTimeoutError
from typing import Optional, Any, Dict, List, Tuple
from urllib.parse import urlparse, parse_qs
@@ -2187,13 +2188,17 @@ class DeviceManager:
logger.error(f"Failed to get device info: {e}")
return {}
def get_channel_info(self, idx: int) -> Optional[Dict]:
"""Get info for a specific channel."""
def get_channel_info(self, idx: int, timeout: float = 3) -> Optional[Dict]:
"""Get info for a specific channel.
Raises TimeoutError when the device fails to respond within `timeout`
seconds so callers can distinguish "device sluggish" from "empty slot".
"""
if not self.is_connected:
return None
try:
event = self.execute(self.mc.commands.get_channel(idx))
event = self.execute(self.mc.commands.get_channel(idx), timeout=timeout)
if event:
data = getattr(event, 'payload', None) or getattr(event, 'data', None)
if data and isinstance(data, dict):
@@ -2209,6 +2214,9 @@ class DeviceManager:
'secret': secret,
'channel_idx': data.get('channel_idx', idx),
}
except FuturesTimeoutError:
# Re-raise so caller can break the loop instead of hammering a stuck device
raise
except Exception as e:
logger.error(f"Failed to get channel {idx}: {e}")
return None
+51 -2
View File
@@ -5,6 +5,7 @@ Function signatures preserved for backward compatibility with api.py.
"""
import logging
from concurrent.futures import TimeoutError as FuturesTimeoutError
from typing import Tuple, Optional, List, Dict
from app.config import config
@@ -318,18 +319,66 @@ def check_connection() -> bool:
# =============================================================================
def get_channels() -> Tuple[bool, List[Dict]]:
"""Get list of configured channels."""
"""Get list of configured channels.
When the USB device is briefly unresponsive, a single get_channel_info()
call times out (3 s), and the remaining slots would time out too, so we stop
querying the device and merge whatever we got with the locally cached
channels in the DB. This lets the UI show all channels instead of just Public.
"""
try:
dm = _get_dm()
channels = []
seen_idx = set()
device_partial = False
for idx in range(dm._max_channels):
info = dm.get_channel_info(idx)
try:
info = dm.get_channel_info(idx)
except FuturesTimeoutError:
logger.warning(
f"get_channels: device timeout at slot {idx}"
f"falling back to DB for remaining slots"
)
device_partial = True
break
if info and info.get('name'):
channels.append({
'index': idx,
'name': info.get('name', ''),
'key': info.get('secret', info.get('key', '')),
})
seen_idx.add(idx)
# Keep the DB in sync with what the device just told us
try:
secret_hex = info.get('secret', '') or None
dm.db.upsert_channel(idx, info.get('name', ''), secret_hex)
except Exception as e:
logger.debug(f"upsert_channel({idx}) failed: {e}")
if device_partial:
try:
for row in dm.db.get_channels():
db_idx = row.get('idx')
if db_idx is None or db_idx in seen_idx:
continue
name = row.get('name') or ''
if not name:
continue
channels.append({
'index': db_idx,
'name': name,
'key': row.get('secret', '') or '',
})
channels.sort(key=lambda c: c['index'])
logger.info(
f"get_channels: returned {len(channels)} channels "
f"({len(seen_idx)} from device + DB fallback)"
)
except Exception as e:
logger.error(f"get_channels DB fallback failed: {e}")
return True, channels
except Exception as e:
logger.error(f"get_channels error: {e}")
+2 -1
View File
@@ -403,7 +403,8 @@ function connectChatSocket() {
const wsUrl = window.location.origin;
chatSocket = io(wsUrl + '/chat', {
transports: ['websocket', 'polling'],
transports: ['polling'],
upgrade: false,
reconnection: true,
reconnectionDelay: 2000,
reconnectionDelayMax: 10000,
+2 -1
View File
@@ -38,7 +38,8 @@ function connectWebSocket() {
try {
socket = io(wsUrl + '/console', {
transports: ['websocket', 'polling'],
transports: ['polling'],
upgrade: false,
reconnection: true,
reconnectionAttempts: Infinity,
reconnectionDelay: 1000,
+2 -1
View File
@@ -125,7 +125,8 @@ function connectChatSocket() {
const wsUrl = window.location.origin;
chatSocket = io(wsUrl + '/chat', {
transports: ['websocket', 'polling'],
transports: ['polling'],
upgrade: false,
reconnection: true,
reconnectionDelay: 2000,
reconnectionDelayMax: 10000,
+2 -1
View File
@@ -33,7 +33,8 @@
// --- WebSocket ---
const socket = io('/logs', {
transports: ['websocket', 'polling'],
transports: ['polling'],
upgrade: false,
reconnection: true,
reconnectionDelay: 2000,
});