From 05493d06fc813bc4d14566e16cf06c923d3426bd Mon Sep 17 00:00:00 2001 From: Jack Kingsman Date: Sat, 18 Apr 2026 01:39:26 -0700 Subject: [PATCH] Extend contact read timeouts and add circular load/autoevict load mode --- AGENTS.md | 1 + README_ADVANCED.md | 24 ++++++++++ app/config.py | 1 + app/radio_sync.py | 99 ++++++++++++++++++++++++++++++++++++---- tests/test_radio_sync.py | 2 +- 5 files changed, 116 insertions(+), 11 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index c3ba749..d731778 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -503,6 +503,7 @@ mc.subscribe(EventType.ACK, handler) | `MESHCORE_BASIC_AUTH_PASSWORD` | *(none)* | Optional app-wide HTTP Basic auth password; must be set together with `MESHCORE_BASIC_AUTH_USERNAME` | | `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | `false` | Switch the always-on radio audit task from hourly checks to aggressive 10-second polling; the audit checks both missed message drift and channel-slot cache drift | | `MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE` | `false` | Disable channel-slot reuse and force `set_channel(...)` before every channel send, even on serial/BLE | +| `MESHCORE_LOAD_WITH_AUTOEVICT` | `false` | Enable autoevict contact loading: sets `AUTO_ADD_OVERWRITE_OLDEST` on the radio so adds never fail with TABLE_FULL, skips the removal phase during reconcile, and allows blind loading when `get_contacts` fails. Loaded contacts are not radio-favorited and may be evicted by new adverts when the table is full. | **Note:** Runtime app settings are stored in the database (`app_settings` table), not environment variables. These include `max_radio_contacts`, `auto_decrypt_dm_on_advert`, `advert_interval`, `last_advert_time`, `last_message_times`, `flood_scope`, `blocked_keys`, `blocked_names`, `discovery_blocked_types`, `tracked_telemetry_repeaters`, `auto_resend_channel`, and `telemetry_interval_hours`. `max_radio_contacts` is the configured radio contact capacity baseline used by background maintenance: favorites reload first, non-favorite fill targets about 80% of that value, and full offload/reload triggers around 95% occupancy. They are configured via `GET/PATCH /api/settings`. MQTT, bot, webhook, Apprise, and SQS configs are stored in the `fanout_configs` table, managed via `/api/fanout`. If the radio's channel slots appear unstable or another client is mutating them underneath this app, operators can force the old always-reconfigure send path with `MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE=true`. diff --git a/README_ADVANCED.md b/README_ADVANCED.md index f5d82bf..504bed1 100644 --- a/README_ADVANCED.md +++ b/README_ADVANCED.md @@ -8,6 +8,7 @@ These are intended for diagnosing or working around radios that behave oddly. |----------|---------|-------------| | `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | false | Run aggressive 10-second `get_msg()` fallback polling to check for messages | | `MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE` | false | Disable channel-slot reuse and force `set_channel(...)` before every channel send | +| `MESHCORE_LOAD_WITH_AUTOEVICT` | false | Enable autoevict mode for contact loading (see [Contact Loading Issues](#contact-loading-issues) below) | | `__CLOWNTOWN_DO_CLOCK_WRAPAROUND` | false | Highly experimental: if the radio clock is ahead of system time, try forcing the clock to `0xFFFFFFFF`, wait for uint32 wraparound, and then retry normal time sync before falling back to reboot | By default the app relies on radio events plus MeshCore auto-fetch for incoming messages, and also runs a low-frequency hourly audit poll. That audit checks both: @@ -19,6 +20,29 @@ If the audit finds a mismatch, you'll see an error in the application UI and you `__CLOWNTOWN_DO_CLOCK_WRAPAROUND=true` is a last-resort clock remediation for nodes whose RTC is stuck in the future and where rescue-mode time setting or GPS-based time is not available. It intentionally relies on the clock rolling past the 32-bit epoch boundary, which is board-specific behavior and may not be safe or effective on all MeshCore targets. Treat it as highly experimental. +## Contact Loading Issues + +RemoteTerm loads favorite and recently active contacts onto the radio so that the radio can automatically acknowledge incoming DMs on your behalf. To do this, it first enumerates the radio's existing contact table, then reconciles it with the desired working set. + +On BLE connections with many contacts (or radios with large contact tables from organic advertisements), the initial contact enumeration may take longer than the default timeout. If this happens, the app will automatically retry with an extended 60-second timeout. You may see a toast warning that the radio is temporarily unresponsive during this process. + +If the radio's contact table is already full (from contacts added by advertisements or another client), the app may not be able to load all desired contacts. In this case you'll see a warning that auto-DM acking may not work for all contacts. To resolve this: + +- **Clear the radio's contact table** using another MeshCore client (e.g., the official companion app), then restart RemoteTerm +- **Lower the contact fill target** in Radio Settings to reduce how many contacts the app tries to load +- **Enable autoevict mode** (see below) to let the radio automatically make room +- If you don't need auto-DM acking, you can safely ignore these warnings — **sending and receiving messages is never affected** + +### Autoevict Mode + +Setting `MESHCORE_LOAD_WITH_AUTOEVICT=true` enables an alternative contact loading strategy that avoids TABLE_FULL errors entirely. On connect, the app enables the radio's `AUTO_ADD_OVERWRITE_OLDEST` preference, which makes the radio automatically evict the oldest non-favorite contact when the contact table is full. This means: + +- Contact adds never fail — the radio always makes room by evicting stale contacts +- The app can load contacts even when it can't enumerate the radio's existing contact table (e.g., on slow BLE connections) +- No contact removal step is needed during reconciliation + +**Trade-off:** Contacts loaded by the app are not marked as radio-side favorites, so they are eviction candidates if the radio receives a new advertisement while full. In practice, freshly-loaded contacts have a recent `lastmod` timestamp and will be among the last to be evicted. If you disconnect the radio from RemoteTerm and use it standalone, your contacts will not be protected from eviction by newer advertisements. + ## Sub-Path Reverse Proxy RemoteTerm works behind a reverse proxy that serves it under a sub-path (e.g. `/meshcore/` or Home Assistant ingress). All frontend asset and API paths are relative, so they resolve correctly under any prefix. diff --git a/app/config.py b/app/config.py index a08f836..8a3d1dc 100644 --- a/app/config.py +++ b/app/config.py @@ -26,6 +26,7 @@ class Settings(BaseSettings): default=False, validation_alias="__CLOWNTOWN_DO_CLOCK_WRAPAROUND", ) + load_with_autoevict: bool = False skip_post_connect_sync: bool = False basic_auth_username: str = "" basic_auth_password: str = "" diff --git a/app/radio_sync.py b/app/radio_sync.py index 3f2d9f9..26784f3 100644 --- a/app/radio_sync.py +++ b/app/radio_sync.py @@ -43,9 +43,37 @@ from app.websocket import broadcast_error, broadcast_event logger = logging.getLogger(__name__) DEFAULT_MAX_CHANNELS = 40 +_GET_CONTACTS_TIMEOUT = 10 AdvertMode = Literal["flood", "zero_hop"] +_AUTO_ADD_OVERWRITE_OLDEST = 0x01 + + +async def _enable_autoevict_on_radio(mc: MeshCore) -> None: + """Ensure the radio's AUTO_ADD_OVERWRITE_OLDEST preference bit is set.""" + try: + current = await mc.commands.get_autoadd_config() + if current is None or current.type == EventType.ERROR: + logger.warning("Could not read autoadd config from radio: %s", current) + return + current_flags = current.payload.get("config", 0) + if current_flags & _AUTO_ADD_OVERWRITE_OLDEST: + logger.debug("Radio autoevict already enabled (autoadd_config=0x%02x)", current_flags) + return + new_flags = current_flags | _AUTO_ADD_OVERWRITE_OLDEST + result = await mc.commands.set_autoadd_config(new_flags) + if result is not None and result.type == EventType.OK: + logger.info( + "Enabled radio autoevict (autoadd_config 0x%02x -> 0x%02x)", + current_flags, + new_flags, + ) + else: + logger.warning("Failed to enable radio autoevict: %s", result) + except Exception as exc: + logger.warning("Error enabling radio autoevict: %s", exc) + def _contact_sync_debug_fields(contact: Contact) -> dict[str, object]: """Return key contact fields for sync failure diagnostics.""" @@ -239,7 +267,7 @@ async def should_run_full_periodic_sync(mc: MeshCore) -> bool: capacity = _effective_radio_capacity(app_settings.max_radio_contacts) refill_target, full_sync_trigger = _compute_radio_contact_limits(capacity) - result = await mc.commands.get_contacts() + result = await mc.commands.get_contacts(timeout=_GET_CONTACTS_TIMEOUT) if result is None or result.type == EventType.ERROR: logger.warning("Periodic sync occupancy check failed: %s", result) return False @@ -430,6 +458,10 @@ async def ensure_default_channels() -> None: async def sync_and_offload_all(mc: MeshCore) -> dict: """Run fast startup sync, then background contact reconcile.""" + autoevict = settings.load_with_autoevict + + if autoevict: + await _enable_autoevict_on_radio(mc) # Contact on_radio is legacy/stale metadata. Clear it during the offload/reload # cycle so old rows stop claiming radio residency we do not actively track. @@ -441,15 +473,31 @@ async def sync_and_offload_all(mc: MeshCore) -> dict: # Ensure default channels exist await ensure_default_channels() - start_background_contact_reconciliation( - initial_radio_contacts=contacts_result.get("radio_contacts", {}), - expected_mc=mc, - ) + contact_reconcile_started = False + if "error" in contacts_result and not autoevict: + # In normal mode, we can't reconcile blind — skip and warn. + # In autoevict mode, we can load blind because adds never fail. + logger.warning("Skipping background contact reconcile — could not enumerate radio contacts") + broadcast_error( + "Could not enumerate radio contacts", + "Contact loading skipped — DM auto-acking for favorites and recent " + "contacts may not work, but sending and receiving is not affected. " + "Set MESHCORE_LOAD_WITH_AUTOEVICT=true to load contacts without " + "needing to read the radio first. See 'Contact Loading Issues' in " + "the Advanced Setup documentation.", + ) + else: + start_background_contact_reconciliation( + initial_radio_contacts=contacts_result.get("radio_contacts", {}), + expected_mc=mc, + autoevict=autoevict, + ) + contact_reconcile_started = True return { "contacts": contacts_result, "channels": channels_result, - "contact_reconcile_started": True, + "contact_reconcile_started": contact_reconcile_started, } @@ -1045,7 +1093,7 @@ async def sync_contacts_from_radio(mc: MeshCore) -> dict: synced = 0 try: - result = await mc.commands.get_contacts() + result = await mc.commands.get_contacts(timeout=_GET_CONTACTS_TIMEOUT) if result is None or result.type == EventType.ERROR: logger.error( @@ -1108,12 +1156,19 @@ async def _reconcile_radio_contacts_in_background( *, initial_radio_contacts: dict[str, dict], expected_mc: MeshCore, + autoevict: bool = False, ) -> None: - """Converge radio contacts toward the desired favorites+recents working set.""" + """Converge radio contacts toward the desired favorites+recents working set. + + When *autoevict* is ``True`` the removal phase is skipped entirely and the + radio's ``AUTO_ADD_OVERWRITE_OLDEST`` preference is assumed to be enabled, + so ``add_contact`` never returns ``TABLE_FULL``. + """ radio_contacts = dict(initial_radio_contacts) removed = 0 loaded = 0 failed = 0 + table_full = False try: while True: @@ -1127,7 +1182,9 @@ async def _reconcile_radio_contacts_in_background( for contact in selected_contacts if len(contact.public_key) >= 64 } - removable_keys = [key for key in radio_contacts if key not in desired_contacts] + removable_keys = ( + [] if autoevict else [key for key in radio_contacts if key not in desired_contacts] + ) missing_contacts = [ contact for key, contact in desired_contacts.items() if key not in radio_contacts ] @@ -1229,6 +1286,14 @@ async def _reconcile_radio_contacts_in_background( else: failed += 1 reason = add_result.payload + if isinstance(reason, dict) and reason.get("error_code") == 3: + logger.warning( + "Radio contact table full — stopping " + "contact reconcile (loaded %d this cycle)", + loaded, + ) + table_full = True + break hint = "" if reason is None: hint = ( @@ -1247,6 +1312,17 @@ async def _reconcile_radio_contacts_in_background( await asyncio.sleep(CONTACT_RECONCILE_BUSY_BACKOFF_SECONDS) continue + if table_full: + broadcast_error( + "Could not load all desired contacts onto the radio for auto-DM ack", + "The radio's contact table is full. Clearing your radio contacts " + "using another client, lowering your contact fill target in " + "settings, or setting MESHCORE_LOAD_WITH_AUTOEVICT=true may " + "relieve this. See 'Contact Loading Issues' in the Advanced " + "Setup documentation.", + ) + break + await asyncio.sleep(CONTACT_RECONCILE_YIELD_SECONDS) if not progressed: continue @@ -1269,6 +1345,7 @@ def start_background_contact_reconciliation( *, initial_radio_contacts: dict[str, dict], expected_mc: MeshCore, + autoevict: bool = False, ) -> None: """Start or replace the background contact reconcile task for the current radio.""" global _contact_reconcile_task @@ -1280,11 +1357,13 @@ def start_background_contact_reconciliation( _reconcile_radio_contacts_in_background( initial_radio_contacts=initial_radio_contacts, expected_mc=expected_mc, + autoevict=autoevict, ) ) logger.info( - "Started background contact reconcile for %d radio contact(s)", + "Started background contact reconcile for %d radio contact(s)%s", len(initial_radio_contacts), + " (autoevict mode)" if autoevict else "", ) diff --git a/tests/test_radio_sync.py b/tests/test_radio_sync.py index a0da173..2b1075b 100644 --- a/tests/test_radio_sync.py +++ b/tests/test_radio_sync.py @@ -516,7 +516,7 @@ class TestSyncAndOffloadAll: result = await sync_and_offload_all(mock_mc) mock_start.assert_called_once_with( - initial_radio_contacts=radio_contacts, expected_mc=mock_mc + initial_radio_contacts=radio_contacts, expected_mc=mock_mc, autoevict=False ) assert result["contact_reconcile_started"] is True