From 05493d06fc813bc4d14566e16cf06c923d3426bd Mon Sep 17 00:00:00 2001 From: Jack Kingsman Date: Sat, 18 Apr 2026 01:39:26 -0700 Subject: [PATCH 1/4] Extend contact read timeouts and add circular load/autoevict load mode --- AGENTS.md | 1 + README_ADVANCED.md | 24 ++++++++++ app/config.py | 1 + app/radio_sync.py | 99 ++++++++++++++++++++++++++++++++++++---- tests/test_radio_sync.py | 2 +- 5 files changed, 116 insertions(+), 11 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index c3ba749..d731778 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -503,6 +503,7 @@ mc.subscribe(EventType.ACK, handler) | `MESHCORE_BASIC_AUTH_PASSWORD` | *(none)* | Optional app-wide HTTP Basic auth password; must be set together with `MESHCORE_BASIC_AUTH_USERNAME` | | `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | `false` | Switch the always-on radio audit task from hourly checks to aggressive 10-second polling; the audit checks both missed message drift and channel-slot cache drift | | `MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE` | `false` | Disable channel-slot reuse and force `set_channel(...)` before every channel send, even on serial/BLE | +| `MESHCORE_LOAD_WITH_AUTOEVICT` | `false` | Enable autoevict contact loading: sets `AUTO_ADD_OVERWRITE_OLDEST` on the radio so adds never fail with TABLE_FULL, skips the removal phase during reconcile, and allows blind loading when `get_contacts` fails. Loaded contacts are not radio-favorited and may be evicted by new adverts when the table is full. | **Note:** Runtime app settings are stored in the database (`app_settings` table), not environment variables. These include `max_radio_contacts`, `auto_decrypt_dm_on_advert`, `advert_interval`, `last_advert_time`, `last_message_times`, `flood_scope`, `blocked_keys`, `blocked_names`, `discovery_blocked_types`, `tracked_telemetry_repeaters`, `auto_resend_channel`, and `telemetry_interval_hours`. 
`max_radio_contacts` is the configured radio contact capacity baseline used by background maintenance: favorites reload first, non-favorite fill targets about 80% of that value, and full offload/reload triggers around 95% occupancy. They are configured via `GET/PATCH /api/settings`. MQTT, bot, webhook, Apprise, and SQS configs are stored in the `fanout_configs` table, managed via `/api/fanout`. If the radio's channel slots appear unstable or another client is mutating them underneath this app, operators can force the old always-reconfigure send path with `MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE=true`. diff --git a/README_ADVANCED.md b/README_ADVANCED.md index f5d82bf..504bed1 100644 --- a/README_ADVANCED.md +++ b/README_ADVANCED.md @@ -8,6 +8,7 @@ These are intended for diagnosing or working around radios that behave oddly. |----------|---------|-------------| | `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | false | Run aggressive 10-second `get_msg()` fallback polling to check for messages | | `MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE` | false | Disable channel-slot reuse and force `set_channel(...)` before every channel send | +| `MESHCORE_LOAD_WITH_AUTOEVICT` | false | Enable autoevict mode for contact loading (see [Contact Loading Issues](#contact-loading-issues) below) | | `__CLOWNTOWN_DO_CLOCK_WRAPAROUND` | false | Highly experimental: if the radio clock is ahead of system time, try forcing the clock to `0xFFFFFFFF`, wait for uint32 wraparound, and then retry normal time sync before falling back to reboot | By default the app relies on radio events plus MeshCore auto-fetch for incoming messages, and also runs a low-frequency hourly audit poll. That audit checks both: @@ -19,6 +20,29 @@ If the audit finds a mismatch, you'll see an error in the application UI and you `__CLOWNTOWN_DO_CLOCK_WRAPAROUND=true` is a last-resort clock remediation for nodes whose RTC is stuck in the future and where rescue-mode time setting or GPS-based time is not available. 
It intentionally relies on the clock rolling past the 32-bit epoch boundary, which is board-specific behavior and may not be safe or effective on all MeshCore targets. Treat it as highly experimental. +## Contact Loading Issues + +RemoteTerm loads favorite and recently active contacts onto the radio so that the radio can automatically acknowledge incoming DMs on your behalf. To do this, it first enumerates the radio's existing contact table, then reconciles it with the desired working set. + +On BLE connections with many contacts (or radios with large contact tables from organic advertisements), the initial contact enumeration may take longer than the default timeout. If this happens, the app will automatically retry with an extended 60-second timeout. You may see a toast warning that the radio is temporarily unresponsive during this process. + +If the radio's contact table is already full (from contacts added by advertisements or another client), the app may not be able to load all desired contacts. In this case you'll see a warning that auto-DM acking may not work for all contacts. To resolve this: + +- **Clear the radio's contact table** using another MeshCore client (e.g., the official companion app), then restart RemoteTerm +- **Lower the contact fill target** in Radio Settings to reduce how many contacts the app tries to load +- **Enable autoevict mode** (see below) to let the radio automatically make room +- If you don't need auto-DM acking, you can safely ignore these warnings — **sending and receiving messages is never affected** + +### Autoevict Mode + +Setting `MESHCORE_LOAD_WITH_AUTOEVICT=true` enables an alternative contact loading strategy that avoids TABLE_FULL errors entirely. On connect, the app enables the radio's `AUTO_ADD_OVERWRITE_OLDEST` preference, which makes the radio automatically evict the oldest non-favorite contact when the contact table is full. 
This means: + +- Contact adds never fail — the radio always makes room by evicting stale contacts +- The app can load contacts even when it can't enumerate the radio's existing contact table (e.g., on slow BLE connections) +- No contact removal step is needed during reconciliation + +**Trade-off:** Contacts loaded by the app are not marked as radio-side favorites, so they are eviction candidates if the radio receives a new advertisement while full. In practice, freshly-loaded contacts have a recent `lastmod` timestamp and will be among the last to be evicted. If you disconnect the radio from RemoteTerm and use it standalone, your contacts will not be protected from eviction by newer advertisements. + ## Sub-Path Reverse Proxy RemoteTerm works behind a reverse proxy that serves it under a sub-path (e.g. `/meshcore/` or Home Assistant ingress). All frontend asset and API paths are relative, so they resolve correctly under any prefix. diff --git a/app/config.py b/app/config.py index a08f836..8a3d1dc 100644 --- a/app/config.py +++ b/app/config.py @@ -26,6 +26,7 @@ class Settings(BaseSettings): default=False, validation_alias="__CLOWNTOWN_DO_CLOCK_WRAPAROUND", ) + load_with_autoevict: bool = False skip_post_connect_sync: bool = False basic_auth_username: str = "" basic_auth_password: str = "" diff --git a/app/radio_sync.py b/app/radio_sync.py index 3f2d9f9..26784f3 100644 --- a/app/radio_sync.py +++ b/app/radio_sync.py @@ -43,9 +43,37 @@ from app.websocket import broadcast_error, broadcast_event logger = logging.getLogger(__name__) DEFAULT_MAX_CHANNELS = 40 +_GET_CONTACTS_TIMEOUT = 10 AdvertMode = Literal["flood", "zero_hop"] +_AUTO_ADD_OVERWRITE_OLDEST = 0x01 + + +async def _enable_autoevict_on_radio(mc: MeshCore) -> None: + """Ensure the radio's AUTO_ADD_OVERWRITE_OLDEST preference bit is set.""" + try: + current = await mc.commands.get_autoadd_config() + if current is None or current.type == EventType.ERROR: + logger.warning("Could not read autoadd config from radio: 
%s", current) + return + current_flags = current.payload.get("config", 0) + if current_flags & _AUTO_ADD_OVERWRITE_OLDEST: + logger.debug("Radio autoevict already enabled (autoadd_config=0x%02x)", current_flags) + return + new_flags = current_flags | _AUTO_ADD_OVERWRITE_OLDEST + result = await mc.commands.set_autoadd_config(new_flags) + if result is not None and result.type == EventType.OK: + logger.info( + "Enabled radio autoevict (autoadd_config 0x%02x -> 0x%02x)", + current_flags, + new_flags, + ) + else: + logger.warning("Failed to enable radio autoevict: %s", result) + except Exception as exc: + logger.warning("Error enabling radio autoevict: %s", exc) + def _contact_sync_debug_fields(contact: Contact) -> dict[str, object]: """Return key contact fields for sync failure diagnostics.""" @@ -239,7 +267,7 @@ async def should_run_full_periodic_sync(mc: MeshCore) -> bool: capacity = _effective_radio_capacity(app_settings.max_radio_contacts) refill_target, full_sync_trigger = _compute_radio_contact_limits(capacity) - result = await mc.commands.get_contacts() + result = await mc.commands.get_contacts(timeout=_GET_CONTACTS_TIMEOUT) if result is None or result.type == EventType.ERROR: logger.warning("Periodic sync occupancy check failed: %s", result) return False @@ -430,6 +458,10 @@ async def ensure_default_channels() -> None: async def sync_and_offload_all(mc: MeshCore) -> dict: """Run fast startup sync, then background contact reconcile.""" + autoevict = settings.load_with_autoevict + + if autoevict: + await _enable_autoevict_on_radio(mc) # Contact on_radio is legacy/stale metadata. Clear it during the offload/reload # cycle so old rows stop claiming radio residency we do not actively track. 
@@ -441,15 +473,31 @@ async def sync_and_offload_all(mc: MeshCore) -> dict: # Ensure default channels exist await ensure_default_channels() - start_background_contact_reconciliation( - initial_radio_contacts=contacts_result.get("radio_contacts", {}), - expected_mc=mc, - ) + contact_reconcile_started = False + if "error" in contacts_result and not autoevict: + # In normal mode, we can't reconcile blind — skip and warn. + # In autoevict mode, we can load blind because adds never fail. + logger.warning("Skipping background contact reconcile — could not enumerate radio contacts") + broadcast_error( + "Could not enumerate radio contacts", + "Contact loading skipped — DM auto-acking for favorites and recent " + "contacts may not work, but sending and receiving is not affected. " + "Set MESHCORE_LOAD_WITH_AUTOEVICT=true to load contacts without " + "needing to read the radio first. See 'Contact Loading Issues' in " + "the Advanced Setup documentation.", + ) + else: + start_background_contact_reconciliation( + initial_radio_contacts=contacts_result.get("radio_contacts", {}), + expected_mc=mc, + autoevict=autoevict, + ) + contact_reconcile_started = True return { "contacts": contacts_result, "channels": channels_result, - "contact_reconcile_started": True, + "contact_reconcile_started": contact_reconcile_started, } @@ -1045,7 +1093,7 @@ async def sync_contacts_from_radio(mc: MeshCore) -> dict: synced = 0 try: - result = await mc.commands.get_contacts() + result = await mc.commands.get_contacts(timeout=_GET_CONTACTS_TIMEOUT) if result is None or result.type == EventType.ERROR: logger.error( @@ -1108,12 +1156,19 @@ async def _reconcile_radio_contacts_in_background( *, initial_radio_contacts: dict[str, dict], expected_mc: MeshCore, + autoevict: bool = False, ) -> None: - """Converge radio contacts toward the desired favorites+recents working set.""" + """Converge radio contacts toward the desired favorites+recents working set. 
+ + When *autoevict* is ``True`` the removal phase is skipped entirely and the + radio's ``AUTO_ADD_OVERWRITE_OLDEST`` preference is assumed to be enabled, + so ``add_contact`` never returns ``TABLE_FULL``. + """ radio_contacts = dict(initial_radio_contacts) removed = 0 loaded = 0 failed = 0 + table_full = False try: while True: @@ -1127,7 +1182,9 @@ async def _reconcile_radio_contacts_in_background( for contact in selected_contacts if len(contact.public_key) >= 64 } - removable_keys = [key for key in radio_contacts if key not in desired_contacts] + removable_keys = ( + [] if autoevict else [key for key in radio_contacts if key not in desired_contacts] + ) missing_contacts = [ contact for key, contact in desired_contacts.items() if key not in radio_contacts ] @@ -1229,6 +1286,14 @@ async def _reconcile_radio_contacts_in_background( else: failed += 1 reason = add_result.payload + if isinstance(reason, dict) and reason.get("error_code") == 3: + logger.warning( + "Radio contact table full — stopping " + "contact reconcile (loaded %d this cycle)", + loaded, + ) + table_full = True + break hint = "" if reason is None: hint = ( @@ -1247,6 +1312,17 @@ async def _reconcile_radio_contacts_in_background( await asyncio.sleep(CONTACT_RECONCILE_BUSY_BACKOFF_SECONDS) continue + if table_full: + broadcast_error( + "Could not load all desired contacts onto the radio for auto-DM ack", + "The radio's contact table is full. Clearing your radio contacts " + "using another client, lowering your contact fill target in " + "settings, or setting MESHCORE_LOAD_WITH_AUTOEVICT=true may " + "relieve this. 
See 'Contact Loading Issues' in the Advanced " + "Setup documentation.", + ) + break + await asyncio.sleep(CONTACT_RECONCILE_YIELD_SECONDS) if not progressed: continue @@ -1269,6 +1345,7 @@ def start_background_contact_reconciliation( *, initial_radio_contacts: dict[str, dict], expected_mc: MeshCore, + autoevict: bool = False, ) -> None: """Start or replace the background contact reconcile task for the current radio.""" global _contact_reconcile_task @@ -1280,11 +1357,13 @@ def start_background_contact_reconciliation( _reconcile_radio_contacts_in_background( initial_radio_contacts=initial_radio_contacts, expected_mc=expected_mc, + autoevict=autoevict, ) ) logger.info( - "Started background contact reconcile for %d radio contact(s)", + "Started background contact reconcile for %d radio contact(s)%s", len(initial_radio_contacts), + " (autoevict mode)" if autoevict else "", ) diff --git a/tests/test_radio_sync.py b/tests/test_radio_sync.py index a0da173..2b1075b 100644 --- a/tests/test_radio_sync.py +++ b/tests/test_radio_sync.py @@ -516,7 +516,7 @@ class TestSyncAndOffloadAll: result = await sync_and_offload_all(mock_mc) mock_start.assert_called_once_with( - initial_radio_contacts=radio_contacts, expected_mc=mock_mc + initial_radio_contacts=radio_contacts, expected_mc=mock_mc, autoevict=False ) assert result["contact_reconcile_started"] is True From c098f9eeb5bb599ece3751e1ebc2790adfb0bfd8 Mon Sep 17 00:00:00 2001 From: Jack Kingsman Date: Sun, 19 Apr 2026 00:27:53 -0700 Subject: [PATCH 2/4] Be better about blind loading/auto-evict logging and run-through --- app/radio_sync.py | 307 +++++++++++++++++++++++++++------------ tests/test_radio_sync.py | 78 ++++++++++ 2 files changed, 290 insertions(+), 95 deletions(-) diff --git a/app/radio_sync.py b/app/radio_sync.py index 26784f3..a63185c 100644 --- a/app/radio_sync.py +++ b/app/radio_sync.py @@ -50,17 +50,17 @@ AdvertMode = Literal["flood", "zero_hop"] _AUTO_ADD_OVERWRITE_OLDEST = 0x01 -async def 
_enable_autoevict_on_radio(mc: MeshCore) -> None: +async def _enable_autoevict_on_radio(mc: MeshCore) -> bool: """Ensure the radio's AUTO_ADD_OVERWRITE_OLDEST preference bit is set.""" try: current = await mc.commands.get_autoadd_config() if current is None or current.type == EventType.ERROR: logger.warning("Could not read autoadd config from radio: %s", current) - return + return False current_flags = current.payload.get("config", 0) if current_flags & _AUTO_ADD_OVERWRITE_OLDEST: logger.debug("Radio autoevict already enabled (autoadd_config=0x%02x)", current_flags) - return + return True new_flags = current_flags | _AUTO_ADD_OVERWRITE_OLDEST result = await mc.commands.set_autoadd_config(new_flags) if result is not None and result.type == EventType.OK: @@ -69,10 +69,13 @@ async def _enable_autoevict_on_radio(mc: MeshCore) -> None: current_flags, new_flags, ) + return True else: logger.warning("Failed to enable radio autoevict: %s", result) + return False except Exception as exc: logger.warning("Error enabling radio autoevict: %s", exc) + return False def _contact_sync_debug_fields(contact: Contact) -> dict[str, object]: @@ -458,10 +461,16 @@ async def ensure_default_channels() -> None: async def sync_and_offload_all(mc: MeshCore) -> dict: """Run fast startup sync, then background contact reconcile.""" - autoevict = settings.load_with_autoevict + autoevict_requested = settings.load_with_autoevict + autoevict = False - if autoevict: - await _enable_autoevict_on_radio(mc) + if autoevict_requested: + autoevict = await _enable_autoevict_on_radio(mc) + if not autoevict: + logger.warning( + "Autoevict requested but unavailable; falling back to snapshot-based " + "background contact reconcile" + ) # Contact on_radio is legacy/stale metadata. Clear it during the offload/reload # cycle so old rows stop claiming radio residency we do not actively track. 
@@ -475,8 +484,7 @@ async def sync_and_offload_all(mc: MeshCore) -> dict: contact_reconcile_started = False if "error" in contacts_result and not autoevict: - # In normal mode, we can't reconcile blind — skip and warn. - # In autoevict mode, we can load blind because adds never fail. + # Without confirmed autoevict support we cannot reconcile blindly. logger.warning("Skipping background contact reconcile — could not enumerate radio contacts") broadcast_error( "Could not enumerate radio contacts", @@ -1161,14 +1169,17 @@ async def _reconcile_radio_contacts_in_background( """Converge radio contacts toward the desired favorites+recents working set. When *autoevict* is ``True`` the removal phase is skipped entirely and the - radio's ``AUTO_ADD_OVERWRITE_OLDEST`` preference is assumed to be enabled, - so ``add_contact`` never returns ``TABLE_FULL``. + desired working set is blind-refreshed. Re-adding the full desired list + refreshes each contact's recency on supported firmware, so one successful + full pass converges the radio toward the desired working set without relying + on a stale contact snapshot. 
""" radio_contacts = dict(initial_radio_contacts) removed = 0 loaded = 0 failed = 0 table_full = False + autoevict_next_index = 0 try: while True: @@ -1177,10 +1188,20 @@ async def _reconcile_radio_contacts_in_background( break selected_contacts = await get_contacts_selected_for_radio_sync() + desired_fill_contacts = [ + contact for contact in selected_contacts if len(contact.public_key) >= 64 + ] + + if autoevict: + if not desired_fill_contacts: + logger.info( + "Background contact blind fill complete: no desired contacts selected" + ) + break + if autoevict_next_index >= len(desired_fill_contacts): + autoevict_next_index = 0 desired_contacts = { - contact.public_key.lower(): contact - for contact in selected_contacts - if len(contact.public_key) >= 64 + contact.public_key.lower(): contact for contact in desired_fill_contacts } removable_keys = ( [] if autoevict else [key for key in radio_contacts if key not in desired_contacts] @@ -1189,7 +1210,7 @@ async def _reconcile_radio_contacts_in_background( contact for key, contact in desired_contacts.items() if key not in radio_contacts ] - if not removable_keys and not missing_contacts: + if not autoevict and not removable_keys and not missing_contacts: logger.info( "Background contact reconcile complete: %d contacts on radio working set", len(radio_contacts), @@ -1197,6 +1218,8 @@ async def _reconcile_radio_contacts_in_background( break progressed = False + autoevict_pass_complete = False + autoevict_pass_failed = False try: async with radio_manager.radio_operation( "background_contact_reconcile", @@ -1210,119 +1233,213 @@ async def _reconcile_radio_contacts_in_background( budget = CONTACT_RECONCILE_BATCH_SIZE selected_contacts = await get_contacts_selected_for_radio_sync() + desired_fill_contacts = [ + contact for contact in selected_contacts if len(contact.public_key) >= 64 + ] + if autoevict and autoevict_next_index >= len(desired_fill_contacts): + autoevict_next_index = 0 desired_contacts = { - 
contact.public_key.lower(): contact - for contact in selected_contacts - if len(contact.public_key) >= 64 + contact.public_key.lower(): contact for contact in desired_fill_contacts } - for public_key in list(radio_contacts): - if budget <= 0: - break - if public_key in desired_contacts: - continue - - remove_payload = ( - mc.get_contact_by_key_prefix(public_key[:12]) - or radio_contacts.get(public_key) - or {"public_key": public_key} - ) - try: - remove_result = await mc.commands.remove_contact(remove_payload) - except Exception as exc: - failed += 1 - budget -= 1 - logger.warning( - "Error removing contact %s during background reconcile: %s", - public_key[:12], - exc, - ) - continue - - budget -= 1 - if remove_result.type == EventType.OK: - radio_contacts.pop(public_key, None) - _evict_removed_contact_from_library_cache(mc, public_key) - removed += 1 - progressed = True - else: - failed += 1 - logger.warning( - "Failed to remove contact %s during background reconcile: %s", - public_key[:12], - remove_result.payload, - ) - - if budget > 0: - for public_key, contact in desired_contacts.items(): + if not autoevict: + for public_key in list(radio_contacts): if budget <= 0: break - if public_key in radio_contacts: - continue - - if mc.get_contact_by_key_prefix(public_key[:12]): - radio_contacts[public_key] = {"public_key": public_key} + if public_key in desired_contacts: continue + remove_payload = ( + mc.get_contact_by_key_prefix(public_key[:12]) + or radio_contacts.get(public_key) + or {"public_key": public_key} + ) try: - add_payload = contact.to_radio_dict() - add_result = await mc.commands.add_contact(add_payload) + remove_result = await mc.commands.remove_contact(remove_payload) except Exception as exc: failed += 1 budget -= 1 logger.warning( - "Error adding contact %s during background reconcile: %s", + "Error removing contact %s during background reconcile: %s", public_key[:12], exc, - exc_info=True, ) continue budget -= 1 - if add_result.type == EventType.OK: 
- radio_contacts[public_key] = add_payload - loaded += 1 + if remove_result.type == EventType.OK: + radio_contacts.pop(public_key, None) + _evict_removed_contact_from_library_cache(mc, public_key) + removed += 1 progressed = True else: failed += 1 - reason = add_result.payload - if isinstance(reason, dict) and reason.get("error_code") == 3: - logger.warning( - "Radio contact table full — stopping " - "contact reconcile (loaded %d this cycle)", - loaded, - ) - table_full = True - break - hint = "" - if reason is None: - hint = ( - " (no response from radio — if this repeats, check for " - "serial port contention from another process or try a " - "power cycle)" - ) logger.warning( - "Failed to add contact %s during background reconcile: %s%s", + "Failed to remove contact %s during background reconcile: %s", public_key[:12], - reason, - hint, + remove_result.payload, ) + + if budget > 0: + if autoevict: + batch_contacts = desired_fill_contacts[ + autoevict_next_index : autoevict_next_index + budget + ] + processed_contacts = 0 + for contact in batch_contacts: + public_key = contact.public_key.lower() + try: + add_payload = contact.to_radio_dict() + add_result = await mc.commands.add_contact(add_payload) + except Exception as exc: + failed += 1 + logger.warning( + "Error blind-filling contact %s during background reconcile: %s", + public_key[:12], + exc, + exc_info=True, + ) + autoevict_pass_failed = True + processed_contacts += 1 + continue + + if add_result.type == EventType.OK: + radio_contacts[public_key] = add_payload + loaded += 1 + progressed = True + else: + failed += 1 + autoevict_pass_failed = True + reason = add_result.payload + if isinstance(reason, dict) and reason.get("error_code") == 3: + logger.warning( + "Radio contact table full — stopping " + "contact reconcile (loaded %d this cycle)", + loaded, + ) + table_full = True + break + hint = "" + if reason is None: + hint = ( + " (no response from radio — if this repeats, check for " + "serial port 
contention from another process or try a " + "power cycle)" + ) + logger.warning( + "Failed to blind-fill contact %s during background reconcile: %s%s", + public_key[:12], + reason, + hint, + ) + processed_contacts += 1 + + autoevict_next_index += processed_contacts + autoevict_pass_complete = autoevict_next_index >= len( + desired_fill_contacts + ) + else: + for public_key, contact in desired_contacts.items(): + if budget <= 0: + break + if public_key in radio_contacts: + continue + + if mc.get_contact_by_key_prefix(public_key[:12]): + radio_contacts[public_key] = {"public_key": public_key} + continue + + try: + add_payload = contact.to_radio_dict() + add_result = await mc.commands.add_contact(add_payload) + except Exception as exc: + failed += 1 + budget -= 1 + logger.warning( + "Error adding contact %s during background reconcile: %s", + public_key[:12], + exc, + exc_info=True, + ) + continue + + budget -= 1 + if add_result.type == EventType.OK: + radio_contacts[public_key] = add_payload + loaded += 1 + progressed = True + else: + failed += 1 + reason = add_result.payload + if isinstance(reason, dict) and reason.get("error_code") == 3: + logger.warning( + "Radio contact table full — stopping " + "contact reconcile (loaded %d this cycle)", + loaded, + ) + table_full = True + break + hint = "" + if reason is None: + hint = ( + " (no response from radio — if this repeats, check for " + "serial port contention from another process or try a " + "power cycle)" + ) + logger.warning( + "Failed to add contact %s during background reconcile: %s%s", + public_key[:12], + reason, + hint, + ) except RadioOperationBusyError: logger.debug("Background contact reconcile yielding: radio busy") await asyncio.sleep(CONTACT_RECONCILE_BUSY_BACKOFF_SECONDS) continue if table_full: - broadcast_error( - "Could not load all desired contacts onto the radio for auto-DM ack", - "The radio's contact table is full. 
Clearing your radio contacts " - "using another client, lowering your contact fill target in " - "settings, or setting MESHCORE_LOAD_WITH_AUTOEVICT=true may " - "relieve this. See 'Contact Loading Issues' in the Advanced " - "Setup documentation.", - ) + if autoevict: + logger.error( + "We're expecting the radio to be in AUTO_ADD_OVERWRITE_OLDEST mode, " + "so a full-table error means we have no idea what is going on with " + "this radio; it is misbehaving. You should consider DM auto-acking " + "to be unreliable and/or not working for this radio. Sending and " + "receiving messages are not impacted by this error unless other " + "things are broken on your radio." + ) + broadcast_error( + "Could not load all desired contacts onto the radio for auto-DM ack: ", + "Despite having auto-evict enabled, we got a contact-table-full error " + "from your radio. DM auto-ack is likely unavailable.", + ) + else: + normal_table_full_message = ( + "The radio's contact table is full. Clearing your radio contacts " + "using another client, lowering your contact fill target in " + "settings, or setting MESHCORE_LOAD_WITH_AUTOEVICT=true may " + "relieve this. See 'Contact Loading Issues' in the Advanced " + "README.md" + ) + logger.error( + "Contact reconcile hit TABLE_FULL. 
%s", + normal_table_full_message, + ) + broadcast_error( + "Could not load all desired contacts onto the radio for auto-DM ack", + normal_table_full_message, + ) break + if autoevict and autoevict_pass_complete: + if autoevict_pass_failed: + autoevict_next_index = 0 + else: + logger.info( + "Background contact blind fill complete: refreshed %d desired contacts", + len(desired_fill_contacts), + ) + break + await asyncio.sleep(CONTACT_RECONCILE_YIELD_SECONDS) if not progressed: continue diff --git a/tests/test_radio_sync.py b/tests/test_radio_sync.py index 2b1075b..6b92f8c 100644 --- a/tests/test_radio_sync.py +++ b/tests/test_radio_sync.py @@ -520,6 +520,37 @@ class TestSyncAndOffloadAll: ) assert result["contact_reconcile_started"] is True + @pytest.mark.asyncio + async def test_falls_back_to_snapshot_reconcile_when_autoevict_enable_fails(self, test_db): + mock_mc = MagicMock() + radio_contacts = {KEY_A: {"public_key": KEY_A}} + + with ( + patch.object(radio_sync.settings, "load_with_autoevict", True), + patch( + "app.radio_sync._enable_autoevict_on_radio", + new=AsyncMock(return_value=False), + ), + patch( + "app.radio_sync.sync_contacts_from_radio", + new=AsyncMock(return_value={"synced": 1, "radio_contacts": radio_contacts}), + ), + patch( + "app.radio_sync.sync_and_offload_channels", + new=AsyncMock(return_value={"synced": 0, "cleared": 0}), + ), + patch("app.radio_sync.ensure_default_channels", new=AsyncMock()), + patch("app.radio_sync.start_background_contact_reconciliation") as mock_start, + ): + result = await sync_and_offload_all(mock_mc) + + mock_start.assert_called_once_with( + initial_radio_contacts=radio_contacts, + expected_mc=mock_mc, + autoevict=False, + ) + assert result["contact_reconcile_started"] is True + @pytest.mark.asyncio async def test_advert_fill_skips_repeaters(self, test_db): """Recent advert fallback only considers non-repeaters.""" @@ -844,6 +875,53 @@ class TestBackgroundContactReconcile: payload = 
mock_mc.commands.add_contact.call_args.args[0] assert payload["public_key"] == KEY_B + @pytest.mark.asyncio + async def test_autoevict_blind_fill_readds_full_desired_set(self, test_db): + await _insert_contact(KEY_A, "Alice", last_contacted=2000) + await _insert_contact(KEY_B, "Bob", last_contacted=1000) + alice = await ContactRepository.get_by_key(KEY_A) + bob = await ContactRepository.get_by_key(KEY_B) + assert alice is not None + assert bob is not None + + mock_mc = MagicMock() + mock_mc.is_connected = True + mock_mc.get_contact_by_key_prefix = MagicMock(return_value=None) + mock_mc.commands.remove_contact = AsyncMock(return_value=MagicMock(type=EventType.OK)) + mock_mc.commands.add_contact = AsyncMock(return_value=MagicMock(type=EventType.OK)) + radio_manager._meshcore = mock_mc + + @asynccontextmanager + async def _radio_operation(*args, **kwargs): + del args, kwargs + yield mock_mc + + with ( + patch.object( + radio_sync.radio_manager, + "radio_operation", + side_effect=lambda *args, **kwargs: _radio_operation(*args, **kwargs), + ), + patch("app.radio_sync.CONTACT_RECONCILE_BATCH_SIZE", 10), + patch( + "app.radio_sync.get_contacts_selected_for_radio_sync", + side_effect=[[alice, bob], [alice, bob]], + ), + patch("app.radio_sync.asyncio.sleep", new=AsyncMock()), + ): + await radio_sync._reconcile_radio_contacts_in_background( + initial_radio_contacts={KEY_A: {"public_key": KEY_A}}, + expected_mc=mock_mc, + autoevict=True, + ) + + mock_mc.commands.remove_contact.assert_not_called() + assert mock_mc.commands.add_contact.await_count == 2 + loaded_keys = [ + call.args[0]["public_key"] for call in mock_mc.commands.add_contact.call_args_list + ] + assert loaded_keys == [KEY_A, KEY_B] + @pytest.mark.asyncio async def test_yields_radio_lock_every_two_contact_operations(self, test_db): await _insert_contact(KEY_A, "Alice", last_contacted=3000) From 09f807230be6d3b608a961cb0b8004ea9decddd1 Mon Sep 17 00:00:00 2001 From: Jack Kingsman Date: Sun, 19 Apr 2026 00:46:57 
-0700 Subject: [PATCH 3/4] Patch up some vagaries and maintain best-effort loading. --- README_ADVANCED.md | 2 +- app/radio_sync.py | 54 ++++++--- tests/test_radio_sync.py | 230 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 267 insertions(+), 19 deletions(-) diff --git a/README_ADVANCED.md b/README_ADVANCED.md index 504bed1..b463e5d 100644 --- a/README_ADVANCED.md +++ b/README_ADVANCED.md @@ -24,7 +24,7 @@ If the audit finds a mismatch, you'll see an error in the application UI and you RemoteTerm loads favorite and recently active contacts onto the radio so that the radio can automatically acknowledge incoming DMs on your behalf. To do this, it first enumerates the radio's existing contact table, then reconciles it with the desired working set. -On BLE connections with many contacts (or radios with large contact tables from organic advertisements), the initial contact enumeration may take longer than the default timeout. If this happens, the app will automatically retry with an extended 60-second timeout. You may see a toast warning that the radio is temporarily unresponsive during this process. +On BLE connections with many contacts (or radios with large contact tables from organic advertisements), the initial contact enumeration may time out. If this happens, the app will still attempt to load your favorites and recent contacts onto the radio on a best-effort basis, but without a full snapshot of what's already on the radio, some adds may be redundant or fail. If the radio's contact table is already full (from contacts added by advertisements or another client), the app may not be able to load all desired contacts. In this case you'll see a warning that auto-DM acking may not work for all contacts. 
To resolve this: diff --git a/app/radio_sync.py b/app/radio_sync.py index a63185c..a2159d2 100644 --- a/app/radio_sync.py +++ b/app/radio_sync.py @@ -482,30 +482,31 @@ async def sync_and_offload_all(mc: MeshCore) -> dict: # Ensure default channels exist await ensure_default_channels() - contact_reconcile_started = False - if "error" in contacts_result and not autoevict: - # Without confirmed autoevict support we cannot reconcile blindly. - logger.warning("Skipping background contact reconcile — could not enumerate radio contacts") + snapshot_failed = "error" in contacts_result + if snapshot_failed and not autoevict: + logger.warning( + "Radio contact snapshot failed — attempting best-effort contact " + "loading without a full picture of what's already on the radio" + ) broadcast_error( "Could not enumerate radio contacts", - "Contact loading skipped — DM auto-acking for favorites and recent " - "contacts may not work, but sending and receiving is not affected. " - "Set MESHCORE_LOAD_WITH_AUTOEVICT=true to load contacts without " - "needing to read the radio first. See 'Contact Loading Issues' in " - "the Advanced Setup documentation.", + "Loading favorites and recent contacts on a best-effort basis — " + "some adds may be redundant or fail if the radio's contact table " + "is already full. Set MESHCORE_LOAD_WITH_AUTOEVICT=true for more " + "reliable loading without needing to read the radio first. 
" + "See 'Contact Loading Issues' in the Advanced Setup documentation.", ) - else: - start_background_contact_reconciliation( - initial_radio_contacts=contacts_result.get("radio_contacts", {}), - expected_mc=mc, - autoevict=autoevict, - ) - contact_reconcile_started = True + + start_background_contact_reconciliation( + initial_radio_contacts=contacts_result.get("radio_contacts", {}), + expected_mc=mc, + autoevict=autoevict, + ) return { "contacts": contacts_result, "channels": channels_result, - "contact_reconcile_started": contact_reconcile_started, + "contact_reconcile_started": True, } @@ -1180,6 +1181,8 @@ async def _reconcile_radio_contacts_in_background( failed = 0 table_full = False autoevict_next_index = 0 + autoevict_full_pass_retries = 0 + _MAX_AUTOEVICT_RETRIES = 3 try: while True: @@ -1187,6 +1190,8 @@ async def _reconcile_radio_contacts_in_background( logger.info("Stopping background contact reconcile: radio transport changed") break + # Pre-lock snapshot for quick-exit checks; authoritative list is + # re-fetched inside the radio lock below. selected_contacts = await get_contacts_selected_for_radio_sync() desired_fill_contacts = [ contact for contact in selected_contacts if len(contact.public_key) >= 64 @@ -1282,6 +1287,9 @@ async def _reconcile_radio_contacts_in_background( if budget > 0: if autoevict: + # Budget is consumed by the slice bound rather than + # per-operation decrement — autoevict skips the + # removal phase so the full budget is always available. batch_contacts = desired_fill_contacts[ autoevict_next_index : autoevict_next_index + budget ] @@ -1408,7 +1416,7 @@ async def _reconcile_radio_contacts_in_background( "things are broken on your radio." ) broadcast_error( - "Could not load all desired contacts onto the radio for auto-DM ack: ", + "Could not load all desired contacts onto the radio for auto-DM ack", "Despite having auto-evict enabled, we got a contact-table-full error " "from your radio. 
DM auto-ack is likely unavailable.", ) @@ -1432,6 +1440,16 @@ async def _reconcile_radio_contacts_in_background( if autoevict and autoevict_pass_complete: if autoevict_pass_failed: + autoevict_full_pass_retries += 1 + if autoevict_full_pass_retries >= _MAX_AUTOEVICT_RETRIES: + logger.warning( + "Background contact blind fill giving up after %d full passes " + "with persistent failures (loaded %d, failed %d)", + autoevict_full_pass_retries, + loaded, + failed, + ) + break autoevict_next_index = 0 else: logger.info( diff --git a/tests/test_radio_sync.py b/tests/test_radio_sync.py index 6b92f8c..2d4ebce 100644 --- a/tests/test_radio_sync.py +++ b/tests/test_radio_sync.py @@ -15,6 +15,7 @@ from meshcore.events import Event import app.radio_sync as radio_sync from app.radio import RadioManager, radio_manager from app.radio_sync import ( + _enable_autoevict_on_radio, _message_poll_loop, _periodic_advert_loop, _periodic_sync_loop, @@ -551,6 +552,66 @@ class TestSyncAndOffloadAll: ) assert result["contact_reconcile_started"] is True + @pytest.mark.asyncio + async def test_autoevict_success_passes_flag_to_reconcile(self, test_db): + mock_mc = MagicMock() + radio_contacts = {KEY_A: {"public_key": KEY_A}} + + with ( + patch.object(radio_sync.settings, "load_with_autoevict", True), + patch( + "app.radio_sync._enable_autoevict_on_radio", + new=AsyncMock(return_value=True), + ), + patch( + "app.radio_sync.sync_contacts_from_radio", + new=AsyncMock(return_value={"synced": 1, "radio_contacts": radio_contacts}), + ), + patch( + "app.radio_sync.sync_and_offload_channels", + new=AsyncMock(return_value={"synced": 0, "cleared": 0}), + ), + patch("app.radio_sync.ensure_default_channels", new=AsyncMock()), + patch("app.radio_sync.start_background_contact_reconciliation") as mock_start, + ): + result = await sync_and_offload_all(mock_mc) + + mock_start.assert_called_once_with( + initial_radio_contacts=radio_contacts, + expected_mc=mock_mc, + autoevict=True, + ) + assert 
result["contact_reconcile_started"] is True + + @pytest.mark.asyncio + async def test_best_effort_reconcile_when_snapshot_fails(self, test_db): + """When sync_contacts_from_radio errors, reconcile still starts with empty snapshot.""" + mock_mc = MagicMock() + + with ( + patch( + "app.radio_sync.sync_contacts_from_radio", + new=AsyncMock(return_value={"synced": 0, "radio_contacts": {}, "error": "timeout"}), + ), + patch( + "app.radio_sync.sync_and_offload_channels", + new=AsyncMock(return_value={"synced": 0, "cleared": 0}), + ), + patch("app.radio_sync.ensure_default_channels", new=AsyncMock()), + patch("app.radio_sync.start_background_contact_reconciliation") as mock_start, + patch("app.radio_sync.broadcast_error") as mock_broadcast, + ): + result = await sync_and_offload_all(mock_mc) + + mock_start.assert_called_once_with( + initial_radio_contacts={}, + expected_mc=mock_mc, + autoevict=False, + ) + assert result["contact_reconcile_started"] is True + mock_broadcast.assert_called_once() + assert "best-effort" in mock_broadcast.call_args.args[1] + @pytest.mark.asyncio async def test_advert_fill_skips_repeaters(self, test_db): """Recent advert fallback only considers non-repeaters.""" @@ -829,6 +890,81 @@ class TestSyncAndOffloadAll: assert payload["public_key"] == KEY_A +class TestEnableAutoevictOnRadio: + """Test _enable_autoevict_on_radio read-modify-write flow.""" + + @pytest.mark.asyncio + async def test_sets_flag_when_not_already_set(self): + mc = MagicMock() + mc.commands.get_autoadd_config = AsyncMock( + return_value=MagicMock(type=EventType.OK, payload={"config": 0x00}) + ) + mc.commands.set_autoadd_config = AsyncMock(return_value=MagicMock(type=EventType.OK)) + + result = await _enable_autoevict_on_radio(mc) + + assert result is True + mc.commands.set_autoadd_config.assert_awaited_once_with(0x01) + + @pytest.mark.asyncio + async def test_noop_when_already_enabled(self): + mc = MagicMock() + mc.commands.get_autoadd_config = AsyncMock( + 
return_value=MagicMock(type=EventType.OK, payload={"config": 0x01}) + ) + mc.commands.set_autoadd_config = AsyncMock() + + result = await _enable_autoevict_on_radio(mc) + + assert result is True + mc.commands.set_autoadd_config.assert_not_awaited() + + @pytest.mark.asyncio + async def test_preserves_other_flags(self): + mc = MagicMock() + mc.commands.get_autoadd_config = AsyncMock( + return_value=MagicMock(type=EventType.OK, payload={"config": 0x04}) + ) + mc.commands.set_autoadd_config = AsyncMock(return_value=MagicMock(type=EventType.OK)) + + result = await _enable_autoevict_on_radio(mc) + + assert result is True + mc.commands.set_autoadd_config.assert_awaited_once_with(0x05) + + @pytest.mark.asyncio + async def test_returns_false_on_get_error(self): + mc = MagicMock() + mc.commands.get_autoadd_config = AsyncMock( + return_value=MagicMock(type=EventType.ERROR, payload=None) + ) + + result = await _enable_autoevict_on_radio(mc) + + assert result is False + + @pytest.mark.asyncio + async def test_returns_false_on_set_failure(self): + mc = MagicMock() + mc.commands.get_autoadd_config = AsyncMock( + return_value=MagicMock(type=EventType.OK, payload={"config": 0x00}) + ) + mc.commands.set_autoadd_config = AsyncMock(return_value=MagicMock(type=EventType.ERROR)) + + result = await _enable_autoevict_on_radio(mc) + + assert result is False + + @pytest.mark.asyncio + async def test_returns_false_on_exception(self): + mc = MagicMock() + mc.commands.get_autoadd_config = AsyncMock(side_effect=RuntimeError("timeout")) + + result = await _enable_autoevict_on_radio(mc) + + assert result is False + + class TestBackgroundContactReconcile: """Test the yielding background contact reconcile loop.""" @@ -922,6 +1058,100 @@ class TestBackgroundContactReconcile: ] assert loaded_keys == [KEY_A, KEY_B] + @pytest.mark.asyncio + async def test_autoevict_table_full_breaks_with_error(self, test_db): + """TABLE_FULL during autoevict stops the loop and broadcasts an error.""" + await 
_insert_contact(KEY_A, "Alice", last_contacted=2000) + alice = await ContactRepository.get_by_key(KEY_A) + assert alice is not None + + mock_mc = MagicMock() + mock_mc.is_connected = True + mock_mc.get_contact_by_key_prefix = MagicMock(return_value=None) + table_full_result = MagicMock(type=EventType.ERROR, payload={"error_code": 3}) + mock_mc.commands.add_contact = AsyncMock(return_value=table_full_result) + radio_manager._meshcore = mock_mc + + @asynccontextmanager + async def _radio_operation(*args, **kwargs): + del args, kwargs + yield mock_mc + + with ( + patch.object( + radio_sync.radio_manager, + "radio_operation", + side_effect=lambda *args, **kwargs: _radio_operation(*args, **kwargs), + ), + patch("app.radio_sync.CONTACT_RECONCILE_BATCH_SIZE", 10), + patch( + "app.radio_sync.get_contacts_selected_for_radio_sync", + side_effect=[[alice], [alice]], + ), + patch("app.radio_sync.asyncio.sleep", new=AsyncMock()), + patch("app.radio_sync.broadcast_error") as mock_broadcast, + ): + await radio_sync._reconcile_radio_contacts_in_background( + initial_radio_contacts={}, + expected_mc=mock_mc, + autoevict=True, + ) + + mock_broadcast.assert_called_once() + assert "auto-evict" in mock_broadcast.call_args.args[1].lower() + + @pytest.mark.asyncio + async def test_autoevict_retry_cap_stops_after_max_retries(self, test_db): + """Autoevict gives up after _MAX_AUTOEVICT_RETRIES full passes with failures.""" + await _insert_contact(KEY_A, "Alice", last_contacted=2000) + alice = await ContactRepository.get_by_key(KEY_A) + assert alice is not None + + mock_mc = MagicMock() + mock_mc.is_connected = True + mock_mc.get_contact_by_key_prefix = MagicMock(return_value=None) + # Every add fails with a non-TABLE_FULL error + fail_result = MagicMock(type=EventType.ERROR, payload={"error_code": 99}) + mock_mc.commands.add_contact = AsyncMock(return_value=fail_result) + radio_manager._meshcore = mock_mc + + @asynccontextmanager + async def _radio_operation(*args, **kwargs): + del args, 
kwargs + yield mock_mc + + call_count = 0 + + async def _get_selected(): + nonlocal call_count + call_count += 1 + return [alice] + + with ( + patch.object( + radio_sync.radio_manager, + "radio_operation", + side_effect=lambda *args, **kwargs: _radio_operation(*args, **kwargs), + ), + patch("app.radio_sync.CONTACT_RECONCILE_BATCH_SIZE", 10), + patch( + "app.radio_sync.get_contacts_selected_for_radio_sync", + side_effect=_get_selected, + ), + patch("app.radio_sync.asyncio.sleep", new=AsyncMock()), + ): + await radio_sync._reconcile_radio_contacts_in_background( + initial_radio_contacts={}, + expected_mc=mock_mc, + autoevict=True, + ) + + # 2 selector calls per iteration (pre-lock + in-lock), so 3 full passes = + # 6 calls; the bounds below leave slack for one extra iteration (8 calls, 4 adds). + # The key assertion: it terminates rather than looping forever. + assert mock_mc.commands.add_contact.await_count <= 4 + assert call_count <= 8 + @pytest.mark.asyncio async def test_yields_radio_lock_every_two_contact_operations(self, test_db): await _insert_contact(KEY_A, "Alice", last_contacted=3000) From f7a311d74b5c37a1ea70d51576a89d44a0f17cb2 Mon Sep 17 00:00:00 2001 From: Jack Kingsman Date: Sun, 19 Apr 2026 01:25:08 -0700 Subject: [PATCH 4/4] Always clear fav flag on a blind load --- app/radio_sync.py | 7 +++++++ tests/test_radio_sync.py | 9 +++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/app/radio_sync.py b/app/radio_sync.py index a2159d2..0a391e5 100644 --- a/app/radio_sync.py +++ b/app/radio_sync.py @@ -48,6 +48,7 @@ _GET_CONTACTS_TIMEOUT = 10 AdvertMode = Literal["flood", "zero_hop"] _AUTO_ADD_OVERWRITE_OLDEST = 0x01 +_RADIO_CONTACT_FAVORITE = 0x01 async def _enable_autoevict_on_radio(mc: MeshCore) -> bool: @@ -1298,6 +1299,12 @@ async def _reconcile_radio_contacts_in_background( public_key = contact.public_key.lower() try: add_payload = contact.to_radio_dict() + # In autoevict mode, app-loaded contacts should + # remain evictable by the radio even if the + # stored
contact record carries the favorite bit. + add_payload["flags"] = ( + int(add_payload.get("flags", 0)) & ~_RADIO_CONTACT_FAVORITE + ) add_result = await mc.commands.add_contact(add_payload) except Exception as exc: failed += 1 diff --git a/tests/test_radio_sync.py b/tests/test_radio_sync.py index 2d4ebce..98dff18 100644 --- a/tests/test_radio_sync.py +++ b/tests/test_radio_sync.py @@ -77,6 +77,7 @@ async def _insert_contact( name="Alice", on_radio=False, contact_type=0, + flags=0, last_contacted=None, last_advert=None, direct_path=None, @@ -89,7 +90,7 @@ async def _insert_contact( "public_key": public_key, "name": name, "type": contact_type, - "flags": 0, + "flags": flags, "direct_path": direct_path, "direct_path_len": direct_path_len, "direct_path_hash_mode": direct_path_hash_mode, @@ -1013,7 +1014,7 @@ class TestBackgroundContactReconcile: @pytest.mark.asyncio async def test_autoevict_blind_fill_readds_full_desired_set(self, test_db): - await _insert_contact(KEY_A, "Alice", last_contacted=2000) + await _insert_contact(KEY_A, "Alice", flags=0x01, last_contacted=2000) await _insert_contact(KEY_B, "Bob", last_contacted=1000) alice = await ContactRepository.get_by_key(KEY_A) bob = await ContactRepository.get_by_key(KEY_B) @@ -1057,6 +1058,10 @@ class TestBackgroundContactReconcile: call.args[0]["public_key"] for call in mock_mc.commands.add_contact.call_args_list ] assert loaded_keys == [KEY_A, KEY_B] + loaded_flags = [ + call.args[0]["flags"] for call in mock_mc.commands.add_contact.call_args_list + ] + assert loaded_flags == [0, 0] @pytest.mark.asyncio async def test_autoevict_table_full_breaks_with_error(self, test_db):