diff --git a/AGENTS.md b/AGENTS.md index d46f0ee..8f5b325 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -442,7 +442,7 @@ mc.subscribe(EventType.ACK, handler) | `MESHCORE_LOG_LEVEL` | `INFO` | Logging level (`DEBUG`/`INFO`/`WARNING`/`ERROR`) | | `MESHCORE_DATABASE_PATH` | `data/meshcore.db` | SQLite database location | | `MESHCORE_DISABLE_BOTS` | `false` | Disable bot system entirely (blocks execution and config) | -| `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | `false` | Enable periodic `get_msg()` fallback polling if radio events are not reliably surfacing messages | +| `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | `false` | Switch the always-on message audit task from hourly checks to aggressive 10-second `get_msg()` fallback polling | **Note:** Runtime app settings are stored in the database (`app_settings` table), not environment variables. These include `max_radio_contacts`, `auto_decrypt_dm_on_advert`, `sidebar_sort_order`, `advert_interval`, `last_advert_time`, `favorites`, `last_message_times`, `flood_scope`, `blocked_keys`, and `blocked_names`. `max_radio_contacts` is the configured radio contact capacity baseline used by background maintenance: favorites reload first, non-favorite fill targets about 80% of that value, and full offload/reload triggers around 95% occupancy. They are configured via `GET/PATCH /api/settings`. MQTT, bot, webhook, and Apprise configs are stored in the `fanout_configs` table, managed via `/api/fanout`. diff --git a/README.md b/README.md index 419cada..2e5e7b8 100644 --- a/README.md +++ b/README.md @@ -224,11 +224,11 @@ npm run build # build the frontend | `MESHCORE_LOG_LEVEL` | INFO | DEBUG, INFO, WARNING, ERROR | | `MESHCORE_DATABASE_PATH` | data/meshcore.db | SQLite database path | | `MESHCORE_DISABLE_BOTS` | false | Disable bot system entirely (blocks execution and config) | -| `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | false | Enable periodic `get_msg()` fallback polling if radio events are not reliably surfacing messages | +| `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | false | Run aggressive 10-second `get_msg()` fallback polling instead of the default hourly audit task | Only one transport may be active at a time. If multiple are set, the server will refuse to start. -By default the app relies on radio events plus MeshCore auto-fetch for incoming messages. If you see messages on the radio that never show up in the app, try `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK=true` to add a more aggressive periodic `get_msg()` safety net. +By default the app relies on radio events plus MeshCore auto-fetch for incoming messages, and also runs a low-frequency hourly audit poll. If that audit ever finds radio data that was not surfaced through event subscription, the backend logs an error and the UI shows a toast telling the operator to check the logs. If you see that warning, or if messages on the radio never show up in the app, try `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK=true` to switch that task into a more aggressive 10-second `get_msg()` safety net. ## Additional Setup diff --git a/app/AGENTS.md b/app/AGENTS.md index 7b6af5f..85b1427 100644 --- a/app/AGENTS.md +++ b/app/AGENTS.md @@ -87,7 +87,7 @@ app/ - `RadioManager.post_connect_setup()` delegates to `services/radio_lifecycle.py`. - Routers, startup/lifespan code, fanout helpers, and `radio_sync.py` should reach radio state through `services/radio_runtime.py`, not by importing `app.radio.radio_manager` directly. - Shared reconnect/setup helpers in `services/radio_lifecycle.py` are used by startup, the monitor, and manual reconnect/reboot flows before broadcasting healthy state. -- Setup still includes handler registration, key export, time sync, contact/channel sync, and advertisement tasks. Fallback message polling starts only when `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK=true`. +- Setup still includes handler registration, key export, time sync, contact/channel sync, and advertisement tasks. The message-poll task always starts: by default it runs as a low-frequency hourly audit, and `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK=true` switches it to aggressive 10-second polling. - Post-connect setup is timeout-bounded. If initial radio offload/setup hangs too long, the backend logs the failure and broadcasts an `error` toast telling the operator to reboot the radio and restart the server. ## Important Behaviors diff --git a/app/radio_sync.py b/app/radio_sync.py index 9129ae4..444a6a1 100644 --- a/app/radio_sync.py +++ b/app/radio_sync.py @@ -17,6 +17,7 @@ from contextlib import asynccontextmanager from meshcore import EventType, MeshCore +from app.config import settings from app.event_handlers import cleanup_expired_acks from app.models import Contact, ContactUpsert from app.radio import RadioOperationBusyError @@ -28,6 +29,7 @@ from app.repository import ( ) from app.services.contact_reconciliation import reconcile_contact_messages from app.services.radio_runtime import radio_runtime as radio_manager +from app.websocket import broadcast_error logger = logging.getLogger(__name__) @@ -99,9 +101,12 @@ async def upsert_channel_from_radio_slot(payload: dict, *, on_radio: bool) -> st # Message poll task handle _message_poll_task: asyncio.Task | None = None -# Message poll interval in seconds (10s gives DM ACKs plenty of time to arrive) +# Message poll interval in seconds when aggressive fallback is enabled. MESSAGE_POLL_INTERVAL = 10 +# Always-on audit interval when aggressive fallback is disabled. +MESSAGE_POLL_AUDIT_INTERVAL = 3600 + # Periodic advertisement task handle _advert_task: asyncio.Task | None = None @@ -445,7 +450,10 @@ async def _message_poll_loop(): """Background task that periodically polls for messages.""" while True: try: - await asyncio.sleep(MESSAGE_POLL_INTERVAL) + aggressive_fallback = settings.enable_message_poll_fallback + await asyncio.sleep( + MESSAGE_POLL_INTERVAL if aggressive_fallback else MESSAGE_POLL_AUDIT_INTERVAL + ) if radio_manager.is_connected and not is_polling_paused(): try: @@ -456,10 +464,24 @@ async def _message_poll_loop(): ) as mc: count = await poll_for_messages(mc) if count > 0: - logger.warning( - "Poll loop caught %d message(s) missed by auto-fetch", - count, - ) + if aggressive_fallback: + logger.warning( + "Poll loop caught %d message(s) missed by auto-fetch", + count, + ) + else: + logger.error( + "Periodic radio audit caught %d message(s) that were not " + "surfaced via event subscription. See README and consider " + "setting MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK=true to " + "enable more frequent polling.", + count, + ) + broadcast_error( + "A periodic poll task has discovered radio inconsistencies.", + "Please check the logs for recommendations (search " + "'MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK').", + ) except RadioOperationBusyError: logger.debug("Skipping message poll: radio busy") @@ -474,7 +496,16 @@ def start_message_polling(): global _message_poll_task if _message_poll_task is None or _message_poll_task.done(): _message_poll_task = asyncio.create_task(_message_poll_loop()) - logger.info("Started periodic message polling (interval: %ds)", MESSAGE_POLL_INTERVAL) + if settings.enable_message_poll_fallback: + logger.info( + "Started periodic message polling task (aggressive fallback, interval: %ds)", + MESSAGE_POLL_INTERVAL, + ) + else: + logger.info( + "Started periodic message audit task (interval: %ds)", + MESSAGE_POLL_AUDIT_INTERVAL, + ) async def stop_message_polling(): diff --git a/app/services/radio_lifecycle.py b/app/services/radio_lifecycle.py index 58bcd04..4709cc8 100644 --- a/app/services/radio_lifecycle.py +++ b/app/services/radio_lifecycle.py @@ -1,8 +1,6 @@ import asyncio import logging -from app.config import settings - logger = logging.getLogger(__name__) POST_CONNECT_SETUP_TIMEOUT_SECONDS = 300 @@ -137,10 +135,7 @@ async def run_post_connect_setup(radio_manager) -> None: # These tasks acquire their own locks when they need radio access. start_periodic_sync() start_periodic_advert() - if settings.enable_message_poll_fallback: - start_message_polling() - else: - logger.info("Fallback message polling disabled; relying on radio events") + start_message_polling() radio_manager._setup_complete = True finally: diff --git a/frontend/src/test/settingsModal.test.tsx b/frontend/src/test/settingsModal.test.tsx index 81b571d..29ad99e 100644 --- a/frontend/src/test/settingsModal.test.tsx +++ b/frontend/src/test/settingsModal.test.tsx @@ -183,9 +183,7 @@ describe('SettingsModal', () => { renderModal(); openRadioSection(); - expect( - screen.getByText(/Configured radio contact capacity/i) - ).toBeInTheDocument(); + expect(screen.getByText(/Configured radio contact capacity/i)).toBeInTheDocument(); }); it('saves changed max contacts value through onSaveAppSettings', async () => { diff --git a/tests/test_radio.py b/tests/test_radio.py index 551d37c..70cdac5 100644 --- a/tests/test_radio.py +++ b/tests/test_radio.py @@ -751,8 +751,8 @@ class TestPostConnectSetupOrdering: mock_mc.commands.set_flood_scope.assert_awaited_once_with("") @pytest.mark.asyncio - async def test_message_polling_disabled_by_default(self): - """Post-connect setup does not start fallback polling unless explicitly enabled.""" + async def test_message_polling_starts_hourly_audit_by_default(self): + """Post-connect setup always starts the message audit task by default.""" from app.models import AppSettings from app.radio import RadioManager @@ -780,11 +780,11 @@ class TestPostConnectSetupOrdering: ): await rm.post_connect_setup() - mock_start_message_polling.assert_not_called() + mock_start_message_polling.assert_called_once() @pytest.mark.asyncio async def test_message_polling_starts_when_env_flag_enabled(self): - """Post-connect setup starts fallback polling when the env-backed setting is enabled.""" + """Post-connect setup also starts the same task when aggressive fallback is enabled.""" from app.models import AppSettings from app.radio import RadioManager @@ -809,7 +809,6 @@ class TestPostConnectSetupOrdering: patch("app.radio_sync.start_periodic_advert"), patch("app.radio_sync.drain_pending_messages", new_callable=AsyncMock, return_value=0), patch("app.radio_sync.start_message_polling") as mock_start_message_polling, - patch("app.services.radio_lifecycle.settings.enable_message_poll_fallback", True), ): await rm.post_connect_setup() diff --git a/tests/test_radio_sync.py b/tests/test_radio_sync.py index 0904ff9..6066953 100644 --- a/tests/test_radio_sync.py +++ b/tests/test_radio_sync.py @@ -1191,6 +1191,34 @@ def _sleep_controller(*, cancel_after: int = 2): class TestMessagePollLoopRaces: """Regression tests for disconnect/reconnect race paths in _message_poll_loop.""" + @pytest.mark.asyncio + async def test_uses_hourly_audit_interval_when_fallback_disabled(self): + rm, _mc = _make_connected_manager() + mock_sleep, sleep_calls = _sleep_controller(cancel_after=1) + + with ( + patch("app.radio_sync.radio_manager", rm), + patch("app.radio_sync.settings.enable_message_poll_fallback", False), + patch("asyncio.sleep", side_effect=mock_sleep), + ): + await _message_poll_loop() + + assert sleep_calls == [3600] + + @pytest.mark.asyncio + async def test_uses_fast_poll_interval_when_fallback_enabled(self): + rm, _mc = _make_connected_manager() + mock_sleep, sleep_calls = _sleep_controller(cancel_after=1) + + with ( + patch("app.radio_sync.radio_manager", rm), + patch("app.radio_sync.settings.enable_message_poll_fallback", True), + patch("asyncio.sleep", side_effect=mock_sleep), + ): + await _message_poll_loop() + + assert sleep_calls == [10] + @pytest.mark.asyncio async def test_disconnect_race_between_precheck_and_lock(self): """RadioDisconnectedError between is_connected and radio_operation() @@ -1248,6 +1276,46 @@ class TestMessagePollLoopRaces: mock_poll.assert_called_once_with(mock_mc) + @pytest.mark.asyncio + async def test_hourly_audit_crows_loudly_when_it_finds_hidden_messages(self): + rm, mock_mc = _make_connected_manager() + mock_sleep, _ = _sleep_controller(cancel_after=2) + + with ( + patch("app.radio_sync.radio_manager", rm), + patch("app.radio_sync.settings.enable_message_poll_fallback", False), + patch("asyncio.sleep", side_effect=mock_sleep), + patch("app.radio_sync.poll_for_messages", new_callable=AsyncMock, return_value=2), + patch("app.radio_sync.logger") as mock_logger, + patch("app.radio_sync.broadcast_error") as mock_broadcast_error, + ): + await _message_poll_loop() + + mock_logger.error.assert_called_once() + mock_broadcast_error.assert_called_once_with( + "A periodic poll task has discovered radio inconsistencies.", + "Please check the logs for recommendations (search " + "'MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK').", + ) + + @pytest.mark.asyncio + async def test_fast_poll_logs_missed_messages_without_error_toast(self): + rm, mock_mc = _make_connected_manager() + mock_sleep, _ = _sleep_controller(cancel_after=2) + + with ( + patch("app.radio_sync.radio_manager", rm), + patch("app.radio_sync.settings.enable_message_poll_fallback", True), + patch("asyncio.sleep", side_effect=mock_sleep), + patch("app.radio_sync.poll_for_messages", new_callable=AsyncMock, return_value=2), + patch("app.radio_sync.logger") as mock_logger, + patch("app.radio_sync.broadcast_error") as mock_broadcast_error, + ): + await _message_poll_loop() + + mock_logger.warning.assert_called_once() + mock_broadcast_error.assert_not_called() + class TestPeriodicAdvertLoopRaces: """Regression tests for disconnect/reconnect race paths in _periodic_advert_loop.""" @@ -1356,7 +1424,9 @@ class TestPeriodicSyncLoopRaces: patch("app.radio_sync.radio_manager", rm), patch("asyncio.sleep", side_effect=mock_sleep), patch("app.radio_sync.cleanup_expired_acks") as mock_cleanup, - patch("app.radio_sync.should_run_full_periodic_sync", new_callable=AsyncMock) as mock_check, + patch( + "app.radio_sync.should_run_full_periodic_sync", new_callable=AsyncMock + ) as mock_check, patch("app.radio_sync.sync_and_offload_all", new_callable=AsyncMock) as mock_sync, patch("app.radio_sync.sync_radio_time", new_callable=AsyncMock) as mock_time, ): @@ -1380,7 +1450,9 @@ class TestPeriodicSyncLoopRaces: patch("app.radio_sync.radio_manager", rm), patch("asyncio.sleep", side_effect=mock_sleep), patch("app.radio_sync.cleanup_expired_acks") as mock_cleanup, - patch("app.radio_sync.should_run_full_periodic_sync", new_callable=AsyncMock) as mock_check, + patch( + "app.radio_sync.should_run_full_periodic_sync", new_callable=AsyncMock + ) as mock_check, patch("app.radio_sync.sync_and_offload_all", new_callable=AsyncMock) as mock_sync, patch("app.radio_sync.sync_radio_time", new_callable=AsyncMock) as mock_time, ): @@ -1405,7 +1477,11 @@ class TestPeriodicSyncLoopRaces: patch("app.radio_sync.radio_manager", rm), patch("asyncio.sleep", side_effect=mock_sleep), patch("app.radio_sync.cleanup_expired_acks") as mock_cleanup, - patch("app.radio_sync.should_run_full_periodic_sync", new_callable=AsyncMock, return_value=True), + patch( + "app.radio_sync.should_run_full_periodic_sync", + new_callable=AsyncMock, + return_value=True, + ), patch("app.radio_sync.sync_and_offload_all", new_callable=AsyncMock) as mock_sync, patch("app.radio_sync.sync_radio_time", new_callable=AsyncMock) as mock_time, ): @@ -1425,7 +1501,11 @@ class TestPeriodicSyncLoopRaces: patch("app.radio_sync.radio_manager", rm), patch("asyncio.sleep", side_effect=mock_sleep), patch("app.radio_sync.cleanup_expired_acks") as mock_cleanup, - patch("app.radio_sync.should_run_full_periodic_sync", new_callable=AsyncMock, return_value=False), + patch( + "app.radio_sync.should_run_full_periodic_sync", + new_callable=AsyncMock, + return_value=False, + ), patch("app.radio_sync.sync_and_offload_all", new_callable=AsyncMock) as mock_sync, patch("app.radio_sync.sync_radio_time", new_callable=AsyncMock) as mock_time, ):