Add hourly sync and crow loudly if it finds a discrepancy

This commit is contained in:
Jack Kingsman
2026-03-10 14:47:18 -07:00
parent 738e0b9815
commit d0ed3484ce
8 changed files with 132 additions and 29 deletions

View File

@@ -442,7 +442,7 @@ mc.subscribe(EventType.ACK, handler)
| `MESHCORE_LOG_LEVEL` | `INFO` | Logging level (`DEBUG`/`INFO`/`WARNING`/`ERROR`) |
| `MESHCORE_DATABASE_PATH` | `data/meshcore.db` | SQLite database location |
| `MESHCORE_DISABLE_BOTS` | `false` | Disable bot system entirely (blocks execution and config) |
| `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | `false` | Enable periodic `get_msg()` fallback polling if radio events are not reliably surfacing messages |
| `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | `false` | Switch the always-on message audit task from hourly checks to aggressive 10-second `get_msg()` fallback polling |
**Note:** Runtime app settings are stored in the database (`app_settings` table), not environment variables. These include `max_radio_contacts`, `auto_decrypt_dm_on_advert`, `sidebar_sort_order`, `advert_interval`, `last_advert_time`, `favorites`, `last_message_times`, `flood_scope`, `blocked_keys`, and `blocked_names`. `max_radio_contacts` is the configured radio contact capacity baseline used by background maintenance: favorites reload first, non-favorite fill targets about 80% of that value, and full offload/reload triggers around 95% occupancy. They are configured via `GET/PATCH /api/settings`. MQTT, bot, webhook, and Apprise configs are stored in the `fanout_configs` table, managed via `/api/fanout`.

View File

@@ -224,11 +224,11 @@ npm run build # build the frontend
| `MESHCORE_LOG_LEVEL` | INFO | DEBUG, INFO, WARNING, ERROR |
| `MESHCORE_DATABASE_PATH` | data/meshcore.db | SQLite database path |
| `MESHCORE_DISABLE_BOTS` | false | Disable bot system entirely (blocks execution and config) |
| `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | false | Enable periodic `get_msg()` fallback polling if radio events are not reliably surfacing messages |
| `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | false | Run aggressive 10-second `get_msg()` fallback polling instead of the default hourly audit task |
Only one transport may be active at a time. If multiple are set, the server will refuse to start.
By default the app relies on radio events plus MeshCore auto-fetch for incoming messages. If you see messages on the radio that never show up in the app, try `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK=true` to add a more aggressive periodic `get_msg()` safety net.
By default the app relies on radio events plus MeshCore auto-fetch for incoming messages, and also runs a low-frequency hourly audit poll. If that audit ever finds radio data that was not surfaced through event subscription, the backend logs an error and the UI shows a toast telling the operator to check the logs. If you see that warning, or if messages on the radio never show up in the app, try `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK=true` to switch that task into a more aggressive 10-second `get_msg()` safety net.
## Additional Setup

View File

@@ -87,7 +87,7 @@ app/
- `RadioManager.post_connect_setup()` delegates to `services/radio_lifecycle.py`.
- Routers, startup/lifespan code, fanout helpers, and `radio_sync.py` should reach radio state through `services/radio_runtime.py`, not by importing `app.radio.radio_manager` directly.
- Shared reconnect/setup helpers in `services/radio_lifecycle.py` are used by startup, the monitor, and manual reconnect/reboot flows before broadcasting healthy state.
- Setup still includes handler registration, key export, time sync, contact/channel sync, and advertisement tasks. Fallback message polling starts only when `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK=true`.
- Setup still includes handler registration, key export, time sync, contact/channel sync, and advertisement tasks. The message-poll task always starts: by default it runs as a low-frequency hourly audit, and `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK=true` switches it to aggressive 10-second polling.
- Post-connect setup is timeout-bounded. If initial radio offload/setup hangs too long, the backend logs the failure and broadcasts an `error` toast telling the operator to reboot the radio and restart the server.
## Important Behaviors

View File

@@ -17,6 +17,7 @@ from contextlib import asynccontextmanager
from meshcore import EventType, MeshCore
from app.config import settings
from app.event_handlers import cleanup_expired_acks
from app.models import Contact, ContactUpsert
from app.radio import RadioOperationBusyError
@@ -28,6 +29,7 @@ from app.repository import (
)
from app.services.contact_reconciliation import reconcile_contact_messages
from app.services.radio_runtime import radio_runtime as radio_manager
from app.websocket import broadcast_error
logger = logging.getLogger(__name__)
@@ -99,9 +101,12 @@ async def upsert_channel_from_radio_slot(payload: dict, *, on_radio: bool) -> st
# Message poll task handle
_message_poll_task: asyncio.Task | None = None
# Message poll interval in seconds (10s gives DM ACKs plenty of time to arrive)
# Message poll interval in seconds when aggressive fallback is enabled.
MESSAGE_POLL_INTERVAL = 10
# Always-on audit interval when aggressive fallback is disabled.
MESSAGE_POLL_AUDIT_INTERVAL = 3600
# Periodic advertisement task handle
_advert_task: asyncio.Task | None = None
@@ -445,7 +450,10 @@ async def _message_poll_loop():
"""Background task that periodically polls for messages."""
while True:
try:
await asyncio.sleep(MESSAGE_POLL_INTERVAL)
aggressive_fallback = settings.enable_message_poll_fallback
await asyncio.sleep(
MESSAGE_POLL_INTERVAL if aggressive_fallback else MESSAGE_POLL_AUDIT_INTERVAL
)
if radio_manager.is_connected and not is_polling_paused():
try:
@@ -456,10 +464,24 @@ async def _message_poll_loop():
) as mc:
count = await poll_for_messages(mc)
if count > 0:
logger.warning(
"Poll loop caught %d message(s) missed by auto-fetch",
count,
)
if aggressive_fallback:
logger.warning(
"Poll loop caught %d message(s) missed by auto-fetch",
count,
)
else:
logger.error(
"Periodic radio audit caught %d message(s) that were not "
"surfaced via event subscription. See README and consider "
"setting MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK=true to "
"enable more frequent polling.",
count,
)
broadcast_error(
"A periodic poll task has discovered radio inconsistencies.",
"Please check the logs for recommendations (search "
"'MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK').",
)
except RadioOperationBusyError:
logger.debug("Skipping message poll: radio busy")
@@ -474,7 +496,16 @@ def start_message_polling():
global _message_poll_task
if _message_poll_task is None or _message_poll_task.done():
_message_poll_task = asyncio.create_task(_message_poll_loop())
logger.info("Started periodic message polling (interval: %ds)", MESSAGE_POLL_INTERVAL)
if settings.enable_message_poll_fallback:
logger.info(
"Started periodic message polling task (aggressive fallback, interval: %ds)",
MESSAGE_POLL_INTERVAL,
)
else:
logger.info(
"Started periodic message audit task (interval: %ds)",
MESSAGE_POLL_AUDIT_INTERVAL,
)
async def stop_message_polling():

View File

@@ -1,8 +1,6 @@
import asyncio
import logging
from app.config import settings
logger = logging.getLogger(__name__)
POST_CONNECT_SETUP_TIMEOUT_SECONDS = 300
@@ -137,10 +135,7 @@ async def run_post_connect_setup(radio_manager) -> None:
# These tasks acquire their own locks when they need radio access.
start_periodic_sync()
start_periodic_advert()
if settings.enable_message_poll_fallback:
start_message_polling()
else:
logger.info("Fallback message polling disabled; relying on radio events")
start_message_polling()
radio_manager._setup_complete = True
finally:

View File

@@ -183,9 +183,7 @@ describe('SettingsModal', () => {
renderModal();
openRadioSection();
expect(
screen.getByText(/Configured radio contact capacity/i)
).toBeInTheDocument();
expect(screen.getByText(/Configured radio contact capacity/i)).toBeInTheDocument();
});
it('saves changed max contacts value through onSaveAppSettings', async () => {

View File

@@ -751,8 +751,8 @@ class TestPostConnectSetupOrdering:
mock_mc.commands.set_flood_scope.assert_awaited_once_with("")
@pytest.mark.asyncio
async def test_message_polling_disabled_by_default(self):
"""Post-connect setup does not start fallback polling unless explicitly enabled."""
async def test_message_polling_starts_hourly_audit_by_default(self):
"""Post-connect setup always starts the message audit task by default."""
from app.models import AppSettings
from app.radio import RadioManager
@@ -780,11 +780,11 @@ class TestPostConnectSetupOrdering:
):
await rm.post_connect_setup()
mock_start_message_polling.assert_not_called()
mock_start_message_polling.assert_called_once()
@pytest.mark.asyncio
async def test_message_polling_starts_when_env_flag_enabled(self):
"""Post-connect setup starts fallback polling when the env-backed setting is enabled."""
"""Post-connect setup also starts the same task when aggressive fallback is enabled."""
from app.models import AppSettings
from app.radio import RadioManager
@@ -809,7 +809,6 @@ class TestPostConnectSetupOrdering:
patch("app.radio_sync.start_periodic_advert"),
patch("app.radio_sync.drain_pending_messages", new_callable=AsyncMock, return_value=0),
patch("app.radio_sync.start_message_polling") as mock_start_message_polling,
patch("app.services.radio_lifecycle.settings.enable_message_poll_fallback", True),
):
await rm.post_connect_setup()

View File

@@ -1191,6 +1191,34 @@ def _sleep_controller(*, cancel_after: int = 2):
class TestMessagePollLoopRaces:
"""Regression tests for disconnect/reconnect race paths in _message_poll_loop."""
@pytest.mark.asyncio
async def test_uses_hourly_audit_interval_when_fallback_disabled(self):
rm, _mc = _make_connected_manager()
mock_sleep, sleep_calls = _sleep_controller(cancel_after=1)
with (
patch("app.radio_sync.radio_manager", rm),
patch("app.radio_sync.settings.enable_message_poll_fallback", False),
patch("asyncio.sleep", side_effect=mock_sleep),
):
await _message_poll_loop()
assert sleep_calls == [3600]
@pytest.mark.asyncio
async def test_uses_fast_poll_interval_when_fallback_enabled(self):
rm, _mc = _make_connected_manager()
mock_sleep, sleep_calls = _sleep_controller(cancel_after=1)
with (
patch("app.radio_sync.radio_manager", rm),
patch("app.radio_sync.settings.enable_message_poll_fallback", True),
patch("asyncio.sleep", side_effect=mock_sleep),
):
await _message_poll_loop()
assert sleep_calls == [10]
@pytest.mark.asyncio
async def test_disconnect_race_between_precheck_and_lock(self):
"""RadioDisconnectedError between is_connected and radio_operation()
@@ -1248,6 +1276,46 @@ class TestMessagePollLoopRaces:
mock_poll.assert_called_once_with(mock_mc)
@pytest.mark.asyncio
async def test_hourly_audit_crows_loudly_when_it_finds_hidden_messages(self):
rm, mock_mc = _make_connected_manager()
mock_sleep, _ = _sleep_controller(cancel_after=2)
with (
patch("app.radio_sync.radio_manager", rm),
patch("app.radio_sync.settings.enable_message_poll_fallback", False),
patch("asyncio.sleep", side_effect=mock_sleep),
patch("app.radio_sync.poll_for_messages", new_callable=AsyncMock, return_value=2),
patch("app.radio_sync.logger") as mock_logger,
patch("app.radio_sync.broadcast_error") as mock_broadcast_error,
):
await _message_poll_loop()
mock_logger.error.assert_called_once()
mock_broadcast_error.assert_called_once_with(
"A periodic poll task has discovered radio inconsistencies.",
"Please check the logs for recommendations (search "
"'MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK').",
)
@pytest.mark.asyncio
async def test_fast_poll_logs_missed_messages_without_error_toast(self):
rm, mock_mc = _make_connected_manager()
mock_sleep, _ = _sleep_controller(cancel_after=2)
with (
patch("app.radio_sync.radio_manager", rm),
patch("app.radio_sync.settings.enable_message_poll_fallback", True),
patch("asyncio.sleep", side_effect=mock_sleep),
patch("app.radio_sync.poll_for_messages", new_callable=AsyncMock, return_value=2),
patch("app.radio_sync.logger") as mock_logger,
patch("app.radio_sync.broadcast_error") as mock_broadcast_error,
):
await _message_poll_loop()
mock_logger.warning.assert_called_once()
mock_broadcast_error.assert_not_called()
class TestPeriodicAdvertLoopRaces:
"""Regression tests for disconnect/reconnect race paths in _periodic_advert_loop."""
@@ -1356,7 +1424,9 @@ class TestPeriodicSyncLoopRaces:
patch("app.radio_sync.radio_manager", rm),
patch("asyncio.sleep", side_effect=mock_sleep),
patch("app.radio_sync.cleanup_expired_acks") as mock_cleanup,
patch("app.radio_sync.should_run_full_periodic_sync", new_callable=AsyncMock) as mock_check,
patch(
"app.radio_sync.should_run_full_periodic_sync", new_callable=AsyncMock
) as mock_check,
patch("app.radio_sync.sync_and_offload_all", new_callable=AsyncMock) as mock_sync,
patch("app.radio_sync.sync_radio_time", new_callable=AsyncMock) as mock_time,
):
@@ -1380,7 +1450,9 @@ class TestPeriodicSyncLoopRaces:
patch("app.radio_sync.radio_manager", rm),
patch("asyncio.sleep", side_effect=mock_sleep),
patch("app.radio_sync.cleanup_expired_acks") as mock_cleanup,
patch("app.radio_sync.should_run_full_periodic_sync", new_callable=AsyncMock) as mock_check,
patch(
"app.radio_sync.should_run_full_periodic_sync", new_callable=AsyncMock
) as mock_check,
patch("app.radio_sync.sync_and_offload_all", new_callable=AsyncMock) as mock_sync,
patch("app.radio_sync.sync_radio_time", new_callable=AsyncMock) as mock_time,
):
@@ -1405,7 +1477,11 @@ class TestPeriodicSyncLoopRaces:
patch("app.radio_sync.radio_manager", rm),
patch("asyncio.sleep", side_effect=mock_sleep),
patch("app.radio_sync.cleanup_expired_acks") as mock_cleanup,
patch("app.radio_sync.should_run_full_periodic_sync", new_callable=AsyncMock, return_value=True),
patch(
"app.radio_sync.should_run_full_periodic_sync",
new_callable=AsyncMock,
return_value=True,
),
patch("app.radio_sync.sync_and_offload_all", new_callable=AsyncMock) as mock_sync,
patch("app.radio_sync.sync_radio_time", new_callable=AsyncMock) as mock_time,
):
@@ -1425,7 +1501,11 @@ class TestPeriodicSyncLoopRaces:
patch("app.radio_sync.radio_manager", rm),
patch("asyncio.sleep", side_effect=mock_sleep),
patch("app.radio_sync.cleanup_expired_acks") as mock_cleanup,
patch("app.radio_sync.should_run_full_periodic_sync", new_callable=AsyncMock, return_value=False),
patch(
"app.radio_sync.should_run_full_periodic_sync",
new_callable=AsyncMock,
return_value=False,
),
patch("app.radio_sync.sync_and_offload_all", new_callable=AsyncMock) as mock_sync,
patch("app.radio_sync.sync_radio_time", new_callable=AsyncMock) as mock_time,
):