mirror of
https://github.com/jkingsman/Remote-Terminal-for-MeshCore.git
synced 2026-06-27 21:41:02 +02:00
Add documentation and force-lock-acquisition mode for channel management
This commit is contained in:
@@ -437,11 +437,12 @@ mc.subscribe(EventType.ACK, handler)
|
||||
| `MESHCORE_LOG_LEVEL` | `INFO` | Logging level (`DEBUG`/`INFO`/`WARNING`/`ERROR`) |
|
||||
| `MESHCORE_DATABASE_PATH` | `data/meshcore.db` | SQLite database location |
|
||||
| `MESHCORE_DISABLE_BOTS` | `false` | Disable bot system entirely (blocks execution and config) |
|
||||
| `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | `false` | Switch the always-on message audit task from hourly checks to aggressive 10-second `get_msg()` fallback polling |
|
||||
| `MESHCORE_BASIC_AUTH_USERNAME` | *(none)* | Optional app-wide HTTP Basic auth username; must be set together with `MESHCORE_BASIC_AUTH_PASSWORD` |
|
||||
| `MESHCORE_BASIC_AUTH_PASSWORD` | *(none)* | Optional app-wide HTTP Basic auth password; must be set together with `MESHCORE_BASIC_AUTH_USERNAME` |
|
||||
| `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | `false` | Switch the always-on radio audit task from hourly checks to aggressive 10-second polling; the audit checks both missed message drift and channel-slot cache drift |
|
||||
| `MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE` | `false` | Disable channel-slot reuse and force `set_channel(...)` before every channel send, even on serial/BLE |
|
||||
|
||||
**Note:** Runtime app settings are stored in the database (`app_settings` table), not environment variables. These include `max_radio_contacts`, `auto_decrypt_dm_on_advert`, `sidebar_sort_order`, `advert_interval`, `last_advert_time`, `favorites`, `last_message_times`, `flood_scope`, `blocked_keys`, and `blocked_names`. `max_radio_contacts` is the configured radio contact capacity baseline used by background maintenance: favorites reload first, non-favorite fill targets about 80% of that value, and full offload/reload triggers around 95% occupancy. They are configured via `GET/PATCH /api/settings`. MQTT, bot, webhook, Apprise, and SQS configs are stored in the `fanout_configs` table, managed via `/api/fanout`.
|
||||
**Note:** Runtime app settings are stored in the database (`app_settings` table), not environment variables. These include `max_radio_contacts`, `auto_decrypt_dm_on_advert`, `sidebar_sort_order`, `advert_interval`, `last_advert_time`, `favorites`, `last_message_times`, `flood_scope`, `blocked_keys`, and `blocked_names`. `max_radio_contacts` is the configured radio contact capacity baseline used by background maintenance: favorites reload first, non-favorite fill targets about 80% of that value, and full offload/reload triggers around 95% occupancy. They are configured via `GET/PATCH /api/settings`. MQTT, bot, webhook, Apprise, and SQS configs are stored in the `fanout_configs` table, managed via `/api/fanout`. If the radio's channel slots appear unstable or another client is mutating them underneath this app, operators can force the old always-reconfigure send path with `MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE=true`.
|
||||
|
||||
Byte-perfect channel retries are user-triggered via `POST /api/messages/channel/{message_id}/resend` and are allowed for 30 seconds after the original send.
|
||||
|
||||
|
||||
@@ -224,7 +224,6 @@ npm run build # build the frontend
|
||||
| `MESHCORE_LOG_LEVEL` | INFO | DEBUG, INFO, WARNING, ERROR |
|
||||
| `MESHCORE_DATABASE_PATH` | data/meshcore.db | SQLite database path |
|
||||
| `MESHCORE_DISABLE_BOTS` | false | Disable bot system entirely (blocks execution and config) |
|
||||
| `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | false | Run aggressive 10-second `get_msg()` fallback polling instead of the default hourly audit task |
|
||||
| `MESHCORE_BASIC_AUTH_USERNAME` | | Optional app-wide HTTP Basic auth username; must be set together with `MESHCORE_BASIC_AUTH_PASSWORD` |
|
||||
| `MESHCORE_BASIC_AUTH_PASSWORD` | | Optional app-wide HTTP Basic auth password; must be set together with `MESHCORE_BASIC_AUTH_USERNAME` |
|
||||
|
||||
@@ -232,7 +231,21 @@ Only one transport may be active at a time. If multiple are set, the server will
|
||||
|
||||
If you enable Basic Auth, protect the app with HTTPS. HTTP Basic credentials are not safe on plain HTTP.
|
||||
|
||||
By default the app relies on radio events plus MeshCore auto-fetch for incoming messages, and also runs a low-frequency hourly audit poll. If that audit ever finds radio data that was not surfaced through event subscription, the backend logs an error and the UI shows a toast telling the operator to check the logs. If you see that warning, or if messages on the radio never show up in the app, try `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK=true` to switch that task into a more aggressive 10-second `get_msg()` safety net.
|
||||
### Remediation Environment Variables
|
||||
|
||||
These are intended for diagnosing or working around radios that behave oddly.
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | false | Run aggressive 10-second `get_msg()` fallback polling instead of the default hourly sanity check |
|
||||
| `MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE` | false | Disable channel-slot reuse and force `set_channel(...)` before every channel send |
|
||||
|
||||
By default the app relies on radio events plus MeshCore auto-fetch for incoming messages, and also runs a low-frequency hourly audit poll. That audit checks both:
|
||||
|
||||
- whether messages were left on the radio without reaching the app through event subscription
|
||||
- whether the app's channel-slot expectations still match the radio's actual channel listing
|
||||
|
||||
If the audit finds a mismatch, you'll see an error in the application UI and your logs. If you see that error, or if messages on the radio never show up in the app, try `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK=true` to switch that task into a more aggressive 10-second safety net. If room sends appear to be using the wrong channel slot or another client is changing slots underneath this app, try `MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE=true` to force the app to rewrite the channel slot with `set_channel(...)` before every send (this delays each send by roughly 500 ms).
|
||||
|
||||
## Additional Setup
|
||||
|
||||
|
||||
+2
-1
@@ -89,7 +89,7 @@ app/
|
||||
- `RadioManager.post_connect_setup()` delegates to `services/radio_lifecycle.py`.
|
||||
- Routers, startup/lifespan code, fanout helpers, and `radio_sync.py` should reach radio state through `services/radio_runtime.py`, not by importing `app.radio.radio_manager` directly.
|
||||
- Shared reconnect/setup helpers in `services/radio_lifecycle.py` are used by startup, the monitor, and manual reconnect/reboot flows before broadcasting healthy state.
|
||||
- Setup still includes handler registration, key export, time sync, contact/channel sync, and advertisement tasks. The message-poll task always starts: by default it runs as a low-frequency hourly audit, and `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK=true` switches it to aggressive 10-second polling.
|
||||
- Setup still includes handler registration, key export, time sync, contact/channel sync, and advertisement tasks. The message-poll task always starts: by default it runs as a low-frequency hourly audit, and `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK=true` switches it to aggressive 10-second polling. That audit checks both missed-radio-message drift and channel-slot cache drift; when a cache mismatch is found, it is logged, an error toast is broadcast, and the send-slot cache is reset.
|
||||
- Post-connect setup is timeout-bounded. If initial radio offload/setup hangs too long, the backend logs the failure and broadcasts an `error` toast telling the operator to reboot the radio and restart the server.
|
||||
|
||||
## Important Behaviors
|
||||
@@ -101,6 +101,7 @@ app/
|
||||
- Channel slot count comes from firmware-reported `DEVICE_INFO.max_channels`; do not hardcode `40` when scanning/offloading channel slots.
|
||||
- Channel sends use a session-local LRU slot cache after startup channel offload clears the radio. Repeated sends to the same room reuse the loaded slot; new rooms fill free slots up to the discovered channel capacity, then evict the least recently used cached room.
|
||||
- TCP radios do not reuse cached slot contents. For TCP, channel sends still force `set_channel(...)` before every send because this backend does not have exclusive device access.
|
||||
- `MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE=true` disables slot reuse on all transports and forces the old always-`set_channel(...)` behavior before every channel send.
|
||||
- Contacts persist `out_path_hash_mode` in the database so contact sync and outbound DM routing reuse the exact stored mode instead of inferring from path bytes.
|
||||
- Contacts may also persist `route_override_path`, `route_override_len`, and `route_override_hash_mode`. `Contact.to_radio_dict()` gives these override fields precedence over learned `last_path*`, while advert processing still updates the learned route for telemetry/fallback.
|
||||
- `contact_advert_paths` identity is `(public_key, path_hex, path_len)` because the same hex bytes can represent different routes at different hop widths.
|
||||
|
||||
@@ -19,6 +19,7 @@ class Settings(BaseSettings):
|
||||
database_path: str = "data/meshcore.db"
|
||||
disable_bots: bool = False
|
||||
enable_message_poll_fallback: bool = False
|
||||
force_channel_slot_reconfigure: bool = False
|
||||
basic_auth_username: str = ""
|
||||
basic_auth_password: str = ""
|
||||
|
||||
|
||||
@@ -234,6 +234,8 @@ class RadioManager:
|
||||
|
||||
def channel_slot_reuse_enabled(self) -> bool:
    """Report whether cached channel slots may be reused for sends.

    Reuse is off whenever ``settings.force_channel_slot_reconfigure`` is set.
    Otherwise it is off for TCP: judged from the live connection info string
    when one is recorded, and from the configured connection type when not.
    """
    if settings.force_channel_slot_reconfigure:
        return False

    connection_info = self._connection_info
    if not connection_info:
        # No connection info recorded; fall back to the configured transport.
        return settings.connection_type != "tcp"
    return not connection_info.startswith("TCP:")
|
||||
@@ -304,6 +306,10 @@ class RadioManager:
|
||||
self._channel_slot_by_key.move_to_end(normalized_key)
|
||||
self._channel_key_by_slot[slot] = normalized_key
|
||||
|
||||
def get_channel_send_cache_snapshot(self) -> list[tuple[str, int]]:
    """Copy the channel send cache into a list of ``(channel key, slot)`` pairs.

    Pairs appear in the cache mapping's current iteration order (LRU order).
    The returned list is a fresh object, so callers may hold it while the
    cache itself changes.
    """
    return [*self._channel_slot_by_key.items()]
|
||||
|
||||
def _find_first_free_channel_slot(self, capacity: int, preferred_slot: int) -> int:
|
||||
"""Pick the first unclaimed app-managed slot, preferring the requested slot."""
|
||||
if preferred_slot < capacity and preferred_slot not in self._channel_key_by_slot:
|
||||
|
||||
@@ -466,6 +466,63 @@ async def poll_for_messages(mc: MeshCore) -> int:
|
||||
return count
|
||||
|
||||
|
||||
def _normalize_channel_secret(payload: dict) -> bytes:
|
||||
"""Return a normalized bytes representation of a radio channel secret."""
|
||||
secret = payload.get("channel_secret", b"")
|
||||
if isinstance(secret, bytes):
|
||||
return secret
|
||||
return bytes(secret)
|
||||
|
||||
|
||||
async def audit_channel_send_cache(mc: MeshCore) -> bool:
    """Cross-check the cached channel send slots against the radio.

    Every cached ``(channel key, slot)`` pair is read back with
    ``mc.commands.get_channel`` and compared with the cached key and the
    channel name stored in the database.

    Returns:
        ``True`` when slot reuse is disabled, the cache is empty, or every
        cached slot matches. ``False`` when at least one slot differs; in that
        case the mismatches are logged, the send-slot cache is reset, and an
        error is broadcast.
    """
    if not radio_manager.channel_slot_reuse_enabled():
        return True

    snapshot = radio_manager.get_channel_send_cache_snapshot()
    if not snapshot:
        return True

    problems: list[str] = []
    for cached_key, slot_index in snapshot:
        reply = await mc.commands.get_channel(slot_index)
        if reply.type != EventType.CHANNEL_INFO:
            # The radio did not return channel info for this slot at all.
            problems.append(
                f"slot {slot_index}: expected {cached_key[:8]} but radio returned {reply.type}"
            )
        else:
            radio_name = reply.payload.get("channel_name") or ""
            radio_key = _normalize_channel_secret(reply.payload).hex().upper()
            db_channel = await ChannelRepository.get_by_key(cached_key)
            db_name = None if db_channel is None else db_channel.name

            slot_matches = (
                radio_key == cached_key and db_name is not None and radio_name == db_name
            )
            if not slot_matches:
                problems.append(
                    f"slot {slot_index}: expected {db_name or '(missing db row)'} "
                    f"{cached_key[:8]}, got {radio_name or '(empty)'} {radio_key[:8]}"
                )

    if problems:
        logger.error(
            "A periodic radio audit discovered that the channel send-slot cache fell out of sync "
            "with radio state. This indicates that some other system, internal or external to the "
            "radio, has updated the channel slots on the radio (which the app assumes it has "
            "exclusive rights to, except on TCP-linked devices). The cache is resetting now, but "
            "you should review the README.md and consider using the environment variable "
            "MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE=true to make the radio use non-optimistic "
            "channel management and force-write the channel to radio before each send. This is a "
            "minor performance hit, but guarantees consistency. Mismatches found: %s",
            "; ".join(problems),
        )
        radio_manager.reset_channel_send_cache()
        broadcast_error(
            "A periodic poll task has discovered radio inconsistencies.",
            "Please check the logs for recommendations (search 'MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE').",
        )
        return False

    return True
|
||||
|
||||
|
||||
async def _message_poll_loop():
|
||||
"""Background task that periodically polls for messages."""
|
||||
while True:
|
||||
@@ -483,6 +540,7 @@ async def _message_poll_loop():
|
||||
suspend_auto_fetch=True,
|
||||
) as mc:
|
||||
count = await poll_for_messages(mc)
|
||||
await audit_channel_send_cache(mc)
|
||||
if count > 0:
|
||||
if aggressive_fallback:
|
||||
logger.warning(
|
||||
|
||||
@@ -16,6 +16,7 @@ from app.radio_sync import (
|
||||
_message_poll_loop,
|
||||
_periodic_advert_loop,
|
||||
_periodic_sync_loop,
|
||||
audit_channel_send_cache,
|
||||
ensure_contact_on_radio,
|
||||
is_polling_paused,
|
||||
pause_polling,
|
||||
@@ -38,6 +39,7 @@ def reset_sync_state():
|
||||
prev_mc = radio_manager._meshcore
|
||||
prev_lock = radio_manager._operation_lock
|
||||
prev_max_channels = radio_manager.max_channels
|
||||
prev_connection_info = radio_manager._connection_info
|
||||
prev_slot_by_key = radio_manager._channel_slot_by_key.copy()
|
||||
prev_key_by_slot = radio_manager._channel_key_by_slot.copy()
|
||||
|
||||
@@ -49,6 +51,7 @@ def reset_sync_state():
|
||||
radio_manager._meshcore = prev_mc
|
||||
radio_manager._operation_lock = prev_lock
|
||||
radio_manager.max_channels = prev_max_channels
|
||||
radio_manager._connection_info = prev_connection_info
|
||||
radio_manager._channel_slot_by_key = prev_slot_by_key
|
||||
radio_manager._channel_key_by_slot = prev_key_by_slot
|
||||
|
||||
@@ -1341,6 +1344,70 @@ class TestMessagePollLoopRaces:
|
||||
mock_broadcast_error.assert_not_called()
|
||||
|
||||
|
||||
class TestChannelSendCacheAudit:
    """Exercise the audit that compares cached channel send slots with radio state."""

    @pytest.mark.asyncio
    async def test_audit_channel_send_cache_accepts_matching_radio_state(self, test_db):
        """A slot whose name and secret match the cache passes without a toast."""
        key_hex = "ab" * 16
        await ChannelRepository.upsert(key=key_hex, name="#flightless")
        radio_manager.note_channel_slot_loaded(key_hex, 0)

        radio_reply = MagicMock(
            type=EventType.CHANNEL_INFO,
            payload={
                "channel_name": "#flightless",
                "channel_secret": bytes.fromhex(key_hex),
            },
        )
        radio = MagicMock()
        radio.commands.get_channel = AsyncMock(return_value=radio_reply)

        with patch("app.radio_sync.broadcast_error") as fake_toast:
            audit_ok = await audit_channel_send_cache(radio)

        assert audit_ok is True
        radio.commands.get_channel.assert_awaited_once_with(0)
        fake_toast.assert_not_called()
        assert radio_manager.get_cached_channel_slot(key_hex) == 0

    @pytest.mark.asyncio
    async def test_audit_channel_send_cache_resets_and_toasts_on_mismatch(self, test_db):
        """A slot holding a different channel is logged, toasted, and drops the cache."""
        key_hex = "cd" * 16
        await ChannelRepository.upsert(key=key_hex, name="#flightless")
        radio_manager.note_channel_slot_loaded(key_hex, 0)

        radio_reply = MagicMock(
            type=EventType.CHANNEL_INFO,
            payload={
                "channel_name": "#elsewhere",
                "channel_secret": bytes.fromhex("ef" * 16),
            },
        )
        radio = MagicMock()
        radio.commands.get_channel = AsyncMock(return_value=radio_reply)

        logger_patch = patch("app.radio_sync.logger")
        toast_patch = patch("app.radio_sync.broadcast_error")
        with logger_patch as fake_logger, toast_patch as fake_toast:
            audit_ok = await audit_channel_send_cache(radio)

        assert audit_ok is False
        fake_logger.error.assert_called_once()
        fake_toast.assert_called_once()
        assert radio_manager.get_cached_channel_slot(key_hex) is None

    @pytest.mark.asyncio
    async def test_audit_channel_send_cache_skips_when_reuse_forced_off(self, test_db):
        """With forced reconfigure on, the audit passes without querying the radio."""
        key_hex = "ef" * 16
        radio_manager.note_channel_slot_loaded(key_hex, 0)
        radio = MagicMock()

        with patch("app.radio.settings.force_channel_slot_reconfigure", True):
            audit_ok = await audit_channel_send_cache(radio)

        assert audit_ok is True
        radio.commands.get_channel.assert_not_called()
|
||||
|
||||
|
||||
class TestPeriodicAdvertLoopRaces:
|
||||
"""Regression tests for disconnect/reconnect race paths in _periodic_advert_loop."""
|
||||
|
||||
|
||||
@@ -450,6 +450,31 @@ class TestOutgoingChannelBroadcast:
|
||||
assert mc.commands.send_chan_msg.await_count == 2
|
||||
assert radio_manager.get_cached_channel_slot(chan_key) is None
|
||||
|
||||
@pytest.mark.asyncio
async def test_send_channel_msg_force_reconfigure_env_disables_reuse(self, test_db):
    """With forced reconfigure on, each send calls set_channel and nothing is cached."""
    radio = _make_mc(name="MyNode")
    key_hex = "e1" * 16
    await ChannelRepository.upsert(key=key_hex, name="#forced")
    radio_manager.max_channels = 4
    # A serial connection string, combined with the forced-reconfigure patch below.
    radio_manager._connection_info = "Serial: /dev/ttyUSB0"

    with (
        patch("app.routers.messages.require_connected", return_value=radio),
        patch.object(radio_manager, "_meshcore", radio),
        patch("app.decoder.calculate_channel_hash", return_value="abcd"),
        patch("app.routers.messages.broadcast_event"),
        patch("app.radio.settings.force_channel_slot_reconfigure", True),
    ):
        for message_text in ("first send", "second send"):
            request = SendChannelMessageRequest(channel_key=key_hex, text=message_text)
            await send_channel_message(request)

    assert radio.commands.set_channel.await_count == 2
    assert radio_manager.get_cached_channel_slot(key_hex) is None
|
||||
|
||||
|
||||
class TestResendChannelMessage:
|
||||
"""Test the user-triggered resend endpoint."""
|
||||
|
||||
Reference in New Issue
Block a user