Add clowntown clock rollover trick

This commit is contained in:
Jack Kingsman
2026-03-16 22:37:13 -07:00
parent 86170766eb
commit a0459edf62
5 changed files with 183 additions and 7 deletions

View File

@@ -8,6 +8,7 @@ These are intended for diagnosing or working around radios that behave oddly.
|----------|---------|-------------|
| `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | false | Run aggressive 10-second `get_msg()` fallback polling to check for messages |
| `MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE` | false | Disable channel-slot reuse and force `set_channel(...)` before every channel send |
| `__CLOWNTOWN_DO_CLOCK_WRAPAROUND` | false | Highly experimental: if the radio clock is ahead of system time, try forcing the clock to `0xFFFFFFFF`, wait for uint32 wraparound, and then retry normal time sync before falling back to reboot |
By default the app relies on radio events plus MeshCore auto-fetch for incoming messages, and also runs a low-frequency hourly audit poll. That audit checks both:
@@ -16,6 +17,8 @@ By default the app relies on radio events plus MeshCore auto-fetch for incoming
If the audit finds a mismatch, you'll see an error in the application UI and your logs. If you see that warning, or if messages on the radio never show up in the app, try `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK=true` to switch that task into a more aggressive 10-second safety net. If room sends appear to be using the wrong channel slot or another client is changing slots underneath this app, try `MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE=true` to force the radio to validate the channel slot is valid before sending (will delay sending by ~500ms).
`__CLOWNTOWN_DO_CLOCK_WRAPAROUND=true` is a last-resort clock remediation for nodes whose RTC is stuck in the future and where rescue-mode time setting or GPS-based time is not available. It intentionally relies on the clock rolling past the 32-bit epoch boundary, which is board-specific behavior and may not be safe or effective on all MeshCore targets. Treat it as highly experimental.
## HTTPS
WebGPU room-finding requires a secure context when you are not on `localhost`.

View File

@@ -4,7 +4,7 @@ from collections import deque
from threading import Lock
from typing import Literal
from pydantic import model_validator
from pydantic import Field, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -22,6 +22,10 @@ class Settings(BaseSettings):
disable_bots: bool = False
enable_message_poll_fallback: bool = False
force_channel_slot_reconfigure: bool = False
clowntown_do_clock_wraparound: bool = Field(
default=False,
validation_alias="__CLOWNTOWN_DO_CLOCK_WRAPAROUND",
)
basic_auth_username: str = ""
basic_auth_password: str = ""

View File

@@ -799,6 +799,75 @@ async def stop_periodic_advert():
# Prevents reboot-loop: once we've rebooted to fix clock skew this session,
# don't do it again (the hardware RTC case can't be fixed by reboot).
_clock_reboot_attempted: bool = False
_CLOCK_WRAP_TARGET = 0xFFFFFFFF
_CLOCK_WRAP_POLL_INTERVAL = 0.2
_CLOCK_WRAP_TIMEOUT = 3.0
async def _query_radio_time(mc: MeshCore) -> int | None:
"""Return the radio's current epoch, or None if it can't be read."""
try:
result = await mc.commands.get_time()
except Exception:
return None
if result.payload is None:
return None
value = result.payload.get("time")
if isinstance(value, int):
return value
return None
async def _attempt_clock_wraparound(mc: MeshCore, *, now: int, observed_radio_time: int) -> bool:
"""Try the experimental uint32 wraparound trick, then retry normal time sync."""
logger.warning(
"Experimental __CLOWNTOWN_DO_CLOCK_WRAPAROUND enabled: attempting uint32 "
"clock wraparound before normal time sync (radio=%d, system=%d).",
observed_radio_time,
now,
)
result = await mc.commands.set_time(_CLOCK_WRAP_TARGET)
if result.type != EventType.OK:
logger.warning(
"Clock wraparound pre-step failed: set_time(%d) returned %s.",
_CLOCK_WRAP_TARGET,
result.type,
)
return False
deadline = time.monotonic() + _CLOCK_WRAP_TIMEOUT
wrapped_time: int | None = None
while time.monotonic() < deadline:
await asyncio.sleep(_CLOCK_WRAP_POLL_INTERVAL)
wrapped_time = await _query_radio_time(mc)
if wrapped_time is not None and wrapped_time < 60:
break
else:
wrapped_time = None
if wrapped_time is None:
logger.warning(
"Clock wraparound experiment did not observe a wrapped epoch within %.1f "
"seconds; falling back to normal recovery.",
_CLOCK_WRAP_TIMEOUT,
)
return False
logger.warning(
"Clock wraparound experiment observed wrapped epoch %d; retrying normal time sync.",
wrapped_time,
)
retry = await mc.commands.set_time(now)
if retry.type == EventType.OK:
logger.warning("Clock sync succeeded after experimental wraparound.")
return True
logger.warning(
"Clock sync still failed after experimental wraparound: set_time(%d) returned %s.",
now,
retry.type,
)
return False
async def sync_radio_time(mc: MeshCore) -> bool:
@@ -821,6 +890,20 @@ async def sync_radio_time(mc: MeshCore) -> bool:
global _clock_reboot_attempted # noqa: PLW0603
try:
now = int(time.time())
preflight_radio_time: int | None = None
wraparound_attempted = False
if settings.clowntown_do_clock_wraparound:
preflight_radio_time = await _query_radio_time(mc)
if preflight_radio_time is not None and preflight_radio_time > now:
wraparound_attempted = True
if await _attempt_clock_wraparound(
mc,
now=now,
observed_radio_time=preflight_radio_time,
):
return True
result = await mc.commands.set_time(now)
if result.type == EventType.OK:
@@ -829,11 +912,7 @@ async def sync_radio_time(mc: MeshCore) -> bool:
# Firmware rejected the time (most likely radio clock is ahead).
# Query actual radio time so we can report the delta.
try:
time_result = await mc.commands.get_time()
radio_time = time_result.payload.get("time") if time_result.payload else None
except Exception:
radio_time = None
radio_time = await _query_radio_time(mc)
if radio_time is not None:
delta = radio_time - now
@@ -853,6 +932,19 @@ async def sync_radio_time(mc: MeshCore) -> bool:
result.type,
)
if (
settings.clowntown_do_clock_wraparound
and not wraparound_attempted
and radio_time is not None
and radio_time > now
and await _attempt_clock_wraparound(
mc,
now=now,
observed_radio_time=radio_time,
)
):
return True
# If the clock is significantly ahead and we haven't already tried
# a corrective reboot this session, reboot the radio. Boards with
# a volatile RTC (most companion radios) will reset their clock on

View File

@@ -118,3 +118,12 @@ class TestBasicAuthConfiguration:
basic_auth_username="",
basic_auth_password="secret",
)
class TestExperimentalAliases:
"""Ensure exact-name experimental env vars still map into settings."""
def test_clowntown_wraparound_alias_reads_exact_env_var(self, monkeypatch):
monkeypatch.setenv("__CLOWNTOWN_DO_CLOCK_WRAPAROUND", "true")
s = Settings(serial_port="", tcp_host="", ble_address="")
assert s.clowntown_do_clock_wraparound is True

View File

@@ -5,7 +5,7 @@ contact/channel sync operations, and default channel management.
"""
import asyncio
from unittest.mock import AsyncMock, MagicMock, patch
from unittest.mock import AsyncMock, MagicMock, call, patch
import pytest
from meshcore import EventType
@@ -169,8 +169,11 @@ class TestSyncRadioTime:
import app.radio_sync as _mod
_mod._clock_reboot_attempted = False
prev_wrap = _mod.settings.clowntown_do_clock_wraparound
_mod.settings.clowntown_do_clock_wraparound = False
yield
_mod._clock_reboot_attempted = False
_mod.settings.clowntown_do_clock_wraparound = prev_wrap
@pytest.mark.asyncio
async def test_returns_true_on_success(self):
@@ -219,6 +222,71 @@ class TestSyncRadioTime:
mock_mc.commands.get_time.assert_called_once()
mock_mc.commands.reboot.assert_called_once()
@pytest.mark.asyncio
async def test_wraparound_can_fix_future_skew_before_normal_set(self):
"""Experimental wraparound retries time sync before the reboot fallback."""
import app.radio_sync as _mod
_mod.settings.clowntown_do_clock_wraparound = True
mock_mc = MagicMock()
mock_mc.commands.get_time = AsyncMock(
side_effect=[
Event(EventType.CURRENT_TIME, {"time": 2000}),
Event(EventType.CURRENT_TIME, {"time": 1}),
]
)
mock_mc.commands.set_time = AsyncMock(
side_effect=[
Event(EventType.OK, {}),
Event(EventType.OK, {}),
]
)
mock_mc.commands.reboot = AsyncMock()
with (
patch("app.radio_sync.asyncio.sleep", new=AsyncMock()),
patch("app.radio_sync.time.time", return_value=1000),
patch("app.radio_sync.time.monotonic", side_effect=[0.0, 0.0]),
):
result = await sync_radio_time(mock_mc)
assert result is True
assert mock_mc.commands.set_time.call_args_list == [
call(0xFFFFFFFF),
call(1000),
]
assert mock_mc.commands.get_time.call_count == 2
mock_mc.commands.reboot.assert_not_called()
@pytest.mark.asyncio
async def test_wraparound_failure_falls_back_to_reboot(self):
"""A failed experimental wraparound still uses the existing reboot recovery."""
import app.radio_sync as _mod
_mod.settings.clowntown_do_clock_wraparound = True
mock_mc = MagicMock()
mock_mc.commands.set_time = AsyncMock(
return_value=Event(EventType.ERROR, {"reason": "illegal_arg"})
)
mock_mc.commands.get_time = AsyncMock(
side_effect=[
Event(EventType.CURRENT_TIME, {"time": 2000}),
Event(EventType.CURRENT_TIME, {"time": 2000}),
]
)
mock_mc.commands.reboot = AsyncMock()
with (
patch("app.radio_sync.time.time", return_value=1000),
patch("app.radio_sync._attempt_clock_wraparound", new=AsyncMock(return_value=False)),
):
result = await sync_radio_time(mock_mc)
assert result is False
mock_mc.commands.reboot.assert_called_once()
@pytest.mark.asyncio
async def test_does_not_reboot_twice(self):
"""Second rejection logs hardware RTC warning instead of rebooting."""