From a0459edf62af345d74249380c550cc51ae7a5d4a Mon Sep 17 00:00:00 2001 From: Jack Kingsman Date: Mon, 16 Mar 2026 22:37:13 -0700 Subject: [PATCH] Add clowntown clock rollover trick --- README_ADVANCED.md | 3 ++ app/config.py | 6 ++- app/radio_sync.py | 102 +++++++++++++++++++++++++++++++++++++-- tests/test_config.py | 9 ++++ tests/test_radio_sync.py | 70 ++++++++++++++++++++++++++- 5 files changed, 183 insertions(+), 7 deletions(-) diff --git a/README_ADVANCED.md b/README_ADVANCED.md index 69b6936..f061324 100644 --- a/README_ADVANCED.md +++ b/README_ADVANCED.md @@ -8,6 +8,7 @@ These are intended for diagnosing or working around radios that behave oddly. |----------|---------|-------------| | `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK` | false | Run aggressive 10-second `get_msg()` fallback polling to check for messages | | `MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE` | false | Disable channel-slot reuse and force `set_channel(...)` before every channel send | +| `__CLOWNTOWN_DO_CLOCK_WRAPAROUND` | false | Highly experimental: if the radio clock is ahead of system time, try forcing the clock to `0xFFFFFFFF`, wait for uint32 wraparound, and then retry normal time sync before falling back to reboot | By default the app relies on radio events plus MeshCore auto-fetch for incoming messages, and also runs a low-frequency hourly audit poll. That audit checks both: @@ -16,6 +17,8 @@ By default the app relies on radio events plus MeshCore auto-fetch for incoming If the audit finds a mismatch, you'll see an error in the application UI and your logs. If you see that warning, or if messages on the radio never show up in the app, try `MESHCORE_ENABLE_MESSAGE_POLL_FALLBACK=true` to switch that task into a more aggressive 10-second safety net. If room sends appear to be using the wrong channel slot or another client is changing slots underneath this app, try `MESHCORE_FORCE_CHANNEL_SLOT_RECONFIGURE=true` to force the radio to validate the channel slot is valid before sending (will delay sending by ~500ms). +`__CLOWNTOWN_DO_CLOCK_WRAPAROUND=true` is a last-resort clock remediation for nodes whose RTC is stuck in the future and where rescue-mode time setting or GPS-based time is not available. It intentionally relies on the clock rolling past the 32-bit epoch boundary, which is board-specific behavior and may not be safe or effective on all MeshCore targets. Treat it as highly experimental. + ## HTTPS WebGPU room-finding requires a secure context when you are not on `localhost`. diff --git a/app/config.py b/app/config.py index 8d293c4..6bc5e0d 100644 --- a/app/config.py +++ b/app/config.py @@ -4,7 +4,7 @@ from collections import deque from threading import Lock from typing import Literal -from pydantic import model_validator +from pydantic import Field, model_validator from pydantic_settings import BaseSettings, SettingsConfigDict @@ -22,6 +22,10 @@ class Settings(BaseSettings): disable_bots: bool = False enable_message_poll_fallback: bool = False force_channel_slot_reconfigure: bool = False + clowntown_do_clock_wraparound: bool = Field( + default=False, + validation_alias="__CLOWNTOWN_DO_CLOCK_WRAPAROUND", + ) basic_auth_username: str = "" basic_auth_password: str = "" diff --git a/app/radio_sync.py b/app/radio_sync.py index 0dcce0e..40ff4b1 100644 --- a/app/radio_sync.py +++ b/app/radio_sync.py @@ -799,6 +799,75 @@ async def stop_periodic_advert(): # Prevents reboot-loop: once we've rebooted to fix clock skew this session, # don't do it again (the hardware RTC case can't be fixed by reboot). _clock_reboot_attempted: bool = False +_CLOCK_WRAP_TARGET = 0xFFFFFFFF +_CLOCK_WRAP_POLL_INTERVAL = 0.2 +_CLOCK_WRAP_TIMEOUT = 3.0 + + +async def _query_radio_time(mc: MeshCore) -> int | None: + """Return the radio's current epoch, or None if it can't be read.""" + try: + result = await mc.commands.get_time() + except Exception: + return None + if result.payload is None: + return None + value = result.payload.get("time") + if isinstance(value, int): + return value + return None + + +async def _attempt_clock_wraparound(mc: MeshCore, *, now: int, observed_radio_time: int) -> bool: + """Try the experimental uint32 wraparound trick, then retry normal time sync.""" + logger.warning( + "Experimental __CLOWNTOWN_DO_CLOCK_WRAPAROUND enabled: attempting uint32 " + "clock wraparound before normal time sync (radio=%d, system=%d).", + observed_radio_time, + now, + ) + result = await mc.commands.set_time(_CLOCK_WRAP_TARGET) + if result.type != EventType.OK: + logger.warning( + "Clock wraparound pre-step failed: set_time(%d) returned %s.", + _CLOCK_WRAP_TARGET, + result.type, + ) + return False + + deadline = time.monotonic() + _CLOCK_WRAP_TIMEOUT + wrapped_time: int | None = None + while time.monotonic() < deadline: + await asyncio.sleep(_CLOCK_WRAP_POLL_INTERVAL) + wrapped_time = await _query_radio_time(mc) + if wrapped_time is not None and wrapped_time < 60: + break + else: + wrapped_time = None + + if wrapped_time is None: + logger.warning( + "Clock wraparound experiment did not observe a wrapped epoch within %.1f " + "seconds; falling back to normal recovery.", + _CLOCK_WRAP_TIMEOUT, + ) + return False + + logger.warning( + "Clock wraparound experiment observed wrapped epoch %d; retrying normal time sync.", + wrapped_time, + ) + retry = await mc.commands.set_time(now) + if retry.type == EventType.OK: + logger.warning("Clock sync succeeded after experimental wraparound.") + return True + + logger.warning( + "Clock sync still failed after experimental wraparound: set_time(%d) returned %s.", + now, + retry.type, + ) + return False async def sync_radio_time(mc: MeshCore) -> bool: @@ -821,6 +890,20 @@ async def sync_radio_time(mc: MeshCore) -> bool: global _clock_reboot_attempted # noqa: PLW0603 try: now = int(time.time()) + preflight_radio_time: int | None = None + wraparound_attempted = False + + if settings.clowntown_do_clock_wraparound: + preflight_radio_time = await _query_radio_time(mc) + if preflight_radio_time is not None and preflight_radio_time > now: + wraparound_attempted = True + if await _attempt_clock_wraparound( + mc, + now=now, + observed_radio_time=preflight_radio_time, + ): + return True + result = await mc.commands.set_time(now) if result.type == EventType.OK: @@ -829,11 +912,7 @@ async def sync_radio_time(mc: MeshCore) -> bool: # Firmware rejected the time (most likely radio clock is ahead). # Query actual radio time so we can report the delta. - try: - time_result = await mc.commands.get_time() - radio_time = time_result.payload.get("time") if time_result.payload else None - except Exception: - radio_time = None + radio_time = await _query_radio_time(mc) if radio_time is not None: delta = radio_time - now @@ -853,6 +932,19 @@ async def sync_radio_time(mc: MeshCore) -> bool: result.type, ) + if ( + settings.clowntown_do_clock_wraparound + and not wraparound_attempted + and radio_time is not None + and radio_time > now + and await _attempt_clock_wraparound( + mc, + now=now, + observed_radio_time=radio_time, + ) + ): + return True + # If the clock is significantly ahead and we haven't already tried # a corrective reboot this session, reboot the radio. Boards with # a volatile RTC (most companion radios) will reset their clock on diff --git a/tests/test_config.py b/tests/test_config.py index 1aa6ce4..a917cd4 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -118,3 +118,12 @@ class TestBasicAuthConfiguration: basic_auth_username="", basic_auth_password="secret", ) + + +class TestExperimentalAliases: + """Ensure exact-name experimental env vars still map into settings.""" + + def test_clowntown_wraparound_alias_reads_exact_env_var(self, monkeypatch): + monkeypatch.setenv("__CLOWNTOWN_DO_CLOCK_WRAPAROUND", "true") + s = Settings(serial_port="", tcp_host="", ble_address="") + assert s.clowntown_do_clock_wraparound is True diff --git a/tests/test_radio_sync.py b/tests/test_radio_sync.py index bc2ab60..7031c67 100644 --- a/tests/test_radio_sync.py +++ b/tests/test_radio_sync.py @@ -5,7 +5,7 @@ contact/channel sync operations, and default channel management. """ import asyncio -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import AsyncMock, MagicMock, call, patch import pytest from meshcore import EventType @@ -169,8 +169,11 @@ class TestSyncRadioTime: import app.radio_sync as _mod _mod._clock_reboot_attempted = False + prev_wrap = _mod.settings.clowntown_do_clock_wraparound + _mod.settings.clowntown_do_clock_wraparound = False yield _mod._clock_reboot_attempted = False + _mod.settings.clowntown_do_clock_wraparound = prev_wrap @pytest.mark.asyncio async def test_returns_true_on_success(self): @@ -219,6 +222,71 @@ class TestSyncRadioTime: mock_mc.commands.get_time.assert_called_once() mock_mc.commands.reboot.assert_called_once() + @pytest.mark.asyncio + async def test_wraparound_can_fix_future_skew_before_normal_set(self): + """Experimental wraparound retries time sync before the reboot fallback.""" + import app.radio_sync as _mod + + _mod.settings.clowntown_do_clock_wraparound = True + + mock_mc = MagicMock() + mock_mc.commands.get_time = AsyncMock( + side_effect=[ + Event(EventType.CURRENT_TIME, {"time": 2000}), + Event(EventType.CURRENT_TIME, {"time": 1}), + ] + ) + mock_mc.commands.set_time = AsyncMock( + side_effect=[ + Event(EventType.OK, {}), + Event(EventType.OK, {}), + ] + ) + mock_mc.commands.reboot = AsyncMock() + + with ( + patch("app.radio_sync.asyncio.sleep", new=AsyncMock()), + patch("app.radio_sync.time.time", return_value=1000), + patch("app.radio_sync.time.monotonic", side_effect=[0.0, 0.0]), + ): + result = await sync_radio_time(mock_mc) + + assert result is True + assert mock_mc.commands.set_time.call_args_list == [ + call(0xFFFFFFFF), + call(1000), + ] + assert mock_mc.commands.get_time.call_count == 2 + mock_mc.commands.reboot.assert_not_called() + + @pytest.mark.asyncio + async def test_wraparound_failure_falls_back_to_reboot(self): + """A failed experimental wraparound still uses the existing reboot recovery.""" + import app.radio_sync as _mod + + _mod.settings.clowntown_do_clock_wraparound = True + + mock_mc = MagicMock() + mock_mc.commands.set_time = AsyncMock( + return_value=Event(EventType.ERROR, {"reason": "illegal_arg"}) + ) + mock_mc.commands.get_time = AsyncMock( + side_effect=[ + Event(EventType.CURRENT_TIME, {"time": 2000}), + Event(EventType.CURRENT_TIME, {"time": 2000}), + ] + ) + mock_mc.commands.reboot = AsyncMock() + + with ( + patch("app.radio_sync.time.time", return_value=1000), + patch("app.radio_sync._attempt_clock_wraparound", new=AsyncMock(return_value=False)), + ): + result = await sync_radio_time(mock_mc) + + assert result is False + mock_mc.commands.reboot.assert_called_once() + @pytest.mark.asyncio async def test_does_not_reboot_twice(self): """Second rejection logs hardware RTC warning instead of rebooting."""