Mirror of https://github.com/jkingsman/Remote-Terminal-for-MeshCore.git (last synced 2026-03-28 17:43:05 +01:00).
Fix stuck post-connect failure state
This commit is contained in:
app/radio.py (18 lines changed)
@@ -133,6 +133,7 @@ class RadioManager:
|
||||
self._operation_lock: asyncio.Lock | None = None
|
||||
self._setup_lock: asyncio.Lock | None = None
|
||||
self._setup_in_progress: bool = False
|
||||
self._setup_complete: bool = False
|
||||
|
||||
async def _acquire_operation_lock(
|
||||
self,
|
||||
@@ -247,6 +248,7 @@ class RadioManager:
|
||||
if not self._meshcore:
|
||||
return
|
||||
self._setup_in_progress = True
|
||||
self._setup_complete = False
|
||||
mc = self._meshcore
|
||||
try:
|
||||
register_event_handlers(mc)
|
||||
@@ -284,6 +286,8 @@ class RadioManager:
|
||||
|
||||
# Start periodic message polling as fallback (idempotent)
|
||||
start_message_polling()
|
||||
|
||||
self._setup_complete = True
|
||||
finally:
|
||||
self._setup_in_progress = False
|
||||
|
||||
@@ -309,6 +313,10 @@ class RadioManager:
|
||||
def is_setup_in_progress(self) -> bool:
|
||||
return self._setup_in_progress
|
||||
|
||||
@property
|
||||
def is_setup_complete(self) -> bool:
|
||||
return self._setup_complete
|
||||
|
||||
async def connect(self) -> None:
|
||||
"""Connect to the radio using the configured transport."""
|
||||
if self._meshcore is not None:
|
||||
@@ -346,6 +354,7 @@ class RadioManager:
|
||||
)
|
||||
self._connection_info = f"Serial: {port}"
|
||||
self._last_connected = True
|
||||
self._setup_complete = False
|
||||
logger.debug("Serial connection established")
|
||||
|
||||
async def _connect_tcp(self) -> None:
|
||||
@@ -362,6 +371,7 @@ class RadioManager:
|
||||
)
|
||||
self._connection_info = f"TCP: {host}:{port}"
|
||||
self._last_connected = True
|
||||
self._setup_complete = False
|
||||
logger.debug("TCP connection established")
|
||||
|
||||
async def _connect_ble(self) -> None:
|
||||
@@ -378,6 +388,7 @@ class RadioManager:
|
||||
)
|
||||
self._connection_info = f"BLE: {address}"
|
||||
self._last_connected = True
|
||||
self._setup_complete = False
|
||||
logger.debug("BLE connection established")
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
@@ -386,6 +397,7 @@ class RadioManager:
|
||||
logger.debug("Disconnecting from radio")
|
||||
await self._meshcore.disconnect()
|
||||
self._meshcore = None
|
||||
self._setup_complete = False
|
||||
logger.debug("Radio disconnected")
|
||||
|
||||
async def reconnect(self, *, broadcast_on_success: bool = True) -> bool:
|
||||
@@ -475,6 +487,12 @@ class RadioManager:
|
||||
broadcast_health(True, self._connection_info)
|
||||
self._last_connected = True
|
||||
|
||||
elif current_connected and not self._setup_complete:
|
||||
# Transport connected but setup incomplete — retry
|
||||
logger.info("Retrying post-connect setup...")
|
||||
await self.post_connect_setup()
|
||||
broadcast_health(True, self._connection_info)
|
||||
|
||||
except asyncio.CancelledError:
|
||||
# Task is being cancelled, exit cleanly
|
||||
break
|
||||
|
||||
@@ -206,7 +206,14 @@ async def reboot_radio() -> dict:
|
||||
success = await radio_manager.reconnect()
|
||||
|
||||
if success:
|
||||
await radio_manager.post_connect_setup()
|
||||
try:
|
||||
await radio_manager.post_connect_setup()
|
||||
except Exception as e:
|
||||
logger.exception("Post-connect setup failed after reconnect")
|
||||
raise HTTPException(
|
||||
status_code=503,
|
||||
detail=f"Radio connected but setup failed: {e}",
|
||||
) from e
|
||||
|
||||
return {"status": "ok", "message": "Reconnected successfully", "connected": True}
|
||||
else:
|
||||
@@ -224,7 +231,20 @@ async def reconnect_radio() -> dict:
|
||||
or power-cycled.
|
||||
"""
|
||||
if radio_manager.is_connected:
|
||||
return {"status": "ok", "message": "Already connected", "connected": True}
|
||||
if radio_manager.is_setup_complete:
|
||||
return {"status": "ok", "message": "Already connected", "connected": True}
|
||||
|
||||
# Connected but setup incomplete — retry setup
|
||||
logger.info("Radio connected but setup incomplete, retrying setup")
|
||||
try:
|
||||
await radio_manager.post_connect_setup()
|
||||
return {"status": "ok", "message": "Setup completed", "connected": True}
|
||||
except Exception as e:
|
||||
logger.exception("Post-connect setup failed")
|
||||
raise HTTPException(
|
||||
status_code=503,
|
||||
detail=f"Radio connected but setup failed: {e}",
|
||||
) from e
|
||||
|
||||
if radio_manager.is_reconnecting:
|
||||
return {
|
||||
@@ -237,7 +257,14 @@ async def reconnect_radio() -> dict:
|
||||
success = await radio_manager.reconnect()
|
||||
|
||||
if success:
|
||||
await radio_manager.post_connect_setup()
|
||||
try:
|
||||
await radio_manager.post_connect_setup()
|
||||
except Exception as e:
|
||||
logger.exception("Post-connect setup failed after reconnect")
|
||||
raise HTTPException(
|
||||
status_code=503,
|
||||
detail=f"Radio connected but setup failed: {e}",
|
||||
) from e
|
||||
|
||||
return {"status": "ok", "message": "Reconnected successfully", "connected": True}
|
||||
else:
|
||||
|
||||
@@ -221,6 +221,58 @@ class TestConnectionMonitor:
|
||||
assert healthy_calls == []
|
||||
assert rm._last_connected is False
|
||||
|
||||
@pytest.mark.asyncio
async def test_monitor_retries_setup_when_connected_but_incomplete(self):
    """A connected transport with unfinished setup makes the monitor re-run setup.

    The first mocked setup attempt raises; any later attempt marks setup as
    complete, after which a healthy broadcast is expected.
    """
    from app.radio import RadioManager

    manager = RadioManager()
    manager._connection_info = "TCP: test:4000"

    # State after a connect whose post-connect setup failed: the transport
    # reports connected and _last_connected is already True, but
    # _setup_complete is still False.
    transport = MagicMock()
    transport.is_connected = True
    manager._meshcore = transport
    manager._last_connected = True
    manager._setup_complete = False

    counters = {"setup": 0, "sleep": 0}

    async def _flaky_setup():
        counters["setup"] += 1
        if counters["setup"] == 1:
            raise RuntimeError("setup failed")
        # Every attempt after the first succeeds.
        manager._setup_complete = True

    async def _fake_sleep(_seconds: float):
        # Let the monitor spin a few times, then cancel it so the test ends.
        counters["sleep"] += 1
        if counters["sleep"] >= 4:
            raise asyncio.CancelledError()

    manager.post_connect_setup = AsyncMock(side_effect=_flaky_setup)

    with patch("app.radio.asyncio.sleep", side_effect=_fake_sleep):
        with patch("app.websocket.broadcast_health") as mock_broadcast:
            await manager.start_connection_monitor()
            try:
                await manager._reconnect_task
            finally:
                await manager.stop_connection_monitor()

    # Setup must have been attempted again after the initial failure.
    assert counters["setup"] >= 2
    # A healthy status is broadcast once setup finally succeeds.
    mock_broadcast.assert_any_call(True, "TCP: test:4000")
    assert manager._setup_complete is True
|
||||
|
||||
|
||||
class TestReconnectLock:
|
||||
"""Tests for reconnect() lock serialization — no duplicate reconnections."""
|
||||
|
||||
Reference in New Issue
Block a user