From 48dab293ae3860848f021631c1c5edfecd343422 Mon Sep 17 00:00:00 2001 From: Jack Kingsman Date: Sat, 7 Mar 2026 22:24:46 -0800 Subject: [PATCH] Advert-path uses correct identity for dedupe --- AGENTS.md | 2 +- app/database.py | 2 +- app/migrations.py | 88 ++++++++++++++++++++++++++++++++++- app/repository/contacts.py | 7 ++- tests/test_contacts_router.py | 22 +++++++++ tests/test_migrations.py | 72 ++++++++++++++++++++++++++-- tests/test_repository.py | 37 +++++++++++++++ 7 files changed, 218 insertions(+), 12 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index d9c0fd3..a8cc29d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -112,7 +112,7 @@ Frontend packet-feed consumers should treat `observation_id` as the dedup/render To improve repeater disambiguation in the network visualizer, the backend stores recent unique advertisement paths per contact in a dedicated table (`contact_advert_paths`). - This is independent of raw-packet payload deduplication. -- Paths are keyed per contact + path, with `heard_count`, `first_seen`, and `last_seen`. +- Paths are keyed per contact + path + hop count, with `heard_count`, `first_seen`, and `last_seen`. - Only the N most recent unique paths are retained per contact (currently 10). - See `frontend/src/components/AGENTS_packet_visualizer.md` § "Advert-Path Identity Hints" for how the visualizer consumes this data. 
diff --git a/app/database.py b/app/database.py index 933ffcc..de575a0 100644 --- a/app/database.py +++ b/app/database.py @@ -71,7 +71,7 @@ CREATE TABLE IF NOT EXISTS contact_advert_paths ( first_seen INTEGER NOT NULL, last_seen INTEGER NOT NULL, heard_count INTEGER NOT NULL DEFAULT 1, - UNIQUE(public_key, path_hex), + UNIQUE(public_key, path_hex, path_len), FOREIGN KEY (public_key) REFERENCES contacts(public_key) ); diff --git a/app/migrations.py b/app/migrations.py index 865cc0d..bddd80d 100644 --- a/app/migrations.py +++ b/app/migrations.py @@ -310,6 +310,13 @@ async def run_migrations(conn: aiosqlite.Connection) -> int: await set_version(conn, 39) applied += 1 + # Migration 40: Distinguish advert paths by hop count as well as bytes + if version < 40: + logger.info("Applying migration 40: rebuild contact_advert_paths uniqueness with path_len") + await _migrate_040_rebuild_contact_advert_paths_identity(conn) + await set_version(conn, 40) + applied += 1 + if applied > 0: logger.info( "Applied %d migration(s), schema now at version %d", applied, await get_version(conn) @@ -1693,7 +1700,7 @@ async def _migrate_026_rename_advert_paths_table(conn: aiosqlite.Connection) -> first_seen INTEGER NOT NULL, last_seen INTEGER NOT NULL, heard_count INTEGER NOT NULL DEFAULT 1, - UNIQUE(public_key, path_hex), + UNIQUE(public_key, path_hex, path_len), FOREIGN KEY (public_key) REFERENCES contacts(public_key) ) """ @@ -1717,7 +1724,7 @@ async def _migrate_026_rename_advert_paths_table(conn: aiosqlite.Connection) -> first_seen INTEGER NOT NULL, last_seen INTEGER NOT NULL, heard_count INTEGER NOT NULL DEFAULT 1, - UNIQUE(public_key, path_hex), + UNIQUE(public_key, path_hex, path_len), FOREIGN KEY (public_key) REFERENCES contacts(public_key) ) """ @@ -2355,3 +2362,80 @@ async def _migrate_039_add_contact_out_path_hash_mode(conn: aiosqlite.Connection """ ) await conn.commit() + + +async def _migrate_040_rebuild_contact_advert_paths_identity( + conn: aiosqlite.Connection, +) -> None: + 
"""Rebuild contact_advert_paths so uniqueness includes path_len. + + Multi-byte routing can produce the same path_hex bytes with a different hop count, + which changes the hop boundaries and therefore the semantic next-hop identity. + """ + cursor = await conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='contact_advert_paths'" + ) + if await cursor.fetchone() is None: + await conn.execute( + """ + CREATE TABLE IF NOT EXISTS contact_advert_paths ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + public_key TEXT NOT NULL, + path_hex TEXT NOT NULL, + path_len INTEGER NOT NULL, + first_seen INTEGER NOT NULL, + last_seen INTEGER NOT NULL, + heard_count INTEGER NOT NULL DEFAULT 1, + UNIQUE(public_key, path_hex, path_len), + FOREIGN KEY (public_key) REFERENCES contacts(public_key) + ) + """ + ) + await conn.execute("DROP INDEX IF EXISTS idx_contact_advert_paths_recent") + await conn.execute( + "CREATE INDEX IF NOT EXISTS idx_contact_advert_paths_recent " + "ON contact_advert_paths(public_key, last_seen DESC)" + ) + await conn.commit() + return + + await conn.execute( + """ + CREATE TABLE contact_advert_paths_new ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + public_key TEXT NOT NULL, + path_hex TEXT NOT NULL, + path_len INTEGER NOT NULL, + first_seen INTEGER NOT NULL, + last_seen INTEGER NOT NULL, + heard_count INTEGER NOT NULL DEFAULT 1, + UNIQUE(public_key, path_hex, path_len), + FOREIGN KEY (public_key) REFERENCES contacts(public_key) + ) + """ + ) + + await conn.execute( + """ + INSERT INTO contact_advert_paths_new + (public_key, path_hex, path_len, first_seen, last_seen, heard_count) + SELECT + public_key, + path_hex, + path_len, + MIN(first_seen), + MAX(last_seen), + SUM(heard_count) + FROM contact_advert_paths + GROUP BY public_key, path_hex, path_len + """ + ) + + await conn.execute("DROP TABLE contact_advert_paths") + await conn.execute("ALTER TABLE contact_advert_paths_new RENAME TO contact_advert_paths") + await conn.execute("DROP INDEX IF 
EXISTS idx_contact_advert_paths_recent") + await conn.execute( + "CREATE INDEX IF NOT EXISTS idx_contact_advert_paths_recent " + "ON contact_advert_paths(public_key, last_seen DESC)" + ) + await conn.commit() diff --git a/app/repository/contacts.py b/app/repository/contacts.py index 52e31c9..11e3ffc 100644 --- a/app/repository/contacts.py +++ b/app/repository/contacts.py @@ -337,9 +337,8 @@ class ContactAdvertPathRepository: INSERT INTO contact_advert_paths (public_key, path_hex, path_len, first_seen, last_seen, heard_count) VALUES (?, ?, ?, ?, ?, 1) - ON CONFLICT(public_key, path_hex) DO UPDATE SET + ON CONFLICT(public_key, path_hex, path_len) DO UPDATE SET last_seen = MAX(contact_advert_paths.last_seen, excluded.last_seen), - path_len = excluded.path_len, heard_count = contact_advert_paths.heard_count + 1 """, (normalized_key, normalized_path, path_len, timestamp, timestamp), @@ -350,8 +349,8 @@ class ContactAdvertPathRepository: """ DELETE FROM contact_advert_paths WHERE public_key = ? - AND path_hex NOT IN ( - SELECT path_hex + AND id NOT IN ( + SELECT id FROM contact_advert_paths WHERE public_key = ? 
ORDER BY last_seen DESC, heard_count DESC, path_len ASC, path_hex ASC diff --git a/tests/test_contacts_router.py b/tests/test_contacts_router.py index a6593ce..33a56f0 100644 --- a/tests/test_contacts_router.py +++ b/tests/test_contacts_router.py @@ -214,6 +214,28 @@ class TestAdvertPaths: assert data[0]["path"] == "" assert data[0]["next_hop"] is None + @pytest.mark.asyncio + async def test_get_contact_advert_paths_distinguishes_same_bytes_by_hop_count( + self, test_db, client + ): + repeater_key = KEY_A + await _insert_contact(repeater_key, "R1", type=2) + await ContactAdvertPathRepository.record_observation( + repeater_key, "aa00", 1000, hop_count=1 + ) + await ContactAdvertPathRepository.record_observation( + repeater_key, "aa00", 1010, hop_count=2 + ) + + response = await client.get(f"/api/contacts/{repeater_key}/advert-paths") + + assert response.status_code == 200 + data = response.json() + assert [(item["path"], item["path_len"], item["next_hop"]) for item in data] == [ + ("aa00", 2, "aa"), + ("aa00", 1, "aa00"), + ] + @pytest.mark.asyncio async def test_get_contact_advert_paths_works_for_non_repeater(self, test_db, client): await _insert_contact(KEY_A, "Alice", type=1) diff --git a/tests/test_migrations.py b/tests/test_migrations.py index 9e7ad89..ac032f7 100644 --- a/tests/test_migrations.py +++ b/tests/test_migrations.py @@ -1116,8 +1116,8 @@ class TestMigration039: applied = await run_migrations(conn) - assert applied == 1 - assert await get_version(conn) == 39 + assert applied == 2 + assert await get_version(conn) == 40 cursor = await conn.execute( """ @@ -1186,8 +1186,8 @@ class TestMigration039: applied = await run_migrations(conn) - assert applied == 1 - assert await get_version(conn) == 39 + assert applied == 2 + assert await get_version(conn) == 40 cursor = await conn.execute( """ @@ -1205,3 +1205,67 @@ class TestMigration039: assert rows[1]["out_path_hash_mode"] == -1 finally: await conn.close() + + +class TestMigration040: + """Test migration 
040: include path_len in advert-path identity.""" + + @pytest.mark.asyncio + async def test_rebuilds_contact_advert_paths_to_distinguish_same_bytes_by_hop_count(self): + conn = await aiosqlite.connect(":memory:") + conn.row_factory = aiosqlite.Row + try: + await set_version(conn, 39) + await conn.execute(""" + CREATE TABLE contact_advert_paths ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + public_key TEXT NOT NULL, + path_hex TEXT NOT NULL, + path_len INTEGER NOT NULL, + first_seen INTEGER NOT NULL, + last_seen INTEGER NOT NULL, + heard_count INTEGER NOT NULL DEFAULT 1, + UNIQUE(public_key, path_hex) + ) + """) + await conn.execute( + """ + INSERT INTO contact_advert_paths + (public_key, path_hex, path_len, first_seen, last_seen, heard_count) + VALUES (?, ?, ?, ?, ?, ?) + """, + ("aa" * 32, "aa00", 1, 1000, 1001, 2), + ) + await conn.commit() + + applied = await run_migrations(conn) + + assert applied == 1 + assert await get_version(conn) == 40 + + await conn.execute( + """ + INSERT INTO contact_advert_paths + (public_key, path_hex, path_len, first_seen, last_seen, heard_count) + VALUES (?, ?, ?, ?, ?, ?) + """, + ("aa" * 32, "aa00", 2, 1002, 1002, 1), + ) + await conn.commit() + + cursor = await conn.execute( + """ + SELECT path_hex, path_len, heard_count + FROM contact_advert_paths + WHERE public_key = ? 
+ ORDER BY path_len ASC + """, + ("aa" * 32,), + ) + rows = await cursor.fetchall() + assert [(row["path_hex"], row["path_len"], row["heard_count"]) for row in rows] == [ + ("aa00", 1, 2), + ("aa00", 2, 1), + ] + finally: + await conn.close() diff --git a/tests/test_repository.py b/tests/test_repository.py index 9563388..7b137c9 100644 --- a/tests/test_repository.py +++ b/tests/test_repository.py @@ -286,6 +286,43 @@ class TestContactAdvertPathRepository: assert len(paths) == 1 assert paths[0].next_hop == "aa11" + @pytest.mark.asyncio + async def test_same_path_hex_with_different_path_len_is_stored_separately(self, test_db): + repeater_key = "ac" * 32 + await ContactRepository.upsert({"public_key": repeater_key, "name": "Rsplit", "type": 2}) + + await ContactAdvertPathRepository.record_observation( + repeater_key, "aa00", 1000, hop_count=1 + ) + await ContactAdvertPathRepository.record_observation( + repeater_key, "aa00", 1010, hop_count=2 + ) + + paths = await ContactAdvertPathRepository.get_recent_for_contact(repeater_key, limit=10) + assert len(paths) == 2 + assert [(p.path, p.path_len, p.next_hop) for p in paths] == [ + ("aa00", 2, "aa"), + ("aa00", 1, "aa00"), + ] + + @pytest.mark.asyncio + async def test_prune_distinguishes_same_path_hex_with_different_path_len(self, test_db): + repeater_key = "ad" * 32 + await ContactRepository.upsert({"public_key": repeater_key, "name": "Rprune", "type": 2}) + + await ContactAdvertPathRepository.record_observation( + repeater_key, "aa00", 1000, max_paths=2, hop_count=1 + ) + await ContactAdvertPathRepository.record_observation( + repeater_key, "aa00", 1001, max_paths=2, hop_count=2 + ) + await ContactAdvertPathRepository.record_observation( + repeater_key, "bb00", 1002, max_paths=2, hop_count=1 + ) + + paths = await ContactAdvertPathRepository.get_recent_for_contact(repeater_key, limit=10) + assert [(p.path, p.path_len) for p in paths] == [("bb00", 1), ("aa00", 2)] + @pytest.mark.asyncio async def 
test_prunes_to_most_recent_n_unique_paths(self, test_db): repeater_key = "bb" * 32