Advert-path uses correct identity for dedupe

This commit is contained in:
Jack Kingsman
2026-03-07 22:24:46 -08:00
parent 76d11b01a7
commit 48dab293ae
7 changed files with 218 additions and 12 deletions

View File

@@ -112,7 +112,7 @@ Frontend packet-feed consumers should treat `observation_id` as the dedup/render
To improve repeater disambiguation in the network visualizer, the backend stores recent unique advertisement paths per contact in a dedicated table (`contact_advert_paths`).
- This is independent of raw-packet payload deduplication.
- Paths are keyed per contact + path, with `heard_count`, `first_seen`, and `last_seen`.
- Paths are keyed per contact + path + hop count, with `heard_count`, `first_seen`, and `last_seen`.
- Only the N most recent unique paths are retained per contact (currently 10).
- See `frontend/src/components/AGENTS_packet_visualizer.md` § "Advert-Path Identity Hints" for how the visualizer consumes this data.

View File

@@ -71,7 +71,7 @@ CREATE TABLE IF NOT EXISTS contact_advert_paths (
first_seen INTEGER NOT NULL,
last_seen INTEGER NOT NULL,
heard_count INTEGER NOT NULL DEFAULT 1,
UNIQUE(public_key, path_hex),
UNIQUE(public_key, path_hex, path_len),
FOREIGN KEY (public_key) REFERENCES contacts(public_key)
);

View File

@@ -310,6 +310,13 @@ async def run_migrations(conn: aiosqlite.Connection) -> int:
await set_version(conn, 39)
applied += 1
# Migration 40: Distinguish advert paths by hop count as well as bytes
if version < 40:
logger.info("Applying migration 40: rebuild contact_advert_paths uniqueness with path_len")
await _migrate_040_rebuild_contact_advert_paths_identity(conn)
await set_version(conn, 40)
applied += 1
if applied > 0:
logger.info(
"Applied %d migration(s), schema now at version %d", applied, await get_version(conn)
@@ -1693,7 +1700,7 @@ async def _migrate_026_rename_advert_paths_table(conn: aiosqlite.Connection) ->
first_seen INTEGER NOT NULL,
last_seen INTEGER NOT NULL,
heard_count INTEGER NOT NULL DEFAULT 1,
UNIQUE(public_key, path_hex),
UNIQUE(public_key, path_hex, path_len),
FOREIGN KEY (public_key) REFERENCES contacts(public_key)
)
"""
@@ -1717,7 +1724,7 @@ async def _migrate_026_rename_advert_paths_table(conn: aiosqlite.Connection) ->
first_seen INTEGER NOT NULL,
last_seen INTEGER NOT NULL,
heard_count INTEGER NOT NULL DEFAULT 1,
UNIQUE(public_key, path_hex),
UNIQUE(public_key, path_hex, path_len),
FOREIGN KEY (public_key) REFERENCES contacts(public_key)
)
"""
@@ -2355,3 +2362,80 @@ async def _migrate_039_add_contact_out_path_hash_mode(conn: aiosqlite.Connection
"""
)
await conn.commit()
async def _migrate_040_rebuild_contact_advert_paths_identity(
    conn: aiosqlite.Connection,
) -> None:
    """Rebuild contact_advert_paths so uniqueness includes path_len.

    Multi-byte routing can produce the same path_hex bytes with a different hop count,
    which changes the hop boundaries and therefore the semantic next-hop identity.

    When the table is missing it is simply created with the new constraint.
    Otherwise the rows are copied into a scratch table carrying
    UNIQUE(public_key, path_hex, path_len), the old table is dropped and the
    scratch table is renamed into place. In both cases the
    idx_contact_advert_paths_recent index is recreated and the work is committed.

    Args:
        conn: Open database connection the migration runs on.
    """
    # Column and constraint list shared by the fresh-create and rebuild paths so
    # the two definitions cannot drift apart.
    table_body = """
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            public_key TEXT NOT NULL,
            path_hex TEXT NOT NULL,
            path_len INTEGER NOT NULL,
            first_seen INTEGER NOT NULL,
            last_seen INTEGER NOT NULL,
            heard_count INTEGER NOT NULL DEFAULT 1,
            UNIQUE(public_key, path_hex, path_len),
            FOREIGN KEY (public_key) REFERENCES contacts(public_key)
    """

    cursor = await conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name='contact_advert_paths'"
    )
    table_exists = await cursor.fetchone() is not None

    if table_exists:
        # An earlier attempt that was interrupted after creating the scratch table
        # may have left it behind; without this the CREATE below would fail with
        # "table already exists" on every retry and the schema could never advance.
        await conn.execute("DROP TABLE IF EXISTS contact_advert_paths_new")
        await conn.execute(f"CREATE TABLE contact_advert_paths_new ({table_body})")
        # Grouping by the new identity folds any rows sharing the same key into one,
        # keeping the earliest first_seen, latest last_seen and the summed count.
        await conn.execute(
            """
            INSERT INTO contact_advert_paths_new
                (public_key, path_hex, path_len, first_seen, last_seen, heard_count)
            SELECT
                public_key,
                path_hex,
                path_len,
                MIN(first_seen),
                MAX(last_seen),
                SUM(heard_count)
            FROM contact_advert_paths
            GROUP BY public_key, path_hex, path_len
            """
        )
        await conn.execute("DROP TABLE contact_advert_paths")
        await conn.execute("ALTER TABLE contact_advert_paths_new RENAME TO contact_advert_paths")
    else:
        await conn.execute(f"CREATE TABLE IF NOT EXISTS contact_advert_paths ({table_body})")

    # Both paths end the same way: make sure the recency index exists on the
    # final table, then persist everything.
    await conn.execute("DROP INDEX IF EXISTS idx_contact_advert_paths_recent")
    await conn.execute(
        "CREATE INDEX IF NOT EXISTS idx_contact_advert_paths_recent "
        "ON contact_advert_paths(public_key, last_seen DESC)"
    )
    await conn.commit()

View File

@@ -337,9 +337,8 @@ class ContactAdvertPathRepository:
INSERT INTO contact_advert_paths
(public_key, path_hex, path_len, first_seen, last_seen, heard_count)
VALUES (?, ?, ?, ?, ?, 1)
ON CONFLICT(public_key, path_hex) DO UPDATE SET
ON CONFLICT(public_key, path_hex, path_len) DO UPDATE SET
last_seen = MAX(contact_advert_paths.last_seen, excluded.last_seen),
path_len = excluded.path_len,
heard_count = contact_advert_paths.heard_count + 1
""",
(normalized_key, normalized_path, path_len, timestamp, timestamp),
@@ -350,8 +349,8 @@ class ContactAdvertPathRepository:
"""
DELETE FROM contact_advert_paths
WHERE public_key = ?
AND path_hex NOT IN (
SELECT path_hex
AND id NOT IN (
SELECT id
FROM contact_advert_paths
WHERE public_key = ?
ORDER BY last_seen DESC, heard_count DESC, path_len ASC, path_hex ASC

View File

@@ -214,6 +214,28 @@ class TestAdvertPaths:
assert data[0]["path"] == ""
assert data[0]["next_hop"] is None
@pytest.mark.asyncio
async def test_get_contact_advert_paths_distinguishes_same_bytes_by_hop_count(
    self, test_db, client
):
    """The advert-paths endpoint lists identical path bytes once per hop count."""
    await _insert_contact(KEY_A, "R1", type=2)

    # Same bytes heard twice: first as a single hop, then as two hops.
    for heard_at, hops in ((1000, 1), (1010, 2)):
        await ContactAdvertPathRepository.record_observation(
            KEY_A, "aa00", heard_at, hop_count=hops
        )

    resp = await client.get(f"/api/contacts/{KEY_A}/advert-paths")
    assert resp.status_code == 200

    observed = [
        (entry["path"], entry["path_len"], entry["next_hop"]) for entry in resp.json()
    ]
    # The two-hop entry reports "aa" as next hop, the one-hop entry the whole "aa00".
    expected = [("aa00", 2, "aa"), ("aa00", 1, "aa00")]
    assert observed == expected
@pytest.mark.asyncio
async def test_get_contact_advert_paths_works_for_non_repeater(self, test_db, client):
await _insert_contact(KEY_A, "Alice", type=1)

View File

@@ -1116,8 +1116,8 @@ class TestMigration039:
applied = await run_migrations(conn)
assert applied == 1
assert await get_version(conn) == 39
assert applied == 2
assert await get_version(conn) == 40
cursor = await conn.execute(
"""
@@ -1186,8 +1186,8 @@ class TestMigration039:
applied = await run_migrations(conn)
assert applied == 1
assert await get_version(conn) == 39
assert applied == 2
assert await get_version(conn) == 40
cursor = await conn.execute(
"""
@@ -1205,3 +1205,67 @@ class TestMigration039:
assert rows[1]["out_path_hash_mode"] == -1
finally:
await conn.close()
class TestMigration040:
    """Migration 040 makes path_len part of the advert-path uniqueness key."""

    @pytest.mark.asyncio
    async def test_rebuilds_contact_advert_paths_to_distinguish_same_bytes_by_hop_count(self):
        """After migrating, the same path_hex can be stored again with another path_len."""
        contact_key = "aa" * 32
        insert_sql = """
                    INSERT INTO contact_advert_paths
                        (public_key, path_hex, path_len, first_seen, last_seen, heard_count)
                    VALUES (?, ?, ?, ?, ?, ?)
                    """

        conn = await aiosqlite.connect(":memory:")
        conn.row_factory = aiosqlite.Row
        try:
            # Start from schema version 39 with the old two-column uniqueness.
            await set_version(conn, 39)
            await conn.execute("""
                CREATE TABLE contact_advert_paths (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    public_key TEXT NOT NULL,
                    path_hex TEXT NOT NULL,
                    path_len INTEGER NOT NULL,
                    first_seen INTEGER NOT NULL,
                    last_seen INTEGER NOT NULL,
                    heard_count INTEGER NOT NULL DEFAULT 1,
                    UNIQUE(public_key, path_hex)
                )
            """)
            await conn.execute(insert_sql, (contact_key, "aa00", 1, 1000, 1001, 2))
            await conn.commit()

            applied = await run_migrations(conn)

            assert applied == 1
            assert await get_version(conn) == 40

            # Same bytes, different hop count: the rebuilt table has to accept this row.
            await conn.execute(insert_sql, (contact_key, "aa00", 2, 1002, 1002, 1))
            await conn.commit()

            cursor = await conn.execute(
                """
                SELECT path_hex, path_len, heard_count
                FROM contact_advert_paths
                WHERE public_key = ?
                ORDER BY path_len ASC
                """,
                (contact_key,),
            )
            stored = [
                (record["path_hex"], record["path_len"], record["heard_count"])
                for record in await cursor.fetchall()
            ]
            assert stored == [
                ("aa00", 1, 2),
                ("aa00", 2, 1),
            ]
        finally:
            await conn.close()

View File

@@ -286,6 +286,43 @@ class TestContactAdvertPathRepository:
assert len(paths) == 1
assert paths[0].next_hop == "aa11"
@pytest.mark.asyncio
async def test_same_path_hex_with_different_path_len_is_stored_separately(self, test_db):
    """Recording identical path bytes under two hop counts yields two stored paths."""
    contact_key = "ac" * 32
    await ContactRepository.upsert({"public_key": contact_key, "name": "Rsplit", "type": 2})

    # Same bytes heard twice: first as a single hop, then as two hops.
    for heard_at, hops in ((1000, 1), (1010, 2)):
        await ContactAdvertPathRepository.record_observation(
            contact_key, "aa00", heard_at, hop_count=hops
        )

    stored = await ContactAdvertPathRepository.get_recent_for_contact(contact_key, limit=10)

    assert len(stored) == 2
    summary = [(entry.path, entry.path_len, entry.next_hop) for entry in stored]
    assert summary == [
        ("aa00", 2, "aa"),
        ("aa00", 1, "aa00"),
    ]
@pytest.mark.asyncio
async def test_prune_distinguishes_same_path_hex_with_different_path_len(self, test_db):
    """With max_paths=2, pruning treats equal bytes with different hop counts as distinct."""
    contact_key = "ad" * 32
    await ContactRepository.upsert({"public_key": contact_key, "name": "Rprune", "type": 2})

    # (path_hex, timestamp, hop_count) recorded in this order, each capped at two paths.
    observations = (
        ("aa00", 1000, 1),
        ("aa00", 1001, 2),
        ("bb00", 1002, 1),
    )
    for path_hex, heard_at, hops in observations:
        await ContactAdvertPathRepository.record_observation(
            contact_key, path_hex, heard_at, max_paths=2, hop_count=hops
        )

    remaining = await ContactAdvertPathRepository.get_recent_for_contact(contact_key, limit=10)

    # Only the oldest entry ("aa00" with one hop) is expected to be gone.
    kept = [(entry.path, entry.path_len) for entry in remaining]
    assert kept == [("bb00", 1), ("aa00", 2)]
@pytest.mark.asyncio
async def test_prunes_to_most_recent_n_unique_paths(self, test_db):
repeater_key = "bb" * 32