diff --git a/app/database.py b/app/database.py
index 23a2e3d..7e267be 100644
--- a/app/database.py
+++ b/app/database.py
@@ -41,12 +41,13 @@ CREATE TABLE IF NOT EXISTS messages (
     txt_type INTEGER DEFAULT 0,
     signature TEXT,
     outgoing INTEGER DEFAULT 0,
-    acked INTEGER DEFAULT 0,
+    acked INTEGER DEFAULT 0
     -- Deduplication: identical text + timestamp in the same conversation is treated as a
     -- mesh echo/repeat. Second-precision timestamps mean two intentional identical messages
     -- within the same second would collide, but this is not feasible in practice — LoRa
     -- transmission takes several seconds per message, and the UI clears the input on send.
-    UNIQUE(type, conversation_key, text, sender_timestamp)
+    -- Enforced via idx_messages_dedup_null_safe (unique index) rather than a table constraint
+    -- to avoid the storage overhead of SQLite's autoindex duplicating every message text.
 );
 
 CREATE TABLE IF NOT EXISTS raw_packets (
@@ -60,6 +61,8 @@ CREATE TABLE IF NOT EXISTS raw_packets (
 
 CREATE INDEX IF NOT EXISTS idx_messages_conversation ON messages(type, conversation_key);
 CREATE INDEX IF NOT EXISTS idx_messages_received ON messages(received_at);
+CREATE UNIQUE INDEX IF NOT EXISTS idx_messages_dedup_null_safe
+    ON messages(type, conversation_key, text, COALESCE(sender_timestamp, 0));
 CREATE INDEX IF NOT EXISTS idx_raw_packets_message_id ON raw_packets(message_id);
 CREATE UNIQUE INDEX IF NOT EXISTS idx_raw_packets_payload_hash ON raw_packets(payload_hash);
 CREATE INDEX IF NOT EXISTS idx_contacts_on_radio ON contacts(on_radio);
diff --git a/app/migrations.py b/app/migrations.py
index 0782fd7..146f567 100644
--- a/app/migrations.py
+++ b/app/migrations.py
@@ -156,10 +156,30 @@ async def run_migrations(conn: aiosqlite.Connection) -> int:
         await set_version(conn, 17)
         applied += 1
 
+    # Migration 18: Drop UNIQUE(data) constraint on raw_packets (redundant with payload_hash)
+    if version < 18:
+        logger.info("Applying migration 18: drop raw_packets UNIQUE(data) constraint")
+        await _migrate_018_drop_raw_packets_data_unique(conn)
+        await set_version(conn, 18)
+        applied += 1
+
+    # Migration 19: Drop UNIQUE constraint on messages (redundant with dedup_null_safe index)
+    if version < 19:
+        logger.info("Applying migration 19: drop messages UNIQUE constraint")
+        await _migrate_019_drop_messages_unique_constraint(conn)
+        await set_version(conn, 19)
+        applied += 1
+
     if applied > 0:
         logger.info(
             "Applied %d migration(s), schema now at version %d", applied, await get_version(conn)
         )
+
+        # Reclaim disk space after table-rebuild migrations
+        if version < 19:
+            logger.info("Running VACUUM to reclaim disk space (this may take a moment)...")
+            await conn.execute("VACUUM")
+            logger.info("VACUUM complete")
     else:
         logger.debug("Schema up to date at version %d", version)
 
@@ -1054,3 +1074,148 @@ async def _migrate_017_drop_experimental_channel_double_send(conn: aiosqlite.Con
         raise
 
     await conn.commit()
+
+
+async def _migrate_018_drop_raw_packets_data_unique(conn: aiosqlite.Connection) -> None:
+    """
+    Drop the UNIQUE constraint on raw_packets.data via table rebuild.
+
+    This constraint creates a large autoindex (~30 MB on a 340K-row database) that
+    stores a complete copy of every raw packet BLOB in a B-tree. Deduplication is
+    already handled by the unique index on payload_hash, making the data UNIQUE
+    constraint pure storage overhead.
+
+    Requires table recreation since SQLite doesn't support DROP CONSTRAINT.
+    """
+    # Check if the autoindex exists (indicates UNIQUE constraint on data)
+    cursor = await conn.execute(
+        "SELECT name FROM sqlite_master WHERE type='index' "
+        "AND name='sqlite_autoindex_raw_packets_1'"
+    )
+    if not await cursor.fetchone():
+        logger.debug("raw_packets.data UNIQUE constraint already absent, skipping rebuild")
+        await conn.commit()
+        return
+
+    logger.info("Rebuilding raw_packets table to remove UNIQUE(data) constraint...")
+
+    # Get current columns from the existing table
+    cursor = await conn.execute("PRAGMA table_info(raw_packets)")
+    old_cols = {col[1] for col in await cursor.fetchall()}
+
+    # An interrupted earlier attempt may have left raw_packets_new behind; drop it so
+    # the rebuild can be retried instead of failing with "table already exists".
+    await conn.execute("DROP TABLE IF EXISTS raw_packets_new")
+
+    # Target schema without UNIQUE on data
+    await conn.execute("""
+        CREATE TABLE raw_packets_new (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            timestamp INTEGER NOT NULL,
+            data BLOB NOT NULL,
+            message_id INTEGER,
+            payload_hash TEXT,
+            FOREIGN KEY (message_id) REFERENCES messages(id)
+        )
+    """)
+
+    # Copy only columns that exist in both old and new tables
+    new_cols = {"id", "timestamp", "data", "message_id", "payload_hash"}
+    copy_cols = ", ".join(sorted(c for c in new_cols if c in old_cols))
+
+    await conn.execute(
+        f"INSERT INTO raw_packets_new ({copy_cols}) SELECT {copy_cols} FROM raw_packets"
+    )
+    await conn.execute("DROP TABLE raw_packets")
+    await conn.execute("ALTER TABLE raw_packets_new RENAME TO raw_packets")
+
+    # Recreate indexes
+    await conn.execute(
+        "CREATE UNIQUE INDEX idx_raw_packets_payload_hash ON raw_packets(payload_hash)"
+    )
+    await conn.execute("CREATE INDEX idx_raw_packets_message_id ON raw_packets(message_id)")
+
+    await conn.commit()
+    logger.info("raw_packets table rebuilt without UNIQUE(data) constraint")
+
+
+async def _migrate_019_drop_messages_unique_constraint(conn: aiosqlite.Connection) -> None:
+    """
+    Drop the UNIQUE(type, conversation_key, text, sender_timestamp) constraint on messages.
+
+    This constraint creates a large autoindex (~13 MB on a 112K-row database) that
+    stores the full message text in a B-tree. The idx_messages_dedup_null_safe unique
+    index already provides identical dedup protection — no rows have NULL
+    sender_timestamp since migration 15 backfilled them all.
+
+    INSERT OR IGNORE still works correctly because it checks all unique constraints,
+    including unique indexes like idx_messages_dedup_null_safe.
+
+    Requires table recreation since SQLite doesn't support DROP CONSTRAINT.
+    """
+    # Check if the autoindex exists (indicates UNIQUE constraint)
+    cursor = await conn.execute(
+        "SELECT name FROM sqlite_master WHERE type='index' AND name='sqlite_autoindex_messages_1'"
+    )
+    if not await cursor.fetchone():
+        logger.debug("messages UNIQUE constraint already absent, skipping rebuild")
+        await conn.commit()
+        return
+
+    logger.info("Rebuilding messages table to remove UNIQUE constraint...")
+
+    # Get current columns from the existing table
+    cursor = await conn.execute("PRAGMA table_info(messages)")
+    old_cols = {col[1] for col in await cursor.fetchall()}
+
+    # An interrupted earlier attempt may have left messages_new behind; drop it so
+    # the rebuild can be retried instead of failing with "table already exists".
+    await conn.execute("DROP TABLE IF EXISTS messages_new")
+
+    # Target schema without the UNIQUE table constraint
+    await conn.execute("""
+        CREATE TABLE messages_new (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            type TEXT NOT NULL,
+            conversation_key TEXT NOT NULL,
+            text TEXT NOT NULL,
+            sender_timestamp INTEGER,
+            received_at INTEGER NOT NULL,
+            txt_type INTEGER DEFAULT 0,
+            signature TEXT,
+            outgoing INTEGER DEFAULT 0,
+            acked INTEGER DEFAULT 0,
+            paths TEXT
+        )
+    """)
+
+    # Copy only columns that exist in both old and new tables
+    new_cols = {
+        "id",
+        "type",
+        "conversation_key",
+        "text",
+        "sender_timestamp",
+        "received_at",
+        "txt_type",
+        "signature",
+        "outgoing",
+        "acked",
+        "paths",
+    }
+    copy_cols = ", ".join(sorted(c for c in new_cols if c in old_cols))
+
+    await conn.execute(f"INSERT INTO messages_new ({copy_cols}) SELECT {copy_cols} FROM messages")
+    await conn.execute("DROP TABLE messages")
+    await conn.execute("ALTER TABLE messages_new RENAME TO messages")
+
+    # Recreate indexes
+    await conn.execute("CREATE INDEX idx_messages_conversation ON messages(type, conversation_key)")
+    await conn.execute("CREATE INDEX idx_messages_received ON messages(received_at)")
+    await conn.execute(
+        """CREATE UNIQUE INDEX idx_messages_dedup_null_safe
+           ON messages(type, conversation_key, text, COALESCE(sender_timestamp, 0))"""
+    )
+
+    await conn.commit()
+    logger.info("messages table rebuilt without UNIQUE constraint")
diff --git a/tests/test_migrations.py b/tests/test_migrations.py
index 927641c..d4f1fbb 100644
--- a/tests/test_migrations.py
+++ b/tests/test_migrations.py
@@ -100,8 +100,8 @@ class TestMigration001:
 
             # Run migrations
             applied = await run_migrations(conn)
-            assert applied == 17  # All 17 migrations run
-            assert await get_version(conn) == 17
+            assert applied == 19  # All 19 migrations run
+            assert await get_version(conn) == 19
 
             # Verify columns exist by inserting and selecting
             await conn.execute(
@@ -183,9 +183,9 @@ class TestMigration001:
             applied1 = await run_migrations(conn)
             applied2 = await run_migrations(conn)
 
-            assert applied1 == 17  # All 17 migrations run
+            assert applied1 == 19  # All 19 migrations run
             assert applied2 == 0  # No migrations on second run
-            assert await get_version(conn) == 17
+            assert await get_version(conn) == 19
         finally:
             await conn.close()
 
@@ -246,8 +246,8 @@ class TestMigration001:
             applied = await run_migrations(conn)
 
-            # All 17 migrations applied (version incremented) but no error
-            assert applied == 17
-            assert await get_version(conn) == 17
+            # All 19 migrations applied (version incremented) but no error
+            assert applied == 19
+            assert await get_version(conn) == 19
         finally:
             await conn.close()
 
@@ -374,10 +374,10 @@ class TestMigration013:
             )
             await conn.commit()
 
-            # Run migration 13 (plus 14+15+16+17 which also run)
+            # Run migration 13 (plus 14-19 which also run)
             applied = await run_migrations(conn)
-            assert applied == 5
-            assert await get_version(conn) == 17
+            assert applied == 7
+            assert await get_version(conn) == 19
 
             # Verify bots array was created with migrated data
             cursor = await conn.execute("SELECT bots FROM app_settings WHERE id = 1")
@@ -431,3 +431,253 @@ class TestMigration013:
             assert bots == []
         finally:
             await conn.close()
+
+
+class TestMigration018:
+    """Test migration 018: drop UNIQUE(data) from raw_packets."""
+
+    @pytest.mark.asyncio
+    async def test_migration_drops_data_unique_constraint(self):
+        """Migration rebuilds raw_packets without UNIQUE(data), preserving data."""
+        conn = await aiosqlite.connect(":memory:")
+        conn.row_factory = aiosqlite.Row
+        try:
+            await set_version(conn, 17)
+
+            # Create raw_packets WITH UNIQUE(data) — simulates production schema
+            await conn.execute("""
+                CREATE TABLE raw_packets (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    timestamp INTEGER NOT NULL,
+                    data BLOB NOT NULL UNIQUE,
+                    message_id INTEGER,
+                    payload_hash TEXT
+                )
+            """)
+            await conn.execute(
+                "CREATE UNIQUE INDEX idx_raw_packets_payload_hash ON raw_packets(payload_hash)"
+            )
+            await conn.execute("CREATE INDEX idx_raw_packets_message_id ON raw_packets(message_id)")
+
+            # Insert test data
+            await conn.execute(
+                "INSERT INTO raw_packets (timestamp, data, payload_hash) VALUES (?, ?, ?)",
+                (1000, b"\x01\x02\x03", "hash_a"),
+            )
+            await conn.execute(
+                "INSERT INTO raw_packets (timestamp, data, message_id, payload_hash) VALUES (?, ?, ?, ?)",
+                (2000, b"\x04\x05\x06", 42, "hash_b"),
+            )
+            # Create messages table stub (needed for migration 19)
+            await conn.execute("""
+                CREATE TABLE messages (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    type TEXT NOT NULL,
+                    conversation_key TEXT NOT NULL,
+                    text TEXT NOT NULL,
+                    sender_timestamp INTEGER,
+                    received_at INTEGER NOT NULL,
+                    txt_type INTEGER DEFAULT 0,
+                    signature TEXT,
+                    outgoing INTEGER DEFAULT 0,
+                    acked INTEGER DEFAULT 0,
+                    paths TEXT
+                )
+            """)
+            await conn.execute(
+                """CREATE UNIQUE INDEX idx_messages_dedup_null_safe
+                ON messages(type, conversation_key, text, COALESCE(sender_timestamp, 0))"""
+            )
+            await conn.commit()
+
+            # Verify autoindex exists before migration
+            cursor = await conn.execute(
+                "SELECT name FROM sqlite_master WHERE name='sqlite_autoindex_raw_packets_1'"
+            )
+            assert await cursor.fetchone() is not None
+
+            await run_migrations(conn)
+            assert await get_version(conn) == 19
+
+            # Verify autoindex is gone
+            cursor = await conn.execute(
+                "SELECT name FROM sqlite_master WHERE name='sqlite_autoindex_raw_packets_1'"
+            )
+            assert await cursor.fetchone() is None
+
+            # Verify data is preserved
+            cursor = await conn.execute("SELECT COUNT(*) FROM raw_packets")
+            assert (await cursor.fetchone())[0] == 2
+
+            cursor = await conn.execute(
+                "SELECT timestamp, data, message_id, payload_hash FROM raw_packets ORDER BY id"
+            )
+            rows = await cursor.fetchall()
+            assert rows[0]["timestamp"] == 1000
+            assert bytes(rows[0]["data"]) == b"\x01\x02\x03"
+            assert rows[0]["message_id"] is None
+            assert rows[0]["payload_hash"] == "hash_a"
+            assert rows[1]["message_id"] == 42
+
+            # Verify payload_hash unique index still works
+            cursor = await conn.execute(
+                "SELECT name FROM sqlite_master WHERE name='idx_raw_packets_payload_hash'"
+            )
+            assert await cursor.fetchone() is not None
+        finally:
+            await conn.close()
+
+    @pytest.mark.asyncio
+    async def test_migration_skips_when_no_unique_constraint(self):
+        """Migration is a no-op when UNIQUE(data) is already absent."""
+        conn = await aiosqlite.connect(":memory:")
+        conn.row_factory = aiosqlite.Row
+        try:
+            await set_version(conn, 17)
+
+            # Create raw_packets WITHOUT UNIQUE(data) — fresh install schema
+            await conn.execute("""
+                CREATE TABLE raw_packets (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    timestamp INTEGER NOT NULL,
+                    data BLOB NOT NULL,
+                    message_id INTEGER,
+                    payload_hash TEXT
+                )
+            """)
+            await conn.execute(
+                "CREATE UNIQUE INDEX idx_raw_packets_payload_hash ON raw_packets(payload_hash)"
+            )
+            # Messages stub for migration 19
+            await conn.execute("""
+                CREATE TABLE messages (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    type TEXT NOT NULL,
+                    conversation_key TEXT NOT NULL,
+                    text TEXT NOT NULL,
+                    sender_timestamp INTEGER,
+                    received_at INTEGER NOT NULL,
+                    txt_type INTEGER DEFAULT 0,
+                    signature TEXT,
+                    outgoing INTEGER DEFAULT 0,
+                    acked INTEGER DEFAULT 0,
+                    paths TEXT
+                )
+            """)
+            await conn.execute(
+                """CREATE UNIQUE INDEX idx_messages_dedup_null_safe
+                ON messages(type, conversation_key, text, COALESCE(sender_timestamp, 0))"""
+            )
+            await conn.commit()
+
+            applied = await run_migrations(conn)
+            assert applied == 2  # Migrations 18+19 run (but both skip internally)
+            assert await get_version(conn) == 19
+        finally:
+            await conn.close()
+
+
+class TestMigration019:
+    """Test migration 019: drop UNIQUE constraint from messages."""
+
+    @pytest.mark.asyncio
+    async def test_migration_drops_messages_unique_constraint(self):
+        """Migration rebuilds messages without UNIQUE, preserving data and dedup index."""
+        conn = await aiosqlite.connect(":memory:")
+        conn.row_factory = aiosqlite.Row
+        try:
+            await set_version(conn, 17)
+
+            # raw_packets stub (no UNIQUE on data, so migration 18 skips)
+            await conn.execute("""
+                CREATE TABLE raw_packets (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    timestamp INTEGER NOT NULL,
+                    data BLOB NOT NULL,
+                    message_id INTEGER,
+                    payload_hash TEXT
+                )
+            """)
+            # Create messages WITH UNIQUE constraint — simulates production schema
+            await conn.execute("""
+                CREATE TABLE messages (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    type TEXT NOT NULL,
+                    conversation_key TEXT NOT NULL,
+                    text TEXT NOT NULL,
+                    sender_timestamp INTEGER,
+                    received_at INTEGER NOT NULL,
+                    txt_type INTEGER DEFAULT 0,
+                    signature TEXT,
+                    outgoing INTEGER DEFAULT 0,
+                    acked INTEGER DEFAULT 0,
+                    paths TEXT,
+                    UNIQUE(type, conversation_key, text, sender_timestamp)
+                )
+            """)
+            await conn.execute(
+                "CREATE INDEX idx_messages_conversation ON messages(type, conversation_key)"
+            )
+            await conn.execute("CREATE INDEX idx_messages_received ON messages(received_at)")
+            await conn.execute(
+                """CREATE UNIQUE INDEX idx_messages_dedup_null_safe
+                ON messages(type, conversation_key, text, COALESCE(sender_timestamp, 0))"""
+            )
+
+            # Insert test data
+            await conn.execute(
+                "INSERT INTO messages (type, conversation_key, text, sender_timestamp, received_at, paths) "
+                "VALUES (?, ?, ?, ?, ?, ?)",
+                ("CHAN", "KEY1", "hello world", 1000, 1000, '[{"path":"ab","received_at":1000}]'),
+            )
+            await conn.execute(
+                "INSERT INTO messages (type, conversation_key, text, sender_timestamp, received_at, outgoing) "
+                "VALUES (?, ?, ?, ?, ?, ?)",
+                ("PRIV", "abc123", "dm text", 2000, 2000, 1),
+            )
+            await conn.commit()
+
+            # Verify autoindex exists before migration
+            cursor = await conn.execute(
+                "SELECT name FROM sqlite_master WHERE name='sqlite_autoindex_messages_1'"
+            )
+            assert await cursor.fetchone() is not None
+
+            await run_migrations(conn)
+            assert await get_version(conn) == 19
+
+            # Verify autoindex is gone
+            cursor = await conn.execute(
+                "SELECT name FROM sqlite_master WHERE name='sqlite_autoindex_messages_1'"
+            )
+            assert await cursor.fetchone() is None
+
+            # Verify data is preserved
+            cursor = await conn.execute("SELECT COUNT(*) FROM messages")
+            assert (await cursor.fetchone())[0] == 2
+
+            cursor = await conn.execute(
+                "SELECT type, conversation_key, text, paths, outgoing FROM messages ORDER BY id"
+            )
+            rows = await cursor.fetchall()
+            assert rows[0]["type"] == "CHAN"
+            assert rows[0]["text"] == "hello world"
+            assert rows[0]["paths"] == '[{"path":"ab","received_at":1000}]'
+            assert rows[1]["type"] == "PRIV"
+            assert rows[1]["outgoing"] == 1
+
+            # Verify dedup index still works (INSERT OR IGNORE should ignore duplicates)
+            cursor = await conn.execute(
+                "INSERT OR IGNORE INTO messages (type, conversation_key, text, sender_timestamp, received_at) "
+                "VALUES (?, ?, ?, ?, ?)",
+                ("CHAN", "KEY1", "hello world", 1000, 9999),
+            )
+            assert cursor.rowcount == 0  # Duplicate ignored
+
+            # Verify dedup index exists
+            cursor = await conn.execute(
+                "SELECT name FROM sqlite_master WHERE name='idx_messages_dedup_null_safe'"
+            )
+            assert await cursor.fetchone() is not None
+        finally:
+            await conn.close()