"""
|
|
Database migrations using SQLite's user_version pragma.
|
|
|
|
Migrations run automatically on startup. The user_version pragma tracks
|
|
which migrations have been applied (defaults to 0 for existing databases).
|
|
|
|
This approach is safe for existing users - their databases have user_version=0,
|
|
so all migrations run in order on first startup after upgrade.
|
|
"""
|
|
|
|
import logging
|
|
from hashlib import sha256
|
|
|
|
import aiosqlite
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
async def get_version(conn: aiosqlite.Connection) -> int:
    """Get current schema version from SQLite user_version pragma."""
    cursor = await conn.execute("PRAGMA user_version")
    row = await cursor.fetchone()
    return row[0] if row else 0


async def set_version(conn: aiosqlite.Connection, version: int) -> None:
    """Set schema version using SQLite user_version pragma."""
    await conn.execute(f"PRAGMA user_version = {version}")


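# Note: user_version is a 32-bit integer stored in the SQLite database header;
# it reads as 0 for any database that has never set it, which is what lets
# pre-migration databases start at version 0. A quick illustration of the
# round-trip (a sketch using the synchronous sqlite3 module, not part of this
# module's API):
#
#     import sqlite3
#     db = sqlite3.connect(":memory:")
#     assert db.execute("PRAGMA user_version").fetchone()[0] == 0
#     db.execute("PRAGMA user_version = 7")
#     assert db.execute("PRAGMA user_version").fetchone()[0] == 7

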
async def run_migrations(conn: aiosqlite.Connection) -> int:
    """
    Run all pending migrations.

    Returns the number of migrations applied.
    """
    version = await get_version(conn)
    applied = 0

    # Migration 1: Add last_read_at columns for server-side read tracking
    if version < 1:
        logger.info("Applying migration 1: add last_read_at columns")
        await _migrate_001_add_last_read_at(conn)
        await set_version(conn, 1)
        applied += 1

    # Migration 2: Drop unused decrypt_attempts and last_attempt columns
    if version < 2:
        logger.info("Applying migration 2: drop decrypt_attempts and last_attempt columns")
        await _migrate_002_drop_decrypt_attempt_columns(conn)
        await set_version(conn, 2)
        applied += 1

    # Migration 3: Drop decrypted column (redundant with message_id), update index
    if version < 3:
        logger.info("Applying migration 3: drop decrypted column, add message_id index")
        await _migrate_003_drop_decrypted_column(conn)
        await set_version(conn, 3)
        applied += 1

    # Migration 4: Add payload_hash column for deduplication
    if version < 4:
        logger.info("Applying migration 4: add payload_hash column")
        await _migrate_004_add_payload_hash_column(conn)
        await set_version(conn, 4)
        applied += 1

    # Migration 5: Backfill payload hashes and deduplicate existing packets
    if version < 5:
        logger.info("Applying migration 5: backfill payload hashes and dedupe")
        await _migrate_005_backfill_payload_hashes(conn)
        await set_version(conn, 5)
        applied += 1

    if applied > 0:
        logger.info(
            "Applied %d migration(s), schema now at version %d", applied, await get_version(conn)
        )
    else:
        logger.debug("Schema up to date at version %d", version)

    return applied


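# Example call site (a sketch, not code from this project): the connection is
# assumed to be opened elsewhere during startup, e.g.
#
#     async with aiosqlite.connect("meshcore.db") as conn:
#         applied = await run_migrations(conn)
#         if applied:
#             logger.info("Upgraded database schema (%d migration(s) applied)", applied)

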
async def _migrate_001_add_last_read_at(conn: aiosqlite.Connection) -> None:
    """
    Add last_read_at column to contacts and channels tables.

    This enables server-side read state tracking, replacing the localStorage
    approach for consistent read state across devices.

    ALTER TABLE ADD COLUMN is safe - it preserves existing data, and the
    "column already exists" case is handled gracefully below.
    """
    # Add to contacts table
    try:
        await conn.execute("ALTER TABLE contacts ADD COLUMN last_read_at INTEGER")
        logger.debug("Added last_read_at to contacts table")
    except aiosqlite.OperationalError as e:
        if "duplicate column name" in str(e).lower():
            logger.debug("contacts.last_read_at already exists, skipping")
        else:
            raise

    # Add to channels table
    try:
        await conn.execute("ALTER TABLE channels ADD COLUMN last_read_at INTEGER")
        logger.debug("Added last_read_at to channels table")
    except aiosqlite.OperationalError as e:
        if "duplicate column name" in str(e).lower():
            logger.debug("channels.last_read_at already exists, skipping")
        else:
            raise

    await conn.commit()


async def _migrate_002_drop_decrypt_attempt_columns(conn: aiosqlite.Connection) -> None:
    """
    Drop unused decrypt_attempts and last_attempt columns from raw_packets.

    These columns were added for a retry-limiting feature that was never implemented.
    They are written to but never read, so we can safely remove them.

    SQLite 3.35.0+ supports ALTER TABLE DROP COLUMN. For older versions,
    we silently skip (the columns will remain but are harmless).
    """
    for column in ["decrypt_attempts", "last_attempt"]:
        try:
            await conn.execute(f"ALTER TABLE raw_packets DROP COLUMN {column}")
            logger.debug("Dropped %s from raw_packets table", column)
        except aiosqlite.OperationalError as e:
            error_msg = str(e).lower()
            if "no such column" in error_msg:
                logger.debug("raw_packets.%s already dropped, skipping", column)
            elif "syntax error" in error_msg or "drop column" in error_msg:
                # SQLite version doesn't support DROP COLUMN - harmless, column stays
                logger.debug("SQLite doesn't support DROP COLUMN, %s column will remain", column)
            else:
                raise

    await conn.commit()


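# Alternative approach (not used here, shown only as an illustration): DROP
# COLUMN support could also be detected up front from the linked SQLite
# version instead of catching the OperationalError:
#
#     import sqlite3
#     supports_drop_column = sqlite3.sqlite_version_info >= (3, 35, 0)

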
async def _migrate_003_drop_decrypted_column(conn: aiosqlite.Connection) -> None:
    """
    Drop the decrypted column and update indexes.

    The decrypted column is redundant with message_id - a packet is decrypted
    iff message_id IS NOT NULL. We replace the decrypted index with a message_id index.

    SQLite 3.35.0+ supports ALTER TABLE DROP COLUMN. For older versions,
    we silently skip the column drop but still update the index.
    """
    # First, drop the old index on decrypted (safe even if it doesn't exist)
    try:
        await conn.execute("DROP INDEX IF EXISTS idx_raw_packets_decrypted")
        logger.debug("Dropped idx_raw_packets_decrypted index")
    except aiosqlite.OperationalError:
        pass  # Index didn't exist

    # Create new index on message_id for efficient undecrypted packet queries
    try:
        await conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_raw_packets_message_id ON raw_packets(message_id)"
        )
        logger.debug("Created idx_raw_packets_message_id index")
    except aiosqlite.OperationalError as e:
        if "already exists" not in str(e).lower():
            raise

    # Try to drop the decrypted column
    try:
        await conn.execute("ALTER TABLE raw_packets DROP COLUMN decrypted")
        logger.debug("Dropped decrypted from raw_packets table")
    except aiosqlite.OperationalError as e:
        error_msg = str(e).lower()
        if "no such column" in error_msg:
            logger.debug("raw_packets.decrypted already dropped, skipping")
        elif "syntax error" in error_msg or "drop column" in error_msg:
            # SQLite version doesn't support DROP COLUMN - harmless, column stays
            logger.debug("SQLite doesn't support DROP COLUMN, decrypted column will remain")
        else:
            raise

    await conn.commit()


async def _migrate_004_add_payload_hash_column(conn: aiosqlite.Connection) -> None:
    """
    Add payload_hash column to raw_packets for deduplication.

    This column stores the SHA-256 hash of the packet payload (excluding routing/path info).
    It will be used with a unique index to prevent duplicate packets from being stored.
    """
    try:
        await conn.execute("ALTER TABLE raw_packets ADD COLUMN payload_hash TEXT")
        logger.debug("Added payload_hash column to raw_packets table")
    except aiosqlite.OperationalError as e:
        if "duplicate column name" in str(e).lower():
            logger.debug("raw_packets.payload_hash already exists, skipping")
        else:
            raise

    await conn.commit()


def _extract_payload_for_hash(raw_packet: bytes) -> bytes | None:
    """
    Extract payload from a raw packet for hashing (migration-local copy of decoder logic).

    Returns the payload bytes, or None if the packet is malformed.
    """
    if len(raw_packet) < 2:
        return None

    try:
        header = raw_packet[0]
        route_type = header & 0x03
        offset = 1

        # Skip transport codes if present (TRANSPORT_FLOOD=0, TRANSPORT_DIRECT=3)
        if route_type in (0x00, 0x03):
            if len(raw_packet) < offset + 4:
                return None
            offset += 4

        # Get path length
        if len(raw_packet) < offset + 1:
            return None
        path_length = raw_packet[offset]
        offset += 1

        # Skip path bytes
        if len(raw_packet) < offset + path_length:
            return None
        offset += path_length

        # Rest is payload (may be empty, matching decoder.py behavior)
        return raw_packet[offset:]
    except (IndexError, ValueError):
        return None


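# Worked example with hypothetical bytes (the layout below mirrors the parsing
# above; it is not a captured MeshCore packet): header 0x00 selects a route
# type with transport codes, so 4 transport bytes are skipped, then a path
# length of 2 skips two path bytes, and the remainder is the payload.
#
#     raw = bytes([0x00, 0, 0, 0, 0, 0x02, 0xAA, 0xBB]) + b"payload"
#     assert _extract_payload_for_hash(raw) == b"payload"

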
async def _migrate_005_backfill_payload_hashes(conn: aiosqlite.Connection) -> None:
    """
    Backfill payload_hash for existing packets and remove duplicates.

    This may take a while for large databases. Progress is logged.
    After backfilling, a unique index is created to prevent future duplicates.
    """
    # Get count first
    cursor = await conn.execute("SELECT COUNT(*) FROM raw_packets WHERE payload_hash IS NULL")
    row = await cursor.fetchone()
    total = row[0] if row else 0

    if total == 0:
        logger.debug("No packets need hash backfill")
    else:
        logger.info("Backfilling payload hashes for %d packets. This may take a while...", total)

    # Process in batches to avoid memory issues
    batch_size = 1000
    processed = 0
    duplicates_deleted = 0

    # Track seen hashes to identify duplicates (keep oldest = lowest ID)
    seen_hashes: dict[str, int] = {}  # hash -> oldest packet ID

    # First pass: compute hashes and identify duplicates
    cursor = await conn.execute("SELECT id, data FROM raw_packets ORDER BY id ASC")

    packets_to_update: list[tuple[str, int]] = []  # (hash, id)
    ids_to_delete: list[int] = []

    while True:
        rows = await cursor.fetchmany(batch_size)
        if not rows:
            break

        for row in rows:
            packet_id = row[0]
            packet_data = bytes(row[1])

            # Extract payload and compute hash
            payload = _extract_payload_for_hash(packet_data)
            if payload:
                payload_hash = sha256(payload).hexdigest()
            else:
                # For malformed packets, hash the full data
                payload_hash = sha256(packet_data).hexdigest()

            if payload_hash in seen_hashes:
                # Duplicate - mark for deletion (we keep the older one)
                ids_to_delete.append(packet_id)
                duplicates_deleted += 1
            else:
                # New hash - keep this packet
                seen_hashes[payload_hash] = packet_id
                packets_to_update.append((payload_hash, packet_id))

            processed += 1

            if processed % 10000 == 0:
                logger.info("Processed %d/%d packets...", processed, total)

    # Second pass: update hashes for packets we're keeping
    total_updates = len(packets_to_update)
    logger.info("Updating %d packets with hashes...", total_updates)
    for idx, (payload_hash, packet_id) in enumerate(packets_to_update, 1):
        await conn.execute(
            "UPDATE raw_packets SET payload_hash = ? WHERE id = ?",
            (payload_hash, packet_id),
        )
        if idx % 10000 == 0:
            logger.info("Updated %d/%d packets...", idx, total_updates)

    # Third pass: delete duplicates
    if ids_to_delete:
        total_deletes = len(ids_to_delete)
        logger.info("Removing %d duplicate packets...", total_deletes)
        deleted_count = 0
        # Delete in batches to avoid "too many SQL variables" error
        for i in range(0, len(ids_to_delete), 500):
            batch = ids_to_delete[i : i + 500]
            placeholders = ",".join("?" * len(batch))
            await conn.execute(f"DELETE FROM raw_packets WHERE id IN ({placeholders})", batch)
            deleted_count += len(batch)
            if deleted_count % 10000 < 500:  # Log roughly every 10k
                logger.info("Removed %d/%d duplicates...", deleted_count, total_deletes)

    await conn.commit()
    logger.info(
        "Hash backfill complete: %d packets updated, %d duplicates removed",
        len(packets_to_update),
        duplicates_deleted,
    )

    # Create unique index on payload_hash (this enforces uniqueness going forward)
    try:
        await conn.execute(
            "CREATE UNIQUE INDEX IF NOT EXISTS idx_raw_packets_payload_hash "
            "ON raw_packets(payload_hash)"
        )
        logger.debug("Created unique index on payload_hash")
    except aiosqlite.OperationalError as e:
        if "already exists" not in str(e).lower():
            raise

    await conn.commit()
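

# Going forward, the unique index means the write path can deduplicate at
# insert time. A sketch of what that might look like (the column list here is
# illustrative, not the project's actual insert statement):
#
#     payload = _extract_payload_for_hash(data) or data
#     await conn.execute(
#         "INSERT OR IGNORE INTO raw_packets (data, payload_hash) VALUES (?, ?)",
#         (data, sha256(payload).hexdigest()),
#     )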