mirror of
https://github.com/MarekWo/mc-webui.git
synced 2026-06-22 19:15:19 +02:00
f5fedbc96c
Implements automatic daily archiving of messages to improve performance
and enable browsing historical chat by date.
Backend changes:
- Add APScheduler for daily archiving at midnight (00:00 UTC)
- Create app/archiver/manager.py with archive logic and scheduler
- Extend parser.py to read from archive files and filter by days
- Add archive configuration to config.py (MC_ARCHIVE_*)
API changes:
- Extend GET /api/messages with archive_date and days parameters
- Add GET /api/archives endpoint to list available archives
- Add POST /api/archive/trigger for manual archiving
Frontend changes:
- Add date selector dropdown in navbar for archive browsing
- Implement archive list loading and date selection
- Update formatTime() to show full dates in archive view
- Live view now shows only last 7 days (configurable)
Docker & Config:
- Add archive volume mount in docker-compose.yml
- Add MC_ARCHIVE_DIR, MC_ARCHIVE_ENABLED, MC_ARCHIVE_RETENTION_DAYS env vars
- Update .env.example with archive configuration section
Documentation:
- Update README.md with archive feature and usage instructions
- Update .claude/instructions.md with archive endpoints
Key features:
- Automatic daily archiving (midnight UTC)
- Live view filtered to last 7 days for better performance
- Browse historical messages by date via dropdown selector
- Archives stored as dated files: {device}.YYYY-MM-DD.msgs
- Original .msgs file never modified (safe, read-only approach)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
247 lines
7.2 KiB
Python
247 lines
7.2 KiB
Python
"""
|
|
Message parser - reads and parses .msgs file (JSON Lines format)
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import List, Dict, Optional
|
|
from datetime import datetime, timedelta
|
|
from app.config import config
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def parse_message(line: Dict) -> Optional[Dict]:
|
|
"""
|
|
Parse a single message line from .msgs file.
|
|
|
|
Args:
|
|
line: Raw JSON object from .msgs file
|
|
|
|
Returns:
|
|
Parsed message dict or None if not a valid chat message
|
|
"""
|
|
msg_type = line.get('type')
|
|
channel_idx = line.get('channel_idx', 0)
|
|
|
|
# Only process Public channel (channel 0) messages
|
|
if channel_idx != 0:
|
|
return None
|
|
|
|
# Only process CHAN (received) and SENT_CHAN (sent) messages
|
|
if msg_type not in ['CHAN', 'SENT_CHAN']:
|
|
return None
|
|
|
|
timestamp = line.get('timestamp', 0)
|
|
text = line.get('text', '').strip()
|
|
|
|
if not text:
|
|
return None
|
|
|
|
# Determine if message is sent or received
|
|
is_own = msg_type == 'SENT_CHAN'
|
|
|
|
# Extract sender name
|
|
if is_own:
|
|
# For sent messages, use device name from config or 'name' field
|
|
sender = line.get('name', config.MC_DEVICE_NAME)
|
|
content = text
|
|
else:
|
|
# For received messages, extract sender from "SenderName: message" format
|
|
if ':' in text:
|
|
sender, content = text.split(':', 1)
|
|
sender = sender.strip()
|
|
content = content.strip()
|
|
else:
|
|
# Fallback if format is unexpected
|
|
sender = "Unknown"
|
|
content = text
|
|
|
|
return {
|
|
'sender': sender,
|
|
'content': content,
|
|
'timestamp': timestamp,
|
|
'datetime': datetime.fromtimestamp(timestamp).isoformat() if timestamp > 0 else None,
|
|
'is_own': is_own,
|
|
'snr': line.get('SNR'),
|
|
'path_len': line.get('path_len')
|
|
}
|
|
|
|
|
|
def read_messages(limit: Optional[int] = None, offset: int = 0, archive_date: Optional[str] = None, days: Optional[int] = None) -> List[Dict]:
|
|
"""
|
|
Read and parse messages from .msgs file or archive file.
|
|
|
|
Args:
|
|
limit: Maximum number of messages to return (None = all)
|
|
offset: Number of messages to skip from the end
|
|
archive_date: If provided, read from archive file for this date (YYYY-MM-DD)
|
|
days: If provided, filter messages from the last N days (only for live .msgs)
|
|
|
|
Returns:
|
|
List of parsed message dictionaries, sorted by timestamp (oldest first)
|
|
"""
|
|
# If archive_date is provided, read from archive
|
|
if archive_date:
|
|
return read_archive_messages(archive_date, limit, offset)
|
|
|
|
msgs_file = config.msgs_file_path
|
|
|
|
if not msgs_file.exists():
|
|
logger.warning(f"Messages file not found: {msgs_file}")
|
|
return []
|
|
|
|
messages = []
|
|
|
|
try:
|
|
with open(msgs_file, 'r', encoding='utf-8') as f:
|
|
for line_num, line in enumerate(f, 1):
|
|
line = line.strip()
|
|
if not line:
|
|
continue
|
|
|
|
try:
|
|
data = json.loads(line)
|
|
parsed = parse_message(data)
|
|
if parsed:
|
|
messages.append(parsed)
|
|
except json.JSONDecodeError as e:
|
|
logger.warning(f"Invalid JSON at line {line_num}: {e}")
|
|
continue
|
|
except Exception as e:
|
|
logger.error(f"Error parsing line {line_num}: {e}")
|
|
continue
|
|
|
|
except FileNotFoundError:
|
|
logger.error(f"Messages file not found: {msgs_file}")
|
|
return []
|
|
except Exception as e:
|
|
logger.error(f"Error reading messages file: {e}")
|
|
return []
|
|
|
|
# Sort by timestamp (oldest first)
|
|
messages.sort(key=lambda m: m['timestamp'])
|
|
|
|
# Filter by days if specified
|
|
if days is not None and days > 0:
|
|
messages = filter_messages_by_days(messages, days)
|
|
|
|
# Apply offset and limit
|
|
if offset > 0:
|
|
messages = messages[:-offset] if offset < len(messages) else []
|
|
|
|
if limit is not None and limit > 0:
|
|
messages = messages[-limit:]
|
|
|
|
logger.info(f"Loaded {len(messages)} messages from {msgs_file}")
|
|
return messages
|
|
|
|
|
|
def get_latest_message() -> Optional[Dict]:
|
|
"""
|
|
Get the most recent message.
|
|
|
|
Returns:
|
|
Latest message dict or None if no messages
|
|
"""
|
|
messages = read_messages(limit=1)
|
|
return messages[0] if messages else None
|
|
|
|
|
|
def count_messages() -> int:
|
|
"""
|
|
Count total number of messages in the file.
|
|
|
|
Returns:
|
|
Message count
|
|
"""
|
|
return len(read_messages())
|
|
|
|
|
|
def read_archive_messages(archive_date: str, limit: Optional[int] = None, offset: int = 0) -> List[Dict]:
|
|
"""
|
|
Read messages from an archive file.
|
|
|
|
Args:
|
|
archive_date: Archive date in YYYY-MM-DD format
|
|
limit: Maximum number of messages to return (None = all)
|
|
offset: Number of messages to skip from the end
|
|
|
|
Returns:
|
|
List of parsed message dictionaries, sorted by timestamp (oldest first)
|
|
"""
|
|
from app.archiver.manager import get_archive_path
|
|
|
|
archive_file = get_archive_path(archive_date)
|
|
|
|
if not archive_file.exists():
|
|
logger.warning(f"Archive file not found: {archive_file}")
|
|
return []
|
|
|
|
messages = []
|
|
|
|
try:
|
|
with open(archive_file, 'r', encoding='utf-8') as f:
|
|
for line_num, line in enumerate(f, 1):
|
|
line = line.strip()
|
|
if not line:
|
|
continue
|
|
|
|
try:
|
|
data = json.loads(line)
|
|
parsed = parse_message(data)
|
|
if parsed:
|
|
messages.append(parsed)
|
|
except json.JSONDecodeError as e:
|
|
logger.warning(f"Invalid JSON at line {line_num} in archive: {e}")
|
|
continue
|
|
except Exception as e:
|
|
logger.error(f"Error parsing line {line_num} in archive: {e}")
|
|
continue
|
|
|
|
except FileNotFoundError:
|
|
logger.error(f"Archive file not found: {archive_file}")
|
|
return []
|
|
except Exception as e:
|
|
logger.error(f"Error reading archive file: {e}")
|
|
return []
|
|
|
|
# Sort by timestamp (oldest first)
|
|
messages.sort(key=lambda m: m['timestamp'])
|
|
|
|
# Apply offset and limit
|
|
if offset > 0:
|
|
messages = messages[:-offset] if offset < len(messages) else []
|
|
|
|
if limit is not None and limit > 0:
|
|
messages = messages[-limit:]
|
|
|
|
logger.info(f"Loaded {len(messages)} messages from archive {archive_date}")
|
|
return messages
|
|
|
|
|
|
def filter_messages_by_days(messages: List[Dict], days: int) -> List[Dict]:
|
|
"""
|
|
Filter messages to include only those from the last N days.
|
|
|
|
Args:
|
|
messages: List of message dicts
|
|
days: Number of days to include (from now)
|
|
|
|
Returns:
|
|
Filtered list of messages
|
|
"""
|
|
if not messages:
|
|
return []
|
|
|
|
# Calculate cutoff timestamp
|
|
cutoff_date = datetime.now() - timedelta(days=days)
|
|
cutoff_timestamp = cutoff_date.timestamp()
|
|
|
|
# Filter messages
|
|
filtered = [msg for msg in messages if msg['timestamp'] >= cutoff_timestamp]
|
|
|
|
logger.info(f"Filtered {len(filtered)} messages from last {days} days (out of {len(messages)} total)")
|
|
return filtered
|