From 69e73440db84e649e1d5b21ddf032f179a28fd5b Mon Sep 17 00:00:00 2001 From: pelgraine <140762863+pelgraine@users.noreply.github.com> Date: Mon, 9 Feb 2026 09:08:48 +1100 Subject: [PATCH] Epub and Epub3 converter now working --- examples/companion_radio/main.cpp | 4 +- .../companion_radio/ui-new/Textreaderscreen.h | 237 +++++- variants/lilygo_tdeck_pro/EpubZipReader.h | 538 ++++++++++++ variants/lilygo_tdeck_pro/Epubprocessor.h | 776 ++++++++++++++++++ 4 files changed, 1522 insertions(+), 33 deletions(-) create mode 100644 variants/lilygo_tdeck_pro/EpubZipReader.h create mode 100644 variants/lilygo_tdeck_pro/Epubprocessor.h diff --git a/examples/companion_radio/main.cpp b/examples/companion_radio/main.cpp index 2b0c5202..f67bc4b3 100644 --- a/examples/companion_radio/main.cpp +++ b/examples/companion_radio/main.cpp @@ -351,9 +351,7 @@ void setup() { reader->bootIndex(*disp); } } - } else { - MESH_DEBUG_PRINTLN("setup() - SD card initialization failed!"); - } + } } #endif diff --git a/examples/companion_radio/ui-new/Textreaderscreen.h b/examples/companion_radio/ui-new/Textreaderscreen.h index a7b4a072..de391fe2 100644 --- a/examples/companion_radio/ui-new/Textreaderscreen.h +++ b/examples/companion_radio/ui-new/Textreaderscreen.h @@ -4,6 +4,7 @@ #include #include #include +#include "EpubProcessor.h" // Forward declarations class UITask; @@ -196,6 +197,38 @@ private: // Draw directly to display outside the normal render cycle. // Matches the style of the standalone text reader firmware splash. 
+ // Generic splash screen: title (large green), subtitle (normal), detail (normal) + void drawSplash(const char* title, const char* subtitle, const char* detail) { + if (!_display) return; + _display->startFrame(); + + // Title in large text + _display->setTextSize(2); + _display->setColor(DisplayDriver::GREEN); + _display->setCursor(10, 11); + _display->print(title); + + _display->setTextSize(1); + _display->setColor(DisplayDriver::LIGHT); + + int y = 35; + + // Subtitle + if (subtitle && subtitle[0]) { + _display->setCursor(10, y); + _display->print(subtitle); + y += 8; + } + + // Detail line + if (detail && detail[0]) { + _display->setCursor(10, y); + _display->print(detail); + } + + _display->endFrame(); + } + // Word-wrapping splash for opening a large book. // Shows: "Indexing / Pages..." (large), word-wrapped filename, "Please wait. / Loading shortly..." void drawIndexingSplash(const String& filename) { @@ -353,9 +386,14 @@ private: idxFile.read(&fullyFlag, 1); idxFile.read((uint8_t*)&lastRead, 4); - // Verify file hasn't changed + // Verify file hasn't changed - try BOOKS_FOLDER first, then epub cache String fullPath = String(BOOKS_FOLDER) + "/" + filename; File txtFile = SD.open(fullPath.c_str(), FILE_READ); + if (!txtFile) { + // Fallback: check epub cache directory + String cachePath = String("/books/.epub_cache/") + filename; + txtFile = SD.open(cachePath.c_str(), FILE_READ); + } if (!txtFile) { idxFile.close(); return false; } unsigned long curSize = txtFile.size(); txtFile.close(); @@ -457,7 +495,8 @@ private: if (slash >= 0) name = name.substring(slash + 1); if (!name.startsWith(".") && - (name.endsWith(".txt") || name.endsWith(".TXT"))) { + (name.endsWith(".txt") || name.endsWith(".TXT") || + name.endsWith(".epub") || name.endsWith(".EPUB"))) { _fileList.push_back(name); } } @@ -472,23 +511,86 @@ private: void openBook(const String& filename) { if (_fileOpen) closeBook(); - // Find cached index + // ---- EPUB auto-conversion ---- + String 
actualFilename = filename; + String actualFullPath = String(BOOKS_FOLDER) + "/" + filename; + bool isEpub = filename.endsWith(".epub") || filename.endsWith(".EPUB"); + + if (isEpub) { + // Build cache path for this EPUB + char cachePath[160]; + EpubProcessor::buildCachePath(actualFullPath.c_str(), cachePath, sizeof(cachePath)); + + // Check if already converted + digitalWrite(SDCARD_CS, LOW); + bool cached = SD.exists(cachePath); + digitalWrite(SDCARD_CS, HIGH); + + if (!cached) { + // Show conversion splash on e-ink + char shortName[28]; + if (filename.length() > 24) { + strncpy(shortName, filename.c_str(), 21); + shortName[21] = '\0'; + strcat(shortName, "..."); + } else { + strncpy(shortName, filename.c_str(), sizeof(shortName) - 1); + shortName[sizeof(shortName) - 1] = '\0'; + } + drawSplash("Converting EPUB...", "Please wait", shortName); + + Serial.printf("TextReader: Converting EPUB '%s'\n", filename.c_str()); + unsigned long t0 = millis(); + + digitalWrite(SDCARD_CS, LOW); + bool ok = EpubProcessor::processToText(actualFullPath.c_str(), cachePath); + digitalWrite(SDCARD_CS, HIGH); + + if (!ok) { + Serial.println("TextReader: EPUB conversion failed!"); + drawSplash("Convert failed!", "", shortName); + delay(2000); + return; // Stay in file list + } + Serial.printf("TextReader: EPUB converted in %lu ms\n", millis() - t0); + } else { + Serial.printf("TextReader: EPUB cache hit for '%s'\n", filename.c_str()); + } + + // Redirect to the cached .txt + actualFullPath = String(cachePath); + const char* lastSlash = strrchr(cachePath, '/'); + actualFilename = String(lastSlash ? 
lastSlash + 1 : cachePath); + } + // ---- End EPUB auto-conversion ---- + + // Find cached index for this file FileCache* cache = nullptr; for (int i = 0; i < (int)_fileCache.size(); i++) { - if (_fileCache[i].filename == filename) { + if (_fileCache[i].filename == actualFilename) { cache = &_fileCache[i]; break; } } - String fullPath = String(BOOKS_FOLDER) + "/" + filename; - _file = SD.open(fullPath.c_str(), FILE_READ); + _file = SD.open(actualFullPath.c_str(), FILE_READ); + + // Fallback: try epub cache dir (for files discovered during boot scan) + if (!_file && !isEpub) { + String cacheFallback = String("/books/.epub_cache/") + actualFilename; + _file = SD.open(cacheFallback.c_str(), FILE_READ); + if (_file) { + actualFullPath = cacheFallback; + Serial.printf("TextReader: Opened from epub cache: %s\n", actualFilename.c_str()); + } + } + if (!_file) { - Serial.printf("TextReader: Failed to open %s\n", filename.c_str()); + Serial.printf("TextReader: Failed to open %s\n", actualFilename.c_str()); return; } - _currentFile = filename; + _currentFile = actualFilename; _fileOpen = true; _currentPage = 0; _pagePositions.clear(); @@ -501,55 +603,91 @@ private: _currentPage = cache->lastReadPage; } - // Already fully indexed - open immediately + // Already fully indexed — open immediately if (cache->fullyIndexed) { _totalPages = _pagePositions.size(); _mode = READING; loadPageContent(); Serial.printf("TextReader: Opened %s, %d pages, resume pg %d\n", - filename.c_str(), _totalPages, _currentPage + 1); + actualFilename.c_str(), _totalPages, _currentPage + 1); return; } - // Partially indexed - show splash and finish indexing + // Partially indexed — finish indexing with splash Serial.printf("TextReader: Finishing index for %s (have %d pages so far)\n", - filename.c_str(), (int)_pagePositions.size()); + actualFilename.c_str(), (int)_pagePositions.size()); - drawIndexingSplash(filename); + char shortName[28]; + if (actualFilename.length() > 24) { + strncpy(shortName, 
actualFilename.c_str(), 21); + shortName[21] = '\0'; + strcat(shortName, "..."); + } else { + strncpy(shortName, actualFilename.c_str(), sizeof(shortName) - 1); + shortName[sizeof(shortName) - 1] = '\0'; + } + drawSplash("Indexing...", "Please wait", shortName); - long lastPos = cache->pagePositions.back(); - indexPagesWordWrap(_file, lastPos, _pagePositions, - _linesPerPage, _charsPerLine, 0); + if (_pagePositions.empty()) { + // Cache had no pages (e.g. dummy entry) — full index from scratch + _pagePositions.push_back(0); + indexPagesWordWrap(_file, 0, _pagePositions, + _linesPerPage, _charsPerLine, 0); + } else { + long lastPos = cache->pagePositions.back(); + indexPagesWordWrap(_file, lastPos, _pagePositions, + _linesPerPage, _charsPerLine, 0); + } } else { - // No cache at all - full index from scratch with splash - Serial.printf("TextReader: Full index for %s\n", filename.c_str()); + // No cache — full index from scratch + Serial.printf("TextReader: Full index for %s\n", actualFilename.c_str()); - drawIndexingSplash(filename); + char shortName[28]; + if (actualFilename.length() > 24) { + strncpy(shortName, actualFilename.c_str(), 21); + shortName[21] = '\0'; + strcat(shortName, "..."); + } else { + strncpy(shortName, actualFilename.c_str(), sizeof(shortName) - 1); + shortName[sizeof(shortName) - 1] = '\0'; + } + drawSplash("Indexing...", "Please wait", shortName); _pagePositions.push_back(0); indexPagesWordWrap(_file, 0, _pagePositions, _linesPerPage, _charsPerLine, 0); } + // Save complete index _totalPages = _pagePositions.size(); - saveIndex(filename, _pagePositions, _file.size(), true, _currentPage); - // Update cache entry + // Update or create cache entry + bool foundCache = false; for (int i = 0; i < (int)_fileCache.size(); i++) { - if (_fileCache[i].filename == filename) { + if (_fileCache[i].filename == actualFilename) { _fileCache[i].pagePositions = _pagePositions; _fileCache[i].fullyIndexed = true; + _fileCache[i].fileSize = _file.size(); + 
foundCache = true; break; } } + if (!foundCache) { + FileCache newCache; + newCache.filename = actualFilename; + newCache.fileSize = _file.size(); + newCache.fullyIndexed = true; + newCache.lastReadPage = _currentPage; + newCache.pagePositions = _pagePositions; + _fileCache.push_back(newCache); + } - // Deselect SD to free SPI bus - digitalWrite(SDCARD_CS, HIGH); + saveIndex(actualFilename, _pagePositions, _file.size(), true, _currentPage); _mode = READING; loadPageContent(); - Serial.printf("TextReader: Opened %s, %d pages, resume pg %d\n", - filename.c_str(), _totalPages, _currentPage + 1); + Serial.printf("TextReader: Opened %s, %d pages\n", + actualFilename.c_str(), _totalPages); } void closeBook() { @@ -623,11 +761,11 @@ private: if (_fileList.size() == 0) { display.setCursor(0, 18); display.setColor(DisplayDriver::LIGHT); - display.print("No .txt files found"); + display.print("No files found"); display.setCursor(0, 30); - display.print("Add files to /books/"); + display.print("Add .txt or .epub to"); display.setCursor(0, 42); - display.print("on SD card"); + display.print("/books/ on SD card"); } else { display.setTextSize(0); // Tiny font for file list int listLineH = 8; // Approximate tiny font line height in virtual coords @@ -820,9 +958,37 @@ public: drawBootSplash(0, 0, "Scanning..."); Serial.println("TextReader: Boot indexing started"); - // Scan for files + // Scan for files (includes .txt and .epub) scanFiles(); + // Also pick up previously converted EPUB cache files + if (SD.exists("/books/.epub_cache")) { + File cacheDir = SD.open("/books/.epub_cache"); + if (cacheDir && cacheDir.isDirectory()) { + File f = cacheDir.openNextFile(); + while (f && _fileList.size() < READER_MAX_FILES) { + if (!f.isDirectory()) { + String name = String(f.name()); + int slash = name.lastIndexOf('/'); + if (slash >= 0) name = name.substring(slash + 1); + if (name.endsWith(".txt") || name.endsWith(".TXT")) { + // Avoid duplicates + bool dup = false; + for (int i = 0; i < 
(int)_fileList.size(); i++) { + if (_fileList[i] == name) { dup = true; break; } + } + if (!dup) { + _fileList.push_back(name); + Serial.printf("TextReader: Found cached EPUB txt: %s\n", name.c_str()); + } + } + } + f = cacheDir.openNextFile(); + } + cacheDir.close(); + } + } + if (_fileList.size() == 0) { Serial.println("TextReader: No files to index"); _bootIndexed = true; @@ -860,11 +1026,22 @@ public: // Skip files that loaded from cache if (_fileCache[i].filename.length() > 0) continue; + // Skip .epub files — they'll be converted on first open via openBook() + if (_fileList[i].endsWith(".epub") || _fileList[i].endsWith(".EPUB")) { + needsIndexCount--; // Don't count epubs in progress display + continue; + } + indexProgress++; drawBootSplash(indexProgress, needsIndexCount, _fileList[i]); + // Try BOOKS_FOLDER first, then epub cache fallback String fullPath = String(BOOKS_FOLDER) + "/" + _fileList[i]; File file = SD.open(fullPath.c_str(), FILE_READ); + if (!file) { + String cacheFallback = String("/books/.epub_cache/") + _fileList[i]; + file = SD.open(cacheFallback.c_str(), FILE_READ); + } if (!file) continue; FileCache& cache = _fileCache[i]; diff --git a/variants/lilygo_tdeck_pro/EpubZipReader.h b/variants/lilygo_tdeck_pro/EpubZipReader.h new file mode 100644 index 00000000..e98c7b7b --- /dev/null +++ b/variants/lilygo_tdeck_pro/EpubZipReader.h @@ -0,0 +1,538 @@ +#pragma once +// ============================================================================= +// EpubZipReader.h - Minimal ZIP reader for EPUB files on ESP32-S3 +// +// Parses ZIP archives directly from SD card File objects. +// Uses the ESP32 ROM's built-in tinfl decompressor for DEFLATE. +// No external library dependencies. +// +// Supports: +// - STORED (method 0) entries - direct copy +// - DEFLATED (method 8) entries - ROM tinfl decompression +// - ZIP64 is NOT supported (EPUBs don't need it) +// +// Memory: Allocates decompression buffers from PSRAM when available. 
+// Typical EPUB chapter is 5-50KB, well within ESP32-S3's 8MB PSRAM. +// ============================================================================= + +#include +#include + +// ROM tinfl decompressor - built into ESP32/ESP32-S3 ROM +// If this include fails on your platform, see the fallback note at bottom +#if __has_include() + #include + #define HAS_ROM_TINFL 1 +#elif __has_include() + #include + #define HAS_ROM_TINFL 1 +#elif __has_include() + #include + #define HAS_ROM_TINFL 1 +#else + #warning "ROM miniz not found - DEFLATED entries will not be supported" + #define HAS_ROM_TINFL 0 +#endif + +// ---- ZIP format constants ---- +#define ZIP_LOCAL_FILE_HEADER_SIG 0x04034b50 +#define ZIP_CENTRAL_DIR_SIG 0x02014b50 +#define ZIP_END_OF_CENTRAL_DIR_SIG 0x06054b50 + +#define ZIP_METHOD_STORED 0 +#define ZIP_METHOD_DEFLATED 8 + +// Maximum files we track in a ZIP (EPUBs typically have 20-100 files) +#define ZIP_MAX_ENTRIES 128 + +// Maximum filename length within the ZIP +#define ZIP_MAX_FILENAME 128 + +// ---- Data structures ---- + +struct ZipEntry { + char filename[ZIP_MAX_FILENAME]; + uint16_t compressionMethod; // 0=STORED, 8=DEFLATED + uint32_t compressedSize; + uint32_t uncompressedSize; + uint32_t localHeaderOffset; // Offset to local file header in ZIP + uint32_t crc32; +}; + +// ---- Helper: read little-endian values from a byte buffer ---- + +static inline uint16_t zipRead16(const uint8_t* p) { + return (uint16_t)p[0] | ((uint16_t)p[1] << 8); +} + +static inline uint32_t zipRead32(const uint8_t* p) { + return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | + ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24); +} + +// ============================================================================= +// EpubZipReader class +// ============================================================================= + +class EpubZipReader { +public: + EpubZipReader() : _entryCount(0), _isOpen(false), _entries(nullptr) { + // Allocate entries array from PSRAM to avoid stack overflow + 
// (128 entries × ~146 bytes = ~19KB — too large for 8KB loopTask stack)
+#ifdef BOARD_HAS_PSRAM
+    _entries = (ZipEntry*)ps_malloc(ZIP_MAX_ENTRIES * sizeof(ZipEntry));
+#endif
+    if (!_entries) {
+      _entries = (ZipEntry*)malloc(ZIP_MAX_ENTRIES * sizeof(ZipEntry));
+    }
+    if (!_entries) {
+      Serial.println("ZipReader: FATAL - failed to allocate entry table");
+    }
+  }
+
+  ~EpubZipReader() {
+    if (_entries) {
+      free(_entries);
+      _entries = nullptr;
+    }
+  }
+
+  // ----------------------------------------------------------
+  // Open a ZIP file and parse its central directory.
+  // Returns true on success, false on error.
+  // After open(), entries are available via getEntryCount()/getEntry().
+  //
+  // All offsets and lengths read from the archive are treated as
+  // untrusted and are range-checked before they are used to index
+  // a buffer. Directory entries (names ending in '/') are skipped.
+  // At most ZIP_MAX_ENTRIES files are recorded; a warning is logged
+  // if the archive holds more.
+  // ----------------------------------------------------------
+  bool open(File& zipFile) {
+    _isOpen = false;
+    _entryCount = 0;
+
+    if (!_entries) {
+      Serial.println("ZipReader: entry table not allocated");
+      return false;
+    }
+
+    if (!zipFile || !zipFile.available()) {
+      Serial.println("ZipReader: file not valid");
+      return false;
+    }
+
+    _file = zipFile;
+    uint32_t fileSize = _file.size();
+
+    if (fileSize < 22) {
+      Serial.println("ZipReader: file too small for ZIP");
+      return false;
+    }
+
+    // ---- Step 1: Find the End of Central Directory record ----
+    // The EOCD is at least 22 bytes and sits at the end of the file,
+    // followed only by an optional archive comment. Only the last
+    // 1024 bytes are searched, so an archive whose trailing comment
+    // pushes the EOCD further back is rejected (EPUBs typically have
+    // no comment).
+    const uint32_t kEocdSearchWindow = 1024;
+    uint32_t searchLen = (fileSize > kEocdSearchWindow) ? kEocdSearchWindow : fileSize;
+    uint32_t searchStart = fileSize - searchLen;
+    uint32_t eocdOffset = 0;
+    bool foundEocd = false;
+
+    uint8_t* searchBuf = (uint8_t*)_allocBuffer(searchLen);
+    if (!searchBuf) {
+      Serial.println("ZipReader: failed to alloc search buffer");
+      return false;
+    }
+
+    _file.seek(searchStart);
+    if (_file.read(searchBuf, searchLen) != (int)searchLen) {
+      free(searchBuf);
+      Serial.println("ZipReader: failed to read EOCD area");
+      return false;
+    }
+
+    // Scan backwards for EOCD signature (0x06054b50)
+    for (int i = (int)searchLen - 22; i >= 0; i--) {
+      if (zipRead32(&searchBuf[i]) == ZIP_END_OF_CENTRAL_DIR_SIG) {
+        eocdOffset = searchStart + i;
+        // Parse EOCD fields
+        uint16_t totalEntries = zipRead16(&searchBuf[i + 10]);
+        uint32_t cdSize = zipRead32(&searchBuf[i + 12]);
+        uint32_t cdOffset = zipRead32(&searchBuf[i + 16]);
+
+        _cdOffset = cdOffset;
+        _cdSize = cdSize;
+        _totalEntries = totalEntries;
+        foundEocd = true;
+        break;
+      }
+    }
+    free(searchBuf);
+
+    if (!foundEocd) {
+      Serial.println("ZipReader: EOCD not found - not a valid ZIP");
+      return false;
+    }
+
+    Serial.printf("ZipReader: EOCD found at %u, %u entries, CD at %u (%u bytes)\n",
+                  eocdOffset, _totalEntries, _cdOffset, _cdSize);
+
+    // ---- Step 2: Parse Central Directory entries ----
+    if (_cdSize == 0 || _cdSize > 512 * 1024) {
+      Serial.println("ZipReader: central directory size unreasonable");
+      return false;
+    }
+
+    // The EOCD fields come straight from the file: make sure the central
+    // directory really lies inside the archive before seeking to it.
+    // (Written as a subtraction so the check itself cannot overflow.)
+    if (_cdOffset > fileSize || _cdSize > fileSize - _cdOffset) {
+      Serial.println("ZipReader: central directory outside file bounds");
+      return false;
+    }
+
+    uint8_t* cdBuf = (uint8_t*)_allocBuffer(_cdSize);
+    if (!cdBuf) {
+      Serial.printf("ZipReader: failed to alloc %u bytes for central directory\n", _cdSize);
+      return false;
+    }
+
+    _file.seek(_cdOffset);
+    if (_file.read(cdBuf, _cdSize) != (int)_cdSize) {
+      free(cdBuf);
+      Serial.println("ZipReader: failed to read central directory");
+      return false;
+    }
+
+    uint32_t pos = 0;
+    _entryCount = 0;
+    bool tableFull = false;
+
+    // Each record is a 46-byte fixed header followed by the filename,
+    // extra field and comment, whose lengths are stored in the header.
+    while (pos + 46 <= _cdSize) {
+      if (zipRead32(&cdBuf[pos]) != ZIP_CENTRAL_DIR_SIG) {
+        break;  // No more central directory entries
+      }
+      if (_entryCount >= ZIP_MAX_ENTRIES) {
+        tableFull = true;  // More records exist than the table can hold
+        break;
+      }
+
+      uint16_t method      = zipRead16(&cdBuf[pos + 10]);
+      uint32_t crc         = zipRead32(&cdBuf[pos + 16]);
+      uint32_t compSize    = zipRead32(&cdBuf[pos + 20]);
+      uint32_t uncompSize  = zipRead32(&cdBuf[pos + 24]);
+      uint16_t fnLen       = zipRead16(&cdBuf[pos + 28]);
+      uint16_t extraLen    = zipRead16(&cdBuf[pos + 30]);
+      uint16_t commentLen  = zipRead16(&cdBuf[pos + 32]);
+      uint32_t localOffset = zipRead32(&cdBuf[pos + 42]);
+
+      // The variable-length tail must fit inside what was read; otherwise
+      // the filename memcpy below would run past the end of cdBuf.
+      // pos + 46 <= _cdSize holds here, so the subtraction cannot wrap.
+      uint32_t recordLen = 46u + fnLen + extraLen + commentLen;
+      if (recordLen > _cdSize - pos) {
+        Serial.println("ZipReader: truncated central directory entry");
+        break;
+      }
+
+      // Copy filename (truncate if necessary)
+      int copyLen = (fnLen < ZIP_MAX_FILENAME - 1) ? fnLen : ZIP_MAX_FILENAME - 1;
+      memcpy(_entries[_entryCount].filename, &cdBuf[pos + 46], copyLen);
+      _entries[_entryCount].filename[copyLen] = '\0';
+
+      _entries[_entryCount].compressionMethod = method;
+      _entries[_entryCount].compressedSize = compSize;
+      _entries[_entryCount].uncompressedSize = uncompSize;
+      _entries[_entryCount].localHeaderOffset = localOffset;
+      _entries[_entryCount].crc32 = crc;
+
+      // Skip directories (filenames ending with '/'): the slot is only
+      // kept (count advanced) for real files.
+      if (copyLen > 0 && _entries[_entryCount].filename[copyLen - 1] != '/') {
+        _entryCount++;
+      }
+
+      // Advance past this central directory entry
+      pos += recordLen;
+    }
+
+    free(cdBuf);
+
+    if (tableFull) {
+      Serial.printf("ZipReader: entry table full (%d), remaining entries ignored\n",
+                    ZIP_MAX_ENTRIES);
+    }
+
+    Serial.printf("ZipReader: parsed %d file entries\n", _entryCount);
+    _isOpen = true;
+    return true;
+  }
+
+  // ----------------------------------------------------------
+  // Close the reader (does not close the underlying File).
+  // ----------------------------------------------------------
+  void close() {
+    _entryCount = 0;
+    _isOpen = false;
+  }
+
+  // ----------------------------------------------------------
+  // Entry table accessors
+  // ----------------------------------------------------------
+  int getEntryCount() const {
+    return _entryCount;
+  }
+
+  // Bounds-checked lookup: nullptr for any index outside
+  // [0, getEntryCount()).
+  const ZipEntry* getEntry(int index) const {
+    const bool inRange = (index >= 0) && (index < _entryCount);
+    return inRange ? &_entries[index] : nullptr;
+  }
+
+  // ----------------------------------------------------------
+  // Look up the entry whose stored filename equals `filename`
+  // exactly (strcmp, so the match is case-sensitive).
+  // Returns its index, or -1 when nothing matches.
+  // ----------------------------------------------------------
+  int findEntry(const char* filename) const {
+    int match = -1;
+    for (int idx = 0; idx < _entryCount && match < 0; idx++) {
+      if (strcmp(_entries[idx].filename, filename) == 0) {
+        match = idx;
+      }
+    }
+    return match;
+  }
+
+  // ----------------------------------------------------------
+  // Look up the first entry whose filename ends with `suffix`
+  // (e.g., ".opf", ".ncx"). Unlike findEntry() the comparison
+  // uses strcasecmp, so it ignores case.
+  // Returns its index, or -1 when nothing matches.
+  // ----------------------------------------------------------
+  int findEntryBySuffix(const char* suffix) const {
+    const int wanted = strlen(suffix);
+    for (int idx = 0; idx < _entryCount; idx++) {
+      const char* name = _entries[idx].filename;
+      const int nameLen = strlen(name);
+      if (nameLen < wanted) {
+        continue;  // Name is shorter than the suffix: cannot match
+      }
+      if (strcasecmp(name + (nameLen - wanted), suffix) == 0) {
+        return idx;
+      }
+    }
+    return -1;
+  }
+
+  // ----------------------------------------------------------
+  // Find entries matching a path prefix (e.g., "OEBPS/").
+  // Fills matchIndices[] up to maxMatches. Returns count found.
+ // ---------------------------------------------------------- + int findEntriesByPrefix(const char* prefix, int* matchIndices, int maxMatches) const { + int count = 0; + int prefixLen = strlen(prefix); + for (int i = 0; i < _entryCount && count < maxMatches; i++) { + if (strncmp(_entries[i].filename, prefix, prefixLen) == 0) { + matchIndices[count++] = i; + } + } + return count; + } + + // ---------------------------------------------------------- + // Extract a file entry to a newly allocated buffer. + // + // On success, returns a malloc'd buffer (caller must free!) + // and sets *outSize to the uncompressed size. + // + // On failure, returns nullptr. + // + // The buffer is allocated from PSRAM if available. + // ---------------------------------------------------------- + uint8_t* extractEntry(int index, uint32_t* outSize) { + if (!_isOpen || index < 0 || index >= _entryCount) { + return nullptr; + } + + const ZipEntry& entry = _entries[index]; + + // ---- Read the local file header to get actual data offset ---- + // Local header: 30 bytes fixed + variable filename + extra field + uint8_t localHeader[30]; + _file.seek(entry.localHeaderOffset); + if (_file.read(localHeader, 30) != 30) { + Serial.println("ZipReader: failed to read local header"); + return nullptr; + } + + if (zipRead32(localHeader) != ZIP_LOCAL_FILE_HEADER_SIG) { + Serial.println("ZipReader: bad local header signature"); + return nullptr; + } + + uint16_t localFnLen = zipRead16(&localHeader[26]); + uint16_t localExtraLen = zipRead16(&localHeader[28]); + uint32_t dataOffset = entry.localHeaderOffset + 30 + localFnLen + localExtraLen; + + // ---- Handle based on compression method ---- + if (entry.compressionMethod == ZIP_METHOD_STORED) { + return _extractStored(dataOffset, entry.uncompressedSize, outSize); + } + else if (entry.compressionMethod == ZIP_METHOD_DEFLATED) { + return _extractDeflated(dataOffset, entry.compressedSize, + entry.uncompressedSize, outSize); + } + else { + 
Serial.printf("ZipReader: unsupported compression method %d for %s\n", + entry.compressionMethod, entry.filename); + return nullptr; + } + } + + // ---------------------------------------------------------- + // Extract a file entry by filename. + // Convenience wrapper around findEntry() + extractEntry(). + // ---------------------------------------------------------- + uint8_t* extractByName(const char* filename, uint32_t* outSize) { + int idx = findEntry(filename); + if (idx < 0) return nullptr; + return extractEntry(idx, outSize); + } + + // ---------------------------------------------------------- + // Check if reader is open and valid + // ---------------------------------------------------------- + bool isOpen() const { return _isOpen; } + + // ---------------------------------------------------------- + // Debug: print all entries + // ---------------------------------------------------------- + void printEntries() const { + Serial.printf("ZIP contains %d files:\n", _entryCount); + for (int i = 0; i < _entryCount; i++) { + const ZipEntry& e = _entries[i]; + Serial.printf(" [%d] %s (%s, %u -> %u bytes)\n", + i, e.filename, + e.compressionMethod == 0 ? 
"STORED" : "DEFLATED", + e.compressedSize, e.uncompressedSize); + } + } + +private: + File _file; + ZipEntry* _entries; // Heap-allocated (PSRAM) entry table + int _entryCount; + bool _isOpen; + uint32_t _cdOffset; + uint32_t _cdSize; + uint16_t _totalEntries; + + // ---------------------------------------------------------- + // Allocate buffer, preferring PSRAM if available + // ---------------------------------------------------------- + void* _allocBuffer(size_t size) { + void* buf = nullptr; +#ifdef BOARD_HAS_PSRAM + buf = ps_malloc(size); +#endif + if (!buf) { + buf = malloc(size); + } + return buf; + } + + // ---------------------------------------------------------- + // Extract a STORED (uncompressed) entry + // ---------------------------------------------------------- + uint8_t* _extractStored(uint32_t dataOffset, uint32_t size, uint32_t* outSize) { + uint8_t* buf = (uint8_t*)_allocBuffer(size + 1); // +1 for null terminator + if (!buf) { + Serial.printf("ZipReader: failed to alloc %u bytes for stored entry\n", size); + return nullptr; + } + + _file.seek(dataOffset); + uint32_t bytesRead = _file.read(buf, size); + if (bytesRead != size) { + Serial.printf("ZipReader: short read (got %u, expected %u)\n", bytesRead, size); + free(buf); + return nullptr; + } + + buf[size] = '\0'; // Null-terminate for text files + *outSize = size; + + // Release SD CS pin for other SPI users + digitalWrite(SDCARD_CS, HIGH); + + return buf; + } + + // ---------------------------------------------------------- + // Extract a DEFLATED entry using ROM tinfl + // ---------------------------------------------------------- + uint8_t* _extractDeflated(uint32_t dataOffset, uint32_t compSize, + uint32_t uncompSize, uint32_t* outSize) { +#if HAS_ROM_TINFL + // Allocate compressed data buffer (from PSRAM) + uint8_t* compBuf = (uint8_t*)_allocBuffer(compSize); + if (!compBuf) { + Serial.printf("ZipReader: failed to alloc %u bytes for compressed data\n", compSize); + return nullptr; + } + 
+ // Allocate output buffer (+1 for null terminator) + uint8_t* outBuf = (uint8_t*)_allocBuffer(uncompSize + 1); + if (!outBuf) { + Serial.printf("ZipReader: failed to alloc %u bytes for decompressed data\n", uncompSize); + free(compBuf); + return nullptr; + } + + // Heap-allocate the decompressor (~11KB struct - too large for 8KB loopTask stack!) + tinfl_decompressor* decomp = (tinfl_decompressor*)_allocBuffer(sizeof(tinfl_decompressor)); + if (!decomp) { + Serial.printf("ZipReader: failed to alloc tinfl_decompressor (%u bytes)\n", + (uint32_t)sizeof(tinfl_decompressor)); + free(compBuf); + free(outBuf); + return nullptr; + } + + // Read compressed data from file + _file.seek(dataOffset); + if (_file.read(compBuf, compSize) != (int)compSize) { + Serial.println("ZipReader: failed to read compressed data"); + free(decomp); + free(compBuf); + free(outBuf); + return nullptr; + } + + // Release SD CS pin for other SPI users + digitalWrite(SDCARD_CS, HIGH); + + // Decompress using ROM tinfl (low-level API to avoid stack allocation) + // ZIP DEFLATE is raw deflate (no zlib header). 
+ tinfl_init(decomp); + + size_t inBytes = compSize; + size_t outBytes = uncompSize; + tinfl_status status = tinfl_decompress( + decomp, + (const mz_uint8*)compBuf, // compressed input + &inBytes, // in: available, out: consumed + outBuf, // output buffer base + outBuf, // current output position + &outBytes, // in: available, out: produced + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF // raw deflate, single-shot + ); + + free(decomp); + free(compBuf); + + if (status != TINFL_STATUS_DONE) { + Serial.printf("ZipReader: DEFLATE failed (status %d)\n", (int)status); + free(outBuf); + return nullptr; + } + + outBuf[outBytes] = '\0'; // Null-terminate for text files + *outSize = (uint32_t)outBytes; + + if (outBytes != uncompSize) { + Serial.printf("ZipReader: decompressed %u bytes, expected %u\n", + (uint32_t)outBytes, uncompSize); + } + + return outBuf; + +#else + // No ROM tinfl available + Serial.println("ZipReader: DEFLATE not supported (no ROM tinfl)"); + *outSize = 0; + return nullptr; +#endif + } +}; + +// ============================================================================= +// FALLBACK NOTE: +// +// If the ROM tinfl includes fail to compile on your ESP32 variant, you have +// two options: +// +// 1. Install lbernstone/miniz-esp32 from PlatformIO: +// lib_deps = https://github.com/lbernstone/miniz-esp32.git +// Then change the includes above to: #include +// +// 2. Copy just the tinfl source (~550 lines) from: +// https://github.com/richgel999/miniz/blob/master/miniz_tinfl.c +// into your project. Only tinfl_decompress_mem_to_mem() is needed. 
+// +// ============================================================================= \ No newline at end of file diff --git a/variants/lilygo_tdeck_pro/Epubprocessor.h b/variants/lilygo_tdeck_pro/Epubprocessor.h new file mode 100644 index 00000000..3474eca5 --- /dev/null +++ b/variants/lilygo_tdeck_pro/Epubprocessor.h @@ -0,0 +1,776 @@ +#pragma once +// ============================================================================= +// EpubProcessor.h - Convert EPUB files to plain text for TextReaderScreen +// +// Pipeline: EPUB (ZIP) → container.xml → OPF spine → extract chapters → +// strip XHTML tags → concatenated plain text → cached .txt on SD +// +// The resulting .txt file is written to the caller-supplied cache path (see +// buildCachePath()) and reuses TextReaderScreen's pagination, indexing, and bookmarking. +// +// Dependencies: EpubZipReader.h (for ZIP extraction) +// ============================================================================= + +#include +#include +#include "EpubZipReader.h" + +// Maximum chapters in spine (most novels have 20-80) +#define EPUB_MAX_CHAPTERS 200 + +// Maximum manifest items we track +#define EPUB_MAX_MANIFEST 256 + +// Size in bytes of the char buffers that hold the OPF path and its base +// directory (not the XML itself, which is extracted into a heap buffer) +#define EPUB_XML_BUF_SIZE 64 + +class EpubProcessor { +public: + + // ---------------------------------------------------------- + // Process an EPUB file: extract text and write to SD cache. + // + // epubPath: source, e.g. "/books/The Iliad.epub" + // txtPath: output, e.g. "/books/.epub_cache/The Iliad.txt" + // + // Returns true if the .txt file was written successfully. + // If txtPath already exists, returns true immediately (cached). 
+  // On failure after the output was created, the incomplete txtPath
+  // is removed so that a later call converts the book again.
+  // ----------------------------------------------------------
+  static bool processToText(const char* epubPath, const char* txtPath) {
+    // Check if already cached
+    if (SD.exists(txtPath)) {
+      Serial.printf("EpubProc: '%s' already cached\n", txtPath);
+      return true;
+    }
+
+    Serial.printf("EpubProc: Processing '%s'\n", epubPath);
+    unsigned long t0 = millis();
+
+    // Open the EPUB (ZIP archive)
+    File epubFile = SD.open(epubPath, FILE_READ);
+    if (!epubFile) {
+      Serial.println("EpubProc: Cannot open EPUB file");
+      return false;
+    }
+
+    // Heap-allocate zip reader (entries table is ~19KB)
+    EpubZipReader* zip = new EpubZipReader();
+    if (!zip) {
+      epubFile.close();
+      Serial.println("EpubProc: Cannot allocate ZipReader");
+      return false;
+    }
+
+    if (!zip->open(epubFile)) {
+      delete zip;
+      epubFile.close();
+      Serial.println("EpubProc: Cannot parse ZIP structure");
+      return false;
+    }
+
+    // Step 1: Find OPF path from container.xml
+    char opfPath[EPUB_XML_BUF_SIZE];
+    opfPath[0] = '\0';
+    if (!_findOpfPath(zip, opfPath, sizeof(opfPath))) {
+      delete zip;
+      epubFile.close();
+      Serial.println("EpubProc: Cannot find OPF path");
+      return false;
+    }
+    Serial.printf("EpubProc: OPF at '%s'\n", opfPath);
+
+    // Determine the content base directory (e.g., "OEBPS/")
+    char baseDir[EPUB_XML_BUF_SIZE];
+    _getDirectory(opfPath, baseDir, sizeof(baseDir));
+
+    // Step 2: Parse OPF to get title and spine chapter order
+    char title[128];
+    title[0] = '\0';
+
+    // Chapter paths in spine order
+    char** chapterPaths = nullptr;
+    int chapterCount = 0;
+
+    if (!_parseOpf(zip, opfPath, baseDir, title, sizeof(title),
+                   &chapterPaths, &chapterCount)) {
+      delete zip;
+      epubFile.close();
+      Serial.println("EpubProc: Cannot parse OPF");
+      return false;
+    }
+
+    Serial.printf("EpubProc: Title='%s', %d chapters\n", title, chapterCount);
+
+    // Step 3: Extract each chapter, strip XHTML, write to output .txt
+    File outFile = SD.open(txtPath, FILE_WRITE);
+    if (!outFile) {
+      _freeChapterPaths(chapterPaths, chapterCount);
+      delete zip;
+      epubFile.close();
+      Serial.printf("EpubProc: Cannot create '%s'\n", txtPath);
+      return false;
+    }
+
+    // Write title as first line
+    if (title[0]) {
+      outFile.println(title);
+      outFile.println();
+    }
+
+    int chaptersWritten = 0;
+    uint32_t totalBytes = 0;
+    bool writeOk = true;
+
+    for (int i = 0; i < chapterCount && writeOk; i++) {
+      int entryIdx = zip->findEntry(chapterPaths[i]);
+      if (entryIdx < 0) {
+        Serial.printf("EpubProc: Chapter not found: '%s'\n", chapterPaths[i]);
+        continue;
+      }
+
+      uint32_t rawSize = 0;
+      uint8_t* rawData = zip->extractEntry(entryIdx, &rawSize);
+      if (!rawData || rawSize == 0) {
+        Serial.printf("EpubProc: Failed to extract chapter %d\n", i);
+        if (rawData) free(rawData);
+        continue;
+      }
+
+      // Strip XHTML tags and write plain text
+      uint32_t textLen = 0;
+      uint8_t* plainText = _stripXhtml(rawData, rawSize, &textLen);
+      free(rawData);
+
+      if (plainText && textLen > 0) {
+        // A short write means the card is full or failing. Stop here rather
+        // than cache a silently truncated book.
+        if (outFile.write(plainText, textLen) == textLen) {
+          // Add chapter separator
+          outFile.print("\n\n");
+          totalBytes += textLen + 2;
+          chaptersWritten++;
+        } else {
+          Serial.printf("EpubProc: Write failed on chapter %d\n", i);
+          writeOk = false;
+        }
+      }
+      if (plainText) free(plainText);
+    }
+
+    outFile.flush();
+    outFile.close();
+
+    // The SD.exists(txtPath) check at the top treats any existing file as a
+    // finished conversion, so a failed run must not leave one behind.
+    const bool success = writeOk && chaptersWritten > 0;
+    if (!success && !SD.remove(txtPath)) {
+      Serial.printf("EpubProc: Could not remove incomplete '%s'\n", txtPath);
+    }
+
+    // Release SD CS for other SPI users
+    digitalWrite(SDCARD_CS, HIGH);
+
+    _freeChapterPaths(chapterPaths, chapterCount);
+    delete zip;
+    epubFile.close();
+
+    unsigned long elapsed = millis() - t0;
+    Serial.printf("EpubProc: Done! %d chapters, %u bytes in %lu ms -> '%s'\n",
+                  chaptersWritten, totalBytes, elapsed, txtPath);
+
+    return success;
+  }
+
+  // ----------------------------------------------------------
+  // Extract just the title from an EPUB (for display in file list).
+  // Returns false if it can't be determined.
+ // ---------------------------------------------------------- + static bool getTitle(const char* epubPath, char* titleBuf, int titleBufSize) { + File epubFile = SD.open(epubPath, FILE_READ); + if (!epubFile) return false; + + EpubZipReader* zip = new EpubZipReader(); + if (!zip) { epubFile.close(); return false; } + + if (!zip->open(epubFile)) { + delete zip; epubFile.close(); return false; + } + + char opfPath[EPUB_XML_BUF_SIZE]; + if (!_findOpfPath(zip, opfPath, sizeof(opfPath))) { + delete zip; epubFile.close(); return false; + } + + // Extract OPF and find + int opfIdx = zip->findEntry(opfPath); + if (opfIdx < 0) { delete zip; epubFile.close(); return false; } + + uint32_t opfSize = 0; + uint8_t* opfData = zip->extractEntry(opfIdx, &opfSize); + delete zip; + epubFile.close(); + + if (!opfData) return false; + + bool found = _extractTagContent((const char*)opfData, opfSize, + "dc:title", titleBuf, titleBufSize); + free(opfData); + return found; + } + + // ---------------------------------------------------------- + // Build a cache .txt path from an .epub path. + // e.g., "/books/mybook.epub" -> "/books/.epub_cache/mybook.txt" + // ---------------------------------------------------------- + static void buildCachePath(const char* epubPath, char* cachePath, int cachePathSize) { + // Extract filename without extension + const char* lastSlash = strrchr(epubPath, '/'); + const char* filename = lastSlash ? 
lastSlash + 1 : epubPath; + + // Find the directory part + char dir[128]; + if (lastSlash) { + int dirLen = lastSlash - epubPath; + if (dirLen >= (int)sizeof(dir)) dirLen = sizeof(dir) - 1; + strncpy(dir, epubPath, dirLen); + dir[dirLen] = '\0'; + } else { + strcpy(dir, "/books"); + } + + // Create cache directory if needed + char cacheDir[160]; + snprintf(cacheDir, sizeof(cacheDir), "%s/.epub_cache", dir); + if (!SD.exists(cacheDir)) { + SD.mkdir(cacheDir); + } + + // Strip .epub extension + char baseName[128]; + strncpy(baseName, filename, sizeof(baseName) - 1); + baseName[sizeof(baseName) - 1] = '\0'; + char* dot = strrchr(baseName, '.'); + if (dot) *dot = '\0'; + + snprintf(cachePath, cachePathSize, "%s/%s.txt", cacheDir, baseName); + } + +private: + + // ---------------------------------------------------------- + // Parse container.xml to find the OPF file path. + // Returns true if found. + // ---------------------------------------------------------- + static bool _findOpfPath(EpubZipReader* zip, char* opfPath, int opfPathSize) { + int idx = zip->findEntry("META-INF/container.xml"); + if (idx < 0) { + // Fallback: find any .opf file directly + idx = zip->findEntryBySuffix(".opf"); + if (idx >= 0) { + const ZipEntry* e = zip->getEntry(idx); + strncpy(opfPath, e->filename, opfPathSize - 1); + opfPath[opfPathSize - 1] = '\0'; + return true; + } + return false; + } + + uint32_t size = 0; + uint8_t* data = zip->extractEntry(idx, &size); + if (!data) return false; + + // Find: full-path="OEBPS/content.opf" + bool found = _extractAttribute((const char*)data, size, + "full-path", opfPath, opfPathSize); + free(data); + return found; + } + + // ---------------------------------------------------------- + // Parse OPF to extract title, build manifest, and resolve spine. + // + // Populates chapterPaths (heap-allocated array of strings) with + // full ZIP paths for each chapter in spine order. + // Caller must free with _freeChapterPaths(). 
// ----------------------------------------------------------
// Parameters:
//   zip              archive reader used to locate and extract the OPF
//   opfPath          archive path of the OPF entry
//   baseDir          prefix prepended verbatim to every manifest href
//   title/titleSize  receives the dc:title text (result of the lookup is
//                    not checked here)
//   outChapterPaths  receives the chapter path table
//   outChapterCount  receives the number of entries in that table
//
// Returns true only when at least one chapter path was produced. Note
// that on the final return both out-parameters are written even when
// the count is zero and the function returns false.
//
// NOTE(review): two statements below are visibly truncated in this copy
// of the source (the text that followed `"` in the _findTag(...) calls
// is missing up to `= manifestEnd)` / `= spineEnd)`). The declarations
// of manifestEnd, spineEnd, pos, chapterPaths and chapterCount and the
// loop headers are therefore not visible; restore them from the
// original file before building.
// ----------------------------------------------------------
static bool _parseOpf(EpubZipReader* zip, const char* opfPath,
                      const char* baseDir, char* title, int titleSize,
                      char*** outChapterPaths, int* outChapterCount) {
  // Locate and extract the OPF document; the buffer is freed below.
  int opfIdx = zip->findEntry(opfPath);
  if (opfIdx < 0) return false;

  uint32_t opfSize = 0;
  uint8_t* opfData = zip->extractEntry(opfIdx, &opfSize);
  if (!opfData) return false;

  const char* xml = (const char*)opfData;

  // Extract title
  _extractTagContent(xml, opfSize, "dc:title", title, titleSize);

  // Build manifest: map id -> href
  // Each entry also records whether its media-type looks like content.
  struct ManifestItem {
    char id[64];
    char href[128];
    bool isContent;  // has media-type containing "html" or "xml"
  };

  // Heap-allocate manifest (could be large)
  // ps_malloc is tried first and plain malloc is the fallback
  // (presumably PSRAM vs. internal heap on ESP32 — TODO confirm).
  ManifestItem* manifest = (ManifestItem*)ps_malloc(
      EPUB_MAX_MANIFEST * sizeof(ManifestItem));
  if (!manifest) {
    manifest = (ManifestItem*)malloc(EPUB_MAX_MANIFEST * sizeof(ManifestItem));
  }
  if (!manifest) {
    free(opfData);
    return false;
  }
  int manifestCount = 0;

  // Parse the item elements found inside the manifest section.
  // NOTE(review): the next statement is truncated in this copy (see the
  // header note); it is reproduced exactly as found.
  const char* manifestStart = _findTag(xml, opfSize, "= manifestEnd) break;

      // Find the closing > of this tag
      const char* tagEnd = (const char*)memchr(pos, '>', manifestEnd - pos);
      if (!tagEnd) break;
      tagEnd++;  // step past '>' so the tag span includes it

      // Fill the next free slot; it is only committed (count incremented)
      // further down, so an incomplete item is overwritten by the next one.
      ManifestItem& item = manifest[manifestCount];
      item.id[0] = '\0';
      item.href[0] = '\0';
      item.isContent = false;

      _extractAttributeFromTag(pos, tagEnd - pos, "id",
                               item.id, sizeof(item.id));
      _extractAttributeFromTag(pos, tagEnd - pos, "href",
                               item.href, sizeof(item.href));

      // Check media-type for content files
      // (plain substring test: any media-type containing "html" or "xml")
      char mediaType[64];
      mediaType[0] = '\0';
      _extractAttributeFromTag(pos, tagEnd - pos, "media-type",
                               mediaType, sizeof(mediaType));
      item.isContent = (strstr(mediaType, "html") != nullptr ||
                        strstr(mediaType, "xml") != nullptr);

      // Keep the item only if both id and href were found.
      if (item.id[0] && item.href[0]) {
        manifestCount++;
      }

      pos = tagEnd;  // continue scanning after this tag
    }
  }

  Serial.printf("EpubProc: Manifest has %d items\n", manifestCount);

  // Parse the spine to get reading order.
  // Each spine element carries an idref that names a manifest item id.
  // NOTE(review): the next statement is truncated in this copy (see the
  // header note); it is reproduced exactly as found.
  const char* spineStart = _findTag(xml, opfSize, "= spineEnd) break;

      const char* tagEnd = (const char*)memchr(pos, '>', spineEnd - pos);
      if (!tagEnd) break;
      tagEnd++;  // step past '>'

      char idref[64];
      idref[0] = '\0';
      _extractAttributeFromTag(pos, tagEnd - pos, "idref",
                               idref, sizeof(idref));

      if (idref[0]) {
        // Look up in manifest: linear search, stops at the first item whose
        // id matches and which is flagged as content.
        for (int m = 0; m < manifestCount; m++) {
          if (strcmp(manifest[m].id, idref) == 0 && manifest[m].isContent) {
            // Build full path: baseDir + href (+1 for the terminating NUL)
            int pathLen = strlen(baseDir) + strlen(manifest[m].href) + 1;
            char* fullPath = (char*)malloc(pathLen);
            if (fullPath) {
              snprintf(fullPath, pathLen, "%s%s", baseDir, manifest[m].href);
              chapterPaths[chapterCount++] = fullPath;
            }
            // If the allocation failed the chapter is skipped silently.
            break;
          }
        }
      }

      pos = tagEnd;  // continue scanning after this tag
    }
  }

  // Working buffers are no longer needed; the chapter paths are separate
  // allocations handed to the caller.
  free(manifest);
  free(opfData);

  *outChapterPaths = chapterPaths;
  *outChapterCount = chapterCount;

  return chapterCount > 0;
}

// ----------------------------------------------------------
// Strip XHTML/HTML tags from raw content, producing plain text.
//
// Handles:
// - Tag removal (everything between < and >)
// -

,
,

,

-

→ newlines + // - HTML entity decoding (& < > " ' &#NNN; &#xHH;) + // - Collapse multiple whitespace/newlines + // - Skip ,