From f46745d70c814b709022c7d0fee654cb2ea97550 Mon Sep 17 00:00:00 2001 From: Zeedif Date: Thu, 21 Aug 2025 17:18:27 -0600 Subject: [PATCH] feat(kosync): Implement On-the-Fly Deterministic Hashing for KOReader Sync (#1606) * fix(archive): unify CBZ generation to produce deterministic archives Previously, CBZ files generated on-the-fly (`FolderProvider`) had a different hash than those created directly (`ArchiveProvider`), even with identical content. This inconsistency was caused by using two different ZIP libraries (`java.util.zip` vs. `org.apache.commons.compress`) and not normalizing file metadata. This inconsistent hashing breaks binary-based synchronization with external services like KOReader Sync Server, as the same chapter could be identified as a different file on each generation. This change ensures CBZ generation is fully deterministic by: - Unifying both providers to use `org.apache.commons.compress`. - Setting a fixed epoch timestamp (`time = 0L`) for all ZIP entries. - Explicitly setting the compression method and level to `DEFLATED` with default compression. This guarantees that a CBZ file for a given chapter will always have the same hash, regardless of how it's generated, resolving synchronization issues. * feat(kosync): lazily generate and cache CBZ hashes for sync Previously, KOReader progress sync in binary mode was limited to chapters explicitly downloaded as CBZ files. Chapters stored as folders lacked a hash, preventing them from being synced. With the recent move to deterministic CBZ generation, it's now possible to create a consistent hash for any downloaded chapter on-the-fly. This commit enhances the `getOrGenerateChapterHash` function to act as a central point for hash management. If a hash is requested for a downloaded chapter that doesn't have one cached in the database: 1. It generates the CBZ archive in-memory from the downloaded folder or existing CBZ using `ChapterDownloadHelper.getAsArchiveStream()`. 2. 
It calculates the deterministic hash of the generated archive content. 3. It saves this hash to the `koreader_hash` column in the `Chapter` table for future use. The cached hash is cleared when the chapter download is deleted, ensuring hashes are only tracked for available content. This change transparently extends KOReader Sync compatibility to all downloaded chapters, regardless of their storage format, without requiring users to pre-convert their library to CBZ. * fix: rename getAsArchiveStream to getArchiveStreamWithSize --- .../fileProvider/impl/ArchiveProvider.kt | 4 ++ .../fileProvider/impl/FolderProvider.kt | 17 ++++++--- .../manga/impl/sync/KoreaderSyncService.kt | 38 +++++++++---------- 3 files changed, 33 insertions(+), 26 deletions(-) diff --git a/server/src/main/kotlin/suwayomi/tachidesk/manga/impl/download/fileProvider/impl/ArchiveProvider.kt b/server/src/main/kotlin/suwayomi/tachidesk/manga/impl/download/fileProvider/impl/ArchiveProvider.kt index 9ea6e8b2..99e62654 100644 --- a/server/src/main/kotlin/suwayomi/tachidesk/manga/impl/download/fileProvider/impl/ArchiveProvider.kt +++ b/server/src/main/kotlin/suwayomi/tachidesk/manga/impl/download/fileProvider/impl/ArchiveProvider.kt @@ -20,6 +20,7 @@ import suwayomi.tachidesk.server.ApplicationDirs import uy.kohesive.injekt.injectLazy import java.io.File import java.io.InputStream +import java.util.zip.Deflater private val applicationDirs: ApplicationDirs by injectLazy() @@ -61,9 +62,12 @@ class ArchiveProvider( } ZipArchiveOutputStream(outputFile.outputStream()).use { zipOut -> + zipOut.setMethod(ZipArchiveOutputStream.DEFLATED) + zipOut.setLevel(Deflater.DEFAULT_COMPRESSION) if (chapterCacheFolder.isDirectory) { chapterCacheFolder.listFiles()?.sortedBy { it.name }?.forEach { val entry = ZipArchiveEntry(it.name) + entry.time = 0L try { zipOut.putArchiveEntry(entry) it.inputStream().use { inputStream -> diff --git 
a/server/src/main/kotlin/suwayomi/tachidesk/manga/impl/download/fileProvider/impl/FolderProvider.kt b/server/src/main/kotlin/suwayomi/tachidesk/manga/impl/download/fileProvider/impl/FolderProvider.kt index f07f52f9..9d457f01 100644 --- a/server/src/main/kotlin/suwayomi/tachidesk/manga/impl/download/fileProvider/impl/FolderProvider.kt +++ b/server/src/main/kotlin/suwayomi/tachidesk/manga/impl/download/fileProvider/impl/FolderProvider.kt @@ -1,5 +1,7 @@ package suwayomi.tachidesk.manga.impl.download.fileProvider.impl +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream import org.jetbrains.exposed.sql.transactions.transaction import org.jetbrains.exposed.sql.update import suwayomi.tachidesk.manga.impl.download.fileProvider.ChaptersFilesProvider @@ -16,8 +18,7 @@ import java.io.ByteArrayOutputStream import java.io.File import java.io.FileInputStream import java.io.InputStream -import java.util.zip.ZipEntry -import java.util.zip.ZipOutputStream +import java.util.zip.Deflater private val applicationDirs: ApplicationDirs by injectLazy() @@ -83,17 +84,21 @@ class FolderProvider( } val byteArrayOutputStream = ByteArrayOutputStream() - ZipOutputStream(BufferedOutputStream(byteArrayOutputStream)).use { zipOutputStream -> + ZipArchiveOutputStream(BufferedOutputStream(byteArrayOutputStream)).use { zipOutputStream -> + zipOutputStream.setMethod(ZipArchiveOutputStream.DEFLATED) + zipOutputStream.setLevel(Deflater.DEFAULT_COMPRESSION) + chapterDir .listFiles() ?.filter { it.isFile } ?.sortedBy { it.name } ?.forEach { imageFile -> FileInputStream(imageFile).use { fileInputStream -> - val zipEntry = ZipEntry(imageFile.name) - zipOutputStream.putNextEntry(zipEntry) + val zipEntry = ZipArchiveEntry(imageFile.name) + zipEntry.time = 0L + zipOutputStream.putArchiveEntry(zipEntry) fileInputStream.copyTo(zipOutputStream) - zipOutputStream.closeEntry() + zipOutputStream.closeArchiveEntry() } } } diff 
--git a/server/src/main/kotlin/suwayomi/tachidesk/manga/impl/sync/KoreaderSyncService.kt b/server/src/main/kotlin/suwayomi/tachidesk/manga/impl/sync/KoreaderSyncService.kt index e9b59913..80658869 100644 --- a/server/src/main/kotlin/suwayomi/tachidesk/manga/impl/sync/KoreaderSyncService.kt +++ b/server/src/main/kotlin/suwayomi/tachidesk/manga/impl/sync/KoreaderSyncService.kt @@ -17,13 +17,11 @@ import org.jetbrains.exposed.sql.update import suwayomi.tachidesk.graphql.types.KoSyncStatusPayload import suwayomi.tachidesk.graphql.types.KoreaderSyncChecksumMethod import suwayomi.tachidesk.graphql.types.KoreaderSyncStrategy -import suwayomi.tachidesk.manga.impl.util.KoreaderHelper -import suwayomi.tachidesk.manga.impl.util.getChapterCbzPath +import suwayomi.tachidesk.manga.impl.ChapterDownloadHelper import suwayomi.tachidesk.manga.model.table.ChapterTable import suwayomi.tachidesk.manga.model.table.MangaTable import suwayomi.tachidesk.server.serverConfig import uy.kohesive.injekt.injectLazy -import java.io.File import java.util.UUID import kotlin.math.abs @@ -102,35 +100,35 @@ object KoreaderSyncService { private fun getOrGenerateChapterHash(chapterId: Int): String? { return transaction { - val existingHash = + val chapterRow = ChapterTable - .select(ChapterTable.koreaderHash) + .select(ChapterTable.koreaderHash, ChapterTable.manga, ChapterTable.isDownloaded) .where { ChapterTable.id eq chapterId } - .firstOrNull() - ?.get(ChapterTable.koreaderHash) + .firstOrNull() ?: return@transaction null + val existingHash = chapterRow[ChapterTable.koreaderHash] if (!existingHash.isNullOrBlank()) { return@transaction existingHash } + val mangaId = chapterRow[ChapterTable.manga].value val checksumMethod = serverConfig.koreaderSyncChecksumMethod.value + val newHash = when (checksumMethod) { KoreaderSyncChecksumMethod.BINARY -> { - logger.info { "[KOSYNC HASH] No hash for chapterId=$chapterId. Generating from CBZ content." 
} - val mangaId = - ChapterTable - .select(ChapterTable.manga) - .where { ChapterTable.id eq chapterId } - .firstOrNull() - ?.get(ChapterTable.manga) - ?.value ?: return@transaction null - val cbzFile = File(getChapterCbzPath(mangaId, chapterId)) - if (!cbzFile.exists()) { - logger.info { "[KOSYNC HASH] Could not generate hash for chapterId=$chapterId. CBZ not found." } - return@transaction null + logger.info { "[KOSYNC HASH] No hash for chapterId=$chapterId. Generating from downloaded content." } + try { + // This generates a deterministic CBZ stream from either a folder or an existing CBZ file. + // If it fails, it means the chapter is not available for hashing. + val (stream, _) = ChapterDownloadHelper.getArchiveStreamWithSize(mangaId, chapterId) + stream.use { + Hash.md5(it.readBytes()) + } + } catch (e: Exception) { + logger.warn(e) { "[KOSYNC HASH] Failed to generate archive stream for chapterId=$chapterId." } + null } - KoreaderHelper.hashContents(cbzFile) } KoreaderSyncChecksumMethod.FILENAME -> { logger.info { "[KOSYNC HASH] No hash for chapterId=$chapterId. Generating from filename." }