From dd1ef483ef90f35218f5a4a3ea37a624b38ca8da Mon Sep 17 00:00:00 2001 From: Wolfgang Müller Date: Wed, 26 Mar 2025 17:29:22 +0100 Subject: backend: Handle corrupt zip files Corrupt zip files would already make hircine throw an error, but depending on the exact problem it would not report which file (or even which entry in a zip file) is affected. Use ZipFile.testzip() to catch common problems and make sure to re-raise any exception within as a BadZipFile exception. This makes sure to also report decompression problems that are raised as a zlib.error exception, for example. --- src/hircine/scanner.py | 7 ++++++- tests/scanner/data/bad/bad_compression.zip | Bin 0 -> 28046 bytes tests/scanner/data/bad/bad_entry.zip | Bin 0 -> 126 bytes tests/scanner/test_scanner.py | 18 +++++++++++++++++- 4 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 tests/scanner/data/bad/bad_compression.zip create mode 100644 tests/scanner/data/bad/bad_entry.zip diff --git a/src/hircine/scanner.py b/src/hircine/scanner.py index 29ae04f..6e3fafb 100644 --- a/src/hircine/scanner.py +++ b/src/hircine/scanner.py @@ -8,7 +8,7 @@ from datetime import datetime, timezone from enum import Enum from hashlib import file_digest from typing import NamedTuple -from zipfile import ZipFile, is_zipfile +from zipfile import BadZipFile, ZipFile, is_zipfile from blake3 import blake3 from natsort import natsorted, ns @@ -286,6 +286,11 @@ class Scanner: hash = blake3() with ZipFile(path, mode="r") as z: + try: + z.testzip() + except Exception as e: + raise BadZipFile(f"Corrupt zip file {path}") from e + input = [(path, info.filename) for info in z.infolist()] loop = asyncio.get_event_loop() diff --git a/tests/scanner/data/bad/bad_compression.zip b/tests/scanner/data/bad/bad_compression.zip new file mode 100644 index 0000000..4dbbc1f Binary files /dev/null and b/tests/scanner/data/bad/bad_compression.zip differ diff --git a/tests/scanner/data/bad/bad_entry.zip b/tests/scanner/data/bad/bad_entry.zip new file mode 100644 index 0000000..0bf6e13 Binary files /dev/null and b/tests/scanner/data/bad/bad_entry.zip differ diff --git a/tests/scanner/test_scanner.py b/tests/scanner/test_scanner.py index 6fc6650..141698c 100644 --- a/tests/scanner/test_scanner.py +++ b/tests/scanner/test_scanner.py @@ -3,7 +3,7 @@ import os import shutil from datetime import datetime, timezone from pathlib import Path -from zipfile import ZipFile +from zipfile import BadZipFile, ZipFile import pytest from conftest import DB @@ -309,3 +309,19 @@ async def test_scanner_reprocess(archive, data, scanner, capsys): captured = capsys.readouterr() assert captured.out == "[~] archive.zip\n" + + +@pytest.mark.anyio +async def test_scanner_handles_bad_zip_entry(data, scanner): + Path(data("bad/bad_entry.zip")).rename(data("contents/bad_entry.zip")) + + with pytest.raises(BadZipFile): + await scanner.scan() + + +@pytest.mark.anyio +async def test_scanner_handles_bad_zip_compression(data, scanner): + Path(data("bad/bad_compression.zip")).rename(data("contents/bad_compression.zip")) + + with pytest.raises(BadZipFile): + await scanner.scan() -- cgit v1.2.3-2-gb3c3