diff options
author | Wolfgang Müller | 2025-03-26 17:29:22 +0100 |
---|---|---|
committer | Wolfgang Müller | 2025-03-26 17:29:22 +0100 |
commit | dd1ef483ef90f35218f5a4a3ea37a624b38ca8da (patch) | |
tree | cad45146f8a34b7afbff2ecfe4f0bf17e9c7fe04 | |
parent | 89da8855282e03e4b8e25146fa041aabf49c82cb (diff) | |
download | hircine-dd1ef483ef90f35218f5a4a3ea37a624b38ca8da.tar.gz |
backend: Handle corrupt zip files
Corrupt zip files would already make hircine throw an error, but
depending on the exact problem it would not report which file (or even
which entry in a zip file) is affected. Use ZipFile.testzip() to catch
common problems, and re-raise any exception it throws as a BadZipFile
exception that names the affected file. This also covers decompression
problems that surface as a zlib.error exception, for example.
-rw-r--r-- | src/hircine/scanner.py | 7 | ||||
-rw-r--r-- | tests/scanner/data/bad/bad_compression.zip | bin | 0 -> 28046 bytes | |||
-rw-r--r-- | tests/scanner/data/bad/bad_entry.zip | bin | 0 -> 126 bytes | |||
-rw-r--r-- | tests/scanner/test_scanner.py | 18 |
4 files changed, 23 insertions, 2 deletions
diff --git a/src/hircine/scanner.py b/src/hircine/scanner.py
index 29ae04f..6e3fafb 100644
--- a/src/hircine/scanner.py
+++ b/src/hircine/scanner.py
@@ -8,7 +8,7 @@ from datetime import datetime, timezone
 from enum import Enum
 from hashlib import file_digest
 from typing import NamedTuple
-from zipfile import ZipFile, is_zipfile
+from zipfile import BadZipFile, ZipFile, is_zipfile
 
 from blake3 import blake3
 from natsort import natsorted, ns
@@ -286,6 +286,11 @@ class Scanner:
         hash = blake3()
 
         with ZipFile(path, mode="r") as z:
+            try:
+                z.testzip()
+            except Exception as e:
+                raise BadZipFile(f"Corrupt zip file {path}") from e
+
             input = [(path, info.filename) for info in z.infolist()]
 
             loop = asyncio.get_event_loop()
diff --git a/tests/scanner/data/bad/bad_compression.zip b/tests/scanner/data/bad/bad_compression.zip
Binary files differ
new file mode 100644
index 0000000..4dbbc1f
--- /dev/null
+++ b/tests/scanner/data/bad/bad_compression.zip
diff --git a/tests/scanner/data/bad/bad_entry.zip b/tests/scanner/data/bad/bad_entry.zip
Binary files differ
new file mode 100644
index 0000000..0bf6e13
--- /dev/null
+++ b/tests/scanner/data/bad/bad_entry.zip
diff --git a/tests/scanner/test_scanner.py b/tests/scanner/test_scanner.py
index 6fc6650..141698c 100644
--- a/tests/scanner/test_scanner.py
+++ b/tests/scanner/test_scanner.py
@@ -3,7 +3,7 @@ import os
 import shutil
 from datetime import datetime, timezone
 from pathlib import Path
-from zipfile import ZipFile
+from zipfile import BadZipFile, ZipFile
 
 import pytest
 from conftest import DB
@@ -309,3 +309,19 @@ async def test_scanner_reprocess(archive, data, scanner, capsys):
 
     captured = capsys.readouterr()
     assert captured.out == "[~] archive.zip\n"
+
+
+@pytest.mark.anyio
+async def test_scanner_handles_bad_zip_entry(data, scanner):
+    Path(data("bad/bad_entry.zip")).rename(data("contents/bad_entry.zip"))
+
+    with pytest.raises(BadZipFile):
+        await scanner.scan()
+
+
+@pytest.mark.anyio
+async def test_scanner_handles_bad_zip_compression(data, scanner):
+    Path(data("bad/bad_compression.zip")).rename(data("contents/bad_compression.zip"))
+
+    with pytest.raises(BadZipFile):
+        await scanner.scan()