summaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorWolfgang Müller2025-03-26 17:29:22 +0100
committerWolfgang Müller2025-03-26 17:29:22 +0100
commitdd1ef483ef90f35218f5a4a3ea37a624b38ca8da (patch)
treecad45146f8a34b7afbff2ecfe4f0bf17e9c7fe04
parent89da8855282e03e4b8e25146fa041aabf49c82cb (diff)
downloadhircine-dd1ef483ef90f35218f5a4a3ea37a624b38ca8da.tar.gz
backend: Handle corrupt zip files
Corrupt zip files already caused hircine to throw an error, but depending on the exact problem, the error did not report which file (or even which entry within a zip file) was affected. Use ZipFile.testzip() to catch common problems, and re-raise any exception raised within it as a BadZipFile exception. This ensures that decompression problems raised as other exception types (for example, zlib.error) are reported as well.
-rw-r--r--src/hircine/scanner.py7
-rw-r--r--tests/scanner/data/bad/bad_compression.zipbin0 -> 28046 bytes
-rw-r--r--tests/scanner/data/bad/bad_entry.zipbin0 -> 126 bytes
-rw-r--r--tests/scanner/test_scanner.py18
4 files changed, 23 insertions, 2 deletions
diff --git a/src/hircine/scanner.py b/src/hircine/scanner.py
index 29ae04f..6e3fafb 100644
--- a/src/hircine/scanner.py
+++ b/src/hircine/scanner.py
@@ -8,7 +8,7 @@ from datetime import datetime, timezone
from enum import Enum
from hashlib import file_digest
from typing import NamedTuple
-from zipfile import ZipFile, is_zipfile
+from zipfile import BadZipFile, ZipFile, is_zipfile
from blake3 import blake3
from natsort import natsorted, ns
@@ -286,6 +286,11 @@ class Scanner:
hash = blake3()
with ZipFile(path, mode="r") as z:
+ try:
+ z.testzip()
+ except Exception as e:
+ raise BadZipFile(f"Corrupt zip file {path}") from e
+
input = [(path, info.filename) for info in z.infolist()]
loop = asyncio.get_event_loop()
diff --git a/tests/scanner/data/bad/bad_compression.zip b/tests/scanner/data/bad/bad_compression.zip
new file mode 100644
index 0000000..4dbbc1f
--- /dev/null
+++ b/tests/scanner/data/bad/bad_compression.zip
Binary files differ
diff --git a/tests/scanner/data/bad/bad_entry.zip b/tests/scanner/data/bad/bad_entry.zip
new file mode 100644
index 0000000..0bf6e13
--- /dev/null
+++ b/tests/scanner/data/bad/bad_entry.zip
Binary files differ
diff --git a/tests/scanner/test_scanner.py b/tests/scanner/test_scanner.py
index 6fc6650..141698c 100644
--- a/tests/scanner/test_scanner.py
+++ b/tests/scanner/test_scanner.py
@@ -3,7 +3,7 @@ import os
import shutil
from datetime import datetime, timezone
from pathlib import Path
-from zipfile import ZipFile
+from zipfile import BadZipFile, ZipFile
import pytest
from conftest import DB
@@ -309,3 +309,19 @@ async def test_scanner_reprocess(archive, data, scanner, capsys):
captured = capsys.readouterr()
assert captured.out == "[~] archive.zip\n"
+
+
+@pytest.mark.anyio
+async def test_scanner_handles_bad_zip_entry(data, scanner):
+ Path(data("bad/bad_entry.zip")).rename(data("contents/bad_entry.zip"))
+
+ with pytest.raises(BadZipFile):
+ await scanner.scan()
+
+
+@pytest.mark.anyio
+async def test_scanner_handles_bad_zip_compression(data, scanner):
+ Path(data("bad/bad_compression.zip")).rename(data("contents/bad_compression.zip"))
+
+ with pytest.raises(BadZipFile):
+ await scanner.scan()