summaryrefslogtreecommitdiffstatshomepage
path: root/tests/scrapers/test_scraper_utils.py
blob: 4b02aadddabb0722048b2aa4180af313f1c18cee (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import json
import os
from zipfile import ZipFile

import pytest

from hircine.scraper.utils import open_archive_file, parse_dict


def test_parse_dict():
    """Check the callables that ``parse_dict`` yields for a nested mapping.

    Each parser tags its input with a prefix, so the expected list shows
    which parser handled which value: one result for the scalar, one per
    list element, and the same again for the entries of the nested
    mapping. The ``"missing"`` parser has no matching key in the data and
    is expected to contribute nothing.
    """
    data = {
        "scalar": "foo",
        "list": ["bar", "baz"],
        "dict": {"nested_scalar": "qux", "nested_list": ["plugh", "xyzzy"]},
    }

    def tag(kind):
        # Build a parser that prefixes the item with ``kind`` so the output
        # identifies the parser that produced it.
        return lambda item: f"{kind}_{item}"

    parsers = {
        "scalar": tag("scalar"),
        "list": tag("list"),
        "dict": {"nested_scalar": tag("scalar"), "nested_list": tag("list")},
        # Deliberately absent from ``data``.
        "missing": tag("missing"),
    }

    # parse_dict yields callables; invoke each one to obtain the parsed value.
    assert [f() for f in parse_dict(parsers, data)] == [
        "scalar_foo",
        "list_bar",
        "list_baz",
        "scalar_qux",
        "list_plugh",
        "list_xyzzy",
    ]


@pytest.mark.parametrize(
    "check_sidecar",
    [False, True],
    ids=["zip", "sidecar"],
)
def test_open_archive_file(gen_archive, tmpdir, check_sidecar):
    """Check which ``info.json`` ``open_archive_file`` opens.

    Both a sidecar file (``<archive path>.info.json``) and an ``info.json``
    member inside the zip are created with distinguishable contents. The
    sidecar contents are expected when ``check_sidecar`` is true, the zip
    member's contents otherwise.
    """
    archive = next(gen_archive)
    archive.path = os.path.join(tmpdir, "archive.zip")

    payload_in_zip = {"zip": "data"}
    payload_in_sidecar = {"sidecar": "data"}

    # Mode "x" fails if the file already exists, so each run starts clean.
    with open(f"{archive.path}.info.json", "x") as sidecar_fh:
        json.dump(payload_in_sidecar, sidecar_fh)

    with ZipFile(archive.path, "x") as zf:
        zf.writestr("info.json", json.dumps(payload_in_zip))

    with open_archive_file(
        archive, "info.json", check_sidecar=check_sidecar
    ) as opened:
        loaded = json.load(opened)

    expected = payload_in_sidecar if check_sidecar else payload_in_zip
    assert loaded == expected