diff options
Diffstat (limited to 'tests/scrapers')
-rw-r--r-- | tests/scrapers/test_scraper.py | 55 | ||||
-rw-r--r-- | tests/scrapers/test_scraper_utils.py | 28 | ||||
-rw-r--r-- | tests/scrapers/test_types.py | 131 |
3 files changed, 214 insertions, 0 deletions
diff --git a/tests/scrapers/test_scraper.py b/tests/scrapers/test_scraper.py
new file mode 100644
index 0000000..6f6f29d
--- /dev/null
+++ b/tests/scrapers/test_scraper.py
@@ -0,0 +1,55 @@
+from hircine.scraper import Scraper, ScrapeWarning
+
+
+class MockScraper(Scraper):
+    is_available = True
+
+    def scrape(self):
+        yield lambda: "foo"
+        yield "bar"
+
+
+class WarningScraper(Scraper):
+    is_available = True
+
+    def warn(self, str):
+        raise ScrapeWarning("Invalid input")
+
+    def scrape(self):
+        yield lambda: "foo"
+        yield lambda: self.warn("bar")
+        yield "baz"
+
+
+class ParserlessScraper(Scraper):
+    is_available = True
+
+    def scrape(self):
+        yield "literal"
+
+
+def test_scraper_collects():
+    generator = MockScraper(None).collect()
+
+    assert set(generator) == set(["foo", "bar"])
+
+
+def test_scraper_collects_with_transformer():
+    generator = MockScraper(None).collect([lambda gen, info: map(str.upper, gen)])
+
+    assert set(generator) == set(["FOO", "BAR"])
+
+
+def test_scraper_collects_warnings():
+    scraper = WarningScraper(None)
+    generator = scraper.collect()
+
+    assert set(generator) == set(["foo", "baz"])
+    assert scraper.get_warnings() == ["Invalid input"]
+
+
+def test_scraper_collects_literal():
+    scraper = ParserlessScraper(None)
+    generator = scraper.collect()
+
+    assert set(generator) == set(["literal"])
diff --git a/tests/scrapers/test_scraper_utils.py b/tests/scrapers/test_scraper_utils.py
new file mode 100644
index 0000000..193cf2a
--- /dev/null
+++ b/tests/scrapers/test_scraper_utils.py
@@ -0,0 +1,28 @@
+from hircine.scraper.utils import parse_dict
+
+
+def test_parse_dict():
+    dict = {
+        "scalar": "foo",
+        "list": ["bar", "baz"],
+        "dict": {"nested_scalar": "qux", "nested_list": ["plugh", "xyzzy"]},
+    }
+
+    def id(type):
+        return lambda item: f"{type}_{item}"
+
+    parsers = {
+        "scalar": id("scalar"),
+        "list": id("list"),
+        "dict": {"nested_scalar": id("scalar"), "nested_list": id("list")},
+        "missing": id("missing"),
+    }
+
+    assert [f() for f in parse_dict(parsers, dict)] == [
+        "scalar_foo",
+        "list_bar",
+        "list_baz",
+        "scalar_qux",
+        "list_plugh",
+        "list_xyzzy",
+    ]
diff --git a/tests/scrapers/test_types.py b/tests/scrapers/test_types.py
new file mode 100644
index 0000000..ed937e7
--- /dev/null
+++ b/tests/scrapers/test_types.py
@@ -0,0 +1,131 @@
+from datetime import date
+
+import pytest
+from hircine.api.types import ScrapedComic
+from hircine.scraper import ScrapeWarning
+from hircine.scraper.types import (
+    Artist,
+    Category,
+    Character,
+    Circle,
+    Date,
+    Language,
+    OriginalTitle,
+    Rating,
+    Tag,
+    Title,
+    World,
+)
+
+
+@pytest.mark.parametrize(
+    "input,options,want",
+    [
+        ("foo", {}, Tag(namespace="none", tag="foo")),
+        ("foo:bar", {}, Tag(namespace="foo", tag="bar")),
+        ("foo:bar:baz", {}, Tag(namespace="foo", tag="bar:baz")),
+        ("foo/bar", {"delimiter": "/"}, Tag(namespace="foo", tag="bar")),
+    ],
+    ids=[
+        "tag only",
+        "tag and namespace",
+        "tag with delimiter",
+        "custom delimiter",
+    ],
+)
+def test_tag_from_string(input, options, want):
+    assert Tag.from_string(input, **options) == want
+
+
+@pytest.mark.parametrize(
+    "input,want",
+    [
+        ("1998-02-07", Date(value=date(1998, 2, 7))),
+        ("2018-07-18T19:15", Date(value=date(2018, 7, 18))),
+        (
+            "2003-12-30T10:37Z",
+            Date(value=date(2003, 12, 30)),
+        ),
+    ],
+)
+def test_date_from_iso(input, want):
+    assert Date.from_iso(input) == want
+
+
+@pytest.mark.parametrize(
+    "input",
+    [
+        ("text"),
+        ("1997 02 07"),
+        ("1997/02/07"),
+    ],
+)
+def test_date_from_iso_fails(input):
+    with pytest.raises(ScrapeWarning, match="Could not parse date:"):
+        Date.from_iso(input)
+
+
+@pytest.mark.parametrize(
+    "input,want",
+    [
+        ("886806000", Date(value=date(1998, 2, 7))),
+        (886806000, Date(value=date(1998, 2, 7))),
+    ],
+)
+def test_date_from_timestamp(input, want):
+    assert Date.from_timestamp(input) == want
+
+
+@pytest.mark.parametrize(
+    "input",
+    [
+        ("text"),
+    ],
+)
+def test_date_from_timestamp_fails(input):
+    with pytest.raises(ScrapeWarning, match="Could not parse date:"):
+        Date.from_timestamp(input)
+
+
+@pytest.mark.parametrize(
+    "item,attr,empty",
+    [
+        (Title(""), "title", None),
+        (OriginalTitle(""), "original_title", None),
+        (Language(None), "language", None),
+        (Date(None), "date", None),
+        (Rating(None), "rating", None),
+        (Category(None), "category", None),
+        (Tag("", ""), "tags", []),
+        (Tag(namespace="", tag=""), "tags", []),
+        (Tag(namespace=None, tag=""), "tags", []),
+        (Tag(namespace="foo", tag=""), "tags", []),
+        (Artist(""), "artists", []),
+        (Character(""), "characters", []),
+        (Circle(""), "circles", []),
+        (World(""), "worlds", []),
+    ],
+    ids=[
+        "title",
+        "original title",
+        "language",
+        "date",
+        "rating",
+        "category",
+        "tag (both empty, positional)",
+        "tag (both empty)",
+        "tag (namespace None, tag empty)",
+        "tag (tag empty)",
+        "artist",
+        "character",
+        "circle",
+        "world",
+    ],
+)
+def test_scraped_comic_silently_ignores_empty(item, attr, empty):
+    def gen():
+        yield item
+
+    comic = ScrapedComic.from_generator(gen())
+
+    assert getattr(comic, attr) == empty