Diffstat (limited to 'tests/scrapers')
-rw-r--r--  tests/scrapers/test_scraper.py         55
-rw-r--r--  tests/scrapers/test_scraper_utils.py   28
-rw-r--r--  tests/scrapers/test_types.py          131
3 files changed, 214 insertions, 0 deletions
diff --git a/tests/scrapers/test_scraper.py b/tests/scrapers/test_scraper.py
new file mode 100644
index 0000000..6f6f29d
--- /dev/null
+++ b/tests/scrapers/test_scraper.py
@@ -0,0 +1,55 @@
+from hircine.scraper import Scraper, ScrapeWarning
+
+
+class MockScraper(Scraper):
+    is_available = True
+
+    def scrape(self):
+        yield lambda: "foo"
+        yield "bar"
+
+
+class WarningScraper(Scraper):
+    is_available = True
+
+    def warn(self, str):
+        raise ScrapeWarning("Invalid input")
+
+    def scrape(self):
+        yield lambda: "foo"
+        yield lambda: self.warn("bar")
+        yield "baz"
+
+
+class ParserlessScraper(Scraper):
+    is_available = True
+
+    def scrape(self):
+        yield "literal"
+
+
+def test_scraper_collects():
+    generator = MockScraper(None).collect()
+
+    assert set(generator) == set(["foo", "bar"])
+
+
+def test_scraper_collects_with_transformer():
+    generator = MockScraper(None).collect([lambda gen, info: map(str.upper, gen)])
+
+    assert set(generator) == set(["FOO", "BAR"])
+
+
+def test_scraper_collects_warnings():
+    scraper = WarningScraper(None)
+    generator = scraper.collect()
+
+    assert set(generator) == set(["foo", "baz"])
+    assert scraper.get_warnings() == ["Invalid input"]
+
+
+def test_scraper_collects_literal():
+    scraper = ParserlessScraper(None)
+    generator = scraper.collect()
+
+    assert set(generator) == set(["literal"])
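
These tests pin down the behaviour of Scraper.collect(): items yielded by scrape() may be either zero-argument callables (lazy parsers) or literal values, callables that raise ScrapeWarning are dropped with their messages recorded, and optional transformers are applied to the resulting stream. The following is a minimal sketch of a base class with that behaviour, inferred from the tests only; it is not necessarily how hircine.scraper implements it, and the constructor argument name `comic` is an assumption.

class ScrapeWarning(Exception):
    """Raised by lazy parsers to signal a recoverable scrape failure."""


class Scraper:
    is_available = False

    def __init__(self, comic):
        # `comic` stands in for whatever context object the scraper receives;
        # the tests above only ever pass None.
        self.comic = comic
        self.warnings = []

    def scrape(self):
        # Subclasses yield literal values or zero-argument callables.
        yield from ()

    def collect(self, transformers=None):
        def generate():
            for item in self.scrape():
                try:
                    # Lazy items are evaluated here, so a ScrapeWarning raised
                    # during parsing only skips the offending item.
                    yield item() if callable(item) else item
                except ScrapeWarning as warning:
                    self.warnings.append(str(warning))

        generator = generate()
        for transformer in transformers or []:
            generator = transformer(generator, self.comic)
        return generator

    def get_warnings(self):
        return self.warnings
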
diff --git a/tests/scrapers/test_scraper_utils.py b/tests/scrapers/test_scraper_utils.py
new file mode 100644
index 0000000..193cf2a
--- /dev/null
+++ b/tests/scrapers/test_scraper_utils.py
@@ -0,0 +1,28 @@
+from hircine.scraper.utils import parse_dict
+
+
+def test_parse_dict():
+    dict = {
+        "scalar": "foo",
+        "list": ["bar", "baz"],
+        "dict": {"nested_scalar": "qux", "nested_list": ["plugh", "xyzzy"]},
+    }
+
+    def id(type):
+        return lambda item: f"{type}_{item}"
+
+    parsers = {
+        "scalar": id("scalar"),
+        "list": id("list"),
+        "dict": {"nested_scalar": id("scalar"), "nested_list": id("list")},
+        "missing": id("missing"),
+    }
+
+    assert [f() for f in parse_dict(parsers, dict)] == [
+        "scalar_foo",
+        "list_bar",
+        "list_baz",
+        "scalar_qux",
+        "list_plugh",
+        "list_xyzzy",
+    ]
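
For reference, parse_dict pairs a nested parser specification with a data dictionary and yields zero-argument callables, so that evaluation (and any ScrapeWarning) is deferred to the caller. The sketch below reproduces the behaviour the test expects, namely recursion into nested dicts, per-element application for lists, and silent skipping of parser keys with no matching data; it is an approximation, not the actual hircine.scraper.utils code.

def parse_dict(parsers, data):
    # `parsers` maps keys to callables (or nested dicts of callables);
    # `data` is the raw scraped structure.
    for key, parser in parsers.items():
        if key not in data:
            continue  # e.g. the "missing" entry in the test above
        value = data[key]
        if isinstance(parser, dict):
            # Recurse into nested specifications.
            yield from parse_dict(parser, value)
        elif isinstance(value, list):
            # Apply the parser to every element of a list value.
            for element in value:
                yield lambda parser=parser, element=element: parser(element)
        else:
            yield lambda parser=parser, value=value: parser(value)
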
diff --git a/tests/scrapers/test_types.py b/tests/scrapers/test_types.py
new file mode 100644
index 0000000..ed937e7
--- /dev/null
+++ b/tests/scrapers/test_types.py
@@ -0,0 +1,131 @@
+from datetime import date
+
+import pytest
+from hircine.api.types import ScrapedComic
+from hircine.scraper import ScrapeWarning
+from hircine.scraper.types import (
+    Artist,
+    Category,
+    Character,
+    Circle,
+    Date,
+    Language,
+    OriginalTitle,
+    Rating,
+    Tag,
+    Title,
+    World,
+)
+
+
+@pytest.mark.parametrize(
+    "input,options,want",
+    [
+        ("foo", {}, Tag(namespace="none", tag="foo")),
+        ("foo:bar", {}, Tag(namespace="foo", tag="bar")),
+        ("foo:bar:baz", {}, Tag(namespace="foo", tag="bar:baz")),
+        ("foo/bar", {"delimiter": "/"}, Tag(namespace="foo", tag="bar")),
+    ],
+    ids=[
+        "tag only",
+        "tag and namespace",
+        "tag with delimiter",
+        "custom delimiter",
+    ],
+)
+def test_tag_from_string(input, options, want):
+    assert Tag.from_string(input, **options) == want
+
+
+@pytest.mark.parametrize(
+    "input,want",
+    [
+        ("1998-02-07", Date(value=date(1998, 2, 7))),
+        ("2018-07-18T19:15", Date(value=date(2018, 7, 18))),
+        (
+            "2003-12-30T10:37Z",
+            Date(value=date(2003, 12, 30)),
+        ),
+    ],
+)
+def test_date_from_iso(input, want):
+    assert Date.from_iso(input) == want
+
+
+@pytest.mark.parametrize(
+    "input",
+    [
+        ("text"),
+        ("1997 02 07"),
+        ("1997/02/07"),
+    ],
+)
+def test_date_from_iso_fails(input):
+    with pytest.raises(ScrapeWarning, match="Could not parse date:"):
+        Date.from_iso(input)
+
+
+@pytest.mark.parametrize(
+    "input,want",
+    [
+        ("886806000", Date(value=date(1998, 2, 7))),
+        (886806000, Date(value=date(1998, 2, 7))),
+    ],
+)
+def test_date_from_timestamp(input, want):
+    assert Date.from_timestamp(input) == want
+
+
+@pytest.mark.parametrize(
+    "input",
+    [
+        ("text"),
+    ],
+)
+def test_date_from_timestamp_fails(input):
+    with pytest.raises(ScrapeWarning, match="Could not parse date:"):
+        Date.from_timestamp(input)
+
+
+@pytest.mark.parametrize(
+    "item,attr,empty",
+    [
+        (Title(""), "title", None),
+        (OriginalTitle(""), "original_title", None),
+        (Language(None), "language", None),
+        (Date(None), "date", None),
+        (Rating(None), "rating", None),
+        (Category(None), "category", None),
+        (Tag("", ""), "tags", []),
+        (Tag(namespace="", tag=""), "tags", []),
+        (Tag(namespace=None, tag=""), "tags", []),
+        (Tag(namespace="foo", tag=""), "tags", []),
+        (Artist(""), "artists", []),
+        (Character(""), "characters", []),
+        (Circle(""), "circles", []),
+        (World(""), "worlds", []),
+    ],
+    ids=[
+        "title",
+        "original title",
+        "language",
+        "date",
+        "rating",
+        "category",
+        "tag (both empty, positional)",
+        "tag (both empty)",
+        "tag (namespace None, tag empty)",
+        "tag (tag empty)",
+        "artist",
+        "character",
+        "circle",
+        "world",
+    ],
+)
+def test_scraped_comic_silently_ignores_empty(item, attr, empty):
+    def gen():
+        yield item
+
+    comic = ScrapedComic.from_generator(gen())
+
+    assert getattr(comic, attr) == empty
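
Taken together, the three test modules suggest the intended end-to-end flow: a concrete scraper yields lazy parsers built from hircine.scraper.types, collect() evaluates them while recording warnings, and ScrapedComic.from_generator assembles the result, silently dropping empty values. Below is a hypothetical scraper illustrating that flow; ExampleScraper and its inline data dict are invented for illustration and are not part of hircine.

from hircine.api.types import ScrapedComic
from hircine.scraper import Scraper
from hircine.scraper.types import Date, Tag, Title


class ExampleScraper(Scraper):
    is_available = True

    def scrape(self):
        # Invented stand-in for data fetched from an external source.
        data = {"title": "Example", "date": "1998-02-07", "tags": ["foo:bar"]}

        yield Title(data["title"])
        yield lambda: Date.from_iso(data["date"])
        for raw in data["tags"]:
            # Bind `raw` per iteration so each lazy parser sees its own tag.
            yield lambda raw=raw: Tag.from_string(raw)


comic = ScrapedComic.from_generator(ExampleScraper(None).collect())
# `comic` now carries the parsed title, date, and tags; any ScrapeWarning
# raised while parsing would have been recorded on the scraper instead.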