Diffstat (limited to 'tests/plugins')
-rw-r--r--  tests/plugins/scrapers/test_gallery_dl.py   52
-rw-r--r--  tests/plugins/scrapers/test_handlers.py    594
2 files changed, 646 insertions, 0 deletions
diff --git a/tests/plugins/scrapers/test_gallery_dl.py b/tests/plugins/scrapers/test_gallery_dl.py
new file mode 100644
index 0000000..b4e7a4a
--- /dev/null
+++ b/tests/plugins/scrapers/test_gallery_dl.py
@@ -0,0 +1,52 @@
+import json
+import os
+from zipfile import ZipFile
+
+import pytest
+
+import hircine.plugins.scrapers.gallery_dl
+from hircine.plugins.scrapers.gallery_dl import GalleryDLScraper
+from hircine.scraper.types import Title
+
+
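+# Minimal stand-in for a gallery-dl handler: exposes a source name and a
+# scrape() generator that yields metadata from the parsed info.json.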
+class MockHandler:
+ source = "mock"
+
+ def scrape(self, data):
+ yield Title(data["title"])
+
+
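+# Builds a temporary ZIP archive containing the info.json the scraper reads.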
+@pytest.fixture
+def archive_file(tmpdir):
+ file = os.path.join(tmpdir, "archive.zip")
+
+ with ZipFile(file, "x") as ziph:
+ ziph.writestr("info.json", json.dumps({"category": "mock", "title": "test"}))
+
+ yield file
+
+
+def test_does_scrape(monkeypatch, archive_file, gen_comic):
+ comic = next(gen_comic)
+ comic.archive.path = archive_file
+
+ monkeypatch.setattr(
+ hircine.plugins.scrapers.gallery_dl, "HANDLERS", {"mock": MockHandler}
+ )
+
+ scraper = GalleryDLScraper(comic)
+
+ assert scraper.is_available
+ assert scraper.source == MockHandler.source
+ assert scraper.name == f"gallery-dl info.json ({MockHandler.source})"
+ assert set(scraper.collect()) == set([Title(value="test")])
+
+
+def test_does_not_scrape_on_error(tmpdir, monkeypatch, gen_comic):
+ comic = next(gen_comic)
+ comic.archive.path = os.path.join(tmpdir, "nonexistent.zip")
+
+ scraper = GalleryDLScraper(comic)
+
+ assert scraper.data == {}
+ assert not scraper.is_available
diff --git a/tests/plugins/scrapers/test_handlers.py b/tests/plugins/scrapers/test_handlers.py
new file mode 100644
index 0000000..e9f5d0e
--- /dev/null
+++ b/tests/plugins/scrapers/test_handlers.py
@@ -0,0 +1,594 @@
+import json
+from datetime import date
+
+import pytest
+
+import hircine.enums as enums
+from hircine.plugins.scrapers.handlers.dynastyscans import DynastyScansHandler
+from hircine.plugins.scrapers.handlers.e621 import E621Handler
+from hircine.plugins.scrapers.handlers.exhentai import (
+ ExHentaiHandler,
+)
+from hircine.plugins.scrapers.handlers.exhentai import (
+ sanitize as exhentai_sanitize,
+)
+from hircine.plugins.scrapers.handlers.mangadex import MangadexHandler
+from hircine.scraper import Scraper
+from hircine.scraper.types import (
+ URL,
+ Artist,
+ Category,
+ Censorship,
+ Character,
+ Circle,
+ Date,
+ Direction,
+ Language,
+ OriginalTitle,
+ Rating,
+ Tag,
+ Title,
+ World,
+)
+
+
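+# Test harness that shadows the imported Scraper base class: it runs a handler's
+# scrape() against an inline JSON document instead of a comic archive.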
+class Scraper(Scraper):
+ def __init__(self, handler, json):
+ self.handler = handler
+ self.json = json
+ super().__init__(None)
+
+ def scrape(self):
+ yield from self.handler.scrape(json.loads(self.json))
+
+
+def test_dynastyscans():
+ scraper = Scraper(
+ DynastyScansHandler(),
+ """
+ {
+ "manga": "Hoshiiro GirlDrop Comic Anthology",
+ "chapter": 1,
+ "chapter_minor": "",
+ "title": "Hop, Step, Drop!",
+ "author": "Fujisawa Kamiya",
+ "group": "Cyan Steam (Stan Miller)",
+ "date": "2018-02-05 00:00:00",
+ "lang": "en",
+ "language": "English",
+ "count": 15,
+ "category": "dynastyscans",
+ "subcategory": "manga"
+ }
+ """,
+ )
+
+ assert set(scraper.collect()) == set(
+ [
+ Artist(name="Fujisawa Kamiya"),
+ Circle(name="Cyan Steam (Stan Miller)"),
+ Date(value=date(2018, 2, 5)),
+ Language(value=enums.Language.EN),
+ Title(value="Hoshiiro GirlDrop Comic Anthology Ch. 1: Hop, Step, Drop!"),
+ ]
+ )
+
+
+def test_mangadex():
+ scraper = Scraper(
+ MangadexHandler(),
+ """
+ {
+ "manga": "Shimeji Simulation",
+ "manga_id": "28b5d037-175d-4119-96f8-e860e408ebe9",
+ "title": "Danchi",
+ "volume": 1,
+ "chapter": 8,
+ "chapter_minor": "",
+ "chapter_id": "2a115ccb-de52-4b84-9166-cebd152d9396",
+ "date": "2019-09-22 04:19:15",
+ "lang": "en",
+ "language": "English",
+ "count": 12,
+ "artist": [
+ "Tsukumizu"
+ ],
+ "author": [
+ "Tsukumizu"
+ ],
+ "group": [
+ "Orchesc/a/ns"
+ ],
+ "status": "completed",
+ "tags": [
+ "Sci-Fi",
+ "Comedy",
+ "Girls' Love",
+ "4-Koma",
+ "Philosophical",
+ "School Life",
+ "Slice of Life"
+ ],
+ "category": "mangadex",
+ "subcategory": "chapter"
+ }
+ """,
+ )
+
+ assert set(scraper.collect()) == set(
+ [
+ Artist(name="Tsukumizu"),
+ Circle(name="Orchesc/a/ns"),
+ Date(value=date(2019, 9, 22)),
+ Language(value=enums.Language.EN),
+ Tag(namespace="none", tag="4-Koma"),
+ Tag(namespace="none", tag="Comedy"),
+ Tag(namespace="none", tag="Girls' Love"),
+ Tag(namespace="none", tag="Philosophical"),
+ Tag(namespace="none", tag="School Life"),
+ Tag(namespace="none", tag="Sci-Fi"),
+ Tag(namespace="none", tag="Slice of Life"),
+ Title(value="Shimeji Simulation Vol. 1, Ch. 8: Danchi"),
+ URL("https://mangadex.org/chapter/2a115ccb-de52-4b84-9166-cebd152d9396"),
+ ]
+ )
+
+
+@pytest.mark.parametrize(
+ "data, title",
+ [
+ ({"volume": 1, "chapter": 8}, "Manga Vol. 1, Ch. 8: Title"),
+ ({"volume": 0, "chapter": 1}, "Manga Ch. 1: Title"),
+ ({"volume": 0, "chapter": 0}, "Manga: Title"),
+ ],
+ ids=[
+ "volume and chapter",
+ "chapter only",
+ "none",
+ ],
+)
+def test_mangadex_handles_volume_and_chapter(data, title):
+ common = {"manga": "Manga", "title": "Title"}
+ scraper = Scraper(MangadexHandler(), json.dumps(common | data))
+
+ assert list(scraper.collect()) == [Title(value=title)]
+
+
+def test_e621_pool():
+ scraper = Scraper(
+ E621Handler(),
+ """
+ {
+ "id": 2968472,
+ "created_at": "2021-10-10T04:13:53.286-04:00",
+ "updated_at": "2024-11-02T08:58:06.724-04:00",
+ "file": {
+ "width": 800,
+ "height": 800,
+ "ext": "jpg",
+ "size": 530984,
+ "md5": "1ec7e397bb22c1454ab1986fd3f3edc5",
+ "url": "https://static1.e621.net/data/1e/c7/1ec7e397bb22c1454ab1986fd3f3edc5.jpg"
+ },
+ "preview": {
+ "width": 150,
+ "height": 150,
+ "url": "https://static1.e621.net/data/preview/1e/c7/1ec7e397bb22c1454ab1986fd3f3edc5.jpg"
+ },
+ "sample": {
+ "has": false,
+ "height": 800,
+ "width": 800,
+ "url": "https://static1.e621.net/data/1e/c7/1ec7e397bb22c1454ab1986fd3f3edc5.jpg",
+ "alternates": {}
+ },
+ "score": {
+ "up": 202,
+ "down": -1,
+ "total": 201
+ },
+ "tags": {
+ "general": [
+ "beak"
+ ],
+ "artist": [
+ "falseknees"
+ ],
+ "copyright": [],
+ "character": [],
+ "species": [
+ "bird"
+ ],
+ "invalid": [],
+ "meta": [
+ "comic",
+ "english_text"
+ ],
+ "lore": [
+ "parent_(lore)",
+ "parent_and_child_(lore)"
+ ]
+ },
+ "locked_tags": [],
+ "change_seq": 60808337,
+ "flags": {
+ "pending": false,
+ "flagged": false,
+ "note_locked": false,
+ "status_locked": false,
+ "rating_locked": false,
+ "deleted": false
+ },
+ "rating": "s",
+ "fav_count": 194,
+ "sources": [
+ "https://twitter.com/FalseKnees/status/1324869853627478022"
+ ],
+ "pools": [
+ 25779
+ ],
+ "relationships": {
+ "parent_id": null,
+ "has_children": false,
+ "has_active_children": false,
+ "children": []
+ },
+ "approver_id": 171673,
+ "uploader_id": 178921,
+ "description": "",
+ "comment_count": 1,
+ "is_favorited": false,
+ "has_notes": false,
+ "duration": null,
+ "num": 1,
+ "filename": "1ec7e397bb22c1454ab1986fd3f3edc5",
+ "extension": "jpg",
+ "date": "2021-10-10 08:13:53",
+ "pool": {
+ "id": 25779,
+ "name": "Kids say the darnedest shit - falseknees",
+ "created_at": "2021-10-10T04:17:07.006-04:00",
+ "updated_at": "2021-10-10T04:17:07.006-04:00",
+ "creator_id": 178921,
+ "description": "The terror of every parent.",
+ "is_active": true,
+ "category": "series",
+ "creator_name": "OneMoreAnonymous",
+ "post_count": 4
+ },
+ "category": "e621",
+ "subcategory": "pool"
+ }
+ """,
+ )
+
+ assert set(scraper.collect()) == set(
+ [
+ Artist(name="falseknees"),
+ Category(value=enums.Category.COMIC),
+ Censorship(value=enums.Censorship.NONE),
+ Date(value=date(2021, 10, 10)),
+ Language(value=enums.Language.EN),
+ Rating(value=enums.Rating.SAFE),
+ Tag(namespace="none", tag="beak"),
+ Tag(namespace="none", tag="bird"),
+ Title(value="Kids say the darnedest shit - falseknees"),
+ URL("https://e621.net/pools/25779"),
+ ]
+ )
+
+
+@pytest.mark.parametrize(
+ "data, censorship",
+ [
+ ({"tags": {"meta": ["censor_bar"]}}, enums.Censorship.BAR),
+ ({"tags": {"meta": ["mosaic_censorship"]}}, enums.Censorship.MOSAIC),
+ ({"tags": {"meta": ["uncensored"]}}, enums.Censorship.NONE),
+ ({"tags": {"meta": []}}, enums.Censorship.NONE),
+ ],
+ ids=[
+ "bars",
+ "mosaic",
+ "uncensored",
+ "uncensored (implied)",
+ ],
+)
+def test_e621_handles_censorship(data, censorship):
+ common = {"subcategory": "pool"}
+ scraper = Scraper(E621Handler(), json.dumps(common | data))
+
+ assert set(scraper.collect()) == set([Censorship(value=censorship)])
+
+
+def test_exhentai_explicit():
+ scraper = Scraper(
+ ExHentaiHandler(),
+ """
+ {
+ "gid": 2771624,
+ "token": "43108ee23b",
+ "thumb": "https://s.exhentai.org/t/12/80/1280a064a2ab3d70b9feb56bd0c55dbfc3ab6a39-309830-950-1351-jpg_250.jpg",
+ "title": "[NAGABE] Smell ch.01 - ch.06",
+ "title_jpn": "SMELL",
+ "eh_category": "Doujinshi",
+ "uploader": "randaldog",
+ "date": "2023-12-19 23:50:00",
+ "parent": "https://exhentai.org/g/2736803/b191bfed72/",
+ "expunged": false,
+ "language": "English",
+ "filesize": 74469868,
+ "filecount": "170",
+ "favorites": "751",
+ "rating": "4.83",
+ "torrentcount": "0",
+ "lang": "en",
+ "tags": [
+ "language:english",
+ "language:translated",
+ "parody:original",
+ "artist:nagabe",
+ "male:dog boy",
+ "male:furry",
+ "male:males only",
+ "male:smell",
+ "male:yaoi",
+ "other:story arc"
+ ],
+ "category": "exhentai",
+ "subcategory": "gallery"
+ }
+ """,
+ )
+
+ assert set(scraper.collect()) == set(
+ [
+ Artist(name="nagabe"),
+ Category(value=enums.Category.DOUJINSHI),
+ Censorship(value=enums.Censorship.BAR),
+ Date(value=date(2023, 12, 19)),
+ Direction(value=enums.Direction.RIGHT_TO_LEFT),
+ Language(value=enums.Language.EN),
+ OriginalTitle(value="SMELL"),
+ Rating(value=enums.Rating.EXPLICIT),
+ Tag(namespace="male", tag="dog boy"),
+ Tag(namespace="male", tag="furry"),
+ Tag(namespace="male", tag="males only"),
+ Tag(namespace="male", tag="smell"),
+ Tag(namespace="male", tag="yaoi"),
+ Tag(namespace="other", tag="story arc"),
+ Title(value="Smell ch.01 - ch.06"),
+ URL("https://exhentai.org/g/2771624/43108ee23b"),
+ World(name="original"),
+ ]
+ )
+
+
+def test_exhentai_non_h():
+ scraper = Scraper(
+ ExHentaiHandler(),
+ """
+ {
+ "gid": 1025913,
+ "token": "fdaabef1a2",
+ "thumb": "https://s.exhentai.org/t/51/17/5117cde63cc14436c5ad7f2dd06abb52c86aff65-23642001-2866-4047-png_250.jpg",
+ "title": "(C91) [Animachine (Shimahara)] Iya na Kao Sarenagara Opantsu Misete Moraitai Manga | A manga about girl showing you her panties while making a disgusted face [English] [葛の寺]",
+ "title_jpn": "(C91) [アニマルマシーン (40原)] 嫌な顔されながらおパンツ見せてもらいたい漫画 [英訳]",
+ "eh_category": "Non-H",
+ "uploader": "葛の寺",
+ "date": "2017-02-04 04:25:00",
+ "parent": "https://exhentai.org/g/1025875/cfe6adccb8/",
+ "expunged": false,
+ "language": "English",
+ "filesize": 0,
+ "filecount": "23",
+ "favorites": "1088",
+ "rating": "4.74",
+ "torrentcount": "1",
+ "lang": "en",
+ "tags": [
+ "language:english",
+ "language:translated",
+ "parody:iya na kao sare nagara opantsu misete moraitai",
+ "group:animachine",
+ "artist:shimahara",
+ "female:femdom",
+ "female:schoolgirl uniform",
+ "other:full color"
+ ],
+ "category": "exhentai",
+ "subcategory": "gallery"
+ }
+ """, # noqa: E501
+ )
+
+ assert set(scraper.collect()) == set(
+ [
+ Artist(name="shimahara"),
+ Date(value=date(2017, 2, 4)),
+ Language(value=enums.Language.EN),
+ OriginalTitle(value="嫌な顔されながらおパンツ見せてもらいたい漫画"),
+ Rating(value=enums.Rating.QUESTIONABLE),
+ World(name="iya na kao sare nagara opantsu misete moraitai"),
+ Circle(name="animachine"),
+ Tag(namespace="female", tag="femdom"),
+ Tag(namespace="female", tag="schoolgirl uniform"),
+ Tag(namespace="other", tag="full color"),
+ Title(
+ value="A manga about girl showing you her panties while making a disgusted face" # noqa: E501
+ ),
+ URL("https://exhentai.org/g/1025913/fdaabef1a2"),
+ ]
+ )
+
+
+@pytest.mark.parametrize(
+ "text, sanitized",
+ [
+ ("(foo) Title", "Title"),
+ ("[foo] {bar} =baz= Title", "Title"),
+ ("Foreign Title | Localized Title", "Localized Title"),
+ ],
+ ids=[
+ "parens at beginning",
+ "bracket-likes",
+ "split titles",
+ ],
+)
+def test_exhentai_sanitizes(text, sanitized):
+ assert exhentai_sanitize(text, split=True) == sanitized
+
+
+@pytest.mark.parametrize(
+ "data, expect",
+ [
+ (
+ {"category": "doujinshi"},
+ set(
+ [
+ Category(value=enums.Category.DOUJINSHI),
+ Censorship(value=enums.Censorship.BAR),
+ Rating(value=enums.Rating.EXPLICIT),
+ Direction(value=enums.Direction.RIGHT_TO_LEFT),
+ ]
+ ),
+ ),
+ (
+ {"eh_category": "doujinshi"},
+ set(
+ [
+ Category(value=enums.Category.DOUJINSHI),
+ Censorship(value=enums.Censorship.BAR),
+ Rating(value=enums.Rating.EXPLICIT),
+ Direction(value=enums.Direction.RIGHT_TO_LEFT),
+ ]
+ ),
+ ),
+ (
+ {"category": "manga"},
+ set(
+ [
+ Category(value=enums.Category.MANGA),
+ Censorship(value=enums.Censorship.BAR),
+ Rating(value=enums.Rating.EXPLICIT),
+ Direction(value=enums.Direction.RIGHT_TO_LEFT),
+ ]
+ ),
+ ),
+ (
+ {"category": "western"},
+ set(
+ [
+ Censorship(value=enums.Censorship.NONE),
+ Rating(value=enums.Rating.EXPLICIT),
+ ]
+ ),
+ ),
+ (
+ {"category": "artist cg"},
+ set(
+ [
+ Category(value=enums.Category.COMIC),
+ Censorship(value=enums.Censorship.BAR),
+ Rating(value=enums.Rating.EXPLICIT),
+ ]
+ ),
+ ),
+ (
+ {"category": "game cg"},
+ set(
+ [
+ Category(value=enums.Category.GAME_CG),
+ Censorship(value=enums.Censorship.BAR),
+ Rating(value=enums.Rating.EXPLICIT),
+ ]
+ ),
+ ),
+ (
+ {"category": "image set"},
+ set(
+ [
+ Category(value=enums.Category.IMAGE_SET),
+ Censorship(value=enums.Censorship.BAR),
+ Rating(value=enums.Rating.EXPLICIT),
+ ]
+ ),
+ ),
+ (
+ {"category": "non-h"},
+ set(
+ [
+ Rating(value=enums.Rating.QUESTIONABLE),
+ ]
+ ),
+ ),
+ (
+ {"category": "western", "tags": ["other:western non-h"]},
+ set(
+ [
+ Rating(value=enums.Rating.QUESTIONABLE),
+ ]
+ ),
+ ),
+ ],
+ ids=[
+ "category from category field",
+ "category from eh_category field",
+ "manga category",
+ "western category",
+ "artist cg category",
+ "game cg category",
+ "image set category",
+ "non-h category",
+ "western non-h tag",
+ ],
+)
+def test_exhentai_parses(data, expect):
+ scraper = Scraper(ExHentaiHandler(), json.dumps(data | {"gid": 1, "token": 1}))
+
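+    # Every scrape also yields the gallery URL built from gid and token.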
+ expect.add(URL(value="https://exhentai.org/g/1/1"))
+
+ assert set(scraper.collect()) == expect
+
+
+@pytest.mark.parametrize(
+ "tag, parsed",
+ [
+ ("parody:foo", World(name="foo")),
+ ("artist:foo", Artist(name="foo")),
+ ("character:foo", Character(name="foo")),
+ ("group:foo", Circle(name="foo")),
+ ("other:artbook", Category(value=enums.Category.ARTBOOK)),
+ ("other:non-h imageset", Category(value=enums.Category.IMAGE_SET)),
+ ("other:western imageset", Category(value=enums.Category.IMAGE_SET)),
+ ("other:comic", Category(value=enums.Category.COMIC)),
+ ("other:variant set", Category(value=enums.Category.VARIANT_SET)),
+ ("other:webtoon", Category(value=enums.Category.WEBTOON)),
+ ("other:full censorship", Censorship(value=enums.Censorship.FULL)),
+ ("other:mosaic censorship", Censorship(value=enums.Censorship.MOSAIC)),
+ ("other:uncensored", Censorship(value=enums.Censorship.NONE)),
+ ("generic", Tag(namespace=None, tag="generic")),
+ ],
+ ids=[
+ "parody",
+        "artist",
+        "character",
+        "group",
+ "other:artbook",
+ "other:image set",
+ "other:western image set",
+ "other:comic",
+ "other:variant set",
+ "other:webtoon",
+ "other:full censorship",
+ "other:mosaic censorship",
+ "other:uncensored",
+ "generic",
+ ],
+)
+def test_exhentai_parses_tags(tag, parsed):
+ scraper = Scraper(
+ ExHentaiHandler(), json.dumps({"tags": [tag], "gid": 1, "token": 1})
+ )
+ expect = set([URL(value="https://exhentai.org/g/1/1"), parsed])
+
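+    # The scrape yields more than the parsed tag, hence the superset check.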
+ assert set(scraper.collect()) > expect