summaryrefslogtreecommitdiffstatshomepage
path: root/tests/plugins/scrapers
diff options
context:
space:
mode:
authorWolfgang Müller2024-11-14 23:22:58 +0100
committerWolfgang Müller2024-11-14 23:22:58 +0100
commita495162ab6d0bf324c300eca398532ee397cf9a1 (patch)
tree4f415b8770c4c697a3d948da0eb729370aafd595 /tests/plugins/scrapers
parent47d464fbfc1dd4174c4f0ab39268297c14b972a3 (diff)
downloadhircine-a495162ab6d0bf324c300eca398532ee397cf9a1.tar.gz
backend/tests: Add tests for gallery_dl scrapers
Diffstat (limited to 'tests/plugins/scrapers')
-rw-r--r--tests/plugins/scrapers/test_gallery_dl.py52
-rw-r--r--tests/plugins/scrapers/test_handlers.py594
2 files changed, 646 insertions, 0 deletions
diff --git a/tests/plugins/scrapers/test_gallery_dl.py b/tests/plugins/scrapers/test_gallery_dl.py
new file mode 100644
index 0000000..b4e7a4a
--- /dev/null
+++ b/tests/plugins/scrapers/test_gallery_dl.py
@@ -0,0 +1,52 @@
+import json
+import os
+from zipfile import ZipFile
+
+import pytest
+
+import hircine.plugins.scrapers.gallery_dl
+from hircine.plugins.scrapers.gallery_dl import GalleryDLScraper
+from hircine.scraper.types import Title
+
+
class MockHandler:
    """Stub scraper handler that emits a single Title from the parsed JSON."""

    source = "mock"

    def scrape(self, data):
        # Emit exactly one metadata item so the test can assert on it.
        title = data["title"]
        yield Title(title)
+
+
@pytest.fixture
def archive_file(tmpdir):
    """Yield the path of a zip archive containing a gallery-dl style info.json."""
    path = os.path.join(tmpdir, "archive.zip")

    payload = {"category": "mock", "title": "test"}
    with ZipFile(path, "x") as zf:
        zf.writestr("info.json", json.dumps(payload))

    yield path
+
+
def test_does_scrape(monkeypatch, archive_file, gen_comic):
    """A comic whose archive carries an info.json is scraped via its handler."""
    comic = next(gen_comic)
    comic.archive.path = archive_file

    # Route the fixture archive's "mock" category to the stub handler.
    monkeypatch.setattr(
        hircine.plugins.scrapers.gallery_dl, "HANDLERS", {"mock": MockHandler}
    )

    scraper = GalleryDLScraper(comic)

    expected_name = f"gallery-dl info.json ({MockHandler.source})"

    assert scraper.is_available
    assert scraper.source == MockHandler.source
    assert scraper.name == expected_name
    assert set(scraper.collect()) == {Title(value="test")}
+
+
def test_does_not_scrape_on_error(tmpdir, monkeypatch, gen_comic):
    """A missing archive must leave the scraper unavailable with empty data."""
    comic = next(gen_comic)
    missing = os.path.join(tmpdir, "nonexistent.zip")
    comic.archive.path = missing

    scraper = GalleryDLScraper(comic)

    assert scraper.data == {}
    assert not scraper.is_available
diff --git a/tests/plugins/scrapers/test_handlers.py b/tests/plugins/scrapers/test_handlers.py
new file mode 100644
index 0000000..e9f5d0e
--- /dev/null
+++ b/tests/plugins/scrapers/test_handlers.py
@@ -0,0 +1,594 @@
+import json
+from datetime import date
+
+import pytest
+
+import hircine.enums as enums
+from hircine.plugins.scrapers.handlers.dynastyscans import DynastyScansHandler
+from hircine.plugins.scrapers.handlers.e621 import E621Handler
+from hircine.plugins.scrapers.handlers.exhentai import (
+ ExHentaiHandler,
+)
+from hircine.plugins.scrapers.handlers.exhentai import (
+ sanitize as exhentai_sanitize,
+)
+from hircine.plugins.scrapers.handlers.mangadex import MangadexHandler
+from hircine.scraper import Scraper
+from hircine.scraper.types import (
+ URL,
+ Artist,
+ Category,
+ Censorship,
+ Character,
+ Circle,
+ Date,
+ Direction,
+ Language,
+ OriginalTitle,
+ Rating,
+ Tag,
+ Title,
+ World,
+)
+
+
# NOTE(review): this test double deliberately shadows the imported Scraper
# base class — sibling tests in this module instantiate it under that name,
# so it cannot be renamed in isolation. Confirm the shadowing is intended.
class Scraper(Scraper):
    """Test scraper that feeds a raw JSON string through a single handler."""

    def __init__(self, handler, json):
        # `json` is the raw JSON document; the parameter shadows the json
        # module only inside __init__ — scrape() still sees the module.
        self.handler = handler
        self.json = json
        super().__init__(None)

    def scrape(self):
        # Parse at collection time and defer entirely to the handler under test.
        yield from self.handler.scrape(json.loads(self.json))
+
+
def test_dynastyscans():
    """A Dynasty Scans chapter export maps onto the expected scraper types."""
    scraper = Scraper(
        DynastyScansHandler(),
        """
    {
        "manga": "Hoshiiro GirlDrop Comic Anthology",
        "chapter": 1,
        "chapter_minor": "",
        "title": "Hop, Step, Drop!",
        "author": "Fujisawa Kamiya",
        "group": "Cyan Steam (Stan Miller)",
        "date": "2018-02-05 00:00:00",
        "lang": "en",
        "language": "English",
        "count": 15,
        "category": "dynastyscans",
        "subcategory": "manga"
    }
    """,
    )

    # Title is assembled from manga + chapter + chapter title.
    assert set(scraper.collect()) == set(
        [
            Artist(name="Fujisawa Kamiya"),
            Circle(name="Cyan Steam (Stan Miller)"),
            Date(value=date(2018, 2, 5)),
            Language(value=enums.Language.EN),
            Title(value="Hoshiiro GirlDrop Comic Anthology Ch. 1: Hop, Step, Drop!"),
        ]
    )
+
+
def test_mangadex():
    """A Mangadex chapter export maps tags, credits, date, and chapter URL."""
    scraper = Scraper(
        MangadexHandler(),
        """
    {
        "manga": "Shimeji Simulation",
        "manga_id": "28b5d037-175d-4119-96f8-e860e408ebe9",
        "title": "Danchi",
        "volume": 1,
        "chapter": 8,
        "chapter_minor": "",
        "chapter_id": "2a115ccb-de52-4b84-9166-cebd152d9396",
        "date": "2019-09-22 04:19:15",
        "lang": "en",
        "language": "English",
        "count": 12,
        "artist": [
            "Tsukumizu"
        ],
        "author": [
            "Tsukumizu"
        ],
        "group": [
            "Orchesc/a/ns"
        ],
        "status": "completed",
        "tags": [
            "Sci-Fi",
            "Comedy",
            "Girls' Love",
            "4-Koma",
            "Philosophical",
            "School Life",
            "Slice of Life"
        ],
        "category": "mangadex",
        "subcategory": "chapter"
    }
    """,
    )

    # artist and author both name "Tsukumizu" but only one Artist is expected;
    # tags land in the "none" namespace; the URL is built from chapter_id.
    assert set(scraper.collect()) == set(
        [
            Artist(name="Tsukumizu"),
            Circle(name="Orchesc/a/ns"),
            Date(value=date(2019, 9, 22)),
            Language(value=enums.Language.EN),
            Tag(namespace="none", tag="4-Koma"),
            Tag(namespace="none", tag="Comedy"),
            Tag(namespace="none", tag="Girls' Love"),
            Tag(namespace="none", tag="Philosophical"),
            Tag(namespace="none", tag="School Life"),
            Tag(namespace="none", tag="Sci-Fi"),
            Tag(namespace="none", tag="Slice of Life"),
            Title(value="Shimeji Simulation Vol. 1, Ch. 8: Danchi"),
            URL("https://mangadex.org/chapter/2a115ccb-de52-4b84-9166-cebd152d9396"),
        ]
    )
+
+
@pytest.mark.parametrize(
    "data, title",
    [
        ({"volume": 1, "chapter": 8}, "Manga Vol. 1, Ch. 8: Title"),
        ({"volume": 0, "chapter": 1}, "Manga Ch. 1: Title"),
        ({"volume": 0, "chapter": 0}, "Manga: Title"),
    ],
    ids=[
        "volume and chapter",
        "chapter only",
        "none",
    ],
)
def test_mangadex_handles_volume_and_chapter(data, title):
    """Zero-valued volume/chapter fields must be omitted from the title."""
    payload = {"manga": "Manga", "title": "Title", **data}
    scraper = Scraper(MangadexHandler(), json.dumps(payload))

    assert [Title(value=title)] == list(scraper.collect())
+
+
def test_e621_pool():
    """An e621 pool export maps post and pool metadata onto scraper types."""
    scraper = Scraper(
        E621Handler(),
        """
    {
        "id": 2968472,
        "created_at": "2021-10-10T04:13:53.286-04:00",
        "updated_at": "2024-11-02T08:58:06.724-04:00",
        "file": {
            "width": 800,
            "height": 800,
            "ext": "jpg",
            "size": 530984,
            "md5": "1ec7e397bb22c1454ab1986fd3f3edc5",
            "url": "https://static1.e621.net/data/1e/c7/1ec7e397bb22c1454ab1986fd3f3edc5.jpg"
        },
        "preview": {
            "width": 150,
            "height": 150,
            "url": "https://static1.e621.net/data/preview/1e/c7/1ec7e397bb22c1454ab1986fd3f3edc5.jpg"
        },
        "sample": {
            "has": false,
            "height": 800,
            "width": 800,
            "url": "https://static1.e621.net/data/1e/c7/1ec7e397bb22c1454ab1986fd3f3edc5.jpg",
            "alternates": {}
        },
        "score": {
            "up": 202,
            "down": -1,
            "total": 201
        },
        "tags": {
            "general": [
                "beak"
            ],
            "artist": [
                "falseknees"
            ],
            "copyright": [],
            "character": [],
            "species": [
                "bird"
            ],
            "invalid": [],
            "meta": [
                "comic",
                "english_text"
            ],
            "lore": [
                "parent_(lore)",
                "parent_and_child_(lore)"
            ]
        },
        "locked_tags": [],
        "change_seq": 60808337,
        "flags": {
            "pending": false,
            "flagged": false,
            "note_locked": false,
            "status_locked": false,
            "rating_locked": false,
            "deleted": false
        },
        "rating": "s",
        "fav_count": 194,
        "sources": [
            "https://twitter.com/FalseKnees/status/1324869853627478022"
        ],
        "pools": [
            25779
        ],
        "relationships": {
            "parent_id": null,
            "has_children": false,
            "has_active_children": false,
            "children": []
        },
        "approver_id": 171673,
        "uploader_id": 178921,
        "description": "",
        "comment_count": 1,
        "is_favorited": false,
        "has_notes": false,
        "duration": null,
        "num": 1,
        "filename": "1ec7e397bb22c1454ab1986fd3f3edc5",
        "extension": "jpg",
        "date": "2021-10-10 08:13:53",
        "pool": {
            "id": 25779,
            "name": "Kids say the darnedest shit - falseknees",
            "created_at": "2021-10-10T04:17:07.006-04:00",
            "updated_at": "2021-10-10T04:17:07.006-04:00",
            "creator_id": 178921,
            "description": "The terror of every parent.",
            "is_active": true,
            "category": "series",
            "creator_name": "OneMoreAnonymous",
            "post_count": 4
        },
        "category": "e621",
        "subcategory": "pool"
    }
    """,
    )

    # Expected mapping (per the handler under test): "comic" meta tag ->
    # Category.COMIC, "english_text" -> Language.EN, rating "s" -> SAFE,
    # no censorship meta tag -> Censorship.NONE, pool name -> Title,
    # pool id -> URL; general/species tags become plain Tags.
    assert set(scraper.collect()) == set(
        [
            Artist(name="falseknees"),
            Category(value=enums.Category.COMIC),
            Censorship(value=enums.Censorship.NONE),
            Date(value=date(2021, 10, 10)),
            Language(value=enums.Language.EN),
            Rating(value=enums.Rating.SAFE),
            Tag(namespace="none", tag="beak"),
            Tag(namespace="none", tag="bird"),
            Title(value="Kids say the darnedest shit - falseknees"),
            URL("https://e621.net/pools/25779"),
        ]
    )
+
+
@pytest.mark.parametrize(
    "data, censorship",
    [
        ({"tags": {"meta": ["censor_bar"]}}, enums.Censorship.BAR),
        ({"tags": {"meta": ["mosaic_censorship"]}}, enums.Censorship.MOSAIC),
        ({"tags": {"meta": ["uncensored"]}}, enums.Censorship.NONE),
        ({"tags": {"meta": []}}, enums.Censorship.NONE),
    ],
    ids=[
        "bars",
        "mosaic",
        "uncensored",
        "uncensored (implied)",
    ],
)
def test_e621_handles_censorship(data, censorship):
    """Censorship meta tags must map onto the matching Censorship enum."""
    payload = {"subcategory": "pool", **data}
    scraper = Scraper(E621Handler(), json.dumps(payload))

    assert {Censorship(value=censorship)} == set(scraper.collect())
+
+
def test_exhentai_explicit():
    """An ExHentai doujinshi gallery maps onto the expected scraper types."""
    scraper = Scraper(
        ExHentaiHandler(),
        """
    {
        "gid": 2771624,
        "token": "43108ee23b",
        "thumb": "https://s.exhentai.org/t/12/80/1280a064a2ab3d70b9feb56bd0c55dbfc3ab6a39-309830-950-1351-jpg_250.jpg",
        "title": "[NAGABE] Smell ch.01 - ch.06",
        "title_jpn": "SMELL",
        "eh_category": "Doujinshi",
        "uploader": "randaldog",
        "date": "2023-12-19 23:50:00",
        "parent": "https://exhentai.org/g/2736803/b191bfed72/",
        "expunged": false,
        "language": "English",
        "filesize": 74469868,
        "filecount": "170",
        "favorites": "751",
        "rating": "4.83",
        "torrentcount": "0",
        "lang": "en",
        "tags": [
            "language:english",
            "language:translated",
            "parody:original",
            "artist:nagabe",
            "male:dog boy",
            "male:furry",
            "male:males only",
            "male:smell",
            "male:yaoi",
            "other:story arc"
        ],
        "category": "exhentai",
        "subcategory": "gallery"
    }
    """,
    )

    # Expected mapping (per the handler under test): bracketed artist is
    # stripped from the title, the Doujinshi category implies EXPLICIT rating,
    # BAR censorship and right-to-left direction, namespaced tags become
    # Artist/World/Tag values, and the URL is built from gid + token.
    assert set(scraper.collect()) == set(
        [
            Artist(name="nagabe"),
            Category(value=enums.Category.DOUJINSHI),
            Censorship(value=enums.Censorship.BAR),
            Date(value=date(2023, 12, 19)),
            Direction(value=enums.Direction.RIGHT_TO_LEFT),
            Language(value=enums.Language.EN),
            OriginalTitle(value="SMELL"),
            Rating(value=enums.Rating.EXPLICIT),
            Tag(namespace="male", tag="dog boy"),
            Tag(namespace="male", tag="furry"),
            Tag(namespace="male", tag="males only"),
            Tag(namespace="male", tag="smell"),
            Tag(namespace="male", tag="yaoi"),
            Tag(namespace="other", tag="story arc"),
            Title(value="Smell ch.01 - ch.06"),
            URL("https://exhentai.org/g/2771624/43108ee23b"),
            World(name="original"),
        ]
    )
+
+
def test_exhentai_non_h():
    """A Non-H gallery is rated QUESTIONABLE and keeps the localized title."""
    scraper = Scraper(
        ExHentaiHandler(),
        """
    {
        "gid": 1025913,
        "token": "fdaabef1a2",
        "thumb": "https://s.exhentai.org/t/51/17/5117cde63cc14436c5ad7f2dd06abb52c86aff65-23642001-2866-4047-png_250.jpg",
        "title": "(C91) [Animachine (Shimahara)] Iya na Kao Sarenagara Opantsu Misete Moraitai Manga | A manga about girl showing you her panties while making a disgusted face [English] [葛の寺]",
        "title_jpn": "(C91) [アニマルマシーン (40原)] 嫌な顔されながらおパンツ見せてもらいたい漫画 [英訳]",
        "eh_category": "Non-H",
        "uploader": "葛の寺",
        "date": "2017-02-04 04:25:00",
        "parent": "https://exhentai.org/g/1025875/cfe6adccb8/",
        "expunged": false,
        "language": "English",
        "filesize": 0,
        "filecount": "23",
        "favorites": "1088",
        "rating": "4.74",
        "torrentcount": "1",
        "lang": "en",
        "tags": [
            "language:english",
            "language:translated",
            "parody:iya na kao sare nagara opantsu misete moraitai",
            "group:animachine",
            "artist:shimahara",
            "female:femdom",
            "female:schoolgirl uniform",
            "other:full color"
        ],
        "category": "exhentai",
        "subcategory": "gallery"
    }
    """,  # noqa: E501
    )

    # Expected mapping: the split title keeps only the localized half with
    # bracketed groups stripped, the Japanese title is likewise sanitized
    # into OriginalTitle, and Non-H yields no Category/Censorship entries.
    assert set(scraper.collect()) == set(
        [
            Artist(name="shimahara"),
            Date(value=date(2017, 2, 4)),
            Language(value=enums.Language.EN),
            OriginalTitle(value="嫌な顔されながらおパンツ見せてもらいたい漫画"),
            Rating(value=enums.Rating.QUESTIONABLE),
            World(name="iya na kao sare nagara opantsu misete moraitai"),
            Circle(name="animachine"),
            Tag(namespace="female", tag="femdom"),
            Tag(namespace="female", tag="schoolgirl uniform"),
            Tag(namespace="other", tag="full color"),
            Title(
                value="A manga about girl showing you her panties while making a disgusted face"  # noqa: E501
            ),
            URL("https://exhentai.org/g/1025913/fdaabef1a2"),
        ]
    )
+
+
@pytest.mark.parametrize(
    "text, sanitized",
    [
        ("(foo) Title", "Title"),
        ("[foo] {bar} =baz= Title", "Title"),
        ("Foreign Title | Localized Title", "Localized Title"),
    ],
    ids=[
        "parens at beginning",
        "bracket-likes",
        "split titles",
    ],
)
def test_exhentai_sanitizes(text, sanitized):
    """sanitize() strips bracketed groups and keeps the localized half."""
    result = exhentai_sanitize(text, split=True)
    assert result == sanitized
+
+
@pytest.mark.parametrize(
    "data, expect",
    [
        (
            {"category": "doujinshi"},
            set(
                [
                    Category(value=enums.Category.DOUJINSHI),
                    Censorship(value=enums.Censorship.BAR),
                    Rating(value=enums.Rating.EXPLICIT),
                    Direction(value=enums.Direction.RIGHT_TO_LEFT),
                ]
            ),
        ),
        (
            {"eh_category": "doujinshi"},
            set(
                [
                    Category(value=enums.Category.DOUJINSHI),
                    Censorship(value=enums.Censorship.BAR),
                    Rating(value=enums.Rating.EXPLICIT),
                    Direction(value=enums.Direction.RIGHT_TO_LEFT),
                ]
            ),
        ),
        (
            {"category": "manga"},
            set(
                [
                    Category(value=enums.Category.MANGA),
                    Censorship(value=enums.Censorship.BAR),
                    Rating(value=enums.Rating.EXPLICIT),
                    Direction(value=enums.Direction.RIGHT_TO_LEFT),
                ]
            ),
        ),
        (
            {"category": "western"},
            set(
                [
                    Censorship(value=enums.Censorship.NONE),
                    Rating(value=enums.Rating.EXPLICIT),
                ]
            ),
        ),
        (
            {"category": "artist cg"},
            set(
                [
                    Category(value=enums.Category.COMIC),
                    Censorship(value=enums.Censorship.BAR),
                    Rating(value=enums.Rating.EXPLICIT),
                ]
            ),
        ),
        (
            {"category": "game cg"},
            set(
                [
                    Category(value=enums.Category.GAME_CG),
                    Censorship(value=enums.Censorship.BAR),
                    Rating(value=enums.Rating.EXPLICIT),
                ]
            ),
        ),
        (
            {"category": "image set"},
            set(
                [
                    Category(value=enums.Category.IMAGE_SET),
                    Censorship(value=enums.Censorship.BAR),
                    Rating(value=enums.Rating.EXPLICIT),
                ]
            ),
        ),
        (
            {"category": "non-h"},
            set(
                [
                    Rating(value=enums.Rating.QUESTIONABLE),
                ]
            ),
        ),
        (
            {"category": "western", "tags": ["other:western non-h"]},
            set(
                [
                    Rating(value=enums.Rating.QUESTIONABLE),
                ]
            ),
        ),
    ],
    ids=[
        "category from category field",
        "category from eh_category field",
        "manga category",
        "western category",
        "artist cg category",
        "game cg category",
        "image set category",
        "non-h category",
        "western non-h tag",
    ],
)
def test_exhentai_parses(data, expect):
    """Each gallery category implies a fixed set of derived metadata."""
    scraper = Scraper(ExHentaiHandler(), json.dumps(data | {"gid": 1, "token": 1}))

    # Every gallery additionally yields its URL built from gid + token.
    expect.add(URL(value="https://exhentai.org/g/1/1"))

    assert set(scraper.collect()) == expect
+
+
@pytest.mark.parametrize(
    "tag, parsed",
    [
        ("parody:foo", World(name="foo")),
        ("artist:foo", Artist(name="foo")),
        ("character:foo", Character(name="foo")),
        ("group:foo", Circle(name="foo")),
        ("other:artbook", Category(value=enums.Category.ARTBOOK)),
        ("other:non-h imageset", Category(value=enums.Category.IMAGE_SET)),
        ("other:western imageset", Category(value=enums.Category.IMAGE_SET)),
        ("other:comic", Category(value=enums.Category.COMIC)),
        ("other:variant set", Category(value=enums.Category.VARIANT_SET)),
        ("other:webtoon", Category(value=enums.Category.WEBTOON)),
        ("other:full censorship", Censorship(value=enums.Censorship.FULL)),
        ("other:mosaic censorship", Censorship(value=enums.Censorship.MOSAIC)),
        ("other:uncensored", Censorship(value=enums.Censorship.NONE)),
        ("generic", Tag(namespace=None, tag="generic")),
    ],
    # BUG FIX: pytest assigns ids positionally; the previous order
    # ("parody", "group", "artist", "character") mislabeled the artist,
    # character, and group cases. Ids now match the parameter order above.
    ids=[
        "parody",
        "artist",
        "character",
        "group",
        "other:artbook",
        "other:image set",
        "other:western image set",
        "other:comic",
        "other:variant set",
        "other:webtoon",
        "other:full censorship",
        "other:mosaic censorship",
        "other:uncensored",
        "generic",
    ],
)
def test_exhentai_parses_tags(tag, parsed):
    """Each namespaced exhentai tag must parse to its dedicated scraper type."""
    scraper = Scraper(
        ExHentaiHandler(), json.dumps({"tags": [tag], "gid": 1, "token": 1})
    )
    # The scrape also yields the gallery URL plus category/rating defaults,
    # so assert on a strict superset rather than set equality.
    expect = set([URL(value="https://exhentai.org/g/1/1"), parsed])

    assert set(scraper.collect()) > expect