Diffstat (limited to 'tests')
-rw-r--r--  tests/plugins/scrapers/test_gallery_dl.py |  52
-rw-r--r--  tests/plugins/scrapers/test_handlers.py   | 594
2 files changed, 646 insertions, 0 deletions
diff --git a/tests/plugins/scrapers/test_gallery_dl.py b/tests/plugins/scrapers/test_gallery_dl.py
new file mode 100644
index 0000000..b4e7a4a
--- /dev/null
+++ b/tests/plugins/scrapers/test_gallery_dl.py
@@ -0,0 +1,52 @@
+import json
+import os
+from zipfile import ZipFile
+
+import pytest
+
+import hircine.plugins.scrapers.gallery_dl
+from hircine.plugins.scrapers.gallery_dl import GalleryDLScraper
+from hircine.scraper.types import Title
+
+
+class MockHandler:
+    source = "mock"
+
+    def scrape(self, data):
+        yield Title(data["title"])
+
+
+@pytest.fixture
+def archive_file(tmpdir):
+    file = os.path.join(tmpdir, "archive.zip")
+
+    with ZipFile(file, "x") as ziph:
+        ziph.writestr("info.json", json.dumps({"category": "mock", "title": "test"}))
+
+    yield file
+
+
+def test_does_scrape(monkeypatch, archive_file, gen_comic):
+    comic = next(gen_comic)
+    comic.archive.path = archive_file
+
+    monkeypatch.setattr(
+        hircine.plugins.scrapers.gallery_dl, "HANDLERS", {"mock": MockHandler}
+    )
+
+    scraper = GalleryDLScraper(comic)
+
+    assert scraper.is_available
+    assert scraper.source == MockHandler.source
+    assert scraper.name == f"gallery-dl info.json ({MockHandler.source})"
+    assert set(scraper.collect()) == set([Title(value="test")])
+
+
+def test_does_not_scrape_on_error(tmpdir, monkeypatch, gen_comic):
+    comic = next(gen_comic)
+    comic.archive.path = os.path.join(tmpdir, "nonexistent.zip")
+
+    scraper = GalleryDLScraper(comic)
+
+    assert scraper.data == {}
+    assert not scraper.is_available
diff --git a/tests/plugins/scrapers/test_handlers.py b/tests/plugins/scrapers/test_handlers.py
new file mode 100644
index 0000000..e9f5d0e
--- /dev/null
+++ b/tests/plugins/scrapers/test_handlers.py
@@ -0,0 +1,594 @@
+import json
+from datetime import date
+
+import pytest
+
+import hircine.enums as enums
+from hircine.plugins.scrapers.handlers.dynastyscans import DynastyScansHandler
+from hircine.plugins.scrapers.handlers.e621 import E621Handler
+from hircine.plugins.scrapers.handlers.exhentai import (
+    ExHentaiHandler,
+)
+from hircine.plugins.scrapers.handlers.exhentai import (
+    sanitize as exhentai_sanitize,
+)
+from hircine.plugins.scrapers.handlers.mangadex import MangadexHandler
+from hircine.scraper import Scraper
+from hircine.scraper.types import (
+    URL,
+    Artist,
+    Category,
+    Censorship,
+    Character,
+    Circle,
+    Date,
+    Direction,
+    Language,
+    OriginalTitle,
+    Rating,
+    Tag,
+    Title,
+    World,
+)
+
+
+class Scraper(Scraper):
+    def __init__(self, handler, json):
+        self.handler = handler
+        self.json = json
+        super().__init__(None)
+
+    def scrape(self):
+        yield from self.handler.scrape(json.loads(self.json))
+
+
+def test_dynastyscans():
+    scraper = Scraper(
+        DynastyScansHandler(),
+        """
+        {
+            "manga": "Hoshiiro GirlDrop Comic Anthology",
+            "chapter": 1,
+            "chapter_minor": "",
+            "title": "Hop, Step, Drop!",
+            "author": "Fujisawa Kamiya",
+            "group": "Cyan Steam (Stan Miller)",
+            "date": "2018-02-05 00:00:00",
+            "lang": "en",
+            "language": "English",
+            "count": 15,
+            "category": "dynastyscans",
+            "subcategory": "manga"
+        }
+        """,
+    )
+
+    assert set(scraper.collect()) == set(
+        [
+            Artist(name="Fujisawa Kamiya"),
+            Circle(name="Cyan Steam (Stan Miller)"),
+            Date(value=date(2018, 2, 5)),
+            Language(value=enums.Language.EN),
+            Title(value="Hoshiiro GirlDrop Comic Anthology Ch. 1: Hop, Step, Drop!"),
+        ]
+    )
+
+
+def test_mangadex():
+    scraper = Scraper(
+        MangadexHandler(),
+        """
+        {
+            "manga": "Shimeji Simulation",
+            "manga_id": "28b5d037-175d-4119-96f8-e860e408ebe9",
+            "title": "Danchi",
+            "volume": 1,
+            "chapter": 8,
+            "chapter_minor": "",
+            "chapter_id": "2a115ccb-de52-4b84-9166-cebd152d9396",
+            "date": "2019-09-22 04:19:15",
+            "lang": "en",
+            "language": "English",
+            "count": 12,
+            "artist": [
+                "Tsukumizu"
+            ],
+            "author": [
+                "Tsukumizu"
+            ],
+            "group": [
+                "Orchesc/a/ns"
+            ],
+            "status": "completed",
+            "tags": [
+                "Sci-Fi",
+                "Comedy",
+                "Girls' Love",
+                "4-Koma",
+                "Philosophical",
+                "School Life",
+                "Slice of Life"
+            ],
+            "category": "mangadex",
+            "subcategory": "chapter"
+        }
+        """,
+    )
+
+    assert set(scraper.collect()) == set(
+        [
+            Artist(name="Tsukumizu"),
+            Circle(name="Orchesc/a/ns"),
+            Date(value=date(2019, 9, 22)),
+            Language(value=enums.Language.EN),
+            Tag(namespace="none", tag="4-Koma"),
+            Tag(namespace="none", tag="Comedy"),
+            Tag(namespace="none", tag="Girls' Love"),
+            Tag(namespace="none", tag="Philosophical"),
+            Tag(namespace="none", tag="School Life"),
+            Tag(namespace="none", tag="Sci-Fi"),
+            Tag(namespace="none", tag="Slice of Life"),
+            Title(value="Shimeji Simulation Vol. 1, Ch. 8: Danchi"),
+            URL("https://mangadex.org/chapter/2a115ccb-de52-4b84-9166-cebd152d9396"),
+        ]
+    )
+
+
+@pytest.mark.parametrize(
+    "data, title",
+    [
+        ({"volume": 1, "chapter": 8}, "Manga Vol. 1, Ch. 8: Title"),
+        ({"volume": 0, "chapter": 1}, "Manga Ch. 1: Title"),
+        ({"volume": 0, "chapter": 0}, "Manga: Title"),
+    ],
+    ids=[
+        "volume and chapter",
+        "chapter only",
+        "none",
+    ],
+)
+def test_mangadex_handles_volume_and_chapter(data, title):
+    common = {"manga": "Manga", "title": "Title"}
+    scraper = Scraper(MangadexHandler(), json.dumps(common | data))
+
+    assert list(scraper.collect()) == [Title(value=title)]
+
+
+def test_e621_pool():
+    scraper = Scraper(
+        E621Handler(),
+        """
+        {
+            "id": 2968472,
+            "created_at": "2021-10-10T04:13:53.286-04:00",
+            "updated_at": "2024-11-02T08:58:06.724-04:00",
+            "file": {
+                "width": 800,
+                "height": 800,
+                "ext": "jpg",
+                "size": 530984,
+                "md5": "1ec7e397bb22c1454ab1986fd3f3edc5",
+                "url": "https://static1.e621.net/data/1e/c7/1ec7e397bb22c1454ab1986fd3f3edc5.jpg"
+            },
+            "preview": {
+                "width": 150,
+                "height": 150,
+                "url": "https://static1.e621.net/data/preview/1e/c7/1ec7e397bb22c1454ab1986fd3f3edc5.jpg"
+            },
+            "sample": {
+                "has": false,
+                "height": 800,
+                "width": 800,
+                "url": "https://static1.e621.net/data/1e/c7/1ec7e397bb22c1454ab1986fd3f3edc5.jpg",
+                "alternates": {}
+            },
+            "score": {
+                "up": 202,
+                "down": -1,
+                "total": 201
+            },
+            "tags": {
+                "general": [
+                    "beak"
+                ],
+                "artist": [
+                    "falseknees"
+                ],
+                "copyright": [],
+                "character": [],
+                "species": [
+                    "bird"
+                ],
+                "invalid": [],
+                "meta": [
+                    "comic",
+                    "english_text"
+                ],
+                "lore": [
+                    "parent_(lore)",
+                    "parent_and_child_(lore)"
+                ]
+            },
+            "locked_tags": [],
+            "change_seq": 60808337,
+            "flags": {
+                "pending": false,
+                "flagged": false,
+                "note_locked": false,
+                "status_locked": false,
+                "rating_locked": false,
+                "deleted": false
+            },
+            "rating": "s",
+            "fav_count": 194,
+            "sources": [
+                "https://twitter.com/FalseKnees/status/1324869853627478022"
+            ],
+            "pools": [
+                25779
+            ],
+            "relationships": {
+                "parent_id": null,
+                "has_children": false,
+                "has_active_children": false,
+                "children": []
+            },
+            "approver_id": 171673,
+            "uploader_id": 178921,
+            "description": "",
+            "comment_count": 1,
+            "is_favorited": false,
+            "has_notes": false,
+            "duration": null,
+            "num": 1,
+            "filename": "1ec7e397bb22c1454ab1986fd3f3edc5",
+            "extension": "jpg",
+            "date": "2021-10-10 08:13:53",
+            "pool": {
+                "id": 25779,
+                "name": "Kids say the darnedest shit - falseknees",
+                "created_at": "2021-10-10T04:17:07.006-04:00",
+                "updated_at": "2021-10-10T04:17:07.006-04:00",
+                "creator_id": 178921,
+                "description": "The terror of every parent.",
+                "is_active": true,
+                "category": "series",
+                "creator_name": "OneMoreAnonymous",
+                "post_count": 4
+            },
+            "category": "e621",
+            "subcategory": "pool"
+        }
+        """,
+    )
+
+    assert set(scraper.collect()) == set(
+        [
+            Artist(name="falseknees"),
+            Category(value=enums.Category.COMIC),
+            Censorship(value=enums.Censorship.NONE),
+            Date(value=date(2021, 10, 10)),
+            Language(value=enums.Language.EN),
+            Rating(value=enums.Rating.SAFE),
+            Tag(namespace="none", tag="beak"),
+            Tag(namespace="none", tag="bird"),
+            Title(value="Kids say the darnedest shit - falseknees"),
+            URL("https://e621.net/pools/25779"),
+        ]
+    )
+
+
+@pytest.mark.parametrize(
+    "data, censorship",
+    [
+        ({"tags": {"meta": ["censor_bar"]}}, enums.Censorship.BAR),
+        ({"tags": {"meta": ["mosaic_censorship"]}}, enums.Censorship.MOSAIC),
+        ({"tags": {"meta": ["uncensored"]}}, enums.Censorship.NONE),
+        ({"tags": {"meta": []}}, enums.Censorship.NONE),
+    ],
+    ids=[
+        "bars",
+        "mosaic",
+        "uncensored",
+        "uncensored (implied)",
+    ],
+)
+def test_e621_handles_censorship(data, censorship):
+    common = {"subcategory": "pool"}
+    scraper = Scraper(E621Handler(), json.dumps(common | data))
+
+    assert set(scraper.collect()) == set([Censorship(value=censorship)])
+
+
+def test_exhentai_explicit():
+    scraper = Scraper(
+        ExHentaiHandler(),
+        """
+        {
+            "gid": 2771624,
+            "token": "43108ee23b",
+            "thumb": "https://s.exhentai.org/t/12/80/1280a064a2ab3d70b9feb56bd0c55dbfc3ab6a39-309830-950-1351-jpg_250.jpg",
+            "title": "[NAGABE] Smell ch.01 - ch.06",
+            "title_jpn": "SMELL",
+            "eh_category": "Doujinshi",
+            "uploader": "randaldog",
+            "date": "2023-12-19 23:50:00",
+            "parent": "https://exhentai.org/g/2736803/b191bfed72/",
+            "expunged": false,
+            "language": "English",
+            "filesize": 74469868,
+            "filecount": "170",
+            "favorites": "751",
+            "rating": "4.83",
+            "torrentcount": "0",
+            "lang": "en",
+            "tags": [
+                "language:english",
+                "language:translated",
+                "parody:original",
+                "artist:nagabe",
+                "male:dog boy",
+                "male:furry",
+                "male:males only",
+                "male:smell",
+                "male:yaoi",
+                "other:story arc"
+            ],
+            "category": "exhentai",
+            "subcategory": "gallery"
+        }
+        """,
+    )
+
+    assert set(scraper.collect()) == set(
+        [
+            Artist(name="nagabe"),
+            Category(value=enums.Category.DOUJINSHI),
+            Censorship(value=enums.Censorship.BAR),
+            Date(value=date(2023, 12, 19)),
+            Direction(value=enums.Direction.RIGHT_TO_LEFT),
+            Language(value=enums.Language.EN),
+            OriginalTitle(value="SMELL"),
+            Rating(value=enums.Rating.EXPLICIT),
+            Tag(namespace="male", tag="dog boy"),
+            Tag(namespace="male", tag="furry"),
+            Tag(namespace="male", tag="males only"),
+            Tag(namespace="male", tag="smell"),
+            Tag(namespace="male", tag="yaoi"),
+            Tag(namespace="other", tag="story arc"),
+            Title(value="Smell ch.01 - ch.06"),
+            URL("https://exhentai.org/g/2771624/43108ee23b"),
+            World(name="original"),
+        ]
+    )
+
+
+def test_exhentai_non_h():
+    scraper = Scraper(
+        ExHentaiHandler(),
+        """
+        {
+            "gid": 1025913,
+            "token": "fdaabef1a2",
+            "thumb": "https://s.exhentai.org/t/51/17/5117cde63cc14436c5ad7f2dd06abb52c86aff65-23642001-2866-4047-png_250.jpg",
+            "title": "(C91) [Animachine (Shimahara)] Iya na Kao Sarenagara Opantsu Misete Moraitai Manga | A manga about girl showing you her panties while making a disgusted face [English] [葛の寺]",
+            "title_jpn": "(C91) [アニマルマシーン (40原)] 嫌な顔されながらおパンツ見せてもらいたい漫画 [英訳]",
+            "eh_category": "Non-H",
+            "uploader": "葛の寺",
+            "date": "2017-02-04 04:25:00",
+            "parent": "https://exhentai.org/g/1025875/cfe6adccb8/",
+            "expunged": false,
+            "language": "English",
+            "filesize": 0,
+            "filecount": "23",
+            "favorites": "1088",
+            "rating": "4.74",
+            "torrentcount": "1",
+            "lang": "en",
+            "tags": [
+                "language:english",
+                "language:translated",
+                "parody:iya na kao sare nagara opantsu misete moraitai",
+                "group:animachine",
+                "artist:shimahara",
+                "female:femdom",
+                "female:schoolgirl uniform",
+                "other:full color"
+            ],
+            "category": "exhentai",
+            "subcategory": "gallery"
+        }
+        """,  # noqa: E501
+    )
+
+    assert set(scraper.collect()) == set(
+        [
+            Artist(name="shimahara"),
+            Date(value=date(2017, 2, 4)),
+            Language(value=enums.Language.EN),
+            OriginalTitle(value="嫌な顔されながらおパンツ見せてもらいたい漫画"),
+            Rating(value=enums.Rating.QUESTIONABLE),
+            World(name="iya na kao sare nagara opantsu misete moraitai"),
+            Circle(name="animachine"),
+            Tag(namespace="female", tag="femdom"),
+            Tag(namespace="female", tag="schoolgirl uniform"),
+            Tag(namespace="other", tag="full color"),
+            Title(
+                value="A manga about girl showing you her panties while making a disgusted face"  # noqa: E501
+            ),
+            URL("https://exhentai.org/g/1025913/fdaabef1a2"),
+        ]
+    )
+
+
+@pytest.mark.parametrize(
+    "text, sanitized",
+    [
+        ("(foo) Title", "Title"),
+        ("[foo] {bar} =baz= Title", "Title"),
+        ("Foreign Title | Localized Title", "Localized Title"),
+    ],
+    ids=[
+        "parens at beginning",
+        "bracket-likes",
+        "split titles",
+    ],
+)
+def test_exhentai_sanitizes(text, sanitized):
+    assert exhentai_sanitize(text, split=True) == sanitized
+
+
+@pytest.mark.parametrize(
+    "data, expect",
+    [
+        (
+            {"category": "doujinshi"},
+            set(
+                [
+                    Category(value=enums.Category.DOUJINSHI),
+                    Censorship(value=enums.Censorship.BAR),
+                    Rating(value=enums.Rating.EXPLICIT),
+                    Direction(value=enums.Direction.RIGHT_TO_LEFT),
+                ]
+            ),
+        ),
+        (
+            {"eh_category": "doujinshi"},
+            set(
+                [
+                    Category(value=enums.Category.DOUJINSHI),
+                    Censorship(value=enums.Censorship.BAR),
+                    Rating(value=enums.Rating.EXPLICIT),
+                    Direction(value=enums.Direction.RIGHT_TO_LEFT),
+                ]
+            ),
+        ),
+        (
+            {"category": "manga"},
+            set(
+                [
+                    Category(value=enums.Category.MANGA),
+                    Censorship(value=enums.Censorship.BAR),
+                    Rating(value=enums.Rating.EXPLICIT),
+                    Direction(value=enums.Direction.RIGHT_TO_LEFT),
+                ]
+            ),
+        ),
+        (
+            {"category": "western"},
+            set(
+                [
+                    Censorship(value=enums.Censorship.NONE),
+                    Rating(value=enums.Rating.EXPLICIT),
+                ]
+            ),
+        ),
+        (
+            {"category": "artist cg"},
+            set(
+                [
+                    Category(value=enums.Category.COMIC),
+                    Censorship(value=enums.Censorship.BAR),
+                    Rating(value=enums.Rating.EXPLICIT),
+                ]
+            ),
+        ),
+        (
+            {"category": "game cg"},
+            set(
+                [
+                    Category(value=enums.Category.GAME_CG),
+                    Censorship(value=enums.Censorship.BAR),
+                    Rating(value=enums.Rating.EXPLICIT),
+                ]
+            ),
+        ),
+        (
+            {"category": "image set"},
+            set(
+                [
+                    Category(value=enums.Category.IMAGE_SET),
+                    Censorship(value=enums.Censorship.BAR),
+                    Rating(value=enums.Rating.EXPLICIT),
+                ]
+            ),
+        ),
+        (
+            {"category": "non-h"},
+            set(
+                [
+                    Rating(value=enums.Rating.QUESTIONABLE),
+                ]
+            ),
+        ),
+        (
+            {"category": "western", "tags": ["other:western non-h"]},
+            set(
+                [
+                    Rating(value=enums.Rating.QUESTIONABLE),
+                ]
+            ),
+        ),
+    ],
+    ids=[
+        "category from category field",
+        "category from eh_category field",
+        "manga category",
+        "western category",
+        "artist cg category",
+        "game cg category",
+        "image set category",
+        "non-h category",
+        "western non-h tag",
+    ],
+)
+def test_exhentai_parses(data, expect):
+    scraper = Scraper(ExHentaiHandler(), json.dumps(data | {"gid": 1, "token": 1}))
+
+    expect.add(URL(value="https://exhentai.org/g/1/1"))
+
+    assert set(scraper.collect()) == expect
+
+
+@pytest.mark.parametrize(
+    "tag, parsed",
+    [
+        ("parody:foo", World(name="foo")),
+        ("artist:foo", Artist(name="foo")),
+        ("character:foo", Character(name="foo")),
+        ("group:foo", Circle(name="foo")),
+        ("other:artbook", Category(value=enums.Category.ARTBOOK)),
+        ("other:non-h imageset", Category(value=enums.Category.IMAGE_SET)),
+        ("other:western imageset", Category(value=enums.Category.IMAGE_SET)),
+        ("other:comic", Category(value=enums.Category.COMIC)),
+        ("other:variant set", Category(value=enums.Category.VARIANT_SET)),
+        ("other:webtoon", Category(value=enums.Category.WEBTOON)),
+        ("other:full censorship", Censorship(value=enums.Censorship.FULL)),
+        ("other:mosaic censorship", Censorship(value=enums.Censorship.MOSAIC)),
+        ("other:uncensored", Censorship(value=enums.Censorship.NONE)),
+        ("generic", Tag(namespace=None, tag="generic")),
+    ],
+    ids=[
+        "parody",
+        "group",
+        "artist",
+        "character",
+        "other:artbook",
+        "other:image set",
+        "other:western image set",
+        "other:comic",
+        "other:variant set",
+        "other:webtoon",
+        "other:full censorship",
+        "other:mosaic censorship",
+        "other:uncensored",
+        "generic",
+    ],
+)
+def test_exhentai_parses_tags(tag, parsed):
+    scraper = Scraper(
+        ExHentaiHandler(), json.dumps({"tags": [tag], "gid": 1, "token": 1})
+    )
+    expect = set([URL(value="https://exhentai.org/g/1/1"), parsed])
+
+    assert set(scraper.collect()) > expect