import json
from datetime import date

import pytest

import hircine.enums as enums
from hircine.plugins.scrapers.handlers.dynastyscans import DynastyScansHandler
from hircine.plugins.scrapers.handlers.e621 import E621Handler
from hircine.plugins.scrapers.handlers.exhentai import (
    ExHentaiHandler,
)
from hircine.plugins.scrapers.handlers.exhentai import (
    sanitize as exhentai_sanitize,
)
from hircine.plugins.scrapers.handlers.mangadex import MangadexHandler
from hircine.scraper import Scraper
from hircine.scraper.types import (
    URL,
    Artist,
    Category,
    Censorship,
    Character,
    Circle,
    Date,
    Direction,
    Language,
    OriginalTitle,
    Rating,
    Tag,
    Title,
    World,
)


class Scraper(Scraper):
    """Test scraper that replays a fixed JSON document through a handler.

    Deliberately reuses the name of the imported base class, so every
    ``Scraper(...)`` call in this module builds this test variant.
    """

    def __init__(self, handler, json):
        # ``json`` is the raw JSON text; it shadows the ``json`` module only
        # inside this method.
        self.handler = handler
        self.json = json
        # NOTE(review): the base constructor is given ``None``; confirm that
        # is the intended argument for hircine.scraper.Scraper.
        super().__init__(None)

    def scrape(self):
        """Parse the stored JSON text and yield the handler's results."""
        yield from self.handler.scrape(json.loads(self.json))


def test_dynastyscans():
    """A sample DynastyScans chapter payload yields the expected metadata."""
    scraper = Scraper(
        DynastyScansHandler(),
        """
        {
            "manga": "Hoshiiro GirlDrop Comic Anthology",
            "chapter": 1,
            "chapter_minor": "",
            "title": "Hop, Step, Drop!",
            "author": "Fujisawa Kamiya",
            "group": "Cyan Steam (Stan Miller)",
            "date": "2018-02-05 00:00:00",
            "lang": "en",
            "language": "English",
            "count": 15,
            "category": "dynastyscans",
            "subcategory": "manga"
        }
        """,
    )

    assert set(scraper.collect()) == {
        Artist(name="Fujisawa Kamiya"),
        Circle(name="Cyan Steam (Stan Miller)"),
        Date(value=date(2018, 2, 5)),
        Language(value=enums.Language.EN),
        Title(value="Hoshiiro GirlDrop Comic Anthology Ch. 1: Hop, Step, Drop!"),
    }


def test_mangadex():
    """A sample MangaDex chapter payload yields the expected metadata."""
    scraper = Scraper(
        MangadexHandler(),
        """
        {
            "manga": "Shimeji Simulation",
            "manga_id": "28b5d037-175d-4119-96f8-e860e408ebe9",
            "title": "Danchi",
            "volume": 1,
            "chapter": 8,
            "chapter_minor": "",
            "chapter_id": "2a115ccb-de52-4b84-9166-cebd152d9396",
            "date": "2019-09-22 04:19:15",
            "lang": "en",
            "language": "English",
            "count": 12,
            "artist": [
                "Tsukumizu"
            ],
            "author": [
                "Tsukumizu"
            ],
            "group": [
                "Orchesc/a/ns"
            ],
            "status": "completed",
            "tags": [
                "Sci-Fi",
                "Comedy",
                "Girls' Love",
                "4-Koma",
                "Philosophical",
                "School Life",
                "Slice of Life"
            ],
            "category": "mangadex",
            "subcategory": "chapter"
        }
        """,
    )

    assert set(scraper.collect()) == {
        Artist(name="Tsukumizu"),
        Circle(name="Orchesc/a/ns"),
        Date(value=date(2019, 9, 22)),
        Language(value=enums.Language.EN),
        Tag(namespace="none", tag="4-Koma"),
        Tag(namespace="none", tag="Comedy"),
        Tag(namespace="none", tag="Girls' Love"),
        Tag(namespace="none", tag="Philosophical"),
        Tag(namespace="none", tag="School Life"),
        Tag(namespace="none", tag="Sci-Fi"),
        Tag(namespace="none", tag="Slice of Life"),
        Title(value="Shimeji Simulation Vol. 1, Ch. 8: Danchi"),
        URL("https://mangadex.org/chapter/2a115ccb-de52-4b84-9166-cebd152d9396"),
    }


@pytest.mark.parametrize(
    "data, title",
    [
        ({"volume": 1, "chapter": 8}, "Manga Vol. 1, Ch. 8: Title"),
        ({"volume": 0, "chapter": 1}, "Manga Ch. 1: Title"),
        ({"volume": 0, "chapter": 0}, "Manga: Title"),
    ],
    ids=[
        "volume and chapter",
        "chapter only",
        "none",
    ],
)
def test_mangadex_handles_volume_and_chapter(data, title):
    """Zero-valued volume/chapter numbers are left out of the title."""
    common = {"manga": "Manga", "title": "Title"}
    scraper = Scraper(MangadexHandler(), json.dumps(common | data))

    assert list(scraper.collect()) == [Title(value=title)]


def test_e621_pool():
    """A sample e621 pool post payload yields the expected metadata."""
    scraper = Scraper(
        E621Handler(),
        """
        {
            "id": 2968472,
            "created_at": "2021-10-10T04:13:53.286-04:00",
            "updated_at": "2024-11-02T08:58:06.724-04:00",
            "file": {
                "width": 800,
                "height": 800,
                "ext": "jpg",
                "size": 530984,
                "md5": "1ec7e397bb22c1454ab1986fd3f3edc5",
                "url": "https://static1.e621.net/data/1e/c7/1ec7e397bb22c1454ab1986fd3f3edc5.jpg"
            },
            "preview": {
                "width": 150,
                "height": 150,
                "url": "https://static1.e621.net/data/preview/1e/c7/1ec7e397bb22c1454ab1986fd3f3edc5.jpg"
            },
            "sample": {
                "has": false,
                "height": 800,
                "width": 800,
                "url": "https://static1.e621.net/data/1e/c7/1ec7e397bb22c1454ab1986fd3f3edc5.jpg",
                "alternates": {}
            },
            "score": {
                "up": 202,
                "down": -1,
                "total": 201
            },
            "tags": {
                "general": [
                    "beak"
                ],
                "artist": [
                    "falseknees"
                ],
                "copyright": [],
                "character": [],
                "species": [
                    "bird"
                ],
                "invalid": [],
                "meta": [
                    "comic",
                    "english_text"
                ],
                "lore": [
                    "parent_(lore)",
                    "parent_and_child_(lore)"
                ]
            },
            "locked_tags": [],
            "change_seq": 60808337,
            "flags": {
                "pending": false,
                "flagged": false,
                "note_locked": false,
                "status_locked": false,
                "rating_locked": false,
                "deleted": false
            },
            "rating": "s",
            "fav_count": 194,
            "sources": [
                "https://twitter.com/FalseKnees/status/1324869853627478022"
            ],
            "pools": [
                25779
            ],
            "relationships": {
                "parent_id": null,
                "has_children": false,
                "has_active_children": false,
                "children": []
            },
            "approver_id": 171673,
            "uploader_id": 178921,
            "description": "",
            "comment_count": 1,
            "is_favorited": false,
            "has_notes": false,
            "duration": null,
            "num": 1,
            "filename": "1ec7e397bb22c1454ab1986fd3f3edc5",
            "extension": "jpg",
            "date": "2021-10-10 08:13:53",
            "pool": {
                "id": 25779,
                "name": "Kids say the darnedest shit - falseknees",
                "created_at": "2021-10-10T04:17:07.006-04:00",
                "updated_at": "2021-10-10T04:17:07.006-04:00",
                "creator_id": 178921,
                "description": "The terror of every parent.",
                "is_active": true,
                "category": "series",
                "creator_name": "OneMoreAnonymous",
                "post_count": 4
            },
            "category": "e621",
            "subcategory": "pool"
        }
        """,
    )

    assert set(scraper.collect()) == {
        Artist(name="falseknees"),
        Category(value=enums.Category.COMIC),
        Censorship(value=enums.Censorship.NONE),
        Date(value=date(2021, 10, 10)),
        Language(value=enums.Language.EN),
        Rating(value=enums.Rating.SAFE),
        Tag(namespace="none", tag="beak"),
        Tag(namespace="none", tag="bird"),
        Title(value="Kids say the darnedest shit - falseknees"),
        URL("https://e621.net/pools/25779"),
    }


@pytest.mark.parametrize(
    "data, censorship",
    [
        ({"tags": {"meta": ["censor_bar"]}}, enums.Censorship.BAR),
        ({"tags": {"meta": ["mosaic_censorship"]}}, enums.Censorship.MOSAIC),
        ({"tags": {"meta": ["uncensored"]}}, enums.Censorship.NONE),
        ({"tags": {"meta": []}}, enums.Censorship.NONE),
    ],
    ids=[
        "bars",
        "mosaic",
        "uncensored",
        "uncensored (implied)",
    ],
)
def test_e621_handles_censorship(data, censorship):
    """e621 meta tags select the censorship value; none means uncensored."""
    common = {"subcategory": "pool"}
    scraper = Scraper(E621Handler(), json.dumps(common | data))

    assert set(scraper.collect()) == {Censorship(value=censorship)}


def test_exhentai_explicit():
    """A sample ExHentai doujinshi gallery yields the expected metadata."""
    scraper = Scraper(
        ExHentaiHandler(),
        """
        {
            "gid": 2771624,
            "token": "43108ee23b",
            "thumb": "https://s.exhentai.org/t/12/80/1280a064a2ab3d70b9feb56bd0c55dbfc3ab6a39-309830-950-1351-jpg_250.jpg",
            "title": "[NAGABE] Smell ch.01 - ch.06",
            "title_jpn": "SMELL",
            "eh_category": "Doujinshi",
            "uploader": "randaldog",
            "date": "2023-12-19 23:50:00",
            "parent": "https://exhentai.org/g/2736803/b191bfed72/",
            "expunged": false,
            "language": "English",
            "filesize": 74469868,
            "filecount": "170",
            "favorites": "751",
            "rating": "4.83",
            "torrentcount": "0",
            "lang": "en",
            "tags": [
                "language:english",
                "language:translated",
                "parody:original",
                "artist:nagabe",
                "male:dog boy",
                "male:furry",
                "male:males only",
                "male:smell",
                "male:yaoi",
                "other:story arc"
            ],
            "category": "exhentai",
            "subcategory": "gallery"
        }
        """,
    )

    assert set(scraper.collect()) == {
        Artist(name="nagabe"),
        Category(value=enums.Category.DOUJINSHI),
        Censorship(value=enums.Censorship.BAR),
        Date(value=date(2023, 12, 19)),
        Direction(value=enums.Direction.RIGHT_TO_LEFT),
        Language(value=enums.Language.EN),
        OriginalTitle(value="SMELL"),
        Rating(value=enums.Rating.EXPLICIT),
        Tag(namespace="male", tag="dog boy"),
        Tag(namespace="male", tag="furry"),
        Tag(namespace="male", tag="males only"),
        Tag(namespace="male", tag="smell"),
        Tag(namespace="male", tag="yaoi"),
        Tag(namespace="other", tag="story arc"),
        Title(value="Smell ch.01 - ch.06"),
        URL("https://exhentai.org/g/2771624/43108ee23b"),
        World(name="original"),
    }


def test_exhentai_non_h():
    """A sample ExHentai Non-H gallery yields the expected metadata."""
    scraper = Scraper(
        ExHentaiHandler(),
        """
        {
            "gid": 1025913,
            "token": "fdaabef1a2",
            "thumb": "https://s.exhentai.org/t/51/17/5117cde63cc14436c5ad7f2dd06abb52c86aff65-23642001-2866-4047-png_250.jpg",
            "title": "(C91) [Animachine (Shimahara)] Iya na Kao Sarenagara Opantsu Misete Moraitai Manga | A manga about girl showing you her panties while making a disgusted face [English] [葛の寺]",
            "title_jpn": "(C91) [アニマルマシーン (40原)] 嫌な顔されながらおパンツ見せてもらいたい漫画 [英訳]",
            "eh_category": "Non-H",
            "uploader": "葛の寺",
            "date": "2017-02-04 04:25:00",
            "parent": "https://exhentai.org/g/1025875/cfe6adccb8/",
            "expunged": false,
            "language": "English",
            "filesize": 0,
            "filecount": "23",
            "favorites": "1088",
            "rating": "4.74",
            "torrentcount": "1",
            "lang": "en",
            "tags": [
                "language:english",
                "language:translated",
                "parody:iya na kao sare nagara opantsu misete moraitai",
                "group:animachine",
                "artist:shimahara",
                "female:femdom",
                "female:schoolgirl uniform",
                "other:full color"
            ],
            "category": "exhentai",
            "subcategory": "gallery"
        }
        """,  # noqa: E501
    )

    assert set(scraper.collect()) == {
        Artist(name="shimahara"),
        Censorship(value=enums.Censorship.NONE),
        Circle(name="animachine"),
        Date(value=date(2017, 2, 4)),
        Language(value=enums.Language.EN),
        OriginalTitle(value="嫌な顔されながらおパンツ見せてもらいたい漫画"),
        Rating(value=enums.Rating.QUESTIONABLE),
        Tag(namespace="female", tag="femdom"),
        Tag(namespace="female", tag="schoolgirl uniform"),
        Tag(namespace="other", tag="full color"),
        Title(
            value="A manga about girl showing you her panties while making a disgusted face"  # noqa: E501
        ),
        URL("https://exhentai.org/g/1025913/fdaabef1a2"),
        World(name="iya na kao sare nagara opantsu misete moraitai"),
    }


@pytest.mark.parametrize(
    "text, sanitized",
    [
        ("(foo) Title", "Title"),
        ("[foo] {bar} =baz= Title", "Title"),
        ("Foreign Title | Localized Title", "Localized Title"),
    ],
    ids=[
        "parens at beginning",
        "bracket-likes",
        "split titles",
    ],
)
def test_exhentai_sanitizes(text, sanitized):
    """``sanitize(..., split=True)`` reduces each sample title as expected."""
    assert exhentai_sanitize(text, split=True) == sanitized


@pytest.mark.parametrize(
    "data, expect",
    [
        (
            {"category": "doujinshi"},
            {
                Category(value=enums.Category.DOUJINSHI),
                Censorship(value=enums.Censorship.BAR),
                Rating(value=enums.Rating.EXPLICIT),
                Direction(value=enums.Direction.RIGHT_TO_LEFT),
            },
        ),
        (
            {"eh_category": "doujinshi"},
            {
                Category(value=enums.Category.DOUJINSHI),
                Censorship(value=enums.Censorship.BAR),
                Rating(value=enums.Rating.EXPLICIT),
                Direction(value=enums.Direction.RIGHT_TO_LEFT),
            },
        ),
        (
            {"category": "manga"},
            {
                Category(value=enums.Category.MANGA),
                Censorship(value=enums.Censorship.BAR),
                Rating(value=enums.Rating.EXPLICIT),
                Direction(value=enums.Direction.RIGHT_TO_LEFT),
            },
        ),
        (
            {"category": "western"},
            {
                Censorship(value=enums.Censorship.NONE),
                Rating(value=enums.Rating.EXPLICIT),
            },
        ),
        (
            {"category": "artist cg"},
            {
                Category(value=enums.Category.COMIC),
                Censorship(value=enums.Censorship.BAR),
                Rating(value=enums.Rating.EXPLICIT),
            },
        ),
        (
            {"category": "game cg"},
            {
                Category(value=enums.Category.GAME_CG),
                Censorship(value=enums.Censorship.BAR),
                Rating(value=enums.Rating.EXPLICIT),
            },
        ),
        (
            {"category": "image set"},
            {
                Category(value=enums.Category.IMAGE_SET),
                Censorship(value=enums.Censorship.BAR),
                Rating(value=enums.Rating.EXPLICIT),
            },
        ),
        (
            {"category": "non-h"},
            {
                Censorship(value=enums.Censorship.NONE),
                Rating(value=enums.Rating.QUESTIONABLE),
            },
        ),
        (
            {"category": "western", "tags": ["other:western non-h"]},
            {
                Censorship(value=enums.Censorship.NONE),
                Rating(value=enums.Rating.QUESTIONABLE),
            },
        ),
    ],
    ids=[
        "category from category field",
        "category from eh_category field",
        "manga category",
        "western category",
        "artist cg category",
        "game cg category",
        "image set category",
        "non-h category",
        "western non-h tag",
    ],
)
def test_exhentai_parses(data, expect):
    """Each ExHentai category input yields exactly its expected items."""
    scraper = Scraper(ExHentaiHandler(), json.dumps(data | {"gid": 1, "token": 1}))

    # Build a new set rather than mutating the parametrized ``expect``, which
    # is created once at collection time.
    expected = expect | {URL(value="https://exhentai.org/g/1/1")}

    assert set(scraper.collect()) == expected


@pytest.mark.parametrize(
    "tag, parsed",
    [
        ("parody:foo", World(name="foo")),
        ("artist:foo", Artist(name="foo")),
        ("character:foo", Character(name="foo")),
        ("group:foo", Circle(name="foo")),
        ("other:artbook", Category(value=enums.Category.ARTBOOK)),
        ("other:non-h imageset", Category(value=enums.Category.IMAGE_SET)),
        ("other:western imageset", Category(value=enums.Category.IMAGE_SET)),
        ("other:comic", Category(value=enums.Category.COMIC)),
        ("other:variant set", Category(value=enums.Category.VARIANT_SET)),
        ("other:webtoon", Category(value=enums.Category.WEBTOON)),
        ("other:full censorship", Censorship(value=enums.Censorship.FULL)),
        ("other:mosaic censorship", Censorship(value=enums.Censorship.MOSAIC)),
        ("other:uncensored", Censorship(value=enums.Censorship.NONE)),
        ("generic", Tag(namespace=None, tag="generic")),
    ],
    # Keep in the same order as the cases above: pytest pairs ids with cases
    # positionally, so a mismatch reports results under the wrong id.
    ids=[
        "parody",
        "artist",
        "character",
        "group",
        "other:artbook",
        "other:image set",
        "other:western image set",
        "other:comic",
        "other:variant set",
        "other:webtoon",
        "other:full censorship",
        "other:mosaic censorship",
        "other:uncensored",
        "generic",
    ],
)
def test_exhentai_parses_tags(tag, parsed):
    """A single ExHentai tag is parsed into its corresponding item."""
    scraper = Scraper(
        ExHentaiHandler(), json.dumps({"tags": [tag], "gid": 1, "token": 1})
    )

    expect = {URL(value="https://exhentai.org/g/1/1"), parsed}

    # Strict superset: the handler must yield both expected items plus at
    # least one further item.
    assert set(scraper.collect()) > expect