import json
from datetime import date
import pytest
import hircine.enums as enums
from hircine.plugins.scrapers.handlers.dynastyscans import DynastyScansHandler
from hircine.plugins.scrapers.handlers.e621 import E621Handler
from hircine.plugins.scrapers.handlers.exhentai import (
ExHentaiHandler,
)
from hircine.plugins.scrapers.handlers.exhentai import (
sanitize as exhentai_sanitize,
)
from hircine.plugins.scrapers.handlers.mangadex import MangadexHandler
from hircine.scraper import Scraper
from hircine.scraper.types import (
URL,
Artist,
Category,
Censorship,
Character,
Circle,
Date,
Direction,
Language,
OriginalTitle,
Rating,
Tag,
Title,
World,
)
class Scraper(Scraper):
    """Test double that drives a scraper handler with a canned JSON payload.

    NOTE(review): deliberately shadows the imported hircine.scraper.Scraper
    base class; renaming it would touch every test in this module.
    """
    def __init__(self, handler, json):
        # `json` here is the raw JSON text under test; it shadows the json
        # module only inside this method (scrape() still sees the module).
        self.handler = handler
        self.json = json
        super().__init__(None)
    def scrape(self):
        # Parse the canned payload and defer item generation to the handler.
        yield from self.handler.scrape(json.loads(self.json))
def test_dynastyscans():
    """Dynasty Scans chapter metadata maps onto the expected scraper types."""
    payload = """
    {
        "manga": "Hoshiiro GirlDrop Comic Anthology",
        "chapter": 1,
        "chapter_minor": "",
        "title": "Hop, Step, Drop!",
        "author": "Fujisawa Kamiya",
        "group": "Cyan Steam (Stan Miller)",
        "date": "2018-02-05 00:00:00",
        "lang": "en",
        "language": "English",
        "count": 15,
        "category": "dynastyscans",
        "subcategory": "manga"
    }
    """
    scraper = Scraper(DynastyScansHandler(), payload)
    # The title is assembled from manga name, chapter number and chapter title.
    expected = {
        Artist(name="Fujisawa Kamiya"),
        Circle(name="Cyan Steam (Stan Miller)"),
        Date(value=date(2018, 2, 5)),
        Language(value=enums.Language.EN),
        Title(value="Hoshiiro GirlDrop Comic Anthology Ch. 1: Hop, Step, Drop!"),
    }
    assert set(scraper.collect()) == expected
def test_mangadex():
    """Mangadex chapter metadata maps onto the expected scraper types."""
    payload = """
    {
        "manga": "Shimeji Simulation",
        "manga_id": "28b5d037-175d-4119-96f8-e860e408ebe9",
        "title": "Danchi",
        "volume": 1,
        "chapter": 8,
        "chapter_minor": "",
        "chapter_id": "2a115ccb-de52-4b84-9166-cebd152d9396",
        "date": "2019-09-22 04:19:15",
        "lang": "en",
        "language": "English",
        "count": 12,
        "artist": [
            "Tsukumizu"
        ],
        "author": [
            "Tsukumizu"
        ],
        "group": [
            "Orchesc/a/ns"
        ],
        "status": "completed",
        "tags": [
            "Sci-Fi",
            "Comedy",
            "Girls' Love",
            "4-Koma",
            "Philosophical",
            "School Life",
            "Slice of Life"
        ],
        "category": "mangadex",
        "subcategory": "chapter"
    }
    """
    scraper = Scraper(MangadexHandler(), payload)
    # Tags come through un-namespaced; the URL is derived from the chapter id.
    expected = {
        Artist(name="Tsukumizu"),
        Circle(name="Orchesc/a/ns"),
        Date(value=date(2019, 9, 22)),
        Language(value=enums.Language.EN),
        Tag(namespace="none", tag="4-Koma"),
        Tag(namespace="none", tag="Comedy"),
        Tag(namespace="none", tag="Girls' Love"),
        Tag(namespace="none", tag="Philosophical"),
        Tag(namespace="none", tag="School Life"),
        Tag(namespace="none", tag="Sci-Fi"),
        Tag(namespace="none", tag="Slice of Life"),
        Title(value="Shimeji Simulation Vol. 1, Ch. 8: Danchi"),
        URL("https://mangadex.org/chapter/2a115ccb-de52-4b84-9166-cebd152d9396"),
    }
    assert set(scraper.collect()) == expected
@pytest.mark.parametrize(
    "data, title",
    [
        ({"volume": 1, "chapter": 8}, "Manga Vol. 1, Ch. 8: Title"),
        ({"volume": 0, "chapter": 1}, "Manga Ch. 1: Title"),
        ({"volume": 0, "chapter": 0}, "Manga: Title"),
    ],
    ids=["volume and chapter", "chapter only", "none"],
)
def test_mangadex_handles_volume_and_chapter(data, title):
    """Zero-valued volume/chapter numbers are omitted from the title."""
    payload = json.dumps({"manga": "Manga", "title": "Title", **data})
    scraper = Scraper(MangadexHandler(), payload)
    assert list(scraper.collect()) == [Title(value=title)]
def test_e621_pool():
    """An e621 pool post maps onto the expected scraper types."""
    payload = """
    {
        "id": 2968472,
        "created_at": "2021-10-10T04:13:53.286-04:00",
        "updated_at": "2024-11-02T08:58:06.724-04:00",
        "file": {
            "width": 800,
            "height": 800,
            "ext": "jpg",
            "size": 530984,
            "md5": "1ec7e397bb22c1454ab1986fd3f3edc5",
            "url": "https://static1.e621.net/data/1e/c7/1ec7e397bb22c1454ab1986fd3f3edc5.jpg"
        },
        "preview": {
            "width": 150,
            "height": 150,
            "url": "https://static1.e621.net/data/preview/1e/c7/1ec7e397bb22c1454ab1986fd3f3edc5.jpg"
        },
        "sample": {
            "has": false,
            "height": 800,
            "width": 800,
            "url": "https://static1.e621.net/data/1e/c7/1ec7e397bb22c1454ab1986fd3f3edc5.jpg",
            "alternates": {}
        },
        "score": {
            "up": 202,
            "down": -1,
            "total": 201
        },
        "tags": {
            "general": [
                "beak"
            ],
            "artist": [
                "falseknees"
            ],
            "copyright": [],
            "character": [],
            "species": [
                "bird"
            ],
            "invalid": [],
            "meta": [
                "comic",
                "english_text"
            ],
            "lore": [
                "parent_(lore)",
                "parent_and_child_(lore)"
            ]
        },
        "locked_tags": [],
        "change_seq": 60808337,
        "flags": {
            "pending": false,
            "flagged": false,
            "note_locked": false,
            "status_locked": false,
            "rating_locked": false,
            "deleted": false
        },
        "rating": "s",
        "fav_count": 194,
        "sources": [
            "https://twitter.com/FalseKnees/status/1324869853627478022"
        ],
        "pools": [
            25779
        ],
        "relationships": {
            "parent_id": null,
            "has_children": false,
            "has_active_children": false,
            "children": []
        },
        "approver_id": 171673,
        "uploader_id": 178921,
        "description": "",
        "comment_count": 1,
        "is_favorited": false,
        "has_notes": false,
        "duration": null,
        "num": 1,
        "filename": "1ec7e397bb22c1454ab1986fd3f3edc5",
        "extension": "jpg",
        "date": "2021-10-10 08:13:53",
        "pool": {
            "id": 25779,
            "name": "Kids say the darnedest shit - falseknees",
            "created_at": "2021-10-10T04:17:07.006-04:00",
            "updated_at": "2021-10-10T04:17:07.006-04:00",
            "creator_id": 178921,
            "description": "The terror of every parent.",
            "is_active": true,
            "category": "series",
            "creator_name": "OneMoreAnonymous",
            "post_count": 4
        },
        "category": "e621",
        "subcategory": "pool"
    }
    """
    scraper = Scraper(E621Handler(), payload)
    # general/species tags become plain tags; "comic" and "english_text" meta
    # tags map to the category and language; the pool name becomes the title.
    expected = {
        Artist(name="falseknees"),
        Category(value=enums.Category.COMIC),
        Censorship(value=enums.Censorship.NONE),
        Date(value=date(2021, 10, 10)),
        Language(value=enums.Language.EN),
        Rating(value=enums.Rating.SAFE),
        Tag(namespace="none", tag="beak"),
        Tag(namespace="none", tag="bird"),
        Title(value="Kids say the darnedest shit - falseknees"),
        URL("https://e621.net/pools/25779"),
    }
    assert set(scraper.collect()) == expected
@pytest.mark.parametrize(
    "data, censorship",
    [
        ({"tags": {"meta": ["censor_bar"]}}, enums.Censorship.BAR),
        ({"tags": {"meta": ["mosaic_censorship"]}}, enums.Censorship.MOSAIC),
        ({"tags": {"meta": ["uncensored"]}}, enums.Censorship.NONE),
        ({"tags": {"meta": []}}, enums.Censorship.NONE),
    ],
    ids=["bars", "mosaic", "uncensored", "uncensored (implied)"],
)
def test_e621_handles_censorship(data, censorship):
    """Meta tags select the censorship value; no meta tag implies uncensored."""
    payload = json.dumps({"subcategory": "pool", **data})
    scraper = Scraper(E621Handler(), payload)
    assert set(scraper.collect()) == {Censorship(value=censorship)}
def test_exhentai_explicit():
    """An explicit ExHentai gallery maps onto the expected scraper types."""
    payload = """
    {
        "gid": 2771624,
        "token": "43108ee23b",
        "thumb": "https://s.exhentai.org/t/12/80/1280a064a2ab3d70b9feb56bd0c55dbfc3ab6a39-309830-950-1351-jpg_250.jpg",
        "title": "[NAGABE] Smell ch.01 - ch.06",
        "title_jpn": "SMELL",
        "eh_category": "Doujinshi",
        "uploader": "randaldog",
        "date": "2023-12-19 23:50:00",
        "parent": "https://exhentai.org/g/2736803/b191bfed72/",
        "expunged": false,
        "language": "English",
        "filesize": 74469868,
        "filecount": "170",
        "favorites": "751",
        "rating": "4.83",
        "torrentcount": "0",
        "lang": "en",
        "tags": [
            "language:english",
            "language:translated",
            "parody:original",
            "artist:nagabe",
            "male:dog boy",
            "male:furry",
            "male:males only",
            "male:smell",
            "male:yaoi",
            "other:story arc"
        ],
        "category": "exhentai",
        "subcategory": "gallery"
    }
    """
    scraper = Scraper(ExHentaiHandler(), payload)
    # Doujinshi implies explicit rating, bar censorship and RTL reading
    # direction; the bracketed artist prefix is stripped from the title.
    expected = {
        Artist(name="nagabe"),
        Category(value=enums.Category.DOUJINSHI),
        Censorship(value=enums.Censorship.BAR),
        Date(value=date(2023, 12, 19)),
        Direction(value=enums.Direction.RIGHT_TO_LEFT),
        Language(value=enums.Language.EN),
        OriginalTitle(value="SMELL"),
        Rating(value=enums.Rating.EXPLICIT),
        Tag(namespace="male", tag="dog boy"),
        Tag(namespace="male", tag="furry"),
        Tag(namespace="male", tag="males only"),
        Tag(namespace="male", tag="smell"),
        Tag(namespace="male", tag="yaoi"),
        Tag(namespace="other", tag="story arc"),
        Title(value="Smell ch.01 - ch.06"),
        URL("https://exhentai.org/g/2771624/43108ee23b"),
        World(name="original"),
    }
    assert set(scraper.collect()) == expected
def test_exhentai_non_h():
    """A Non-H ExHentai gallery maps onto the expected scraper types."""
    payload = """
    {
        "gid": 1025913,
        "token": "fdaabef1a2",
        "thumb": "https://s.exhentai.org/t/51/17/5117cde63cc14436c5ad7f2dd06abb52c86aff65-23642001-2866-4047-png_250.jpg",
        "title": "(C91) [Animachine (Shimahara)] Iya na Kao Sarenagara Opantsu Misete Moraitai Manga | A manga about girl showing you her panties while making a disgusted face [English] [葛の寺]",
        "title_jpn": "(C91) [アニマルマシーン (40原)] 嫌な顔されながらおパンツ見せてもらいたい漫画 [英訳]",
        "eh_category": "Non-H",
        "uploader": "葛の寺",
        "date": "2017-02-04 04:25:00",
        "parent": "https://exhentai.org/g/1025875/cfe6adccb8/",
        "expunged": false,
        "language": "English",
        "filesize": 0,
        "filecount": "23",
        "favorites": "1088",
        "rating": "4.74",
        "torrentcount": "1",
        "lang": "en",
        "tags": [
            "language:english",
            "language:translated",
            "parody:iya na kao sare nagara opantsu misete moraitai",
            "group:animachine",
            "artist:shimahara",
            "female:femdom",
            "female:schoolgirl uniform",
            "other:full color"
        ],
        "category": "exhentai",
        "subcategory": "gallery"
    }
    """  # noqa: E501
    scraper = Scraper(ExHentaiHandler(), payload)
    # Non-H implies questionable rating with no category/censorship/direction;
    # the localized half of the split title is kept, brackets stripped.
    expected = {
        Artist(name="shimahara"),
        Date(value=date(2017, 2, 4)),
        Language(value=enums.Language.EN),
        OriginalTitle(value="嫌な顔されながらおパンツ見せてもらいたい漫画"),
        Rating(value=enums.Rating.QUESTIONABLE),
        World(name="iya na kao sare nagara opantsu misete moraitai"),
        Circle(name="animachine"),
        Tag(namespace="female", tag="femdom"),
        Tag(namespace="female", tag="schoolgirl uniform"),
        Tag(namespace="other", tag="full color"),
        Title(
            value="A manga about girl showing you her panties while making a disgusted face"  # noqa: E501
        ),
        URL("https://exhentai.org/g/1025913/fdaabef1a2"),
    }
    assert set(scraper.collect()) == expected
@pytest.mark.parametrize(
    "text, sanitized",
    [
        ("(foo) Title", "Title"),
        ("[foo] {bar} =baz= Title", "Title"),
        ("Foreign Title | Localized Title", "Localized Title"),
    ],
    ids=["parens at beginning", "bracket-likes", "split titles"],
)
def test_exhentai_sanitizes(text, sanitized):
    """sanitize() drops bracketed groups and keeps the localized title half."""
    result = exhentai_sanitize(text, split=True)
    assert result == sanitized
@pytest.mark.parametrize(
    "data, expect",
    [
        (
            {"category": "doujinshi"},
            {
                Category(value=enums.Category.DOUJINSHI),
                Censorship(value=enums.Censorship.BAR),
                Rating(value=enums.Rating.EXPLICIT),
                Direction(value=enums.Direction.RIGHT_TO_LEFT),
            },
        ),
        (
            {"eh_category": "doujinshi"},
            {
                Category(value=enums.Category.DOUJINSHI),
                Censorship(value=enums.Censorship.BAR),
                Rating(value=enums.Rating.EXPLICIT),
                Direction(value=enums.Direction.RIGHT_TO_LEFT),
            },
        ),
        (
            {"category": "manga"},
            {
                Category(value=enums.Category.MANGA),
                Censorship(value=enums.Censorship.BAR),
                Rating(value=enums.Rating.EXPLICIT),
                Direction(value=enums.Direction.RIGHT_TO_LEFT),
            },
        ),
        (
            {"category": "western"},
            {
                Censorship(value=enums.Censorship.NONE),
                Rating(value=enums.Rating.EXPLICIT),
            },
        ),
        (
            {"category": "artist cg"},
            {
                Category(value=enums.Category.COMIC),
                Censorship(value=enums.Censorship.BAR),
                Rating(value=enums.Rating.EXPLICIT),
            },
        ),
        (
            {"category": "game cg"},
            {
                Category(value=enums.Category.GAME_CG),
                Censorship(value=enums.Censorship.BAR),
                Rating(value=enums.Rating.EXPLICIT),
            },
        ),
        (
            {"category": "image set"},
            {
                Category(value=enums.Category.IMAGE_SET),
                Censorship(value=enums.Censorship.BAR),
                Rating(value=enums.Rating.EXPLICIT),
            },
        ),
        (
            {"category": "non-h"},
            {
                Rating(value=enums.Rating.QUESTIONABLE),
            },
        ),
        (
            {"category": "western", "tags": ["other:western non-h"]},
            {
                Rating(value=enums.Rating.QUESTIONABLE),
            },
        ),
    ],
    ids=[
        "category from category field",
        "category from eh_category field",
        "manga category",
        "western category",
        "artist cg category",
        "game cg category",
        "image set category",
        "non-h category",
        "western non-h tag",
    ],
)
def test_exhentai_parses(data, expect):
    """Gallery categories imply rating, censorship and reading direction."""
    payload = json.dumps({**data, "gid": 1, "token": 1})
    scraper = Scraper(ExHentaiHandler(), payload)
    # Every gallery scrape also yields its canonical URL.
    assert set(scraper.collect()) == expect | {URL(value="https://exhentai.org/g/1/1")}
@pytest.mark.parametrize(
    "tag, parsed",
    [
        ("parody:foo", World(name="foo")),
        ("artist:foo", Artist(name="foo")),
        ("character:foo", Character(name="foo")),
        ("group:foo", Circle(name="foo")),
        ("other:artbook", Category(value=enums.Category.ARTBOOK)),
        ("other:non-h imageset", Category(value=enums.Category.IMAGE_SET)),
        ("other:western imageset", Category(value=enums.Category.IMAGE_SET)),
        ("other:comic", Category(value=enums.Category.COMIC)),
        ("other:variant set", Category(value=enums.Category.VARIANT_SET)),
        ("other:webtoon", Category(value=enums.Category.WEBTOON)),
        ("other:full censorship", Censorship(value=enums.Censorship.FULL)),
        ("other:mosaic censorship", Censorship(value=enums.Censorship.MOSAIC)),
        ("other:uncensored", Censorship(value=enums.Censorship.NONE)),
        ("generic", Tag(namespace=None, tag="generic")),
    ],
    # FIX: the ids previously ran "parody, group, artist, character", which
    # mislabelled the artist/character/group cases in pytest output; the
    # order now matches the parameter list above.
    ids=[
        "parody",
        "artist",
        "character",
        "group",
        "other:artbook",
        "other:image set",
        "other:western image set",
        "other:comic",
        "other:variant set",
        "other:webtoon",
        "other:full censorship",
        "other:mosaic censorship",
        "other:uncensored",
        "generic",
    ],
)
def test_exhentai_parses_tags(tag, parsed):
    """Each ExHentai tag namespace maps to the corresponding scraper type."""
    scraper = Scraper(
        ExHentaiHandler(), json.dumps({"tags": [tag], "gid": 1, "token": 1})
    )
    # Strict superset: the handler also emits defaults (rating, censorship,
    # ...) beyond the URL and the single parsed tag under test.
    expect = set([URL(value="https://exhentai.org/g/1/1"), parsed])
    assert set(scraper.collect()) > expect