diff options
Diffstat (limited to '')
-rw-r--r-- | src/hircine/plugins/scrapers/handlers/__init__.py | 0 | ||||
-rw-r--r-- | src/hircine/plugins/scrapers/handlers/dynastyscans.py | 41 | ||||
-rw-r--r-- | src/hircine/plugins/scrapers/handlers/e621.py | 81 | ||||
-rw-r--r-- | src/hircine/plugins/scrapers/handlers/exhentai.py | 139 | ||||
-rw-r--r-- | src/hircine/plugins/scrapers/handlers/mangadex.py | 54 |
5 files changed, 315 insertions, 0 deletions
diff --git a/src/hircine/plugins/scrapers/handlers/__init__.py b/src/hircine/plugins/scrapers/handlers/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/hircine/plugins/scrapers/handlers/__init__.py diff --git a/src/hircine/plugins/scrapers/handlers/dynastyscans.py b/src/hircine/plugins/scrapers/handlers/dynastyscans.py new file mode 100644 index 0000000..ded015b --- /dev/null +++ b/src/hircine/plugins/scrapers/handlers/dynastyscans.py @@ -0,0 +1,41 @@ +import hircine.enums as enums +from hircine.scraper import ScrapeWarning +from hircine.scraper.types import ( + Artist, + Circle, + Date, + Language, + Title, +) +from hircine.scraper.utils import parse_dict + + +class DynastyScansHandler: + source = "dynastyscans" + + def scrape(self, data): + parsers = { + "date": Date.from_iso, + "lang": self.parse_language, + "author": Artist, + "group": Circle, + } + + yield from parse_dict(parsers, data) + + if manga := data.get("manga"): + title = manga + + if chapter := data.get("chapter"): + title = title + f" Ch. {chapter}" + + if subtitle := data.get("title"): + title = title + f": {subtitle}" + + yield Title(title) + + def parse_language(self, input): + try: + return Language(value=enums.Language[input.upper()]) + except (KeyError, ValueError) as e: + raise ScrapeWarning(f"Could not parse language: '{input}'") from e diff --git a/src/hircine/plugins/scrapers/handlers/e621.py b/src/hircine/plugins/scrapers/handlers/e621.py new file mode 100644 index 0000000..6b798fd --- /dev/null +++ b/src/hircine/plugins/scrapers/handlers/e621.py @@ -0,0 +1,81 @@ +import hircine.enums as enums +from hircine.scraper import ScrapeWarning +from hircine.scraper.types import ( + URL, + Artist, + Category, + Censorship, + Character, + Date, + Language, + Rating, + Tag, + Title, + World, +) +from hircine.scraper.utils import parse_dict + + +def replace_underscore(fun): + return lambda input: fun(input.replace("_", " ")) + + +class E621Handler: + source = "e621" + + ratings = { + "e": Rating(enums.Rating.EXPLICIT), + "q": Rating(enums.Rating.QUESTIONABLE), + "s": Rating(enums.Rating.SAFE), + } + + def scrape(self, data): + match data.get("subcategory"): + case "pool": + yield from self.scrape_pool(data) + + def scrape_pool(self, data): + parsers = { + "date": Date.from_iso, + "rating": self.ratings.get, + "pool": { + "id": lambda pid: URL(f"https://e621.net/pools/{pid}"), + "name": Title, + }, + "tags": { + "general": replace_underscore(Tag.from_string), + "artist": replace_underscore(Artist), + "character": replace_underscore(Character), + "copyright": replace_underscore(World), + "species": replace_underscore(Tag.from_string), + "meta": self.parse_meta, + }, + } + + self.is_likely_uncensored = True + + yield from parse_dict(parsers, data) + + if self.is_likely_uncensored: + yield Censorship(enums.Censorship.NONE) + + def parse_meta(self, input): + match input: + case "comic": + return Category(enums.Category.COMIC) + case "censor_bar": + self.is_likely_uncensored = False + return Censorship(enums.Censorship.BAR) + case "mosaic_censorship": + self.is_likely_uncensored = False + return Censorship(enums.Censorship.MOSAIC) + case "uncensored": + return Censorship(enums.Censorship.NONE) + + if input.endswith("_text"): + lang, _ = input.split("_text", 1) + + try: + return Language(value=enums.Language(lang.capitalize())) + except ValueError as e: + raise ScrapeWarning(f"Could not parse language: '{input}'") from e diff --git a/src/hircine/plugins/scrapers/handlers/exhentai.py b/src/hircine/plugins/scrapers/handlers/exhentai.py new file mode 100644 index 0000000..12c22d7 --- /dev/null +++ b/src/hircine/plugins/scrapers/handlers/exhentai.py @@ -0,0 +1,139 @@ +import re + +import hircine.enums as enums +from hircine.scraper import ScrapeWarning +from hircine.scraper.types import ( + URL, + Artist, + Category, + Censorship, + Character, + Circle, + Date, + Direction, + Language, + OriginalTitle, + Rating, + Tag, + Title, + World, +) +from hircine.scraper.utils import parse_dict + + +def sanitize(title, split=False): + text = re.sub(r"\[[^\]]+\]|{[^}]+}|=[^=]+=|^\([^)]+\)", "", title) + if "|" in text and split: + orig, text = text.split("|", 1) + + return re.sub(r"\s{2,}", " ", text).strip() + + +class ExHentaiHandler: + source = "exhentai" + + def scrape(self, data): + category_field = "eh_category" if "eh_category" in data else "category" + + parsers = { + category_field: self.parse_category, + "posted": Date.from_timestamp, + "date": Date.from_iso, + "lang": self.parse_language, + "tags": self.parse_tag, + "title": lambda t: Title(sanitize(t, split=True)), + "title_jpn": lambda t: OriginalTitle(sanitize(t)), + } + + self.is_likely_pornographic = True + self.is_likely_rtl = False + self.has_censorship_tag = False + self.is_western = False + + yield from parse_dict(parsers, data) + + if self.is_likely_pornographic: + yield Rating(enums.Rating.EXPLICIT) + + if not self.has_censorship_tag: + if self.is_western: + yield Censorship(enums.Censorship.NONE) + else: + yield Censorship(enums.Censorship.BAR) + + if self.is_likely_rtl: + yield Direction(enums.Direction.RIGHT_TO_LEFT) + + if (gid := data["gid"]) and (token := data["token"]): + yield URL(f"https://exhentai.org/g/{gid}/{token}") + + def parse_category(self, input): + match input.lower(): + case "doujinshi": + self.is_likely_rtl = True + return Category(value=enums.Category.DOUJINSHI) + case "manga": + self.is_likely_rtl = True + return Category(value=enums.Category.MANGA) + case "western": + self.is_western = True + case "artist cg": + return Category(value=enums.Category.COMIC) + case "game cg": + return Category(value=enums.Category.GAME_CG) + case "image set": + return Category(value=enums.Category.IMAGE_SET) + case "non-h": + self.is_likely_pornographic = False + return Rating(value=enums.Rating.QUESTIONABLE) + + def parse_tag(self, input): + match input.split(":"): + case ["parody", value]: + return World(value) + case ["group", value]: + return Circle(value) + case ["artist", value]: + return Artist(value) + case ["character", value]: + return Character(value) + case ["language", value]: + return self.parse_language(value, from_value=True) + case ["other", "artbook"]: + return Category(enums.Category.ARTBOOK) + case ["other", "full censorship"]: + self.has_censorship_tag = True + return Censorship(enums.Censorship.FULL) + case ["other", "mosaic censorship"]: + self.has_censorship_tag = True + return Censorship(enums.Censorship.MOSAIC) + case ["other", "uncensored"]: + self.has_censorship_tag = True + return Censorship(enums.Censorship.NONE) + case ["other", "non-h imageset" | "western imageset"]: + return Category(value=enums.Category.IMAGE_SET) + case ["other", "western non-h"]: + self.is_likely_pornographic = False + return Rating(value=enums.Rating.QUESTIONABLE) + case ["other", "comic"]: + return Category(value=enums.Category.COMIC) + case ["other", "variant set"]: + return Category(value=enums.Category.VARIANT_SET) + case ["other", "webtoon"]: + return Category(value=enums.Category.WEBTOON) + case [namespace, tag]: + return Tag(namespace=namespace, tag=tag) + case [tag]: + return Tag(namespace=None, tag=tag) + + def parse_language(self, input, from_value=False): + if not input or input in ["translated", "speechless", "N/A"]: + return + + try: + if from_value: + return Language(value=enums.Language(input.capitalize())) + else: + return Language(value=enums.Language[input.upper()]) + except (KeyError, ValueError) as e: + raise ScrapeWarning(f"Could not parse language: '{input}'") from e diff --git a/src/hircine/plugins/scrapers/handlers/mangadex.py b/src/hircine/plugins/scrapers/handlers/mangadex.py new file mode 100644 index 0000000..7bc371d --- /dev/null +++ b/src/hircine/plugins/scrapers/handlers/mangadex.py @@ -0,0 +1,54 @@ +import hircine.enums as enums +from hircine.scraper import ScrapeWarning +from hircine.scraper.types import ( + URL, + Artist, + Circle, + Date, + Language, + Tag, + Title, +) +from hircine.scraper.utils import parse_dict + + +class MangadexHandler: + source = "mangadex" + + def scrape(self, data): + parsers = { + "date": Date.from_iso, + "lang": self.parse_language, + "tags": Tag.from_string, + "artist": Artist, + "author": Artist, + "group": Circle, + } + + yield from parse_dict(parsers, data) + + if chapter_id := data.get("chapter_id"): + yield URL(f"https://mangadex.org/chapter/{chapter_id}") + + if manga := data.get("manga"): + title = manga + + if volume := data.get("volume"): + title = title + f" Vol. {volume}" + + if chapter := data.get("chapter"): + if volume: + title = title + f", Ch. {chapter}" + else: + title = title + f"Ch. {chapter}" + + if subtitle := data.get("title"): + title = title + f": {subtitle}" + + yield Title(title) + + def parse_language(self, input): + try: + return Language(value=enums.Language[input.upper()]) + except (KeyError, ValueError) as e: + raise ScrapeWarning(f"Could not parse language: '{input}'") from e |