From 22fd0e2b1b117e78529c9f562cab79da4c02797e Mon Sep 17 00:00:00 2001
From: Wolfgang Müller
Date: Thu, 16 Jan 2025 17:38:48 +0100
Subject: backend/plugins: Add scraper for info.yaml from schale.network

---
 docs/plugins/builtin.rst                       |  2 +
 pyproject.toml                                 |  1 +
 src/hircine/plugins/scrapers/schale_network.py | 82 ++++++++++++++++++++++++
 tests/plugins/scrapers/test_schale_network.py  | 88 ++++++++++++++++++++++++++
 4 files changed, 173 insertions(+)
 create mode 100644 src/hircine/plugins/scrapers/schale_network.py
 create mode 100644 tests/plugins/scrapers/test_schale_network.py

diff --git a/docs/plugins/builtin.rst b/docs/plugins/builtin.rst
index 61d531f..7b815ce 100644
--- a/docs/plugins/builtin.rst
+++ b/docs/plugins/builtin.rst
@@ -14,3 +14,5 @@ Scrapers
 .. autoclass:: hircine.plugins.scrapers.ehentai_api.EHentaiAPIScraper()
 
 .. autoclass:: hircine.plugins.scrapers.anchira.AnchiraYamlScraper()
+
+.. autoclass:: hircine.plugins.scrapers.schale_network.SchaleNetworkScraper()
diff --git a/pyproject.toml b/pyproject.toml
index 20861dc..f83359b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,6 +13,7 @@ hircine = 'hircine.cli:main'
 gallery_dl = "hircine.plugins.scrapers.gallery_dl:GalleryDLScraper"
 ehentai_api = "hircine.plugins.scrapers.ehentai_api:EHentaiAPIScraper"
 anchira_yaml = "hircine.plugins.scrapers.anchira:AnchiraYamlScraper"
+schale_network_yaml = "hircine.plugins.scrapers.schale_network:SchaleNetworkScraper"
 
 [tool.poetry.dependencies]
 python = "^3.12"
diff --git a/src/hircine/plugins/scrapers/schale_network.py b/src/hircine/plugins/scrapers/schale_network.py
new file mode 100644
index 0000000..e38cfe8
--- /dev/null
+++ b/src/hircine/plugins/scrapers/schale_network.py
@@ -0,0 +1,103 @@
+import re
+
+import yaml
+
+import hircine.enums as enums
+from hircine.scraper import Scraper
+from hircine.scraper.types import (
+    Artist,
+    Censorship,
+    Circle,
+    Direction,
+    Language,
+    Tag,
+    Title,
+)
+from hircine.scraper.utils import open_archive_file, parse_dict
+
+SOURCE_REGEX = re.compile(r"^SchaleNetwork:")
+
+
+class SchaleNetworkScraper(Scraper):
+    """
+    A scraper for ``info.yaml`` files found in archives downloaded from
+    *schale.network*.
+
+    .. list-table::
+       :align: left
+
+       * - **Requires**
+         - ``info.yaml`` in the archive or as a sidecar.
+       * - **Source**
+         - ``schale.network``
+    """
+
+    name = "schale.network info.yaml"
+    source = "schale.network"
+
+    def __init__(self, comic):
+        """Load ``info.yaml`` and decide whether this scraper applies."""
+        super().__init__(comic)
+
+        self.data = self.load()
+        source = self.data.get("source")
+
+        # Only a string can carry the "SchaleNetwork:" prefix; any other YAML
+        # value (number, list, ...) would make re.match() raise TypeError.
+        if isinstance(source, str) and re.match(SOURCE_REGEX, source):
+            self.is_available = True
+
+    def load(self):
+        """
+        Parse ``info.yaml`` for the comic's archive.
+
+        Returns the parsed mapping, or an empty dict when the file cannot be
+        opened or parsed or when its top-level value is not a mapping.
+        """
+        try:
+            with open_archive_file(self.comic.archive, "info.yaml") as yif:
+                data = yaml.safe_load(yif)
+        except Exception:
+            # Best effort: an unreadable file just leaves the scraper
+            # unavailable.
+            return {}
+
+        # safe_load() gives None for an empty document and may give a list or
+        # scalar, neither of which supports the .get() lookup in __init__.
+        return data if isinstance(data, dict) else {}
+
+    def scrape(self):
+        """
+        Yield the items parse_dict() produces from the loaded data with the
+        per-key parsers below, followed by a fixed right-to-left direction.
+        """
+        parsers = {
+            "title": Title,
+            "artist": Artist,
+            "circle": Circle,
+            "general": Tag.from_string,
+            "male": lambda s: Tag(namespace="male", tag=s),
+            "female": lambda s: Tag(namespace="female", tag=s),
+            "mixed": lambda s: Tag(namespace="mixed", tag=s),
+            "language": self.parse_language,
+            "other": self.parse_other,
+        }
+
+        yield from parse_dict(parsers, self.data)
+
+        yield Direction(enums.Direction.RIGHT_TO_LEFT)
+
+    def parse_language(self, input):
+        """Return a Language for *input*; None if empty or ``translated``."""
+        if not input or input in ["translated"]:
+            return
+
+        return Language.from_name(input)
+
+    def parse_other(self, input):
+        """Map ``uncensored`` to a Censorship; treat anything else as a tag."""
+        match input:
+            case "uncensored":
+                return Censorship(value=enums.Censorship.NONE)
+            case _:
+                return Tag.from_string(input)
diff --git a/tests/plugins/scrapers/test_schale_network.py b/tests/plugins/scrapers/test_schale_network.py
new file mode 100644
index 0000000..236520b
--- /dev/null
+++ b/tests/plugins/scrapers/test_schale_network.py
@@ -0,0 +1,88 @@
+import os
+from zipfile import ZipFile
+
+import pytest
+
+import hircine.enums as enums
+from hircine.plugins.scrapers.schale_network import SchaleNetworkScraper
+from hircine.scraper.types import (
+    Artist,
+    Censorship,
+    Circle,
+    Direction,
+    Language,
+    Tag,
+    Title,
+)
+
+
+@pytest.fixture
+def archive_file(tmpdir):
+    """Yield the path of a zip archive that contains a sample ``info.yaml``."""
+    info_yaml = """
+source: SchaleNetwork:/g/1/1
+title: 'Example Title'
+general:
+  - example
+artist:
+  - example
+circle:
+  - example
+magazine:
+  - example
+male:
+  - example
+female:
+  - example
+mixed:
+  - example
+language:
+  - english
+  - translated
+other:
+  - uncensored
+  - vanilla
+"""
+    archive_path = os.path.join(tmpdir, "archive.zip")
+
+    with ZipFile(archive_path, "x") as archive:
+        archive.writestr("info.yaml", info_yaml)
+
+    yield archive_path
+
+
+def test_does_scrape(archive_file, gen_comic):
+    """Scrape the fixture archive and check every item the scraper emits."""
+    comic = next(gen_comic)
+    comic.archive.path = archive_file
+
+    scraper = SchaleNetworkScraper(comic)
+
+    assert scraper.is_available
+    assert scraper.source == SchaleNetworkScraper.source
+    assert scraper.name == "schale.network info.yaml"
+
+    # The "magazine" key and the "translated" marker must not yield items.
+    assert set(scraper.collect()) == {
+        Artist(name="example"),
+        Circle(name="example"),
+        Direction(value=enums.Direction.RIGHT_TO_LEFT),
+        Censorship(value=enums.Censorship.NONE),
+        Language(value=enums.Language.EN),
+        Tag(namespace="none", tag="example"),
+        Tag(namespace="none", tag="vanilla"),
+        Tag(namespace="male", tag="example"),
+        Tag(namespace="female", tag="example"),
+        Tag(namespace="mixed", tag="example"),
+        Title(value="Example Title"),
+    }
+
+
+def test_does_not_scrape_on_error(tmpdir, gen_comic):
+    """A missing archive leaves the scraper with no data and unavailable."""
+    missing_comic = next(gen_comic)
+    missing_comic.archive.path = os.path.join(tmpdir, "nonexistent.zip")
+    scraper = SchaleNetworkScraper(missing_comic)
+
+    assert scraper.data == {}
+    assert not scraper.is_available
-- 
cgit v1.2.3-2-gb3c3