import hircine.enums as enums import hircine.plugins import hircine.scraper.types as scraped import pytest from conftest import DB, Response from hircine.scraper import ScrapeError, Scraper, ScrapeWarning @pytest.fixture def query_comic_scrapers(schema_execute): query = """ query comicScrapers($id: Int!) { comicScrapers(id: $id) { __typename id name } } """ async def _execute(id): return await schema_execute(query, {"id": id}) return _execute @pytest.fixture def query_scrape_comic(schema_execute): query = """ query scrapeComic($id: Int!, $scraper: String!) { scrapeComic(id: $id, scraper: $scraper) { __typename ... on ScrapeComicResult { data { title originalTitle url artists category censorship characters circles date direction language layout rating tags worlds } warnings } ... on Error { message } ... on ScraperNotFoundError { name } ... on ScraperNotAvailableError { scraper comicId } ... on IDNotFoundError { id } } } """ async def _execute(id, scraper): return await schema_execute(query, {"id": id, "scraper": scraper}) return _execute @pytest.fixture def scrapers(empty_plugins): class GoodScraper(Scraper): name = "Good Scraper" is_available = True source = "good" def scrape(self): yield scraped.Title("Arid Savannah Adventures") yield scraped.OriginalTitle("Arid Savannah Hijinx") yield scraped.URL("file:///home/savannah/adventures") yield scraped.Language(enums.Language.EN) yield scraped.Date.from_iso("2010-07-05") yield scraped.Direction(enums.Direction["LEFT_TO_RIGHT"]) yield scraped.Layout(enums.Layout.SINGLE) yield scraped.Rating(enums.Rating.SAFE) yield scraped.Category(enums.Category.MANGA) yield scraped.Censorship(enums.Censorship.NONE) yield scraped.Tag.from_string("animal:small") yield scraped.Tag.from_string("animal:medium") yield scraped.Tag.from_string("animal:big") yield scraped.Tag.from_string("animal:massive") yield scraped.Artist("alan smithee") yield scraped.Artist("david agnew") yield scraped.Character("greta giraffe") yield scraped.Character("bob bear") yield scraped.Character("rico rhinoceros") yield scraped.Character("ziggy zebra") yield scraped.Circle("archimedes") yield scraped.World("animal friends") class DuplicateScraper(Scraper): name = "Duplicate Scraper" is_available = True source = "dupe" def gen(self): yield scraped.Title("Arid Savannah Adventures") yield scraped.OriginalTitle("Arid Savannah Hijinx") yield scraped.URL("file:///home/savannah/adventures") yield scraped.Language(enums.Language.EN) yield scraped.Date.from_iso("2010-07-05") yield scraped.Direction(enums.Direction["LEFT_TO_RIGHT"]) yield scraped.Layout(enums.Layout.SINGLE) yield scraped.Rating(enums.Rating.SAFE) yield scraped.Category(enums.Category.MANGA) yield scraped.Censorship(enums.Censorship.NONE) yield scraped.Tag.from_string("animal:small") yield scraped.Tag.from_string("animal:medium") yield scraped.Tag.from_string("animal:big") yield scraped.Tag.from_string("animal:massive") yield scraped.Artist("alan smithee") yield scraped.Artist("david agnew") yield scraped.Character("greta giraffe") yield scraped.Character("bob bear") yield scraped.Character("rico rhinoceros") yield scraped.Character("ziggy zebra") yield scraped.Circle("archimedes") yield scraped.World("animal friends") def scrape(self): yield from self.gen() yield from self.gen() class WarnScraper(Scraper): name = "Warn Scraper" is_available = True source = "warn" def warn_on_purpose(self, item): raise ScrapeWarning(f"Could not parse: {item}") def scrape(self): yield scraped.Title("Arid Savannah Adventures") yield lambda: self.warn_on_purpose("Arid Savannah Hijinx") yield scraped.Language(enums.Language.EN) class FailScraper(Scraper): name = "Fail Scraper" is_available = True source = "fail" def scrape(self): yield scraped.Title("Arid Savannah Adventures") raise ScrapeError("Could not continue") yield scraped.Language(enums.Language.EN) class UnavailableScraper(Scraper): name = "Unavailable Scraper" is_available = False source = "unavail" def scrape(self): yield None hircine.plugins.register_scraper("good", GoodScraper) hircine.plugins.register_scraper("dupe", DuplicateScraper) hircine.plugins.register_scraper("warn", WarnScraper) hircine.plugins.register_scraper("fail", FailScraper) hircine.plugins.register_scraper("unavail", UnavailableScraper) return [ ("good", GoodScraper), ("dupe", DuplicateScraper), ("warn", WarnScraper), ("fail", FailScraper), ("unavail", UnavailableScraper), ] @pytest.mark.anyio async def test_comic_scrapers(gen_comic, query_comic_scrapers, scrapers): comic = await DB.add(next(gen_comic)) response = Response(await query_comic_scrapers(comic.id)) assert isinstance((response.data), list) available_scrapers = [] for name, cls in sorted(scrapers, key=lambda s: s[1].name): instance = cls(comic) if instance.is_available: available_scrapers.append((name, cls)) assert len(response.data) == len(available_scrapers) data = iter(response.data) for id, scraper in available_scrapers: field = next(data) assert field["__typename"] == "ComicScraper" assert field["id"] == id assert field["name"] == scraper.name @pytest.mark.anyio async def test_comic_empty_for_missing_comic(gen_comic, query_comic_scrapers, scrapers): response = Response(await query_comic_scrapers(1)) assert response.data == [] @pytest.mark.anyio async def test_scrape_comic(gen_comic, query_scrape_comic, scrapers): comic = await DB.add(next(gen_comic)) response = Response(await query_scrape_comic(comic.id, "good")) response.assert_is("ScrapeComicResult") assert response.warnings == [] scraped_comic = response.data["data"] assert scraped_comic["title"] == "Arid Savannah Adventures" assert scraped_comic["originalTitle"] == "Arid Savannah Hijinx" assert scraped_comic["url"] == "file:///home/savannah/adventures" assert scraped_comic["language"] == "EN" assert scraped_comic["date"] == "2010-07-05" assert scraped_comic["rating"] == "SAFE" assert scraped_comic["category"] == "MANGA" assert scraped_comic["direction"] == "LEFT_TO_RIGHT" assert scraped_comic["layout"] == "SINGLE" assert scraped_comic["tags"] == [ "animal:small", "animal:medium", "animal:big", "animal:massive", ] assert scraped_comic["artists"] == ["alan smithee", "david agnew"] assert scraped_comic["characters"] == [ "greta giraffe", "bob bear", "rico rhinoceros", "ziggy zebra", ] assert scraped_comic["circles"] == ["archimedes"] assert scraped_comic["worlds"] == ["animal friends"] @pytest.mark.anyio async def test_scrape_comic_removes_duplicates(gen_comic, query_scrape_comic, scrapers): comic = await DB.add(next(gen_comic)) response = Response(await query_scrape_comic(comic.id, "dupe")) response.assert_is("ScrapeComicResult") assert response.warnings == [] scraped_comic = response.data["data"] assert scraped_comic["title"] == "Arid Savannah Adventures" assert scraped_comic["originalTitle"] == "Arid Savannah Hijinx" assert scraped_comic["url"] == "file:///home/savannah/adventures" assert scraped_comic["language"] == "EN" assert scraped_comic["date"] == "2010-07-05" assert scraped_comic["rating"] == "SAFE" assert scraped_comic["category"] == "MANGA" assert scraped_comic["direction"] == "LEFT_TO_RIGHT" assert scraped_comic["layout"] == "SINGLE" assert scraped_comic["tags"] == [ "animal:small", "animal:medium", "animal:big", "animal:massive", ] assert scraped_comic["artists"] == ["alan smithee", "david agnew"] assert scraped_comic["characters"] == [ "greta giraffe", "bob bear", "rico rhinoceros", "ziggy zebra", ] assert scraped_comic["circles"] == ["archimedes"] assert scraped_comic["worlds"] == ["animal friends"] @pytest.mark.anyio async def test_scrape_comic_fails_comic_not_found(query_scrape_comic, scrapers): response = Response(await query_scrape_comic(1, "good")) response.assert_is("IDNotFoundError") assert response.id == 1 assert response.message == "Comic ID not found: '1'" @pytest.mark.anyio async def test_scrape_comic_fails_scraper_not_found( gen_comic, query_scrape_comic, scrapers ): comic = await DB.add(next(gen_comic)) response = Response(await query_scrape_comic(comic.id, "missing")) response.assert_is("ScraperNotFoundError") assert response.name == "missing" assert response.message == "Scraper not found: 'missing'" @pytest.mark.anyio async def test_scrape_comic_fails_scraper_not_available( gen_comic, query_scrape_comic, scrapers ): comic = await DB.add(next(gen_comic)) response = Response(await query_scrape_comic(comic.id, "unavail")) response.assert_is("ScraperNotAvailableError") assert response.scraper == "unavail" assert response.comicId == comic.id assert response.message == f"Scraper unavail not available for comic ID {comic.id}" async def test_scrape_comic_with_transformer(gen_comic, query_scrape_comic, scrapers): def keep(generator, info): for item in generator: match item: case scraped.Title(): yield item hircine.plugins.transformers = [keep] comic = await DB.add(next(gen_comic)) response = Response(await query_scrape_comic(comic.id, "good")) response.assert_is("ScrapeComicResult") assert response.warnings == [] scraped_comic = response.data["data"] assert scraped_comic["title"] == "Arid Savannah Adventures" assert scraped_comic["originalTitle"] is None assert scraped_comic["url"] is None assert scraped_comic["language"] is None assert scraped_comic["date"] is None assert scraped_comic["rating"] is None assert scraped_comic["category"] is None assert scraped_comic["censorship"] is None assert scraped_comic["direction"] is None assert scraped_comic["layout"] is None assert scraped_comic["tags"] == [] assert scraped_comic["artists"] == [] assert scraped_comic["characters"] == [] assert scraped_comic["circles"] == [] assert scraped_comic["worlds"] == [] @pytest.mark.anyio async def test_scrape_comic_catches_warnings(gen_comic, query_scrape_comic, scrapers): comic = await DB.add(next(gen_comic)) response = Response(await query_scrape_comic(comic.id, "warn")) response.assert_is("ScrapeComicResult") assert response.warnings == ["Could not parse: Arid Savannah Hijinx"] scraped_comic = response.data["data"] assert scraped_comic["title"] == "Arid Savannah Adventures" assert scraped_comic["originalTitle"] is None assert scraped_comic["language"] == "EN" assert scraped_comic["date"] is None assert scraped_comic["rating"] is None assert scraped_comic["category"] is None assert scraped_comic["direction"] is None assert scraped_comic["layout"] is None assert scraped_comic["tags"] == [] assert scraped_comic["artists"] == [] assert scraped_comic["characters"] == [] assert scraped_comic["circles"] == [] assert scraped_comic["worlds"] == [] @pytest.mark.anyio async def test_scrape_comic_fails_with_scraper_error( gen_comic, query_scrape_comic, scrapers ): comic = await DB.add(next(gen_comic)) response = Response(await query_scrape_comic(comic.id, "fail")) response.assert_is("ScraperError") assert response.message == "Scraping failed: Could not continue"