From d1d654ebac2d51e3841675faeb56480e440f622f Mon Sep 17 00:00:00 2001 From: Wolfgang Müller Date: Tue, 5 Mar 2024 18:08:09 +0100 Subject: Initial commit --- tests/api/test_scraper_api.py | 395 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 395 insertions(+) create mode 100644 tests/api/test_scraper_api.py (limited to 'tests/api/test_scraper_api.py') diff --git a/tests/api/test_scraper_api.py b/tests/api/test_scraper_api.py new file mode 100644 index 0000000..1edd74f --- /dev/null +++ b/tests/api/test_scraper_api.py @@ -0,0 +1,395 @@ +import hircine.enums as enums +import hircine.plugins +import hircine.scraper.types as scraped +import pytest +from conftest import DB, Response +from hircine.scraper import ScrapeError, Scraper, ScrapeWarning + + +@pytest.fixture +def query_comic_scrapers(schema_execute): + query = """ + query comicScrapers($id: Int!) { + comicScrapers(id: $id) { + __typename + id + name + } + } + """ + + async def _execute(id): + return await schema_execute(query, {"id": id}) + + return _execute + + +@pytest.fixture +def query_scrape_comic(schema_execute): + query = """ + query scrapeComic($id: Int!, $scraper: String!) { + scrapeComic(id: $id, scraper: $scraper) { + __typename + ... on ScrapeComicResult { + data { + title + originalTitle + url + artists + category + censorship + characters + circles + date + direction + language + layout + rating + tags + worlds + } + warnings + } + ... on Error { + message + } + ... on ScraperNotFoundError { + name + } + ... on ScraperNotAvailableError { + scraper + comicId + } + ... on IDNotFoundError { + id + } + } + } + """ + + async def _execute(id, scraper): + return await schema_execute(query, {"id": id, "scraper": scraper}) + + return _execute + + +@pytest.fixture +def scrapers(empty_plugins): + class GoodScraper(Scraper): + name = "Good Scraper" + is_available = True + source = "good" + + def scrape(self): + yield scraped.Title("Arid Savannah Adventures") + yield scraped.OriginalTitle("Arid Savannah Hijinx") + yield scraped.URL("file:///home/savannah/adventures") + yield scraped.Language(enums.Language.EN) + yield scraped.Date.from_iso("2010-07-05") + yield scraped.Direction(enums.Direction["LEFT_TO_RIGHT"]) + yield scraped.Layout(enums.Layout.SINGLE) + yield scraped.Rating(enums.Rating.SAFE) + yield scraped.Category(enums.Category.MANGA) + yield scraped.Censorship(enums.Censorship.NONE) + yield scraped.Tag.from_string("animal:small") + yield scraped.Tag.from_string("animal:medium") + yield scraped.Tag.from_string("animal:big") + yield scraped.Tag.from_string("animal:massive") + yield scraped.Artist("alan smithee") + yield scraped.Artist("david agnew") + yield scraped.Character("greta giraffe") + yield scraped.Character("bob bear") + yield scraped.Character("rico rhinoceros") + yield scraped.Character("ziggy zebra") + yield scraped.Circle("archimedes") + yield scraped.World("animal friends") + + class DuplicateScraper(Scraper): + name = "Duplicate Scraper" + is_available = True + source = "dupe" + + def gen(self): + yield scraped.Title("Arid Savannah Adventures") + yield scraped.OriginalTitle("Arid Savannah Hijinx") + yield scraped.URL("file:///home/savannah/adventures") + yield scraped.Language(enums.Language.EN) + yield scraped.Date.from_iso("2010-07-05") + yield scraped.Direction(enums.Direction["LEFT_TO_RIGHT"]) + yield scraped.Layout(enums.Layout.SINGLE) + yield scraped.Rating(enums.Rating.SAFE) + yield scraped.Category(enums.Category.MANGA) + yield scraped.Censorship(enums.Censorship.NONE) + yield scraped.Tag.from_string("animal:small") + yield scraped.Tag.from_string("animal:medium") + yield scraped.Tag.from_string("animal:big") + yield scraped.Tag.from_string("animal:massive") + yield scraped.Artist("alan smithee") + yield scraped.Artist("david agnew") + yield scraped.Character("greta giraffe") + yield scraped.Character("bob bear") + yield scraped.Character("rico rhinoceros") + yield scraped.Character("ziggy zebra") + yield scraped.Circle("archimedes") + yield scraped.World("animal friends") + + def scrape(self): + yield from self.gen() + yield from self.gen() + + class WarnScraper(Scraper): + name = "Warn Scraper" + is_available = True + source = "warn" + + def warn_on_purpose(self, item): + raise ScrapeWarning(f"Could not parse: {item}") + + def scrape(self): + yield scraped.Title("Arid Savannah Adventures") + yield lambda: self.warn_on_purpose("Arid Savannah Hijinx") + yield scraped.Language(enums.Language.EN) + + class FailScraper(Scraper): + name = "Fail Scraper" + is_available = True + source = "fail" + + def scrape(self): + yield scraped.Title("Arid Savannah Adventures") + raise ScrapeError("Could not continue") + yield scraped.Language(enums.Language.EN) + + class UnavailableScraper(Scraper): + name = "Unavailable Scraper" + is_available = False + source = "unavail" + + def scrape(self): + yield None + + hircine.plugins.register_scraper("good", GoodScraper) + hircine.plugins.register_scraper("dupe", DuplicateScraper) + hircine.plugins.register_scraper("warn", WarnScraper) + hircine.plugins.register_scraper("fail", FailScraper) + hircine.plugins.register_scraper("unavail", UnavailableScraper) + + return [ + ("good", GoodScraper), + ("dupe", DuplicateScraper), + ("warn", WarnScraper), + ("fail", FailScraper), + ("unavail", UnavailableScraper), + ] + + +@pytest.mark.anyio +async def test_comic_scrapers(gen_comic, query_comic_scrapers, scrapers): + comic = await DB.add(next(gen_comic)) + response = Response(await query_comic_scrapers(comic.id)) + + assert isinstance((response.data), list) + + available_scrapers = [] + for name, cls in sorted(scrapers, key=lambda s: s[1].name): + instance = cls(comic) + if instance.is_available: + available_scrapers.append((name, cls)) + + assert len(response.data) == len(available_scrapers) + + data = iter(response.data) + for id, scraper in available_scrapers: + field = next(data) + assert field["__typename"] == "ComicScraper" + assert field["id"] == id + assert field["name"] == scraper.name + + +@pytest.mark.anyio +async def test_comic_empty_for_missing_comic(gen_comic, query_comic_scrapers, scrapers): + response = Response(await query_comic_scrapers(1)) + + assert response.data == [] + + +@pytest.mark.anyio +async def test_scrape_comic(gen_comic, query_scrape_comic, scrapers): + comic = await DB.add(next(gen_comic)) + + response = Response(await query_scrape_comic(comic.id, "good")) + response.assert_is("ScrapeComicResult") + + assert response.warnings == [] + + scraped_comic = response.data["data"] + + assert scraped_comic["title"] == "Arid Savannah Adventures" + assert scraped_comic["originalTitle"] == "Arid Savannah Hijinx" + assert scraped_comic["url"] == "file:///home/savannah/adventures" + assert scraped_comic["language"] == "EN" + assert scraped_comic["date"] == "2010-07-05" + assert scraped_comic["rating"] == "SAFE" + assert scraped_comic["category"] == "MANGA" + assert scraped_comic["direction"] == "LEFT_TO_RIGHT" + assert scraped_comic["layout"] == "SINGLE" + assert scraped_comic["tags"] == [ + "animal:small", + "animal:medium", + "animal:big", + "animal:massive", + ] + assert scraped_comic["artists"] == ["alan smithee", "david agnew"] + assert scraped_comic["characters"] == [ + "greta giraffe", + "bob bear", + "rico rhinoceros", + "ziggy zebra", + ] + assert scraped_comic["circles"] == ["archimedes"] + assert scraped_comic["worlds"] == ["animal friends"] + + +@pytest.mark.anyio +async def test_scrape_comic_removes_duplicates(gen_comic, query_scrape_comic, scrapers): + comic = await DB.add(next(gen_comic)) + + response = Response(await query_scrape_comic(comic.id, "dupe")) + response.assert_is("ScrapeComicResult") + + assert response.warnings == [] + + scraped_comic = response.data["data"] + + assert scraped_comic["title"] == "Arid Savannah Adventures" + assert scraped_comic["originalTitle"] == "Arid Savannah Hijinx" + assert scraped_comic["url"] == "file:///home/savannah/adventures" + assert scraped_comic["language"] == "EN" + assert scraped_comic["date"] == "2010-07-05" + assert scraped_comic["rating"] == "SAFE" + assert scraped_comic["category"] == "MANGA" + assert scraped_comic["direction"] == "LEFT_TO_RIGHT" + assert scraped_comic["layout"] == "SINGLE" + assert scraped_comic["tags"] == [ + "animal:small", + "animal:medium", + "animal:big", + "animal:massive", + ] + assert scraped_comic["artists"] == ["alan smithee", "david agnew"] + assert scraped_comic["characters"] == [ + "greta giraffe", + "bob bear", + "rico rhinoceros", + "ziggy zebra", + ] + assert scraped_comic["circles"] == ["archimedes"] + assert scraped_comic["worlds"] == ["animal friends"] + + +@pytest.mark.anyio +async def test_scrape_comic_fails_comic_not_found(query_scrape_comic, scrapers): + response = Response(await query_scrape_comic(1, "good")) + response.assert_is("IDNotFoundError") + + assert response.id == 1 + assert response.message == "Comic ID not found: '1'" + + +@pytest.mark.anyio +async def test_scrape_comic_fails_scraper_not_found( + gen_comic, query_scrape_comic, scrapers +): + comic = await DB.add(next(gen_comic)) + + response = Response(await query_scrape_comic(comic.id, "missing")) + response.assert_is("ScraperNotFoundError") + + assert response.name == "missing" + assert response.message == "Scraper not found: 'missing'" + + +@pytest.mark.anyio +async def test_scrape_comic_fails_scraper_not_available( + gen_comic, query_scrape_comic, scrapers +): + comic = await DB.add(next(gen_comic)) + + response = Response(await query_scrape_comic(comic.id, "unavail")) + response.assert_is("ScraperNotAvailableError") + + assert response.scraper == "unavail" + assert response.comicId == comic.id + assert response.message == f"Scraper unavail not available for comic ID {comic.id}" + + +async def test_scrape_comic_with_transformer(gen_comic, query_scrape_comic, scrapers): + def keep(generator, info): + for item in generator: + match item: + case scraped.Title(): + yield item + + hircine.plugins.transformers = [keep] + + comic = await DB.add(next(gen_comic)) + + response = Response(await query_scrape_comic(comic.id, "good")) + response.assert_is("ScrapeComicResult") + + assert response.warnings == [] + + scraped_comic = response.data["data"] + + assert scraped_comic["title"] == "Arid Savannah Adventures" + assert scraped_comic["originalTitle"] is None + assert scraped_comic["url"] is None + assert scraped_comic["language"] is None + assert scraped_comic["date"] is None + assert scraped_comic["rating"] is None + assert scraped_comic["category"] is None + assert scraped_comic["censorship"] is None + assert scraped_comic["direction"] is None + assert scraped_comic["layout"] is None + assert scraped_comic["tags"] == [] + assert scraped_comic["artists"] == [] + assert scraped_comic["characters"] == [] + assert scraped_comic["circles"] == [] + assert scraped_comic["worlds"] == [] + + +@pytest.mark.anyio +async def test_scrape_comic_catches_warnings(gen_comic, query_scrape_comic, scrapers): + comic = await DB.add(next(gen_comic)) + + response = Response(await query_scrape_comic(comic.id, "warn")) + response.assert_is("ScrapeComicResult") + + assert response.warnings == ["Could not parse: Arid Savannah Hijinx"] + + scraped_comic = response.data["data"] + + assert scraped_comic["title"] == "Arid Savannah Adventures" + assert scraped_comic["originalTitle"] is None + assert scraped_comic["language"] == "EN" + assert scraped_comic["date"] is None + assert scraped_comic["rating"] is None + assert scraped_comic["category"] is None + assert scraped_comic["direction"] is None + assert scraped_comic["layout"] is None + assert scraped_comic["tags"] == [] + assert scraped_comic["artists"] == [] + assert scraped_comic["characters"] == [] + assert scraped_comic["circles"] == [] + assert scraped_comic["worlds"] == [] + + +@pytest.mark.anyio +async def test_scrape_comic_fails_with_scraper_error( + gen_comic, query_scrape_comic, scrapers +): + comic = await DB.add(next(gen_comic)) + + response = Response(await query_scrape_comic(comic.id, "fail")) + response.assert_is("ScraperError") + assert response.message == "Scraping failed: Could not continue" -- cgit v1.2.3-2-gb3c3