summaryrefslogtreecommitdiffstatshomepage
path: root/tests/api/test_scraper_api.py
diff options
context:
space:
mode:
authorWolfgang Müller2024-03-05 18:08:09 +0100
committerWolfgang Müller2024-03-05 19:25:59 +0100
commitd1d654ebac2d51e3841675faeb56480e440f622f (patch)
tree56ef123c1a15a10dfd90836e4038e27efde950c6 /tests/api/test_scraper_api.py
downloadhircine-0.1.0.tar.gz
Initial commit0.1.0
Diffstat (limited to '')
-rw-r--r--tests/api/test_scraper_api.py395
1 files changed, 395 insertions, 0 deletions
diff --git a/tests/api/test_scraper_api.py b/tests/api/test_scraper_api.py
new file mode 100644
index 0000000..1edd74f
--- /dev/null
+++ b/tests/api/test_scraper_api.py
@@ -0,0 +1,395 @@
+import hircine.enums as enums
+import hircine.plugins
+import hircine.scraper.types as scraped
+import pytest
+from conftest import DB, Response
+from hircine.scraper import ScrapeError, Scraper, ScrapeWarning
+
+
@pytest.fixture
def query_comic_scrapers(schema_execute):
    """Provide a coroutine that runs the ``comicScrapers`` GraphQL query.

    The returned callable takes a comic id and resolves to the raw
    schema-execution result.
    """
    document = """
    query comicScrapers($id: Int!) {
        comicScrapers(id: $id) {
            __typename
            id
            name
        }
    }
    """

    async def _run(comic_id):
        return await schema_execute(document, {"id": comic_id})

    return _run
+
+
@pytest.fixture
def query_scrape_comic(schema_execute):
    """Provide a coroutine that runs the ``scrapeComic`` GraphQL query.

    The returned callable takes a comic id and a scraper id and resolves
    to the raw schema-execution result, covering both the success payload
    and every error variant the API can return.
    """
    document = """
    query scrapeComic($id: Int!, $scraper: String!) {
        scrapeComic(id: $id, scraper: $scraper) {
            __typename
            ... on ScrapeComicResult {
                data {
                    title
                    originalTitle
                    url
                    artists
                    category
                    censorship
                    characters
                    circles
                    date
                    direction
                    language
                    layout
                    rating
                    tags
                    worlds
                }
                warnings
            }
            ... on Error {
                message
            }
            ... on ScraperNotFoundError {
                name
            }
            ... on ScraperNotAvailableError {
                scraper
                comicId
            }
            ... on IDNotFoundError {
                id
            }
        }
    }
    """

    async def _run(comic_id, scraper_id):
        return await schema_execute(document, {"id": comic_id, "scraper": scraper_id})

    return _run
+
+
@pytest.fixture
def scrapers(empty_plugins):
    """Register a fixed set of test scrapers and return their (id, class) pairs.

    Fix: the full 22-item metadata sequence was previously duplicated
    verbatim between ``GoodScraper.scrape`` and ``DuplicateScraper.gen``;
    it now lives in a single shared generator so the two cannot drift
    apart and the de-duplication test stays meaningful.
    """

    def full_metadata():
        # Single source of truth for the "complete" scrape result.
        yield scraped.Title("Arid Savannah Adventures")
        yield scraped.OriginalTitle("Arid Savannah Hijinx")
        yield scraped.URL("file:///home/savannah/adventures")
        yield scraped.Language(enums.Language.EN)
        yield scraped.Date.from_iso("2010-07-05")
        yield scraped.Direction(enums.Direction["LEFT_TO_RIGHT"])
        yield scraped.Layout(enums.Layout.SINGLE)
        yield scraped.Rating(enums.Rating.SAFE)
        yield scraped.Category(enums.Category.MANGA)
        yield scraped.Censorship(enums.Censorship.NONE)
        yield scraped.Tag.from_string("animal:small")
        yield scraped.Tag.from_string("animal:medium")
        yield scraped.Tag.from_string("animal:big")
        yield scraped.Tag.from_string("animal:massive")
        yield scraped.Artist("alan smithee")
        yield scraped.Artist("david agnew")
        yield scraped.Character("greta giraffe")
        yield scraped.Character("bob bear")
        yield scraped.Character("rico rhinoceros")
        yield scraped.Character("ziggy zebra")
        yield scraped.Circle("archimedes")
        yield scraped.World("animal friends")

    class GoodScraper(Scraper):
        name = "Good Scraper"
        is_available = True
        source = "good"

        def scrape(self):
            yield from full_metadata()

    class DuplicateScraper(Scraper):
        name = "Duplicate Scraper"
        is_available = True
        source = "dupe"

        def scrape(self):
            # Emit every item twice; the API is expected to de-duplicate.
            yield from full_metadata()
            yield from full_metadata()

    class WarnScraper(Scraper):
        name = "Warn Scraper"
        is_available = True
        source = "warn"

        def warn_on_purpose(self, item):
            raise ScrapeWarning(f"Could not parse: {item}")

        def scrape(self):
            yield scraped.Title("Arid Savannah Adventures")
            # Yield a callable so the warning is raised lazily, when the
            # scrape pipeline evaluates the item.
            yield lambda: self.warn_on_purpose("Arid Savannah Hijinx")
            yield scraped.Language(enums.Language.EN)

    class FailScraper(Scraper):
        name = "Fail Scraper"
        is_available = True
        source = "fail"

        def scrape(self):
            yield scraped.Title("Arid Savannah Adventures")
            raise ScrapeError("Could not continue")
            # Unreachable on purpose: verifies scraping aborts at the error.
            yield scraped.Language(enums.Language.EN)

    class UnavailableScraper(Scraper):
        name = "Unavailable Scraper"
        is_available = False
        source = "unavail"

        def scrape(self):
            yield None

    hircine.plugins.register_scraper("good", GoodScraper)
    hircine.plugins.register_scraper("dupe", DuplicateScraper)
    hircine.plugins.register_scraper("warn", WarnScraper)
    hircine.plugins.register_scraper("fail", FailScraper)
    hircine.plugins.register_scraper("unavail", UnavailableScraper)

    return [
        ("good", GoodScraper),
        ("dupe", DuplicateScraper),
        ("warn", WarnScraper),
        ("fail", FailScraper),
        ("unavail", UnavailableScraper),
    ]
+
+
@pytest.mark.anyio
async def test_comic_scrapers(gen_comic, query_comic_scrapers, scrapers):
    """The API lists available scrapers for a comic, sorted by display name."""
    comic = await DB.add(next(gen_comic))
    response = Response(await query_comic_scrapers(comic.id))

    assert isinstance(response.data, list)

    # Scrapers the API should report: available ones, ordered by name.
    expected = [
        (scraper_id, cls)
        for scraper_id, cls in sorted(scrapers, key=lambda pair: pair[1].name)
        if cls(comic).is_available
    ]

    assert len(response.data) == len(expected)

    for entry, (scraper_id, cls) in zip(response.data, expected):
        assert entry["__typename"] == "ComicScraper"
        assert entry["id"] == scraper_id
        assert entry["name"] == cls.name
+
+
@pytest.mark.anyio
async def test_comic_empty_for_missing_comic(gen_comic, query_comic_scrapers, scrapers):
    """Requesting scrapers for a nonexistent comic yields an empty list."""
    missing_id = 1

    response = Response(await query_comic_scrapers(missing_id))

    assert response.data == []
+
+
@pytest.mark.anyio
async def test_scrape_comic(gen_comic, query_scrape_comic, scrapers):
    """A working scraper returns the complete, ordered scrape result."""
    comic = await DB.add(next(gen_comic))

    response = Response(await query_scrape_comic(comic.id, "good"))
    response.assert_is("ScrapeComicResult")

    assert response.warnings == []

    expected = {
        "title": "Arid Savannah Adventures",
        "originalTitle": "Arid Savannah Hijinx",
        "url": "file:///home/savannah/adventures",
        "language": "EN",
        "date": "2010-07-05",
        "rating": "SAFE",
        "category": "MANGA",
        "direction": "LEFT_TO_RIGHT",
        "layout": "SINGLE",
        "tags": [
            "animal:small",
            "animal:medium",
            "animal:big",
            "animal:massive",
        ],
        "artists": ["alan smithee", "david agnew"],
        "characters": [
            "greta giraffe",
            "bob bear",
            "rico rhinoceros",
            "ziggy zebra",
        ],
        "circles": ["archimedes"],
        "worlds": ["animal friends"],
    }

    scraped_comic = response.data["data"]
    for field, value in expected.items():
        assert scraped_comic[field] == value
+
+
@pytest.mark.anyio
async def test_scrape_comic_removes_duplicates(gen_comic, query_scrape_comic, scrapers):
    """Items yielded twice by a scraper appear only once in the result."""
    comic = await DB.add(next(gen_comic))

    response = Response(await query_scrape_comic(comic.id, "dupe"))
    response.assert_is("ScrapeComicResult")

    assert response.warnings == []

    # Identical to the "good" scraper's output: duplicates were dropped.
    expected = {
        "title": "Arid Savannah Adventures",
        "originalTitle": "Arid Savannah Hijinx",
        "url": "file:///home/savannah/adventures",
        "language": "EN",
        "date": "2010-07-05",
        "rating": "SAFE",
        "category": "MANGA",
        "direction": "LEFT_TO_RIGHT",
        "layout": "SINGLE",
        "tags": [
            "animal:small",
            "animal:medium",
            "animal:big",
            "animal:massive",
        ],
        "artists": ["alan smithee", "david agnew"],
        "characters": [
            "greta giraffe",
            "bob bear",
            "rico rhinoceros",
            "ziggy zebra",
        ],
        "circles": ["archimedes"],
        "worlds": ["animal friends"],
    }

    scraped_comic = response.data["data"]
    for field, value in expected.items():
        assert scraped_comic[field] == value
+
+
@pytest.mark.anyio
async def test_scrape_comic_fails_comic_not_found(query_scrape_comic, scrapers):
    """Scraping an unknown comic id reports IDNotFoundError."""
    missing_id = 1

    response = Response(await query_scrape_comic(missing_id, "good"))
    response.assert_is("IDNotFoundError")

    assert response.id == missing_id
    assert response.message == f"Comic ID not found: '{missing_id}'"
+
+
@pytest.mark.anyio
async def test_scrape_comic_fails_scraper_not_found(
    gen_comic, query_scrape_comic, scrapers
):
    """Requesting an unregistered scraper id reports ScraperNotFoundError."""
    comic = await DB.add(next(gen_comic))
    unknown_scraper = "missing"

    response = Response(await query_scrape_comic(comic.id, unknown_scraper))
    response.assert_is("ScraperNotFoundError")

    assert response.name == unknown_scraper
    assert response.message == f"Scraper not found: '{unknown_scraper}'"
+
+
@pytest.mark.anyio
async def test_scrape_comic_fails_scraper_not_available(
    gen_comic, query_scrape_comic, scrapers
):
    """A scraper with is_available=False reports ScraperNotAvailableError."""
    comic = await DB.add(next(gen_comic))
    scraper_id = "unavail"

    response = Response(await query_scrape_comic(comic.id, scraper_id))
    response.assert_is("ScraperNotAvailableError")

    assert response.scraper == scraper_id
    assert response.comicId == comic.id
    assert (
        response.message
        == f"Scraper {scraper_id} not available for comic ID {comic.id}"
    )
+
+
+async def test_scrape_comic_with_transformer(gen_comic, query_scrape_comic, scrapers):
+ def keep(generator, info):
+ for item in generator:
+ match item:
+ case scraped.Title():
+ yield item
+
+ hircine.plugins.transformers = [keep]
+
+ comic = await DB.add(next(gen_comic))
+
+ response = Response(await query_scrape_comic(comic.id, "good"))
+ response.assert_is("ScrapeComicResult")
+
+ assert response.warnings == []
+
+ scraped_comic = response.data["data"]
+
+ assert scraped_comic["title"] == "Arid Savannah Adventures"
+ assert scraped_comic["originalTitle"] is None
+ assert scraped_comic["url"] is None
+ assert scraped_comic["language"] is None
+ assert scraped_comic["date"] is None
+ assert scraped_comic["rating"] is None
+ assert scraped_comic["category"] is None
+ assert scraped_comic["censorship"] is None
+ assert scraped_comic["direction"] is None
+ assert scraped_comic["layout"] is None
+ assert scraped_comic["tags"] == []
+ assert scraped_comic["artists"] == []
+ assert scraped_comic["characters"] == []
+ assert scraped_comic["circles"] == []
+ assert scraped_comic["worlds"] == []
+
+
@pytest.mark.anyio
async def test_scrape_comic_catches_warnings(gen_comic, query_scrape_comic, scrapers):
    """Warnings raised while scraping are collected instead of failing."""
    comic = await DB.add(next(gen_comic))

    response = Response(await query_scrape_comic(comic.id, "warn"))
    response.assert_is("ScrapeComicResult")

    assert response.warnings == ["Could not parse: Arid Savannah Hijinx"]

    scraped_comic = response.data["data"]

    # Items yielded around the warning survive...
    assert scraped_comic["title"] == "Arid Savannah Adventures"
    assert scraped_comic["language"] == "EN"

    # ...while the item that warned is dropped and the rest stay unset.
    assert scraped_comic["originalTitle"] is None
    for field in ("date", "rating", "category", "direction", "layout"):
        assert scraped_comic[field] is None
    for field in ("tags", "artists", "characters", "circles", "worlds"):
        assert scraped_comic[field] == []
+
+
@pytest.mark.anyio
async def test_scrape_comic_fails_with_scraper_error(
    gen_comic, query_scrape_comic, scrapers
):
    """A ScrapeError raised mid-scrape aborts and surfaces as ScraperError."""
    comic = await DB.add(next(gen_comic))

    result = Response(await query_scrape_comic(comic.id, "fail"))
    result.assert_is("ScraperError")

    assert result.message == "Scraping failed: Could not continue"