import hircine.enums as enums
import hircine.plugins
import hircine.scraper.types as scraped
import pytest
from conftest import DB, Response
from hircine.scraper import ScrapeError, Scraper, ScrapeWarning
@pytest.fixture
def query_comic_scrapers(schema_execute):
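    """Return an executor for the comicScrapers GraphQL query."""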
query = """
query comicScrapers($id: Int!) {
comicScrapers(id: $id) {
__typename
id
name
}
}
"""
async def _execute(id):
return await schema_execute(query, {"id": id})
return _execute
@pytest.fixture
def query_scrape_comic(schema_execute):
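    """Return an executor for the scrapeComic GraphQL query."""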
query = """
query scrapeComic($id: Int!, $scraper: String!) {
scrapeComic(id: $id, scraper: $scraper) {
__typename
... on ScrapeComicResult {
data {
title
originalTitle
url
artists
category
censorship
characters
circles
date
direction
language
layout
rating
tags
worlds
}
warnings
}
... on Error {
message
}
... on ScraperNotFoundError {
name
}
... on ScraperNotAvailableError {
scraper
comicId
}
... on IDNotFoundError {
id
}
}
}
"""
async def _execute(id, scraper):
return await schema_execute(query, {"id": id, "scraper": scraper})
return _execute
@pytest.fixture
def scrapers(empty_plugins):
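    """Register five test scrapers against a clean plugin registry.

    Returns (source, class) pairs so tests can cross-check what was
    registered.
    """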
class GoodScraper(Scraper):
name = "Good Scraper"
is_available = True
source = "good"
def scrape(self):
yield scraped.Title("Arid Savannah Adventures")
yield scraped.OriginalTitle("Arid Savannah Hijinx")
yield scraped.URL("file:///home/savannah/adventures")
yield scraped.Language(enums.Language.EN)
yield scraped.Date.from_iso("2010-07-05")
yield scraped.Direction(enums.Direction["LEFT_TO_RIGHT"])
yield scraped.Layout(enums.Layout.SINGLE)
yield scraped.Rating(enums.Rating.SAFE)
yield scraped.Category(enums.Category.MANGA)
yield scraped.Censorship(enums.Censorship.NONE)
yield scraped.Tag.from_string("animal:small")
yield scraped.Tag.from_string("animal:medium")
yield scraped.Tag.from_string("animal:big")
yield scraped.Tag.from_string("animal:massive")
yield scraped.Artist("alan smithee")
yield scraped.Artist("david agnew")
yield scraped.Character("greta giraffe")
yield scraped.Character("bob bear")
yield scraped.Character("rico rhinoceros")
yield scraped.Character("ziggy zebra")
yield scraped.Circle("archimedes")
yield scraped.World("animal friends")
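    # Yields the same items as GoodScraper, but twice; the scrape result
    # should contain no duplicates.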
class DuplicateScraper(Scraper):
name = "Duplicate Scraper"
is_available = True
source = "dupe"
def gen(self):
yield scraped.Title("Arid Savannah Adventures")
yield scraped.OriginalTitle("Arid Savannah Hijinx")
yield scraped.URL("file:///home/savannah/adventures")
yield scraped.Language(enums.Language.EN)
yield scraped.Date.from_iso("2010-07-05")
yield scraped.Direction(enums.Direction["LEFT_TO_RIGHT"])
yield scraped.Layout(enums.Layout.SINGLE)
yield scraped.Rating(enums.Rating.SAFE)
yield scraped.Category(enums.Category.MANGA)
yield scraped.Censorship(enums.Censorship.NONE)
yield scraped.Tag.from_string("animal:small")
yield scraped.Tag.from_string("animal:medium")
yield scraped.Tag.from_string("animal:big")
yield scraped.Tag.from_string("animal:massive")
yield scraped.Artist("alan smithee")
yield scraped.Artist("david agnew")
yield scraped.Character("greta giraffe")
yield scraped.Character("bob bear")
yield scraped.Character("rico rhinoceros")
yield scraped.Character("ziggy zebra")
yield scraped.Circle("archimedes")
yield scraped.World("animal friends")
def scrape(self):
yield from self.gen()
yield from self.gen()
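    # Yields a callable that raises ScrapeWarning; the warning should be
    # collected and reported while the offending item is dropped.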
class WarnScraper(Scraper):
name = "Warn Scraper"
is_available = True
source = "warn"
def warn_on_purpose(self, item):
raise ScrapeWarning(f"Could not parse: {item}")
def scrape(self):
yield scraped.Title("Arid Savannah Adventures")
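            # Items may be yielded as callables; the scraper machinery
            # invokes them and collects any ScrapeWarning they raise.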
yield lambda: self.warn_on_purpose("Arid Savannah Hijinx")
yield scraped.Language(enums.Language.EN)
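    # Raises ScrapeError partway through; scraping should abort entirely.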
class FailScraper(Scraper):
name = "Fail Scraper"
is_available = True
source = "fail"
def scrape(self):
yield scraped.Title("Arid Savannah Adventures")
raise ScrapeError("Could not continue")
yield scraped.Language(enums.Language.EN)
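    # Never available, so scrape() should never be invoked.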
class UnavailableScraper(Scraper):
name = "Unavailable Scraper"
is_available = False
source = "unavail"
def scrape(self):
yield None
hircine.plugins.register_scraper("good", GoodScraper)
hircine.plugins.register_scraper("dupe", DuplicateScraper)
hircine.plugins.register_scraper("warn", WarnScraper)
hircine.plugins.register_scraper("fail", FailScraper)
hircine.plugins.register_scraper("unavail", UnavailableScraper)
return [
("good", GoodScraper),
("dupe", DuplicateScraper),
("warn", WarnScraper),
("fail", FailScraper),
("unavail", UnavailableScraper),
]
@pytest.mark.anyio
async def test_comic_scrapers(gen_comic, query_comic_scrapers, scrapers):
comic = await DB.add(next(gen_comic))
response = Response(await query_comic_scrapers(comic.id))
    assert isinstance(response.data, list)
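    # Only available scrapers should be listed, ordered by their display name.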
available_scrapers = []
for name, cls in sorted(scrapers, key=lambda s: s[1].name):
instance = cls(comic)
if instance.is_available:
available_scrapers.append((name, cls))
assert len(response.data) == len(available_scrapers)
data = iter(response.data)
for id, scraper in available_scrapers:
field = next(data)
assert field["__typename"] == "ComicScraper"
assert field["id"] == id
assert field["name"] == scraper.name
@pytest.mark.anyio
async def test_comic_scrapers_empty_for_missing_comic(query_comic_scrapers, scrapers):
response = Response(await query_comic_scrapers(1))
assert response.data == []
@pytest.mark.anyio
async def test_scrape_comic(gen_comic, query_scrape_comic, scrapers):
comic = await DB.add(next(gen_comic))
response = Response(await query_scrape_comic(comic.id, "good"))
response.assert_is("ScrapeComicResult")
assert response.warnings == []
scraped_comic = response.data["data"]
assert scraped_comic["title"] == "Arid Savannah Adventures"
assert scraped_comic["originalTitle"] == "Arid Savannah Hijinx"
assert scraped_comic["url"] == "file:///home/savannah/adventures"
assert scraped_comic["language"] == "EN"
assert scraped_comic["date"] == "2010-07-05"
assert scraped_comic["rating"] == "SAFE"
assert scraped_comic["category"] == "MANGA"
assert scraped_comic["direction"] == "LEFT_TO_RIGHT"
assert scraped_comic["layout"] == "SINGLE"
assert scraped_comic["tags"] == [
"animal:small",
"animal:medium",
"animal:big",
"animal:massive",
]
assert scraped_comic["artists"] == ["alan smithee", "david agnew"]
assert scraped_comic["characters"] == [
"greta giraffe",
"bob bear",
"rico rhinoceros",
"ziggy zebra",
]
assert scraped_comic["circles"] == ["archimedes"]
assert scraped_comic["worlds"] == ["animal friends"]
@pytest.mark.anyio
async def test_scrape_comic_removes_duplicates(
    gen_comic, query_scrape_comic, scrapers
):
comic = await DB.add(next(gen_comic))
response = Response(await query_scrape_comic(comic.id, "dupe"))
response.assert_is("ScrapeComicResult")
assert response.warnings == []
scraped_comic = response.data["data"]
assert scraped_comic["title"] == "Arid Savannah Adventures"
assert scraped_comic["originalTitle"] == "Arid Savannah Hijinx"
assert scraped_comic["url"] == "file:///home/savannah/adventures"
assert scraped_comic["language"] == "EN"
assert scraped_comic["date"] == "2010-07-05"
assert scraped_comic["rating"] == "SAFE"
assert scraped_comic["category"] == "MANGA"
assert scraped_comic["direction"] == "LEFT_TO_RIGHT"
assert scraped_comic["layout"] == "SINGLE"
assert scraped_comic["tags"] == [
"animal:small",
"animal:medium",
"animal:big",
"animal:massive",
]
assert scraped_comic["artists"] == ["alan smithee", "david agnew"]
assert scraped_comic["characters"] == [
"greta giraffe",
"bob bear",
"rico rhinoceros",
"ziggy zebra",
]
assert scraped_comic["circles"] == ["archimedes"]
assert scraped_comic["worlds"] == ["animal friends"]
@pytest.mark.anyio
async def test_scrape_comic_fails_comic_not_found(query_scrape_comic, scrapers):
response = Response(await query_scrape_comic(1, "good"))
response.assert_is("IDNotFoundError")
assert response.id == 1
assert response.message == "Comic ID not found: '1'"
@pytest.mark.anyio
async def test_scrape_comic_fails_scraper_not_found(
gen_comic, query_scrape_comic, scrapers
):
comic = await DB.add(next(gen_comic))
response = Response(await query_scrape_comic(comic.id, "missing"))
response.assert_is("ScraperNotFoundError")
assert response.name == "missing"
assert response.message == "Scraper not found: 'missing'"
@pytest.mark.anyio
async def test_scrape_comic_fails_scraper_not_available(
gen_comic, query_scrape_comic, scrapers
):
comic = await DB.add(next(gen_comic))
response = Response(await query_scrape_comic(comic.id, "unavail"))
response.assert_is("ScraperNotAvailableError")
assert response.scraper == "unavail"
assert response.comicId == comic.id
assert response.message == f"Scraper unavail not available for comic ID {comic.id}"
@pytest.mark.anyio
async def test_scrape_comic_with_transformer(gen_comic, query_scrape_comic, scrapers):
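    # Keep only Title items; every other scraped field should come back empty.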
def keep(generator, info):
for item in generator:
match item:
case scraped.Title():
yield item
hircine.plugins.transformers = [keep]
comic = await DB.add(next(gen_comic))
response = Response(await query_scrape_comic(comic.id, "good"))
response.assert_is("ScrapeComicResult")
assert response.warnings == []
scraped_comic = response.data["data"]
assert scraped_comic["title"] == "Arid Savannah Adventures"
assert scraped_comic["originalTitle"] is None
assert scraped_comic["url"] is None
assert scraped_comic["language"] is None
assert scraped_comic["date"] is None
assert scraped_comic["rating"] is None
assert scraped_comic["category"] is None
assert scraped_comic["censorship"] is None
assert scraped_comic["direction"] is None
assert scraped_comic["layout"] is None
assert scraped_comic["tags"] == []
assert scraped_comic["artists"] == []
assert scraped_comic["characters"] == []
assert scraped_comic["circles"] == []
assert scraped_comic["worlds"] == []
@pytest.mark.anyio
async def test_scrape_comic_catches_warnings(gen_comic, query_scrape_comic, scrapers):
comic = await DB.add(next(gen_comic))
response = Response(await query_scrape_comic(comic.id, "warn"))
response.assert_is("ScrapeComicResult")
assert response.warnings == ["Could not parse: Arid Savannah Hijinx"]
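    # The item that triggered the warning is dropped; the rest scrape normally.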
scraped_comic = response.data["data"]
assert scraped_comic["title"] == "Arid Savannah Adventures"
assert scraped_comic["originalTitle"] is None
assert scraped_comic["language"] == "EN"
assert scraped_comic["date"] is None
assert scraped_comic["rating"] is None
assert scraped_comic["category"] is None
assert scraped_comic["direction"] is None
assert scraped_comic["layout"] is None
assert scraped_comic["tags"] == []
assert scraped_comic["artists"] == []
assert scraped_comic["characters"] == []
assert scraped_comic["circles"] == []
assert scraped_comic["worlds"] == []
@pytest.mark.anyio
async def test_scrape_comic_fails_with_scraper_error(
gen_comic, query_scrape_comic, scrapers
):
comic = await DB.add(next(gen_comic))
response = Response(await query_scrape_comic(comic.id, "fail"))
response.assert_is("ScraperError")
assert response.message == "Scraping failed: Could not continue"