diff options
author | Wolfgang Müller | 2024-11-14 20:12:19 +0100 |
---|---|---|
committer | Wolfgang Müller | 2024-11-14 20:45:03 +0100 |
commit | a68bdd1419150a98b4255ca6f7db6889e73b7aa0 (patch) | |
tree | 61754e35cf075d1be634b4be8908b788efd73840 | |
parent | a43a295335f24bcb924e96718edcdd64a08f3597 (diff) | |
download | hircine-a68bdd1419150a98b4255ca6f7db6889e73b7aa0.tar.gz |
backend/scraper: Add parser methods for Language
We can expect a number of scraper sources to either give languages as
ISO 639-3 or as their English name, so it makes sense to implement a
simple parser method on our side.
-rw-r--r-- | src/hircine/scraper/types.py | 32 | ||||
-rw-r--r-- | tests/scrapers/test_types.py | 33 |
2 files changed, 65 insertions, 0 deletions
diff --git a/src/hircine/scraper/types.py b/src/hircine/scraper/types.py
index 534792b..23cb634 100644
--- a/src/hircine/scraper/types.py
+++ b/src/hircine/scraper/types.py
@@ -137,6 +137,38 @@ class Language:
     def __bool__(self):
         return self.value is not None
 
+    @classmethod
+    def from_iso_639_3(cls, string):
+        """
+        Returns a new instance of this class given a case-insensitive ISO 639-3
+        language code, looked up by member name in :class:`hircine.enums.Language`.
+
+        :param str string: The ISO 639-3 language code.
+        :raise: :exc:`~hircine.scraper.ScrapeWarning` if the language code could
+            not be parsed.
+        """
+        try:
+            return cls(value=hircine.enums.Language[string.upper()])
+        except KeyError as e:
+            raise ScrapeWarning(
+                f"Could not parse language code: '{string}' as ISO 639-3"
+            ) from e
+
+    @classmethod
+    def from_name(cls, string):
+        """
+        Returns a new instance of this class given a case-insensitive language name.
+        Permissible language names are defined in :class:`hircine.enums.Language`.
+
+        :param str string: The language name.
+        :raise: :exc:`~hircine.scraper.ScrapeWarning` if the language name could
+            not be parsed.
+        """
+        try:
+            return cls(value=hircine.enums.Language(string.capitalize()))
+        except ValueError as e:
+            raise ScrapeWarning(f"Could not parse language name: '{string}'") from e
+
 
 @dataclass(frozen=True)
 class Direction:
diff --git a/tests/scrapers/test_types.py b/tests/scrapers/test_types.py
index ee6b802..33f9f89 100644
--- a/tests/scrapers/test_types.py
+++ b/tests/scrapers/test_types.py
@@ -2,6 +2,7 @@ from datetime import date
 
 import pytest
 
+import hircine.enums as enums
 from hircine.api.types import ScrapedComic
 from hircine.scraper import ScrapeWarning
 from hircine.scraper.types import (
@@ -130,3 +131,35 @@ def test_scraped_comic_silently_ignores_empty(item, attr, empty):
     comic = ScrapedComic.from_generator(gen())
 
     assert getattr(comic, attr) == empty
+
+
+@pytest.mark.parametrize(
+    "input,want",
+    [
+        ("EN", Language(value=enums.Language.EN)),
+        ("de", Language(value=enums.Language.DE)),
+    ],
+)
+def test_language_from_iso_639_3(input, want):
+    assert Language.from_iso_639_3(input) == want
+
+
+def test_language_from_iso_639_3_fails():
+    with pytest.raises(ScrapeWarning, match="Could not parse language code:"):
+        Language.from_iso_639_3("ENG")
+
+
+@pytest.mark.parametrize(
+    "input,want",
+    [
+        ("English", Language(value=enums.Language.EN)),
+        ("german", Language(value=enums.Language.DE)),
+    ],
+)
+def test_language_from_name(input, want):
+    assert Language.from_name(input) == want
+
+
+def test_language_from_name_fails():
+    with pytest.raises(ScrapeWarning, match="Could not parse language name:"):
+        Language.from_name("nonexistent")