summaryrefslogtreecommitdiffstatshomepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/hircine/__init__.py2
-rw-r--r--src/hircine/api/__init__.py5
-rw-r--r--src/hircine/api/filters.py69
-rw-r--r--src/hircine/api/inputs.py28
-rw-r--r--src/hircine/api/mutation/resolvers.py43
-rw-r--r--src/hircine/api/query/__init__.py24
-rw-r--r--src/hircine/api/query/resolvers.py31
-rw-r--r--src/hircine/api/responses.py30
-rw-r--r--src/hircine/api/sort.py11
-rw-r--r--src/hircine/api/types.py34
-rw-r--r--src/hircine/db/models.py41
-rw-r--r--src/hircine/db/ops.py28
-rw-r--r--src/hircine/enums.py8
-rw-r--r--src/hircine/migrations/env.py3
-rw-r--r--src/hircine/plugins/__init__.py7
-rw-r--r--src/hircine/plugins/scrapers/anchira.py27
-rw-r--r--src/hircine/plugins/scrapers/ehentai_api.py38
-rw-r--r--src/hircine/plugins/scrapers/gallery_dl.py2
-rw-r--r--src/hircine/plugins/scrapers/handlers/dynastyscans.py10
-rw-r--r--src/hircine/plugins/scrapers/handlers/e621.py6
-rw-r--r--src/hircine/plugins/scrapers/handlers/exhentai.py14
-rw-r--r--src/hircine/plugins/scrapers/handlers/mangadex.py12
-rw-r--r--src/hircine/plugins/scrapers/schale_network.py82
-rw-r--r--src/hircine/scanner.py36
-rw-r--r--src/hircine/scraper/__init__.py14
-rw-r--r--src/hircine/scraper/types.py32
-rw-r--r--src/hircine/scraper/utils.py13
27 files changed, 441 insertions, 209 deletions
diff --git a/src/hircine/__init__.py b/src/hircine/__init__.py
index 38b969d..935742b 100644
--- a/src/hircine/__init__.py
+++ b/src/hircine/__init__.py
@@ -1 +1 @@
-codename = "Satanic Satyr"
+codename = "Profligate Pixie"
diff --git a/src/hircine/api/__init__.py b/src/hircine/api/__init__.py
index 951f375..0f696b8 100644
--- a/src/hircine/api/__init__.py
+++ b/src/hircine/api/__init__.py
@@ -1,8 +1,3 @@
-import strawberry
-
-int = strawberry.scalar(int, name="int")
-
-
class APIException(Exception):
def __init__(self, graphql_error):
self.graphql_error = graphql_error
diff --git a/src/hircine/api/filters.py b/src/hircine/api/filters.py
index ab44cf9..7ed5649 100644
--- a/src/hircine/api/filters.py
+++ b/src/hircine/api/filters.py
@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
-from typing import Generic, List, Optional, TypeVar
+from typing import Generic, Optional, TypeVar
import strawberry
from sqlalchemy import and_, func, or_, select
@@ -7,7 +7,7 @@ from strawberry import UNSET
import hircine.db
from hircine.db.models import ComicTag
-from hircine.enums import Category, Censorship, Language, Rating
+from hircine.enums import Category, Censorship, Language, Operator, Rating
T = TypeVar("T")
@@ -28,11 +28,23 @@ class Matchable(ABC):
@strawberry.input
+class CountFilter:
+ operator: Optional[Operator] = Operator.EQUAL
+ value: int
+
+ def include(self, column, sql):
+ return sql.where(self.operator.value(column, self.value))
+
+ def exclude(self, column, sql):
+ return sql.where(~self.operator.value(column, self.value))
+
+
+@strawberry.input
class AssociationFilter(Matchable):
- any: Optional[List[int]] = strawberry.field(default_factory=lambda: None)
- all: Optional[List[int]] = strawberry.field(default_factory=lambda: None)
- exact: Optional[List[int]] = strawberry.field(default_factory=lambda: None)
- empty: Optional[bool] = None
+ any: Optional[list[int]] = strawberry.field(default_factory=lambda: None)
+ all: Optional[list[int]] = strawberry.field(default_factory=lambda: None)
+ exact: Optional[list[int]] = strawberry.field(default_factory=lambda: None)
+ count: Optional[CountFilter] = UNSET
def _exists(self, condition):
# The property.primaryjoin expression specifies the primary join path
@@ -71,12 +83,6 @@ class AssociationFilter(Matchable):
def _where_not_all_exist(self, sql):
return sql.where(~self._all_exist(self.all))
- def _empty(self):
- if self.empty:
- return ~self._exists(True)
- else:
- return self._exists(True)
-
def _count_of(self, column):
return (
select(func.count(column))
@@ -117,8 +123,8 @@ class AssociationFilter(Matchable):
elif self.all == []:
sql = sql.where(False)
- if self.empty is not None:
- sql = sql.where(self._empty())
+ if self.count:
+ sql = self.count.include(self.count_column, sql)
if self.exact is not None:
sql = sql.where(self._exact())
@@ -134,8 +140,8 @@ class AssociationFilter(Matchable):
if self.all:
sql = self._where_not_all_exist(sql)
- if self.empty is not None:
- sql = sql.where(~self._empty())
+ if self.count:
+ sql = self.count.exclude(self.count_column, sql)
if self.exact is not None:
sql = sql.where(~self._exact())
@@ -160,8 +166,14 @@ class Root:
column = getattr(self._model, field, None)
+ # count columns are historically singular, so we need this hack
+ singular_field = field[:-1]
+ count_column = getattr(self._model, f"{singular_field}_count", None)
+
if issubclass(type(matcher), Matchable):
matcher.column = column
+ matcher.count_column = count_column
+
if not negate:
sql = matcher.include(sql)
else:
@@ -213,6 +225,17 @@ class StringFilter(Matchable):
@strawberry.input
+class BasicCountFilter(Matchable):
+ count: CountFilter
+
+ def include(self, sql):
+ return self.count.include(self.count_column, sql)
+
+ def exclude(self, sql):
+ return self.count.exclude(self.count_column, sql)
+
+
+@strawberry.input
class TagAssociationFilter(AssociationFilter):
"""
Tags need special handling since their IDs are strings instead of numbers.
@@ -220,9 +243,9 @@ class TagAssociationFilter(AssociationFilter):
we unpack the database IDs from the input IDs.
"""
- any: Optional[List[str]] = strawberry.field(default_factory=lambda: None)
- all: Optional[List[str]] = strawberry.field(default_factory=lambda: None)
- exact: Optional[List[str]] = strawberry.field(default_factory=lambda: None)
+ any: Optional[list[str]] = strawberry.field(default_factory=lambda: None)
+ all: Optional[list[str]] = strawberry.field(default_factory=lambda: None)
+ exact: Optional[list[str]] = strawberry.field(default_factory=lambda: None)
def where(self, id):
try:
@@ -252,7 +275,7 @@ class TagAssociationFilter(AssociationFilter):
@strawberry.input
class Filter(Matchable, Generic[T]):
- any: Optional[List["T"]] = strawberry.field(default_factory=lambda: None)
+ any: Optional[list["T"]] = strawberry.field(default_factory=lambda: None)
empty: Optional[bool] = None
def _empty(self):
@@ -314,24 +337,28 @@ class ArchiveFilter(Root):
@strawberry.input
class ArtistFilter(Root):
name: Optional[StringFilter] = UNSET
+ comics: Optional[BasicCountFilter] = UNSET
@hircine.db.model("Character")
@strawberry.input
class CharacterFilter(Root):
name: Optional[StringFilter] = UNSET
+ comics: Optional[BasicCountFilter] = UNSET
@hircine.db.model("Circle")
@strawberry.input
class CircleFilter(Root):
name: Optional[StringFilter] = UNSET
+ comics: Optional[BasicCountFilter] = UNSET
@hircine.db.model("Namespace")
@strawberry.input
class NamespaceFilter(Root):
name: Optional[StringFilter] = UNSET
+ tags: Optional[BasicCountFilter] = UNSET
@hircine.db.model("Tag")
@@ -339,9 +366,11 @@ class NamespaceFilter(Root):
class TagFilter(Root):
name: Optional[StringFilter] = UNSET
namespaces: Optional[AssociationFilter] = UNSET
+ comics: Optional[BasicCountFilter] = UNSET
@hircine.db.model("World")
@strawberry.input
class WorldFilter(Root):
name: Optional[StringFilter] = UNSET
+ comics: Optional[BasicCountFilter] = UNSET
diff --git a/src/hircine/api/inputs.py b/src/hircine/api/inputs.py
index c88bcce..039c211 100644
--- a/src/hircine/api/inputs.py
+++ b/src/hircine/api/inputs.py
@@ -1,6 +1,6 @@
import datetime
from abc import ABC, abstractmethod
-from typing import List, Optional, Type
+from typing import Optional
import strawberry
from sqlalchemy.orm.util import identity_key
@@ -72,7 +72,7 @@ class Fetchable(ABC):
Additionally, fetched items can be "constrained" to enforce API rules.
"""
- _model: Type[Base]
+ _model: type[Base]
@abstractmethod
async def fetch(self, ctx: MutationContext):
@@ -84,7 +84,7 @@ class Fetchable(ABC):
except AttributeError:
return UpdateMode.REPLACE
- @classmethod
+ @classmethod # noqa: B027
async def constrain_item(cls, item, ctx: MutationContext):
pass
@@ -151,7 +151,7 @@ class Input(FetchableID):
@strawberry.input
class InputList(FetchableID):
- ids: List[int]
+ ids: list[int]
async def fetch(self, ctx: MutationContext):
if not self.ids:
@@ -271,19 +271,19 @@ class WorldsUpdateInput(UpdateInputList):
@strawberry.input
class ComicTagsUpdateInput(UpdateInputList):
- ids: List[str] = strawberry.field(default_factory=lambda: [])
+ ids: list[str] = strawberry.field(default_factory=lambda: [])
@classmethod
def parse_input(cls, id):
try:
return [int(i) for i in id.split(":")]
- except ValueError:
+ except ValueError as err:
raise APIException(
InvalidParameterError(
parameter="id",
text="ComicTag ID must be specified as <namespace_id>:<tag_id>",
)
- )
+ ) from err
@classmethod
async def get_from_ids(cls, ids, ctx: MutationContext):
@@ -334,7 +334,7 @@ class UpsertOptions:
@strawberry.input
class UpsertInputList(FetchableName):
- names: List[str] = strawberry.field(default_factory=lambda: [])
+ names: list[str] = strawberry.field(default_factory=lambda: [])
options: Optional[UpsertOptions] = UNSET
async def fetch(self, ctx: MutationContext):
@@ -383,13 +383,13 @@ class ComicTagsUpsertInput(UpsertInputList):
raise ValueError()
return namespace, tag
- except ValueError:
+ except ValueError as err:
raise APIException(
InvalidParameterError(
parameter="name",
text="ComicTag name must be specified as <namespace>:<tag>",
)
- )
+ ) from err
@classmethod
async def get_from_names(cls, input, ctx: MutationContext, on_missing: OnMissing):
@@ -408,14 +408,14 @@ class ComicTagsUpsertInput(UpsertInputList):
have, missing = await ops.get_all_names(
ctx.session, model, names, options=model.load_full()
)
- dict = {}
+ data = {}
for item in have:
- dict[item.name] = (item, True)
+ data[item.name] = (item, True)
for item in missing:
- dict[item] = (model(name=item), False)
+ data[item] = (model(name=item), False)
- return dict
+ return data
remaining_ns, remaining_tags = zip(*missing)
diff --git a/src/hircine/api/mutation/resolvers.py b/src/hircine/api/mutation/resolvers.py
index 069669e..b3587f7 100644
--- a/src/hircine/api/mutation/resolvers.py
+++ b/src/hircine/api/mutation/resolvers.py
@@ -1,6 +1,5 @@
from datetime import datetime, timezone
from pathlib import Path
-from typing import List
from strawberry import UNSET
@@ -84,26 +83,26 @@ def add(modelcls, post_add=None):
async with db.session() as s:
try:
- object = modelcls()
- ctx = MutationContext(input, object, s)
+ obj = modelcls()
+ ctx = MutationContext(input, obj, s)
async for field, value, _ in fetch_fields(input, ctx):
- setattr(object, field, value)
+ setattr(obj, field, value)
except APIException as e:
return e.graphql_error
- s.add(object)
+ s.add(obj)
await s.flush()
if post_add:
- returnval = await post_add(s, input, object)
+ returnval = await post_add(s, input, obj)
await s.commit()
if returnval:
return returnval
else:
- return AddSuccess(modelcls, object.id)
+ return AddSuccess(modelcls, obj.id)
return inner
@@ -118,19 +117,19 @@ async def post_add_comic(session, input, comic):
return AddComicSuccess(Comic, comic.id, has_remaining)
-def update_attr(object, field, value, mode):
+def update_attr(obj, field, value, mode):
if mode != UpdateMode.REPLACE and isinstance(value, list):
- attr = getattr(object, field)
+ attr = getattr(obj, field)
match mode:
case UpdateMode.ADD:
value.extend(attr)
case UpdateMode.REMOVE:
value = list(set(attr) - set(value))
- setattr(object, field, value)
+ setattr(obj, field, value)
-async def _update(ids: List[int], modelcls, input, successcls):
+async def _update(ids: list[int], modelcls, input, successcls):
multiple = len(ids) > 1
async with db.session() as s:
@@ -143,19 +142,19 @@ async def _update(ids: List[int], modelcls, input, successcls):
if missing:
return IDNotFoundError(modelcls, missing.pop())
- for object in objects:
- s.add(object)
+ for obj in objects:
+ s.add(obj)
try:
- ctx = MutationContext(input, object, s, multiple=multiple)
+ ctx = MutationContext(input, obj, s, multiple=multiple)
async for field, value, mode in fetch_fields(input, ctx):
- update_attr(object, field, value, mode)
+ update_attr(obj, field, value, mode)
except APIException as e:
return e.graphql_error
- if isinstance(object, MixinModifyDates) and s.is_modified(object):
- object.updated_at = datetime.now(tz=timezone.utc)
+ if isinstance(obj, MixinModifyDates) and s.is_modified(obj):
+ obj.updated_at = datetime.now(tz=timezone.utc)
await s.commit()
@@ -163,28 +162,28 @@ async def _update(ids: List[int], modelcls, input, successcls):
def update(modelcls):
- async def inner(ids: List[int], input: update_input_cls(modelcls)):
+ async def inner(ids: list[int], input: update_input_cls(modelcls)):
return await _update(ids, modelcls, input, UpdateSuccess)
return inner
def upsert(modelcls):
- async def inner(ids: List[int], input: upsert_input_cls(modelcls)):
+ async def inner(ids: list[int], input: upsert_input_cls(modelcls)):
return await _update(ids, modelcls, input, UpsertSuccess)
return inner
def delete(modelcls, post_delete=None):
- async def inner(ids: List[int]):
+ async def inner(ids: list[int]):
async with db.session() as s:
objects, missing = await ops.get_all(s, modelcls, ids)
if missing:
return IDNotFoundError(modelcls, missing.pop())
- for object in objects:
- await s.delete(object)
+ for obj in objects:
+ await s.delete(obj)
await s.flush()
diff --git a/src/hircine/api/query/__init__.py b/src/hircine/api/query/__init__.py
index 9d81989..37b22df 100644
--- a/src/hircine/api/query/__init__.py
+++ b/src/hircine/api/query/__init__.py
@@ -1,5 +1,3 @@
-from typing import List
-
import strawberry
import hircine.api.responses as rp
@@ -19,11 +17,12 @@ from hircine.api.types import (
)
from .resolvers import (
- all,
comic_scrapers,
comic_tags,
+ every,
scrape_comic,
single,
+ statistics,
)
@@ -34,21 +33,22 @@ def query(resolver):
@strawberry.type
class Query:
archive: rp.ArchiveResponse = query(single(models.Archive, full=True))
- archives: FilterResult[Archive] = query(all(models.Archive))
+ archives: FilterResult[Archive] = query(every(models.Archive))
artist: rp.ArtistResponse = query(single(models.Artist))
- artists: FilterResult[Artist] = query(all(models.Artist))
+ artists: FilterResult[Artist] = query(every(models.Artist))
character: rp.CharacterResponse = query(single(models.Character))
- characters: FilterResult[Character] = query(all(models.Character))
+ characters: FilterResult[Character] = query(every(models.Character))
circle: rp.CircleResponse = query(single(models.Circle))
- circles: FilterResult[Circle] = query(all(models.Circle))
+ circles: FilterResult[Circle] = query(every(models.Circle))
comic: rp.ComicResponse = query(single(models.Comic, full=True))
- comic_scrapers: List[ComicScraper] = query(comic_scrapers)
+ comic_scrapers: list[ComicScraper] = query(comic_scrapers)
comic_tags: FilterResult[ComicTag] = query(comic_tags)
- comics: FilterResult[Comic] = query(all(models.Comic))
+ comics: FilterResult[Comic] = query(every(models.Comic))
namespace: rp.NamespaceResponse = query(single(models.Namespace))
- namespaces: FilterResult[Namespace] = query(all(models.Namespace))
+ namespaces: FilterResult[Namespace] = query(every(models.Namespace))
tag: rp.TagResponse = query(single(models.Tag, full=True))
- tags: FilterResult[Tag] = query(all(models.Tag))
+ tags: FilterResult[Tag] = query(every(models.Tag))
world: rp.WorldResponse = query(single(models.World))
- worlds: FilterResult[World] = query(all(models.World))
+ worlds: FilterResult[World] = query(every(models.World))
scrape_comic: rp.ScrapeComicResponse = query(scrape_comic)
+ statistics: rp.Statistics = query(statistics)
diff --git a/src/hircine/api/query/resolvers.py b/src/hircine/api/query/resolvers.py
index a18e63e..389a200 100644
--- a/src/hircine/api/query/resolvers.py
+++ b/src/hircine/api/query/resolvers.py
@@ -10,10 +10,13 @@ import hircine.plugins as plugins
from hircine.api.filters import Input as FilterInput
from hircine.api.inputs import Pagination
from hircine.api.responses import (
+ ComicTotals,
IDNotFoundError,
ScraperError,
ScraperNotAvailableError,
ScraperNotFoundError,
+ Statistics,
+ Totals,
)
from hircine.api.sort import Input as SortInput
from hircine.api.types import (
@@ -50,7 +53,7 @@ def single(model, full=False):
return inner
-def all(model):
+def every(model):
typecls = getattr(types, model.__name__)
filtercls = getattr(filters, f"{model.__name__}Filter")
sortcls = getattr(sort, f"{model.__name__}Sort")
@@ -144,3 +147,29 @@ async def scrape_comic(id: int, scraper: str):
)
except ScrapeError as e:
return ScraperError(error=str(e))
+
+
+async def statistics():
+ async with db.session() as s:
+ total = Totals(
+ archives=await ops.count(s, models.Archive),
+ artists=await ops.count(s, models.Artist),
+ characters=await ops.count(s, models.Character),
+ circles=await ops.count(s, models.Circle),
+ comic=ComicTotals(
+ artists=await ops.count(s, models.ComicArtist),
+ characters=await ops.count(s, models.ComicCharacter),
+ circles=await ops.count(s, models.ComicCircle),
+ tags=await ops.count(s, models.ComicTag),
+ worlds=await ops.count(s, models.ComicWorld),
+ ),
+ comics=await ops.count(s, models.Comic),
+ images=await ops.count(s, models.Image),
+ namespaces=await ops.count(s, models.Namespace),
+ pages=await ops.count(s, models.Page),
+ scrapers=len(plugins.get_scrapers()),
+ tags=await ops.count(s, models.Tag),
+ worlds=await ops.count(s, models.World),
+ )
+
+ return Statistics(total=total)
diff --git a/src/hircine/api/responses.py b/src/hircine/api/responses.py
index 99d5113..883705b 100644
--- a/src/hircine/api/responses.py
+++ b/src/hircine/api/responses.py
@@ -147,6 +147,36 @@ class ScraperNotAvailableError(Error):
return f"Scraper {self.scraper} not available for comic ID {self.comic_id}"
+@strawberry.type
+class ComicTotals:
+ artists: int
+ characters: int
+ circles: int
+ tags: int
+ worlds: int
+
+
+@strawberry.type
+class Totals:
+ archives: int
+ artists: int
+ characters: int
+ circles: int
+ comics: int
+ comic: ComicTotals
+ images: int
+ namespaces: int
+ pages: int
+ scrapers: int
+ tags: int
+ worlds: int
+
+
+@strawberry.type
+class Statistics:
+ total: Totals
+
+
AddComicResponse = Annotated[
Union[
AddComicSuccess,
diff --git a/src/hircine/api/sort.py b/src/hircine/api/sort.py
index 17043a6..a4ccaf1 100644
--- a/src/hircine/api/sort.py
+++ b/src/hircine/api/sort.py
@@ -22,6 +22,10 @@ class ComicSort(enum.Enum):
DATE = strawberry.enum_value(models.Comic.date)
CREATED_AT = strawberry.enum_value(models.Comic.created_at)
UPDATED_AT = strawberry.enum_value(models.Comic.updated_at)
+ ARTIST_COUNT = strawberry.enum_value(models.Comic.artist_count)
+ CHARACTER_COUNT = strawberry.enum_value(models.Comic.character_count)
+ CIRCLE_COUNT = strawberry.enum_value(models.Comic.circle_count)
+ WORLD_COUNT = strawberry.enum_value(models.Comic.world_count)
TAG_COUNT = strawberry.enum_value(models.Comic.tag_count)
PAGE_COUNT = strawberry.enum_value(models.Comic.page_count)
RANDOM = "Random"
@@ -41,6 +45,7 @@ class ArtistSort(enum.Enum):
NAME = strawberry.enum_value(models.Artist.name)
CREATED_AT = strawberry.enum_value(models.Artist.created_at)
UPDATED_AT = strawberry.enum_value(models.Artist.updated_at)
+ COMIC_COUNT = strawberry.enum_value(models.Artist.comic_count)
RANDOM = "Random"
@@ -49,6 +54,7 @@ class CharacterSort(enum.Enum):
NAME = strawberry.enum_value(models.Character.name)
CREATED_AT = strawberry.enum_value(models.Character.created_at)
UPDATED_AT = strawberry.enum_value(models.Character.updated_at)
+ COMIC_COUNT = strawberry.enum_value(models.Character.comic_count)
RANDOM = "Random"
@@ -57,6 +63,7 @@ class CircleSort(enum.Enum):
NAME = strawberry.enum_value(models.Circle.name)
CREATED_AT = strawberry.enum_value(models.Circle.created_at)
UPDATED_AT = strawberry.enum_value(models.Circle.updated_at)
+ COMIC_COUNT = strawberry.enum_value(models.Circle.comic_count)
RANDOM = "Random"
@@ -66,6 +73,7 @@ class NamespaceSort(enum.Enum):
NAME = strawberry.enum_value(models.Namespace.name)
CREATED_AT = strawberry.enum_value(models.Namespace.created_at)
UPDATED_AT = strawberry.enum_value(models.Namespace.updated_at)
+ TAG_COUNT = strawberry.enum_value(models.Namespace.tag_count)
RANDOM = "Random"
@@ -74,6 +82,8 @@ class TagSort(enum.Enum):
NAME = strawberry.enum_value(models.Tag.name)
CREATED_AT = strawberry.enum_value(models.Tag.created_at)
UPDATED_AT = strawberry.enum_value(models.Tag.updated_at)
+ COMIC_COUNT = strawberry.enum_value(models.Tag.comic_count)
+ NAMESPACE_COUNT = strawberry.enum_value(models.Tag.namespace_count)
RANDOM = "Random"
@@ -82,6 +92,7 @@ class WorldSort(enum.Enum):
NAME = strawberry.enum_value(models.World.name)
CREATED_AT = strawberry.enum_value(models.World.created_at)
UPDATED_AT = strawberry.enum_value(models.World.updated_at)
+ COMIC_COUNT = strawberry.enum_value(models.World.comic_count)
RANDOM = "Random"
diff --git a/src/hircine/api/types.py b/src/hircine/api/types.py
index bbd13fa..68b2ccc 100644
--- a/src/hircine/api/types.py
+++ b/src/hircine/api/types.py
@@ -1,5 +1,5 @@
import datetime
-from typing import Generic, List, Optional, TypeVar
+from typing import Generic, Optional, TypeVar
import strawberry
@@ -74,7 +74,7 @@ class MixinModifyDates(MixinCreatedAt):
@strawberry.type
class FilterResult(Generic[T]):
count: int
- edges: List["T"]
+ edges: list["T"]
@strawberry.type
@@ -94,8 +94,8 @@ class Archive(MixinName, MixinOrganized, Base):
@strawberry.type
class FullArchive(MixinCreatedAt, Archive):
- pages: List["Page"]
- comics: List["Comic"]
+ pages: list["Page"]
+ comics: list["Comic"]
mtime: datetime.datetime
def __init__(self, model):
@@ -143,11 +143,11 @@ class Comic(MixinFavourite, MixinOrganized, MixinBookmarked, Base):
rating: Optional[Rating]
category: Optional[Category]
censorship: Optional[Censorship]
- tags: List["ComicTag"]
- artists: List["Artist"]
- characters: List["Character"]
- circles: List["Circle"]
- worlds: List["World"]
+ tags: list["ComicTag"]
+ artists: list["Artist"]
+ characters: list["Character"]
+ circles: list["Circle"]
+ worlds: list["World"]
page_count: int
def __init__(self, model):
@@ -172,7 +172,7 @@ class Comic(MixinFavourite, MixinOrganized, MixinBookmarked, Base):
class FullComic(MixinModifyDates, Comic):
archive: "Archive"
url: Optional[str]
- pages: List["Page"]
+ pages: list["Page"]
direction: Direction
layout: Layout
@@ -196,7 +196,7 @@ class Tag(MixinName, Base):
@strawberry.type
class FullTag(Tag):
- namespaces: List["Namespace"]
+ namespaces: list["Namespace"]
def __init__(self, model):
super().__init__(model)
@@ -270,7 +270,7 @@ class ComicScraper:
@strawberry.type
class ScrapeComicResult:
data: "ScrapedComic"
- warnings: List[str] = strawberry.field(default_factory=lambda: [])
+ warnings: list[str] = strawberry.field(default_factory=lambda: [])
@strawberry.type
@@ -285,11 +285,11 @@ class ScrapedComic:
censorship: Optional[Censorship] = None
direction: Optional[Direction] = None
layout: Optional[Layout] = None
- tags: List[str] = strawberry.field(default_factory=lambda: [])
- artists: List[str] = strawberry.field(default_factory=lambda: [])
- characters: List[str] = strawberry.field(default_factory=lambda: [])
- circles: List[str] = strawberry.field(default_factory=lambda: [])
- worlds: List[str] = strawberry.field(default_factory=lambda: [])
+ tags: list[str] = strawberry.field(default_factory=lambda: [])
+ artists: list[str] = strawberry.field(default_factory=lambda: [])
+ characters: list[str] = strawberry.field(default_factory=lambda: [])
+ circles: list[str] = strawberry.field(default_factory=lambda: [])
+ worlds: list[str] = strawberry.field(default_factory=lambda: [])
@classmethod
def from_generator(cls, generator):
diff --git a/src/hircine/db/models.py b/src/hircine/db/models.py
index 575771b..5d1a59a 100644
--- a/src/hircine/db/models.py
+++ b/src/hircine/db/models.py
@@ -1,6 +1,6 @@
import os
from datetime import date, datetime, timezone
-from typing import List, Optional
+from typing import Optional
from sqlalchemy import (
DateTime,
@@ -104,12 +104,12 @@ class Archive(MixinID, MixinCreatedAt, MixinOrganized, Base):
cover_id: Mapped[int] = mapped_column(ForeignKey("image.id"))
cover: Mapped["Image"] = relationship(lazy="joined", innerjoin=True)
- pages: Mapped[List["Page"]] = relationship(
+ pages: Mapped[list["Page"]] = relationship(
back_populates="archive",
order_by="(Page.index)",
cascade="save-update, merge, expunge, delete, delete-orphan",
)
- comics: Mapped[List["Comic"]] = relationship(
+ comics: Mapped[list["Comic"]] = relationship(
back_populates="archive",
cascade="save-update, merge, expunge, delete, delete-orphan",
)
@@ -176,37 +176,37 @@ class Comic(
archive_id: Mapped[int] = mapped_column(ForeignKey("archive.id"))
archive: Mapped["Archive"] = relationship(back_populates="comics")
- pages: Mapped[List["Page"]] = relationship(order_by="(Page.index)")
+ pages: Mapped[list["Page"]] = relationship(order_by="(Page.index)")
page_count: Mapped[int]
- tags: Mapped[List["ComicTag"]] = relationship(
+ tags: Mapped[list["ComicTag"]] = relationship(
lazy="selectin",
cascade="save-update, merge, expunge, delete, delete-orphan",
passive_deletes=True,
)
- artists: Mapped[List["Artist"]] = relationship(
+ artists: Mapped[list["Artist"]] = relationship(
secondary="comicartist",
lazy="selectin",
order_by="(Artist.name, Artist.id)",
passive_deletes=True,
)
- characters: Mapped[List["Character"]] = relationship(
+ characters: Mapped[list["Character"]] = relationship(
secondary="comiccharacter",
lazy="selectin",
order_by="(Character.name, Character.id)",
passive_deletes=True,
)
- circles: Mapped[List["Circle"]] = relationship(
+ circles: Mapped[list["Circle"]] = relationship(
secondary="comiccircle",
lazy="selectin",
order_by="(Circle.name, Circle.id)",
passive_deletes=True,
)
- worlds: Mapped[List["World"]] = relationship(
+ worlds: Mapped[list["World"]] = relationship(
secondary="comicworld",
lazy="selectin",
order_by="(World.name, World.id)",
@@ -233,7 +233,7 @@ class Comic(
class Tag(MixinID, MixinModifyDates, MixinName, Base):
description: Mapped[Optional[str]]
- namespaces: Mapped[List["Namespace"]] = relationship(
+ namespaces: Mapped[list["Namespace"]] = relationship(
secondary="tagnamespaces",
passive_deletes=True,
order_by="(Namespace.sort_name, Namespace.name, Namespace.id)",
@@ -356,7 +356,10 @@ class ComicWorld(Base):
def defer_relationship_count(relationship, secondary=False):
- left, right = relationship.property.synchronize_pairs[0]
+ if secondary:
+ left, right = relationship.property.secondary_synchronize_pairs[0]
+ else:
+ left, right = relationship.property.synchronize_pairs[0]
return deferred(
select(func.count(right))
@@ -366,7 +369,23 @@ def defer_relationship_count(relationship, secondary=False):
)
+Comic.artist_count = defer_relationship_count(Comic.artists)
+Comic.character_count = defer_relationship_count(Comic.characters)
+Comic.circle_count = defer_relationship_count(Comic.circles)
Comic.tag_count = defer_relationship_count(Comic.tags)
+Comic.world_count = defer_relationship_count(Comic.worlds)
+
+Artist.comic_count = defer_relationship_count(Comic.artists, secondary=True)
+Character.comic_count = defer_relationship_count(Comic.characters, secondary=True)
+Circle.comic_count = defer_relationship_count(Comic.circles, secondary=True)
+Namespace.tag_count = defer_relationship_count(Tag.namespaces, secondary=True)
+Tag.comic_count = deferred(
+ select(func.count(ComicTag.tag_id))
+ .where(Tag.id == ComicTag.tag_id)
+ .scalar_subquery()
+)
+Tag.namespace_count = defer_relationship_count(Tag.namespaces)
+World.comic_count = defer_relationship_count(Comic.worlds, secondary=True)
@event.listens_for(Comic.pages, "bulk_replace")
diff --git a/src/hircine/db/ops.py b/src/hircine/db/ops.py
index c164cd2..8cc5ddc 100644
--- a/src/hircine/db/ops.py
+++ b/src/hircine/db/ops.py
@@ -1,7 +1,7 @@
import random
from collections import defaultdict
-from sqlalchemy import delete, func, null, select, text, tuple_
+from sqlalchemy import delete, func, literal_column, null, select, text, tuple_
from sqlalchemy.orm import contains_eager, undefer
from sqlalchemy.orm.util import identity_key
from strawberry import UNSET
@@ -116,15 +116,18 @@ def lookup_identity(session, model, ids):
satisfied = set()
for id in ids:
- object = session.identity_map.get(identity_key(model, id), None)
- if object is not None:
- objects.append(object)
+ obj = session.identity_map.get(identity_key(model, id), None)
+ if obj is not None:
+ objects.append(obj)
satisfied.add(id)
return objects, satisfied
-async def get_all(session, model, ids, options=[], use_identity_map=False):
+async def get_all(session, model, ids, options=None, use_identity_map=False):
+ if not options:
+ options = []
+
objects = []
ids = set(ids)
@@ -140,20 +143,23 @@ async def get_all(session, model, ids, options=[], use_identity_map=False):
objects += (await session.scalars(sql)).unique().all()
- fetched_ids = [object.id for object in objects]
+ fetched_ids = [obj.id for obj in objects]
missing = set(ids) - set(fetched_ids)
return objects, missing
-async def get_all_names(session, model, names, options=[]):
+async def get_all_names(session, model, names, options=None):
+ if not options:
+ options = []
+
names = set(names)
sql = select(model).where(model.name.in_(names)).options(*options)
objects = (await session.scalars(sql)).unique().all()
- fetched_names = [object.name for object in objects]
+ fetched_names = [obj.name for obj in objects]
missing = set(names) - set(fetched_names)
return objects, missing
@@ -198,3 +204,9 @@ async def delete_all(session, model, ids):
result = await session.execute(delete(model).where(model.id.in_(ids)))
return result.rowcount
+
+
+async def count(session, model):
+ sql = select(func.count(literal_column("1"))).select_from(model)
+
+ return (await session.execute(sql)).scalar_one()
diff --git a/src/hircine/enums.py b/src/hircine/enums.py
index 7f95f02..f267270 100644
--- a/src/hircine/enums.py
+++ b/src/hircine/enums.py
@@ -1,4 +1,5 @@
import enum
+import operator
import strawberry
@@ -57,6 +58,13 @@ class OnMissing(enum.Enum):
@strawberry.enum
+class Operator(enum.Enum):
+ GREATER_THAN = operator.gt
+ LOWER_THAN = operator.lt
+ EQUAL = operator.eq
+
+
+@strawberry.enum
class Language(enum.Enum):
AA = "Afar"
AB = "Abkhazian"
diff --git a/src/hircine/migrations/env.py b/src/hircine/migrations/env.py
index 6df03ec..1351e54 100644
--- a/src/hircine/migrations/env.py
+++ b/src/hircine/migrations/env.py
@@ -2,11 +2,12 @@ import asyncio
from logging.config import fileConfig
from alembic import context
-from hircine.db.models import Base
from sqlalchemy import pool
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import async_engine_from_config
+from hircine.db.models import Base
+
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
diff --git a/src/hircine/plugins/__init__.py b/src/hircine/plugins/__init__.py
index 27e55a7..7bd3612 100644
--- a/src/hircine/plugins/__init__.py
+++ b/src/hircine/plugins/__init__.py
@@ -1,14 +1,13 @@
from importlib.metadata import entry_points
-from typing import Dict, Type
from hircine.scraper import Scraper
-scraper_registry: Dict[str, Type[Scraper]] = {}
+scraper_registry: dict[str, type[Scraper]] = {}
transformers = []
def get_scraper(name):
- return scraper_registry.get(name, None)
+ return scraper_registry.get(name)
def get_scrapers():
@@ -38,7 +37,7 @@ def transformer(function):
return _decorate(function)
-def load(): # pragma: nocover
+def load(): # pragma: no cover
for entry in entry_points(group="hircine.scraper"):
register_scraper(entry.name, entry.load())
diff --git a/src/hircine/plugins/scrapers/anchira.py b/src/hircine/plugins/scrapers/anchira.py
index aa224b9..1e89ffb 100644
--- a/src/hircine/plugins/scrapers/anchira.py
+++ b/src/hircine/plugins/scrapers/anchira.py
@@ -17,9 +17,10 @@ from hircine.scraper.types import (
Title,
World,
)
-from hircine.scraper.utils import open_archive_file
+from hircine.scraper.utils import open_archive_file, parse_dict
-URL_REGEX = re.compile(r"^https?://anchira\.to/g/")
+ANCHIRA_REGEX = re.compile(r"^https?://anchira\.to/g/")
+NEXUS_REGEX = re.compile(r"^https?://hentainexus\.com/")
class AnchiraYamlScraper(Scraper):
@@ -45,8 +46,15 @@ class AnchiraYamlScraper(Scraper):
self.data = self.load()
source = self.data.get("Source")
- if source and re.match(URL_REGEX, source):
- self.is_available = True
+ if source:
+ if re.match(ANCHIRA_REGEX, source) or re.match(NEXUS_REGEX, source):
+ self.is_available = True
+ else:
+ # heuristic, but should be good enough
+ url = self.data.get("URL")
+ parody = self.data.get("Parody")
+
+ self.is_available = url is not None and parody is not None
def load(self):
try:
@@ -66,16 +74,7 @@ class AnchiraYamlScraper(Scraper):
"Tags": self.parse_tag,
}
- for field, parser in parsers.items():
- if field not in self.data:
- continue
-
- value = self.data[field]
-
- if isinstance(value, list):
- yield from [lambda i=x: parser(i) for x in value]
- else:
- yield lambda: parser(value)
+ yield from parse_dict(parsers, self.data)
yield Language(enums.Language.EN)
yield Direction(enums.Direction.RIGHT_TO_LEFT)
diff --git a/src/hircine/plugins/scrapers/ehentai_api.py b/src/hircine/plugins/scrapers/ehentai_api.py
index 70fcf57..e34d80c 100644
--- a/src/hircine/plugins/scrapers/ehentai_api.py
+++ b/src/hircine/plugins/scrapers/ehentai_api.py
@@ -55,21 +55,23 @@ class EHentaiAPIScraper(Scraper):
request = requests.post(API_URL, data=data)
- if request.status_code == requests.codes.ok:
- try:
- response = json.loads(request.text)["gmetadata"][0]
-
- title = response.get("title")
- if title:
- response["title"] = html.unescape(title)
-
- title_jpn = response.get("title_jpn")
- if title_jpn:
- response["title_jpn"] = html.unescape(title_jpn)
-
- handler = ExHentaiHandler()
- yield from handler.scrape(response)
- except json.JSONDecodeError:
- raise ScrapeError("Could not parse JSON response")
- else:
- raise ScrapeError(f"Request failed with status code {request.status_code}'")
+ if request.status_code != requests.codes.ok:
+ raise ScrapeError(f"Request failed with status code {request.status_code}")
+
+ try:
+ response = json.loads(request.text)["gmetadata"][0]
+ except json.JSONDecodeError as err:
+ raise ScrapeError("Could not parse JSON response") from err
+ except (KeyError, IndexError) as err:
+ raise ScrapeError("Response is missing 'gmetadata' field") from err
+
+ title = response.get("title")
+ if title:
+ response["title"] = html.unescape(title)
+
+ title_jpn = response.get("title_jpn")
+ if title_jpn:
+ response["title_jpn"] = html.unescape(title_jpn)
+
+ handler = ExHentaiHandler()
+ yield from handler.scrape(response)
diff --git a/src/hircine/plugins/scrapers/gallery_dl.py b/src/hircine/plugins/scrapers/gallery_dl.py
index a6cebc4..17f85d0 100644
--- a/src/hircine/plugins/scrapers/gallery_dl.py
+++ b/src/hircine/plugins/scrapers/gallery_dl.py
@@ -36,7 +36,7 @@ class GalleryDLScraper(Scraper):
self.data = self.load()
category = self.data.get("category")
- if category in HANDLERS.keys():
+ if category in HANDLERS:
self.is_available = True
self.handler = HANDLERS.get(category)()
diff --git a/src/hircine/plugins/scrapers/handlers/dynastyscans.py b/src/hircine/plugins/scrapers/handlers/dynastyscans.py
index ded015b..fd9bf42 100644
--- a/src/hircine/plugins/scrapers/handlers/dynastyscans.py
+++ b/src/hircine/plugins/scrapers/handlers/dynastyscans.py
@@ -1,5 +1,3 @@
-import hircine.enums as enums
-from hircine.scraper import ScrapeWarning
from hircine.scraper.types import (
Artist,
Circle,
@@ -16,7 +14,7 @@ class DynastyScansHandler:
def scrape(self, data):
parsers = {
"date": Date.from_iso,
- "lang": self.parse_language,
+ "lang": Language.from_iso_639_3,
"author": Artist,
"group": Circle,
}
@@ -33,9 +31,3 @@ class DynastyScansHandler:
title = title + f": {subtitle}"
yield Title(title)
-
- def parse_language(self, input):
- try:
- return Language(value=enums.Language[input.upper()])
- except (KeyError, ValueError) as e:
- raise ScrapeWarning(f"Could not parse language: '{input}'") from e
diff --git a/src/hircine/plugins/scrapers/handlers/e621.py b/src/hircine/plugins/scrapers/handlers/e621.py
index 6b798fd..b7cd83a 100644
--- a/src/hircine/plugins/scrapers/handlers/e621.py
+++ b/src/hircine/plugins/scrapers/handlers/e621.py
@@ -1,5 +1,4 @@
import hircine.enums as enums
-from hircine.scraper import ScrapeWarning
from hircine.scraper.types import (
URL,
Artist,
@@ -75,7 +74,4 @@ class E621Handler:
if input.endswith("_text"):
lang, _ = input.split("_text", 1)
- try:
- return Language(value=enums.Language(lang.capitalize()))
- except ValueError as e:
- raise ScrapeWarning(f"Could not parse language: '{input}'") from e
+ return Language.from_name(lang)
diff --git a/src/hircine/plugins/scrapers/handlers/exhentai.py b/src/hircine/plugins/scrapers/handlers/exhentai.py
index 12c22d7..7bffc4f 100644
--- a/src/hircine/plugins/scrapers/handlers/exhentai.py
+++ b/src/hircine/plugins/scrapers/handlers/exhentai.py
@@ -1,7 +1,6 @@
import re
import hircine.enums as enums
-from hircine.scraper import ScrapeWarning
from hircine.scraper.types import (
URL,
Artist,
@@ -39,7 +38,7 @@ class ExHentaiHandler:
category_field: self.parse_category,
"posted": Date.from_timestamp,
"date": Date.from_iso,
- "lang": self.parse_language,
+ "lang": Language.from_iso_639_3,
"tags": self.parse_tag,
"title": lambda t: Title(sanitize(t, split=True)),
"title_jpn": lambda t: OriginalTitle(sanitize(t)),
@@ -60,6 +59,9 @@ class ExHentaiHandler:
yield Censorship(enums.Censorship.NONE)
else:
yield Censorship(enums.Censorship.BAR)
+ else:
+ if not self.has_censorship_tag:
+ yield Censorship(enums.Censorship.NONE)
if self.is_likely_rtl:
yield Direction(enums.Direction.RIGHT_TO_LEFT)
@@ -130,10 +132,4 @@ class ExHentaiHandler:
if not input or input in ["translated", "speechless", "N/A"]:
return
- try:
- if from_value:
- return Language(value=enums.Language(input.capitalize()))
- else:
- return Language(value=enums.Language[input.upper()])
- except (KeyError, ValueError) as e:
- raise ScrapeWarning(f"Could not parse language: '{input}'") from e
+ return Language.from_name(input)
diff --git a/src/hircine/plugins/scrapers/handlers/mangadex.py b/src/hircine/plugins/scrapers/handlers/mangadex.py
index 7bc371d..4a8d5fd 100644
--- a/src/hircine/plugins/scrapers/handlers/mangadex.py
+++ b/src/hircine/plugins/scrapers/handlers/mangadex.py
@@ -1,5 +1,3 @@
-import hircine.enums as enums
-from hircine.scraper import ScrapeWarning
from hircine.scraper.types import (
URL,
Artist,
@@ -18,7 +16,7 @@ class MangadexHandler:
def scrape(self, data):
parsers = {
"date": Date.from_iso,
- "lang": self.parse_language,
+ "lang": Language.from_iso_639_3,
"tags": Tag.from_string,
"artist": Artist,
"author": Artist,
@@ -40,15 +38,9 @@ class MangadexHandler:
if volume:
title = title + f", Ch. {chapter}"
else:
- title = title + f"Ch. {chapter}"
+ title = title + f" Ch. {chapter}"
if subtitle := data.get("title"):
title = title + f": {subtitle}"
yield Title(title)
-
- def parse_language(self, input):
- try:
- return Language(value=enums.Language[input.upper()])
- except (KeyError, ValueError) as e:
- raise ScrapeWarning(f"Could not parse language: '{input}'") from e
diff --git a/src/hircine/plugins/scrapers/schale_network.py b/src/hircine/plugins/scrapers/schale_network.py
new file mode 100644
index 0000000..e38cfe8
--- /dev/null
+++ b/src/hircine/plugins/scrapers/schale_network.py
@@ -0,0 +1,82 @@
+import re
+
+import yaml
+
+import hircine.enums as enums
+from hircine.scraper import Scraper
+from hircine.scraper.types import (
+ Artist,
+ Censorship,
+ Circle,
+ Direction,
+ Language,
+ Tag,
+ Title,
+)
+from hircine.scraper.utils import open_archive_file, parse_dict
+
+SOURCE_REGEX = re.compile(r"^SchaleNetwork:")
+
+
+class SchaleNetworkScraper(Scraper):
+ """
+ A scraper for ``info.yaml`` files found in archives downloaded from
+ *schale.network*.
+
+ .. list-table::
+ :align: left
+
+ * - **Requires**
+ - ``info.yaml`` in the archive or as a sidecar.
+ * - **Source**
+ - ``schale.network``
+ """
+
+ name = "schale.network info.yaml"
+ source = "schale.network"
+
+ def __init__(self, comic):
+ super().__init__(comic)
+
+ self.data = self.load()
+ source = self.data.get("source")
+
+ if source and re.match(SOURCE_REGEX, source):
+ self.is_available = True
+
+ def load(self):
+ try:
+ with open_archive_file(self.comic.archive, "info.yaml") as yif:
+ return yaml.safe_load(yif)
+ except Exception:
+ return {}
+
+ def scrape(self):
+ parsers = {
+ "title": Title,
+ "artist": Artist,
+ "circle": Circle,
+ "general": Tag.from_string,
+ "male": lambda s: Tag(namespace="male", tag=s),
+ "female": lambda s: Tag(namespace="female", tag=s),
+ "mixed": lambda s: Tag(namespace="mixed", tag=s),
+ "language": self.parse_language,
+ "other": self.parse_other,
+ }
+
+ yield from parse_dict(parsers, self.data)
+
+ yield Direction(enums.Direction.RIGHT_TO_LEFT)
+
+ def parse_language(self, input):
+ if not input or input in ["translated"]:
+ return
+
+ return Language.from_name(input)
+
+ def parse_other(self, input):
+ match input:
+ case "uncensored":
+ return Censorship(value=enums.Censorship.NONE)
+ case _:
+ return Tag.from_string(input)
diff --git a/src/hircine/scanner.py b/src/hircine/scanner.py
index 162e1f0..6e3fafb 100644
--- a/src/hircine/scanner.py
+++ b/src/hircine/scanner.py
@@ -7,8 +7,8 @@ from concurrent.futures import ProcessPoolExecutor
from datetime import datetime, timezone
from enum import Enum
from hashlib import file_digest
-from typing import List, NamedTuple
-from zipfile import ZipFile, is_zipfile
+from typing import NamedTuple
+from zipfile import BadZipFile, ZipFile, is_zipfile
from blake3 import blake3
from natsort import natsorted, ns
@@ -56,7 +56,7 @@ class Registry:
@property
def duplicates(self):
- for hash, value in self.marked.items():
+ for _, value in self.marked.items():
if len(value) > 1:
yield value
@@ -86,7 +86,7 @@ class AddArchive(NamedTuple):
path: str
size: int
mtime: datetime
- members: List[Member]
+ members: list[Member]
async def upsert_images(self, session):
input = [
@@ -286,6 +286,11 @@ class Scanner:
hash = blake3()
with ZipFile(path, mode="r") as z:
+ try:
+ z.testzip()
+ except Exception as e:
+ raise BadZipFile(f"Corrupt zip file {path}") from e
+
input = [(path, info.filename) for info in z.infolist()]
loop = asyncio.get_event_loop()
@@ -302,19 +307,18 @@ class Scanner:
def process_member(self, input):
path, name = input
- with ZipFile(path, mode="r") as zip:
- with zip.open(name, mode="r") as member:
- _, ext = os.path.splitext(name)
- digest = file_digest(member, blake3).digest()
+ with ZipFile(path, mode="r") as ziph, ziph.open(name, mode="r") as member:
+ _, ext = os.path.splitext(name)
+ digest = file_digest(member, blake3).digest()
- if self.thumbnailer.can_process(ext):
- hash = digest.hex()
+ if self.thumbnailer.can_process(ext):
+ hash = digest.hex()
- width, height = self.thumbnailer.process(
- member, hash, reprocess=self.reprocess
- )
- return digest, Member(
- path=member.name, hash=hash, width=width, height=height
- )
+ width, height = self.thumbnailer.process(
+ member, hash, reprocess=self.reprocess
+ )
+ return digest, Member(
+ path=member.name, hash=hash, width=width, height=height
+ )
return digest, None
diff --git a/src/hircine/scraper/__init__.py b/src/hircine/scraper/__init__.py
index c04265a..4606f4a 100644
--- a/src/hircine/scraper/__init__.py
+++ b/src/hircine/scraper/__init__.py
@@ -81,15 +81,21 @@ class Scraper(ABC):
"""
pass
- def collect(self, transformers=[]):
+ def collect(self, transformers=None):
+ if transformers is None:
+ transformers = []
+
def generator():
- for result in self.scrape():
- if callable(result):
+ for item in self.scrape():
+ if callable(item):
try:
- yield result()
+ result = item()
except ScrapeWarning as e:
self.log_warning(e)
else:
+ result = item
+
+ if result is not None:
yield result
gen = generator()
diff --git a/src/hircine/scraper/types.py b/src/hircine/scraper/types.py
index 534792b..23cb634 100644
--- a/src/hircine/scraper/types.py
+++ b/src/hircine/scraper/types.py
@@ -137,6 +137,38 @@ class Language:
def __bool__(self):
return self.value is not None
+ @classmethod
+ def from_iso_639_3(cls, string):
+ """
+ Returns a new instance of this class given a case-insensitive ISO 639-3
+ language code.
+
+ :param str string: The ISO 639-3 language code.
+ :raise: :exc:`~hircine.scraper.ScrapeWarning` if the language code could
+ not be parsed.
+ """
+ try:
+ return Language(value=hircine.enums.Language[string.upper()])
+ except KeyError as e:
+ raise ScrapeWarning(
+ f"Could not parse language code: '{string}' as ISO 639-3"
+ ) from e
+
+ @classmethod
+ def from_name(cls, string):
+ """
+ Returns a new instance of this class given a case-insensitive language name.
+ Permissible language names are defined in :class:`hircine.enums.Language`.
+
+ :param str string: The language name.
+ :raise: :exc:`~hircine.scraper.ScrapeWarning` if the language name could
+ not be parsed.
+ """
+ try:
+ return Language(value=hircine.enums.Language(string.capitalize()))
+ except ValueError as e:
+ raise ScrapeWarning(f"Could not parse language name: '{string}'") from e
+
@dataclass(frozen=True)
class Direction:
diff --git a/src/hircine/scraper/utils.py b/src/hircine/scraper/utils.py
index 6afa2ed..4e6e9f1 100644
--- a/src/hircine/scraper/utils.py
+++ b/src/hircine/scraper/utils.py
@@ -27,15 +27,15 @@ def parse_dict(parsers, data):
value = data[field]
if isinstance(value, list):
- yield from [lambda i=x: parser(i) for x in value]
+ yield from [lambda x=x, parser=parser: parser(x) for x in value]
elif isinstance(value, dict):
yield from parse_dict(parser, value)
else:
- yield lambda: parser(value)
+ yield lambda parser=parser, value=value: parser(value)
@contextmanager
-def open_archive_file(archive, member, check_sidecar=True): # pragma: no cover
+def open_archive_file(archive, member, check_sidecar=True):
"""
Open an archive file for use with the :ref:`with <with>` statement. Yields
a :term:`file object` obtained from:
@@ -52,11 +52,10 @@ def open_archive_file(archive, member, check_sidecar=True): # pragma: no cover
sidecar = f"{archive.path}.{member}"
if os.path.exists(sidecar):
- with open(sidecar, "r") as file:
+ with open(sidecar) as file:
yield file
return
- with ZipFile(archive.path, "r") as zip:
- with zip.open(member, "r") as file:
- yield file
+ with ZipFile(archive.path, "r") as ziph, ziph.open(member, "r") as file:
+ yield file