diff options
author | Wolfgang Müller | 2024-03-05 18:08:09 +0100 |
---|---|---|
committer | Wolfgang Müller | 2024-03-05 19:25:59 +0100 |
commit | d1d654ebac2d51e3841675faeb56480e440f622f (patch) | |
tree | 56ef123c1a15a10dfd90836e4038e27efde950c6 /docs | |
download | hircine-d1d654ebac2d51e3841675faeb56480e440f622f.tar.gz |
Initial commit0.1.0
Diffstat (limited to '')
37 files changed, 1306 insertions, 0 deletions
diff --git a/docs/_examples/example_scraper.json b/docs/_examples/example_scraper.json new file mode 100644 index 0000000..9efe126 --- /dev/null +++ b/docs/_examples/example_scraper.json @@ -0,0 +1,8 @@ +{ + "title": "This is a Title", + "tags": { + "artists": ["Alan Smithee", "Noah Ward"], + "characters": ["A", "B", "C"], + "misc": ["horror", "sci-fi"] + } +} diff --git a/docs/_examples/example_scraper.py b/docs/_examples/example_scraper.py new file mode 100644 index 0000000..d00c292 --- /dev/null +++ b/docs/_examples/example_scraper.py @@ -0,0 +1,37 @@ +import json + +from hircine.scraper import Scraper +from hircine.scraper.types import Artist, Character, Tag, Title +from hircine.scraper.utils import open_archive_file, parse_dict + + +class MyScraper(Scraper): + name = "Example scraper" + source = "example" + + def __init__(self, comic): + super().__init__(comic) + + self.data = self.load() + + if self.data: + self.is_available = True + + def load(self): + try: + with open_archive_file(self.comic.archive, "metadata.json") as jif: + return json.load(jif) + except Exception: + return {} + + def scrape(self): + parsers = { + "title": Title, + "tags": { + "artists": Artist, + "misc": Tag.from_string, + "characters": Character, + }, + } + + yield from parse_dict(parsers, self.data) diff --git a/docs/_examples/example_transformer.py b/docs/_examples/example_transformer.py new file mode 100644 index 0000000..6e443ae --- /dev/null +++ b/docs/_examples/example_transformer.py @@ -0,0 +1,21 @@ +from hircine.plugins import transformer +from hircine.scraper.types import Artist, Tag + + +@transformer +def transform(generator, info): + for item in generator: + # Ignore the "Drama" tag when scraping from mangadex + if info.source == "mangadex": + match item: + case Tag(tag="Drama"): + continue + + # convert all Artist names to lowercase + match item: + case Artist(name): + yield Artist(name.lower()) + continue + + # other items are not modified + yield item diff --git a/docs/_images/archive.jpg b/docs/_images/archive.jpg Binary files differnew file mode 100644 index 0000000..3ea2310 --- /dev/null +++ b/docs/_images/archive.jpg diff --git a/docs/_images/comic-edit.jpg b/docs/_images/comic-edit.jpg Binary files differnew file mode 100644 index 0000000..cef6455 --- /dev/null +++ b/docs/_images/comic-edit.jpg diff --git a/docs/_images/comics.jpg b/docs/_images/comics.jpg Binary files differnew file mode 100644 index 0000000..5dd9c04 --- /dev/null +++ b/docs/_images/comics.jpg diff --git a/docs/_images/filtering.jpg b/docs/_images/filtering.jpg Binary files differnew file mode 100644 index 0000000..a61204d --- /dev/null +++ b/docs/_images/filtering.jpg diff --git a/docs/_images/scraper.jpg b/docs/_images/scraper.jpg Binary files differnew file mode 100644 index 0000000..1da82a8 --- /dev/null +++ b/docs/_images/scraper.jpg diff --git a/docs/_static/favicon.svg b/docs/_static/favicon.svg new file mode 100644 index 0000000..6c7be45 --- /dev/null +++ b/docs/_static/favicon.svg @@ -0,0 +1,25 @@ +<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="64" height="64"> + <defs> + <linearGradient id="b"> + <stop offset=".261" stop-color="#d9825f"/> + <stop offset="1" stop-color="#f1d6b0"/> + </linearGradient> + <linearGradient id="a"> + <stop offset="0" stop-color="#b46643"/> + <stop offset=".521" stop-color="#400d02"/> + </linearGradient> + <linearGradient xlink:href="#a" id="d" x1="6.023" x2="57.975" y1="31.667" y2="31.667" gradientUnits="userSpaceOnUse"/> + <linearGradient xlink:href="#a" id="e" x1="5.523" x2="58.48" y1="31.67" y2="31.67" gradientUnits="userSpaceOnUse"/> + <radialGradient xlink:href="#b" id="c" cx="35.966" cy="29.958" r="17.403" fx="35.966" fy="29.958" gradientTransform="matrix(.98534 0 0 .67716 .782 9.643)" gradientUnits="userSpaceOnUse"/> + <filter id="f" width="1.316" height="1.453" x="-.167" y="-.24" color-interpolation-filters="sRGB"> + <feFlood flood-color="#b46643" flood-opacity=".498" result="flood"/> + <feGaussianBlur in="SourceGraphic" result="blur" stdDeviation="3"/> + <feOffset dx="-1" dy="-1" in="blur" result="offset"/> + <feComposite in="flood" in2="offset" operator="in" result="comp1"/> + <feComposite in="SourceGraphic" in2="comp1" result="comp2"/> + </filter> + </defs> + <path fill="url(#c)" d="M51.66 18.146c-2.207 0-23.763 6.417-28.092 8.362-3.382 1.52-4.51 3.09-4.495 6.252.016 3.116-3.929 3.866-.79 5.738 3.693 2.203 18.797 3.788 23.607 1.634 1.805-.809 4.193-2.037 5.309-2.733 2.642-1.647 5.336-8.207 5.926-14.434.452-4.77.436-4.82-1.464-4.82z" transform="translate(-5.778 -5.386) scale(1.1806)"/> + <path fill="url(#d)" stroke="url(#e)" d="M53.375 13.541c-1.724.004-23.937 8.24-28.492 10.564-3.433 1.753-4.371 2.931-7.09 8.909-1.287 2.828-3.65 6.58-5.252 8.334-4.408 4.826-6.518 7.542-6.518 8.392 0 .422 2.505-1.723 5.565-4.767 6.321-6.289 5.42-6.1 15.758-3.297 16.264 4.408 28.65-4.69 30.476-22.383.33-3.2.208-3.676-1.197-4.66-.86-.603-2.323-1.094-3.25-1.092Zm-1.467 4.662c1.626 0 1.639.047 1.252 4.656-.504 6.018-2.808 12.358-5.068 13.95-.954.672-2.997 1.859-4.541 2.64-4.114 2.082-12.428 2.03-15.586-.1-2.686-1.809-3.918-4.011-3.932-7.023-.013-3.056.952-4.574 3.846-6.043 3.703-1.88 22.14-8.08 24.03-8.08zM21.014 32.086l.42 2.055c.23 1.13.795 2.506 1.253 3.058.72.867.459 1.004-1.906 1.004h-2.74l1.486-3.058z" filter="url(#f)" transform="translate(-5.778 -5.386) scale(1.1806)"/> + <path fill="#360c03" d="M30.712 34.768c-1.46-1.157-2.165-2.45-2.165-3.973 0-2.867.69-3.258 6.233-3.543a20.459 20.459 0 0 0 8.539-2.328c5.15-2.693 9.132-2.536 9.447.372.574 5.307-6.13 10.22-14.774 10.828-4.394.308-5.421.117-7.28-1.356z"/> +</svg> diff --git a/docs/_static/logo.webp b/docs/_static/logo.webp Binary files differnew file mode 100644 index 0000000..e41cbb0 --- /dev/null +++ b/docs/_static/logo.webp diff --git a/docs/about.rst b/docs/about.rst new file mode 100644 index 0000000..0ae7bbc --- /dev/null +++ b/docs/about.rst @@ -0,0 +1,12 @@ +About +===== + +**hircine** was designed and written by `Wolfgang Müller +<https://oriole.systems>`_. + +Special thanks +-------------- + +- `Nate <https://www.deviantart.com/hasiruh>`_ for designing the lovely logo. +- `nortti <https://ahti.space/~nortti>`_ for invaluable feedback, testing, and + patiently supporting my obsession with this project. diff --git a/docs/advanced/api.rst b/docs/advanced/api.rst new file mode 100644 index 0000000..61f6d01 --- /dev/null +++ b/docs/advanced/api.rst @@ -0,0 +1,14 @@ +GraphQL API & Versioning +======================== + +**hircine** exposes the `GraphQL <https://graphql.org>`_ endpoint on `/graphql +</graphql>`_. When accessing this documentation on a running instance, clicking +that link will open an interactive GraphQL IDE with a built-in documentation +explorer. + +Versioning +---------- + +**hircine** uses `Semantic Versioning <https://semver.org>`_. The *public API* +consists of both the frontend (command-line interface and web application) and +the backend (GraphQL API and plugin infrastructure). diff --git a/docs/advanced/hashing.rst b/docs/advanced/hashing.rst new file mode 100644 index 0000000..90da3db --- /dev/null +++ b/docs/advanced/hashing.rst @@ -0,0 +1,15 @@ +Hashing +======= + +**hircine** uses the `BLAKE3 cryptographic hash function +<https://github.com/BLAKE3-team/BLAKE3>`_ to compute hashes of archives and all +its contained files. + +Whilst the latter files are hashed directly (i.e. their data is passed directly +to the hash function), the *ZIP* archives are not. Instead, **hircine** +calculates the hash of an archive by concatenating the hashes of *all* files +within it in archive order. + +This means that changes to the archive files themselves will invalidate an +archive's hash, but changes to *ZIP* compression levels or other basic metadata +will not. diff --git a/docs/advanced/image-processing.rst b/docs/advanced/image-processing.rst new file mode 100644 index 0000000..dba71d0 --- /dev/null +++ b/docs/advanced/image-processing.rst @@ -0,0 +1,19 @@ +Image processing +================ + +Images are processed by the `Python Imaging Library (Pillow) +<https://pillow.readthedocs.io/en/stable/index.html>`_ which supports a `wide +variety +<https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html>`_ of +image formats. Processed images are stored in the :ref:`overview-object-store` +using the `webp <https://developers.google.com/speed/webp>`_ format. Images are +resampled using a `Lanczos filter +<https://pillow.readthedocs.io/en/stable/handbook/concepts.html#PIL.Image.Resampling.LANCZOS>`_. + +Scaling +------- + +By default, images are scaled to fit within the bounds of ``4200x2000`` pixels +for display in the reader, and ``1680x800`` pixels for use as thumbnails. These +values are optimized for larger displays and may be :ref:`changed in the +configuration file <cfg-scale>` if you do not require such high resolutions. diff --git a/docs/advanced/import-process.rst b/docs/advanced/import-process.rst new file mode 100644 index 0000000..b33a927 --- /dev/null +++ b/docs/advanced/import-process.rst @@ -0,0 +1,53 @@ +Import process +============== + +When importing a new archive, **hircine** will do the following: + +1. Calculate the hash of the archive its contents. See :doc:`/advanced/hashing`. +2. Process each image for display in the application. See :doc:`/advanced/image-processing`. +3. Collate all images in the archive in "natural" sort order. See `natsort + <https://github.com/SethMMorton/natsort?tab=readme-ov-file#quick-description>`_. +4. Add the images and archive to the database. + +Status display +-------------- + +For each new or updated archive, **hircine** will report its status on the +command line: + ++---------+--------------------------------------------------------------------+ +| Symbol | Meaning | ++=========+====================================================================+ +| ``[+]`` | This is a new archive. | ++---------+--------------------------------------------------------------------+ +| ``[*]`` | This archive was updated (i.e. its modified time has changed). | ++---------+--------------------------------------------------------------------+ +| ``[>]`` | This archive has been renamed. | ++---------+--------------------------------------------------------------------+ +| ``[I]`` | This archive was ignored as it is a duplicate. | ++---------+--------------------------------------------------------------------+ +| ``[!]`` | This archive conflicts with another archive. | ++---------+--------------------------------------------------------------------+ +| ``[?]`` | This archive is referenced in the database but could not be found. | ++---------+--------------------------------------------------------------------+ +| ``[~]`` | The images from this archive were reprocessed. | ++---------+--------------------------------------------------------------------+ + + + +Duplicates +---------- + +**hircine** will not add duplicate archives to its database. If two or more +archives have the same content (i.e. their hashes match), a warning will be +issued. + +Conflicts +--------- + +A conflict occurs when an archive hash in the database no longer matches the +hash of the archive file on disk. **hircine** will take no further action other +than printing an error message including the path of the archive and both +hashes; it is up to the user to reconcile conflicts. An easy (but destructive) +solution is to delete the affected archive in the web application and +reimport it. diff --git a/docs/advanced/index.rst b/docs/advanced/index.rst new file mode 100644 index 0000000..3300030 --- /dev/null +++ b/docs/advanced/index.rst @@ -0,0 +1,13 @@ +Advanced topics +=============== + +This section describes advanced topics that are not crucial for usage of +**hircine**, but may nevertheless be of interest. + +.. toctree:: + :maxdepth: 1 + + import-process + hashing + image-processing + api diff --git a/docs/changelog.rst b/docs/changelog.rst new file mode 100644 index 0000000..a072d67 --- /dev/null +++ b/docs/changelog.rst @@ -0,0 +1,7 @@ +Changelog +========= + +0.1.0 "Satanic Satyr" +--------------------- + +- Initial release. diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..53eac6b --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,31 @@ +import importlib.metadata + +_META = importlib.metadata.metadata("hircine") + +author = _META["Author"] +project = _META["Name"] +release = _META["Version"] +version = release +copyright = "2022-2024, Wolfgang Müller" + +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", +] +templates_path = ["_templates"] +exclude_patterns = [] + +html_theme = "furo" +html_static_path = ["_static"] +html_favicon = "_static/favicon.svg" +html_logo = "_static/logo.webp" +html_show_copyright = False +html_copy_source = False + +autodoc_typehints_format = "short" +autosectionlabel_prefix_document = True + +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "packaging": ("https://packaging.python.org/en/latest", None), +} diff --git a/docs/glossary.rst b/docs/glossary.rst new file mode 100644 index 0000000..303ed6e --- /dev/null +++ b/docs/glossary.rst @@ -0,0 +1,13 @@ +Glossary +======== + +.. glossary:: + :sorted: + + qualified tag + A specific pairing of a namespace and a tag. See :ref:`overview-tags` + for more. + + object store + The content-addressable filesystem for processed image files. See + :ref:`overview-object-store` for more. diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..ac62829 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,38 @@ +Intro +===== + +**hircine** [#f1]_ is a web-based comic organizer written in `Python +<https://www.python.org>`_ and `SvelteKit <https://kit.svelte.dev>`_. + +.. image:: /_images/comics.jpg + :align: center + :alt: An overview of comics + +It imports image files from *ZIP* archives, supports a wide range of metadata +including custom tags and namespaces, comes with a powerful filtering system, +and keeps its own object store of processed and deduplicated image files for +easy and fast access via the browser. This is done without ever modifying any +source archives so you can keep your collection in pristine condition. + +**hircine** contains a simple reader interface that supports per-comic page +layouts and includes an extensible scraper framework that allows easy importing +of metadata from local files or online sources. + +.. toctree:: + :maxdepth: 2 + + overview + setup + usage/index + advanced/index + plugins/index + glossary + changelog + about + git repository <https://git.oriole.systems/hircine/> + +| + +.. rubric:: Footnotes + +.. [#f1] Oxford English Dictionary, 2nd Edition: (ˈhɜːsaɪn) [ad. L. hircīnus (hirquīnus) of a goat; having a goatish smell.] diff --git a/docs/overview.rst b/docs/overview.rst new file mode 100644 index 0000000..8eb327f --- /dev/null +++ b/docs/overview.rst @@ -0,0 +1,124 @@ +Overview +======== + +Before setting up **hircine** it is important to understand its choice of +technologies as well as its design goals and core concepts. This will allow you +to make an informed decision on whether or not it is the right solution for +you. + +.. _overview-technologies: + +Technologies +------------ + +**hircine** consists of two core parts: a `Python <https://www.python.org>`_ +backend that exposes a `GraphQL <https://graphql.org>`_ API and a `SvelteKit +<https://kit.svelte.dev>`_ frontend written in `TypeScript +<https://www.typescriptlang.org>`_ that communicates with it. Data is stored in +an `SQLite <https://www.sqlite.org/index.html>`_ database. + +Image processing is done using `Pillow +<https://pillow.readthedocs.io/en/stable>`_. hircine uses the `BLAKE3 Python +bindings <https://pypi.org/project/blake3>`_ to build hashes used for file +identification and deduplication. + +The web application is designed to be the canonical user interface, but any +program may freely use the :doc:`provided API </advanced/api>`. + +.. _overview-goals: + +Design goals +------------ + +**hircine** is designed to organize a large personal collection of comics and +make it easily queryable. It provides a set of concepts and tools that allows +categorization and classification of comics and comes with a powerful filtering +system. + +Whilst **hircine** does have basic support for categories of metadata such as +artists or characters, it is mostly concerned with classifying the *content* of +a comic through user-defined namespaces and tags. The primary goal is to find +something you are in the mood to read, and not to provide a full archival +system where you keep track of the minute details of a comic's publication or +creation. + +As such, it is designed to tackle large and diverse collections of comics, +manga, or doujin where the story, art, and characters are the primary appeal. + +.. _overview-concepts: + +Core concepts +------------- + +Archives +^^^^^^^^ + +**hircine** reads image files from *ZIP* archives. Loose image files are +**not** supported. Usually an archive contains a single comic (or chapter), but +it may also contain a whole volume - once imported, an archive can be split +into multiple comics in the web application. + +Comics +^^^^^^ + +A comic is a logical grouping of pages (image files) that can be annotated with +metadata. Most of the functionality in the web application pertains to +organizing, querying, and reading comics. + +Comics are created by collating a sequence of pages from a single archive. This +sequence is exclusive, meaning that a page may only ever be allocated to a +single comic. Not all pages of an archive have to be used. + +Metadata +^^^^^^^^ + +A fair chunk of comic metadata is self-explanatory. For example, a comic may be +annotated with the date of its publication, the language it is written in, or +what kind of censorship is in use. There also exist a number of well-defined +categories of metadata that are managed by the user: + ++------------+-------------------------------------------------------------------------+ +| Category | Description | ++============+=========================================================================+ +| Artists | A person involved in the creating of a comic. | ++------------+-------------------------------------------------------------------------+ +| Circles | A group of people involved in the publishing or translation of a comic. | ++------------+-------------------------------------------------------------------------+ +| Characters | A fictional character portrayed in a comic. | ++------------+-------------------------------------------------------------------------+ +| Worlds | A fictional world portrayed in a comic. | ++------------+-------------------------------------------------------------------------+ + +.. _overview-tags: + +Namespaces & Tags +^^^^^^^^^^^^^^^^^ + +Alongside those well-defined categories, **hircine** supports user-defined +namespaces and tags. The latter is a familiar concept: a tag is a simple piece +of information that is attached to an object of interest. + +Namespaces enhance that concept by introducing a context in which the tag is +placed. Let's say we want to keep track of the gender of a story's love +interest. We may solve this using tags alone, like ``male love interest`` or +``female love interest``, but this is quite unwieldy. Namespaces instead allow +us to create a ``male`` and ``female`` namespace and a single tag ``love +interest`` which can then be combined with either namespace. + +**hircine** requires the use of namespaces when tagging comics. A tag cannot be +applied to a comic unless it is paired with a namespace. Such a pairing is +called a *qualified tag*. When filtering, either the namespace or tag is +optional: you may decide to exclude comics with any ``love interest``, or +filter for comics that only have tags in the ``female`` namespace. + +.. _overview-object-store: + +Object store +^^^^^^^^^^^^ + +**hircine** keeps all processed images files in a a `content-addressable +filesystem <https://en.wikipedia.org/wiki/Content-addressable_storage>`_ called +the object store. The purpose of the store is twofold: Firstly, if multiple +archives contain the same image, it only needs to be stored once in the object +store. Secondly, the object store allows the application to used without having +to serve potentially large image files. diff --git a/docs/plugins/builtin.rst b/docs/plugins/builtin.rst new file mode 100644 index 0000000..61d531f --- /dev/null +++ b/docs/plugins/builtin.rst @@ -0,0 +1,16 @@ +Built-in plugins +================ + +**hircine** comes with a number of plugins already built-in. This page serves +as a quick reference for users and developers alike. + +.. _builtin-scrapers: + +Scrapers +-------- + +.. autoclass:: hircine.plugins.scrapers.gallery_dl.GalleryDLScraper() + +.. autoclass:: hircine.plugins.scrapers.ehentai_api.EHentaiAPIScraper() + +.. autoclass:: hircine.plugins.scrapers.anchira.AnchiraYamlScraper() diff --git a/docs/plugins/index.rst b/docs/plugins/index.rst new file mode 100644 index 0000000..2263fa2 --- /dev/null +++ b/docs/plugins/index.rst @@ -0,0 +1,17 @@ +Plugins +======= + +Plugins are `Python <https://www.python.org>`_ programs that use **hircine**'s +plugin architecture to customize or enhance the behaviour of certain parts of +the application. + +There are two types of plugins. **Scrapers** read and report metadata from +arbitrary sources and **Transformers** may modify that metadata freely before +it is shown in the :ref:`scraper-interface`. As such, transformers cater to the +needs of a specific user (e.g. to ignore certain pieces of metadata). + +.. toctree:: + :maxdepth: 1 + + builtin + writing/index diff --git a/docs/plugins/writing/index.rst b/docs/plugins/writing/index.rst new file mode 100644 index 0000000..42afebd --- /dev/null +++ b/docs/plugins/writing/index.rst @@ -0,0 +1,20 @@ +Writing plugins +=============== + +Before writing plugins, please familiarize yourself with the basics of the +:ref:`Python programming language <python:tutorial-index>`. It is also +recommended to read the :doc:`packaging:overview`. **hircine** discovers +plugins via :ref:`package metadata <packaging:plugin-entry-points>`, so it is +also useful to have a basic understanding of the :ref:`packaging:entry-points`. + +The plugin examples on the following pages are a good place to start once you +are ready. You may also have a look at the `source code +<https://git.oriole.systems/hircine/tree/src/hircine/plugins/scrapers>`_ for +the built-in scrapers. + +.. toctree:: + :maxdepth: 1 + + scrapers + transformers + reference diff --git a/docs/plugins/writing/reference.rst b/docs/plugins/writing/reference.rst new file mode 100644 index 0000000..1be281e --- /dev/null +++ b/docs/plugins/writing/reference.rst @@ -0,0 +1,51 @@ +Plugin API Reference +==================== + +.. _scraped-data: + +Scraped Data +------------ + +.. automodule:: hircine.scraper.types + :members: + +API Data +-------- + +.. autoclass:: hircine.api.types.FullComic + :members: + :inherited-members: + :undoc-members: + :exclude-members: cover + +.. autoclass:: hircine.api.types.Archive + :members: + :undoc-members: + :exclude-members: cover + +Enums +----- + +.. autoclass:: hircine.enums.Category() + :members: + :undoc-members: + +.. autoclass:: hircine.enums.Censorship() + :members: + :undoc-members: + +.. autoclass:: hircine.enums.Direction() + :members: + :undoc-members: + +.. autoclass:: hircine.enums.Language() + :members: + :undoc-members: + +.. autoclass:: hircine.enums.Layout() + :members: + :undoc-members: + +.. autoclass:: hircine.enums.Rating() + :members: + :undoc-members: diff --git a/docs/plugins/writing/scrapers.rst b/docs/plugins/writing/scrapers.rst new file mode 100644 index 0000000..258d3a8 --- /dev/null +++ b/docs/plugins/writing/scrapers.rst @@ -0,0 +1,48 @@ +Scrapers +======== + +A scraper extends the abstract :class:`~hircine.scraper.Scraper` class and +implements its :meth:`~hircine.scraper.Scraper.scrape` method. The latter is a +generator function yielding :ref:`scraped-data`. + +.. autoclass:: hircine.scraper.Scraper + :members: + :special-members: __init__ + +Exceptions +---------- + +A scraper may raise two kinds of exceptions: + +.. autoexception:: hircine.scraper.ScrapeWarning + +.. autoexception:: hircine.scraper.ScrapeError + +Utility functions +----------------- + +.. automodule:: hircine.scraper.utils + :members: + +Registering a scraper +--------------------- + +To register your class as a scraper, place it into the ``hircine.scraper`` +:ref:`entry point group <packaging:entry-points>`. For example, put the +following in a ``pyproject.toml`` file: + +.. code-block:: toml + + [project.entry-points.'hircine.scraper'] + my_scraper = 'myscraper.MyScraper' + +Example +------- + +.. literalinclude:: /_examples/example_scraper.py + :language: python + +The scraper above will scrape a JSON file with the following structure: + +.. literalinclude:: /_examples/example_scraper.json + :language: json diff --git a/docs/plugins/writing/transformers.rst b/docs/plugins/writing/transformers.rst new file mode 100644 index 0000000..045058d --- /dev/null +++ b/docs/plugins/writing/transformers.rst @@ -0,0 +1,31 @@ +Transformers +============ + +**hircine** supports modification of scraper results by the use of +transformers. Transformers are functions that hook into the scraping process +and may freely modify any :ref:`scraped-data` before it is shown to the user. + +A transformer is specified by decorating a generator function with the +:func:`~hircine.plugins.transformer` decorator. + +.. autodecorator:: hircine.plugins.transformer + +.. autoclass:: hircine.scraper.ScraperInfo + +Registering transformers +------------------------ + +To register transformers, place them into a module in the +``hircine.transformer`` :ref:`entry point group <packaging:entry-points>`. For +example, put the following in a ``pyproject.toml`` file: + +.. code-block:: toml + + [project.entry-points.'hircine.transformer'] + my_transformers = 'mytransformers.transformers' + +Example +------- + +.. literalinclude:: /_examples/example_transformer.py + :language: python diff --git a/docs/setup.rst b/docs/setup.rst new file mode 100644 index 0000000..ad2123e --- /dev/null +++ b/docs/setup.rst @@ -0,0 +1,113 @@ +Setup +===== + +Requirements +------------ + +- `Python 3.12 <https://www.python.org>`_ or newer. It is likely that your + system already comes with this. Otherwise, refer to the `Python Beginners + Guide <https://wiki.python.org/moin/BeginnersGuide/Download>`_. + +- A modern browser. **hircine** is built to target `ES2022 + <https://262.ecma-international.org/13.0>`_ and should run on all common + browsers at the time of writing. See `this support table + <https://caniuse.com/?feats=mdn-javascript_builtins_array_at,mdn-javascript_builtins_regexp_hasindices,mdn-javascript_builtins_object_hasown,mdn-javascript_builtins_error_cause,mdn-javascript_operators_await_top_level,mdn-javascript_classes_private_class_fields,mdn-javascript_classes_private_class_methods,mdn-javascript_classes_static_class_fields,mdn-javascript_classes_static_initialization_blocks>`_ + for a detailed breakdown. The web interface was successfully tested on the + following systems and browsers: + + - Linux 6.7.6: Firefox 123.0 + - Windows 10 Pro 22H2: Edge 122.0.2365.59, Firefox 123.0 + - Windows 11 Pro 23H2: Edge 122.0.2365.59, Firefox 123.0 + +**hircine** is designed to be hosted on Linux systems but may be set up for +Windows as well. Keep in mind that some utilities (e.g. ``gunicorn``) are not +available for Windows. + +Installation +------------ + +**hircine** should be installed in a :ref:`virtual environment +<packaging:creating and using virtual environments>`: + +.. code-block:: console + + $ python -m venv <VENVDIR> + $ source <VENVDIR>/bin/activate + +.. note:: + + ``VENVDIR`` should only ever contain program files and should not be the + directory you choose for the database in the next step. + + For example, ``~/.local/share/hircine`` is a sensible setting for + ``VENVDIR``. + +Once the environment is set up, download the `latest wheel +<https://hircine.oriole.systems/dist/hircine-0.1.0-py3-none-any.whl>`_ and +install it using `pip <https://pip.pypa.io/en/stable/>`_: + +.. code-block:: console + + (.venv) $ python -m pip install <WHEEL> + +Now the ``hircine`` command is available from within your shell: + +.. code-block:: console + + (.venv) $ hircine version + hircine 0.1.0 "Satanic Satyr" + +.. important:: + + Outside of this document it is assumed that the virtual environment is + activated and that the ``hircine`` command is present. + +Initializing the database +------------------------- + +Next, navigate to where you want to store the database and initialize it: + +.. code-block:: console + + (.venv) $ cd <DIR> + (.venv) $ hircine init + +This will create the following structure: + ++------------+------------------------------------------------------------------------+ +| Item | Description | ++============+========================================================================+ +| hircine.db | the SQLite database | ++------------+------------------------------------------------------------------------+ +| content/ | the directory containing your archives (may be nested arbitrarily) | ++------------+------------------------------------------------------------------------+ +| objects/ | the :term:`object store` for processed images | ++------------+------------------------------------------------------------------------+ +| backups/ | backups of the SQLite database | ++------------+------------------------------------------------------------------------+ + +.. tip:: + + By default, the command-line interface and the web application will always + look for the database in the current directory. Whilst this behaviour cannot + be changed when launching the web application, you may direct the + command-line program to a different directory using ``-C <DIR>``. + + If ``-C <DIR>`` is given on the command line, it must appear before any + sub-command (``import``, etc.) + +Starting the web application +---------------------------- + +To serve the web application, you need a compatible ASGI server. We recommend +`gunicorn <https://gunicorn.org>`_. The endpoint for the web application is the +``app()`` factory in ``hircine.app``: + +.. code-block:: console + + (.venv) $ python -m pip install gunicorn + (.venv) $ gunicorn -k uvicorn.workers.UvicornWorker --bind localhost:8000 "hircine.app:app()" + +Now you can point your browser to http://localhost:8000 to open the web +application. To stop it, simply terminate ``gunicorn`` or the ASGI server of +your choice. diff --git a/docs/usage/admin.rst b/docs/usage/admin.rst new file mode 100644 index 0000000..5fe2e90 --- /dev/null +++ b/docs/usage/admin.rst @@ -0,0 +1,58 @@ +Administrative tasks +==================== + +Administrative tasks are handled by the command-line interface. To get a quick +overview of available commands, run: + +.. code-block:: console + + $ hircine -h + +Updating the application +------------------------ + +To update **hircine**, download the newest wheel and install it in the virtual +environment: + +.. code-block:: console + + $ source <VENVDIR>/bin/activate + (.venv) $ python -m pip install <WHEEL> + +Running database migrations +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +After the update it might be required to bring the database up-to-date. To do +this, run: + +.. code-block:: console + + $ hircine migrate + +A backup will be created automatically. + +Backing up the database +----------------------- + +To save a current backup of the database into the ``backup/`` directory, run: + +.. code-block:: console + + $ hircine backup + +To restore a previously saved backup, stop **hircine** and simply replace the +``hircine.db`` file with the backup. + +Optimizing the database +----------------------- + +To optimize the SQLite database, run: + +.. code-block:: console + + $ hircine vacuum + +This is generally only recommended after deleting a large amount of data. Refer +to the `SQLite documentation +<https://www.sqlite.org/lang_vacuum.html#description>`_ for details on this +process. diff --git a/docs/usage/configuration.rst b/docs/usage/configuration.rst new file mode 100644 index 0000000..e0f3669 --- /dev/null +++ b/docs/usage/configuration.rst @@ -0,0 +1,41 @@ +Configuration +============= + +**hircine** looks for an optional configuration file named ``hircine.ini`` in +its regular directory structure. Refer to Python's :mod:`configparser` module +for details on its format. + +Sections +-------- + +.. _cfg-scale: + +import.scale +^^^^^^^^^^^^ + +This section is split into two subsections, ``full`` and ``thumb``. The former +controls scaling for images displayed in the reader and the latter controls +scaling for thumbnails. See :doc:`/advanced/image-processing`. + +The ``width`` and ``height`` settings in each subsection control the maximum +pixel dimensions for the processed image. They are given as integers and must +be greater than zero. The defaults are as follows: + +.. code-block:: ini + + [import.scale.full] + width = 4200 + height = 2000 + + [import.scale.thumb] + width = 1680 + height = 800 + +.. important:: + + Changes to these settings will only apply to newly processed image files. If + you want to reprocess your entire collection, run: + + .. code-block:: console + + $ hircine import -r diff --git a/docs/usage/filtering.rst b/docs/usage/filtering.rst new file mode 100644 index 0000000..05066fb --- /dev/null +++ b/docs/usage/filtering.rst @@ -0,0 +1,45 @@ +Filtering +========= + +A filter is a combination of predicates that have to be satisfied. **hircine** +provides a filter interface that allows using almost any type of metadata as a +predicate and can therefore build complex and expressive queries. + +.. image:: /_images/filtering.jpg + :align: center + :alt: Filtering comics + +If given multiple types of predicates, all of them have to match. If one type +of predicate contains multiple selections, the selected mode determines how +they are combined: + ++----------------------+--------------------------------------------------+ +| Mode | Behaviour | ++======================+==================================================+ +| ∀ ("for all") | matches if all given entities match | ++----------------------+--------------------------------------------------+ +| ∃ ("there exists") | matches if any of the given entities match | ++----------------------+--------------------------------------------------+ +| = ("exactly") | matches if entities are present exactly as given | ++----------------------+--------------------------------------------------+ + +For example, in the picture above, a comic only matches if *all* of the +following is true: + +- It is tagged as ``female:idol`` or ``:tights``. +- It is tagged with the artist ``40hara``. +- It is rated as *Questionable*. +- It is uncensored. +- It is in any language except Japanese. + +Matching empty sets +------------------- + +Each type of predicate may also match on the ∅ empty set. If enabled, the +predicate only matches if the object does not contain any entities of that +type. + +.. important:: + + If matching on the empty set, make sure there are no selections present in + the corresponding dropdown menu, as otherwise the filter will never match. diff --git a/docs/usage/getting-started.rst b/docs/usage/getting-started.rst new file mode 100644 index 0000000..60a167e --- /dev/null +++ b/docs/usage/getting-started.rst @@ -0,0 +1,121 @@ +Getting started +=============== + + +Importing archives +------------------ + +Place your archives in the ``content/`` directory and import them using the +command-line interface: + +.. code-block:: console + + $ hircine import + +As **hircine** can identify an archive by its contents, subsequent import jobs +won't import the same archive again. Archives may also be renamed or moved +freely within the ``content/`` directory; the next import job will recognize +these changes automatically. Symbolic links will **not** be followed. + +.. note:: + + For a more technical breakdown see :doc:`/advanced/import-process`. + +Adding comics +------------- + +Once the import job has finished, navigate to the archive tab in the web +application and hit the refresh button [#f1]_ to load the newly added archives. +Next, navigate to the archive that contains the comic you want to add. You'll +be presented with an tabbed pane on the left and all the pages in the archive +on the right. + +.. image:: /_images/archive.jpg + :align: center + :alt: The archive view + +By default, the pane shows the *Details* tab. Here you can see basic +information on the archive and, once added, a list of comics from this archive. +Clicking on a page will open the reader interface. + +.. note:: + + Once a comic has been added, you may specify its reading direction and page + layout. However, in archive view, the reader always defaults to + left-to-right single-page. For more information on the reader interface, see + :doc:`/usage/reading`. + +To add a new comic, navigate to the *Edit* tab and click the button to enter +selection mode. Now, instead of opening the reader interface, clicking on a +page adds it to the current selection. + +.. tip:: + + When in selection mode you may use Ctrl+Click to access the reader interface + instead. + +Select all pages that you want to add to a new comic and click the *Add* button +that has appeared. The newly added comic will appear below and the selection +mode exits. You may now add further comics by the same process, or add pages to +an already existing comic by clicking on the *Add* button that appears over +each comic. + +.. tip:: + + Once all relevant pages from an archive have been allocated, you may mark + the archive as "organized". This will automatically happen once the last + page has been added to a comic. + +Next, click on the newly added comic. + +Editing comics +-------------- + +Comics use the same layout as archives - a tabbed pane on the left and comic +pages on the right. Just like before, clicking a page will open the reader. +Navigate to the *Edit* tab to start annotating the comic with metadata. + +.. image:: /_images/comic-edit.jpg + :align: center + :alt: Editing a comic + +The top section of the edit form controls basic information about a comic, +whilst the bottom section contains dropdown fields for the user-managed +metadata categories. The picture above shows two categories that already have a +selection, but for you these lists will be empty. + +Adding metadata +--------------- + +To add new metadata entities, navigate to the respective tab in the web +application and hit the *Add* button in the top right corner. Alternatively, +new entities may be added *at any time* using :ref:`shortcuts +<shortcut-metadata>`. + +Let's add a new character. Hit ``nh`` on your keyboard, type in the character's +name in the modal that appears and confirm by clicking on *Save* or hitting the +``Enter`` key. The character is now available in the *Characters* dropdown. +Select it there and save your changes - the comic is now tagged with this +character. + +Removing comic pages +-------------------- + +Comic pages may be removed in the *Edit* tab by entering selection mode, +selecting the pages that should be removed, and clicking the *Remove selected +pages* button or hitting ``Delete``. Removed pages will be available again for +allocation in the archive. + +Setting the cover +----------------- + +The cover of a comic or an archive may be set at any time outside of selection +mode by control clicking a page. + +| + +.. rubric:: Footnotes + +.. [#f1] This is the only time you need to refresh something manually in the + web application. Care has been taken that all other elements update + automatically. diff --git a/docs/usage/index.rst b/docs/usage/index.rst new file mode 100644 index 0000000..a685ad8 --- /dev/null +++ b/docs/usage/index.rst @@ -0,0 +1,18 @@ +Usage +===== + +**hircine** is mainly controlled via the web interface. The command-line +interface handles various administrative tasks and, crucially, is used to +import your archives into the application. + +.. toctree:: + :maxdepth: 2 + + getting-started + namespaces + reading + filtering + scraping + shortcuts + admin + configuration diff --git a/docs/usage/namespaces.rst b/docs/usage/namespaces.rst new file mode 100644 index 0000000..4eacb14 --- /dev/null +++ b/docs/usage/namespaces.rst @@ -0,0 +1,43 @@ +Namespaces & Tags +================= + +As :ref:`mentioned earlier <overview-tags>`, the use of namespaces is required +when tagging comics. The user must also choose which namespaces a tag is +applicable to. That means that in order for the :term:`qualified tag` +``female:love interest`` to appear as a valid selection, the following must be +true: + +1. The namespace ``female`` must exist. +2. The tag ``love interest`` must exist. +3. The tag ``love interest`` must specify ``female`` as a valid namespace. + +.. note:: + + Qualified tags that are subsequently rendered invalid will **not** + automatically be removed from comics. Whilst the qualified tag can no longer + be selected in the editing or filtering interface, it can still be removed + from the comic manually. + +Namespace sorting +----------------- + +Namespaces may be configured with a "sort name". This name will be used when +sorting lists of namespaces or qualified tags. If no such name is given, the +namespace name is used for sorting instead. + +Tag descriptions +---------------- + +Tags may be annotated with a custom description that further explains how the +tag should be used. This description will be displayed as a tooltip when +hovering over a qualified tag. + +Qualified tag display +--------------------- + +When selecting from a list, qualified tags are displayed by combining the +namespace and tag with a colon, like ``female:love interest``. In all other +contexts, qualified tags are rendered as small pills that contain the tag name +only. A small number of namespaces have special handling, however: *female*, +*male*, *trans*, *mixed*, and *location*. These are displayed with a specific +colour and icon. diff --git a/docs/usage/reading.rst b/docs/usage/reading.rst new file mode 100644 index 0000000..f48e4c6 --- /dev/null +++ b/docs/usage/reading.rst @@ -0,0 +1,50 @@ +Reading +======= + +The reader interface may be accessed by clicking on any comic page. The reader +overlay will then open with the selected page in view. There are two metadata +settings that affect the reader, **Direction** and **Layout**. The former +adjusts reading direction and the latter determines how many pages are rendered +at once. + +Navigation +---------- + +The reader is controlled with the mouse or the keyboard. When displaying a +single page, clicking on the left side of the image will advance left, whilst +clicking on the right side of the image will advance right. Similarly, when +displaying two pages, clicking on the left image will advance left while +clicking on the right image will advance right. + +Additionally, the following keyboard shortcuts are available: + ++-------------+----------------------------------------------------+ +| Key | Action | ++=============+====================================================+ +| Left Arrow | Advance left. | ++-------------+----------------------------------------------------+ +| Right Arrow | Advance right. | ++-------------+----------------------------------------------------+ +| Down Arrow | Advance to the next page in reading direction. | ++-------------+ | +| Page Down | | ++-------------+ | +| Space | | ++-------------+----------------------------------------------------+ +| Up Arrow | Advance to the previous page in reading direction. | ++-------------+ | +| Page Up | | ++-------------+ | +| Backspace | | ++-------------+----------------------------------------------------+ + +Editing when reading +-------------------- + +A comic may be edited in the reader interface by opening the edit menu in the +top left corner or hitting ``z``. Changes to **Direction** and **Layout** will +be visible in the reader right away, but will only persist if saved. This makes +it easy to preview changes. + +Pending changes will not be lost when closing the reader interface - they can +still be modified and saved in the normal *Edit* tab. diff --git a/docs/usage/scraping.rst b/docs/usage/scraping.rst new file mode 100644 index 0000000..37bae98 --- /dev/null +++ b/docs/usage/scraping.rst @@ -0,0 +1,90 @@ +Scraping +======== + +**hircine** comes with a generic scraper interface that allows scraping comic +metadata from virtually any source. A number of scrapers for common file +formats and websites are :ref:`included <builtin-scrapers>` in the base +installation. Refer to :doc:`/plugins/index` if you want to write your own. + + +Scraper sources +--------------- + +Usually, a scraper will access a location on the web or a local file on your +disk. The former may be an online API, whilst the latter may be a `JSON +<https://www.json.org/json-en.html>`_ file like `gallery-dl +<https://github.com/mikf/gallery-dl>`_'s ``info.json``. + +For local files, two locations are considered. The comic's archive may contain +this file, or it may be stored as sidecar file alongside the archive in the +``content/`` directory. + +.. _sidecar-files: + +Archive & sidecar files +^^^^^^^^^^^^^^^^^^^^^^^ + +Sidecar files need to be prefixed with the full name of the archive. For +example, if a scraper accesses a file named ``info.json`` for an archive +``Hoshiiro GirlDrop Comic Anthology.zip``, the following locations will be +considered: + ++----------+-------------------------------------------------------------+ +| Location | Name | ++==========+=============================================================+ +| Archive | ``info.json`` | ++----------+-------------------------------------------------------------+ +| Sidecar | ``content/Hoshiiro GirlDrop Comic Anthology.zip.info.json`` | ++----------+-------------------------------------------------------------+ + +.. note:: + + If a file exists in both locations, the sidecar file is preferred. + +.. _scraper-interface: + +Scraper interface +----------------- + +If a comic has scrapers available, they will be shown in the *Scrape* tab. +Selecting the desired scraper and clicking on the *Scrape* button will start +the scraping process. + +.. image:: /_images/scraper.jpg + :align: center + :alt: Scraping a comic. + +Once the scraper has returned results, they are shown in the pane below. Only +results that differ from existing comic metadata will be displayed. + +Metadata that should not be kept may be deselected. For groups with a larger +set of entries, the selection may be inverted to quickly deselect the whole +group, or to only select a few entries. Pressing the *Merge* button will update +the comic with the selected metadata. + +Options +^^^^^^^ + +By default, **hircine** does not automatically create missing metadata entries. +This can be controlled using the *Create missing items* option. + +.. note:: + + Scrapers always return :term:`qualified tags <qualified tag>` (the namespace + is set to ``none`` if it could not be determined). When requested to create + a missing qualified tag, the namespace and tag will be created (if needed), + and the tag will be marked as applicable to the namespace. + + A qualified tag is considered to be missing if any of the following apply: + + 1. The namespace does not exist. + 2. The tag does not exist. + 3. The tag is not applicable to the namespace. + + +Modifying scraper results +------------------------- + +**hircine** allows modifying results that are returned by a scraper without +having to change the scraper logic. Refer to the documentation on +:doc:`/plugins/index` for more. diff --git a/docs/usage/shortcuts.rst b/docs/usage/shortcuts.rst new file mode 100644 index 0000000..b3f88bb --- /dev/null +++ b/docs/usage/shortcuts.rst @@ -0,0 +1,114 @@ +Shortcuts +========= + +**hircine** supports a number of shortcuts that are meant to streamline a few +common actions. Shortcuts may be made up of multiple keys. In that case, type +them in order. + +.. _shortcut-navigation: + +Navigation +---------- + +.. list-table:: + :align: left + :header-rows: 1 + + * - Shortcut + - Navigates to + * - ``go`` + - Home + * - ``gc`` + - Comics + * - ``gn`` + - Namespaces + * - ``gt`` + - Tags + * - ``gh`` + - Characters + * - ``gw`` + - Worlds + * - ``ga`` + - Artists + * - ``gi`` + - Circles + * - ``gz`` + - Archives + * - ``?`` + - Help + +.. _shortcut-metadata: + +Adding metadata +--------------- + +.. list-table:: + :align: left + :header-rows: 1 + + * - Shortcut + - Action + * - ``na`` + - Add a new artist. + * - ``ni`` + - Add a new circle. + * - ``nh`` + - Add a new character. + * - ``nw`` + - Add a new world. + * - ``nn`` + - Add a new namespace. + * - ``nt`` + - Add a new tag. + +.. _shortcut-reader: + +Reader +------ + +.. list-table:: + :align: left + :header-rows: 1 + + * - Shortcut + - Action + * - ``z`` + - Open edit menu. + * - ``Escape`` + - Close reader. + +.. _shortcut-filtering: + +Filtering +--------- + +.. list-table:: + :align: left + :header-rows: 1 + + * - Shortcut + - Action + * - ``F`` + - Focus search. + * - ``f`` + - Toggle favourites. + * - ``b`` + - Toggle bookmarked. + * - ``o`` + - Toggle organized. + +.. _shortcut-misc: + +Miscellaneous +------------- + +.. list-table:: + :align: left + :header-rows: 1 + + * - Shortcut + - Action + * - ``s`` + - Toggle selection mode if available. + * - ``Delete`` + - Delete selected items. |