From d1d654ebac2d51e3841675faeb56480e440f622f Mon Sep 17 00:00:00 2001 From: Wolfgang Müller Date: Tue, 5 Mar 2024 18:08:09 +0100 Subject: Initial commit --- docs/_examples/example_scraper.json | 8 +++ docs/_examples/example_scraper.py | 37 ++++++++++ docs/_examples/example_transformer.py | 21 ++++++ docs/_images/archive.jpg | Bin 0 -> 221038 bytes docs/_images/comic-edit.jpg | Bin 0 -> 244959 bytes docs/_images/comics.jpg | Bin 0 -> 248591 bytes docs/_images/filtering.jpg | Bin 0 -> 243578 bytes docs/_images/scraper.jpg | Bin 0 -> 533309 bytes docs/_static/favicon.svg | 25 +++++++ docs/_static/logo.webp | Bin 0 -> 89322 bytes docs/about.rst | 12 ++++ docs/advanced/api.rst | 14 ++++ docs/advanced/hashing.rst | 15 ++++ docs/advanced/image-processing.rst | 19 ++++++ docs/advanced/import-process.rst | 53 +++++++++++++++ docs/advanced/index.rst | 13 ++++ docs/changelog.rst | 7 ++ docs/conf.py | 31 +++++++++ docs/glossary.rst | 13 ++++ docs/index.rst | 38 +++++++++++ docs/overview.rst | 124 ++++++++++++++++++++++++++++++++++ docs/plugins/builtin.rst | 16 +++++ docs/plugins/index.rst | 17 +++++ docs/plugins/writing/index.rst | 20 ++++++ docs/plugins/writing/reference.rst | 51 ++++++++++++++ docs/plugins/writing/scrapers.rst | 48 +++++++++++++ docs/plugins/writing/transformers.rst | 31 +++++++++ docs/setup.rst | 113 +++++++++++++++++++++++++++++++ docs/usage/admin.rst | 58 ++++++++++++++++ docs/usage/configuration.rst | 41 +++++++++++ docs/usage/filtering.rst | 45 ++++++++++++ docs/usage/getting-started.rst | 121 +++++++++++++++++++++++++++++++++ docs/usage/index.rst | 18 +++++ docs/usage/namespaces.rst | 43 ++++++++++++ docs/usage/reading.rst | 50 ++++++++++++++ docs/usage/scraping.rst | 90 ++++++++++++++++++++++++ docs/usage/shortcuts.rst | 114 +++++++++++++++++++++++++++++++ 37 files changed, 1306 insertions(+) create mode 100644 docs/_examples/example_scraper.json create mode 100644 docs/_examples/example_scraper.py create mode 100644 
docs/_examples/example_transformer.py create mode 100644 docs/_images/archive.jpg create mode 100644 docs/_images/comic-edit.jpg create mode 100644 docs/_images/comics.jpg create mode 100644 docs/_images/filtering.jpg create mode 100644 docs/_images/scraper.jpg create mode 100644 docs/_static/favicon.svg create mode 100644 docs/_static/logo.webp create mode 100644 docs/about.rst create mode 100644 docs/advanced/api.rst create mode 100644 docs/advanced/hashing.rst create mode 100644 docs/advanced/image-processing.rst create mode 100644 docs/advanced/import-process.rst create mode 100644 docs/advanced/index.rst create mode 100644 docs/changelog.rst create mode 100644 docs/conf.py create mode 100644 docs/glossary.rst create mode 100644 docs/index.rst create mode 100644 docs/overview.rst create mode 100644 docs/plugins/builtin.rst create mode 100644 docs/plugins/index.rst create mode 100644 docs/plugins/writing/index.rst create mode 100644 docs/plugins/writing/reference.rst create mode 100644 docs/plugins/writing/scrapers.rst create mode 100644 docs/plugins/writing/transformers.rst create mode 100644 docs/setup.rst create mode 100644 docs/usage/admin.rst create mode 100644 docs/usage/configuration.rst create mode 100644 docs/usage/filtering.rst create mode 100644 docs/usage/getting-started.rst create mode 100644 docs/usage/index.rst create mode 100644 docs/usage/namespaces.rst create mode 100644 docs/usage/reading.rst create mode 100644 docs/usage/scraping.rst create mode 100644 docs/usage/shortcuts.rst (limited to 'docs') diff --git a/docs/_examples/example_scraper.json b/docs/_examples/example_scraper.json new file mode 100644 index 0000000..9efe126 --- /dev/null +++ b/docs/_examples/example_scraper.json @@ -0,0 +1,8 @@ +{ + "title": "This is a Title", + "tags": { + "artists": ["Alan Smithee", "Noah Ward"], + "characters": ["A", "B", "C"], + "misc": ["horror", "sci-fi"] + } +} diff --git a/docs/_examples/example_scraper.py b/docs/_examples/example_scraper.py new file 
mode 100644 index 0000000..d00c292 --- /dev/null +++ b/docs/_examples/example_scraper.py @@ -0,0 +1,37 @@ +import json + +from hircine.scraper import Scraper +from hircine.scraper.types import Artist, Character, Tag, Title +from hircine.scraper.utils import open_archive_file, parse_dict + + +class MyScraper(Scraper): + name = "Example scraper" + source = "example" + + def __init__(self, comic): + super().__init__(comic) + + self.data = self.load() + + if self.data: + self.is_available = True + + def load(self): + try: + with open_archive_file(self.comic.archive, "metadata.json") as jif: + return json.load(jif) + except Exception: + return {} + + def scrape(self): + parsers = { + "title": Title, + "tags": { + "artists": Artist, + "misc": Tag.from_string, + "characters": Character, + }, + } + + yield from parse_dict(parsers, self.data) diff --git a/docs/_examples/example_transformer.py b/docs/_examples/example_transformer.py new file mode 100644 index 0000000..6e443ae --- /dev/null +++ b/docs/_examples/example_transformer.py @@ -0,0 +1,21 @@ +from hircine.plugins import transformer +from hircine.scraper.types import Artist, Tag + + +@transformer +def transform(generator, info): + for item in generator: + # Ignore the "Drama" tag when scraping from mangadex + if info.source == "mangadex": + match item: + case Tag(tag="Drama"): + continue + + # convert all Artist names to lowercase + match item: + case Artist(name): + yield Artist(name.lower()) + continue + + # other items are not modified + yield item diff --git a/docs/_images/archive.jpg b/docs/_images/archive.jpg new file mode 100644 index 0000000..3ea2310 Binary files /dev/null and b/docs/_images/archive.jpg differ diff --git a/docs/_images/comic-edit.jpg b/docs/_images/comic-edit.jpg new file mode 100644 index 0000000..cef6455 Binary files /dev/null and b/docs/_images/comic-edit.jpg differ diff --git a/docs/_images/comics.jpg b/docs/_images/comics.jpg new file mode 100644 index 0000000..5dd9c04 Binary files 
/dev/null and b/docs/_images/comics.jpg differ diff --git a/docs/_images/filtering.jpg b/docs/_images/filtering.jpg new file mode 100644 index 0000000..a61204d Binary files /dev/null and b/docs/_images/filtering.jpg differ diff --git a/docs/_images/scraper.jpg b/docs/_images/scraper.jpg new file mode 100644 index 0000000..1da82a8 Binary files /dev/null and b/docs/_images/scraper.jpg differ diff --git a/docs/_static/favicon.svg b/docs/_static/favicon.svg new file mode 100644 index 0000000..6c7be45 --- /dev/null +++ b/docs/_static/favicon.svg @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/_static/logo.webp b/docs/_static/logo.webp new file mode 100644 index 0000000..e41cbb0 Binary files /dev/null and b/docs/_static/logo.webp differ diff --git a/docs/about.rst b/docs/about.rst new file mode 100644 index 0000000..0ae7bbc --- /dev/null +++ b/docs/about.rst @@ -0,0 +1,12 @@ +About +===== + +**hircine** was designed and written by `Wolfgang Müller +`_. + +Special thanks +-------------- + +- `Nate `_ for designing the lovely logo. +- `nortti `_ for invaluable feedback, testing, and + patiently supporting my obsession with this project. diff --git a/docs/advanced/api.rst b/docs/advanced/api.rst new file mode 100644 index 0000000..61f6d01 --- /dev/null +++ b/docs/advanced/api.rst @@ -0,0 +1,14 @@ +GraphQL API & Versioning +======================== + +**hircine** exposes the `GraphQL `_ endpoint on `/graphql +`_. When accessing this documentation on a running instance, clicking +that link will open an interactive GraphQL IDE with a built-in documentation +explorer. + +Versioning +---------- + +**hircine** uses `Semantic Versioning `_. The *public API* +consists of both the frontend (command-line interface and web application) and +the backend (GraphQL API and plugin infrastructure). 
diff --git a/docs/advanced/hashing.rst b/docs/advanced/hashing.rst new file mode 100644 index 0000000..90da3db --- /dev/null +++ b/docs/advanced/hashing.rst @@ -0,0 +1,15 @@ +Hashing +======= + +**hircine** uses the `BLAKE3 cryptographic hash function +`_ to compute hashes of archives and all +their contained files. + +Whilst the latter files are hashed directly (i.e. their data is passed directly +to the hash function), the *ZIP* archives are not. Instead, **hircine** +calculates the hash of an archive by concatenating the hashes of *all* files +within it in archive order. + +This means that changes to the archive files themselves will invalidate an +archive's hash, but changes to *ZIP* compression levels or other basic metadata +will not. diff --git a/docs/advanced/image-processing.rst b/docs/advanced/image-processing.rst new file mode 100644 index 0000000..dba71d0 --- /dev/null +++ b/docs/advanced/image-processing.rst @@ -0,0 +1,19 @@ +Image processing +================ + +Images are processed by the `Python Imaging Library (Pillow) +`_ which supports a `wide +variety +`_ of +image formats. Processed images are stored in the :ref:`overview-object-store` +using the `webp `_ format. Images are +resampled using a `Lanczos filter +`_. + +Scaling +------- + +By default, images are scaled to fit within the bounds of ``4200x2000`` pixels +for display in the reader, and ``1680x800`` pixels for use as thumbnails. These +values are optimized for larger displays and may be :ref:`changed in the +configuration file ` if you do not require such high resolutions. diff --git a/docs/advanced/import-process.rst b/docs/advanced/import-process.rst new file mode 100644 index 0000000..b33a927 --- /dev/null +++ b/docs/advanced/import-process.rst @@ -0,0 +1,53 @@ +Import process +============== + +When importing a new archive, **hircine** will do the following: + +1. Calculate the hash of the archive and its contents. See :doc:`/advanced/hashing`. +2.
Process each image for display in the application. See :doc:`/advanced/image-processing`. +3. Collate all images in the archive in "natural" sort order. See `natsort + `_. +4. Add the images and archive to the database. + +Status display +-------------- + +For each new or updated archive, **hircine** will report its status on the +command line: + ++---------+--------------------------------------------------------------------+ +| Symbol | Meaning | ++=========+====================================================================+ +| ``[+]`` | This is a new archive. | ++---------+--------------------------------------------------------------------+ +| ``[*]`` | This archive was updated (i.e. its modified time has changed). | ++---------+--------------------------------------------------------------------+ +| ``[>]`` | This archive has been renamed. | ++---------+--------------------------------------------------------------------+ +| ``[I]`` | This archive was ignored as it is a duplicate. | ++---------+--------------------------------------------------------------------+ +| ``[!]`` | This archive conflicts with another archive. | ++---------+--------------------------------------------------------------------+ +| ``[?]`` | This archive is referenced in the database but could not be found. | ++---------+--------------------------------------------------------------------+ +| ``[~]`` | The images from this archive were reprocessed. | ++---------+--------------------------------------------------------------------+ + + + +Duplicates +---------- + +**hircine** will not add duplicate archives to its database. If two or more +archives have the same content (i.e. their hashes match), a warning will be +issued. + +Conflicts +--------- + +A conflict occurs when an archive hash in the database no longer matches the +hash of the archive file on disk. 
**hircine** will take no further action other +than printing an error message including the path of the archive and both +hashes; it is up to the user to reconcile conflicts. An easy (but destructive) +solution is to delete the affected archive in the web application and +reimport it. diff --git a/docs/advanced/index.rst b/docs/advanced/index.rst new file mode 100644 index 0000000..3300030 --- /dev/null +++ b/docs/advanced/index.rst @@ -0,0 +1,13 @@ +Advanced topics +=============== + +This section describes advanced topics that are not crucial for usage of +**hircine**, but may nevertheless be of interest. + +.. toctree:: + :maxdepth: 1 + + import-process + hashing + image-processing + api diff --git a/docs/changelog.rst b/docs/changelog.rst new file mode 100644 index 0000000..a072d67 --- /dev/null +++ b/docs/changelog.rst @@ -0,0 +1,7 @@ +Changelog +========= + +0.1.0 "Satanic Satyr" +--------------------- + +- Initial release. diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..53eac6b --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,31 @@ +import importlib.metadata + +_META = importlib.metadata.metadata("hircine") + +author = _META["Author"] +project = _META["Name"] +release = _META["Version"] +version = release +copyright = "2022-2024, Wolfgang Müller" + +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", +] +templates_path = ["_templates"] +exclude_patterns = [] + +html_theme = "furo" +html_static_path = ["_static"] +html_favicon = "_static/favicon.svg" +html_logo = "_static/logo.webp" +html_show_copyright = False +html_copy_source = False + +autodoc_typehints_format = "short" +autosectionlabel_prefix_document = True + +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "packaging": ("https://packaging.python.org/en/latest", None), +} diff --git a/docs/glossary.rst b/docs/glossary.rst new file mode 100644 index 0000000..303ed6e --- /dev/null +++ b/docs/glossary.rst @@ -0,0 +1,13 @@ +Glossary 
+======== + +.. glossary:: + :sorted: + + qualified tag + A specific pairing of a namespace and a tag. See :ref:`overview-tags` + for more. + + object store + The content-addressable filesystem for processed image files. See + :ref:`overview-object-store` for more. diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..ac62829 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,38 @@ +Intro +===== + +**hircine** [#f1]_ is a web-based comic organizer written in `Python +`_ and `SvelteKit `_. + +.. image:: /_images/comics.jpg + :align: center + :alt: An overview of comics + +It imports image files from *ZIP* archives, supports a wide range of metadata +including custom tags and namespaces, comes with a powerful filtering system, +and keeps its own object store of processed and deduplicated image files for +easy and fast access via the browser. This is done without ever modifying any +source archives so you can keep your collection in pristine condition. + +**hircine** contains a simple reader interface that supports per-comic page +layouts and includes an extensible scraper framework that allows easy importing +of metadata from local files or online sources. + +.. toctree:: + :maxdepth: 2 + + overview + setup + usage/index + advanced/index + plugins/index + glossary + changelog + about + git repository + +| + +.. rubric:: Footnotes + +.. [#f1] Oxford English Dictionary, 2nd Edition: (ˈhɜːsaɪn) [ad. L. hircīnus (hirquīnus) of a goat; having a goatish smell.] diff --git a/docs/overview.rst b/docs/overview.rst new file mode 100644 index 0000000..8eb327f --- /dev/null +++ b/docs/overview.rst @@ -0,0 +1,124 @@ +Overview +======== + +Before setting up **hircine** it is important to understand its choice of +technologies as well as its design goals and core concepts. This will allow you +to make an informed decision on whether or not it is the right solution for +you. + +.. 
_overview-technologies: + +Technologies +------------ + +**hircine** consists of two core parts: a `Python `_ +backend that exposes a `GraphQL `_ API and a `SvelteKit +`_ frontend written in `TypeScript +`_ that communicates with it. Data is stored in +an `SQLite `_ database. + +Image processing is done using `Pillow +`_. hircine uses the `BLAKE3 Python +bindings `_ to build hashes used for file +identification and deduplication. + +The web application is designed to be the canonical user interface, but any +program may freely use the :doc:`provided API `. + +.. _overview-goals: + +Design goals +------------ + +**hircine** is designed to organize a large personal collection of comics and +make it easily queryable. It provides a set of concepts and tools that allows +categorization and classification of comics and comes with a powerful filtering +system. + +Whilst **hircine** does have basic support for categories of metadata such as +artists or characters, it is mostly concerned with classifying the *content* of +a comic through user-defined namespaces and tags. The primary goal is to find +something you are in the mood to read, and not to provide a full archival +system where you keep track of the minute details of a comic's publication or +creation. + +As such, it is designed to tackle large and diverse collections of comics, +manga, or doujin where the story, art, and characters are the primary appeal. + +.. _overview-concepts: + +Core concepts +------------- + +Archives +^^^^^^^^ + +**hircine** reads image files from *ZIP* archives. Loose image files are +**not** supported. Usually an archive contains a single comic (or chapter), but +it may also contain a whole volume - once imported, an archive can be split +into multiple comics in the web application. + +Comics +^^^^^^ + +A comic is a logical grouping of pages (image files) that can be annotated with +metadata. 
Most of the functionality in the web application pertains to +organizing, querying, and reading comics. + +Comics are created by collating a sequence of pages from a single archive. This +sequence is exclusive, meaning that a page may only ever be allocated to a +single comic. Not all pages of an archive have to be used. + +Metadata +^^^^^^^^ + +A fair chunk of comic metadata is self-explanatory. For example, a comic may be +annotated with the date of its publication, the language it is written in, or +what kind of censorship is in use. There also exist a number of well-defined +categories of metadata that are managed by the user: + ++------------+-------------------------------------------------------------------------+ +| Category | Description | ++============+=========================================================================+ +| Artists | A person involved in the creating of a comic. | ++------------+-------------------------------------------------------------------------+ +| Circles | A group of people involved in the publishing or translation of a comic. | ++------------+-------------------------------------------------------------------------+ +| Characters | A fictional character portrayed in a comic. | ++------------+-------------------------------------------------------------------------+ +| Worlds | A fictional world portrayed in a comic. | ++------------+-------------------------------------------------------------------------+ + +.. _overview-tags: + +Namespaces & Tags +^^^^^^^^^^^^^^^^^ + +Alongside those well-defined categories, **hircine** supports user-defined +namespaces and tags. The latter is a familiar concept: a tag is a simple piece +of information that is attached to an object of interest. + +Namespaces enhance that concept by introducing a context in which the tag is +placed. Let's say we want to keep track of the gender of a story's love +interest. 
We may solve this using tags alone, like ``male love interest`` or +``female love interest``, but this is quite unwieldy. Namespaces instead allow +us to create a ``male`` and ``female`` namespace and a single tag ``love +interest`` which can then be combined with either namespace. + +**hircine** requires the use of namespaces when tagging comics. A tag cannot be +applied to a comic unless it is paired with a namespace. Such a pairing is +called a *qualified tag*. When filtering, either the namespace or tag is +optional: you may decide to exclude comics with any ``love interest``, or +filter for comics that only have tags in the ``female`` namespace. + +.. _overview-object-store: + +Object store +^^^^^^^^^^^^ + +**hircine** keeps all processed image files in a `content-addressable +filesystem `_ called +the object store. The purpose of the store is twofold: Firstly, if multiple +archives contain the same image, it only needs to be stored once in the object +store. Secondly, the object store allows the application to be used without having +to serve potentially large image files. diff --git a/docs/plugins/builtin.rst b/docs/plugins/builtin.rst new file mode 100644 index 0000000..61d531f --- /dev/null +++ b/docs/plugins/builtin.rst @@ -0,0 +1,16 @@ +Built-in plugins +================ + +**hircine** comes with a number of plugins already built-in. This page serves +as a quick reference for users and developers alike. + +.. _builtin-scrapers: + +Scrapers +-------- + +.. autoclass:: hircine.plugins.scrapers.gallery_dl.GalleryDLScraper() + +.. autoclass:: hircine.plugins.scrapers.ehentai_api.EHentaiAPIScraper() + +..
autoclass:: hircine.plugins.scrapers.anchira.AnchiraYamlScraper() diff --git a/docs/plugins/index.rst b/docs/plugins/index.rst new file mode 100644 index 0000000..2263fa2 --- /dev/null +++ b/docs/plugins/index.rst @@ -0,0 +1,17 @@ +Plugins +======= + +Plugins are `Python `_ programs that use **hircine**'s +plugin architecture to customize or enhance the behaviour of certain parts of +the application. + +There are two types of plugins. **Scrapers** read and report metadata from +arbitrary sources and **Transformers** may modify that metadata freely before +it is shown in the :ref:`scraper-interface`. As such, transformers cater to the +needs of a specific user (e.g. to ignore certain pieces of metadata). + +.. toctree:: + :maxdepth: 1 + + builtin + writing/index diff --git a/docs/plugins/writing/index.rst b/docs/plugins/writing/index.rst new file mode 100644 index 0000000..42afebd --- /dev/null +++ b/docs/plugins/writing/index.rst @@ -0,0 +1,20 @@ +Writing plugins +=============== + +Before writing plugins, please familiarize yourself with the basics of the +:ref:`Python programming language `. It is also +recommended to read the :doc:`packaging:overview`. **hircine** discovers +plugins via :ref:`package metadata `, so it is +also useful to have a basic understanding of the :ref:`packaging:entry-points`. + +The plugin examples on the following pages are a good place to start once you +are ready. You may also have a look at the `source code +`_ for +the built-in scrapers. + +.. toctree:: + :maxdepth: 1 + + scrapers + transformers + reference diff --git a/docs/plugins/writing/reference.rst b/docs/plugins/writing/reference.rst new file mode 100644 index 0000000..1be281e --- /dev/null +++ b/docs/plugins/writing/reference.rst @@ -0,0 +1,51 @@ +Plugin API Reference +==================== + +.. _scraped-data: + +Scraped Data +------------ + +.. automodule:: hircine.scraper.types + :members: + +API Data +-------- + +.. 
autoclass:: hircine.api.types.FullComic + :members: + :inherited-members: + :undoc-members: + :exclude-members: cover + +.. autoclass:: hircine.api.types.Archive + :members: + :undoc-members: + :exclude-members: cover + +Enums +----- + +.. autoclass:: hircine.enums.Category() + :members: + :undoc-members: + +.. autoclass:: hircine.enums.Censorship() + :members: + :undoc-members: + +.. autoclass:: hircine.enums.Direction() + :members: + :undoc-members: + +.. autoclass:: hircine.enums.Language() + :members: + :undoc-members: + +.. autoclass:: hircine.enums.Layout() + :members: + :undoc-members: + +.. autoclass:: hircine.enums.Rating() + :members: + :undoc-members: diff --git a/docs/plugins/writing/scrapers.rst b/docs/plugins/writing/scrapers.rst new file mode 100644 index 0000000..258d3a8 --- /dev/null +++ b/docs/plugins/writing/scrapers.rst @@ -0,0 +1,48 @@ +Scrapers +======== + +A scraper extends the abstract :class:`~hircine.scraper.Scraper` class and +implements its :meth:`~hircine.scraper.Scraper.scrape` method. The latter is a +generator function yielding :ref:`scraped-data`. + +.. autoclass:: hircine.scraper.Scraper + :members: + :special-members: __init__ + +Exceptions +---------- + +A scraper may raise two kinds of exceptions: + +.. autoexception:: hircine.scraper.ScrapeWarning + +.. autoexception:: hircine.scraper.ScrapeError + +Utility functions +----------------- + +.. automodule:: hircine.scraper.utils + :members: + +Registering a scraper +--------------------- + +To register your class as a scraper, place it into the ``hircine.scraper`` +:ref:`entry point group `. For example, put the +following in a ``pyproject.toml`` file: + +.. code-block:: toml + + [project.entry-points.'hircine.scraper'] + my_scraper = 'myscraper.MyScraper' + +Example +------- + +.. literalinclude:: /_examples/example_scraper.py + :language: python + +The scraper above will scrape a JSON file with the following structure: + +.. 
literalinclude:: /_examples/example_scraper.json + :language: json diff --git a/docs/plugins/writing/transformers.rst b/docs/plugins/writing/transformers.rst new file mode 100644 index 0000000..045058d --- /dev/null +++ b/docs/plugins/writing/transformers.rst @@ -0,0 +1,31 @@ +Transformers +============ + +**hircine** supports modification of scraper results by the use of +transformers. Transformers are functions that hook into the scraping process +and may freely modify any :ref:`scraped-data` before it is shown to the user. + +A transformer is specified by decorating a generator function with the +:func:`~hircine.plugins.transformer` decorator. + +.. autodecorator:: hircine.plugins.transformer + +.. autoclass:: hircine.scraper.ScraperInfo + +Registering transformers +------------------------ + +To register transformers, place them into a module in the +``hircine.transformer`` :ref:`entry point group `. For +example, put the following in a ``pyproject.toml`` file: + +.. code-block:: toml + + [project.entry-points.'hircine.transformer'] + my_transformers = 'mytransformers.transformers' + +Example +------- + +.. literalinclude:: /_examples/example_transformer.py + :language: python diff --git a/docs/setup.rst b/docs/setup.rst new file mode 100644 index 0000000..ad2123e --- /dev/null +++ b/docs/setup.rst @@ -0,0 +1,113 @@ +Setup +===== + +Requirements +------------ + +- `Python 3.12 `_ or newer. It is likely that your + system already comes with this. Otherwise, refer to the `Python Beginners + Guide `_. + +- A modern browser. **hircine** is built to target `ES2022 + `_ and should run on all common + browsers at the time of writing. See `this support table + `_ + for a detailed breakdown. 
The web interface was successfully tested on the + following systems and browsers: + + - Linux 6.7.6: Firefox 123.0 + - Windows 10 Pro 22H2: Edge 122.0.2365.59, Firefox 123.0 + - Windows 11 Pro 23H2: Edge 122.0.2365.59, Firefox 123.0 + +**hircine** is designed to be hosted on Linux systems but may be set up for +Windows as well. Keep in mind that some utilities (e.g. ``gunicorn``) are not +available for Windows. + +Installation +------------ + +**hircine** should be installed in a :ref:`virtual environment +`: + +.. code-block:: console + + $ python -m venv <VENVDIR> + $ source <VENVDIR>/bin/activate + +.. note:: + + ``VENVDIR`` should only ever contain program files and should not be the + directory you choose for the database in the next step. + + For example, ``~/.local/share/hircine`` is a sensible setting for + ``VENVDIR``. + +Once the environment is set up, download the `latest wheel +`_ and +install it using `pip `_: + +.. code-block:: console + + (.venv) $ python -m pip install + +Now the ``hircine`` command is available from within your shell: + +.. code-block:: console + + (.venv) $ hircine version + hircine 0.1.0 "Satanic Satyr" + +.. important:: + + Outside of this document it is assumed that the virtual environment is + activated and that the ``hircine`` command is present. + +Initializing the database +------------------------- + +Next, navigate to where you want to store the database and initialize it: + +..
code-block:: console + + (.venv) $ cd + (.venv) $ hircine init + +This will create the following structure: + ++------------+------------------------------------------------------------------------+ +| Item | Description | ++============+========================================================================+ +| hircine.db | the SQLite database | ++------------+------------------------------------------------------------------------+ +| content/ | the directory containing your archives (may be nested arbitrarily) | ++------------+------------------------------------------------------------------------+ +| objects/ | the :term:`object store` for processed images | ++------------+------------------------------------------------------------------------+ +| backups/ | backups of the SQLite database | ++------------+------------------------------------------------------------------------+ + +.. tip:: + + By default, the command-line interface and the web application will always + look for the database in the current directory. Whilst this behaviour cannot + be changed when launching the web application, you may direct the + command-line program to a different directory using ``-C ``. + + If ``-C `` is given on the command line, it must appear before any + sub-command (``import``, etc.) + +Starting the web application +---------------------------- + +To serve the web application, you need a compatible ASGI server. We recommend +`gunicorn `_. The endpoint for the web application is the +``app()`` factory in ``hircine.app``: + +.. code-block:: console + + (.venv) $ python -m pip install gunicorn + (.venv) $ gunicorn -k uvicorn.workers.UvicornWorker --bind localhost:8000 "hircine.app:app()" + +Now you can point your browser to http://localhost:8000 to open the web +application. To stop it, simply terminate ``gunicorn`` or the ASGI server of +your choice. 
diff --git a/docs/usage/admin.rst b/docs/usage/admin.rst new file mode 100644 index 0000000..5fe2e90 --- /dev/null +++ b/docs/usage/admin.rst @@ -0,0 +1,58 @@ +Administrative tasks +==================== + +Administrative tasks are handled by the command-line interface. To get a quick +overview of available commands, run: + +.. code-block:: console + + $ hircine -h + +Updating the application +------------------------ + +To update **hircine**, download the newest wheel and install it in the virtual +environment: + +.. code-block:: console + + $ source /bin/activate + (.venv) $ python -m pip install + +Running database migrations +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +After the update it might be required to bring the database up-to-date. To do +this, run: + +.. code-block:: console + + $ hircine migrate + +A backup will be created automatically. + +Backing up the database +----------------------- + +To save a current backup of the database into the ``backups/`` directory, run: + +.. code-block:: console + + $ hircine backup + +To restore a previously saved backup, stop **hircine** and simply replace the +``hircine.db`` file with the backup. + +Optimizing the database +----------------------- + +To optimize the SQLite database, run: + +.. code-block:: console + + $ hircine vacuum + +This is generally only recommended after deleting a large amount of data. Refer +to the `SQLite documentation +`_ for details on this +process. diff --git a/docs/usage/configuration.rst b/docs/usage/configuration.rst new file mode 100644 index 0000000..e0f3669 --- /dev/null +++ b/docs/usage/configuration.rst @@ -0,0 +1,41 @@ +Configuration +============= + +**hircine** looks for an optional configuration file named ``hircine.ini`` in +its regular directory structure. Refer to Python's :mod:`configparser` module +for details on its format. + +Sections +-------- + +.. _cfg-scale: + +import.scale +^^^^^^^^^^^^ + +This section is split into two subsections, ``full`` and ``thumb``.
The former +controls scaling for images displayed in the reader and the latter controls +scaling for thumbnails. See :doc:`/advanced/image-processing`. + +The ``width`` and ``height`` settings in each subsection control the maximum +pixel dimensions for the processed image. They are given as integers and must +be greater than zero. The defaults are as follows: + +.. code-block:: ini + + [import.scale.full] + width = 4200 + height = 2000 + + [import.scale.thumb] + width = 1680 + height = 800 + +.. important:: + + Changes to these settings will only apply to newly processed image files. If + you want to reprocess your entire collection, run: + + .. code-block:: console + + $ hircine import -r diff --git a/docs/usage/filtering.rst b/docs/usage/filtering.rst new file mode 100644 index 0000000..05066fb --- /dev/null +++ b/docs/usage/filtering.rst @@ -0,0 +1,45 @@ +Filtering +========= + +A filter is a combination of predicates that have to be satisfied. **hircine** +provides a filter interface that allows using almost any type of metadata as a +predicate and can therefore build complex and expressive queries. + +.. image:: /_images/filtering.jpg + :align: center + :alt: Filtering comics + +If given multiple types of predicates, all of them have to match. 
If one type +of predicate contains multiple selections, the selected mode determines how +they are combined: + ++----------------------+--------------------------------------------------+ +| Mode | Behaviour | ++======================+==================================================+ +| ∀ ("for all") | matches if all given entities match | ++----------------------+--------------------------------------------------+ +| ∃ ("there exists") | matches if any of the given entities match | ++----------------------+--------------------------------------------------+ +| = ("exactly") | matches if entities are present exactly as given | ++----------------------+--------------------------------------------------+ + +For example, in the picture above, a comic only matches if *all* of the +following is true: + +- It is tagged as ``female:idol`` or ``:tights``. +- It is tagged with the artist ``40hara``. +- It is rated as *Questionable*. +- It is uncensored. +- It is in any language except Japanese. + +Matching empty sets +------------------- + +Each type of predicate may also match on the ∅ empty set. If enabled, the +predicate only matches if the object does not contain any entities of that +type. + +.. important:: + + If matching on the empty set, make sure there are no selections present in + the corresponding dropdown menu, as otherwise the filter will never match. diff --git a/docs/usage/getting-started.rst b/docs/usage/getting-started.rst new file mode 100644 index 0000000..60a167e --- /dev/null +++ b/docs/usage/getting-started.rst @@ -0,0 +1,121 @@ +Getting started +=============== + + +Importing archives +------------------ + +Place your archives in the ``content/`` directory and import them using the +command-line interface: + +.. code-block:: console + + $ hircine import + +As **hircine** can identify an archive by its contents, subsequent import jobs +won't import the same archive again. 
Archives may also be renamed or moved +freely within the ``content/`` directory; the next import job will recognize +these changes automatically. Symbolic links will **not** be followed. + +.. note:: + + For a more technical breakdown see :doc:`/advanced/import-process`. + +Adding comics +------------- + +Once the import job has finished, navigate to the archive tab in the web +application and hit the refresh button [#f1]_ to load the newly added archives. +Next, navigate to the archive that contains the comic you want to add. You'll +be presented with a tabbed pane on the left and all the pages in the archive +on the right. + +.. image:: /_images/archive.jpg + :align: center + :alt: The archive view + +By default, the pane shows the *Details* tab. Here you can see basic +information on the archive and, once added, a list of comics from this archive. +Clicking on a page will open the reader interface. + +.. note:: + + Once a comic has been added, you may specify its reading direction and page + layout. However, in archive view, the reader always defaults to + left-to-right single-page. For more information on the reader interface, see + :doc:`/usage/reading`. + +To add a new comic, navigate to the *Edit* tab and click the button to enter +selection mode. Now, instead of opening the reader interface, clicking on a +page adds it to the current selection. + +.. tip:: + + When in selection mode you may use Ctrl+Click to access the reader interface + instead. + +Select all pages that you want to add to a new comic and click the *Add* button +that has appeared. The newly added comic will appear below and the selection +mode exits. You may now add further comics by the same process, or add pages to +an already existing comic by clicking on the *Add* button that appears over +each comic. + +.. tip:: + + Once all relevant pages from an archive have been allocated, you may mark + the archive as "organized".
This will automatically happen once the last + page has been added to a comic. + +Next, click on the newly added comic. + +Editing comics +-------------- + +Comics use the same layout as archives - a tabbed pane on the left and comic +pages on the right. Just like before, clicking a page will open the reader. +Navigate to the *Edit* tab to start annotating the comic with metadata. + +.. image:: /_images/comic-edit.jpg + :align: center + :alt: Editing a comic + +The top section of the edit form controls basic information about a comic, +whilst the bottom section contains dropdown fields for the user-managed +metadata categories. The picture above shows two categories that already have a +selection, but for you these lists will be empty. + +Adding metadata +--------------- + +To add new metadata entities, navigate to the respective tab in the web +application and hit the *Add* button in the top right corner. Alternatively, +new entities may be added *at any time* using :ref:`shortcuts +<shortcut-metadata>`. + +Let's add a new character. Hit ``nh`` on your keyboard, type in the character's +name in the modal that appears and confirm by clicking on *Save* or hitting the +``Enter`` key. The character is now available in the *Characters* dropdown. +Select it there and save your changes - the comic is now tagged with this +character. + +Removing comic pages +-------------------- + +Comic pages may be removed in the *Edit* tab by entering selection mode, +selecting the pages that should be removed, and clicking the *Remove selected +pages* button or hitting ``Delete``. Removed pages will be available again for +allocation in the archive. + +Setting the cover +----------------- + +The cover of a comic or an archive may be set at any time outside of selection +mode by Ctrl+clicking a page. + +| + +.. rubric:: Footnotes + +.. [#f1] This is the only time you need to refresh something manually in the + web application. Care has been taken that all other elements update + automatically.
diff --git a/docs/usage/index.rst b/docs/usage/index.rst new file mode 100644 index 0000000..a685ad8 --- /dev/null +++ b/docs/usage/index.rst @@ -0,0 +1,18 @@ +Usage +===== + +**hircine** is mainly controlled via the web interface. The command-line +interface handles various administrative tasks and, crucially, is used to +import your archives into the application. + +.. toctree:: + :maxdepth: 2 + + getting-started + namespaces + reading + filtering + scraping + shortcuts + admin + configuration diff --git a/docs/usage/namespaces.rst b/docs/usage/namespaces.rst new file mode 100644 index 0000000..4eacb14 --- /dev/null +++ b/docs/usage/namespaces.rst @@ -0,0 +1,43 @@ +Namespaces & Tags +================= + +As :ref:`mentioned earlier `, the use of namespaces is required +when tagging comics. The user must also choose which namespaces a tag is +applicable to. That means that in order for the :term:`qualified tag` +``female:love interest`` to appear as a valid selection, the following must be +true: + +1. The namespace ``female`` must exist. +2. The tag ``love interest`` must exist. +3. The tag ``love interest`` must specify ``female`` as a valid namespace. + +.. note:: + + Qualified tags that are subsequently rendered invalid will **not** + automatically be removed from comics. Whilst the qualified tag can no longer + be selected in the editing or filtering interface, it can still be removed + from the comic manually. + +Namespace sorting +----------------- + +Namespaces may be configured with a "sort name". This name will be used when +sorting lists of namespaces or qualified tags. If no such name is given, the +namespace name is used for sorting instead. + +Tag descriptions +---------------- + +Tags may be annotated with a custom description that further explains how the +tag should be used. This description will be displayed as a tooltip when +hovering over a qualified tag. 
+ +Qualified tag display +--------------------- + +When selecting from a list, qualified tags are displayed by combining the +namespace and tag with a colon, like ``female:love interest``. In all other +contexts, qualified tags are rendered as small pills that contain the tag name +only. A small number of namespaces have special handling, however: *female*, +*male*, *trans*, *mixed*, and *location*. These are displayed with a specific +colour and icon. diff --git a/docs/usage/reading.rst b/docs/usage/reading.rst new file mode 100644 index 0000000..f48e4c6 --- /dev/null +++ b/docs/usage/reading.rst @@ -0,0 +1,50 @@ +Reading +======= + +The reader interface may be accessed by clicking on any comic page. The reader +overlay will then open with the selected page in view. There are two metadata +settings that affect the reader, **Direction** and **Layout**. The former +adjusts reading direction and the latter determines how many pages are rendered +at once. + +Navigation +---------- + +The reader is controlled with the mouse or the keyboard. When displaying a +single page, clicking on the left side of the image will advance left, whilst +clicking on the right side of the image will advance right. Similarly, when +displaying two pages, clicking on the left image will advance left while +clicking on the right image will advance right. + +Additionally, the following keyboard shortcuts are available: + ++-------------+----------------------------------------------------+ +| Key | Action | ++=============+====================================================+ +| Left Arrow | Advance left. | ++-------------+----------------------------------------------------+ +| Right Arrow | Advance right. | ++-------------+----------------------------------------------------+ +| Down Arrow | Advance to the next page in reading direction. 
| ++-------------+ | +| Page Down | | ++-------------+ | +| Space | | ++-------------+----------------------------------------------------+ +| Up Arrow | Advance to the previous page in reading direction. | ++-------------+ | +| Page Up | | ++-------------+ | +| Backspace | | ++-------------+----------------------------------------------------+ + +Editing when reading +-------------------- + +A comic may be edited in the reader interface by opening the edit menu in the +top left corner or hitting ``z``. Changes to **Direction** and **Layout** will +be visible in the reader right away, but will only persist if saved. This makes +it easy to preview changes. + +Pending changes will not be lost when closing the reader interface - they can +still be modified and saved in the normal *Edit* tab. diff --git a/docs/usage/scraping.rst b/docs/usage/scraping.rst new file mode 100644 index 0000000..37bae98 --- /dev/null +++ b/docs/usage/scraping.rst @@ -0,0 +1,90 @@ +Scraping +======== + +**hircine** comes with a generic scraper interface that allows scraping comic +metadata from virtually any source. A number of scrapers for common file +formats and websites are :ref:`included ` in the base +installation. Refer to :doc:`/plugins/index` if you want to write your own. + + +Scraper sources +--------------- + +Usually, a scraper will access a location on the web or a local file on your +disk. The former may be an online API, whilst the latter may be a `JSON +`_ file like `gallery-dl +`_'s ``info.json``. + +For local files, two locations are considered. The comic's archive may contain +this file, or it may be stored as sidecar file alongside the archive in the +``content/`` directory. + +.. _sidecar-files: + +Archive & sidecar files +^^^^^^^^^^^^^^^^^^^^^^^ + +Sidecar files need to be prefixed with the full name of the archive. 
For +example, if a scraper accesses a file named ``info.json`` for an archive +``Hoshiiro GirlDrop Comic Anthology.zip``, the following locations will be +considered: + +----------+-------------------------------------------------------------+ +| Location | Name | +==========+=============================================================+ +| Archive | ``info.json`` | +----------+-------------------------------------------------------------+ +| Sidecar | ``content/Hoshiiro GirlDrop Comic Anthology.zip.info.json`` | +----------+-------------------------------------------------------------+ + +.. note:: + + If a file exists in both locations, the sidecar file is preferred. + +.. _scraper-interface: + +Scraper interface +----------------- + +If a comic has scrapers available, they will be shown in the *Scrape* tab. +Selecting the desired scraper and clicking on the *Scrape* button will start +the scraping process. + +.. image:: /_images/scraper.jpg + :align: center + :alt: Scraping a comic. + +Once the scraper has returned results, they are shown in the pane below. Only +results that differ from existing comic metadata will be displayed. + +Metadata that should not be kept may be deselected. For groups with a larger +set of entries, the selection may be inverted to quickly deselect the whole +group, or to only select a few entries. Pressing the *Merge* button will update +the comic with the selected metadata. + +Options +^^^^^^^ + +By default, **hircine** does not automatically create missing metadata entries. +This can be controlled using the *Create missing items* option. + +.. note:: + + Scrapers always return :term:`qualified tags <qualified tag>` (the namespace + is set to ``none`` if it could not be determined). When requested to create + a missing qualified tag, the namespace and tag will be created (if needed), + and the tag will be marked as applicable to the namespace. + + A qualified tag is considered to be missing if any of the following apply: + + 1.
The namespace does not exist. + 2. The tag does not exist. + 3. The tag is not applicable to the namespace. + + +Modifying scraper results +------------------------- + +**hircine** allows modifying results that are returned by a scraper without +having to change the scraper logic. Refer to the documentation on +:doc:`/plugins/index` for more. diff --git a/docs/usage/shortcuts.rst b/docs/usage/shortcuts.rst new file mode 100644 index 0000000..b3f88bb --- /dev/null +++ b/docs/usage/shortcuts.rst @@ -0,0 +1,114 @@ +Shortcuts +========= + +**hircine** supports a number of shortcuts that are meant to streamline a few +common actions. Shortcuts may be made up of multiple keys. In that case, type +them in order. + +.. _shortcut-navigation: + +Navigation +---------- + +.. list-table:: + :align: left + :header-rows: 1 + + * - Shortcut + - Navigates to + * - ``go`` + - Home + * - ``gc`` + - Comics + * - ``gn`` + - Namespaces + * - ``gt`` + - Tags + * - ``gh`` + - Characters + * - ``gw`` + - Worlds + * - ``ga`` + - Artists + * - ``gi`` + - Circles + * - ``gz`` + - Archives + * - ``?`` + - Help + +.. _shortcut-metadata: + +Adding metadata +--------------- + +.. list-table:: + :align: left + :header-rows: 1 + + * - Shortcut + - Action + * - ``na`` + - Add a new artist. + * - ``ni`` + - Add a new circle. + * - ``nh`` + - Add a new character. + * - ``nw`` + - Add a new world. + * - ``nn`` + - Add a new namespace. + * - ``nt`` + - Add a new tag. + +.. _shortcut-reader: + +Reader +------ + +.. list-table:: + :align: left + :header-rows: 1 + + * - Shortcut + - Action + * - ``z`` + - Open edit menu. + * - ``Escape`` + - Close reader. + +.. _shortcut-filtering: + +Filtering +--------- + +.. list-table:: + :align: left + :header-rows: 1 + + * - Shortcut + - Action + * - ``F`` + - Focus search. + * - ``f`` + - Toggle favourites. + * - ``b`` + - Toggle bookmarked. + * - ``o`` + - Toggle organized. + +.. _shortcut-misc: + +Miscellaneous +------------- + +.. 
list-table:: + :align: left + :header-rows: 1 + + * - Shortcut + - Action + * - ``s`` + - Toggle selection mode if available. + * - ``Delete`` + - Delete selected items. -- cgit v1.2.3-2-gb3c3