diff options
author | Wolfgang Müller | 2024-11-14 22:05:05 +0100 |
---|---|---|
committer | Wolfgang Müller | 2024-11-14 22:05:05 +0100 |
commit | 47d464fbfc1dd4174c4f0ab39268297c14b972a3 (patch) | |
tree | 7e302077861d05741df0e9d4f327f93e8f2632b9 | |
parent | e3b667cfac8344e0582740774024cf03ece1445b (diff) | |
download | hircine-47d464fbfc1dd4174c4f0ab39268297c14b972a3.tar.gz |
backend/scraper: Have collect() ignore None results
If a parser function returns None, we currently yield it regardless, even
though it has no impact further down the line. Instead, drop None results
from the collect() stream as early as possible.
Diffstat (limited to '')
-rw-r--r-- | src/hircine/scraper/__init__.py | 9 | ||||
-rw-r--r-- | tests/scrapers/test_scraper.py | 15 |
2 files changed, 21 insertions, 3 deletions
diff --git a/src/hircine/scraper/__init__.py b/src/hircine/scraper/__init__.py
index bc83436..4606f4a 100644
--- a/src/hircine/scraper/__init__.py
+++ b/src/hircine/scraper/__init__.py
@@ -86,13 +86,16 @@ class Scraper(ABC):
         transformers = []
 
         def generator():
-            for result in self.scrape():
-                if callable(result):
+            for item in self.scrape():
+                if callable(item):
                     try:
-                        yield result()
+                        result = item()
                     except ScrapeWarning as e:
                         self.log_warning(e)
                 else:
+                    result = item
+
+                if result is not None:
                     yield result
 
         gen = generator()
diff --git a/tests/scrapers/test_scraper.py b/tests/scrapers/test_scraper.py
index 6f6f29d..8492425 100644
--- a/tests/scrapers/test_scraper.py
+++ b/tests/scrapers/test_scraper.py
@@ -9,6 +9,14 @@ class MockScraper(Scraper):
         yield "bar"
 
 
+class NoneScraper(Scraper):
+    is_available = True
+
+    def scrape(self):
+        yield lambda: "foo"
+        yield None
+
+
 class WarningScraper(Scraper):
     is_available = True
 
@@ -53,3 +61,10 @@ def test_scraper_collects_literal():
     generator = scraper.collect()
 
     assert set(generator) == set(["literal"])
+
+
+def test_scraper_collect_ignores_none():
+    scraper = NoneScraper(None)
+    generator = scraper.collect()
+
+    assert set(generator) == set(["foo"])