From 47d464fbfc1dd4174c4f0ab39268297c14b972a3 Mon Sep 17 00:00:00 2001
From: Wolfgang Müller
Date: Thu, 14 Nov 2024 22:05:05 +0100
Subject: backend/scraper: Have collect() ignore None results

If a parser function returned None we yield it regardless, even though it
won't have any impact further down the line. Instead clean up the
collect() stream as early as possible.
---
 src/hircine/scraper/__init__.py |  9 ++++++---
 tests/scrapers/test_scraper.py  | 15 +++++++++++++++
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/hircine/scraper/__init__.py b/src/hircine/scraper/__init__.py
index bc83436..4606f4a 100644
--- a/src/hircine/scraper/__init__.py
+++ b/src/hircine/scraper/__init__.py
@@ -86,13 +86,16 @@ class Scraper(ABC):
         transformers = []
 
         def generator():
-            for result in self.scrape():
-                if callable(result):
+            for item in self.scrape():
+                if callable(item):
                     try:
-                        yield result()
+                        result = item()
                     except ScrapeWarning as e:
                         self.log_warning(e)
                 else:
+                    result = item
+
+                if result is not None:
                     yield result
 
         gen = generator()
diff --git a/tests/scrapers/test_scraper.py b/tests/scrapers/test_scraper.py
index 6f6f29d..8492425 100644
--- a/tests/scrapers/test_scraper.py
+++ b/tests/scrapers/test_scraper.py
@@ -9,6 +9,14 @@ class MockScraper(Scraper):
         yield "bar"
 
 
+class NoneScraper(Scraper):
+    is_available = True
+
+    def scrape(self):
+        yield lambda: "foo"
+        yield None
+
+
 class WarningScraper(Scraper):
     is_available = True
 
@@ -53,3 +61,10 @@ def test_scraper_collects_literal():
     generator = scraper.collect()
 
     assert set(generator) == set(["literal"])
+
+
+def test_scraper_collect_ignores_none():
+    scraper = NoneScraper(None)
+    generator = scraper.collect()
+
+    assert set(generator) == set(["foo"])
-- 
cgit v1.2.3-2-gb3c3