diff options
author | Wolfgang Müller | 2024-11-14 22:05:05 +0100 |
---|---|---|
committer | Wolfgang Müller | 2024-11-14 22:05:05 +0100 |
commit | 47d464fbfc1dd4174c4f0ab39268297c14b972a3 (patch) | |
tree | 7e302077861d05741df0e9d4f327f93e8f2632b9 | |
parent | e3b667cfac8344e0582740774024cf03ece1445b (diff) | |
download | hircine-47d464fbfc1dd4174c4f0ab39268297c14b972a3.tar.gz |
backend/scraper: Have collect() ignore None results
If a parser function returned None, we yielded it regardless, even though
it would not have any impact further down the line. Instead, clean up the
collect() stream as early as possible.
-rw-r--r-- | src/hircine/scraper/__init__.py | 9 | ||||
-rw-r--r-- | tests/scrapers/test_scraper.py | 15 |
2 files changed, 21 insertions, 3 deletions
diff --git a/src/hircine/scraper/__init__.py b/src/hircine/scraper/__init__.py
index bc83436..4606f4a 100644
--- a/src/hircine/scraper/__init__.py
+++ b/src/hircine/scraper/__init__.py
@@ -86,13 +86,16 @@ class Scraper(ABC):
             transformers = []
 
         def generator():
-            for result in self.scrape():
-                if callable(result):
+            for item in self.scrape():
+                if callable(item):
                     try:
-                        yield result()
+                        result = item()
                     except ScrapeWarning as e:
                         self.log_warning(e)
                 else:
+                    result = item
+
+                if result is not None:
                     yield result
 
         gen = generator()
diff --git a/tests/scrapers/test_scraper.py b/tests/scrapers/test_scraper.py
index 6f6f29d..8492425 100644
--- a/tests/scrapers/test_scraper.py
+++ b/tests/scrapers/test_scraper.py
@@ -9,6 +9,14 @@ class MockScraper(Scraper):
         yield "bar"
 
 
+class NoneScraper(Scraper):
+    is_available = True
+
+    def scrape(self):
+        yield lambda: "foo"
+        yield None
+
+
 class WarningScraper(Scraper):
     is_available = True
 
@@ -53,3 +61,10 @@ def test_scraper_collects_literal():
     generator = scraper.collect()
 
     assert set(generator) == set(["literal"])
+
+
+def test_scraper_collect_ignores_none():
+    scraper = NoneScraper(None)
+    generator = scraper.collect()
+
+    assert set(generator) == set(["foo"])