summary | refs | log | tree | commit | diff | stats | homepage
diff options
context:
space:
mode:
author: Wolfgang Müller, 2024-11-14 22:05:05 +0100
committer: Wolfgang Müller, 2024-11-14 22:05:05 +0100
commit: 47d464fbfc1dd4174c4f0ab39268297c14b972a3 (patch)
tree: 7e302077861d05741df0e9d4f327f93e8f2632b9
parent: e3b667cfac8344e0582740774024cf03ece1445b (diff)
download: hircine-47d464fbfc1dd4174c4f0ab39268297c14b972a3.tar.gz
backend/scraper: Have collect() ignore None results
If a parser function returned None, collect() yielded it regardless, even though it had no effect further down the line. Filter out None results inside collect() instead, so the stream is cleaned up as early as possible.
Diffstat (limited to '')
-rw-r--r-- src/hircine/scraper/__init__.py 9
-rw-r--r-- tests/scrapers/test_scraper.py 15
2 files changed, 21 insertions, 3 deletions
diff --git a/src/hircine/scraper/__init__.py b/src/hircine/scraper/__init__.py
index bc83436..4606f4a 100644
--- a/src/hircine/scraper/__init__.py
+++ b/src/hircine/scraper/__init__.py
@@ -86,13 +86,16 @@ class Scraper(ABC):
transformers = []
def generator():
- for result in self.scrape():
- if callable(result):
+ for item in self.scrape():
+ if callable(item):
try:
- yield result()
+ result = item()
except ScrapeWarning as e:
self.log_warning(e)
else:
+ result = item
+
+ if result is not None:
yield result
gen = generator()
diff --git a/tests/scrapers/test_scraper.py b/tests/scrapers/test_scraper.py
index 6f6f29d..8492425 100644
--- a/tests/scrapers/test_scraper.py
+++ b/tests/scrapers/test_scraper.py
@@ -9,6 +9,14 @@ class MockScraper(Scraper):
yield "bar"
+class NoneScraper(Scraper):
+ is_available = True
+
+ def scrape(self):
+ yield lambda: "foo"
+ yield None
+
+
class WarningScraper(Scraper):
is_available = True
@@ -53,3 +61,10 @@ def test_scraper_collects_literal():
generator = scraper.collect()
assert set(generator) == set(["literal"])
+
+
+def test_scraper_collect_ignores_none():
+ scraper = NoneScraper(None)
+ generator = scraper.collect()
+
+ assert set(generator) == set(["foo"])