summary | refs | log | tree | commit | diff | stats | homepage
diff options
context:
space:
mode:
author: Wolfgang Müller, 2024-11-14 22:05:05 +0100
committer: Wolfgang Müller, 2024-11-14 22:05:05 +0100
commit: 47d464fbfc1dd4174c4f0ab39268297c14b972a3 (patch)
tree: 7e302077861d05741df0e9d4f327f93e8f2632b9
parent: e3b667cfac8344e0582740774024cf03ece1445b (diff)
download: hircine-47d464fbfc1dd4174c4f0ab39268297c14b972a3.tar.gz
backend/scraper: Have collect() ignore None results
If a parser function returns None, we currently yield it regardless, even though it has no effect further down the line. Instead, drop None results from the collect() stream as early as possible.
-rw-r--r-- src/hircine/scraper/__init__.py 9
-rw-r--r-- tests/scrapers/test_scraper.py 15
2 files changed, 21 insertions, 3 deletions
diff --git a/src/hircine/scraper/__init__.py b/src/hircine/scraper/__init__.py
index bc83436..4606f4a 100644
--- a/src/hircine/scraper/__init__.py
+++ b/src/hircine/scraper/__init__.py
@@ -86,13 +86,16 @@ class Scraper(ABC):
transformers = []
def generator():
- for result in self.scrape():
- if callable(result):
+ for item in self.scrape():
+ if callable(item):
try:
- yield result()
+ result = item()
except ScrapeWarning as e:
self.log_warning(e)
else:
+ result = item
+
+ if result is not None:
yield result
gen = generator()
diff --git a/tests/scrapers/test_scraper.py b/tests/scrapers/test_scraper.py
index 6f6f29d..8492425 100644
--- a/tests/scrapers/test_scraper.py
+++ b/tests/scrapers/test_scraper.py
@@ -9,6 +9,14 @@ class MockScraper(Scraper):
yield "bar"
+class NoneScraper(Scraper):
+ is_available = True
+
+ def scrape(self):
+ yield lambda: "foo"
+ yield None
+
+
class WarningScraper(Scraper):
is_available = True
@@ -53,3 +61,10 @@ def test_scraper_collects_literal():
generator = scraper.collect()
assert set(generator) == set(["literal"])
+
+
+def test_scraper_collect_ignores_none():
+ scraper = NoneScraper(None)
+ generator = scraper.collect()
+
+ assert set(generator) == set(["foo"])