Stop paying attention to the robots.txt for feeds
This commit is contained in:
parent
e83e5c9602
commit
65e4b3f1f7
1 changed files with 14 additions and 2 deletions
16
cry/feed.py
16
cry/feed.py
|
|
@ -354,8 +354,20 @@ class Guardian:
|
|||
|
||||
async def can_fetch(self, url: str) -> bool:
    """Return True unconditionally: feed fetching ignores robots.txt.

    Opinions differ on whether feed readers should be considered
    robots. robots.txt support was originally added here following the
    example of the feed-finder Python code, but on reflection it does
    not do what we want, and the prevailing view is that RSS readers
    should ignore it (jwz, for instance, blocks robots from accessing
    his RSS feed). The original check is preserved below, commented
    out, so it can be resurrected later if necessary:

        parser = await self.get_robots_parser(url)
        return parser.can_fetch(USER_AGENT, url)
    """
    del url  # deliberately unused — every feed URL is permitted
    return True
|
||||
|
||||
async def crawl_delay(self, url: str) -> int | None:
|
||||
"""Returns the number of seconds we should wait before fetching again."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue