Stop paying attention to the robots.txt for feeds

This commit is contained in:
John Doty 2024-07-29 10:01:11 -07:00
parent e83e5c9602
commit 65e4b3f1f7

View file

@ -354,8 +354,20 @@ class Guardian:
async def can_fetch(self, url: str) -> bool:
    """Returns true if we are allowed to fetch the given URL."""
-   parser = await self.get_robots_parser(url)
-   return parser.can_fetch(USER_AGENT, url)
+   # Look, opinions differ on whether feed readers are supposed to be
+   # considered robots. I added robots.txt support for feeds based on
# the example of the feed finder python code but on reflection it
# does not do what I want it to do and the world seems to suggest
# that RSS readers should ignore it. (i.e., jwz blocks robots from
# accessing the RSS feed, so.)
#
# I'm leaving this code here so that I can resurrect it later if
# necessary.
#
# parser = await self.get_robots_parser(url)
# return parser.can_fetch(USER_AGENT, url)
del url
return True
async def crawl_delay(self, url: str) -> int | None:
    """Returns the number of seconds we should wait before fetching again."""