From 51049b4d3039906bdbfe6279dd54e8bfc20e31ab Mon Sep 17 00:00:00 2001
From: John Doty
Date: Fri, 13 Sep 2024 10:30:03 -0700
Subject: [PATCH] Add "feed" as something we look for in a URL

Helps us find cat and girl's feed automatically. Also just try "/feed";
maybe it works?
---
 cry/feed.py | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/cry/feed.py b/cry/feed.py
index 0bbceea..e2883d8 100644
--- a/cry/feed.py
+++ b/cry/feed.py
@@ -664,14 +664,14 @@ def classify_links(links, baseuri) -> typing.Tuple[list[str], list[str]]:
     """
     baseuri = baseuri.lower()
 
-    local, remote = [], []
+    local, remote = set(), set()
     for link in links:
         if link.lower().startswith(baseuri):
-            local.append(link)
+            local.add(link)
         else:
-            remote.append(link)
+            remote.add(link)
 
-    return local, remote
+    return list(local), list(remote)
 
 
 def is_feed_link(link: str) -> bool:
@@ -687,18 +687,27 @@ def is_feed_link(link: str) -> bool:
 
 
 def is_XML_related_link(link: str) -> bool:
     link = link.lower()
-    return "rss" in link or "rdf" in link or "xml" in link or "atom" in link
+    return (
+        "rss" in link
+        or "rdf" in link
+        or "xml" in link
+        or "atom" in link
+        or "feed" in link
+    )
 
 
 async def check_feed(url: str) -> Feed | None:
     """Check to see if the given URL is a feed.
 
     If it is, return the feed, otherwise return None.
     """
+    LOG.debug(f"Checking {url}: checking...")
     meta = FeedMeta.from_url(url)
     result, meta = await fetch_feed(meta)
     if isinstance(result, Feed):
+        LOG.debug(f"Checking {url}: is a feed")
         return result
+    LOG.debug(f"Checking {url}: is not a feed")
     return None
 
@@ -738,6 +747,11 @@ async def feed_search(uri: str) -> list[Feed]:
         LOG.debug("No links, checking A tags...")
         local_links, remote_links = classify_links(parser.a_links, meta.url)
 
+        for link in local_links:
+            LOG.debug(f" LOCAL {link}")
+        for link in remote_links:
+            LOG.debug(f" REMOTE {link}")
+
         outfeeds = await check_links(filter(is_feed_link, local_links))
         if len(outfeeds) > 0:
             return outfeeds
@@ -759,6 +773,7 @@ async def feed_search(uri: str) -> list[Feed]:
         "rss.xml", # Dave Winer/Manila
         "index.xml", # MT
         "index.rss", # Slash
+        "feed", # catandgirl.com and sometimes others.
     ]
     outfeeds = await check_links([urllib.parse.urljoin(meta.url, x) for x in suffixes])
     return outfeeds