Add "feed" as something we look for in a URL

Helps us find the Cat and Girl (catandgirl.com) feed automatically. Also
try "feed" as a fallback suffix, since it sometimes works on other sites too.
This commit is contained in:
John Doty 2024-09-13 10:30:03 -07:00
parent 3c278a09ba
commit 51049b4d30

View file

@ -664,14 +664,14 @@ def classify_links(links, baseuri) -> typing.Tuple[list[str], list[str]]:
"""
baseuri = baseuri.lower()
local, remote = [], []
local, remote = set(), set()
for link in links:
if link.lower().startswith(baseuri):
local.append(link)
local.add(link)
else:
remote.append(link)
remote.add(link)
return local, remote
return list(local), list(remote)
def is_feed_link(link: str) -> bool:
@ -687,18 +687,27 @@ def is_feed_link(link: str) -> bool:
def is_XML_related_link(link: str) -> bool:
    """Return True if the link contains a substring that hints at a feed.

    The comparison is case-insensitive: the link is lower-cased and then
    searched for each of "rss", "rdf", "xml", "atom" and "feed".
    """
    link = link.lower()
    # A single return covering every marker; a stale early return that omits
    # "feed" would make the "feed" check unreachable.
    return any(
        marker in link for marker in ("rss", "rdf", "xml", "atom", "feed")
    )
async def check_feed(url: str) -> Feed | None:
    """Return the feed found at ``url``, or None when it is not a feed.

    A ``FeedMeta`` is built from the URL and handed to ``fetch_feed``; the
    fetched result is returned only when it is a ``Feed`` instance.
    """
    LOG.debug(f"Checking {url}: checking...")
    # fetch_feed also hands back a metadata value, which is not needed here.
    fetched, _ = await fetch_feed(FeedMeta.from_url(url))

    if not isinstance(fetched, Feed):
        LOG.debug(f"Checking {url}: is not a feed")
        return None

    LOG.debug(f"Checking {url}: is a feed")
    return fetched
@ -738,6 +747,11 @@ async def feed_search(uri: str) -> list[Feed]:
LOG.debug("No links, checking A tags...")
local_links, remote_links = classify_links(parser.a_links, meta.url)
for link in local_links:
LOG.debug(f" LOCAL {link}")
for link in remote_links:
LOG.debug(f" REMOTE {link}")
outfeeds = await check_links(filter(is_feed_link, local_links))
if len(outfeeds) > 0:
return outfeeds
@ -759,6 +773,7 @@ async def feed_search(uri: str) -> list[Feed]:
"rss.xml", # Dave Winer/Manila
"index.xml", # MT
"index.rss", # Slash
"feed", # catandgirl.com and sometimes others.
]
outfeeds = await check_links([urllib.parse.urljoin(meta.url, x) for x in suffixes])
return outfeeds