Add "feed" as something we look for in a URL
Helps us find Cat and Girl's feed automatically. Also try a bare "/feed" suffix as a fallback, in case it works.
This commit is contained in:
parent
3c278a09ba
commit
51049b4d30
1 changed file with 20 additions and 5 deletions
25
cry/feed.py
25
cry/feed.py
|
|
@ -664,14 +664,14 @@ def classify_links(links, baseuri) -> typing.Tuple[list[str], list[str]]:
|
|||
"""
|
||||
baseuri = baseuri.lower()
|
||||
|
||||
local, remote = [], []
|
||||
local, remote = set(), set()
|
||||
for link in links:
|
||||
if link.lower().startswith(baseuri):
|
||||
local.append(link)
|
||||
local.add(link)
|
||||
else:
|
||||
remote.append(link)
|
||||
remote.add(link)
|
||||
|
||||
return local, remote
|
||||
return list(local), list(remote)
|
||||
|
||||
|
||||
def is_feed_link(link: str) -> bool:
|
||||
|
|
@ -687,18 +687,27 @@ def is_feed_link(link: str) -> bool:
|
|||
|
||||
def is_XML_related_link(link: str) -> bool:
|
||||
link = link.lower()
|
||||
return "rss" in link or "rdf" in link or "xml" in link or "atom" in link
|
||||
return (
|
||||
"rss" in link
|
||||
or "rdf" in link
|
||||
or "xml" in link
|
||||
or "atom" in link
|
||||
or "feed" in link
|
||||
)
|
||||
|
||||
|
||||
async def check_feed(url: str) -> Feed | None:
|
||||
"""Check to see if the given URL is a feed. If it is, return the feed,
|
||||
otherwise return None.
|
||||
"""
|
||||
LOG.debug(f"Checking {url}: checking...")
|
||||
meta = FeedMeta.from_url(url)
|
||||
result, meta = await fetch_feed(meta)
|
||||
if isinstance(result, Feed):
|
||||
LOG.debug(f"Checking {url}: is a feed")
|
||||
return result
|
||||
|
||||
LOG.debug(f"Checking {url}: is not a feed")
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -738,6 +747,11 @@ async def feed_search(uri: str) -> list[Feed]:
|
|||
|
||||
LOG.debug("No links, checking A tags...")
|
||||
local_links, remote_links = classify_links(parser.a_links, meta.url)
|
||||
for link in local_links:
|
||||
LOG.debug(f" LOCAL {link}")
|
||||
for link in remote_links:
|
||||
LOG.debug(f" REMOTE {link}")
|
||||
|
||||
outfeeds = await check_links(filter(is_feed_link, local_links))
|
||||
if len(outfeeds) > 0:
|
||||
return outfeeds
|
||||
|
|
@ -759,6 +773,7 @@ async def feed_search(uri: str) -> list[Feed]:
|
|||
"rss.xml", # Dave Winer/Manila
|
||||
"index.xml", # MT
|
||||
"index.rss", # Slash
|
||||
"feed", # catandgirl.com and sometimes others.
|
||||
]
|
||||
outfeeds = await check_links([urllib.parse.urljoin(meta.url, x) for x in suffixes])
|
||||
return outfeeds
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue