Test redirects
This commit is contained in:
parent
b4a4d39220
commit
005c3188cf
2 changed files with 199 additions and 10 deletions
32
cry/feed.py
32
cry/feed.py
|
|
@@ -340,17 +340,29 @@ async def fetch_feed(meta: FeedMeta) -> typing.Tuple[str | Feed | None, FeedMeta
|
|||
LOG.error(f"{meta.url} failed for too long, giving up")
|
||||
return (None, dataclasses.replace(meta, status=FEED_STATUS_DEAD))
|
||||
|
||||
if response and response.is_permanent_redirect:
|
||||
# Permanent redirect, update the stored URL, but mark this as a
|
||||
# successful fetch.
|
||||
if response:
|
||||
# Check for permanent redirects and handle them properly. Note that
|
||||
# requests is kinda dumb when it comes to permanent redirects: we
|
||||
# have to slog through the history itself when it comes to the
|
||||
# redirects, and we have to note the URL of the request *after* the
|
||||
# permanent redirect in order to get the right one.
|
||||
#
|
||||
# TODO: Is this actually the right URL to store? We need the last
|
||||
# permanently redirected URL, not just whatever the last thing
|
||||
# is... e.g. imagine a permanent followed by a temporary
|
||||
# redirect, then what?
|
||||
LOG.info(f"{meta.url} permanently redirected to {response.url}")
|
||||
assert response.url is not None
|
||||
meta = dataclasses.replace(meta, url=response.url)
|
||||
new_url = None
|
||||
|
||||
history = list(response.history)
|
||||
history.append(response)
|
||||
history.reverse()
|
||||
|
||||
last_url = response.url
|
||||
for h in history:
|
||||
if h.is_permanent_redirect:
|
||||
new_url = last_url
|
||||
break
|
||||
last_url = h.url
|
||||
|
||||
if new_url is not None:
|
||||
LOG.info(f"{meta.url} permanently redirected to {new_url}")
|
||||
meta = dataclasses.replace(meta, url=new_url)
|
||||
|
||||
# TODO: Handle that bogus non-HTTP redirect that feedfinder uses.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue