Test redirects

This commit is contained in:
John Doty 2024-07-16 06:01:43 -07:00
parent b4a4d39220
commit 005c3188cf
2 changed files with 199 additions and 10 deletions

View file

@@ -340,17 +340,29 @@ async def fetch_feed(meta: FeedMeta) -> typing.Tuple[str | Feed | None, FeedMeta
LOG.error(f"{meta.url} failed for too long, giving up")
return (None, dataclasses.replace(meta, status=FEED_STATUS_DEAD))
if response and response.is_permanent_redirect:
# Permanent redirect, update the stored URL, but mark this as a
# successful fetch.
if response:
# Check for permanent redirects and handle them properly. Note that
# requests does not make permanent redirects easy to handle: we have
# to walk the response history ourselves to find them, and we have to
# record the URL of the request made *after* the permanent redirect
# in order to store the right one.
#
# TODO: Is this actually the right URL to store? We need the last
# permanently redirected URL, not just whatever the last thing
# is... e.g. imagine a permanent followed by a temporary
# redirect, then what?
LOG.info(f"{meta.url} permanently redirected to {response.url}")
assert response.url is not None
meta = dataclasses.replace(meta, url=response.url)
new_url = None
history = list(response.history)
history.append(response)
history.reverse()
last_url = response.url
for h in history:
if h.is_permanent_redirect:
new_url = last_url
break
last_url = h.url
if new_url is not None:
LOG.info(f"{meta.url} permanently redirected to {new_url}")
meta = dataclasses.replace(meta, url=new_url)
# TODO: Handle that bogus non-HTTP redirect that feedfinder uses.