fix: handle permanent redirects in the database

This commit is contained in:
John Doty 2024-07-16 06:37:12 -07:00
parent 005c3188cf
commit cce0ad9f8f
2 changed files with 53 additions and 1 deletions

View file

@ -163,7 +163,10 @@ def refresh(url):
results = asyncio.run(feed.fetch_many(feeds))
new_count = 0
for d, meta in results:
for i, (d, meta) in enumerate(results):
if meta.url != feeds[i].url:
db.redirect_feed(feeds[i].url, meta.url)
if d is None:
# Nothing new.
db.update_meta(meta)

View file

@ -80,6 +80,10 @@ def database_path(origin: str) -> pathlib.Path:
return pathlib.Path.home() / "Dropbox" / "cry" / f"{origin}.db"
# TODO: Refactor into:
# -top level: transactions
# -bottom level: queries
# to enable reuse
class Database:
db: sqlite3.Connection
origin: str
@ -421,3 +425,48 @@ class Database:
[status, int(time.time()), url],
)
return cursor.rowcount
def redirect_feed(self, old_url: str, new_url: str) -> None:
    """Record that a feed permanently moved from old_url to new_url.

    If no feed row exists for new_url yet, the old row is simply renamed
    in place. Otherwise the old feed's entries are folded into the feed
    already registered at new_url, and the old feed row is marked dead so
    it is never fetched again. Everything runs in a single transaction.
    """
    if old_url == new_url:
        # Nothing to do; guards the merge branch below from deleting the
        # feed's own entries.
        return
    with self.db:
        cursor = self.db.execute(
            "SELECT COUNT(1) FROM feeds WHERE url=?", [new_url]
        )
        row = cursor.fetchone()
        if row[0] == 0:
            # No feed registered under the new url: just rename in place.
            self.db.execute(
                "UPDATE feeds SET url = ? WHERE url = ?", [new_url, old_url]
            )
        else:
            # A feed already exists at the new url; move the old feed's
            # entries over to it. SQLite's UPDATE statement has no
            # ON CONFLICT clause (that is INSERT-only syntax), so move the
            # non-colliding rows with UPDATE OR IGNORE and then drop the
            # leftovers — rows whose keys already exist under the new feed.
            #
            # NOTE(review): the feed-merge algorithm elsewhere prefers the
            # earlier-inserted row's title/link and the minimum
            # inserted_at; expressing that here requires knowing the
            # entries table's unique key, which is not visible in this
            # file. TODO: merge colliding rows instead of discarding the
            # old copies.
            self.db.execute(
                "UPDATE OR IGNORE entries SET feed_url = ? WHERE feed_url = ?",
                [new_url, old_url],
            )
            self.db.execute(
                "DELETE FROM entries WHERE feed_url = ?", [old_url]
            )
        # Mark the old feed dead so we stop fetching it. In the rename
        # branch no row with old_url remains, so this is a harmless no-op
        # there; it only takes effect after a merge.
        self.db.execute(
            """
            UPDATE feeds
            SET status = ?,
                last_fetched_ts = ?
            WHERE url = ?
            """,
            [feed.FEED_STATUS_DEAD, int(time.time()), old_url],
        )