From cce0ad9f8fadbd43310bb949a9d075130f277dae Mon Sep 17 00:00:00 2001
From: John Doty
Date: Tue, 16 Jul 2024 06:37:12 -0700
Subject: [PATCH] fix: handle permanent redirects in the database

---
 cry/cli.py      |  5 ++++-
 cry/database.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/cry/cli.py b/cry/cli.py
index f04e4d0..5164e24 100644
--- a/cry/cli.py
+++ b/cry/cli.py
@@ -163,7 +163,10 @@ def refresh(url):
     results = asyncio.run(feed.fetch_many(feeds))
 
     new_count = 0
-    for d, meta in results:
+    for i, (d, meta) in enumerate(results):
+        if meta.url != feeds[i].url:
+            db.redirect_feed(feeds[i].url, meta.url)
+
         if d is None:
             # Nothing new.
             db.update_meta(meta)
diff --git a/cry/database.py b/cry/database.py
index c63b8f4..334fe7e 100644
--- a/cry/database.py
+++ b/cry/database.py
@@ -80,6 +80,10 @@ def database_path(origin: str) -> pathlib.Path:
     return pathlib.Path.home() / "Dropbox" / "cry" / f"{origin}.db"
 
 
+# TODO: Refactor into:
+#   -top level: transactions
+#   -bottom level: queries
+# to enable reuse
 class Database:
     db: sqlite3.Connection
     origin: str
@@ -421,3 +425,53 @@ class Database:
                 [status, int(time.time()), url],
             )
             return cursor.rowcount
+
+    def redirect_feed(self, old_url: str, new_url: str) -> None:
+        """Record that the feed at old_url has permanently moved to new_url.
+
+        If no feed is stored under new_url the old feed row is renamed in
+        place. Otherwise the entries under old_url are moved to new_url and
+        the old feed is marked dead. The work runs in one transaction.
+        """
+        if old_url == new_url:
+            # Not a redirect; merging would mark a live feed dead.
+            return
+
+        with self.db:
+            cursor = self.db.execute(
+                "SELECT COUNT(1) FROM feeds WHERE url = ?", [new_url]
+            )
+            if cursor.fetchone()[0] == 0:
+                self.db.execute(
+                    "UPDATE feeds SET url = ? WHERE url = ?", [new_url, old_url]
+                )
+            else:
+                # Preserve the entries that were under the old url.
+                #
+                # SQLite accepts an upsert (ON CONFLICT DO UPDATE) only on
+                # INSERT, so the move uses UPDATE OR IGNORE: an entry that
+                # would collide with one already under new_url is left where
+                # it is, and nothing is lost.
+                #
+                # NOTE(review): colliding entries are not merged (e.g. keeping
+                # the earliest inserted_at with its title and link). That needs
+                # the key columns of entries, which are not visible from here.
+                self.db.execute(
+                    """
+                    UPDATE OR IGNORE entries
+                    SET feed_url = ?
+                    WHERE feed_url = ?
+                    """,
+                    [new_url, old_url],
+                )
+
+                # Mark the old feed dead.
+                self.db.execute(
+                    """
+                    UPDATE feeds
+                    SET status = ?,
+                        last_fetched_ts = ?
+                    WHERE url = ?
+                    """,
+                    [feed.FEED_STATUS_DEAD, int(time.time()), old_url],
+                )