fix: handle permanent redirects in the database
This commit is contained in:
parent
005c3188cf
commit
cce0ad9f8f
2 changed files with 53 additions and 1 deletions
|
|
@ -163,7 +163,10 @@ def refresh(url):
|
|||
results = asyncio.run(feed.fetch_many(feeds))
|
||||
|
||||
new_count = 0
|
||||
for d, meta in results:
|
||||
for i, (d, meta) in enumerate(results):
|
||||
if meta.url != feeds[i].url:
|
||||
db.redirect_feed(feeds[i].url, meta.url)
|
||||
|
||||
if d is None:
|
||||
# Nothing new.
|
||||
db.update_meta(meta)
|
||||
|
|
|
|||
|
|
@ -80,6 +80,10 @@ def database_path(origin: str) -> pathlib.Path:
|
|||
return pathlib.Path.home() / "Dropbox" / "cry" / f"{origin}.db"
|
||||
|
||||
|
||||
# TODO: Refactor into:
|
||||
# -top level: transactions
|
||||
# -bottom level: queries
|
||||
# to enable reuse
|
||||
class Database:
|
||||
db: sqlite3.Connection
|
||||
origin: str
|
||||
|
|
@ -421,3 +425,48 @@ class Database:
|
|||
[status, int(time.time()), url],
|
||||
)
|
||||
return cursor.rowcount
|
||||
|
||||
def redirect_feed(self, old_url: str, new_url: str):
    """Record that the feed at ``old_url`` has permanently moved to ``new_url``.

    Runs in a single transaction on ``self.db``:

    * If no feed row exists for ``new_url``, the existing feed row is simply
      renamed from ``old_url`` to ``new_url``.
    * Otherwise both feeds already exist: the entries stored under
      ``old_url`` are merged into ``new_url`` and the old feed is marked
      dead.

    Args:
        old_url: The URL the feed is currently stored under.
        new_url: The URL the feed redirected to.
    """
    if old_url == new_url:
        # Not a redirect. Without this guard the merge path below would
        # delete the feed's own entries and then mark it dead.
        return

    with self.db:
        cursor = self.db.execute(
            "SELECT COUNT(1) FROM feeds WHERE url=?", [new_url]
        )
        row = cursor.fetchone()
        if row[0] == 0:
            # The new URL is unknown: rename the feed in place.
            # NOTE(review): entries are not touched here; presumably a
            # foreign key with ON UPDATE CASCADE carries them over - confirm
            # against the schema.
            self.db.execute(
                "UPDATE feeds SET url = ? WHERE url = ?", [new_url, old_url]
            )
        else:
            # Preserve the entries that were under the old url.
            #
            # SQLite only accepts an ON CONFLICT (upsert) clause on INSERT,
            # not on UPDATE, so the move is done as "copy with upsert, then
            # delete the originals".
            #
            # NOTE(review): the column list assumes `entries` consists of
            # (id, inserted_at, feed_url, title, link) with a uniqueness
            # constraint covering (id, feed_url) - confirm against the
            # schema before merging.
            self.db.execute(
                """
                INSERT INTO entries (id, inserted_at, feed_url, title, link)
                SELECT id, inserted_at, ?, title, link
                FROM entries
                WHERE feed_url = ?
                ON CONFLICT DO UPDATE
                    SET
                        -- NOTE: This is also part of the feed merge algorithm, BUT
                        --       we implement it here. See the comment in store_feed
                        --       for the rationale.
                        inserted_at=MIN(inserted_at, excluded.inserted_at),
                        title=CASE
                            WHEN inserted_at < excluded.inserted_at THEN title
                            ELSE excluded.title
                        END,
                        link=CASE
                            WHEN inserted_at < excluded.inserted_at THEN link
                            ELSE excluded.link
                        END
                """,
                [new_url, old_url],
            )

            # The rows were copied above; drop the originals so the net
            # effect is a move, as the original UPDATE intended.
            self.db.execute(
                "DELETE FROM entries WHERE feed_url = ?", [old_url]
            )

            # Mark the old feed dead.
            self.db.execute(
                """
                UPDATE feeds
                SET status = ?,
                    last_fetched_ts = ?
                WHERE url = ?
                """,
                [feed.FEED_STATUS_DEAD, int(time.time()), old_url],
            )
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue