fix: handle permanent redirects in the database
This commit is contained in:
parent
005c3188cf
commit
cce0ad9f8f
2 changed files with 53 additions and 1 deletions
|
|
@ -163,7 +163,10 @@ def refresh(url):
|
||||||
results = asyncio.run(feed.fetch_many(feeds))
|
results = asyncio.run(feed.fetch_many(feeds))
|
||||||
|
|
||||||
new_count = 0
|
new_count = 0
|
||||||
for d, meta in results:
|
for i, (d, meta) in enumerate(results):
|
||||||
|
if meta.url != feeds[i].url:
|
||||||
|
db.redirect_feed(feeds[i].url, meta.url)
|
||||||
|
|
||||||
if d is None:
|
if d is None:
|
||||||
# Nothing new.
|
# Nothing new.
|
||||||
db.update_meta(meta)
|
db.update_meta(meta)
|
||||||
|
|
|
||||||
|
|
@ -80,6 +80,10 @@ def database_path(origin: str) -> pathlib.Path:
|
||||||
return pathlib.Path.home() / "Dropbox" / "cry" / f"{origin}.db"
|
return pathlib.Path.home() / "Dropbox" / "cry" / f"{origin}.db"
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Refactor into:
|
||||||
|
# -top level: transactions
|
||||||
|
# -bottom level: queries
|
||||||
|
# to enable reuse
|
||||||
class Database:
|
class Database:
|
||||||
db: sqlite3.Connection
|
db: sqlite3.Connection
|
||||||
origin: str
|
origin: str
|
||||||
|
|
@ -421,3 +425,48 @@ class Database:
|
||||||
[status, int(time.time()), url],
|
[status, int(time.time()), url],
|
||||||
)
|
)
|
||||||
return cursor.rowcount
|
return cursor.rowcount
|
||||||
|
|
||||||
|
def redirect_feed(self, old_url: str, new_url: str):
    """Re-key the feed stored under *old_url* to *new_url* (permanent redirect).

    If no feed row exists for *new_url* yet, the old feed row is simply
    renamed in place. Otherwise the old feed's entries are moved under
    *new_url* and the old feed row is marked dead so it stops being fetched.

    Args:
        old_url: URL the feed was previously stored under.
        new_url: URL the server permanently redirected to.
    """
    # One transaction for the whole redirect: either everything moves
    # or nothing does.
    with self.db:
        cursor = self.db.execute(
            "SELECT COUNT(1) FROM feeds WHERE url=?", [new_url]
        )
        row = cursor.fetchone()
        if row[0] == 0:
            # No feed registered under the new url: rename in place.
            # NOTE(review): entries.feed_url still holds old_url after this
            # rename — confirm the schema cascades the update (ON UPDATE
            # CASCADE) or that entries are re-keyed elsewhere.
            self.db.execute(
                "UPDATE feeds SET url = ? WHERE url = ?", [new_url, old_url]
            )
        else:
            # Preserve the entries that were under the old url by re-keying
            # them to the already-known feed at new_url.
            #
            # BUGFIX: the previous statement used
            #   "UPDATE entries SET feed_url=? WHERE feed_url=? ON CONFLICT
            #    DO UPDATE SET ..."
            # which is not valid SQLite — the ON CONFLICT DO UPDATE (upsert)
            # clause exists only on INSERT — and additionally passed no bind
            # parameters for its two '?' placeholders, so the call raised
            # sqlite3.OperationalError before moving anything.
            #
            # UPDATE OR IGNORE moves every entry that does not collide with
            # one already stored under new_url; the colliding leftovers are
            # then dropped, keeping the new feed's copy.
            # TODO(review): the original intent was a field-level merge
            # (MIN(inserted_at), earlier row's title/link — see the feed
            # merge note in store_feed); implementing that requires a
            # schema-aware INSERT ... ON CONFLICT DO UPDATE.
            self.db.execute(
                "UPDATE OR IGNORE entries SET feed_url = ? WHERE feed_url = ?",
                [new_url, old_url],
            )
            self.db.execute(
                "DELETE FROM entries WHERE feed_url = ?", [old_url]
            )

            # Mark the old feed dead so future refreshes skip it.
            self.db.execute(
                """
                UPDATE feeds
                SET status = ?,
                    last_fetched_ts = ?
                WHERE url = ?
                """,
                [feed.FEED_STATUS_DEAD, int(time.time()), old_url],
            )
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue