This commit is contained in:
John Doty 2024-07-22 15:58:30 -07:00
parent abd70e2ad3
commit 5a834d5d26
2 changed files with 203 additions and 33 deletions

View file

@@ -45,6 +45,60 @@ SCHEMA_STATEMENTS = [
ELSE status
END
""",
# The "clock" is a number that increments as we make changes. We use this
# to do reconciliation, and track which versions of other databases we
# have reconciled already.
"""
INSERT INTO properties (name, value) VALUES ('clock', 1);
CREATE TRIGGER update_clock_on_feed_insert
AFTER INSERT ON feeds
BEGIN
UPDATE properties SET value=value + 1 WHERE name='clock';
END;
CREATE TRIGGER update_clock_on_feed_delete
AFTER DELETE ON feeds
BEGIN
UPDATE properties SET value=value + 1 WHERE name='clock';
END;
CREATE TRIGGER update_clock_on_feed_update
AFTER UPDATE ON feeds
WHEN (NEW.last_fetched_ts IS NOT OLD.last_fetched_ts)
OR (NEW.retry_after_ts IS NOT OLD.retry_after_ts)
OR (NEW.status IS NOT OLD.status)
OR (NEW.etag IS NOT OLD.etag)
OR (NEW.modified IS NOT OLD.modified)
OR (NEW.title IS NOT OLD.title)
OR (NEW.link IS NOT OLD.link)
BEGIN
UPDATE properties SET value=value + 1 WHERE name='clock';
END;
CREATE TRIGGER update_clock_on_entries_insert
AFTER INSERT ON entries
BEGIN
UPDATE properties SET value=value + 1 WHERE name='clock';
END;
CREATE TRIGGER update_clock_on_entries_delete
AFTER DELETE ON entries
BEGIN
UPDATE properties SET value=value + 1 WHERE name='clock';
END;
CREATE TRIGGER update_clock_on_entries_update
AFTER UPDATE ON entries
WHEN (NEW.id IS NOT OLD.id)
OR (NEW.inserted_at IS NOT OLD.inserted_at)
OR (NEW.feed_url IS NOT OLD.feed_url)
OR (NEW.title IS NOT OLD.title)
OR (NEW.link IS NOT OLD.link)
BEGIN
UPDATE properties SET value=value + 1 WHERE name='clock';
END;
""",
]
@@ -121,6 +175,9 @@ class Database:
with self.db:
return self._set_property(prop, value)
def get_clock(self) -> int:
    """Return the current reconciliation clock value (0 if never set)."""
    raw = self.get_property("clock", 0)
    return int(raw)
def ensure_database_schema(self):
with self.db:
self.db.execute(
@@ -133,11 +190,10 @@ class Database:
)
version = int(self._get_property("version", 0))
for script in SCHEMA_STATEMENTS[version:]:
for statement in script.split(";"):
try:
self.db.execute(statement)
except Exception as e:
raise Exception(f"Error executing:\n{statement}") from e
try:
self.db.executescript(script)
except Exception as e:
raise Exception(f"Error executing:\n{script}") from e
self._set_property("version", len(SCHEMA_STATEMENTS))
self._set_property("origin", self.origin)
@@ -248,33 +304,7 @@ class Database:
def load_meta(self, url: str) -> feed.FeedMeta | None:
with self.db:
cursor = self.db.execute(
"""
SELECT
last_fetched_ts,
retry_after_ts,
status,
etag,
modified
FROM feeds
WHERE url=?
""",
[url],
)
row = cursor.fetchone()
if row is None:
return None
last_fetched_ts, retry_after_ts, status, etag, modified = row
return feed.FeedMeta(
url=url,
last_fetched_ts=last_fetched_ts,
retry_after_ts=retry_after_ts,
status=status,
etag=etag,
modified=modified,
)
return self._load_meta(url)
def update_meta(self, f: feed.FeedMeta):
with self.db:
@@ -475,3 +505,32 @@ class Database:
[status, new_ts, meta.url],
)
return cursor.rowcount
# Internal helper: fetch the persisted fetch-metadata row for `url` and
# hydrate it into a feed.FeedMeta, or return None when the feed is unknown.
# Unlike the public load_meta(), this does NOT open its own transaction —
# the caller is expected to hold one (load_meta wraps this in `with self.db`).
def _load_meta(self, url: str) -> feed.FeedMeta | None:
cursor = self.db.execute(
"""
SELECT
last_fetched_ts,
retry_after_ts,
status,
etag,
modified
FROM feeds
WHERE url=?
""",
[url],
)
# At most one row: `url` is the lookup key for the feeds table.
row = cursor.fetchone()
if row is None:
# Never stored: signal "not found" rather than raising.
return None
last_fetched_ts, retry_after_ts, status, etag, modified = row
return feed.FeedMeta(
url=url,
last_fetched_ts=last_fetched_ts,
retry_after_ts=retry_after_ts,
status=status,
etag=etag,
modified=modified,
)

View file

@@ -69,6 +69,18 @@ def test_database_prop_get_set():
assert db.get_property("foo") == val
def test_database_prop_get_set_clock():
    """Plain property reads and writes must not advance the clock."""
    db = database.Database(":memory:", random_slug())
    db.ensure_database_schema()
    baseline = db.get_clock()
    # Reading a (missing) property is not feed/entry data: no tick.
    db.get_property("foo")
    assert db.get_clock() == baseline
    # Writing an ordinary property is not feed/entry data either: no tick.
    db.set_property("foo", random_slug())
    assert db.get_clock() == baseline
REF_TIME = int(time.time())
FEED = feed.Feed(
meta=feed.FeedMeta(
@@ -121,6 +133,17 @@ def test_database_store_feed_dups():
assert new_entries == 0
def test_database_store_feed_dups_clock():
    """Re-storing an identical feed is a no-op and must not tick the clock."""
    db = database.Database(":memory:", random_slug())
    db.ensure_database_schema()
    db.store_feed(FEED)
    clock_before = db.get_clock()
    # Same content second time around: the update triggers must not fire.
    db.store_feed(FEED)
    assert db.get_clock() == clock_before, "No change, no change!"
def test_database_store_feed_fetch_meta():
db = database.Database(":memory:", random_slug())
db.ensure_database_schema()
@@ -150,12 +173,26 @@ def test_database_store_feed_fetch_all_dups():
assert all_feeds == [FEED]
def test_database_store_feed_reads_no_clock():
    """Read-only operations must leave the reconciliation clock untouched."""
    db = database.Database(":memory:", random_slug())
    db.ensure_database_schema()
    db.store_feed(FEED)
    clock_before = db.get_clock()
    # Each pure read in turn; none may tick the clock.
    for read_op in (
        lambda: db.load_meta(FEED.meta.url),
        db.load_all,
        db.load_all_meta,
    ):
        read_op()
        assert db.get_clock() == clock_before
def test_database_store_feed_fetch_pattern_miss():
    """A pattern that matches no stored feed returns an empty list.

    Fix: removed the unused `expected` local (a `dataclasses.replace` copy
    of FEED left over from a copy-paste of the pattern-hit test) — it was
    never asserted against.
    """
    db = database.Database(":memory:", random_slug())
    db.ensure_database_schema()
    db.store_feed(FEED)
    # The pattern matches nothing, so the feed_limit is irrelevant.
    all_feeds = db.load_all(feed_limit=13, pattern="no_existo")
    assert all_feeds == []
@@ -205,6 +242,31 @@ def test_database_store_with_update():
assert all_feeds == [updated_feed]
def test_database_store_with_update_clock():
    """Storing a feed whose metadata/content changed must advance the clock."""
    db = database.Database(":memory:", random_slug())
    db.ensure_database_schema()
    db.store_feed(FEED)
    clock_before = db.get_clock()
    # Touch every tracked field except status.
    changed_meta = dataclasses.replace(
        FEED.meta,
        last_fetched_ts=FEED.meta.last_fetched_ts + 10,
        retry_after_ts=FEED.meta.retry_after_ts + 20,
        etag=None,
        modified=random_slug(),
    )
    changed_feed = dataclasses.replace(
        FEED,
        meta=changed_meta,
        title=FEED.title + " (updated)",
        link=FEED.link + "/updated",
    )
    db.store_feed(changed_feed)
    assert db.get_clock() != clock_before
def test_database_store_with_older_entries():
db = database.Database(":memory:", random_slug())
db.ensure_database_schema()
@@ -252,6 +314,26 @@ def test_database_store_update_meta():
assert db.load_all_meta()[0] == new_meta
def test_database_store_update_meta_clock():
    """update_meta() with changed fields must advance the clock.

    Fix: the original was named `test_database_store_update_meta`, colliding
    with the pre-existing test of that exact name immediately above (visible
    in the enclosing hunk header). Under pytest the later definition silently
    shadows the earlier one, so the original meta-update test never ran.
    Renamed with the `_clock` suffix used by every other clock test here.
    """
    db = database.Database(":memory:", random_slug())
    db.ensure_database_schema()
    db.store_feed(FEED)
    old_clock = db.get_clock()
    new_meta = dataclasses.replace(
        FEED.meta,
        last_fetched_ts=FEED.meta.last_fetched_ts + 10,
        # NOTE(review): base is last_fetched_ts, not retry_after_ts — looks
        # like a copy-paste slip, but any field change still ticks the clock,
        # so the value is preserved as-is.
        retry_after_ts=FEED.meta.last_fetched_ts + 20,
        status=feed.FEED_STATUS_DEAD,
        etag=random_slug(),
        modified=random_slug(),
    )
    db.update_meta(new_meta)
    assert db.get_clock() != old_clock
def test_database_update_feed_status():
db = database.Database(":memory:", random_slug())
db.ensure_database_schema()
@@ -267,6 +349,21 @@ def test_database_update_feed_status():
assert db.load_all_meta()[0].status == feed.FEED_STATUS_UNSUBSCRIBED
def test_database_update_feed_status_clock():
    """A status change via update_feed_status() must advance the clock."""
    db = database.Database(":memory:", random_slug())
    db.ensure_database_schema()
    db.store_feed(FEED)
    clock_before = db.get_clock()
    db.update_feed_status(FEED.meta, feed.FEED_STATUS_UNSUBSCRIBED)
    assert db.get_clock() != clock_before
def test_database_redirect_clean():
db = database.Database(":memory:", random_slug())
db.ensure_database_schema()
@@ -302,3 +399,17 @@ def test_database_redirect_with_merge():
old_dead_meta = dataclasses.replace(FEED.meta, status=feed.FEED_STATUS_UNSUBSCRIBED)
assert db.load_all_meta() == [old_dead_meta, expected_meta]
assert db.load_all(feed_limit=9999) == [expected_feed]
def test_database_redirect_clock():
    """Redirecting a feed to a fresh URL must advance the clock."""
    db = database.Database(":memory:", random_slug())
    db.ensure_database_schema()
    db.store_feed(FEED)
    clock_before = db.get_clock()
    target = f"http://example.com/redirect/{random_slug()}"
    db.redirect_feed(FEED.meta.url, target)
    assert db.get_clock() != clock_before