Some database refactor

This commit is contained in:
John Doty 2024-07-19 07:30:14 -07:00
parent f9b14a7622
commit 275926dffe
3 changed files with 79 additions and 94 deletions

View file

@ -96,7 +96,7 @@ def subscribe(url, literal):
result = d result = d
# Check to see if this URL is already in the database. # Check to see if this URL is already in the database.
existing = db.load_feed(result.meta.url) # TODO: Replace with 'load_meta'? existing = db.load_meta(result.meta.url)
if existing is not None: if existing is not None:
click.echo(f"This feed already exists (as {result.meta.url})") click.echo(f"This feed already exists (as {result.meta.url})")
return 1 return 1
@ -129,7 +129,7 @@ def import_opml(opml_file):
click.echo(f"{url} does not seem to be a feed, skipping...") click.echo(f"{url} does not seem to be a feed, skipping...")
continue continue
existing = db.load_feed(meta.url) # TODO: Replace with 'load_meta'? existing = db.load_meta(meta.url)
if existing is not None: if existing is not None:
LOG.info(f"{url} already exists (as {meta.url})") LOG.info(f"{url} already exists (as {meta.url})")
continue continue
@ -151,11 +151,11 @@ def refresh(url):
db = database.Database.local() db = database.Database.local()
if url: if url:
f = db.load_feed(url) # TODO: Replace with 'load_meta'? f = db.load_meta(url)
if f is None: if f is None:
click.echo(f"Not subscribed to {url}") click.echo(f"Not subscribed to {url}")
return 1 return 1
feeds = [f.meta] feeds = [f]
else: else:
feeds = db.load_all_meta() feeds = db.load_all_meta()

View file

@ -115,23 +115,11 @@ class Database:
def get_property(self, prop: str, default=None) -> typing.Any: def get_property(self, prop: str, default=None) -> typing.Any:
with self.db: with self.db:
cursor = self.db.execute( return self._get_property(prop, default)
"SELECT value FROM properties WHERE name=?", (prop,)
)
result = cursor.fetchone()
if result is None:
return default
return result[0]
def set_property(self, prop: str, value): def set_property(self, prop: str, value):
with self.db: with self.db:
self.db.execute( return self._set_property(prop, value)
"""
INSERT INTO properties (name, value) VALUES (?, ?)
ON CONFLICT DO UPDATE SET value=excluded.value
""",
(prop, value),
)
def ensure_database_schema(self): def ensure_database_schema(self):
with self.db: with self.db:
@ -143,15 +131,15 @@ class Database:
) )
""" """
) )
version = int(self.get_property("version", 0)) version = int(self._get_property("version", 0))
for script in SCHEMA_STATEMENTS[version:]: for script in SCHEMA_STATEMENTS[version:]:
for statement in script.split(";"): for statement in script.split(";"):
try: try:
self.db.execute(statement) self.db.execute(statement)
except Exception as e: except Exception as e:
raise Exception(f"Error executing:\n{statement}") from e raise Exception(f"Error executing:\n{statement}") from e
self.set_property("version", len(SCHEMA_STATEMENTS)) self._set_property("version", len(SCHEMA_STATEMENTS))
self.set_property("origin", self.origin) self._set_property("origin", self.origin)
def load_all_meta(self) -> list[feed.FeedMeta]: def load_all_meta(self) -> list[feed.FeedMeta]:
with self.db: with self.db:
@ -258,7 +246,7 @@ class Database:
return feeds return feeds
def load_feed(self, url: str) -> feed.Feed | None: def load_meta(self, url: str) -> feed.FeedMeta | None:
with self.db: with self.db:
cursor = self.db.execute( cursor = self.db.execute(
""" """
@ -267,9 +255,7 @@ class Database:
retry_after_ts, retry_after_ts,
status, status,
etag, etag,
modified, modified
title,
link
FROM feeds FROM feeds
WHERE url=? WHERE url=?
""", """,
@ -280,8 +266,8 @@ class Database:
if row is None: if row is None:
return None return None
last_fetched_ts, retry_after_ts, status, etag, modified, title, link = row last_fetched_ts, retry_after_ts, status, etag, modified = row
meta = feed.FeedMeta( return feed.FeedMeta(
url=url, url=url,
last_fetched_ts=last_fetched_ts, last_fetched_ts=last_fetched_ts,
retry_after_ts=retry_after_ts, retry_after_ts=retry_after_ts,
@ -290,27 +276,6 @@ class Database:
modified=modified, modified=modified,
) )
cursor = self.db.execute(
"""
SELECT
id,
inserted_at,
title,
link
FROM entries
WHERE feed_url=?
""",
[url],
)
rows = cursor.fetchall()
entries = [
feed.Entry(id=id, inserted_at=inserted_at, title=title, link=link)
for id, inserted_at, title, link in rows
]
return feed.Feed(meta=meta, title=title, link=link, entries=entries)
def update_meta(self, f: feed.FeedMeta): def update_meta(self, f: feed.FeedMeta):
with self.db: with self.db:
self.db.execute( self.db.execute(
@ -373,50 +338,8 @@ class Database:
], ],
) )
cursor = self.db.execute( change_count = self._insert_entries(f.meta.url, f.entries)
"SELECT COUNT (*) FROM entries WHERE feed_url=?", [f.meta.url] return change_count
)
start_count = cursor.fetchone()[0]
self.db.executemany(
"""
INSERT INTO entries (
id,
inserted_at,
feed_url,
title,
link
) VALUES (?, ?, ?, ?, ?)
ON CONFLICT DO UPDATE
SET
-- NOTE: This is also part of the feed merge algorithm, BUT
-- we implement it here because feeds tend to be rolling
-- windows over some external content and we don't want
-- to read and write the entire feed just to update the
-- few new items. But we can't just do ON CONFLICT DO
-- NOTHING because we *might* be storing a feed where we
-- resolved conflicts with another instance. So we want
-- to handle all the cases. (In theory we could make two
-- different INSERTs to handle the two cases but that is
-- more complexity than it is worth.)
inserted_at=MIN(inserted_at, excluded.inserted_at),
title=CASE
WHEN inserted_at < excluded.inserted_at THEN title
ELSE excluded.title
END,
link=CASE
WHEN inserted_at < excluded.inserted_at THEN link
ELSE excluded.link
END
""",
[(e.id, e.inserted_at, f.meta.url, e.title, e.link) for e in f.entries],
)
cursor = self.db.execute(
"SELECT COUNT (*) FROM entries WHERE feed_url=?", [f.meta.url]
)
end_count = cursor.fetchone()[0]
return end_count - start_count
def set_feed_status(self, url: str, status: int) -> int: def set_feed_status(self, url: str, status: int) -> int:
with self.db: with self.db:
@ -466,3 +389,65 @@ class Database:
""", """,
[feed.FEED_STATUS_UNSUBSCRIBED, int(time.time()), old_url], [feed.FEED_STATUS_UNSUBSCRIBED, int(time.time()), old_url],
) )
def _get_property(self, prop: str, default=None) -> typing.Any:
    """Look up a single value in the ``properties`` table.

    The query runs directly on ``self.db``; this helper does not open a
    ``with self.db`` block of its own.

    Args:
        prop: The ``name`` of the property row to read.
        default: Value handed back when no row is stored under ``prop``.

    Returns:
        The ``value`` column of the matching row, or ``default`` when the
        query yields no row.
    """
    row = self.db.execute(
        "SELECT value FROM properties WHERE name=?", (prop,)
    ).fetchone()
    return default if row is None else row[0]
def _set_property(self, prop: str, value):
    """Store ``value`` under ``prop`` in the ``properties`` table.

    Issues one INSERT that, on conflict, overwrites the stored ``value``
    with the new one. The statement runs directly on ``self.db``; this
    helper does not open a ``with self.db`` block of its own.

    Args:
        prop: The ``name`` of the property row to write.
        value: The value to store for that property.
    """
    upsert_sql = """
        INSERT INTO properties (name, value) VALUES (?, ?)
        ON CONFLICT DO UPDATE SET value=excluded.value
        """
    self.db.execute(upsert_sql, (prop, value))
def _insert_entries(self, feed_url: str, entries: list[feed.Entry]) -> int:
    """Upsert ``entries`` for one feed and report how many rows were added.

    The number of ``entries`` rows for ``feed_url`` is sampled before and
    after the write. On conflict the existing row keeps the smaller
    ``inserted_at``, and keeps its own ``title``/``link`` only when its
    ``inserted_at`` is strictly smaller than the incoming one. The
    statements run directly on ``self.db``; this helper does not open a
    ``with self.db`` block of its own.

    Args:
        feed_url: Value written to (and counted by) the ``feed_url`` column.
        entries: Objects providing ``id``, ``inserted_at``, ``title`` and
            ``link`` attributes.

    Returns:
        The row count for ``feed_url`` after the write minus the count
        before it.
    """
    count_sql = "SELECT COUNT (*) FROM entries WHERE feed_url=?"

    def count_rows() -> int:
        # Same COUNT query is needed on both sides of the write.
        return self.db.execute(count_sql, [feed_url]).fetchone()[0]

    before = count_rows()

    params = [
        (entry.id, entry.inserted_at, feed_url, entry.title, entry.link)
        for entry in entries
    ]
    self.db.executemany(
        """
        INSERT INTO entries (
            id,
            inserted_at,
            feed_url,
            title,
            link
        ) VALUES (?, ?, ?, ?, ?)
        ON CONFLICT DO UPDATE
        SET
        -- NOTE: This is also part of the feed merge algorithm, BUT
        -- we implement it here because feeds tend to be rolling
        -- windows over some external content and we don't want
        -- to read and write the entire feed just to update the
        -- few new items. But we can't just do ON CONFLICT DO
        -- NOTHING because we *might* be storing a feed where we
        -- resolved conflicts with another instance. So we want
        -- to handle all the cases. (In theory we could make two
        -- different INSERTs to handle the two cases but that is
        -- more complexity than it is worth.)
        inserted_at=MIN(inserted_at, excluded.inserted_at),
        title=CASE
            WHEN inserted_at < excluded.inserted_at THEN title
            ELSE excluded.title
        END,
        link=CASE
            WHEN inserted_at < excluded.inserted_at THEN link
            ELSE excluded.link
        END
        """,
        params,
    )

    return count_rows() - before

View file

@ -106,8 +106,8 @@ def test_database_store_feed():
db.ensure_database_schema() db.ensure_database_schema()
db.store_feed(FEED) db.store_feed(FEED)
loaded = db.load_feed(FEED.meta.url) loaded_meta = db.load_meta(FEED.meta.url)
assert loaded == FEED assert loaded_meta == FEED.meta
def test_database_store_feed_dups(): def test_database_store_feed_dups():