diff --git a/cry/cli.py b/cry/cli.py index 63601ba..86f0d69 100644 --- a/cry/cli.py +++ b/cry/cli.py @@ -96,7 +96,7 @@ def subscribe(url, literal): result = d # Check to see if this URL is already in the database. - existing = db.load_feed(result.meta.url) # TODO: Replace with 'load_meta'? + existing = db.load_meta(result.meta.url) if existing is not None: click.echo(f"This feed already exists (as {result.meta.url})") return 1 @@ -129,7 +129,7 @@ def import_opml(opml_file): click.echo(f"{url} does not seem to be a feed, skipping...") continue - existing = db.load_feed(meta.url) # TODO: Replace with 'load_meta'? + existing = db.load_meta(meta.url) if existing is not None: LOG.info(f"{url} already exists (as {meta.url})") continue @@ -151,11 +151,11 @@ def refresh(url): db = database.Database.local() if url: - f = db.load_feed(url) # TODO: Replace with 'load_meta'? + f = db.load_meta(url) if f is None: click.echo(f"Not subscribed to {url}") return 1 - feeds = [f.meta] + feeds = [f] else: feeds = db.load_all_meta() diff --git a/cry/database.py b/cry/database.py index 1834245..9f5b7bd 100644 --- a/cry/database.py +++ b/cry/database.py @@ -115,23 +115,11 @@ class Database: def get_property(self, prop: str, default=None) -> typing.Any: with self.db: - cursor = self.db.execute( - "SELECT value FROM properties WHERE name=?", (prop,) - ) - result = cursor.fetchone() - if result is None: - return default - return result[0] + return self._get_property(prop, default) def set_property(self, prop: str, value): with self.db: - self.db.execute( - """ - INSERT INTO properties (name, value) VALUES (?, ?) - ON CONFLICT DO UPDATE SET value=excluded.value - """, - (prop, value), - ) + return self._set_property(prop, value) def ensure_database_schema(self): with self.db: @@ -143,15 +131,15 @@ class Database: ) """ ) - version = int(self.get_property("version", 0)) + version = int(self._get_property("version", 0)) for script in SCHEMA_STATEMENTS[version:]: for statement in script.split(";"): try: self.db.execute(statement) except Exception as e: raise Exception(f"Error executing:\n{statement}") from e - self.set_property("version", len(SCHEMA_STATEMENTS)) - self.set_property("origin", self.origin) + self._set_property("version", len(SCHEMA_STATEMENTS)) + self._set_property("origin", self.origin) def load_all_meta(self) -> list[feed.FeedMeta]: with self.db: @@ -258,7 +246,7 @@ class Database: return feeds - def load_feed(self, url: str) -> feed.Feed | None: + def load_meta(self, url: str) -> feed.FeedMeta | None: with self.db: cursor = self.db.execute( """ @@ -267,9 +255,7 @@ class Database: retry_after_ts, status, etag, - modified, - title, - link + modified FROM feeds WHERE url=? """, @@ -280,8 +266,8 @@ class Database: if row is None: return None - last_fetched_ts, retry_after_ts, status, etag, modified, title, link = row - meta = feed.FeedMeta( + last_fetched_ts, retry_after_ts, status, etag, modified = row + return feed.FeedMeta( url=url, last_fetched_ts=last_fetched_ts, retry_after_ts=retry_after_ts, @@ -290,27 +276,6 @@ class Database: modified=modified, ) - cursor = self.db.execute( - """ - SELECT - id, - inserted_at, - title, - link - FROM entries - WHERE feed_url=? - """, - [url], - ) - - rows = cursor.fetchall() - entries = [ - feed.Entry(id=id, inserted_at=inserted_at, title=title, link=link) - for id, inserted_at, title, link in rows - ] - - return feed.Feed(meta=meta, title=title, link=link, entries=entries) - def update_meta(self, f: feed.FeedMeta): with self.db: self.db.execute( @@ -373,50 +338,8 @@ class Database: ], ) - cursor = self.db.execute( - "SELECT COUNT (*) FROM entries WHERE feed_url=?", [f.meta.url] - ) - start_count = cursor.fetchone()[0] - - self.db.executemany( - """ - INSERT INTO entries ( - id, - inserted_at, - feed_url, - title, - link - ) VALUES (?, ?, ?, ?, ?) - ON CONFLICT DO UPDATE - SET - -- NOTE: This is also part of the feed merge algorithm, BUT - -- we implement it here because feeds tend to be rolling - -- windows over some external content and we don't want - -- to read and write the entire feed just to update the - -- few new items. But we can't just do ON CONFLICT DO - -- NOTHING because we *might* be storing a feed where we - -- resolved conflicts with another instance. So we want - -- to handle all the cases. (In theory we could make two - -- different INSERTs to handle the two cases but that is - -- more complexity than it is worth.) - inserted_at=MIN(inserted_at, excluded.inserted_at), - title=CASE - WHEN inserted_at < excluded.inserted_at THEN title - ELSE excluded.title - END, - link=CASE - WHEN inserted_at < excluded.inserted_at THEN link - ELSE excluded.link - END - """, - [(e.id, e.inserted_at, f.meta.url, e.title, e.link) for e in f.entries], - ) - - cursor = self.db.execute( - "SELECT COUNT (*) FROM entries WHERE feed_url=?", [f.meta.url] - ) - end_count = cursor.fetchone()[0] - return end_count - start_count + change_count = self._insert_entries(f.meta.url, f.entries) + return change_count def set_feed_status(self, url: str, status: int) -> int: with self.db: @@ -466,3 +389,65 @@ class Database: """, [feed.FEED_STATUS_UNSUBSCRIBED, int(time.time()), old_url], ) + + def _get_property(self, prop: str, default=None) -> typing.Any: + cursor = self.db.execute("SELECT value FROM properties WHERE name=?", (prop,)) + result = cursor.fetchone() + if result is None: + return default + return result[0] + + def _set_property(self, prop: str, value): + self.db.execute( + """ + INSERT INTO properties (name, value) VALUES (?, ?) + ON CONFLICT DO UPDATE SET value=excluded.value + """, + (prop, value), + ) + + def _insert_entries(self, feed_url: str, entries: list[feed.Entry]) -> int: + cursor = self.db.execute( + "SELECT COUNT (*) FROM entries WHERE feed_url=?", [feed_url] + ) + start_count = cursor.fetchone()[0] + + self.db.executemany( + """ + INSERT INTO entries ( + id, + inserted_at, + feed_url, + title, + link + ) VALUES (?, ?, ?, ?, ?) + ON CONFLICT DO UPDATE + SET + -- NOTE: This is also part of the feed merge algorithm, BUT + -- we implement it here because feeds tend to be rolling + -- windows over some external content and we don't want + -- to read and write the entire feed just to update the + -- few new items. But we can't just do ON CONFLICT DO + -- NOTHING because we *might* be storing a feed where we + -- resolved conflicts with another instance. So we want + -- to handle all the cases. (In theory we could make two + -- different INSERTs to handle the two cases but that is + -- more complexity than it is worth.) + inserted_at=MIN(inserted_at, excluded.inserted_at), + title=CASE + WHEN inserted_at < excluded.inserted_at THEN title + ELSE excluded.title + END, + link=CASE + WHEN inserted_at < excluded.inserted_at THEN link + ELSE excluded.link + END + """, + [(e.id, e.inserted_at, feed_url, e.title, e.link) for e in entries], + ) + + cursor = self.db.execute( + "SELECT COUNT (*) FROM entries WHERE feed_url=?", [feed_url] + ) + end_count = cursor.fetchone()[0] + return end_count - start_count diff --git a/tests/test_database.py b/tests/test_database.py index a6b1d1e..daa3b7b 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -106,8 +106,8 @@ def test_database_store_feed(): db.ensure_database_schema() db.store_feed(FEED) - loaded = db.load_feed(FEED.meta.url) - assert loaded == FEED + loaded_meta = db.load_meta(FEED.meta.url) + assert loaded_meta == FEED.meta def test_database_store_feed_dups():