Compare commits

...

2 commits

Author SHA1 Message Date
463abcb923 Stuff 2024-07-19 08:11:10 -07:00
275926dffe Some database refactor 2024-07-19 07:30:14 -07:00
3 changed files with 170 additions and 150 deletions

View file

@ -96,7 +96,7 @@ def subscribe(url, literal):
result = d
# Check to see if this URL is already in the database.
existing = db.load_feed(result.meta.url) # TODO: Replace with 'load_meta'?
existing = db.load_meta(result.meta.url)
if existing is not None:
click.echo(f"This feed already exists (as {result.meta.url})")
return 1
@ -129,7 +129,7 @@ def import_opml(opml_file):
click.echo(f"{url} does not seem to be a feed, skipping...")
continue
existing = db.load_feed(meta.url) # TODO: Replace with 'load_meta'?
existing = db.load_meta(meta.url)
if existing is not None:
LOG.info(f"{url} already exists (as {meta.url})")
continue
@ -151,11 +151,11 @@ def refresh(url):
db = database.Database.local()
if url:
f = db.load_feed(url) # TODO: Replace with 'load_meta'?
f = db.load_meta(url)
if f is None:
click.echo(f"Not subscribed to {url}")
return 1
feeds = [f.meta]
feeds = [f]
else:
feeds = db.load_all_meta()
@ -239,11 +239,13 @@ def unsubscribe(url):
`list` command.)
"""
db = database.Database.local()
count = db.set_feed_status(url, feed.FEED_STATUS_UNSUBSCRIBED)
if count == 0:
meta = db.load_meta(url)
if meta is None:
click.echo(f"Not subscribed to feed {url}")
return 1
db.update_feed_status(meta, feed.FEED_STATUS_UNSUBSCRIBED)
@cli.command("serve")
def serve():
@ -262,6 +264,11 @@ def serve():
<head>
<meta charset="utf8">
<title>Subscribed Feeds</title>
<style>
body { margin-left: 4rem; }
li.entry { display: inline; padding-right: 1rem; }
li.entry:before { content: '\\2022'; padding-right: 0.5rem; }
</style>
</head>
<h1>Feeds</h1>
"""
@ -272,17 +279,19 @@ def serve():
ago = f" ({f.entries[0].time_ago()})"
else:
ago = ""
buffer.write(f"<div class='feed'>")
buffer.write(f'<h2><a href="{f.link}">{feed_title}</a>{ago}</h2>')
buffer.write(f"<div>")
if len(f.entries) > 0:
buffer.write(f"<ul>")
for entry in f.entries:
title = html.escape(entry.title)
buffer.write(
f'<span class="entry">&bull; <a href="{entry.link}">{title}</a> ({entry.time_ago()})</span> '
f'<li class="entry"><a href="{entry.link}">{title}</a> ({entry.time_ago()})</li>'
)
buffer.write(f"</ul>")
else:
buffer.write("<i>No entries...</i>")
buffer.write(f"</div>")
buffer.write(f"</div>") # feed
buffer.flush()
text = buffer.getvalue()
response = text.encode("utf-8")

View file

@ -115,23 +115,11 @@ class Database:
def get_property(self, prop: str, default=None) -> typing.Any:
with self.db:
cursor = self.db.execute(
"SELECT value FROM properties WHERE name=?", (prop,)
)
result = cursor.fetchone()
if result is None:
return default
return result[0]
return self._get_property(prop, default)
def set_property(self, prop: str, value):
with self.db:
self.db.execute(
"""
INSERT INTO properties (name, value) VALUES (?, ?)
ON CONFLICT DO UPDATE SET value=excluded.value
""",
(prop, value),
)
return self._set_property(prop, value)
def ensure_database_schema(self):
with self.db:
@ -143,15 +131,15 @@ class Database:
)
"""
)
version = int(self.get_property("version", 0))
version = int(self._get_property("version", 0))
for script in SCHEMA_STATEMENTS[version:]:
for statement in script.split(";"):
try:
self.db.execute(statement)
except Exception as e:
raise Exception(f"Error executing:\n{statement}") from e
self.set_property("version", len(SCHEMA_STATEMENTS))
self.set_property("origin", self.origin)
self._set_property("version", len(SCHEMA_STATEMENTS))
self._set_property("origin", self.origin)
def load_all_meta(self) -> list[feed.FeedMeta]:
with self.db:
@ -258,7 +246,7 @@ class Database:
return feeds
def load_feed(self, url: str) -> feed.Feed | None:
def load_meta(self, url: str) -> feed.FeedMeta | None:
with self.db:
cursor = self.db.execute(
"""
@ -267,9 +255,7 @@ class Database:
retry_after_ts,
status,
etag,
modified,
title,
link
modified
FROM feeds
WHERE url=?
""",
@ -280,8 +266,8 @@ class Database:
if row is None:
return None
last_fetched_ts, retry_after_ts, status, etag, modified, title, link = row
meta = feed.FeedMeta(
last_fetched_ts, retry_after_ts, status, etag, modified = row
return feed.FeedMeta(
url=url,
last_fetched_ts=last_fetched_ts,
retry_after_ts=retry_after_ts,
@ -290,27 +276,6 @@ class Database:
modified=modified,
)
cursor = self.db.execute(
"""
SELECT
id,
inserted_at,
title,
link
FROM entries
WHERE feed_url=?
""",
[url],
)
rows = cursor.fetchall()
entries = [
feed.Entry(id=id, inserted_at=inserted_at, title=title, link=link)
for id, inserted_at, title, link in rows
]
return feed.Feed(meta=meta, title=title, link=link, entries=entries)
def update_meta(self, f: feed.FeedMeta):
with self.db:
self.db.execute(
@ -339,47 +304,127 @@ class Database:
Returns the number of new entries inserted.
"""
with self.db:
self.db.execute(
"""
INSERT INTO feeds (
url,
last_fetched_ts,
retry_after_ts,
status,
etag,
modified,
title,
link
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT DO UPDATE
SET
last_fetched_ts=excluded.last_fetched_ts,
retry_after_ts=excluded.retry_after_ts,
status=excluded.status,
etag=excluded.etag,
modified=excluded.modified,
title=excluded.title,
link=excluded.link
""",
[
f.meta.url,
f.meta.last_fetched_ts,
f.meta.retry_after_ts,
f.meta.status,
f.meta.etag,
f.meta.modified,
f.title,
f.link,
],
)
self._insert_feed(f.meta, f.title, f.link)
return self._insert_entries(f.meta.url, f.entries)
def update_feed_status(self, meta: feed.FeedMeta, status: int) -> int:
    """Run ``_update_feed_status`` for ``meta`` inside a ``with self.db`` block.

    Args:
        meta: Feed metadata, passed through unchanged to the helper.
        status: New status value, passed through unchanged to the helper.

    Returns:
        Whatever ``_update_feed_status`` returns for the same arguments.
    """
    # NOTE(review): self.db looks like a sqlite3 connection, in which case
    # the ``with`` block scopes a transaction -- confirm.
    with self.db:
        affected = self._update_feed_status(meta, status)
        return affected
def redirect_feed(self, old_url: str, new_url: str):
with self.db:
cursor = self.db.execute(
"SELECT COUNT (*) FROM entries WHERE feed_url=?", [f.meta.url]
"SELECT COUNT(1) FROM feeds WHERE url=?", [new_url]
)
start_count = cursor.fetchone()[0]
row = cursor.fetchone()
if row[0] == 0:
self.db.execute(
"UPDATE feeds SET url = ? WHERE url = ?", [new_url, old_url]
)
else:
# First update all the entries that you can with the old url.
self.db.execute(
"""
UPDATE OR IGNORE entries
SET feed_url = ?
WHERE feed_url = ?
""",
[new_url, old_url],
)
self.db.executemany(
"""
# TODO: It is expensive and not worth it to try to load and
# re-insert all the old stuff so I'm not going to
# bother.
# Mark the old feed unsubscribed.
# TODO: Rebuild with helpers
self.db.execute(
"""
UPDATE feeds
SET status = ?,
last_fetched_ts = ?
WHERE url = ?
""",
[feed.FEED_STATUS_UNSUBSCRIBED, int(time.time()), old_url],
)
def _get_property(self, prop: str, default=None) -> typing.Any:
    """Read one value from the ``properties`` table.

    Runs the query directly on ``self.db``; it does not enter
    ``with self.db`` itself.

    Args:
        prop: Value matched against the ``name`` column.
        default: Returned when no row matches ``prop``.

    Returns:
        The ``value`` column of the matching row, or ``default`` if the
        query yields no row.
    """
    row = self.db.execute(
        "SELECT value FROM properties WHERE name=?", (prop,)
    ).fetchone()
    return default if row is None else row[0]
def _set_property(self, prop: str, value):
    """Insert or overwrite one row of the ``properties`` table.

    Runs the statement directly on ``self.db``; it does not enter
    ``with self.db`` itself.

    Args:
        prop: Stored in the ``name`` column.
        value: Stored in the ``value`` column; on a conflict the incoming
            value replaces the stored one.
    """
    upsert_sql = """
        INSERT INTO properties (name, value) VALUES (?, ?)
        ON CONFLICT DO UPDATE SET value=excluded.value
        """
    self.db.execute(upsert_sql, (prop, value))
def _insert_feed(self, meta: feed.FeedMeta, title: str, link: str):
    """Upsert a single row of the ``feeds`` table.

    Runs the statement directly on ``self.db``; it does not enter
    ``with self.db`` itself.

    When the insert conflicts with a stored row:

    * ``last_fetched_ts`` and ``retry_after_ts`` each become the larger of
      the stored and incoming values.
    * ``status``, ``etag``, ``modified``, ``title`` and ``link`` keep the
      stored value only if the stored ``last_fetched_ts`` is strictly
      greater than the incoming one; otherwise the incoming value wins.

    NOTE(review): this relies on the unqualified column names in the SET
    clause referring to the row's values from before the update -- confirm
    against the SQLite UPSERT documentation.

    Args:
        meta: Supplies url, last_fetched_ts, retry_after_ts, status, etag
            and modified.
        title: Stored in the ``title`` column.
        link: Stored in the ``link`` column.
    """
    upsert_sql = """
        INSERT INTO feeds (
            url,
            last_fetched_ts,
            retry_after_ts,
            status,
            etag,
            modified,
            title,
            link
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT DO UPDATE
        SET
            last_fetched_ts=MAX(last_fetched_ts, excluded.last_fetched_ts),
            retry_after_ts=MAX(retry_after_ts, excluded.retry_after_ts),
            -- For all other fields, take the value that was computed by the
            -- most recent fetch.
            status=CASE
                WHEN last_fetched_ts > excluded.last_fetched_ts THEN status
                ELSE excluded.status
            END,
            etag=CASE
                WHEN last_fetched_ts > excluded.last_fetched_ts THEN etag
                ELSE excluded.etag
            END,
            modified=CASE
                WHEN last_fetched_ts > excluded.last_fetched_ts THEN modified
                ELSE excluded.modified
            END,
            title=CASE
                WHEN last_fetched_ts > excluded.last_fetched_ts THEN title
                ELSE excluded.title
            END,
            link=CASE
                WHEN last_fetched_ts > excluded.last_fetched_ts THEN link
                ELSE excluded.link
            END
        """
    # One value per placeholder, in the column order of the INSERT list.
    row_values = [
        meta.url,
        meta.last_fetched_ts,
        meta.retry_after_ts,
        meta.status,
        meta.etag,
        meta.modified,
        title,
        link,
    ]
    self.db.execute(upsert_sql, row_values)
def _insert_entries(self, feed_url: str, entries: list[feed.Entry]) -> int:
cursor = self.db.execute(
"SELECT COUNT (*) FROM entries WHERE feed_url=?", [feed_url]
)
start_count = cursor.fetchone()[0]
self.db.executemany(
"""
INSERT INTO entries (
id,
inserted_at,
@ -409,60 +454,24 @@ class Database:
ELSE excluded.link
END
""",
[(e.id, e.inserted_at, f.meta.url, e.title, e.link) for e in f.entries],
)
[(e.id, e.inserted_at, feed_url, e.title, e.link) for e in entries],
)
cursor = self.db.execute(
"SELECT COUNT (*) FROM entries WHERE feed_url=?", [f.meta.url]
)
end_count = cursor.fetchone()[0]
return end_count - start_count
cursor = self.db.execute(
"SELECT COUNT (*) FROM entries WHERE feed_url=?", [feed_url]
)
end_count = cursor.fetchone()[0]
return end_count - start_count
def set_feed_status(self, url: str, status: int) -> int:
    """Set ``status`` on the ``feeds`` row for ``url`` and stamp it with now.

    The UPDATE runs inside ``with self.db`` and also writes the current
    time (whole seconds from ``time.time()``) to ``last_fetched_ts``.

    Args:
        url: Matched against the ``url`` column.
        status: New value for the ``status`` column.

    Returns:
        The cursor's ``rowcount`` for the UPDATE (0 when no row has ``url``).
    """
    with self.db:
        stamp = int(time.time())
        result = self.db.execute(
            """
            UPDATE feeds
            SET status = ?,
            last_fetched_ts = ?
            WHERE url = ?
            """,
            [status, stamp, url],
        )
        return result.rowcount
def redirect_feed(self, old_url: str, new_url: str):
    """Move a feed from ``old_url`` to ``new_url`` inside ``with self.db``.

    If no ``feeds`` row has ``new_url`` yet, the row for ``old_url`` is
    simply renamed. Otherwise the entries of ``old_url`` are re-pointed at
    ``new_url`` where the database allows it (``UPDATE OR IGNORE``), and
    the ``old_url`` feed row is marked unsubscribed with a fresh
    ``last_fetched_ts``.

    Args:
        old_url: URL the feed is currently stored under.
        new_url: URL the feed should be stored under from now on.
    """
    with self.db:
        (existing,) = self.db.execute(
            "SELECT COUNT(1) FROM feeds WHERE url=?", [new_url]
        ).fetchone()

        if existing == 0:
            # Nothing is stored under new_url: rename the feed row in place.
            self.db.execute(
                "UPDATE feeds SET url = ? WHERE url = ?", [new_url, old_url]
            )
            return

        # A feed already lives at new_url. Start by moving over every entry
        # of the old url that can be moved.
        self.db.execute(
            """
            UPDATE OR IGNORE entries
            SET feed_url = ?
            WHERE feed_url = ?
            """,
            [new_url, old_url],
        )

        # TODO: Loading and re-inserting all the old data is expensive and
        #       not worth it, so it is deliberately not attempted.

        # Finally, flag the old feed as unsubscribed.
        self.db.execute(
            """
            UPDATE feeds
            SET status = ?,
            last_fetched_ts = ?
            WHERE url = ?
            """,
            [feed.FEED_STATUS_UNSUBSCRIBED, int(time.time()), old_url],
        )
def _update_feed_status(self, meta: feed.FeedMeta, status: int) -> int:
    """Write ``status`` and a new ``last_fetched_ts`` for ``meta.url``.

    Runs the UPDATE directly on ``self.db``; it does not enter
    ``with self.db`` itself.

    Args:
        meta: Supplies the ``url`` to match and the previous
            ``last_fetched_ts``.
        status: New value for the ``status`` column.

    Returns:
        The cursor's ``rowcount`` for the UPDATE.
    """
    # Stamp with the current time, but never with less than
    # meta.last_fetched_ts + 1, so the written timestamp is always strictly
    # greater than the one carried by ``meta``.
    now = int(time.time())
    bumped_ts = max(now, meta.last_fetched_ts + 1)
    params = [status, bumped_ts, meta.url]
    result = self.db.execute(
        """
        UPDATE feeds
        SET status = ?,
        last_fetched_ts = ?
        WHERE url = ?
        """,
        params,
    )
    return result.rowcount

View file

@ -106,8 +106,8 @@ def test_database_store_feed():
db.ensure_database_schema()
db.store_feed(FEED)
loaded = db.load_feed(FEED.meta.url)
assert loaded == FEED
loaded_meta = db.load_meta(FEED.meta.url)
assert loaded_meta == FEED.meta
def test_database_store_feed_dups():
@ -252,16 +252,18 @@ def test_database_store_update_meta():
assert db.load_all_meta()[0] == new_meta
def test_database_set_feed_status():
def test_database_update_feed_status():
db = database.Database(":memory:", random_slug())
db.ensure_database_schema()
db.store_feed(FEED)
assert db.load_all_meta()[0].status != feed.FEED_STATUS_UNSUBSCRIBED
db.set_feed_status(FEED.meta.url, feed.FEED_STATUS_UNSUBSCRIBED)
db.update_feed_status(
FEED.meta,
feed.FEED_STATUS_UNSUBSCRIBED,
)
# TODO: Ensure that the updated time is touched too.
assert db.load_all_meta()[0].status == feed.FEED_STATUS_UNSUBSCRIBED