diff --git a/README.md b/README.md index a404c25..975c925 100644 --- a/README.md +++ b/README.md @@ -1,41 +1,3 @@ # cry -[![PyPI](https://img.shields.io/pypi/v/cry.svg)](https://pypi.org/project/cry/) -[![Changelog](https://img.shields.io/github/v/release/decarabas/cry?include_prereleases&label=changelog)](https://github.com/decarabas/cry/releases) -[![Tests](https://github.com/decarabas/cry/actions/workflows/test.yml/badge.svg)](https://github.com/decarabas/cry/actions/workflows/test.yml) -[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/decarabas/cry/blob/master/LICENSE) - -Command line feed reader - -## Installation - -Install this tool using `pip`: -```bash -pip install cry -``` -## Usage - -For help, run: -```bash -cry --help -``` -You can also use: -```bash -python -m cry --help -``` -## Development - -To contribute to this tool, first checkout the code. Then create a new virtual environment: -```bash -cd cry -python -m venv venv -source venv/bin/activate -``` -Now install the dependencies and test dependencies: -```bash -pip install -e '.[test]' -``` -To run the tests: -```bash -pytest -``` +A local-first, command-line feed reader. Kinda. diff --git a/cry/cli.py b/cry/cli.py index d7ea8e6..d08e001 100644 --- a/cry/cli.py +++ b/cry/cli.py @@ -1,15 +1,37 @@ # https://simonwillison.net/2023/Sep/30/cli-tools-python/ import asyncio +import html +import http.server +import io +import logging + import click from . import feed from . import database +from . import opml + +LOG = logging.getLogger(__name__) @click.group() @click.version_option() -def cli(): +@click.option( + "-v", + "--verbose", + count=True, + help="Increase the verbosity of the output. This option can be specified multiple times.", +) +def cli(verbose): "Command line feed reader" + if verbose > 1: + level = logging.DEBUG + elif verbose > 0: + level = logging.INFO + else: + level = logging.WARN + + logging.basicConfig(level=level) @cli.command(name="subscribe") @@ -36,3 +58,189 @@ def subscribe(url): db.store_feed(f) click.echo(f"Subscribed to {meta.url}") + + +@cli.command(name="import") +@click.argument("opml_file", type=click.File("r", encoding="utf-8")) +def import_opml(opml_file): + "Import the specified OPML file." + + db = database.Database.local() + urls = opml.parse_opml(opml_file.read()) + metas = [feed.FeedMeta.from_url(url, db.origin) for url in urls] + + click.echo(f"Fetching {len(urls)} feeds ...") + results = asyncio.run(feed.fetch_many(metas)) + + subscribed = 0 + for index, result in enumerate(results): + d, meta = result + url = urls[index] + if d is None: + LOG.warn(f"Unable to fetch {url}, skipping...") + continue + + existing = db.load_feed(meta.url) + if existing is not None: + LOG.info(f"{url} already exists (as {meta.url})") + continue + + f = feed.Feed.from_parsed(d, meta) + db.store_feed(f) + subscribed = subscribed + 1 + + click.echo(f"Subscribed to {subscribed} new feeds") + + +@cli.command(name="refresh") +@click.argument("url", required=False, default=None) +def refresh(url): + """Refresh one or more feeds. + + If a URL is specified, refresh that URL. Otherwise, refresh all subscribed + feeds. + """ + + db = database.Database.local() + if url: + f = db.load_feed(url) + if f is None: + click.echo(f"Not subscribed to {url}") + return 1 + feeds = [f.meta] + else: + feeds = db.load_all_meta() + + click.echo(f"Refreshing {len(feeds)} feed(s)...") + results = asyncio.run(feed.fetch_many(feeds)) + + new_count = 0 + for d, meta in results: + if d is None: + # Nothing new. + db.update_meta(meta) + else: + # New items, possibly! + f = feed.Feed.from_parsed(d, meta) + new_count = new_count + db.store_feed(f) + + click.echo(f"Fetched {new_count} new entries.") + + +@cli.command(name="show") +@click.argument("pattern", required=False, default="") +@click.option( + "--count", + "-c", + type=int, + default=10, + show_default=True, + help="Show at most this many entries from each feed.", +) +def show(pattern, count): + """Show feeds and entries. + + If a pattern is supplied, then filter the feeds to urls or titles that + match the pattern. Otherwise, just show everything. + """ + + db = database.Database.local() + feeds = db.load_all(feed_limit=count, pattern=pattern or "") + + feeds.sort(key=feed.sort_key, reverse=True) + for f in feeds: + click.echo(f"{f.title}") + if len(f.entries) > 0: + for entry in f.entries: + click.echo(f" {entry.title}") + else: + click.echo(f" ") + click.echo() + + +@cli.command("list") +@click.argument("pattern", required=False, default="") +def list_feeds(pattern): + """List subscribed feeds. + + If a pattern is supplied, then filter the feeds to urls or titles that + match the pattern. Otherwise, just show everything. + """ + db = database.Database.local() + feeds = db.load_all(feed_limit=0, pattern=pattern) + + max_title = max(len(f.title) for f in feeds) + max_url = max(len(f.meta.url) for f in feeds) + + feeds.sort(key=lambda f: f.title) + + for f in feeds: + click.echo(f"{f.title:{max_title}} {f.meta.url:{max_url}}") + + +@cli.command("unsubscribe") +@click.argument("url") +def unsubscribe(url): + """Unsubscribe from the specified feed. + + (If you need to find the URL for the feed to unsubscribe from, use the + `list` command.) + """ + db = database.Database.local() + count = db.set_feed_status(url, feed.FEED_STATUS_UNSUBSCRIBED) + if count == 0: + click.echo(f"Not subscribed to feed {url}") + return 1 + + +@cli.command("serve") +def serve(): + class Handler(http.server.BaseHTTPRequestHandler): + def do_GET(self): + db = database.Database.local() + feeds = db.load_all(feed_limit=10) + del db + + feeds.sort(key=feed.sort_key, reverse=True) + + buffer = io.StringIO() + buffer.write( + """ + + + + Subscribed Feeds + +

Feeds

+ """ + ) + for f in feeds: + feed_title = html.escape(f.title) + if len(f.entries) > 0: + ago = f" ({f.entries[0].time_ago()})" + else: + ago = "" + buffer.write(f'

{feed_title}{ago}

') + buffer.write(f"
") + if len(f.entries) > 0: + for entry in f.entries: + title = html.escape(entry.title) + buffer.write( + f'{title} ({entry.time_ago()}) ' + ) + else: + buffer.write("No entries...") + buffer.write(f"
") + buffer.flush() + text = buffer.getvalue() + response = text.encode("utf-8") + + self.send_response(200) + self.send_header("content-type", "text/html") + self.send_header("content-length", str(len(response))) + self.end_headers() + self.wfile.write(response) + + with http.server.HTTPServer(("", 8000), Handler) as server: + click.echo("Serving at http://127.0.0.1:8000/") + server.serve_forever() diff --git a/cry/database.py b/cry/database.py index 86b9797..c63b8f4 100644 --- a/cry/database.py +++ b/cry/database.py @@ -3,6 +3,7 @@ import random import socket import sqlite3 import string +import time import typing import platformdirs @@ -33,7 +34,17 @@ SCHEMA_STATEMENTS = [ ON UPDATE CASCADE ON DELETE CASCADE ); + """, + # I went and changed the status enum to make ALIVE == 0 when I added the + # "unsubscribed" status. I should probably make these strings huh. """ + UPDATE feeds + SET status=CASE + WHEN status = 0 THEN 1 + WHEN status = 1 THEN 0 + ELSE status + END + """, ] @@ -91,20 +102,24 @@ class Database: return db def get_property(self, prop: str, default=None) -> typing.Any: - cursor = self.db.execute("SELECT value FROM properties WHERE name=?", (prop,)) - result = cursor.fetchone() - if result is None: - return default - return result[0] + with self.db: + cursor = self.db.execute( + "SELECT value FROM properties WHERE name=?", (prop,) + ) + result = cursor.fetchone() + if result is None: + return default + return result[0] def set_property(self, prop: str, value): - self.db.execute( - """ - INSERT INTO properties (name, value) VALUES (?, ?) - ON CONFLICT DO UPDATE SET value=excluded.value - """, - (prop, value), - ) + with self.db: + self.db.execute( + """ + INSERT INTO properties (name, value) VALUES (?, ?) + ON CONFLICT DO UPDATE SET value=excluded.value + """, + (prop, value), + ) def ensure_database_schema(self): with self.db: @@ -126,60 +141,194 @@ class Database: self.set_property("version", len(SCHEMA_STATEMENTS)) self.set_property("origin", self.origin) + def load_all_meta(self) -> list[feed.FeedMeta]: + with self.db: + cursor = self.db.execute( + """ + SELECT + url, + last_fetched_ts, + retry_after_ts, + status, + etag, + modified + FROM feeds + """ + ) + rows = cursor.fetchall() + return [ + feed.FeedMeta( + url=url, + last_fetched_ts=int(last_fetched_ts), + retry_after_ts=int(retry_after_ts), + status=int(status), + etag=etag, + modified=modified, + origin=self.origin, + ) + for url, last_fetched_ts, retry_after_ts, status, etag, modified in rows + ] + + def load_all(self, feed_limit: int = 20, pattern: str = "") -> list[feed.Feed]: + with self.db: + pattern = ( + pattern.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") + ) + sql_pattern = f"%{pattern}%" + cursor = self.db.execute( + """ + SELECT + url, + last_fetched_ts, + retry_after_ts, + status, + etag, + modified, + title, + link + FROM feeds + WHERE (title LIKE :sql_pattern ESCAPE '\\' + OR link LIKE :sql_pattern ESCAPE '\\') + AND status != 2 -- UNSUBSCRIBED + """, + {"sql_pattern": sql_pattern}, + ) + rows = cursor.fetchall() + + almost_feeds = [] + for row in rows: + ( + url, + last_fetched_ts, + retry_after_ts, + status, + etag, + modified, + title, + link, + ) = row + meta = feed.FeedMeta( + url=url, + last_fetched_ts=last_fetched_ts, + retry_after_ts=retry_after_ts, + status=status, + etag=etag, + modified=modified, + origin=self.origin, + ) + almost_feeds.append((meta, title, link)) + + feeds = [] + for meta, title, link in almost_feeds: + if feed_limit > 0: + cursor = self.db.execute( + """ + SELECT + id, + inserted_at, + title, + link + FROM entries + WHERE feed_url=? + ORDER BY inserted_at DESC + LIMIT ? + """, + [meta.url, feed_limit], + ) + rows = cursor.fetchall() + else: + rows = [] + + entries = [ + feed.Entry(id=id, inserted_at=inserted_at, title=title, link=link) + for id, inserted_at, title, link in rows + ] + f = feed.Feed(meta=meta, title=title, link=link, entries=entries) + feeds.append(f) + + return feeds + def load_feed(self, url: str) -> feed.Feed | None: - cursor = self.db.execute( - """ - SELECT - last_fetched_ts, - retry_after_ts, - status, - etag, - modified, - title, - link - FROM feeds - WHERE url=? - """, - [url], - ) + with self.db: + cursor = self.db.execute( + """ + SELECT + last_fetched_ts, + retry_after_ts, + status, + etag, + modified, + title, + link + FROM feeds + WHERE url=? + """, + [url], + ) - row = cursor.fetchone() - if row is None: - return None + row = cursor.fetchone() + if row is None: + return None - last_fetched_ts, retry_after_ts, status, etag, modified, title, link = row - meta = feed.FeedMeta( - url=url, - last_fetched_ts=last_fetched_ts, - retry_after_ts=retry_after_ts, - status=status, - etag=etag, - modified=modified, - origin=self.origin, - ) + last_fetched_ts, retry_after_ts, status, etag, modified, title, link = row + meta = feed.FeedMeta( + url=url, + last_fetched_ts=last_fetched_ts, + retry_after_ts=retry_after_ts, + status=status, + etag=etag, + modified=modified, + origin=self.origin, + ) - cursor = self.db.execute( - """ - SELECT - id, - inserted_at, - title, - link - FROM entries - WHERE feed_url=? - """, - [url], - ) + cursor = self.db.execute( + """ + SELECT + id, + inserted_at, + title, + link + FROM entries + WHERE feed_url=? + """, + [url], + ) - rows = cursor.fetchall() - entries = [ - feed.Entry(id=id, inserted_at=inserted_at, title=title, link=link) - for id, inserted_at, title, link in rows - ] + rows = cursor.fetchall() + entries = [ + feed.Entry(id=id, inserted_at=inserted_at, title=title, link=link) + for id, inserted_at, title, link in rows + ] return feed.Feed(meta=meta, title=title, link=link, entries=entries) - def store_feed(self, f: feed.Feed): + def update_meta(self, f: feed.FeedMeta): + with self.db: + self.db.execute( + """ + UPDATE feeds SET + last_fetched_ts=?, + retry_after_ts=?, + status=?, + etag=?, + modified=? + WHERE url=? + """, + [ + f.last_fetched_ts, + f.retry_after_ts, + f.status, + f.etag, + f.modified, + f.url, + ], + ) + + def store_feed(self, f: feed.Feed) -> int: + """Store the given feed in the database. + + Returns the number of new entries inserted. + """ with self.db: self.db.execute( """ @@ -215,6 +364,11 @@ class Database: ], ) + cursor = self.db.execute( + "SELECT COUNT (*) FROM entries WHERE feed_url=?", [f.meta.url] + ) + start_count = cursor.fetchone()[0] + self.db.executemany( """ INSERT INTO entries ( @@ -248,3 +402,22 @@ class Database: """, [(e.id, e.inserted_at, f.meta.url, e.title, e.link) for e in f.entries], ) + + cursor = self.db.execute( + "SELECT COUNT (*) FROM entries WHERE feed_url=?", [f.meta.url] + ) + end_count = cursor.fetchone()[0] + return end_count - start_count + + def set_feed_status(self, url: str, status: int) -> int: + with self.db: + cursor = self.db.execute( + """ + UPDATE feeds + SET status = ?, + last_fetched_ts = ? + WHERE url = ? + """, + [status, int(time.time()), url], + ) + return cursor.rowcount diff --git a/cry/feed.py b/cry/feed.py index 3539543..479857d 100644 --- a/cry/feed.py +++ b/cry/feed.py @@ -18,9 +18,9 @@ import requests.structures LOG = logging.getLogger(__name__) -FEED_STATUS_DEAD = 0 -FEED_STATUS_ALIVE = 1 -FEED_STATUS_MISSING = 2 +FEED_STATUS_ALIVE = 0 +FEED_STATUS_DEAD = 1 +FEED_STATUS_UNSUBSCRIBED = 2 # TODO: Consider configuration here. http = requests.Session() @@ -143,7 +143,8 @@ async def fetch_feed( Regardless, the new FeedMeta has the latest state of the feed. """ - if feed.status == FEED_STATUS_DEAD: + if feed.status != FEED_STATUS_ALIVE: + LOG.info(f"{feed.url} is dead or unsubscribed") return (None, feed) if time.time() < feed.retry_after_ts: @@ -198,6 +199,7 @@ async def fetch_feed( # permanently redirected URL, not just whatever the last thing # is... e.g. imagine a permanent followed by a temporary # redirect, then what? + LOG.info(f"{feed.url} permanently redirected to {response.url}") assert response.url is not None feed = dataclasses.replace(feed, url=response.url) @@ -239,6 +241,14 @@ async def fetch_feed( return (parsed, feed) +async def fetch_many( + metas: list[FeedMeta], +) -> list[typing.Tuple[feedparser.FeedParserDict | None, FeedMeta]]: + async with asyncio.TaskGroup() as group: + tasks = [group.create_task(fetch_feed(m)) for m in metas] + return [t.result() for t in tasks] + + @dataclasses.dataclass(frozen=True) class Entry: id: str @@ -284,6 +294,27 @@ class Entry: title = clean_text(str(title)) return Entry(id=id, inserted_at=insert_time, title=title, link=link) + def time_ago(self) -> str: + inserted = self.inserted_at / 1000 + seconds = int(time.time()) - inserted + if seconds <= 90: + return f"{seconds}s" + minutes = int(seconds / 60) + if minutes <= 90: + return f"{minutes}m" + hours = int(minutes / 60) + if hours < 24: + return f"{hours}h" + days = int(hours / 24) + if days <= 7: + return f"{days}d" + weeks = int(days / 7) + if weeks < 52: + return f"{weeks}w" + + years = int(weeks / 52) + return f"{years}y" + @dataclasses.dataclass(frozen=True) class Feed: @@ -392,3 +423,10 @@ def merge_feeds(a: Feed, b: Feed) -> Feed: link=source_feed.link, entries=entries, ) + + +def sort_key(f: Feed) -> int: + """A sort key for sorting feeds by recency.""" + if len(f.entries) > 0: + return max(e.inserted_at for e in f.entries) + return -1