diff --git a/README.md b/README.md index 975c925..a404c25 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,41 @@ # cry -A local-first, command-line feed reader. Kinda. +[![PyPI](https://img.shields.io/pypi/v/cry.svg)](https://pypi.org/project/cry/) +[![Changelog](https://img.shields.io/github/v/release/decarabas/cry?include_prereleases&label=changelog)](https://github.com/decarabas/cry/releases) +[![Tests](https://github.com/decarabas/cry/actions/workflows/test.yml/badge.svg)](https://github.com/decarabas/cry/actions/workflows/test.yml) +[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/decarabas/cry/blob/master/LICENSE) + +Command line feed reader + +## Installation + +Install this tool using `pip`: +```bash +pip install cry +``` +## Usage + +For help, run: +```bash +cry --help +``` +You can also use: +```bash +python -m cry --help +``` +## Development + +To contribute to this tool, first checkout the code. Then create a new virtual environment: +```bash +cd cry +python -m venv venv +source venv/bin/activate +``` +Now install the dependencies and test dependencies: +```bash +pip install -e '.[test]' +``` +To run the tests: +```bash +pytest +``` diff --git a/cry/cli.py b/cry/cli.py index d08e001..d7ea8e6 100644 --- a/cry/cli.py +++ b/cry/cli.py @@ -1,37 +1,15 @@ # https://simonwillison.net/2023/Sep/30/cli-tools-python/ import asyncio -import html -import http.server -import io -import logging - import click from . import feed from . import database -from . import opml - -LOG = logging.getLogger(__name__) @click.group() @click.version_option() -@click.option( - "-v", - "--verbose", - count=True, - help="Increase the verbosity of the output. This option can be specified multiple times.", -) -def cli(verbose): +def cli(): "Command line feed reader" - if verbose > 1: - level = logging.DEBUG - elif verbose > 0: - level = logging.INFO - else: - level = logging.WARN - - logging.basicConfig(level=level) @cli.command(name="subscribe") @@ -58,189 +36,3 @@ def subscribe(url): db.store_feed(f) click.echo(f"Subscribed to {meta.url}") - - -@cli.command(name="import") -@click.argument("opml_file", type=click.File("r", encoding="utf-8")) -def import_opml(opml_file): - "Import the specified OPML file." - - db = database.Database.local() - urls = opml.parse_opml(opml_file.read()) - metas = [feed.FeedMeta.from_url(url, db.origin) for url in urls] - - click.echo(f"Fetching {len(urls)} feeds ...") - results = asyncio.run(feed.fetch_many(metas)) - - subscribed = 0 - for index, result in enumerate(results): - d, meta = result - url = urls[index] - if d is None: - LOG.warn(f"Unable to fetch {url}, skipping...") - continue - - existing = db.load_feed(meta.url) - if existing is not None: - LOG.info(f"{url} already exists (as {meta.url})") - continue - - f = feed.Feed.from_parsed(d, meta) - db.store_feed(f) - subscribed = subscribed + 1 - - click.echo(f"Subscribed to {subscribed} new feeds") - - -@cli.command(name="refresh") -@click.argument("url", required=False, default=None) -def refresh(url): - """Refresh one or more feeds. - - If a URL is specified, refresh that URL. Otherwise, refresh all subscribed - feeds. - """ - - db = database.Database.local() - if url: - f = db.load_feed(url) - if f is None: - click.echo(f"Not subscribed to {url}") - return 1 - feeds = [f.meta] - else: - feeds = db.load_all_meta() - - click.echo(f"Refreshing {len(feeds)} feed(s)...") - results = asyncio.run(feed.fetch_many(feeds)) - - new_count = 0 - for d, meta in results: - if d is None: - # Nothing new. - db.update_meta(meta) - else: - # New items, possibly! - f = feed.Feed.from_parsed(d, meta) - new_count = new_count + db.store_feed(f) - - click.echo(f"Fetched {new_count} new entries.") - - -@cli.command(name="show") -@click.argument("pattern", required=False, default="") -@click.option( - "--count", - "-c", - type=int, - default=10, - show_default=True, - help="Show at most this many entries from each feed.", -) -def show(pattern, count): - """Show feeds and entries. - - If a pattern is supplied, then filter the feeds to urls or titles that - match the pattern. Otherwise, just show everything. - """ - - db = database.Database.local() - feeds = db.load_all(feed_limit=count, pattern=pattern or "") - - feeds.sort(key=feed.sort_key, reverse=True) - for f in feeds: - click.echo(f"{f.title}") - if len(f.entries) > 0: - for entry in f.entries: - click.echo(f" {entry.title}") - else: - click.echo(f" ") - click.echo() - - -@cli.command("list") -@click.argument("pattern", required=False, default="") -def list_feeds(pattern): - """List subscribed feeds. - - If a pattern is supplied, then filter the feeds to urls or titles that - match the pattern. Otherwise, just show everything. - """ - db = database.Database.local() - feeds = db.load_all(feed_limit=0, pattern=pattern) - - max_title = max(len(f.title) for f in feeds) - max_url = max(len(f.meta.url) for f in feeds) - - feeds.sort(key=lambda f: f.title) - - for f in feeds: - click.echo(f"{f.title:{max_title}} {f.meta.url:{max_url}}") - - -@cli.command("unsubscribe") -@click.argument("url") -def unsubscribe(url): - """Unsubscribe from the specified feed. - - (If you need to find the URL for the feed to unsubscribe from, use the - `list` command.) - """ - db = database.Database.local() - count = db.set_feed_status(url, feed.FEED_STATUS_UNSUBSCRIBED) - if count == 0: - click.echo(f"Not subscribed to feed {url}") - return 1 - - -@cli.command("serve") -def serve(): - class Handler(http.server.BaseHTTPRequestHandler): - def do_GET(self): - db = database.Database.local() - feeds = db.load_all(feed_limit=10) - del db - - feeds.sort(key=feed.sort_key, reverse=True) - - buffer = io.StringIO() - buffer.write( - """ - - - - Subscribed Feeds - -

Feeds

- """ - ) - for f in feeds: - feed_title = html.escape(f.title) - if len(f.entries) > 0: - ago = f" ({f.entries[0].time_ago()})" - else: - ago = "" - buffer.write(f'

{feed_title}{ago}

') - buffer.write(f"
") - if len(f.entries) > 0: - for entry in f.entries: - title = html.escape(entry.title) - buffer.write( - f'{title} ({entry.time_ago()}) ' - ) - else: - buffer.write("No entries...") - buffer.write(f"
") - buffer.flush() - text = buffer.getvalue() - response = text.encode("utf-8") - - self.send_response(200) - self.send_header("content-type", "text/html") - self.send_header("content-length", str(len(response))) - self.end_headers() - self.wfile.write(response) - - with http.server.HTTPServer(("", 8000), Handler) as server: - click.echo("Serving at http://127.0.0.1:8000/") - server.serve_forever() diff --git a/cry/database.py b/cry/database.py index c63b8f4..86b9797 100644 --- a/cry/database.py +++ b/cry/database.py @@ -3,7 +3,6 @@ import random import socket import sqlite3 import string -import time import typing import platformdirs @@ -34,17 +33,7 @@ SCHEMA_STATEMENTS = [ ON UPDATE CASCADE ON DELETE CASCADE ); - """, - # I went and changed the status enum to make ALIVE == 0 when I added the - # "unsubscribed" status. I should probably make these strings huh. """ - UPDATE feeds - SET status=CASE - WHEN status = 0 THEN 1 - WHEN status = 1 THEN 0 - ELSE status - END - """, ] @@ -102,24 +91,20 @@ class Database: return db def get_property(self, prop: str, default=None) -> typing.Any: - with self.db: - cursor = self.db.execute( - "SELECT value FROM properties WHERE name=?", (prop,) - ) - result = cursor.fetchone() - if result is None: - return default - return result[0] + cursor = self.db.execute("SELECT value FROM properties WHERE name=?", (prop,)) + result = cursor.fetchone() + if result is None: + return default + return result[0] def set_property(self, prop: str, value): - with self.db: - self.db.execute( - """ - INSERT INTO properties (name, value) VALUES (?, ?) - ON CONFLICT DO UPDATE SET value=excluded.value - """, - (prop, value), - ) + self.db.execute( + """ + INSERT INTO properties (name, value) VALUES (?, ?) + ON CONFLICT DO UPDATE SET value=excluded.value + """, + (prop, value), + ) def ensure_database_schema(self): with self.db: @@ -141,194 +126,60 @@ class Database: self.set_property("version", len(SCHEMA_STATEMENTS)) self.set_property("origin", self.origin) - def load_all_meta(self) -> list[feed.FeedMeta]: - with self.db: - cursor = self.db.execute( - """ - SELECT - url, - last_fetched_ts, - retry_after_ts, - status, - etag, - modified - FROM feeds - """ - ) - rows = cursor.fetchall() - return [ - feed.FeedMeta( - url=url, - last_fetched_ts=int(last_fetched_ts), - retry_after_ts=int(retry_after_ts), - status=int(status), - etag=etag, - modified=modified, - origin=self.origin, - ) - for url, last_fetched_ts, retry_after_ts, status, etag, modified in rows - ] - - def load_all(self, feed_limit: int = 20, pattern: str = "") -> list[feed.Feed]: - with self.db: - pattern = ( - pattern.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") - ) - sql_pattern = f"%{pattern}%" - cursor = self.db.execute( - """ - SELECT - url, - last_fetched_ts, - retry_after_ts, - status, - etag, - modified, - title, - link - FROM feeds - WHERE (title LIKE :sql_pattern ESCAPE '\\' - OR link LIKE :sql_pattern ESCAPE '\\') - AND status != 2 -- UNSUBSCRIBED - """, - {"sql_pattern": sql_pattern}, - ) - rows = cursor.fetchall() - - almost_feeds = [] - for row in rows: - ( - url, - last_fetched_ts, - retry_after_ts, - status, - etag, - modified, - title, - link, - ) = row - meta = feed.FeedMeta( - url=url, - last_fetched_ts=last_fetched_ts, - retry_after_ts=retry_after_ts, - status=status, - etag=etag, - modified=modified, - origin=self.origin, - ) - almost_feeds.append((meta, title, link)) - - feeds = [] - for meta, title, link in almost_feeds: - if feed_limit > 0: - cursor = self.db.execute( - """ - SELECT - id, - inserted_at, - title, - link - FROM entries - WHERE feed_url=? - ORDER BY inserted_at DESC - LIMIT ? - """, - [meta.url, feed_limit], - ) - rows = cursor.fetchall() - else: - rows = [] - - entries = [ - feed.Entry(id=id, inserted_at=inserted_at, title=title, link=link) - for id, inserted_at, title, link in rows - ] - f = feed.Feed(meta=meta, title=title, link=link, entries=entries) - feeds.append(f) - - return feeds - def load_feed(self, url: str) -> feed.Feed | None: - with self.db: - cursor = self.db.execute( - """ - SELECT - last_fetched_ts, - retry_after_ts, - status, - etag, - modified, - title, - link - FROM feeds - WHERE url=? - """, - [url], - ) + cursor = self.db.execute( + """ + SELECT + last_fetched_ts, + retry_after_ts, + status, + etag, + modified, + title, + link + FROM feeds + WHERE url=? + """, + [url], + ) - row = cursor.fetchone() - if row is None: - return None + row = cursor.fetchone() + if row is None: + return None - last_fetched_ts, retry_after_ts, status, etag, modified, title, link = row - meta = feed.FeedMeta( - url=url, - last_fetched_ts=last_fetched_ts, - retry_after_ts=retry_after_ts, - status=status, - etag=etag, - modified=modified, - origin=self.origin, - ) + last_fetched_ts, retry_after_ts, status, etag, modified, title, link = row + meta = feed.FeedMeta( + url=url, + last_fetched_ts=last_fetched_ts, + retry_after_ts=retry_after_ts, + status=status, + etag=etag, + modified=modified, + origin=self.origin, + ) - cursor = self.db.execute( - """ - SELECT - id, - inserted_at, - title, - link - FROM entries - WHERE feed_url=? - """, - [url], - ) + cursor = self.db.execute( + """ + SELECT + id, + inserted_at, + title, + link + FROM entries + WHERE feed_url=? + """, + [url], + ) - rows = cursor.fetchall() - entries = [ - feed.Entry(id=id, inserted_at=inserted_at, title=title, link=link) - for id, inserted_at, title, link in rows - ] + rows = cursor.fetchall() + entries = [ + feed.Entry(id=id, inserted_at=inserted_at, title=title, link=link) + for id, inserted_at, title, link in rows + ] return feed.Feed(meta=meta, title=title, link=link, entries=entries) - def update_meta(self, f: feed.FeedMeta): - with self.db: - self.db.execute( - """ - UPDATE feeds SET - last_fetched_ts=?, - retry_after_ts=?, - status=?, - etag=?, - modified=? - WHERE url=? - """, - [ - f.last_fetched_ts, - f.retry_after_ts, - f.status, - f.etag, - f.modified, - f.url, - ], - ) - - def store_feed(self, f: feed.Feed) -> int: - """Store the given feed in the database. - - Returns the number of new entries inserted. - """ + def store_feed(self, f: feed.Feed): with self.db: self.db.execute( """ @@ -364,11 +215,6 @@ class Database: ], ) - cursor = self.db.execute( - "SELECT COUNT (*) FROM entries WHERE feed_url=?", [f.meta.url] - ) - start_count = cursor.fetchone()[0] - self.db.executemany( """ INSERT INTO entries ( @@ -402,22 +248,3 @@ class Database: """, [(e.id, e.inserted_at, f.meta.url, e.title, e.link) for e in f.entries], ) - - cursor = self.db.execute( - "SELECT COUNT (*) FROM entries WHERE feed_url=?", [f.meta.url] - ) - end_count = cursor.fetchone()[0] - return end_count - start_count - - def set_feed_status(self, url: str, status: int) -> int: - with self.db: - cursor = self.db.execute( - """ - UPDATE feeds - SET status = ?, - last_fetched_ts = ? - WHERE url = ? - """, - [status, int(time.time()), url], - ) - return cursor.rowcount diff --git a/cry/feed.py b/cry/feed.py index 479857d..3539543 100644 --- a/cry/feed.py +++ b/cry/feed.py @@ -18,9 +18,9 @@ import requests.structures LOG = logging.getLogger(__name__) -FEED_STATUS_ALIVE = 0 -FEED_STATUS_DEAD = 1 -FEED_STATUS_UNSUBSCRIBED = 2 +FEED_STATUS_DEAD = 0 +FEED_STATUS_ALIVE = 1 +FEED_STATUS_MISSING = 2 # TODO: Consider configuration here. http = requests.Session() @@ -143,8 +143,7 @@ async def fetch_feed( Regardless, the new FeedMeta has the latest state of the feed. """ - if feed.status != FEED_STATUS_ALIVE: - LOG.info(f"{feed.url} is dead or unsubscribed") + if feed.status == FEED_STATUS_DEAD: return (None, feed) if time.time() < feed.retry_after_ts: @@ -199,7 +198,6 @@ async def fetch_feed( # permanently redirected URL, not just whatever the last thing # is... e.g. imagine a permanent followed by a temporary # redirect, then what? - LOG.info(f"{feed.url} permanently redirected to {response.url}") assert response.url is not None feed = dataclasses.replace(feed, url=response.url) @@ -241,14 +239,6 @@ async def fetch_feed( return (parsed, feed) -async def fetch_many( - metas: list[FeedMeta], -) -> list[typing.Tuple[feedparser.FeedParserDict | None, FeedMeta]]: - async with asyncio.TaskGroup() as group: - tasks = [group.create_task(fetch_feed(m)) for m in metas] - return [t.result() for t in tasks] - - @dataclasses.dataclass(frozen=True) class Entry: id: str @@ -294,27 +284,6 @@ class Entry: title = clean_text(str(title)) return Entry(id=id, inserted_at=insert_time, title=title, link=link) - def time_ago(self) -> str: - inserted = self.inserted_at / 1000 - seconds = int(time.time()) - inserted - if seconds <= 90: - return f"{seconds}s" - minutes = int(seconds / 60) - if minutes <= 90: - return f"{minutes}m" - hours = int(minutes / 60) - if hours < 24: - return f"{hours}h" - days = int(hours / 24) - if days <= 7: - return f"{days}d" - weeks = int(days / 7) - if weeks < 52: - return f"{weeks}w" - - years = int(weeks / 52) - return f"{years}y" - @dataclasses.dataclass(frozen=True) class Feed: @@ -423,10 +392,3 @@ def merge_feeds(a: Feed, b: Feed) -> Feed: link=source_feed.link, entries=entries, ) - - -def sort_key(f: Feed) -> int: - """A sort key for sorting feeds by recency.""" - if len(f.entries) > 0: - return max(e.inserted_at for e in f.entries) - return -1