Compare commits

...

10 commits

Author SHA1 Message Date
21aeedd1d1 Read 2024-07-11 10:34:40 +09:00
6b02fb66bc Deal with locked databases, tweaks 2024-07-11 10:32:48 +09:00
fb9bfe0084 The most basic HTML UI 2024-07-11 10:18:40 +09:00
02232c9c3e Unsubscribe 2024-07-11 07:35:32 +09:00
c06f3ef114 Count, show in the right order 2024-07-11 07:09:46 +09:00
34c0b6dd7d Filter show by pattern, if requested 2024-07-10 09:37:50 +09:00
26353a2779 Show 2024-07-10 09:09:17 +09:00
cbeadbd302 Import OPML 2024-07-10 08:21:40 +09:00
491df5f942 Logging 2024-07-10 08:07:57 +09:00
a3858f6395 Refresh? 2024-07-10 08:04:33 +09:00
4 changed files with 483 additions and 102 deletions

View file

@ -1,41 +1,3 @@
# cry
[![PyPI](https://img.shields.io/pypi/v/cry.svg)](https://pypi.org/project/cry/)
[![Changelog](https://img.shields.io/github/v/release/decarabas/cry?include_prereleases&label=changelog)](https://github.com/decarabas/cry/releases)
[![Tests](https://github.com/decarabas/cry/actions/workflows/test.yml/badge.svg)](https://github.com/decarabas/cry/actions/workflows/test.yml)
[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/decarabas/cry/blob/master/LICENSE)
Command line feed reader
## Installation
Install this tool using `pip`:
```bash
pip install cry
```
## Usage
For help, run:
```bash
cry --help
```
You can also use:
```bash
python -m cry --help
```
## Development
To contribute to this tool, first check out the code. Then create a new virtual environment:
```bash
cd cry
python -m venv venv
source venv/bin/activate
```
Now install the dependencies and test dependencies:
```bash
pip install -e '.[test]'
```
To run the tests:
```bash
pytest
```
A local-first, command-line feed reader. Kinda.

View file

@ -1,15 +1,37 @@
# https://simonwillison.net/2023/Sep/30/cli-tools-python/
import asyncio
import html
import http.server
import io
import logging
import click
from . import feed
from . import database
from . import opml
LOG = logging.getLogger(__name__)
@click.group()
@click.version_option()
def cli():
@click.option(
"-v",
"--verbose",
count=True,
help="Increase the verbosity of the output. This option can be specified multiple times.",
)
def cli(verbose):
"Command line feed reader"
if verbose > 1:
level = logging.DEBUG
elif verbose > 0:
level = logging.INFO
else:
level = logging.WARN
logging.basicConfig(level=level)
@cli.command(name="subscribe")
@ -36,3 +58,189 @@ def subscribe(url):
db.store_feed(f)
click.echo(f"Subscribed to {meta.url}")
@cli.command(name="import")
@click.argument("opml_file", type=click.File("r", encoding="utf-8"))
def import_opml(opml_file):
    "Import the specified OPML file."
    db = database.Database.local()
    urls = opml.parse_opml(opml_file.read())
    metas = [feed.FeedMeta.from_url(url, db.origin) for url in urls]
    click.echo(f"Fetching {len(urls)} feeds ...")
    # fetch_many preserves order, so results line up with urls.
    results = asyncio.run(feed.fetch_many(metas))
    subscribed = 0
    for url, (d, meta) in zip(urls, results):
        if d is None:
            # Logger.warn is a deprecated alias; warning() is the supported name.
            LOG.warning(f"Unable to fetch {url}, skipping...")
            continue
        # meta.url may differ from the OPML url after redirects.
        existing = db.load_feed(meta.url)
        if existing is not None:
            LOG.info(f"{url} already exists (as {meta.url})")
            continue
        f = feed.Feed.from_parsed(d, meta)
        db.store_feed(f)
        subscribed = subscribed + 1
    click.echo(f"Subscribed to {subscribed} new feeds")
@cli.command(name="refresh")
@click.argument("url", required=False, default=None)
def refresh(url):
    """Refresh one or more feeds.

    If a URL is specified, refresh that URL. Otherwise, refresh all subscribed
    feeds.
    """
    db = database.Database.local()
    if url:
        f = db.load_feed(url)
        if f is None:
            click.echo(f"Not subscribed to {url}")
            # click ignores command return values, so `return 1` did not
            # produce a non-zero exit status; raising SystemExit does.
            raise SystemExit(1)
        feeds = [f.meta]
    else:
        feeds = db.load_all_meta()
    click.echo(f"Refreshing {len(feeds)} feed(s)...")
    results = asyncio.run(feed.fetch_many(feeds))
    new_count = 0
    for d, meta in results:
        if d is None:
            # Nothing new (or fetch skipped); still record the attempt.
            db.update_meta(meta)
        else:
            # New items, possibly! store_feed returns how many were inserted.
            f = feed.Feed.from_parsed(d, meta)
            new_count = new_count + db.store_feed(f)
    click.echo(f"Fetched {new_count} new entries.")
@cli.command(name="show")
@click.argument("pattern", required=False, default="")
@click.option(
    "--count",
    "-c",
    type=int,
    default=10,
    show_default=True,
    help="Show at most this many entries from each feed.",
)
def show(pattern, count):
    """Show feeds and entries.

    If a pattern is supplied, then filter the feeds to urls or titles that
    match the pattern. Otherwise, just show everything.
    """
    db = database.Database.local()
    feeds = db.load_all(feed_limit=count, pattern=pattern or "")
    # Most recently updated feed first.
    feeds.sort(key=feed.sort_key, reverse=True)
    for f in feeds:
        click.echo(f"{f.title}")
        if len(f.entries) > 0:
            for entry in f.entries:
                click.echo(f" {entry.title}")
        else:
            # Plain string literal: the old f-string had no placeholders.
            click.echo(" <No Entries>")
        click.echo()
@cli.command("list")
@click.argument("pattern", required=False, default="")
def list_feeds(pattern):
    """List subscribed feeds.

    If a pattern is supplied, then filter the feeds to urls or titles that
    match the pattern. Otherwise, just show everything.
    """
    db = database.Database.local()
    feeds = db.load_all(feed_limit=0, pattern=pattern)
    # default= keeps max() from raising ValueError when no feeds match.
    max_title = max((len(f.title) for f in feeds), default=0)
    max_url = max((len(f.meta.url) for f in feeds), default=0)
    feeds.sort(key=lambda f: f.title)
    for f in feeds:
        click.echo(f"{f.title:{max_title}} {f.meta.url:{max_url}}")
@cli.command("unsubscribe")
@click.argument("url")
def unsubscribe(url):
    """Unsubscribe from the specified feed.

    (If you need to find the URL for the feed to unsubscribe from, use the
    `list` command.)
    """
    db = database.Database.local()
    count = db.set_feed_status(url, feed.FEED_STATUS_UNSUBSCRIBED)
    if count == 0:
        click.echo(f"Not subscribed to feed {url}")
        # click ignores command return values, so `return 1` did not
        # produce a non-zero exit status; raising SystemExit does.
        raise SystemExit(1)
@cli.command("serve")
def serve():
    "Serve subscribed feeds as a simple HTML page on port 8000."

    class Handler(http.server.BaseHTTPRequestHandler):
        def do_GET(self):
            # Open the database per request and drop it before rendering so
            # the handler never holds it while writing the response.
            db = database.Database.local()
            feeds = db.load_all(feed_limit=10)
            del db
            feeds.sort(key=feed.sort_key, reverse=True)
            buffer = io.StringIO()
            buffer.write(
                """
                <!doctype html>
                <head>
                <meta charset="utf-8">
                <title>Subscribed Feeds</title>
                </head>
                <h1>Feeds</h1>
                """
            )
            for f in feeds:
                # Feed data comes from the network: escape everything that is
                # interpolated into the HTML, links included.
                feed_title = html.escape(f.title)
                feed_link = html.escape(f.link)
                if len(f.entries) > 0:
                    ago = f" ({f.entries[0].time_ago()})"
                else:
                    ago = ""
                buffer.write(f'<h2><a href="{feed_link}">{feed_title}</a>{ago}</h2>')
                buffer.write("<div>")
                if len(f.entries) > 0:
                    for entry in f.entries:
                        title = html.escape(entry.title)
                        link = html.escape(entry.link)
                        buffer.write(
                            f'<span class="entry">&bull; <a href="{link}">{title}</a> ({entry.time_ago()})</span> '
                        )
                else:
                    buffer.write("<i>No entries...</i>")
                buffer.write("</div>")
            buffer.flush()
            text = buffer.getvalue()
            response = text.encode("utf-8")
            self.send_response(200)
            self.send_header("content-type", "text/html")
            self.send_header("content-length", str(len(response)))
            self.end_headers()
            self.wfile.write(response)

    with http.server.HTTPServer(("", 8000), Handler) as server:
        click.echo("Serving at http://127.0.0.1:8000/")
        server.serve_forever()

View file

@ -3,6 +3,7 @@ import random
import socket
import sqlite3
import string
import time
import typing
import platformdirs
@ -33,7 +34,17 @@ SCHEMA_STATEMENTS = [
ON UPDATE CASCADE
ON DELETE CASCADE
);
""",
# I went and changed the status enum to make ALIVE == 0 when I added the
# "unsubscribed" status. I should probably make these strings huh.
"""
UPDATE feeds
SET status=CASE
WHEN status = 0 THEN 1
WHEN status = 1 THEN 0
ELSE status
END
""",
]
@ -91,20 +102,24 @@ class Database:
return db
def get_property(self, prop: str, default=None) -> typing.Any:
cursor = self.db.execute("SELECT value FROM properties WHERE name=?", (prop,))
result = cursor.fetchone()
if result is None:
return default
return result[0]
with self.db:
cursor = self.db.execute(
"SELECT value FROM properties WHERE name=?", (prop,)
)
result = cursor.fetchone()
if result is None:
return default
return result[0]
def set_property(self, prop: str, value):
self.db.execute(
"""
INSERT INTO properties (name, value) VALUES (?, ?)
ON CONFLICT DO UPDATE SET value=excluded.value
""",
(prop, value),
)
with self.db:
self.db.execute(
"""
INSERT INTO properties (name, value) VALUES (?, ?)
ON CONFLICT DO UPDATE SET value=excluded.value
""",
(prop, value),
)
def ensure_database_schema(self):
with self.db:
@ -126,60 +141,194 @@ class Database:
self.set_property("version", len(SCHEMA_STATEMENTS))
self.set_property("origin", self.origin)
def load_all_meta(self) -> list[feed.FeedMeta]:
    """Return a FeedMeta for every feed row in the database."""
    with self.db:
        rows = self.db.execute(
            """
            SELECT
              url,
              last_fetched_ts,
              retry_after_ts,
              status,
              etag,
              modified
            FROM feeds
            """
        ).fetchall()
        metas = []
        for url, fetched_ts, retry_ts, status, etag, modified in rows:
            metas.append(
                feed.FeedMeta(
                    url=url,
                    last_fetched_ts=int(fetched_ts),
                    retry_after_ts=int(retry_ts),
                    status=int(status),
                    etag=etag,
                    modified=modified,
                    origin=self.origin,
                )
            )
        return metas
def load_all(self, feed_limit: int = 20, pattern: str = "") -> list[feed.Feed]:
    """Load every non-unsubscribed feed whose title or link matches
    `pattern`, each with up to `feed_limit` most-recent entries
    (no entries at all when feed_limit is 0)."""
    with self.db:
        # Escape LIKE metacharacters so the user's pattern matches literally.
        escaped = (
            pattern.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        )
        feed_rows = self.db.execute(
            """
            SELECT
              url,
              last_fetched_ts,
              retry_after_ts,
              status,
              etag,
              modified,
              title,
              link
            FROM feeds
            WHERE (title LIKE :sql_pattern ESCAPE '\\'
               OR link LIKE :sql_pattern ESCAPE '\\')
              AND status != 2 -- UNSUBSCRIBED
            """,
            {"sql_pattern": f"%{escaped}%"},
        ).fetchall()

        feeds = []
        for row in feed_rows:
            (
                url,
                last_fetched_ts,
                retry_after_ts,
                status,
                etag,
                modified,
                title,
                link,
            ) = row
            meta = feed.FeedMeta(
                url=url,
                last_fetched_ts=last_fetched_ts,
                retry_after_ts=retry_after_ts,
                status=status,
                etag=etag,
                modified=modified,
                origin=self.origin,
            )
            entries = []
            if feed_limit > 0:
                entry_rows = self.db.execute(
                    """
                    SELECT
                      id,
                      inserted_at,
                      title,
                      link
                    FROM entries
                    WHERE feed_url=?
                    ORDER BY inserted_at DESC
                    LIMIT ?
                    """,
                    [meta.url, feed_limit],
                ).fetchall()
                entries = [
                    feed.Entry(id=eid, inserted_at=ets, title=etitle, link=elink)
                    for eid, ets, etitle, elink in entry_rows
                ]
            feeds.append(feed.Feed(meta=meta, title=title, link=link, entries=entries))
        return feeds
def load_feed(self, url: str) -> feed.Feed | None:
cursor = self.db.execute(
"""
SELECT
last_fetched_ts,
retry_after_ts,
status,
etag,
modified,
title,
link
FROM feeds
WHERE url=?
""",
[url],
)
with self.db:
cursor = self.db.execute(
"""
SELECT
last_fetched_ts,
retry_after_ts,
status,
etag,
modified,
title,
link
FROM feeds
WHERE url=?
""",
[url],
)
row = cursor.fetchone()
if row is None:
return None
row = cursor.fetchone()
if row is None:
return None
last_fetched_ts, retry_after_ts, status, etag, modified, title, link = row
meta = feed.FeedMeta(
url=url,
last_fetched_ts=last_fetched_ts,
retry_after_ts=retry_after_ts,
status=status,
etag=etag,
modified=modified,
origin=self.origin,
)
last_fetched_ts, retry_after_ts, status, etag, modified, title, link = row
meta = feed.FeedMeta(
url=url,
last_fetched_ts=last_fetched_ts,
retry_after_ts=retry_after_ts,
status=status,
etag=etag,
modified=modified,
origin=self.origin,
)
cursor = self.db.execute(
"""
SELECT
id,
inserted_at,
title,
link
FROM entries
WHERE feed_url=?
""",
[url],
)
cursor = self.db.execute(
"""
SELECT
id,
inserted_at,
title,
link
FROM entries
WHERE feed_url=?
""",
[url],
)
rows = cursor.fetchall()
entries = [
feed.Entry(id=id, inserted_at=inserted_at, title=title, link=link)
for id, inserted_at, title, link in rows
]
rows = cursor.fetchall()
entries = [
feed.Entry(id=id, inserted_at=inserted_at, title=title, link=link)
for id, inserted_at, title, link in rows
]
return feed.Feed(meta=meta, title=title, link=link, entries=entries)
def store_feed(self, f: feed.Feed):
def update_meta(self, f: feed.FeedMeta):
    """Persist the fetch-bookkeeping fields of an existing feed row."""
    values = (
        f.last_fetched_ts,
        f.retry_after_ts,
        f.status,
        f.etag,
        f.modified,
        f.url,
    )
    with self.db:
        self.db.execute(
            """
            UPDATE feeds SET
              last_fetched_ts=?,
              retry_after_ts=?,
              status=?,
              etag=?,
              modified=?
            WHERE url=?
            """,
            values,
        )
def store_feed(self, f: feed.Feed) -> int:
"""Store the given feed in the database.
Returns the number of new entries inserted.
"""
with self.db:
self.db.execute(
"""
@ -215,6 +364,11 @@ class Database:
],
)
cursor = self.db.execute(
"SELECT COUNT (*) FROM entries WHERE feed_url=?", [f.meta.url]
)
start_count = cursor.fetchone()[0]
self.db.executemany(
"""
INSERT INTO entries (
@ -248,3 +402,22 @@ class Database:
""",
[(e.id, e.inserted_at, f.meta.url, e.title, e.link) for e in f.entries],
)
cursor = self.db.execute(
"SELECT COUNT (*) FROM entries WHERE feed_url=?", [f.meta.url]
)
end_count = cursor.fetchone()[0]
return end_count - start_count
def set_feed_status(self, url: str, status: int) -> int:
    """Set the status of the feed at `url`, stamping last_fetched_ts with
    the current time. Returns the number of rows updated (0 or 1)."""
    now = int(time.time())
    with self.db:
        result = self.db.execute(
            """
            UPDATE feeds
            SET status = ?,
                last_fetched_ts = ?
            WHERE url = ?
            """,
            (status, now, url),
        )
        return result.rowcount

View file

@ -18,9 +18,9 @@ import requests.structures
LOG = logging.getLogger(__name__)
FEED_STATUS_DEAD = 0
FEED_STATUS_ALIVE = 1
FEED_STATUS_MISSING = 2
FEED_STATUS_ALIVE = 0
FEED_STATUS_DEAD = 1
FEED_STATUS_UNSUBSCRIBED = 2
# TODO: Consider configuration here.
http = requests.Session()
@ -143,7 +143,8 @@ async def fetch_feed(
Regardless, the new FeedMeta has the latest state of the feed.
"""
if feed.status == FEED_STATUS_DEAD:
if feed.status != FEED_STATUS_ALIVE:
LOG.info(f"{feed.url} is dead or unsubscribed")
return (None, feed)
if time.time() < feed.retry_after_ts:
@ -198,6 +199,7 @@ async def fetch_feed(
# permanently redirected URL, not just whatever the last thing
# is... e.g. imagine a permanent followed by a temporary
# redirect, then what?
LOG.info(f"{feed.url} permanently redirected to {response.url}")
assert response.url is not None
feed = dataclasses.replace(feed, url=response.url)
@ -239,6 +241,14 @@ async def fetch_feed(
return (parsed, feed)
async def fetch_many(
    metas: list[FeedMeta],
) -> list[tuple[feedparser.FeedParserDict | None, FeedMeta]]:
    """Fetch all the given feeds concurrently.

    Returns one (parsed, meta) pair per input, in the same order as `metas`.
    (The file already uses 3.10+ syntax, so the builtin `tuple` generic
    replaces the deprecated `typing.Tuple`.)
    """
    async with asyncio.TaskGroup() as group:
        tasks = [group.create_task(fetch_feed(m)) for m in metas]
    # The TaskGroup context only exits once every task has finished,
    # so result() is safe to call here.
    return [t.result() for t in tasks]
@dataclasses.dataclass(frozen=True)
class Entry:
id: str
@ -284,6 +294,27 @@ class Entry:
title = clean_text(str(title))
return Entry(id=id, inserted_at=insert_time, title=title, link=link)
def time_ago(self) -> str:
    """Return a compact age string for this entry, e.g. "42s", "5m", "3d".

    inserted_at appears to be a unix timestamp in milliseconds (it is
    divided by 1000 before comparing against time.time()).
    """
    # Integer division: the old float division made the seconds bucket
    # render as e.g. "42.5s" instead of "42s".
    inserted = self.inserted_at // 1000
    seconds = int(time.time()) - inserted
    if seconds <= 90:
        return f"{seconds}s"
    minutes = seconds // 60
    if minutes <= 90:
        return f"{minutes}m"
    hours = minutes // 60
    if hours < 24:
        return f"{hours}h"
    days = hours // 24
    if days <= 7:
        return f"{days}d"
    weeks = days // 7
    if weeks < 52:
        return f"{weeks}w"
    years = weeks // 52
    return f"{years}y"
@dataclasses.dataclass(frozen=True)
class Feed:
@ -392,3 +423,10 @@ def merge_feeds(a: Feed, b: Feed) -> Feed:
link=source_feed.link,
entries=entries,
)
def sort_key(f: Feed) -> int:
    """A sort key for sorting feeds by recency.

    Returns the newest entry's inserted_at, or -1 for a feed with no
    entries so that empty feeds sort last.
    """
    return max((e.inserted_at for e in f.entries), default=-1)