Compare commits

...

10 commits

Author SHA1 Message Date
21aeedd1d1 Read 2024-07-11 10:34:40 +09:00
6b02fb66bc Deal with locked databases, tweaks 2024-07-11 10:32:48 +09:00
fb9bfe0084 The most basic HTML UI 2024-07-11 10:18:40 +09:00
02232c9c3e Unsubscribe 2024-07-11 07:35:32 +09:00
c06f3ef114 Count, show in the right order 2024-07-11 07:09:46 +09:00
34c0b6dd7d Filter show by pattern, if requested 2024-07-10 09:37:50 +09:00
26353a2779 Show 2024-07-10 09:09:17 +09:00
cbeadbd302 Import OPML 2024-07-10 08:21:40 +09:00
491df5f942 Logging 2024-07-10 08:07:57 +09:00
a3858f6395 Refresh? 2024-07-10 08:04:33 +09:00
4 changed files with 483 additions and 102 deletions

View file

@ -1,41 +1,3 @@
# cry
[![PyPI](https://img.shields.io/pypi/v/cry.svg)](https://pypi.org/project/cry/)
[![Changelog](https://img.shields.io/github/v/release/decarabas/cry?include_prereleases&label=changelog)](https://github.com/decarabas/cry/releases)
[![Tests](https://github.com/decarabas/cry/actions/workflows/test.yml/badge.svg)](https://github.com/decarabas/cry/actions/workflows/test.yml)
[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/decarabas/cry/blob/master/LICENSE)
Command line feed reader
## Installation
Install this tool using `pip`:
```bash
pip install cry
```
## Usage
For help, run:
```bash
cry --help
```
You can also use:
```bash
python -m cry --help
```
## Development
To contribute to this tool, first check out the code. Then create a new virtual environment:
```bash
cd cry
python -m venv venv
source venv/bin/activate
```
Now install the dependencies and test dependencies:
```bash
pip install -e '.[test]'
```
To run the tests:
```bash
pytest
```
A local-first, command-line feed reader. Kinda.

View file

@ -1,15 +1,37 @@
# https://simonwillison.net/2023/Sep/30/cli-tools-python/
import asyncio
import html
import http.server
import io
import logging
import click
from . import feed
from . import database
from . import opml
LOG = logging.getLogger(__name__)
@click.group()
@click.version_option()
def cli():
@click.option(
"-v",
"--verbose",
count=True,
help="Increase the verbosity of the output. This option can be specified multiple times.",
)
def cli(verbose):
"Command line feed reader"
if verbose > 1:
level = logging.DEBUG
elif verbose > 0:
level = logging.INFO
else:
level = logging.WARN
logging.basicConfig(level=level)
@cli.command(name="subscribe")
@ -36,3 +58,189 @@ def subscribe(url):
db.store_feed(f)
click.echo(f"Subscribed to {meta.url}")
@cli.command(name="import")
@click.argument("opml_file", type=click.File("r", encoding="utf-8"))
def import_opml(opml_file):
    "Import the specified OPML file."
    db = database.Database.local()
    urls = opml.parse_opml(opml_file.read())
    metas = [feed.FeedMeta.from_url(url, db.origin) for url in urls]
    click.echo(f"Fetching {len(urls)} feeds ...")
    # fetch_many preserves order, so results line up with urls.
    results = asyncio.run(feed.fetch_many(metas))
    subscribed = 0
    for url, (d, meta) in zip(urls, results):
        if d is None:
            # Logger.warn is a deprecated alias; warning() is the supported name.
            LOG.warning(f"Unable to fetch {url}, skipping...")
            continue
        # meta.url may differ from the OPML url after redirects.
        existing = db.load_feed(meta.url)
        if existing is not None:
            LOG.info(f"{url} already exists (as {meta.url})")
            continue
        f = feed.Feed.from_parsed(d, meta)
        db.store_feed(f)
        subscribed = subscribed + 1
    click.echo(f"Subscribed to {subscribed} new feeds")
@cli.command(name="refresh")
@click.argument("url", required=False, default=None)
def refresh(url):
    """Refresh one or more feeds.

    If a URL is specified, refresh that URL. Otherwise, refresh all subscribed
    feeds.
    """
    db = database.Database.local()
    if url:
        f = db.load_feed(url)
        if f is None:
            click.echo(f"Not subscribed to {url}")
            # click ignores command return values, so `return 1` did not
            # produce a non-zero exit status; raising SystemExit does.
            raise SystemExit(1)
        feeds = [f.meta]
    else:
        feeds = db.load_all_meta()
    click.echo(f"Refreshing {len(feeds)} feed(s)...")
    results = asyncio.run(feed.fetch_many(feeds))
    new_count = 0
    for d, meta in results:
        if d is None:
            # Nothing new (or fetch skipped); still record the attempt.
            db.update_meta(meta)
        else:
            # New items, possibly! store_feed returns how many were inserted.
            f = feed.Feed.from_parsed(d, meta)
            new_count = new_count + db.store_feed(f)
    click.echo(f"Fetched {new_count} new entries.")
@cli.command(name="show")
@click.argument("pattern", required=False, default="")
@click.option(
    "--count",
    "-c",
    type=int,
    default=10,
    show_default=True,
    help="Show at most this many entries from each feed.",
)
def show(pattern, count):
    """Show feeds and entries.

    If a pattern is supplied, then filter the feeds to urls or titles that
    match the pattern. Otherwise, just show everything.
    """
    db = database.Database.local()
    feeds = db.load_all(feed_limit=count, pattern=pattern or "")
    # Most recently updated feed first.
    feeds.sort(key=feed.sort_key, reverse=True)
    for f in feeds:
        click.echo(f"{f.title}")
        if len(f.entries) > 0:
            for entry in f.entries:
                click.echo(f" {entry.title}")
        else:
            # Plain string literal: the old f-string had no placeholders.
            click.echo(" <No Entries>")
        click.echo()
@cli.command("list")
@click.argument("pattern", required=False, default="")
def list_feeds(pattern):
    """List subscribed feeds.

    If a pattern is supplied, then filter the feeds to urls or titles that
    match the pattern. Otherwise, just show everything.
    """
    db = database.Database.local()
    feeds = db.load_all(feed_limit=0, pattern=pattern)
    # default= keeps max() from raising ValueError when no feeds match.
    max_title = max((len(f.title) for f in feeds), default=0)
    max_url = max((len(f.meta.url) for f in feeds), default=0)
    feeds.sort(key=lambda f: f.title)
    for f in feeds:
        click.echo(f"{f.title:{max_title}} {f.meta.url:{max_url}}")
@cli.command("unsubscribe")
@click.argument("url")
def unsubscribe(url):
    """Unsubscribe from the specified feed.

    (If you need to find the URL for the feed to unsubscribe from, use the
    `list` command.)
    """
    db = database.Database.local()
    count = db.set_feed_status(url, feed.FEED_STATUS_UNSUBSCRIBED)
    if count == 0:
        click.echo(f"Not subscribed to feed {url}")
        # click ignores command return values, so `return 1` did not
        # produce a non-zero exit status; raising SystemExit does.
        raise SystemExit(1)
@cli.command("serve")
def serve():
    "Serve subscribed feeds as a simple HTML page on port 8000."

    class Handler(http.server.BaseHTTPRequestHandler):
        def do_GET(self):
            # Open the database per request and drop it before rendering so
            # the handler never holds it while writing the response.
            db = database.Database.local()
            feeds = db.load_all(feed_limit=10)
            del db
            feeds.sort(key=feed.sort_key, reverse=True)
            buffer = io.StringIO()
            buffer.write(
                """
                <!doctype html>
                <head>
                <meta charset="utf-8">
                <title>Subscribed Feeds</title>
                </head>
                <h1>Feeds</h1>
                """
            )
            for f in feeds:
                # Feed data comes from the network: escape everything that is
                # interpolated into the HTML, links included.
                feed_title = html.escape(f.title)
                feed_link = html.escape(f.link)
                if len(f.entries) > 0:
                    ago = f" ({f.entries[0].time_ago()})"
                else:
                    ago = ""
                buffer.write(f'<h2><a href="{feed_link}">{feed_title}</a>{ago}</h2>')
                buffer.write("<div>")
                if len(f.entries) > 0:
                    for entry in f.entries:
                        title = html.escape(entry.title)
                        link = html.escape(entry.link)
                        buffer.write(
                            f'<span class="entry">&bull; <a href="{link}">{title}</a> ({entry.time_ago()})</span> '
                        )
                else:
                    buffer.write("<i>No entries...</i>")
                buffer.write("</div>")
            buffer.flush()
            text = buffer.getvalue()
            response = text.encode("utf-8")
            self.send_response(200)
            self.send_header("content-type", "text/html")
            self.send_header("content-length", str(len(response)))
            self.end_headers()
            self.wfile.write(response)

    with http.server.HTTPServer(("", 8000), Handler) as server:
        click.echo("Serving at http://127.0.0.1:8000/")
        server.serve_forever()

View file

@ -3,6 +3,7 @@ import random
import socket
import sqlite3
import string
import time
import typing
import platformdirs
@ -33,7 +34,17 @@ SCHEMA_STATEMENTS = [
ON UPDATE CASCADE
ON DELETE CASCADE
);
""",
# I went and changed the status enum to make ALIVE == 0 when I added the
# "unsubscribed" status. I should probably make these strings huh.
"""
UPDATE feeds
SET status=CASE
WHEN status = 0 THEN 1
WHEN status = 1 THEN 0
ELSE status
END
""",
]
@ -91,20 +102,24 @@ class Database:
return db
def get_property(self, prop: str, default=None) -> typing.Any:
cursor = self.db.execute("SELECT value FROM properties WHERE name=?", (prop,))
result = cursor.fetchone()
if result is None:
return default
return result[0]
with self.db:
cursor = self.db.execute(
"SELECT value FROM properties WHERE name=?", (prop,)
)
result = cursor.fetchone()
if result is None:
return default
return result[0]
def set_property(self, prop: str, value):
self.db.execute(
"""
INSERT INTO properties (name, value) VALUES (?, ?)
ON CONFLICT DO UPDATE SET value=excluded.value
""",
(prop, value),
)
with self.db:
self.db.execute(
"""
INSERT INTO properties (name, value) VALUES (?, ?)
ON CONFLICT DO UPDATE SET value=excluded.value
""",
(prop, value),
)
def ensure_database_schema(self):
with self.db:
@ -126,60 +141,194 @@ class Database:
self.set_property("version", len(SCHEMA_STATEMENTS))
self.set_property("origin", self.origin)
def load_all_meta(self) -> list[feed.FeedMeta]:
    """Return a FeedMeta for every feed row in the database."""
    with self.db:
        rows = self.db.execute(
            """
            SELECT
              url,
              last_fetched_ts,
              retry_after_ts,
              status,
              etag,
              modified
            FROM feeds
            """
        ).fetchall()
        metas = []
        for url, fetched_ts, retry_ts, status, etag, modified in rows:
            metas.append(
                feed.FeedMeta(
                    url=url,
                    last_fetched_ts=int(fetched_ts),
                    retry_after_ts=int(retry_ts),
                    status=int(status),
                    etag=etag,
                    modified=modified,
                    origin=self.origin,
                )
            )
        return metas
def load_all(self, feed_limit: int = 20, pattern: str = "") -> list[feed.Feed]:
    """Load every non-unsubscribed feed whose title or link matches
    `pattern`, each with up to `feed_limit` most-recent entries
    (no entries at all when feed_limit is 0)."""
    with self.db:
        # Escape LIKE metacharacters so the user's pattern matches literally.
        escaped = (
            pattern.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        )
        feed_rows = self.db.execute(
            """
            SELECT
              url,
              last_fetched_ts,
              retry_after_ts,
              status,
              etag,
              modified,
              title,
              link
            FROM feeds
            WHERE (title LIKE :sql_pattern ESCAPE '\\'
               OR link LIKE :sql_pattern ESCAPE '\\')
              AND status != 2 -- UNSUBSCRIBED
            """,
            {"sql_pattern": f"%{escaped}%"},
        ).fetchall()

        feeds = []
        for row in feed_rows:
            (
                url,
                last_fetched_ts,
                retry_after_ts,
                status,
                etag,
                modified,
                title,
                link,
            ) = row
            meta = feed.FeedMeta(
                url=url,
                last_fetched_ts=last_fetched_ts,
                retry_after_ts=retry_after_ts,
                status=status,
                etag=etag,
                modified=modified,
                origin=self.origin,
            )
            entries = []
            if feed_limit > 0:
                entry_rows = self.db.execute(
                    """
                    SELECT
                      id,
                      inserted_at,
                      title,
                      link
                    FROM entries
                    WHERE feed_url=?
                    ORDER BY inserted_at DESC
                    LIMIT ?
                    """,
                    [meta.url, feed_limit],
                ).fetchall()
                entries = [
                    feed.Entry(id=eid, inserted_at=ets, title=etitle, link=elink)
                    for eid, ets, etitle, elink in entry_rows
                ]
            feeds.append(feed.Feed(meta=meta, title=title, link=link, entries=entries))
        return feeds
def load_feed(self, url: str) -> feed.Feed | None:
cursor = self.db.execute(
"""
SELECT
last_fetched_ts,
retry_after_ts,
status,
etag,
modified,
title,
link
FROM feeds
WHERE url=?
""",
[url],
)
with self.db:
cursor = self.db.execute(
"""
SELECT
last_fetched_ts,
retry_after_ts,
status,
etag,
modified,
title,
link
FROM feeds
WHERE url=?
""",
[url],
)
row = cursor.fetchone()
if row is None:
return None
row = cursor.fetchone()
if row is None:
return None
last_fetched_ts, retry_after_ts, status, etag, modified, title, link = row
meta = feed.FeedMeta(
url=url,
last_fetched_ts=last_fetched_ts,
retry_after_ts=retry_after_ts,
status=status,
etag=etag,
modified=modified,
origin=self.origin,
)
last_fetched_ts, retry_after_ts, status, etag, modified, title, link = row
meta = feed.FeedMeta(
url=url,
last_fetched_ts=last_fetched_ts,
retry_after_ts=retry_after_ts,
status=status,
etag=etag,
modified=modified,
origin=self.origin,
)
cursor = self.db.execute(
"""
SELECT
id,
inserted_at,
title,
link
FROM entries
WHERE feed_url=?
""",
[url],
)
cursor = self.db.execute(
"""
SELECT
id,
inserted_at,
title,
link
FROM entries
WHERE feed_url=?
""",
[url],
)
rows = cursor.fetchall()
entries = [
feed.Entry(id=id, inserted_at=inserted_at, title=title, link=link)
for id, inserted_at, title, link in rows
]
rows = cursor.fetchall()
entries = [
feed.Entry(id=id, inserted_at=inserted_at, title=title, link=link)
for id, inserted_at, title, link in rows
]
return feed.Feed(meta=meta, title=title, link=link, entries=entries)
def store_feed(self, f: feed.Feed):
def update_meta(self, f: feed.FeedMeta):
    """Persist the fetch-bookkeeping fields of an existing feed row."""
    values = (
        f.last_fetched_ts,
        f.retry_after_ts,
        f.status,
        f.etag,
        f.modified,
        f.url,
    )
    with self.db:
        self.db.execute(
            """
            UPDATE feeds SET
              last_fetched_ts=?,
              retry_after_ts=?,
              status=?,
              etag=?,
              modified=?
            WHERE url=?
            """,
            values,
        )
def store_feed(self, f: feed.Feed) -> int:
"""Store the given feed in the database.
Returns the number of new entries inserted.
"""
with self.db:
self.db.execute(
"""
@ -215,6 +364,11 @@ class Database:
],
)
cursor = self.db.execute(
"SELECT COUNT (*) FROM entries WHERE feed_url=?", [f.meta.url]
)
start_count = cursor.fetchone()[0]
self.db.executemany(
"""
INSERT INTO entries (
@ -248,3 +402,22 @@ class Database:
""",
[(e.id, e.inserted_at, f.meta.url, e.title, e.link) for e in f.entries],
)
cursor = self.db.execute(
"SELECT COUNT (*) FROM entries WHERE feed_url=?", [f.meta.url]
)
end_count = cursor.fetchone()[0]
return end_count - start_count
def set_feed_status(self, url: str, status: int) -> int:
    """Set the status of the feed at `url`, stamping last_fetched_ts with
    the current time. Returns the number of rows updated (0 or 1)."""
    now = int(time.time())
    with self.db:
        result = self.db.execute(
            """
            UPDATE feeds
            SET status = ?,
                last_fetched_ts = ?
            WHERE url = ?
            """,
            (status, now, url),
        )
        return result.rowcount

View file

@ -18,9 +18,9 @@ import requests.structures
LOG = logging.getLogger(__name__)
FEED_STATUS_DEAD = 0
FEED_STATUS_ALIVE = 1
FEED_STATUS_MISSING = 2
FEED_STATUS_ALIVE = 0
FEED_STATUS_DEAD = 1
FEED_STATUS_UNSUBSCRIBED = 2
# TODO: Consider configuration here.
http = requests.Session()
@ -143,7 +143,8 @@ async def fetch_feed(
Regardless, the new FeedMeta has the latest state of the feed.
"""
if feed.status == FEED_STATUS_DEAD:
if feed.status != FEED_STATUS_ALIVE:
LOG.info(f"{feed.url} is dead or unsubscribed")
return (None, feed)
if time.time() < feed.retry_after_ts:
@ -198,6 +199,7 @@ async def fetch_feed(
# permanently redirected URL, not just whatever the last thing
# is... e.g. imagine a permanent followed by a temporary
# redirect, then what?
LOG.info(f"{feed.url} permanently redirected to {response.url}")
assert response.url is not None
feed = dataclasses.replace(feed, url=response.url)
@ -239,6 +241,14 @@ async def fetch_feed(
return (parsed, feed)
async def fetch_many(
    metas: list[FeedMeta],
) -> list[tuple[feedparser.FeedParserDict | None, FeedMeta]]:
    """Fetch all the given feeds concurrently.

    Returns one (parsed, meta) pair per input, in the same order as `metas`.
    (The file already uses 3.10+ syntax, so the builtin `tuple` generic
    replaces the deprecated `typing.Tuple`.)
    """
    async with asyncio.TaskGroup() as group:
        tasks = [group.create_task(fetch_feed(m)) for m in metas]
    # The TaskGroup context only exits once every task has finished,
    # so result() is safe to call here.
    return [t.result() for t in tasks]
@dataclasses.dataclass(frozen=True)
class Entry:
id: str
@ -284,6 +294,27 @@ class Entry:
title = clean_text(str(title))
return Entry(id=id, inserted_at=insert_time, title=title, link=link)
def time_ago(self) -> str:
    """Return a compact age string for this entry, e.g. "42s", "5m", "3d".

    inserted_at appears to be a unix timestamp in milliseconds (it is
    divided by 1000 before comparing against time.time()).
    """
    # Integer division: the old float division made the seconds bucket
    # render as e.g. "42.5s" instead of "42s".
    inserted = self.inserted_at // 1000
    seconds = int(time.time()) - inserted
    if seconds <= 90:
        return f"{seconds}s"
    minutes = seconds // 60
    if minutes <= 90:
        return f"{minutes}m"
    hours = minutes // 60
    if hours < 24:
        return f"{hours}h"
    days = hours // 24
    if days <= 7:
        return f"{days}d"
    weeks = days // 7
    if weeks < 52:
        return f"{weeks}w"
    years = weeks // 52
    return f"{years}y"
@dataclasses.dataclass(frozen=True)
class Feed:
@ -392,3 +423,10 @@ def merge_feeds(a: Feed, b: Feed) -> Feed:
link=source_feed.link,
entries=entries,
)
def sort_key(f: Feed) -> int:
    """A sort key for sorting feeds by recency.

    Returns the newest entry's inserted_at, or -1 for a feed with no
    entries so that empty feeds sort last.
    """
    return max((e.inserted_at for e in f.entries), default=-1)