cry/cry/database.py

472 lines
15 KiB
Python

import pathlib
import random
import socket
import sqlite3
import string
import time
import typing
import platformdirs
from . import feed
SCHEMA_STATEMENTS = [
"""
CREATE TABLE feeds (
url VARCHAR NOT NULL PRIMARY KEY,
last_fetched_ts INTEGER NOT NULL,
retry_after_ts INTEGER NOT NULL,
status INTEGER NOT NULL,
etag VARCHAR,
modified VARCHAR,
title VARCHAR,
link VARCHAR
);
CREATE TABLE entries(
id VARCHAR NOT NULL,
inserted_at INTEGER NOT NULL,
feed_url VARCHAR NOT NULL,
title VARCHAR,
link VARCHAR,
PRIMARY KEY (id, feed_url),
FOREIGN KEY (feed_url) REFERENCES feeds(url)
ON UPDATE CASCADE
ON DELETE CASCADE
);
""",
# I went and changed the status enum to make ALIVE == 0 when I added the
# "unsubscribed" status. I should probably make these strings huh.
"""
UPDATE feeds
SET status=CASE
WHEN status = 0 THEN 1
WHEN status = 1 THEN 0
ELSE status
END
""",
]
def origin_path() -> pathlib.Path:
    """Return the path of the file that persists this machine's origin id."""
    data_dir = platformdirs.user_data_path("cry", "cry")
    return data_dir / "origin"
def local_origin(path: pathlib.Path | None = None) -> str:
if path is None:
path = origin_path()
if path.exists():
with open(path, "r", encoding="utf-8") as f:
return f.read().strip()
host = socket.gethostname()
slug = "".join(
random.choices(
string.ascii_uppercase + string.ascii_lowercase + string.digits, k=8
)
)
origin = f"{host}-{slug}"
path.parent.mkdir(parents=True, exist_ok=True)
with open(path, "w", encoding="utf-8") as f:
f.write(origin)
return origin
def database_path(origin: str) -> pathlib.Path:
    """Return the Dropbox-synced database file used for *origin*."""
    # TODO: Determine the name/slug from local state if necessary
    filename = f"{origin}.db"
    return pathlib.Path.home().joinpath("Dropbox", "cry", filename)
# TODO: Refactor into:
# -top level: transactions
# -bottom level: queries
# to enable reuse
class Database:
    """SQLite-backed store of feed metadata and entries for one origin."""

    # Open SQLite connection; used via ``with self.db`` transaction blocks.
    db: sqlite3.Connection
    # Identifier of the machine/instance this database file belongs to.
    origin: str
def __init__(self, path: pathlib.Path | str, origin: str):
if not isinstance(path, str):
path.parent.mkdir(parents=True, exist_ok=True)
db = sqlite3.Connection(str(path), autocommit=False)
db.execute("PRAGMA foreign_keys = ON")
self.db = db
self.origin = origin
@classmethod
def local(cls, origin: str | None = None) -> "Database":
if origin is None:
origin = local_origin()
db = Database(database_path(origin), origin)
db.ensure_database_schema()
return db
def get_property(self, prop: str, default=None) -> typing.Any:
with self.db:
cursor = self.db.execute(
"SELECT value FROM properties WHERE name=?", (prop,)
)
result = cursor.fetchone()
if result is None:
return default
return result[0]
    def set_property(self, prop: str, value):
        """Insert or overwrite the *prop* -> *value* row in ``properties``."""
        with self.db:
            self.db.execute(
                """
                INSERT INTO properties (name, value) VALUES (?, ?)
                ON CONFLICT DO UPDATE SET value=excluded.value
                """,
                (prop, value),
            )
def ensure_database_schema(self):
with self.db:
self.db.execute(
"""
CREATE TABLE IF NOT EXISTS properties (
name VARCHAR NOT NULL PRIMARY KEY,
value VARCHAR NOT NULL
)
"""
)
version = int(self.get_property("version", 0))
for script in SCHEMA_STATEMENTS[version:]:
for statement in script.split(";"):
try:
self.db.execute(statement)
except Exception as e:
raise Exception(f"Error executing:\n{statement}") from e
self.set_property("version", len(SCHEMA_STATEMENTS))
self.set_property("origin", self.origin)
def load_all_meta(self) -> list[feed.FeedMeta]:
with self.db:
cursor = self.db.execute(
"""
SELECT
url,
last_fetched_ts,
retry_after_ts,
status,
etag,
modified
FROM feeds
"""
)
rows = cursor.fetchall()
return [
feed.FeedMeta(
url=url,
last_fetched_ts=int(last_fetched_ts),
retry_after_ts=int(retry_after_ts),
status=int(status),
etag=etag,
modified=modified,
origin=self.origin,
)
for url, last_fetched_ts, retry_after_ts, status, etag, modified in rows
]
def load_all(self, feed_limit: int = 20, pattern: str = "") -> list[feed.Feed]:
with self.db:
pattern = (
pattern.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
)
sql_pattern = f"%{pattern}%"
cursor = self.db.execute(
"""
SELECT
url,
last_fetched_ts,
retry_after_ts,
status,
etag,
modified,
title,
link
FROM feeds
WHERE (title LIKE :sql_pattern ESCAPE '\\'
OR link LIKE :sql_pattern ESCAPE '\\')
AND status != 2 -- UNSUBSCRIBED
""",
{"sql_pattern": sql_pattern},
)
rows = cursor.fetchall()
almost_feeds = []
for row in rows:
(
url,
last_fetched_ts,
retry_after_ts,
status,
etag,
modified,
title,
link,
) = row
meta = feed.FeedMeta(
url=url,
last_fetched_ts=last_fetched_ts,
retry_after_ts=retry_after_ts,
status=status,
etag=etag,
modified=modified,
origin=self.origin,
)
almost_feeds.append((meta, title, link))
feeds = []
for meta, title, link in almost_feeds:
if feed_limit > 0:
cursor = self.db.execute(
"""
SELECT
id,
inserted_at,
title,
link
FROM entries
WHERE feed_url=?
ORDER BY inserted_at DESC
LIMIT ?
""",
[meta.url, feed_limit],
)
rows = cursor.fetchall()
else:
rows = []
entries = [
feed.Entry(id=id, inserted_at=inserted_at, title=title, link=link)
for id, inserted_at, title, link in rows
]
f = feed.Feed(meta=meta, title=title, link=link, entries=entries)
feeds.append(f)
return feeds
def load_feed(self, url: str) -> feed.Feed | None:
with self.db:
cursor = self.db.execute(
"""
SELECT
last_fetched_ts,
retry_after_ts,
status,
etag,
modified,
title,
link
FROM feeds
WHERE url=?
""",
[url],
)
row = cursor.fetchone()
if row is None:
return None
last_fetched_ts, retry_after_ts, status, etag, modified, title, link = row
meta = feed.FeedMeta(
url=url,
last_fetched_ts=last_fetched_ts,
retry_after_ts=retry_after_ts,
status=status,
etag=etag,
modified=modified,
origin=self.origin,
)
cursor = self.db.execute(
"""
SELECT
id,
inserted_at,
title,
link
FROM entries
WHERE feed_url=?
""",
[url],
)
rows = cursor.fetchall()
entries = [
feed.Entry(id=id, inserted_at=inserted_at, title=title, link=link)
for id, inserted_at, title, link in rows
]
return feed.Feed(meta=meta, title=title, link=link, entries=entries)
    def update_meta(self, f: feed.FeedMeta):
        """Write *f*'s fetch-state columns back to its feeds row.

        Only metadata columns are touched; title/link/entries are untouched.
        A no-op if no row with f.url exists.
        """
        with self.db:
            self.db.execute(
                """
                UPDATE feeds SET
                last_fetched_ts=?,
                retry_after_ts=?,
                status=?,
                etag=?,
                modified=?
                WHERE url=?
                """,
                [
                    f.last_fetched_ts,
                    f.retry_after_ts,
                    f.status,
                    f.etag,
                    f.modified,
                    f.url,
                ],
            )
    def store_feed(self, f: feed.Feed) -> int:
        """Store the given feed in the database.

        Upserts the feeds row, then upserts every entry, merging conflicting
        entries by keeping the oldest inserted_at and the title/link of the
        older row (see the NOTE inside the SQL below).

        Returns the number of new entries inserted, computed by counting the
        feed's entries before and after the batch upsert (updates to existing
        entries therefore don't count).
        """
        with self.db:
            self.db.execute(
                """
                INSERT INTO feeds (
                url,
                last_fetched_ts,
                retry_after_ts,
                status,
                etag,
                modified,
                title,
                link
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ON CONFLICT DO UPDATE
                SET
                last_fetched_ts=excluded.last_fetched_ts,
                retry_after_ts=excluded.retry_after_ts,
                status=excluded.status,
                etag=excluded.etag,
                modified=excluded.modified,
                title=excluded.title,
                link=excluded.link
                """,
                [
                    f.meta.url,
                    f.meta.last_fetched_ts,
                    f.meta.retry_after_ts,
                    f.meta.status,
                    f.meta.etag,
                    f.meta.modified,
                    f.title,
                    f.link,
                ],
            )
            # Snapshot the entry count so new insertions can be reported.
            cursor = self.db.execute(
                "SELECT COUNT (*) FROM entries WHERE feed_url=?", [f.meta.url]
            )
            start_count = cursor.fetchone()[0]
            self.db.executemany(
                """
                INSERT INTO entries (
                id,
                inserted_at,
                feed_url,
                title,
                link
                ) VALUES (?, ?, ?, ?, ?)
                ON CONFLICT DO UPDATE
                SET
                -- NOTE: This is also part of the feed merge algorithm, BUT
                -- we implement it here because feeds tend to be rolling
                -- windows over some external content and we don't want
                -- to read and write the entire feed just to update the
                -- few new items. But we can't just do ON CONFLICT DO
                -- NOTHING because we *might* be storing a feed where we
                -- resolved conflicts with another instance. So we want
                -- to handle all the cases. (In theory we could make two
                -- different INSERTs to handle the two cases but that is
                -- more complexity than it is worth.)
                inserted_at=MIN(inserted_at, excluded.inserted_at),
                title=CASE
                WHEN inserted_at < excluded.inserted_at THEN title
                ELSE excluded.title
                END,
                link=CASE
                WHEN inserted_at < excluded.inserted_at THEN link
                ELSE excluded.link
                END
                """,
                [(e.id, e.inserted_at, f.meta.url, e.title, e.link) for e in f.entries],
            )
            cursor = self.db.execute(
                "SELECT COUNT (*) FROM entries WHERE feed_url=?", [f.meta.url]
            )
            end_count = cursor.fetchone()[0]
            return end_count - start_count
def set_feed_status(self, url: str, status: int) -> int:
with self.db:
cursor = self.db.execute(
"""
UPDATE feeds
SET status = ?,
last_fetched_ts = ?
WHERE url = ?
""",
[status, int(time.time()), url],
)
return cursor.rowcount
def redirect_feed(self, old_url: str, new_url: str):
with self.db:
cursor = self.db.execute(
"SELECT COUNT(1) FROM feeds WHERE url=?", [new_url]
)
row = cursor.fetchone()
if row[0] == 0:
self.db.execute(
"UPDATE feeds SET url = ? WHERE url = ?", [new_url, old_url]
)
else:
# Preserve the entries that were under the old url.
self.db.execute(
"""
UPDATE entries
SET feed_url = ?
WHERE feed_url = ?
ON CONFLICT DO UPDATE
SET
-- NOTE: This is also part of the feed merge algorithm, BUT
-- we implement it here. See the comment in store_feed
-- for the rationale.
inserted_at=MIN(inserted_at, excluded.inserted_at),
title=CASE
WHEN inserted_at < excluded.inserted_at THEN title
ELSE excluded.title
END,
link=CASE
WHEN inserted_at < excluded.inserted_at THEN link
ELSE excluded.link
END
"""
)
# Mark the old feed dead.
self.db.execute(
"""
UPDATE feeds
SET status = ?,
last_fetched_ts = ?
WHERE url = ?
""",
[feed.FEED_STATUS_DEAD, int(time.time()), old_url],
)