Move stuff over from previous incarnation into new frame
Thanks to Simon Willison for the layout
This commit is contained in:
commit
6be6afdbc3
15 changed files with 799 additions and 0 deletions
52
.github/workflows/publish.yml
vendored
Normal file
52
.github/workflows/publish.yml
vendored
Normal file
|
|
@ -0,0 +1,52 @@
|
||||||
|
name: Publish Python Package
|
||||||
|
|
||||||
|
on:
|
||||||
|
release:
|
||||||
|
types: [created]
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
cache: pip
|
||||||
|
cache-dependency-path: pyproject.toml
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
pip install '.[test]'
|
||||||
|
- name: Run tests
|
||||||
|
run: |
|
||||||
|
pytest
|
||||||
|
deploy:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: [test]
|
||||||
|
environment: release
|
||||||
|
permissions:
|
||||||
|
id-token: write
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.12"
|
||||||
|
cache: pip
|
||||||
|
cache-dependency-path: pyproject.toml
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
pip install setuptools wheel build
|
||||||
|
- name: Build
|
||||||
|
run: |
|
||||||
|
python -m build
|
||||||
|
- name: Publish
|
||||||
|
uses: pypa/gh-action-pypi-publish@release/v1
|
||||||
|
|
||||||
28
.github/workflows/test.yml
vendored
Normal file
28
.github/workflows/test.yml
vendored
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
name: Test
|
||||||
|
|
||||||
|
on: [push, pull_request]
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
cache: pip
|
||||||
|
cache-dependency-path: pyproject.toml
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
pip install '.[test]'
|
||||||
|
- name: Run tests
|
||||||
|
run: |
|
||||||
|
pytest
|
||||||
|
|
||||||
9
.gitignore
vendored
Normal file
9
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
.venv
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
venv
|
||||||
|
.eggs
|
||||||
|
.pytest_cache
|
||||||
|
*.egg-info
|
||||||
|
.DS_Store
|
||||||
1
.pdm-python
Normal file
1
.pdm-python
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
/home/doty/src/cry/.venv/bin/python
|
||||||
19
LICENSE
Normal file
19
LICENSE
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
Copyright 2024 John Doty
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the “Software”), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
41
README.md
Normal file
41
README.md
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
# cry
|
||||||
|
|
||||||
|
[](https://pypi.org/project/cry/)
|
||||||
|
[](https://github.com/decarabas/cry/releases)
|
||||||
|
[](https://github.com/decarabas/cry/actions/workflows/test.yml)
|
||||||
|
[](https://github.com/decarabas/cry/blob/master/LICENSE)
|
||||||
|
|
||||||
|
Command line feed reader
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
Install this tool using `pip`:
|
||||||
|
```bash
|
||||||
|
pip install cry
|
||||||
|
```
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
For help, run:
|
||||||
|
```bash
|
||||||
|
cry --help
|
||||||
|
```
|
||||||
|
You can also use:
|
||||||
|
```bash
|
||||||
|
python -m cry --help
|
||||||
|
```
|
||||||
|
## Development
|
||||||
|
|
||||||
|
To contribute to this tool, first checkout the code. Then create a new virtual environment:
|
||||||
|
```bash
|
||||||
|
cd cry
|
||||||
|
python -m venv venv
|
||||||
|
source venv/bin/activate
|
||||||
|
```
|
||||||
|
Now install the dependencies and test dependencies:
|
||||||
|
```bash
|
||||||
|
pip install -e '.[test]'
|
||||||
|
```
|
||||||
|
To run the tests:
|
||||||
|
```bash
|
||||||
|
pytest
|
||||||
|
```
|
||||||
0
cry/__init__.py
Normal file
0
cry/__init__.py
Normal file
4
cry/__main__.py
Normal file
4
cry/__main__.py
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
# Entry point for `python -m cry`: delegate straight to the click CLI group.
from .cli import cli


if __name__ == "__main__":
    cli()
|
||||||
21
cry/cli.py
Normal file
21
cry/cli.py
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
import click
|
||||||
|
|
||||||
|
|
||||||
|
# Root command group; subcommands are attached below via @cli.command().
# The docstring is user-visible: click shows it as the help text for `cry`.
@click.group()
@click.version_option()
def cli():
    "Command line feed reader"
|
||||||
|
|
||||||
|
|
||||||
|
# Placeholder subcommand from the project template — the name, argument,
# option, and output below are meant to be replaced with real commands.
@cli.command(name="command")
@click.argument(
    "example"
)
@click.option(
    "-o",
    "--option",
    help="An example option",
)
def first_command(example, option):
    "Command description goes here"
    click.echo("Here is some output")
|
||||||
85
cry/database.py
Normal file
85
cry/database.py
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
import pathlib
|
||||||
|
import sqlite3
|
||||||
|
import typing
|
||||||
|
|
||||||
|
|
||||||
|
def get_property(db: sqlite3.Connection, prop: str, default=None) -> typing.Any:
    """Look up a single named value from the properties table.

    Returns *default* when the property has never been stored.
    """
    row = db.execute(
        "SELECT value FROM properties WHERE name=?", (prop,)
    ).fetchone()
    return default if row is None else row[0]
|
||||||
|
|
||||||
|
|
||||||
|
def set_property(db: sqlite3.Connection, prop: str, value):
    """Insert or overwrite a single named value in the properties table.

    Uses an upsert so repeated writes to the same name replace the value.
    Does not commit; the caller owns the transaction.
    """
    params = (prop, value)
    db.execute(
        """
        INSERT INTO properties (name, value) VALUES (?, ?)
        ON CONFLICT DO UPDATE SET value=excluded.value
        """,
        params,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Migration scripts, applied in order by ensure_database_schema().  The
# number of scripts already applied is recorded in the "version" property,
# so new migrations must be appended, never edited in place.
#
# Fix: the table-level FOREIGN KEY constraint requires a parenthesized
# column list — `FOREIGN KEY feed_url REFERENCES ...` (the previous form)
# is rejected by SQLite with a syntax error.
SCHEMA_STATEMENTS = [
    """
    CREATE TABLE feeds (
        url VARCHAR NOT NULL PRIMARY KEY,
        last_fetched_ts INTEGER NOT NULL,
        retry_after_ts INTEGER NOT NULL,
        status INTEGER NOT NULL,
        etag VARCHAR,
        modified VARCHAR,
        title VARCHAR,
        link VARCHAR
    );

    CREATE TABLE entries(
        id VARCHAR NOT NULL PRIMARY KEY,
        inserted_at INTEGER NOT NULL,
        feed_url VARCHAR,
        title VARCHAR,
        link VARCHAR,
        FOREIGN KEY (feed_url) REFERENCES feeds(url)
            ON UPDATE CASCADE
            ON DELETE CASCADE
    );
    """
]
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_database_schema(db: sqlite3.Connection):
    """Create or migrate the schema up to the current version, atomically.

    The "version" property records how many entries of SCHEMA_STATEMENTS
    have already been applied, so only newer scripts are run.  Everything
    happens inside one transaction: a failed migration leaves the database
    untouched.
    """
    with db:
        # The properties table must exist before we can read "version".
        db.execute(
            """
            CREATE TABLE IF NOT EXISTS properties (
                name VARCHAR NOT NULL PRIMARY KEY,
                value VARCHAR NOT NULL
            )
            """
        )
        version = int(get_property(db, "version", 0))
        for script in SCHEMA_STATEMENTS[version:]:
            # sqlite3's execute() runs one statement at a time, so split the
            # migration script on ";".  Skip blank fragments (e.g. the empty
            # tail after the final semicolon) rather than executing them.
            for statement in script.split(";"):
                if statement.strip():
                    db.execute(statement)
        set_property(db, "version", len(SCHEMA_STATEMENTS))
|
||||||
|
|
||||||
|
|
||||||
|
def database_path() -> pathlib.Path:
    """Location of the feed database on disk.

    TODO: Determine the name/slug from local state if necessary.
    """
    base = pathlib.Path.home() / "Dropbox" / "cry"
    return base / "testing-slug.db"
|
||||||
|
|
||||||
|
|
||||||
|
def connect_database(path: pathlib.Path) -> sqlite3.Connection:
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
connection = sqlite3.Connection(str(path), autocommit=False)
|
||||||
|
connection.execute("PRAGMA foreign_keys = ON")
|
||||||
|
return connection
|
||||||
|
|
||||||
|
|
||||||
|
def setup_database() -> sqlite3.Connection:
    """Open the application database and make sure its schema is current."""
    db = connect_database(database_path())
    ensure_database_schema(db)
    return db
|
||||||
327
cry/feed.py
Normal file
327
cry/feed.py
Normal file
|
|
@ -0,0 +1,327 @@
|
||||||
|
# I guess this is it.
|
||||||
|
import asyncio
|
||||||
|
import dataclasses
|
||||||
|
import functools
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
import typing
|
||||||
|
import pathlib
|
||||||
|
import hashlib
|
||||||
|
import html.parser
|
||||||
|
import io
|
||||||
|
import re
|
||||||
|
|
||||||
|
import feedparser
|
||||||
|
import requests
|
||||||
|
import requests.structures
|
||||||
|
|
||||||
|
import database
|
||||||
|
import opml
|
||||||
|
|
||||||
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Feed liveness states, stored in FeedMeta.status / the feeds.status column.
FEED_STATUS_DEAD = 0  # gone for good (HTTP 410, or failing for over a week)
FEED_STATUS_ALIVE = 1  # normal, fetchable feed
FEED_STATUS_MISSING = 2  # NOTE(review): never assigned in this file — confirm intent

# TODO: Consider configuration here.
# Shared requests session used by all feed fetches in this module.
http = requests.Session()
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass(frozen=True)
class FeedMeta:
    """Persistent fetch state for one feed (mirrors the feeds table columns)."""

    # Feed URL; also the primary key in the database.
    url: str
    # Unix timestamp of the last successful fetch (0 = never fetched).
    last_fetched_ts: int
    # Unix timestamp before which the server should not be contacted again.
    retry_after_ts: int
    # One of FEED_STATUS_DEAD / FEED_STATUS_ALIVE / FEED_STATUS_MISSING.
    status: int
    # HTTP cache validators from the last response, if the server sent any.
    etag: str | None
    modified: str | None

    @classmethod
    def from_url(cls, url: str) -> "FeedMeta":
        """Alternate constructor: a brand-new, never-fetched, alive feed."""
        return FeedMeta(
            url=url,
            last_fetched_ts=0,
            retry_after_ts=0,
            status=FEED_STATUS_ALIVE,
            etag=None,
            modified=None,
        )
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass(frozen=True)
class Entry:
    """A single item from a feed, reduced to what we display and store."""

    # Stable identifier; derived by entry_from_feed() when the feed omits one.
    id: str
    # Cleaned, tag-free title text.
    title: str
    # URL of the item, when one could be determined.
    link: str | None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass(frozen=True)
class Feed:
    """A fetched feed: its stored metadata plus the parsed content."""

    # Persistent fetch state for the feed.
    meta: FeedMeta
    # Human-readable feed title.
    title: str
    # URL of the feed's associated web page.
    link: str
    # The feed's items, already converted to Entry records.
    entries: list[Entry]
|
||||||
|
|
||||||
|
|
||||||
|
def the_worst_element_hash(value) -> str:
    """Compute a content hash for the given feed element, to use as an ID.

    The hash must be as stable as we can make it, but obviously there are things
    we cannot control. If we've gotten here then the feed author has already
    failed us and there's little we can do. This is already *known to be wrong.*
    """

    def process(value, hash):
        # Each container is framed by a type tag and its reversal (b"dict"
        # ... b"tcid") so that different nestings of equal leaves still hash
        # differently.  Do not change these byte sequences: stored IDs
        # depend on them.
        if isinstance(value, feedparser.FeedParserDict):
            hash.update(b"dict")
            # Sort keys so the hash is independent of dict ordering.
            keys = sorted(value.keys())
            for key in keys:
                hash.update(b"key::")
                hash.update(key.encode("utf-8"))
                hash.update(b"value::")
                process(value[key], hash)
            hash.update(b"tcid")
        elif isinstance(value, str):
            hash.update(b"str")
            hash.update(value.encode("utf-8"))
            hash.update(b"rts")
        elif isinstance(value, list):
            hash.update(b"list")
            for item in value:
                process(item, hash)
            hash.update(b"tsil")
        elif isinstance(value, tuple):
            hash.update(b"tuple")
            for item in value:
                process(item, hash)
            hash.update(b"elput")
        # NOTE(review): any other leaf type (ints, parsed dates, ...)
        # contributes nothing to the hash — part of why this is "known to
        # be wrong".

    # Not a security boundary; just a stable content fingerprint.
    hash = hashlib.sha256(usedforsecurity=False)
    process(value, hash)
    return hash.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
# HTML tags that visually separate content; clean_text() emits a space for
# each so that stripped elements do not run together.
BLANK_TAGS = {"p", "br", "li", "div", "img"}
# A run of whitespace; collapsed to a single space by clean_text().
MULTI_SPACES = re.compile(r"\s+")
|
||||||
|
|
||||||
|
|
||||||
|
def clean_text(text: str) -> str:
|
||||||
|
"""Sometimes text is HTML and otherwise ugly. This reduces it to
|
||||||
|
something pretty to display. Strips tags, puts blank space in between
|
||||||
|
elements that should generate blank space, and then collapses blank
|
||||||
|
spaces down to one.
|
||||||
|
"""
|
||||||
|
|
||||||
|
class Cleaner(html.parser.HTMLParser):
|
||||||
|
def __init__(self, writer):
|
||||||
|
super().__init__()
|
||||||
|
self.writer = writer
|
||||||
|
|
||||||
|
def handle_data(self, data: str) -> None:
|
||||||
|
self.writer.write(data)
|
||||||
|
|
||||||
|
def handle_startendtag(
|
||||||
|
self, tag: str, attrs: list[tuple[str, str | None]]
|
||||||
|
) -> None:
|
||||||
|
del attrs
|
||||||
|
if tag.lower() in BLANK_TAGS:
|
||||||
|
self.writer.write(" ")
|
||||||
|
|
||||||
|
def handle_starttag(
|
||||||
|
self, tag: str, attrs: list[tuple[str, str | None]]
|
||||||
|
) -> None:
|
||||||
|
del attrs
|
||||||
|
if tag.lower() in BLANK_TAGS:
|
||||||
|
self.writer.write(" ")
|
||||||
|
|
||||||
|
writer = io.StringIO()
|
||||||
|
cleaner = Cleaner(writer)
|
||||||
|
cleaner.feed(text)
|
||||||
|
return MULTI_SPACES.sub(" ", writer.getvalue())
|
||||||
|
|
||||||
|
|
||||||
|
def entry_from_feed(entry: feedparser.FeedParserDict) -> Entry:
    """Convert a feedparser entry into an Entry.

    Extracts the fields we care about, substituting fallbacks for whichever
    of title/id/link the feed failed to provide.
    """
    title = entry.get("title") or entry.get("description")

    entry_id = entry.get("id")
    link = entry.get("link")

    # Some feeds put a URL in the id field; use it as the link when the
    # feed did not give us one directly.
    if entry_id and not link:
        candidate = str(entry_id).lower()
        if candidate.startswith(("http:", "https:")):
            link = candidate

    # Fall back through progressively worse sources of identity.
    if not entry_id and link:
        entry_id = link
    if not entry_id and title:
        entry_id = title
    if not entry_id:
        entry_id = entry.get("published")
    if not entry_id:
        entry_id = the_worst_element_hash(entry)

    assert isinstance(entry_id, str)
    assert link is None or isinstance(link, str)

    return Entry(id=entry_id, title=clean_text(str(title)), link=link)
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_feed(
    feed: FeedMeta,
) -> typing.Tuple[feedparser.FeedParserDict | None, FeedMeta]:
    """Potentially fetch the feed described by `feed`, returning a parsed feed
    (if possible and necessary) and an updated FeedMeta.

    This function can fail to return a parsed feed under a number of
    circumstances. Among them:

    - It's too soon to be checking this feed again.
    - The feed has been failing for a while and we've called it's dead.
    - The server told us it was dead.
    - We checked the server and it told us our cache was good.
    - We tried to contact the server, but a networking error happened.

    Regardless, the new FeedMeta has the latest state of the feed.
    """
    # Dead feeds are never contacted again.
    if feed.status == FEED_STATUS_DEAD:
        return (None, feed)

    # Honor the server-requested (or default) retry delay.
    if time.time() < feed.retry_after_ts:
        LOG.info(f"{feed.url} will not be pulled until {feed.retry_after_ts}")
        return (None, feed)

    # We waffle back and forth about using feedreader's HTTP support vs
    # calling requests ourselves. We have decided to use requests manually at
    # this time because it make it much much easier to figure out whether or
    # not a request has succeeded. (The straw was handling timeouts and
    # understanding whether `bozo_exception` was a transport failure or not.)

    # Conditional-request headers let the server answer 304 when our cache
    # is still good.
    headers = {"user-agent": "cry-reader v0.0"}
    if feed.etag:
        headers["if-none-match"] = feed.etag
    if feed.modified:
        headers["if-modified-since"] = feed.modified

    LOG.info(f"{feed.url} fetching...")
    try:
        # requests is blocking, so run it on the default executor to avoid
        # stalling the event loop.
        # NOTE(review): no timeout is passed to http.get — a stalled server
        # can pin this executor thread indefinitely; consider a timeout.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            functools.partial(http.get, feed.url, headers=headers),
        )
        LOG.info(f"{feed.url} fetched with status: {response.status_code}")
        failed = response.status_code >= 400
    except Exception as e:
        # Transport-level failure: no response at all.
        LOG.error(f"{feed.url} error fetching: {e}")
        failed = True
        response = None

    # Now, there are a number of things to consider in the response that
    # we need to consider in updating our permanent record.

    if response is not None and response.status_code == 410:
        # Permanently gone, really stop asking.
        LOG.error(f"{feed.url} permanently gone")
        return (None, dataclasses.replace(feed, status=FEED_STATUS_DEAD))

    if failed and time.time() > feed.last_fetched_ts + (7 * 24 * 60 * 60):
        # If we've been failing to fetch the feed for more than a week then
        # consider us dead, we must be doing something wrong.
        LOG.error(f"{feed.url} failed for too long, giving up")
        return (None, dataclasses.replace(feed, status=FEED_STATUS_DEAD))

    if response and response.is_permanent_redirect:
        # Permanent redirect, update the stored URL, but mark this as a
        # successful fetch.
        #
        # TODO: Is this actually the right URL to store? We need the last
        #       permanently redirected URL, not just whatever the last thing
        #       is... e.g. imagine a permanent followed by a temporary
        #       redirect, then what?
        assert response.url is not None
        feed = dataclasses.replace(feed, url=response.url)

    # NOTE: We might still be in a failure state here. But success or fail,
    #       the server might have told us when to next retry, so make a note
    #       of it.
    # NOTE(review): Retry-After may also be an HTTP-date, which int()
    # rejects — such values silently fall back to the one-hour default.
    retry_delta = None
    if response is not None:
        try:
            retry_delta = int(response.headers.get("retry-after", "nope"))
        except Exception:
            pass
    if retry_delta is None:
        retry_delta = 60 * 60  # 1 hour default

    feed = dataclasses.replace(feed, retry_after_ts=int(time.time()) + retry_delta)

    # We've done everything we can on a failure, bail if we've got an error.
    if failed:
        LOG.info(f"{feed.url} failed at the network level")
        return (None, feed)

    assert response is not None

    # Record our successful fetch now, to reset the failure timer above.
    feed = dataclasses.replace(feed, last_fetched_ts=int(time.time()))

    # We can *still* be successful but like, no changes.
    if response.status_code != 200:
        LOG.info(f"{feed.url} had no changes")
        return (None, feed)

    # Fresh content: remember the new cache validators and parse the body.
    feed = dataclasses.replace(
        feed,
        etag=response.headers.get("etag"),
        modified=response.headers.get("last-modified"),
    )
    parsed = feedparser.parse(response.content, response_headers=response.headers)
    return (parsed, feed)
|
||||||
|
|
||||||
|
|
||||||
|
async def main() -> None:
    """Ad-hoc driver: fetch every feed listed in a fraidycat OPML export and
    print a markdown-ish summary of each feed to stdout.
    """
    database.setup_database()

    # Hard-coded input path; assumes a fraidycat export sits in ~/Downloads.
    feeds = [
        FeedMeta.from_url(url)
        for url in opml.load_opml(pathlib.Path.home() / "Downloads" / "fraidycat.opml")
    ]
    # The TaskGroup waits for every fetch before the `async with` exits, so
    # all task results are available afterwards.
    async with asyncio.TaskGroup() as group:
        tasks = [group.create_task(fetch_feed(f)) for f in feeds]
    results = [t.result() for t in tasks]

    for d, meta in results:
        # d is None when the fetch was skipped, cached, or failed.
        if d is not None:
            title = None
            page_url = None

            if d.feed is not None:
                title = d.feed.get("title")
                page_url = d.feed.get("link")

            # Fall back to the feed URL when the feed provides nothing usable.
            if title is None or title == "":
                title = meta.url
            if page_url is None:
                page_url = meta.url

            print(f"[{title}]({page_url})")
            print(f"{meta}")

            entries = [entry_from_feed(e) for e in d.entries]
            for entry in entries:
                print(f" - {entry.title} ({entry.id})")
                print(f" {entry.link}")
            print()


if __name__ == "__main__":
    asyncio.run(main())
|
||||||
12
cry/opml.py
Normal file
12
cry/opml.py
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
import pathlib
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
|
||||||
|
def parse_opml(opml: str) -> list[str]:
    """Extract every feed URL (xmlUrl attribute) from an OPML document,
    in document order."""
    root = xml.etree.ElementTree.fromstring(opml)
    urls = []
    for outline in root.iterfind(".//*[@xmlUrl]"):
        urls.append(outline.attrib["xmlUrl"])
    return urls


def load_opml(path: pathlib.Path) -> list[str]:
    """Read the OPML file at *path* and return the feed URLs it lists."""
    text = path.read_text(encoding="utf-8")
    return parse_opml(text)
|
||||||
155
pdm.lock
generated
Normal file
155
pdm.lock
generated
Normal file
|
|
@ -0,0 +1,155 @@
|
||||||
|
# This file is @generated by PDM.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
|
||||||
|
[metadata]
|
||||||
|
groups = ["default"]
|
||||||
|
strategy = ["cross_platform", "inherit_metadata"]
|
||||||
|
lock_version = "4.4.2"
|
||||||
|
content_hash = "sha256:25762b46d2ca8dcb9aaf19a91c0c156a3d3e4cd7101e1fa123fb17a66ebdf2c0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "certifi"
|
||||||
|
version = "2024.7.4"
|
||||||
|
requires_python = ">=3.6"
|
||||||
|
summary = "Python package for providing Mozilla's CA Bundle."
|
||||||
|
groups = ["default"]
|
||||||
|
files = [
|
||||||
|
{file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"},
|
||||||
|
{file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "charset-normalizer"
|
||||||
|
version = "3.3.2"
|
||||||
|
requires_python = ">=3.7.0"
|
||||||
|
summary = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
|
||||||
|
groups = ["default"]
|
||||||
|
files = [
|
||||||
|
{file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"},
|
||||||
|
{file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"},
|
||||||
|
{file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"},
|
||||||
|
{file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"},
|
||||||
|
{file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"},
|
||||||
|
{file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"},
|
||||||
|
{file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"},
|
||||||
|
{file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"},
|
||||||
|
{file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"},
|
||||||
|
{file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"},
|
||||||
|
{file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"},
|
||||||
|
{file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"},
|
||||||
|
{file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"},
|
||||||
|
{file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"},
|
||||||
|
{file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"},
|
||||||
|
{file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"},
|
||||||
|
{file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "click"
|
||||||
|
version = "8.1.7"
|
||||||
|
requires_python = ">=3.7"
|
||||||
|
summary = "Composable command line interface toolkit"
|
||||||
|
groups = ["default"]
|
||||||
|
dependencies = [
|
||||||
|
"colorama; platform_system == \"Windows\"",
|
||||||
|
]
|
||||||
|
files = [
|
||||||
|
{file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"},
|
||||||
|
{file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "colorama"
|
||||||
|
version = "0.4.6"
|
||||||
|
requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
|
||||||
|
summary = "Cross-platform colored terminal text."
|
||||||
|
groups = ["default"]
|
||||||
|
marker = "platform_system == \"Windows\""
|
||||||
|
files = [
|
||||||
|
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
|
||||||
|
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "feedparser"
|
||||||
|
version = "6.0.11"
|
||||||
|
requires_python = ">=3.6"
|
||||||
|
summary = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds"
|
||||||
|
groups = ["default"]
|
||||||
|
dependencies = [
|
||||||
|
"sgmllib3k",
|
||||||
|
]
|
||||||
|
files = [
|
||||||
|
{file = "feedparser-6.0.11-py3-none-any.whl", hash = "sha256:0be7ee7b395572b19ebeb1d6aafb0028dee11169f1c934e0ed67d54992f4ad45"},
|
||||||
|
{file = "feedparser-6.0.11.tar.gz", hash = "sha256:c9d0407b64c6f2a065d0ebb292c2b35c01050cc0dc33757461aaabdc4c4184d5"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "idna"
|
||||||
|
version = "3.7"
|
||||||
|
requires_python = ">=3.5"
|
||||||
|
summary = "Internationalized Domain Names in Applications (IDNA)"
|
||||||
|
groups = ["default"]
|
||||||
|
files = [
|
||||||
|
{file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"},
|
||||||
|
{file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "platformdirs"
|
||||||
|
version = "4.2.2"
|
||||||
|
requires_python = ">=3.8"
|
||||||
|
summary = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
|
||||||
|
groups = ["default"]
|
||||||
|
files = [
|
||||||
|
{file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"},
|
||||||
|
{file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "requests"
|
||||||
|
version = "2.32.3"
|
||||||
|
requires_python = ">=3.8"
|
||||||
|
summary = "Python HTTP for Humans."
|
||||||
|
groups = ["default"]
|
||||||
|
dependencies = [
|
||||||
|
"certifi>=2017.4.17",
|
||||||
|
"charset-normalizer<4,>=2",
|
||||||
|
"idna<4,>=2.5",
|
||||||
|
"urllib3<3,>=1.21.1",
|
||||||
|
]
|
||||||
|
files = [
|
||||||
|
{file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"},
|
||||||
|
{file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sgmllib3k"
|
||||||
|
version = "1.0.0"
|
||||||
|
summary = "Py3k port of sgmllib."
|
||||||
|
groups = ["default"]
|
||||||
|
files = [
|
||||||
|
{file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tomlkit"
|
||||||
|
version = "0.12.5"
|
||||||
|
requires_python = ">=3.7"
|
||||||
|
summary = "Style preserving TOML library"
|
||||||
|
groups = ["default"]
|
||||||
|
files = [
|
||||||
|
{file = "tomlkit-0.12.5-py3-none-any.whl", hash = "sha256:af914f5a9c59ed9d0762c7b64d3b5d5df007448eb9cd2edc8a46b1eafead172f"},
|
||||||
|
{file = "tomlkit-0.12.5.tar.gz", hash = "sha256:eef34fba39834d4d6b73c9ba7f3e4d1c417a4e56f89a7e96e090dd0d24b8fb3c"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "urllib3"
|
||||||
|
version = "2.2.2"
|
||||||
|
requires_python = ">=3.8"
|
||||||
|
summary = "HTTP library with thread-safe connection pooling, file post, and more."
|
||||||
|
groups = ["default"]
|
||||||
|
files = [
|
||||||
|
{file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"},
|
||||||
|
{file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"},
|
||||||
|
]
|
||||||
35
pyproject.toml
Normal file
35
pyproject.toml
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
[project]
|
||||||
|
name = "cry"
|
||||||
|
version = "0.1"
|
||||||
|
description = "Command line feed reader"
|
||||||
|
readme = "README.md"
|
||||||
|
authors = [{name = "John Doty"}]
|
||||||
|
license = {text = "MIT"}
|
||||||
|
requires-python = "==3.12.*"
|
||||||
|
classifiers = [
|
||||||
|
"License :: OSI Approved :: MIT License"
|
||||||
|
]
|
||||||
|
dependencies = [
|
||||||
|
"feedparser>=6.0.11",
|
||||||
|
"platformdirs>=4.2.2",
|
||||||
|
"tomlkit>=0.12.5",
|
||||||
|
"requests>=2.32.3",
|
||||||
|
"click>=8.1.7",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.urls]
|
||||||
|
Homepage = "https://github.com/decarabas/cry"
|
||||||
|
Changelog = "https://github.com/decarabas/cry/releases"
|
||||||
|
Issues = "https://github.com/decarabas/cry/issues"
|
||||||
|
CI = "https://github.com/decarabas/cry/actions"
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
cry = "cry.cli:cli"
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
test = ["pytest"]
|
||||||
|
|
||||||
|
[tool.pyright]
|
||||||
|
exclude = [".venv"]
|
||||||
|
venvPath = "."
|
||||||
|
venv = ".venv"
|
||||||
10
tests/test_cry.py
Normal file
10
tests/test_cry.py
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
from click.testing import CliRunner
|
||||||
|
from cry.cli import cli
|
||||||
|
|
||||||
|
|
||||||
|
def test_version():
    """`cry --version` exits cleanly and reports the program version."""
    cli_runner = CliRunner()
    with cli_runner.isolated_filesystem():
        outcome = cli_runner.invoke(cli, ["--version"])
    assert outcome.exit_code == 0
    assert outcome.output.startswith("cli, version ")
|
||||||
Loading…
Add table
Add a link
Reference in a new issue