Remove origin from FeedMeta
It is barely used but complicates things mightily.
This commit is contained in:
parent
cce0ad9f8f
commit
a105cdc649
4 changed files with 24 additions and 31 deletions
|
|
@@ -56,7 +56,7 @@ def subscribe(url, literal):
|
||||||
|
|
||||||
if not literal:
|
if not literal:
|
||||||
click.echo(f"Searching for feeds for {url} ...")
|
click.echo(f"Searching for feeds for {url} ...")
|
||||||
feeds = asyncio.run(feed.feed_search(url, db.origin))
|
feeds = asyncio.run(feed.feed_search(url))
|
||||||
if len(feeds) == 0:
|
if len(feeds) == 0:
|
||||||
click.echo(f"Unable to find a suitable feed for {url}")
|
click.echo(f"Unable to find a suitable feed for {url}")
|
||||||
return 1
|
return 1
|
||||||
|
|
@@ -83,7 +83,7 @@ def subscribe(url, literal):
|
||||||
click.echo(f"Identified {result.meta.url} as a feed for {url}")
|
click.echo(f"Identified {result.meta.url} as a feed for {url}")
|
||||||
else:
|
else:
|
||||||
click.echo(f"Fetching {url} ...")
|
click.echo(f"Fetching {url} ...")
|
||||||
meta = feed.FeedMeta.from_url(url, db.origin)
|
meta = feed.FeedMeta.from_url(url)
|
||||||
d, meta = asyncio.run(feed.fetch_feed(meta))
|
d, meta = asyncio.run(feed.fetch_feed(meta))
|
||||||
if d is None:
|
if d is None:
|
||||||
click.echo(f"Unable to fetch {url}")
|
click.echo(f"Unable to fetch {url}")
|
||||||
|
|
@@ -112,7 +112,7 @@ def import_opml(opml_file):
|
||||||
|
|
||||||
db = database.Database.local()
|
db = database.Database.local()
|
||||||
urls = opml.parse_opml(opml_file.read())
|
urls = opml.parse_opml(opml_file.read())
|
||||||
metas = [feed.FeedMeta.from_url(url, db.origin) for url in urls]
|
metas = [feed.FeedMeta.from_url(url) for url in urls]
|
||||||
|
|
||||||
click.echo(f"Fetching {len(urls)} feeds ...")
|
click.echo(f"Fetching {len(urls)} feeds ...")
|
||||||
results = asyncio.run(feed.fetch_many(metas))
|
results = asyncio.run(feed.fetch_many(metas))
|
||||||
|
|
|
||||||
|
|
@@ -168,7 +168,6 @@ class Database:
|
||||||
status=int(status),
|
status=int(status),
|
||||||
etag=etag,
|
etag=etag,
|
||||||
modified=modified,
|
modified=modified,
|
||||||
origin=self.origin,
|
|
||||||
)
|
)
|
||||||
for url, last_fetched_ts, retry_after_ts, status, etag, modified in rows
|
for url, last_fetched_ts, retry_after_ts, status, etag, modified in rows
|
||||||
]
|
]
|
||||||
|
|
@@ -218,7 +217,6 @@ class Database:
|
||||||
status=status,
|
status=status,
|
||||||
etag=etag,
|
etag=etag,
|
||||||
modified=modified,
|
modified=modified,
|
||||||
origin=self.origin,
|
|
||||||
)
|
)
|
||||||
almost_feeds.append((meta, title, link))
|
almost_feeds.append((meta, title, link))
|
||||||
|
|
||||||
|
|
@@ -282,7 +280,6 @@ class Database:
|
||||||
status=status,
|
status=status,
|
||||||
etag=etag,
|
etag=etag,
|
||||||
modified=modified,
|
modified=modified,
|
||||||
origin=self.origin,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
cursor = self.db.execute(
|
cursor = self.db.execute(
|
||||||
|
|
|
||||||
35
cry/feed.py
35
cry/feed.py
|
|
@@ -38,10 +38,9 @@ class FeedMeta:
|
||||||
status: int
|
status: int
|
||||||
etag: str | None
|
etag: str | None
|
||||||
modified: str | None
|
modified: str | None
|
||||||
origin: str
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_url(cls, url: str, origin: str) -> "FeedMeta":
|
def from_url(cls, url: str) -> "FeedMeta":
|
||||||
return FeedMeta(
|
return FeedMeta(
|
||||||
url=url,
|
url=url,
|
||||||
last_fetched_ts=0,
|
last_fetched_ts=0,
|
||||||
|
|
@@ -49,7 +48,6 @@ class FeedMeta:
|
||||||
status=FEED_STATUS_ALIVE,
|
status=FEED_STATUS_ALIVE,
|
||||||
etag=None,
|
etag=None,
|
||||||
modified=None,
|
modified=None,
|
||||||
origin=origin,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def should_fetch(self, now) -> bool:
|
def should_fetch(self, now) -> bool:
|
||||||
|
|
@@ -147,6 +145,7 @@ class Feed:
|
||||||
link = None
|
link = None
|
||||||
|
|
||||||
if d.feed is not None:
|
if d.feed is not None:
|
||||||
|
assert not isinstance(d.feed, list)
|
||||||
title = d.feed.get("title")
|
title = d.feed.get("title")
|
||||||
link = d.feed.get("link")
|
link = d.feed.get("link")
|
||||||
|
|
||||||
|
|
@@ -428,7 +427,7 @@ async def fetch_many(
|
||||||
return [t.result() for t in tasks]
|
return [t.result() for t in tasks]
|
||||||
|
|
||||||
|
|
||||||
def merge_feeds(a: Feed, b: Feed) -> Feed:
|
def merge_feeds(a: Feed, a_origin: str, b: Feed, b_origin: str) -> Feed:
|
||||||
"""Merge two known feeds. There are two conflict resolution policies:
|
"""Merge two known feeds. There are two conflict resolution policies:
|
||||||
|
|
||||||
1. The newer fetch of feed metadata wins.
|
1. The newer fetch of feed metadata wins.
|
||||||
|
|
@@ -449,7 +448,7 @@ def merge_feeds(a: Feed, b: Feed) -> Feed:
|
||||||
if a.meta.last_fetched_ts > b.meta.last_fetched_ts:
|
if a.meta.last_fetched_ts > b.meta.last_fetched_ts:
|
||||||
source_feed = a
|
source_feed = a
|
||||||
elif a.meta.last_fetched_ts == b.meta.last_fetched_ts:
|
elif a.meta.last_fetched_ts == b.meta.last_fetched_ts:
|
||||||
source_feed = a if a.meta.origin < b.meta.origin else b
|
source_feed = a if a_origin < b_origin else b
|
||||||
else:
|
else:
|
||||||
source_feed = b
|
source_feed = b
|
||||||
|
|
||||||
|
|
@@ -569,11 +568,11 @@ def is_XML_related_link(link: str) -> bool:
|
||||||
return "rss" in link or "rdf" in link or "xml" in link or "atom" in link
|
return "rss" in link or "rdf" in link or "xml" in link or "atom" in link
|
||||||
|
|
||||||
|
|
||||||
async def check_feed(url: str, origin: str) -> Feed | None:
|
async def check_feed(url: str) -> Feed | None:
|
||||||
"""Check to see if the given URL is a feed. If it is, return the feed,
|
"""Check to see if the given URL is a feed. If it is, return the feed,
|
||||||
otherwise return None.
|
otherwise return None.
|
||||||
"""
|
"""
|
||||||
meta = FeedMeta.from_url(url, origin)
|
meta = FeedMeta.from_url(url)
|
||||||
result, meta = await fetch_feed(meta)
|
result, meta = await fetch_feed(meta)
|
||||||
if isinstance(result, Feed):
|
if isinstance(result, Feed):
|
||||||
return result
|
return result
|
||||||
|
|
@@ -581,13 +580,13 @@ async def check_feed(url: str, origin: str) -> Feed | None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
async def check_links(links: typing.Iterable[str], origin: str) -> list[Feed]:
|
async def check_links(links: typing.Iterable[str]) -> list[Feed]:
|
||||||
"""Fetch all the links and return the ones that appear to have feeds in
|
"""Fetch all the links and return the ones that appear to have feeds in
|
||||||
them. If none of them are fetchable or none of them have feeds then this
|
them. If none of them are fetchable or none of them have feeds then this
|
||||||
will return nothing.
|
will return nothing.
|
||||||
"""
|
"""
|
||||||
async with asyncio.TaskGroup() as group:
|
async with asyncio.TaskGroup() as group:
|
||||||
tasks = [group.create_task(check_feed(link, origin)) for link in links]
|
tasks = [group.create_task(check_feed(link)) for link in links]
|
||||||
|
|
||||||
outfeeds: list[Feed] = []
|
outfeeds: list[Feed] = []
|
||||||
for task in tasks:
|
for task in tasks:
|
||||||
|
|
@@ -598,8 +597,8 @@ async def check_links(links: typing.Iterable[str], origin: str) -> list[Feed]:
|
||||||
return outfeeds
|
return outfeeds
|
||||||
|
|
||||||
|
|
||||||
async def feed_search(uri: str, origin: str) -> list[Feed]:
|
async def feed_search(uri: str) -> list[Feed]:
|
||||||
meta = FeedMeta.from_url(massage_url(uri), origin)
|
meta = FeedMeta.from_url(massage_url(uri))
|
||||||
result, meta = await fetch_feed(meta)
|
result, meta = await fetch_feed(meta)
|
||||||
if result is None:
|
if result is None:
|
||||||
return []
|
return []
|
||||||
|
|
@@ -611,22 +610,22 @@ async def feed_search(uri: str, origin: str) -> list[Feed]:
|
||||||
parser.feed(result)
|
parser.feed(result)
|
||||||
|
|
||||||
LOG.debug("Checking links...")
|
LOG.debug("Checking links...")
|
||||||
outfeeds = await check_links(parser.link_links, origin)
|
outfeeds = await check_links(parser.link_links)
|
||||||
if len(outfeeds) > 0:
|
if len(outfeeds) > 0:
|
||||||
return outfeeds
|
return outfeeds
|
||||||
|
|
||||||
LOG.debug("No links, checking A tags...")
|
LOG.debug("No links, checking A tags...")
|
||||||
local_links, remote_links = classify_links(parser.a_links, meta.url)
|
local_links, remote_links = classify_links(parser.a_links, meta.url)
|
||||||
outfeeds = await check_links(filter(is_feed_link, local_links), origin)
|
outfeeds = await check_links(filter(is_feed_link, local_links))
|
||||||
if len(outfeeds) > 0:
|
if len(outfeeds) > 0:
|
||||||
return outfeeds
|
return outfeeds
|
||||||
outfeeds = await check_links(filter(is_XML_related_link, local_links), origin)
|
outfeeds = await check_links(filter(is_XML_related_link, local_links))
|
||||||
if len(outfeeds) > 0:
|
if len(outfeeds) > 0:
|
||||||
return outfeeds
|
return outfeeds
|
||||||
outfeeds = await check_links(filter(is_feed_link, remote_links), origin)
|
outfeeds = await check_links(filter(is_feed_link, remote_links))
|
||||||
if len(outfeeds) > 0:
|
if len(outfeeds) > 0:
|
||||||
return outfeeds
|
return outfeeds
|
||||||
outfeeds = await check_links(filter(is_XML_related_link, remote_links), origin)
|
outfeeds = await check_links(filter(is_XML_related_link, remote_links))
|
||||||
if len(outfeeds) > 0:
|
if len(outfeeds) > 0:
|
||||||
return outfeeds
|
return outfeeds
|
||||||
|
|
||||||
|
|
@@ -639,7 +638,5 @@ async def feed_search(uri: str, origin: str) -> list[Feed]:
|
||||||
"index.xml", # MT
|
"index.xml", # MT
|
||||||
"index.rss", # Slash
|
"index.rss", # Slash
|
||||||
]
|
]
|
||||||
outfeeds = await check_links(
|
outfeeds = await check_links([urllib.parse.urljoin(meta.url, x) for x in suffixes])
|
||||||
[urllib.parse.urljoin(meta.url, x) for x in suffixes], origin
|
|
||||||
)
|
|
||||||
return outfeeds
|
return outfeeds
|
||||||
|
|
|
||||||
|
|
@@ -4,7 +4,6 @@ import http.server
|
||||||
import threading
|
import threading
|
||||||
import typing
|
import typing
|
||||||
|
|
||||||
import requests
|
|
||||||
|
|
||||||
from cry import feed
|
from cry import feed
|
||||||
|
|
||||||
|
|
@@ -118,7 +117,7 @@ def test_basic_successful_fetch():
|
||||||
with TestWebServer() as server:
|
with TestWebServer() as server:
|
||||||
server.handle("/", TEST_FEED, content_type="text/xml")
|
server.handle("/", TEST_FEED, content_type="text/xml")
|
||||||
|
|
||||||
meta = feed.FeedMeta.from_url(server.make_url("/"), "asdf")
|
meta = feed.FeedMeta.from_url(server.make_url("/"))
|
||||||
result, new_meta = asyncio.run(feed.fetch_feed(meta))
|
result, new_meta = asyncio.run(feed.fetch_feed(meta))
|
||||||
|
|
||||||
assert new_meta.url == meta.url
|
assert new_meta.url == meta.url
|
||||||
|
|
@@ -132,7 +131,7 @@ def test_fetch_after_temp_redirect():
|
||||||
server.handle("/old", code=307, headers=[("location", "/temp")])
|
server.handle("/old", code=307, headers=[("location", "/temp")])
|
||||||
server.handle("/temp", TEST_FEED, content_type="text/xml")
|
server.handle("/temp", TEST_FEED, content_type="text/xml")
|
||||||
|
|
||||||
meta = feed.FeedMeta.from_url(server.make_url("/old"), "asdf")
|
meta = feed.FeedMeta.from_url(server.make_url("/old"))
|
||||||
result, new_meta = asyncio.run(feed.fetch_feed(meta))
|
result, new_meta = asyncio.run(feed.fetch_feed(meta))
|
||||||
assert new_meta.url == meta.url
|
assert new_meta.url == meta.url
|
||||||
assert isinstance(result, feed.Feed)
|
assert isinstance(result, feed.Feed)
|
||||||
|
|
@@ -143,7 +142,7 @@ def test_fetch_after_permanent_redirect():
|
||||||
server.handle("/old", code=308, headers=[("location", "/perm")])
|
server.handle("/old", code=308, headers=[("location", "/perm")])
|
||||||
server.handle("/perm", TEST_FEED, content_type="text/xml")
|
server.handle("/perm", TEST_FEED, content_type="text/xml")
|
||||||
|
|
||||||
meta = feed.FeedMeta.from_url(server.make_url("/old"), "asdf")
|
meta = feed.FeedMeta.from_url(server.make_url("/old"))
|
||||||
result, new_meta = asyncio.run(feed.fetch_feed(meta))
|
result, new_meta = asyncio.run(feed.fetch_feed(meta))
|
||||||
assert new_meta.url == server.make_url("/perm")
|
assert new_meta.url == server.make_url("/perm")
|
||||||
assert isinstance(result, feed.Feed)
|
assert isinstance(result, feed.Feed)
|
||||||
|
|
@@ -155,7 +154,7 @@ def test_fetch_after_permanent_to_temporary_redirect():
|
||||||
server.handle("/perm", code=307, headers=[("location", "/temp")])
|
server.handle("/perm", code=307, headers=[("location", "/temp")])
|
||||||
server.handle("/temp", TEST_FEED, content_type="text/xml")
|
server.handle("/temp", TEST_FEED, content_type="text/xml")
|
||||||
|
|
||||||
meta = feed.FeedMeta.from_url(server.make_url("/old"), "asdf")
|
meta = feed.FeedMeta.from_url(server.make_url("/old"))
|
||||||
result, new_meta = asyncio.run(feed.fetch_feed(meta))
|
result, new_meta = asyncio.run(feed.fetch_feed(meta))
|
||||||
|
|
||||||
# NOTE: we should record the PERMANENT redirect, not the temporary one.
|
# NOTE: we should record the PERMANENT redirect, not the temporary one.
|
||||||
|
|
@@ -169,7 +168,7 @@ def test_fetch_after_permanent_to_permanent_redirect():
|
||||||
server.handle("/one", code=308, headers=[("location", "/two")])
|
server.handle("/one", code=308, headers=[("location", "/two")])
|
||||||
server.handle("/two", TEST_FEED, content_type="text/xml")
|
server.handle("/two", TEST_FEED, content_type="text/xml")
|
||||||
|
|
||||||
meta = feed.FeedMeta.from_url(server.make_url("/old"), "asdf")
|
meta = feed.FeedMeta.from_url(server.make_url("/old"))
|
||||||
result, new_meta = asyncio.run(feed.fetch_feed(meta))
|
result, new_meta = asyncio.run(feed.fetch_feed(meta))
|
||||||
|
|
||||||
# NOTE: we should record the latest redirect.
|
# NOTE: we should record the latest redirect.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue