Remove origin from FeedMeta

It is barely used but complicates things mightily.
This commit is contained in:
John Doty 2024-07-17 06:16:47 -07:00
parent cce0ad9f8f
commit a105cdc649
4 changed files with 24 additions and 31 deletions

View file

@ -56,7 +56,7 @@ def subscribe(url, literal):
if not literal: if not literal:
click.echo(f"Searching for feeds for {url} ...") click.echo(f"Searching for feeds for {url} ...")
feeds = asyncio.run(feed.feed_search(url, db.origin)) feeds = asyncio.run(feed.feed_search(url))
if len(feeds) == 0: if len(feeds) == 0:
click.echo(f"Unable to find a suitable feed for {url}") click.echo(f"Unable to find a suitable feed for {url}")
return 1 return 1
@ -83,7 +83,7 @@ def subscribe(url, literal):
click.echo(f"Identified {result.meta.url} as a feed for {url}") click.echo(f"Identified {result.meta.url} as a feed for {url}")
else: else:
click.echo(f"Fetching {url} ...") click.echo(f"Fetching {url} ...")
meta = feed.FeedMeta.from_url(url, db.origin) meta = feed.FeedMeta.from_url(url)
d, meta = asyncio.run(feed.fetch_feed(meta)) d, meta = asyncio.run(feed.fetch_feed(meta))
if d is None: if d is None:
click.echo(f"Unable to fetch {url}") click.echo(f"Unable to fetch {url}")
@ -112,7 +112,7 @@ def import_opml(opml_file):
db = database.Database.local() db = database.Database.local()
urls = opml.parse_opml(opml_file.read()) urls = opml.parse_opml(opml_file.read())
metas = [feed.FeedMeta.from_url(url, db.origin) for url in urls] metas = [feed.FeedMeta.from_url(url) for url in urls]
click.echo(f"Fetching {len(urls)} feeds ...") click.echo(f"Fetching {len(urls)} feeds ...")
results = asyncio.run(feed.fetch_many(metas)) results = asyncio.run(feed.fetch_many(metas))

View file

@ -168,7 +168,6 @@ class Database:
status=int(status), status=int(status),
etag=etag, etag=etag,
modified=modified, modified=modified,
origin=self.origin,
) )
for url, last_fetched_ts, retry_after_ts, status, etag, modified in rows for url, last_fetched_ts, retry_after_ts, status, etag, modified in rows
] ]
@ -218,7 +217,6 @@ class Database:
status=status, status=status,
etag=etag, etag=etag,
modified=modified, modified=modified,
origin=self.origin,
) )
almost_feeds.append((meta, title, link)) almost_feeds.append((meta, title, link))
@ -282,7 +280,6 @@ class Database:
status=status, status=status,
etag=etag, etag=etag,
modified=modified, modified=modified,
origin=self.origin,
) )
cursor = self.db.execute( cursor = self.db.execute(

View file

@ -38,10 +38,9 @@ class FeedMeta:
status: int status: int
etag: str | None etag: str | None
modified: str | None modified: str | None
origin: str
@classmethod @classmethod
def from_url(cls, url: str, origin: str) -> "FeedMeta": def from_url(cls, url: str) -> "FeedMeta":
return FeedMeta( return FeedMeta(
url=url, url=url,
last_fetched_ts=0, last_fetched_ts=0,
@ -49,7 +48,6 @@ class FeedMeta:
status=FEED_STATUS_ALIVE, status=FEED_STATUS_ALIVE,
etag=None, etag=None,
modified=None, modified=None,
origin=origin,
) )
def should_fetch(self, now) -> bool: def should_fetch(self, now) -> bool:
@ -147,6 +145,7 @@ class Feed:
link = None link = None
if d.feed is not None: if d.feed is not None:
assert not isinstance(d.feed, list)
title = d.feed.get("title") title = d.feed.get("title")
link = d.feed.get("link") link = d.feed.get("link")
@ -428,7 +427,7 @@ async def fetch_many(
return [t.result() for t in tasks] return [t.result() for t in tasks]
def merge_feeds(a: Feed, b: Feed) -> Feed: def merge_feeds(a: Feed, a_origin: str, b: Feed, b_origin: str) -> Feed:
"""Merge two known feeds. There are two conflict resolution policies: """Merge two known feeds. There are two conflict resolution policies:
1. The newer fetch of feed metadata wins. 1. The newer fetch of feed metadata wins.
@ -449,7 +448,7 @@ def merge_feeds(a: Feed, b: Feed) -> Feed:
if a.meta.last_fetched_ts > b.meta.last_fetched_ts: if a.meta.last_fetched_ts > b.meta.last_fetched_ts:
source_feed = a source_feed = a
elif a.meta.last_fetched_ts == b.meta.last_fetched_ts: elif a.meta.last_fetched_ts == b.meta.last_fetched_ts:
source_feed = a if a.meta.origin < b.meta.origin else b source_feed = a if a_origin < b_origin else b
else: else:
source_feed = b source_feed = b
@ -569,11 +568,11 @@ def is_XML_related_link(link: str) -> bool:
return "rss" in link or "rdf" in link or "xml" in link or "atom" in link return "rss" in link or "rdf" in link or "xml" in link or "atom" in link
async def check_feed(url: str, origin: str) -> Feed | None: async def check_feed(url: str) -> Feed | None:
"""Check to see if the given URL is a feed. If it is, return the feed, """Check to see if the given URL is a feed. If it is, return the feed,
otherwise return None. otherwise return None.
""" """
meta = FeedMeta.from_url(url, origin) meta = FeedMeta.from_url(url)
result, meta = await fetch_feed(meta) result, meta = await fetch_feed(meta)
if isinstance(result, Feed): if isinstance(result, Feed):
return result return result
@ -581,13 +580,13 @@ async def check_feed(url: str, origin: str) -> Feed | None:
return None return None
async def check_links(links: typing.Iterable[str], origin: str) -> list[Feed]: async def check_links(links: typing.Iterable[str]) -> list[Feed]:
"""Fetch all the links and return the ones that appear to have feeds in """Fetch all the links and return the ones that appear to have feeds in
them. If none of them are fetchable or none of them have feeds then this them. If none of them are fetchable or none of them have feeds then this
will return nothing. will return nothing.
""" """
async with asyncio.TaskGroup() as group: async with asyncio.TaskGroup() as group:
tasks = [group.create_task(check_feed(link, origin)) for link in links] tasks = [group.create_task(check_feed(link)) for link in links]
outfeeds: list[Feed] = [] outfeeds: list[Feed] = []
for task in tasks: for task in tasks:
@ -598,8 +597,8 @@ async def check_links(links: typing.Iterable[str], origin: str) -> list[Feed]:
return outfeeds return outfeeds
async def feed_search(uri: str, origin: str) -> list[Feed]: async def feed_search(uri: str) -> list[Feed]:
meta = FeedMeta.from_url(massage_url(uri), origin) meta = FeedMeta.from_url(massage_url(uri))
result, meta = await fetch_feed(meta) result, meta = await fetch_feed(meta)
if result is None: if result is None:
return [] return []
@ -611,22 +610,22 @@ async def feed_search(uri: str, origin: str) -> list[Feed]:
parser.feed(result) parser.feed(result)
LOG.debug("Checking links...") LOG.debug("Checking links...")
outfeeds = await check_links(parser.link_links, origin) outfeeds = await check_links(parser.link_links)
if len(outfeeds) > 0: if len(outfeeds) > 0:
return outfeeds return outfeeds
LOG.debug("No links, checking A tags...") LOG.debug("No links, checking A tags...")
local_links, remote_links = classify_links(parser.a_links, meta.url) local_links, remote_links = classify_links(parser.a_links, meta.url)
outfeeds = await check_links(filter(is_feed_link, local_links), origin) outfeeds = await check_links(filter(is_feed_link, local_links))
if len(outfeeds) > 0: if len(outfeeds) > 0:
return outfeeds return outfeeds
outfeeds = await check_links(filter(is_XML_related_link, local_links), origin) outfeeds = await check_links(filter(is_XML_related_link, local_links))
if len(outfeeds) > 0: if len(outfeeds) > 0:
return outfeeds return outfeeds
outfeeds = await check_links(filter(is_feed_link, remote_links), origin) outfeeds = await check_links(filter(is_feed_link, remote_links))
if len(outfeeds) > 0: if len(outfeeds) > 0:
return outfeeds return outfeeds
outfeeds = await check_links(filter(is_XML_related_link, remote_links), origin) outfeeds = await check_links(filter(is_XML_related_link, remote_links))
if len(outfeeds) > 0: if len(outfeeds) > 0:
return outfeeds return outfeeds
@ -639,7 +638,5 @@ async def feed_search(uri: str, origin: str) -> list[Feed]:
"index.xml", # MT "index.xml", # MT
"index.rss", # Slash "index.rss", # Slash
] ]
outfeeds = await check_links( outfeeds = await check_links([urllib.parse.urljoin(meta.url, x) for x in suffixes])
[urllib.parse.urljoin(meta.url, x) for x in suffixes], origin
)
return outfeeds return outfeeds

View file

@ -4,7 +4,6 @@ import http.server
import threading import threading
import typing import typing
import requests
from cry import feed from cry import feed
@ -118,7 +117,7 @@ def test_basic_successful_fetch():
with TestWebServer() as server: with TestWebServer() as server:
server.handle("/", TEST_FEED, content_type="text/xml") server.handle("/", TEST_FEED, content_type="text/xml")
meta = feed.FeedMeta.from_url(server.make_url("/"), "asdf") meta = feed.FeedMeta.from_url(server.make_url("/"))
result, new_meta = asyncio.run(feed.fetch_feed(meta)) result, new_meta = asyncio.run(feed.fetch_feed(meta))
assert new_meta.url == meta.url assert new_meta.url == meta.url
@ -132,7 +131,7 @@ def test_fetch_after_temp_redirect():
server.handle("/old", code=307, headers=[("location", "/temp")]) server.handle("/old", code=307, headers=[("location", "/temp")])
server.handle("/temp", TEST_FEED, content_type="text/xml") server.handle("/temp", TEST_FEED, content_type="text/xml")
meta = feed.FeedMeta.from_url(server.make_url("/old"), "asdf") meta = feed.FeedMeta.from_url(server.make_url("/old"))
result, new_meta = asyncio.run(feed.fetch_feed(meta)) result, new_meta = asyncio.run(feed.fetch_feed(meta))
assert new_meta.url == meta.url assert new_meta.url == meta.url
assert isinstance(result, feed.Feed) assert isinstance(result, feed.Feed)
@ -143,7 +142,7 @@ def test_fetch_after_permanent_redirect():
server.handle("/old", code=308, headers=[("location", "/perm")]) server.handle("/old", code=308, headers=[("location", "/perm")])
server.handle("/perm", TEST_FEED, content_type="text/xml") server.handle("/perm", TEST_FEED, content_type="text/xml")
meta = feed.FeedMeta.from_url(server.make_url("/old"), "asdf") meta = feed.FeedMeta.from_url(server.make_url("/old"))
result, new_meta = asyncio.run(feed.fetch_feed(meta)) result, new_meta = asyncio.run(feed.fetch_feed(meta))
assert new_meta.url == server.make_url("/perm") assert new_meta.url == server.make_url("/perm")
assert isinstance(result, feed.Feed) assert isinstance(result, feed.Feed)
@ -155,7 +154,7 @@ def test_fetch_after_permanent_to_temporary_redirect():
server.handle("/perm", code=307, headers=[("location", "/temp")]) server.handle("/perm", code=307, headers=[("location", "/temp")])
server.handle("/temp", TEST_FEED, content_type="text/xml") server.handle("/temp", TEST_FEED, content_type="text/xml")
meta = feed.FeedMeta.from_url(server.make_url("/old"), "asdf") meta = feed.FeedMeta.from_url(server.make_url("/old"))
result, new_meta = asyncio.run(feed.fetch_feed(meta)) result, new_meta = asyncio.run(feed.fetch_feed(meta))
# NOTE: we should record the PERMANENT redirect, not the temporary one. # NOTE: we should record the PERMANENT redirect, not the temporary one.
@ -169,7 +168,7 @@ def test_fetch_after_permanent_to_permanent_redirect():
server.handle("/one", code=308, headers=[("location", "/two")]) server.handle("/one", code=308, headers=[("location", "/two")])
server.handle("/two", TEST_FEED, content_type="text/xml") server.handle("/two", TEST_FEED, content_type="text/xml")
meta = feed.FeedMeta.from_url(server.make_url("/old"), "asdf") meta = feed.FeedMeta.from_url(server.make_url("/old"))
result, new_meta = asyncio.run(feed.fetch_feed(meta)) result, new_meta = asyncio.run(feed.fetch_feed(meta))
# NOTE: we should record the latest redirect. # NOTE: we should record the latest redirect.