Fix cleaning text with character references

You need to call `.close()` to force the parser to flush all its
buffered output.
This commit is contained in:
John Doty 2024-10-20 08:06:24 -07:00
parent 522ec7144c
commit 073fab3566
3 changed files with 46 additions and 0 deletions

File diff suppressed because one or more lines are too long

View file

@@ -1,6 +1,7 @@
import asyncio
import dataclasses
import http.server
import pathlib
import threading
import typing
@@ -89,6 +90,22 @@ class TestWebServer:
self.respond(path, MockResponse(code, headers, content))
def handle_canned_response(self, path: str, response_path: str):
    """Register a canned HTTP response, loaded from a file, for ``path``.

    ``response_path`` is resolved relative to the directory containing this
    file. The file is read as bytes and is expected to be laid out like a
    raw HTTP response: a status line, then ``Name: value`` header lines,
    then an empty line, then the body. The body bytes are passed through
    unmodified.

    The status code is taken from the second whitespace-separated field of
    the status line, so both ``HTTP/2 200`` and ``HTTP/1.1 200 OK`` are
    accepted.

    Raises:
        ValueError: if the status line has no status-code field, or that
            field is not an integer.
    """
    headers = []
    with open(pathlib.Path(__file__).parent / response_path, "rb") as f:
        # Status line: "<version> <code> [reason phrase]". Only the code is
        # used; a trailing reason phrase (or trailing blanks) is ignored.
        status_fields = f.readline().decode("utf-8").split()
        if len(status_fields) < 2:
            raise ValueError(f"{response_path}: malformed status line")
        code = int(status_fields[1])

        # Header lines run until the first empty line (or end of file).
        while True:
            line = f.readline().decode("utf-8").rstrip()
            if not line:
                break
            # Split on the first colon only, so values may contain colons.
            # The value keeps any whitespace that followed the colon.
            header = tuple(line.split(":", maxsplit=1))
            assert len(header) == 2, f"malformed header line: {line!r}"
            headers.append(header)

        # Everything after the blank separator line is the response body.
        content = f.read()

    self.respond(path, MockResponse(code, headers, content))
def respond(self, path: str, response: MockResponse):
response_list = self.handlers.get(path)
if response_list is None:
@@ -198,3 +215,14 @@ def test_fetch_after_permanent_to_permanent_redirect():
# NOTE: we should record the latest redirect.
assert new_meta.url == server.make_url("/two")
assert isinstance(result, feed.Feed)
def test_titles_with_entities():
    """Fetch a canned feed and check the first entry's title.

    The expected title contains a literal ``&``; the test pins that the
    title comes back complete after fetching and parsing.
    """
    with TestWebServer() as server:
        # Serve the recorded response for /feed from the local test server.
        server.handle_canned_response("/feed", "feeds/steveklabnik.com.xml")

        feed_url = server.make_url("/feed")
        fetched, _ = asyncio.run(
            feed.fetch_feed(feed.FeedMeta.from_url(feed_url))
        )

        assert isinstance(fetched, feed.Feed)
        first_entry = fetched.entries[0]
        assert first_entry.title == "When should I use String vs &str?"