Fix cleaning text with character references
You need to call `.close()` to force the parser to flush all its buffered output.
This commit is contained in:
parent
522ec7144c
commit
073fab3566
3 changed files with 46 additions and 0 deletions
|
|
@ -284,6 +284,7 @@ def clean_text(text: str) -> str:
|
||||||
writer = io.StringIO()
|
writer = io.StringIO()
|
||||||
cleaner = Cleaner(writer)
|
cleaner = Cleaner(writer)
|
||||||
cleaner.feed(text)
|
cleaner.feed(text)
|
||||||
|
cleaner.close()
|
||||||
return MULTI_SPACES.sub(" ", writer.getvalue())
|
return MULTI_SPACES.sub(" ", writer.getvalue())
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
17
tests/feeds/steveklabnik.com.xml
Normal file
17
tests/feeds/steveklabnik.com.xml
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -1,6 +1,7 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import http.server
|
import http.server
|
||||||
|
import pathlib
|
||||||
import threading
|
import threading
|
||||||
import typing
|
import typing
|
||||||
|
|
||||||
|
|
@ -89,6 +90,22 @@ class TestWebServer:
|
||||||
|
|
||||||
self.respond(path, MockResponse(code, headers, content))
|
self.respond(path, MockResponse(code, headers, content))
|
||||||
|
|
||||||
|
def handle_canned_response(self, path: str, response_path: str):
    """Register a response for ``path`` loaded from a recorded response file.

    ``response_path`` is resolved relative to the directory containing this
    file. The file is read as bytes and is expected to hold a status line
    whose second whitespace-separated field is the numeric status code,
    then ``Name: value`` header lines, then a blank line, then the body.

    Raises:
        ValueError: if the status line has fewer than two fields or the
            status code is not an integer.
        AssertionError: if a header line contains no ``:`` separator.
    """
    headers = []
    with open(pathlib.Path(__file__).parent / response_path, "rb") as f:
        # Split on any whitespace and ignore everything after the code, so
        # both "HTTP/2 200" and "HTTP/1.1 200 OK" are accepted. Splitting
        # only once would leave "200 OK" behind and break int().
        _, code, *_ = f.readline().decode("utf-8").split()

        while True:
            line = f.readline().decode("utf-8").rstrip()
            if not line:
                # Blank line (or end of file): the header section is over.
                break
            name, separator, value = line.partition(":")
            assert separator == ":", f"malformed header line: {line!r}"
            # Whitespace around a header value is not part of the value.
            headers.append((name, value.strip()))

        # Everything left is the body, kept as raw bytes.
        content = f.read()

    self.respond(path, MockResponse(int(code), headers, content))
|
||||||
|
|
||||||
def respond(self, path: str, response: MockResponse):
|
def respond(self, path: str, response: MockResponse):
|
||||||
response_list = self.handlers.get(path)
|
response_list = self.handlers.get(path)
|
||||||
if response_list is None:
|
if response_list is None:
|
||||||
|
|
@ -198,3 +215,14 @@ def test_fetch_after_permanent_to_permanent_redirect():
|
||||||
# NOTE: we should record the latest redirect.
|
# NOTE: we should record the latest redirect.
|
||||||
assert new_meta.url == server.make_url("/two")
|
assert new_meta.url == server.make_url("/two")
|
||||||
assert isinstance(result, feed.Feed)
|
assert isinstance(result, feed.Feed)
|
||||||
|
|
||||||
|
|
||||||
|
def test_titles_with_entities():
    """Fetch the canned steveklabnik.com feed and check its first entry title.

    The expected title contains an ampersand, which exercises the handling
    of character references in titles.
    """
    with TestWebServer() as server:
        server.handle_canned_response("/feed", "feeds/steveklabnik.com.xml")

        feed_url = server.make_url("/feed")
        fetched, _ = asyncio.run(feed.fetch_feed(feed.FeedMeta.from_url(feed_url)))

        assert isinstance(fetched, feed.Feed)
        first_entry = fetched.entries[0]
        assert first_entry.title == "When should I use String vs &str?"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue