From 4941cd049ccfc6515774343550aa5a870f6daf8a Mon Sep 17 00:00:00 2001 From: John Doty Date: Fri, 6 Sep 2024 11:50:17 -0700 Subject: [PATCH] Helper routines for generating source code This includes "signing" source to detect modifications, and maintaining user-modified sections. Hooray! --- parser/generated_source.py | 107 ++++++++++++++++++++++++++++++++ tests/test_generated_source.py | 110 +++++++++++++++++++++++++++++++++ 2 files changed, 217 insertions(+) create mode 100644 parser/generated_source.py create mode 100644 tests/test_generated_source.py diff --git a/parser/generated_source.py b/parser/generated_source.py new file mode 100644 index 0000000..0351181 --- /dev/null +++ b/parser/generated_source.py @@ -0,0 +1,107 @@ +import hashlib +import re +import typing + +_SIGNING_SLUG = "!*RVCugYltjOsekrgCXTlKuqIrfy4-ScohO22mEDCr2ts" +_SIGNING_PREFIX = "generated source" + +_BEGIN_PATTERN = re.compile("BEGIN MANUAL SECTION ([^ ]+)") +_END_PATTERN = re.compile("END MANUAL SECTION") +_SIGNATURE_PATTERN = re.compile(_SIGNING_PREFIX + " Signed<<([0-9a-f]+)>>") + + +def signature_token() -> str: + return _SIGNING_PREFIX + " " + _SIGNING_SLUG + + +def begin_manual_section(name: str) -> str: + return f"BEGIN MANUAL SECTION {name}" + + +def end_manual_section() -> str: + return f"END MANUAL SECTION" + + +def _compute_digest(source: str) -> str: + m = hashlib.sha256() + for section, lines in _iterate_sections(source): + if section is None: + for line in lines: + m.update(line.encode("utf-8")) + return m.hexdigest() + + +def sign_generated_source(source: str) -> str: + # Only compute the hash over the automatically generated sections of the + # source file. 
+ digest = _compute_digest(source) + signed = source.replace(_SIGNING_SLUG, f"Signed<<{digest}>>") + if signed == source: + raise ValueError("Source did not contain a signature token to replace") + return signed + + +def is_signed(source: str) -> bool: + return _SIGNATURE_PATTERN.search(source) is not None + + +def validate_signature(source: str) -> bool: + signatures = [m.group(1) for m in _SIGNATURE_PATTERN.finditer(source)] + if len(signatures) > 1: + raise ValueError("Multiple signatures found in source") + if len(signatures) == 0: + raise ValueError("Source does not appear to be signed") + signature: str = signatures[0] + + unsigned = source.replace(f"Signed<<{signature}>>", _SIGNING_SLUG) + actual = _compute_digest(unsigned) + + return signature == actual + + +def merge_existing(existing: str, generated: str) -> str: + manual_sections = _extract_manual_sections(existing) + + result_lines = [] + for section, lines in _iterate_sections(generated): + if section is not None: + lines = manual_sections.get(section, lines) + result_lines.extend(lines) + + return "".join(result_lines) + + +def _extract_manual_sections(code: str) -> dict[str, list[str]]: + result = {} + for section, lines in _iterate_sections(code): + if section is not None: + existing = result.get(section) + if existing is not None: + existing.extend(lines) + else: + result[section] = lines + return result + + +def _iterate_sections(code: str) -> typing.Generator[tuple[str | None, list[str]], None, None]: + current_section: str | None = None + current_lines = [] + for line in code.splitlines(keepends=True): + if current_section is None: + current_lines.append(line) + match = _BEGIN_PATTERN.search(line) + if match is None: + continue + + yield (None, current_lines) + current_lines = [] + current_section = match.group(1) + else: + if _END_PATTERN.search(line): + yield (current_section, current_lines) + current_lines = [] + current_section = None + + current_lines.append(line) + + yield 
# tests/test_generated_source.py
"""Tests for the signing and manual-section helpers in parser.generated_source."""
import parser.generated_source as generated_source


def test_signature():
    """A freshly signed document is reported as signed and validates."""
    begin = generated_source.begin_manual_section
    end = generated_source.end_manual_section
    unsigned = f"""
This is a random thing.

Put your slug here: {generated_source.signature_token()}

Here are some more things:

 - Machine Generated
 - More Machine Gnerated
{begin('foo')}
 - You can edit here!
{end()}
 - But not here.
{begin('bar')}
 - You can edit here too!
{end()}
 - Also not here.
"""
    signed = generated_source.sign_generated_source(unsigned)

    assert signed != unsigned
    assert generated_source.is_signed(signed)
    assert generated_source.validate_signature(signed)


def test_manual_changes():
    """Editing inside a manual section leaves the signature valid."""
    begin = generated_source.begin_manual_section
    end = generated_source.end_manual_section
    unsigned = f"""
This is a random thing.

Put your slug here: {generated_source.signature_token()}

Here are some more things:

 - Machine Generated
 - More Machine Gnerated
{begin('foo')}
 - XXXXX
{end()}
 - But not here.
"""
    signed = generated_source.sign_generated_source(unsigned)
    edited = signed.replace("XXXXX", "YYYYY")

    # Make sure the edit actually landed before checking its effect.
    assert edited != signed
    assert generated_source.is_signed(edited)
    assert generated_source.validate_signature(edited)


def test_bad_changes():
    """Editing generated content outside a manual section breaks the signature."""
    begin = generated_source.begin_manual_section
    end = generated_source.end_manual_section
    unsigned = f"""
This is a random thing.

Put your slug here: {generated_source.signature_token()}

Here are some more things:

 - Machine Generated
 - More Machine Gnerated
{begin('foo')}
 - XXXXX
{end()}
 - ZZZZZ
"""
    signed = generated_source.sign_generated_source(unsigned)
    edited = signed.replace("ZZZZZ", "YYYYY")

    # Make sure the edit actually landed before checking its effect.
    assert edited != signed
    assert generated_source.is_signed(edited)
    assert not generated_source.validate_signature(edited)


def test_merge_changes():
    """Manual bodies follow their section name into the regenerated layout."""
    begin = generated_source.begin_manual_section
    end = generated_source.end_manual_section

    previous = f"""
A
// {begin('foo')}
B
// {end()}
C
// {begin('bar')}
D
// {end()}
"""
    regenerated = f"""
E
// {begin('bar')}
F
// {end()}
// {begin('foo')}
G
// {end()}
H
"""
    # Generated text (E, H, marker lines) comes from the new output; the
    # manual bodies (D, B) come from the previous file, matched by name.
    expected = f"""
E
// {begin('bar')}
D
// {end()}
// {begin('foo')}
B
// {end()}
H
"""

    merged = generated_source.merge_existing(previous, regenerated)
    assert merged == expected