diff --git a/grammar.py b/grammar.py index a643b68..c0a6e08 100644 --- a/grammar.py +++ b/grammar.py @@ -6,14 +6,16 @@ from parser import ( Rule, Terminal, alt, + br, group, highlight, indent, mark, - newline, + nl, opt, rule, seq, + sp, ) @@ -53,7 +55,7 @@ class FineGrammar(Grammar): def _file_statement_list(self) -> Rule: return alt( self._file_statement, - self._file_statement_list + newline() + self._file_statement, + self._file_statement_list + nl + self._file_statement, ) @rule @@ -64,7 +66,9 @@ class FineGrammar(Grammar): @rule def import_statement(self) -> Rule: - return seq(self.IMPORT, self.STRING, self.AS, self.IDENTIFIER, self.SEMICOLON) + return group( + self.IMPORT, sp, self.STRING, sp, self.AS, sp, self.IDENTIFIER, sp, self.SEMICOLON + ) @rule("ClassDeclaration") def class_declaration(self) -> Rule: @@ -72,16 +76,14 @@ class FineGrammar(Grammar): group( group( self.CLASS, - newline(), + sp, mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type), + sp, ), self.LCURLY, ), - indent( - newline(), - mark(opt(self.class_body), field="body"), - ), - newline(), + indent(nl, mark(opt(self.class_body), field="body")), + nl, self.RCURLY, ) @@ -99,7 +101,7 @@ class FineGrammar(Grammar): @rule("FieldDecl") def field_declaration(self) -> Rule: - return seq(self.IDENTIFIER, self.COLON, self.type_expression, self.SEMICOLON) + return group(self.IDENTIFIER, self.COLON, sp, self.type_expression, self.SEMICOLON) + nl # Types @rule("TypeExpression") @@ -108,7 +110,7 @@ class FineGrammar(Grammar): @rule("AlternateType") def alternate_type(self) -> Rule: - return seq(self.type_expression, self.OR, self.type_identifier) + return group(self.type_expression, sp, self.OR, sp, self.type_identifier) @rule("TypeIdentifier") def type_identifier(self) -> Rule: @@ -117,28 +119,28 @@ class FineGrammar(Grammar): @rule def export_statement(self) -> Rule: return alt( - seq(self.EXPORT, self.class_declaration), - seq(self.EXPORT, self.function_declaration), - seq(self.EXPORT, self.let_statement), - seq(self.EXPORT, self.export_list, self.SEMICOLON), + group(self.EXPORT, sp, self.class_declaration), + group(self.EXPORT, sp, self.function_declaration), + group(self.EXPORT, sp, self.let_statement), + group(self.EXPORT, sp, self.export_list, self.SEMICOLON), ) @rule def export_list(self) -> Rule: - return ( - self.IDENTIFIER - | seq(self.IDENTIFIER, self.COMMA) - | seq(self.IDENTIFIER, self.COMMA, self.export_list) - ) + return self.IDENTIFIER | seq(self.IDENTIFIER, self.COMMA, sp, self.export_list) # Functions @rule("FunctionDecl") def function_declaration(self) -> Rule: return seq( - self.FUN, - mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.function), - mark(self.function_parameters, field="parameters"), - mark(opt(self.ARROW, self.type_expression), field="return_type"), + group( + self.FUN, + sp, + mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.function), + sp, + mark(self.function_parameters, field="parameters"), + mark(opt(sp, self.ARROW, sp, self.type_expression), field="return_type"), + ), mark(self.block, field="body"), ) @@ -147,14 +149,14 @@ class FineGrammar(Grammar): return group( self.LPAREN, indent( - newline(), + nl, opt( self._first_parameter | seq(self._first_parameter, self.COMMA) - | group(self._first_parameter, self.COMMA, newline(), self._parameter_list) + | group(self._first_parameter, self.COMMA, sp, self._parameter_list) ), ), - newline(), + nl, self.RPAREN, ) @@ -164,18 +166,18 @@ class FineGrammar(Grammar): @rule def _parameter_list(self) -> Rule: - return self.parameter | seq(self.parameter, self.COMMA, newline(), self._parameter_list) + return self.parameter | seq(self.parameter, self.COMMA, sp, self._parameter_list) @rule("Parameter") def parameter(self) -> Rule: - return seq(self.IDENTIFIER, self.COLON, self.type_expression) + return group(self.IDENTIFIER, self.COLON, sp, self.type_expression) # Block @rule("Block") def block(self) -> Rule: return alt( - seq(self.LCURLY, self.RCURLY), - group(self.LCURLY, indent(newline(), self.block_body), newline(), self.RCURLY), + group(self.LCURLY, nl, self.RCURLY), + seq(self.LCURLY, indent(br, self.block_body), br, self.RCURLY), ) @rule("BlockBody") @@ -183,12 +185,12 @@ class FineGrammar(Grammar): return alt( self.expression, self._statement_list, - seq(self._statement_list, newline(), self.expression), + seq(self._statement_list, br, self.expression), ) @rule def _statement_list(self) -> Rule: - return self._statement | seq(self._statement_list, self._statement) + return self._statement | seq(self._statement_list, br, self._statement) @rule def _statement(self) -> Rule: @@ -204,15 +206,26 @@ class FineGrammar(Grammar): @rule("LetStatement") def let_statement(self) -> Rule: - return seq(self.LET, self.IDENTIFIER, self.EQUAL, self.expression, self.SEMICOLON) + return group( + self.LET, + sp, + self.IDENTIFIER, + indent(sp, self.EQUAL, indent(sp, group(self.expression, self.SEMICOLON))), + ) @rule("ReturnStatement") def return_statement(self) -> Rule: - return seq(self.RETURN, self.expression, self.SEMICOLON) | seq(self.RETURN, self.SEMICOLON) + return alt( + group(self.RETURN, indent(sp, group(self.expression, self.SEMICOLON))), + group(self.RETURN, self.SEMICOLON), + ) @rule("ForStatement") def for_statement(self) -> Rule: - return seq(self.FOR, self.iterator_variable, self.IN, self.expression, self.block) + return group( + group(self.FOR, sp, self.iterator_variable, sp, self.IN, sp, group(self.expression)), + self.block, + ) @rule("IteratorVariable") def iterator_variable(self) -> Rule: @@ -224,7 +237,7 @@ class FineGrammar(Grammar): @rule def while_statement(self) -> Rule: - return seq(self.WHILE, self.expression, self.block) + return group(group(self.WHILE, sp, self.expression), sp, self.block) @rule def expression_statement(self) -> Rule: @@ -238,24 +251,24 @@ class FineGrammar(Grammar): @rule("BinaryExpression") def binary_expression(self) -> Rule: return alt( - self.expression + self.EQUAL + self.expression, - self.expression + self.OR + self.expression, - self.expression + self.AND + self.expression, - self.expression + self.EQUALEQUAL + self.expression, - self.expression + self.BANGEQUAL + self.expression, - self.expression + self.LESS + self.expression, - self.expression + self.LESSEQUAL + self.expression, - self.expression + self.GREATER + self.expression, - self.expression + self.GREATEREQUAL + self.expression, - self.expression + self.PLUS + self.expression, - self.expression + self.MINUS + self.expression, - self.expression + self.STAR + self.expression, - self.expression + self.SLASH + self.expression, + group(self.expression, sp, self.EQUAL, sp, self.expression), + group(self.expression, sp, self.OR, sp, self.expression), + group(self.expression, sp, self.AND, sp, self.expression), + group(self.expression, sp, self.EQUALEQUAL, sp, self.expression), + group(self.expression, sp, self.BANGEQUAL, sp, self.expression), + group(self.expression, sp, self.LESS, sp, self.expression), + group(self.expression, sp, self.LESSEQUAL, sp, self.expression), + group(self.expression, sp, self.GREATER, sp, self.expression), + group(self.expression, sp, self.GREATEREQUAL, sp, self.expression), + group(self.expression, sp, self.PLUS, sp, self.expression), + group(self.expression, sp, self.MINUS, sp, self.expression), + group(self.expression, sp, self.STAR, sp, self.expression), + group(self.expression, sp, self.SLASH, sp, self.expression), ) @rule("IsExpression") def is_expression(self) -> Rule: - return seq(self.expression, self.IS, self.pattern) + return group(self.expression, sp, self.IS, indent(sp, self.pattern)) @rule def primary_expression(self) -> Rule: @@ -271,9 +284,15 @@ class FineGrammar(Grammar): | self.object_constructor_expression | self.match_expression | seq(self.primary_expression, self.LPAREN, self.RPAREN) - | seq(self.primary_expression, self.LPAREN, self._expression_list, self.RPAREN) - | seq(self.primary_expression, self.DOT, self.IDENTIFIER) - | seq(self.LPAREN, self.expression, self.RPAREN) + | group( + self.primary_expression, + self.LPAREN, + indent(nl, self._expression_list), + nl, + self.RPAREN, + ) + | group(self.primary_expression, indent(nl, self.DOT, self.IDENTIFIER)) + | group(self.LPAREN, indent(nl, self.expression), nl, self.RPAREN) ) @rule("IdentifierExpression") @@ -287,15 +306,26 @@ class FineGrammar(Grammar): @rule("ConditionalExpression") def conditional_expression(self) -> Rule: return ( - seq(self.IF, self.expression, self.block) - | seq(self.IF, self.expression, self.block, self.ELSE, self.conditional_expression) - | seq(self.IF, self.expression, self.block, self.ELSE, self.block) + seq(group(self.IF, sp, self.expression), sp, self.block) + | seq( + group(self.IF, sp, self.expression), + sp, + self.block, + sp, + self.ELSE, + sp, + self.conditional_expression, + ) + | seq( + group(self.IF, sp, self.expression), sp, self.block, sp, self.ELSE, sp, self.block + ) ) @rule def list_constructor_expression(self) -> Rule: - return seq(self.LSQUARE, self.RSQUARE) | seq( - self.LSQUARE, self._expression_list, self.RSQUARE + return alt( + group(self.LSQUARE, nl, self.RSQUARE), + group(self.LSQUARE, indent(nl, self._expression_list), nl, self.RSQUARE), ) @rule @@ -303,34 +333,37 @@ class FineGrammar(Grammar): return ( self.expression | seq(self.expression, self.COMMA) - | seq(self.expression, self.COMMA, self._expression_list) + | seq(self.expression, self.COMMA, sp, self._expression_list) ) @rule def match_expression(self) -> Rule: - return seq(self.MATCH, self.expression, self.match_body) + return group(group(self.MATCH, sp, self.expression), sp, self.match_body) @rule("MatchBody") def match_body(self) -> Rule: - return seq(self.LCURLY, self.RCURLY) | seq(self.LCURLY, self._match_arms, self.RCURLY) + return alt( + group(self.LCURLY, nl, self.RCURLY), + group(self.LCURLY, indent(nl, self._match_arms), nl, self.RCURLY), + ) @rule def _match_arms(self) -> Rule: return ( self.match_arm | seq(self.match_arm, self.COMMA) - | seq(self.match_arm, self.COMMA, self._match_arms) + | seq(self.match_arm, self.COMMA, br, self._match_arms) ) @rule("MatchArm") def match_arm(self) -> Rule: - return seq(self.pattern, self.ARROW, self.expression) + return group(self.pattern, sp, self.ARROW, sp, self.expression) @rule("Pattern") def pattern(self) -> Rule: return ( - seq(self.variable_binding, self._pattern_core, self.AND, self.expression) - | seq(self.variable_binding, self._pattern_core) + group(self.variable_binding, self._pattern_core, sp, self.AND, sp, self.expression) + | group(self.variable_binding, self._pattern_core) | self._pattern_core ) @@ -348,23 +381,26 @@ class FineGrammar(Grammar): @rule def object_constructor_expression(self) -> Rule: - return seq(self.NEW, self.type_identifier, self.field_list) + return group(self.NEW, sp, self.type_identifier, self.field_list) @rule def field_list(self) -> Rule: - return seq(self.LCURLY, self.RCURLY) | seq(self.LCURLY, self.field_values, self.RCURLY) + return alt( + seq(self.LCURLY, self.RCURLY), + group(self.LCURLY, indent(nl, self.field_values), nl, self.RCURLY), + ) @rule def field_values(self) -> Rule: return ( self.field_value | seq(self.field_value, self.COMMA) - | seq(self.field_value, self.COMMA, self.field_values) + | seq(self.field_value, self.COMMA, sp, self.field_values) ) @rule def field_value(self) -> Rule: - return self.IDENTIFIER | seq(self.IDENTIFIER, self.COLON, self.expression) + return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression)) BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus()) COMMENT = Terminal( @@ -461,13 +497,17 @@ if __name__ == "__main__": from parser.emacs import emit_emacs_major_mode from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries + # TODO: Actually generate a lexer/parser for some runtime. grammar = FineGrammar() grammar.build_table() lexer = grammar.compile_lexer() dump_lexer_table(lexer) + # Generate tree-sitter parser and emacs mode. ts_path = Path(__file__).parent / "tree-sitter-fine" emit_tree_sitter_grammar(grammar, ts_path) emit_tree_sitter_queries(grammar, ts_path) emit_emacs_major_mode(grammar, ts_path / "fine.el") + + # TODO: Generate pretty-printer code. diff --git a/makefile b/makefile index 743eb52..6305885 100644 --- a/makefile +++ b/makefile @@ -4,4 +4,4 @@ .PHONY: test test: python3 ./parser/parser.py - pdm run pytest + pdm run python3 -m pytest diff --git a/parser/parser.py b/parser/parser.py index c205154..6eafd88 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -540,7 +540,7 @@ class ErrorCollection: match action: case Reduce(name=name, count=count, transparent=transparent): name_str = name if not transparent else f"transparent node ({name})" - action_str = f"pop {count} values off the stack and make a {name_str}" + action_str = f"use the {count} values to make a {name_str}" case Shift(): action_str = "consume the token and keep going" case Accept(): @@ -2680,6 +2680,7 @@ highlight = _Highlight() @dataclasses.dataclass class FormatMeta(SyntaxMeta): newline: str | None = None + forced_break: bool = False indent: int | None = None group: bool = False @@ -2717,6 +2718,17 @@ def newline(text: str | None = None) -> Rule: return mark(Nothing, format=FormatMeta(newline=text)) +nl = newline("") + +sp = newline(" ") + + +def forced_break() -> Rule: + return mark(Nothing, format=FormatMeta(forced_break=True)) + + +br = forced_break() + ############################################################################### # Finally, the base class for grammars ############################################################################### @@ -2753,7 +2765,8 @@ class Grammar: _precedence: dict[str, typing.Tuple[Assoc, int]] _generator: type[GenerateLR0] - _terminals: list[Terminal] + _terminals: dict[str, Terminal] + _nonterminals: dict[str, NonTerminal] _trivia: list[Terminal] def __init__( @@ -2794,6 +2807,19 @@ class Grammar: raise ValueError(f"More than one terminal has the name '{n}'") terminals[n] = t + # Get the nonterminals. + nonterminals = {} + for _, nt in inspect.getmembers(self, lambda x: isinstance(x, NonTerminal)): + if nt.name in nonterminals: + raise ValueError(f"More than one nonterminal found with the name '{nt.name}'") + + if nt.name in terminals: + raise ValueError( + f"'{nt.name}' is the name of both a Terminal and a NonTerminal rule" + ) + + nonterminals[nt.name] = nt + # Resolve the trivia declarations correctly. resolved_trivia: list[Terminal] = [] for t in trivia: @@ -2809,12 +2835,22 @@ class Grammar: precedence_table = {} for prec, (associativity, symbols) in enumerate(precedence): for symbol in symbols: + key = None if isinstance(symbol, Terminal): key = symbol.name + if key is None: + raise ValueError(f"{symbol} is a terminal that has not had a name set yet") elif isinstance(symbol, NonTerminal): key = symbol.name - else: - raise ValueError(f"{symbol} must be either a Token or a NonTerminal") + elif isinstance(symbol, str): + key = terminals.get(symbol) + if key is None: + key = nonterminals.get(symbol) + + if key is None: + raise ValueError( + f"{symbol} must be either a Token or a NonTerminal, or the name of one" + ) precedence_table[key] = (associativity, prec + 1) @@ -2826,18 +2862,19 @@ class Grammar: self._precedence = precedence_table self.start = start self._generator = generator - self._terminals = list(terminals.values()) + self._terminals = terminals + self._nonterminals = nonterminals self._trivia = resolved_trivia self.name = name def terminals(self) -> list[Terminal]: - return self._terminals + return list(self._terminals.values()) def trivia_terminals(self) -> list[Terminal]: return self._trivia def non_terminals(self) -> list[NonTerminal]: - return [nt for _, nt in inspect.getmembers(self, lambda x: isinstance(x, NonTerminal))] + return list(self._nonterminals.values()) def get_precedence(self, name: str) -> None | tuple[Assoc, int]: return self._precedence.get(name) @@ -2858,9 +2895,8 @@ class Grammar: if start is None: start = self.start - rules = self.non_terminals() - nonterminals = {rule.name: rule for rule in rules} - transparents = {rule.name for rule in rules if rule.transparent} + nonterminals = self._nonterminals + transparents = {rule.name for rule in nonterminals.values() if rule.transparent} grammar = {} diff --git a/parser/wadler.py b/parser/wadler.py index 3069097..5c2b518 100644 --- a/parser/wadler.py +++ b/parser/wadler.py @@ -5,6 +5,9 @@ import typing from . import parser from . import runtime +# TODO: I think I want a *force break*, i.e., a document which forces things +# to not fit on one line. + @dataclasses.dataclass(frozen=True) class Cons: @@ -24,6 +27,11 @@ class NewLine: replace: str +@dataclasses.dataclass(frozen=True) +class ForceBreak: + pass + + @dataclasses.dataclass(frozen=True) class Indent: amount: int @@ -60,7 +68,7 @@ class Lazy: return Lazy(lambda: printer.convert_tree_to_document(tree)) -Document = None | Text | Literal | NewLine | Cons | Indent | Group | Lazy +Document = None | Text | Literal | NewLine | ForceBreak | Cons | Indent | Group | Lazy class DocumentLayout: @@ -127,6 +135,12 @@ def layout_document(doc: Document, width: int) -> DocumentLayout: # all fit. return True + case ForceBreak(): + # If we're in a flattened chunk then force it to break by + # returning false here, otherwise we're at the end of the + # line and yes, whatever you were asking about has fit. + return not chunk.flat + case Cons(left, right): stack.append(chunk.with_document(right)) stack.append(chunk.with_document(left)) @@ -180,6 +194,11 @@ def layout_document(doc: Document, width: int) -> DocumentLayout: output.append("\n" + (chunk.indent * " ")) column = chunk.indent + case ForceBreak(): + # TODO: Custom newline expansion, custom indent segments. + output.append("\n" + (chunk.indent * " ")) + column = chunk.indent + case Cons(left, right): chunks.append(chunk.with_document(right)) chunks.append(chunk.with_document(left)) @@ -292,12 +311,14 @@ class Matcher: elif name[0] == "n": replace = self.newline_replace[name] - print(f"!!!! {name} -> {repr(replace)}") child = cons(child, NewLine(replace)) elif name[0] == "p": child = cons(NewLine(""), child) + elif name[0] == "f": + child = cons(child, ForceBreak()) + else: pass # Reducing a transparent rule probably. @@ -375,8 +396,8 @@ class Printer: visited: set[str] = set() group_count = 0 indent_amounts: dict[str, int] = {} - done_newline = False newline_map: dict[str, str] = {} + done_forced_break = False def compile_nonterminal(name: str, rule: parser.NonTerminal): if name not in visited: @@ -388,7 +409,7 @@ class Printer: def compile_production(production: parser.FlattenedWithMetadata) -> list[str]: nonlocal group_count nonlocal indent_amounts - nonlocal done_newline + nonlocal done_forced_break result = [] for item in production: @@ -439,6 +460,13 @@ class Printer: tx_children.append(newline_rule_name) + if pretty.forced_break: + if not done_forced_break: + generated_grammar.append(("forced_break", [])) + done_forced_break = True + + tx_children.append("forced_break") + # If it turned out to have formatting meta then we will # have replaced or augmented the translated children # appropriately. Otherwise, if it's highlighting meta or diff --git a/tests/test_lexer.py b/tests/test_lexer.py index eec0415..ffff192 100644 --- a/tests/test_lexer.py +++ b/tests/test_lexer.py @@ -359,7 +359,7 @@ def test_lexer_compile(): def foo(self): return self.IS - start = foo + start = "foo" IS = Terminal("is") AS = Terminal("as") @@ -392,7 +392,7 @@ def test_lexer_numbers(n: float): def number(self): return self.NUMBER - start = number + start = "number" NUMBER = Terminal( Re.seq( diff --git a/tests/test_wadler.py b/tests/test_wadler.py index 498d15c..a5081de 100644 --- a/tests/test_wadler.py +++ b/tests/test_wadler.py @@ -1,6 +1,22 @@ import typing -from parser.parser import Grammar, Re, Terminal, rule, opt, group, newline, alt, indent +from parser.parser import ( + Grammar, + Re, + Terminal, + rule, + opt, + group, + newline, + alt, + indent, + seq, + Rule, + Assoc, + sp, + nl, + br, +) import parser.runtime as runtime import parser.wadler as wadler @@ -57,10 +73,7 @@ class JsonGrammar(Grammar): self.value + self.COMMA + newline(" ") + self._array_items, ) - BLANKS = Terminal( - Re.set(" ", "\t", "\r", "\n").plus(), - is_format_blank=True, - ) + BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus()) LCURLY = Terminal("{") RCURLY = Terminal("}") COMMA = Terminal(",") @@ -103,6 +116,8 @@ def flatten_document(doc: wadler.Document, src: str) -> list: match doc: case wadler.NewLine(replace): return [f""] + case wadler.ForceBreak(): + return [""] case wadler.Indent(): return [[f"", flatten_document(doc.doc, src)]] case wadler.Text(start, end): @@ -204,3 +219,65 @@ def test_layout_basic(): } """.strip() ) + + +def test_forced_break(): + class TG(Grammar): + start = "root" + trivia = ["BLANKS"] + + @rule + def root(self): + return self._expression + + @rule + def _expression(self): + return self.word | self.list + + @rule + def list(self): + return group(self.LPAREN, indent(nl, self._expressions), nl, self.RPAREN) + + @rule + def _expressions(self): + return self._expression | seq(self._expressions, sp, self._expression) + + @rule + def word(self): + return self.OK | seq(self.BREAK, br, self.BREAK) + + LPAREN = Terminal("(") + RPAREN = Terminal(")") + OK = Terminal("ok") + BREAK = Terminal("break") + + BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus()) + + g = TG() + g_lexer = g.compile_lexer() + g_parser = runtime.Parser(g.build_table()) + + text = "((ok ok) (ok break break ok) (ok ok ok ok))" + + tree, errors = g_parser.parse(runtime.GenericTokenStream(text, g_lexer)) + assert errors == [] + assert tree is not None + + printer = wadler.Printer(g) + result = printer.format_tree(tree, 200).apply_to_source(text) + + assert ( + result + == """ +( + (ok ok) + ( + ok + break + break + ok + ) + (ok ok ok ok) +) + """.strip() + )