Finish annotating test grammar, forced breaks, fixes
A forced break always emits a newline at the position where it appears, which is sometimes what we want. (For example, some syntax should *never* be printed on a single line.)
This commit is contained in:
parent
938f0e5c69
commit
d7a6891519
6 changed files with 273 additions and 92 deletions
180
grammar.py
180
grammar.py
|
|
@ -6,14 +6,16 @@ from parser import (
|
||||||
Rule,
|
Rule,
|
||||||
Terminal,
|
Terminal,
|
||||||
alt,
|
alt,
|
||||||
|
br,
|
||||||
group,
|
group,
|
||||||
highlight,
|
highlight,
|
||||||
indent,
|
indent,
|
||||||
mark,
|
mark,
|
||||||
newline,
|
nl,
|
||||||
opt,
|
opt,
|
||||||
rule,
|
rule,
|
||||||
seq,
|
seq,
|
||||||
|
sp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -53,7 +55,7 @@ class FineGrammar(Grammar):
|
||||||
def _file_statement_list(self) -> Rule:
|
def _file_statement_list(self) -> Rule:
|
||||||
return alt(
|
return alt(
|
||||||
self._file_statement,
|
self._file_statement,
|
||||||
self._file_statement_list + newline() + self._file_statement,
|
self._file_statement_list + nl + self._file_statement,
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
|
|
@ -64,7 +66,9 @@ class FineGrammar(Grammar):
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def import_statement(self) -> Rule:
|
def import_statement(self) -> Rule:
|
||||||
return seq(self.IMPORT, self.STRING, self.AS, self.IDENTIFIER, self.SEMICOLON)
|
return group(
|
||||||
|
self.IMPORT, sp, self.STRING, sp, self.AS, sp, self.IDENTIFIER, sp, self.SEMICOLON
|
||||||
|
)
|
||||||
|
|
||||||
@rule("ClassDeclaration")
|
@rule("ClassDeclaration")
|
||||||
def class_declaration(self) -> Rule:
|
def class_declaration(self) -> Rule:
|
||||||
|
|
@ -72,16 +76,14 @@ class FineGrammar(Grammar):
|
||||||
group(
|
group(
|
||||||
group(
|
group(
|
||||||
self.CLASS,
|
self.CLASS,
|
||||||
newline(),
|
sp,
|
||||||
mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
|
mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
|
||||||
|
sp,
|
||||||
),
|
),
|
||||||
self.LCURLY,
|
self.LCURLY,
|
||||||
),
|
),
|
||||||
indent(
|
indent(nl, mark(opt(self.class_body), field="body")),
|
||||||
newline(),
|
nl,
|
||||||
mark(opt(self.class_body), field="body"),
|
|
||||||
),
|
|
||||||
newline(),
|
|
||||||
self.RCURLY,
|
self.RCURLY,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -99,7 +101,7 @@ class FineGrammar(Grammar):
|
||||||
|
|
||||||
@rule("FieldDecl")
|
@rule("FieldDecl")
|
||||||
def field_declaration(self) -> Rule:
|
def field_declaration(self) -> Rule:
|
||||||
return seq(self.IDENTIFIER, self.COLON, self.type_expression, self.SEMICOLON)
|
return group(self.IDENTIFIER, self.COLON, sp, self.type_expression, self.SEMICOLON) + nl
|
||||||
|
|
||||||
# Types
|
# Types
|
||||||
@rule("TypeExpression")
|
@rule("TypeExpression")
|
||||||
|
|
@ -108,7 +110,7 @@ class FineGrammar(Grammar):
|
||||||
|
|
||||||
@rule("AlternateType")
|
@rule("AlternateType")
|
||||||
def alternate_type(self) -> Rule:
|
def alternate_type(self) -> Rule:
|
||||||
return seq(self.type_expression, self.OR, self.type_identifier)
|
return group(self.type_expression, sp, self.OR, sp, self.type_identifier)
|
||||||
|
|
||||||
@rule("TypeIdentifier")
|
@rule("TypeIdentifier")
|
||||||
def type_identifier(self) -> Rule:
|
def type_identifier(self) -> Rule:
|
||||||
|
|
@ -117,28 +119,28 @@ class FineGrammar(Grammar):
|
||||||
@rule
|
@rule
|
||||||
def export_statement(self) -> Rule:
|
def export_statement(self) -> Rule:
|
||||||
return alt(
|
return alt(
|
||||||
seq(self.EXPORT, self.class_declaration),
|
group(self.EXPORT, sp, self.class_declaration),
|
||||||
seq(self.EXPORT, self.function_declaration),
|
group(self.EXPORT, sp, self.function_declaration),
|
||||||
seq(self.EXPORT, self.let_statement),
|
group(self.EXPORT, sp, self.let_statement),
|
||||||
seq(self.EXPORT, self.export_list, self.SEMICOLON),
|
group(self.EXPORT, sp, self.export_list, self.SEMICOLON),
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def export_list(self) -> Rule:
|
def export_list(self) -> Rule:
|
||||||
return (
|
return self.IDENTIFIER | seq(self.IDENTIFIER, self.COMMA, sp, self.export_list)
|
||||||
self.IDENTIFIER
|
|
||||||
| seq(self.IDENTIFIER, self.COMMA)
|
|
||||||
| seq(self.IDENTIFIER, self.COMMA, self.export_list)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Functions
|
# Functions
|
||||||
@rule("FunctionDecl")
|
@rule("FunctionDecl")
|
||||||
def function_declaration(self) -> Rule:
|
def function_declaration(self) -> Rule:
|
||||||
return seq(
|
return seq(
|
||||||
self.FUN,
|
group(
|
||||||
mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.function),
|
self.FUN,
|
||||||
mark(self.function_parameters, field="parameters"),
|
sp,
|
||||||
mark(opt(self.ARROW, self.type_expression), field="return_type"),
|
mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.function),
|
||||||
|
sp,
|
||||||
|
mark(self.function_parameters, field="parameters"),
|
||||||
|
mark(opt(sp, self.ARROW, sp, self.type_expression), field="return_type"),
|
||||||
|
),
|
||||||
mark(self.block, field="body"),
|
mark(self.block, field="body"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -147,14 +149,14 @@ class FineGrammar(Grammar):
|
||||||
return group(
|
return group(
|
||||||
self.LPAREN,
|
self.LPAREN,
|
||||||
indent(
|
indent(
|
||||||
newline(),
|
nl,
|
||||||
opt(
|
opt(
|
||||||
self._first_parameter
|
self._first_parameter
|
||||||
| seq(self._first_parameter, self.COMMA)
|
| seq(self._first_parameter, self.COMMA)
|
||||||
| group(self._first_parameter, self.COMMA, newline(), self._parameter_list)
|
| group(self._first_parameter, self.COMMA, sp, self._parameter_list)
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
newline(),
|
nl,
|
||||||
self.RPAREN,
|
self.RPAREN,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -164,18 +166,18 @@ class FineGrammar(Grammar):
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def _parameter_list(self) -> Rule:
|
def _parameter_list(self) -> Rule:
|
||||||
return self.parameter | seq(self.parameter, self.COMMA, newline(), self._parameter_list)
|
return self.parameter | seq(self.parameter, self.COMMA, sp, self._parameter_list)
|
||||||
|
|
||||||
@rule("Parameter")
|
@rule("Parameter")
|
||||||
def parameter(self) -> Rule:
|
def parameter(self) -> Rule:
|
||||||
return seq(self.IDENTIFIER, self.COLON, self.type_expression)
|
return group(self.IDENTIFIER, self.COLON, sp, self.type_expression)
|
||||||
|
|
||||||
# Block
|
# Block
|
||||||
@rule("Block")
|
@rule("Block")
|
||||||
def block(self) -> Rule:
|
def block(self) -> Rule:
|
||||||
return alt(
|
return alt(
|
||||||
seq(self.LCURLY, self.RCURLY),
|
group(self.LCURLY, nl, self.RCURLY),
|
||||||
group(self.LCURLY, indent(newline(), self.block_body), newline(), self.RCURLY),
|
seq(self.LCURLY, indent(br, self.block_body), br, self.RCURLY),
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule("BlockBody")
|
@rule("BlockBody")
|
||||||
|
|
@ -183,12 +185,12 @@ class FineGrammar(Grammar):
|
||||||
return alt(
|
return alt(
|
||||||
self.expression,
|
self.expression,
|
||||||
self._statement_list,
|
self._statement_list,
|
||||||
seq(self._statement_list, newline(), self.expression),
|
seq(self._statement_list, br, self.expression),
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def _statement_list(self) -> Rule:
|
def _statement_list(self) -> Rule:
|
||||||
return self._statement | seq(self._statement_list, self._statement)
|
return self._statement | seq(self._statement_list, br, self._statement)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def _statement(self) -> Rule:
|
def _statement(self) -> Rule:
|
||||||
|
|
@ -204,15 +206,26 @@ class FineGrammar(Grammar):
|
||||||
|
|
||||||
@rule("LetStatement")
|
@rule("LetStatement")
|
||||||
def let_statement(self) -> Rule:
|
def let_statement(self) -> Rule:
|
||||||
return seq(self.LET, self.IDENTIFIER, self.EQUAL, self.expression, self.SEMICOLON)
|
return group(
|
||||||
|
self.LET,
|
||||||
|
sp,
|
||||||
|
self.IDENTIFIER,
|
||||||
|
indent(sp, self.EQUAL, indent(sp, group(self.expression, self.SEMICOLON))),
|
||||||
|
)
|
||||||
|
|
||||||
@rule("ReturnStatement")
|
@rule("ReturnStatement")
|
||||||
def return_statement(self) -> Rule:
|
def return_statement(self) -> Rule:
|
||||||
return seq(self.RETURN, self.expression, self.SEMICOLON) | seq(self.RETURN, self.SEMICOLON)
|
return alt(
|
||||||
|
group(self.RETURN, indent(sp, group(self.expression, self.SEMICOLON))),
|
||||||
|
group(self.RETURN, self.SEMICOLON),
|
||||||
|
)
|
||||||
|
|
||||||
@rule("ForStatement")
|
@rule("ForStatement")
|
||||||
def for_statement(self) -> Rule:
|
def for_statement(self) -> Rule:
|
||||||
return seq(self.FOR, self.iterator_variable, self.IN, self.expression, self.block)
|
return group(
|
||||||
|
group(self.FOR, sp, self.iterator_variable, sp, self.IN, sp, group(self.expression)),
|
||||||
|
self.block,
|
||||||
|
)
|
||||||
|
|
||||||
@rule("IteratorVariable")
|
@rule("IteratorVariable")
|
||||||
def iterator_variable(self) -> Rule:
|
def iterator_variable(self) -> Rule:
|
||||||
|
|
@ -224,7 +237,7 @@ class FineGrammar(Grammar):
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def while_statement(self) -> Rule:
|
def while_statement(self) -> Rule:
|
||||||
return seq(self.WHILE, self.expression, self.block)
|
return group(group(self.WHILE, sp, self.expression), sp, self.block)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def expression_statement(self) -> Rule:
|
def expression_statement(self) -> Rule:
|
||||||
|
|
@ -238,24 +251,24 @@ class FineGrammar(Grammar):
|
||||||
@rule("BinaryExpression")
|
@rule("BinaryExpression")
|
||||||
def binary_expression(self) -> Rule:
|
def binary_expression(self) -> Rule:
|
||||||
return alt(
|
return alt(
|
||||||
self.expression + self.EQUAL + self.expression,
|
group(self.expression, sp, self.EQUAL, sp, self.expression),
|
||||||
self.expression + self.OR + self.expression,
|
group(self.expression, sp, self.OR, sp, self.expression),
|
||||||
self.expression + self.AND + self.expression,
|
group(self.expression, sp, self.AND, sp, self.expression),
|
||||||
self.expression + self.EQUALEQUAL + self.expression,
|
group(self.expression, sp, self.EQUALEQUAL, sp, self.expression),
|
||||||
self.expression + self.BANGEQUAL + self.expression,
|
group(self.expression, sp, self.BANGEQUAL, sp, self.expression),
|
||||||
self.expression + self.LESS + self.expression,
|
group(self.expression, sp, self.LESS, sp, self.expression),
|
||||||
self.expression + self.LESSEQUAL + self.expression,
|
group(self.expression, sp, self.LESSEQUAL, sp, self.expression),
|
||||||
self.expression + self.GREATER + self.expression,
|
group(self.expression, sp, self.GREATER, sp, self.expression),
|
||||||
self.expression + self.GREATEREQUAL + self.expression,
|
group(self.expression, sp, self.GREATEREQUAL, sp, self.expression),
|
||||||
self.expression + self.PLUS + self.expression,
|
group(self.expression, sp, self.PLUS, sp, self.expression),
|
||||||
self.expression + self.MINUS + self.expression,
|
group(self.expression, sp, self.MINUS, sp, self.expression),
|
||||||
self.expression + self.STAR + self.expression,
|
group(self.expression, sp, self.STAR, sp, self.expression),
|
||||||
self.expression + self.SLASH + self.expression,
|
group(self.expression, sp, self.SLASH, sp, self.expression),
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule("IsExpression")
|
@rule("IsExpression")
|
||||||
def is_expression(self) -> Rule:
|
def is_expression(self) -> Rule:
|
||||||
return seq(self.expression, self.IS, self.pattern)
|
return group(self.expression, sp, self.IS, indent(sp, self.pattern))
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def primary_expression(self) -> Rule:
|
def primary_expression(self) -> Rule:
|
||||||
|
|
@ -271,9 +284,15 @@ class FineGrammar(Grammar):
|
||||||
| self.object_constructor_expression
|
| self.object_constructor_expression
|
||||||
| self.match_expression
|
| self.match_expression
|
||||||
| seq(self.primary_expression, self.LPAREN, self.RPAREN)
|
| seq(self.primary_expression, self.LPAREN, self.RPAREN)
|
||||||
| seq(self.primary_expression, self.LPAREN, self._expression_list, self.RPAREN)
|
| group(
|
||||||
| seq(self.primary_expression, self.DOT, self.IDENTIFIER)
|
self.primary_expression,
|
||||||
| seq(self.LPAREN, self.expression, self.RPAREN)
|
self.LPAREN,
|
||||||
|
indent(nl, self._expression_list),
|
||||||
|
nl,
|
||||||
|
self.RPAREN,
|
||||||
|
)
|
||||||
|
| group(self.primary_expression, indent(nl, self.DOT, self.IDENTIFIER))
|
||||||
|
| group(self.LPAREN, indent(nl, self.expression), nl, self.RPAREN)
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule("IdentifierExpression")
|
@rule("IdentifierExpression")
|
||||||
|
|
@ -287,15 +306,26 @@ class FineGrammar(Grammar):
|
||||||
@rule("ConditionalExpression")
|
@rule("ConditionalExpression")
|
||||||
def conditional_expression(self) -> Rule:
|
def conditional_expression(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
seq(self.IF, self.expression, self.block)
|
seq(group(self.IF, sp, self.expression), sp, self.block)
|
||||||
| seq(self.IF, self.expression, self.block, self.ELSE, self.conditional_expression)
|
| seq(
|
||||||
| seq(self.IF, self.expression, self.block, self.ELSE, self.block)
|
group(self.IF, sp, self.expression),
|
||||||
|
sp,
|
||||||
|
self.block,
|
||||||
|
sp,
|
||||||
|
self.ELSE,
|
||||||
|
sp,
|
||||||
|
self.conditional_expression,
|
||||||
|
)
|
||||||
|
| seq(
|
||||||
|
group(self.IF, sp, self.expression), sp, self.block, sp, self.ELSE, sp, self.block
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def list_constructor_expression(self) -> Rule:
|
def list_constructor_expression(self) -> Rule:
|
||||||
return seq(self.LSQUARE, self.RSQUARE) | seq(
|
return alt(
|
||||||
self.LSQUARE, self._expression_list, self.RSQUARE
|
group(self.LSQUARE, nl, self.RSQUARE),
|
||||||
|
group(self.LSQUARE, indent(nl, self._expression_list), nl, self.RSQUARE),
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
|
|
@ -303,34 +333,37 @@ class FineGrammar(Grammar):
|
||||||
return (
|
return (
|
||||||
self.expression
|
self.expression
|
||||||
| seq(self.expression, self.COMMA)
|
| seq(self.expression, self.COMMA)
|
||||||
| seq(self.expression, self.COMMA, self._expression_list)
|
| seq(self.expression, self.COMMA, sp, self._expression_list)
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def match_expression(self) -> Rule:
|
def match_expression(self) -> Rule:
|
||||||
return seq(self.MATCH, self.expression, self.match_body)
|
return group(group(self.MATCH, sp, self.expression), sp, self.match_body)
|
||||||
|
|
||||||
@rule("MatchBody")
|
@rule("MatchBody")
|
||||||
def match_body(self) -> Rule:
|
def match_body(self) -> Rule:
|
||||||
return seq(self.LCURLY, self.RCURLY) | seq(self.LCURLY, self._match_arms, self.RCURLY)
|
return alt(
|
||||||
|
group(self.LCURLY, nl, self.RCURLY),
|
||||||
|
group(self.LCURLY, indent(nl, self._match_arms), nl, self.RCURLY),
|
||||||
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def _match_arms(self) -> Rule:
|
def _match_arms(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
self.match_arm
|
self.match_arm
|
||||||
| seq(self.match_arm, self.COMMA)
|
| seq(self.match_arm, self.COMMA)
|
||||||
| seq(self.match_arm, self.COMMA, self._match_arms)
|
| seq(self.match_arm, self.COMMA, br, self._match_arms)
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule("MatchArm")
|
@rule("MatchArm")
|
||||||
def match_arm(self) -> Rule:
|
def match_arm(self) -> Rule:
|
||||||
return seq(self.pattern, self.ARROW, self.expression)
|
return group(self.pattern, sp, self.ARROW, sp, self.expression)
|
||||||
|
|
||||||
@rule("Pattern")
|
@rule("Pattern")
|
||||||
def pattern(self) -> Rule:
|
def pattern(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
seq(self.variable_binding, self._pattern_core, self.AND, self.expression)
|
group(self.variable_binding, self._pattern_core, sp, self.AND, sp, self.expression)
|
||||||
| seq(self.variable_binding, self._pattern_core)
|
| group(self.variable_binding, self._pattern_core)
|
||||||
| self._pattern_core
|
| self._pattern_core
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -348,23 +381,26 @@ class FineGrammar(Grammar):
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def object_constructor_expression(self) -> Rule:
|
def object_constructor_expression(self) -> Rule:
|
||||||
return seq(self.NEW, self.type_identifier, self.field_list)
|
return group(self.NEW, sp, self.type_identifier, self.field_list)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def field_list(self) -> Rule:
|
def field_list(self) -> Rule:
|
||||||
return seq(self.LCURLY, self.RCURLY) | seq(self.LCURLY, self.field_values, self.RCURLY)
|
return alt(
|
||||||
|
seq(self.LCURLY, self.RCURLY),
|
||||||
|
group(self.LCURLY, indent(nl, self.field_values), nl, self.RCURLY),
|
||||||
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def field_values(self) -> Rule:
|
def field_values(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
self.field_value
|
self.field_value
|
||||||
| seq(self.field_value, self.COMMA)
|
| seq(self.field_value, self.COMMA)
|
||||||
| seq(self.field_value, self.COMMA, self.field_values)
|
| seq(self.field_value, self.COMMA, sp, self.field_values)
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def field_value(self) -> Rule:
|
def field_value(self) -> Rule:
|
||||||
return self.IDENTIFIER | seq(self.IDENTIFIER, self.COLON, self.expression)
|
return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression))
|
||||||
|
|
||||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||||
COMMENT = Terminal(
|
COMMENT = Terminal(
|
||||||
|
|
@ -461,13 +497,17 @@ if __name__ == "__main__":
|
||||||
from parser.emacs import emit_emacs_major_mode
|
from parser.emacs import emit_emacs_major_mode
|
||||||
from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries
|
from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries
|
||||||
|
|
||||||
|
# TODO: Actually generate a lexer/parser for some runtime.
|
||||||
grammar = FineGrammar()
|
grammar = FineGrammar()
|
||||||
grammar.build_table()
|
grammar.build_table()
|
||||||
|
|
||||||
lexer = grammar.compile_lexer()
|
lexer = grammar.compile_lexer()
|
||||||
dump_lexer_table(lexer)
|
dump_lexer_table(lexer)
|
||||||
|
|
||||||
|
# Generate tree-sitter parser and emacs mode.
|
||||||
ts_path = Path(__file__).parent / "tree-sitter-fine"
|
ts_path = Path(__file__).parent / "tree-sitter-fine"
|
||||||
emit_tree_sitter_grammar(grammar, ts_path)
|
emit_tree_sitter_grammar(grammar, ts_path)
|
||||||
emit_tree_sitter_queries(grammar, ts_path)
|
emit_tree_sitter_queries(grammar, ts_path)
|
||||||
emit_emacs_major_mode(grammar, ts_path / "fine.el")
|
emit_emacs_major_mode(grammar, ts_path / "fine.el")
|
||||||
|
|
||||||
|
# TODO: Generate pretty-printer code.
|
||||||
|
|
|
||||||
2
makefile
2
makefile
|
|
@ -4,4 +4,4 @@
|
||||||
.PHONY: test
|
.PHONY: test
|
||||||
test:
|
test:
|
||||||
python3 ./parser/parser.py
|
python3 ./parser/parser.py
|
||||||
pdm run pytest
|
pdm run python3 -m pytest
|
||||||
|
|
|
||||||
|
|
@ -540,7 +540,7 @@ class ErrorCollection:
|
||||||
match action:
|
match action:
|
||||||
case Reduce(name=name, count=count, transparent=transparent):
|
case Reduce(name=name, count=count, transparent=transparent):
|
||||||
name_str = name if not transparent else f"transparent node ({name})"
|
name_str = name if not transparent else f"transparent node ({name})"
|
||||||
action_str = f"pop {count} values off the stack and make a {name_str}"
|
action_str = f"use the {count} values to make a {name_str}"
|
||||||
case Shift():
|
case Shift():
|
||||||
action_str = "consume the token and keep going"
|
action_str = "consume the token and keep going"
|
||||||
case Accept():
|
case Accept():
|
||||||
|
|
@ -2680,6 +2680,7 @@ highlight = _Highlight()
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class FormatMeta(SyntaxMeta):
|
class FormatMeta(SyntaxMeta):
|
||||||
newline: str | None = None
|
newline: str | None = None
|
||||||
|
forced_break: bool = False
|
||||||
indent: int | None = None
|
indent: int | None = None
|
||||||
group: bool = False
|
group: bool = False
|
||||||
|
|
||||||
|
|
@ -2717,6 +2718,17 @@ def newline(text: str | None = None) -> Rule:
|
||||||
return mark(Nothing, format=FormatMeta(newline=text))
|
return mark(Nothing, format=FormatMeta(newline=text))
|
||||||
|
|
||||||
|
|
||||||
|
nl = newline("")
|
||||||
|
|
||||||
|
sp = newline(" ")
|
||||||
|
|
||||||
|
|
||||||
|
def forced_break() -> Rule:
|
||||||
|
return mark(Nothing, format=FormatMeta(forced_break=True))
|
||||||
|
|
||||||
|
|
||||||
|
br = forced_break()
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# Finally, the base class for grammars
|
# Finally, the base class for grammars
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
@ -2753,7 +2765,8 @@ class Grammar:
|
||||||
|
|
||||||
_precedence: dict[str, typing.Tuple[Assoc, int]]
|
_precedence: dict[str, typing.Tuple[Assoc, int]]
|
||||||
_generator: type[GenerateLR0]
|
_generator: type[GenerateLR0]
|
||||||
_terminals: list[Terminal]
|
_terminals: dict[str, Terminal]
|
||||||
|
_nonterminals: dict[str, NonTerminal]
|
||||||
_trivia: list[Terminal]
|
_trivia: list[Terminal]
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
@ -2794,6 +2807,19 @@ class Grammar:
|
||||||
raise ValueError(f"More than one terminal has the name '{n}'")
|
raise ValueError(f"More than one terminal has the name '{n}'")
|
||||||
terminals[n] = t
|
terminals[n] = t
|
||||||
|
|
||||||
|
# Get the nonterminals.
|
||||||
|
nonterminals = {}
|
||||||
|
for _, nt in inspect.getmembers(self, lambda x: isinstance(x, NonTerminal)):
|
||||||
|
if nt.name in nonterminals:
|
||||||
|
raise ValueError(f"More than one nonterminal found with the name '{nt.name}'")
|
||||||
|
|
||||||
|
if nt.name in terminals:
|
||||||
|
raise ValueError(
|
||||||
|
f"'{nt.name}' is the name of both a Terminal and a NonTerminal rule"
|
||||||
|
)
|
||||||
|
|
||||||
|
nonterminals[nt.name] = nt
|
||||||
|
|
||||||
# Resolve the trivia declarations correctly.
|
# Resolve the trivia declarations correctly.
|
||||||
resolved_trivia: list[Terminal] = []
|
resolved_trivia: list[Terminal] = []
|
||||||
for t in trivia:
|
for t in trivia:
|
||||||
|
|
@ -2809,12 +2835,22 @@ class Grammar:
|
||||||
precedence_table = {}
|
precedence_table = {}
|
||||||
for prec, (associativity, symbols) in enumerate(precedence):
|
for prec, (associativity, symbols) in enumerate(precedence):
|
||||||
for symbol in symbols:
|
for symbol in symbols:
|
||||||
|
key = None
|
||||||
if isinstance(symbol, Terminal):
|
if isinstance(symbol, Terminal):
|
||||||
key = symbol.name
|
key = symbol.name
|
||||||
|
if key is None:
|
||||||
|
raise ValueError(f"{symbol} is a terminal that has not had a name set yet")
|
||||||
elif isinstance(symbol, NonTerminal):
|
elif isinstance(symbol, NonTerminal):
|
||||||
key = symbol.name
|
key = symbol.name
|
||||||
else:
|
elif isinstance(symbol, str):
|
||||||
raise ValueError(f"{symbol} must be either a Token or a NonTerminal")
|
key = terminals.get(symbol)
|
||||||
|
if key is None:
|
||||||
|
key = nonterminals.get(symbol)
|
||||||
|
|
||||||
|
if key is None:
|
||||||
|
raise ValueError(
|
||||||
|
f"{symbol} must be either a Token or a NonTerminal, or the name of one"
|
||||||
|
)
|
||||||
|
|
||||||
precedence_table[key] = (associativity, prec + 1)
|
precedence_table[key] = (associativity, prec + 1)
|
||||||
|
|
||||||
|
|
@ -2826,18 +2862,19 @@ class Grammar:
|
||||||
self._precedence = precedence_table
|
self._precedence = precedence_table
|
||||||
self.start = start
|
self.start = start
|
||||||
self._generator = generator
|
self._generator = generator
|
||||||
self._terminals = list(terminals.values())
|
self._terminals = terminals
|
||||||
|
self._nonterminals = nonterminals
|
||||||
self._trivia = resolved_trivia
|
self._trivia = resolved_trivia
|
||||||
self.name = name
|
self.name = name
|
||||||
|
|
||||||
def terminals(self) -> list[Terminal]:
|
def terminals(self) -> list[Terminal]:
|
||||||
return self._terminals
|
return list(self._terminals.values())
|
||||||
|
|
||||||
def trivia_terminals(self) -> list[Terminal]:
|
def trivia_terminals(self) -> list[Terminal]:
|
||||||
return self._trivia
|
return self._trivia
|
||||||
|
|
||||||
def non_terminals(self) -> list[NonTerminal]:
|
def non_terminals(self) -> list[NonTerminal]:
|
||||||
return [nt for _, nt in inspect.getmembers(self, lambda x: isinstance(x, NonTerminal))]
|
return list(self._nonterminals.values())
|
||||||
|
|
||||||
def get_precedence(self, name: str) -> None | tuple[Assoc, int]:
|
def get_precedence(self, name: str) -> None | tuple[Assoc, int]:
|
||||||
return self._precedence.get(name)
|
return self._precedence.get(name)
|
||||||
|
|
@ -2858,9 +2895,8 @@ class Grammar:
|
||||||
if start is None:
|
if start is None:
|
||||||
start = self.start
|
start = self.start
|
||||||
|
|
||||||
rules = self.non_terminals()
|
nonterminals = self._nonterminals
|
||||||
nonterminals = {rule.name: rule for rule in rules}
|
transparents = {rule.name for rule in nonterminals.values() if rule.transparent}
|
||||||
transparents = {rule.name for rule in rules if rule.transparent}
|
|
||||||
|
|
||||||
grammar = {}
|
grammar = {}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,9 @@ import typing
|
||||||
from . import parser
|
from . import parser
|
||||||
from . import runtime
|
from . import runtime
|
||||||
|
|
||||||
|
# TODO: I think I want a *force break*, i.e., a document which forces things
|
||||||
|
# to not fit on one line.
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass(frozen=True)
|
@dataclasses.dataclass(frozen=True)
|
||||||
class Cons:
|
class Cons:
|
||||||
|
|
@ -24,6 +27,11 @@ class NewLine:
|
||||||
replace: str
|
replace: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass(frozen=True)
|
||||||
|
class ForceBreak:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass(frozen=True)
|
@dataclasses.dataclass(frozen=True)
|
||||||
class Indent:
|
class Indent:
|
||||||
amount: int
|
amount: int
|
||||||
|
|
@ -60,7 +68,7 @@ class Lazy:
|
||||||
return Lazy(lambda: printer.convert_tree_to_document(tree))
|
return Lazy(lambda: printer.convert_tree_to_document(tree))
|
||||||
|
|
||||||
|
|
||||||
Document = None | Text | Literal | NewLine | Cons | Indent | Group | Lazy
|
Document = None | Text | Literal | NewLine | ForceBreak | Cons | Indent | Group | Lazy
|
||||||
|
|
||||||
|
|
||||||
class DocumentLayout:
|
class DocumentLayout:
|
||||||
|
|
@ -127,6 +135,12 @@ def layout_document(doc: Document, width: int) -> DocumentLayout:
|
||||||
# all fit.
|
# all fit.
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
case ForceBreak():
|
||||||
|
# If we're in a flattened chunk then force it to break by
|
||||||
|
# returning false here, otherwise we're at the end of the
|
||||||
|
# line and yes, whatever you were asking about has fit.
|
||||||
|
return not chunk.flat
|
||||||
|
|
||||||
case Cons(left, right):
|
case Cons(left, right):
|
||||||
stack.append(chunk.with_document(right))
|
stack.append(chunk.with_document(right))
|
||||||
stack.append(chunk.with_document(left))
|
stack.append(chunk.with_document(left))
|
||||||
|
|
@ -180,6 +194,11 @@ def layout_document(doc: Document, width: int) -> DocumentLayout:
|
||||||
output.append("\n" + (chunk.indent * " "))
|
output.append("\n" + (chunk.indent * " "))
|
||||||
column = chunk.indent
|
column = chunk.indent
|
||||||
|
|
||||||
|
case ForceBreak():
|
||||||
|
# TODO: Custom newline expansion, custom indent segments.
|
||||||
|
output.append("\n" + (chunk.indent * " "))
|
||||||
|
column = chunk.indent
|
||||||
|
|
||||||
case Cons(left, right):
|
case Cons(left, right):
|
||||||
chunks.append(chunk.with_document(right))
|
chunks.append(chunk.with_document(right))
|
||||||
chunks.append(chunk.with_document(left))
|
chunks.append(chunk.with_document(left))
|
||||||
|
|
@ -292,12 +311,14 @@ class Matcher:
|
||||||
|
|
||||||
elif name[0] == "n":
|
elif name[0] == "n":
|
||||||
replace = self.newline_replace[name]
|
replace = self.newline_replace[name]
|
||||||
print(f"!!!! {name} -> {repr(replace)}")
|
|
||||||
child = cons(child, NewLine(replace))
|
child = cons(child, NewLine(replace))
|
||||||
|
|
||||||
elif name[0] == "p":
|
elif name[0] == "p":
|
||||||
child = cons(NewLine(""), child)
|
child = cons(NewLine(""), child)
|
||||||
|
|
||||||
|
elif name[0] == "f":
|
||||||
|
child = cons(child, ForceBreak())
|
||||||
|
|
||||||
else:
|
else:
|
||||||
pass # Reducing a transparent rule probably.
|
pass # Reducing a transparent rule probably.
|
||||||
|
|
||||||
|
|
@ -375,8 +396,8 @@ class Printer:
|
||||||
visited: set[str] = set()
|
visited: set[str] = set()
|
||||||
group_count = 0
|
group_count = 0
|
||||||
indent_amounts: dict[str, int] = {}
|
indent_amounts: dict[str, int] = {}
|
||||||
done_newline = False
|
|
||||||
newline_map: dict[str, str] = {}
|
newline_map: dict[str, str] = {}
|
||||||
|
done_forced_break = False
|
||||||
|
|
||||||
def compile_nonterminal(name: str, rule: parser.NonTerminal):
|
def compile_nonterminal(name: str, rule: parser.NonTerminal):
|
||||||
if name not in visited:
|
if name not in visited:
|
||||||
|
|
@ -388,7 +409,7 @@ class Printer:
|
||||||
def compile_production(production: parser.FlattenedWithMetadata) -> list[str]:
|
def compile_production(production: parser.FlattenedWithMetadata) -> list[str]:
|
||||||
nonlocal group_count
|
nonlocal group_count
|
||||||
nonlocal indent_amounts
|
nonlocal indent_amounts
|
||||||
nonlocal done_newline
|
nonlocal done_forced_break
|
||||||
|
|
||||||
result = []
|
result = []
|
||||||
for item in production:
|
for item in production:
|
||||||
|
|
@ -439,6 +460,13 @@ class Printer:
|
||||||
|
|
||||||
tx_children.append(newline_rule_name)
|
tx_children.append(newline_rule_name)
|
||||||
|
|
||||||
|
if pretty.forced_break:
|
||||||
|
if not done_forced_break:
|
||||||
|
generated_grammar.append(("forced_break", []))
|
||||||
|
done_forced_break = True
|
||||||
|
|
||||||
|
tx_children.append("forced_break")
|
||||||
|
|
||||||
# If it turned out to have formatting meta then we will
|
# If it turned out to have formatting meta then we will
|
||||||
# have replaced or augmented the translated children
|
# have replaced or augmented the translated children
|
||||||
# appropriately. Otherwise, if it's highlighting meta or
|
# appropriately. Otherwise, if it's highlighting meta or
|
||||||
|
|
|
||||||
|
|
@ -359,7 +359,7 @@ def test_lexer_compile():
|
||||||
def foo(self):
|
def foo(self):
|
||||||
return self.IS
|
return self.IS
|
||||||
|
|
||||||
start = foo
|
start = "foo"
|
||||||
|
|
||||||
IS = Terminal("is")
|
IS = Terminal("is")
|
||||||
AS = Terminal("as")
|
AS = Terminal("as")
|
||||||
|
|
@ -392,7 +392,7 @@ def test_lexer_numbers(n: float):
|
||||||
def number(self):
|
def number(self):
|
||||||
return self.NUMBER
|
return self.NUMBER
|
||||||
|
|
||||||
start = number
|
start = "number"
|
||||||
|
|
||||||
NUMBER = Terminal(
|
NUMBER = Terminal(
|
||||||
Re.seq(
|
Re.seq(
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,22 @@
|
||||||
import typing
|
import typing
|
||||||
|
|
||||||
from parser.parser import Grammar, Re, Terminal, rule, opt, group, newline, alt, indent
|
from parser.parser import (
|
||||||
|
Grammar,
|
||||||
|
Re,
|
||||||
|
Terminal,
|
||||||
|
rule,
|
||||||
|
opt,
|
||||||
|
group,
|
||||||
|
newline,
|
||||||
|
alt,
|
||||||
|
indent,
|
||||||
|
seq,
|
||||||
|
Rule,
|
||||||
|
Assoc,
|
||||||
|
sp,
|
||||||
|
nl,
|
||||||
|
br,
|
||||||
|
)
|
||||||
|
|
||||||
import parser.runtime as runtime
|
import parser.runtime as runtime
|
||||||
import parser.wadler as wadler
|
import parser.wadler as wadler
|
||||||
|
|
@ -57,10 +73,7 @@ class JsonGrammar(Grammar):
|
||||||
self.value + self.COMMA + newline(" ") + self._array_items,
|
self.value + self.COMMA + newline(" ") + self._array_items,
|
||||||
)
|
)
|
||||||
|
|
||||||
BLANKS = Terminal(
|
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||||
Re.set(" ", "\t", "\r", "\n").plus(),
|
|
||||||
is_format_blank=True,
|
|
||||||
)
|
|
||||||
LCURLY = Terminal("{")
|
LCURLY = Terminal("{")
|
||||||
RCURLY = Terminal("}")
|
RCURLY = Terminal("}")
|
||||||
COMMA = Terminal(",")
|
COMMA = Terminal(",")
|
||||||
|
|
@ -103,6 +116,8 @@ def flatten_document(doc: wadler.Document, src: str) -> list:
|
||||||
match doc:
|
match doc:
|
||||||
case wadler.NewLine(replace):
|
case wadler.NewLine(replace):
|
||||||
return [f"<newline {repr(replace)}>"]
|
return [f"<newline {repr(replace)}>"]
|
||||||
|
case wadler.ForceBreak():
|
||||||
|
return ["<forced break>"]
|
||||||
case wadler.Indent():
|
case wadler.Indent():
|
||||||
return [[f"<indent {doc.amount}>", flatten_document(doc.doc, src)]]
|
return [[f"<indent {doc.amount}>", flatten_document(doc.doc, src)]]
|
||||||
case wadler.Text(start, end):
|
case wadler.Text(start, end):
|
||||||
|
|
@ -204,3 +219,65 @@ def test_layout_basic():
|
||||||
}
|
}
|
||||||
""".strip()
|
""".strip()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_forced_break():
|
||||||
|
class TG(Grammar):
|
||||||
|
start = "root"
|
||||||
|
trivia = ["BLANKS"]
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def root(self):
|
||||||
|
return self._expression
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def _expression(self):
|
||||||
|
return self.word | self.list
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def list(self):
|
||||||
|
return group(self.LPAREN, indent(nl, self._expressions), nl, self.RPAREN)
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def _expressions(self):
|
||||||
|
return self._expression | seq(self._expressions, sp, self._expression)
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def word(self):
|
||||||
|
return self.OK | seq(self.BREAK, br, self.BREAK)
|
||||||
|
|
||||||
|
LPAREN = Terminal("(")
|
||||||
|
RPAREN = Terminal(")")
|
||||||
|
OK = Terminal("ok")
|
||||||
|
BREAK = Terminal("break")
|
||||||
|
|
||||||
|
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||||
|
|
||||||
|
g = TG()
|
||||||
|
g_lexer = g.compile_lexer()
|
||||||
|
g_parser = runtime.Parser(g.build_table())
|
||||||
|
|
||||||
|
text = "((ok ok) (ok break break ok) (ok ok ok ok))"
|
||||||
|
|
||||||
|
tree, errors = g_parser.parse(runtime.GenericTokenStream(text, g_lexer))
|
||||||
|
assert errors == []
|
||||||
|
assert tree is not None
|
||||||
|
|
||||||
|
printer = wadler.Printer(g)
|
||||||
|
result = printer.format_tree(tree, 200).apply_to_source(text)
|
||||||
|
|
||||||
|
assert (
|
||||||
|
result
|
||||||
|
== """
|
||||||
|
(
|
||||||
|
(ok ok)
|
||||||
|
(
|
||||||
|
ok
|
||||||
|
break
|
||||||
|
break
|
||||||
|
ok
|
||||||
|
)
|
||||||
|
(ok ok ok ok)
|
||||||
|
)
|
||||||
|
""".strip()
|
||||||
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue