Finish annotating test grammar, forced breaks, fixes
A forced break always emits a newline at its position, which is sometimes exactly what we want (for example, for syntax that should *never* be laid out on a single line).
This commit is contained in:
parent
938f0e5c69
commit
d7a6891519
6 changed files with 273 additions and 92 deletions
180
grammar.py
180
grammar.py
|
|
@ -6,14 +6,16 @@ from parser import (
|
|||
Rule,
|
||||
Terminal,
|
||||
alt,
|
||||
br,
|
||||
group,
|
||||
highlight,
|
||||
indent,
|
||||
mark,
|
||||
newline,
|
||||
nl,
|
||||
opt,
|
||||
rule,
|
||||
seq,
|
||||
sp,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -53,7 +55,7 @@ class FineGrammar(Grammar):
|
|||
def _file_statement_list(self) -> Rule:
|
||||
return alt(
|
||||
self._file_statement,
|
||||
self._file_statement_list + newline() + self._file_statement,
|
||||
self._file_statement_list + nl + self._file_statement,
|
||||
)
|
||||
|
||||
@rule
|
||||
|
|
@ -64,7 +66,9 @@ class FineGrammar(Grammar):
|
|||
|
||||
@rule
|
||||
def import_statement(self) -> Rule:
|
||||
return seq(self.IMPORT, self.STRING, self.AS, self.IDENTIFIER, self.SEMICOLON)
|
||||
return group(
|
||||
self.IMPORT, sp, self.STRING, sp, self.AS, sp, self.IDENTIFIER, sp, self.SEMICOLON
|
||||
)
|
||||
|
||||
@rule("ClassDeclaration")
|
||||
def class_declaration(self) -> Rule:
|
||||
|
|
@ -72,16 +76,14 @@ class FineGrammar(Grammar):
|
|||
group(
|
||||
group(
|
||||
self.CLASS,
|
||||
newline(),
|
||||
sp,
|
||||
mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
|
||||
sp,
|
||||
),
|
||||
self.LCURLY,
|
||||
),
|
||||
indent(
|
||||
newline(),
|
||||
mark(opt(self.class_body), field="body"),
|
||||
),
|
||||
newline(),
|
||||
indent(nl, mark(opt(self.class_body), field="body")),
|
||||
nl,
|
||||
self.RCURLY,
|
||||
)
|
||||
|
||||
|
|
@ -99,7 +101,7 @@ class FineGrammar(Grammar):
|
|||
|
||||
@rule("FieldDecl")
|
||||
def field_declaration(self) -> Rule:
|
||||
return seq(self.IDENTIFIER, self.COLON, self.type_expression, self.SEMICOLON)
|
||||
return group(self.IDENTIFIER, self.COLON, sp, self.type_expression, self.SEMICOLON) + nl
|
||||
|
||||
# Types
|
||||
@rule("TypeExpression")
|
||||
|
|
@ -108,7 +110,7 @@ class FineGrammar(Grammar):
|
|||
|
||||
@rule("AlternateType")
|
||||
def alternate_type(self) -> Rule:
|
||||
return seq(self.type_expression, self.OR, self.type_identifier)
|
||||
return group(self.type_expression, sp, self.OR, sp, self.type_identifier)
|
||||
|
||||
@rule("TypeIdentifier")
|
||||
def type_identifier(self) -> Rule:
|
||||
|
|
@ -117,28 +119,28 @@ class FineGrammar(Grammar):
|
|||
@rule
|
||||
def export_statement(self) -> Rule:
|
||||
return alt(
|
||||
seq(self.EXPORT, self.class_declaration),
|
||||
seq(self.EXPORT, self.function_declaration),
|
||||
seq(self.EXPORT, self.let_statement),
|
||||
seq(self.EXPORT, self.export_list, self.SEMICOLON),
|
||||
group(self.EXPORT, sp, self.class_declaration),
|
||||
group(self.EXPORT, sp, self.function_declaration),
|
||||
group(self.EXPORT, sp, self.let_statement),
|
||||
group(self.EXPORT, sp, self.export_list, self.SEMICOLON),
|
||||
)
|
||||
|
||||
@rule
|
||||
def export_list(self) -> Rule:
|
||||
return (
|
||||
self.IDENTIFIER
|
||||
| seq(self.IDENTIFIER, self.COMMA)
|
||||
| seq(self.IDENTIFIER, self.COMMA, self.export_list)
|
||||
)
|
||||
return self.IDENTIFIER | seq(self.IDENTIFIER, self.COMMA, sp, self.export_list)
|
||||
|
||||
# Functions
|
||||
@rule("FunctionDecl")
|
||||
def function_declaration(self) -> Rule:
|
||||
return seq(
|
||||
self.FUN,
|
||||
mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.function),
|
||||
mark(self.function_parameters, field="parameters"),
|
||||
mark(opt(self.ARROW, self.type_expression), field="return_type"),
|
||||
group(
|
||||
self.FUN,
|
||||
sp,
|
||||
mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.function),
|
||||
sp,
|
||||
mark(self.function_parameters, field="parameters"),
|
||||
mark(opt(sp, self.ARROW, sp, self.type_expression), field="return_type"),
|
||||
),
|
||||
mark(self.block, field="body"),
|
||||
)
|
||||
|
||||
|
|
@ -147,14 +149,14 @@ class FineGrammar(Grammar):
|
|||
return group(
|
||||
self.LPAREN,
|
||||
indent(
|
||||
newline(),
|
||||
nl,
|
||||
opt(
|
||||
self._first_parameter
|
||||
| seq(self._first_parameter, self.COMMA)
|
||||
| group(self._first_parameter, self.COMMA, newline(), self._parameter_list)
|
||||
| group(self._first_parameter, self.COMMA, sp, self._parameter_list)
|
||||
),
|
||||
),
|
||||
newline(),
|
||||
nl,
|
||||
self.RPAREN,
|
||||
)
|
||||
|
||||
|
|
@ -164,18 +166,18 @@ class FineGrammar(Grammar):
|
|||
|
||||
@rule
|
||||
def _parameter_list(self) -> Rule:
|
||||
return self.parameter | seq(self.parameter, self.COMMA, newline(), self._parameter_list)
|
||||
return self.parameter | seq(self.parameter, self.COMMA, sp, self._parameter_list)
|
||||
|
||||
@rule("Parameter")
|
||||
def parameter(self) -> Rule:
|
||||
return seq(self.IDENTIFIER, self.COLON, self.type_expression)
|
||||
return group(self.IDENTIFIER, self.COLON, sp, self.type_expression)
|
||||
|
||||
# Block
|
||||
@rule("Block")
|
||||
def block(self) -> Rule:
|
||||
return alt(
|
||||
seq(self.LCURLY, self.RCURLY),
|
||||
group(self.LCURLY, indent(newline(), self.block_body), newline(), self.RCURLY),
|
||||
group(self.LCURLY, nl, self.RCURLY),
|
||||
seq(self.LCURLY, indent(br, self.block_body), br, self.RCURLY),
|
||||
)
|
||||
|
||||
@rule("BlockBody")
|
||||
|
|
@ -183,12 +185,12 @@ class FineGrammar(Grammar):
|
|||
return alt(
|
||||
self.expression,
|
||||
self._statement_list,
|
||||
seq(self._statement_list, newline(), self.expression),
|
||||
seq(self._statement_list, br, self.expression),
|
||||
)
|
||||
|
||||
@rule
|
||||
def _statement_list(self) -> Rule:
|
||||
return self._statement | seq(self._statement_list, self._statement)
|
||||
return self._statement | seq(self._statement_list, br, self._statement)
|
||||
|
||||
@rule
|
||||
def _statement(self) -> Rule:
|
||||
|
|
@ -204,15 +206,26 @@ class FineGrammar(Grammar):
|
|||
|
||||
@rule("LetStatement")
|
||||
def let_statement(self) -> Rule:
|
||||
return seq(self.LET, self.IDENTIFIER, self.EQUAL, self.expression, self.SEMICOLON)
|
||||
return group(
|
||||
self.LET,
|
||||
sp,
|
||||
self.IDENTIFIER,
|
||||
indent(sp, self.EQUAL, indent(sp, group(self.expression, self.SEMICOLON))),
|
||||
)
|
||||
|
||||
@rule("ReturnStatement")
|
||||
def return_statement(self) -> Rule:
|
||||
return seq(self.RETURN, self.expression, self.SEMICOLON) | seq(self.RETURN, self.SEMICOLON)
|
||||
return alt(
|
||||
group(self.RETURN, indent(sp, group(self.expression, self.SEMICOLON))),
|
||||
group(self.RETURN, self.SEMICOLON),
|
||||
)
|
||||
|
||||
@rule("ForStatement")
|
||||
def for_statement(self) -> Rule:
|
||||
return seq(self.FOR, self.iterator_variable, self.IN, self.expression, self.block)
|
||||
return group(
|
||||
group(self.FOR, sp, self.iterator_variable, sp, self.IN, sp, group(self.expression)),
|
||||
self.block,
|
||||
)
|
||||
|
||||
@rule("IteratorVariable")
|
||||
def iterator_variable(self) -> Rule:
|
||||
|
|
@ -224,7 +237,7 @@ class FineGrammar(Grammar):
|
|||
|
||||
@rule
|
||||
def while_statement(self) -> Rule:
|
||||
return seq(self.WHILE, self.expression, self.block)
|
||||
return group(group(self.WHILE, sp, self.expression), sp, self.block)
|
||||
|
||||
@rule
|
||||
def expression_statement(self) -> Rule:
|
||||
|
|
@ -238,24 +251,24 @@ class FineGrammar(Grammar):
|
|||
@rule("BinaryExpression")
|
||||
def binary_expression(self) -> Rule:
|
||||
return alt(
|
||||
self.expression + self.EQUAL + self.expression,
|
||||
self.expression + self.OR + self.expression,
|
||||
self.expression + self.AND + self.expression,
|
||||
self.expression + self.EQUALEQUAL + self.expression,
|
||||
self.expression + self.BANGEQUAL + self.expression,
|
||||
self.expression + self.LESS + self.expression,
|
||||
self.expression + self.LESSEQUAL + self.expression,
|
||||
self.expression + self.GREATER + self.expression,
|
||||
self.expression + self.GREATEREQUAL + self.expression,
|
||||
self.expression + self.PLUS + self.expression,
|
||||
self.expression + self.MINUS + self.expression,
|
||||
self.expression + self.STAR + self.expression,
|
||||
self.expression + self.SLASH + self.expression,
|
||||
group(self.expression, sp, self.EQUAL, sp, self.expression),
|
||||
group(self.expression, sp, self.OR, sp, self.expression),
|
||||
group(self.expression, sp, self.AND, sp, self.expression),
|
||||
group(self.expression, sp, self.EQUALEQUAL, sp, self.expression),
|
||||
group(self.expression, sp, self.BANGEQUAL, sp, self.expression),
|
||||
group(self.expression, sp, self.LESS, sp, self.expression),
|
||||
group(self.expression, sp, self.LESSEQUAL, sp, self.expression),
|
||||
group(self.expression, sp, self.GREATER, sp, self.expression),
|
||||
group(self.expression, sp, self.GREATEREQUAL, sp, self.expression),
|
||||
group(self.expression, sp, self.PLUS, sp, self.expression),
|
||||
group(self.expression, sp, self.MINUS, sp, self.expression),
|
||||
group(self.expression, sp, self.STAR, sp, self.expression),
|
||||
group(self.expression, sp, self.SLASH, sp, self.expression),
|
||||
)
|
||||
|
||||
@rule("IsExpression")
|
||||
def is_expression(self) -> Rule:
|
||||
return seq(self.expression, self.IS, self.pattern)
|
||||
return group(self.expression, sp, self.IS, indent(sp, self.pattern))
|
||||
|
||||
@rule
|
||||
def primary_expression(self) -> Rule:
|
||||
|
|
@ -271,9 +284,15 @@ class FineGrammar(Grammar):
|
|||
| self.object_constructor_expression
|
||||
| self.match_expression
|
||||
| seq(self.primary_expression, self.LPAREN, self.RPAREN)
|
||||
| seq(self.primary_expression, self.LPAREN, self._expression_list, self.RPAREN)
|
||||
| seq(self.primary_expression, self.DOT, self.IDENTIFIER)
|
||||
| seq(self.LPAREN, self.expression, self.RPAREN)
|
||||
| group(
|
||||
self.primary_expression,
|
||||
self.LPAREN,
|
||||
indent(nl, self._expression_list),
|
||||
nl,
|
||||
self.RPAREN,
|
||||
)
|
||||
| group(self.primary_expression, indent(nl, self.DOT, self.IDENTIFIER))
|
||||
| group(self.LPAREN, indent(nl, self.expression), nl, self.RPAREN)
|
||||
)
|
||||
|
||||
@rule("IdentifierExpression")
|
||||
|
|
@ -287,15 +306,26 @@ class FineGrammar(Grammar):
|
|||
@rule("ConditionalExpression")
|
||||
def conditional_expression(self) -> Rule:
|
||||
return (
|
||||
seq(self.IF, self.expression, self.block)
|
||||
| seq(self.IF, self.expression, self.block, self.ELSE, self.conditional_expression)
|
||||
| seq(self.IF, self.expression, self.block, self.ELSE, self.block)
|
||||
seq(group(self.IF, sp, self.expression), sp, self.block)
|
||||
| seq(
|
||||
group(self.IF, sp, self.expression),
|
||||
sp,
|
||||
self.block,
|
||||
sp,
|
||||
self.ELSE,
|
||||
sp,
|
||||
self.conditional_expression,
|
||||
)
|
||||
| seq(
|
||||
group(self.IF, sp, self.expression), sp, self.block, sp, self.ELSE, sp, self.block
|
||||
)
|
||||
)
|
||||
|
||||
@rule
|
||||
def list_constructor_expression(self) -> Rule:
|
||||
return seq(self.LSQUARE, self.RSQUARE) | seq(
|
||||
self.LSQUARE, self._expression_list, self.RSQUARE
|
||||
return alt(
|
||||
group(self.LSQUARE, nl, self.RSQUARE),
|
||||
group(self.LSQUARE, indent(nl, self._expression_list), nl, self.RSQUARE),
|
||||
)
|
||||
|
||||
@rule
|
||||
|
|
@ -303,34 +333,37 @@ class FineGrammar(Grammar):
|
|||
return (
|
||||
self.expression
|
||||
| seq(self.expression, self.COMMA)
|
||||
| seq(self.expression, self.COMMA, self._expression_list)
|
||||
| seq(self.expression, self.COMMA, sp, self._expression_list)
|
||||
)
|
||||
|
||||
@rule
|
||||
def match_expression(self) -> Rule:
|
||||
return seq(self.MATCH, self.expression, self.match_body)
|
||||
return group(group(self.MATCH, sp, self.expression), sp, self.match_body)
|
||||
|
||||
@rule("MatchBody")
|
||||
def match_body(self) -> Rule:
|
||||
return seq(self.LCURLY, self.RCURLY) | seq(self.LCURLY, self._match_arms, self.RCURLY)
|
||||
return alt(
|
||||
group(self.LCURLY, nl, self.RCURLY),
|
||||
group(self.LCURLY, indent(nl, self._match_arms), nl, self.RCURLY),
|
||||
)
|
||||
|
||||
@rule
|
||||
def _match_arms(self) -> Rule:
|
||||
return (
|
||||
self.match_arm
|
||||
| seq(self.match_arm, self.COMMA)
|
||||
| seq(self.match_arm, self.COMMA, self._match_arms)
|
||||
| seq(self.match_arm, self.COMMA, br, self._match_arms)
|
||||
)
|
||||
|
||||
@rule("MatchArm")
|
||||
def match_arm(self) -> Rule:
|
||||
return seq(self.pattern, self.ARROW, self.expression)
|
||||
return group(self.pattern, sp, self.ARROW, sp, self.expression)
|
||||
|
||||
@rule("Pattern")
|
||||
def pattern(self) -> Rule:
|
||||
return (
|
||||
seq(self.variable_binding, self._pattern_core, self.AND, self.expression)
|
||||
| seq(self.variable_binding, self._pattern_core)
|
||||
group(self.variable_binding, self._pattern_core, sp, self.AND, sp, self.expression)
|
||||
| group(self.variable_binding, self._pattern_core)
|
||||
| self._pattern_core
|
||||
)
|
||||
|
||||
|
|
@ -348,23 +381,26 @@ class FineGrammar(Grammar):
|
|||
|
||||
@rule
|
||||
def object_constructor_expression(self) -> Rule:
|
||||
return seq(self.NEW, self.type_identifier, self.field_list)
|
||||
return group(self.NEW, sp, self.type_identifier, self.field_list)
|
||||
|
||||
@rule
|
||||
def field_list(self) -> Rule:
|
||||
return seq(self.LCURLY, self.RCURLY) | seq(self.LCURLY, self.field_values, self.RCURLY)
|
||||
return alt(
|
||||
seq(self.LCURLY, self.RCURLY),
|
||||
group(self.LCURLY, indent(nl, self.field_values), nl, self.RCURLY),
|
||||
)
|
||||
|
||||
@rule
|
||||
def field_values(self) -> Rule:
|
||||
return (
|
||||
self.field_value
|
||||
| seq(self.field_value, self.COMMA)
|
||||
| seq(self.field_value, self.COMMA, self.field_values)
|
||||
| seq(self.field_value, self.COMMA, sp, self.field_values)
|
||||
)
|
||||
|
||||
@rule
|
||||
def field_value(self) -> Rule:
|
||||
return self.IDENTIFIER | seq(self.IDENTIFIER, self.COLON, self.expression)
|
||||
return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression))
|
||||
|
||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||
COMMENT = Terminal(
|
||||
|
|
@ -461,13 +497,17 @@ if __name__ == "__main__":
|
|||
from parser.emacs import emit_emacs_major_mode
|
||||
from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries
|
||||
|
||||
# TODO: Actually generate a lexer/parser for some runtime.
|
||||
grammar = FineGrammar()
|
||||
grammar.build_table()
|
||||
|
||||
lexer = grammar.compile_lexer()
|
||||
dump_lexer_table(lexer)
|
||||
|
||||
# Generate tree-sitter parser and emacs mode.
|
||||
ts_path = Path(__file__).parent / "tree-sitter-fine"
|
||||
emit_tree_sitter_grammar(grammar, ts_path)
|
||||
emit_tree_sitter_queries(grammar, ts_path)
|
||||
emit_emacs_major_mode(grammar, ts_path / "fine.el")
|
||||
|
||||
# TODO: Generate pretty-printer code.
|
||||
|
|
|
|||
2
makefile
2
makefile
|
|
@ -4,4 +4,4 @@
|
|||
.PHONY: test
|
||||
test:
|
||||
python3 ./parser/parser.py
|
||||
pdm run pytest
|
||||
pdm run python3 -m pytest
|
||||
|
|
|
|||
|
|
@ -540,7 +540,7 @@ class ErrorCollection:
|
|||
match action:
|
||||
case Reduce(name=name, count=count, transparent=transparent):
|
||||
name_str = name if not transparent else f"transparent node ({name})"
|
||||
action_str = f"pop {count} values off the stack and make a {name_str}"
|
||||
action_str = f"use the {count} values to make a {name_str}"
|
||||
case Shift():
|
||||
action_str = "consume the token and keep going"
|
||||
case Accept():
|
||||
|
|
@ -2680,6 +2680,7 @@ highlight = _Highlight()
|
|||
@dataclasses.dataclass
|
||||
class FormatMeta(SyntaxMeta):
|
||||
newline: str | None = None
|
||||
forced_break: bool = False
|
||||
indent: int | None = None
|
||||
group: bool = False
|
||||
|
||||
|
|
@ -2717,6 +2718,17 @@ def newline(text: str | None = None) -> Rule:
|
|||
return mark(Nothing, format=FormatMeta(newline=text))
|
||||
|
||||
|
||||
nl = newline("")
|
||||
|
||||
sp = newline(" ")
|
||||
|
||||
|
||||
def forced_break() -> Rule:
|
||||
return mark(Nothing, format=FormatMeta(forced_break=True))
|
||||
|
||||
|
||||
br = forced_break()
|
||||
|
||||
###############################################################################
|
||||
# Finally, the base class for grammars
|
||||
###############################################################################
|
||||
|
|
@ -2753,7 +2765,8 @@ class Grammar:
|
|||
|
||||
_precedence: dict[str, typing.Tuple[Assoc, int]]
|
||||
_generator: type[GenerateLR0]
|
||||
_terminals: list[Terminal]
|
||||
_terminals: dict[str, Terminal]
|
||||
_nonterminals: dict[str, NonTerminal]
|
||||
_trivia: list[Terminal]
|
||||
|
||||
def __init__(
|
||||
|
|
@ -2794,6 +2807,19 @@ class Grammar:
|
|||
raise ValueError(f"More than one terminal has the name '{n}'")
|
||||
terminals[n] = t
|
||||
|
||||
# Get the nonterminals.
|
||||
nonterminals = {}
|
||||
for _, nt in inspect.getmembers(self, lambda x: isinstance(x, NonTerminal)):
|
||||
if nt.name in nonterminals:
|
||||
raise ValueError(f"More than one nonterminal found with the name '{nt.name}'")
|
||||
|
||||
if nt.name in terminals:
|
||||
raise ValueError(
|
||||
f"'{nt.name}' is the name of both a Terminal and a NonTerminal rule"
|
||||
)
|
||||
|
||||
nonterminals[nt.name] = nt
|
||||
|
||||
# Resolve the trivia declarations correctly.
|
||||
resolved_trivia: list[Terminal] = []
|
||||
for t in trivia:
|
||||
|
|
@ -2809,12 +2835,22 @@ class Grammar:
|
|||
precedence_table = {}
|
||||
for prec, (associativity, symbols) in enumerate(precedence):
|
||||
for symbol in symbols:
|
||||
key = None
|
||||
if isinstance(symbol, Terminal):
|
||||
key = symbol.name
|
||||
if key is None:
|
||||
raise ValueError(f"{symbol} is a terminal that has not had a name set yet")
|
||||
elif isinstance(symbol, NonTerminal):
|
||||
key = symbol.name
|
||||
else:
|
||||
raise ValueError(f"{symbol} must be either a Token or a NonTerminal")
|
||||
elif isinstance(symbol, str):
|
||||
key = terminals.get(symbol)
|
||||
if key is None:
|
||||
key = nonterminals.get(symbol)
|
||||
|
||||
if key is None:
|
||||
raise ValueError(
|
||||
f"{symbol} must be either a Token or a NonTerminal, or the name of one"
|
||||
)
|
||||
|
||||
precedence_table[key] = (associativity, prec + 1)
|
||||
|
||||
|
|
@ -2826,18 +2862,19 @@ class Grammar:
|
|||
self._precedence = precedence_table
|
||||
self.start = start
|
||||
self._generator = generator
|
||||
self._terminals = list(terminals.values())
|
||||
self._terminals = terminals
|
||||
self._nonterminals = nonterminals
|
||||
self._trivia = resolved_trivia
|
||||
self.name = name
|
||||
|
||||
def terminals(self) -> list[Terminal]:
|
||||
return self._terminals
|
||||
return list(self._terminals.values())
|
||||
|
||||
def trivia_terminals(self) -> list[Terminal]:
|
||||
return self._trivia
|
||||
|
||||
def non_terminals(self) -> list[NonTerminal]:
|
||||
return [nt for _, nt in inspect.getmembers(self, lambda x: isinstance(x, NonTerminal))]
|
||||
return list(self._nonterminals.values())
|
||||
|
||||
def get_precedence(self, name: str) -> None | tuple[Assoc, int]:
|
||||
return self._precedence.get(name)
|
||||
|
|
@ -2858,9 +2895,8 @@ class Grammar:
|
|||
if start is None:
|
||||
start = self.start
|
||||
|
||||
rules = self.non_terminals()
|
||||
nonterminals = {rule.name: rule for rule in rules}
|
||||
transparents = {rule.name for rule in rules if rule.transparent}
|
||||
nonterminals = self._nonterminals
|
||||
transparents = {rule.name for rule in nonterminals.values() if rule.transparent}
|
||||
|
||||
grammar = {}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,6 +5,9 @@ import typing
|
|||
from . import parser
|
||||
from . import runtime
|
||||
|
||||
# TODO: I think I want a *force break*, i.e., a document which forces things
|
||||
# to not fit on one line.
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class Cons:
|
||||
|
|
@ -24,6 +27,11 @@ class NewLine:
|
|||
replace: str
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class ForceBreak:
|
||||
pass
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class Indent:
|
||||
amount: int
|
||||
|
|
@ -60,7 +68,7 @@ class Lazy:
|
|||
return Lazy(lambda: printer.convert_tree_to_document(tree))
|
||||
|
||||
|
||||
Document = None | Text | Literal | NewLine | Cons | Indent | Group | Lazy
|
||||
Document = None | Text | Literal | NewLine | ForceBreak | Cons | Indent | Group | Lazy
|
||||
|
||||
|
||||
class DocumentLayout:
|
||||
|
|
@ -127,6 +135,12 @@ def layout_document(doc: Document, width: int) -> DocumentLayout:
|
|||
# all fit.
|
||||
return True
|
||||
|
||||
case ForceBreak():
|
||||
# If we're in a flattened chunk then force it to break by
|
||||
# returning false here, otherwise we're at the end of the
|
||||
# line and yes, whatever you were asking about has fit.
|
||||
return not chunk.flat
|
||||
|
||||
case Cons(left, right):
|
||||
stack.append(chunk.with_document(right))
|
||||
stack.append(chunk.with_document(left))
|
||||
|
|
@ -180,6 +194,11 @@ def layout_document(doc: Document, width: int) -> DocumentLayout:
|
|||
output.append("\n" + (chunk.indent * " "))
|
||||
column = chunk.indent
|
||||
|
||||
case ForceBreak():
|
||||
# TODO: Custom newline expansion, custom indent segments.
|
||||
output.append("\n" + (chunk.indent * " "))
|
||||
column = chunk.indent
|
||||
|
||||
case Cons(left, right):
|
||||
chunks.append(chunk.with_document(right))
|
||||
chunks.append(chunk.with_document(left))
|
||||
|
|
@ -292,12 +311,14 @@ class Matcher:
|
|||
|
||||
elif name[0] == "n":
|
||||
replace = self.newline_replace[name]
|
||||
print(f"!!!! {name} -> {repr(replace)}")
|
||||
child = cons(child, NewLine(replace))
|
||||
|
||||
elif name[0] == "p":
|
||||
child = cons(NewLine(""), child)
|
||||
|
||||
elif name[0] == "f":
|
||||
child = cons(child, ForceBreak())
|
||||
|
||||
else:
|
||||
pass # Reducing a transparent rule probably.
|
||||
|
||||
|
|
@ -375,8 +396,8 @@ class Printer:
|
|||
visited: set[str] = set()
|
||||
group_count = 0
|
||||
indent_amounts: dict[str, int] = {}
|
||||
done_newline = False
|
||||
newline_map: dict[str, str] = {}
|
||||
done_forced_break = False
|
||||
|
||||
def compile_nonterminal(name: str, rule: parser.NonTerminal):
|
||||
if name not in visited:
|
||||
|
|
@ -388,7 +409,7 @@ class Printer:
|
|||
def compile_production(production: parser.FlattenedWithMetadata) -> list[str]:
|
||||
nonlocal group_count
|
||||
nonlocal indent_amounts
|
||||
nonlocal done_newline
|
||||
nonlocal done_forced_break
|
||||
|
||||
result = []
|
||||
for item in production:
|
||||
|
|
@ -439,6 +460,13 @@ class Printer:
|
|||
|
||||
tx_children.append(newline_rule_name)
|
||||
|
||||
if pretty.forced_break:
|
||||
if not done_forced_break:
|
||||
generated_grammar.append(("forced_break", []))
|
||||
done_forced_break = True
|
||||
|
||||
tx_children.append("forced_break")
|
||||
|
||||
# If it turned out to have formatting meta then we will
|
||||
# have replaced or augmented the translated children
|
||||
# appropriately. Otherwise, if it's highlighting meta or
|
||||
|
|
|
|||
|
|
@ -359,7 +359,7 @@ def test_lexer_compile():
|
|||
def foo(self):
|
||||
return self.IS
|
||||
|
||||
start = foo
|
||||
start = "foo"
|
||||
|
||||
IS = Terminal("is")
|
||||
AS = Terminal("as")
|
||||
|
|
@ -392,7 +392,7 @@ def test_lexer_numbers(n: float):
|
|||
def number(self):
|
||||
return self.NUMBER
|
||||
|
||||
start = number
|
||||
start = "number"
|
||||
|
||||
NUMBER = Terminal(
|
||||
Re.seq(
|
||||
|
|
|
|||
|
|
@ -1,6 +1,22 @@
|
|||
import typing
|
||||
|
||||
from parser.parser import Grammar, Re, Terminal, rule, opt, group, newline, alt, indent
|
||||
from parser.parser import (
|
||||
Grammar,
|
||||
Re,
|
||||
Terminal,
|
||||
rule,
|
||||
opt,
|
||||
group,
|
||||
newline,
|
||||
alt,
|
||||
indent,
|
||||
seq,
|
||||
Rule,
|
||||
Assoc,
|
||||
sp,
|
||||
nl,
|
||||
br,
|
||||
)
|
||||
|
||||
import parser.runtime as runtime
|
||||
import parser.wadler as wadler
|
||||
|
|
@ -57,10 +73,7 @@ class JsonGrammar(Grammar):
|
|||
self.value + self.COMMA + newline(" ") + self._array_items,
|
||||
)
|
||||
|
||||
BLANKS = Terminal(
|
||||
Re.set(" ", "\t", "\r", "\n").plus(),
|
||||
is_format_blank=True,
|
||||
)
|
||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||
LCURLY = Terminal("{")
|
||||
RCURLY = Terminal("}")
|
||||
COMMA = Terminal(",")
|
||||
|
|
@ -103,6 +116,8 @@ def flatten_document(doc: wadler.Document, src: str) -> list:
|
|||
match doc:
|
||||
case wadler.NewLine(replace):
|
||||
return [f"<newline {repr(replace)}>"]
|
||||
case wadler.ForceBreak():
|
||||
return ["<forced break>"]
|
||||
case wadler.Indent():
|
||||
return [[f"<indent {doc.amount}>", flatten_document(doc.doc, src)]]
|
||||
case wadler.Text(start, end):
|
||||
|
|
@ -204,3 +219,65 @@ def test_layout_basic():
|
|||
}
|
||||
""".strip()
|
||||
)
|
||||
|
||||
|
||||
def test_forced_break():
|
||||
class TG(Grammar):
|
||||
start = "root"
|
||||
trivia = ["BLANKS"]
|
||||
|
||||
@rule
|
||||
def root(self):
|
||||
return self._expression
|
||||
|
||||
@rule
|
||||
def _expression(self):
|
||||
return self.word | self.list
|
||||
|
||||
@rule
|
||||
def list(self):
|
||||
return group(self.LPAREN, indent(nl, self._expressions), nl, self.RPAREN)
|
||||
|
||||
@rule
|
||||
def _expressions(self):
|
||||
return self._expression | seq(self._expressions, sp, self._expression)
|
||||
|
||||
@rule
|
||||
def word(self):
|
||||
return self.OK | seq(self.BREAK, br, self.BREAK)
|
||||
|
||||
LPAREN = Terminal("(")
|
||||
RPAREN = Terminal(")")
|
||||
OK = Terminal("ok")
|
||||
BREAK = Terminal("break")
|
||||
|
||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||
|
||||
g = TG()
|
||||
g_lexer = g.compile_lexer()
|
||||
g_parser = runtime.Parser(g.build_table())
|
||||
|
||||
text = "((ok ok) (ok break break ok) (ok ok ok ok))"
|
||||
|
||||
tree, errors = g_parser.parse(runtime.GenericTokenStream(text, g_lexer))
|
||||
assert errors == []
|
||||
assert tree is not None
|
||||
|
||||
printer = wadler.Printer(g)
|
||||
result = printer.format_tree(tree, 200).apply_to_source(text)
|
||||
|
||||
assert (
|
||||
result
|
||||
== """
|
||||
(
|
||||
(ok ok)
|
||||
(
|
||||
ok
|
||||
break
|
||||
break
|
||||
ok
|
||||
)
|
||||
(ok ok ok ok)
|
||||
)
|
||||
""".strip()
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue