From 228ca719f0ca016e8bbbd98e503c81eccdc02a3c Mon Sep 17 00:00:00 2001 From: John Doty Date: Sat, 6 Apr 2024 18:25:57 -0700 Subject: [PATCH] [fine] Start on a pretty printer ala https://justinpombrio.net/2024/02/23/a-twist-on-Wadlers-printer.html --- Cargo.lock | 1 + fine/Cargo.toml | 1 + fine/src/format/mod.rs | 5 ++ fine/src/format/notation.rs | 65 +++++++++++++++ fine/src/format/print.rs | 152 ++++++++++++++++++++++++++++++++++++ fine/src/lib.rs | 1 + fine/src/parser.rs | 4 - 7 files changed, 225 insertions(+), 4 deletions(-) create mode 100644 fine/src/format/mod.rs create mode 100644 fine/src/format/notation.rs create mode 100644 fine/src/format/print.rs diff --git a/Cargo.lock b/Cargo.lock index f971a182..61fe7c7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -732,6 +732,7 @@ dependencies = [ "salsa-2022", "syn 2.0.52", "thiserror", + "unicode-width", ] [[package]] diff --git a/fine/Cargo.toml b/fine/Cargo.toml index 6abccf48..52489575 100644 --- a/fine/Cargo.toml +++ b/fine/Cargo.toml @@ -15,4 +15,5 @@ syn = "2.0.47" [dependencies] thiserror = "1.0.56" +unicode-width = "=0.1.11" salsa = { git = "https://github.com/salsa-rs/salsa.git", package = "salsa-2022" } \ No newline at end of file diff --git a/fine/src/format/mod.rs b/fine/src/format/mod.rs new file mode 100644 index 00000000..ea6a8985 --- /dev/null +++ b/fine/src/format/mod.rs @@ -0,0 +1,5 @@ +mod notation; +mod print; + +pub use notation::{flat, indent, nl, txt, Notation}; +pub use print::pretty_print; diff --git a/fine/src/format/notation.rs b/fine/src/format/notation.rs new file mode 100644 index 00000000..8b571835 --- /dev/null +++ b/fine/src/format/notation.rs @@ -0,0 +1,65 @@ +use std::ops::{BitAnd, BitOr}; +use std::rc::Rc; + +#[derive(Debug, Clone)] +pub struct Notation(pub(crate) Rc); + +#[derive(Debug, Clone)] +pub enum NotationInner { + Newline, + Text(String, u32), + Flat(Notation), + Indent(u32, Notation), + Concat(Notation, Notation), + Choice(Notation, Notation), +} + +/// Display a newline +pub fn nl() -> Notation { + Notation(Rc::new(NotationInner::Newline)) +} + +/// Display text exactly as-is. The text should not contain a newline! +pub fn txt(s: impl ToString) -> Notation { + let string = s.to_string(); + let width = unicode_width::UnicodeWidthStr::width(&string as &str) as u32; + Notation(Rc::new(NotationInner::Text(string, width))) +} + +/// Use the leftmost option of every choice in the contained Notation. +/// If the contained Notation follows the recommendation of not +/// putting newlines in the left-most options of choices, then this +/// `flat` will be displayed all on one line. +pub fn flat(notation: Notation) -> Notation { + Notation(Rc::new(NotationInner::Flat(notation))) +} + +/// Increase the indentation level of the contained notation by the +/// given width. The indentation level determines the number of spaces +/// put after `Newline`s. (It therefore doesn't affect the first line +/// of a notation.) +pub fn indent(indent: u32, notation: Notation) -> Notation { + Notation(Rc::new(NotationInner::Indent(indent, notation))) +} + +impl BitAnd for Notation { + type Output = Notation; + + /// Display both notations. The first character of the right + /// notation immediately follows the last character of the + /// left notation. + fn bitand(self, other: Notation) -> Notation { + Notation(Rc::new(NotationInner::Concat(self, other))) + } +} + +impl BitOr for Notation { + type Output = Notation; + + /// If inside a `flat`, _or_ the first line of the left notation + /// fits within the required width, then display the left + /// notation. Otherwise, display the right notation. + fn bitor(self, other: Notation) -> Notation { + Notation(Rc::new(NotationInner::Choice(self, other))) + } +} diff --git a/fine/src/format/print.rs b/fine/src/format/print.rs new file mode 100644 index 00000000..b454aa3b --- /dev/null +++ b/fine/src/format/print.rs @@ -0,0 +1,152 @@ +use super::notation::{Notation, NotationInner}; + +pub fn pretty_print(notation: &Notation, printing_width: u32) -> String { + let mut printer = PrettyPrinter::new(notation, printing_width); + printer.print() +} + +#[derive(Debug, Clone, Copy)] +struct Chunk<'a> { + notation: &'a Notation, + indent: u32, + flat: bool, +} + +impl<'a> Chunk<'a> { + fn with_notation(self, notation: &'a Notation) -> Chunk<'a> { + Chunk { + notation, + indent: self.indent, + flat: self.flat, + } + } + + fn indented(self, indent: u32) -> Chunk<'a> { + Chunk { + notation: self.notation, + indent: self.indent + indent, + flat: self.flat, + } + } + + fn flat(self) -> Chunk<'a> { + Chunk { + notation: self.notation, + indent: self.indent, + flat: true, + } + } +} + +struct PrettyPrinter<'a> { + /// Maximum line width that we'll try to stay within + width: u32, + /// Current column position + col: u32, + /// A stack of chunks to print. The _top_ of the stack is the + /// _end_ of the vector, which represents the _earliest_ part + /// of the document to print. + chunks: Vec>, +} + +impl<'a> PrettyPrinter<'a> { + fn new(notation: &'a Notation, width: u32) -> PrettyPrinter<'a> { + let chunk = Chunk { + notation, + indent: 0, + flat: false, + }; + PrettyPrinter { + width, + col: 0, + chunks: vec![chunk], + } + } + + fn print(&mut self) -> String { + use NotationInner::*; + + let mut output = String::new(); + while let Some(chunk) = self.chunks.pop() { + match chunk.notation.0.as_ref() { + Text(text, width) => { + output.push_str(text); + self.col += width; + } + Newline => { + output.push('\n'); + for _ in 0..chunk.indent { + output.push(' '); + } + self.col = chunk.indent; + } + Flat(x) => self.chunks.push(chunk.with_notation(x).flat()), + Indent(i, x) => self.chunks.push(chunk.with_notation(x).indented(*i)), + Concat(x, y) => { + self.chunks.push(chunk.with_notation(y)); + self.chunks.push(chunk.with_notation(x)); + } + Choice(x, y) => { + if chunk.flat || self.fits(chunk.with_notation(x)) { + self.chunks.push(chunk.with_notation(x)); + } else { + self.chunks.push(chunk.with_notation(y)); + } + } + } + } + output + } + + fn fits(&self, chunk: Chunk<'a>) -> bool { + use NotationInner::*; + + let mut remaining = if self.col <= self.width { + self.width - self.col + } else { + return false; + }; + let mut stack = vec![chunk]; + let mut chunks = &self.chunks as &[Chunk]; + + loop { + let chunk = match stack.pop() { + Some(chunk) => chunk, + None => match chunks.split_last() { + None => return true, + Some((chunk, more_chunks)) => { + chunks = more_chunks; + *chunk + } + }, + }; + + match chunk.notation.0.as_ref() { + Newline => return true, + Text(_text, text_width) => { + if *text_width <= remaining { + remaining -= *text_width; + } else { + return false; + } + } + Flat(x) => stack.push(chunk.with_notation(x).flat()), + Indent(i, x) => stack.push(chunk.with_notation(x).indented(*i)), + Concat(x, y) => { + stack.push(chunk.with_notation(y)); + stack.push(chunk.with_notation(x)); + } + Choice(x, y) => { + if chunk.flat { + stack.push(chunk.with_notation(x)); + } else { + // Relies on the rule that for every choice + // `x | y`, the first line of `y` is no longer + // than the first line of `x`. + stack.push(chunk.with_notation(y)); + } + } + } + } + } +} diff --git a/fine/src/lib.rs b/fine/src/lib.rs index b914ca6e..752480a1 100644 --- a/fine/src/lib.rs +++ b/fine/src/lib.rs @@ -5,6 +5,7 @@ use program::{Module, Program, StandardModuleLoader}; use vm::{eval, Context}; pub mod compiler; +pub mod format; pub mod parser; pub mod program; pub mod semantics; diff --git a/fine/src/parser.rs b/fine/src/parser.rs index 557421e5..1775a497 100644 --- a/fine/src/parser.rs +++ b/fine/src/parser.rs @@ -1494,10 +1494,6 @@ mod tests { // them to key function definitions and the type checker and use them // to link classes to their definitions, etc. It's important that an // Option be *extremely* cheap to manipulate. - // - // TODO: This optimization isn't as good as it might be because tokens are - // huge so Child is huge no matter what we do. If we retain - // tokens out of line then we can take full advantage of this. assert_eq!(4, std::mem::size_of::>()); }