diff --git a/pom.xml b/pom.xml index f6b71401137..87f975c7ad1 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.rascalmpl rascal - 0.42.2-RC2-SNAPSHOT + 0.43.0-SNAPSHOT jar diff --git a/src/org/rascalmpl/library/IO.rsc b/src/org/rascalmpl/library/IO.rsc index 5e3255b4f67..e37a598ae2d 100644 --- a/src/org/rascalmpl/library/IO.rsc +++ b/src/org/rascalmpl/library/IO.rsc @@ -140,8 +140,9 @@ Append a textual representation of some values to an existing or a newly created * All other values are printed as-is. * Each value is terminated by a newline character. -The existing file can be stored using any character set possible, if you know the character set, please use ((appendToFileEnc)). -Else the same method of deciding the character set is used as in ((readFile)). +The existing file can be stored using any character set possible. +If you know the character set, please use the `charset` keyword parameter. +Otherwise, the same method of deciding the character set is used as in ((readFile)). } @pitfalls{ * The same encoding pitfalls as the ((readFile)) function. @@ -239,6 +240,8 @@ public java bool isDirectory(loc file); See ((IO-iprintExp)) for a version that returns its argument as result and ((IO-iprintln)) for a version that adds a newline and ((IO-iprintToFile)) for a version that prints to a file. + +With a negative `lineLimit` the limit is ignored and the entire value will be printed. } @examples{ ```rascal-shell @@ -254,6 +257,7 @@ public java void iprint(value arg, int lineLimit = 1000); See ((IO-iprint)) for a version that displays the result on the console and ((IO-iprintExp)) for a version that returns its argument as result and ((IO-iprintln)) for a version that adds a newline. + } @examples{ ```rascal-shell @@ -308,6 +312,8 @@ and ((IO-iprint)) for a version that does not add a newline. 
By default we only print the first 1000 lines, if you want to print larger values, either use ((ValueIO-writeTextValueFile)) or change the limit with the lineLimit parameter. + +With a negative `lineLimit` the limit is ignored and the entire value will be printed. } @examples{ ```rascal-shell diff --git a/src/org/rascalmpl/library/List.rsc b/src/org/rascalmpl/library/List.rsc index aa7be84cc1c..00abd9c5b26 100644 --- a/src/org/rascalmpl/library/List.rsc +++ b/src/org/rascalmpl/library/List.rsc @@ -20,7 +20,6 @@ module List import Exception; import Map; - @synopsis{Concatenate a list of lists.} @examples{ ```rascal-shell @@ -261,7 +260,6 @@ intercalate(", ", ["zebra", "elephant", "snake", "owl"]); str intercalate(str sep, list[value] l) = "<}>"; - @synopsis{Intersperses a list of values with a separator.} @examples{ ```rascal-shell @@ -272,9 +270,8 @@ intersperse(1, []); intersperse([], [1]); ``` } -list[&T] intersperse(&T sep, list[&T] xs) = - (isEmpty(xs))? [] : ([head(xs)] | it + [sep,x] | x <- tail(xs)); - +list[&T] intersperse(&T sep, list[&T] xs) = + [x, sep | &T x <- xs][..-1]; @synopsis{Test whether a list is empty.} @description{ @@ -655,6 +652,29 @@ tuple[list[&T],list[&T]] split(list[&T] l) { return ; } +@synopsis{Groups sublists for consecutive elements which are `similar`} +@description{ +This function does not change the order of the elements. Only elements +which are similar end-up in a sub-list with more than one element. The +elements which are not similar to their siblings, end up in singleton +lists. 
+} +@examples{ +```rascal-shell +import List; +bool bothEvenOrBothOdd(int a, int b) = (a % 2 == 0 && b % 2 == 0) || (a % 2 == 1 && b % 2 == 1); +group([1,7,3,6,2,9], bothEvenOrBothOdd); +``` +} +public list[list[&T]] group(list[&T] input, bool (&T a, &T b) similar) { + lres = while ([hd, *tl] := input) { + sim = [hd, *takeWhile(tl, bool (&T a) { return similar(a, hd); })]; + append sim; + input = drop(size(sim), input); + } + + return lres; +} @synopsis{Sum the elements of a list.} @examples{ diff --git a/src/org/rascalmpl/library/Type.rsc b/src/org/rascalmpl/library/Type.rsc index 8c6cafe1997..c3511cfda0b 100644 --- a/src/org/rascalmpl/library/Type.rsc +++ b/src/org/rascalmpl/library/Type.rsc @@ -30,7 +30,7 @@ The ((subtype)) relation of Rascal has all the mathematical properties of a _fin This is a core design principle of Rascal with the following benefits: * Type inference has a guaranteed least or greatest solution, always. This means that constraints are always solvable in an unambiguous manner. * A _principal type_ can always be computed, which is a most precise and unique solution of a type inference problem. Without the lattice, solution candidates could become incomparable and thus ambiguous. Without -this principal type property, type inference is predictable for programmers. +this principal type property, type inference is unpredictable for programmers. * Solving type inference constraints can be implemented efficiently. The algorithm, based on ((lub)) and ((glb)), makes progress _deterministically_ and does not require backtracking to find better solutions. Since the lattice is not very deep, fixed-point solutions are always found quickly. 
diff --git a/src/org/rascalmpl/library/analysis/diff/edits/ExecuteTextEdits.rsc b/src/org/rascalmpl/library/analysis/diff/edits/ExecuteTextEdits.rsc index f1c41d5c695..4ccf4641949 100644 --- a/src/org/rascalmpl/library/analysis/diff/edits/ExecuteTextEdits.rsc +++ b/src/org/rascalmpl/library/analysis/diff/edits/ExecuteTextEdits.rsc @@ -46,13 +46,13 @@ void executeFileSystemChange(changed(loc file, list[TextEdit] edits)) { } str executeTextEdits(str content, list[TextEdit] edits) { - assert isSorted(edits, less=bool (TextEdit e1, TextEdit e2) { - return e1.range.offset < e2.range.offset; - }); + // assert isSorted(edits, less=bool (TextEdit e1, TextEdit e2) { + // return e1.range.offset < e2.range.offset; + // }); - for (replace(loc range, str repl) <- reverse(edits)) { - content = ""; - } + int cursor = 0; - return content; + // linear-time streamed reconstruction of the entire text + return "< + cursor = range.offset + range.length;}>"; } diff --git a/src/org/rascalmpl/library/analysis/diff/edits/HiFiLayoutDiff.rsc b/src/org/rascalmpl/library/analysis/diff/edits/HiFiLayoutDiff.rsc index 65a5b722ca4..a72905fc087 100644 --- a/src/org/rascalmpl/library/analysis/diff/edits/HiFiLayoutDiff.rsc +++ b/src/org/rascalmpl/library/analysis/diff/edits/HiFiLayoutDiff.rsc @@ -27,6 +27,8 @@ module analysis::diff::edits::HiFiLayoutDiff extend analysis::diff::edits::HiFiTreeDiff; import ParseTree; // this should not be necessary because imported by HiFiTreeDiff import String; // this should not be be necessary because imported by HiFiTreeDiff +import lang::rascal::grammar::definition::Characters; +import IO; @synopsis{Normalization choices for case-insensitive literals.} data CaseInsensitivity @@ -56,7 +58,7 @@ list[TextEdit] layoutDiff(Tree original, Tree formatted, bool recoverComments = list[TextEdit] rec( t:appl(prod(Symbol tS, _, _), list[Tree] tArgs), // layout is not necessarily parsed with the same rules (i.e. comments are lost!) 
u:appl(prod(Symbol uS, _, _), list[Tree] uArgs)) - = [replace(t@\loc, recoverComments ? learnComments(t, u) : "") | tArgs != uArgs, "" != "" /* avoid useless edits */] + = [replace(t@\loc, repl) | tArgs != uArgs, str repl := (recoverComments ? learnComments(t, u) : ""), repl != "" /* do not edit anything if nothing has changed */] when delabel(tS) is layouts, delabel(uS) is layouts, @@ -106,22 +108,31 @@ list[TextEdit] layoutDiff(Tree original, Tree formatted, bool recoverComments = default list[TextEdit] rec( Tree t:appl(Production p, list[Tree] argsA), appl(p /* must be the same by the above assert */, list[Tree] argsB)) - = [*rec(a, b) | <- zip2(argsA, argsB)]; + = [*rec(argsA[i], argsB[i]) | i <- [0..size(argsA)]]; // first add required locations to layout nodes - original = reposition(original, markLit=true, markLayout=true, markSubLayout=true); + // TODO: check if indeed repositioning is never needed + // original = reposition(original, markLit=true, markLayout=true, markSubLayout=true); return rec(original, formatted); } +private Symbol newlineClass = \char-class([range(10,10)]); + @synopsis{Make sure the new layout still contains all the source code comments of the original layout} @description{ -This algorithm uses the @category("Comments") tag to detect source code comments inside layout substrings. If the original +This algorithm uses the `@category(/[cC]omments/)` tag to detect source code comments inside layout substrings. If the original layout contains comments, we re-introduce the comments at the expected level of indentation. New comments present in the replacement are kept and will overwrite any original comments. -This trick is complicated by the syntax of multiline comments and single line comments that have -to end with a newline. +There are corner cases with respect to the original comments: +* the single line comment that does not end with a newline itself, yet it must always end with a newline after it. 
+* multiple single line comments after each other + +Then there are corner cases with respect to the replacement whitespace: +* the last line of the replacement whitespace is special. This is the indentation to use for all comments. +* but there could be no newlines in the replacement whitespace; and still there is a single line comment to be included. +Now we need to infer an indentation level for what follows the comment from "thin air". } @benefits{ * if comments are kepts and formatted by tools like Tree2Box, then this algorithm does not overwrite these. @@ -132,7 +143,14 @@ to end with a newline. * if comments are not marked with `@category("Comment")` in the original grammar, then this algorithm recovers nothing. } private str learnComments(Tree original, Tree replacement) { - originalComments = ["" | /c:appl(prod(_,_,{\tag("category"(/^[Cc]omment$/)), *_}), _) := original]; + bool mustEndWithNewline(lit("\n")) = true; + bool mustEndWithNewline(conditional(Symbol s, _)) = mustEndWithNewline(s); + // if a comment can not contain newline characters, but everything else, then it must be followed by one: + bool mustEndWithNewline(\iter(Symbol cc:\char-class(_))) = intersection(cc, newlineClass) != newlineClass; + bool mustEndWithNewline(\iter-star(Symbol cc:\char-class(_))) = intersection(cc, newlineClass) != newlineClass; + default bool mustEndWithNewline(_) = false; + + originalComments = [ | /c:appl(prod(_,[*_,Symbol lastSym],{\tag("category"(/^[Cc]omment$/)), *_}), _) := original, str s := ""]; if (originalComments == []) { // if the original did not contain comments, stick with the replacements @@ -146,23 +164,42 @@ private str learnComments(Tree original, Tree replacement) { return ""; } - // At this point, we know that: (a) comments are not present in the replacement and (b) they used to be there in the original. - // So the old comments are going to be the new output. however, we want to learn indentation from the replacement. 
+ // At this point, we know that: + // (a) comments are not present in the replacement and + // (b) they used to be there in the original. + // So the old comments are going to be copied to the new output. + // But, we want to indent them using the style of the replacement. + + // The last line of the replacement string typically has the indentation for the construct that follows: + // | // a comment + // | if (true) { + // ^^^^ + // newIndent + // + // However, if the replacement string is on a single line, then we don't have the indentation + // for the string on the next line readily available. In this case we indent the next line + // to the start column of the replacement layout, as a proxy. + + str replString = ""; + str newIndent = split("\n", replString)[-1] ? ""; - // Drop the last newline of single-line comments, because we don't want two newlines in the output for every comment: - str dropEndNl(str line:/^.*\n$/) = (line[..-1]); - default str dropEndNl(str line) = line; + if (/\n/ !:= replString) { + // no newline in the repl string, so no indentation available for what follows the comment... + newIndent = " <}>"; + } - // the first line of the replacement ,is the indentation to use. - str replString = ""; - str replacementIndent = /^\n+$/ !:= replString - ? split("\n", replString)[0] - : ""; - - // trimming each line makes sure we forget about the original indentation, and drop accidental spaces after comment lines - return replString + indent(replacementIndent, - " - '<}>"[..-1], indentFirstLine=false) + replString; + // we always place sequential comments vertically, because we don't know if we are dealing + // with a single line comment that has to end with newline by follow restriction or by a literal "\n". + // TODO: a deeper analysis of the comment rule that's in use could also be used to discover this. 
+ str trimmedOriginals = " <- originalComments) {> + '<}><}>"; + + // we wrap the comment with the formatted whitespace to assure the proper indentation + // of its first line, and the proper indentation of what comes after this layout node + return replString + + indent(newIndent, trimmedOriginals, indentFirstLine=false) + + newIndent + ; } private Symbol delabel(label(_, Symbol t)) = t; diff --git a/src/org/rascalmpl/library/lang/box/syntax/Box.rsc b/src/org/rascalmpl/library/lang/box/syntax/Box.rsc index bf6ef897bb9..ae9928ebcc6 100644 --- a/src/org/rascalmpl/library/lang/box/syntax/Box.rsc +++ b/src/org/rascalmpl/library/lang/box/syntax/Box.rsc @@ -10,6 +10,8 @@ @synopsis{An abstract declarative language for two dimensional text layout} module lang::box::\syntax::Box +import List; + @synopsis{Every kind of boxes encodes one or more parameterized two-dimensional text constraints.} @description{ * `H` puts their elements next to each other one the same line separated by `hs` spaces. @@ -22,9 +24,11 @@ module lang::box::\syntax::Box * `SPACE` produces `space` spaces * `L` produces A literal word. This word may only contain printable characters and no spaces; this is a required property that the formatting algorithm depends on for correctness. * `U` splices its contents in the surrounding box, for automatic flattening of overly nested structures in syntax trees. -* `G` is an additional group-by feature that reduces tot the above core features -* `SL` is a convenience box for separated syntax lists based on `G` -* `NULL()` is the group that will disappear from its context, useful for skipping content. It is based on the `U` box. +* `G` is an additional group-by feature for `list[Box]` that reduces to the above core features. You can use it to wrap another +box around every `gs` elements. +* `AG` is an additional group-by feature for array `Row`s that reduces to the above core features. 
You can use it to wrap an `R` row +around every `gs` elements and then construct an `A` around those rows. +* `NULL()` is the group that will disappear from its context, useful for skipping content. It is based on the `U` box. } @benefits{ * Box expressions are a declarative mechanism to express formatting rules that are flexible enough to deal @@ -38,20 +42,35 @@ set on every `I` Box according to the current preferences of the user. * `U(boxes)` is rendered as `H(boxes)` if it's the outermost Box. } data Box(int hs=1, int vs=0, int is=4) - = H(list[Box] boxes) - | V(list[Box] boxes) - | HOV(list[Box] boxes) - | HV(list[Box] boxes) - | I(list[Box] boxes) - | WD(list[Box] boxes) - | A(list[Row] rows, list[Alignment] columns=[l() | [R(list[Box] cs), *_] := rows, _ <- cs] /* learns the amount of columns from the first row */) + = H_(list[Box] boxes) + | V_(list[Box] boxes) + | HOV_(list[Box] boxes) + | HV_(list[Box] boxes) + | I_(list[Box] boxes) + | WD_(list[Box] boxes) + | A_(list[Row] rows, Box rs=NULL(), list[Alignment] columns=[]) + | AG_(list[Box] boxes, int gs=2, list[Alignment] columns=[], Box rs=NULL()) | SPACE(int space) | L(str word) - | U(list[Box] boxes) - | G(list[Box] boxes, Box(list[Box]) op = H, int gs=2) + | U_(list[Box] boxes) + | G_(list[Box] boxes, bool backwards=false, int gs=2, Box op = H([])) | NULL() ; +Box H(Box boxes..., int hs=1) = H_(boxes, hs=hs); +Box V(Box boxes..., int vs=0) = V_(boxes, vs=vs); +Box HOV(Box boxes..., int hs=1, int vs=0) = HOV_(boxes, hs=hs, vs=vs); +Box HV(Box boxes..., int hs=1, int vs=0) = HV_(boxes, hs=hs, vs=vs); +Box I(Box boxes...) = I_(boxes); +Box WD(Box boxes...) = WD_(boxes); +Box A(Row rows..., Box rs=NULL(), list[Alignment] columns=[]) + = A_(rows, rs=rs, columns=columns); +Box AG(Box boxes..., int gs=2, list[Alignment] columns=[], Box rs=NULL()) + = AG_(boxes, gs=gs, columns=columns, rs=rs); +Box U(Box boxes...) 
= U_(boxes); +Box G(Box boxes..., bool backwards=false, int gs=2, Box op = H([])) + = G_(boxes, backwards=backwards, gs=gs, op=op); + @synopsis{A row is a list of boxes that go into an `A` array/table.} @description{ Rows do not have parameters. These are set on the `A` level instead, @@ -59,6 +78,8 @@ or per cell Box. } data Row = R(list[Box] cells); +// Row R(Box cells...) = _R(cells); + data Alignment = l() | r() | c(); @synopsis{NULL can be used to return a Box that will completely disappear in the surrounding context.} @@ -81,4 +102,74 @@ algorithm starts counting boxes and widths. * Do not use `NULL` for empty Row cells, unless you do want your cells aligned to the left and filled up to the right with empty H boxes. * NULL will be formatted as `H([])` if it's the outermost Box. } -Box NULL() = U([]); \ No newline at end of file +Box NULL() = U([]); + +@synopsis{Convenience box for adding separators to an existing box list} +@description{ +Each element is wrapped by the `op` operator together with the next separator. +The resulting list is wrapped by a G box, of which the elements will be spliced +into their context. +} +Box SL(list[Box] boxes, Box sep, Box op = H([], hs=0)) + = G([b, sep | b <- boxes][..-1], op=op, gs=2); + +@synopsis{Flatten and fold U and G boxes to simplify the Box structure} +@description{ +U and G and AG boxes greatly simplify the Box tree before it is formatted. This +happens "just-in-time" for efficiency reasons. However, from a Box tree +with many U and G boxes it can become hard to see what the actual formatting +constraints are going to be. + +This function applies the semantics of G and U and returns a Box that renders +exactly the same output, but with a lot less nested structure. +} +@benefits{ +* useful to debug complex `toBox` mappings +* formatting semantics preserving transformation +} +@pitfalls{ +* only useful for debugging purposes, because it becomes a pipeline bottleneck otherwise. 
+} +Box debUG(Box b) { + list[Box] groupBy([], int _gs, Box _op) = []; + list[Box] groupBy(list[Box] boxes:[Box _, *_], int gs, Box op) + = [op[boxes=boxes[..gs]], *groupBy(boxes[gs..], gs, op)]; + + list[Box] groupByBackward([], int _gs, Box _op) = []; + list[Box] groupByBackward(list[Box] boxes:[Box _, *_], int gs, Box op) + = [op[boxes=boxes[..size(boxes) mod gs]], *groupBy(boxes[size(boxes) mod gs..], gs, op)]; + + list[Row] groupRows([], int _gs) = []; + list[Row] groupRows(list[Box] boxes:[Box _, *_], int gs) + = [R(boxes[..gs]), *groupRows(boxes[gs..], gs)]; + + return innermost visit(b) { + case [*Box pre, U_([*Box mid]), *Box post] => [*pre, *mid, *post] + case G_(list[Box] boxes, gs=gs, op=op, backwards=bw) => U_(bw ? groupByBackward(boxes, gs, op) : groupBy(boxes, gs, op)) + case AG_(list[Box] boxes, gs=gs, columns=cs, rs=rs) => A(groupRows(boxes, gs), columns=cs, rs=rs) + } +} + +@synopsis{Short-hand for `H(hs=0)`} +Box H0(Box boxes...) = H_(boxes, hs=0); + +@synopsis{Short-hand for `H(hs=1)`} +Box H1(Box boxes...) = H_(boxes, hs=1); + +@synopsis{Short-hand for `HOV(hs=0)`} +Box HOV0(Box boxes ...) = HOV_(boxes, hs=0); + +@synopsis{Short-hand for `HV(hs=0)`} +Box HV0(Box boxes...) 
= HV_(boxes, hs=0); + +@synopsis{Short-hand for indented H} +Box IH(Box boxes..., int hs=1) = I(H_(boxes, hs=hs)); + +@synopsis{Short-hand for indented V} +Box IV(Box boxes..., int hs=1) = I(V_(boxes, hs=hs)); + +@synopsis{Short-hand for indented HOV} +Box IHOV(Box boxes..., int hs=1, int vs=0) = I(HOV_(boxes, hs=hs, vs=vs)); + +@synopsis{Short-hand for indented HV} +Box IHV(Box boxes..., int hs=1, int vs=0) = I(HV_(boxes, hs=hs, vs=vs)); \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/box/util/Box2Text.rsc b/src/org/rascalmpl/library/lang/box/util/Box2Text.rsc index 08026eaf619..61f2f78c475 100644 --- a/src/org/rascalmpl/library/lang/box/util/Box2Text.rsc +++ b/src/org/rascalmpl/library/lang/box/util/Box2Text.rsc @@ -32,15 +32,15 @@ This demonstrates the semantics of the main hard constraints: ```rascal-shell import lang::box::util::Box2Text; import lang::box::\syntax::Box; -format(H([L("A"), L("B"), L("C")], hs=2)) -format(H([L("A"), L("B"), L("C")], hs=1)) -format(H([L("A"), L("B"), L("C")], hs=0)) -format(V([L("A"), L("B"), L("C")], vs=2)) -format(V([L("A"), L("B"), L("C")], vs=1)) -format(V([L("A"), L("B"), L("C")], vs=0)) -format(H([L("A"), V([L("B"), L("C")])])) -format(H([L("A"), I([L("B")]), L("C")])) -format(H([L("A"), V([L("B"), H([L("C"), L("D")])])])) +format(H(L("A"), L("B"), L("C"), hs=2)) +format(H(L("A"), L("B"), L("C"), hs=1)) +format(H(L("A"), L("B"), L("C"), hs=0)) +format(V(L("A"), L("B"), L("C"), vs=2)) +format(V(L("A"), L("B"), L("C"), vs=1)) +format(V(L("A"), L("B"), L("C"), vs=0)) +format(H(L("A"), V(L("B"), L("C")))) +format(H(L("A"), I(L("B")), L("C"))) +format(H(L("A"), V(L("B"), H(L("C"), L("D"))))) ``` The "soft" constraints change their behavior based on available horizontal room: @@ -57,9 +57,9 @@ format(HOV([L("W") | i <- [0..30]])); By cleverly combining constraints, a specifically desired behavior is easy to achieve: ```rascal-shell,continue -format(H([L("if"), H([L("("), L("true"), L(")")], hs=0), 
HOV([L("doSomething")])])) -format(H([L("if"), H([L("("), L("true"), L(")")], hs=0), HOV([L("W") | i <- [0..30]])])) -format(H([L("if"), H([L("("), L("true"), L(")")], hs=0), HV([L("W") | i <- [0..30]])])) +format(H(L("if"), H(L("("), L("true"), L(")"), hs=0), HOV(L("doSomething")))) +format(H(L("if"), H(L("("), L("true"), L(")"), hs=0), HOV([L("W") | i <- [0..30]]))) +format(H(L("if"), H(L("("), L("true"), L(")"), hs=0), HV([L("W") | i <- [0..30]]))) ``` } @pitfalls{ @@ -71,6 +71,33 @@ import util::Math; import List; import String; import lang::box::\syntax::Box; +import IO; + +@synopsis{formatting options for ((Box2Text))} +@description{ +* `maxWidth` is the constraint that makes HV and HOV boxes switch to vertical mode +* `wrapAfter` is the lowerbound that makes HV and HOV stay horizontal +* `tabSize` is the default indentation used when an `I` box does not have an explicit `is` parameter +* `insertSpaces`, when set to true it prefers spaces over tabs, when set to false we use tabs for indentation (see `tabSize`) +* `trimTrailingWhitespace` when `true` the formatter can not leave spaces or tabs after the last non-whitespace character, +when false it does not matter. +* `insertFinalNewline`, insert a newline character at the end of the file if one does not exist. +* `trimFinalNewlines`, trim all newlines after the final newline at the end of the file. + +Note that there may be more FormattingOptions due to other elements of a formatting pipeline, such as ((layoutDiff)). 
+} +data FormattingOptions( + int maxWidth=120, + int wrapAfter=90, + int tabSize = 4, + bool insertSpaces = true, + bool trimTrailingWhitespace = true, + bool insertFinalNewline = true, + bool trimFinalNewlines = true +) = formattingOptions(); + +@synopsis{Specialized alignments for the final column to implement `trimTrailingWhitespace`} +private data Alignment = fl() | fc(); @synopsis{Converts boxes into a string by finding an "optimal" two-dimensional layout} @description{ @@ -84,9 +111,26 @@ fit it will still be printed. We say `maxWidth` is a _soft_ constraint. * HV and HOV are the soft constraints that allow for better solutions, so use them where you can to allow for flexible layout that can handle deeply nested expressions and statements. } -public str format(Box b, int maxWidth=80, int wrapAfter=70) - = " - '<}>"; +public str format(Box b, FormattingOptions opts = formattingOptions()) + = finalNewlineOptions(" + '<}>", opts.insertFinalNewline, opts.trimFinalNewlines + ); + +private str finalNewlineOptions(str lines, bool insertFinalNewline, bool trimFinalNewlines) { + if (!insertFinalNewline) { + lines = lines[..-1]; + + if (trimFinalNewlines, /^\s+$/ := lines) { + lines = prefix; + } + } + else if (trimFinalNewlines, /^\s*$/ := lines) { + lines = "\n"; + } + + return lines; +} + @synopsis{Box2text uses list[str] as intermediate representation of the output during formatting} @benefits{ @@ -100,8 +144,8 @@ ANSI escape codes, and characters like \r and \n in `L` boxes _will break_ the a alias Text = list[str]; @synopsis{Converts boxes into list of lines (Unicode)} -public Text box2text(Box b, int maxWidth=80, int wrapAfter=70) - = box2data(b, options(maxWidth=maxWidth, wrapAfter=wrapAfter)); +public Text box2text(Box b, FormattingOptions opts = formattingOptions()) + = box2data(b, options(maxWidth=opts.maxWidth, wrapAfter=opts.wrapAfter, is=opts.tabSize, trimTrailingWhitespace=opts.trimTrailingWhitespace, insertSpaces=opts.insertSpaces)); ////////// private 
functions below implement the intermediate data-structures ////////// and the constraint solver @@ -117,17 +161,19 @@ This is used during the algorithm, not for external usage. * `wrapAfter` is the threshold criterion for line fullness, to go to the next line in a HV box and to switching between horizontal and vertical for HOV boxes. } -data Options = options( +private data Options = options( int hs = 1, int vs = 0, int is = 4, int maxWidth = 80, - int wrapAfter = 70 + int wrapAfter = 70, + bool trimTrailingWhitespace = true, + bool insertSpaces = true ); @synopsis{Quickly splice in any nested U boxes, and empty H, V, HV, I or HOV boxes} list[Box] u(list[Box] boxes) { - return [*((U(list[Box] nested) := b) ? u(nested) : [b]) | b <- boxes, !isDegenerate(b)]; + return [*((U_(list[Box] nested) := b) ? u(nested) : [b]) | b <- boxes, !isDegenerate(b)]; } @synopsis{Empty H, V, HOV, HV, I boxes should not lead to accidental extra separators in their context} @@ -137,29 +183,28 @@ private bool isDegenerate(Box b) = b has boxes && b.boxes == []; private Text vv(Text a, Text b) = [*a, *b]; @synopsis{Create a string of spaces just as wide as the parameter a} -private str blank(str a) = right("", width(a)); +private str blank(str a, Options opts) = hskip(size(a), opts)[0]; @synopsis{Computes a white line with the length of the last line of a} -Text wd([]) = []; -Text wd([*_, str x]) = [blank(x)]; - -@synopsis{Computes the length of unescaped string s} -private int width(str s) = size(s); +Text wd([], Options _) = []; +Text wd([*_, str x], Options opts) = [blank(x, opts)]; @synopsis{Computes the maximum width of text t} private int twidth([]) = 0; -private default int twidth(Text t) = max([width(line) | line <- t]); +private default int twidth(Text t) = max([size(line) | line <- t]); @synopsis{Computes the length of the last line of t} private int hwidth([]) = 0; -private int hwidth([*_, str last]) = width(last); +private int hwidth([*_, str last]) = size(last); 
@synopsis{Prepends str a before text b, all lines of b will be shifted} -private Text bar(str a, []) = [a]; -private Text bar(str a, [str bh, *str bt]) = vv([""], prepend(blank(a), bt)); +private Text bar(str a, [], Options _) = [a]; +private Text bar(str a, [str bh, *str bt], Options opts) = vv([""], prepend(blank(a, opts), bt)); @synopsis{Produce text consisting of a white line of length n} -Text hskip(int n) = [right("", n)]; +Text hskip(int n, Options opts) = opts.insertSpaces + ? [right("", n)] + : ["\t<}> <}>"]; @synopsis{Produces text consisting of n white lines at length 0} private Text vskip(int n) = ["" | _ <- [0..n]]; @@ -168,26 +213,29 @@ private Text vskip(int n) = ["" | _ <- [0..n]]; private Text prepend(str a, Text b) = ["" | line <- b]; @synopsis{Implements horizontal concatenation, also for multiple lines} -private Text hh([], Text b) = b; -private Text hh(Text a, []) = a; -private Text hh([a], Text b) = bar(a, b); +private Text hh([], Text b, Options _) = b; +private Text hh(Text a, [], Options _) = a; +private Text hh([a], Text b, Options opts) = bar(a, b, opts); -private default Text hh(Text a, Text b) = vv(a[0..-1], bar(a[-1], b)); +private default Text hh(Text a, Text b, Options opts) = vv(a[0..-1], bar(a[-1], b, opts)); @synopsis{Horizontal concatenation, but if the left text is empty return nothing.} -private Text lhh([], Text _) = []; -private default Text lhh(a, b) = hh(a, b); +private Text lhh([], Text _, Options _) = []; +private default Text lhh(a, b, Options opts) = hh(a, b, opts); @synopsis{Horizontal concatenation, but if the right text is empty return nothing.} -private Text rhh(Text _, []) = []; -private Text rhh(Text a, Text b) = hh(a, b); +private Text rhh(Text _, [], Options _) = []; +private Text rhh(Text a, Text b, Options opts) = hh(a, b, opts); @synopsis{Vertical concatenation, but if the right text is empty return nothing.} private Text rvv(Text _, []) = []; private default Text rvv(Text a, Text b) = vv(a,b); -private Text 
LL(str s ) = [s]; - +private Text LL(str s) { + assert s != "" : "literal strings must never be empty for Box2Text to work correctly."; + return [s]; +} + private Text HH([], Box _, Options _opts, int _m) = []; private Text HH(list[Box] b:[_, *_], Box _, Options opts, int m) { @@ -196,20 +244,34 @@ private Text HH(list[Box] b:[_, *_], Box _, Options opts, int m) { for (a <- b) { Text t = \continue(a, H([]), opts, m); int s = hwidth(t); - r = hh(t, rhh(hskip(opts.hs), r)); + r = hh(t, rhh(hskip(opts.hs, opts), r, opts), opts); m = m - s - opts.hs; } return r; } +private Text GG(list[Box] boxes, Box c, Options opts, int m, int gs, Box op, bool backwards) + = \continue(c[boxes=groupBy(boxes, gs, op, backwards)], c, opts, m); + +public list[Box] groupBy(list[Box] boxes, int gs, Box op, false) = groupBy(boxes, gs, op); + +@synopsis{simulates grouping as-if done from the back, by starting to peel off the rest instead of grouping the rest at the end} +public list[Box] groupBy(list[Box] boxes, int gs, Box op, true) + = [op[boxes=boxes[..size(boxes) mod gs]], *groupBy(boxes[size(boxes) mod gs..], gs, op)]; + +public list[Box] groupBy([], int _gs, Box _op) = []; + +public list[Box] groupBy(list[Box] boxes:[Box _, *_], int gs, Box op) + = [op[boxes=boxes[..gs]], *groupBy(boxes[gs..], gs, op)]; + private Text VV([], Box _c, Options _opts, int _m) = []; private Text VV(list[Box] b:[_, *_], Box c, Options opts, int m) { Text r = []; b = reverse(b); for (a <- b) { - if (V(_) !:= c || L("") !:= a) { + if (V_(_) !:= c || L("") !:= a) { Text t = \continue(a, V([]), opts, m); r = vv(t, rvv(vskip(opts.vs), r)); } @@ -219,11 +281,11 @@ private Text VV(list[Box] b:[_, *_], Box c, Options opts, int m) { private Text II([], Box _c, Options _opts, int _m) = []; -private Text II(list[Box] b:[_, *_] , c:H(list[Box] _), Options opts, int m) +private Text II(list[Box] b:[_, *_] , c:H_(list[Box] _), Options opts, int m) = HH(b, c, opts, m); -private Text II(list[Box] b:[Box _, *Box _], 
c:V(list[Box] _), Options opts, int m) - = rhh(hskip(opts.is), \continue(V(b, vs=opts.vs), c, opts, m - opts.is)); +private Text II(list[Box] b:[Box _, *Box _], c:V_(list[Box] _), Options opts, int m) + = rhh(hskip(opts.is, opts), \continue(V(b, vs=opts.vs), c, opts, m - opts.is), opts); private Text WDWD([], Box _c , Options _opts, int _m) = []; @@ -232,13 +294,13 @@ private Text WDWD([Box head, *Box tail], Box c , Options opts, int m) { int h = head.hs ? opts.hs; Text t = \continue(head, c, opts, m); int s = hwidth(t); - return hh(wd(t), rhh(hskip(h) , WDWD(tail, c, opts, m - s - h))); + return hh(wd(t, opts), rhh(hskip(h, opts) , WDWD(tail, c, opts, m - s - h), opts), opts); } private Text ifHOV([], Box b, Box c, Options opts, int m) = []; private Text ifHOV(Text t:[str head], Box b, Box c, Options opts, int m) - = width(head) <= m ? t : \continue(b, c, opts, m); + = size(head) <= m ? t : \continue(b, c, opts, m); private Text ifHOV(Text t:[str head, str _, *str_], Box b, Box c, Options opts, int m) = \continue(b, c, opts, m); @@ -255,21 +317,21 @@ private Text HVHV(Text T, int s, Text a, Box A, list[Box] B, Options opts, int m if (size(a) > 1) { // Multiple lines Text T1 = \continue(A, V([]), opts, m-i); - return vv(T, rvv(vskip(v), HVHV(T1, m-hwidth(T1), B, opts, m, H([])))); + return vv(T, rvv(vskip(v), HVHV(T1, m-hwidth(T1), B, opts, m, H_([])))); } if (n <= s) { // Box A fits in current line - return HVHV(hh(lhh(T, hskip(h)), a), s-n, B, opts, m, H([])); + return HVHV(hh(lhh(T, hskip(h,opts), opts), a, opts), s-n, B, opts, m, H_([])); } else { - n -= h; // n == width(a) + n -= h; // n == size(a) if (i + n < m) { // Fits in the next line, not in current line Text T1 =\continue(A, V([]), opts, m-i); - return vv(T, rvv(vskip(v), HVHV(T1, m-n-i, B, opts, m, H([])))); + return vv(T, rvv(vskip(v), HVHV(T1, m-n-i, B, opts, m, H_([])))); } else { // Doesn't fit in either lines Text T1 = \continue(A, V([]), opts, m-i); - return vv(T, rvv(vskip(v), HVHV(T1, 
m-hwidth(T1), B, opts, m, H([])))); + return vv(T, rvv(vskip(v), HVHV(T1, m-hwidth(T1), B, opts, m, H_([])))); } } } @@ -285,47 +347,33 @@ private Text HVHV([], Box _, Options opts, int m) = []; private Text HVHV(list[Box] b:[Box head], Box _, Options opts, int m) - = \continue(head, V([]), opts, m); + = \continue(head, V_([]), opts, m); private Text HVHV(list[Box] b:[Box head, Box next, *Box tail], Box _, Options opts, int m) { - Text T = \continue(head, V([]), opts, m); - return HVHV(T, m - hwidth(T), [next, *tail], opts, m, H([])); + Text T = \continue(head, V_([]), opts, m); + return HVHV(T, m - hwidth(T), [next, *tail], opts, m, H_([])); } -// empty lists do not need grouping -private Text GG([], Box(list[Box]) op, int gs, Box c, Options opts, int m) - = \continue(U([]), c, opts, m); - -// the last elements are smaller than the group size, just wrap them up and finish -private Text GG([*Box last], Box(list[Box]) op, int gs, Box c, Options opts, int m) - = \continue(op(u(last))[hs=opts.hs][vs=opts.vs][is=opts.is], c, opts, m) - when size(last) < gs; - -// we pick the head of (size group size) and then continue with the rest -private Text GG([*Box heads, *Box tail], Box(list[Box]) op, int gs, Box c, Options opts, int m) - = \continue(op(heads)[hs=opts.hs][vs=opts.vs][is=opts.is], NULL(), opts, m) - + \continue(G(tail, op=op, hs=opts.hs, vs=opts.vs, is=opts.is, gs=gs), c, opts, m) - when size(heads) == gs; - private Text continueWith(Box b:L(str s) , Box c, Options opts, int m) = LL(s); -private Text continueWith(Box b:H(list[Box] bl) , Box c, Options opts, int m) = HH(u(bl), c, opts, m); -private Text continueWith(Box b:V(list[Box] bl) , Box c, Options opts, int m) = VV(u(bl), c, opts, m); -private Text continueWith(Box b:I(list[Box] bl) , Box c, Options opts, int m) = II(u(bl), c, opts, m); -private Text continueWith(Box b:WD(list[Box] bl) , Box c, Options opts, int m) = WDWD(u(bl), c, opts, m); -private Text continueWith(Box b:HOV(list[Box] bl), Box c, Options 
opts, int m) = HOVHOV(u(bl), c, opts, m); -private Text continueWith(Box b:HV(list[Box] bl) , Box c, Options opts, int m) = HVHV(u(bl), c, opts, m); -private Text continueWith(Box b:SPACE(int n) , Box c, Options opts, int m) = hskip(n); +private Text continueWith(Box b:H_(list[Box] bl) , Box c, Options opts, int m) = HH(u(bl), c, opts, m); +private Text continueWith(Box b:V_(list[Box] bl) , Box c, Options opts, int m) = VV(u(bl), c, opts, m); +private Text continueWith(Box b:I_(list[Box] bl) , Box c, Options opts, int m) = II(u(bl), c, opts, m); +private Text continueWith(Box b:WD_(list[Box] bl) , Box c, Options opts, int m) = WDWD(u(bl), c, opts, m); +private Text continueWith(Box b:HOV_(list[Box] bl), Box c, Options opts, int m) = HOVHOV(u(bl), c, opts, m); +private Text continueWith(Box b:HV_(list[Box] bl) , Box c, Options opts, int m) = HVHV(u(bl), c, opts, m); +private Text continueWith(Box b:SPACE(int n) , Box c, Options opts, int m) = hskip(n, opts); // This is a degenerate case, an outermost U-Box without a wrapper around it. 
-private Text continueWith(Box b:U(list[Box] bl) , Box c, Options opts, int m) = HH(u(bl), c, opts, m); +private Text continueWith(Box b:U_(list[Box] bl) , Box c, Options opts, int m) = HH(u(bl), c, opts, m); -private Text continueWith(Box b:A(list[Row] rows), Box c, Options opts, int m) - = AA(rows, c, b.columns, opts, m); +private Text continueWith(Box b:G_(list[Box] bl), Box c, Options opts, int m) + = GG(u(bl), c, opts, m, b.gs, b.op, b.backwards); -private Text continueWith(Box b:G(list[Box] bl), Box c, Options opts, int m) = GG(u(bl), b.op, b.gs, c, opts, m); +private Text continueWith(Box b:A_(list[Row] rows), Box c, Options opts, int m) + = AA(rows, c, b.columns, b.rs, opts, m); -@synopsis{General shape of a Box operator, as a parameter to `G`} -private alias BoxOp = Box(list[Box]); +private Text continueWith(Box b:AG_(list[Box] boxes), Box c, Options opts, int m) + = AAG(u(boxes), b.gs, b.columns, b.rs, c, opts, m); @synopsis{Option inheritance layer; then continue with the next box.} @description{ @@ -333,7 +381,7 @@ The next box is either configured by itself. Options are transferred from the box to the opts parameter for easy passing on to recursive calls. 
} private Text \continue(Box b, Box c, Options opts, int m) - = continueWith(b, c, opts[hs=b.hs][vs=b.vs][is=b.is], m); + = continueWith(b, c, opts[hs=b.hs][vs=b.vs][is=(b.is?)?b.is:opts.is], m); /* ------------------------------- Alignment ------------------------------------------------------------*/ @@ -358,37 +406,107 @@ private int Acolumns(list[Row] rows) = (0 | max(it, size(row.cells)) | row <- ro @synopsis{Compute the maximum cell width for each column in an array} private list[int] Awidth(list[list[Box]] rows) - = [(0 | max(it, row[col].width) | row <- rows ) | int col <- [0..size(head(rows))]]; + = [(0 | max(it, row[col].width) | row <- rows, col < size(row) ) | int col <- [0..size(head(rows))]]; @synopsis{Adds empty cells to every row until every row has the same amount of columns.} -list[Row] AcompleteRows(list[Row] rows, int columns=Acolumns(rows)) - = [ R(u([*row.cells, *[H([]) | _ <- [0..columns - size(row.cells)]]])) | row <- rows]; +list[Row] AcompleteRows(list[Row] rows, int columns=Acolumns(rows), Box rs=NULL()) + = [ R(u([*row.cells[..-1], H_([row.cells[-1], rs],hs=0), *[SPACE(1) | _ <- [0..columns - size(row.cells)]]])) | row <- rows[..-1]] + + [ R(u([*rows[-1].cells, *[SPACE(1) | _ <- [0..columns - size(rows[-1].cells)]]]))] ; @synopsis{Helper function for aligning Text inside an array cell} private Box align(l(), Box cell, int maxWidth) = maxWidth - cell.width > 0 - ? H([cell, SPACE(maxWidth - cell.width)], hs=0) + ? H_([cell, SPACE(maxWidth - cell.width)], hs=0) : cell; +@synopsis{Helper function for aligning Text inside an array cell} private Box align(r(), Box cell, int maxWidth) = maxWidth - cell.width > 0 - ? H([SPACE(maxWidth - cell.width), cell], hs=0) + ? H_([SPACE(maxWidth - cell.width), cell], hs=0) : cell; private Box align(c(), Box cell, int maxWidth) = maxWidth - cell.width > 1 - ? H([SPACE((maxWidth - cell.width) / 2), cell, SPACE((maxWidth - cell.width) / 2)], hs=0) + ? 
H_([SPACE((maxWidth - cell.width) / 2), cell, SPACE((maxWidth - cell.width) / 2)], hs=0) : maxWidth - cell.width == 1 ? align(l(), cell, maxWidth) : cell; -private Text AA(list[Row] table, Box c, list[Alignment] alignments, Options opts, int m) { - list[list[Box]] rows = RR(AcompleteRows(table), c, opts, m); + +// the last left box should not fill up to the right for the next non-existing column, to help implement `trimTrailingWhitespace` +private Box align(fl(), Box cell, int maxWidth) = cell; + +// the last center box should not fill up to the right, only to the left to help implement `trimTrailingWhitespace` +private Box align(fc(), Box cell, int maxWidth) = maxWidth - cell.width > 1 + ? H_([SPACE((maxWidth - cell.width) / 2), cell], hs=0) + : maxWidth - cell.width == 1 ? + align(l(), cell, maxWidth) + : cell; + +private Text AA(list[Row] table, Box c, list[Alignment] alignments, Box rs, Options opts, int m) { + if (table == []) { + return []; + } + + // first flatten any nested U cell lists into the Rows + table = [R(u(r.cells)) | Row r <- table]; + + // we remove any H-V backtracking because table cells are too small anyway, generally. + // so we prefer the less wide V over HOV and HV. This boosts efficiency radically, because + // later, ever cell will be formatted individually to an optimal width, and measured, before we even start + // to format the table. Then the same cells will be formatted again from scratch. By removing the + // backtracking, larger tables (like reified grammars) become doable. 
+ table = visit (table) { + case Box b:HOV_(list[Box] boxes) => V_(boxes, vs=b.vs) + case Box b:HV_(list[Box] boxes) => V_(boxes, vs=b.vs) + } + + // then we can know the number of columns + int maxColumns = Acolumns(table); + + // then we fill each row up to the maximum of columns + list[list[Box]] rows = RR(AcompleteRows(table, columns=maxColumns, rs=rs), c, opts, m); + + // and we infer alignments where not provided + alignments = AcompleteAlignments(alignments, maxColumns); + + if (opts.trimTrailingWhitespace) { + alignments = AfinalColumnSpecials(alignments); + } + + // finally we compute alignment information list[int] maxWidths = Awidth(rows); - - return \continue(V([ - H([align(al, cell, mw) | <- zip3(row, alignments, maxWidths)]) - | row <- rows - ]),c, opts, m); + + try { + // A row is simply an H box where each cell is filled with enough spaces to align for the next column + return \continue(V_([ + H_([align(al, cell, mw) | <- zip3(row, alignments, maxWidths)]) | row <- rows]), c, opts, m); + } + catch IllegalArgument(_, "List size mismatch"): { + throw IllegalArgument("Array alignments size is while there are columns."); + } } +private Text AAG([], int _gs, list[Alignment] _columns, Box _rs, Box _c, Options _opts, int _m) = []; + +private Text AAG(list[Box] boxes:[Box _, *_], int gs, list[Alignment] columns, Box rs, Box c, Options opts, int m) + = \continue(A(groupRows(boxes, gs), columns=columns, rs=rs), c, opts, m); + +private list[Row] groupRows([], int _gs) = []; + +private list[Row] groupRows(list[Box] boxes:[Box _, *_], int gs) + = [R(boxes[..gs]), *groupRows(boxes[gs..], gs)]; + +@synopsis{Cuts off and extends the alignment spec to the width of the table} +@description{ +* if too few columns are specified: `l()`'s are added accordingly +* if too many columns are specified: they are cut off from the right +} +private list[Alignment] AcompleteAlignments(list[Alignment] alignments, int maxColumns) + = [*alignments[..maxColumns], *[l() | _ <- 
[0..maxColumns - size(alignments)]]]; + +@synopsis{Translate l() and c() to fl() and fc() for the final columns to help implement `trimTrailingWhitespace`} +private list[Alignment] AfinalColumnSpecials([*Alignment pre, l()]) = [*pre, fl()]; +private list[Alignment] AfinalColumnSpecials([*Alignment pre, c()]) = [*pre, fc()]; +private default list[Alignment] AfinalColumnSpecials(list[Alignment] as) = as; + @synopsis{Check soft limit for HV and HOV boxes} // TODO this seems to ignore SPACE boxes? private bool noWidthOverflow(list[Box] hv, Options opts) @@ -396,46 +514,88 @@ private bool noWidthOverflow(list[Box] hv, Options opts) @synopsis{Changes all HV boxes that do fit horizontally into hard H boxes.} private Box applyHVconstraints(Box b, Options opts) = innermost visit(b) { - case HV(boxes, hs=h, is=i, vs=v) => H(boxes, hs=h, is=i, vs=v) + case Box B:HV_(list[Box] boxes, hs=h, is=i, vs=v) => H_(boxes, hs=h, is=(B.is?)?i:opts.is, vs=v) when noWidthOverflow(boxes, opts) }; @synopsis{Changes all HOV boxes that do fit horizontally into hard H boxes, and the others into hard V boxes.} private Box applyHOVconstraints(Box b, Options opts) = innermost visit(b) { - case HOV(boxes, hs=h, is=i, vs=v) => noWidthOverflow(boxes, opts) - ? H(boxes, hs=h, is=i, vs=v) - : V(boxes, hs=h, is=i, vs=v) + case Box B:HOV_(list[Box] boxes, hs=h, is=i, vs=v) => noWidthOverflow(boxes, opts) + ? 
H_(boxes, hs=h, is=(B.is?)?i:opts.is, vs=v) + : V_(boxes, hs=h, is=(B.is?)?i:opts.is, vs=v) }; @synopsis{Workhorse, that first applies hard HV and HOV limits and then starts the general algorithm} private Text box2data(Box b, Options opts) { b = applyHVconstraints(b, opts); b = applyHOVconstraints(b, opts); - return \continue(b, V([]), options(), opts.maxWidth); + return \continue(b, V_([]), options(is=opts.is, insertSpaces=opts.insertSpaces, trimTrailingWhitespace=opts.trimTrailingWhitespace), opts.maxWidth); } ///////////////// regression tests //////////////////////////////// test bool horizontalPlacement2() - = format(H([L("A"), L("B"), L("C")], hs=2)) + = format(H(L("A"), L("B"), L("C"), hs=2)) == "A B C '"; test bool horizontalPlacement3() - = format(H([L("A"), L("B"), L("C")], hs=3)) + = format(H(L("A"), L("B"), L("C"), hs=3)) == "A B C '"; +test bool horizontalIndentIsNoop1() + = format(H(L("A"), I(L("B")))) + == "A B + '"; + +test bool horizontalIndentIsNoop2() + = format(HV(L("A"), I(L("B")))) + == "A B + '"; + +test bool horizontalIndentIsNoop3() + = format(HOV(L("A"), I(L("B")))) + == "A B + '"; + +test bool emptyBoxesNoExtraSpacing1() + = format(H(L("A"), H(), L("B"))) + == "A B + '"; + +test bool emptyBoxesNoExtraSpacing2() + = format(H(L("A"), V(), L("B"))) + == "A B + '"; + +test bool emptyBoxesNoExtraSpacing3() + = format(H(L("A"), I(), L("B"))) + == "A B + '"; + +test bool emptyBoxesNoExtraSpacing4() + = format(V(L("A"), H(), L("B"))) + == "A + 'B + '"; + +test bool emptyBoxesNoExtraSpacing5() + = format(V(L("A"), V(), L("B"))) + == "A + 'B + '"; + test bool verticalPlacement0() - = format(V([L("A"), L("B"), L("C")], vs=0)) + = format(V(L("A"), L("B"), L("C"), vs=0)) == "A 'B 'C '"; test bool verticalPlacement1() - = format(V([L("A"), L("B"), L("C")], vs=1)) + = format(V(L("A"), L("B"), L("C"), vs=1)) == "A ' 'B @@ -444,14 +604,14 @@ test bool verticalPlacement1() '"; test bool verticalIndentation2() - = format(V([L("A"), I([L("B")]), L("C")])) + 
= format(V(L("A"), I(L("B")), L("C"))) == "A ' B 'C '"; test bool blockIndent() - = format(V([L("A"), I([V([L("B"), L("C")])]), L("D")])) + = format(V(L("A"), I(V(L("B"), L("C"))), L("D"))) == "A ' B ' C @@ -459,52 +619,52 @@ test bool blockIndent() '"; test bool wrappingIgnoreIndent() - = format(HV([L("A"), I([L("B")]), L("C")], hs=0), maxWidth=2, wrapAfter=2) + = format(HV(L("A"), I(L("B")), L("C"), hs=0), opts=formattingOptions(maxWidth=2, wrapAfter=2)) == "AB 'C '"; test bool wrappingWithIndent() - = format(HV([L("A"), I([L("B")]), I([L("C")])], hs=0), maxWidth=2, wrapAfter=2) + = format(HV(L("A"), I(L("B")), I(L("C")), hs=0),opts=formattingOptions( maxWidth=2, wrapAfter=2)) == "AB ' C '"; test bool multiBoxIndentIsVertical() - = format(I([L("A"), L("B")])) + = format(I(L("A"), L("B"))) == " A ' B '"; test bool flipping1NoIndent() - = format(HOV([L("A"), L("B"), L("C")], hs=0, vs=0), maxWidth=2, wrapAfter=2) + = format(HOV(L("A"), L("B"), L("C"), hs=0, vs=0), opts=formattingOptions(maxWidth=2, wrapAfter=2)) == "A 'B 'C '"; test bool horizontalOfOneVertical() - = format(H([L("A"), V([L("B"), L("C")])])) + = format(H(L("A"), V(L("B"), L("C")))) == "A B ' C '"; test bool stairCase() - = format(H([L("A"), V([L("B"), H([L("C"), V([L("D"), H([L("E"), L("F")])])])])])) + = format(H(L("A"), V(L("B"), H(L("C"), V(L("D"), H(L("E"), L("F"))))))) == "A B ' C D ' E F '"; test bool simpleTable() - = format(A([R([L("1"),L("2"),L("3")]),R([L("4"), L("5"), L("6")]),R([L("7"), L("8"), L("9")])])) + = format(A(R([L("1"),L("2"),L("3")]),R([L("4"), L("5"), L("6")]),R([L("7"), L("8"), L("9")]))) == "1 2 3 '4 5 6 '7 8 9 '"; test bool simpleAlignedTable() - = format(A([R([L("1"),L("2"),L("3")]),R([L("44"), L("55"), L("66")]),R([L("777"), L("888"), L("999")])], + = format(A(R([L("1"),L("2"),L("3")]),R([L("44"), L("55"), L("66")]),R([L("777"), L("888"), L("999")]), columns=[l(),c(),r()])) == "1 2 3 '44 55 66 @@ -512,7 +672,7 @@ test bool simpleAlignedTable() '"; test bool 
simpleAlignedTableDifferentAlignment() - = format(A([R([L("1"),L("2"),L("3")]),R([L("44"), L("55"), L("66")]),R([L("777"), L("888"), L("999")])], + = format(A(R([L("1"),L("2"),L("3")]),R([L("44"), L("55"), L("66")]),R([L("777"), L("888"), L("999")]), columns=[r(),c(),l()])) == " 1 2 3 ' 44 55 66 @@ -520,54 +680,51 @@ test bool simpleAlignedTableDifferentAlignment() '"; test bool WDtest() { - L1 = H([L("aap")] , hs=0); - L2 = H([WD([L1]), L("noot")], hs=0); - L3 = H([WD([L2]), L("mies")], hs=0); + L1 = H(L("aap") , hs=0); + L2 = H(WD(L1), L("noot"), hs=0); + L3 = H(WD(L2), L("mies"), hs=0); - return format(V([L1, L2, L3])) + return format(V(L1, L2, L3)) == "aap ' noot ' mies '"; } -test bool groupBy() { +test bool groupByTest() { lst = [L("") | i <- [0..10]]; - g1 = G(lst, op=H, gs=3); - lst2 = [H([L(""), L(""), L("")]) | i <- [0,3..7]] + [H([L("9")])]; + g1 = G(lst, op=H(), gs=3); + lst2 = [H(L(""), L(""), L("")) | i <- [0,3..7]] + [H(L("9"))]; - return format(V([g1])) == format(V(lst2)); + return format(V(g1)) == format(V(lst2)); } -test bool noDegenerateHSeparators() - = format(H([L("a"),H([]),L("b")])) - == "a b - '"; +test bool groupByBackwardsTest() { + lst = [L("") | i <- [0..10]]; + g1 = G(lst, op=H(), gs=3, backwards=true); + lst2 = [H(L("0"))] + [H(L(""), L(""), L("")) | i <- [1, 4..10]]; -test bool noDegenerateVSeparators() - = format(V([L("a"),H([]),L("b")])) - == "a - 'b - '"; + return format(V([g1])) == format(V(lst2)); +} test bool noDegenerateHVSeparators1() - = format(HV([L("a"),V([]),L("b")])) + = format(HV(L("a"),V(),L("b"))) == "a b '"; test bool noDegenerateHVSeparators2() - = format(HV([L("a"),V([]),L("b")]), maxWidth=1, wrapAfter=1) + = format(HV(L("a"),V(),L("b")), opts=formattingOptions(maxWidth=1, wrapAfter=1)) == "a 'b '"; test bool noDegenerateHOVSeparators1() - = format(HOV([L("a"),V([]),L("b")])) + = format(HOV(L("a"),V(),L("b"))) == "a b '"; test bool noDegenerateHOVSeparators2() - = format(HOV([L("a"),V([]),L("b")]), maxWidth=1, 
wrapAfter=1) + = format(HOV(L("a"),V(),L("b")), opts=formattingOptions(maxWidth=1, wrapAfter=1)) == "a 'b '"; diff --git a/src/org/rascalmpl/library/lang/box/util/Tree2Box.rsc b/src/org/rascalmpl/library/lang/box/util/Tree2Box.rsc index bf580f16f4f..21ba5f513ec 100644 --- a/src/org/rascalmpl/library/lang/box/util/Tree2Box.rsc +++ b/src/org/rascalmpl/library/lang/box/util/Tree2Box.rsc @@ -47,7 +47,7 @@ b = toBox(program); import lang::box::util::Box2Text; format(b) // If you are not happy, then you should produce a specialization: -Box toBox((Program) `begin <{Statement ";"}* body> end`, FormatOptions opts=formatOptions()) +Box toBox((Program) `begin <{Statement ";"}* body> end`, FormattingOptions opts=formattingOptions()) = V([ L("begin"), I([ @@ -67,12 +67,12 @@ module lang::box::util::Tree2Box import ParseTree; import lang::box::\syntax::Box; import String; -import IO; + @synopsis{Configuration options for toBox} -data FormatOptions = formatOptions( +data FormattingOptions( CaseInsensitivity ci = asIs() -); +) = formattingOptions(); @synopsis{Normalization choices for case-insensitive literals.} data CaseInsensitivity @@ -92,8 +92,6 @@ by the user is necessary. default Box toBox(t:appl(Production p, list[Tree] args), FO opts = fo()) { // the big workhorse switch identifies all kinds of special cases for shapes of // grammar rules, and accidental instances (emptiness, only whitespace, etc.) - Symbol _nl = #[\n].symbol; - Symbol notNl = #![\n].symbol; switch () { // nothing should not produce additional spaces @@ -101,39 +99,48 @@ default Box toBox(t:appl(Production p, list[Tree] args), FO opts = fo()) { return NULL(); // literals are printed as-is - case : - return L(""); + case : { + str yield = ""; + return yield != "" ? L(yield) : NULL(); + } // case-insensitive literals are optionally normalized - case : - return L(ci("", opts.ci)); + case : { + str yield = ""; + return yield != "" ? 
L(ci(opts.ci, "")) : NULL(); + } // non-existing content should not generate accidental spaces case : - return NULL(); + return NULL(); + + case : + return U([toBox(present)]); // non-separated lists should stick without spacing (probably lexical) case : return H([toBox(e, opts=opts) | e <- elements], hs=0); + // non-separated lists should stick without spacing (probably lexical) case : return H([toBox(e, opts=opts) | e <- elements], hs=0); - // comma's are usually for parameters separation + // comma's are usually for parameters separation. leaving it to + // parent to wrap the box in the right context. case : - return HOV([ + return U([ H([ - toBox(elements[i], opts=opts), - *[H([toBox(elements[i+2], opts=opts)], hs=1) | i + 2 < size(elements)] + toBox(elements[i], opts=opts), // element + *[L(",") | i + 2 < size(elements)] // separator ], hs=0) | int i <- [0,4..size(elements)] ]); // comma's are usually for parameters separation case : - return HOV([ + return HV([ H([ - toBox(elements[i], opts=opts), - *[H([toBox(elements[i+2], opts=opts)], hs=1) | i + 2 < size(elements)] + toBox(elements[i], opts=opts), // element + *[L(",") | i + 2 < size(elements)] // separator ], hs=0) | int i <- [0,4..size(elements)] ]); @@ -141,8 +148,8 @@ default Box toBox(t:appl(Production p, list[Tree] args), FO opts = fo()) { case : return V([ H([ - toBox(elements[i], opts=opts), - *[H([toBox(elements[i+2], opts=opts)], hs=1) | i + 2 < size(elements)] + toBox(elements[i], opts=opts), // element + *[L(";") | i + 2 < size(elements)] // separator ], hs=0) | int i <- [0,4..size(elements)] ]); @@ -150,17 +157,16 @@ default Box toBox(t:appl(Production p, list[Tree] args), FO opts = fo()) { case : return V([ H([ - toBox(elements[i], opts=opts), - *[H([toBox(elements[i+2], opts=opts)], hs=1) | i + 2 < size(elements)] + toBox(elements[i], opts=opts), // element + *[toBox(elements[i+2]) | i + 2 < size(elements)] // separator ], hs=0) | int i <- [0,4..size(elements)] ]); - // semi-colons are 
usually for parameters separation case : return V([ H([ - toBox(elements[i], opts=opts), - *[H([toBox(elements[i+2], opts=opts)], hs=1) | i + 2 < size(elements)] + toBox(elements[i], opts=opts), // element + *[L(";") | i + 2 < size(elements)] // separator ], hs=0) | int i <- [0,4..size(elements)] ]); @@ -168,75 +174,89 @@ default Box toBox(t:appl(Production p, list[Tree] args), FO opts = fo()) { case : return V([ H([ - toBox(elements[i], opts=opts), - *[H([toBox(elements[i+2], opts=opts)], hs=1) | i + 2 < size(elements)] + toBox(elements[i], opts=opts), // element + *[toBox(elements[i+2]) | i + 2 < size(elements)] // separator + ], hs=0) | int i <- [0,4..size(elements)] + ]); + + // now we have any other literal as separator + case : + return U([ + H([ + toBox(elements[i], opts=opts), // element + *[toBox(elements[i+2]) | i + 2 < size(elements)] // separator ], hs=0) | int i <- [0,4..size(elements)] ]); - case : - return V([G([toBox(e, opts=opts) | e <- elements], gs=4, hs=0, op=H)], hs=1); + case : + return U([ + H([ + toBox(elements[i], opts=opts), // element + *[toBox(elements[i+2]) | i + 2 < size(elements)] // separator + ], hs=0) | int i <- [0,4..size(elements)] + ]); + + + // this is a normal list + case : + return U([toBox(elements[i], opts=opts) | int i <- [0,2..size(elements)]]); + + // this is likely a lexical + case : + return H([toBox(e, opts=opts) | e <- elements], hs=0); - case : - return V([G([toBox(e, opts=opts) | e <- elements], gs=4, hs=0, op=H)], hs=1); - - // with only one separator it's probably a lexical - case : - return V([G([toBox(e, opts=opts) | e <- elements], gs=2, hs=0, op=H)], hs=0); + // this is likely a lexical + case : + return H([toBox(e, opts=opts) | e <- elements], hs=0); + + // this is a normal list + case : + return U([toBox(elements[i], opts=opts) | int i <- [0,2..size(elements)]]); - case : - return V([G([toBox(e, opts=opts) | e <- elements], gs=2, hs=0, op=H)], hs=0); + // this is likely a lexical + case : + return 
H([toBox(e, opts=opts) | e <- elements], hs=0); + // this is likely a lexical + case : + return H([toBox(e, opts=opts) | e <- elements], hs=0); + // We remove all layout node positions to make the number of children predictable // Comments can be recovered by `layoutDiff`. By not recursing into layout // positions `toBox` becomes more than twice as fast. case : return NULL(); - // if we are given a comment node, then we can format it here for use by layoutDiff - case : - return V([ - H([toBox(elements[0], opts=opts), - H([L(e) | e <- words("")], hs=1) - ], hs=1) - ]); - - // if we are given a comment node, then we can pretty print it here for use by layoutDiff - case : - return V([ - H([toBox(elements[0], opts=opts), - H([L(w) | e <- elements[1..], w <- words("")], hs=1) - ], hs=1) - ]); - - // multiline comments are rewrapped for the sake of readability and fitting on the page - case : - return HV([toBox(elements[0], opts=opts), // recurse in case its a ci literal - *[L(w) | e <- elements[1..-1], w <- words("")], // wrap a nice paragraph - toBox(elements[-1], opts=opts) // recurse in case its a ci literal - ], hs=1); - - // lexicals are never split in pieces, unless it's comments but those are handled above. - case : - return L(""); + // lexicals are never split in pieces + case : { + str yield = ""; + return yield != "" ? L(yield) : NULL(); + } // Now we will deal with a lot of cases for expressions and block-structured statements. // Those kinds of structures appear again and again as many languages share inspiration - // from their predecessors. Watching out not to loose any comments... + // from their pre-decessors. 
- case : - return HOV([toBox(elements[0], opts=opts), H([toBox(e, opts=opts) | e <- elements[1..]])]); + // binary operators become flat lists, but only if they are associative + case : + if ({\assoc(\left()), \assoc(\right()), \assoc(\assoc())} & attrs != {}) + return U([toBox(elements[0]), L(op), toBox(elements[-1])]) ; + // postfix operators stick - case : + case : return H([toBox(e, opts=opts) | e <- elements], hs=0); // prefix operators stick - case : + case : return H([toBox(e, opts=opts) | e <- elements], hs=0); // brackets stick - case : - return H([toBox(e, opts=opts) | e <- elements], hs=0); + case : + return H(L("("), I(toExpBox(elements[2], wrapper=HOV(), opts=opts)), L(")"), hs=0); + + case : + return toBox(single); // if the sort name is statement-like and the structure block-like, we go for // vertical with indentation @@ -247,6 +267,10 @@ default Box toBox(t:appl(Production p, list[Tree] args), FO opts = fo()) { I([V([toBox(e, opts=opts) | Tree e <- elements[size(pre)+1..-1]])]), toBox(elements[-1], opts=opts) ]); + + // this is to simplify the tree structure for efficiency and readability + case : + return toBox(singleton); } return HV([toBox(a, opts=opts) | a <- args]); @@ -256,34 +280,112 @@ default Box toBox(t:appl(Production p, list[Tree] args), FO opts = fo()) { default Box toBox(amb({Tree t, *Tree _}), FO opts=fo()) = toBox(t); @synopsis{When we end up here we simply render the unicode codepoint back.} -default Box toBox(c:char(_), FormatOptions opts=fo() ) = L(""); +default Box toBox(c:char(_), FormattingOptions opts=fo() ) = L(""); @synopsis{Cycles are invisible and zero length} default Box toBox(cycle(_, _), FO opts=fo()) = NULL(); -@synopsis{Private type alias for legibility's sake} -private alias FO = FormatOptions; +@synopsis{Create a V box of V boxes where the inner boxes are connected and the outer boxes are separated by an empty line.} +@description{ +This function learns from the input trees how vertical clusters were layout in the 
original tree. +The resulting box maintains the original clustering. +For example, such lists of declarations which are separated by a newline, remain separated after formatting with `toClusterBox` +``` +int a1 = 1; +int a2 = 2; + +int b1 = 3; +int b2 = 4; +``` +} +@benefits{ +* many programmers use vertical clustering, or "grouping statements", to indicate meaning or intent, by not throwing this +away we are not throwing away the documentative value of their grouping efforts. +} +@pitfalls{ +* ((toClusterBox)) is one of the (very) few Box functions that use layout information from the input tree to +influence the layout of the output formatted code. It replaces a call to ((toBox)) for that reason. +* ((toClusterBox)) does not work on separated lists, yet. +} +Box toClusterBox(list[Tree] lst, FO opts=fo()) { + list[Box] cluster([]) = []; + + list[Box] cluster([Tree e]) = [V([toBox(e)], vs=0)]; -@synopsis{Removing production labels removes similar patterns in the main toBox function.} -private Production delabel(prod(label(_, Symbol s), list[Symbol] syms, set[Attr] attrs)) - = prod(s, delabel(syms), attrs); + list[Box] cluster([*Tree pre, Tree last, Tree first, *Tree post]) + = [V([*[toBox(p, opts=opts) | p <- pre], toBox(last, opts=opts)], vs=0), *cluster([first, *post])] + when first@\loc.begin.line - last@\loc.end.line > 1 + ; -private default Production delabel(Production p) = p; + default list[Box] cluster(list[Tree] l) = [V([toBox(e, opts=opts) | e <- l], vs=0)]; -private list[Symbol] delabel(list[Symbol] syms) = [delabel(s) | s <- syms]; + return V(cluster(lst), vs=1); +} + +Box toClusterBox(&T* lst, FO opts=fo()) = toClusterBox([e | e <- lst], opts=opts); +Box toClusterBox(&T+ lst, FO opts=fo()) = toClusterBox([e | e <- lst], opts=opts); -private Symbol delabel(label(_, Symbol s)) = s; -private default Symbol delabel(Symbol s) = s; +@synopsis{Reusable way of dealing with large binary expression trees} +@description{ +1. 
the default `toBox` will flatten nested binary expressions to U lists. +2. the G box groups each operator with the following expression on the right hand-side, + * given an initial element (usually L("=") or L(":=")) for the assignment operators +3. the entire list is indented in case the surrounding context needs more space +4. the net result is usually in vertical mode: +``` + = operand1 + + operand2 + + operand3 +``` +or in horizontal mode: +``` += operand1 + operand2 + operand3 +``` + +By default ((toExpBox)) wraps it result in a HOV context, but you can pass +in a different `wrapper` if you like. +} +Box toExpBox(Box prefix, Tree expression, Box wrapper=HOV(), FO opts=fo()) + = wrapper[boxes=[G(prefix, toBox(expression, opts=opts), gs=2, op=H())]]; + +@synopsis{Reusable way of dealing with large binary expression trees} +@description{ +1. the default `toBox` will flatten nested binary expressions to U lists. +2. the G box groups each operator horizontally with the following expression on the right hand-side. +4. the net result is usually in vertical mode: +``` + operand1 + operand2 + + operand3 +``` +or in horizontal mode: +``` +operand1 + operand2 + operand3 +``` + +By default ((toExpBox)) wraps it result in a HV context, but you can pass +in a different `wrapper` if you like. 
+ +} +Box toExpBox(Tree expression, Box wrapper=HV(), FO opts=fo()) + = wrapper[boxes=[G(toBox(expression, opts=opts), gs=2, backwards=true, op=H())]]; + +@synopsis{Private type alias for legibility's sake} +private alias FO = FormattingOptions; @synopsis{This is a short-hand for legibility's sake} -private FO fo() = formatOptions(); +private FO fo() = formattingOptions(); @synopsis{Implements normalization of case-insensitive literals} -private str ci(str word, toLower()) = toLowerCase(word); -private str ci(str word, toUpper()) = toUpperCase(word); -private str ci(str word, toCapitalized()) = capitalize(word); -private str ci(str word, asIs()) = word; - -@synopsis{Split a text by the supported whitespace characters} -private list[str] words(str text) - = [ x | // := text]; \ No newline at end of file +private str ci(toLower(), str word) = toLowerCase(word); +private str ci(toUpper(), str word) = toUpperCase(word); +private str ci(toCapitalized(), str word) = capitalize(word); +private str ci(asIs(), str word) = word; + +@synopsis{Removing production labels helps with case distinctions on ((Symbol)) kinds.} +private Production delabel(prod(Symbol s, list[Symbol] syms, set[Attr] attrs)) = prod(delabel(s), [delabel(x) | x <- syms], attrs); +private Production delabel(regular(Symbol s)) = regular(delabel(s)); + +@synopsis{Removing symbol labels helps with case distinctions on ((Symbol)) kinds.} +private Symbol delabel(label(_, Symbol s)) = delabel(s); +private Symbol delabel(conditional(Symbol s, _)) = delabel(s); +private default Symbol delabel(Symbol s) = s; \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/c90/syntax/C.rsc b/src/org/rascalmpl/library/lang/c90/syntax/C.rsc index 556b7fcb9fa..b054d59a69b 100644 --- a/src/org/rascalmpl/library/lang/c90/syntax/C.rsc +++ b/src/org/rascalmpl/library/lang/c90/syntax/C.rsc @@ -87,8 +87,7 @@ syntax Expression | Expression "\>\>=" Expression | Expression "&=" Expression | Expression "^=" Expression - | 
Expression " - | =" Expression + | Expression "|=" Expression ) > left commaExpression: Expression "," Expression ; diff --git a/src/org/rascalmpl/library/lang/pico/format/Formatting.rsc b/src/org/rascalmpl/library/lang/pico/format/Formatting.rsc index 20941f8a5f8..02649d8a481 100644 --- a/src/org/rascalmpl/library/lang/pico/format/Formatting.rsc +++ b/src/org/rascalmpl/library/lang/pico/format/Formatting.rsc @@ -47,18 +47,18 @@ list[TextEdit] formatPicoTree(start[Program] file) { @synopsis{Format while} Box toBox((Statement) `while do <{Statement ";"}* block> od`, FO opts = fo()) - = V([ - H([L("while"), toBox(e, opts=opts), L("do")]), - I([toBox(block, opts=opts)]), + = V( + H(L("while"), HV(toBox(e, opts=opts)), L("do")), + I(toClusterBox(block, opts=opts)), L("od") - ]); + ); @synopsis{Format if-then-else } Box toBox((Statement) `if then <{Statement ";"}* thenPart> else <{Statement ";"}* elsePart> fi`, FO opts = fo()) - = V([ - H([L("if"), toBox(e, opts=opts), L("then")]), - I([toBox(thenPart, opts=opts)]), + = V( + H(L("if"), HV(toBox(e, opts=opts)), L("then")), + I(toClusterBox(thenPart, opts=opts)), L("else"), - I([toBox(elsePart, opts=opts)]), + I(toClusterBox(elsePart, opts=opts)), L("fi") - ]); \ No newline at end of file + ); \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/rascal/format/Grammar.rsc b/src/org/rascalmpl/library/lang/rascal/format/Grammar.rsc index d29f1ad0e30..73e27142e60 100644 --- a/src/org/rascalmpl/library/lang/rascal/format/Grammar.rsc +++ b/src/org/rascalmpl/library/lang/rascal/format/Grammar.rsc @@ -10,6 +10,10 @@ @contributor{Arnold Lankamp - Arnold.Lankamp@cwi.nl} @synopsis{Convert the Rascal internal grammar representation format (Grammar) to a syntax definition in Rascal source code.} +@pitfalls{ +This function does not use advanced formatting feature because it is a part of +components early in Rascal's bootstrapping and standard library construction cycle. 
+} module lang::rascal::format::Grammar import ParseTree; diff --git a/src/org/rascalmpl/library/lang/rascal/format/Rascal.rsc b/src/org/rascalmpl/library/lang/rascal/format/Rascal.rsc new file mode 100644 index 00000000000..063e4ac4176 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/format/Rascal.rsc @@ -0,0 +1,1175 @@ +@license{ +Copyright (c) 2022, NWO-I Centrum Wiskunde & Informatica (CWI) +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +} +@synopsis{Composes a default formatter for Rascal modules} +@description{ +This module composes and describes a "standard" formatting style for Rascal. +There could be other styles of course. Other styles can be build by +writing different `toBox` rules. 
+} +@bootstrapParser +module lang::rascal::format::Rascal + +// by extending these modules we compose a `toBox` function +// which handles all relevant constructs of Rascal +extend lang::box::util::Tree2Box; +extend lang::rascal::\syntax::Rascal; + +import IO; +import ParseTree; +import String; +import analysis::diff::edits::ExecuteTextEdits; +import analysis::diff::edits::HiFiLayoutDiff; +import analysis::diff::edits::TextEdits; +import lang::box::\syntax::Box; +import lang::box::util::Box2Text; +import util::FileSystem; +import util::Formatters; +import util::Monitor; +import util::Reflective; + + +@synopsis{Format any Rascal module and dump the result as a string} +void debugFormatRascalFile(loc \module, bool console=false, bool HTML=!console, FormattingOptions opts = formattingOptions(), bool dumpEdits=false) { + debugFileFormat(#start[Module], toBox, \module, console=console, HTML=HTML, opts=opts, dumpEdits=dumpEdits); +} + +void testOnLibrary() { + debugFilesFormat( + #start[Module], + toBox, + |project://rascal/src/org/rascalmpl/library/|, + "rsc", + ansi=false, + shadowFiles=true, + appendFile=false, + console=false); +} + +void testOnCompiler() { + debugFilesFormat( + #start[Module], + toBox, + |project://rascal/src/org/rascalmpl/compiler/|, + "rsc", + ansi=true, + shadowFiles=false, + appendFile=true, + console=false); +} + +/* Modules */ + +Box toBox(Toplevel* toplevels) = toClusterBox(toplevels); + +Box toBox((Module) ` module `) + = V(V(toBox(tags), + H(L("module"), toBox(name))), + toClusterBox(imports), + toBox(body), vs=1); + +Box toBox(Import* imports) = toClusterBox(imports); + +Box toBox((Import) `import ;`) + = H(L("import"), H0(toBox(m), L(";"))); + +Box toBox((Import) `extend ;`) + = H(L("extend"), H0(toBox(m), L(";"))); + +Box toBox((Visibility) ``) = NULL(); + +/* Syntax definitions */ + +Box toBox((SyntaxDefinition) ` syntax = ;`) + = (production is \all || production is \first) + ? 
V(H(toBox(st), L("syntax"), toBox(defined)), + I(G(L("="), toBox(production), gs=2, op=H([])), + L(";"))) + : // single rule case + H(toBox(st), L("syntax"), toBox(defined), L("="), H0(toBox(production), L(";"))) + ; + +Box toBox((SyntaxDefinition) `lexical = ;`) + = (production is \all || production is \first) + ? V(H(L("lexical"), toBox(defined)), + I(G(L("="), toBox(production), gs=2, op=H([])), + L(";"))) + : // single rule case + H(L("lexical"), toBox(defined), L("="), H0(toBox(production), L(";"))) + ; + +Box toBox((SyntaxDefinition) `keyword = ;`) + = (production is \all || production is \first) + ? V(H(L("keyword"), toBox(defined)), + I(G(L("="), toBox(production), gs=2, op=H([])), + L(";"))) + : // single rule case + H(L("keyword"), toBox(defined), L("="), H0(toBox(production), L(";"))) + ; + +Box toBox((SyntaxDefinition) ` layout = ;`) + = (production is \all || production is \first) + ? V(H(toBox(v), L("layout"), toBox(defined)), + I(G(L("="), toBox(production), gs=2, op=H([])), + L(";"))) + : // single rule case + H(toBox(v), L("layout"), toBox(defined), L("="), H0(toBox(production), L(";"))) + ; + + +Box toBox((Prod) ` | `) + = U(toBox(lhs), L("|"), toBox(rhs)); + +Box toBox((Prod) ` \> `) + = U(toBox(lhs), L("\>"), toBox(rhs)); + +Box toBox((Prod) `:`) + = H0(L(":"), toBox(n)); + +Box toBox((Prod) ` : `) + = H([toBox(modifiers), H0(toBox(name), L(":")), *[toBox(s) | s <- syms]]); + +Box toBox((Prod) ` `) + = H([toBox(modifiers), *[toBox(s) | s <- syms]]); + +Box toBox((Prod) ` ()`) + = H(toBox(a), HOV(G(L("("), U(toBox(g)), L(")"), gs=2, op=H([])))); + +/* symbols */ +Box toBox((Sym) `{ }*`) = H0(L("{"), H1(toBox(e), toBox(sep)), L("}"), L("*")); +Box toBox((Sym) `{ }+`) = H0(L("{"), H1(toBox(e), toBox(sep)), L("}"), L("+")); +Box toBox((Sym) `*`) = H0(toBox(e), L("*")); +Box toBox((Sym) `+`) = H0(toBox(e), L("+")); +Box toBox((Sym) `?`) = H0(toBox(e), L("?")); +Box toBox((Sym) `()`) = H0(L("("), L(")")); + +Box toBox((Sym) `( )`) + = H0(L("("), 
H1([toBox(first), *[toBox(e) | Sym e <- sequence]]),L(")")); + +Box toBox((Sym) `start[]`) = H0(L("start"), L("["), toBox(s), L("]")); + +Box toBox((Sym) `( | <{Sym "|"}+ alternatives>)`) + = H0(L("("), H1([toBox(first), *[L("|"), toBox(e) | Sym e <- alternatives]]),L(")")); + +Box toBox((Class) `[]`) + = H0([L("["), *[toBox(r) | r <- ranges], L("]")]); + +Box toBox((Range) ` - `) + = H0(toBox(s), L("-"), toBox(e)); + +/* Declarations */ + +Box toBox((QualifiedName) `<{Name "::"}+ names>`) + = L(""); + +Box toBox((Tag) `@ `) + = H0(L("@"), toBox(n), toBox(contents)); + +Box toBox((Tag) `@ = `) + = H0(L("@"), toBox(n), L("="), toBox(exp)); + +Box toBox((Tag) `@`) + = H0(L("@"), toBox(n)); + +Box toBox(QualifiedName n) = L(""); + +Box toBox((Declaration) ` anno @;`) + = V( + toBox(t), + H(toBox(v), L("anno"), toBox(annoType), H0(toBox(onType), L("@"), toBox(n))) + ); + +Box toBox((Declaration) ` alias = ;`) + = V(toBox(t), + H(toBox(v), L("alias"), toBox(user), L("="), H0(toBox(base), L(";")))); + +Box toBox((Declaration) ` data ;`) + = V(toBox(tg), + H(toBox(v), L("data"), H0(toBox(typ), toBox(ps), L(";")))); + +Box toBox((Declaration) ` data = ;`) + = HV(V(toBox(tg), + H(toBox(v), L("data"), H0(toBox(typ)), toBox(ps))), + I(H(L("="), H0(toBox(va), L(";"))))); + +Box toBox((Declaration) ` data = | <{Variant "|"}+ vs>;`) + = V(toBox(tg), + H(toBox(v), L("data"), H0(toBox(typ)), toBox(ps)), + I([G([ + L("="), + toBox(va), + *[L("|"), toBox(vax) | Variant vax <- vs] // hoist the bars `|` up to the same level of `=` + ]), L(";")])); + +Box toBox((Declaration) ` = ;`) + = HV( + V( + toBox(tags), + H1(toBox(visibility), toBox(typ), toBox(name)) + ), + I(H0(HOV(G(L("="), U([toBox(initial)]))), L(";")))); + +Box toBox((Declaration) ` , <{Variable ","}+ variables>;`) + = HV(V(toBox(tags), H1(toBox(visibility), toBox(typ))), I(HOV(H0(toBox(first), L(",")), SL([toBox(v) | v <- variables], L(",")))), L(";")); + +Box toBox((Declarator) ` `) + = H1(toBox(typ), toBox(name)); + 
+Box toBox((Declarator) ` = `) + = HV(H(toBox(typ), toBox(name)), I(toExpBox(L("="), initial))); + +Box toBox((Declarator) ` , <{Variable ","}+ variables>`) + = HV(I(HOV(H(toBox(typ), toBox(first)), L(","), SL([toBox(v) | v <- variables], L(","))))); + +Box toBox((CommonKeywordParameters) `(<{KeywordFormal ","}+ fs>)`) + = H0(L("("), HOV(toBox(fs)), L(")")); + +Box toBox((Variant) `(<{TypeArg ","}* args>, <{KeywordFormal ","}+ kws>)`) + = HV( + H0(toBox(n), L("(")), + HOV( + I(H0(toBox(args), L(","))), + I(toBox(kws)), hs=1), + L(")"), hs=0); + +Box toBox((Variant) `(<{TypeArg ","}* args>)`) + = HV(H0(toBox(n), L("(")), + I(toBox(args)), + L(")"), hs=0); + +Box toBox((Variant) `(<{TypeArg ","}* args> + '<{KeywordFormal ","}+ kws>)`) + = HV( + H0(toBox(n), L("(")), + HOV( + I(H0(toBox(args))), + I(toBox(kws)), hs=1 + ), + L(")"), hs=0); + +Box toBox(FunctionModifier* modifiers) = H([toBox(b) | b <- modifiers]); + +Box toBox((Signature) ` throws <{Type ","}+ exs>`) + = H0( + HOV([ + H(toBox(modifiers), toBox(typ), H0(toBox(name), L("("))), + G(toBox(parameters), gs=1, op=I())], hs=0), + H([L(")"), L("throws"), SL([toBox(e) | e <- exs], L(","))], hs=1)); + +Box toBox((Signature) ` `) + = H0(HOV( + H(toBox(modifiers), toBox(typ), H0(toBox(name), L("("))), + G(toBox(parameters), gs=1, op=I()) + hs=0), L(")")); + +Box toBox((FunctionDeclaration) ` ;`) + = V( + toBox(tags), + HOV( + toBox(vis), + H0(toBox(sig), L(";")) + ) + ); + +Box toBox((FunctionDeclaration) ` = ;`) + = V( + toBox(tags), + H0(HOV( + toBox(vis), + toBox(sig), + I(toExpBox(L("="), exp))), + L(";")) + ) + when !(exp is \visit || exp is voidClosure || exp is closure); + +Box toBox((Expression) ` { }`) + = HOV( + toBox(typ), H0(L("("), HOV(G(toBox(parameters), gs=1, op=I())), H(L(")")), L("{")), + I(V(toClusterBox(statements))), + L("}") + ); + +Box toBox((Expression) ` { }`) + = HOV( + H0(L("("), HOV(G(toBox(parameters), gs=1, op=I())), H(L(")"), L("{"))), + I(V(toClusterBox(statements))), + L("}") + ); + 
+Box toBox((FunctionDeclaration) ` = { };`) + = V(toBox(tags), + HOV( + toBox(vis), + toBox(sig), + I(HOV( + H(L("="), H0(toBox(typ), L("("))), + G(toBox(parameters), gs=1, op=I()), + H(L(")"), L("{")) + ))), + I(V(toClusterBox(statements))), + H0(L("}"), L(";"))); + +Box toBox((FunctionDeclaration) ` = { };`) + = V(toBox(tags), + HOV( + toBox(vis), + toBox(sig), + I(HOV( + H(L("="), L("(")), + G(toBox(parameters), gs=1, op=I()), + H(L(")"), L("{")) + ))), + I(V(toClusterBox(statements))), + H0(L("}"), L(";"))); + + +Box toBox((FunctionDeclaration) ` = <}>", htmlEscapes); - str rec(t:appl(prod(cilit(str l), _, _), _)) + str rec(Tree t:appl(prod(lit(str l), _, _), _)) = span("Keyword", l) when isKeyword(l); - str rec(t:appl(prod(_, _, {*_, \tag("category"(str cat))}), list[Tree] as)) - = span(cat, "<}>"); + str rec(Tree t:appl(prod(cilit(str l), _, _), list[Tree] as)) + = span("Keyword", yield(as)) when isKeyword(l); - default str rec(appl(_, list[Tree] as)) - = "<}>"; + str rec(Tree t:appl(prod(_, _, {*_, \tag("category"(str cat))}), list[Tree] as)) + = span(cat, yield(as)); - str rec(amb({k, *_})) = rec(k); + str rec(amb({Tree k, *_})) = rec(k); + + default str rec(appl(Production p, list[Tree] as)) + = "<}>"; default str rec(Tree t:char(_)) = escape("", htmlEscapes); str span(str class, str src) = "\\"\>\"; - return "\
\\\";
+  if (withStyle) {
+    return "\
+           '
+           '\
+           '\
\
+           '
+           '\\";
+  }
+  else {
+    return "\
\
+           '
+           '\\";
+  }
 }
 
 @synopsis{Yields the characters of a parse tree as the original input sentence but using macros to wrap to-be-highlighted areas.}
@@ -80,44 +100,6 @@ public str toLaTeX(Tree t) {
   return rec(t);
 } 
 
-@synopsis{Yields the characters of a parse tree as the original input sentence in a ... block, but with spans for highlighted segments in HTML}
-public str toHTML(Tree t) {
-  htmlEscapes = (
-	  "\<": "<",
-	  "\>": ">",
-	  "&" : "&"
-  );
-
-  str rec(t:appl(prod(lit(str l), _, _), _)) 
-    = wrapLink(span("Keyword", l), t)
-    when isKeyword(l);
-
-  str rec(t:appl(prod(cilit(str l), _, _), _)) 
-    = wrapLink(span("Keyword", l), t)
-    when isKeyword(l);
-
-  str rec(t:appl(prod(_, _, {*_, \tag("category"(str cat))}), list[Tree] as))
-    = wrapLink(span(cat, ( "" | it + rec(a) | a <- as )), t);
-
-  str rec(appl(prod(_, _, set[Attr] attrs), list[Tree] as))
-    = ( "" | it + rec(a) | a <- as )
-    when {*_, \tag("category"(str _))} !:= attrs;
-
-  str rec(appl(regular(_), list[Tree] as))
-    = ( "" | it + rec(a) | a <- as );
-
-  str rec(amb({k, *_})) = rec(k);
-
-  default str rec(Tree t) 
-    = wrapLink(escape(unparse(t), htmlEscapes), t);
-
-  str span(str class, str src) = "\\"\>\";
-
-  default str wrapLink(str text, Tree _) = text;
-
-  return "\
\\\";
-}
-
 @synopsis{Unparse a parse tree to unicode characters, wrapping certain substrings with ANSI codes for highlighting.}
 public str toANSI(Tree t, bool underlineAmbiguity=false, int tabSize=4) {
   str rec(Tree x:appl(prod(lit(str l), _, _), _))   = isKeyword(l) ? bold("") :  "";
@@ -143,8 +125,8 @@ public str toANSI(Tree t, bool underlineAmbiguity=false, int tabSize=4) {
   str underline(str s)  = "";
   str comment(str s)    = "";
 
-  str \map("Comment", text)         = comment(text);
-  str \map("Keyword", text)         = bold(text);
+  str \map(/[Cc]omment/, text)         = comment(text);
+  str \map(/[Kk]eyword/, text)         = bold(text);
   default str \map(str _, str text) = text;
 
   return rec(t);
diff --git a/src/org/rascalmpl/library/util/Monitor.rsc b/src/org/rascalmpl/library/util/Monitor.rsc
index 94948a9bec7..2357dddadee 100644
--- a/src/org/rascalmpl/library/util/Monitor.rsc
+++ b/src/org/rascalmpl/library/util/Monitor.rsc
@@ -11,9 +11,11 @@
 @bootstrapParser
 module util::Monitor
 
-import util::Math;
-import IO;
 import Exception;
+import IO;
+import List;
+import Set;
+import util::Math;
 
 @synopsis{Log the start of a job.}
 @description{
@@ -88,6 +90,63 @@ with a parameterized workload and the same label as the job name.
   }
 }
 
+@synopsis{Like ((job)), but for a `block` that does not return a value; `block` receives a `step(message, worked)` callback to report progress.}
+void voidJob(str label, void (void (str message, int worked) step) block, int totalWork=100) {
+  try {
+    jobStart(label, totalWork=totalWork);
+    block(void (str message, int worked) { // the stepper handed to `block` forwards each call to jobStep under this job's label
+      jobStep(label, message, work=worked);
+    });
+  }
+  catch "Never caught": {
+    // This is only here because we cannot have a "finally" clause in Rascal without a catch; it rethrows what it matched
+    throw "Never caught";
+  }
+  finally {
+    jobEnd(label); // placed in `finally` so the job named `label` is ended whether or not `block` throws
+  }
+}
+
+@synopsis{Convenience function for reporting progress on a for loop over a list: applies `consumer` to every element inside a ((job)) named `label` and returns the list of results.}
+list[&U] loopJob(list[&T] work, &U(&T) consumer, str label = "", str (&T) stepLabel = str(&T w) { return ""; }) {
+  return job(label, list[&U] (void (str message, int worked) step) {
+    return for (&T w <- work) {
+      step(stepLabel(w), 1); // progress (one unit, message from `stepLabel`) is reported before the element is consumed
+      append consumer(w);
+    }
+  }, totalWork=size(work)); // total work is the number of elements, matching the one unit stepped per element
+}
+
+@synopsis{Convenience function for reporting progress on a for loop over a list, no results computed: applies `consumer` to every element inside a ((voidJob)) named `label`.}
+void loopVoidJob(list[&T] work, void(&T) consumer, str label = "", str (&T) stepLabel = str(&T w) { return ""; }) {
+  voidJob(label, void (void (str message, int worked) step) {
+    for (&T w <- work) {
+      step(stepLabel(w), 1); // progress (one unit, message from `stepLabel`) is reported before the element is consumed
+      consumer(w);
+    }
+  }, totalWork=size(work)); // total work is the number of elements, matching the one unit stepped per element
+}
+
+@synopsis{Convenience function for reporting progress on a for loop over a set: applies `consumer` to every element inside a ((job)) named `label` and returns a list of the results.}
+list[&U] loopJob(set[&T] work, &U(&T) consumer, str label = "", str (&T) stepLabel = str(&T w) { return ""; }) {
+  return job(label, list[&U] (void (str message, int worked) step) {
+    return for (&T w <- work) { // results are appended in whatever order this loop visits the set
+      step(stepLabel(w), 1);
+      append consumer(w);
+    }
+  }, totalWork=size(work)); // total work is the number of elements, matching the one unit stepped per element
+}
+
+@synopsis{Convenience function for reporting progress on a for loop over a set, no results computed: applies `consumer` to every element inside a ((voidJob)) named `label`.}
+void loopVoidJob(set[&T] work, void(&T) consumer, str label = "", str (&T) stepLabel = str(&T w) { return ""; }) {
+  voidJob(label, void (void (str message, int worked) step) {
+    for (&T w <- work) {
+      step(stepLabel(w), 1); // progress (one unit, message from `stepLabel`) is reported before the element is consumed
+      consumer(w);
+    }
+  }, totalWork=size(work)); // total work is the number of elements, matching the one unit stepped per element
+}
+
 @synopsis{A job block guarantees a start and end, and provides easy access to the stepper interface.}
 @description{
 The convenience function that is passed to the block can be used inside the block to register steps
@@ -120,6 +179,24 @@ with a parameterized workload and the same label as the job name.
   }
 }
 
+@synopsis{Like ((job)), but for a `block` that does not return a value; `block` receives a `step(worked)` callback and the job label doubles as the step message.}
+void voidJob(str label, void (void (int worked) step) block, int totalWork=1) {
+  try {
+    jobStart(label, totalWork=totalWork);
+    block(void (int worked) { // the stepper handed to `block` takes no message, so `label` is passed as the message too
+      jobStep(label, label, work=worked);
+    });
+  }
+  catch "Never caught": {
+    // This is only here because we cannot have a "finally" clause in Rascal without a catch; it rethrows what it matched
+    throw "Never caught";
+  }
+  finally {
+    jobEnd(label); // placed in `finally` so the job named `label` is ended whether or not `block` throws
+  }
+}
+
+
 @synopsis{A job block guarantees a start and end, and provides easy access to the stepper interface.}
 @description{
 The convenience function that is passed to the block can be used inside the block to register steps
@@ -148,6 +225,24 @@ with workload `1` and the same label as the job name.
   }
 }
 
+@synopsis{Like ((job)), but for a `block` that does not return a value; `block` receives a parameterless `step()` callback that reports one unit of work per call.}
+void voidJob(str label, void (void () step) block, int totalWork=1) {
+  try {
+    jobStart(label, totalWork=totalWork);
+    block(void () {
+      jobStep(label, label, work=1); // no arguments are available here: the label is reused as the message and the work is fixed at 1
+    });
+  }
+  catch "Never caught": {
+    // This is only here because we cannot have a "finally" clause in Rascal without a catch; it rethrows what it matched
+    throw "Never caught";
+  }
+  finally {
+    jobEnd(label); // placed in `finally` so the job named `label` is ended whether or not `block` throws
+  }
+}
+
+
 @synopsis{A job block guarantees a start and end, and provides easy access to the stepper interface.}
 @benefits{
 * the block code does not need to remember to end the job with the same job name.
@@ -157,6 +252,21 @@ with workload `1` and the same label as the job name.
   try {
     jobStart(label, totalWork=totalWork);
     return block();
+  }  
+  catch "Never caught": {
+    // This is only here because we cannot have a "finally" clause in Rascal without a catch
+    throw "Never caught";
+  }
+  finally {
+    jobEnd(label);
+  }
+}
+
+@synopsis{Like ((job)) but does not return a value.}
+void voidJob(str label, void () block, int totalWork=1) {
+  try {
+    jobStart(label, totalWork=totalWork);
+    block();
   }
   catch "Never caught": {
     // This is only here because we cannot have a "finally" clause in Rascal without a catch