usethesource · jurgenvinju · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026
diff --git a/pom.xml b/pom.xml
@@ -3,7 +3,7 @@
 
     <groupId>org.rascalmpl</groupId>
     <artifactId>rascal</artifactId>
-    <version>0.42.2-RC2-SNAPSHOT</version>
+    <version>0.43.0-SNAPSHOT</version>
     <packaging>jar</packaging>
 
     <scm>

diff --git a/src/org/rascalmpl/library/IO.rsc b/src/org/rascalmpl/library/IO.rsc
@@ -140,8 +140,9 @@ Append a textual representation of some values to an existing or a newly created
 *  All other values are printed as-is.
 *  Each value is terminated by a newline character.
 
-The existing file can be stored using any character set possible, if you know the character set, please use ((appendToFileEnc)).
-Else the same method of deciding the character set is used as in ((readFile)).
+The existing file can be stored using any character set possible. 
+If you know the character set, please use the `charset` keyword parameter.
+Otherwise, the same method of deciding the character set is used as in ((readFile)).
 }
 @pitfalls{
 *  The same encoding pitfalls as the ((readFile)) function.
@@ -239,6 +240,8 @@ public java bool isDirectory(loc file);
 See ((IO-iprintExp)) for a version that returns its argument as result
 and ((IO-iprintln)) for a version that adds a newline
 and ((IO-iprintToFile)) for a version that prints to a file.
+
+With a negative `lineLimit` the limit is ignored and the entire value will be printed.
 }
 @examples{
 ```rascal-shell
@@ -254,6 +257,7 @@ public java void iprint(value arg, int lineLimit = 1000);
 See ((IO-iprint)) for a version that displays the result on the console
 and ((IO-iprintExp)) for a version that returns its argument as result
 and ((IO-iprintln)) for a version that adds a newline.
+
 }
 @examples{
 ```rascal-shell
@@ -308,6 +312,8 @@ and ((IO-iprint)) for a version that does not add a newline.
 
 By default we only print the first 1000 lines, if you want to print larger values, either 
 use ((ValueIO-writeTextValueFile)) or change the limit with the lineLimit parameter.
+
+With a negative `lineLimit` the limit is ignored and the entire value will be printed.
 }
 @examples{
 ```rascal-shell

diff --git a/src/org/rascalmpl/library/List.rsc b/src/org/rascalmpl/library/List.rsc
@@ -20,7 +20,6 @@ module List
 import Exception;
 import Map;
 
-
 @synopsis{Concatenate a list of lists.}
 @examples{
 ```rascal-shell
@@ -261,7 +260,6 @@ intercalate(", ", ["zebra", "elephant", "snake", "owl"]);
 str intercalate(str sep, list[value] l) = 
   "<for(int i <- index(l)){><i == 0 ? "" : sep><l[i]><}>";
 
-
 @synopsis{Intersperses a list of values with a separator.}
 @examples{
 ```rascal-shell
@@ -272,9 +270,8 @@ intersperse(1, []);
 intersperse([], [1]);
 ```
 }
-list[&T] intersperse(&T sep, list[&T] xs) = 
-  (isEmpty(xs))? [] : ([head(xs)] | it + [sep,x] | x <- tail(xs));
-
+list[&T] intersperse(&T sep, list[&T] xs) =
+  [x, sep | &T x <- xs][..-1];
 
 @synopsis{Test whether a list is empty.}
 @description{
@@ -655,6 +652,29 @@ tuple[list[&T],list[&T]] split(list[&T] l) {
 	return <take(half,l), drop(half,l)>;
 }
 
+@synopsis{Groups sublists for consecutive elements which are `similar`}
+@description{
+This function does not change the order of the elements. Only elements
+which are similar end-up in a sub-list with more than one element. The
+elements which are not similar to their siblings, end up in singleton
+lists.
+}
+@examples{
+```rascal-shell
+import List;
+bool bothEvenOrBothOdd(int a, int b) = (a % 2 == 0 && b % 2 == 0) || (a % 2 == 1 && b % 2 == 1);
+group([1,7,3,6,2,9], bothEvenOrBothOdd);
+```
+}
+public list[list[&T]] group(list[&T] input, bool (&T a, &T b) similar) {
+  lres = while ([hd, *tl] := input) {
+      sim = [hd, *takeWhile(tl, bool (&T a) { return similar(a, hd); })];
+	    append sim;
+	    input = drop(size(sim), input);
+  }
+
+  return lres; 
+}
 
 @synopsis{Sum the elements of a list.}
 @examples{

diff --git a/src/org/rascalmpl/library/Type.rsc b/src/org/rascalmpl/library/Type.rsc
@@ -30,7 +30,7 @@ The ((subtype)) relation of Rascal has all the mathematical properties of a _fin
 This is a core design principle of Rascal with the following benefits:
 * Type inference has a guaranteed least or greatest solution, always. This means that constraints are always solvable in an unambiguous manner.
 * A _principal type_ can always be computed, which is a most precise and unique solution of a type inference problem. Without the lattice, solution candidates could become incomparable and thus ambiguous. Without
-this principal type property, type inference is predictable for programmers.
+this principal type property, type inference is unpredictable for programmers.
 * Solving type inference constraints can be implemented efficiently. The algorithm, based on ((lub)) and ((glb)), makes progress _deterministically_ and does not require backtracking
 to find better solutions. Since the lattice is not very deep, fixed-point solutions are always found quickly.
 

diff --git a/src/org/rascalmpl/library/analysis/diff/edits/ExecuteTextEdits.rsc b/src/org/rascalmpl/library/analysis/diff/edits/ExecuteTextEdits.rsc
@@ -46,13 +46,13 @@ void executeFileSystemChange(changed(loc file, list[TextEdit] edits)) {
 }
 
 str executeTextEdits(str content, list[TextEdit] edits) {
-    assert isSorted(edits, less=bool (TextEdit e1, TextEdit e2) { 
-        return e1.range.offset < e2.range.offset; 
-    });
+    // assert isSorted(edits, less=bool (TextEdit e1, TextEdit e2) { 
+    //     return e1.range.offset < e2.range.offset; 
+    // });
 
-    for (replace(loc range, str repl) <- reverse(edits)) {
-        content = "<content[..range.offset]><repl><content[range.offset+range.length..]>";
-    }
+    int cursor = 0;
 
-    return content;
+    // linear-time streamed reconstruction of the entire text
+    return "<for (replace(loc range, str repl) <- edits) {><content[cursor..range.offset]><repl>< 
+             cursor = range.offset + range.length;}><content[cursor..]>";
 }
diff --git a/src/org/rascalmpl/library/analysis/diff/edits/HiFiLayoutDiff.rsc b/src/org/rascalmpl/library/analysis/diff/edits/HiFiLayoutDiff.rsc
@@ -27,6 +27,8 @@ module analysis::diff::edits::HiFiLayoutDiff
 extend analysis::diff::edits::HiFiTreeDiff;
 import ParseTree; // this should not be necessary because imported by HiFiTreeDiff
 import String; // this should not be be necessary because imported by HiFiTreeDiff
+import lang::rascal::grammar::definition::Characters;
+import IO;
 
 @synopsis{Normalization choices for case-insensitive literals.}
 data CaseInsensitivity
@@ -56,7 +58,7 @@ list[TextEdit] layoutDiff(Tree original, Tree formatted, bool recoverComments =
     list[TextEdit] rec(
         t:appl(prod(Symbol tS, _, _), list[Tree] tArgs), // layout is not necessarily parsed with the same rules (i.e. comments are lost!)
         u:appl(prod(Symbol uS, _, _), list[Tree] uArgs))
-        = [replace(t@\loc, recoverComments ? learnComments(t, u) : "<u>") | tArgs != uArgs, "<t>" != "<u>" /* avoid useless edits */] 
+        = [replace(t@\loc, repl) | tArgs != uArgs, str repl := (recoverComments ? learnComments(t, u) : "<u>"), repl != "<t>" /* do not edit anything if nothing has changed */] 
         when 
             delabel(tS) is layouts, 
             delabel(uS) is layouts,
@@ -106,22 +108,31 @@ list[TextEdit] layoutDiff(Tree original, Tree formatted, bool recoverComments =
     default list[TextEdit] rec(
         Tree t:appl(Production p, list[Tree] argsA),
         appl(p /* must be the same by the above assert */, list[Tree] argsB)) 
-        = [*rec(a, b) | <a, b> <- zip2(argsA, argsB)]; 
+        = [*rec(argsA[i], argsB[i]) | i <- [0..size(argsA)]]; 
 
     // first add required locations to layout nodes
-    original = reposition(original, markLit=true, markLayout=true, markSubLayout=true);
+    // TODO: check if indeed repositioning is never needed
+    // original = reposition(original, markLit=true, markLayout=true, markSubLayout=true);
 
     return rec(original, formatted);
 }
 
+private Symbol newlineClass = \char-class([range(10,10)]);
+
 @synopsis{Make sure the new layout still contains all the source code comments of the original layout}
 @description{
-This algorithm uses the @category("Comments") tag to detect source code comments inside layout substrings. If the original
+This algorithm uses the `@category(/[cC]omments/)` tag to detect source code comments inside layout substrings. If the original
 layout contains comments, we re-introduce the comments at the expected level of indentation. New comments present in the 
 replacement are kept and will overwrite any original comments. 
 
-This trick is complicated by the syntax of multiline comments and single line comments that have
-to end with a newline.
+There are corner cases with respect to the original comments:
+* the single line comment that does not end with a newline itself, yet it must always end with a newline after it.
+* multiple single line comments after each other
+
+Then there are corner cases with respect to the replacement whitespace:
+* the last line of the replacement whitespace is special. This is the indentation to use for all comments.
+* but there could be no newlines in the replacement whitespace; and still there is a single line comment to be included.
+Now we need to infer an indentation level for what follows the comment from "thin air". 
 }
 @benefits{
 * if comments are kepts and formatted by tools like Tree2Box, then this algorithm does not overwrite these.
@@ -132,7 +143,14 @@ to end with a newline.
 * if comments are not marked with `@category("Comment")` in the original grammar, then this algorithm recovers nothing.
 }
 private str learnComments(Tree original, Tree replacement) {
-    originalComments = ["<c>" | /c:appl(prod(_,_,{\tag("category"(/^[Cc]omment$/)), *_}), _) := original];
+    bool mustEndWithNewline(lit("\n"))                     = true;
+    bool mustEndWithNewline(conditional(Symbol s, _))      = mustEndWithNewline(s);
+    // if a comment can not contain newline characters, but everything else, then it must be followed by one:
+    bool mustEndWithNewline(\iter(Symbol cc:\char-class(_)))      = intersection(cc, newlineClass) != newlineClass;
+    bool mustEndWithNewline(\iter-star(Symbol cc:\char-class(_))) = intersection(cc, newlineClass) != newlineClass;
+    default  bool mustEndWithNewline(_)                    = false;
+
+    originalComments = [<s, s[-1] == "\n" || mustEndWithNewline(lastSym)> | /c:appl(prod(_,[*_,Symbol lastSym],{\tag("category"(/^[Cc]omment$/)), *_}), _) := original, str s := "<c>"];
 
     if (originalComments == []) {
         // if the original did not contain comments, stick with the replacements
@@ -146,23 +164,42 @@ private str learnComments(Tree original, Tree replacement) {
         return "<replacement>";
     }
 
-    // At this point, we know that: (a) comments are not present in the replacement and (b) they used to be there in the original.
-    // So the old comments are going to be the new output. however, we want to learn indentation from the replacement.
+    // At this point, we know that: 
+    //   (a) comments are not present in the replacement and 
+    //   (b) they used to be there in the original.
+    // So the old comments are going to be copied to the new output.
+    // But, we want to indent them using the style of the replacement.
+
+    // The last line of the replacement string typically has the indentation for the construct that follows:
+    //   |    // a comment 
+    //   |    if (true) {
+    //    ^^^^
+    //      newIndent
+    //
+    // However, if the replacement string is on a single line, then we don't have the indentation
+    // for the string on the next line readily available. In this case we indent the next line
+    // to the start column of the replacement layout, as a proxy.
+
+    str replString = "<replacement>";
+    str newIndent  = split("\n", replString)[-1] ? "";
 
-    // Drop the last newline of single-line comments, because we don't want two newlines in the output for every comment:
-    str dropEndNl(str line:/^.*\n$/) = (line[..-1]);
-    default str dropEndNl(str line)  = line;
+    if (/\n/ !:= replString) {
+        // no newline in the repl string, so no indentation available for what follows the comment...
+        newIndent = "<for (_ <- [0..replacement@\loc.begin.column]) {> <}>";
+    }
 
-    // the first line of the replacement ,is the indentation to use.
-    str replString = "<replacement>";
-    str replacementIndent = /^\n+$/ !:= replString
-        ? split("\n", replString)[0]
-        : "";
-
-    // trimming each line makes sure we forget about the original indentation, and drop accidental spaces after comment lines
-    return replString + indent(replacementIndent,
-            "<for (c <- originalComments, str line <- split("\n", dropEndNl(c))) {><indent(replacementIndent, trim(line), indentFirstLine=true)>
-           '<}>"[..-1], indentFirstLine=false) + replString;
+    // we always place sequential comments vertically, because we don't know if we are dealing
+    // we a single line comment that has to end with newline by follow restriction or by a literal "\n".
+    // TODO: a deeper analysis of the comment rule that's in use could also be used to discover this.
+    str trimmedOriginals = "<for (<c, newLine> <- originalComments) {><trim(c)><if (newLine) {>
+                           '<}><}>";
+
+    // we wrap the comment with the formatted whitespace to assure the proper indentation
+    // of its first line, and the proper indentation of what comes after this layout node
+    return replString 
+        + indent(newIndent, trimmedOriginals, indentFirstLine=false) 
+        + newIndent
+        ;
 }
 
 private Symbol delabel(label(_, Symbol t)) = t;