diff --git a/benchmark-compare.sh b/benchmark-compare.sh
new file mode 100755
index 000000000..59b693dc3
--- /dev/null
+++ b/benchmark-compare.sh
@@ -0,0 +1,168 @@
+#!/bin/bash
+set -e
+
+SKIP_COMPILE=0
+if [ "$1" = "--skip-compile" ]; then
+ SKIP_COMPILE=1
+fi
+
+BACKENDS=("llvm")
+WARMUP=10
+RUNS=50
+TARGET_BRANCH="main"
+OUTPUT_DIR="benchmark-results"
+TIMESTAMP=$(date +%Y%m%d_%H%M%S)
+CURRENT_BRANCH=$(git branch --show-current)
+CURRENT_BRANCH_SAFE="${CURRENT_BRANCH//\//-}"
+
+# Signal handler: if an interrupt arrives while we are checked out on the
+# target branch, switch back to the branch the user started on.
+cleanup() {
+ # Declare and assign separately so a git failure is not masked by 'local'
+ # (ShellCheck SC2155).
+ local current
+ current=$(git branch --show-current)
+ if [ "$current" != "$CURRENT_BRANCH" ]; then
+ echo ""
+ echo "Interrupted! Switching back to $CURRENT_BRANCH..."
+ git checkout -q "$CURRENT_BRANCH"
+ fi
+ exit 1
+}
+
+trap cleanup SIGINT SIGTERM
+
+# hyperfine drives all timing runs; bail out early if it is missing.
+if ! command -v hyperfine &> /dev/null; then
+ echo "Error: hyperfine is not installed"
+ exit 1
+fi
+
+# The comparison needs a feature branch distinct from the target branch.
+if [ "$CURRENT_BRANCH" = "$TARGET_BRANCH" ]; then
+ echo "Error: You are currently on the $TARGET_BRANCH branch"
+ echo "Please switch to your feature branch first"
+ exit 1
+fi
+
+mkdir -p "$OUTPUT_DIR"
+
+# Map: benchmark source path (under examples/benchmarks/) -> CLI argument n.
+# NOTE(review): these keys use large_records/nested_records while
+# python_benchmark/benchmark.yml refers to large_record/nested_record —
+# confirm which directory names actually exist in examples/benchmarks.
+declare -A BENCHMARKS=(
+ ["arity_raising/record_passing"]="25000000"
+ ["arity_raising/matrix_determinant"]="2000000"
+ ["large_records/10"]="2000000"
+ ["large_records/20"]="2000000"
+ ["nested_records/10"]="2000000"
+ ["nested_records/20"]="2000000"
+)
+
+echo "Comparing: $CURRENT_BRANCH vs $TARGET_BRANCH"
+echo "Backends: ${BACKENDS[*]}"
+echo "Runs: $RUNS, Warmup: $WARMUP"
+echo "Skip compilation: $([ $SKIP_COMPILE -eq 1 ] && echo 'yes' || echo 'no')"
+echo ""
+
+# One output directory per (branch, backend) pair for the compiled benchmarks.
+for backend in "${BACKENDS[@]}"; do
+ OUT_CURRENT="out-${CURRENT_BRANCH_SAFE}-${backend}"
+ OUT_MAIN="out-main-${backend}"
+ mkdir -p "$OUT_CURRENT" "$OUT_MAIN"
+done
+
+# Build the compiler and the benchmark binaries on both branches. Requires a
+# clean work tree because we temporarily check out $TARGET_BRANCH.
+if [ "$SKIP_COMPILE" -eq 0 ]; then
+
+ if ! git diff-index --quiet HEAD --; then
+ echo "Error: You have uncommitted changes"
+ echo "Please commit or stash your changes before running this script"
+ exit 1
+ fi
+
+ echo "=== Building compiler on $CURRENT_BRANCH ==="
+ sbt install
+ echo ""
+
+ echo "=== Compiling benchmarks on $CURRENT_BRANCH ==="
+ for backend in "${BACKENDS[@]}"; do
+ OUT_CURRENT="out-${CURRENT_BRANCH_SAFE}-${backend}"
+ echo "Backend: $backend"
+
+ for bench_path in "${!BENCHMARKS[@]}"; do
+ bench_name=$(basename "$bench_path")
+ source_file="examples/benchmarks/${bench_path}.effekt"
+ echo " $bench_name"
+ effekt --backend="$backend" --build -o "$OUT_CURRENT" "$source_file"
+ done
+ done
+ echo ""
+
+ echo "=== Building compiler on $TARGET_BRANCH ==="
+ git checkout -q "$TARGET_BRANCH"
+ sbt install
+ echo ""
+
+ echo "=== Compiling benchmarks on $TARGET_BRANCH ==="
+ for backend in "${BACKENDS[@]}"; do
+ OUT_MAIN="out-main-${backend}"
+ echo "Backend: $backend"
+
+ for bench_path in "${!BENCHMARKS[@]}"; do
+ bench_name=$(basename "$bench_path")
+ source_file="examples/benchmarks/${bench_path}.effekt"
+ echo " $bench_name"
+ effekt --backend="$backend" --build -o "$OUT_MAIN" "$source_file"
+ done
+ done
+ echo ""
+
+ echo "=== Switching back to $CURRENT_BRANCH ==="
+ git checkout -q "$CURRENT_BRANCH"
+ echo ""
+else
+ echo "=== Skipping compilation (using existing binaries) ==="
+ echo ""
+fi
+
+echo "=== Starting benchmarks ==="
+echo ""
+
+for backend in "${BACKENDS[@]}"; do
+ echo "=== Benchmarking backend: $backend ==="
+
+ OUT_CURRENT="out-${CURRENT_BRANCH_SAFE}-${backend}"
+ OUT_MAIN="out-main-${backend}"
+
+ # All hyperfine output for this backend is appended to one markdown report.
+ comparison_file="${OUTPUT_DIR}/comparison_${backend}_${CURRENT_BRANCH_SAFE}_vs_main_${TIMESTAMP}.md"
+ echo "# $CURRENT_BRANCH vs main ($backend)" > "$comparison_file"
+ echo "Date: $(date)" >> "$comparison_file"
+ echo "Runs: $RUNS, Warmup: $WARMUP" >> "$comparison_file"
+ echo "" >> "$comparison_file"
+
+ for bench_path in "${!BENCHMARKS[@]}"; do
+ bench_name=$(basename "$bench_path")
+ params=${BENCHMARKS[$bench_path]}
+
+ echo " $bench_name"
+
+ # Select how to invoke the compiled benchmark for this backend.
+ case "$backend" in
+ llvm)
+ current_exec="./$OUT_CURRENT/${bench_name}"
+ target_exec="./$OUT_MAIN/${bench_name}"
+ ;;
+ js)
+ current_exec="node $OUT_CURRENT/${bench_name}.js"
+ target_exec="node $OUT_MAIN/${bench_name}.js"
+ ;;
+ chez-callcc)
+ current_exec="scheme --script $OUT_CURRENT/${bench_name}.ss"
+ target_exec="scheme --script $OUT_MAIN/${bench_name}.ss"
+ ;;
+ *)
+ # Without this default an unknown backend silently reused the exec
+ # strings left over from the previous loop iteration.
+ echo "Error: unsupported backend '$backend'" >&2
+ exit 1
+ ;;
+ esac
+
+ echo "## $bench_name" >> "$comparison_file"
+ hyperfine \
+ --warmup "$WARMUP" \
+ --runs "$RUNS" \
+ --export-markdown - \
+ --command-name "main" "$target_exec $params" \
+ --command-name "$CURRENT_BRANCH" "$current_exec $params" \
+ 2>&1 | tee -a "$comparison_file"
+ echo "" >> "$comparison_file"
+ done
+
+ echo "Results: $comparison_file"
+ echo ""
+done
+
+echo "Done! Results in: $OUTPUT_DIR/"
diff --git a/effekt/shared/src/main/scala/effekt/core/ArityRaising.scala b/effekt/shared/src/main/scala/effekt/core/ArityRaising.scala
new file mode 100644
index 000000000..38664eeca
--- /dev/null
+++ b/effekt/shared/src/main/scala/effekt/core/ArityRaising.scala
@@ -0,0 +1,229 @@
+package effekt
+package core
+import effekt.context.Context
+import effekt.core.optimizer.Deadcode
+import effekt.typer.Typer.checkMain
+import effekt.symbols.Symbol.fresh
+import effekt.lexer.TokenKind
+import effekt.core.Type.instantiate
+import effekt.generator.llvm.Transformer.BlockContext
+import effekt.machine.Transformer.BlocksParamsContext
+
+// Arity raising: a function parameter whose type is a data type with no type
+// parameters and exactly one constructor (a plain record) is replaced by one
+// parameter per field, and call sites are rewritten to destructure their
+// record arguments to match. Block parameters in scope (higher-order
+// arguments, continuations) are tracked in `boundBlockParams` and their call
+// sites are left untouched, since their definitions are not rewritten here.
+object ArityRaising extends Phase[CoreTransformed, CoreTransformed] {
+ override val phaseName: String = "arity raising"
+
+ // Entry point: drops code unreachable from main, then rewrites the module.
+ override def run(input: CoreTransformed)(using C: Context): Option[CoreTransformed] = input match {
+ case CoreTransformed(source, tree, mod, core) =>
+ implicit val pctx: DeclarationContext = new DeclarationContext(core.declarations, core.externs)
+ Context.module = mod
+ val main = C.ensureMainExists(mod)
+ val res = Deadcode.remove(main, core)
+ val transformed = Context.timed(phaseName, source.name) { transform(res) }
+ Some(CoreTransformed(source, tree, mod, transformed))
+ }
+
+ // Declarations and externs are passed through unchanged; only definitions
+ // are rewritten.
+ def transform(decl: ModuleDecl)(using Context, DeclarationContext): ModuleDecl = decl match {
+ case ModuleDecl(path, includes, declarations, externs, definitions, exports) =>
+ ModuleDecl(path, includes, declarations, externs, definitions map transform, exports)
+ }
+
+ // Toplevel definitions start with no block parameters in scope.
+ def transform(toplevel: Toplevel)(using C: Context, DC: DeclarationContext): Toplevel = toplevel match {
+ case Toplevel.Def(id, block) => Toplevel.Def(id, transform(block)(using C, DC, Set.empty))
+ case Toplevel.Val(id, binding) => Toplevel.Val(id, transform(binding)(using C, DC, Set.empty))
+ }
+
+ def transform(block: Block)(using C: Context, DC: DeclarationContext, boundBlockParams: Set[Id]): Block = block match {
+ case Block.BlockVar(id, annotatedTpe, annotatedCapt) => block
+ case Block.BlockLit(tparams, cparams, vparams, bparams, body) =>
+ // Definition-side rewrite: replaces a parameter of single-constructor
+ // record type by one fresh parameter per field (recursively), and
+ // records a binding that reconstructs the original record value so the
+ // unmodified body can keep referring to the old parameter id.
+ def flattenParam(param: ValueParam): (List[ValueParam], List[(Id, Expr)]) = param match {
+ case ValueParam(paramId, tpe @ ValueType.Data(name, targs)) =>
+ DC.findData(name) match {
+ case Some(Data(_, List(), List(Constructor(ctor, List(), fields)))) =>
+ val (flatParams, allBindings, fieldVars) = fields.map { case Field(fieldName, fieldType) =>
+ val freshId = Id(fieldName)
+ val (params, bindings) = flattenParam(ValueParam(freshId, fieldType))
+ (params, bindings, ValueVar(freshId, fieldType))
+ }.unzip3
+
+ val binding = (paramId, Make(tpe, ctor, List(), fieldVars))
+ (flatParams.flatten, allBindings.flatten :+ binding)
+
+ case _ => (List(param), List())
+ }
+ case _ => (List(param), List())
+ }
+
+ val flattened = vparams.map(flattenParam)
+ val (allParams, allBindings) = flattened.unzip
+
+ // Re-bind the original record names (via Let/Make) on top of the
+ // transformed body.
+ val newBody = allBindings.flatten.foldRight(transform(body)(using C, DC, boundBlockParams ++ bparams.map(_.id))) {
+ case ((id, expr), body) => Let(id, expr, body)
+ }
+
+ Block.BlockLit(tparams, cparams, allParams.flatten, bparams, newBody)
+
+ case Block.Unbox(pure) =>
+ Block.Unbox(transform(pure))
+
+ case Block.New(Implementation(interface, operations)) =>
+ Block.New(Implementation(interface, operations.map {
+ case Operation(name, tparams, cparams, vparams, bparams, body) =>
+ Operation(name, tparams, cparams, vparams, bparams, transform(body)(using C, DC, boundBlockParams ++ bparams.map(_.id)))
+ }))
+ }
+
+ // Helper to check if a type needs flattening: a data type with no type
+ // parameters and exactly one constructor.
+ def needsFlattening(tpe: ValueType)(using DC: DeclarationContext): Boolean = tpe match {
+ case ValueType.Data(name, _) =>
+ DC.findData(name) match {
+ case Some(Data(_, List(), List(Constructor(_, List(), _)))) => true
+ case _ => false
+ }
+ case _ => false
+ }
+
+ // Eta-expands a block variable whose function type has flattenable value
+ // parameters: the generated wrapper lambda (whose inner call is then
+ // transformed like any other call site) absorbs the arity change, so the
+ // variable itself can keep its original type.
+ def wrapBlockVarIfNeeded(barg: BlockVar, annotatedTpe: BlockType)(using C: Context, DC: DeclarationContext, boundBlockParams: Set[Id]): Block =
+ annotatedTpe match {
+ case BlockType.Function(tparams, cparams, vparams, bparamTpes, result) if vparams.exists(needsFlattening) =>
+ val values = vparams.map { tpe =>
+ val freshId = Id("x")
+ (ValueParam(freshId, tpe), ValueVar(freshId, tpe))
+ }
+ val blocks = bparamTpes.zip(cparams).map { case (tpe, capt) =>
+ val freshId = Id("f")
+ (BlockParam(freshId, tpe, Set(capt)), BlockVar(freshId, tpe, Set(capt)))
+ }
+ val call = Stmt.App(barg, List(), values.map(_._2), blocks.map(_._2))
+ BlockLit(tparams, cparams, values.map(_._1), blocks.map(_._1), transform(call)(using C, DC, boundBlockParams ++ blocks.map(_._1.id)))
+
+
+ case _ => transform(barg)
+ }
+
+ def transform(stmt: Stmt)(using C: Context, DC: DeclarationContext, boundBlockParams: Set[Id]): Stmt = stmt match {
+ // Call-site rewrite: flatten record arguments of a known (non-parameter)
+ // callee into their fields, adjust the callee's type accordingly, and
+ // destructure each record argument with a wrapping Match.
+ // NOTE(review): this guard admits any BlockVar not bound as a block
+ // parameter, which appears to include externs and imports — confirm their
+ // definitions are flattened consistently, otherwise arities diverge.
+ case Stmt.App(callee @ BlockVar(id, BlockType.Function(tparams, cparams, vparamsTypes, bparamTypes, returnTpe), annotatedCapt), targs, vargs, bargs) if !boundBlockParams.contains(id) =>
+ // Returns (flattened argument exprs, their types, Match specs to emit):
+ // each spec (scrutinee, ctor, fieldParams) binds a record's fields.
+ def flattenArg(arg: Expr, argType: ValueType): (List[Expr], List[ValueType], List[(Expr, Id, List[ValueParam])]) = argType match {
+ case ValueType.Data(name, targs) =>
+ DC.findData(name) match {
+ case Some(Data(_, List(), List(Constructor(ctor, List(), fields)))) =>
+ val fieldParams = fields.map { case Field(name, tpe) => ValueParam(Id(name), tpe) }
+ val nestedResults = fieldParams.map { param => flattenArg(ValueVar(param.id, param.tpe), param.tpe) }
+ val (nestedVars, nestedTypes, nestedMatches) = nestedResults.unzip3
+ val thisMatch = (arg, ctor, fieldParams)
+ (nestedVars.flatten, nestedTypes.flatten, thisMatch :: nestedMatches.flatten)
+
+ case _ => (List(arg), List(argType), List())
+ }
+ case _ => (List(arg), List(argType), List())
+ }
+
+ val flattened = (vargs zip vparamsTypes).map { case (arg, tpe) => flattenArg(arg, tpe) }
+ val (allArgs, allTypes, allMatches) = flattened.unzip3
+
+ val transformedBargs = bargs.map { barg =>
+ barg match {
+ // This handles:
+ // val res = myList.map {myFunc}
+ // by making it:
+ // val res = myList.map {t => myFunc(t)}
+ // but only if the arity of myFunc changes
+ case bvar @ BlockVar(id, annotatedTpe, annotatedCapt) if !boundBlockParams.contains(id) =>
+ wrapBlockVarIfNeeded(bvar, annotatedTpe)
+
+ case BlockLit(btparams, bcparams, bvparams, bbparams, body) =>
+ BlockLit(btparams, bcparams, bvparams, bbparams, transform(body)(using C, DC, boundBlockParams ++ bbparams.map(_.id)))
+
+ case _ =>
+ transform(barg)
+ }
+ }
+
+ // New callee type reflecting the flattened value parameter list.
+ val newCalleTpe: BlockType.Function = BlockType.Function(tparams, cparams, allTypes.flatten, bparamTypes, returnTpe)
+ val newCallee = BlockVar(id, newCalleTpe, annotatedCapt)
+ val innerApp = Stmt.App(newCallee, targs, allArgs.flatten, transformedBargs)
+
+ // Wrap the rewritten call in one Match per record argument, binding
+ // that record's fields for use as the flattened arguments.
+ allMatches.flatten.foldRight(innerApp) {
+ case ((scrutinee, ctor, params), body) =>
+ val resultTpe = instantiate(newCalleTpe, targs, bargs.map(_.capt)).result
+ Stmt.Match(scrutinee, resultTpe, List((ctor, BlockLit(List(), List(), params, List(), body))), None)
+ }
+
+ case Stmt.App(callee, targs, vargs, bargs) =>
+ Stmt.App(callee, targs, vargs map transform, bargs map transform)
+
+ case Stmt.Def(id, block, rest) =>
+ Stmt.Def(id, transform(block), transform(rest))
+
+ case Stmt.Let(id, binding, rest) =>
+ Stmt.Let(id, transform(binding), transform(rest))
+
+ case Stmt.Return(expr) =>
+ Stmt.Return(transform(expr))
+
+ case Stmt.Val(id, binding, body) =>
+ Stmt.Val(id, transform(binding), transform(body))
+
+ case Stmt.Invoke(callee, method, methodTpe, targs, vargs, bargs) =>
+ Stmt.Invoke(transform(callee), method, methodTpe, targs, vargs map transform, bargs map transform)
+
+ case Stmt.If(cond, thn, els) =>
+ Stmt.If(transform(cond), transform(thn), transform(els))
+ case Stmt.Match(scrutinee, tpe, clauses, default) =>
+ Stmt.Match(transform(scrutinee), tpe, clauses.map { case (id, BlockLit(tparams, cparams, vparams, bparams, body)) =>
+ (id, BlockLit(tparams, cparams, vparams, bparams, transform(body)(using C, DC, boundBlockParams ++ bparams.map(_.id))))
+ }, default map transform)
+
+ case Stmt.ImpureApp(id, callee, targs, vargs, bargs, body) =>
+ Stmt.ImpureApp(id, callee, targs, vargs map transform, bargs map transform, transform(body))
+
+ case Stmt.Region(BlockLit(tparams, cparams, vparams, bparams, body)) =>
+ Stmt.Region(BlockLit(tparams, cparams, vparams, bparams, transform(body)(using C, DC, boundBlockParams ++ bparams.map(_.id))))
+
+ case Stmt.Alloc(id, init, region, body) =>
+ Stmt.Alloc(id, transform(init), region, transform(body))
+
+ case Stmt.Var(ref, init, capture, body) =>
+ Stmt.Var(ref, transform(init), capture, transform(body))
+
+ case Stmt.Get(id, annotatedTpe, ref, annotatedCapt, body) =>
+ Stmt.Get(id, annotatedTpe, ref, annotatedCapt, transform(body))
+
+ case Stmt.Put(ref, annotatedCapt, value, body) =>
+ Stmt.Put(ref, annotatedCapt, transform(value), transform(body))
+
+ case Stmt.Reset(BlockLit(tparams, cparams, vparams, bparams, body)) =>
+ Stmt.Reset(BlockLit(tparams, cparams, vparams, bparams, transform(body)(using C, DC, boundBlockParams ++ bparams.map(_.id))))
+
+ case Stmt.Shift(prompt, k, body) =>
+ // k is a continuation (block param), so add it to boundBlockParams
+ Stmt.Shift(prompt, k, transform(body)(using C, DC, boundBlockParams + k.id))
+
+ case Stmt.Resume(k, body) =>
+ Stmt.Resume(k, transform(body))
+
+ case Stmt.Hole(tpe, span) =>
+ Stmt.Hole(tpe, span)
+ }
+
+ def transform(pure: Expr)(using C: Context, DC: DeclarationContext, boundBlockParams: Set[Id]): Expr = pure match {
+ case Expr.ValueVar(id, annotatedType) => pure
+
+ case Expr.Literal(value, annotatedType) => pure
+
+ // A boxed block variable may escape to call sites we cannot see, so it
+ // is eta-expanded just like a higher-order argument.
+ case Expr.Box(bvar @ BlockVar(id, annotatedTpe, annotatedCapt), annotatedCapture) if !boundBlockParams.contains(id) =>
+ Expr.Box(wrapBlockVarIfNeeded(bvar, annotatedTpe), annotatedCapture)
+
+ case Expr.Box(b, annotatedCapture) =>
+ Expr.Box(transform(b), annotatedCapture)
+
+ case Expr.PureApp(b, targs, vargs) =>
+ Expr.PureApp(b, targs, vargs map transform)
+
+ case Expr.Make(data, tag, targs, vargs) =>
+ Expr.Make(data, tag, targs, vargs map transform)
+ }
+
+ // Currently the identity; value types themselves are not rewritten.
+ def transform(valueType: ValueType.Data)(using C: Context, DC: DeclarationContext): ValueType.Data = valueType match {
+ case ValueType.Data(symbol, targs) => valueType
+ }
+}
diff --git a/effekt/shared/src/main/scala/effekt/generator/chez/ChezSchemeCPS.scala b/effekt/shared/src/main/scala/effekt/generator/chez/ChezSchemeCPS.scala
index 1260bc462..d68fe2e65 100644
--- a/effekt/shared/src/main/scala/effekt/generator/chez/ChezSchemeCPS.scala
+++ b/effekt/shared/src/main/scala/effekt/generator/chez/ChezSchemeCPS.scala
@@ -6,6 +6,7 @@ import effekt.context.Context
import effekt.core.optimizer.{DropBindings, Optimizer}
import kiama.util.Source
import kiama.output.PrettyPrinterTypes.Document
+import effekt.core.ArityRaising
class ChezSchemeCPS extends Compiler[String] {
@@ -37,7 +38,7 @@ class ChezSchemeCPS extends Compiler[String] {
Frontend andThen Middleend
}
- lazy val Optimized = allToCore(Core) andThen Aggregate andThen Optimizer map {
+ lazy val Optimized = allToCore(Core) andThen Aggregate andThen ArityRaising andThen Optimizer map {
case input @ CoreTransformed(source, tree, mod, core) =>
val mainSymbol = Context.ensureMainExists(mod)
val mainFile = path(mod)
@@ -66,4 +67,4 @@ class ChezSchemeCPS extends Compiler[String] {
def pretty(expr: chez.Expr): Document =
chez.PrettyPrinter.pretty(chez.PrettyPrinter.toDoc(expr), 100)
-}
\ No newline at end of file
+}
diff --git a/effekt/shared/src/main/scala/effekt/generator/js/JavaScript.scala b/effekt/shared/src/main/scala/effekt/generator/js/JavaScript.scala
index 1c860b065..6f1823194 100644
--- a/effekt/shared/src/main/scala/effekt/generator/js/JavaScript.scala
+++ b/effekt/shared/src/main/scala/effekt/generator/js/JavaScript.scala
@@ -7,6 +7,7 @@ import effekt.context.Context
import effekt.core.optimizer.{ DropBindings, Optimizer }
import kiama.output.PrettyPrinterTypes.Document
import kiama.util.Source
+import effekt.core.ArityRaising
class JavaScript(additionalFeatureFlags: List[String] = Nil) extends Compiler[String] {
@@ -44,7 +45,7 @@ class JavaScript(additionalFeatureFlags: List[String] = Nil) extends Compiler[St
Frontend andThen Middleend
}
- lazy val Optimized = allToCore(Core) andThen Aggregate andThen Optimizer andThen DropBindings map {
+ lazy val Optimized = allToCore(Core) andThen Aggregate andThen ArityRaising andThen Optimizer andThen DropBindings map {
case input @ CoreTransformed(source, tree, mod, core) =>
val mainSymbol = Context.ensureMainExists(mod)
val mainFile = path(mod)
diff --git a/effekt/shared/src/main/scala/effekt/generator/llvm/LLVM.scala b/effekt/shared/src/main/scala/effekt/generator/llvm/LLVM.scala
index 79c58984b..a816fee69 100644
--- a/effekt/shared/src/main/scala/effekt/generator/llvm/LLVM.scala
+++ b/effekt/shared/src/main/scala/effekt/generator/llvm/LLVM.scala
@@ -7,6 +7,7 @@ import effekt.core.optimizer
import effekt.machine
import kiama.output.PrettyPrinterTypes.{ Document, emptyLinks }
import kiama.util.Source
+import effekt.core.ArityRaising
class LLVM extends Compiler[String] {
@@ -54,7 +55,7 @@ class LLVM extends Compiler[String] {
// -----------------------------------
object steps {
// intermediate steps for VSCode
- val afterCore = allToCore(Core) andThen Aggregate andThen optimizer.Optimizer
+ val afterCore = allToCore(Core) andThen Aggregate andThen ArityRaising andThen optimizer.Optimizer
val afterMachine = afterCore andThen Machine map { case (mod, main, prog) => prog }
val afterLLVM = afterMachine map {
case machine.Program(decls, defns, entry) =>
diff --git a/examples/benchmarks/arity_raising/matrix_determinant.check b/examples/benchmarks/arity_raising/matrix_determinant.check
new file mode 100644
index 000000000..573541ac9
--- /dev/null
+++ b/examples/benchmarks/arity_raising/matrix_determinant.check
@@ -0,0 +1 @@
+0
diff --git a/examples/benchmarks/arity_raising/matrix_determinant.effekt b/examples/benchmarks/arity_raising/matrix_determinant.effekt
new file mode 100644
index 000000000..d1c908a10
--- /dev/null
+++ b/examples/benchmarks/arity_raising/matrix_determinant.effekt
@@ -0,0 +1,56 @@
+import examples/benchmarks/runner
+
+// Integer matrices represented as records of row vectors — candidates for
+// the arity-raising optimization (single-constructor records).
+record Vec4(a: Int, b: Int, c: Int, d: Int)
+
+record Matrix4(row1: Vec4, row2: Vec4, row3: Vec4, row4: Vec4)
+
+record Vec3(a: Int, b: Int, c: Int)
+record Matrix3(row1: Vec3, row2: Vec3, row3: Vec3)
+
+// Determinant of a 3x3 matrix by cofactor expansion along the first row.
+def det3(m: Matrix3): Int = {
+ m.row1.a * (m.row2.b * m.row3.c - m.row2.c * m.row3.b) -
+ m.row1.b * (m.row2.a * m.row3.c - m.row2.c * m.row3.a) +
+ m.row1.c * (m.row2.a * m.row3.b - m.row2.b * m.row3.a)
+}
+
+// Determinant of a 4x4 matrix by Laplace expansion along the first row:
+// each cofactor c1..c4 is a 3x3 minor determinant scaled by a row1 entry.
+def det4(m: Matrix4): Int = {
+ val c1 = m.row1.a * det3(Matrix3(
+ Vec3(m.row2.b, m.row2.c, m.row2.d),
+ Vec3(m.row3.b, m.row3.c, m.row3.d),
+ Vec3(m.row4.b, m.row4.c, m.row4.d)
+ ))
+ val c2 = m.row1.b * det3(Matrix3(
+ Vec3(m.row2.a, m.row2.c, m.row2.d),
+ Vec3(m.row3.a, m.row3.c, m.row3.d),
+ Vec3(m.row4.a, m.row4.c, m.row4.d)
+ ))
+ val c3 = m.row1.c * det3(Matrix3(
+ Vec3(m.row2.a, m.row2.b, m.row2.d),
+ Vec3(m.row3.a, m.row3.b, m.row3.d),
+ Vec3(m.row4.a, m.row4.b, m.row4.d)
+ ))
+ val c4 = m.row1.d * det3(Matrix3(
+ Vec3(m.row2.a, m.row2.b, m.row2.c),
+ Vec3(m.row3.a, m.row3.b, m.row3.c),
+ Vec3(m.row4.a, m.row4.b, m.row4.c)
+ ))
+ c1 - c2 + c3 - c4
+}
+
+// Sums det4 over n matrices built from consecutive integers. The expected
+// total is 0 (see matrix_determinant.check) since each matrix's rows differ
+// by a constant offset.
+def runBenchmark(n: Int): Int = {
+ def loop(i: Int, acc: Int): Int = {
+ if (i <= 0) { acc }
+ else {
+ val m = Matrix4(
+ Vec4(i, i + 1, i + 2, i + 3),
+ Vec4(i + 4, i + 5, i + 6, i + 7),
+ Vec4(i + 8, i + 9, i + 10, i + 11),
+ Vec4(i + 12, i + 13, i + 14, i + 15)
+ )
+ loop(i - 1, acc + det4(m))
+ }
+ }
+ loop(n, 0)
+}
+
+def main() = benchmark(1000000){ n => runBenchmark(n) }
diff --git a/examples/benchmarks/arity_raising/record_passing.check b/examples/benchmarks/arity_raising/record_passing.check
new file mode 100644
index 000000000..d774a2872
--- /dev/null
+++ b/examples/benchmarks/arity_raising/record_passing.check
@@ -0,0 +1 @@
+62500000500000000
\ No newline at end of file
diff --git a/examples/benchmarks/arity_raising/record_passing.effekt b/examples/benchmarks/arity_raising/record_passing.effekt
new file mode 100644
index 000000000..15185f2e7
--- /dev/null
+++ b/examples/benchmarks/arity_raising/record_passing.effekt
@@ -0,0 +1,18 @@
+import examples/benchmarks/runner
+
+record Point(x: Int, y: Int)
+
+def add(p: Point, depth: Int): Int = {
+ if (depth <= 0) { p.x + p.y }
+ else { add(Point(p.y, p.x), depth - 1) }
+}
+
+def runBenchmark(n: Int): Int = {
+ def loop(i: Int, acc: Int): Int = {
+ if (i <= 0) { acc }
+ else { loop(i - 1, acc + add(Point(i, i + 1), 2)) }
+ }
+ loop(n, 0)
+}
+
+def main() = benchmark(250000000){ n => runBenchmark(n) }
diff --git a/python_benchmark/benchmark.py b/python_benchmark/benchmark.py
new file mode 100755
index 000000000..b59422839
--- /dev/null
+++ b/python_benchmark/benchmark.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python3
+"""
+Compares benchmarks between the current branch and main.
+Usage: ./benchmark.py [--skip-compile] [--config benchmark.yml]
+"""
+
+import argparse
+import os
+import shutil
+import signal
+import subprocess
+import sys
+import yaml
+from datetime import datetime
+from pathlib import Path
+
+# Run from the repo root so git/sbt/effekt resolve correctly
+REPO_ROOT = Path(__file__).parent.parent
+
+# ── Helpers ───────────────────────────────────────────────────────────────────
+
+def run(cmd, **kwargs):
+ """Run a command from the repo root, inheriting stdio so output streams to the terminal."""
+ # check=True raises CalledProcessError on non-zero exit, aborting the script.
+ subprocess.run(cmd, check=True, cwd=REPO_ROOT, **kwargs)
+
+def git_current_branch() -> str:
+ """Return the name of the currently checked-out branch.
+
+ NOTE(review): `git branch --show-current` prints nothing on a detached
+ HEAD, so this returns "" in that state — confirm callers tolerate that.
+ """
+ return subprocess.check_output(
+ ["git", "branch", "--show-current"], text=True, cwd=REPO_ROOT
+ ).strip()
+
+def bench_exec(backend: str, out_dir: str, bench_name: str, params: str) -> str:
+ """Build the shell command string hyperfine should time for one benchmark."""
+ match backend:
+ case "llvm":
+ return f"./{out_dir}/{bench_name} {params}"
+ case "js":
+ return f"node {out_dir}/{bench_name}.js {params}"
+ case "chez-callcc":
+ return f"scheme --script {out_dir}/{bench_name}.ss {params}"
+ case _:
+ raise ValueError(f"Unknown backend: {backend}")
+
+# ── Main ──────────────────────────────────────────────────────────────────────
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--skip-compile", action="store_true")
+ parser.add_argument("--config", default=Path(__file__).parent / "benchmark.yml",
+ type=Path, help="Path to benchmark config YAML")
+ args = parser.parse_args()
+
+ cfg = yaml.safe_load(args.config.read_text())
+ BACKENDS = cfg["backends"]
+ WARMUP = cfg["warmup"]
+ RUNS = cfg["runs"]
+ BRANCH = cfg["branch"]
+ TARGET_BRANCH = cfg["target_branch"]
+ OUTPUT_DIR = REPO_ROOT / cfg.get("output_dir", "benchmark-results")
+ # benchmarks: list of {path, n}
+ BENCHMARKS = {b["path"]: str(b["n"]) for b in cfg["benchmarks"]}
+
+ current_branch = git_current_branch() # saved only to restore at the end
+ branch_safe = BRANCH.replace("/", "-")
+ target_safe = TARGET_BRANCH.replace("/", "-")
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+ # Restore branch on Ctrl-C
+ def on_interrupt(sig, frame):
+ if git_current_branch() != current_branch:
+ print(f"\nInterrupted! Switching back to {current_branch}...")
+ subprocess.run(["git", "checkout", "-q", current_branch])
+ sys.exit(1)
+ signal.signal(signal.SIGINT, on_interrupt)
+ signal.signal(signal.SIGTERM, on_interrupt)
+
+ if not shutil.which("hyperfine"):
+ sys.exit("Error: hyperfine is not installed")
+
+ if BRANCH == TARGET_BRANCH:
+ sys.exit(f"Error: branch and target_branch are both '{TARGET_BRANCH}'. They must differ.")
+
+ target_safe = TARGET_BRANCH.replace("/", "-")
+
+ OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+ for backend in BACKENDS:
+ Path(REPO_ROOT / f"out-{branch_safe}-{backend}").mkdir(exist_ok=True)
+ Path(REPO_ROOT / f"out-{target_safe}-{backend}").mkdir(exist_ok=True)
+
+ print(f"Comparing: {BRANCH} vs {TARGET_BRANCH}")
+ print(f"Backends: {', '.join(BACKENDS)}")
+ print(f"Runs: {RUNS}, Warmup: {WARMUP}")
+ print(f"Skip compilation: {'yes' if args.skip_compile else 'no'}\n")
+
+ if not args.skip_compile:
+ result = subprocess.run(["git", "diff-index", "--quiet", "HEAD", "--"], cwd=REPO_ROOT)
+ if result.returncode != 0:
+ sys.exit("Error: you have uncommitted changes. Commit or stash them first.")
+
+ print(f"=== Building compiler on {BRANCH} ===")
+ run(["git", "checkout", "-q", BRANCH])
+ run(["sbt", "install"])
+
+ print(f"\n=== Compiling benchmarks on {BRANCH} ===")
+ for backend in BACKENDS:
+ out_current = f"out-{branch_safe}-{backend}"
+ print(f"Backend: {backend}")
+ for bench_path in BENCHMARKS:
+ bench_name = Path(bench_path).name
+ source_file = f"examples/benchmarks/{bench_path}.effekt"
+ print(f" {bench_name}")
+ run(["effekt", f"--backend={backend}", "--build", "-o", out_current, source_file])
+
+ print(f"\n=== Building compiler on {TARGET_BRANCH} ===")
+ run(["git", "checkout", "-q", TARGET_BRANCH])
+ run(["sbt", "install"])
+
+ print(f"\n=== Compiling benchmarks on {TARGET_BRANCH} ===")
+ for backend in BACKENDS:
+ out_target = f"out-{target_safe}-{backend}"
+ print(f"Backend: {backend}")
+ for bench_path in BENCHMARKS:
+ bench_name = Path(bench_path).name
+ source_file = f"examples/benchmarks/{bench_path}.effekt"
+ print(f" {bench_name}")
+ run(["effekt", f"--backend={backend}", "--build", "-o", out_target, source_file])
+
+ print(f"\n=== Switching back to {current_branch} ===")
+ run(["git", "checkout", "-q", current_branch])
+ else:
+ print("=== Skipping compilation (using existing binaries) ===\n")
+
+ print("=== Starting benchmarks ===\n")
+
+ try:
+ for backend in BACKENDS:
+ print(f"=== Benchmarking backend: {backend} ===")
+ out_current = f"out-{branch_safe}-{backend}"
+ out_main = f"out-{target_safe}-{backend}"
+ results_dir = OUTPUT_DIR / f"comparison_{backend}_{branch_safe}_vs_{target_safe}_{timestamp}"
+ results_dir.mkdir(parents=True, exist_ok=True)
+
+ for bench_path, n in BENCHMARKS.items():
+ bench_name = Path(bench_path).name
+ print(f" {bench_name}")
+ result = subprocess.run([
+ "hyperfine",
+ "--warmup", str(WARMUP),
+ "--runs", str(RUNS),
+ "--export-json", str(results_dir / f"{bench_name}.json"),
+ "--command-name", TARGET_BRANCH,
+ bench_exec(backend, out_main, bench_name, n),
+ "--command-name", BRANCH,
+ bench_exec(backend, out_current, bench_name, n),
+ ], cwd=REPO_ROOT)
+ if result.returncode != 0:
+ print(f" (skipped {bench_name} — hyperfine failed)")
+
+ print(f"Results: {results_dir}/\n")
+ finally:
+ # Always make sure we end up on the original branch
+ if git_current_branch() != current_branch:
+ print(f"=== Switching back to {current_branch} ===")
+ run(["git", "checkout", "-q", current_branch])
+
+ print(f"Done! Results in: {OUTPUT_DIR}/")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/python_benchmark/benchmark.yml b/python_benchmark/benchmark.yml
new file mode 100644
index 000000000..53ffa0ec4
--- /dev/null
+++ b/python_benchmark/benchmark.yml
@@ -0,0 +1,92 @@
+backends:
+ - llvm
+
+warmup: 10
+runs: 50
+branch: konradbausch/arity-raising
+target_branch: main
+output_dir: benchmark-results
+
+benchmarks:
+ # - path: arity_raising/bad_mark
+ # n: 160000
+ - path: arity_raising/bad_mark_2
+ n: 160000
+
+ # - path: large_record/return_int_recreate_1
+ # n: 160000000
+ # - path: large_record/return_int_recreate_2
+ # n: 140000000
+ # - path: large_record/return_int_recreate_3
+ # n: 4000000000
+ # - path: large_record/return_int_recreate_4
+ # n: 150000000
+ # - path: large_record/return_int_recreate_5
+ # n: 150000000
+ # - path: large_record/return_int_recreate_6
+ # n: 130000000
+ # - path: large_record/return_int_recreate_7
+ # n: 95000000
+ # - path: large_record/return_int_recreate_8
+ # n: 85000000
+ # - path: large_record/return_int_recreate_9
+ # n: 75000000
+ # - path: large_record/return_int_recreate_10
+ # n: 75000000
+ # - path: large_record/return_int_recreate_11
+ # n: 70000000
+ # - path: large_record/return_int_recreate_12
+ # n: 65000000
+ # - path: large_record/return_int_recreate_13
+ # n: 65000000
+ # - path: large_record/return_int_recreate_14
+ # n: 37000000
+ # - path: large_record/return_int_recreate_15
+ # n: 55000000
+ # - path: large_record/return_int_recreate_16
+ # n: 55000000
+ # - path: large_record/return_int_recreate_17
+ # n: 50000000
+ # - path: large_record/return_int_recreate_18
+ # n: 50000000
+ # - path: large_record/return_int_recreate_19
+ # n: 45000000
+ # - path: large_record/return_int_recreate_20
+ # n: 40000000
+
+ # - path: nested_record/return_int_recreate_0
+ # n: 2000
+ # - path: nested_record/return_int_recreate_25
+ # n: 240000
+ # - path: nested_record/return_int_recreate_50
+ # n: 120000
+ # - path: nested_record/return_int_recreate_75
+ # n: 83000
+ # - path: nested_record/return_int_recreate_100
+ # n: 62000
+ # - path: nested_record/return_int_recreate_125
+ # n: 47000
+ # - path: nested_record/return_int_recreate_150
+ # n: 38000
+ # - path: nested_record/return_int_recreate_175
+ # n: 30000
+ # - path: nested_record/return_int_recreate_200
+ # n: 25000
+ # - path: nested_record/return_int_recreate_225
+ # n: 20000
+ # - path: nested_record/return_int_recreate_250
+ # n: 16000
+ # - path: nested_record/return_int_recreate_275
+ # n: 13000
+ # - path: nested_record/return_int_recreate_300
+ # n: 11000
+ # - path: nested_record/return_int_recreate_325
+ # n: 9000
+ # - path: nested_record/return_int_recreate_350
+ # n: 7500
+ # - path: nested_record/return_int_recreate_375
+ # n: 6500
+ # - path: nested_record/return_int_recreate_400
+ # n: 6000
+ # - path: nested_record/return_int_recreate_425
+ # n: 5000
diff --git a/python_benchmark/benchmark_to_csv.py b/python_benchmark/benchmark_to_csv.py
new file mode 100755
index 000000000..e0ecbc7d1
--- /dev/null
+++ b/python_benchmark/benchmark_to_csv.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+"""
+Converts a directory of per-benchmark hyperfine JSON files to CSV.
+Usage: ./benchmark_to_csv.py [output.csv]
+ If output.csv is omitted, prints to stdout.
+"""
+
+import sys
+import re
+import csv
+import json
+import math
+from pathlib import Path
+
+if len(sys.argv) < 2:
+ print(f"Usage: {sys.argv[0]} [output.csv]", file=sys.stderr)
+ sys.exit(1)
+
+results_dir = Path(sys.argv[1])
+output = sys.argv[2] if len(sys.argv) > 2 else None
+
+results = []
+
+for json_file in results_dir.glob('*.json'):
+ entry = json.loads(json_file.read_text())
+ name = json_file.stem
+ by_cmd = {r['command']: r for r in entry['results']}
+
+ if 'main' not in by_cmd or len(by_cmd) < 2:
+ continue
+
+ main_r = by_cmd['main']
+ branch_r = next(r for cmd, r in by_cmd.items() if cmd != 'main')
+
+ # hyperfine stores times in seconds; convert to ms
+ main_mean = main_r['mean'] * 1000
+ main_pm = main_r['stddev'] * 1000
+ br_mean = branch_r['mean'] * 1000
+ br_pm = branch_r['stddev'] * 1000
+
+ speedup = main_mean / br_mean
+ # error propagation for f = a/b: σ_f/f = sqrt((σ_a/a)² + (σ_b/b)²)
+ speedup_pm = speedup * math.sqrt((main_pm / main_mean) ** 2 + (br_pm / br_mean) ** 2)
+
+ results.append((name, speedup, speedup_pm, br_mean, br_pm, main_mean, main_pm))
+
+# Natural sort: split name into text/number chunks so e.g. _2 < _11
+def natural_key(row):
+ return [int(c) if c.isdigit() else c for c in re.split(r'(\d+)', row[0])]
+
+results.sort(key=natural_key)
+
+header = ['benchmark', 'speedup', 'speedup_pm', 'ar_mean_ms', 'ar_pm_ms', 'main_mean_ms', 'main_pm_ms']
+
+def fmt(v, decimals=2):
+ return f"{v:.{decimals}f}"
+
+fh = open(output, 'w', newline='') if output else sys.stdout
+
+writer = csv.writer(fh)
+writer.writerow(header)
+for name, speedup, speedup_pm, ar_mean, ar_pm, main_mean, main_pm in results:
+ writer.writerow([
+ name,
+ fmt(speedup),
+ fmt(speedup_pm) if speedup_pm is not None else '',
+ fmt(ar_mean, 1),
+ fmt(ar_pm, 1),
+ fmt(main_mean, 1),
+ fmt(main_pm, 1),
+ ])
+
+if output:
+ fh.close()
+ print(f"Written to {output}", file=sys.stderr)
diff --git a/python_benchmark/create.sh b/python_benchmark/create.sh
new file mode 100755
index 000000000..25f52a0b9
--- /dev/null
+++ b/python_benchmark/create.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+NESTED_OUT="../examples/benchmarks/nested_record"
+LARGE_OUT="../examples/benchmarks/large_record"
+
+mkdir -p "$NESTED_OUT" "$LARGE_OUT"
+
+MAX_NESTED_RECORD_SIZE=1000
+
+for i in $(seq 1 $MAX_NESTED_RECORD_SIZE); do
+ python nested_record/create_return_int_recreate.py $i > "$NESTED_OUT/return_int_recreate_$i.effekt"
+ python nested_record/create_return_int_reuse.py $i > "$NESTED_OUT/return_int_reuse_$i.effekt"
+ python nested_record/create_return_record_recreate.py $i > "$NESTED_OUT/return_record_recreate_$i.effekt"
+ python nested_record/create_return_record_reuse.py $i > "$NESTED_OUT/return_record_reuse_$i.effekt"
+done
+
+echo "Generated files from 1 to $MAX_NESTED_RECORD_SIZE for nested records"
+
+MAX_LARGE_RECORD_SIZE=100
+
+for i in $(seq 1 $MAX_LARGE_RECORD_SIZE); do
+ python large_record/create_return_int_recreate.py $i > "$LARGE_OUT/return_int_recreate_$i.effekt"
+ python large_record/create_return_int_reuse.py $i > "$LARGE_OUT/return_int_reuse_$i.effekt"
+ python large_record/create_return_record_recreate.py $i > "$LARGE_OUT/return_record_recreate_$i.effekt"
+ python large_record/create_return_record_reuse.py $i > "$LARGE_OUT/return_record_reuse_$i.effekt"
+done
+
+echo "Generated files from 1 to $MAX_LARGE_RECORD_SIZE for large records"
diff --git a/python_benchmark/large_record/create_return_int_recreate.py b/python_benchmark/large_record/create_return_int_recreate.py
new file mode 100644
index 000000000..228edafc0
--- /dev/null
+++ b/python_benchmark/large_record/create_return_int_recreate.py
@@ -0,0 +1,34 @@
+import sys  # usage: python create_return_int_recreate.py <num_fields> -> Effekt source on stdout
+
+length = int(sys.argv[1])  # number of Int fields in the generated record
+
+print("import examples/benchmarks/runner")
+
+fields = [str(i) for i in range(length)]
+field_types = ", ".join([f"x{i}: Int" for i in fields])  # "x0: Int, x1: Int, ..."
+field_sum = " + ".join([f"m.x{i}" for i in fields])  # sum of every field of m
+field_values = ", ".join([f"i + {i}" for i in fields])  # constructor args derived from loop var i
+shifted_fields = ", ".join([f"m.x{i}" for i in (fields[-1:] + fields[:-1])])  # fields rotated by one, forces a fresh allocation
+
+print(f"record Rec({field_types})")
+print(f"""
+def recfunc(m: Rec, depth: Int): Int = {{
+ if (depth <= 0) {{ {field_sum} }}
+ else {{recfunc(Rec({shifted_fields}), depth - 1)}}
+}}
+""")
+
+print(f"""
+def runBenchmark(n: Int): Int = {{
+ def loop(i: Int, acc: Int): Int = {{
+ if (i <= 0) {{ acc }}
+ else {{
+ val rec = Rec({field_values})
+ loop(i - 1, acc + recfunc(rec, 2))
+ }}
+ }}
+ loop(n, 0)
+}}
+""")
+
+print("def main() = benchmark(1000000){ n => runBenchmark(n) }")
diff --git a/python_benchmark/large_record/create_return_int_reuse.py b/python_benchmark/large_record/create_return_int_reuse.py
new file mode 100644
index 000000000..da6399bb1
--- /dev/null
+++ b/python_benchmark/large_record/create_return_int_reuse.py
@@ -0,0 +1,33 @@
+import sys  # usage: python create_return_int_reuse.py <num_fields> -> Effekt source on stdout
+
+length = int(sys.argv[1])  # number of Int fields in the generated record
+
+print("import examples/benchmarks/runner")
+
+fields = [str(i) for i in range(length)]
+field_types = ", ".join([f"x{i}: Int" for i in fields])  # "x0: Int, x1: Int, ..."
+field_sum = " + ".join([f"m.x{i}" for i in fields])  # sum of every field of m
+field_values = ", ".join([f"i + {i}" for i in fields])  # constructor args derived from loop var i
+
+print(f"record Rec({field_types})")
+print(f"""
+def recfunc(m: Rec, depth: Int): Int = {{
+ if (depth <= 0) {{ {field_sum} }}
+ else {{recfunc(m, depth - 1)}}
+}}
+""")
+
+print(f"""
+def runBenchmark(n: Int): Int = {{
+ def loop(i: Int, acc: Int): Int = {{
+ if (i <= 0) {{ acc }}
+ else {{
+ val rec = Rec({field_values})
+ loop(i - 1, acc + recfunc(rec, 2))
+ }}
+ }}
+ loop(n, 0)
+}}
+""")
+
+print("def main() = benchmark(1000000){ n => runBenchmark(n) }")
diff --git a/python_benchmark/large_record/create_return_record_recreate.py b/python_benchmark/large_record/create_return_record_recreate.py
new file mode 100644
index 000000000..ff6e8996d
--- /dev/null
+++ b/python_benchmark/large_record/create_return_record_recreate.py
@@ -0,0 +1,35 @@
+import sys  # usage: python create_return_record_recreate.py <num_fields> -> Effekt source on stdout
+
+length = int(sys.argv[1])  # number of Int fields in the generated record
+
+print("import examples/benchmarks/runner")
+
+fields = [str(i) for i in range(length)]
+field_types = ", ".join([f"x{i}: Int" for i in fields])  # "x0: Int, x1: Int, ..."
+result_field_sum = " + ".join([f"result.x{i}" for i in fields])  # sums the returned record's fields
+field_values = ", ".join([f"i + {i}" for i in fields])  # constructor args derived from loop var i
+shifted_fields = ", ".join([f"m.x{i}" for i in (fields[-1:] + fields[:-1])])  # fields rotated by one, forces a fresh allocation
+
+print(f"record Rec({field_types})")
+print(f"""
+def recfunc(m: Rec, depth: Int): Rec = {{
+ if (depth <= 0) {{ m }}
+ else {{recfunc(Rec({shifted_fields}), depth - 1)}}
+}}
+""")
+
+print(f"""
+def runBenchmark(n: Int): Int = {{
+ def loop(i: Int, acc: Int): Int = {{
+ if (i <= 0) {{ acc }}
+ else {{
+ val rec = Rec({field_values})
+ val result = recfunc(rec, 2)
+ loop(i - 1, acc + {result_field_sum})
+ }}
+ }}
+ loop(n, 0)
+}}
+""")
+
+print("def main() = benchmark(1000000){ n => runBenchmark(n) }")
diff --git a/python_benchmark/large_record/create_return_record_reuse.py b/python_benchmark/large_record/create_return_record_reuse.py
new file mode 100644
index 000000000..e27784c47
--- /dev/null
+++ b/python_benchmark/large_record/create_return_record_reuse.py
@@ -0,0 +1,34 @@
+import sys  # usage: python create_return_record_reuse.py <num_fields> -> Effekt source on stdout
+
+length = int(sys.argv[1])  # number of Int fields in the generated record
+
+print("import examples/benchmarks/runner")
+
+fields = [str(i) for i in range(length)]
+field_types = ", ".join([f"x{i}: Int" for i in fields])  # "x0: Int, x1: Int, ..."
+result_field_sum = " + ".join([f"result.x{i}" for i in fields])  # sums the returned record's fields
+field_values = ", ".join([f"i + {i}" for i in fields])  # constructor args derived from loop var i
+
+print(f"record Rec({field_types})")
+print(f"""
+def recfunc(m: Rec, depth: Int): Rec = {{
+ if (depth <= 0) {{ m }}
+ else {{recfunc(m, depth - 1)}}
+}}
+""")
+
+print(f"""
+def runBenchmark(n: Int): Int = {{
+ def loop(i: Int, acc: Int): Int = {{
+ if (i <= 0) {{ acc }}
+ else {{
+ val rec = Rec({field_values})
+ val result = recfunc(rec, 2)
+ loop(i - 1, acc + {result_field_sum})
+ }}
+ }}
+ loop(n, 0)
+}}
+""")
+
+print("def main() = benchmark(1000000){ n => runBenchmark(n) }")
diff --git a/python_benchmark/nested_record/create_return_int_recreate.py b/python_benchmark/nested_record/create_return_int_recreate.py
new file mode 100644
index 000000000..bee9f6d17
--- /dev/null
+++ b/python_benchmark/nested_record/create_return_int_recreate.py
@@ -0,0 +1,42 @@
+import sys  # usage: python create_return_int_recreate.py <nesting_depth> -> Effekt source on stdout
+
+nesting = int(sys.argv[1])  # number of nested record levels to generate
+
+print("import examples/benchmarks/runner")
+
+rec_constructor = "Rec0(1, 1)"  # innermost constructor; wrapped once per level below
+
+for i in range(nesting):
+ if i == 0:
+ print("record Rec0(a: Int, b: Int)")
+ print("""
+def recfunc0(m: Rec0, depth: Int): Int = {
+ m.a + m.b
+}
+""")
+ else:
+ rec_constructor = f"Rec{i}({rec_constructor}, {i} - i)"  # NOTE(review): emits e.g. "3 - i" using the Effekt loop var i — confirm intended
+ print(f"record Rec{i}(a: Rec{i-1}, b: Int)")
+ print(f"""
+def recfunc{i}(m: Rec{i}, depth: Int): Int = {{
+
+ if (depth <= 0) {{ recfunc{i-1}(m.a, 2) + m.b }}
+ else {{ recfunc{i}(Rec{i}(m.a, m.b + 1), depth - 1) + m.b }}
+
+}}
+""")
+
+print(f"""
+def runBenchmark(n: Int): Int = {{
+ def loop(i: Int, acc: Int): Int = {{
+ if (i <= 0) {{ acc }}
+ else {{
+ val rec = {rec_constructor}
+ loop(i - 1, acc + recfunc{nesting - 1}(rec, 2))
+ }}
+ }}
+ loop(n, 0)
+}}
+""")
+
+print("def main() = benchmark(1000000){ n => runBenchmark(n) }")
diff --git a/python_benchmark/nested_record/create_return_int_reuse.py b/python_benchmark/nested_record/create_return_int_reuse.py
new file mode 100644
index 000000000..9f572a87a
--- /dev/null
+++ b/python_benchmark/nested_record/create_return_int_reuse.py
@@ -0,0 +1,42 @@
+import sys  # usage: python create_return_int_reuse.py <nesting_depth> -> Effekt source on stdout
+
+nesting = int(sys.argv[1])  # number of nested record levels to generate
+
+print("import examples/benchmarks/runner")
+
+rec_constructor = "Rec0(1, 1)"  # innermost constructor; wrapped once per level below
+
+for i in range(nesting):
+ if i == 0:
+ print("record Rec0(a: Int, b: Int)")
+ print("""
+def recfunc0(m: Rec0, depth: Int): Int = {
+ m.a + m.b
+}
+""")
+ else:
+ rec_constructor = f"Rec{i}({rec_constructor}, {i} - i)"  # NOTE(review): emits e.g. "3 - i" using the Effekt loop var i — confirm intended
+ print(f"record Rec{i}(a: Rec{i-1}, b: Int)")
+ print(f"""
+def recfunc{i}(m: Rec{i}, depth: Int): Int = {{
+
+ if (depth <= 0) {{ recfunc{i-1}(m.a, 2) + m.b }}
+ else {{ recfunc{i}(m, depth - 1) + m.b }}
+
+}}
+""")
+
+print(f"""
+def runBenchmark(n: Int): Int = {{
+ def loop(i: Int, acc: Int): Int = {{
+ if (i <= 0) {{ acc }}
+ else {{
+ val rec = {rec_constructor}
+ loop(i - 1, acc + recfunc{nesting - 1}(rec, 2))
+ }}
+ }}
+ loop(n, 0)
+}}
+""")
+
+print("def main() = benchmark(1000000){ n => runBenchmark(n) }")
diff --git a/python_benchmark/nested_record/create_return_record_recreate.py b/python_benchmark/nested_record/create_return_record_recreate.py
new file mode 100644
index 000000000..e8355fae1
--- /dev/null
+++ b/python_benchmark/nested_record/create_return_record_recreate.py
@@ -0,0 +1,43 @@
+import sys  # usage: python create_return_record_recreate.py <nesting_depth> -> Effekt source on stdout
+
+nesting = int(sys.argv[1])  # number of nested record levels to generate
+
+print("import examples/benchmarks/runner")
+
+rec_constructor = "Rec0(1, 1)"  # innermost constructor; wrapped once per level below
+
+for i in range(nesting):
+ if i == 0:
+ print("record Rec0(a: Int, b: Int)")
+ print("""
+def recfunc0(m: Rec0, depth: Int): Rec0 = {
+ m
+}
+""")
+ else:
+ rec_constructor = f"Rec{i}({rec_constructor}, {i} - i)"  # NOTE(review): emits e.g. "3 - i" using the Effekt loop var i — confirm intended
+ print(f"record Rec{i}(a: Rec{i-1}, b: Int)")
+ print(f"""
+def recfunc{i}(m: Rec{i}, depth: Int): Rec{i} = {{
+
+ if (depth <= 0) {{ Rec{i}(recfunc{i-1}(m.a, 2), m.b) }}
+ else {{ recfunc{i}(Rec{i}(m.a, m.b + 1), depth - 1) }}
+
+}}
+""")
+
+print(f"""
+def runBenchmark(n: Int): Int = {{
+ def loop(i: Int, acc: Int): Int = {{
+ if (i <= 0) {{ acc }}
+ else {{
+ val rec = {rec_constructor}
+ val result = recfunc{nesting - 1}(rec, 2)
+ loop(i - 1, acc + result.b)
+ }}
+ }}
+ loop(n, 0)
+}}
+""")
+
+print("def main() = benchmark(1000000){ n => runBenchmark(n) }")
diff --git a/python_benchmark/nested_record/create_return_record_reuse.py b/python_benchmark/nested_record/create_return_record_reuse.py
new file mode 100644
index 000000000..6cebc3605
--- /dev/null
+++ b/python_benchmark/nested_record/create_return_record_reuse.py
@@ -0,0 +1,43 @@
+import sys  # usage: python create_return_record_reuse.py <nesting_depth> -> Effekt source on stdout
+
+nesting = int(sys.argv[1])  # number of nested record levels to generate
+
+print("import examples/benchmarks/runner")
+
+rec_constructor = "Rec0(1, 1)"  # innermost constructor; wrapped once per level below
+
+for i in range(nesting):
+ if i == 0:
+ print("record Rec0(a: Int, b: Int)")
+ print("""
+def recfunc0(m: Rec0, depth: Int): Rec0 = {
+ m
+}
+""")
+ else:
+ rec_constructor = f"Rec{i}({rec_constructor}, {i} - i)"  # NOTE(review): emits e.g. "3 - i" using the Effekt loop var i — confirm intended
+ print(f"record Rec{i}(a: Rec{i-1}, b: Int)")
+ print(f"""
+def recfunc{i}(m: Rec{i}, depth: Int): Rec{i} = {{
+
+ if (depth <= 0) {{ Rec{i}(recfunc{i-1}(m.a, 2), m.b) }}
+ else {{ recfunc{i}(m, depth - 1) }}
+
+}}
+""")
+
+print(f"""
+def runBenchmark(n: Int): Int = {{
+ def loop(i: Int, acc: Int): Int = {{
+ if (i <= 0) {{ acc }}
+ else {{
+ val rec = {rec_constructor}
+ val result = recfunc{nesting - 1}(rec, 2)
+ loop(i - 1, acc + result.b)
+ }}
+ }}
+ loop(n, 0)
+}}
+""")
+
+print("def main() = benchmark(1000000){ n => runBenchmark(n) }")
diff --git a/python_benchmark/plot_speedup.py b/python_benchmark/plot_speedup.py
new file mode 100644
index 000000000..c55077111
--- /dev/null
+++ b/python_benchmark/plot_speedup.py
@@ -0,0 +1,181 @@
+"""
+Plot speedup results from hyperfine benchmark markdown files.
+
+Produces two publication-quality subplots – nested_records and large_records –
+with the same visual vocabulary as the learning-curve reference:
+ • CI band (fill_between, low alpha)
+ • Raw values as a faint thin line
+ • Smoothed trend as the main foreground line with markers
+"""
+from __future__ import annotations
+
+import glob
+import re
+import time
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import matplotlib.ticker as ticker
+import numpy as np
+import pandas as pd
+from tueplots import bundles, figsizes
+from tueplots.constants.color import rgb
+
+# ─── Config ──────────────────────────────────────────────────────────────────
+BENCHMARK_DIR = Path(__file__).parent.parent / "benchmark-results"
+BACKEND = "llvm"
+
+# ─── Helpers ─────────────────────────────────────────────────────────────────
+def _to_ms(value: str, unit: str) -> float:
+ """Convert a hyperfine time value (value + unit string) to milliseconds."""
+ v = float(value.replace(",", ""))
+ u = unit.strip()
+ if u == "µs": return v / 1_000
+ if u == "ms": return v
+ if u == "s": return v * 1_000
+ raise ValueError(f"Unknown time unit: {u!r}")
+
+
+# ─── Parser ──────────────────────────────────────────────────────────────────
+_TIME_RE = re.compile(  # hyperfine "Time (mean ± σ)" line; groups: mean, unit, σ, unit
+ r"Time \(mean ± σ\):\s+([\d.,]+)\s+(µs|ms|s)\s+±\s+([\d.,]+)\s+(µs|ms|s)"
+)
+_SUMMARY_RE = re.compile(  # hyperfine summary line; groups: winner command, factor, ± error
+ r"(konradbausch/arity-raising|main) ran\s+([\d.]+) ± ([\d.]+) times faster than",
+ re.DOTALL,
+)
+
+
+def parse_file(path: Path) -> pd.DataFrame:
+ """Return a DataFrame with one row per successfully benchmarked program."""
+ text = path.read_text()
+ rows = []
+
+ for section in re.split(r"^## ", text, flags=re.MULTILINE)[1:]:
+ name = section.splitlines()[0].strip()
+ m = re.match(r"^(nested_records|large_records)_(\d+)$", name)
+ if not m:
+ continue
+ family, n = m.group(1), int(m.group(2))
+
+ if "non-zero exit code" in section or "Error:" in section:
+ continue
+
+ times = _TIME_RE.findall(section)
+ if len(times) < 2:
+ continue
+
+ main_mean = _to_ms(times[0][0], times[0][1])
+ main_std = _to_ms(times[0][2], times[0][3])
+ ar_mean = _to_ms(times[1][0], times[1][1])
+ ar_std = _to_ms(times[1][2], times[1][3])
+
+ sm = _SUMMARY_RE.search(section)
+ if sm:
+ winner, sp, se = sm.group(1), float(sm.group(2)), float(sm.group(3))
+ if winner == "main": # arity-raising is the *loser*
+ sp = 1.0 / sp
+ se = se / (sp ** 2) # propagate 1/x uncertainty
+ else:
+ sp = main_mean / ar_mean
+ se = sp * np.sqrt((main_std / main_mean) ** 2 + (ar_std / ar_mean) ** 2)
+
+ rows.append(dict(
+ name=name, family=family, n=n,
+ main_mean=main_mean, main_std=main_std,
+ ar_mean=ar_mean, ar_std=ar_std,
+ speedup=sp, speedup_err=se,
+ ))
+
+ return pd.DataFrame(rows)
+
+
+def latest_file(backend: str) -> Path:
+ pattern = str(
+ BENCHMARK_DIR / f"comparison_{backend}_konradbausch-arity-raising_vs_main_*.md"
+ )
+ files = sorted(glob.glob(pattern))
+ if not files:
+ raise FileNotFoundError(f"No files matching {pattern}")
+ return Path(files[-1])
+
+
+# ─── Plot ────────────────────────────────────────────────────────────────────
+def make_plot(df: pd.DataFrame, outpath: Path | None = None) -> None:  # draw the two speedup panels and save PNG(s)
+ t0 = time.perf_counter()  # wall-clock timing for the log line at the end
+
+ plt.rcParams.update(bundles.icml2022())  # ICML-style typography via tueplots
+
+ fig, (ax_nested, ax_large) = plt.subplots(
+ 1, 2,
+ figsize=(7.0, 2.6),
+ constrained_layout=True,
+ )
+
+ panels = [  # (df family key, axis, line color, x-label, panel title)
+ ("nested_records", ax_nested, rgb.pn_orange,
+ "Record depth $n$",
+ "Speedup of arity-raising over main\n(nested records)"),
+ ("large_records", ax_large, rgb.tue_blue,
+ "Number of record fields $n$",
+ "Speedup of arity-raising over main\n(large records)"),
+ ]
+
+ for family, ax, color, xlabel, title in panels:
+ sub = df[df["family"] == family].sort_values("n")
+ if sub.empty:  # hide the panel entirely when a family has no data
+ ax.set_visible(False)
+ continue
+
+ x = sub["n"].to_numpy(dtype=float)
+ y = sub["speedup"].to_numpy()
+ lo = y - sub["speedup_err"].to_numpy()  # lower edge of the ±σ band
+ hi = y + sub["speedup_err"].to_numpy()  # upper edge of the ±σ band
+
+ # Deviation band
+ ax.fill_between(x, lo, hi, alpha=0.18, linewidth=0, color=color, zorder=1)
+
+ # Speedup line
+ ax.plot(
+ x, y,
+ linewidth=1.3,
+ marker="o",
+ markersize=2.2,
+ color=color,
+ zorder=3,
+ label="arity-raising vs main",
+ )
+
+ # Reference line at y = 1 (no speedup)
+ ax.axhline(1.0, linewidth=0.6, linestyle="--", color="0.55", zorder=0)
+
+ ax.set_xlabel(xlabel)
+ ax.set_ylabel("Speedup (×)")
+ ax.set_title(title)
+ ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
+ ax.grid(axis="y", which="major", color="0.88", linewidth=0.6)
+ ax.margins(x=0.03)
+ ax.set_ylim(bottom=0)  # speedups are positive; anchor axis at zero
+
+ if outpath is not None:
+ outpath.parent.mkdir(parents=True, exist_ok=True)
+ fig.savefig(outpath, dpi=300, bbox_inches="tight")
+ print(f"✓ Saved → {outpath}")
+
+ plt.savefig(  # NOTE(review): always writes a second copy next to this script, even when outpath was given — confirm intended
+ Path(__file__).parent / "speedup_comparison.png",
+ dpi=300, bbox_inches="tight",
+ )
+ print(f"✓ Saved → {Path(__file__).parent / 'speedup_comparison.png'}")
+ plt.show()  # NOTE(review): blocks under interactive backends; may be undesirable in CI
+ plt.close(fig)
+ print(f"[plot_speedup] total time: {time.perf_counter() - t0:.2f}s")
+
+
+# ─── Entry point ─────────────────────────────────────────────────────────────
+if __name__ == "__main__":
+ src = latest_file(BACKEND)  # newest comparison markdown for the configured backend
+ print(f"Parsing: {src.name}")
+ df = parse_file(src)
+ print(df[["name", "speedup", "speedup_err"]].to_string(index=False))  # quick console summary before plotting
+ make_plot(df)