diff --git a/CHANGELOG.md b/CHANGELOG.md index ca81458e01..8051a1c52f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Implemented project assembly ([#2877](https://github.com/0xMiden/miden-vm/pull/2877)). - Added `FastProcessor::into_parts()` to extract advice provider, memory, and precompile transcript after step-based execution ([#2901](https://github.com/0xMiden/miden-vm/pull/2901)). +- Added `FrameBase` variant to `DebugVarLocation` and `set_value_location` to `DebugVarInfo` for frame-pointer-relative debug variable locations ([#2955](https://github.com/0xMiden/miden-vm/pull/2955)). ## 0.22.0 (2026-03-18) diff --git a/Cargo.lock b/Cargo.lock index 73ca6d82b8..3c41b49bf5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1350,7 +1350,7 @@ dependencies = [ "num", "num-bigint", "pretty_assertions", - "rand 0.9.2", + "rand 0.9.4", "rand_chacha", "rstest", "serde_json", @@ -1387,7 +1387,7 @@ dependencies = [ "p3-miden-lifted-stark", "p3-symmetric", "p3-util", - "rand 0.9.2", + "rand 0.9.4", "rand_chacha", "rand_core 0.9.5", "rand_hc", @@ -1438,7 +1438,7 @@ dependencies = [ "p3-field", "p3-goldilocks", "paste", - "rand 0.10.0", + "rand 0.10.1", "serde", "subtle", "thiserror", @@ -1936,7 +1936,7 @@ dependencies = [ "p3-maybe-rayon", "p3-util", "paste", - "rand 0.10.0", + "rand 0.10.1", "serde", "tracing", ] @@ -1957,7 +1957,7 @@ dependencies = [ "p3-symmetric", "p3-util", "paste", - "rand 0.10.0", + "rand 0.10.1", "serde", ] @@ -1982,7 +1982,7 @@ dependencies = [ "p3-field", "p3-maybe-rayon", "p3-util", - "rand 0.10.0", + "rand 0.10.1", "serde", "tracing", ] @@ -2006,7 +2006,7 @@ dependencies = [ "p3-field", "p3-symmetric", "p3-util", - "rand 0.10.0", + "rand 0.10.1", ] [[package]] @@ -2037,7 +2037,7 @@ dependencies = [ "p3-miden-lmcs", "p3-miden-transcript", "p3-util", - "rand 0.10.0", + "rand 0.10.1", "thiserror", "tracing", ] @@ -2077,7 +2077,7 @@ dependencies = [ "p3-miden-transcript", "p3-symmetric", "p3-util", - "rand 0.10.0", + "rand 0.10.1", "serde", "thiserror", "tracing", @@ -2123,7 +2123,7 @@ dependencies = [ "p3-symmetric", "p3-util", "paste", - "rand 0.10.0", + "rand 0.10.1", "serde", "spin 0.10.0", "tracing", @@ -2137,7 +2137,7 @@ checksum = "6a018b618e3fa0aec8be933b1d8e404edd23f46991f6bf3f5c2f3f95e9413fe9" dependencies = [ "p3-field", "p3-symmetric", - "rand 0.10.0", + "rand 0.10.1", ] [[package]] @@ -2150,7 +2150,7 @@ dependencies = [ "p3-mds", "p3-symmetric", "p3-util", - "rand 0.10.0", + "rand 0.10.1", ] [[package]] @@ -2396,7 +2396,7 @@ checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744" dependencies = [ "bitflags", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "rand_chacha", "rand_xorshift", "regex-syntax", @@ -2451,9 +2451,9 @@ checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" [[package]] name = "rand" -version = "0.9.2" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ "rand_chacha", "rand_core 0.9.5", @@ -2461,9 +2461,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8" +checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" dependencies = [ "rand_core 0.10.0", ] diff --git a/core/src/mast/debuginfo/asm_op_storage.rs b/core/src/mast/debuginfo/asm_op_storage.rs index b1108ccb6b..ba3dba3fff 100644 --- a/core/src/mast/debuginfo/asm_op_storage.rs +++ b/core/src/mast/debuginfo/asm_op_storage.rs @@ -183,6 +183,12 @@ impl OpToAsmOpId { entries.first().map(|(_, id)| *id) } + /// Returns all `(op_idx, AsmOpId)` pairs for the given node, or an empty vec if the + /// node has no asm ops. + pub fn asm_ops_for_node(&self, node_id: MastNodeId) -> Vec<(usize, AsmOpId)> { + self.inner.row(node_id).map(|r| r.to_vec()).unwrap_or_default() + } + /// Validates the CSR structure integrity. /// /// # Arguments diff --git a/core/src/mast/debuginfo/debug_var_storage.rs b/core/src/mast/debuginfo/debug_var_storage.rs index 6c11aaca25..7ef81c9779 100644 --- a/core/src/mast/debuginfo/debug_var_storage.rs +++ b/core/src/mast/debuginfo/debug_var_storage.rs @@ -452,6 +452,71 @@ impl OpToDebugVarIds { Ok(result) } + /// Returns all `(op_idx, DebugVarId)` pairs for the given node, or an empty vec if the + /// node has no debug vars. + pub fn debug_vars_for_node(&self, node: MastNodeId) -> Vec<(usize, DebugVarId)> { + let op_range = match self.operation_range_for_node(node) { + Ok(range) => range, + Err(_) => return Vec::new(), + }; + + let mut result = Vec::new(); + for (op_offset, op_idx) in op_range.clone().enumerate() { + if op_idx + 1 >= self.op_indptr_for_var_ids.len() { + break; + } + let var_start = self.op_indptr_for_var_ids[op_idx]; + let var_end = self.op_indptr_for_var_ids[op_idx + 1]; + for &var_id in &self.debug_var_ids[var_start..var_end] { + result.push((op_offset, var_id)); + } + } + result + } + + /// Creates a new [`OpToDebugVarIds`] with remapped node IDs. + /// + /// This is used when nodes are removed from a MastForest and the remaining nodes are + /// renumbered. The remapping maps old node IDs to new node IDs. + /// + /// Nodes that are not in the remapping are considered removed and their debug var data + /// is discarded. + pub fn remap_nodes( + &self, + remapping: &alloc::collections::BTreeMap, + ) -> Self { + if self.is_empty() { + return Self::new(); + } + if remapping.is_empty() { + return self.clone(); + } + + let max_new_id = remapping.values().map(|id| id.to_usize()).max().unwrap_or(0); + let num_new_nodes = max_new_id + 1; + + let mut new_node_data: alloc::collections::BTreeMap> = + alloc::collections::BTreeMap::new(); + + for (old_id, new_id) in remapping { + let vars = self.debug_vars_for_node(*old_id); + if !vars.is_empty() { + new_node_data.insert(new_id.to_usize(), vars); + } + } + + let mut new_storage = Self::new(); + for idx in 0..num_new_nodes { + let node_id = MastNodeId::new_unchecked(idx as u32); + let vars = new_node_data.remove(&idx).unwrap_or_default(); + new_storage + .add_debug_var_info_for_node(node_id, vars) + .expect("failed to remap debug var storage"); + } + + new_storage + } + /// Clears this storage. pub fn clear(&mut self) { self.debug_var_ids.clear(); diff --git a/core/src/mast/debuginfo/mod.rs b/core/src/mast/debuginfo/mod.rs index 3294d908bb..8b6325c5c8 100644 --- a/core/src/mast/debuginfo/mod.rs +++ b/core/src/mast/debuginfo/mod.rs @@ -272,6 +272,12 @@ impl DebugInfo { self.debug_vars.get(debug_var_id) } + /// Returns all `(op_idx, DebugVarId)` pairs for the given node, or an empty vec if the + /// node has no debug vars. + pub fn debug_vars_for_node(&self, node_id: MastNodeId) -> Vec<(usize, DebugVarId)> { + self.op_debug_var_storage.debug_vars_for_node(node_id) + } + /// Returns debug variable IDs for a specific operation within a node. pub fn debug_vars_for_operation( &self, @@ -364,6 +370,11 @@ impl DebugInfo { self.asm_ops.get(asm_op_id) } + /// Returns all `(op_idx, AsmOpId)` pairs for the given node. + pub fn asm_ops_for_node(&self, node_id: MastNodeId) -> Vec<(usize, AsmOpId)> { + self.asm_op_storage.asm_ops_for_node(node_id) + } + // ASSEMBLY OP MUTATORS // -------------------------------------------------------------------------------------------- @@ -413,6 +424,14 @@ impl DebugInfo { self.asm_op_storage = self.asm_op_storage.remap_nodes(remapping); } + /// Remaps the debug var storage to use new node IDs after nodes have been removed/reordered. + /// + /// This should be called after nodes are removed from the MastForest to ensure the debug + /// var storage still references valid node IDs. + pub(super) fn remap_debug_var_storage(&mut self, remapping: &BTreeMap) { + self.op_debug_var_storage = self.op_debug_var_storage.remap_nodes(remapping); + } + // DEBUG VARIABLE MUTATORS // -------------------------------------------------------------------------------------------- diff --git a/core/src/mast/merger/mod.rs b/core/src/mast/merger/mod.rs index 67aec6f8e7..17634e57bd 100644 --- a/core/src/mast/merger/mod.rs +++ b/core/src/mast/merger/mod.rs @@ -3,9 +3,11 @@ use alloc::{collections::BTreeMap, vec::Vec}; use crate::{ crypto::hash::Blake3Digest, mast::{ - DecoratorId, MastForest, MastForestContributor, MastForestError, MastNode, MastNodeBuilder, - MastNodeFingerprint, MastNodeId, MultiMastForestIteratorItem, MultiMastForestNodeIter, + AsmOpId, DebugVarId, DecoratorId, MastForest, MastForestContributor, MastForestError, + MastNode, MastNodeBuilder, MastNodeFingerprint, MastNodeId, MultiMastForestIteratorItem, + MultiMastForestNodeIter, }, + serde::Serializable, utils::{DenseIdMap, IndexVec}, }; @@ -158,6 +160,8 @@ impl MastForestMerger { self.merge_roots(forest_idx, forest)?; } + self.merge_debug_metadata(&forests)?; + Ok(()) } @@ -228,9 +232,19 @@ impl MastForestMerger { &self.decorator_id_mappings[forest_idx], )?; - let node_fingerprint = + let base_fingerprint = remapped_builder.fingerprint_for_node(&self.mast_forest, &self.hash_by_node_id)?; + // Augment with the source node's debug vars so same-ops/different-vars + // blocks from different forests are not collapsed. + let debug_var_data = + serialize_debug_var_content_for_node(original_forests[forest_idx], merging_id); + let asm_op_data = + serialize_asm_op_content_for_node(original_forests[forest_idx], merging_id); + let node_fingerprint = base_fingerprint + .augment_with_data(&debug_var_data) + .augment_with_data(&asm_op_data); + match self.lookup_node_by_fingerprint(&node_fingerprint) { Some(matching_node_id) => { // If a node with a matching fingerprint exists, then the merging node is a @@ -282,6 +296,95 @@ impl MastForestMerger { Ok(()) } + /// Transfers procedure names, asm ops, and debug vars from the source forests + /// into the merged forest, remapping all IDs along the way. + /// + /// Procedure names are merged separately by digest. Per-node asm-op and debug-var + /// metadata are remapped by node ID, and when two source nodes map to the same merged + /// node (dedup), the first forest's per-node metadata wins. + fn merge_debug_metadata(&mut self, forests: &[&MastForest]) -> Result<(), MastForestError> { + // Procedure names are keyed by digest. First name wins so that a later + // forest cannot silently rename an already-registered procedure. + for forest in forests.iter() { + for (digest, name) in forest.debug_info.procedure_names() { + if self.mast_forest.debug_info.procedure_name(&digest).is_none() { + self.mast_forest.debug_info.insert_procedure_name(digest, name.clone()); + } + } + } + + // Collect per-node asm-op and debug-var registrations across all forests. + // BTreeMap gives us sorted-by-node-id iteration, which the CSR requires. + let mut asm_entries: BTreeMap> = BTreeMap::new(); + let mut dbg_entries: BTreeMap> = BTreeMap::new(); + + for (forest_idx, forest) in forests.iter().enumerate() { + // Copy AssemblyOp objects and build old→new AsmOpId remapping. + let mut asm_id_map: BTreeMap = BTreeMap::new(); + for (raw, asm_op) in forest.debug_info.asm_ops().iter().enumerate() { + let old_id = AsmOpId::new(raw as u32); + let new_id = self.mast_forest.debug_info.add_asm_op(asm_op.clone())?; + asm_id_map.insert(old_id, new_id); + } + + // Copy DebugVarInfo objects and build old→new DebugVarId remapping. + let mut dbg_id_map: BTreeMap = BTreeMap::new(); + for (raw, dvar) in forest.debug_info.debug_vars().iter().enumerate() { + let old_id = DebugVarId::from(raw as u32); + let new_id = self.mast_forest.debug_info.add_debug_var(dvar.clone())?; + dbg_id_map.insert(old_id, new_id); + } + + // For each source node, remap and store entries. First forest wins per node. + for old_raw in 0..forest.num_nodes() { + let old_id = MastNodeId::new_unchecked(old_raw); + let new_id = match self.node_id_mappings[forest_idx].get(old_id) { + Some(id) => id, + None => continue, + }; + + if let alloc::collections::btree_map::Entry::Vacant(e) = asm_entries.entry(new_id) { + let ops = forest.debug_info.asm_ops_for_node(old_id); + if !ops.is_empty() { + let remapped = + ops.into_iter().map(|(idx, id)| (idx, asm_id_map[&id])).collect(); + e.insert(remapped); + } + } + + if let alloc::collections::btree_map::Entry::Vacant(e) = dbg_entries.entry(new_id) { + let vars = forest.debug_info.debug_vars_for_node(old_id); + if !vars.is_empty() { + let remapped = + vars.into_iter().map(|(idx, id)| (idx, dbg_id_map[&id])).collect(); + e.insert(remapped); + } + } + } + } + + // Register in node-ID order (CSR sequential constraint). + for (node_id, entries) in asm_entries { + let num_ops = match &self.mast_forest[node_id] { + MastNode::Block(block) => block.num_operations() as usize, + _ => entries.iter().map(|(idx, _)| idx + 1).max().unwrap_or(0), + }; + self.mast_forest + .debug_info + .register_asm_ops(node_id, num_ops, entries) + .map_err(|_| MastForestError::TooManyNodes)?; + } + + for (node_id, entries) in dbg_entries { + self.mast_forest + .debug_info + .register_op_indexed_debug_vars(node_id, entries) + .map_err(|_| MastForestError::TooManyNodes)?; + } + + Ok(()) + } + // HELPERS // ================================================================================================ @@ -304,6 +407,63 @@ impl MastForestMerger { } } +// HELPERS +// ================================================================================================ + +/// Serializes the actual debug var *content* (name, location, etc.) for a node, +/// producing a stable byte sequence suitable for fingerprint augmentation. +/// +/// Unlike the assembler's `serialize_debug_vars` (which serializes `(op_idx, DebugVarId)` pairs), +/// this serializes the resolved DebugVarInfo so that two forests assigning different DebugVarIds +/// to identical variables still produce the same fingerprint contribution. +fn serialize_debug_var_content_for_node(forest: &MastForest, node_id: MastNodeId) -> Vec { + let entries = forest.debug_info().debug_vars_for_node(node_id); + if entries.is_empty() { + return Vec::new(); + } + + let mut data = Vec::new(); + for (op_idx, var_id) in entries { + data.extend_from_slice(&op_idx.to_le_bytes()); + if let Some(info) = forest.debug_info().debug_var(var_id) { + info.write_into(&mut data); + } + } + data +} + +/// Serializes the actual asm-op content for a node, producing a stable byte +/// sequence suitable for fingerprint augmentation. +/// +/// This ensures that nodes with identical structure but different source-mapping +/// metadata do not collapse during merge. +fn serialize_asm_op_content_for_node(forest: &MastForest, node_id: MastNodeId) -> Vec { + let entries = forest.debug_info().asm_ops_for_node(node_id); + if entries.is_empty() { + return Vec::new(); + } + + let mut data = Vec::new(); + for (op_idx, asm_op_id) in entries { + data.extend_from_slice(&op_idx.to_le_bytes()); + if let Some(asm_op) = forest.debug_info().asm_op(asm_op_id) { + asm_op.context_name().write_into(&mut data); + asm_op.op().write_into(&mut data); + asm_op.num_cycles().write_into(&mut data); + match asm_op.location() { + Some(location) => { + data.push(1); + location.uri.write_into(&mut data); + data.extend_from_slice(&u32::from(location.start).to_le_bytes()); + data.extend_from_slice(&u32::from(location.end).to_le_bytes()); + }, + None => data.push(0), + } + } + } + data +} + // MAST FOREST ROOT MAP // ================================================================================================ diff --git a/core/src/mast/merger/tests.rs b/core/src/mast/merger/tests.rs index bcffd6f917..8519ed0a32 100644 --- a/core/src/mast/merger/tests.rs +++ b/core/src/mast/merger/tests.rs @@ -1,3 +1,5 @@ +use alloc::sync::Arc; + use super::*; use crate::{ Felt, ONE, Word, @@ -6,7 +8,7 @@ use crate::{ LoopNodeBuilder, node::{MastForestContributor, MastNodeExt}, }, - operations::{DebugOptions, Decorator, Operation}, + operations::{AssemblyOp, DebugOptions, DebugVarInfo, DebugVarLocation, Decorator, Operation}, utils::Idx, }; @@ -1114,3 +1116,468 @@ fn mast_forest_merge_op_indexed_decorators_preservation() { "Every decorator in merged forest should be referenced at least once (no orphans)" ); } + +/// Merging two forests preserves procedure names, asm ops, and debug vars +/// with correct node-ID remapping. +#[test] +fn merge_preserves_debug_metadata() { + // Forest A: one block with asm op + debug var + procedure name. + let mut forest_a = MastForest::new(); + let asm_op = AssemblyOp::new(None, "test".into(), 1, "add".into()); + let asm_id_a = forest_a.debug_info_mut().add_asm_op(asm_op).unwrap(); + let dvar_a = forest_a + .add_debug_var(DebugVarInfo::new("x", DebugVarLocation::Stack(0))) + .unwrap(); + + let block_a_id = block_foo().add_to_forest(&mut forest_a).unwrap(); + let num_ops_a = forest_a[block_a_id].get_basic_block().unwrap().num_operations() as usize; + forest_a + .debug_info_mut() + .register_asm_ops(block_a_id, num_ops_a, vec![(0, asm_id_a)]) + .unwrap(); + forest_a + .debug_info_mut() + .register_op_indexed_debug_vars(block_a_id, vec![(0, dvar_a)]) + .unwrap(); + forest_a.make_root(block_a_id); + let digest_a = forest_a[block_a_id].digest(); + forest_a.insert_procedure_name(digest_a, Arc::from("proc_a")); + + // Forest B: different block with its own asm op + debug var + procedure name. + let mut forest_b = MastForest::new(); + let asm_op_b = AssemblyOp::new(None, "test".into(), 1, "and".into()); + let asm_id_b = forest_b.debug_info_mut().add_asm_op(asm_op_b).unwrap(); + let dvar_b = forest_b + .add_debug_var(DebugVarInfo::new("y", DebugVarLocation::Stack(1))) + .unwrap(); + + let block_b_id = block_bar().add_to_forest(&mut forest_b).unwrap(); + let num_ops_b = forest_b[block_b_id].get_basic_block().unwrap().num_operations() as usize; + forest_b + .debug_info_mut() + .register_asm_ops(block_b_id, num_ops_b, vec![(0, asm_id_b)]) + .unwrap(); + forest_b + .debug_info_mut() + .register_op_indexed_debug_vars(block_b_id, vec![(0, dvar_b)]) + .unwrap(); + forest_b.make_root(block_b_id); + let digest_b = forest_b[block_b_id].digest(); + forest_b.insert_procedure_name(digest_b, Arc::from("proc_b")); + + // Merge. + let (merged, root_map) = MastForest::merge([&forest_a, &forest_b]).unwrap(); + + // Both procedure names must be present. + assert_eq!(merged.procedure_name(&digest_a), Some("proc_a")); + assert_eq!(merged.procedure_name(&digest_b), Some("proc_b")); + + // Both nodes must have asm ops. + let new_a = root_map.map_root(0, &block_a_id).unwrap(); + let new_b = root_map.map_root(1, &block_b_id).unwrap(); + + assert!( + merged.debug_info().first_asm_op_for_node(new_a).is_some(), + "merged node A must have asm op" + ); + assert!( + merged.debug_info().first_asm_op_for_node(new_b).is_some(), + "merged node B must have asm op" + ); + + // Both nodes must have debug vars. + let vars_a = merged.debug_info().debug_vars_for_node(new_a); + let vars_b = merged.debug_info().debug_vars_for_node(new_b); + assert_eq!(vars_a.len(), 1, "merged node A must have debug var"); + assert_eq!(vars_b.len(), 1, "merged node B must have debug var"); +} + +/// compact() (which is a self-merge) must keep debug metadata intact. +#[test] +fn compact_preserves_debug_metadata() { + let mut forest = MastForest::new(); + let asm_op = AssemblyOp::new(None, "test".into(), 1, "add".into()); + let asm_id = forest.debug_info_mut().add_asm_op(asm_op).unwrap(); + let dvar = forest + .add_debug_var(DebugVarInfo::new("z", DebugVarLocation::Stack(2))) + .unwrap(); + + let block_id = block_foo().add_to_forest(&mut forest).unwrap(); + let num_ops = forest[block_id].get_basic_block().unwrap().num_operations() as usize; + forest + .debug_info_mut() + .register_asm_ops(block_id, num_ops, vec![(0, asm_id)]) + .unwrap(); + forest + .debug_info_mut() + .register_op_indexed_debug_vars(block_id, vec![(0, dvar)]) + .unwrap(); + forest.make_root(block_id); + let digest = forest[block_id].digest(); + forest.insert_procedure_name(digest, Arc::from("my_proc")); + + let (compacted, _root_map) = forest.compact(); + + // Find the node by digest in the compacted forest. + let compacted_id = compacted.find_procedure_root(digest).expect("root should survive compact"); + + assert_eq!(compacted.procedure_name(&digest), Some("my_proc")); + assert!( + compacted.debug_info().first_asm_op_for_node(compacted_id).is_some(), + "compacted node must keep asm op" + ); + let vars = compacted.debug_info().debug_vars_for_node(compacted_id); + assert_eq!(vars.len(), 1, "compacted node must keep debug var"); +} + +/// Two basic blocks with the same ops but different debug vars must stay +/// distinct after MastForest::merge (the merger must not collapse them). +#[test] +fn merge_keeps_blocks_with_different_debug_vars_distinct() { + // Forest A: block [Mul, Add] with debug var "x" at stack 0. + let mut forest_a = MastForest::new(); + let dvar_a = forest_a + .add_debug_var(DebugVarInfo::new("x", DebugVarLocation::Stack(0))) + .unwrap(); + let block_a = block_foo().add_to_forest(&mut forest_a).unwrap(); + forest_a + .debug_info_mut() + .register_op_indexed_debug_vars(block_a, vec![(0, dvar_a)]) + .unwrap(); + forest_a.make_root(block_a); + + // Forest B: identical block [Mul, Add] but with debug var "y" at stack 1. + let mut forest_b = MastForest::new(); + let dvar_b = forest_b + .add_debug_var(DebugVarInfo::new("y", DebugVarLocation::Stack(1))) + .unwrap(); + let block_b = block_foo().add_to_forest(&mut forest_b).unwrap(); + forest_b + .debug_info_mut() + .register_op_indexed_debug_vars(block_b, vec![(0, dvar_b)]) + .unwrap(); + forest_b.make_root(block_b); + + let (merged, root_map) = MastForest::merge([&forest_a, &forest_b]).unwrap(); + + let new_a = root_map.map_root(0, &block_a).unwrap(); + let new_b = root_map.map_root(1, &block_b).unwrap(); + + // The blocks must be distinct -- different debug vars must prevent dedup. + assert_ne!(new_a, new_b, "same-ops blocks with different debug vars must not be collapsed"); + + // Each must have its own debug var. + let vars_a = merged.debug_info().debug_vars_for_node(new_a); + let vars_b = merged.debug_info().debug_vars_for_node(new_b); + assert_eq!(vars_a.len(), 1); + assert_eq!(vars_b.len(), 1); + + let info_a = merged.debug_info().debug_var(vars_a[0].1).unwrap(); + let info_b = merged.debug_info().debug_var(vars_b[0].1).unwrap(); + assert_eq!(info_a.name(), "x"); + assert_eq!(info_b.name(), "y"); +} + +/// Two blocks with identical structure and debug vars but different asm-op +/// metadata must stay distinct after merge so diagnostics keep the right source +/// mapping. +#[test] +fn merge_keeps_blocks_with_different_asm_ops_distinct() { + let mut forest_a = MastForest::new(); + let asm_id_a = forest_a + .debug_info_mut() + .add_asm_op(AssemblyOp::new(None, "ctx_a".into(), 1, "mul add".into())) + .unwrap(); + let dvar_a = forest_a + .add_debug_var(DebugVarInfo::new("x", DebugVarLocation::Stack(0))) + .unwrap(); + let block_a = block_foo().add_to_forest(&mut forest_a).unwrap(); + let num_ops_a = forest_a[block_a].get_basic_block().unwrap().num_operations() as usize; + forest_a + .debug_info_mut() + .register_asm_ops(block_a, num_ops_a, vec![(0, asm_id_a)]) + .unwrap(); + forest_a + .debug_info_mut() + .register_op_indexed_debug_vars(block_a, vec![(0, dvar_a)]) + .unwrap(); + forest_a.make_root(block_a); + + let mut forest_b = MastForest::new(); + let asm_id_b = forest_b + .debug_info_mut() + .add_asm_op(AssemblyOp::new(None, "ctx_b".into(), 1, "mul add".into())) + .unwrap(); + let dvar_b = forest_b + .add_debug_var(DebugVarInfo::new("x", DebugVarLocation::Stack(0))) + .unwrap(); + let block_b = block_foo().add_to_forest(&mut forest_b).unwrap(); + let num_ops_b = forest_b[block_b].get_basic_block().unwrap().num_operations() as usize; + forest_b + .debug_info_mut() + .register_asm_ops(block_b, num_ops_b, vec![(0, asm_id_b)]) + .unwrap(); + forest_b + .debug_info_mut() + .register_op_indexed_debug_vars(block_b, vec![(0, dvar_b)]) + .unwrap(); + forest_b.make_root(block_b); + + let (merged, root_map) = MastForest::merge([&forest_a, &forest_b]).unwrap(); + + let new_a = root_map.map_root(0, &block_a).unwrap(); + let new_b = root_map.map_root(1, &block_b).unwrap(); + + assert_ne!( + new_a, new_b, + "same-structure blocks with different AssemblyOps must not collapse" + ); + assert_eq!( + merged.debug_info().first_asm_op_for_node(new_a).unwrap().context_name(), + "ctx_a" + ); + assert_eq!( + merged.debug_info().first_asm_op_for_node(new_b).unwrap().context_name(), + "ctx_b" + ); +} + +/// Debug vars are only representable on basic block nodes. The builder API +/// (`ensure_block`) is the sole entry point for attaching debug vars, and +/// control-flow nodes (Join, Split, Loop, Call, Dyn) have no `debug_vars` +/// parameter. This test verifies that after assembly, non-block nodes carry +/// no debug vars. +#[test] +fn non_basic_block_nodes_have_no_debug_vars() { + use crate::mast::{JoinNodeBuilder, SplitNodeBuilder}; + + let mut forest = MastForest::new(); + + // Two leaf blocks (no debug vars). + let block_a = block_foo().add_to_forest(&mut forest).unwrap(); + let block_b = block_bar().add_to_forest(&mut forest).unwrap(); + + // Join node wrapping the two. + let join = JoinNodeBuilder::new([block_a, block_b]); + let join_id = join.add_to_forest(&mut forest).unwrap(); + + // Split node wrapping the two. + let split = SplitNodeBuilder::new([block_a, block_b]); + let split_id = split.add_to_forest(&mut forest).unwrap(); + + // Loop node. + let loop_node = LoopNodeBuilder::new(block_a); + let loop_id = loop_node.add_to_forest(&mut forest).unwrap(); + + forest.make_root(join_id); + forest.make_root(split_id); + forest.make_root(loop_id); + + // None of these control-flow nodes should have debug vars. + assert!( + forest.debug_info().debug_vars_for_node(join_id).is_empty(), + "join node must not carry debug vars" + ); + assert!( + forest.debug_info().debug_vars_for_node(split_id).is_empty(), + "split node must not carry debug vars" + ); + assert!( + forest.debug_info().debug_vars_for_node(loop_id).is_empty(), + "loop node must not carry debug vars" + ); +} + +/// Identical debug var content from two forests collapses to one node. +#[test] +fn merge_deduplicates_blocks_with_same_debug_vars() { + let mut forest_a = MastForest::new(); + let dvar_a = forest_a + .add_debug_var(DebugVarInfo::new("x", DebugVarLocation::Stack(0))) + .unwrap(); + let block_a = block_foo().add_to_forest(&mut forest_a).unwrap(); + forest_a + .debug_info_mut() + .register_op_indexed_debug_vars(block_a, vec![(0, dvar_a)]) + .unwrap(); + forest_a.make_root(block_a); + + let mut forest_b = MastForest::new(); + let dvar_b = forest_b + .add_debug_var(DebugVarInfo::new("x", DebugVarLocation::Stack(0))) + .unwrap(); + let block_b = block_foo().add_to_forest(&mut forest_b).unwrap(); + forest_b + .debug_info_mut() + .register_op_indexed_debug_vars(block_b, vec![(0, dvar_b)]) + .unwrap(); + forest_b.make_root(block_b); + + let (merged, root_map) = MastForest::merge([&forest_a, &forest_b]).unwrap(); + let new_a = root_map.map_root(0, &block_a).unwrap(); + let new_b = root_map.map_root(1, &block_b).unwrap(); + + assert_eq!(new_a, new_b, "identical content must dedup to one node"); + assert_eq!(merged.debug_info().debug_vars_for_node(new_a).len(), 1); +} + +/// Different debug vars prevent compact from collapsing same-ops blocks. +#[test] +fn compact_keeps_blocks_with_different_debug_vars_distinct() { + let mut forest = MastForest::new(); + let var_x = forest + .add_debug_var(DebugVarInfo::new("x", DebugVarLocation::Stack(0))) + .unwrap(); + let var_y = forest + .add_debug_var(DebugVarInfo::new("y", DebugVarLocation::Stack(1))) + .unwrap(); + + let block_a = block_foo().add_to_forest(&mut forest).unwrap(); + forest + .debug_info_mut() + .register_op_indexed_debug_vars(block_a, vec![(0, var_x)]) + .unwrap(); + forest.make_root(block_a); + + let block_b = block_foo().add_to_forest(&mut forest).unwrap(); + forest + .debug_info_mut() + .register_op_indexed_debug_vars(block_b, vec![(0, var_y)]) + .unwrap(); + forest.make_root(block_b); + + let (compacted, root_map) = forest.compact(); + let new_a = root_map.map_root(0, &block_a).unwrap(); + let new_b = root_map.map_root(0, &block_b).unwrap(); + + assert_ne!(new_a, new_b, "different debug vars must survive compact"); + + let info_a = compacted + .debug_info() + .debug_var(compacted.debug_info().debug_vars_for_node(new_a)[0].1) + .unwrap(); + let info_b = compacted + .debug_info() + .debug_var(compacted.debug_info().debug_vars_for_node(new_b)[0].1) + .unwrap(); + assert_eq!(info_a.name(), "x"); + assert_eq!(info_b.name(), "y"); +} + +/// Procedure names survive compact. +#[test] +fn compact_preserves_procedure_names() { + let mut forest = MastForest::new(); + let block_id = block_foo().add_to_forest(&mut forest).unwrap(); + forest.make_root(block_id); + let digest = forest[block_id].digest(); + forest.insert_procedure_name(digest, Arc::from("my_fn")); + + let (compacted, _) = forest.compact(); + + assert_eq!(compacted.procedure_name(&digest), Some("my_fn")); +} + +/// Three-way merge keeps debug vars and asm ops on every root. +#[test] +fn merge_three_forests_preserves_all_metadata() { + let blocks = [block_foo, block_bar, block_qux]; + let var_names = ["a", "b", "c"]; + let ctx_names = ["ctx_1", "ctx_2", "ctx_3"]; + let mut forests = Vec::new(); + + for i in 0..3 { + let mut f = MastForest::new(); + let dvar = f + .add_debug_var(DebugVarInfo::new(var_names[i], DebugVarLocation::Stack(i as u8))) + .unwrap(); + let asm = AssemblyOp::new(None, ctx_names[i].into(), 1, "op".into()); + let asm_id = f.debug_info_mut().add_asm_op(asm).unwrap(); + + let block = blocks[i]().add_to_forest(&mut f).unwrap(); + let num_ops = f[block].get_basic_block().unwrap().num_operations() as usize; + f.debug_info_mut().register_asm_ops(block, num_ops, vec![(0, asm_id)]).unwrap(); + f.debug_info_mut() + .register_op_indexed_debug_vars(block, vec![(0, dvar)]) + .unwrap(); + f.make_root(block); + forests.push(f); + } + + let refs: Vec<&MastForest> = forests.iter().collect(); + let (merged, _) = MastForest::merge(refs).unwrap(); + + assert_eq!(merged.procedure_roots().len(), 3); + for root_id in merged.procedure_roots() { + assert_eq!(merged.debug_info().debug_vars_for_node(*root_id).len(), 1); + assert!(merged.debug_info().first_asm_op_for_node(*root_id).is_some()); + } +} + +/// External placeholder doesn't clobber the concrete node's asm ops / debug vars. +#[test] +fn merge_concrete_metadata_survives_external_placeholder() { + let mut forest_concrete = MastForest::new(); + let asm = AssemblyOp::new(None, "real_ctx".into(), 1, "mul".into()); + let asm_id = forest_concrete.debug_info_mut().add_asm_op(asm).unwrap(); + let dvar = forest_concrete + .add_debug_var(DebugVarInfo::new("v", DebugVarLocation::Stack(0))) + .unwrap(); + let block_id = block_foo().add_to_forest(&mut forest_concrete).unwrap(); + let num_ops = forest_concrete[block_id].get_basic_block().unwrap().num_operations() as usize; + forest_concrete + .debug_info_mut() + .register_asm_ops(block_id, num_ops, vec![(0, asm_id)]) + .unwrap(); + forest_concrete + .debug_info_mut() + .register_op_indexed_debug_vars(block_id, vec![(0, dvar)]) + .unwrap(); + forest_concrete.make_root(block_id); + let digest = forest_concrete[block_id].digest(); + + let mut forest_external = MastForest::new(); + let ext_id = ExternalNodeBuilder::new(digest).add_to_forest(&mut forest_external).unwrap(); + forest_external.make_root(ext_id); + + // external first, concrete second + let (merged, root_map) = MastForest::merge([&forest_external, &forest_concrete]).unwrap(); + let merged_id = root_map.map_root(1, &block_id).unwrap(); + + assert!( + merged.debug_info().first_asm_op_for_node(merged_id).is_some(), + "concrete asm-op must survive merge with external placeholder" + ); + assert_eq!( + merged.debug_info().debug_vars_for_node(merged_id).len(), + 1, + "concrete debug var must survive merge with external placeholder" + ); +} + +/// First name wins when two forests name the same digest. +#[test] +fn merge_procedure_names_first_name_wins() { + let mut forest_a = MastForest::new(); + let block_a = block_foo().add_to_forest(&mut forest_a).unwrap(); + forest_a.make_root(block_a); + let digest = forest_a[block_a].digest(); + forest_a.insert_procedure_name(digest, Arc::from("alias_a")); + + let mut forest_b = MastForest::new(); + let block_b = block_foo().add_to_forest(&mut forest_b).unwrap(); + forest_b.make_root(block_b); + assert_eq!(forest_b[block_b].digest(), digest); + forest_b.insert_procedure_name(digest, Arc::from("alias_b")); + + let (merged, root_map) = MastForest::merge([&forest_a, &forest_b]).unwrap(); + let new_a = root_map.map_root(0, &block_a).unwrap(); + let new_b = root_map.map_root(1, &block_b).unwrap(); + + assert_eq!(new_a, new_b); + assert_eq!( + merged.procedure_name(&digest), + Some("alias_a"), + "first forest's name must stick" + ); +} diff --git a/core/src/mast/mod.rs b/core/src/mast/mod.rs index ec98ec13d2..918c99cacf 100644 --- a/core/src/mast/mod.rs +++ b/core/src/mast/mod.rs @@ -190,8 +190,9 @@ impl MastForest { self.remap_and_add_nodes(retained_nodes, &id_remappings); self.remap_and_add_roots(old_root_ids, &id_remappings); - // Remap the asm_op_storage to use the new node IDs + // Remap the asm_op_storage and debug_var_storage to use the new node IDs self.debug_info.remap_asm_op_storage(&id_remappings); + self.debug_info.remap_debug_var_storage(&id_remappings); // Invalidate the cached commitment since we modified the forest structure self.commitment_cache.take(); diff --git a/core/src/mast/node/basic_block_node/mod.rs b/core/src/mast/node/basic_block_node/mod.rs index cf3d05dbbc..95bacde701 100644 --- a/core/src/mast/node/basic_block_node/mod.rs +++ b/core/src/mast/node/basic_block_node/mod.rs @@ -162,6 +162,21 @@ impl BasicBlockNode { }) .collect() } + + /// Adjusts padded operation indices back to raw indices for AssemblyOp mappings. + pub fn unadjust_asm_op_indices( + asm_ops: Vec<(usize, T)>, + op_batches: &[OpBatch], + ) -> Vec<(usize, T)> { + let pad2raw = PaddedToRawPrefix::new(op_batches); + asm_ops + .into_iter() + .map(|(padded_idx, id)| { + let raw = padded_idx - pad2raw[padded_idx]; + (raw, id) + }) + .collect() + } } // ------------------------------------------------------------------------------------------------ diff --git a/core/src/mast/node_builder_utils.rs b/core/src/mast/node_builder_utils.rs index 486dba6fdd..9f4ac9f531 100644 --- a/core/src/mast/node_builder_utils.rs +++ b/core/src/mast/node_builder_utils.rs @@ -74,9 +74,10 @@ where MastNodeBuilder::Call(builder) }, MastNode::Block(basic_block_node) => { - // For BasicBlockNode, we need to remap op-indexed decorators as well - let builder = BasicBlockNodeBuilder::new( - basic_block_node.operations().copied().collect(), + // Preserve the stored batches so copied blocks fingerprint the same way as raw-built + // equivalents, even when padding NOOPs shifted stored metadata indices. + let builder = BasicBlockNodeBuilder::from_op_batches( + basic_block_node.op_batches().to_vec(), basic_block_node .indexed_decorator_iter(source_forest) .map(|(idx, decorator_id)| { @@ -84,6 +85,7 @@ where Ok((idx, mapped_decorator)) }) .collect::, _>>()?, + basic_block_node.digest(), ) .with_before_enter(before_enter_decorators) .with_after_exit(after_exit_decorators); diff --git a/core/src/mast/node_fingerprint.rs b/core/src/mast/node_fingerprint.rs index af785bc2f3..38ed198c2c 100644 --- a/core/src/mast/node_fingerprint.rs +++ b/core/src/mast/node_fingerprint.rs @@ -49,6 +49,32 @@ impl MastNodeFingerprint { } } +// ------------------------------------------------------------------------------------------------ +/// Augmentation +impl MastNodeFingerprint { + /// Returns a new fingerprint that incorporates the given additional data into the + /// decorator root hash. + /// + /// This is used by the assembler to make the dedup key sensitive to external metadata + /// without storing that metadata on the node builder. + /// If `data` is empty, the fingerprint is returned unchanged. + pub fn augment_with_data(self, data: &[u8]) -> Self { + if data.is_empty() { + return self; + } + + let existing: &[u8] = match &self.decorator_root { + Some(root) => root.as_bytes(), + None => &[], + }; + let new_root = Blake3_256::hash_iter([existing, data].into_iter()); + Self { + mast_root: self.mast_root, + decorator_root: Some(new_root), + } + } +} + pub fn fingerprint_from_parts( forest: &MastForest, hash_by_node_id: &impl LookupByIdx, diff --git a/core/src/operations/decorators/debug_var.rs b/core/src/operations/decorators/debug_var.rs index 8eeeb0b500..7023fd27bb 100644 --- a/core/src/operations/decorators/debug_var.rs +++ b/core/src/operations/decorators/debug_var.rs @@ -94,6 +94,11 @@ impl DebugVarInfo { pub fn value_location(&self) -> &DebugVarLocation { &self.value_location } + + /// Replaces the value location in-place, preserving all other fields. + pub fn set_value_location(&mut self, value_location: DebugVarLocation) { + self.value_location = value_location; + } } /// Serde deserializer for `Arc`. @@ -148,6 +153,18 @@ pub enum DebugVarLocation { /// where offset is typically negative (locals are below FMP). /// For example, with 3 locals: local\[0\] has offset -3, local\[2\] has offset -1. Local(i16), + /// Variable is in WASM linear memory at an address computed from a global base + /// plus a byte offset: `value_of(global[global_index]) + byte_offset`. + /// + /// This corresponds to DWARF's `DW_OP_fbreg` where the frame base is a WASM + /// global (typically `__stack_pointer`). The byte offset is divided by the + /// element size (4 for i32) to get the Miden memory element address. + FrameBase { + /// WASM global index whose runtime value provides the base address + global_index: u32, + /// Byte offset from the base (may be positive or negative) + byte_offset: i64, + }, /// Complex location described by expression bytes. /// This is used for variables that require computation to locate, /// such as struct fields or array elements. @@ -161,6 +178,9 @@ impl fmt::Display for DebugVarLocation { Self::Memory(addr) => write!(f, "mem[{}]", addr), Self::Const(val) => write!(f, "const({})", val.as_canonical_u64()), Self::Local(offset) => write!(f, "FMP{:+}", offset), + Self::FrameBase { global_index, byte_offset } => { + write!(f, "global[{}]{:+}", global_index, byte_offset) + }, Self::Expression(bytes) => { write!(f, "expr(")?; for (i, byte) in bytes.iter().enumerate() { @@ -197,6 +217,11 @@ impl Serializable for DebugVarLocation { target.write_u8(3); target.write_bytes(&offset.to_le_bytes()); }, + Self::FrameBase { global_index, byte_offset } => { + target.write_u8(5); + target.write_u32(*global_index); + target.write_bytes(&byte_offset.to_le_bytes()); + }, Self::Expression(bytes) => { target.write_u8(4); bytes.write_into(target); @@ -223,6 +248,12 @@ impl Deserializable for DebugVarLocation { let bytes = Vec::::read_from(source)?; Ok(Self::Expression(bytes)) }, + 5 => { + let global_index = source.read_u32()?; + let bytes = source.read_array::<8>()?; + let byte_offset = i64::from_le_bytes(bytes); + Ok(Self::FrameBase { global_index, byte_offset }) + }, _ => Err(DeserializationError::InvalidValue(format!( "invalid DebugVarLocation tag: {tag}" ))), @@ -316,6 +347,7 @@ mod tests { DebugVarLocation::Memory(0xdead_beef), DebugVarLocation::Const(Felt::new(999)), DebugVarLocation::Local(-3), + DebugVarLocation::FrameBase { global_index: 20, byte_offset: -12 }, DebugVarLocation::Expression(vec![0x10, 0x20, 0x30]), ]; diff --git a/crates/assembly/src/assembler.rs b/crates/assembly/src/assembler.rs index 9bb4e7b5ee..6e881619f7 100644 --- a/crates/assembly/src/assembler.rs +++ b/crates/assembly/src/assembler.rs @@ -537,6 +537,7 @@ impl Assembler { } }); let mut mast_forest_builder = MastForestBuilder::new(staticlibs)?; + mast_forest_builder.set_emit_debug_info(self.emit_debug_info); let mut exports = { let mut exports = BTreeMap::new(); @@ -763,6 +764,7 @@ impl Assembler { } }); let mut mast_forest_builder = MastForestBuilder::new(staticlibs)?; + mast_forest_builder.set_emit_debug_info(self.emit_debug_info); if let Some(advice_map) = self.linker[module_index].advice_map() { mast_forest_builder.merge_advice_map(advice_map)?; @@ -1140,19 +1142,15 @@ impl Assembler { next_depth, )?; + let asm_op = self.create_asmop_decorator(span, "if.true", proc_ctx); let mut split_builder = SplitNodeBuilder::new([then_blk, else_blk]); if let Some(decorator_ids) = block_builder.drain_decorators() { split_builder.append_before_enter(decorator_ids); } - let split_node_id = - block_builder.mast_forest_builder_mut().ensure_node(split_builder)?; - - // Add an assembly operation to the if node. - let asm_op = self.create_asmop_decorator(span, "if.true", proc_ctx); - block_builder + let split_node_id = block_builder .mast_forest_builder_mut() - .register_node_asm_op(split_node_id, asm_op)?; + .ensure_node_with_asm_op(split_builder, asm_op)?; body_node_ids.push(split_node_id); }, @@ -1205,7 +1203,10 @@ impl Assembler { } if let Some(decorator_ids) = block_builder.drain_decorators() { - // Attach the decorators before the first instance of the repeated node + // Attach decorators before the first iteration. We must carry the + // original node's external metadata into the dedup fingerprint, + // otherwise structurally identical nodes with different source mappings + // can alias. let first_repeat_builder = block_builder.mast_forest_builder() [repeat_node_id] .clone() @@ -1213,7 +1214,10 @@ impl Assembler { .with_before_enter(decorator_ids); let first_repeat_node_id = block_builder .mast_forest_builder_mut() - .ensure_node(first_repeat_builder)?; + .ensure_node_preserving_debug_vars( + first_repeat_builder, + repeat_node_id, + )?; body_node_ids.push(first_repeat_node_id); let remaining_iterations = @@ -1269,14 +1273,10 @@ impl Assembler { loop_builder.append_before_enter(decorator_ids); } - let loop_node_id = - block_builder.mast_forest_builder_mut().ensure_node(loop_builder)?; - - // Add an assembly operation to the loop node. let asm_op = self.create_asmop_decorator(span, "while.true", proc_ctx); - block_builder + let loop_node_id = block_builder .mast_forest_builder_mut() - .register_node_asm_op(loop_node_id, asm_op)?; + .ensure_node_with_asm_op(loop_builder, asm_op)?; body_node_ids.push(loop_node_id); }, @@ -1319,6 +1319,7 @@ impl Assembler { vec![], vec![], vec![], + vec![], )? } else { let asm_op = self.create_asmop_decorator(&proc_ctx.span(), "begin", proc_ctx); diff --git a/crates/assembly/src/basic_block_builder.rs b/crates/assembly/src/basic_block_builder.rs index 9efb0c9562..5a8437ba4a 100644 --- a/crates/assembly/src/basic_block_builder.rs +++ b/crates/assembly/src/basic_block_builder.rs @@ -231,15 +231,14 @@ impl BasicBlockBuilder<'_> { let asm_ops = core::mem::take(&mut self.asm_ops); let debug_vars: Vec<(usize, DebugVarId)> = self.debug_vars.drain(..).collect(); - let basic_block_node_id = - self.mast_forest_builder - .ensure_block(ops, decorators, asm_ops, vec![], vec![])?; - - // Register debug variables for this node (stored separately from decorators) - if !debug_vars.is_empty() { - self.mast_forest_builder - .register_debug_vars_for_node(basic_block_node_id, debug_vars)?; - } + let basic_block_node_id = self.mast_forest_builder.ensure_block( + ops, + decorators, + asm_ops, + debug_vars, + vec![], + vec![], + )?; Ok(Some(basic_block_node_id)) } else { diff --git a/crates/assembly/src/instruction/mod.rs b/crates/assembly/src/instruction/mod.rs index f8270157ee..e9761e04c4 100644 --- a/crates/assembly/src/instruction/mod.rs +++ b/crates/assembly/src/instruction/mod.rs @@ -9,7 +9,7 @@ use miden_assembly_syntax::{ use miden_core::{ Felt, WORD_SIZE, ZERO, mast::MastNodeId, - operations::{Decorator, Operation}, + operations::{AssemblyOp, Decorator, Operation}, }; use crate::{ @@ -65,13 +65,13 @@ impl Assembler { }; // Compile the instruction (decorators are now always empty for this path). - let opt_new_node_id = - self.compile_instruction_impl(instruction, block_builder, proc_ctx, vec![])?; - - // If we have a pending AssemblyOp for a node-creating instruction, register it now. - if let (Some(asm_op), Some(node_id)) = (pending_node_asm_op, opt_new_node_id) { - block_builder.mast_forest_builder_mut().register_node_asm_op(node_id, asm_op)?; - } + let opt_new_node_id = self.compile_instruction_impl( + instruction, + block_builder, + proc_ctx, + vec![], + pending_node_asm_op, + )?; // If we didn't create a node, set the cycle count after compilation. if !can_create_node { @@ -87,6 +87,7 @@ impl Assembler { block_builder: &mut BasicBlockBuilder, proc_ctx: &mut ProcedureContext, before_enter: Vec, + node_asm_op: Option, ) -> Result, Report> { use Operation::*; @@ -548,6 +549,7 @@ impl Assembler { proc_ctx.id(), block_builder.mast_forest_builder_mut(), before_enter, + None, ) .map(Into::into); }, @@ -559,6 +561,7 @@ impl Assembler { proc_ctx.id(), block_builder.mast_forest_builder_mut(), before_enter, + Some(node_asm_op.expect("call instructions must provide an AssemblyOp")), ) .map(Into::into); }, @@ -570,14 +573,23 @@ impl Assembler { proc_ctx.id(), block_builder.mast_forest_builder_mut(), before_enter, + Some(node_asm_op.expect("syscall instructions must provide an AssemblyOp")), ) .map(Into::into); }, Instruction::DynExec => { - return self.dynexec(block_builder.mast_forest_builder_mut(), before_enter); + return self.dynexec( + block_builder.mast_forest_builder_mut(), + before_enter, + node_asm_op.expect("dynexec instructions must provide an AssemblyOp"), + ); }, Instruction::DynCall => { - return self.dyncall(block_builder.mast_forest_builder_mut(), before_enter); + return self.dyncall( + block_builder.mast_forest_builder_mut(), + before_enter, + node_asm_op.expect("dyncall instructions must provide an AssemblyOp"), + ); }, Instruction::ProcRef(callee) => self.procref(callee, proc_ctx.id(), block_builder)?, diff --git a/crates/assembly/src/instruction/procedures.rs b/crates/assembly/src/instruction/procedures.rs index b7b92544f5..b28fb707b5 100644 --- a/crates/assembly/src/instruction/procedures.rs +++ b/crates/assembly/src/instruction/procedures.rs @@ -6,8 +6,8 @@ use miden_assembly_syntax::{ diagnostics::Report, }; use miden_core::{ - mast::{MastNodeExt, MastNodeId}, - operations::Operation, + mast::{CallNodeBuilder, DynNodeBuilder, MastForestContributor, MastNodeExt, MastNodeId}, + operations::{AssemblyOp, Operation}, }; use smallvec::SmallVec; @@ -30,6 +30,7 @@ impl Assembler { caller: GlobalItemIndex, mast_forest_builder: &mut MastForestBuilder, before_enter: Vec, + asm_op: Option, ) -> Result { let resolved = self .resolve_target(kind, callee, caller, mast_forest_builder)? @@ -37,12 +38,18 @@ impl Assembler { match kind { InvokeKind::ProcRef | InvokeKind::Exec => Ok(resolved.node), - InvokeKind::Call => { - mast_forest_builder.ensure_call(resolved.node, before_enter, vec![]) - }, - InvokeKind::SysCall => { - mast_forest_builder.ensure_syscall(resolved.node, before_enter, vec![]) - }, + InvokeKind::Call => mast_forest_builder.ensure_node_with_asm_op( + CallNodeBuilder::new(resolved.node) + .with_before_enter(before_enter) + .with_after_exit(vec![]), + asm_op.expect("call invocations must provide an AssemblyOp"), + ), + InvokeKind::SysCall => mast_forest_builder.ensure_node_with_asm_op( + CallNodeBuilder::new_syscall(resolved.node) + .with_before_enter(before_enter) + .with_after_exit(vec![]), + asm_op.expect("syscall invocations must provide an AssemblyOp"), + ), } } @@ -51,8 +58,14 @@ impl Assembler { &self, mast_forest_builder: &mut MastForestBuilder, before_enter: Vec, + asm_op: AssemblyOp, ) -> Result, Report> { - let dyn_node_id = mast_forest_builder.ensure_dyn(before_enter, vec![])?; + let dyn_node_id = mast_forest_builder.ensure_node_with_asm_op( + DynNodeBuilder::new_dyn() + .with_before_enter(before_enter) + .with_after_exit(vec![]), + asm_op, + )?; Ok(Some(dyn_node_id)) } @@ -62,8 +75,14 @@ impl Assembler { &self, mast_forest_builder: &mut MastForestBuilder, before_enter: Vec, + asm_op: AssemblyOp, ) -> Result, Report> { - let dyn_call_node_id = mast_forest_builder.ensure_dyncall(before_enter, vec![])?; + let dyn_call_node_id = mast_forest_builder.ensure_node_with_asm_op( + DynNodeBuilder::new_dyncall() + .with_before_enter(before_enter) + .with_after_exit(vec![]), + asm_op, + )?; Ok(Some(dyn_call_node_id)) } diff --git a/crates/assembly/src/mast_forest_builder.rs b/crates/assembly/src/mast_forest_builder.rs index 7eb1cbbbd8..37100f09ba 100644 --- a/crates/assembly/src/mast_forest_builder.rs +++ b/crates/assembly/src/mast_forest_builder.rs @@ -9,12 +9,13 @@ use miden_core::{ Felt, Word, advice::AdviceMap, mast::{ - AsmOpId, BasicBlockNode, BasicBlockNodeBuilder, CallNodeBuilder, DecoratorFingerprint, - DecoratorId, DynNodeBuilder, ExternalNodeBuilder, JoinNodeBuilder, MastForest, - MastForestContributor, MastForestError, MastNode, MastNodeBuilder, MastNodeExt, - MastNodeFingerprint, MastNodeId, Remapping, SubtreeIterator, + AsmOpId, BasicBlockNode, BasicBlockNodeBuilder, DecoratorFingerprint, DecoratorId, + ExternalNodeBuilder, JoinNodeBuilder, MastForest, MastForestContributor, MastForestError, + MastNode, MastNodeBuilder, MastNodeExt, MastNodeFingerprint, MastNodeId, Remapping, + SubtreeIterator, }, operations::{AssemblyOp, Decorator, DecoratorList, Operation}, + serde::Serializable, }; use super::{GlobalItemIndex, LinkerError, Procedure}; @@ -82,6 +83,18 @@ pub struct MastForestBuilder { /// when `build()` is called. This is necessary because the CSR structure requires nodes /// to be added in sequential order, but nodes may be created in any order during assembly. pending_asm_op_mappings: Vec<(MastNodeId, Vec<(usize, AsmOpId)>)>, + /// Pending debug variable mappings to be registered at build time. + /// + /// Like `pending_asm_op_mappings`, these are collected during assembly and registered all + /// at once in sorted node order when `build()` is called. This avoids the crash that + /// occurs when `register_debug_vars_for_node` is called with an out-of-order node ID + /// (which happens when `ensure_block` deduplicates a basic block and returns an + /// already-existing `MastNodeId`). + pending_debug_var_mappings: Vec<(MastNodeId, Vec<(usize, miden_core::mast::DebugVarId)>)>, + /// When false, asm ops and debug vars are not included in the dedup + /// fingerprint. This avoids keeping duplicate nodes in stripped builds + /// where the metadata that justified the split has been discarded. + emit_debug_info: bool, } impl MastForestBuilder { @@ -107,10 +120,29 @@ impl MastForestBuilder { Ok(MastForestBuilder { mast_forest, statically_linked_mast: Arc::new(statically_linked_mast), + emit_debug_info: true, ..Self::default() }) } + /// When set to true, asm ops and debug vars participate in the dedup + /// fingerprint so nodes with different source metadata stay distinct. + /// When false (release builds), only ops and decorators matter. + pub fn set_emit_debug_info(&mut self, emit: bool) { + self.emit_debug_info = emit; + } + + /// Augments a fingerprint with metadata bytes only when debug info is + /// being emitted. In stripped builds this is a no-op so identical-ops + /// nodes collapse back to a single node. + fn maybe_augment(&self, fp: MastNodeFingerprint, data: &[u8]) -> MastNodeFingerprint { + if self.emit_debug_info { + fp.augment_with_data(data) + } else { + fp + } + } + /// Returns a reference to the underlying [`MastForest`]. pub fn mast_forest(&self) -> &MastForest { &self.mast_forest @@ -141,6 +173,20 @@ impl MastForestBuilder { .expect("failed to register AssemblyOps - internal ordering error"); } + // Register all pending debug variable mappings in sorted node order. + // The CSR structure requires sequential node registration. + // Debug vars are included in the augmented dedup fingerprint, so blocks with + // different debug vars are not deduplicated. The dedup here is only a safety measure. + let debug_var_mappings = + deduplicate_debug_var_mappings(core::mem::take(&mut self.pending_debug_var_mappings)); + + for (node_id, debug_vars) in debug_var_mappings { + self.mast_forest + .debug_info_mut() + .register_op_indexed_debug_vars(node_id, debug_vars) + .expect("failed to register debug variables - internal ordering error"); + } + let nodes_to_remove = get_nodes_to_remove(self.merged_basic_block_ids, &self.mast_forest); let id_remappings = self.mast_forest.remove_nodes(&nodes_to_remove); @@ -184,6 +230,145 @@ fn deduplicate_asm_op_mappings( .collect() } +fn append_serialized_asm_op(data: &mut Vec, op_idx: usize, asm_op: &AssemblyOp) { + data.extend_from_slice(&op_idx.to_le_bytes()); + asm_op.context_name().write_into(data); + asm_op.op().write_into(data); + asm_op.num_cycles().write_into(data); + match asm_op.location() { + Some(location) => { + data.push(1); + location.uri.write_into(data); + data.extend_from_slice(&u32::from(location.start).to_le_bytes()); + data.extend_from_slice(&u32::from(location.end).to_le_bytes()); + }, + None => data.push(0), + } +} + +/// Serializes AssemblyOp content into bytes for fingerprint augmentation. +/// +/// This uses the resolved [`AssemblyOp`] payload rather than raw `AsmOpId`s so dedup depends on +/// source-mapping semantics, not allocation order. +fn serialize_asm_ops(asm_ops: &[(usize, AssemblyOp)]) -> Vec { + let mut data = Vec::new(); + for (op_idx, asm_op) in asm_ops { + append_serialized_asm_op(&mut data, *op_idx, asm_op); + } + data +} + +/// Serializes AssemblyOp content referenced by `AsmOpId`s into bytes for fingerprint augmentation. +fn serialize_asm_op_ids(forest: &MastForest, asm_op_ids: &[(usize, AsmOpId)]) -> Vec { + let mut data = Vec::new(); + for (op_idx, asm_op_id) in asm_op_ids { + if let Some(asm_op) = forest.debug_info().asm_op(*asm_op_id) { + append_serialized_asm_op(&mut data, *op_idx, asm_op); + } + } + data +} + +/// Looks up and serializes the asm ops for a given node from the pending mappings. +fn serialize_asm_ops_for_node( + forest: &MastForest, + pending: &[(MastNodeId, Vec<(usize, AsmOpId)>)], + node_id: MastNodeId, +) -> Vec { + for (id, asm_ops) in pending { + if *id == node_id { + return serialize_asm_op_ids(forest, asm_ops); + } + } + Vec::new() +} + +/// Looks up and serializes the asm ops registered for a node in an existing forest. +fn serialize_asm_ops_from_forest_node(forest: &MastForest, node_id: MastNodeId) -> Vec { + let mut entries = forest.debug_info().asm_ops_for_node(node_id); + if entries.is_empty() { + return Vec::new(); + } + + if let MastNode::Block(block) = &forest[node_id] { + entries = BasicBlockNode::unadjust_asm_op_indices(entries, block.op_batches()); + } + + let mut data = Vec::new(); + for (op_idx, asm_op_id) in entries { + if let Some(asm_op) = forest.debug_info().asm_op(asm_op_id) { + append_serialized_asm_op(&mut data, op_idx, asm_op); + } + } + data +} + +/// Serializes debug variable content into bytes for fingerprint augmentation. +/// +/// This uses the resolved [`DebugVarInfo`] payload rather than raw `DebugVarId`s so +/// dedup depends on variable semantics, not allocation order. +fn serialize_debug_vars( + forest: &MastForest, + debug_vars: &[(usize, miden_core::mast::DebugVarId)], +) -> Vec { + let mut data = Vec::new(); + for (op_idx, var_id) in debug_vars { + data.extend_from_slice(&op_idx.to_le_bytes()); + if let Some(debug_var) = forest.debug_info().debug_var(*var_id) { + debug_var.write_into(&mut data); + } + } + data +} + +/// Looks up and serializes the debug vars for a given node from the pending mappings. +fn serialize_debug_vars_for_node( + forest: &MastForest, + pending: &[(MastNodeId, Vec<(usize, miden_core::mast::DebugVarId)>)], + node_id: MastNodeId, +) -> Vec { + for (id, vars) in pending { + if *id == node_id { + return serialize_debug_vars(forest, vars); + } + } + Vec::new() +} + +/// Looks up and serializes the debug vars registered for a node in an existing forest. +fn serialize_debug_vars_from_forest_node(forest: &MastForest, node_id: MastNodeId) -> Vec { + let entries = forest.debug_info().debug_vars_for_node(node_id); + if entries.is_empty() { + return Vec::new(); + } + + let mut data = Vec::new(); + for (op_idx, var_id) in entries { + data.extend_from_slice(&op_idx.to_le_bytes()); + if let Some(debug_var) = forest.debug_info().debug_var(var_id) { + debug_var.write_into(&mut data); + } + } + data +} + +/// Deduplicates debug variable mappings by node_id (keeps first registration). +/// +/// Debug vars are included in the augmented dedup fingerprint, so blocks with different +/// debug vars will not be deduplicated. This function is a safety measure to handle any +/// remaining duplicate registrations (e.g. from control flow node deduplication). +fn deduplicate_debug_var_mappings( + mut mappings: Vec<(MastNodeId, Vec<(usize, miden_core::mast::DebugVarId)>)>, +) -> Vec<(MastNodeId, Vec<(usize, miden_core::mast::DebugVarId)>)> { + mappings.sort_by_key(|(node_id, _)| *node_id); + + let mut seen_node_ids = BTreeSet::new(); + mappings + .into_iter() + .filter(|(node_id, _)| seen_node_ids.insert(*node_id)) + .collect() +} + /// Takes the set of MAST node ids (all basic blocks) that were merged as part of the assembly /// process (i.e. they were contiguous and were merged into a single basic block), and returns the /// subset of nodes that can be removed from the MAST forest. @@ -333,11 +518,14 @@ impl MastForestBuilder { while let (Some(left), Some(right)) = (source_mast_node_iter.next(), source_mast_node_iter.next()) { - let join_mast_node_id = self.ensure_join(left, right, vec![], vec![])?; - - if let Some(ref asm_op) = asm_op { - self.register_node_asm_op(join_mast_node_id, asm_op.clone())?; - } + let join_builder = JoinNodeBuilder::new([left, right]) + .with_before_enter(vec![]) + .with_after_exit(vec![]); + let join_mast_node_id = if let Some(ref asm_op) = asm_op { + self.ensure_node_with_asm_op(join_builder, asm_op.clone())? + } else { + self.ensure_node(join_builder)? + }; node_ids.push(join_mast_node_id); } @@ -392,8 +580,9 @@ impl MastForestBuilder { let mut operations: Vec = Vec::new(); let mut decorators = DecoratorList::new(); - // Track asm_ops being accumulated for merged blocks, with adjusted indices + // Track asm_ops and debug_vars being accumulated for merged blocks, with adjusted indices let mut merged_asm_ops: Vec<(usize, AsmOpId)> = Vec::new(); + let mut merged_debug_vars: Vec<(usize, miden_core::mast::DebugVarId)> = Vec::new(); let mut merged_basic_blocks: Vec = Vec::new(); @@ -422,8 +611,13 @@ impl MastForestBuilder { }; let ops_offset = operations.len(); - // Transfer any pending asm_ops for this block to the merged result + // Transfer any pending asm_ops and debug_vars for this block to the merged result self.transfer_asm_ops_for_merge(basic_block_id, ops_offset, &mut merged_asm_ops); + self.transfer_debug_vars_for_merge( + basic_block_id, + ops_offset, + &mut merged_debug_vars, + ); // Add decorators with adjusted indices for (op_idx, decorator) in block_decorators { @@ -437,10 +631,12 @@ impl MastForestBuilder { let block_ops = core::mem::take(&mut operations); let block_decorators = core::mem::take(&mut decorators); let block_asm_ops = core::mem::take(&mut merged_asm_ops); - let merged_basic_block_id = self.ensure_block_with_asm_op_ids( + let block_debug_vars = core::mem::take(&mut merged_debug_vars); + let merged_basic_block_id = self.ensure_block_with_asm_op_and_debug_var_ids( block_ops, block_decorators, block_asm_ops, + block_debug_vars, vec![], vec![], )?; @@ -455,10 +651,11 @@ impl MastForestBuilder { self.merged_basic_block_ids.extend(contiguous_basic_block_ids.iter()); if !operations.is_empty() || !decorators.is_empty() { - let merged_basic_block = self.ensure_block_with_asm_op_ids( + let merged_basic_block = self.ensure_block_with_asm_op_and_debug_var_ids( operations, decorators, merged_asm_ops, + merged_debug_vars, vec![], vec![], )?; @@ -468,49 +665,90 @@ impl MastForestBuilder { Ok(merged_basic_blocks) } - /// Helper to transfer pending asm_ops from a block being merged. - /// Removes the asm_ops from pending_asm_op_mappings and adds them to the merged list - /// with indices adjusted by the given offset. + /// Copies pending asm_ops from a source block into the merged list with adjusted indices. + /// + /// The source block's entries are left in `pending_asm_op_mappings` so that if it + /// survives removal (e.g. it's a procedure root or referenced child), its metadata + /// is still registered during `build()`. fn transfer_asm_ops_for_merge( - &mut self, + &self, source_block_id: MastNodeId, ops_offset: usize, merged_asm_ops: &mut Vec<(usize, AsmOpId)>, ) { - // Partition pending mappings into those matching source_block_id and the rest - let (matched, rest): (Vec<_>, Vec<_>) = core::mem::take(&mut self.pending_asm_op_mappings) - .into_iter() - .partition(|(node_id, _)| *node_id == source_block_id); - - self.pending_asm_op_mappings = rest; + for (node_id, asm_ops) in &self.pending_asm_op_mappings { + if *node_id == source_block_id { + merged_asm_ops.extend( + asm_ops.iter().map(|(op_idx, asm_op_id)| (op_idx + ops_offset, *asm_op_id)), + ); + } + } + } - // Add matched asm_ops to merged list with adjusted indices - for (_, asm_ops) in matched { - merged_asm_ops.extend( - asm_ops.into_iter().map(|(op_idx, asm_op_id)| (op_idx + ops_offset, asm_op_id)), - ); + /// Copies pending debug_vars from a source block into the merged list with adjusted indices. + /// + /// Same as `transfer_asm_ops_for_merge`: source entries are kept so surviving blocks + /// retain their metadata. + fn transfer_debug_vars_for_merge( + &self, + source_block_id: MastNodeId, + ops_offset: usize, + merged_debug_vars: &mut Vec<(usize, miden_core::mast::DebugVarId)>, + ) { + for (node_id, debug_vars) in &self.pending_debug_var_mappings { + if *node_id == source_block_id { + merged_debug_vars.extend( + debug_vars.iter().map(|(op_idx, var_id)| (op_idx + ops_offset, *var_id)), + ); + } } } - /// Like ensure_block but takes pre-existing AsmOpIds instead of AssemblyOps. - /// Used when merging blocks that already have their asm_ops registered. - fn ensure_block_with_asm_op_ids( + /// Like ensure_block but takes pre-existing AsmOpIds and DebugVarIds instead of raw + /// AssemblyOps. Used when merging blocks that already have their metadata registered. + fn ensure_block_with_asm_op_and_debug_var_ids( &mut self, operations: Vec, decorators: DecoratorList, asm_op_ids: Vec<(usize, AsmOpId)>, + debug_vars: Vec<(usize, miden_core::mast::DebugVarId)>, before_enter: Vec, after_exit: Vec, ) -> Result { let block = BasicBlockNodeBuilder::new(operations, decorators) .with_before_enter(before_enter) .with_after_exit(after_exit); - let (node_id, is_new) = self.ensure_node_exists(block)?; - // Only register AssemblyOps for newly created nodes. + let base_fingerprint = block + .fingerprint_for_node(&self.mast_forest, &self.hash_by_node_id) + .expect("hash_by_node_id should contain the fingerprints of all children of `node`"); + let dedup_fingerprint = self.maybe_augment( + self.maybe_augment( + base_fingerprint, + &serialize_asm_op_ids(&self.mast_forest, &asm_op_ids), + ), + &serialize_debug_vars(&self.mast_forest, &debug_vars), + ); + + let (node_id, is_new) = + if let Some(node_id) = self.node_id_by_fingerprint.get(&dedup_fingerprint) { + (*node_id, false) + } else { + let new_node_id = block + .add_to_forest(&mut self.mast_forest) + .into_diagnostic() + .wrap_err("assembler failed to add new node")?; + self.node_id_by_fingerprint.insert(dedup_fingerprint, new_node_id); + self.hash_by_node_id.insert(new_node_id, dedup_fingerprint); + (new_node_id, true) + }; + if is_new && !asm_op_ids.is_empty() { self.pending_asm_op_mappings.push((node_id, asm_op_ids)); } + if is_new && !debug_vars.is_empty() { + self.pending_debug_var_mappings.push((node_id, debug_vars)); + } Ok(node_id) } @@ -566,6 +804,178 @@ impl MastForestBuilder { Ok(node_id) } + /// Like [`Self::ensure_node`], but includes an AssemblyOp in the dedup fingerprint and + /// registers it for the node if a new node is created. + pub(crate) fn ensure_node_with_asm_op( + &mut self, + builder: impl MastForestContributor, + asm_op: AssemblyOp, + ) -> Result { + let base_fingerprint = builder + .fingerprint_for_node(&self.mast_forest, &self.hash_by_node_id) + .expect("hash_by_node_id should contain the fingerprints of all children of `node`"); + let dedup_fingerprint = + self.maybe_augment(base_fingerprint, &serialize_asm_ops(&[(0, asm_op.clone())])); + + if let Some(node_id) = self.node_id_by_fingerprint.get(&dedup_fingerprint) { + Ok(*node_id) + } else { + let new_node_id = builder + .add_to_forest(&mut self.mast_forest) + .into_diagnostic() + .wrap_err("assembler failed to add new node")?; + self.node_id_by_fingerprint.insert(dedup_fingerprint, new_node_id); + self.hash_by_node_id.insert(new_node_id, dedup_fingerprint); + + let asm_op_id = self + .mast_forest + .debug_info_mut() + .add_asm_op(asm_op) + .into_diagnostic() + .wrap_err("failed to add AssemblyOp for control flow node")?; + self.pending_asm_op_mappings.push((new_node_id, vec![(0, asm_op_id)])); + + Ok(new_node_id) + } + } + + /// Like [`Self::ensure_node`], but includes external metadata from `source_node_id` in the + /// dedup fingerprint and copies it to the new node. Used when cloning a node (e.g. the + /// repeat path) so that the rebuilt node keeps the same dedup semantics as the original. + pub(crate) fn ensure_node_preserving_debug_vars( + &mut self, + builder: impl MastForestContributor, + source_node_id: MastNodeId, + ) -> Result { + let base_fingerprint = builder + .fingerprint_for_node(&self.mast_forest, &self.hash_by_node_id) + .expect("hash_by_node_id should contain the fingerprints of all children of `node`"); + + // Augment with the source node's external metadata, matching ensure_block() semantics. + let asm_ops_data = serialize_asm_ops_for_node( + &self.mast_forest, + &self.pending_asm_op_mappings, + source_node_id, + ); + let debug_vars_data = serialize_debug_vars_for_node( + &self.mast_forest, + &self.pending_debug_var_mappings, + source_node_id, + ); + let dedup_fingerprint = self + .maybe_augment(self.maybe_augment(base_fingerprint, &asm_ops_data), &debug_vars_data); + + if let Some(node_id) = self.node_id_by_fingerprint.get(&dedup_fingerprint) { + Ok(*node_id) + } else { + let new_node_id = builder + .add_to_forest(&mut self.mast_forest) + .into_diagnostic() + .wrap_err("assembler failed to add new node")?; + self.node_id_by_fingerprint.insert(dedup_fingerprint, new_node_id); + self.hash_by_node_id.insert(new_node_id, dedup_fingerprint); + + // Carry over AssemblyOp registration from the source node. + let asm_ops: Option> = self + .pending_asm_op_mappings + .iter() + .find(|(id, _)| *id == source_node_id) + .map(|(_, asm_ops)| asm_ops.clone()); + if let Some(asm_ops) = asm_ops + && !asm_ops.is_empty() + { + self.pending_asm_op_mappings.push((new_node_id, asm_ops)); + } + + // Carry over debug var registration from the source node. + let debug_vars: Option> = self + .pending_debug_var_mappings + .iter() + .find(|(id, _)| *id == source_node_id) + .map(|(_, vars)| vars.clone()); + if let Some(vars) = debug_vars + && !vars.is_empty() + { + self.pending_debug_var_mappings.push((new_node_id, vars)); + } + + Ok(new_node_id) + } + } + + /// Copies a statically linked node into this builder while keeping source metadata in the + /// dedup fingerprint and remapping it into the target forest when a new node is created. + fn ensure_node_from_statically_linked_source( + &mut self, + builder: impl MastForestContributor, + source_node_id: MastNodeId, + ) -> Result { + let base_fingerprint = builder + .fingerprint_for_node(&self.mast_forest, &self.hash_by_node_id) + .expect("hash_by_node_id should contain the fingerprints of all children of `node`"); + let asm_ops_data = + serialize_asm_ops_from_forest_node(&self.statically_linked_mast, source_node_id); + let debug_vars_data = + serialize_debug_vars_from_forest_node(&self.statically_linked_mast, source_node_id); + let dedup_fingerprint = self + .maybe_augment(self.maybe_augment(base_fingerprint, &asm_ops_data), &debug_vars_data); + + if let Some(node_id) = self.node_id_by_fingerprint.get(&dedup_fingerprint) { + return Ok(*node_id); + } + + let new_node_id = builder + .add_to_forest(&mut self.mast_forest) + .into_diagnostic() + .wrap_err("assembler failed to add new statically linked node")?; + self.node_id_by_fingerprint.insert(dedup_fingerprint, new_node_id); + self.hash_by_node_id.insert(new_node_id, dedup_fingerprint); + + let mut asm_ops = self.statically_linked_mast.debug_info().asm_ops_for_node(source_node_id); + if let MastNode::Block(block) = &self.statically_linked_mast[source_node_id] { + asm_ops = BasicBlockNode::unadjust_asm_op_indices(asm_ops, block.op_batches()); + } + if !asm_ops.is_empty() { + let mut remapped_asm_ops = Vec::with_capacity(asm_ops.len()); + for (op_idx, asm_op_id) in asm_ops { + if let Some(asm_op) = self.statically_linked_mast.debug_info().asm_op(asm_op_id) { + let new_asm_op_id = self + .mast_forest + .debug_info_mut() + .add_asm_op(asm_op.clone()) + .into_diagnostic() + .wrap_err("failed to copy AssemblyOp from statically linked forest")?; + remapped_asm_ops.push((op_idx, new_asm_op_id)); + } + } + if !remapped_asm_ops.is_empty() { + self.pending_asm_op_mappings.push((new_node_id, remapped_asm_ops)); + } + } + + let debug_vars = + self.statically_linked_mast.debug_info().debug_vars_for_node(source_node_id); + if !debug_vars.is_empty() { + let mut remapped_debug_vars = Vec::with_capacity(debug_vars.len()); + for (op_idx, var_id) in debug_vars { + if let Some(debug_var) = self.statically_linked_mast.debug_info().debug_var(var_id) + { + let new_var_id = self + .mast_forest + .add_debug_var(debug_var.clone()) + .into_diagnostic() + .wrap_err("failed to copy debug var from statically linked forest")?; + remapped_debug_vars.push((op_idx, new_var_id)); + } + } + if !remapped_debug_vars.is_empty() { + self.pending_debug_var_mappings.push((new_node_id, remapped_debug_vars)); + } + } + + Ok(new_node_id) + } + /// Adds a node to the forest if it doesn't already exist. /// /// Returns `(node_id, is_new)` where `is_new` is true if the node was newly added, @@ -599,24 +1009,53 @@ impl MastForestBuilder { /// entry is `(op_idx, AssemblyOp)` where `op_idx` is the operation index the AssemblyOp /// corresponds to. /// - /// Note: AssemblyOps are only registered for newly created nodes. If a duplicate node already - /// exists in the forest (deduplication), the asm_ops are ignored since the existing node - /// already has its metadata registered. + /// The `debug_vars` parameter contains debug variable metadata for operations in this block. + /// Each entry is `(op_idx, DebugVarId)` where `op_idx` is the operation index the debug + /// variable corresponds to. + /// + /// Note: AssemblyOps and debug variables are kept external to the block builder but are + /// included in the dedup fingerprint so that blocks with identical operations but different + /// metadata are not deduplicated. /// - /// The actual registration of AssemblyOp mappings is deferred until `build()` is called, - /// to ensure nodes are registered in sequential order as required by the CSR structure. + /// The actual registration of both AssemblyOp and debug variable mappings is deferred until + /// `build()` is called, to ensure nodes are registered in sequential order as required by + /// the CSR structure. pub fn ensure_block( &mut self, operations: Vec, decorators: DecoratorList, asm_ops: Vec<(usize, AssemblyOp)>, + debug_vars: Vec<(usize, miden_core::mast::DebugVarId)>, before_enter: Vec, after_exit: Vec, ) -> Result { let block = BasicBlockNodeBuilder::new(operations, decorators) .with_before_enter(before_enter) .with_after_exit(after_exit); - let (node_id, is_new) = self.ensure_node_exists(block)?; + + // Compute the base fingerprint from the builder, then augment with external metadata + // so the dedup key stays sensitive to source mappings without storing them on the + // builder itself. + let base_fingerprint = block + .fingerprint_for_node(&self.mast_forest, &self.hash_by_node_id) + .expect("hash_by_node_id should contain the fingerprints of all children of `node`"); + let dedup_fingerprint = self.maybe_augment( + self.maybe_augment(base_fingerprint, &serialize_asm_ops(&asm_ops)), + &serialize_debug_vars(&self.mast_forest, &debug_vars), + ); + + let (node_id, is_new) = + if let Some(node_id) = self.node_id_by_fingerprint.get(&dedup_fingerprint) { + (*node_id, false) + } else { + let new_node_id = block + .add_to_forest(&mut self.mast_forest) + .into_diagnostic() + .wrap_err("assembler failed to add new node")?; + self.node_id_by_fingerprint.insert(dedup_fingerprint, new_node_id); + self.hash_by_node_id.insert(new_node_id, dedup_fingerprint); + (new_node_id, true) + }; // Only register AssemblyOps for newly created nodes. // Deduplicated nodes already have their asm_ops registered from when they were first added. @@ -635,106 +1074,14 @@ impl MastForestBuilder { self.pending_asm_op_mappings.push((node_id, asm_op_mappings)); } - Ok(node_id) - } - - /// Adds a join node to the forest, and returns the [`MastNodeId`] associated with it. - pub fn ensure_join( - &mut self, - left_child: MastNodeId, - right_child: MastNodeId, - before_enter: Vec, - after_exit: Vec, - ) -> Result { - let join = JoinNodeBuilder::new([left_child, right_child]) - .with_before_enter(before_enter) - .with_after_exit(after_exit); - self.ensure_node(join) - } - - /// Adds a call node to the forest, and returns the [`MastNodeId`] associated with it. - pub fn ensure_call( - &mut self, - callee: MastNodeId, - before_enter: Vec, - after_exit: Vec, - ) -> Result { - let call = CallNodeBuilder::new(callee) - .with_before_enter(before_enter) - .with_after_exit(after_exit); - self.ensure_node(call) - } - - /// Adds a split node to the forest, and returns the [`MastNodeId`] associated with it. - // Kept for giving tests some consistency - #[cfg(all(test, feature = "std"))] - pub fn ensure_split( - &mut self, - left_child: MastNodeId, - right_child: MastNodeId, - before_enter: Vec, - after_exit: Vec, - ) -> Result { - use miden_core::mast::SplitNodeBuilder; - let split = SplitNodeBuilder::new([left_child, right_child]) - .with_before_enter(before_enter) - .with_after_exit(after_exit); - self.ensure_node(split) - } - - /// Adds a loop node to the forest, and returns the [`MastNodeId`] associated with it. - // Kept for giving tests some consistency - #[cfg(all(test, feature = "std"))] - pub fn ensure_loop( - &mut self, - body: MastNodeId, - before_enter: Vec, - after_exit: Vec, - ) -> Result { - use miden_core::mast::LoopNodeBuilder; - let loop_node = LoopNodeBuilder::new(body) - .with_before_enter(before_enter) - .with_after_exit(after_exit); - self.ensure_node(loop_node) - } - - /// Adds a syscall node to the forest, and returns the [`MastNodeId`] associated with it. - pub fn ensure_syscall( - &mut self, - callee: MastNodeId, - before_enter: Vec, - after_exit: Vec, - ) -> Result { - let syscall = CallNodeBuilder::new_syscall(callee) - .with_after_exit(after_exit) - .with_before_enter(before_enter); - self.ensure_node(syscall) - } - - /// Adds a dyn node to the forest, and returns the [`MastNodeId`] associated with it. - pub fn ensure_dyn( - &mut self, - before_enter: Vec, - after_exit: Vec, - ) -> Result { - self.ensure_node( - DynNodeBuilder::new_dyn() - .with_after_exit(after_exit) - .with_before_enter(before_enter), - ) - } + // Only register debug variables for newly created nodes. + // Blocks with different debug vars have different augmented fingerprints, + // so deduplication only occurs when vars are truly identical. + if is_new && !debug_vars.is_empty() { + self.pending_debug_var_mappings.push((node_id, debug_vars)); + } - /// Adds a dyncall node to the forest, and returns the [`MastNodeId`] associated with it. - pub fn ensure_dyncall( - &mut self, - before_enter: Vec, - after_exit: Vec, - ) -> Result { - self.ensure_node( - DynNodeBuilder::new_dyncall() - .with_after_exit(after_exit) - .with_before_enter(before_enter), - ) + Ok(node_id) } /// Collects all decorators from a subtree in the statically linked forest and copies them @@ -809,24 +1156,34 @@ impl MastForestBuilder { /// * If statically-linked, then the entire subtree is copied, and the MastNodeId of the root of /// the inserted subtree is returned. /// * If dynamically-linked, then an external node is inserted, and its MastNodeId is returned + /// + /// TODO(#2990): when multiple roots share the same digest but carry + /// different metadata, this always picks the first match. Needs a source + /// MastNodeId threaded from ProcedureInfo through the linker. pub fn ensure_external_link(&mut self, mast_root: Word) -> Result { if let Some(root_id) = self.statically_linked_mast.find_procedure_root(mast_root) { - // First, collect and copy all decorators from the subtree - self.collect_decorators_from_subtree(&root_id)?; - - // Then copy all nodes with remapped children and decorators - for old_id in SubtreeIterator::new(&root_id, &self.statically_linked_mast.clone()) { - let node = self.statically_linked_mast[old_id].clone(); - let builder = self.build_with_remapped_ids(old_id, node)?; - let new_id = self.ensure_node(builder)?; - self.statically_linked_mast_remapping.insert(old_id, new_id); - } - Ok(root_id.remap(&self.statically_linked_mast_remapping)) + self.copy_statically_linked_subtree(root_id) } else { self.ensure_node(ExternalNodeBuilder::new(mast_root)) } } + /// Copies a subtree from the statically linked forest into the builder's forest. + fn copy_statically_linked_subtree( + &mut self, + root_id: MastNodeId, + ) -> Result { + self.collect_decorators_from_subtree(&root_id)?; + + for old_id in SubtreeIterator::new(&root_id, &self.statically_linked_mast.clone()) { + let node = self.statically_linked_mast[old_id].clone(); + let builder = self.build_with_remapped_ids(old_id, node)?; + let new_id = self.ensure_node_from_statically_linked_source(builder, old_id)?; + self.statically_linked_mast_remapping.insert(old_id, new_id); + } + Ok(root_id.remap(&self.statically_linked_mast_remapping)) + } + /// Adds a list of decorators to the provided node to be executed before the node executes. /// /// If other decorators are already present, the new decorators are added to the end of the @@ -839,8 +1196,21 @@ impl MastForestBuilder { // Extract the existing node and convert it to a builder let mut decorated_builder = self.mast_forest[node_id].clone().to_builder(&self.mast_forest); decorated_builder.append_before_enter(decorator_ids); - let new_node_fingerprint = + let base_fingerprint = decorated_builder.fingerprint_for_node(&self.mast_forest, &self.hash_by_node_id)?; + + // Re-augment the fingerprint with this node's external metadata so the dedup key + // stays consistent with what ensure_block() originally computed. + let asm_ops_data = + serialize_asm_ops_for_node(&self.mast_forest, &self.pending_asm_op_mappings, node_id); + let debug_vars_data = serialize_debug_vars_for_node( + &self.mast_forest, + &self.pending_debug_var_mappings, + node_id, + ); + let new_node_fingerprint = self + .maybe_augment(self.maybe_augment(base_fingerprint, &asm_ops_data), &debug_vars_data); + self.mast_forest[node_id] = decorated_builder.build(&self.mast_forest)?; self.hash_by_node_id.insert(node_id, new_node_fingerprint); @@ -856,8 +1226,21 @@ impl MastForestBuilder { // Extract the existing node and convert it to a builder let mut decorated_builder = self.mast_forest[node_id].clone().to_builder(&self.mast_forest); decorated_builder.append_after_exit(decorator_ids); - let new_node_fingerprint = + let base_fingerprint = decorated_builder.fingerprint_for_node(&self.mast_forest, &self.hash_by_node_id)?; + + // Re-augment the fingerprint with this node's external metadata so the dedup key + // stays consistent with what ensure_block() originally computed. + let asm_ops_data = + serialize_asm_ops_for_node(&self.mast_forest, &self.pending_asm_op_mappings, node_id); + let debug_vars_data = serialize_debug_vars_for_node( + &self.mast_forest, + &self.pending_debug_var_mappings, + node_id, + ); + let new_node_fingerprint = self + .maybe_augment(self.maybe_augment(base_fingerprint, &asm_ops_data), &debug_vars_data); + self.mast_forest[node_id] = decorated_builder.build(&self.mast_forest)?; self.hash_by_node_id.insert(node_id, new_node_fingerprint); @@ -872,45 +1255,6 @@ impl MastForestBuilder { pub fn register_error(&mut self, msg: Arc) -> Felt { self.mast_forest.register_error(msg) } - - /// Registers an AssemblyOp for a control flow node (e.g., if.true, while.true). - /// - /// This is used for nodes that don't have operation indices, such as Split, Loop, Call, etc. - /// The AssemblyOp is registered at operation index 0 for the node. - /// - /// The actual registration is deferred until `build()` is called, to ensure nodes are - /// registered in sequential order as required by the CSR structure. - pub fn register_node_asm_op( - &mut self, - node_id: MastNodeId, - asm_op: AssemblyOp, - ) -> Result<(), Report> { - let asm_op_id = self - .mast_forest - .debug_info_mut() - .add_asm_op(asm_op) - .into_diagnostic() - .wrap_err("failed to add AssemblyOp for control flow node")?; - // Defer registration until build() to ensure sequential node order. - self.pending_asm_op_mappings.push((node_id, vec![(0, asm_op_id)])); - Ok(()) - } - - /// Registers debug variables for a specific node. - /// - /// This associates already-added debug variables with specific operations within a node. - /// Debug variables are stored in dedicated CSR storage and are only accessed by the debugger. - pub fn register_debug_vars_for_node( - &mut self, - node_id: MastNodeId, - debug_vars: Vec<(usize, miden_core::mast::DebugVarId)>, - ) -> Result<(), Report> { - self.mast_forest - .debug_info_mut() - .register_op_indexed_debug_vars(node_id, debug_vars) - .into_diagnostic() - .wrap_err("failed to register debug variables for node") - } } impl Index for MastForestBuilder { @@ -985,7 +1329,7 @@ fn should_merge(is_procedure: bool, num_op_batches: usize) -> bool { #[cfg(test)] mod tests { - use miden_core::operations::Operation; + use miden_core::{mast::CallNodeBuilder, operations::Operation}; use super::*; @@ -1026,7 +1370,7 @@ mod tests { ]; let block1_id = builder - .ensure_block(block1_ops.clone(), block1_decorators, vec![], vec![], vec![]) + .ensure_block(block1_ops.clone(), block1_decorators, vec![], vec![], vec![], vec![]) .unwrap(); // Sanity check the test itself makes sense @@ -1047,7 +1391,7 @@ mod tests { ]; // [push mul] [3] let block2_id = builder - .ensure_block(block2_ops.clone(), block2_decorators, vec![], vec![], vec![]) + .ensure_block(block2_ops.clone(), block2_decorators, vec![], vec![], vec![], vec![]) .unwrap(); // Merge the blocks @@ -1185,12 +1529,13 @@ mod tests { let num_ops = PROCEDURE_INLINING_THRESHOLD * 1024; let large_ops = vec![Operation::Add; num_ops]; - let large_block_id = - builder.ensure_block(large_ops, Vec::new(), vec![], vec![], vec![]).unwrap(); + let large_block_id = builder + .ensure_block(large_ops, Vec::new(), vec![], vec![], vec![], vec![]) + .unwrap(); builder.mast_forest.make_root(large_block_id); let small_block_id = builder - .ensure_block(vec![Operation::Add], Vec::new(), vec![], vec![], vec![]) + .ensure_block(vec![Operation::Add], Vec::new(), vec![], vec![], vec![], vec![]) .unwrap(); let merged_blocks = builder.merge_basic_blocks(&[large_block_id, small_block_id]).unwrap(); @@ -1199,4 +1544,628 @@ mod tests { assert_eq!(merged_blocks[0], large_block_id); assert_eq!(merged_blocks[1], small_block_id); } + + /// Cloning a block with debug vars via `to_builder().with_before_enter()` must + /// propagate those vars to the new node (exercises the assembler repeat path). + #[test] + fn test_ensure_node_preserving_debug_vars_on_cloned_block() { + use miden_core::operations::{DebugVarInfo, DebugVarLocation}; + + let mut builder = MastForestBuilder::new(&[]).unwrap(); + + let var_info = DebugVarInfo::new("x", DebugVarLocation::Stack(0)); + let var_id = builder.add_debug_var(var_info).unwrap(); + + let block_id = builder + .ensure_block( + vec![Operation::Add], + Vec::new(), + vec![], + vec![(0, var_id)], + vec![], + vec![], + ) + .unwrap(); + + let decorator_id = builder.ensure_decorator(Decorator::Trace(42)).unwrap(); + + // Simulate the repeat path: clone + add before_enter + preserve debug vars. + let rebuilt_builder = builder.mast_forest[block_id] + .clone() + .to_builder(builder.mast_forest()) + .with_before_enter(vec![decorator_id]); + let cloned_id = + builder.ensure_node_preserving_debug_vars(rebuilt_builder, block_id).unwrap(); + + assert_ne!(cloned_id, block_id); + + let (forest, _remapping) = builder.build(); + + let cloned_vars = forest.debug_info().debug_vars_for_node(cloned_id); + assert_eq!(cloned_vars.len(), 1, "cloned node should have debug vars"); + assert_eq!(cloned_vars[0].1, var_id); + + let original_vars = forest.debug_info().debug_vars_for_node(block_id); + assert_eq!(original_vars.len(), 1, "original should keep its debug vars"); + assert_eq!(original_vars[0].1, var_id); + } + + /// Same-ops blocks with different debug vars must not alias after clone + before_enter. + #[test] + fn test_ensure_node_preserving_debug_vars_prevents_aliasing() { + use miden_core::operations::{DebugVarInfo, DebugVarLocation}; + + let mut builder = MastForestBuilder::new(&[]).unwrap(); + + let var_x_id = builder + .add_debug_var(DebugVarInfo::new("x", DebugVarLocation::Stack(0))) + .unwrap(); + let var_y_id = builder + .add_debug_var(DebugVarInfo::new("y", DebugVarLocation::Stack(1))) + .unwrap(); + + // Same ops, different debug vars -- should not dedup. + let block_a = builder + .ensure_block( + vec![Operation::Add], + Vec::new(), + vec![], + vec![(0, var_x_id)], + vec![], + vec![], + ) + .unwrap(); + let block_b = builder + .ensure_block( + vec![Operation::Add], + Vec::new(), + vec![], + vec![(0, var_y_id)], + vec![], + vec![], + ) + .unwrap(); + + assert_ne!(block_a, block_b); + + let decorator_id = builder.ensure_decorator(Decorator::Trace(1)).unwrap(); + + let rebuilt_a = builder.mast_forest[block_a] + .clone() + .to_builder(builder.mast_forest()) + .with_before_enter(vec![decorator_id]); + let cloned_a = builder.ensure_node_preserving_debug_vars(rebuilt_a, block_a).unwrap(); + + let rebuilt_b = builder.mast_forest[block_b] + .clone() + .to_builder(builder.mast_forest()) + .with_before_enter(vec![decorator_id]); + let cloned_b = builder.ensure_node_preserving_debug_vars(rebuilt_b, block_b).unwrap(); + + assert_ne!(cloned_a, cloned_b, "different debug vars must prevent dedup"); + + let (forest, _remapping) = builder.build(); + let vars_a = forest.debug_info().debug_vars_for_node(cloned_a); + let vars_b = forest.debug_info().debug_vars_for_node(cloned_b); + + assert_eq!(vars_a.len(), 1); + assert_eq!(vars_a[0].1, var_x_id, "cloned_a should have var x"); + assert_eq!(vars_b.len(), 1); + assert_eq!(vars_b[0].1, var_y_id, "cloned_b should have var y"); + } + + /// Same-content debug vars should not prevent block dedup just because they + /// were allocated different DebugVarIds. + #[test] + fn test_ensure_block_dedups_identical_debug_var_payloads() { + use miden_core::operations::{DebugVarInfo, DebugVarLocation}; + + let mut builder = MastForestBuilder::new(&[]).unwrap(); + + let var_a = builder + .add_debug_var(DebugVarInfo::new("x", DebugVarLocation::Stack(0))) + .unwrap(); + let var_b = builder + .add_debug_var(DebugVarInfo::new("x", DebugVarLocation::Stack(0))) + .unwrap(); + + let block_a = builder + .ensure_block( + vec![Operation::Add], + Vec::new(), + vec![], + vec![(0, var_a)], + vec![], + vec![], + ) + .unwrap(); + let block_b = builder + .ensure_block( + vec![Operation::Add], + Vec::new(), + vec![], + vec![(0, var_b)], + vec![], + vec![], + ) + .unwrap(); + + assert_eq!( + block_a, block_b, + "same op stream plus same DebugVarInfo payload should dedup to one node" + ); + } + + /// Same-ops blocks with different AssemblyOps must not alias during assembly. + #[test] + fn test_ensure_block_keeps_different_asm_ops_distinct() { + let mut builder = MastForestBuilder::new(&[]).unwrap(); + + let block_a = builder + .ensure_block( + vec![Operation::Add], + Vec::new(), + vec![(0, AssemblyOp::new(None, "ctx_a".into(), 1, "add".into()))], + vec![], + vec![], + vec![], + ) + .unwrap(); + let block_b = builder + .ensure_block( + vec![Operation::Add], + Vec::new(), + vec![(0, AssemblyOp::new(None, "ctx_b".into(), 1, "add".into()))], + vec![], + vec![], + vec![], + ) + .unwrap(); + + assert_ne!( + block_a, block_b, + "same op stream plus different AssemblyOp payload must not dedup" + ); + + let (forest, _remapping) = builder.build(); + assert_eq!( + forest.debug_info().first_asm_op_for_node(block_a).unwrap().context_name(), + "ctx_a" + ); + assert_eq!( + forest.debug_info().first_asm_op_for_node(block_b).unwrap().context_name(), + "ctx_b" + ); + } + + /// Non-block nodes with different AssemblyOps must not alias during assembly. + #[test] + fn test_non_block_nodes_keep_different_asm_ops_distinct() { + let mut builder = MastForestBuilder::new(&[]).unwrap(); + + let callee = builder + .ensure_block(vec![Operation::Add], Vec::new(), vec![], vec![], vec![], vec![]) + .unwrap(); + let call_a = builder + .ensure_node_with_asm_op( + CallNodeBuilder::new(callee), + AssemblyOp::new(None, "ctx_a".into(), 1, "call.foo".into()), + ) + .unwrap(); + let call_b = builder + .ensure_node_with_asm_op( + CallNodeBuilder::new(callee), + AssemblyOp::new(None, "ctx_b".into(), 1, "call.foo".into()), + ) + .unwrap(); + + assert_ne!( + call_a, call_b, + "same-structure non-block nodes with different AssemblyOps must not dedup" + ); + + let (forest, _remapping) = builder.build(); + assert_eq!( + forest.debug_info().first_asm_op_for_node(call_a).unwrap().context_name(), + "ctx_a" + ); + assert_eq!( + forest.debug_info().first_asm_op_for_node(call_b).unwrap().context_name(), + "ctx_b" + ); + } + + /// Cloning a block with AssemblyOps via `to_builder().with_before_enter()` must + /// preserve those asm ops on the new node. + #[test] + fn test_ensure_node_preserving_debug_vars_on_cloned_block_keeps_asm_ops() { + let mut builder = MastForestBuilder::new(&[]).unwrap(); + + let block_id = builder + .ensure_block( + vec![Operation::Add], + Vec::new(), + vec![(0, AssemblyOp::new(None, "ctx".into(), 1, "add".into()))], + vec![], + vec![], + vec![], + ) + .unwrap(); + + let decorator_id = builder.ensure_decorator(Decorator::Trace(7)).unwrap(); + + let rebuilt_builder = builder.mast_forest[block_id] + .clone() + .to_builder(builder.mast_forest()) + .with_before_enter(vec![decorator_id]); + let cloned_id = + builder.ensure_node_preserving_debug_vars(rebuilt_builder, block_id).unwrap(); + + assert_ne!(cloned_id, block_id); + + let (forest, _remapping) = builder.build(); + + assert_eq!( + forest.debug_info().first_asm_op_for_node(cloned_id).unwrap().context_name(), + "ctx" + ); + assert_eq!( + forest.debug_info().first_asm_op_for_node(block_id).unwrap().context_name(), + "ctx" + ); + } + + /// Statically linked nodes must keep source metadata in the dedup fingerprint so copied + /// nodes do not alias local nodes with different source mappings. + #[test] + fn test_statically_linked_nodes_preserve_metadata_in_dedup() { + use miden_core::operations::{DebugVarInfo, DebugVarLocation}; + + let mut static_forest = MastForest::new(); + let static_asm_op_id = static_forest + .debug_info_mut() + .add_asm_op(AssemblyOp::new(None, "lib_ctx".into(), 1, "add".into())) + .unwrap(); + let static_var_id = static_forest + .add_debug_var(DebugVarInfo::new("x", DebugVarLocation::Stack(0))) + .unwrap(); + let static_block_id = BasicBlockNodeBuilder::new(vec![Operation::Add], Vec::new()) + .add_to_forest(&mut static_forest) + .unwrap(); + static_forest + .debug_info_mut() + .register_asm_ops(static_block_id, 1, vec![(0, static_asm_op_id)]) + .unwrap(); + static_forest + .debug_info_mut() + .register_op_indexed_debug_vars(static_block_id, vec![(0, static_var_id)]) + .unwrap(); + static_forest.make_root(static_block_id); + + let mut builder = MastForestBuilder::new([&static_forest]).unwrap(); + let copied_block_id = + builder.ensure_external_link(static_forest[static_block_id].digest()).unwrap(); + + let local_var_id = builder + .add_debug_var(DebugVarInfo::new("y", DebugVarLocation::Stack(1))) + .unwrap(); + let local_block_id = builder + .ensure_block( + vec![Operation::Add], + Vec::new(), + vec![(0, AssemblyOp::new(None, "local_ctx".into(), 1, "add".into()))], + vec![(0, local_var_id)], + vec![], + vec![], + ) + .unwrap(); + + assert_ne!( + copied_block_id, local_block_id, + "statically linked nodes must not alias local nodes with different metadata" + ); + + let (forest, _remapping) = builder.build(); + assert_eq!( + forest + .debug_info() + .first_asm_op_for_node(copied_block_id) + .unwrap() + .context_name(), + "lib_ctx" + ); + assert_eq!( + forest + .debug_info() + .first_asm_op_for_node(local_block_id) + .unwrap() + .context_name(), + "local_ctx" + ); + + let copied_vars = forest.debug_info().debug_vars_for_node(copied_block_id); + let local_vars = forest.debug_info().debug_vars_for_node(local_block_id); + assert_eq!(forest.debug_info().debug_var(copied_vars[0].1).unwrap().name(), "x"); + assert_eq!(forest.debug_info().debug_var(local_vars[0].1).unwrap().name(), "y"); + } + + #[test] + fn test_statically_linked_padded_block_dedups_with_equivalent_local_block() { + let mut source_builder = MastForestBuilder::new(&[]).unwrap(); + let ops = vec![ + Operation::Push(Felt::new(1)), + Operation::Drop, + Operation::Drop, + Operation::Drop, + Operation::Drop, + Operation::Drop, + Operation::Drop, + Operation::Push(Felt::new(2)), + Operation::Push(Felt::new(3)), + ]; + let asm_op = AssemblyOp::new(None, "padded_ctx".into(), 1, "push.3".into()); + + let static_block = source_builder + .ensure_block( + ops.clone(), + Vec::new(), + vec![(8, asm_op.clone())], + vec![], + vec![], + vec![], + ) + .unwrap(); + source_builder.mast_forest.make_root(static_block); + + let (static_forest, _) = source_builder.build(); + let expected_padded_idx = static_forest.debug_info().asm_ops_for_node(static_block)[0].0; + + let mut builder = MastForestBuilder::new([&static_forest]).unwrap(); + let copied_block_id = + builder.ensure_external_link(static_forest[static_block].digest()).unwrap(); + let local_block_id = builder + .ensure_block(ops, Vec::new(), vec![(8, asm_op)], vec![], vec![], vec![]) + .unwrap(); + + assert_eq!( + copied_block_id, local_block_id, + "copied padded blocks should dedup with equivalent local blocks", + ); + + let (forest, remapping) = builder.build(); + let final_block_id = remapping.get(&copied_block_id).copied().unwrap_or(copied_block_id); + + assert!( + forest + .debug_info() + .asm_op_for_operation(final_block_id, expected_padded_idx - 1) + .is_none(), + "the asm op must not be attached before its padded operation index", + ); + assert_eq!( + forest + .debug_info() + .asm_op_for_operation(final_block_id, expected_padded_idx) + .unwrap() + .context_name(), + "padded_ctx", + ); + } + + /// A small procedure root that gets merged into a larger block must keep its own + /// debug vars and asm ops, since the root node survives removal. + #[test] + fn test_merged_root_block_keeps_metadata() { + use miden_core::operations::{AssemblyOp, DebugVarInfo, DebugVarLocation}; + + let mut builder = MastForestBuilder::new(&[]).unwrap(); + + let var_id = builder + .add_debug_var(DebugVarInfo::new("x", DebugVarLocation::Stack(0))) + .unwrap(); + let asm_op = AssemblyOp::new(None, "test".into(), 1, "add".into()); + + // Small block that will be a procedure root -- should_merge returns true for + // small roots, so it will be folded into the merged block. + let root_block = builder + .ensure_block( + vec![Operation::Add], + Vec::new(), + vec![(0, asm_op)], + vec![(0, var_id)], + vec![], + vec![], + ) + .unwrap(); + builder.mast_forest.make_root(root_block); + + // Second block to merge with. + let other_block = builder + .ensure_block(vec![Operation::Mul], Vec::new(), vec![], vec![], vec![], vec![]) + .unwrap(); + + let merged = builder.merge_basic_blocks(&[root_block, other_block]).unwrap(); + // Root was small enough to merge, so we get one merged block. + assert_eq!(merged.len(), 1); + let merged_id = merged[0]; + assert_ne!(merged_id, root_block); + + let (forest, remapping) = builder.build(); + + // The root block survives removal (it's a procedure root). + let final_root_id = remapping.get(&root_block).copied().unwrap_or(root_block); + assert!(forest.is_procedure_root(final_root_id), "root should survive"); + + // Root block must still have its debug vars. + let root_vars = forest.debug_info().debug_vars_for_node(final_root_id); + assert_eq!(root_vars.len(), 1, "root must keep its debug vars after merge"); + assert_eq!(root_vars[0].1, var_id); + + // Root block must still have its asm op. + let root_asm = forest.debug_info().first_asm_op_for_node(final_root_id); + assert!(root_asm.is_some(), "root must keep its asm op after merge"); + } + + /// Two same-digest roots with different asm ops stay distinct when + /// linked by exact node ID. + #[test] + fn test_static_link_exact_node_preserves_alias_metadata() { + let mut source_builder = MastForestBuilder::new(&[]).unwrap(); + + let alias_a = source_builder + .ensure_block( + vec![Operation::Add], + Vec::new(), + vec![(0, AssemblyOp::new(None, "alias_a".into(), 1, "add".into()))], + vec![], + vec![], + vec![], + ) + .unwrap(); + let alias_b = source_builder + .ensure_block( + vec![Operation::Add], + Vec::new(), + vec![(0, AssemblyOp::new(None, "alias_b".into(), 1, "add".into()))], + vec![], + vec![], + vec![], + ) + .unwrap(); + source_builder.mast_forest.make_root(alias_a); + source_builder.mast_forest.make_root(alias_b); + + let (static_forest, _remapping) = source_builder.build(); + assert_eq!(static_forest[alias_a].digest(), static_forest[alias_b].digest()); + + // Exact path via internal API — gets alias_b's metadata. + let mut exact_builder = MastForestBuilder::new([&static_forest]).unwrap(); + let exact_alias_b = { + let node = exact_builder.statically_linked_mast[alias_b].clone(); + let builder = exact_builder.build_with_remapped_ids(alias_b, node).unwrap(); + exact_builder + .ensure_node_from_statically_linked_source(builder, alias_b) + .unwrap() + }; + let (exact_forest, _) = exact_builder.build(); + assert_eq!( + exact_forest + .debug_info() + .first_asm_op_for_node(exact_alias_b) + .unwrap() + .context_name(), + "alias_b" + ); + } + + /// Digest-based linking imports only the selected alias, not all + /// same-digest roots. The unselected alias must not leak into the forest. + #[test] + fn test_static_link_by_digest_imports_only_selected_alias() { + let mut source_builder = MastForestBuilder::new(&[]).unwrap(); + + let alias_a = source_builder + .ensure_block( + vec![Operation::Add], + Vec::new(), + vec![(0, AssemblyOp::new(None, "alias_a".into(), 1, "add".into()))], + vec![], + vec![], + vec![], + ) + .unwrap(); + let alias_b = source_builder + .ensure_block( + vec![Operation::Add], + Vec::new(), + vec![(0, AssemblyOp::new(None, "alias_b".into(), 1, "add".into()))], + vec![], + vec![], + vec![], + ) + .unwrap(); + source_builder.mast_forest.make_root(alias_a); + source_builder.mast_forest.make_root(alias_b); + + let (static_forest, _) = source_builder.build(); + + let mut builder = MastForestBuilder::new([&static_forest]).unwrap(); + let linked = builder.ensure_external_link(static_forest[alias_a].digest()).unwrap(); + builder.mast_forest.make_root(linked); + let (forest, _) = builder.build(); + + // Only one node should be in the forest — the selected alias. + assert_eq!(forest.num_nodes(), 1, "only the selected alias should be imported"); + assert_eq!( + forest.debug_info().first_asm_op_for_node(linked).unwrap().context_name(), + "alias_a", + ); + } + + /// Digest-based linking picks the first root, so the second alias gets + /// the wrong metadata. Fixing this needs #2990. + #[test] + #[ignore = "requires #2990: source MastNodeId in ProcedureInfo"] + fn repro_statically_linked_same_digest_root_uses_first_alias_metadata() { + let mut source_builder = MastForestBuilder::new(&[]).unwrap(); + + let alias_a = source_builder + .ensure_block( + vec![Operation::Add], + Vec::new(), + vec![(0, AssemblyOp::new(None, "alias_a".into(), 1, "add".into()))], + vec![], + vec![], + vec![], + ) + .unwrap(); + let alias_b = source_builder + .ensure_block( + vec![Operation::Add], + Vec::new(), + vec![(0, AssemblyOp::new(None, "alias_b".into(), 1, "add".into()))], + vec![], + vec![], + vec![], + ) + .unwrap(); + source_builder.mast_forest.make_root(alias_a); + source_builder.mast_forest.make_root(alias_b); + + let (static_forest, _remapping) = source_builder.build(); + assert_eq!(static_forest[alias_a].digest(), static_forest[alias_b].digest()); + + let mut exact_builder = MastForestBuilder::new([&static_forest]).unwrap(); + let exact_alias_b = { + let node = exact_builder.statically_linked_mast[alias_b].clone(); + let builder = exact_builder.build_with_remapped_ids(alias_b, node).unwrap(); + exact_builder + .ensure_node_from_statically_linked_source(builder, alias_b) + .unwrap() + }; + let (exact_forest, _) = exact_builder.build(); + + let mut digest_builder = MastForestBuilder::new([&static_forest]).unwrap(); + let linked_alias_b = + digest_builder.ensure_external_link(static_forest[alias_b].digest()).unwrap(); + let (linked_forest, _) = digest_builder.build(); + + assert_eq!( + exact_forest + .debug_info() + .first_asm_op_for_node(exact_alias_b) + .unwrap() + .context_name(), + "alias_b" + ); + assert_eq!( + linked_forest + .debug_info() + .first_asm_op_for_node(linked_alias_b) + .unwrap() + .context_name(), + "alias_b", + "linking the second same-digest root by digest should preserve that root's metadata", + ); + } } diff --git a/crates/assembly/src/tests.rs b/crates/assembly/src/tests.rs index 2c443bf035..2617f212ab 100644 --- a/crates/assembly/src/tests.rs +++ b/crates/assembly/src/tests.rs @@ -16,7 +16,10 @@ use miden_core::{ Felt, Word, assert_matches, events::EventId, field::PrimeField64, - mast::{MastNodeExt, MastNodeId}, + mast::{ + CallNodeBuilder, JoinNodeBuilder, LoopNodeBuilder, MastForestContributor, MastNodeExt, + MastNodeId, SplitNodeBuilder, + }, operations::Operation, program::Program, serde::{Deserializable, Serializable}, @@ -358,16 +361,15 @@ fn library_procedure_collision() -> Result<(), Report> { // make sure lib2 has the expected exports (i.e., bar1 and bar2) assert_eq!(lib2.num_exports(), 2); - // Now that AssemblyOps are stored separately in DebugInfo (not as Decorator::AsmOp), - // identical procedures ARE deduplicated because their MAST fingerprints are the same. - // The debug info (AssemblyOps) is stored separately and doesn't affect deduplication. + // AssemblyOp metadata now participates in assembler-side deduplication, so a re-exported + // procedure and a locally defined procedure with the same operations remain distinct when + // their source mappings differ. let lib2_bar_bar1 = QualifiedProcedureName::from_str("lib2::bar::bar1").unwrap(); let lib2_bar_bar2 = QualifiedProcedureName::from_str("lib2::bar::bar2").unwrap(); - assert_eq!(lib2.get_export_node_id(&lib2_bar_bar1), lib2.get_export_node_id(&lib2_bar_bar2)); + assert_ne!(lib2.get_export_node_id(&lib2_bar_bar1), lib2.get_export_node_id(&lib2_bar_bar2)); - // With deduplication restored, we expect fewer nodes than before when debug decorators - // prevented deduplication. The identical procedures now share the same node. - assert_eq!(lib2.mast_forest().num_nodes(), 5); + // Keeping those procedures distinct adds one more node to the library forest. + assert_eq!(lib2.mast_forest().num_nodes(), 6); Ok(()) } @@ -4422,11 +4424,15 @@ fn nested_blocks() -> Result<(), Report> { // `Assembler::with_kernel_from_module()`. let syscall_foo_node_id = { let kernel_foo_node_id = expected_mast_forest_builder - .ensure_block(vec![Operation::Add], Vec::new(), vec![], vec![], vec![]) + .ensure_block(vec![Operation::Add], Vec::new(), vec![], vec![], vec![], vec![]) .unwrap(); expected_mast_forest_builder - .ensure_syscall(kernel_foo_node_id, vec![], vec![]) + .ensure_node( + CallNodeBuilder::new_syscall(kernel_foo_node_id) + .with_before_enter(vec![]) + .with_after_exit(vec![]), + ) .unwrap() }; @@ -4470,35 +4476,85 @@ fn nested_blocks() -> Result<(), Report> { // basic block representing foo::bar.baz procedure let exec_foo_bar_baz_node_id = expected_mast_forest_builder - .ensure_block(vec![Operation::Push(Felt::from_u32(29))], Vec::new(), vec![], vec![], vec![]) + .ensure_block( + vec![Operation::Push(Felt::from_u32(29))], + Vec::new(), + vec![], + vec![], + vec![], + vec![], + ) .unwrap(); let fmp_initialization = expected_mast_forest_builder - .ensure_block(fmp_initialization_sequence(), Vec::new(), vec![], vec![], vec![]) + .ensure_block(fmp_initialization_sequence(), Vec::new(), vec![], vec![], vec![], vec![]) .unwrap(); let before = expected_mast_forest_builder - .ensure_block(vec![Operation::Push(Felt::from_u32(2))], Vec::new(), vec![], vec![], vec![]) + .ensure_block( + vec![Operation::Push(Felt::from_u32(2))], + Vec::new(), + vec![], + vec![], + vec![], + vec![], + ) .unwrap(); let r#true1 = expected_mast_forest_builder - .ensure_block(vec![Operation::Push(Felt::from_u32(3))], Vec::new(), vec![], vec![], vec![]) + .ensure_block( + vec![Operation::Push(Felt::from_u32(3))], + Vec::new(), + vec![], + vec![], + vec![], + vec![], + ) .unwrap(); let r#false1 = expected_mast_forest_builder - .ensure_block(vec![Operation::Push(Felt::from_u32(5))], Vec::new(), vec![], vec![], vec![]) + .ensure_block( + vec![Operation::Push(Felt::from_u32(5))], + Vec::new(), + vec![], + vec![], + vec![], + vec![], + ) .unwrap(); let r#if1 = expected_mast_forest_builder - .ensure_split(r#true1, r#false1, vec![], vec![]) + .ensure_node( + SplitNodeBuilder::new([r#true1, r#false1]) + .with_before_enter(vec![]) + .with_after_exit(vec![]), + ) .unwrap(); let r#true3 = expected_mast_forest_builder - .ensure_block(vec![Operation::Push(Felt::from_u32(7))], Vec::new(), vec![], vec![], vec![]) + .ensure_block( + vec![Operation::Push(Felt::from_u32(7))], + Vec::new(), + vec![], + vec![], + vec![], + vec![], + ) .unwrap(); let r#false3 = expected_mast_forest_builder - .ensure_block(vec![Operation::Push(Felt::from_u32(11))], Vec::new(), vec![], vec![], vec![]) + .ensure_block( + vec![Operation::Push(Felt::from_u32(11))], + Vec::new(), + vec![], + vec![], + vec![], + vec![], + ) .unwrap(); let r#true2 = expected_mast_forest_builder - .ensure_split(r#true3, r#false3, vec![], vec![]) + .ensure_node( + SplitNodeBuilder::new([r#true3, r#false3]) + .with_before_enter(vec![]) + .with_after_exit(vec![]), + ) .unwrap(); let r#while = { @@ -4513,20 +4569,42 @@ fn nested_blocks() -> Result<(), Report> { vec![], vec![], vec![], + vec![], ) .unwrap(); - expected_mast_forest_builder.ensure_loop(body_node_id, vec![], vec![]).unwrap() + expected_mast_forest_builder + .ensure_node( + LoopNodeBuilder::new(body_node_id) + .with_before_enter(vec![]) + .with_after_exit(vec![]), + ) + .unwrap() }; let push_13_basic_block_id = expected_mast_forest_builder - .ensure_block(vec![Operation::Push(Felt::from_u32(13))], Vec::new(), vec![], vec![], vec![]) + .ensure_block( + vec![Operation::Push(Felt::from_u32(13))], + Vec::new(), + vec![], + vec![], + vec![], + vec![], + ) .unwrap(); let r#false2 = expected_mast_forest_builder - .ensure_join(push_13_basic_block_id, r#while, vec![], vec![]) + .ensure_node( + JoinNodeBuilder::new([push_13_basic_block_id, r#while]) + .with_before_enter(vec![]) + .with_after_exit(vec![]), + ) .unwrap(); let nested = expected_mast_forest_builder - .ensure_split(r#true2, r#false2, vec![], vec![]) + .ensure_node( + SplitNodeBuilder::new([r#true2, r#false2]) + .with_before_enter(vec![]) + .with_after_exit(vec![]), + ) .unwrap(); let combined_node_id = expected_mast_forest_builder @@ -4667,11 +4745,10 @@ fn duplicate_procedure() { "#; let program = context.assemble(program_source).unwrap(); - // Now that AssemblyOps are stored separately in DebugInfo (not as Decorator::AsmOp), - // identical procedures ARE deduplicated because their MAST fingerprints are the same. - // foo and bar have identical code, so they share the same MAST node. The entrypoint - // is a separate procedure, so we have 2 procedures total. - assert_eq!(program.num_procedures(), 2); + // AssemblyOp metadata now participates in assembler-side deduplication. Even though + // `foo` and `bar` have the same operations, they carry different source mappings and + // therefore must remain distinct procedures. The entrypoint is a third procedure. + assert_eq!(program.num_procedures(), 3); } #[test]