From 19112b3f9c8a7aba8f170f4924d1c9523e746d03 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Thu, 9 Apr 2026 10:55:37 -0400 Subject: [PATCH 1/4] Convert crate hash to use metadata instead of HIR --- compiler/rustc_driver_impl/src/lib.rs | 8 +- compiler/rustc_interface/src/passes.rs | 9 +- compiler/rustc_metadata/src/fs.rs | 2 +- .../src/rmeta/decoder/cstore_impl.rs | 97 ++++++ compiler/rustc_metadata/src/rmeta/encoder.rs | 311 +++++++++++++----- compiler/rustc_metadata/src/rmeta/mod.rs | 6 +- compiler/rustc_metadata/src/rmeta/table.rs | 10 +- compiler/rustc_middle/src/hir/map.rs | 101 +----- compiler/rustc_middle/src/hir/mod.rs | 1 - compiler/rustc_session/src/cstore.rs | 4 + 10 files changed, 357 insertions(+), 192 deletions(-) diff --git a/compiler/rustc_driver_impl/src/lib.rs b/compiler/rustc_driver_impl/src/lib.rs index c15c3c229398c..18276311dd6df 100644 --- a/compiler/rustc_driver_impl/src/lib.rs +++ b/compiler/rustc_driver_impl/src/lib.rs @@ -324,10 +324,6 @@ pub fn run_compiler(at_args: &[String], callbacks: &mut (dyn Callbacks + Send)) tcx.ensure_ok().analysis(()); - if let Some(metrics_dir) = &sess.opts.unstable_opts.metrics_dir { - dump_feature_usage_metrics(tcx, metrics_dir); - } - if callbacks.after_analysis(compiler, tcx) == Compilation::Stop { return early_exit(); } @@ -340,6 +336,10 @@ pub fn run_compiler(at_args: &[String], callbacks: &mut (dyn Callbacks + Send)) let linker = Linker::codegen_and_build_linker(tcx, &*compiler.codegen_backend); + if let Some(metrics_dir) = &sess.opts.unstable_opts.metrics_dir { + dump_feature_usage_metrics(tcx, metrics_dir); + } + tcx.report_unused_features(); Some(linker) diff --git a/compiler/rustc_interface/src/passes.rs b/compiler/rustc_interface/src/passes.rs index 9c020c35e1429..6d8c8ea154a57 100644 --- a/compiler/rustc_interface/src/passes.rs +++ b/compiler/rustc_interface/src/passes.rs @@ -951,8 +951,13 @@ pub fn create_and_enter_global_ctxt FnOnce(TyCtxt<'tcx>) -> T>( let definitions = FreezeLock::new(Definitions::new(stable_crate_id)); let stable_crate_ids = FreezeLock::new(StableCrateIdMap::default()); - let untracked = - Untracked { cstore, source_span: AppendOnlyIndexVec::new(), definitions, stable_crate_ids }; + let untracked = Untracked { + cstore, + source_span: AppendOnlyIndexVec::new(), + definitions, + stable_crate_ids, + local_crate_hash: OnceLock::new(), + }; // We're constructing the HIR here; we don't care what we will // read, since we haven't even constructed the *input* to diff --git a/compiler/rustc_metadata/src/fs.rs b/compiler/rustc_metadata/src/fs.rs index 1eaad26ff8e80..66af70717d64f 100644 --- a/compiler/rustc_metadata/src/fs.rs +++ b/compiler/rustc_metadata/src/fs.rs @@ -54,7 +54,7 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> EncodedMetadata { None }; - if tcx.needs_metadata() { + if tcx.needs_metadata() || tcx.needs_crate_hash() { encode_metadata(tcx, &metadata_filename, metadata_stub_filename.as_deref()); } else { // Always create a file at `metadata_filename`, even if we have nothing to write to it. diff --git a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs index a00fb59963ac2..bc42d578c0c51 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs @@ -750,4 +750,101 @@ fn provide_cstore_hooks(providers: &mut Providers) { cdata.imported_source_file(tcx, file_index as u32); } }; + + providers.queries.crate_hash = |tcx: TyCtxt<'_>, _: LocalCrate| { + *tcx.untracked() + .local_crate_hash + .get() + .expect("crate_hash(LOCAL_CRATE) called before metadata encoding") + }; } + +/*pub(super) fn crate_hash(tcx: TyCtxt<'_>, cnum: rustc_hir::def_id::CrateNum) -> Svh { + let cstore = CStore::from_tcx(tcx); + let crate_data = cstore.get_crate_data(cnum); + crate_data.root.header.hash + + let upstream_crates = upstream_crates(tcx); + + let resolutions = tcx.resolutions(()); + + // We hash the final, remapped names of all local source files so we + // don't have to include the path prefix remapping commandline args. + // If we included the full mapping in the SVH, we could only have + // reproducible builds by compiling from the same directory. So we just + // hash the result of the mapping instead of the mapping itself. + let mut source_file_names: Vec<_> = tcx + .sess + .source_map() + .files() + .iter() + .filter(|source_file| source_file.cnum == LOCAL_CRATE) + .map(|source_file| source_file.stable_id) + .collect(); + + source_file_names.sort_unstable(); + + // We have to take care of debugger visualizers explicitly. The HIR (and + // thus `hir_body_hash`) contains the #[debugger_visualizer] attributes but + // these attributes only store the file path to the visualizer file, not + // their content. Yet that content is exported into crate metadata, so any + // changes to it need to be reflected in the crate hash. + let debugger_visualizers: Vec<_> = tcx + .debugger_visualizers(LOCAL_CRATE) + .iter() + // We ignore the path to the visualizer file since it's not going to be + // encoded in crate metadata and we already hash the full contents of + // the file. + .map(DebuggerVisualizerFile::path_erased) + .collect(); + + let crate_hash: Fingerprint = tcx.with_stable_hashing_context(|mut hcx| { + let mut stable_hasher = StableHasher::new(); + metadata_hash.hash_stable(&mut hcx, &mut stable_hasher); + upstream_crates.hash_stable(&mut hcx, &mut stable_hasher); + source_file_names.hash_stable(&mut hcx, &mut stable_hasher); + debugger_visualizers.hash_stable(&mut hcx, &mut stable_hasher); + if tcx.sess.opts.incremental.is_some() { + let definitions = tcx.untracked().definitions.freeze(); + let mut owner_spans: Vec<_> = tcx + .hir_crate_items(()) + .definitions() + .map(|def_id| { + let def_path_hash = definitions.def_path_hash(def_id); + let span = tcx.source_span(def_id); + debug_assert_eq!(span.parent(), None); + (def_path_hash, span) + }) + .collect(); + owner_spans.sort_unstable_by_key(|bn| bn.0); + owner_spans.hash_stable(&mut hcx, &mut stable_hasher); + } + tcx.sess.opts.dep_tracking_hash(true).hash_stable(&mut hcx, &mut stable_hasher); + tcx.stable_crate_id(LOCAL_CRATE).hash_stable(&mut hcx, &mut stable_hasher); + // Hash visibility information since it does not appear in HIR. + // FIXME: Figure out how to remove `visibilities_for_hashing` by hashing visibilities on + // the fly in the resolver, storing only their accumulated hash in `ResolverGlobalCtxt`, + // and combining it with other hashes here. + resolutions.visibilities_for_hashing.hash_stable(&mut hcx, &mut stable_hasher); + with_metavar_spans(|mspans| { + mspans.freeze_and_get_read_spans().hash_stable(&mut hcx, &mut stable_hasher); + }); + stable_hasher.finish() + }); + + Svh::new(crate_hash) +} + +fn upstream_crates(tcx: TyCtxt<'_>) -> Vec<(StableCrateId, Svh)> { + let mut upstream_crates: Vec<_> = tcx + .crates(()) + .iter() + .map(|&cnum| { + let stable_crate_id = tcx.stable_crate_id(cnum); + let hash = tcx.crate_hash(cnum); + (stable_crate_id, hash) + }) + .collect(); + upstream_crates.sort_unstable_by_key(|&(stable_crate_id, _)| stable_crate_id); + upstream_crates +}*/ diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index ece9dc52c292c..69bcd792e018c 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -2,11 +2,16 @@ use std::borrow::Borrow; use std::collections::hash_map::Entry; use std::fs::File; use std::io::{Read, Seek, Write}; +use std::ops::Deref; use std::path::{Path, PathBuf}; use std::sync::Arc; +//use rustc_data_structures::fingerprint::Fingerprint; +//use rustc_data_structures::Svh; use rustc_data_structures::fx::{FxIndexMap, FxIndexSet}; use rustc_data_structures::memmap::{Mmap, MmapMut}; +use rustc_data_structures::owned_slice::slice_owned; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::sync::{par_for_each_in, par_join}; use rustc_data_structures::temp_dir::MaybeTempDir; use rustc_data_structures::thousands::usize_with_underscores; @@ -17,6 +22,7 @@ use rustc_hir::definitions::DefPathData; use rustc_hir::find_attr; use rustc_hir_pretty::id_to_string; use rustc_middle::dep_graph::WorkProductId; +use rustc_middle::ich::StableHashingContext; use rustc_middle::middle::dependency_format::Linkage; use rustc_middle::mir::interpret; use rustc_middle::query::Providers; @@ -39,8 +45,14 @@ use crate::eii::EiiMapEncodedKeyValue; use crate::errors::{FailCreateFileEncoder, FailWriteFile}; use crate::rmeta::*; -pub(super) struct EncodeContext<'a, 'tcx> { +// Struct to enable split borrows. +pub(super) struct ContextEncoder<'a> { opaque: opaque::FileEncoder, + stable_hasher: StableHasher, + hcx: StableHashingContext<'a>, +} + +pub(super) struct EncodeContext<'a, 'tcx> { tcx: TyCtxt<'tcx>, feat: &'tcx rustc_feature::Features, tables: TableBuilders, @@ -67,6 +79,8 @@ pub(super) struct EncodeContext<'a, 'tcx> { hygiene_ctxt: &'a HygieneEncodeContext, // Used for both `Symbol`s and `ByteSymbol`s. symbol_index_table: FxHashMap, + + encoder: ContextEncoder<'a>, } /// If the current crate is a proc-macro, returns early with `LazyArray::default()`. @@ -80,14 +94,60 @@ macro_rules! empty_proc_macro { }; } -macro_rules! encoder_methods { +macro_rules! context_encoder_methods { ($($name:ident($ty:ty);)*) => { + #[inline] $(fn $name(&mut self, value: $ty) { + value.hash_stable(&mut self.hcx, &mut self.stable_hasher); self.opaque.$name(value) })* } } +impl<'a> Encoder for ContextEncoder<'a> { + context_encoder_methods! { + emit_usize(usize); + emit_u128(u128); + emit_u64(u64); + emit_u32(u32); + emit_u16(u16); + emit_u8(u8); + + emit_isize(isize); + emit_i128(i128); + emit_i64(i64); + emit_i32(i32); + emit_i16(i16); + + emit_raw_bytes(&[u8]); + } +} + +impl<'a> ContextEncoder<'a> { + #[inline] + pub(super) fn position(&self) -> usize { + self.opaque.position() + } + + #[inline] + pub(super) fn write_m_with(&mut self, b: &[u8; N], m: usize) { + (b[..m]).hash_stable(&mut self.hcx, &mut self.stable_hasher); + self.opaque.write_with(|dest| { + *dest = *b; + m + }); + } +} + +macro_rules! encoder_methods { + ($($name:ident($ty:ty);)*) => { + #[inline] + $(fn $name(&mut self, value: $ty) { + self.encoder.$name(value) + })* + } +} + impl<'a, 'tcx> Encoder for EncodeContext<'a, 'tcx> { encoder_methods! { emit_usize(usize); @@ -177,25 +237,19 @@ impl<'a, 'tcx> SpanEncoder for EncodeContext<'a, 'tcx> { let last_location = *o.get(); // This cannot underflow. Metadata is written with increasing position(), so any // previously saved offset must be smaller than the current position. - let offset = self.opaque.position() - last_location; + let offset = self.encoder.position() - last_location; if offset < last_location { let needed = bytes_needed(offset); SpanTag::indirect(true, needed as u8).encode(self); - self.opaque.write_with(|dest| { - *dest = offset.to_le_bytes(); - needed - }); + self.encoder.write_m_with(&offset.to_le_bytes(), needed); } else { let needed = bytes_needed(last_location); SpanTag::indirect(false, needed as u8).encode(self); - self.opaque.write_with(|dest| { - *dest = last_location.to_le_bytes(); - needed - }); + self.encoder.write_m_with(&last_location.to_le_bytes(), needed); } } Entry::Vacant(v) => { - let position = self.opaque.position(); + let position = self.encoder.position(); v.insert(position); // Data is encoded with a SpanTag prefix (see below). span.data().encode(self); @@ -372,7 +426,7 @@ impl<'a, 'tcx> TyEncoder<'tcx> for EncodeContext<'a, 'tcx> { const CLEAR_CROSS_CRATE: bool = true; fn position(&self) -> usize { - self.opaque.position() + self.encoder.position() } fn type_shorthands(&mut self) -> &mut FxHashMap, usize> { @@ -489,21 +543,21 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { ) { // if symbol/byte symbol is predefined, emit tag and symbol index if Symbol::is_predefined(index) { - self.opaque.emit_u8(SYMBOL_PREDEFINED); - self.opaque.emit_u32(index); + self.encoder.emit_u8(SYMBOL_PREDEFINED); + self.encoder.emit_u32(index); } else { // otherwise write it as string or as offset to it match self.symbol_index_table.entry(index) { Entry::Vacant(o) => { - self.opaque.emit_u8(SYMBOL_STR); - let pos = self.opaque.position(); + self.encoder.emit_u8(SYMBOL_STR); + let pos = self.encoder.position(); o.insert(pos); emit_str_or_byte_str(self); } Entry::Occupied(o) => { let x = *o.get(); - self.emit_u8(SYMBOL_OFFSET); - self.emit_usize(x); + self.encoder.emit_u8(SYMBOL_OFFSET); + self.encoder.emit_usize(x); } } } @@ -598,7 +652,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { adapted.set_some(on_disk_index, self.lazy(adapted_source_file)); } - adapted.encode(&mut self.opaque) + adapted.encode(&mut self.encoder) } fn encode_crate_root(&mut self) -> LazyValue { @@ -682,7 +736,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { // encode_def_path_table. let proc_macro_data = stat!("proc-macro-data", || self.encode_proc_macros()); - let tables = stat!("tables", || self.tables.encode(&mut self.opaque)); + let tables = stat!("tables", || self.tables.encode(&mut self.encoder)); let debugger_visualizers = stat!("debugger-visualizers", || self.encode_debugger_visualizers()); @@ -720,11 +774,26 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { let root = stat!("final", || { let attrs = tcx.hir_krate_attrs(); + let new_hash = Svh::new(self.encoder.stable_hasher.clone().finish()); + + /*eprintln!("crate: {:?}", tcx.crate_name(LOCAL_CRATE)); + eprintln!("crate HASH: {:?}", new_hash); + if let Some(hash) = tcx.untracked().local_crate_hash.get() { + eprintln!("resetting hash: {:?}", hash); + }*/ + + tcx.untracked().local_crate_hash.set(new_hash).expect("local_crate_hash set twice"); + + /*let old_hash = tcx.crate_hash(new_hash); + eprintln!("OLD HASH: {:?}", old_hash); + eprintln!("NEW HASH: {:?}", new_hash); + assert_eq!(old_hash, new_hash, "Hash mismatch!");*/ + self.lazy(CrateRoot { header: CrateHeader { name: tcx.crate_name(LOCAL_CRATE), triple: tcx.sess.opts.target_triple.clone(), - hash: tcx.crate_hash(LOCAL_CRATE), + hash: new_hash, is_proc_macro_crate: proc_macro_data.is_some(), is_stub: false, }, @@ -786,19 +855,20 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { if tcx.sess.opts.unstable_opts.meta_stats { use std::fmt::Write; - self.opaque.flush(); + let opaque = &mut self.encoder.opaque; + opaque.flush(); // Rewind and re-read all the metadata to count the zero bytes we wrote. - let pos_before_rewind = self.opaque.file().stream_position().unwrap(); + let pos_before_rewind = opaque.file().stream_position().unwrap(); let mut zero_bytes = 0; - self.opaque.file().rewind().unwrap(); - let file = std::io::BufReader::new(self.opaque.file()); + opaque.file().rewind().unwrap(); + let file = std::io::BufReader::new(opaque.file()); for e in file.bytes() { if e.unwrap() == 0 { zero_bytes += 1; } } - assert_eq!(self.opaque.file().stream_position().unwrap(), pos_before_rewind); + assert_eq!(opaque.file().stream_position().unwrap(), pos_before_rewind); stats.sort_by_key(|&(_, usize)| usize); stats.reverse(); // bigger items first @@ -1970,9 +2040,9 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { ); ( - syntax_contexts.encode(&mut self.opaque), - expn_data_table.encode(&mut self.opaque), - expn_hash_table.encode(&mut self.opaque), + syntax_contexts.encode(&mut self.encoder), + expn_data_table.encode(&mut self.encoder), + expn_hash_table.encode(&mut self.encoder), ) } @@ -2429,20 +2499,35 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: Option<&Path>) { tcx.dep_graph.assert_ignored(); // Generate the metadata stub manually, as that is a small file compared to full metadata. - if let Some(ref_path) = ref_path { + /*if let Some(ref_path) = ref_path { let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata_stub"); with_encode_metadata_header(tcx, ref_path, |ecx| { + //let new_hash = Svh::new(ecx.encoder.stable_hasher.clone().finish()); + + /*eprintln!("crate: {:?}", tcx.crate_name(LOCAL_CRATE)); + eprintln!("encoding hash HASH: {:?}", new_hash); + if let Some(hash) = tcx.untracked().local_crate_hash.get() { + eprintln!("resetting hash: {:?}", hash); + } + + + tcx.untracked().local_crate_hash.set(new_hash).expect("local_crate_hash set twice");*/ + let header: LazyValue = ecx.lazy(CrateHeader { name: tcx.crate_name(LOCAL_CRATE), triple: tcx.sess.opts.target_triple.clone(), - hash: tcx.crate_hash(LOCAL_CRATE), + hash: tcx.crate_hash(LOCAL_CRATE), /*tcx + .untracked() + .local_crate_hash + .get() + .expect("The hash should have been calculated during metadataencoding"),*/ is_proc_macro_crate: false, is_stub: true, }); header.position.get() }) - } + }*/ let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata"); @@ -2462,6 +2547,44 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: Option<&Path>) { Ok(_) => {} Err(err) => tcx.dcx().emit_fatal(FailCreateFileEncoder { err }), }; + + let file = std::fs::File::open(&source_file).unwrap(); + let mmap = unsafe { Mmap::map(file) }.unwrap(); + let owned = slice_owned(mmap, Deref::deref); + let blob = MetadataBlob::new(owned); + let header = blob.expect("file already created").get_header(); + tcx.untracked().local_crate_hash.set(header.hash).expect("local_crate_hash set twice"); + + if let Some(ref_path) = ref_path { + let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata_stub"); + + with_encode_metadata_header(tcx, ref_path, |ecx| { + //let new_hash = Svh::new(ecx.encoder.stable_hasher.clone().finish()); + + /*eprintln!("crate: {:?}", tcx.crate_name(LOCAL_CRATE)); + eprintln!("encoding hash HASH: {:?}", new_hash); + if let Some(hash) = tcx.untracked().local_crate_hash.get() { + eprintln!("resetting hash: {:?}", hash); + } + + + tcx.untracked().local_crate_hash.set(new_hash).expect("local_crate_hash set twice");*/ + + let header: LazyValue = ecx.lazy(CrateHeader { + name: tcx.crate_name(LOCAL_CRATE), + triple: tcx.sess.opts.target_triple.clone(), + hash: tcx.crate_hash(LOCAL_CRATE), /*tcx + .untracked() + .local_crate_hash + .get() + .expect("The hash should have been calculated during metadataencoding"),*/ + is_proc_macro_crate: false, + is_stub: true, + }); + header.position.get() + }) + } + return; }; @@ -2490,12 +2613,12 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: Option<&Path>) { let root = ecx.encode_crate_root(); // Flush buffer to ensure backing file has the correct size. - ecx.opaque.flush(); + ecx.encoder.opaque.flush(); // Record metadata size for self-profiling tcx.prof.artifact_size( "crate_metadata", "crate_metadata", - ecx.opaque.file().metadata().unwrap().len(), + ecx.encoder.opaque.file().metadata().unwrap().len(), ); root.position.get() @@ -2503,6 +2626,36 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: Option<&Path>) { }, None, ); + + if let Some(ref_path) = ref_path { + let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata_stub"); + + with_encode_metadata_header(tcx, ref_path, |ecx| { + //let new_hash = Svh::new(ecx.encoder.stable_hasher.clone().finish()); + + /*eprintln!("crate: {:?}", tcx.crate_name(LOCAL_CRATE)); + eprintln!("encoding hash HASH: {:?}", new_hash); + if let Some(hash) = tcx.untracked().local_crate_hash.get() { + eprintln!("resetting hash: {:?}", hash); + } + + + tcx.untracked().local_crate_hash.set(new_hash).expect("local_crate_hash set twice");*/ + + let header: LazyValue = ecx.lazy(CrateHeader { + name: tcx.crate_name(LOCAL_CRATE), + triple: tcx.sess.opts.target_triple.clone(), + hash: tcx.crate_hash(LOCAL_CRATE), /*tcx + .untracked() + .local_crate_hash + .get() + .expect("The hash should have been calculated during metadataencoding"),*/ + is_proc_macro_crate: false, + is_stub: true, + }); + header.position.get() + }) + } } fn with_encode_metadata_header( @@ -2510,53 +2663,55 @@ fn with_encode_metadata_header( path: &Path, f: impl FnOnce(&mut EncodeContext<'_, '_>) -> usize, ) { - let mut encoder = opaque::FileEncoder::new(path) - .unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err })); - encoder.emit_raw_bytes(METADATA_HEADER); - - // Will be filled with the root position after encoding everything. - encoder.emit_raw_bytes(&0u64.to_le_bytes()); - - let source_map_files = tcx.sess.source_map().files(); - let source_file_cache = (Arc::clone(&source_map_files[0]), 0); - let required_source_files = Some(FxIndexSet::default()); - drop(source_map_files); - - let hygiene_ctxt = HygieneEncodeContext::default(); - - let mut ecx = EncodeContext { - opaque: encoder, - tcx, - feat: tcx.features(), - tables: Default::default(), - lazy_state: LazyState::NoNode, - span_shorthands: Default::default(), - type_shorthands: Default::default(), - predicate_shorthands: Default::default(), - source_file_cache, - interpret_allocs: Default::default(), - required_source_files, - is_proc_macro: tcx.crate_types().contains(&CrateType::ProcMacro), - hygiene_ctxt: &hygiene_ctxt, - symbol_index_table: Default::default(), - }; + tcx.with_stable_hashing_context(|hcx| { + let mut encoder = opaque::FileEncoder::new(path) + .unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err })); + encoder.emit_raw_bytes(METADATA_HEADER); + + // Will be filled with the root position after encoding everything. + encoder.emit_raw_bytes(&0u64.to_le_bytes()); + + let source_map_files = tcx.sess.source_map().files(); + let source_file_cache = (Arc::clone(&source_map_files[0]), 0); + let required_source_files = Some(FxIndexSet::default()); + drop(source_map_files); + + let hygiene_ctxt = HygieneEncodeContext::default(); + + let mut ecx = EncodeContext { + tcx, + feat: tcx.features(), + tables: Default::default(), + lazy_state: LazyState::NoNode, + span_shorthands: Default::default(), + type_shorthands: Default::default(), + predicate_shorthands: Default::default(), + source_file_cache, + interpret_allocs: Default::default(), + required_source_files, + is_proc_macro: tcx.crate_types().contains(&CrateType::ProcMacro), + hygiene_ctxt: &hygiene_ctxt, + symbol_index_table: Default::default(), + encoder: ContextEncoder { opaque: encoder, stable_hasher: StableHasher::new(), hcx }, + }; - // Encode the rustc version string in a predictable location. - rustc_version(tcx.sess.cfg_version).encode(&mut ecx); + // Encode the rustc version string in a predictable location. + rustc_version(tcx.sess.cfg_version).encode(&mut ecx); - let root_position = f(&mut ecx); + let root_position = f(&mut ecx); - // Make sure we report any errors from writing to the file. - // If we forget this, compilation can succeed with an incomplete rmeta file, - // causing an ICE when the rmeta file is read by another compilation. - if let Err((path, err)) = ecx.opaque.finish() { - tcx.dcx().emit_fatal(FailWriteFile { path: &path, err }); - } + // Make sure we report any errors from writing to the file. + // If we forget this, compilation can succeed with an incomplete rmeta file, + // causing an ICE when the rmeta file is read by another compilation. + if let Err((path, err)) = ecx.encoder.opaque.finish() { + tcx.dcx().emit_fatal(FailWriteFile { path: &path, err }); + } - let file = ecx.opaque.file(); - if let Err(err) = encode_root_position(file, root_position) { - tcx.dcx().emit_fatal(FailWriteFile { path: ecx.opaque.path(), err }); - } + let file = ecx.encoder.opaque.file(); + if let Err(err) = encode_root_position(file, root_position) { + tcx.dcx().emit_fatal(FailWriteFile { path: ecx.encoder.opaque.path(), err }); + } + }) } fn encode_root_position(mut file: &File, pos: usize) -> Result<(), std::io::Error> { diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index c7b2eaa15ebfb..c3b36797f92d5 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -1,6 +1,7 @@ use std::marker::PhantomData; use std::num::NonZero; +use rustc_session::config::mitigation_coverage::DeniedPartialMitigation; use decoder::LazyDecoder; pub(crate) use decoder::{CrateMetadata, CrateNumMap, MetadataBlob, TargetModifiers}; use def_path_hash_map::DefPathHashMapRef; @@ -35,8 +36,6 @@ use rustc_middle::mir::ConstValue; use rustc_middle::ty::fast_reject::SimplifiedType; use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_middle::util::Providers; -use rustc_serialize::opaque::FileEncoder; -use rustc_session::config::mitigation_coverage::DeniedPartialMitigation; use rustc_session::config::{SymbolManglingVersion, TargetModifier}; use rustc_session::cstore::{CrateDepKind, ForeignModule, LinkagePreference, NativeLib}; use rustc_span::edition::Edition; @@ -46,6 +45,7 @@ use rustc_target::spec::{PanicStrategy, TargetTuple}; use table::TableBuilder; use crate::eii::EiiMapEncodedKeyValue; +use crate::rmeta::encoder::ContextEncoder; mod decoder; mod def_path_hash_map; @@ -364,7 +364,7 @@ macro_rules! define_tables { } impl TableBuilders { - fn encode(&self, buf: &mut FileEncoder) -> LazyTables { + fn encode(&self, buf: &mut ContextEncoder<'_>) -> LazyTables { LazyTables { $($name1: self.$name1.encode(buf),)+ $($name2: self.$name2.encode(buf),)+ diff --git a/compiler/rustc_metadata/src/rmeta/table.rs b/compiler/rustc_metadata/src/rmeta/table.rs index 26c5908563777..1319f17222730 100644 --- a/compiler/rustc_metadata/src/rmeta/table.rs +++ b/compiler/rustc_metadata/src/rmeta/table.rs @@ -2,6 +2,9 @@ use rustc_hir::def::CtorOf; use rustc_index::Idx; use crate::rmeta::decoder::MetaBlob; +use crate::rmeta::decoder::Metadata; +use crate::rmeta::encoder::ContextEncoder; + use crate::rmeta::*; pub(super) trait IsDefault: Default { @@ -486,15 +489,12 @@ impl> TableBui } } - pub(crate) fn encode(&self, buf: &mut FileEncoder) -> LazyTable { + pub(crate) fn encode(&self, buf: &mut ContextEncoder<'_>) -> LazyTable { let pos = buf.position(); let width = self.width; for block in &self.blocks { - buf.write_with(|dest| { - *dest = *block; - width - }); + buf.write_m_with(block, width); } LazyTable::from_position_and_encoded_size( diff --git a/compiler/rustc_middle/src/hir/map.rs b/compiler/rustc_middle/src/hir/map.rs index 68357212bebe8..affbb025f7b04 100644 --- a/compiler/rustc_middle/src/hir/map.rs +++ b/compiler/rustc_middle/src/hir/map.rs @@ -4,22 +4,16 @@ use rustc_abi::ExternAbi; use rustc_ast::visit::{VisitorResult, walk_list}; -use rustc_data_structures::fingerprint::Fingerprint; -use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; -use rustc_data_structures::svh::Svh; -use rustc_data_structures::sync::{DynSend, DynSync, par_for_each_in, spawn, try_par_for_each_in}; +use rustc_data_structures::sync::{DynSend, DynSync, par_for_each_in, try_par_for_each_in, spawn}; use rustc_hir::def::{DefKind, Res}; -use rustc_hir::def_id::{DefId, LOCAL_CRATE, LocalDefId, LocalModDefId}; +use rustc_hir::def_id::{DefId, LocalDefId, LocalModDefId}; use rustc_hir::definitions::{DefKey, DefPath, DefPathHash}; use rustc_hir::intravisit::Visitor; use rustc_hir::*; use rustc_hir_pretty as pprust_hir; -use rustc_span::def_id::StableCrateId; -use rustc_span::{ErrorGuaranteed, Ident, Span, Symbol, kw, with_metavar_spans}; +use rustc_span::{ErrorGuaranteed, Ident, Span, Symbol, kw}; use crate::hir::{ModuleItems, nested_filter}; -use crate::middle::debugger_visualizer::DebuggerVisualizerFile; -use crate::query::LocalCrate; use crate::ty::TyCtxt; /// An iterator that walks up the ancestor tree of a given `HirId`. @@ -1123,95 +1117,6 @@ impl<'tcx> pprust_hir::PpAnn for TyCtxt<'tcx> { } } -pub(super) fn crate_hash(tcx: TyCtxt<'_>, _: LocalCrate) -> Svh { - let krate = tcx.hir_crate(()); - let hir_body_hash = krate.opt_hir_hash.expect("HIR hash missing while computing crate hash"); - - let upstream_crates = upstream_crates(tcx); - - let resolutions = tcx.resolutions(()); - - // We hash the final, remapped names of all local source files so we - // don't have to include the path prefix remapping commandline args. - // If we included the full mapping in the SVH, we could only have - // reproducible builds by compiling from the same directory. So we just - // hash the result of the mapping instead of the mapping itself. - let mut source_file_names: Vec<_> = tcx - .sess - .source_map() - .files() - .iter() - .filter(|source_file| source_file.cnum == LOCAL_CRATE) - .map(|source_file| source_file.stable_id) - .collect(); - - source_file_names.sort_unstable(); - - // We have to take care of debugger visualizers explicitly. The HIR (and - // thus `hir_body_hash`) contains the #[debugger_visualizer] attributes but - // these attributes only store the file path to the visualizer file, not - // their content. Yet that content is exported into crate metadata, so any - // changes to it need to be reflected in the crate hash. - let debugger_visualizers: Vec<_> = tcx - .debugger_visualizers(LOCAL_CRATE) - .iter() - // We ignore the path to the visualizer file since it's not going to be - // encoded in crate metadata and we already hash the full contents of - // the file. - .map(DebuggerVisualizerFile::path_erased) - .collect(); - - let crate_hash: Fingerprint = tcx.with_stable_hashing_context(|mut hcx| { - let mut stable_hasher = StableHasher::new(); - hir_body_hash.hash_stable(&mut hcx, &mut stable_hasher); - upstream_crates.hash_stable(&mut hcx, &mut stable_hasher); - source_file_names.hash_stable(&mut hcx, &mut stable_hasher); - debugger_visualizers.hash_stable(&mut hcx, &mut stable_hasher); - if tcx.sess.opts.incremental.is_some() { - let definitions = tcx.untracked().definitions.freeze(); - let mut owner_spans: Vec<_> = tcx - .hir_crate_items(()) - .definitions() - .map(|def_id| { - let def_path_hash = definitions.def_path_hash(def_id); - let span = tcx.source_span(def_id); - debug_assert_eq!(span.parent(), None); - (def_path_hash, span) - }) - .collect(); - owner_spans.sort_unstable_by_key(|bn| bn.0); - owner_spans.hash_stable(&mut hcx, &mut stable_hasher); - } - tcx.sess.opts.dep_tracking_hash(true).hash_stable(&mut hcx, &mut stable_hasher); - tcx.stable_crate_id(LOCAL_CRATE).hash_stable(&mut hcx, &mut stable_hasher); - // Hash visibility information since it does not appear in HIR. - // FIXME: Figure out how to remove `visibilities_for_hashing` by hashing visibilities on - // the fly in the resolver, storing only their accumulated hash in `ResolverGlobalCtxt`, - // and combining it with other hashes here. - resolutions.visibilities_for_hashing.hash_stable(&mut hcx, &mut stable_hasher); - with_metavar_spans(|mspans| { - mspans.freeze_and_get_read_spans().hash_stable(&mut hcx, &mut stable_hasher); - }); - stable_hasher.finish() - }); - - Svh::new(crate_hash) -} - -fn upstream_crates(tcx: TyCtxt<'_>) -> Vec<(StableCrateId, Svh)> { - let mut upstream_crates: Vec<_> = tcx - .crates(()) - .iter() - .map(|&cnum| { - let stable_crate_id = tcx.stable_crate_id(cnum); - let hash = tcx.crate_hash(cnum); - (stable_crate_id, hash) - }) - .collect(); - upstream_crates.sort_unstable_by_key(|&(stable_crate_id, _)| stable_crate_id); - upstream_crates -} - pub(super) fn hir_module_items(tcx: TyCtxt<'_>, module_id: LocalModDefId) -> ModuleItems { let mut collector = ItemCollector::new(tcx, false); diff --git a/compiler/rustc_middle/src/hir/mod.rs b/compiler/rustc_middle/src/hir/mod.rs index 7f82b9161fe61..765875213e0ba 100644 --- a/compiler/rustc_middle/src/hir/mod.rs +++ b/compiler/rustc_middle/src/hir/mod.rs @@ -454,7 +454,6 @@ pub struct Hashes { pub fn provide(providers: &mut Providers) { providers.hir_crate_items = map::hir_crate_items; - providers.crate_hash = map::crate_hash; providers.hir_module_items = map::hir_module_items; providers.local_def_id_to_hir_id = |tcx, def_id| match tcx.hir_crate(()).owner(tcx, def_id) { MaybeOwner::Owner(_) => HirId::make_owner(def_id), diff --git a/compiler/rustc_session/src/cstore.rs b/compiler/rustc_session/src/cstore.rs index c186557ccaa49..9d04df3a6b98f 100644 --- a/compiler/rustc_session/src/cstore.rs +++ b/compiler/rustc_session/src/cstore.rs @@ -4,8 +4,10 @@ use std::any::Any; use std::path::PathBuf; +use std::sync::OnceLock; use rustc_abi::ExternAbi; +use rustc_data_structures::svh::Svh; use rustc_data_structures::sync::{self, AppendOnlyIndexVec, FreezeLock}; use rustc_hir::attrs::{CfgEntry, NativeLibKind, PeImportNameType}; use rustc_hir::def_id::{ @@ -223,4 +225,6 @@ pub struct Untracked { pub definitions: FreezeLock, /// The interned [StableCrateId]s. pub stable_crate_ids: FreezeLock, + /// The hash of the local crate as computed in metadata encoding. + pub local_crate_hash: OnceLock, } From e89f50ad935638ccb38f080f702645b498eb58d0 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Mon, 13 Apr 2026 17:04:12 -0400 Subject: [PATCH 2/4] remove some HIR hashing --- compiler/rustc_ast_lowering/src/lib.rs | 18 ++++++------- compiler/rustc_data_structures/src/svh.rs | 2 +- compiler/rustc_metadata/src/rmeta/mod.rs | 2 +- compiler/rustc_middle/src/hir/map.rs | 2 +- compiler/rustc_middle/src/hir/mod.rs | 29 ++++++++++++++------- compiler/rustc_middle/src/queries.rs | 1 + compiler/rustc_query_impl/src/query_impl.rs | 2 ++ 7 files changed, 35 insertions(+), 21 deletions(-) diff --git a/compiler/rustc_ast_lowering/src/lib.rs b/compiler/rustc_ast_lowering/src/lib.rs index 5e9674b7a0422..deb9e9a17273f 100644 --- a/compiler/rustc_ast_lowering/src/lib.rs +++ b/compiler/rustc_ast_lowering/src/lib.rs @@ -41,11 +41,11 @@ use std::sync::Arc; use rustc_ast::node_id::NodeMap; use rustc_ast::visit::Visitor; use rustc_ast::{self as ast, *}; -use rustc_attr_parsing::{AttributeParser, EmitAttribute, Late, OmitDoc}; -use rustc_data_structures::fingerprint::Fingerprint; +use rustc_attr_parsing::{AttributeParser, EmitAttribute, Late, OmitDoc}; +//use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::FxIndexSet; use rustc_data_structures::sorted_map::SortedMap; -use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +//use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::steal::Steal; use rustc_data_structures::tagged_ptr::TaggedRef; use rustc_errors::{DiagArgFromDisplay, DiagCtxtHandle}; @@ -57,7 +57,7 @@ use rustc_hir::{ self as hir, AngleBrackets, ConstArg, GenericArg, HirId, ItemLocalMap, LifetimeSource, LifetimeSyntax, ParamName, Target, TraitCandidate, find_attr, }; -use rustc_index::{Idx, IndexSlice, IndexVec}; +use rustc_index::{Idx, IndexVec}; use rustc_macros::extension; use rustc_middle::hir::{self as mid_hir}; use rustc_middle::span_bug; @@ -507,7 +507,7 @@ fn index_crate<'a, 'b>( /// Compute the hash for the HIR of the full crate. /// This hash will then be part of the crate_hash which is stored in the metadata. -fn compute_hir_hash( +/*fn compute_hir_hash( tcx: TyCtxt<'_>, owners: &IndexSlice>, ) -> Fingerprint { @@ -526,7 +526,7 @@ fn compute_hir_hash( hir_body_nodes.hash_stable(&mut hcx, &mut stable_hasher); stable_hasher.finish() }) -} +}*/ pub fn lower_to_hir(tcx: TyCtxt<'_>, (): ()) -> mid_hir::Crate<'_> { // Queries that borrow `resolver_for_lowering`. @@ -562,11 +562,11 @@ pub fn lower_to_hir(tcx: TyCtxt<'_>, (): ()) -> mid_hir::Crate<'_> { } // Don't hash unless necessary, because it's expensive. - let opt_hir_hash = - if tcx.needs_crate_hash() { Some(compute_hir_hash(tcx, &owners)) } else { None }; + // let opt_hir_hash = + // if tcx.needs_crate_hash() { Some(tcx.crate_hash(LOCAL_CRATE)) } else { None }; let delayed_resolver = Steal::new((resolver, krate)); - mid_hir::Crate::new(owners, delayed_ids, delayed_resolver, opt_hir_hash) + mid_hir::Crate::new(owners, delayed_ids, delayed_resolver, None) } /// Lowers an AST owner corresponding to `def_id`, now only delegations are lowered this way. diff --git a/compiler/rustc_data_structures/src/svh.rs b/compiler/rustc_data_structures/src/svh.rs index 68b224676aec7..eecdef91eee6b 100644 --- a/compiler/rustc_data_structures/src/svh.rs +++ b/compiler/rustc_data_structures/src/svh.rs @@ -23,7 +23,7 @@ use crate::fingerprint::Fingerprint; HashStable_NoContext )] pub struct Svh { - hash: Fingerprint, + pub hash: Fingerprint, } impl Svh { diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index c3b36797f92d5..5d9e7b8f36bcf 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -1,7 +1,6 @@ use std::marker::PhantomData; use std::num::NonZero; -use rustc_session::config::mitigation_coverage::DeniedPartialMitigation; use decoder::LazyDecoder; pub(crate) use decoder::{CrateMetadata, CrateNumMap, MetadataBlob, TargetModifiers}; use def_path_hash_map::DefPathHashMapRef; @@ -36,6 +35,7 @@ use rustc_middle::mir::ConstValue; use rustc_middle::ty::fast_reject::SimplifiedType; use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_middle::util::Providers; +use rustc_session::config::mitigation_coverage::DeniedPartialMitigation; use rustc_session::config::{SymbolManglingVersion, TargetModifier}; use rustc_session::cstore::{CrateDepKind, ForeignModule, LinkagePreference, NativeLib}; use rustc_span::edition::Edition; diff --git a/compiler/rustc_middle/src/hir/map.rs b/compiler/rustc_middle/src/hir/map.rs index affbb025f7b04..675e1376f9949 100644 --- a/compiler/rustc_middle/src/hir/map.rs +++ b/compiler/rustc_middle/src/hir/map.rs @@ -4,7 +4,7 @@ use rustc_abi::ExternAbi; use rustc_ast::visit::{VisitorResult, walk_list}; -use rustc_data_structures::sync::{DynSend, DynSync, par_for_each_in, try_par_for_each_in, spawn}; +use rustc_data_structures::sync::{DynSend, DynSync, par_for_each_in, spawn, try_par_for_each_in}; use rustc_hir::def::{DefKind, Res}; use rustc_hir::def_id::{DefId, LocalDefId, LocalModDefId}; use rustc_hir::definitions::{DefKey, DefPath, DefPathHash}; diff --git a/compiler/rustc_middle/src/hir/mod.rs b/compiler/rustc_middle/src/hir/mod.rs index 765875213e0ba..f0af235208d82 100644 --- a/compiler/rustc_middle/src/hir/mod.rs +++ b/compiler/rustc_middle/src/hir/mod.rs @@ -8,19 +8,21 @@ pub mod place; use std::sync::Arc; +//use rustc_hir::def_id::LOCAL_CRATE; use rustc_ast::{self as ast}; use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::FxIndexSet; use rustc_data_structures::sorted_map::SortedMap; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::steal::Steal; +use rustc_data_structures::svh::Svh; use rustc_data_structures::sync::{DynSend, DynSync, try_par_for_each_in}; use rustc_hir::def::{DefKind, Res}; use rustc_hir::def_id::{DefId, LocalDefId, LocalModDefId}; use rustc_hir::*; use rustc_index::IndexVec; use rustc_macros::{Decodable, Encodable, HashStable}; -use rustc_span::{ErrorGuaranteed, ExpnId, HashStableContext, Span}; +use rustc_span::{ErrorGuaranteed, ExpnId, Span}; use crate::query::Providers; use crate::ty::{ResolverAstLowering, TyCtxt}; @@ -31,8 +33,8 @@ use crate::ty::{ResolverAstLowering, TyCtxt}; /// For more details, see the [rustc dev guide]. /// /// [rustc dev guide]: https://rustc-dev-guide.rust-lang.org/hir.html -#[derive(Debug)] pub struct Crate<'hir> { + // tcx: TyCtxt<'hir>, // This field is private by intention, access it through `owner` method. owners: IndexVec>, // Ids of delayed AST owners which are lowered through `lower_delayed_owner` query. @@ -41,7 +43,17 @@ pub struct Crate<'hir> { // and then stolen and dropped in `force_delayed_owners_lowering`. pub delayed_resolver: Steal<(ResolverAstLowering<'hir>, Arc)>, // Only present when incr. comp. is enabled. - pub opt_hir_hash: Option, + pub opt_hash: Option, +} + +impl std::fmt::Debug for Crate<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Crate") + .field("owners", &self.owners) + .field("delayed_ids", &self.delayed_ids) + .field("delayed_resolver", &self.delayed_resolver) + .finish() + } } impl<'hir> Crate<'hir> { @@ -49,9 +61,9 @@ impl<'hir> Crate<'hir> { owners: IndexVec>, delayed_ids: FxIndexSet, delayed_resolver: Steal<(ResolverAstLowering<'hir>, Arc)>, - opt_hir_hash: Option, + opt_hash: Option, ) -> Crate<'hir> { - Crate { owners, delayed_ids, delayed_resolver, opt_hir_hash } + Crate { owners, delayed_ids, delayed_resolver, opt_hash } } /// Serves as an entry point for getting `MaybeOwner`. As owner can either be in @@ -76,12 +88,11 @@ impl<'hir> Crate<'hir> { } } -impl HashStable for Crate<'_> { +/*impl HashStable for Crate<'_> { fn hash_stable(&self, hcx: &mut Hcx, hasher: &mut StableHasher) { - let Crate { opt_hir_hash, .. } = self; - opt_hir_hash.unwrap().hash_stable(hcx, hasher) + self.tcx.crate_hash(LOCAL_CRATE).hash_stable(hcx, hasher) } -} +}*/ /// Gather the LocalDefId for each item-like within a module, including items contained within /// bodies. The Ids are in visitor order. This is used to partition a pass between modules. diff --git a/compiler/rustc_middle/src/queries.rs b/compiler/rustc_middle/src/queries.rs index 7c6ab642b2736..9cbabc697b452 100644 --- a/compiler/rustc_middle/src/queries.rs +++ b/compiler/rustc_middle/src/queries.rs @@ -208,6 +208,7 @@ rustc_queries! { query hir_crate(key: ()) -> &'tcx Crate<'tcx> { arena_cache eval_always + no_hash desc { "getting the crate HIR" } } diff --git a/compiler/rustc_query_impl/src/query_impl.rs b/compiler/rustc_query_impl/src/query_impl.rs index 4425acc6b86b8..a272449733bf6 100644 --- a/compiler/rustc_query_impl/src/query_impl.rs +++ b/compiler/rustc_query_impl/src/query_impl.rs @@ -1,3 +1,4 @@ +//use rustc_data_structures::fingerprint::Fingerprint; use rustc_middle::queries::TaggedQueryKey; use rustc_middle::query::erase::{self, Erased}; use rustc_middle::query::{AsLocalQueryKey, QueryMode, QueryVTable}; @@ -189,6 +190,7 @@ macro_rules! define_queries { hash_value_fn: Some(|hcx, erased_value: &erase::Erased>| { let value = erase::restore_val(*erased_value); rustc_middle::dep_graph::hash_result(hcx, &value) + //Fingerprint::new(0, 0) }), format_value: |erased_value: &erase::Erased>| { From 3ea7557663b62ab7c20807352d2670a45feae01f Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Tue, 14 Apr 2026 12:59:11 -0400 Subject: [PATCH 3/4] Comment out a currently failing test DO NOT SUBMIT! --- compiler/rustc_ast_lowering/src/lib.rs | 21 ++++++++------ compiler/rustc_data_structures/src/svh.rs | 2 +- compiler/rustc_metadata/src/fs.rs | 2 +- .../src/rmeta/decoder/cstore_impl.rs | 29 ++++++++++++------- compiler/rustc_metadata/src/rmeta/encoder.rs | 4 +++ compiler/rustc_metadata/src/rmeta/table.rs | 2 -- compiler/rustc_middle/src/hir/mod.rs | 29 ++++++------------- compiler/rustc_query_impl/src/query_impl.rs | 2 -- 8 files changed, 45 insertions(+), 46 deletions(-) diff --git a/compiler/rustc_ast_lowering/src/lib.rs b/compiler/rustc_ast_lowering/src/lib.rs index deb9e9a17273f..1d8f4e1c62625 100644 --- a/compiler/rustc_ast_lowering/src/lib.rs +++ b/compiler/rustc_ast_lowering/src/lib.rs @@ -41,11 +41,11 @@ use std::sync::Arc; use rustc_ast::node_id::NodeMap; use rustc_ast::visit::Visitor; use rustc_ast::{self as ast, *}; -use rustc_attr_parsing::{AttributeParser, EmitAttribute, Late, OmitDoc}; -//use rustc_data_structures::fingerprint::Fingerprint; +use rustc_attr_parsing::{AttributeParser, EmitAttribute, Late, OmitDoc}; +use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::FxIndexSet; use rustc_data_structures::sorted_map::SortedMap; -//use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::steal::Steal; use rustc_data_structures::tagged_ptr::TaggedRef; use rustc_errors::{DiagArgFromDisplay, DiagCtxtHandle}; @@ -57,7 +57,7 @@ use rustc_hir::{ self as hir, AngleBrackets, ConstArg, GenericArg, HirId, ItemLocalMap, LifetimeSource, LifetimeSyntax, ParamName, Target, TraitCandidate, find_attr, }; -use rustc_index::{Idx, IndexVec}; +use rustc_index::{Idx, IndexSlice, IndexVec}; use rustc_macros::extension; use rustc_middle::hir::{self as mid_hir}; use rustc_middle::span_bug; @@ -507,7 +507,7 @@ fn index_crate<'a, 'b>( /// Compute the hash for the HIR of the full crate. /// This hash will then be part of the crate_hash which is stored in the metadata. -/*fn compute_hir_hash( +fn compute_hir_hash( tcx: TyCtxt<'_>, owners: &IndexSlice>, ) -> Fingerprint { @@ -526,7 +526,7 @@ fn index_crate<'a, 'b>( hir_body_nodes.hash_stable(&mut hcx, &mut stable_hasher); stable_hasher.finish() }) -}*/ +} pub fn lower_to_hir(tcx: TyCtxt<'_>, (): ()) -> mid_hir::Crate<'_> { // Queries that borrow `resolver_for_lowering`. @@ -562,11 +562,14 @@ pub fn lower_to_hir(tcx: TyCtxt<'_>, (): ()) -> mid_hir::Crate<'_> { } // Don't hash unless necessary, because it's expensive. - // let opt_hir_hash = - // if tcx.needs_crate_hash() { Some(tcx.crate_hash(LOCAL_CRATE)) } else { None }; + let opt_hir_hash = if tcx.needs_crate_hash() && !tcx.needs_metadata() { + Some(compute_hir_hash(tcx, &owners)) + } else { + None + }; let delayed_resolver = Steal::new((resolver, krate)); - mid_hir::Crate::new(owners, delayed_ids, delayed_resolver, None) + mid_hir::Crate::new(owners, delayed_ids, delayed_resolver, opt_hir_hash) } /// Lowers an AST owner corresponding to `def_id`, now only delegations are lowered this way. diff --git a/compiler/rustc_data_structures/src/svh.rs b/compiler/rustc_data_structures/src/svh.rs index eecdef91eee6b..68b224676aec7 100644 --- a/compiler/rustc_data_structures/src/svh.rs +++ b/compiler/rustc_data_structures/src/svh.rs @@ -23,7 +23,7 @@ use crate::fingerprint::Fingerprint; HashStable_NoContext )] pub struct Svh { - pub hash: Fingerprint, + hash: Fingerprint, } impl Svh { diff --git a/compiler/rustc_metadata/src/fs.rs b/compiler/rustc_metadata/src/fs.rs index 66af70717d64f..1eaad26ff8e80 100644 --- a/compiler/rustc_metadata/src/fs.rs +++ b/compiler/rustc_metadata/src/fs.rs @@ -54,7 +54,7 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> EncodedMetadata { None }; - if tcx.needs_metadata() || tcx.needs_crate_hash() { + if tcx.needs_metadata() { encode_metadata(tcx, &metadata_filename, metadata_stub_filename.as_deref()); } else { // Always create a file at `metadata_filename`, even if we have nothing to write to it. diff --git a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs index bc42d578c0c51..90169d72a8932 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs @@ -2,6 +2,9 @@ use std::any::Any; use std::mem; use std::sync::Arc; +use rustc_data_structures::fingerprint::Fingerprint; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_data_structures::svh::Svh; use rustc_hir::attrs::Deprecation; use rustc_hir::def::{CtorKind, DefKind}; use rustc_hir::def_id::{CrateNum, DefId, DefIdMap, LOCAL_CRATE}; @@ -9,6 +12,7 @@ use rustc_hir::definitions::{DefKey, DefPath, DefPathHash}; use rustc_middle::arena::ArenaAllocatable; use rustc_middle::bug; use rustc_middle::metadata::{AmbigModChild, ModChild}; +use rustc_middle::middle::debugger_visualizer::DebuggerVisualizerFile; use rustc_middle::middle::exported_symbols::ExportedSymbol; use rustc_middle::middle::stability::DeprecationEntry; use rustc_middle::queries::ExternProviders; @@ -20,7 +24,7 @@ use rustc_serialize::Decoder; use rustc_session::StableCrateId; use rustc_session::cstore::{CrateStore, ExternCrate}; use rustc_span::hygiene::ExpnId; -use rustc_span::{Span, Symbol, kw}; +use rustc_span::{Span, Symbol, kw, with_metavar_spans}; use super::{Decodable, DecodeIterator}; use crate::creader::{CStore, LoadedMacro}; @@ -752,17 +756,20 @@ fn provide_cstore_hooks(providers: &mut Providers) { }; providers.queries.crate_hash = |tcx: TyCtxt<'_>, _: LocalCrate| { - *tcx.untracked() - .local_crate_hash - .get() - .expect("crate_hash(LOCAL_CRATE) called before metadata encoding") + if tcx.needs_metadata() { + *tcx.untracked() + .local_crate_hash + .get() + .expect("crate_hash(LOCAL_CRATE) called before metadata encoding") + } else { + crate_hash(tcx) + } }; } -/*pub(super) fn crate_hash(tcx: TyCtxt<'_>, cnum: rustc_hir::def_id::CrateNum) -> Svh { - let cstore = CStore::from_tcx(tcx); - let crate_data = cstore.get_crate_data(cnum); - crate_data.root.header.hash +pub(super) fn crate_hash(tcx: TyCtxt<'_>) -> Svh { + let krate = tcx.hir_crate(()); + let hir_body_hash = krate.opt_hir_hash.expect("HIR hash missing while computing crate hash"); let upstream_crates = upstream_crates(tcx); @@ -800,7 +807,7 @@ fn provide_cstore_hooks(providers: &mut Providers) { let crate_hash: Fingerprint = tcx.with_stable_hashing_context(|mut hcx| { let mut stable_hasher = StableHasher::new(); - metadata_hash.hash_stable(&mut hcx, &mut stable_hasher); + hir_body_hash.hash_stable(&mut hcx, &mut stable_hasher); upstream_crates.hash_stable(&mut hcx, &mut stable_hasher); source_file_names.hash_stable(&mut hcx, &mut stable_hasher); debugger_visualizers.hash_stable(&mut hcx, &mut stable_hasher); @@ -847,4 +854,4 @@ fn upstream_crates(tcx: TyCtxt<'_>) -> Vec<(StableCrateId, Svh)> { .collect(); upstream_crates.sort_unstable_by_key(|&(stable_crate_id, _)| stable_crate_id); upstream_crates -}*/ +} diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index 69bcd792e018c..abb1917a455ce 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -774,6 +774,10 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { let root = stat!("final", || { let attrs = tcx.hir_krate_attrs(); + tcx.sess + .opts + .dep_tracking_hash(true) + .hash_stable(&mut self.encoder.hcx, &mut self.encoder.stable_hasher); let new_hash = Svh::new(self.encoder.stable_hasher.clone().finish()); /*eprintln!("crate: {:?}", tcx.crate_name(LOCAL_CRATE)); diff --git a/compiler/rustc_metadata/src/rmeta/table.rs b/compiler/rustc_metadata/src/rmeta/table.rs index 1319f17222730..1010a24209751 100644 --- a/compiler/rustc_metadata/src/rmeta/table.rs +++ b/compiler/rustc_metadata/src/rmeta/table.rs @@ -2,9 +2,7 @@ use rustc_hir::def::CtorOf; use rustc_index::Idx; use crate::rmeta::decoder::MetaBlob; -use crate::rmeta::decoder::Metadata; use crate::rmeta::encoder::ContextEncoder; - use crate::rmeta::*; pub(super) trait IsDefault: Default { diff --git a/compiler/rustc_middle/src/hir/mod.rs b/compiler/rustc_middle/src/hir/mod.rs index f0af235208d82..765875213e0ba 100644 --- a/compiler/rustc_middle/src/hir/mod.rs +++ b/compiler/rustc_middle/src/hir/mod.rs @@ -8,21 +8,19 @@ pub mod place; use std::sync::Arc; -//use rustc_hir::def_id::LOCAL_CRATE; use rustc_ast::{self as ast}; use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::FxIndexSet; use rustc_data_structures::sorted_map::SortedMap; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::steal::Steal; -use rustc_data_structures::svh::Svh; use rustc_data_structures::sync::{DynSend, DynSync, try_par_for_each_in}; use rustc_hir::def::{DefKind, Res}; use rustc_hir::def_id::{DefId, LocalDefId, LocalModDefId}; use rustc_hir::*; use rustc_index::IndexVec; use rustc_macros::{Decodable, Encodable, HashStable}; -use rustc_span::{ErrorGuaranteed, ExpnId, Span}; +use rustc_span::{ErrorGuaranteed, ExpnId, HashStableContext, Span}; use crate::query::Providers; use crate::ty::{ResolverAstLowering, TyCtxt}; @@ -33,8 +31,8 @@ use crate::ty::{ResolverAstLowering, TyCtxt}; /// For more details, see the [rustc dev guide]. /// /// [rustc dev guide]: https://rustc-dev-guide.rust-lang.org/hir.html +#[derive(Debug)] pub struct Crate<'hir> { - // tcx: TyCtxt<'hir>, // This field is private by intention, access it through `owner` method. owners: IndexVec>, // Ids of delayed AST owners which are lowered through `lower_delayed_owner` query. @@ -43,17 +41,7 @@ pub struct Crate<'hir> { // and then stolen and dropped in `force_delayed_owners_lowering`. pub delayed_resolver: Steal<(ResolverAstLowering<'hir>, Arc)>, // Only present when incr. comp. is enabled. - pub opt_hash: Option, -} - -impl std::fmt::Debug for Crate<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Crate") - .field("owners", &self.owners) - .field("delayed_ids", &self.delayed_ids) - .field("delayed_resolver", &self.delayed_resolver) - .finish() - } + pub opt_hir_hash: Option, } impl<'hir> Crate<'hir> { @@ -61,9 +49,9 @@ impl<'hir> Crate<'hir> { owners: IndexVec>, delayed_ids: FxIndexSet, delayed_resolver: Steal<(ResolverAstLowering<'hir>, Arc)>, - opt_hash: Option, + opt_hir_hash: Option, ) -> Crate<'hir> { - Crate { owners, delayed_ids, delayed_resolver, opt_hash } + Crate { owners, delayed_ids, delayed_resolver, opt_hir_hash } } /// Serves as an entry point for getting `MaybeOwner`. As owner can either be in @@ -88,11 +76,12 @@ impl<'hir> Crate<'hir> { } } -/*impl HashStable for Crate<'_> { +impl HashStable for Crate<'_> { fn hash_stable(&self, hcx: &mut Hcx, hasher: &mut StableHasher) { - self.tcx.crate_hash(LOCAL_CRATE).hash_stable(hcx, hasher) + let Crate { opt_hir_hash, .. } = self; + opt_hir_hash.unwrap().hash_stable(hcx, hasher) } -}*/ +} /// Gather the LocalDefId for each item-like within a module, including items contained within /// bodies. The Ids are in visitor order. This is used to partition a pass between modules. diff --git a/compiler/rustc_query_impl/src/query_impl.rs b/compiler/rustc_query_impl/src/query_impl.rs index a272449733bf6..4425acc6b86b8 100644 --- a/compiler/rustc_query_impl/src/query_impl.rs +++ b/compiler/rustc_query_impl/src/query_impl.rs @@ -1,4 +1,3 @@ -//use rustc_data_structures::fingerprint::Fingerprint; use rustc_middle::queries::TaggedQueryKey; use rustc_middle::query::erase::{self, Erased}; use rustc_middle::query::{AsLocalQueryKey, QueryMode, QueryVTable}; @@ -190,7 +189,6 @@ macro_rules! define_queries { hash_value_fn: Some(|hcx, erased_value: &erase::Erased>| { let value = erase::restore_val(*erased_value); rustc_middle::dep_graph::hash_result(hcx, &value) - //Fingerprint::new(0, 0) }), format_value: |erased_value: &erase::Erased>| { From ca61d35e8206065e2b104c6b24483abe0a2088ec Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Tue, 28 Apr 2026 14:06:29 -0400 Subject: [PATCH 4/4] Checkpointing an attempted optimization --- compiler/rustc_metadata/src/lib.rs | 1 + compiler/rustc_metadata/src/rmeta/encoder.rs | 30 +- compiler/rustc_metadata/src/rmeta/leb128.rs | 167 +++++++ compiler/rustc_metadata/src/rmeta/mod.rs | 2 + compiler/rustc_metadata/src/rmeta/opaque.rs | 483 +++++++++++++++++++ 5 files changed, 668 insertions(+), 15 deletions(-) create mode 100644 compiler/rustc_metadata/src/rmeta/leb128.rs create mode 100644 compiler/rustc_metadata/src/rmeta/opaque.rs diff --git a/compiler/rustc_metadata/src/lib.rs b/compiler/rustc_metadata/src/lib.rs index 1dff5740ab3bc..f76e975620d99 100644 --- a/compiler/rustc_metadata/src/lib.rs +++ b/compiler/rustc_metadata/src/lib.rs @@ -1,5 +1,6 @@ // tidy-alphabetical-start #![allow(internal_features)] +#![feature(core_intrinsics)] #![feature(error_iter)] #![feature(file_buffered)] #![feature(gen_blocks)] diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index abb1917a455ce..1bc6f2394874e 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -22,7 +22,7 @@ use rustc_hir::definitions::DefPathData; use rustc_hir::find_attr; use rustc_hir_pretty::id_to_string; use rustc_middle::dep_graph::WorkProductId; -use rustc_middle::ich::StableHashingContext; +//use rustc_middle::ich::StableHashingContext; use rustc_middle::middle::dependency_format::Linkage; use rustc_middle::mir::interpret; use rustc_middle::query::Providers; @@ -31,7 +31,7 @@ use rustc_middle::ty::AssocContainer; use rustc_middle::ty::codec::TyEncoder; use rustc_middle::ty::fast_reject::{self, TreatParams}; use rustc_middle::{bug, span_bug}; -use rustc_serialize::{Decodable, Decoder, Encodable, Encoder, opaque}; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; use rustc_session::config::mitigation_coverage::DeniedPartialMitigation; use rustc_session::config::{CrateType, OptLevel, TargetModifier}; use rustc_span::hygiene::HygieneEncodeContext; @@ -47,9 +47,7 @@ use crate::rmeta::*; // Struct to enable split borrows. pub(super) struct ContextEncoder<'a> { - opaque: opaque::FileEncoder, - stable_hasher: StableHasher, - hcx: StableHashingContext<'a>, + opaque: opaque::FileEncoder<'a>, } pub(super) struct EncodeContext<'a, 'tcx> { @@ -98,7 +96,7 @@ macro_rules! context_encoder_methods { ($($name:ident($ty:ty);)*) => { #[inline] $(fn $name(&mut self, value: $ty) { - value.hash_stable(&mut self.hcx, &mut self.stable_hasher); + //value.hash_stable(&mut self.hcx, &mut self.stable_hasher); self.opaque.$name(value) })* } @@ -131,7 +129,7 @@ impl<'a> ContextEncoder<'a> { #[inline] pub(super) fn write_m_with(&mut self, b: &[u8; N], m: usize) { - (b[..m]).hash_stable(&mut self.hcx, &mut self.stable_hasher); + //(b[..m]).hash_stable(&mut self.hcx, &mut self.stable_hasher); self.opaque.write_with(|dest| { *dest = *b; m @@ -774,11 +772,11 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { let root = stat!("final", || { let attrs = tcx.hir_krate_attrs(); - tcx.sess - .opts - .dep_tracking_hash(true) - .hash_stable(&mut self.encoder.hcx, &mut self.encoder.stable_hasher); - let new_hash = Svh::new(self.encoder.stable_hasher.clone().finish()); + /*tcx.sess + .opts + .dep_tracking_hash(true) + .hash_stable(&mut self.encoder.hcx, &mut self.encoder.stable_hasher)*/ + let new_hash = Svh::new(self.encoder.opaque.hash()); /*eprintln!("crate: {:?}", tcx.crate_name(LOCAL_CRATE)); eprintln!("crate HASH: {:?}", new_hash); @@ -2667,8 +2665,10 @@ fn with_encode_metadata_header( path: &Path, f: impl FnOnce(&mut EncodeContext<'_, '_>) -> usize, ) { - tcx.with_stable_hashing_context(|hcx| { - let mut encoder = opaque::FileEncoder::new(path) + tcx.with_stable_hashing_context(|mut hcx| { + let mut stable_hasher = StableHasher::new(); + tcx.sess.opts.dep_tracking_hash(true).hash_stable(&mut hcx, &mut stable_hasher); + let mut encoder = opaque::FileEncoder::new(path, hcx, &mut stable_hasher) .unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err })); encoder.emit_raw_bytes(METADATA_HEADER); @@ -2696,7 +2696,7 @@ fn with_encode_metadata_header( is_proc_macro: tcx.crate_types().contains(&CrateType::ProcMacro), hygiene_ctxt: &hygiene_ctxt, symbol_index_table: Default::default(), - encoder: ContextEncoder { opaque: encoder, stable_hasher: StableHasher::new(), hcx }, + encoder: ContextEncoder { opaque: encoder }, }; // Encode the rustc version string in a predictable location. diff --git a/compiler/rustc_metadata/src/rmeta/leb128.rs b/compiler/rustc_metadata/src/rmeta/leb128.rs new file mode 100644 index 0000000000000..67348b35fd85c --- /dev/null +++ b/compiler/rustc_metadata/src/rmeta/leb128.rs @@ -0,0 +1,167 @@ +// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance. +// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727 +use rustc_serialize::Decoder; +use rustc_serialize::int_overflow::DebugStrictAdd; + +use crate::rmeta::opaque::MemDecoder; + +/// Returns the length of the longest LEB128 encoding for `T`, assuming `T` is an integer type +pub(super) const fn max_leb128_len() -> usize { + // The longest LEB128 encoding for an integer uses 7 bits per byte. + (size_of::() * 8).div_ceil(7) +} + +/// Returns the length of the longest LEB128 encoding of all supported integer types. +/*pub(super) const fn largest_max_leb128_len() -> usize { + max_leb128_len::() +}*/ + +macro_rules! impl_write_unsigned_leb128 { + ($fn_name:ident, $int_ty:ty) => { + #[inline] + pub(super) fn $fn_name( + out: &mut [u8; max_leb128_len::<$int_ty>()], + mut value: $int_ty, + ) -> usize { + let mut i = 0; + + loop { + if value < 0x80 { + unsafe { + *out.get_unchecked_mut(i) = value as u8; + } + + i = i.debug_strict_add(1); + break; + } else { + unsafe { + *out.get_unchecked_mut(i) = ((value & 0x7f) | 0x80) as u8; + } + + value >>= 7; + i = i.debug_strict_add(1); + } + } + + i + } + }; +} + +//impl_write_unsigned_leb128!(write_u16_leb128, u16); +impl_write_unsigned_leb128!(write_u32_leb128, u32); +impl_write_unsigned_leb128!(write_u64_leb128, u64); +impl_write_unsigned_leb128!(write_u128_leb128, u128); +impl_write_unsigned_leb128!(write_usize_leb128, usize); + +macro_rules! impl_read_unsigned_leb128 { + ($fn_name:ident, $int_ty:ty) => { + #[inline] + pub(super) fn $fn_name(decoder: &mut MemDecoder<'_>) -> $int_ty { + // The first iteration of this loop is unpeeled. This is a + // performance win because this code is hot and integer values less + // than 128 are very common, typically occurring 50-80% or more of + // the time, even for u64 and u128. + let byte = decoder.read_u8(); + if (byte & 0x80) == 0 { + return byte as $int_ty; + } + let mut result = (byte & 0x7F) as $int_ty; + let mut shift = 7; + loop { + let byte = decoder.read_u8(); + if (byte & 0x80) == 0 { + result |= (byte as $int_ty) << shift; + return result; + } else { + result |= ((byte & 0x7F) as $int_ty) << shift; + } + shift = shift.debug_strict_add(7); + } + } + }; +} + +//impl_read_unsigned_leb128!(read_u16_leb128, u16); +impl_read_unsigned_leb128!(read_u32_leb128, u32); +impl_read_unsigned_leb128!(read_u64_leb128, u64); +impl_read_unsigned_leb128!(read_u128_leb128, u128); +impl_read_unsigned_leb128!(read_usize_leb128, usize); + +macro_rules! impl_write_signed_leb128 { + ($fn_name:ident, $int_ty:ty) => { + #[inline] + pub(super) fn $fn_name( + out: &mut [u8; max_leb128_len::<$int_ty>()], + mut value: $int_ty, + ) -> usize { + let mut i = 0; + + loop { + let mut byte = (value as u8) & 0x7f; + value >>= 7; + let more = !(((value == 0) && ((byte & 0x40) == 0)) + || ((value == -1) && ((byte & 0x40) != 0))); + + if more { + byte |= 0x80; // Mark this byte to show that more bytes will follow. + } + + unsafe { + *out.get_unchecked_mut(i) = byte; + } + + i = i.debug_strict_add(1); + + if !more { + break; + } + } + + i + } + }; +} + +//impl_write_signed_leb128!(write_i16_leb128, i16); +impl_write_signed_leb128!(write_i32_leb128, i32); +impl_write_signed_leb128!(write_i64_leb128, i64); +impl_write_signed_leb128!(write_i128_leb128, i128); +impl_write_signed_leb128!(write_isize_leb128, isize); + +macro_rules! impl_read_signed_leb128 { + ($fn_name:ident, $int_ty:ty) => { + #[inline] + pub(super) fn $fn_name(decoder: &mut MemDecoder<'_>) -> $int_ty { + let mut result = 0; + let mut shift = 0; + let mut byte; + + loop { + byte = decoder.read_u8(); + result |= <$int_ty>::from(byte & 0x7F) << shift; + shift = shift.debug_strict_add(7); + + if (byte & 0x80) == 0 { + break; + } + } + + if (shift < <$int_ty>::BITS) && ((byte & 0x40) != 0) { + // sign extend + result |= (!0 << shift); + } + + result + } + }; +} + +//impl_read_signed_leb128!(read_i16_leb128, i16); +impl_read_signed_leb128!(read_i32_leb128, i32); +impl_read_signed_leb128!(read_i64_leb128, i64); +impl_read_signed_leb128!(read_i128_leb128, i128); +impl_read_signed_leb128!(read_isize_leb128, isize); + +/*#[cfg(test)] +mod tests;*/ diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index 5d9e7b8f36bcf..ca248dad8c3e4 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -50,6 +50,8 @@ use crate::rmeta::encoder::ContextEncoder; mod decoder; mod def_path_hash_map; mod encoder; +mod leb128; +mod opaque; mod parameterized; mod table; diff --git a/compiler/rustc_metadata/src/rmeta/opaque.rs b/compiler/rustc_metadata/src/rmeta/opaque.rs new file mode 100644 index 0000000000000..9d34f1f9ec215 --- /dev/null +++ b/compiler/rustc_metadata/src/rmeta/opaque.rs @@ -0,0 +1,483 @@ +use std::fs::File; +use std::io::{self, Write}; +use std::marker::PhantomData; +//use std::ops::Range; +use std::path::{Path, PathBuf}; + +use rustc_data_structures::fingerprint::Fingerprint; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_middle::ich::StableHashingContext; +use rustc_serialize::int_overflow::DebugStrictAdd; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; + +// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance. +// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727 +use crate::rmeta::leb128; + +//pub mod mem_encoder; + +// ----------------------------------------------------------------------------- +// Encoder +// ----------------------------------------------------------------------------- + +pub(super) type FileEncodeResult = Result; + +pub(super) const MAGIC_END_BYTES: &[u8] = b"rust-end-file"; + +/// The size of the buffer in `FileEncoder`. +const BUF_SIZE: usize = 64 * 1024; + +/// `FileEncoder` encodes data to file via fixed-size buffer. +/// +/// There used to be a `MemEncoder` type that encoded all the data into a +/// `Vec`. `FileEncoder` is better because its memory use is determined by the +/// size of the buffer, rather than the full length of the encoded data, and +/// because it doesn't need to reallocate memory along the way. +pub(super) struct FileEncoder<'a> { + // The input buffer. For adequate performance, we need to be able to write + // directly to the unwritten region of the buffer, without calling copy_from_slice. + // Note that our buffer is always initialized so that we can do that direct access + // without unsafe code. Users of this type write many more than BUF_SIZE bytes, so the + // initialization is approximately free. + buf: Box<[u8; BUF_SIZE]>, + buffered: usize, + flushed: usize, + file: File, + // This is used to implement delayed error handling, as described in the + // comment on `trait Encoder`. + res: Result<(), io::Error>, + path: PathBuf, + stable_hasher: &'a mut StableHasher, + hcx: StableHashingContext<'a>, + #[cfg(debug_assertions)] + finished: bool, +} + +impl<'a> FileEncoder<'a> { + pub(super) fn new>( + path: P, + hcx: StableHashingContext<'a>, + stable_hasher: &'a mut StableHasher, + ) -> io::Result { + // File::create opens the file for writing only. When -Zmeta-stats is enabled, the metadata + // encoder rewinds the file to inspect what was written. So we need to always open the file + // for reading and writing. + let file = + File::options().read(true).write(true).create(true).truncate(true).open(&path)?; + + Ok(FileEncoder { + buf: vec![0u8; BUF_SIZE].into_boxed_slice().try_into().unwrap(), + path: path.as_ref().into(), + buffered: 0, + flushed: 0, + file, + res: Ok(()), + stable_hasher, + hcx, + #[cfg(debug_assertions)] + finished: false, + }) + } + + #[inline] + pub(super) fn position(&self) -> usize { + // Tracking position this way instead of having a `self.position` field + // means that we only need to update `self.buffered` on a write call, + // as opposed to updating `self.position` and `self.buffered`. + self.flushed.debug_strict_add(self.buffered) + } + + #[cold] + #[inline(never)] + pub(super) fn flush(&mut self) { + #[cfg(debug_assertions)] + { + self.finished = false; + } + if self.res.is_ok() { + self.res = self.file.write_all(&self.buf[..self.buffered]); + } + self.flushed += self.buffered; + self.buf[..self.buffered].hash_stable(&mut self.hcx, &mut self.stable_hasher); + self.buffered = 0; + } + + #[inline] + pub(super) fn file(&self) -> &File { + &self.file + } + + #[inline] + pub(super) fn path(&self) -> &Path { + &self.path + } + + #[inline] + fn buffer_empty(&mut self) -> &mut [u8] { + // SAFETY: self.buffered is inbounds as an invariant of the type + unsafe { self.buf.get_unchecked_mut(self.buffered..) } + } + + #[cold] + #[inline(never)] + fn write_all_cold_path(&mut self, buf: &[u8]) { + self.flush(); + if let Some(dest) = self.buf.get_mut(..buf.len()) { + dest.copy_from_slice(buf); + self.buffered += buf.len(); + } else { + if self.res.is_ok() { + buf.hash_stable(&mut self.hcx, &mut self.stable_hasher); + self.res = self.file.write_all(buf); + } + self.flushed += buf.len(); + } + } + + #[inline] + fn write_all(&mut self, buf: &[u8]) { + #[cfg(debug_assertions)] + { + self.finished = false; + } + if let Some(dest) = self.buffer_empty().get_mut(..buf.len()) { + dest.copy_from_slice(buf); + self.buffered = self.buffered.debug_strict_add(buf.len()); + } else { + self.write_all_cold_path(buf); + } + } + + /// Write up to `N` bytes to this encoder. + /// + /// This function can be used to avoid the overhead of calling memcpy for writes that + /// have runtime-variable length, but are small and have a small fixed upper bound. + /// + /// This can be used to do in-place encoding as is done for leb128 (without this function + /// we would need to write to a temporary buffer then memcpy into the encoder), and it can + /// also be used to implement the varint scheme we use for rmeta and dep graph encoding, + /// where we only want to encode the first few bytes of an integer. Copying in the whole + /// integer then only advancing the encoder state for the few bytes we care about is more + /// efficient than calling [`FileEncoder::write_all`], because variable-size copies are + /// always lowered to `memcpy`, which has overhead and contains a lot of logic we can bypass + /// with this function. Note that common architectures support fixed-size writes up to 8 bytes + /// with one instruction, so while this does in some sense do wasted work, we come out ahead. + #[inline] + pub(super) fn write_with( + &mut self, + visitor: impl FnOnce(&mut [u8; N]) -> usize, + ) { + #[cfg(debug_assertions)] + { + self.finished = false; + } + let flush_threshold = const { BUF_SIZE.checked_sub(N).unwrap() }; + if std::intrinsics::unlikely(self.buffered > flush_threshold) { + self.flush(); + } + // SAFETY: We checked above that N < self.buffer_empty().len(), + // and if isn't, flush ensures that our empty buffer is now BUF_SIZE. + // We produce a post-mono error if N > BUF_SIZE. + let buf = unsafe { self.buffer_empty().first_chunk_mut::().unwrap_unchecked() }; + let written = visitor(buf); + // We have to ensure that an errant visitor cannot cause self.buffered to exceed BUF_SIZE. + if written > N { + Self::panic_invalid_write::(written); + } + self.buffered = self.buffered.debug_strict_add(written); + } + + #[cold] + #[inline(never)] + fn panic_invalid_write(written: usize) { + panic!("FileEncoder::write_with::<{N}> cannot be used to write {written} bytes"); + } + + /// Helper for calls where [`FileEncoder::write_with`] always writes the whole array. + #[inline] + pub(super) fn write_array(&mut self, buf: [u8; N]) { + self.write_with(|dest| { + *dest = buf; + N + }) + } + + pub(super) fn finish(&mut self) -> FileEncodeResult { + self.write_all(MAGIC_END_BYTES); + self.flush(); + #[cfg(debug_assertions)] + { + self.finished = true; + } + match std::mem::replace(&mut self.res, Ok(())) { + Ok(()) => Ok(self.position()), + Err(e) => Err((self.path.clone(), e)), + } + } + + pub(super) fn hash(&mut self) -> Fingerprint { + self.flush(); + self.stable_hasher.clone().finish() + } +} + +#[cfg(debug_assertions)] +impl<'a> Drop for FileEncoder<'a> { + fn drop(&mut self) { + if !std::thread::panicking() { + assert!(self.finished); + } + } +} + +macro_rules! write_leb128 { + ($this_fn:ident, $int_ty:ty, $write_leb_fn:ident) => { + #[inline] + fn $this_fn(&mut self, v: $int_ty) { + self.write_with(|buf| leb128::$write_leb_fn(buf, v)) + } + }; +} + +impl Encoder for FileEncoder<'_> { + write_leb128!(emit_usize, usize, write_usize_leb128); + write_leb128!(emit_u128, u128, write_u128_leb128); + write_leb128!(emit_u64, u64, write_u64_leb128); + write_leb128!(emit_u32, u32, write_u32_leb128); + + #[inline] + fn emit_u16(&mut self, v: u16) { + self.write_array(v.to_le_bytes()); + } + + #[inline] + fn emit_u8(&mut self, v: u8) { + self.write_array([v]); + } + + write_leb128!(emit_isize, isize, write_isize_leb128); + write_leb128!(emit_i128, i128, write_i128_leb128); + write_leb128!(emit_i64, i64, write_i64_leb128); + write_leb128!(emit_i32, i32, write_i32_leb128); + + #[inline] + fn emit_i16(&mut self, v: i16) { + self.write_array(v.to_le_bytes()); + } + + #[inline] + fn emit_raw_bytes(&mut self, s: &[u8]) { + self.write_all(s); + } +} + +// ----------------------------------------------------------------------------- +// Decoder +// ----------------------------------------------------------------------------- + +// Conceptually, `MemDecoder` wraps a `&[u8]` with a cursor into it that is always valid. +// This is implemented with three pointers, two which represent the original slice and a +// third that is our cursor. +// It is an invariant of this type that start <= current <= end. +// Additionally, the implementation of this type never modifies start and end. +pub(super) struct MemDecoder<'a> { + start: *const u8, + current: *const u8, + end: *const u8, + _marker: PhantomData<&'a u8>, +} + +impl<'a> MemDecoder<'a> { + /*#[inline] + pub(super) fn new(data: &'a [u8], position: usize) -> Result, ()> { + let data = data.strip_suffix(MAGIC_END_BYTES).ok_or(())?; + let Range { start, end } = data.as_ptr_range(); + Ok(MemDecoder { start, current: data[position..].as_ptr(), end, _marker: PhantomData }) + } + + #[inline] + pub(super) fn split_at(&self, position: usize) -> MemDecoder<'a> { + assert!(position <= self.len()); + // SAFETY: We checked above that this offset is within the original slice + let current = unsafe { self.start.add(position) }; + MemDecoder { start: self.start, current, end: self.end, _marker: PhantomData } + } + + #[inline] + pub(super) fn len(&self) -> usize { + // SAFETY: This recovers the length of the original slice, only using members we never modify. + unsafe { self.end.offset_from_unsigned(self.start) } + }*/ + + #[inline] + pub(super) fn remaining(&self) -> usize { + // SAFETY: This type guarantees current <= end. + unsafe { self.end.offset_from_unsigned(self.current) } + } + + #[cold] + #[inline(never)] + fn decoder_exhausted() -> ! { + panic!("MemDecoder exhausted") + } + + #[inline] + pub(super) fn read_array(&mut self) -> [u8; N] { + self.read_raw_bytes(N).try_into().unwrap() + } + + /* + /// While we could manually expose manipulation of the decoder position, + /// all current users of that method would need to reset the position later, + /// incurring the bounds check of set_position twice. + #[inline] + pub(super) fn with_position(&mut self, pos: usize, func: F) -> T + where + F: Fn(&mut MemDecoder<'a>) -> T, + { + struct SetOnDrop<'a, 'guarded> { + decoder: &'guarded mut MemDecoder<'a>, + current: *const u8, + } + impl Drop for SetOnDrop<'_, '_> { + fn drop(&mut self) { + self.decoder.current = self.current; + } + } + + if pos >= self.len() { + Self::decoder_exhausted(); + } + let previous = self.current; + // SAFETY: We just checked if this add is in-bounds above. + unsafe { + self.current = self.start.add(pos); + } + let guard = SetOnDrop { current: previous, decoder: self }; + func(guard.decoder) + }*/ +} + +macro_rules! read_leb128 { + ($this_fn:ident, $int_ty:ty, $read_leb_fn:ident) => { + #[inline] + fn $this_fn(&mut self) -> $int_ty { + leb128::$read_leb_fn(self) + } + }; +} + +impl<'a> Decoder for MemDecoder<'a> { + read_leb128!(read_usize, usize, read_usize_leb128); + read_leb128!(read_u128, u128, read_u128_leb128); + read_leb128!(read_u64, u64, read_u64_leb128); + read_leb128!(read_u32, u32, read_u32_leb128); + + #[inline] + fn read_u16(&mut self) -> u16 { + u16::from_le_bytes(self.read_array()) + } + + #[inline] + fn read_u8(&mut self) -> u8 { + if self.current == self.end { + Self::decoder_exhausted(); + } + // SAFETY: This type guarantees current <= end, and we just checked current == end. + unsafe { + let byte = *self.current; + self.current = self.current.add(1); + byte + } + } + + read_leb128!(read_isize, isize, read_isize_leb128); + read_leb128!(read_i128, i128, read_i128_leb128); + read_leb128!(read_i64, i64, read_i64_leb128); + read_leb128!(read_i32, i32, read_i32_leb128); + + #[inline] + fn read_i16(&mut self) -> i16 { + i16::from_le_bytes(self.read_array()) + } + + #[inline] + fn read_raw_bytes(&mut self, bytes: usize) -> &'a [u8] { + if bytes > self.remaining() { + Self::decoder_exhausted(); + } + // SAFETY: We just checked if this range is in-bounds above. + unsafe { + let slice = std::slice::from_raw_parts(self.current, bytes); + self.current = self.current.add(bytes); + slice + } + } + + #[inline] + fn peek_byte(&self) -> u8 { + if self.current == self.end { + Self::decoder_exhausted(); + } + // SAFETY: This type guarantees current is inbounds or one-past-the-end, which is end. + // Since we just checked current == end, the current pointer must be inbounds. + unsafe { *self.current } + } + + #[inline] + fn position(&self) -> usize { + // SAFETY: This type guarantees start <= current + unsafe { self.current.offset_from_unsigned(self.start) } + } +} + +// Specializations for contiguous byte sequences follow. The default implementations for slices +// encode and decode each element individually. This isn't necessary for `u8` slices when using +// opaque encoders and decoders, because each `u8` is unchanged by encoding and decoding. +// Therefore, we can use more efficient implementations that process the entire sequence at once. + +// Specialize encoding byte slices. This specialization also applies to encoding `Vec`s, etc., +// since the default implementations call `encode` on their slices internally. +impl Encodable> for [u8] { + fn encode(&self, e: &mut FileEncoder<'_>) { + Encoder::emit_usize(e, self.len()); + e.emit_raw_bytes(self); + } +} + +// Specialize decoding `Vec`. This specialization also applies to decoding `Box<[u8]>`s, etc., +// since the default implementations call `decode` to produce a `Vec` internally. +impl<'a> Decodable> for Vec { + fn decode(d: &mut MemDecoder<'a>) -> Self { + let len = Decoder::read_usize(d); + d.read_raw_bytes(len).to_owned() + } +} + +/*/// An integer that will always encode to 8 bytes. +pub(super) struct IntEncodedWithFixedSize(pub(super) u64); + +impl IntEncodedWithFixedSize { + pub(super) const ENCODED_SIZE: usize = 8; +} + +impl Encodable> for IntEncodedWithFixedSize { + #[inline] + fn encode(&self, e: &mut FileEncoder<'_>) { + let start_pos = e.position(); + e.write_array(self.0.to_le_bytes()); + let end_pos = e.position(); + debug_assert_eq!((end_pos - start_pos), IntEncodedWithFixedSize::ENCODED_SIZE); + } +} + +impl<'a> Decodable> for IntEncodedWithFixedSize { + #[inline] + fn decode(decoder: &mut MemDecoder<'a>) -> IntEncodedWithFixedSize { + let bytes = decoder.read_array::<{ IntEncodedWithFixedSize::ENCODED_SIZE }>(); + IntEncodedWithFixedSize(u64::from_le_bytes(bytes)) + } +}*/ + +/*#[cfg(test)] +mod tests;*/