diff --git a/.changeset/batch-plugin-visitor.md b/.changeset/batch-plugin-visitor.md new file mode 100644 index 000000000000..c36bd24c40a1 --- /dev/null +++ b/.changeset/batch-plugin-visitor.md @@ -0,0 +1,5 @@ +--- +"@biomejs/biome": patch +--- + +Improved plugin performance by batching all plugins into a single syntax visitor with a kind-to-plugin lookup map, reducing per-node dispatch overhead from O(N) to O(1) where N is the number of plugins. diff --git a/.github/workflows/benchmark_grit.yml b/.github/workflows/benchmark_grit.yml new file mode 100644 index 000000000000..6a47b7ea2c24 --- /dev/null +++ b/.github/workflows/benchmark_grit.yml @@ -0,0 +1,72 @@ +name: Benchmarks GritQL + +on: + workflow_dispatch: + merge_group: + pull_request: + types: [ opened, synchronize ] + branches: + - main + - next + paths: + - 'Cargo.lock' + - 'crates/biome_grit_parser/**/*.rs' + - 'crates/biome_grit_patterns/**/*.rs' + - 'crates/biome_grit_syntax/**/*.rs' + - 'crates/biome_rowan/**/*.rs' + push: + branches: + - main + - next + paths: + - 'Cargo.lock' + - 'crates/biome_grit_parser/**/*.rs' + - 'crates/biome_grit_patterns/**/*.rs' + - 'crates/biome_grit_syntax/**/*.rs' + - 'crates/biome_rowan/**/*.rs' + +env: + RUST_LOG: info + +jobs: + bench: + permissions: + contents: read + pull-requests: write + name: Bench + runs-on: depot-ubuntu-24.04-arm-16 + strategy: + matrix: + package: + - biome_grit_patterns + + steps: + + - name: Checkout PR Branch + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: ${{ github.event.pull_request.head.sha || github.sha }} + + - name: Install toolchain + uses: moonrepo/setup-rust@ede6de059f8046a5e236c94046823e2af11ca670 # v1.2.2 + with: + channel: stable + cache-target: release + bins: cargo-codspeed + cache-base: main + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Compile + timeout-minutes: 20 + run: cargo codspeed build -p ${{ matrix.package }} + env: + CARGO_BUILD_JOBS: 3 # Default is 4 
(equal to the vCPU count of the runner), which leads to OOM on cargo build + + - name: Run the benchmarks + uses: CodSpeedHQ/action@4deb3275dd364fb96fb074c953133d29ec96f80f # v4.10.6 + timeout-minutes: 50 + with: + mode: simulation + run: cargo codspeed run + token: ${{ secrets.CODSPEED_TOKEN }} diff --git a/Cargo.lock b/Cargo.lock index 1254176996e6..1b180efdb049 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -772,9 +772,11 @@ dependencies = [ "biome_string_case", "biome_test_utils", "camino", + "codspeed-criterion-compat", "grit-pattern-matcher", "grit-util", "insta", + "mimalloc", "path-absolutize", "rand 0.8.5", "regex", @@ -783,6 +785,7 @@ dependencies = [ "serde", "serde_json", "tests_macros", + "tikv-jemallocator", ] [[package]] diff --git a/crates/biome_analyze/src/analyzer_plugin.rs b/crates/biome_analyze/src/analyzer_plugin.rs index 7028267172a6..870db3c9589a 100644 --- a/crates/biome_analyze/src/analyzer_plugin.rs +++ b/crates/biome_analyze/src/analyzer_plugin.rs @@ -1,5 +1,5 @@ -use camino::Utf8PathBuf; -use rustc_hash::FxHashSet; +use camino::{Utf8Path, Utf8PathBuf}; +use rustc_hash::{FxHashMap, FxHashSet}; use std::hash::Hash; use std::{fmt::Debug, sync::Arc}; @@ -23,6 +23,14 @@ pub trait AnalyzerPlugin: Debug + Send + Sync { fn query(&self) -> Vec; fn evaluate(&self, node: AnySyntaxNode, path: Arc) -> Vec; + + /// Returns true if this plugin should run on the given file path. + /// + /// Stub that always returns `true` — file-scoping will be implemented + /// in a companion PR (#9171) via the `includes` plugin option. + fn applies_to_file(&self, _path: &Utf8Path) -> bool { + true + } } #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] @@ -37,6 +45,10 @@ pub enum PluginTargetLanguage { pub struct PluginVisitor { query: FxHashSet, plugin: Arc>, + + /// When set, all nodes in this subtree are skipped until we leave it. + /// Used to skip subtrees that fall entirely outside the analysis range + /// (see the `ctx.range` check in `visit`). 
skip_subtree: Option>, } @@ -102,6 +114,10 @@ where return; } + if !self.plugin.applies_to_file(&ctx.options.file_path) { + return; + } + let rule_timer = profiling::start_plugin_rule("plugin"); let diagnostics = self .plugin @@ -126,3 +142,139 @@ where ctx.signal_queue.extend(signals); } } + +/// A batched syntax visitor that evaluates multiple plugins in a single visitor. +/// +/// Instead of registering N separate `PluginVisitor` instances (one per plugin), +/// this holds all plugins together and dispatches using a kind-to-plugin lookup +/// map. This reduces visitor-dispatch overhead and enables O(1) kind matching +/// per node instead of iterating all plugins. +pub struct BatchPluginVisitor { + plugins: Vec>>, + + /// Maps each syntax kind to the indices of plugins that query for it. + kind_to_plugins: FxHashMap>, + + /// When set, all nodes in this subtree are skipped until we leave it. + /// Used to skip subtrees that fall entirely outside the analysis range + /// (see the `ctx.range` check in `visit`). + skip_subtree: Option>, + + /// Cached per-plugin results of `applies_to_file`. Populated lazily on + /// first `WalkEvent::Enter` — the file path is constant for the entire walk. + applicable: Option>, +} + +impl BatchPluginVisitor +where + L: Language + 'static, + L::Kind: Eq + Hash, +{ + /// Creates a batched plugin visitor from a slice of plugins. + /// + /// # Safety + /// Caller must ensure all plugins target language `L`. The `RawSyntaxKind` + /// values returned by each plugin's `query()` are converted to `L::Kind` + /// via `from_raw` without validation. 
+ pub unsafe fn new_unchecked(plugins: AnalyzerPluginSlice) -> Self { + let mut all_plugins = Vec::with_capacity(plugins.len()); + let mut kind_to_plugins: FxHashMap> = FxHashMap::default(); + + for (idx, plugin) in plugins.iter().enumerate() { + all_plugins.push(Arc::clone(plugin)); + let mut seen_kinds = FxHashSet::default(); + for raw_kind in plugin.query() { + let kind = L::Kind::from_raw(raw_kind); + if seen_kinds.insert(kind) { + kind_to_plugins.entry(kind).or_default().push(idx); + } + } + } + + Self { + plugins: all_plugins, + kind_to_plugins, + skip_subtree: None, + applicable: None, + } + } +} + +impl Visitor for BatchPluginVisitor +where + L: Language + 'static, + L::Kind: Eq + Hash, +{ + type Language = L; + + fn visit( + &mut self, + event: &WalkEvent>, + ctx: VisitorContext, + ) { + let node = match event { + WalkEvent::Enter(node) => node, + WalkEvent::Leave(node) => { + if let Some(skip_subtree) = &self.skip_subtree + && skip_subtree == node + { + self.skip_subtree = None; + } + + return; + } + }; + + if self.skip_subtree.is_some() { + return; + } + + if let Some(range) = ctx.range + && node.text_range_with_trivia().ordering(range).is_ne() + { + self.skip_subtree = Some(node.clone()); + return; + } + + let kind = node.kind(); + + let Some(plugin_indices) = self.kind_to_plugins.get(&kind) else { + return; + }; + + let applicable = self.applicable.get_or_insert_with(|| { + self.plugins + .iter() + .map(|p| p.applies_to_file(&ctx.options.file_path)) + .collect() + }); + + for &idx in plugin_indices { + if !applicable[idx] { + continue; + } + + let plugin = &self.plugins[idx]; + let rule_timer = profiling::start_plugin_rule("plugin"); + let diagnostics = plugin.evaluate(node.clone().into(), ctx.options.file_path.clone()); + rule_timer.stop(); + + let signals = diagnostics.into_iter().map(|diagnostic| { + let name = diagnostic + .subcategory + .clone() + .unwrap_or_else(|| "anonymous".into()); + + SignalEntry { + text_range: 
diagnostic.span().unwrap_or_default(), + signal: Box::new(PluginSignal::::new(diagnostic)), + rule: SignalRuleKey::Plugin(name.into()), + category: RuleCategory::Lint, + instances: Default::default(), + } + }); + + ctx.signal_queue.extend(signals); + } + } +} diff --git a/crates/biome_analyze/src/lib.rs b/crates/biome_analyze/src/lib.rs index 5d08efefd195..06f8d42cd354 100644 --- a/crates/biome_analyze/src/lib.rs +++ b/crates/biome_analyze/src/lib.rs @@ -30,7 +30,8 @@ mod visitor; pub use biome_diagnostics::category_concat; pub use crate::analyzer_plugin::{ - AnalyzerPlugin, AnalyzerPluginSlice, AnalyzerPluginVec, PluginTargetLanguage, PluginVisitor, + AnalyzerPlugin, AnalyzerPluginSlice, AnalyzerPluginVec, BatchPluginVisitor, + PluginTargetLanguage, PluginVisitor, }; pub use crate::categories::{ ActionCategory, OtherActionCategory, RefactorKind, RuleCategories, RuleCategoriesBuilder, diff --git a/crates/biome_css_analyze/src/lib.rs b/crates/biome_css_analyze/src/lib.rs index 69ef29ddfce3..4f57f619418c 100644 --- a/crates/biome_css_analyze/src/lib.rs +++ b/crates/biome_css_analyze/src/lib.rs @@ -14,8 +14,8 @@ pub use crate::registry::visit_registry; use crate::suppression_action::CssSuppressionAction; use biome_analyze::{ AnalysisFilter, AnalyzerOptions, AnalyzerPluginSlice, AnalyzerSignal, AnalyzerSuppression, - ControlFlow, LanguageRoot, MatchQueryParams, MetadataRegistry, Phases, PluginTargetLanguage, - PluginVisitor, RuleAction, RuleRegistry, to_analyzer_suppressions, + BatchPluginVisitor, ControlFlow, LanguageRoot, MatchQueryParams, MetadataRegistry, Phases, + PluginTargetLanguage, RuleAction, RuleRegistry, to_analyzer_suppressions, }; use biome_css_syntax::{CssFileSource, CssLanguage, TextRange}; use biome_diagnostics::Error; @@ -151,15 +151,19 @@ where analyzer.add_visitor(phase, visitor); } - for plugin in plugins { - // SAFETY: The plugin target language is correctly checked here. 
+ let css_plugins: Vec<_> = plugins + .iter() + .filter(|p| p.language() == PluginTargetLanguage::Css) + .cloned() + .collect(); + + if !css_plugins.is_empty() { + // SAFETY: All plugins have been verified to target CSS above. unsafe { - if plugin.language() == PluginTargetLanguage::Css { - analyzer.add_visitor( - Phases::Syntax, - Box::new(PluginVisitor::new_unchecked(plugin.clone())), - ) - } + analyzer.add_visitor( + Phases::Syntax, + Box::new(BatchPluginVisitor::new_unchecked(&css_plugins)), + ); } } diff --git a/crates/biome_grit_patterns/Cargo.toml b/crates/biome_grit_patterns/Cargo.toml index e695e12c2b02..ab17a5cd8a98 100644 --- a/crates/biome_grit_patterns/Cargo.toml +++ b/crates/biome_grit_patterns/Cargo.toml @@ -11,6 +11,10 @@ keywords.workspace = true categories.workspace = true publish = false +[[bench]] +harness = false +name = "grit_query" + [dependencies] biome_analyze = { workspace = true } biome_console = { workspace = true } @@ -39,9 +43,16 @@ serde_json = { workspace = true, optional = true } [dev-dependencies] biome_test_utils = { path = "../biome_test_utils" } +criterion = { package = "codspeed-criterion-compat", version = "*" } insta = { workspace = true } tests_macros = { path = "../tests_macros" } +[target.'cfg(all(target_family="unix", not(all(target_arch = "aarch64", target_env = "musl"))))'.dev-dependencies] +tikv-jemallocator = { workspace = true } + +[target.'cfg(target_os = "windows")'.dev-dependencies] +mimalloc = { workspace = true } + [features] schema = ["biome_js_parser/schema", "dep:schemars", "serde"] serde = ["dep:serde", "dep:serde_json"] diff --git a/crates/biome_grit_patterns/benches/grit_query.rs b/crates/biome_grit_patterns/benches/grit_query.rs new file mode 100644 index 000000000000..bfc1d40f5288 --- /dev/null +++ b/crates/biome_grit_patterns/benches/grit_query.rs @@ -0,0 +1,109 @@ +use biome_grit_patterns::testing::{compile_js_query, make_js_file}; +use criterion::{BenchmarkId, Criterion, black_box, criterion_group, 
criterion_main}; + +#[cfg(target_os = "windows")] +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; + +#[cfg(all( + any(target_os = "macos", target_os = "linux"), + not(target_env = "musl"), +))] +#[global_allocator] +static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; + +#[cfg(all(target_env = "musl", target_os = "linux", target_arch = "aarch64"))] +#[global_allocator] +static GLOBAL: std::alloc::System = std::alloc::System; + +/// Sample JS code with multiple patterns to match against. +const JS_CODE: &str = r#" +import fs from "fs"; + +console.log("start"); + +function processItems(items) { + const results = new Array(items.length); + for (let i = 0; i < items.length; i++) { + const item = items[i]; + console.log("processing", item); + if (typeof item === "undefined") { + continue; + } + results[i] = item.toString(); + } + console.warn("done", results.length); + return results; +} + +const buffer = new Buffer(1024); +const x = typeof window !== "undefined" ? 
window : global; +console.log("end"); +"#; + +fn bench_execute(criterion: &mut Criterion) { + let mut group = criterion.benchmark_group("grit_query_execute"); + + let patterns = [ + ("code_snippet", "`console.log($msg)`"), + ( + "where_clause", + r#"`console.log($msg)` where { $msg <: `"start"` }"#, + ), + ( + "or_pattern", + "or { `console.log($msg)`, `console.warn($msg)` }", + ), + ]; + + for (name, pattern_src) in &patterns { + let query = compile_js_query(pattern_src); + let base_file = make_js_file(JS_CODE); + + group.bench_with_input(BenchmarkId::new("execute", name), pattern_src, |b, _| { + b.iter(|| { + black_box(query.execute(base_file.clone()).unwrap()); + }); + }); + + group.bench_with_input( + BenchmarkId::new("execute_optimized", name), + pattern_src, + |b, _| { + b.iter(|| { + black_box(query.execute_optimized(base_file.clone()).unwrap()); + }); + }, + ); + } + + group.finish(); +} + +fn bench_anchor_extraction(criterion: &mut Criterion) { + let mut group = criterion.benchmark_group("grit_query_anchor_kinds"); + + let patterns = [ + ("code_snippet", "`console.log($msg)`"), + ( + "or_pattern", + "or { `console.log($msg)`, `console.warn($msg)` }", + ), + ("metavariable", "$x"), + ]; + + for (name, pattern_src) in &patterns { + let query = compile_js_query(pattern_src); + + group.bench_with_input(BenchmarkId::from_parameter(name), pattern_src, |b, _| { + b.iter(|| { + black_box(query.anchor_kinds()); + }); + }); + } + + group.finish(); +} + +criterion_group!(grit_query, bench_execute, bench_anchor_extraction); +criterion_main!(grit_query); diff --git a/crates/biome_grit_patterns/src/grit_query.rs b/crates/biome_grit_patterns/src/grit_query.rs index 2553b0c56e1c..cd35c0b85b6b 100644 --- a/crates/biome_grit_patterns/src/grit_query.rs +++ b/crates/biome_grit_patterns/src/grit_query.rs @@ -4,8 +4,10 @@ use crate::grit_context::{GritExecContext, GritQueryContext, GritTargetFile}; use crate::grit_definitions::{ Definitions, ScannedDefinitionInfo, 
compile_definitions, scan_definitions, }; +use crate::grit_file::GritFile; use crate::grit_resolved_pattern::GritResolvedPattern; use crate::grit_target_language::GritTargetLanguage; +use crate::grit_target_node::GritTargetSyntaxKind; use crate::grit_tree::GritTargetTree; use crate::pattern_compiler::{PatternCompiler, auto_wrap_pattern}; use crate::pattern_compiler::{ @@ -17,11 +19,13 @@ use biome_analyze::RuleDiagnostic; use biome_grit_syntax::{GritRoot, GritRootExt}; use camino::Utf8Path; use grit_pattern_matcher::constants::{ - ABSOLUTE_PATH_INDEX, FILENAME_INDEX, NEW_FILES_INDEX, PROGRAM_INDEX, + ABSOLUTE_PATH_INDEX, FILENAME_INDEX, GLOBAL_VARS_SCOPE_INDEX, NEW_FILES_INDEX, PROGRAM_INDEX, }; +use grit_pattern_matcher::context::ExecContext; use grit_pattern_matcher::file_owners::{FileOwner, FileOwners}; use grit_pattern_matcher::pattern::{ - FilePtr, FileRegistry, Matcher, Pattern, ResolvedPattern, State, VariableSource, + File as GritFileTrait, FilePtr, FileRegistry, Matcher, Pattern, Predicate, ResolvedPattern, + State, VariableSource, }; use grit_util::error::{GritPatternError, GritResult}; use grit_util::{AnalysisLogs, Ast, ByteRange, InputRanges, Range, VariableMatch}; @@ -64,29 +68,39 @@ pub struct GritQuery { } impl GritQuery { - pub fn execute(&self, file: GritTargetFile) -> GritResult { - let file_owners = FileOwners::new(); - let files = vec![file]; - let file_ptr = FilePtr::new(0, 0); - let context = GritExecContext { + fn make_exec_context<'a>( + &'a self, + files: &'a [GritTargetFile], + file_owners: &'a FileOwners, + ) -> GritExecContext<'a> { + GritExecContext { lang: self.language.clone(), name: self.name.as_deref(), - loadable_files: &files, - files: &file_owners, + loadable_files: files, + files: file_owners, built_ins: &self.built_ins, functions: &self.definitions.functions, patterns: &self.definitions.patterns, predicates: &self.definitions.predicates, diagnostics: Mutex::new(Vec::new()), - }; + } + } + fn make_initial_state<'a>(&self, files: 
&'a [GritTargetFile]) -> State<'a, GritQueryContext> { let var_registry = VarRegistry::from_locations(&self.variable_locations); - let paths: Vec<_> = files.iter().map(|file| file.path.as_std_path()).collect(); let file_registry = FileRegistry::new_from_paths(paths); + State::new(var_registry.into(), file_registry) + } + + pub fn execute(&self, file: GritTargetFile) -> GritResult { + let file_owners = FileOwners::new(); + let files = vec![file]; + let file_ptr = FilePtr::new(0, 0); + let context = self.make_exec_context(&files, &file_owners); + let mut state = self.make_initial_state(&files); let binding = FilePattern::Single(file_ptr); - let mut state = State::new(var_registry.into(), file_registry); let mut logs = Vec::new().into(); let mut effects: Vec = Vec::new(); @@ -108,6 +122,133 @@ impl GritQuery { }) } + /// Returns the syntax kinds that this query's pattern targets. + /// + /// Extracts kinds from the inner CodeSnippet or AstNode patterns + /// by navigating the compiled pattern tree. Returns an empty vec + /// if the pattern structure can't be analyzed. + pub fn anchor_kinds(&self) -> Vec { + extract_anchor_kinds(&self.pattern) + } + + /// Optimized execution that replaces the Contains full-tree walk + /// with an anchor-kind-filtered walk. + /// + /// Instead of walking every node in the tree (what Contains does), + /// this only executes the inner Bubble pattern at nodes matching + /// the extracted anchor kinds. + /// + /// Falls back to `execute()` if anchor extraction fails. + pub fn execute_optimized(&self, file: GritTargetFile) -> GritResult { + let anchor_kinds = self.anchor_kinds(); + let inner = extract_contains_inner(&self.pattern); + + let Some(inner) = inner else { + return self.execute(file); + }; + if anchor_kinds.is_empty() { + return self.execute(file); + } + // Create tree independently of state to avoid borrow conflicts. + // from_cached_parse_result wraps the existing parsed tree — O(1). 
+ let mut logs: AnalysisLogs = Vec::new().into(); + let tree = self.language.get_parser().from_cached_parse_result( + &file.parse, + Some(file.path.as_std_path()), + &mut logs, + ); + let Some(tree) = tree else { + return self.execute(file); + }; + + // Collect anchor-kind nodes from the independent tree. + // Use Vec::contains — anchor_kinds is tiny (1-3 items), faster than hashing. + let root = tree.root_node(); + let anchor_nodes: Vec<_> = root + .descendants() + .filter(|node| anchor_kinds.contains(&node.kind())) + .collect(); + + // Set up context and state (same as execute). + let file_owners = FileOwners::new(); + let files = vec![file]; + let file_ptr = FilePtr::new(0, 0); + let context = self.make_exec_context(&files, &file_owners); + let mut state = self.make_initial_state(&files); + + // Load file (creates FileOwner in file_owners, loads into state.files). + let grit_file = GritFile::Ptr(file_ptr); + context.load_file(&grit_file, &mut state, &mut logs)?; + + // Replicate the global-variable binding from `FilePattern::execute` in + // `grit-pattern-matcher` (crate `grit-pattern-matcher`, module `pattern/file_pattern.rs`). + // If the upstream binding logic changes, this block must be updated to match. 
+ let name_val = grit_file.name(&state.files); + let program_val = grit_file.binding(&state.files); + let abs_path_val = grit_file.absolute_path(&state.files, &context.lang)?; + + state.bindings[GLOBAL_VARS_SCOPE_INDEX as usize] + .last_mut() + .unwrap()[FILENAME_INDEX] + .value = Some(name_val); + state.bindings[GLOBAL_VARS_SCOPE_INDEX as usize] + .last_mut() + .unwrap()[PROGRAM_INDEX] + .value = Some(program_val); + state.bindings[GLOBAL_VARS_SCOPE_INDEX as usize] + .last_mut() + .unwrap()[ABSOLUTE_PATH_INDEX] + .value = Some(abs_path_val); + state.bindings[GLOBAL_VARS_SCOPE_INDEX as usize] + .last_mut() + .unwrap()[NEW_FILES_INDEX] + .value = Some(GritResolvedPattern::from_list_parts([].into_iter())); + + // Execute inner pattern (Bubble) at each anchor-kind node. + let mut matched = false; + for node in anchor_nodes { + let binding = GritResolvedPattern::from_node_binding(node); + let saved = state.clone(); + if inner.execute(&binding, &mut state, &context, &mut logs)? { + matched = true; + } else { + state = saved; + } + } + + // Collect match ranges and set on file (replicate exec_step behavior). + if matched { + let (variables, ranges, suppressed) = + state.bindings_history_to_ranges(&context.lang, context.name); + let unique_ranges: Vec<_> = ranges + .into_iter() + .collect::>() + .into_iter() + .collect(); + let input_ranges = InputRanges { + ranges: unique_ranges, + variables, + suppressed, + }; + let file_owner = state.files.get_file_owner(file_ptr); + file_owner.matches.borrow_mut().input_matches = Some(input_ranges); + } + + // Collect effects. + let mut effects = Vec::new(); + for file in state.files.files() { + if let Some(effect) = GritQueryEffect::from_file(file)? 
{ + effects.push(effect); + } + } + + Ok(GritQueryResult { + effects, + diagnostics: context.into_diagnostics(), + logs, + }) + } + pub fn from_node( root: GritRoot, source_path: Option<&Utf8Path>, @@ -379,3 +520,297 @@ pub struct Message { pub range: Vec, pub variable_runtime_id: String, } + +/// Extracts the syntax kinds that a pattern targets by navigating +/// the auto-wrapped pattern tree. +/// +/// The auto-wrap chain is: +/// Sequential → Step → [And →] File → Contains → Bubble → Where → Predicate::Match → inner +/// +/// Returns an empty vec (triggering fallback to full execute) when +/// the pattern structure can't be statically analyzed. +fn extract_anchor_kinds(pattern: &Pattern) -> Vec { + match pattern { + Pattern::Sequential(seq) => seq + .iter() + .flat_map(|step| extract_anchor_kinds(&step.pattern)) + .collect(), + Pattern::File(file) => extract_anchor_kinds(&file.body), + Pattern::Contains(contains) => { + if contains.until.is_some() { + return vec![]; + } + extract_anchor_kinds(&contains.contains) + } + Pattern::Bubble(bubble) => extract_anchor_kinds(bubble.pattern_def.pattern()), + Pattern::Where(where_pat) => { + let mut kinds = extract_anchor_kinds(&where_pat.pattern); + if kinds.is_empty() { + kinds = extract_anchor_kinds_from_predicate(&where_pat.side_condition); + } + kinds + } + // NOTE: collects kinds from ALL And branches. extract_contains_inner + // uses find_map (returns the first Contains only), so when an And has + // multiple Contains branches with different kind sets, we may run the + // inner pattern on extra nodes. This is harmless — the inner pattern + // simply won't match — but causes unnecessary evaluations. + Pattern::And(and) => and.patterns.iter().flat_map(extract_anchor_kinds).collect(), + // For Or/Any: if ANY branch is universal (returns []), the whole + // pattern is universal — we can't restrict to specific kinds. 
+ Pattern::Or(or) => { + let all: Vec<_> = or.patterns.iter().map(extract_anchor_kinds).collect(); + if all.iter().any(|kinds| kinds.is_empty()) { + return vec![]; + } + all.into_iter().flatten().collect() + } + Pattern::Any(any) => { + let all: Vec<_> = any.patterns.iter().map(extract_anchor_kinds).collect(); + if all.iter().any(|kinds| kinds.is_empty()) { + return vec![]; + } + all.into_iter().flatten().collect() + } + // Not/Maybe: extracting anchors from the inner pattern is wrong. + // `not { X }` succeeds where X does NOT match and `maybe { X }` always + // succeeds, so restricting to X's kinds would miss valid matches. + Pattern::Not(_) => vec![], + Pattern::Maybe(_) => vec![], + Pattern::Rewrite(rw) => extract_anchor_kinds(&rw.left), + Pattern::Limit(limit) => extract_anchor_kinds(&limit.pattern), + Pattern::CodeSnippet(snippet) => snippet.patterns.iter().map(|(kind, _)| *kind).collect(), + Pattern::AstNode(node) => vec![node.kind], + _ => vec![], + } +} + +/// Extracts anchor kinds from a predicate expression. +fn extract_anchor_kinds_from_predicate( + predicate: &Predicate, +) -> Vec { + match predicate { + Predicate::Match(m) => { + if let Some(pattern) = &m.pattern { + extract_anchor_kinds(pattern) + } else { + vec![] + } + } + Predicate::And(a) => a + .predicates + .iter() + .flat_map(extract_anchor_kinds_from_predicate) + .collect(), + Predicate::Or(o) => { + let all: Vec<_> = o + .predicates + .iter() + .map(extract_anchor_kinds_from_predicate) + .collect(); + if all.iter().any(|kinds| kinds.is_empty()) { + return vec![]; + } + all.into_iter().flatten().collect() + } + _ => vec![], + } +} + +/// Navigates the auto-wrapped pattern tree to find the inner pattern +/// of the Contains node (the Bubble pattern). +/// +/// Returns None if the pattern structure doesn't match the expected +/// auto-wrap chain. +/// +/// Note: only inspects the first step of Sequential, matching the +/// auto-wrap structure where Contains is always in the first step. 
+fn extract_contains_inner( + pattern: &Pattern, +) -> Option<&Pattern> { + match pattern { + Pattern::Sequential(seq) => seq + .first() + .and_then(|step| extract_contains_inner(&step.pattern)), + Pattern::File(file) => extract_contains_inner(&file.body), + Pattern::Contains(contains) => { + if contains.until.is_some() { + return None; + } + Some(&contains.contains) + } + // NOTE: returns only the first Contains found. extract_anchor_kinds + // collects from ALL And branches, so there may be an asymmetry when + // multiple Contains exist. See the matching NOTE there. + Pattern::And(and) => and.patterns.iter().find_map(extract_contains_inner), + Pattern::Limit(limit) => extract_contains_inner(&limit.pattern), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::testing::{compile_js_query, make_js_file}; + + // -- extract_anchor_kinds tests -- + + #[test] + fn anchor_kinds_extracts_from_code_snippet() { + let query = compile_js_query("`console.log($msg)`"); + let kinds = query.anchor_kinds(); + assert!( + !kinds.is_empty(), + "code snippet should produce anchor kinds" + ); + } + + #[test] + fn anchor_kinds_returns_empty_for_metavariable() { + // A bare metavariable like `$x` matches any node — universal. + let query = compile_js_query("$x"); + let kinds = query.anchor_kinds(); + assert!( + kinds.is_empty(), + "bare metavariable is universal, expected empty anchor kinds" + ); + } + + #[test] + fn anchor_kinds_returns_empty_for_not() { + // `not` negation: can't restrict to specific kinds. + let query = compile_js_query("not `console.log($msg)`"); + let kinds = query.anchor_kinds(); + assert!( + kinds.is_empty(), + "Not patterns should return empty anchor kinds" + ); + } + + #[test] + fn anchor_kinds_or_with_universal_branch_returns_empty() { + // If any Or branch is universal (metavariable), the whole Or is universal. 
+ let query = compile_js_query("or { `console.log($x)`, $y }"); + let kinds = query.anchor_kinds(); + assert!( + kinds.is_empty(), + "Or with universal branch should return empty anchor kinds" + ); + } + + #[test] + fn anchor_kinds_or_with_all_specific_branches() { + let query = compile_js_query("or { `console.log($x)`, `console.warn($x)` }"); + let kinds = query.anchor_kinds(); + assert!( + !kinds.is_empty(), + "Or with all specific branches should return anchor kinds" + ); + } + + #[test] + fn anchor_kinds_where_clause_extracts_from_match() { + let query = compile_js_query("`console.log($msg)` where { $msg <: `\"hello\"` }"); + let kinds = query.anchor_kinds(); + assert!( + !kinds.is_empty(), + "where clause with code snippet should produce anchor kinds" + ); + } + + // -- extract_contains_inner tests -- + + #[test] + fn contains_inner_found_for_code_snippet() { + let query = compile_js_query("`console.log($msg)`"); + let inner = extract_contains_inner(&query.pattern); + assert!( + inner.is_some(), + "should find inner pattern in auto-wrapped Contains" + ); + } + + #[test] + fn contains_inner_found_for_where_pattern() { + let query = compile_js_query("`console.log($msg)` where { $msg <: `\"test\"` }"); + let inner = extract_contains_inner(&query.pattern); + assert!( + inner.is_some(), + "should find inner pattern for where-clause patterns" + ); + } + + // -- execute_optimized equivalence tests -- + + #[test] + fn execute_optimized_matches_execute_for_simple_pattern() { + let query = compile_js_query("`console.log($msg)`"); + let code = r#" + console.log("hello"); + const x = 42; + console.log("world"); + "#; + + let opt_result = query + .execute_optimized(make_js_file(code)) + .expect("optimized failed"); + let full_result = query.execute(make_js_file(code)).expect("execute failed"); + + assert_eq!( + opt_result.effects, full_result.effects, + "optimized and full execute should produce identical effects" + ); + } + + #[test] + fn 
execute_optimized_matches_execute_for_where_clause() { + let query = compile_js_query(r#"`console.log($msg)` where { $msg <: `"hello"` }"#); + let code = r#" + console.log("hello"); + console.log("world"); + "#; + + let opt_result = query + .execute_optimized(make_js_file(code)) + .expect("optimized failed"); + let full_result = query.execute(make_js_file(code)).expect("execute failed"); + + assert_eq!( + opt_result.effects, full_result.effects, + "where-clause: optimized and full should produce identical effects" + ); + } + + #[test] + fn execute_optimized_falls_back_for_non_optimizable() { + // Bare metavariable — anchor_kinds() returns empty, should fallback. + let query = compile_js_query("$x"); + let code = "const x = 1;"; + + let result = query + .execute_optimized(make_js_file(code)) + .expect("fallback execution failed"); + + // Should still produce results via fallback. + assert!( + result.diagnostics.is_empty(), + "fallback should not produce errors" + ); + } + + #[test] + fn execute_optimized_no_matches_when_pattern_absent() { + let query = compile_js_query("`console.log($msg)`"); + let code = "const x = 42;"; + + let opt_result = query + .execute_optimized(make_js_file(code)) + .expect("optimized failed"); + let full_result = query.execute(make_js_file(code)).expect("execute failed"); + + assert_eq!( + opt_result.effects, full_result.effects, + "no-match: optimized and full should produce identical effects" + ); + } +} diff --git a/crates/biome_grit_patterns/src/lib.rs b/crates/biome_grit_patterns/src/lib.rs index 689d2fed9b5c..fcee683429d3 100644 --- a/crates/biome_grit_patterns/src/lib.rs +++ b/crates/biome_grit_patterns/src/lib.rs @@ -21,6 +21,8 @@ mod grit_target_node; mod grit_tree; mod pattern_compiler; mod source_location_ext; +#[doc(hidden)] +pub mod testing; mod util; mod variables; diff --git a/crates/biome_grit_patterns/src/testing.rs b/crates/biome_grit_patterns/src/testing.rs new file mode 100644 index 000000000000..9866e7e58a3c --- 
/dev/null +++ b/crates/biome_grit_patterns/src/testing.rs @@ -0,0 +1,27 @@ +use crate::grit_context::GritTargetFile; +use crate::grit_query::GritQuery; +use crate::grit_target_language::GritTargetLanguage; +use biome_grit_parser::parse_grit; +use biome_js_parser::{JsParserOptions, parse}; +use biome_js_syntax::JsFileSource; + +pub fn compile_js_query(source: &str) -> GritQuery { + let parsed = parse_grit(source); + assert!( + parsed.diagnostics().is_empty(), + "parse error: {:?}", + parsed.diagnostics() + ); + GritQuery::from_node( + parsed.tree(), + None, + GritTargetLanguage::JsTargetLanguage(crate::JsTargetLanguage), + Vec::new(), + ) + .expect("compile failed") +} + +pub fn make_js_file(code: &str) -> GritTargetFile { + let parsed = parse(code, JsFileSource::js_module(), JsParserOptions::default()); + GritTargetFile::new("test.js", parsed.into()) +} diff --git a/crates/biome_js_analyze/src/lib.rs b/crates/biome_js_analyze/src/lib.rs index 45839b4b688d..01af22592cf2 100644 --- a/crates/biome_js_analyze/src/lib.rs +++ b/crates/biome_js_analyze/src/lib.rs @@ -8,8 +8,8 @@ use crate::services::embedded_value_references::EmbeddedValueReferences; use crate::suppression_action::JsSuppressionAction; use biome_analyze::{ AnalysisFilter, Analyzer, AnalyzerContext, AnalyzerOptions, AnalyzerPluginSlice, - AnalyzerSignal, AnalyzerSuppression, ControlFlow, InspectMatcher, LanguageRoot, - MatchQueryParams, MetadataRegistry, Phases, PluginTargetLanguage, PluginVisitor, RuleAction, + AnalyzerSignal, AnalyzerSuppression, BatchPluginVisitor, ControlFlow, InspectMatcher, + LanguageRoot, MatchQueryParams, MetadataRegistry, Phases, PluginTargetLanguage, RuleAction, RuleRegistry, to_analyzer_suppressions, }; use biome_aria::AriaRoles; @@ -160,15 +160,19 @@ where analyzer.add_visitor(phase, visitor); } - for plugin in plugins { - // SAFETY: The plugin target language is correctly checked here. 
+ let js_plugins: Vec<_> = plugins + .iter() + .filter(|p| p.language() == PluginTargetLanguage::JavaScript) + .cloned() + .collect(); + + if !js_plugins.is_empty() { + // SAFETY: All plugins have been verified to target JavaScript above. unsafe { - if plugin.language() == PluginTargetLanguage::JavaScript { - analyzer.add_visitor( - Phases::Syntax, - Box::new(PluginVisitor::new_unchecked(plugin.clone())), - ) - } + analyzer.add_visitor( + Phases::Syntax, + Box::new(BatchPluginVisitor::new_unchecked(&js_plugins)), + ); } } diff --git a/crates/biome_json_analyze/src/lib.rs b/crates/biome_json_analyze/src/lib.rs index b709d20ad407..d199dc6c3c10 100644 --- a/crates/biome_json_analyze/src/lib.rs +++ b/crates/biome_json_analyze/src/lib.rs @@ -14,8 +14,8 @@ use crate::suppression_action::JsonSuppressionAction; pub use biome_analyze::ExtendedConfigurationProvider; use biome_analyze::{ AnalysisFilter, AnalyzerOptions, AnalyzerPluginSlice, AnalyzerSignal, AnalyzerSuppression, - ControlFlow, LanguageRoot, MatchQueryParams, MetadataRegistry, Phases, PluginTargetLanguage, - PluginVisitor, RuleAction, RuleRegistry, to_analyzer_suppressions, + BatchPluginVisitor, ControlFlow, LanguageRoot, MatchQueryParams, MetadataRegistry, Phases, + PluginTargetLanguage, RuleAction, RuleRegistry, to_analyzer_suppressions, }; use biome_diagnostics::Error; use biome_json_syntax::{JsonFileSource, JsonLanguage, TextRange}; @@ -132,15 +132,19 @@ where analyzer.add_visitor(phase, visitor); } - for plugin in plugins { - // SAFETY: The plugin target language is correctly checked here. + let json_plugins: Vec<_> = plugins + .iter() + .filter(|p| p.language() == PluginTargetLanguage::Json) + .cloned() + .collect(); + + if !json_plugins.is_empty() { + // SAFETY: All plugins have been verified to target JSON above. 
unsafe { - if plugin.language() == PluginTargetLanguage::Json { - analyzer.add_visitor( - Phases::Syntax, - Box::new(PluginVisitor::new_unchecked(plugin.clone())), - ) - } + analyzer.add_visitor( + Phases::Syntax, + Box::new(BatchPluginVisitor::new_unchecked(&json_plugins)), + ); } } diff --git a/crates/biome_plugin_loader/src/analyzer_grit_plugin.rs b/crates/biome_plugin_loader/src/analyzer_grit_plugin.rs index ee2b0ad8b179..5ec8d837aae1 100644 --- a/crates/biome_plugin_loader/src/analyzer_grit_plugin.rs +++ b/crates/biome_plugin_loader/src/analyzer_grit_plugin.rs @@ -86,7 +86,7 @@ impl AnalyzerPlugin for AnalyzerGritPlugin { let parse = AnyParse::Node(NodeParse::new(root.unwrap(), vec![])); let file = GritTargetFile { parse, path }; - match self.grit_query.execute(file) { + match self.grit_query.execute_optimized(file) { Ok(result) => { let mut diagnostics: Vec<_> = result .logs