From 6c9df0fb585552457c14fdb1da5d2bfb1279c987 Mon Sep 17 00:00:00 2001 From: albab-hasan Date: Sun, 7 Jun 2026 09:50:57 +0600 Subject: [PATCH] fix: avoid proc-macro re-expansion on trivia-only edits --- .../hir-def/src/nameres/tests/incremental.rs | 85 +++++- crates/hir-expand/src/db.rs | 288 ++++++++++++++++-- crates/hir-expand/src/fixup.rs | 5 + 3 files changed, 358 insertions(+), 20 deletions(-) diff --git a/crates/hir-def/src/nameres/tests/incremental.rs b/crates/hir-def/src/nameres/tests/incremental.rs index 08f672aa0c0d..b5b55ae9d2a7 100644 --- a/crates/hir-def/src/nameres/tests/incremental.rs +++ b/crates/hir-def/src/nameres/tests/incremental.rs @@ -303,8 +303,9 @@ fn f() { foo } "file_item_tree_query", "ast_id_map", "parse_macro_expansion", - "expand_proc_macro", "macro_arg", + "proc_macro_raw_output", + "macro_arg_key", "proc_macro_span_shim", ] "#]], @@ -315,8 +316,85 @@ fn f() { foo } "file_item_tree_query", "real_span_map", "macro_arg", - "expand_proc_macro", "parse_macro_expansion", + "macro_arg_key", + "proc_macro_raw_output", + "ast_id_map", + "file_item_tree_query", + ] + "#]], + ); +} + +#[test] +fn typing_trivia_inside_proc_macro_attribute_does_not_invoke_subprocess() { + // Regression test for https://github.com/rust-lang/rust-analyzer/issues/17213. + // Adding a comment inside an attributed item (trivia-only edit) must not trigger the + // proc-macro subprocess call: `proc_macro_raw_output` is backdated because the token tree + // structure (ignoring span byte-ranges) is unchanged. `expand_proc_macro` is a plain + // function (not a salsa query) that re-runs inside `parse_macro_expansion` to remap + // stale span byte-ranges to fresh positions, keeping all IDE position-dependent features + // correct. The subprocess is never re-invoked; only cheap span remapping occurs. + check_def_map_is_not_recomputed( + r" +//- proc_macros: identity +//- /lib.rs +mod foo; + +//- /foo/mod.rs +pub mod bar; + +//- /foo/bar.rs +$0 +#[proc_macros::identity] +fn f() {} +", + r" +#[proc_macros::identity] +fn f() { + // a comment was added — trivia only +} +", + expect![[r#" + [ + "crate_local_def_map", + "file_item_tree_query", + "ast_id_map", + "parse", + "real_span_map", + "crate_local_def_map", + "proc_macros_for_crate_shim", + "file_item_tree_query", + "ast_id_map", + "parse", + "real_span_map", + "file_item_tree_query", + "ast_id_map", + "parse", + "real_span_map", + "file_item_tree_query", + "ast_id_map", + "parse", + "real_span_map", + "macro_def_shim", + "file_item_tree_query", + "ast_id_map", + "parse_macro_expansion", + "macro_arg", + "proc_macro_raw_output", + "macro_arg_key", + "proc_macro_span_shim", + ] + "#]], + expect![[r#" + [ + "parse", + "ast_id_map", + "file_item_tree_query", + "real_span_map", + "macro_arg", + "parse_macro_expansion", + "macro_arg_key", "ast_id_map", "file_item_tree_query", ] @@ -439,8 +517,9 @@ pub struct S {} "file_item_tree_query", "ast_id_map", "parse_macro_expansion", - "expand_proc_macro", "macro_arg", + "proc_macro_raw_output", + "macro_arg_key", "proc_macro_span_shim", ] "#]], diff --git a/crates/hir-expand/src/db.rs b/crates/hir-expand/src/db.rs index 513115684f87..43ce2091b0d4 100644 --- a/crates/hir-expand/src/db.rs +++ b/crates/hir-expand/src/db.rs @@ -2,7 +2,8 @@ use base_db::{Crate, SourceDatabase}; use mbe::MatchedArmIndex; -use span::{AstIdMap, Edition, Span, SyntaxContext}; +use rustc_hash::FxHashMap; +use span::{AstIdMap, Edition, FIXUP_ERASED_FILE_AST_ID_MARKER, Span, SyntaxContext, TextRange}; use std::borrow::Cow; use syntax::{AstNode, Parse, SyntaxError, SyntaxNode, SyntaxToken, T, ast}; use syntax_bridge::{DocCommentDesugarMode, syntax_node_to_token_tree}; @@ -23,6 +24,47 @@ use crate::{ }; /// This is just to ensure the types of smart_macro_arg and macro_arg are the same type MacroArgResult = (tt::TopSubtree, SyntaxFixupUndoInfo, Span); + +/// [`MacroArgResult`] with span byte-ranges excluded from equality. +/// +/// `macro_arg` changes whenever the source file changes because token spans embed byte offsets +/// that shift even when only whitespace or comments ("trivia") are added. Since trivia is stripped +/// from the token tree before it reaches the proc-macro, the expansion output is structurally +/// identical before and after a trivia-only edit. This type's [`PartialEq`] ignores span ranges so +/// salsa can backdate [`macro_arg_key`] — and therefore [`proc_macro_raw_output`] — on trivia-only +/// edits, skipping the expensive subprocess call. +#[derive(Clone, Debug)] +pub struct MacroArgKey(MacroArgResult); + +impl PartialEq for MacroArgKey { + fn eq(&self, other: &Self) -> bool { + let (tt1, undo1, span1) = &self.0; + let (tt2, undo2, span2) = &other.0; + zero_ranges(tt1) == zero_ranges(tt2) + && undo1.with_normalized_trees(zero_ranges) == undo2.with_normalized_trees(zero_ranges) + && span1.anchor == span2.anchor + && span1.ctx == span2.ctx + } +} +impl Eq for MacroArgKey {} + +/// The raw output of a proc-macro subprocess call together with the input spans used for that run. +/// +/// Stored by [`proc_macro_raw_output`] so that [`expand_proc_macro`] can remap stale span +/// byte-ranges in the cached output when [`proc_macro_raw_output`] is backdated on a trivia-only +/// edit. See [`remap_tt_spans`]. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ProcMacroRawOutput { + /// Raw token tree from the subprocess (before [`fixup::reverse_fixups`]). + pub tt: ExpandResult, + /// Flat DFS-ordered byte-ranges from the macro input fed to this subprocess call. + /// Each entry is the `range` field of the corresponding span in `macro_arg`, in the same + /// DFS order as [`collect_ranges`]. Only the range is stored (not the full span) because + /// anchor and ctx are stable across trivia-only edits and are recovered from the fresh + /// `macro_arg` at remap time. This is 8 bytes per token vs 20 bytes for a full [`Span`]. + pub input_ranges: Box<[TextRange]>, +} + /// Total limit on the number of tokens produced by any macro invocation. /// /// If an invocation produces more tokens than this limit, it will not be stored in the database and @@ -115,13 +157,24 @@ pub trait ExpandDatabase: SourceDatabase { id: AstId, ) -> &DeclarativeMacroExpander; - /// Special case of the previous query for procedural macros. We can't LRU - /// proc macros, since they are not deterministic in general, and - /// non-determinism breaks salsa in a very, very, very bad way. - /// @edwin0cheng heroically debugged this once! See #4315 for details - #[salsa::invoke(expand_proc_macro)] + /// [`macro_arg`] wrapped in [`MacroArgKey`]. Salsa backdates this query on trivia-only edits + /// because [`MacroArgKey::eq`] ignores span byte-ranges. [`proc_macro_raw_output`] depends on + /// this query so the subprocess call is also backdated on trivia-only edits. + #[salsa::invoke(macro_arg_key)] #[salsa::transparent] - fn expand_proc_macro(&self, call: MacroCallId) -> &ExpandResult; + fn macro_arg_key(&self, id: MacroCallId) -> &MacroArgKey; + + /// Raw proc-macro subprocess output plus the input byte-ranges used for that run. + /// + /// Backdated on trivia-only edits because it depends on [`macro_arg_key`] whose equality + /// ignores span byte-ranges. On trivia edits [`parse_macro_expansion`] re-runs (because + /// `macro_arg` changed), calls [`expand_proc_macro`] which reads this backdated query and + /// remaps its stale spans to current positions — producing a fresh [`ExpansionSpanMap`] + /// without re-invoking the subprocess. + #[salsa::invoke(proc_macro_raw_output)] + #[salsa::transparent] + fn proc_macro_raw_output(&self, id: MacroCallId) -> &ProcMacroRawOutput; + /// Retrieves the span to be used for a proc-macro expansions spans. /// This is a firewall query as it requires parsing the file, which we don't want proc-macros to /// directly depend on as that would cause to frequent invalidations, mainly because of the @@ -561,10 +614,9 @@ fn macro_expand<'db>( let (ExpandResult { value: (tt, matched_arm), err }, span) = match loc.def.kind { MacroDefKind::ProcMacro(..) => { - return db - .expand_proc_macro(macro_call_id) - .as_ref() - .map(|it| (Cow::Borrowed(it), None)); + // expand_proc_macro is not a salsa query — called directly to avoid + // caching a second copy of the TT (proc_macro_raw_output already caches it). + return expand_proc_macro(db, macro_call_id).map(|it| (Cow::Owned(it), None)); } _ => { let (macro_arg, undo_info, span) = @@ -643,10 +695,24 @@ fn proc_macro_span(db: &dyn ExpandDatabase, ast: AstId) -> Span { span_map.span_for_range(range) } +/// Implements [`ExpandDatabase::macro_arg_key`]. #[salsa_macros::tracked(returns(ref))] -fn expand_proc_macro(db: &dyn ExpandDatabase, id: MacroCallId) -> ExpandResult { +fn macro_arg_key(db: &dyn ExpandDatabase, id: MacroCallId) -> MacroArgKey { + MacroArgKey(db.macro_arg(id).clone()) +} + +#[salsa_macros::tracked(returns(ref))] +fn proc_macro_raw_output(db: &dyn ExpandDatabase, id: MacroCallId) -> ProcMacroRawOutput { let loc = id.loc(db); - let (macro_arg, undo_info, span) = db.macro_arg_considering_derives(id, &loc.kind); + + // Calling macro_arg_key here creates a salsa dependency that uses MacroArgKey's + // position-ignoring PartialEq, so this query is backdated on trivia-only edits. + let MacroArgKey((macro_arg, _undo_info, _call_span)) = match &loc.kind { + MacroCallKind::Derive { derive_macro_id, .. } => db.macro_arg_key(*derive_macro_id), + _ => db.macro_arg_key(id), + }; + + let input_ranges = collect_ranges(macro_arg); let (ast, expander) = match loc.def.kind { MacroDefKind::ProcMacro(ast, expander, _) => (ast, expander), @@ -658,7 +724,7 @@ fn expand_proc_macro(db: &dyn ExpandDatabase, id: MacroCallId) -> ExpandResult None, }; - let ExpandResult { value: mut tt, err } = { + let tt = { let span = db.proc_macro_span(ast); expander.expand( db, @@ -672,12 +738,58 @@ fn expand_proc_macro(db: &dyn ExpandDatabase, id: MacroCallId) -> ExpandResult ExpandResult { + let loc = id.loc(db); + + // Fresh macro_arg: this direct dependency on macro_arg_considering_derives (which is NOT + // backdated on trivia edits) ensures that expand_proc_macro re-runs on trivia edits even + // when proc_macro_raw_output is backdated. The re-run remaps the stale output spans to fresh + // positions, keeping ExpansionSpanMap current without re-invoking the subprocess. + let (fresh_macro_arg, fresh_undo_info, span) = db.macro_arg_considering_derives(id, &loc.kind); + + // Raw output from the subprocess — may be a backdated cached result on trivia-only edits. + let raw = db.proc_macro_raw_output(id); + + // Remap stale span byte-ranges in the cached TT to the current source positions. + // On structural edits proc_macro_raw_output re-ran with fresh input, so raw.input_ranges + // and the fresh macro_arg ranges are identical and remap_tt_spans is a structural no-op. + // + // We reconstruct each old span as `Span { range: old_range, anchor: fresh.anchor, ctx: + // fresh.ctx }` because anchor and ctx are stable across trivia-only edits (only byte-range + // positions shift), and on structural edits the raw output was recomputed so old == fresh. + let fresh_input_spans = collect_spans(fresh_macro_arg); + // Build a map from old span → new span for every input token whose byte-range shifted. + // Entries where old_range == fresh.range are skipped (no-ops): tokens before the trivia + // insertion point and all tokens on structural edits (where input_ranges == fresh ranges). + // This keeps the map small and lets remap_tt_spans early-exit via `map.is_empty()` on + // structural edits, avoiding a pointless TT clone. + let span_remap: FxHashMap = raw + .input_ranges + .iter() + .zip(fresh_input_spans.iter()) + .filter(|(_, fresh)| fresh.anchor.ast_id != FIXUP_ERASED_FILE_AST_ID_MARKER) + .filter(|(old_range, fresh)| **old_range != fresh.range) + .map(|(&old_range, &fresh)| { + // Reconstruct the old span using old range + fresh anchor/ctx. + // anchor and ctx are stable across trivia-only edits; on structural edits + // old_range == fresh.range so this branch is never reached. + let old = Span { range: old_range, anchor: fresh.anchor, ctx: fresh.ctx }; + (old, fresh) + }) + .collect(); + // Set a hard limit for the expanded tt - if let Err(value) = check_tt_count(&tt) { - return value.map(|()| tt::TopSubtree::empty(tt::DelimSpan::from_single(*span))); + if let Err(err_val) = check_tt_count(&raw.tt.value) { + return err_val.map(|()| tt::TopSubtree::empty(tt::DelimSpan::from_single(*span))); } - fixup::reverse_fixups(&mut tt, undo_info); + let err = raw.tt.err.clone(); + let mut tt = remap_tt_spans(raw.tt.value.clone(), &span_remap); + + fixup::reverse_fixups(&mut tt, fresh_undo_info); ExpandResult { value: tt, err } } @@ -697,6 +809,148 @@ pub(crate) fn token_tree_to_syntax_node( syntax_bridge::token_tree_to_syntax_node(tt, entry_point, &mut |ctx| ctx.edition(db)) } +/// Collects the byte-range from every span in a token tree, in depth-first order +/// (open delimiter, children, close delimiter). Stores only the [`TextRange`] field of each +/// [`Span`] — anchor and ctx are stable across trivia-only edits and are recovered from a +/// same-structure fresh token tree at remap time. 8 bytes per token instead of 20. +fn collect_ranges(tt: &tt::TopSubtree) -> Box<[TextRange]> { + fn recurse(iter: tt::TtIter<'_>, out: &mut Vec) { + for el in iter { + match el { + tt::TtElement::Leaf(l) => out.push(l.span().range), + tt::TtElement::Subtree(sub, children) => { + out.push(sub.delimiter.open.range); + recurse(children, out); + out.push(sub.delimiter.close.range); + } + } + } + } + let top = tt.top_subtree(); + let mut out = Vec::new(); + out.push(top.delimiter.open.range); + recurse(tt.iter(), &mut out); + out.push(top.delimiter.close.range); + out.into_boxed_slice() +} + +/// Collects all spans in a token tree in depth-first order (open delimiter, children, close +/// delimiter). The resulting slice is aligned index-for-index with the spans of a structurally +/// identical token tree, enabling O(n) span remapping without re-running the macro. +fn collect_spans(tt: &tt::TopSubtree) -> Box<[Span]> { + fn recurse(iter: tt::TtIter<'_>, out: &mut Vec) { + for el in iter { + match el { + tt::TtElement::Leaf(l) => out.push(*l.span()), + tt::TtElement::Subtree(sub, children) => { + out.push(sub.delimiter.open); + recurse(children, out); + out.push(sub.delimiter.close); + } + } + } + } + let top = tt.top_subtree(); + let mut out = Vec::new(); + out.push(top.delimiter.open); + recurse(tt.iter(), &mut out); + out.push(top.delimiter.close); + out.into_boxed_slice() +} + +/// Rebuilds `tree` replacing each span that appears in `map` with its mapped value. +/// Spans not present in the map (def-site, call-site, or macro-generated spans that don't +/// originate from the call-site input) are left unchanged. +fn remap_tt_spans(tree: tt::TopSubtree, map: &FxHashMap) -> tt::TopSubtree { + fn remap(span: Span, map: &FxHashMap) -> Span { + // Fixup spans encode an index in range.start(); never remap them. + if span.anchor.ast_id == FIXUP_ERASED_FILE_AST_ID_MARKER { + return span; + } + map.get(&span).copied().unwrap_or(span) + } + + fn leaf(mut l: tt::Leaf, map: &FxHashMap) -> tt::Leaf { + match &mut l { + tt::Leaf::Literal(it) => it.span = remap(it.span, map), + tt::Leaf::Punct(it) => it.span = remap(it.span, map), + tt::Leaf::Ident(it) => it.span = remap(it.span, map), + } + l + } + + fn build( + builder: &mut tt::TopSubtreeBuilder, + iter: tt::TtIter<'_>, + map: &FxHashMap, + ) { + for el in iter { + match el { + tt::TtElement::Leaf(l) => builder.push(leaf(l, map)), + tt::TtElement::Subtree(sub, children) => { + builder.open(sub.delimiter.kind, remap(sub.delimiter.open, map)); + build(builder, children, map); + builder.close(remap(sub.delimiter.close, map)); + } + } + } + } + + if map.is_empty() { + return tree; + } + + let top = tree.top_subtree(); + let mut b = tt::TopSubtreeBuilder::new(tt::Delimiter { + open: remap(top.delimiter.open, map), + close: remap(top.delimiter.close, map), + kind: top.delimiter.kind, + }); + build(&mut b, tree.iter(), map); + b.build() +} + +/// Rebuilds `tree` with all span byte-ranges set to zero, preserving fixup spans whose +/// `range.start()` encodes an index into [`SyntaxFixupUndoInfo::original`]. +/// Used by [`MacroArgKey::eq`] to compare token trees ignoring position information. +fn zero_ranges(tree: &tt::TopSubtree) -> tt::TopSubtree { + fn zero(span: Span) -> Span { + if span.anchor.ast_id == FIXUP_ERASED_FILE_AST_ID_MARKER { + return span; + } + use syntax::TextSize; + Span { range: TextRange::empty(TextSize::new(0)), ..span } + } + fn leaf(mut l: tt::Leaf) -> tt::Leaf { + match &mut l { + tt::Leaf::Literal(it) => it.span = zero(it.span), + tt::Leaf::Punct(it) => it.span = zero(it.span), + tt::Leaf::Ident(it) => it.span = zero(it.span), + } + l + } + fn build(builder: &mut tt::TopSubtreeBuilder, iter: tt::TtIter<'_>) { + for el in iter { + match el { + tt::TtElement::Leaf(l) => builder.push(leaf(l)), + tt::TtElement::Subtree(sub, children) => { + builder.open(sub.delimiter.kind, zero(sub.delimiter.open)); + build(builder, children); + builder.close(zero(sub.delimiter.close)); + } + } + } + } + let top = tree.top_subtree(); + let mut b = tt::TopSubtreeBuilder::new(tt::Delimiter { + open: zero(top.delimiter.open), + close: zero(top.delimiter.close), + kind: top.delimiter.kind, + }); + build(&mut b, tree.iter()); + b.build() +} + fn check_tt_count(tt: &tt::TopSubtree) -> Result<(), ExpandResult<()>> { let tt = tt.top_subtree(); let count = tt.count(); diff --git a/crates/hir-expand/src/fixup.rs b/crates/hir-expand/src/fixup.rs index 939104b70916..80e0c4a6739c 100644 --- a/crates/hir-expand/src/fixup.rs +++ b/crates/hir-expand/src/fixup.rs @@ -40,6 +40,11 @@ pub struct SyntaxFixupUndoInfo { impl SyntaxFixupUndoInfo { pub(crate) const NONE: Self = SyntaxFixupUndoInfo { original: None }; + + /// Returns a copy with each stored subtree replaced by `f(subtree)`. + pub(crate) fn with_normalized_trees(&self, f: impl Fn(&TopSubtree) -> TopSubtree) -> Self { + SyntaxFixupUndoInfo { original: self.original.as_ref().map(|v| v.iter().map(f).collect()) } + } } // We mark spans with `FIXUP_DUMMY_AST_ID` to indicate that they are fake.