Skip to content

Commit 7f25c39

Browse files
sdsrs authored and claude committed
fix(quality): production hardening — race condition, unbounded queries, DRY, observability
- Fix embedding thread / rebuild race: wait for background embedding before clearing data in tool_rebuild_index to prevent orphaned vectors
- Batch-fetch unembedded nodes with LIMIT to bound memory usage
- Add LIMIT 200 to project_map key-symbols query (prevent full table scan)
- Enable RUST_LOG env filter for runtime log level control
- Extract FTS5 trigger SQL to shared function (was copy-pasted 4x in schema.rs)
- Extract is_test_symbol helper (was duplicated 3x in server.rs)
- Add anyOf required constraint to get_ast_node tool schema
- Replace bare .unwrap() with .expect() in parser cache
- Mark test-only functions with #[cfg(test)]: update_context_string, rrf_fusion
- Fix pre-existing clippy nonminimal_bool warning
- Bump version to 0.5.17

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 50b44c4 commit 7f25c39

10 files changed

Lines changed: 93 additions & 58 deletions

File tree

Cargo.lock

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "code-graph-mcp"
3-
version = "0.5.16"
3+
version = "0.5.17"
44
edition = "2021"
55

66
[features]
@@ -18,7 +18,7 @@ ignore = "0.4"
1818
rayon = "1"
1919
notify = "6"
2020
tracing = "0.1"
21-
tracing-subscriber = "0.3"
21+
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
2222
anyhow = "1"
2323
bytemuck = "1"
2424
candle-core = { version = "0.9", optional = true }

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@sdsrs/code-graph",
3-
"version": "0.5.16",
3+
"version": "0.5.17",
44
"description": "MCP server that indexes codebases into an AST knowledge graph with semantic search, call graph traversal, and HTTP route tracing",
55
"license": "MIT",
66
"repository": {

src/main.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ fn print_help() {
6363

6464
fn run_serve() -> Result<()> {
6565
tracing_subscriber::fmt()
66+
.with_env_filter(
67+
tracing_subscriber::EnvFilter::try_from_default_env()
68+
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"))
69+
)
6670
.with_writer(io::stderr)
6771
.init();
6872

src/mcp/server.rs

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ fn required_str<'a>(args: &'a serde_json::Value, key: &str) -> Result<&'a str> {
2525
Ok(s)
2626
}
2727

28+
/// Whether a symbol is a test-only symbol (by name or file path convention).
29+
fn is_test_symbol(name: &str, file_path: &str) -> bool {
30+
name.starts_with("test_") || file_path.starts_with("tests/")
31+
}
32+
2833
/// Parse route input like "GET /api/users" into (Some("GET"), "/api/users").
2934
/// If no method prefix, returns (None, original_path).
3035
fn parse_route_input(input: &str) -> (Option<String>, &str) {
@@ -356,27 +361,29 @@ impl McpServer {
356361
None => return Ok(()),
357362
};
358363
let db = Database::open_with_vec(&db_path)?;
359-
let unembedded = queries::get_unembedded_nodes(db.conn())?;
360-
if unembedded.is_empty() {
361-
return Ok(());
362-
}
363364

364-
let total = unembedded.len();
365-
tracing::info!("[embed-bg] Embedding {} nodes in background...", total);
365+
const EMBED_BATCH: usize = 32;
366+
let mut total_embedded = 0usize;
366367
let t0 = std::time::Instant::now();
367368

368-
const EMBED_PROGRESS_BATCH: usize = 32;
369-
for (i, chunk) in unembedded.chunks(EMBED_PROGRESS_BATCH).enumerate() {
369+
loop {
370+
let chunk = queries::get_unembedded_nodes(db.conn(), EMBED_BATCH)?;
371+
if chunk.is_empty() {
372+
break;
373+
}
374+
370375
let tx = db.conn().unchecked_transaction()?;
371-
embed_and_store_batch(&db, &model, chunk)?;
376+
embed_and_store_batch(&db, &model, &chunk)?;
372377
tx.commit()?;
373378

374-
let done = ((i + 1) * EMBED_PROGRESS_BATCH).min(total);
375-
tracing::info!("[embed-bg] Progress: {}/{} nodes", done, total);
379+
total_embedded += chunk.len();
380+
tracing::info!("[embed-bg] Progress: {} nodes embedded", total_embedded);
376381
}
377382

378-
tracing::info!("[embed-bg] Complete: {} nodes in {:.1}s",
379-
total, t0.elapsed().as_secs_f64());
383+
if total_embedded > 0 {
384+
tracing::info!("[embed-bg] Complete: {} nodes in {:.1}s",
385+
total_embedded, t0.elapsed().as_secs_f64());
386+
}
380387
Ok(())
381388
})();
382389

@@ -438,7 +445,7 @@ impl McpServer {
438445
fn resolve_fuzzy_name(&self, name: &str) -> Result<FuzzyResolution> {
439446
let candidates: Vec<_> = queries::find_functions_by_fuzzy_name(self.db.conn(), name)?
440447
.into_iter()
441-
.filter(|c| !c.name.starts_with("test_") && !c.file_path.starts_with("tests/"))
448+
.filter(|c| !is_test_symbol(&c.name, &c.file_path))
442449
.collect();
443450
if candidates.len() == 1 {
444451
Ok(FuzzyResolution::Unique(candidates.into_iter().next().unwrap().name))
@@ -1133,7 +1140,7 @@ impl McpServer {
11331140
// If exact match returns empty (only seed node, no edges), try fuzzy name resolution
11341141
let has_edges = results.iter().any(|n| n.depth > 0);
11351142
let has_seed = results.iter().any(|n| n.depth == 0);
1136-
if !has_edges && !(has_seed && file_path.is_some()) {
1143+
if !(has_edges || has_seed && file_path.is_some()) {
11371144
match self.resolve_fuzzy_name(function_name)? {
11381145
FuzzyResolution::Unique(resolved) => {
11391146
let results2 = crate::graph::query::get_call_graph(
@@ -1177,7 +1184,7 @@ impl McpServer {
11771184
results: &[crate::graph::query::CallGraphNode],
11781185
) -> Result<serde_json::Value> {
11791186
let is_test = |n: &&crate::graph::query::CallGraphNode| {
1180-
n.name.starts_with("test_") || n.file_path.starts_with("tests/")
1187+
is_test_symbol(&n.name, &n.file_path)
11811188
};
11821189
let all_nodes: Vec<serde_json::Value> = results.iter()
11831190
.filter(|n| n.depth > 0 && !is_test(n))
@@ -1585,6 +1592,16 @@ impl McpServer {
15851592
let project_root = self.project_root.as_ref()
15861593
.ok_or_else(|| anyhow!("No project root configured"))?;
15871594

1595+
// Wait for background embedding to finish before clearing data
1596+
// to avoid race where embedding thread writes vectors for deleted nodes
1597+
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10);
1598+
while self.embedding_in_progress.load(Ordering::Acquire) {
1599+
if std::time::Instant::now() > deadline {
1600+
return Err(anyhow!("Background embedding still in progress. Try again shortly."));
1601+
}
1602+
std::thread::sleep(std::time::Duration::from_millis(100));
1603+
}
1604+
15881605
// Clear all data in a single transaction (CASCADE handles nodes→edges)
15891606
{
15901607
let tx = self.db.conn().unchecked_transaction()?;
@@ -1685,7 +1702,7 @@ impl McpServer {
16851702

16861703
// Separate production callers from test callers
16871704
let is_test = |c: &&queries::CallerWithRouteInfo| {
1688-
c.name.starts_with("test_") || c.file_path.starts_with("tests/")
1705+
is_test_symbol(&c.name, &c.file_path)
16891706
};
16901707
let prod_callers: Vec<_> = callers.iter().filter(|c| !is_test(c)).collect();
16911708
let test_callers: Vec<_> = callers.iter().filter(|c| is_test(c)).collect();

src/mcp/tools.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,11 @@ impl ToolRegistry {
7373
"node_id": { "type": "number", "description": "Node ID (alternative to file_path+symbol_name)" },
7474
"include_references": { "type": "boolean", "description": "Include callers/callees (default false)" },
7575
"context_lines": { "type": "number", "description": "Surrounding source lines to include (default 0)" }
76-
}
76+
},
77+
"anyOf": [
78+
{ "required": ["symbol_name"] },
79+
{ "required": ["node_id"] }
80+
]
7781
}),
7882
},
7983
ToolDefinition {

src/parser/treesitter.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ pub fn parse_tree(source: &str, language: &str) -> Result<tree_sitter::Tree> {
3838
p.set_language(&lang)?;
3939
cache.insert(language.to_string(), p);
4040
}
41-
let parser = cache.get_mut(language).unwrap();
41+
let parser = cache.get_mut(language)
42+
.expect("parser was just inserted");
4243
parser.parse(source, None)
4344
.ok_or_else(|| anyhow!("parse failed or timed out"))
4445
})

src/search/fusion.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ pub fn weighted_rrf_fusion(
3636
results
3737
}
3838

39+
#[cfg(test)]
3940
pub fn rrf_fusion(
4041
fts_results: &[SearchResult],
4142
vec_results: &[SearchResult],

src/storage/queries.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ pub fn delete_nodes_by_file(conn: &Connection, file_id: i64) -> Result<()> {
237237
Ok(())
238238
}
239239

240+
#[cfg(test)]
240241
pub fn update_context_string(conn: &Connection, node_id: i64, context_string: &str) -> Result<()> {
241242
conn.execute(
242243
"UPDATE nodes SET context_string = ?1 WHERE id = ?2",
@@ -882,13 +883,15 @@ pub fn get_import_tree(
882883
// --- Unembedded nodes ---
883884

884885
/// Get (node_id, context_string) for nodes that have context strings but no vectors.
885-
pub fn get_unembedded_nodes(conn: &Connection) -> Result<Vec<(i64, String)>> {
886+
/// Returns at most `limit` rows per call to bound memory usage.
887+
pub fn get_unembedded_nodes(conn: &Connection, limit: usize) -> Result<Vec<(i64, String)>> {
886888
let mut stmt = conn.prepare(
887889
"SELECT n.id, n.context_string FROM nodes n
888890
LEFT JOIN node_vectors v ON v.node_id = n.id
889-
WHERE n.context_string IS NOT NULL AND v.node_id IS NULL"
891+
WHERE n.context_string IS NOT NULL AND v.node_id IS NULL
892+
LIMIT ?1"
890893
)?;
891-
let rows = stmt.query_map([], |row| {
894+
let rows = stmt.query_map([limit as i64], |row| {
892895
Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
893896
})?;
894897
rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
@@ -1007,7 +1010,8 @@ pub fn get_project_map(conn: &Connection) -> Result<(Vec<ModuleStats>, Vec<Modul
10071010
AND f.path NOT LIKE 'tests/%' \
10081011
AND f.path NOT LIKE '%_test.%' \
10091012
GROUP BY n.id \
1010-
ORDER BY cnt DESC";
1013+
ORDER BY cnt DESC \
1014+
LIMIT 200";
10111015
let mut stmt = conn.prepare(sql)?;
10121016
let rows = stmt.query_map([REL_CALLS], |row| {
10131017
Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))

src/storage/schema.rs

Lines changed: 23 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,25 @@ CREATE INDEX IF NOT EXISTS idx_edges_source_rel ON edges(source_id, relation);
7171
CREATE INDEX IF NOT EXISTS idx_edges_target_rel ON edges(target_id, relation);
7272
"#;
7373

74+
/// FTS5 sync trigger SQL. Shared by migrations that recreate the FTS5 table,
75+
/// ensuring trigger definitions stay consistent with CREATE_TABLES.
76+
fn fts5_triggers_sql() -> &'static str {
77+
"CREATE TRIGGER IF NOT EXISTS nodes_ai AFTER INSERT ON nodes BEGIN
78+
INSERT INTO nodes_fts(rowid, name, qualified_name, code_content, context_string, doc_comment, name_tokens, return_type, param_types)
79+
VALUES (new.id, new.name, new.qualified_name, new.code_content, new.context_string, new.doc_comment, new.name_tokens, new.return_type, new.param_types);
80+
END;
81+
CREATE TRIGGER IF NOT EXISTS nodes_ad AFTER DELETE ON nodes BEGIN
82+
INSERT INTO nodes_fts(nodes_fts, rowid, name, qualified_name, code_content, context_string, doc_comment, name_tokens, return_type, param_types)
83+
VALUES ('delete', old.id, old.name, old.qualified_name, old.code_content, old.context_string, old.doc_comment, old.name_tokens, old.return_type, old.param_types);
84+
END;
85+
CREATE TRIGGER IF NOT EXISTS nodes_au AFTER UPDATE ON nodes BEGIN
86+
INSERT INTO nodes_fts(nodes_fts, rowid, name, qualified_name, code_content, context_string, doc_comment, name_tokens, return_type, param_types)
87+
VALUES ('delete', old.id, old.name, old.qualified_name, old.code_content, old.context_string, old.doc_comment, old.name_tokens, old.return_type, old.param_types);
88+
INSERT INTO nodes_fts(rowid, name, qualified_name, code_content, context_string, doc_comment, name_tokens, return_type, param_types)
89+
VALUES (new.id, new.name, new.qualified_name, new.code_content, new.context_string, new.doc_comment, new.name_tokens, new.return_type, new.param_types);
90+
END;"
91+
}
92+
7493
/// Migrate from schema v1 to v2. Must be called within a transaction.
7594
pub fn migrate_v1_to_v2(conn: &rusqlite::Connection) -> anyhow::Result<()> {
7695
tracing::info!("[schema] Migrating v1 → v2: adding name_tokens, return_type, param_types");
@@ -93,23 +112,9 @@ pub fn migrate_v1_to_v2(conn: &rusqlite::Connection) -> anyhow::Result<()> {
93112
name, qualified_name, code_content, context_string, doc_comment,
94113
name_tokens, return_type, param_types,
95114
content='nodes', content_rowid='id'
96-
);
97-
98-
CREATE TRIGGER IF NOT EXISTS nodes_ai AFTER INSERT ON nodes BEGIN
99-
INSERT INTO nodes_fts(rowid, name, qualified_name, code_content, context_string, doc_comment, name_tokens, return_type, param_types)
100-
VALUES (new.id, new.name, new.qualified_name, new.code_content, new.context_string, new.doc_comment, new.name_tokens, new.return_type, new.param_types);
101-
END;
102-
CREATE TRIGGER IF NOT EXISTS nodes_ad AFTER DELETE ON nodes BEGIN
103-
INSERT INTO nodes_fts(nodes_fts, rowid, name, qualified_name, code_content, context_string, doc_comment, name_tokens, return_type, param_types)
104-
VALUES ('delete', old.id, old.name, old.qualified_name, old.code_content, old.context_string, old.doc_comment, old.name_tokens, old.return_type, old.param_types);
105-
END;
106-
CREATE TRIGGER IF NOT EXISTS nodes_au AFTER UPDATE ON nodes BEGIN
107-
INSERT INTO nodes_fts(nodes_fts, rowid, name, qualified_name, code_content, context_string, doc_comment, name_tokens, return_type, param_types)
108-
VALUES ('delete', old.id, old.name, old.qualified_name, old.code_content, old.context_string, old.doc_comment, old.name_tokens, old.return_type, old.param_types);
109-
INSERT INTO nodes_fts(rowid, name, qualified_name, code_content, context_string, doc_comment, name_tokens, return_type, param_types)
110-
VALUES (new.id, new.name, new.qualified_name, new.code_content, new.context_string, new.doc_comment, new.name_tokens, new.return_type, new.param_types);
111-
END;"
115+
);"
112116
)?;
117+
conn.execute_batch(fts5_triggers_sql())?;
113118

114119
conn.execute_batch("INSERT INTO nodes_fts(nodes_fts) VALUES('rebuild');")?;
115120

@@ -167,23 +172,9 @@ pub fn migrate_v3_to_v4(conn: &rusqlite::Connection) -> anyhow::Result<()> {
167172
name_tokens, return_type, param_types,
168173
content='nodes', content_rowid='id',
169174
tokenize='porter unicode61'
170-
);
171-
172-
CREATE TRIGGER IF NOT EXISTS nodes_ai AFTER INSERT ON nodes BEGIN
173-
INSERT INTO nodes_fts(rowid, name, qualified_name, code_content, context_string, doc_comment, name_tokens, return_type, param_types)
174-
VALUES (new.id, new.name, new.qualified_name, new.code_content, new.context_string, new.doc_comment, new.name_tokens, new.return_type, new.param_types);
175-
END;
176-
CREATE TRIGGER IF NOT EXISTS nodes_ad AFTER DELETE ON nodes BEGIN
177-
INSERT INTO nodes_fts(nodes_fts, rowid, name, qualified_name, code_content, context_string, doc_comment, name_tokens, return_type, param_types)
178-
VALUES ('delete', old.id, old.name, old.qualified_name, old.code_content, old.context_string, old.doc_comment, old.name_tokens, old.return_type, old.param_types);
179-
END;
180-
CREATE TRIGGER IF NOT EXISTS nodes_au AFTER UPDATE ON nodes BEGIN
181-
INSERT INTO nodes_fts(nodes_fts, rowid, name, qualified_name, code_content, context_string, doc_comment, name_tokens, return_type, param_types)
182-
VALUES ('delete', old.id, old.name, old.qualified_name, old.code_content, old.context_string, old.doc_comment, old.name_tokens, old.return_type, old.param_types);
183-
INSERT INTO nodes_fts(rowid, name, qualified_name, code_content, context_string, doc_comment, name_tokens, return_type, param_types)
184-
VALUES (new.id, new.name, new.qualified_name, new.code_content, new.context_string, new.doc_comment, new.name_tokens, new.return_type, new.param_types);
185-
END;"
175+
);"
186176
)?;
177+
conn.execute_batch(fts5_triggers_sql())?;
187178

188179
conn.execute_batch("INSERT INTO nodes_fts(nodes_fts) VALUES('rebuild');")?;
189180

0 commit comments

Comments (0)