Skip to content

Commit f21d1b4

Browse files
committed
feat: add coreference resolution, fuzzy entity dedup, ctxgraph watch --git (v0.4)
1 parent 16d35b0 commit f21d1b4

File tree

16 files changed

+857
-23
lines changed

16 files changed

+857
-23
lines changed

Cargo.lock

Lines changed: 6 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ resolver = "2"
33
members = ["crates/*"]
44

55
[workspace.package]
6-
version = "0.3.0"
6+
version = "0.4.0"
77
edition = "2024"
88
license = "MIT"
99
repository = "https://github.com/rohansx/ctxgraph"
@@ -23,3 +23,4 @@ gline-rs = "1.0"
2323
ort = "=2.0.0-rc.9"
2424
ndarray = "0.16"
2525
fastembed = "4"
26+
strsim = "0.11"

crates/ctxgraph-cli/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ name = "ctxgraph"
1111
path = "src/main.rs"
1212

1313
[dependencies]
14-
ctxgraph = { version = "0.3.0", path = "../ctxgraph-core" }
14+
ctxgraph = { version = "0.4.0", path = "../ctxgraph-core" }
1515
clap = { workspace = true }
1616
serde_json = { workspace = true }
1717
chrono = { workspace = true }

crates/ctxgraph-cli/src/commands/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ pub mod init;
44
pub mod log;
55
pub mod query;
66
pub mod stats;
7+
pub mod watch;
78

89
use std::env;
910
use std::path::PathBuf;
crates/ctxgraph-cli/src/commands/watch.rs

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
use std::path::{Path, PathBuf};
2+
use std::process::Command;
3+
4+
use colored::Colorize;
5+
use ctxgraph::{Episode, Graph};
6+
7+
use super::open_graph;
8+
9+
/// A single commit as parsed from the output of `git log`.
///
/// All fields are stored as plain strings exactly as they were read (after
/// trimming surrounding whitespace); no further validation is performed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitCommit {
    /// Commit hash.
    pub hash: String,
    /// Name of the commit author.
    pub author_name: String,
    /// E-mail address of the commit author.
    pub author_email: String,
    /// Commit timestamp (ISO 8601).
    pub committed_at: String,
    /// First line of the commit message.
    pub subject: String,
    /// Remainder of the commit message; empty when the commit has no body.
    pub body: String,
}
19+
20+
/// Parse the raw output of our git log format into GitCommit list.
21+
pub(crate) fn parse_git_log_output(output: &str) -> Vec<GitCommit> {
22+
let mut commits = Vec::new();
23+
24+
// Split on our sentinel — the first element before any sentinel will be empty or whitespace
25+
for block in output.split("---CTXGRAPH---") {
26+
let block = block.trim();
27+
if block.is_empty() {
28+
continue;
29+
}
30+
31+
let lines: Vec<&str> = block.lines().collect();
32+
33+
// Need at least 5 lines: hash, author_name, author_email, committed_at, subject
34+
if lines.len() < 5 {
35+
continue;
36+
}
37+
38+
let hash = lines[0].trim().to_string();
39+
if hash.is_empty() {
40+
continue;
41+
}
42+
43+
let author_name = lines[1].trim().to_string();
44+
let author_email = lines[2].trim().to_string();
45+
let committed_at = lines[3].trim().to_string();
46+
let subject = lines[4].trim().to_string();
47+
48+
// Body is everything from line 5 onward (may be empty)
49+
let body = if lines.len() > 5 {
50+
lines[5..].join("\n").trim().to_string()
51+
} else {
52+
String::new()
53+
};
54+
55+
commits.push(GitCommit {
56+
hash,
57+
author_name,
58+
author_email,
59+
committed_at,
60+
subject,
61+
body,
62+
});
63+
}
64+
65+
commits
66+
}
67+
68+
/// Run `git log` and parse commits.
69+
pub fn get_git_commits(
70+
repo: &Path,
71+
last: usize,
72+
since: Option<&str>,
73+
) -> Result<Vec<GitCommit>, String> {
74+
let format = "--format=---CTXGRAPH---%n%H%n%an%n%ae%n%ai%n%s%n%b";
75+
76+
let mut cmd = Command::new("git");
77+
cmd.current_dir(repo)
78+
.arg("log")
79+
.arg(format)
80+
.arg(format!("-n{last}"));
81+
82+
if let Some(since_date) = since {
83+
cmd.arg(format!("--since={since_date}"));
84+
}
85+
86+
let output = cmd.output().map_err(|e| {
87+
format!("Failed to run git: {e}. Is git installed?")
88+
})?;
89+
90+
if !output.status.success() {
91+
let stderr = String::from_utf8_lossy(&output.stderr);
92+
return Err(format!(
93+
"git log failed: {stderr}. Is this a git repository?"
94+
));
95+
}
96+
97+
let stdout = String::from_utf8_lossy(&output.stdout);
98+
Ok(parse_git_log_output(&stdout))
99+
}
100+
101+
/// Import git commits into the graph.
102+
pub fn run_watch(
103+
graph: &Graph,
104+
last: usize,
105+
since: Option<&str>,
106+
repo: Option<&PathBuf>,
107+
install_hook: bool,
108+
) -> Result<(), String> {
109+
let default_repo = std::env::current_dir()
110+
.map_err(|e| format!("Cannot determine current directory: {e}"))?;
111+
let repo_path = repo.map(|p| p.as_path()).unwrap_or(default_repo.as_path());
112+
113+
if install_hook {
114+
install_post_commit_hook(repo_path)?;
115+
}
116+
117+
println!("Scanning git log (last {} commits)...", last);
118+
119+
let commits = get_git_commits(repo_path, last, since)
120+
.map_err(|e| e)?;
121+
122+
let mut imported = 0usize;
123+
let mut skipped = 0usize;
124+
125+
for commit in &commits {
126+
// Check if already ingested
127+
let already_exists = graph
128+
.has_episode_by_git_hash(&commit.hash)
129+
.map_err(|e| format!("Database error: {e}"))?;
130+
131+
let short_hash = &commit.hash[..commit.hash.len().min(7)];
132+
133+
if already_exists {
134+
println!(
135+
" {} {} \"{}\" — already ingested, skipping",
136+
"→".yellow(),
137+
short_hash,
138+
truncate(&commit.subject, 60)
139+
);
140+
skipped += 1;
141+
continue;
142+
}
143+
144+
// Build episode content
145+
let content = if commit.body.trim().is_empty() {
146+
commit.subject.clone()
147+
} else {
148+
format!("{}\n\n{}", commit.subject, commit.body.trim())
149+
};
150+
151+
let episode = Episode::builder(&content)
152+
.source("git")
153+
.meta("commit_hash", commit.hash.as_str())
154+
.meta("author", commit.author_name.as_str())
155+
.meta("email", commit.author_email.as_str())
156+
.meta("committed_at", commit.committed_at.as_str())
157+
.build();
158+
159+
graph
160+
.add_episode(episode)
161+
.map_err(|e| format!("Failed to insert episode: {e}"))?;
162+
163+
println!(
164+
" {} {} \"{}\" — imported",
165+
"✓".green(),
166+
short_hash,
167+
truncate(&commit.subject, 60)
168+
);
169+
imported += 1;
170+
}
171+
172+
println!();
173+
println!(
174+
"Imported {} new commit{}. {} already existed.",
175+
imported,
176+
if imported == 1 { "" } else { "s" },
177+
skipped
178+
);
179+
180+
Ok(())
181+
}
182+
183+
/// Install a post-commit hook in the given git repo.
///
/// Writes `<repo>/.git/hooks/post-commit` with a small shell script that runs
/// `ctxgraph watch --last 1`, and on Unix marks it executable (mode `0o755`).
///
/// The call is idempotent: a hook that already contains exactly this script
/// is simply refreshed. A hook with any other content is left untouched and
/// an error is returned, so a user's existing hook is never clobbered.
///
/// # Errors
/// Returns a message when `<repo>/.git/hooks` is not a directory, when a
/// different `post-commit` hook already exists (or cannot be read), or when
/// writing the file or setting its permissions fails.
pub fn install_post_commit_hook(repo: &Path) -> Result<(), String> {
    const HOOK_SCRIPT: &str = "#!/bin/sh\nctxgraph watch --last 1\n";

    let hooks_dir = repo.join(".git/hooks");
    if !hooks_dir.is_dir() {
        return Err(
            "No .git/hooks directory found — is this a git repo?".to_string(),
        );
    }

    let hook_path = hooks_dir.join("post-commit");

    // Never overwrite a hook we did not write ourselves.
    match std::fs::read_to_string(&hook_path) {
        Ok(existing) if existing == HOOK_SCRIPT => {}
        Ok(_) => {
            return Err(format!(
                "Refusing to overwrite existing hook {}; add `ctxgraph watch --last 1` to it manually",
                hook_path.display()
            ));
        }
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
        Err(e) => return Err(format!("Failed to read existing hook: {e}")),
    }

    std::fs::write(&hook_path, HOOK_SCRIPT)
        .map_err(|e| format!("Failed to write hook: {e}"))?;

    // The hook file must be executable to be usable as a script on Unix.
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut perms = std::fs::metadata(&hook_path)
            .map_err(|e| format!("Failed to read hook metadata: {e}"))?
            .permissions();
        perms.set_mode(0o755);
        std::fs::set_permissions(&hook_path, perms)
            .map_err(|e| format!("Failed to set hook permissions: {e}"))?;
    }

    println!("Installed post-commit hook: {}", hook_path.display());
    Ok(())
}
212+
213+
/// Top-level handler called from main.rs — opens the graph and runs watch.
214+
pub fn run_watch_command(
215+
last: usize,
216+
since: Option<String>,
217+
repo: Option<PathBuf>,
218+
install_hook: bool,
219+
) -> ctxgraph::Result<()> {
220+
let repo_path = repo
221+
.clone()
222+
.unwrap_or_else(|| std::env::current_dir().unwrap());
223+
224+
if install_hook {
225+
install_post_commit_hook(&repo_path)
226+
.map_err(|e| ctxgraph::CtxGraphError::InvalidInput(e))?;
227+
}
228+
229+
let graph = open_graph()?;
230+
run_watch(&graph, last, since.as_deref(), Some(&repo_path), false)
231+
.map_err(|e| ctxgraph::CtxGraphError::InvalidInput(e))
232+
}
233+
234+
/// Shorten `s` to at most `max_chars` characters for display.
///
/// Strings that already fit are returned unchanged. Longer strings are cut
/// to `max_chars - 1` characters and terminated with `…`, so the result is
/// exactly `max_chars` characters long. Lengths are counted in `char`s, not
/// bytes, so multi-byte characters are never split. With `max_chars == 0`
/// the result is the empty string.
fn truncate(s: &str, max_chars: usize) -> String {
    // If there is no character at index `max_chars`, the whole string fits.
    if s.char_indices().nth(max_chars).is_none() {
        return s.to_string();
    }

    // No room even for the ellipsis; also avoids `0 - 1` underflow.
    let Some(keep) = max_chars.checked_sub(1) else {
        return String::new();
    };

    // Byte offset at which the kept prefix ends.
    let cut = s.char_indices().nth(keep).map_or(s.len(), |(idx, _)| idx);
    format!("{}…", &s[..cut])
}

crates/ctxgraph-cli/src/main.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,25 @@ enum Commands {
6666

6767
/// Show graph statistics
6868
Stats,
69+
70+
/// Auto-capture git commits as episodes
71+
Watch {
72+
/// Import the last N commits
73+
#[arg(long, default_value = "10")]
74+
last: usize,
75+
76+
/// Only import commits since this date (ISO 8601)
77+
#[arg(long)]
78+
since: Option<String>,
79+
80+
/// Path to git repository (default: current directory)
81+
#[arg(long)]
82+
repo: Option<std::path::PathBuf>,
83+
84+
/// Install as .git/hooks/post-commit hook
85+
#[arg(long)]
86+
install_hook: bool,
87+
},
6988
}
7089

7190
#[derive(Subcommand)]
@@ -139,6 +158,12 @@ fn main() {
139158
DecisionsAction::Show { id } => commands::decisions::show(id),
140159
},
141160
Commands::Stats => commands::stats::run(),
161+
Commands::Watch {
162+
last,
163+
since,
164+
repo,
165+
install_hook,
166+
} => commands::watch::run_watch_command(last, since, repo, install_hook),
142167
};
143168

144169
if let Err(e) = result {

0 commit comments

Comments
 (0)