From 7c6ccd99f0842302c5e1923aaacb3cb1273d7d4a Mon Sep 17 00:00:00 2001 From: dian-lun-lin Date: Tue, 14 Apr 2026 15:06:34 -0700 Subject: [PATCH 01/18] Add on-disk graph index compaction algorithm Introduce OnDiskGraphIndexCompactor and PQRetrainer for streaming N:1 merging of on-disk HNSW indexes without full in-memory materialization. Supports deletion filtering via live-node bitsets, custom ordinal mapping, and PQ codebook retraining. --- docs/compaction.md | 199 +++ .../jvector/graph/GraphIndexBuilder.java | 30 +- .../jbellis/jvector/graph/GraphSearcher.java | 8 +- .../graph/disk/OnDiskGraphIndexCompactor.java | 1459 +++++++++++++++++ .../jvector/graph/disk/OrdinalMapper.java | 33 + .../jvector/graph/disk/PQRetrainer.java | 233 +++ .../jvector/quantization/PQVectors.java | 2 +- 7 files changed, 1954 insertions(+), 10 deletions(-) create mode 100644 docs/compaction.md create mode 100644 jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java create mode 100644 jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/PQRetrainer.java diff --git a/docs/compaction.md b/docs/compaction.md new file mode 100644 index 000000000..068c22fdf --- /dev/null +++ b/docs/compaction.md @@ -0,0 +1,199 @@ +# Graph Index Compaction + +`OnDiskGraphIndexCompactor` merges multiple on-disk HNSW graph indexes into a single compacted index. This is useful in write-heavy workloads where data is continuously ingested into small segment indexes that accumulate over time; periodically compacting those segments into one larger index improves search throughput and recall without rebuilding from scratch. + +## Overview + +``` +source[0].index ─┐ +source[1].index ─┤──► OnDiskGraphIndexCompactor ──► compacted.index +source[N].index ─┘ +``` + +Each source is an `OnDiskGraphIndex` with an associated `FixedBitSet` marking which of its nodes are live (not deleted). 
The compactor merges all live nodes into a single graph, remaps ordinals so the output is contiguously numbered, and optionally retrains the Product Quantization codebook for the combined dataset. + +## Usage + +```java +List<OnDiskGraphIndex> sources = List.of(index0, index1, index2); + +// Mark all nodes live (no deletions) +List<FixedBitSet> liveNodes = sources.stream() + .map(s -> { var bs = new FixedBitSet(s.size()); bs.set(0, s.size()); return bs; }) + .collect(toList()); + +// Sequential ordinal remapping: source[s] node i → global offset[s] + i +int offset = 0; +List<OrdinalMapper> remappers = new ArrayList<>(); +for (var src : sources) { + remappers.add(new OrdinalMapper.OffsetMapper(offset, src.size())); + offset += src.size(); +} + +var compactor = new OnDiskGraphIndexCompactor( + sources, liveNodes, remappers, + VectorSimilarityFunction.COSINE, + /* executor= */ null // null = create internal ForkJoinPool +); + +compactor.compact(Path.of("compacted.index")); +``` + +### Handling Deleted Nodes + +Deleted nodes are excluded from the output by marking them as `false` in the corresponding `FixedBitSet`. + +```java +// Example: every 5th node is deleted +FixedBitSet live = new FixedBitSet(source.size()); +Map<Integer, Integer> oldToNew = new HashMap<>(); +int newOrd = 0; +for (int i = 0; i < source.size(); i++) { + if (i % 5 != 0) { + live.set(i); + oldToNew.put(i, newOrd++); + } +} +remappers.add(new OrdinalMapper.MapMapper(oldToNew)); +``` + +## Algorithm + +### Ordinal Remapping + +Each source assigns its own local ordinals. The compactor maps them to a new global ordinal space using the user-provided `OrdinalMapper` for that source. + + +### PQ Retraining + +If the source indexes use FusedPQ, the compactor retrains the Product Quantization codebook on the combined dataset before writing the output. This is done by `PQRetrainer`, which +performs **balanced proportional sampling** across all sources (up to `ProductQuantization.MAX_PQ_TRAINING_SET_SIZE` vectors total, at least 1000 per source).
+ + +### Neighbor Selection (per node) + +For each live node at each graph level, the compactor gathers a candidate neighbor pool and then applies diversity selection: + +**1. Gather from same source** (`gatherFromSameSource`)\ +Iterate the node's existing neighbors in its source index. Filter out deleted nodes. Score each with the similarity function. No graph search — neighbors are already precomputed. + +**2. Gather from other sources** (`gatherFromOtherSource`)\ +Run a graph search in every other source index starting from that source's entry point. If FusedPQ is available, approximate PQ scoring is used during the search and top results are rescored exactly. + +- *Level 0*: a full hierarchical graph search is used (`GraphSearcher.search()`), descending from the entry node down to level 0. +- *Level L > 0*: the compactor first descends greedily from the source's entry node through each level above L (one `searchOneLayer` call with topK=1 per level, feeding the result into the next via `setEntryPointsFromPreviousLayer()`), then performs the full beam search at level L. This mirrors standard HNSW construction and gives a much better starting point than jumping directly to level L from the global entry node. + +``` +searchTopK = max(2, ceil(degree / numSources) * 2) +beamWidth = max(degree, searchTopK) * 2 +``` + +**3. Diversity selection** (Vamana-style)\ +Candidates are sorted by score (descending). The compactor selects up to `maxDegree` diverse neighbors using an adaptive alpha: + +``` +for alpha in [1.0, 1.2]: + for each candidate c (highest score first): + if c is already selected: skip + if ∀ selected neighbor j: similarity(c, j) ≤ score(c) × alpha: + select c + if |selected| == maxDegree: stop +``` + +### Hierarchical Levels + +Level 0 (base layer) stores inline vectors, FusedPQ codes, and the neighbor list. Upper levels store only the neighbor list (plus PQ codes at level 1 for cross-level searching). 
+ +Processing is batched per source and run in parallel across sources using a `ForkJoinPool`. A backpressure window keeps at most `taskWindowSize` batches in-flight at once, bounding memory use. + +### Entry Node + +The entry node of the compacted graph must exist at the top level of the output, i.e. the highest max level of any source. It is: +1. The original entry node of the first source (in order) that reaches that top level and whose entry node is live. +2. Otherwise, the first live node present at that top level, found by scanning those sources in order. + +## Benchmarking + +Use `CompactorBenchmark` (in `benchmarks-jmh`) to measure compaction performance. See `benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md` for full instructions. + +### Default: partition and compact in one run + +```bash +java -Xmx220g --add-modules jdk.incubator.vector \ + -jar benchmarks-jmh/target/benchmarks-jmh-*.jar CompactorBenchmark \ + -p workloadMode=PARTITION_AND_COMPACT \ + -p datasetNames=<dataset-name> \ + -p numPartitions=4 \ + -p splitDistribution=FIBONACCI \ + -p indexPrecision=FUSEDPQ \ + -wi 0 -i 1 -f 1 +``` + +### Measuring peak heap during compaction + +To measure how little RAM compaction actually needs — without the dataset occupying heap — run the two steps separately.
+ +**Step 1: build partitions** (dataset in memory, large heap required) + +```bash +java -Xmx220g --add-modules jdk.incubator.vector \ + -jar benchmarks-jmh/target/benchmarks-jmh-*.jar CompactorBenchmark \ + -p workloadMode=PARTITION_ONLY \ + -p datasetNames=<dataset-name> \ + -p numPartitions=4 \ + -p splitDistribution=FIBONACCI \ + -p indexPrecision=FUSEDPQ \ + -wi 0 -i 1 -f 1 +``` + +**Step 2: compact only** (dataset not loaded; use a small heap to prove low-memory operation) + +```bash +java -Xmx5g --add-modules jdk.incubator.vector \ + -jar benchmarks-jmh/target/benchmarks-jmh-*.jar CompactorBenchmark \ + -p workloadMode=COMPACT_ONLY \ + -p datasetNames=<dataset-name> \ + -p numPartitions=4 \ + -p splitDistribution=FIBONACCI \ + -p indexPrecision=FUSEDPQ \ + -wi 0 -i 1 -f 1 +``` + +`COMPACT_ONLY` skips dataset loading entirely, so `-Xmx5g` is sufficient even for large datasets. Because the dataset is never resident on the heap in this mode, the peak heap you observe is attributable to the compactor itself rather than to the dataset. + +Key `workloadMode` values: + +| Mode | Description | +|---|---| +| `PARTITION_AND_COMPACT` | **(default)** Build partitions, compact them, then measure recall | +| `PARTITION_ONLY` | Build N partition indexes and exit; use before `COMPACT_ONLY` | +| `COMPACT_ONLY` | Compact existing partitions without loading the dataset; `durationMs` = `compact()` time | +| `BUILD_FROM_SCRATCH` | Build one index over the full dataset; `durationMs` = `build()` time | + +Results are written as JSONL to `target/benchmark-results/compactor-*/compactor-results.jsonl`. The `durationMs` field records only the target function time (not dataset loading or JVM startup). + +## Recall + + +The table compares building from scratch with compaction under the following configurations (results averaged over three runs): + +- Build from scratch: build with PQ; search using FusedPQ with FP reranking. +- Compaction: build source partitions with PQ; compact using FusedPQ with FP rescoring; search using FusedPQ with FP reranking.
+ +| Dataset | Dim | Build from Scratch | Compaction | Delta | +|----------------------|-----:|-------------------:|-----------:|-------:| +| cap-6M | 768 | 0.626 | 0.619 | -0.008 | +| cap-1M | 768 | 0.656 | 0.656 | 0.000 | +| gecko-100k | 768 | 0.690 | 0.701 | +0.011 | +| e5-small-v2-100k | 384 | 0.572 | 0.586 | +0.014 | +| ada002-1M | 1536 | 0.687 | 0.703 | +0.016 | +| e5-base-v2-100k | 768 | 0.676 | 0.692 | +0.016 | +| cohere-english-v3-10M | 1024 | 0.544 | 0.561 | +0.017 | +| e5-large-v2-100k | 1024 | 0.686 | 0.703 | +0.017 | +| ada002-100k | 1536 | 0.751 | 0.769 | +0.018 | +| cohere-english-v3-1M | 1024 | 0.593 | 0.612 | +0.019 | + +## Memory footprint + +All datasets above can be compacted under `COMPACT_ONLY` with `-Xmx5g`. In addition, compaction successfully scales to a dataset with 2560 dimensions and 10M vectors under the same memory constraint. + diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphIndexBuilder.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphIndexBuilder.java index 9e366676c..8135bba25 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphIndexBuilder.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphIndexBuilder.java @@ -25,10 +25,7 @@ import io.github.jbellis.jvector.graph.similarity.BuildScoreProvider; import io.github.jbellis.jvector.graph.similarity.ScoreFunction; import io.github.jbellis.jvector.graph.similarity.SearchScoreProvider; -import io.github.jbellis.jvector.util.Bits; -import io.github.jbellis.jvector.util.ExceptionUtils; -import io.github.jbellis.jvector.util.ExplicitThreadLocal; -import io.github.jbellis.jvector.util.PhysicalCoreExecutor; +import io.github.jbellis.jvector.util.*; import io.github.jbellis.jvector.vector.VectorSimilarityFunction; import io.github.jbellis.jvector.vector.types.VectorFloat; import org.slf4j.Logger; @@ -57,7 +54,7 @@ * Under most conditions this is not something you need to worry about, but it does
mean
  * that spawning a new Thread per call is not advisable. This includes virtual threads.
  */
-public class GraphIndexBuilder implements Closeable {
+public class GraphIndexBuilder implements Closeable, Accountable {
     private static final Logger logger = LoggerFactory.getLogger(GraphIndexBuilder.class);
 
     private final int beamWidth;
@@ -848,6 +845,29 @@ public void close() throws IOException {
         }
     }
 
+    /**
+     * Estimates the heap bytes held by this builder.
+     * <p>
+     * The estimate is the sum of three parts: the shallow size of this object, whatever
+     * {@code graph.ramBytesUsed()} reports, and a per-entry approximation for
+     * {@code insertionsInProgress}. Every other referenced object (scratch structures,
+     * score provider, executors, searchers, rng) contributes only the size of its reference.
+     *
+     * @return the estimated number of bytes
+     */
+    @Override
+    public long ramBytesUsed() {
+        int OH = RamUsageEstimator.NUM_BYTES_OBJECT_HEADER;
+        int REF = RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+
+        // Shallow size of this object: header + all fields
+        // Primitive fields: beamWidth(int), dimension(int), neighborOverflow(float),
+        // alpha(float), addHierarchy(boolean), refineFinalGraph(boolean)
+        // Reference fields: naturalScratch, concurrentScratch, graph, insertionsInProgress,
+        // scoreProvider, simdExecutor, parallelExecutor, searchers, rng
+        // NOTE(review): the field counts below are maintained by hand; keep them in sync with
+        // the field list above whenever a field is added or removed.
+        long size = OH + 9L * REF + Integer.BYTES * 2 + Float.BYTES * 2 + 2;
+
+        // The graph is the dominant memory consumer
+        size += graph.ramBytesUsed();
+
+        // insertionsInProgress: ConcurrentSkipListSet — typically small during measurement,
+        // but account for object overhead plus per-entry cost
+        long inProgressEntrySize = OH + 2L * REF + Integer.BYTES + Integer.BYTES; // NodeAtLevel + skip list node
+        // The size() snapshot is taken once, so the result is a point-in-time estimate.
+        size += OH + REF + (long) insertionsInProgress.size() * inProgressEntrySize;
+
+        return size;
+    }
+
     private static class ExcludingBits implements Bits {
         private final int excluded;
 
diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphSearcher.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphSearcher.java
index 73cc5fbd5..4dd491913 100644
--- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphSearcher.java
+++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphSearcher.java
@@ -51,7 +51,7 @@ public class GraphSearcher implements Closeable {
     // Scratch data structures that are used in each {@link
#searchInternal} call. These can be expensive // to allocate, so they're cleared and reused across calls. private final NodeQueue candidates; - final NodeQueue approximateResults; + public final NodeQueue approximateResults; private final NodeQueue rerankedResults; private final IntHashSet visited; private final NodesUnsorted evictedResults; @@ -307,7 +307,7 @@ public SearchResult search(SearchScoreProvider scoreProvider, return search(scoreProvider, topK, 0.0f, acceptOrds); } - void setEntryPointsFromPreviousLayer() { + public void setEntryPointsFromPreviousLayer() { // push the candidates seen so far back onto the queue for the next layer // at worst we save recomputing the similarity; at best we might connect to a more distant cluster approximateResults.foreach(candidates::push); @@ -316,7 +316,7 @@ void setEntryPointsFromPreviousLayer() { approximateResults.clear(); } - void initializeInternal(SearchScoreProvider scoreProvider, NodeAtLevel entry, Bits rawAcceptOrds) { + public void initializeInternal(SearchScoreProvider scoreProvider, NodeAtLevel entry, Bits rawAcceptOrds) { // save search parameters for potential later resume initializeScoreProvider(scoreProvider); this.acceptOrds = Bits.intersectionOf(rawAcceptOrds, view.liveNodes()); @@ -384,7 +384,7 @@ private boolean stopSearch(NodeQueue localCandidates, ScoreTracker scoreTracker, // incorrect and is discarded, and there is no reason to pass a rerankFloor parameter to resume(). // // Finally: resume() also drives the use of CachingReranker. 
- void searchOneLayer(SearchScoreProvider scoreProvider, + public void searchOneLayer(SearchScoreProvider scoreProvider, int rerankK, float threshold, int level, diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java new file mode 100644 index 000000000..04d6c7c1d --- /dev/null +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java @@ -0,0 +1,1459 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.github.jbellis.jvector.graph.disk; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteOrder; +import java.nio.file.Path; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.StandardOpenOption; +import java.util.*; +import java.util.concurrent.*; +import java.util.stream.IntStream; + +import io.github.jbellis.jvector.disk.BufferedRandomAccessWriter; +import io.github.jbellis.jvector.disk.RandomAccessWriter; +import io.github.jbellis.jvector.disk.ByteBufferIndexWriter; +import io.github.jbellis.jvector.graph.*; +import io.github.jbellis.jvector.graph.disk.feature.Feature; +import io.github.jbellis.jvector.graph.disk.feature.FeatureId; +import io.github.jbellis.jvector.graph.disk.feature.InlineVectors; +import io.github.jbellis.jvector.graph.disk.feature.FusedPQ; +import io.github.jbellis.jvector.graph.similarity.DefaultSearchScoreProvider; +import io.github.jbellis.jvector.graph.similarity.SearchScoreProvider; +import io.github.jbellis.jvector.util.*; +import io.github.jbellis.jvector.vector.VectorSimilarityFunction; +import io.github.jbellis.jvector.graph.similarity.ScoreFunction; +import io.github.jbellis.jvector.vector.types.VectorFloat; +import io.github.jbellis.jvector.quantization.ProductQuantization; +import io.github.jbellis.jvector.vector.VectorizationProvider; +import io.github.jbellis.jvector.vector.types.VectorTypeSupport; +import io.github.jbellis.jvector.vector.types.ByteSequence; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static java.lang.Math.*; + +public final class OnDiskGraphIndexCompactor implements Accountable { + private static final VectorTypeSupport vectorTypeSupport = VectorizationProvider.getInstance().getVectorTypeSupport(); + private static final Logger log = LoggerFactory.getLogger(OnDiskGraphIndexCompactor.class); + + // Compaction constants + private static final float DIVERSITY_ALPHA_STEP = 0.2f; + private static 
final int BEAM_WIDTH_MULTIPLIER = 2; + private static final int TARGET_BATCHES_PER_SOURCE = 40; + private static final int TARGET_NODES_PER_BATCH = 128; + private static final int MIN_SEARCH_TOP_K = 2; + private static final int SEARCH_TOP_K_MULTIPLIER = 2; + + private final List sources; + private final List liveNodes; + private final List numLiveNodesPerSource; + private final List remappers; + private final List maxDegrees; + + private final int dimension; + private int maxOrdinal = -1; + private int numTotalNodes = 0; + private boolean ownsExecutor = false; + private final ForkJoinPool executor; + private final int taskWindowSize; + private final VectorSimilarityFunction similarityFunction; + + /** + * Constructs a new OnDiskGraphIndexCompactor to merge multiple graph indexes. + * Initializes thread pool, validates inputs, and prepares metadata for compaction. + */ + public OnDiskGraphIndexCompactor( + List sources, + List liveNodes, + List remappers, + VectorSimilarityFunction similarityFunction, + ForkJoinPool executor) { + checkBeforeCompact(sources, liveNodes, remappers); + + int threads = Runtime.getRuntime().availableProcessors(); + if (executor != null) { + this.executor = executor; + } else { + this.executor = new ForkJoinPool(threads); + this.ownsExecutor = true; + } + this.taskWindowSize = threads; + + this.sources = sources; + this.remappers = remappers; + this.liveNodes = liveNodes; + this.numLiveNodesPerSource = new ArrayList<>(this.sources.size()); + for (int s = 0; s < this.sources.size(); s++) { + int numLiveNodes = this.liveNodes.get(s).cardinality(); + this.numTotalNodes += numLiveNodes; + this.numLiveNodesPerSource.add(numLiveNodes); + } + + maxDegrees = this.sources.stream() + .max(Comparator.comparingInt(s -> s.maxDegrees().size())) + .orElseThrow() + .maxDegrees(); + dimension = this.sources.get(0).getDimension(); + for (var mapper : remappers) { + maxOrdinal = max(mapper.maxOrdinal(), maxOrdinal); + } + this.similarityFunction = 
similarityFunction; + } + + /** + * Validates that all source indexes have compatible configurations and required features + * before attempting compaction. Ensures consistent dimensions, max degrees, hierarchical + * settings, and feature sets across all sources. + */ + private void checkBeforeCompact( + List sources, + List liveNodes, + List remappers) { + validateInputSizes(sources, liveNodes, remappers); + validateLiveNodesBounds(sources, liveNodes); + validateGraphConfiguration(sources); + validateFeatures(sources); + } + + /** + * Validates that input lists have consistent sizes and are non-null. + */ + private void validateInputSizes(List sources, + List liveNodes, + List remappers) { + if (sources.size() < 2) { + throw new IllegalArgumentException("Must have at least two sources"); + } + Objects.requireNonNull(liveNodes, "liveNodes"); + Objects.requireNonNull(remappers, "remappers"); + + if (sources.size() != liveNodes.size()) { + throw new IllegalArgumentException("sources and liveNodes must have the same size"); + } + if (sources.size() != remappers.size()) { + throw new IllegalArgumentException("sources and remappers must have the same size"); + } + } + + /** + * Validates that liveNodes bitsets match the size of their corresponding sources. + */ + private void validateLiveNodesBounds(List sources, List liveNodes) { + for (int s = 0; s < sources.size(); ++s) { + if (liveNodes.get(s).length() != sources.get(s).size(0)) { + throw new IllegalArgumentException("source " + s + " out of bounds"); + } + } + } + + /** + * Validates that all sources have consistent graph configuration (dimensions, degrees, hierarchy). 
+ */ + private void validateGraphConfiguration(List sources) { + int dimension = sources.get(0).getDimension(); + var refDegrees = sources.stream() + .max(Comparator.comparingInt(s -> s.maxDegrees().size())) + .orElseThrow() + .maxDegrees(); + var addHierarchy = sources.get(0).isHierarchical(); + + for (OnDiskGraphIndex source : sources) { + if (source.getDimension() != dimension) { + throw new IllegalArgumentException("sources must have the same dimension"); + } + int sharedLevels = Math.min(refDegrees.size(), source.maxDegrees().size()); + for (int d = 0; d < sharedLevels; d++) { + if (!Objects.equals(source.maxDegrees().get(d), refDegrees.get(d))) { + throw new IllegalArgumentException("sources must have the same max degrees"); + } + } + if (addHierarchy != source.isHierarchical()) { + throw new IllegalArgumentException("sources must have the same hierarchical setting"); + } + } + } + + /** + * Validates that all sources have compatible features for compaction. + */ + private void validateFeatures(List sources) { + Set refKeys = sources.get(0).getFeatures().keySet(); + boolean sameFeatures = sources.stream() + .skip(1) + .map(s -> s.getFeatures().keySet()) + .allMatch(refKeys::equals); + + if (!sameFeatures) { + throw new IllegalArgumentException("Each source must have the same features"); + } + if (!refKeys.contains(FeatureId.INLINE_VECTORS)) { + throw new IllegalArgumentException("Each source must have the INLINE_VECTORS feature"); + } + } + + /** + * Main compaction entry point. Merges all source indexes into a single output index at the + * specified path, handling PQ retraining if needed, and writing header, all layers, and footer. 
+ */ + public void compact(Path outputPath) throws FileNotFoundException { + boolean fusedPQEnabled = hasFusedPQ(); + boolean compressedPrecision = fusedPQEnabled; + + ProductQuantization pq; + int pqLength; + if (fusedPQEnabled) { + pq = resolvePQFromSources(similarityFunction); + pqLength = pq.compressedVectorSize(); + } else { + pq = null; + pqLength = -1; + } + + List layerInfo = computeLayerInfoFromSources(); + int entryNode = resolveEntryNode(); + + log.info("Writing compacted graph : {} total nodes, maxOrdinal={}, dimension={}, degree={}", + numTotalNodes, maxOrdinal, dimension, maxDegrees.get(0)); + try (CompactWriter writer = new CompactWriter(outputPath, maxOrdinal, numTotalNodes, 0, layerInfo, entryNode, dimension, maxDegrees, pq, pqLength, fusedPQEnabled)) { + writer.writeHeader(); + compactLevels(writer, similarityFunction, fusedPQEnabled, compressedPrecision, pq); + writer.writeFooter(); + log.info("Compaction complete: {}", outputPath); + } catch (IOException | ExecutionException | InterruptedException e) { + throw new RuntimeException(e); + } finally { + if (ownsExecutor) executor.shutdown(); + } + } + + /** + * Resolves the entry node for the compacted graph. The chosen node must exist at maxLevel + * (since the on-disk format sets entryNode.level = maxLevel). Prefers the designated entry + * node of any source whose maxLevel equals the global maxLevel; if all such entry nodes + * are deleted, falls back to the first live node at maxLevel across all sources. + */ + private int resolveEntryNode() { + int maxLevel = sources.stream().mapToInt(OnDiskGraphIndex::getMaxLevel).max().orElse(0); + + // The on-disk format sets entryNode.level = layerInfo.size() - 1 (i.e. maxLevel). + // So the chosen node must actually have neighbors written at maxLevel — meaning it + // must exist at maxLevel in its source. Prefer the designated entry node of a + // maxLevel source; fall back to any live node that is at maxLevel. 
+ for (int s = 0; s < sources.size(); s++) { + if (sources.get(s).getMaxLevel() == maxLevel) { + int originalEntry = sources.get(s).getView().entryNode().node; + if (liveNodes.get(s).get(originalEntry)) { + return remappers.get(s).oldToNew(originalEntry); + } + } + } + + // Entry nodes were all deleted: scan for any live node that exists at maxLevel. + for (int s = 0; s < sources.size(); s++) { + if (sources.get(s).getMaxLevel() < maxLevel) continue; + NodesIterator it = sources.get(s).getNodes(maxLevel); + while (it.hasNext()) { + int node = it.next(); + if (liveNodes.get(s).get(node)) { + return remappers.get(s).oldToNew(node); + } + } + } + + throw new IllegalStateException("No live nodes found at maxLevel=" + maxLevel); + } + + /** + * Compacts all hierarchical levels of the graph, processing each level in batches. + * For level 0 (base layer), writes inline vectors and neighbors. For upper layers, + * writes only graph structure and optional PQ codes. + */ + private void compactLevels(CompactWriter writer, + VectorSimilarityFunction similarityFunction, + boolean fusedPQEnabled, + boolean compressedPrecision, + ProductQuantization pq) + throws IOException, ExecutionException, InterruptedException { + + int maxUpperDegree = 0; + for (int level = 1; level < maxDegrees.size(); level++) { + maxUpperDegree = Math.max(maxUpperDegree, maxDegrees.get(level)); + } + + int baseSearchTopK = Math.max(MIN_SEARCH_TOP_K, ((maxDegrees.get(0) + sources.size() - 1) / sources.size()) * SEARCH_TOP_K_MULTIPLIER); + int baseMaxCandidateSize = baseSearchTopK * (sources.size() - 1) + maxDegrees.get(0); + int upperMaxPerSourceTopK = maxUpperDegree == 0 ? 
0 : Math.max(MIN_SEARCH_TOP_K, ((maxUpperDegree + sources.size() - 1) / sources.size()) * SEARCH_TOP_K_MULTIPLIER); + int upperMaxCandidateSize = upperMaxPerSourceTopK * sources.size(); + int maxCandidateSize = Math.max(baseMaxCandidateSize, upperMaxCandidateSize); + int scratchDegree = Math.max(maxDegrees.get(0), Math.max(1, maxUpperDegree)); + final ThreadLocal threadLocalScratch = ThreadLocal.withInitial(() -> + new Scratch(maxCandidateSize, scratchDegree, dimension, sources, pq) + ); + + for (int level = 0; level < maxDegrees.size(); level++) { + List batches = buildBatches(level); + int searchTopK = Math.max(MIN_SEARCH_TOP_K, ((maxDegrees.get(level) + sources.size() - 1) / sources.size()) * SEARCH_TOP_K_MULTIPLIER); + int beamWidth = Math.max(maxDegrees.get(level), searchTopK) * BEAM_WIDTH_MULTIPLIER; + + CompactionParams params = new CompactionParams(fusedPQEnabled, compressedPrecision, searchTopK, beamWidth, pq); + + if (level == 0) { + log.info("Compacting level 0 (base layer)"); + + ExecutorCompletionService> ecs = + new ExecutorCompletionService<>(executor); + + java.util.function.Consumer submitOne = (bs) -> { + ecs.submit(() -> { + Scratch scratch = threadLocalScratch.get(); + return computeBaseBatch(writer, bs, scratch, params); + }); + }; + + var wropts = EnumSet.of(StandardOpenOption.WRITE, StandardOpenOption.READ); + try (FileChannel fc = FileChannel.open(writer.getOutputPath(), wropts)) { + + runBatchesWithBackpressure( + batches, + ecs, + submitOne, + (results) -> { + try { + for (WriteResult r : results) { + ByteBuffer b = r.data; + long pos = r.fileOffset; + while (b.hasRemaining()) { + int n = fc.write(b, pos); + pos += n; + } + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + ); + } + + writer.offsetAfterInline(); + + } else { + final int lvl = level; + log.info("Compacting upper layer {}", level); + + ExecutorCompletionService> ecs = + new ExecutorCompletionService<>(executor); + + java.util.function.Consumer submitOne 
= (bs) -> { + ecs.submit(() -> { + Scratch scratch = threadLocalScratch.get(); + return computeUpperBatchForLevel(bs, lvl, scratch, params); + }); + }; + + runBatchesWithBackpressure( + batches, + ecs, + submitOne, + (results) -> { + try { + for (UpperLayerWriteResult r : results) { + writer.writeUpperLayerNode( + lvl, + r.ordinal, + r.neighbors, + r.pqCode + ); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + ); + } + } + + Scratch s = threadLocalScratch.get(); + s.close(); + threadLocalScratch.remove(); + } + + /** + * Divides nodes at a given level across all source indexes into processing batches + * for parallel execution. Each batch contains a subset of nodes from one source. + */ + private List buildBatches(int level) { + List batches = new ArrayList<>(); + + for (int s = 0; s < sources.size(); ++s) { + var source = sources.get(s); + if (level > source.getMaxLevel()) continue; + NodesIterator sourceNodes = source.getNodes(level); + int numNodes = sourceNodes.size(); + int[] nodes = new int[numNodes]; + int i = 0; + while (sourceNodes.hasNext()) { + nodes[i++] = sourceNodes.next(); + } + + int numBatches = max(TARGET_BATCHES_PER_SOURCE, (numNodes + TARGET_NODES_PER_BATCH - 1) / TARGET_NODES_PER_BATCH); + if (numBatches > numNodes) numBatches = numNodes; + int batchSize = (numNodes + numBatches - 1) / numBatches; + for (int b = 0; b < numBatches; ++b) { + int start = min(numNodes, batchSize * b); + int end = min(numNodes, batchSize * (b + 1)); + batches.add(new BatchSpec(s, nodes, start, end)); + } + } + + return batches; + } + + /** + * Processes a batch of base layer (level 0) nodes from one source index. For each live node, + * gathers candidates from all sources, applies diversity selection, and creates write results + * containing the full node record data. 
+ */ + private List computeBaseBatch(CompactWriter writer, + BatchSpec bs, + Scratch scratch, + CompactionParams params) throws IOException { + + List out = new ArrayList<>(bs.end - bs.start); + + for (int i = bs.start; i < bs.end; i++) { + int node = bs.nodes[i]; + if (!liveNodes.get(bs.sourceIdx).get(node)) continue; + + out.add(processBaseNode(node, bs.sourceIdx, scratch, writer, params)); + } + + return out; + } + + /** + * Processes a batch of upper layer nodes from one source index. Similar to base layer + * processing but returns only ordinal, neighbors, and optional PQ code (no inline vectors). + */ + private List computeUpperBatchForLevel( + BatchSpec bs, + int level, + Scratch scratch, + CompactionParams params + ) { + List results = + new ArrayList<>(bs.end - bs.start); + + for (int i = bs.start; i < bs.end; i++) { + int node = bs.nodes[i]; + + if (!liveNodes.get(bs.sourceIdx).get(node)) continue; + + results.add(processUpperNode(node, bs.sourceIdx, level, scratch, params)); + } + + return results; + } + + /** + * Processes a single base layer node: retrieves its vector, gathers diverse candidates from + * all sources, selects best neighbors using diversity criteria, remaps ordinals, and returns + * the complete write result for this node. 
+     */
+    private WriteResult processBaseNode(
+            int node,
+            int sourceIdx,
+            Scratch scratch,
+            CompactWriter writer,
+            CompactionParams params
+    ) throws IOException {
+        var sourceView = (OnDiskGraphIndex.View) scratch.gs[sourceIdx].getView();
+        sourceView.getVectorInto(node, scratch.baseVec, 0);
+
+        int candSize = gatherCandidates(node, 0, sourceIdx, scratch, scratch.baseVec, params);
+        SelectedVecCache selected = selectAndRemapNeighbors(0, candSize, scratch);
+
+        int newOrdinal = remappers.get(sourceIdx).oldToNew(node);
+
+        return writer.writeInlineNodeRecord(
+                newOrdinal,
+                scratch.baseVec,
+                selected,
+                scratch.pqCode
+        );
+    }
+
+    /**
+     * Ranks the first {@code candSize} gathered candidates by descending score, keeps a diverse
+     * subset of at most {@code maxDegrees.get(level)} of them, and rewrites the kept node ids
+     * from source-local ordinals to compacted ordinals.
+     * <p>
+     * Shared by the base-layer and upper-layer paths so both select neighbors the same way.
+     *
+     * @param level    the level whose maximum degree bounds the selection
+     * @param candSize number of valid entries in the candidate arrays of {@code scratch}
+     * @param scratch  holds the candidate arrays and the cache that receives the selection
+     * @return {@code scratch.selectedCache}; it is reused, so consume it before the next call
+     */
+    private SelectedVecCache selectAndRemapNeighbors(int level, int candSize, Scratch scratch) {
+        int[] order = IntStream.range(0, candSize).toArray();
+        sortOrderByScoreDesc(order, scratch.candScore, candSize);
+
+        SelectedVecCache selected = scratch.selectedCache;
+
+        new CompactVamanaDiversityProvider(similarityFunction, 1.2f)
+                .retainDiverse(
+                        scratch.candSrc,
+                        scratch.candNode,
+                        scratch.candScore,
+                        order,
+                        candSize,
+                        maxDegrees.get(level),
+                        selected,
+                        scratch.tmpVec,
+                        scratch.gs
+                );
+
+        // remap: each selected node still carries the ordinal of the source it came from
+        for (int k = 0; k < selected.size; k++) {
+            selected.nodes[k] =
+                    remappers.get(selected.sourceIdx[k])
+                            .oldToNew(selected.nodes[k]);
+        }
+
+        return selected;
+    }
+
+    /**
+     * Processes a single upper layer node: gathers candidates at {@code level}, selects diverse
+     * neighbors, remaps ordinals, and returns the graph structure (ordinal and neighbors) plus
+     * a PQ code when {@link #maybeEncodePQ} produces one.
+     */
+    private UpperLayerWriteResult processUpperNode(
+            int node,
+            int sourceIdx,
+            int level,
+            Scratch scratch,
+            CompactionParams params
+    ) {
+        var sourceView = (OnDiskGraphIndex.View) scratch.gs[sourceIdx].getView();
+        sourceView.getVectorInto(node, scratch.baseVec, 0);
+
+        int candSize = gatherCandidates(node, level, sourceIdx, scratch, scratch.baseVec, params);
+        SelectedVecCache selected = selectAndRemapNeighbors(level, candSize, scratch);
+
+        int newOrdinal = remappers.get(sourceIdx).oldToNew(node);
+
+        ByteSequence<?> pqCode = maybeEncodePQ(level, scratch, params);
+
+        return new UpperLayerWriteResult(newOrdinal, selected, pqCode);
+    }
+
+    /**
+     * Encodes {@code scratch.baseVec} using Product Quantization if enabled and the level is 1.
+     * Returns null otherwise. The returned sequence is a copy, so it stays valid after the
+     * scratch buffer is reused.
+     */
+    private ByteSequence<?> maybeEncodePQ(int level, Scratch scratch, CompactionParams params) {
+        if (!params.fusedPQEnabled || level != 1) {
+            return null;
+        }
+
+        scratch.pqCode.zero();
+        params.pq.encodeTo(scratch.baseVec, scratch.pqCode);
+        return scratch.pqCode.copy();
+    }
+
+    /**
+     * Collects neighbor candidates for a node from all source indexes. For the source containing
+     * the node, uses existing neighbors; for other sources, performs graph search. Returns the
+     * total number of candidates gathered.
+     */
+    private int gatherCandidates(
+            int node,
+            int level,
+            int sourceIdx,
+            Scratch scratch,
+            VectorFloat<?> baseVec,
+            CompactionParams params
+    ) {
+        int candSize = 0;
+
+        for (int ss = 0; ss < sources.size(); ss++) {
+            var searchView = (OnDiskGraphIndex.View) scratch.gs[ss].getView();
+            FixedBitSet indexAlive = liveNodes.get(ss);
+
+            // The node's own source already has an edge list; every other source must be searched.
+            candSize = (ss == sourceIdx)
+                    ? gatherFromSameSource(node, level, ss, searchView, indexAlive,
+                                           baseVec, scratch, candSize)
+                    : gatherFromOtherSource(node, level, ss, searchView, indexAlive,
+                                            baseVec, scratch, candSize, params);
+        }
+
+        return candSize;
+    }
+
+    /**
+     * Gathers candidates from the same source index that contains the node by iterating its
+     * existing neighbors at {@code level}. Neighbors that are not live are dropped; each
+     * remaining one is scored against {@code baseVec} with the exact similarity function.
+     *
+     * @param candSize number of candidates already stored in {@code scratch}
+     * @return the candidate count after appending
+     */
+    private int gatherFromSameSource(int node, int level, int sourceIdx,
+                                     OnDiskGraphIndex.View searchView, FixedBitSet indexAlive,
+                                     VectorFloat<?> baseVec, Scratch scratch, int candSize) {
+        var it = searchView.getNeighborsIterator(level, node);
+        while (it.hasNext()) {
+            int nb = it.nextInt();
+            if (!indexAlive.get(nb)) {
+                continue;
+            }
+
+            searchView.getVectorInto(nb, scratch.tmpVec, 0);
+
+            scratch.candSrc[candSize] = sourceIdx;
+            scratch.candNode[candSize] = nb;
+            scratch.candScore[candSize] = similarityFunction.compare(baseVec, scratch.tmpVec);
+            candSize++;
+        }
+        return candSize;
+    }
+
+    /**
+     * Gathers candidates from a different source index via graph search.
+     * <p>
+     * At level 0 a regular search is run; at upper levels the searcher descends from the
+     * entry node to {@code level} and the layer search results are used. When fused PQ is
+     * enabled the stored score is recomputed exactly from the candidate's vector.
+     * The {@code node} argument is not used by the current implementation.
+     *
+     * @param candSize number of candidates already stored in {@code scratch}
+     * @return the candidate count after appending
+     */
+    private int gatherFromOtherSource(int node, int level, int sourceIdx,
+                                      OnDiskGraphIndex.View searchView, FixedBitSet indexAlive,
+                                      VectorFloat<?> baseVec, Scratch scratch, int candSize,
+                                      CompactionParams params) {
+        SearchScoreProvider ssp = buildCrossSourceScoreProvider(
+                params.compressedPrecision,
+                sources.get(sourceIdx),
+                searchView,
+                baseVec,
+                scratch.tmpVec,
+                similarityFunction
+        );
+        GraphSearcher searcher = scratch.gs[sourceIdx];
+
+        if (level == 0) {
+            SearchResult results = searcher.search(
+                    ssp, params.searchTopK, params.beamWidth, 0f, 0f, indexAlive
+            );
+
+            for (var r : results.getNodes()) {
+                scratch.candSrc[candSize] = sourceIdx;
+                scratch.candNode[candSize] = r.node;
+                scratch.candScore[candSize] =
+                        params.fusedPQEnabled
+                                ? rescore(searchView, r.node, baseVec, scratch.tmpVec)
+                                : r.score;
+                candSize++;
+            }
+            return candSize;
+        }
+
+        var entry = searchView.entryNode();
+        if (level > entry.level) {
+            // This source has no nodes at the requested level.
+            return candSize;
+        }
+        searcher.initializeInternal(ssp, entry, Bits.ALL);
+
+        // Descend greedily through levels above the target level, so the search at
+        // `level` starts from the best-known region rather than the global entry node.
+        // This mirrors how GraphSearcher.searchInternal navigates the hierarchy.
+        for (int l = entry.level; l > level; l--) {
+            searcher.searchOneLayer(ssp, 1, 0f, l, Bits.ALL);
+            searcher.setEntryPointsFromPreviousLayer();
+        }
+
+        searcher.searchOneLayer(
+                ssp, params.searchTopK, 0f, level, indexAlive
+        );
+
+        int firstAppended = candSize;
+        candSize = appendApproximateResults(
+                searcher.approximateResults,
+                sourceIdx,
+                scratch,
+                candSize
+        );
+
+        if (params.fusedPQEnabled) {
+            for (int i = firstAppended; i < candSize; i++) {
+                scratch.candScore[i] = rescore(
+                        searchView,
+                        scratch.candNode[i],
+                        baseVec,
+                        scratch.tmpVec
+                );
+            }
+        }
+
+        return candSize;
+    }
+
+    /**
+     * Recomputes exact similarity score between the base vector and a node's vector,
+     * used to refine approximate PQ-based search results.
+     *
+     * @param tmp buffer that is overwritten with the node's vector
+     */
+    private float rescore(OnDiskGraphIndex.View view,
+                          int node,
+                          VectorFloat<?> base,
+                          VectorFloat<?> tmp) {
+        view.getVectorInto(node, tmp, 0);
+        return similarityFunction.compare(base, tmp);
+    }
+
+    /**
+     * Executes batches with controlled concurrency using a sliding window approach. Prevents
+     * overwhelming memory by limiting the number of in-flight tasks while maintaining high
+     * throughput via the completion service.
+     *
+     * @param batches    the work items, submitted in list order
+     * @param ecs        completion service that the tasks started by {@code submitOne} report to
+     * @param submitOne  starts the task for one batch
+     * @param onComplete receives the result list of each finished task, in completion order
+     * @throws InterruptedException if interrupted while waiting for a task
+     * @throws ExecutionException   if a task failed
+     */
+    private <T> void runBatchesWithBackpressure(
+            List<BatchSpec> batches,
+            ExecutorCompletionService<List<T>> ecs,
+            java.util.function.Consumer<BatchSpec> submitOne,
+            java.util.function.Consumer<List<T>> onComplete
+    ) throws InterruptedException, ExecutionException {
+
+        final int total = batches.size();
+        // At least one task must be in flight, otherwise take() below would wait forever.
+        final int window = Math.max(1, taskWindowSize);
+        int nextToSubmit = 0;
+
+        // initial window
+        while (nextToSubmit < window && nextToSubmit < total) {
+            submitOne.accept(batches.get(nextToSubmit++));
+        }
+
+        int completed = 0;
+        while (completed < total) {
+            List<T> results = ecs.take().get();
+            onComplete.accept(results);
+            completed++;
+
+            // Each completion frees one slot; refill it so the window stays full.
+            if (nextToSubmit < total) {
+                submitOne.accept(batches.get(nextToSubmit++));
+            }
+            if (completed % 10 == 0) {
+                log.info("Compaction I/O progress: {}/{} batches written to disk", completed, total);
+            }
+        }
+    }
+
+    /**
+     * Appends search results from a NodeQueue to the candidate arrays, returning the updated
+     * candidate count. Scores are stored exactly as the queue reports them.
+     */
+    private int appendApproximateResults(NodeQueue queue,
+                                         int sourceIdx,
+                                         Scratch scratch,
+                                         int candSize) {
+        // Single-element array: the lambda needs a mutable cursor it can capture.
+        final int[] cursor = { candSize };
+
+        queue.foreach((nb, score) -> {
+            int slot = cursor[0]++;
+            scratch.candSrc[slot] = sourceIdx;
+            scratch.candNode[slot] = nb;
+            scratch.candScore[slot] = score;
+        });
+
+        return cursor[0];
+    }
+
+    /**
+     * Computes layer metadata for the compacted graph by counting live nodes at each level
+     * across all source indexes.
+     *
+     * @return one entry per level, from 0 up to the highest level of any source
+     */
+    private List<CommonHeader.LayerInfo> computeLayerInfoFromSources() {
+        int maxLevel = sources.stream().mapToInt(OnDiskGraphIndex::getMaxLevel).max().orElse(0);
+        List<CommonHeader.LayerInfo> layerInfo = new ArrayList<>(maxLevel + 1);
+
+        for (int level = 0; level <= maxLevel; level++) {
+            int count = 0;
+            for (int s = 0; s < sources.size(); s++) {
+                OnDiskGraphIndex source = sources.get(s);
+                if (level > source.getMaxLevel()) {
+                    continue;
+                }
+                FixedBitSet alive = liveNodes.get(s);
+                NodesIterator it = source.getNodes(level);
+                while (it.hasNext()) {
+                    // nextInt() avoids boxing every node id
+                    if (alive.get(it.nextInt())) {
+                        count++;
+                    }
+                }
+            }
+            layerInfo.add(new CommonHeader.LayerInfo(count, maxDegrees.get(level)));
+        }
+        return layerInfo;
+    }
+
+    /**
+     * Trains a new Product Quantization codebook by delegating to {@link PQRetrainer}, which
+     * is given all sources and their live-node sets.
+     */
+    private ProductQuantization resolvePQFromSources(VectorSimilarityFunction similarityFunction) {
+        return new PQRetrainer(sources, liveNodes, dimension).retrain(similarityFunction);
+    }
+
+    /**
+     * Checks whether the first source index has the FusedPQ feature. Only source 0 is
+     * inspected; the other sources are not checked here.
+     */
+    private boolean hasFusedPQ() {
+        return sources.get(0).getFeatures().containsKey(FeatureId.FUSED_PQ);
+    }
+
+    /**
+     * Creates a score provider for searching across different source indexes. Uses approximate
+     * PQ-based scoring if compressedPrecision is enabled, otherwise uses exact scoring.
+     *
+     * @param tmpVec buffer overwritten with the vector of each node that is scored exactly
+     */
+    private SearchScoreProvider buildCrossSourceScoreProvider(boolean compressedPrecision,
+                                                              OnDiskGraphIndex searchSource,
+                                                              OnDiskGraphIndex.View searchView,
+                                                              VectorFloat<?> baseVec,
+                                                              VectorFloat<?> tmpVec,
+                                                              VectorSimilarityFunction similarityFunction) {
+        // Exact scorer: load the node's full vector and compare it with the query vector.
+        // It is the primary scorer in exact mode and the reranker in compressed mode.
+        ScoreFunction.ExactScoreFunction exact = node2 -> {
+            searchView.getVectorInto(node2, tmpVec, 0);
+            return similarityFunction.compare(baseVec, tmpVec);
+        };
+
+        if (!compressedPrecision) {
+            return new DefaultSearchScoreProvider(exact);
+        }
+
+        FusedPQ fusedPQ = (FusedPQ) searchSource.getFeatures().get(FeatureId.FUSED_PQ);
+        var asf = fusedPQ.approximateScoreFunctionFor(baseVec, similarityFunction, searchView, exact);
+        return new DefaultSearchScoreProvider(asf);
+    }
+
+    /**
+     * Estimates the RAM usage of this compactor instance.
+     * Accounts for data structures used during compaction including bitsets, remappers,
+     * executor overhead, and per-thread scratch space.
+     */
+    @Override
+    public long ramBytesUsed() {
+        final int header = RamUsageEstimator.NUM_BYTES_OBJECT_HEADER;
+        final int ref = RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+        // Cost charged for one boxed Integer held in a list
+        final int boxedInt = header + Integer.BYTES;
+
+        // Shallow size of this object (header + fields)
+        // Current fields: sources, liveNodes, numLiveNodesPerSource, remappers, maxDegrees,
+        //                 dimension(int), maxOrdinal(int), numTotalNodes(int),
+        //                 ownsExecutor(boolean), executor, taskWindowSize(int), similarityFunction
+        long total = header + 8L * ref + Integer.BYTES * 4 + 1;
+
+        // liveNodes: one FixedBitSet per source
+        for (FixedBitSet live : liveNodes) {
+            total += live.ramBytesUsed();
+        }
+
+        // numLiveNodesPerSource: list of Integers
+        total += header + ref + (long) numLiveNodesPerSource.size() * boxedInt;
+
+        // remappers: only MapMapper instances are charged. Rough estimate of 48 bytes per
+        // ordinal (~32 for a HashMap entry plus ~16 for an Int2IntHashMap entry).
+        long mapMapperCount = remappers.stream()
+                .filter(mapper -> mapper instanceof OrdinalMapper.MapMapper)
+                .count();
+        total += mapMapperCount * (header + (long) (maxOrdinal + 1) * 48);
+
+        // maxDegrees: small list of integers
+        total += header + ref + (long) maxDegrees.size() * boxedInt;
+
+        // executor: an owned pool is sized by the processor count and adds its own overhead;
+        // otherwise the task window bounds the number of threads
+        final int numThreads;
+        if (ownsExecutor) {
+            numThreads = Runtime.getRuntime().availableProcessors();
+            total += header + ref;
+        } else {
+            numThreads = taskWindowSize;
+        }
+
+        // Scratch space: one Scratch per active thread, holding the candidate arrays,
+        // the SelectedVecCache, tmpVec/baseVec, one GraphSearcher per source and the pqCode
+        total += estimateScratchSpacePerThread() * numThreads;
+
+        return total;
+    }
+
+    /**
+     * Estimates the RAM usage of a single Scratch instance from the configured degrees,
+     * the number of sources and the vector dimension.
+     */
+    private long estimateScratchSpacePerThread() {
+        final int header = RamUsageEstimator.NUM_BYTES_OBJECT_HEADER;
+        final int ref = RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+        final int numSources = sources.size();
+        final int baseDegree = maxDegrees.get(0);
+
+        // Calculate maxCandidateSize and maxDegree (same logic as in compactLevels)
+        int maxUpperDegree = 0;
+        for (int level = maxDegrees.size() - 1; level >= 1; level--) {
+            maxUpperDegree = Math.max(maxUpperDegree, maxDegrees.get(level));
+        }
+
+        int baseSearchTopK = Math.max(
+                MIN_SEARCH_TOP_K,
+                ((baseDegree + numSources - 1) / numSources) * SEARCH_TOP_K_MULTIPLIER);
+        int baseMaxCandidateSize = baseSearchTopK * (numSources - 1) + baseDegree;
+
+        int upperMaxPerSourceTopK = 0;
+        if (maxUpperDegree != 0) {
+            upperMaxPerSourceTopK = Math.max(
+                    MIN_SEARCH_TOP_K,
+                    ((maxUpperDegree + numSources - 1) / numSources) * SEARCH_TOP_K_MULTIPLIER);
+        }
+        int upperMaxCandidateSize = upperMaxPerSourceTopK * numSources;
+
+        int maxCandidateSize = Math.max(baseMaxCandidateSize, upperMaxCandidateSize);
+        int scratchDegree = Math.max(baseDegree, Math.max(1, maxUpperDegree));
+
+        // Cost charged for one VectorFloat of this dimension
+        final int vectorBytes = header + dimension * Float.BYTES;
+
+        // Scratch object itself
+        long bytes = header + 6L * ref;
+
+        // candSrc (int), candNode (int), candScore (float)
+        bytes += (long) maxCandidateSize * (Integer.BYTES + Integer.BYTES + Float.BYTES);
+
+        // SelectedVecCache object
+        bytes += header + 5L * ref + Integer.BYTES;
+        // its sourceIdx (int), views (ref), nodes (int), scores (float), vecs (ref) arrays
+        bytes += (long) scratchDegree * (Integer.BYTES + ref + Integer.BYTES + Float.BYTES + ref);
+        // the VectorFloat instances behind vecs
+        bytes += (long) scratchDegree * vectorBytes;
+
+        // tmpVec and baseVec
+        bytes += 2L * vectorBytes;
+
+        // GraphSearcher array (one per source)
+        bytes += (long) numSources * ref;
+        // Each GraphSearcher has internal state - rough estimate
+        bytes += (long) numSources * (header + 10L * ref);
+
+        // pqCode ByteSequence (if PQ enabled)
+        if (hasFusedPQ()) {
+            FusedPQ fpq = (FusedPQ) sources.get(0).getFeatures().get(FeatureId.FUSED_PQ);
+            bytes += header + fpq.getPQ().getSubspaceCount();
+        }
+
+        return bytes;
+    }
+
+    /**
+     * Encapsulates common parameters used throughout the compaction process.
+     */
+    private static final class CompactionParams {
+        // Immutable settings bundle; fields are read directly by the compaction methods.
+        final boolean fusedPQEnabled;
+        final boolean compressedPrecision;
+        final int searchTopK;
+        final int beamWidth;
+        final ProductQuantization pq;
+
+        CompactionParams(boolean fusedPQEnabled,
+                         boolean compressedPrecision,
+                         int searchTopK,
+                         int beamWidth,
+                         ProductQuantization pq) {
+            this.pq = pq;
+            this.beamWidth = beamWidth;
+            this.searchTopK = searchTopK;
+            this.compressedPrecision = compressedPrecision;
+            this.fusedPQEnabled = fusedPQEnabled;
+        }
+    }
+
+    /**
+     * Reorders the first {@code size} entries of {@code order} so that the scores they index
+     * are in descending order. {@code score} itself is left untouched.
+     */
+    private static void sortOrderByScoreDesc(int[] order, float[] score, int size) {
+        final int last = size - 1;
+        quicksort(order, score, 0, last);
+    }
+
+    /**
+     * Quicksort over {@code order[lo..hi]} by descending score. Recurses into the smaller
+     * partition and loops over the larger one, which keeps the recursion depth small.
+     */
+    private static void quicksort(int[] order, float[] score, int lo, int hi) {
+        int left = lo;
+        int right = hi;
+        while (left < right) {
+            int pivotIdx = partition(order, score, left, right);
+            boolean leftIsSmaller = (pivotIdx - left) < (right - pivotIdx);
+            if (leftIsSmaller) {
+                quicksort(order, score, left, pivotIdx - 1);
+                left = pivotIdx + 1;
+            } else {
+                quicksort(order, score, pivotIdx + 1, right);
+                right = pivotIdx - 1;
+            }
+        }
+    }
+
+    /**
+     * Partitions the order array for quicksort using descending score comparison.
+     * The pivot is the score indexed by {@code order[hi]}; on return it sits at the returned
+     * position with all strictly greater scores before it.
+     */
+    private static int partition(int[] order, float[] score, int lo, int hi) {
+        float pivot = score[order[hi]];
+        int i = lo;
+        for (int j = lo; j < hi; j++) {
+            if (score[order[j]] > pivot) { // DESC
+                int t = order[i];
+                order[i] = order[j];
+                order[j] = t;
+                i++;
+            }
+        }
+        int t = order[i];
+        order[i] = order[hi];
+        order[hi] = t;
+        return i;
+    }
+
+    /**
+     * A fully serialized base-layer record together with the file offset it belongs at.
+     */
+    private static final class WriteResult {
+        final int newOrdinal;
+        final long fileOffset;
+        final ByteBuffer data;
+
+        WriteResult(int newOrdinal, long fileOffset, ByteBuffer data) {
+            this.newOrdinal = newOrdinal;
+            this.fileOffset = fileOffset;
+            this.data = data;
+        }
+    }
+
+    /**
+     * The outcome of processing one upper-layer node: its ordinal, a private copy of the
+     * selected neighbor ids, and an optional PQ code.
+     */
+    private static final class UpperLayerWriteResult {
+        final int ordinal;
+        final int[] neighbors;
+        final ByteSequence<?> pqCode;
+
+        UpperLayerWriteResult(int ordinal, SelectedVecCache cache, ByteSequence<?> pqCode) {
+            this.ordinal = ordinal;
+            // The cache is reused for the next node, so snapshot only the filled prefix.
+            this.neighbors = Arrays.copyOf(cache.nodes, cache.size);
+            this.pqCode = pqCode == null ? null : pqCode.copy();
+        }
+    }
+
+
+    /**
+     * Thread-local scratch space containing reusable buffers and search state for processing nodes.
+     */
+    private static final class Scratch implements AutoCloseable {
+
+        final int[] candSrc, candNode;
+        final float[] candScore;
+        final SelectedVecCache selectedCache;
+        final VectorFloat<?> tmpVec, baseVec;
+        final GraphSearcher[] gs;
+        final ByteSequence<?> pqCode;
+
+        /**
+         * Constructs scratch space with buffers sized for the maximum expected candidates and degree.
+         * One {@link GraphSearcher} is created per source, with pruning disabled.
+         *
+         * @param pq the quantizer whose subspace count sizes {@code pqCode}; when null,
+         *           {@code pqCode} is null as well
+         */
+        Scratch(int maxCandidateSize, int maxDegree, int dimension, List<OnDiskGraphIndex> sources, ProductQuantization pq) {
+            this.candSrc = new int[maxCandidateSize];
+            this.candNode = new int[maxCandidateSize];
+            this.candScore = new float[maxCandidateSize];
+            this.selectedCache = new SelectedVecCache(maxDegree, dimension);
+            this.tmpVec = vectorTypeSupport.createFloatVector(dimension);
+            this.baseVec = vectorTypeSupport.createFloatVector(dimension);
+            this.pqCode = (pq == null) ? null : vectorTypeSupport.createByteSequence(pq.getSubspaceCount());
+
+            this.gs = new GraphSearcher[sources.size()];
+            for (int i = 0; i < sources.size(); i++) {
+                gs[i] = new GraphSearcher(sources.get(i));
+                gs[i].usePruning(false);
+            }
+        }
+
+        /**
+         * Closes all graph searchers and resets the cache.
+         */
+        @Override
+        public void close() throws IOException {
+            for (var s : gs) {
+                s.close();
+            }
+            selectedCache.reset();
+        }
+    }
+
+    /**
+     * Specification for a batch of nodes to be processed from one source index: the half-open
+     * range {@code [start, end)} of {@code nodes}.
+     */
+    private static final class BatchSpec {
+        final int sourceIdx;
+        final int[] nodes;   // materialized node ids for this source
+        final int start;
+        final int end;
+
+        BatchSpec(int sourceIdx, int[] nodes, int start, int end) {
+            this.sourceIdx = sourceIdx;
+            this.nodes = nodes;
+            this.start = start;
+            this.end = end;
+        }
+    }
+
+    /**
+     * Provides Vamana-style diversity filtering for neighbor selection during compaction.
+     */
+    private static final class CompactVamanaDiversityProvider {
+        /**
+         * the diversity threshold; 1.0 is equivalent to HNSW; Vamana uses 1.2 or more
+         */
+        public final float alpha;
+
+        /**
+         * used to compute diversity
+         */
+        public final VectorSimilarityFunction vsf;
+
+        /**
+         * Create a new diversity provider
+         */
+        public CompactVamanaDiversityProvider(VectorSimilarityFunction vsf, float alpha) {
+            this.vsf = vsf;
+            this.alpha = alpha;
+        }
+
+        /**
+         * Selects diverse neighbors from candidates using a gradually increasing alpha threshold.
+         * {@code selectedCache} is reset and then filled with at most {@code maxDegree} diverse
+         * candidates, visited in the order given by the first {@code orderSize} entries of
+         * {@code order}. The candidate arrays and {@code order} are not modified;
+         * {@code tmp} is used as a scratch buffer for candidate vectors.
+         */
+        public void retainDiverse(int[] candSrc, int[] candNode, float[] candScore, int[] order, int orderSize, int maxDegree, SelectedVecCache selectedCache, VectorFloat<?> tmp, GraphSearcher[] gs) {
+            selectedCache.reset();
+            if (orderSize == 0) return;
+            int nSelected = 0;
+
+            // add diverse candidates, gradually increasing alpha to the threshold
+            // (so that the nearest candidates are prioritized)
+            float currentAlpha = 1.0f;
+            while (currentAlpha <= alpha + 1E-6 && nSelected < maxDegree) {
+                for (int i = 0; i < orderSize && nSelected < maxDegree; i++) {
+                    int ci = order[i];
+                    int cSrc = candSrc[ci];
+                    int cNode = candNode[ci];
+                    float cScore = candScore[ci];
+
+                    OnDiskGraphIndex.View cView = (OnDiskGraphIndex.View) gs[cSrc].getView();
+                    // Later passes revisit every candidate; skip the ones already chosen
+                    // before paying for the vector read.
+                    if (isAlreadySelected(cView, cNode, selectedCache)) {
+                        continue;
+                    }
+
+                    cView.getVectorInto(cNode, tmp, 0);
+                    if (isDiverse(tmp, cScore, currentAlpha, selectedCache)) {
+                        selectedCache.add(cSrc, cView, cNode, cScore, tmp);
+                        nSelected++;
+                    }
+                }
+
+                currentAlpha += DIVERSITY_ALPHA_STEP;
+            }
+        }
+
+        /**
+         * Returns true if the node, identified by its view and node id, is already in the cache.
+         */
+        private boolean isAlreadySelected(OnDiskGraphIndex.View cView, int cNode, SelectedVecCache selectedCache) {
+            for (int j = 0; j < selectedCache.size; j++) {
+                if (selectedCache.views[j] == cView && selectedCache.nodes[j] == cNode) {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        /**
+         * Checks if a candidate is diverse enough: its similarity to every already-selected
+         * neighbor must not exceed its score against the base node scaled by {@code alpha}.
+         */
+        private boolean isDiverse(VectorFloat<?> cVec, float cScore, float alpha, SelectedVecCache selectedCache) {
+            for (int j = 0; j < selectedCache.size; j++) {
+                if (vsf.compare(cVec, selectedCache.vecs[j]) > cScore * alpha) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+    }
+
+    /**
+     * Handles writing the compacted graph index to disk, managing header, node records,
+     * upper layers, and footer in the on-disk format.
+     */
+    private static final class CompactWriter implements AutoCloseable {
+
+        private static final int FOOTER_MAGIC = 0x4a564244;
+        private static final int FOOTER_OFFSET_SIZE = Long.BYTES;
+        private static final int FOOTER_MAGIC_SIZE = Integer.BYTES;
+        private static final int FOOTER_SIZE = FOOTER_MAGIC_SIZE + FOOTER_OFFSET_SIZE;
+
+        private final RandomAccessWriter writer;
+        private final int recordSize;
+        private final long startOffset;
+        private final int headerSize;
+        private final Header header;
+        private final int version;
+        private final FusedPQ fusedPQFeature;
+        private final ProductQuantization pq;
+        private final int baseDegree;
+        private final int maxOrdinal;
+        // Reusable per-thread buffer holding exactly one base-layer record
+        private final ThreadLocal<ByteBuffer> bufferPerThread;
+        // All-zero PQ code used to pad unused neighbor slots
+        private final ThreadLocal<ByteSequence<?>> zeroPQ;
+        private final boolean fusedPQEnabled;
+        private final Path outputPath;
+        private final List<CommonHeader.LayerInfo> configuredLayerInfo;
+        private final List<Integer> configuredLayerDegrees;
+        // Level-1 PQ codes collected while writing upper layers; emitted by writeFooter()
+        private final List<UpperLayerFeatureRecord> level1FeatureRecords;
+
+        /**
+         * Constructs a CompactWriter that will write the compacted index to the specified path.
+         * Opens the output file, builds the header, and derives the fixed base-layer record size:
+         * ordinal + inline vector + (optional fused PQ block) + neighbor count + padded neighbor ids.
+         *
+         * @param maxOrdinal        highest ordinal in the output; the header records {@code maxOrdinal + 1}
+         * @param numBaseLayerNodes node count stored for layer 0, replacing {@code layerInfo.get(0)}
+         * @param startOffset       file position at which the header is written
+         * @param layerDegrees      maximum degree per level; index 0 is the base layer
+         * @param pqLength          length of the zero PQ padding code; values below 1 are treated as 1
+         * @throws IOException if the output file cannot be opened
+         */
+        CompactWriter(Path outputPath,
+                      int maxOrdinal,
+                      int numBaseLayerNodes,
+                      long startOffset,
+                      List<CommonHeader.LayerInfo> layerInfo,
+                      int entryNode,
+                      int dimension,
+                      List<Integer> layerDegrees,
+                      ProductQuantization pq,
+                      int pqLength,
+                      boolean fusedPQEnabled)
+                throws IOException {
+            this.fusedPQEnabled = fusedPQEnabled;
+            this.version = OnDiskGraphIndex.CURRENT_VERSION;
+            this.outputPath = outputPath;
+            this.writer = new BufferedRandomAccessWriter(outputPath);
+            this.startOffset = startOffset;
+            this.configuredLayerInfo = new ArrayList<>(layerInfo);
+            this.configuredLayerDegrees = new ArrayList<>(layerDegrees);
+            this.baseDegree = layerDegrees.get(0);
+            this.pq = pq;
+            this.maxOrdinal = maxOrdinal;
+            this.level1FeatureRecords = new ArrayList<>();
+
+            Map<FeatureId, Feature> featureMap = new LinkedHashMap<>();
+            InlineVectors inlineVectorFeature = new InlineVectors(dimension);
+            featureMap.put(FeatureId.INLINE_VECTORS, inlineVectorFeature);
+            if (fusedPQEnabled) {
+                this.fusedPQFeature = new FusedPQ(Collections.max(layerDegrees), pq);
+                featureMap.put(FeatureId.FUSED_PQ, this.fusedPQFeature);
+            } else {
+                this.fusedPQFeature = null;
+            }
+
+            int rsize = Integer.BYTES + inlineVectorFeature.featureSize() + Integer.BYTES + baseDegree * Integer.BYTES;
+            if (fusedPQEnabled) {
+                rsize += fusedPQFeature.featureSize();
+            }
+            this.recordSize = rsize;
+
+            this.configuredLayerInfo.set(0, new CommonHeader.LayerInfo(numBaseLayerNodes, baseDegree));
+            var commonHeader = new CommonHeader(this.version, dimension, entryNode, this.configuredLayerInfo, this.maxOrdinal + 1);
+            this.header = new Header(commonHeader, featureMap);
+            this.headerSize = header.size();
+
+            this.bufferPerThread = ThreadLocal.withInitial(() -> {
+                ByteBuffer buffer = ByteBuffer.allocate(recordSize);
+                buffer.order(ByteOrder.BIG_ENDIAN);
+                return buffer;
+            });
+            this.zeroPQ = ThreadLocal.withInitial(() -> {
+                var vec = vectorTypeSupport.createByteSequence(pqLength > 0 ? pqLength : 1);
+                vec.zero();
+                return vec;
+            });
+        }
+
+        /**
+         * Writes the graph header at the start of the file.
+         */
+        public void writeHeader() throws IOException {
+            writer.seek(startOffset);
+            header.write(writer);
+            assert writer.position() == startOffset + headerSize : String.format("%d != %d", writer.position(), startOffset + headerSize);
+            writer.flush();
+        }
+
+        /**
+         * Writes the footer at the current position: the collected level-1 PQ records (only
+         * when fused PQ is enabled and the format version is 6), a copy of the header, the
+         * offset of that copy, and the magic number.
+         */
+        void writeFooter() throws IOException {
+            if (fusedPQEnabled && version == 6 && !level1FeatureRecords.isEmpty()) {
+                for (UpperLayerFeatureRecord record : level1FeatureRecords) {
+                    writer.writeInt(record.ordinal);
+                    vectorTypeSupport.writeByteSequence(writer, record.pqCode);
+                }
+            }
+            long headerOffset = writer.position();
+            header.write(writer);
+            writer.writeLong(headerOffset);
+            writer.writeInt(FOOTER_MAGIC);
+            final long expectedPosition = headerOffset + headerSize + FOOTER_SIZE;
+            assert writer.position() == expectedPosition : String.format("%d != %d", writer.position(), expectedPosition);
+        }
+
+        /**
+         * Positions the writer after the inline (base layer) records section, which reserves
+         * one fixed-size record for every ordinal from 0 to {@code maxOrdinal}.
+         */
+        public void offsetAfterInline() throws IOException {
+            long offset = startOffset + headerSize + (long) (maxOrdinal + 1) * recordSize;
+            writer.seek(offset);
+        }
+
+        /**
+         * Returns the output file path.
+         */
+        public Path getOutputPath() {
+            return outputPath;
+        }
+
+        /**
+         * Writes an upper layer node's graph structure (ordinal and neighbors).
+         * Collects level 1 PQ codes for later writing in the footer.
+         *
+         * @throws IllegalArgumentException if there are more neighbors than the level's degree,
+         *                                  which would overflow the fixed-width record
+         */
+        public void writeUpperLayerNode(int level, int ordinal, int[] neighbors, ByteSequence<?> level1PqCode) throws IOException {
+            int degree = configuredLayerDegrees.get(level);
+            if (neighbors.length > degree) {
+                throw new IllegalArgumentException(
+                        String.format("Node %d at level %d has %d neighbors, more than the layer degree %d",
+                                ordinal, level, neighbors.length, degree));
+            }
+
+            writer.writeInt(ordinal);
+            writer.writeInt(neighbors.length);
+            int n = 0;
+            for (; n < neighbors.length; n++) {
+                writer.writeInt(neighbors[n]);
+            }
+            // pad out to the layer degree so every record at this level has the same width
+            for (; n < degree; n++) {
+                writer.writeInt(-1);
+            }
+            if (fusedPQEnabled && version == 6 && level == 1 && level1PqCode != null) {
+                level1FeatureRecords.add(new UpperLayerFeatureRecord(ordinal, level1PqCode.copy()));
+            }
+        }
+
+        /**
+         * Flushes and closes the writer. This class opens the writer in its constructor, so it
+         * is also the one that releases it; the writer is closed even if flushing fails.
+         */
+        @Override
+        public void close() throws IOException {
+            try {
+                final var endOfGraphPosition = writer.position();
+                writer.seek(endOfGraphPosition);
+                writer.flush();
+            } finally {
+                writer.close();
+            }
+        }
+
+        /**
+         * Constructs and returns a write result for a base layer node containing the full record:
+         * ordinal, inline vector, PQ codes for neighbors, and neighbor list.
+         * Nothing is written to the file here; the record is serialized into a per-thread buffer
+         * and returned as a copy together with its target file offset.
+         *
+         * @param pqCode scratch buffer that is overwritten while encoding each neighbor;
+         *               only used when fused PQ is enabled
+         * @throws IllegalStateException if the serialized record does not have the expected size
+         */
+        public WriteResult writeInlineNodeRecord(int ordinal, VectorFloat<?> vec, SelectedVecCache selectedCache, ByteSequence<?> pqCode) throws IOException
+        {
+            var bwriter = new ByteBufferIndexWriter(bufferPerThread.get());
+
+            long fileOffset = startOffset + headerSize + (long) ordinal * recordSize;
+            bwriter.reset();
+            bwriter.writeInt(ordinal);
+
+            for (int i = 0; i < vec.length(); ++i) {
+                bwriter.writeFloat(vec.get(i));
+            }
+
+            // write fused PQ
+            // since we build a graph in a streaming way,
+            // we cannot use fusedPQfeature.writeInline
+            if (fusedPQEnabled) {
+                int k = 0;
+                for (; k < selectedCache.size; k++) {
+                    pqCode.zero();
+                    pq.encodeTo(selectedCache.vecs[k], pqCode);
+                    vectorTypeSupport.writeByteSequence(bwriter, pqCode);
+                }
+                for (; k < baseDegree; k++) {
+                    vectorTypeSupport.writeByteSequence(bwriter, zeroPQ.get());
+                }
+            }
+
+            // write neighbors list
+            bwriter.writeInt(selectedCache.size);
+            int n = 0;
+            for (; n < selectedCache.size; n++) {
+                bwriter.writeInt(selectedCache.nodes[n]);
+            }
+
+            // pad out to base layer degree
+            for (; n < baseDegree; n++) {
+                bwriter.writeInt(-1);
+            }
+
+            if (bwriter.bytesWritten() != recordSize) {
+                throw new IllegalStateException(
+                        String.format("Record size mismatch for ordinal %d: expected %d bytes, wrote %d bytes, base degree: %d",
+                                ordinal, recordSize, bwriter.bytesWritten(), baseDegree));
+            }
+
+            // The per-thread buffer is reused for the next node, so hand out a copy.
+            ByteBuffer dataCopy = bwriter.cloneBuffer();
+
+            return new WriteResult(ordinal, fileOffset, dataCopy);
+        }
+    }
+
+    /**
+     * A level-1 node's ordinal paired with its PQ code, held until the footer is written.
+     */
+    private static final class UpperLayerFeatureRecord {
+        final int ordinal;
+        final ByteSequence<?> pqCode;
+
+        UpperLayerFeatureRecord(int ordinal, ByteSequence<?> pqCode) {
+            this.ordinal = ordinal;
+            this.pqCode = pqCode;
+        }
+    }
+
+    /**
+     * Cache for storing selected diverse neighbors along with their metadata and vector copies.
+     * The arrays are parallel; only the first {@code size} entries are valid.
+     */
+    private static final class SelectedVecCache {
+        int[] sourceIdx;
+        OnDiskGraphIndex.View[] views;
+        int[] nodes;
+        float[] scores;
+        VectorFloat<?>[] vecs;
+        int size;
+
+        /**
+         * Constructs a cache with the specified capacity and vector dimension.
+         * All vector slots are allocated up front so that adding an entry only copies data.
+         */
+        SelectedVecCache(int capacity, int dimension) {
+            sourceIdx = new int[capacity];
+            views = new OnDiskGraphIndex.View[capacity];
+            nodes = new int[capacity];
+            scores = new float[capacity];
+            vecs = new VectorFloat<?>[capacity];
+            for (int c = 0; c < capacity; ++c) {
+                vecs[c] = vectorTypeSupport.createFloatVector(dimension);
+            }
+            size = 0;
+        }
+
+        /**
+         * Resets the cache for reuse. Previously stored entries are not cleared, only ignored.
+         */
+        void reset() {
+            size = 0;
+        }
+
+        /**
+         * Adds a selected neighbor to the cache, copying its vector.
+ */ + void add(int source, OnDiskGraphIndex.View view, int node, float score, VectorFloat vec) { + sourceIdx[size] = source; + views[size] = view; + nodes[size] = node; + scores[size] = score; + vecs[size].copyFrom(vec, 0, 0, vec.length()); + size++; + } + } + +} + diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OrdinalMapper.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OrdinalMapper.java index 526241eff..445255980 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OrdinalMapper.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OrdinalMapper.java @@ -106,4 +106,37 @@ public int newToOld(int newOrdinal) { return newToOld.get(newOrdinal); } } + + /** + * A mapper that applies a fixed offset to ordinals. + * Used for sequential mapping where local ordinal i maps to globalOffset + i. + */ + class OffsetMapper implements OrdinalMapper { + private final int offset; + private final int size; + + public OffsetMapper(int offset, int size) { + this.offset = offset; + this.size = size; + } + + @Override + public int maxOrdinal() { + return offset + size - 1; + } + + @Override + public int oldToNew(int oldOrdinal) { + return oldOrdinal + offset; + } + + @Override + public int newToOld(int newOrdinal) { + int oldOrdinal = newOrdinal - offset; + if (oldOrdinal < 0 || oldOrdinal >= size) { + return OMITTED; + } + return oldOrdinal; + } + } } diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/PQRetrainer.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/PQRetrainer.java new file mode 100644 index 000000000..a0438168e --- /dev/null +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/PQRetrainer.java @@ -0,0 +1,233 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.github.jbellis.jvector.graph.disk; + +import io.github.jbellis.jvector.graph.ListRandomAccessVectorValues; +import io.github.jbellis.jvector.graph.disk.feature.FeatureId; +import io.github.jbellis.jvector.graph.disk.feature.FusedPQ; +import io.github.jbellis.jvector.quantization.ProductQuantization; +import io.github.jbellis.jvector.util.DocIdSetIterator; +import io.github.jbellis.jvector.util.FixedBitSet; +import io.github.jbellis.jvector.vector.VectorizationProvider; +import io.github.jbellis.jvector.vector.VectorSimilarityFunction; +import io.github.jbellis.jvector.vector.types.VectorFloat; +import io.github.jbellis.jvector.vector.types.VectorTypeSupport; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.concurrent.ThreadLocalRandom; + +/** + * Handles Product Quantization retraining for graph index compaction. + * Performs balanced sampling across multiple source indexes and trains + * a new PQ codebook optimized for the combined dataset. + */ +public class PQRetrainer { + private static final Logger log = LoggerFactory.getLogger(PQRetrainer.class); + private static final VectorTypeSupport vectorTypeSupport = VectorizationProvider.getInstance().getVectorTypeSupport(); + private static final int MIN_SAMPLES_PER_SOURCE = 1000; + // Number of consecutive nodes to read per chunk before jumping to another location. 
+ // Keeping reads sequential within each chunk lets the OS read-ahead cover them, + // avoiding the random I/O that would happen with per-node random sampling. + private static final int SAMPLE_CHUNK_SIZE = 32; + + private final List sources; + private final List liveNodes; + private final List numLiveNodesPerSource; + private final int dimension; + private final int numTotalNodes; + + public PQRetrainer(List sources, List liveNodes, int dimension) { + this.sources = sources; + this.liveNodes = liveNodes; + this.dimension = dimension; + + this.numLiveNodesPerSource = new ArrayList<>(sources.size()); + int total = 0; + for (int s = 0; s < sources.size(); s++) { + int numLiveNodes = liveNodes.get(s).cardinality(); + total += numLiveNodes; + this.numLiveNodesPerSource.add(numLiveNodes); + } + this.numTotalNodes = total; + } + + /** + * Trains a new Product Quantization codebook using balanced sampling across all source indexes. + * All sampled vectors are read into memory up front, so ProductQuantization.compute() itself + * performs no I/O. + */ + public ProductQuantization retrain(VectorSimilarityFunction similarityFunction) { + log.info("Training PQ using balanced sampling across sources"); + + List samples = sampleBalanced(ProductQuantization.MAX_PQ_TRAINING_SET_SIZE); + + // Sort by (source, node) so extractVectorsSequential reads each source's file + // in ascending order, enabling OS read-ahead instead of random page faults. + samples.sort(Comparator.comparingInt((SampleRef r) -> r.source).thenComparingInt(r -> r.node)); + + log.info("Collected {} training samples", samples.size()); + + // Extract vectors sequentially in sorted (source, node) order so disk reads are + // purely sequential and the OS read-ahead can cover them efficiently. 
We do this + // here rather than letting ProductQuantization.compute() drive the reads via its + // parallel stream, which would scatter page faults across a potentially very large + // file and cause I/O that scales with dataset size rather than sample count. + List> trainingVectors = extractVectorsSequential(samples); + var ravv = new ListRandomAccessVectorValues(trainingVectors, dimension); + + FusedPQ fpq = (FusedPQ) sources.get(0).getFeatures().get(FeatureId.FUSED_PQ); + ProductQuantization basePQ = fpq.getPQ(); + + boolean center = similarityFunction == VectorSimilarityFunction.EUCLIDEAN; + + return ProductQuantization.compute( + ravv, + basePQ.getSubspaceCount(), + basePQ.getClusterCount(), + center + ); + } + + /** + * Performs balanced sampling across all source indexes to ensure proportional representation. + * Guarantees minimum samples per source while respecting total sample budget. + */ + private List sampleBalanced(int totalSamples) { + // If total live nodes <= totalSamples, return ALL + if (numTotalNodes <= totalSamples) { + List all = new ArrayList<>(numTotalNodes); + + for (int s = 0; s < sources.size(); s++) { + FixedBitSet live = liveNodes.get(s); + + for (int node = live.nextSetBit(0); + node != DocIdSetIterator.NO_MORE_DOCS; + node = live.nextSetBit(node + 1)) { + all.add(new SampleRef(s, node)); + } + } + + return all; + } + + final int MIN_PER_SOURCE = Math.min(MIN_SAMPLES_PER_SOURCE, totalSamples / sources.size()); + + int[] quota = new int[sources.size()]; + int assigned = 0; + + // Proportional allocation + for (int s = 0; s < sources.size(); s++) { + quota[s] = Math.max( + MIN_PER_SOURCE, + (int) ((long) totalSamples * numLiveNodesPerSource.get(s) / numTotalNodes) + ); + assigned += quota[s]; + } + + // Normalize down + while (assigned > totalSamples) { + for (int s = 0; s < sources.size() && assigned > totalSamples; s++) { + if (quota[s] > MIN_PER_SOURCE) { + quota[s]--; + assigned--; + } + } + } + + // Normalize up + while (assigned < 
totalSamples) { + for (int s = 0; s < sources.size() && assigned < totalSamples; s++) { + quota[s]++; + assigned++; + } + } + + List samples = new ArrayList<>(totalSamples); + ThreadLocalRandom rand = ThreadLocalRandom.current(); + + for (int s = 0; s < sources.size(); s++) { + FixedBitSet live = liveNodes.get(s); + int max = live.length(); + int numChunks = (max + SAMPLE_CHUNK_SIZE - 1) / SAMPLE_CHUNK_SIZE; + + // Build a shuffled chunk order so samples are representative but + // each chunk is read sequentially to minimize page faults. + // Fisher-Yates shuffle + int[] chunkOrder = new int[numChunks]; + for (int i = 0; i < numChunks; i++) chunkOrder[i] = i; + for (int i = numChunks - 1; i > 0; i--) { + int j = rand.nextInt(i + 1); + int tmp = chunkOrder[i]; + chunkOrder[i] = chunkOrder[j]; + chunkOrder[j] = tmp; + } + + int count = 0; + outer: + for (int ci = 0; ci < numChunks; ci++) { + int start = chunkOrder[ci] * SAMPLE_CHUNK_SIZE; + int end = Math.min(max, start + SAMPLE_CHUNK_SIZE); + for (int node = start; node < end; node++) { + if (live.get(node)) { + samples.add(new SampleRef(s, node)); + if (++count >= quota[s]) break outer; + } + } + } + } + + return samples; + } + + /** + * Reads sampled vectors in the order provided. The caller must pre-sort {@code samples} + * by (source, node) so reads within each source are ascending, letting the OS read-ahead + * cover them efficiently. Each source's view is opened once and reused for all its samples. 
+ */ + private List> extractVectorsSequential(List samples) { + OnDiskGraphIndex.View[] views = new OnDiskGraphIndex.View[sources.size()]; + for (int s = 0; s < sources.size(); s++) { + views[s] = (OnDiskGraphIndex.View) sources.get(s).getView(); + } + + List> vectors = new ArrayList<>(samples.size()); + VectorFloat tmp = vectorTypeSupport.createFloatVector(dimension); + for (SampleRef ref : samples) { + views[ref.source].getVectorInto(ref.node, tmp, 0); + vectors.add(tmp.copy()); + } + return vectors; + } + + /** + * Reference to a sampled vector from a specific source index. + */ + private static final class SampleRef { + final int source; + final int node; + + SampleRef(int source, int node) { + this.source = source; + this.node = node; + } + } + +} diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/quantization/PQVectors.java b/jvector-base/src/main/java/io/github/jbellis/jvector/quantization/PQVectors.java index 538632da0..42113b242 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/quantization/PQVectors.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/quantization/PQVectors.java @@ -444,7 +444,7 @@ public String toString() { * This is emulative of modern Java records, but keeps to J11 standards. * This class consolidates the layout calculations for PQ data into one place */ - static class PQLayout { + public static class PQLayout { /** * total number of vectors From 52e721717d760d232c50f8b33552cc5c54e5011e Mon Sep 17 00:00:00 2001 From: dian-lun-lin Date: Tue, 14 Apr 2026 15:06:34 -0700 Subject: [PATCH 02/18] Add compaction unit tests Tests for OnDiskGraphIndexCompactor covering basic compaction, deletions, ordinal remapping, multi-source merging, and FusedPQ compaction scenarios. 
--- .../disk/TestOnDiskGraphIndexCompactor.java | 774 ++++++++++++++++++ 1 file changed, 774 insertions(+) create mode 100644 jvector-tests/src/test/java/io/github/jbellis/jvector/graph/disk/TestOnDiskGraphIndexCompactor.java diff --git a/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/disk/TestOnDiskGraphIndexCompactor.java b/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/disk/TestOnDiskGraphIndexCompactor.java new file mode 100644 index 000000000..410b96d0e --- /dev/null +++ b/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/disk/TestOnDiskGraphIndexCompactor.java @@ -0,0 +1,774 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.github.jbellis.jvector.graph.disk; + +import com.carrotsearch.randomizedtesting.RandomizedTest; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; +import io.github.jbellis.jvector.TestUtil; +import io.github.jbellis.jvector.disk.ReaderSupplier; +import io.github.jbellis.jvector.disk.ReaderSupplierFactory; +import io.github.jbellis.jvector.disk.SimpleMappedReader; +import io.github.jbellis.jvector.example.util.AccuracyMetrics; +import io.github.jbellis.jvector.graph.*; +import io.github.jbellis.jvector.graph.disk.feature.Feature; +import io.github.jbellis.jvector.graph.disk.feature.FeatureId; +import io.github.jbellis.jvector.graph.disk.feature.FusedPQ; +import io.github.jbellis.jvector.graph.disk.feature.InlineVectors; +import io.github.jbellis.jvector.graph.similarity.BuildScoreProvider; +import io.github.jbellis.jvector.graph.similarity.DefaultSearchScoreProvider; +import io.github.jbellis.jvector.graph.similarity.SearchScoreProvider; +import io.github.jbellis.jvector.quantization.PQVectors; +import io.github.jbellis.jvector.quantization.ProductQuantization; +import io.github.jbellis.jvector.util.Bits; +import io.github.jbellis.jvector.util.BoundedLongHeap; +import io.github.jbellis.jvector.util.FixedBitSet; +import io.github.jbellis.jvector.vector.VectorSimilarityFunction; +import io.github.jbellis.jvector.vector.VectorizationProvider; +import io.github.jbellis.jvector.vector.types.VectorFloat; +import io.github.jbellis.jvector.vector.types.VectorTypeSupport; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; +import java.util.concurrent.ForkJoinPool; +import java.util.function.IntFunction; + +import static io.github.jbellis.jvector.TestUtil.createRandomVectors; +import static io.github.jbellis.jvector.quantization.KMeansPlusPlusClusterer.UNWEIGHTED; +import static 
org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; + +@ThreadLeakScope(ThreadLeakScope.Scope.NONE) +public class TestOnDiskGraphIndexCompactor extends RandomizedTest { + private static final VectorTypeSupport vectorTypeSupport = VectorizationProvider.getInstance().getVectorTypeSupport(); + + private ImmutableGraphIndex golden; + private Path testDirectory; + List> allVecs = new ArrayList<>(); + int dimension = 32; + int numVectorsPerGraph = 256; + int numSources = 3; + int numQueries = 20; + VectorSimilarityFunction similarityFunction = VectorSimilarityFunction.COSINE; + RandomAccessVectorValues allravv; + private final ForkJoinPool simdExecutor = ForkJoinPool.commonPool(); + private final ForkJoinPool parallelExecutor = ForkJoinPool.commonPool(); + + @Before + public void setup() throws IOException { + testDirectory = Files.createTempDirectory("jvector_test"); + buildFusedPQ(); + buildGoldenPQ(); + } + + /** + * Builds source graphs with FusedPQ feature enabled. + * Uses random vectors with COSINE similarity. 
+ */ + void buildFusedPQ() throws IOException { + for(int i = 0; i < numSources; ++i) { + List> vecs = createRandomVectors(numVectorsPerGraph, dimension); + + RandomAccessVectorValues ravv = new ListRandomAccessVectorValues(vecs, dimension); + ProductQuantization pq = ProductQuantization.compute(ravv, 8, 256, true, UNWEIGHTED, simdExecutor, parallelExecutor); + PQVectors pqv = (PQVectors) pq.encodeAll(ravv, simdExecutor); + var bsp = BuildScoreProvider.pqBuildScoreProvider(similarityFunction, pqv); + var builder = new GraphIndexBuilder(bsp, dimension, 16, 100, 1.2f, 1.2f, false, true, simdExecutor, parallelExecutor); + var graph = builder.getGraph(); + + var outputPath = testDirectory.resolve("test_graph_" + i); + Map> writeSuppliers = new EnumMap<>(FeatureId.class); + writeSuppliers.put(FeatureId.INLINE_VECTORS, ordinal -> new InlineVectors.State(ravv.getVector(ordinal))); + + var identityMapper = new OrdinalMapper.IdentityMapper(ravv.size() - 1); + var writerBuilder = new OnDiskGraphIndexWriter.Builder(graph, outputPath); + writerBuilder.withMapper(identityMapper); + writerBuilder.with(new InlineVectors(dimension)); + writerBuilder.with(new FusedPQ(graph.maxDegree(), pq)); + var writer = writerBuilder.build(); + + for (var node = 0; node < ravv.size(); node++) { + var stateMap = new EnumMap(FeatureId.class); + stateMap.put(FeatureId.INLINE_VECTORS, writeSuppliers.get(FeatureId.INLINE_VECTORS).apply(node)); + writer.writeInline(node, stateMap); + builder.addGraphNode(node, ravv.getVector(node)); + } + builder.cleanup(); + + writeSuppliers.put(FeatureId.FUSED_PQ, ordinal -> new FusedPQ.State(graph.getView(), pqv, ordinal)); + writer.write(writeSuppliers); + allVecs.addAll(vecs); + } + } + + /** + * Builds the golden graph from all vectors combined. + * This represents the ideal case of building from scratch. 
+ */ + void buildGoldenPQ() throws IOException { + allravv = new ListRandomAccessVectorValues(allVecs, dimension); + + ProductQuantization pq = ProductQuantization.compute(allravv, 8, 256, true, UNWEIGHTED, simdExecutor, parallelExecutor); + PQVectors pqv = (PQVectors) pq.encodeAll(allravv, simdExecutor); + var bsp = BuildScoreProvider.pqBuildScoreProvider(similarityFunction, pqv); + var builder = new GraphIndexBuilder(bsp, dimension, 16, 100, 1.2f, 1.2f, false, true, simdExecutor, parallelExecutor); + for (var i = 0; i < allravv.size(); i++) { + builder.addGraphNode(i, allravv.getVector(i)); + } + builder.cleanup(); + golden = builder.getGraph(); + } + List searchFromAll(List> queries, int topK) { + List srs = new ArrayList<>(); + try (GraphSearcher searcher = new GraphSearcher(golden)) { + for(VectorFloat q: queries) { + var row = new ArrayList(); + SearchScoreProvider ssp = DefaultSearchScoreProvider.exact(q, similarityFunction, allravv); + SearchResult sr = searcher.search(ssp, topK, Bits.ALL); + srs.add(sr); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + return srs; + } + List> buildGT(List> queries, int topK) { + List> rows = new ArrayList<>(); + + for(int i = 0; i < queries.size(); ++i) { + NodeQueue expected = new NodeQueue(new BoundedLongHeap(topK), NodeQueue.Order.MIN_HEAP); + for (int j = 0; j < allVecs.size(); j++) { + expected.push(j, similarityFunction.compare(queries.get(i), allVecs.get(j))); + } + + var row = new ArrayList(); + for(int k = 0; k < topK; ++k) { + row.add(expected.pop()); + } + rows.add(row); + } + return rows; + } + + @After + public void tearDown() { + TestUtil.deleteQuietly(testDirectory); + } + + /** + * Builds a small source graph with InlineVectors only (no FusedPQ), using exact scoring. + * Returns the path to the written graph file. 
+ */ + private Path buildSimpleSourceGraph(List> vecs, int dim, VectorSimilarityFunction vsf, String name) throws IOException { + RandomAccessVectorValues ravv = new ListRandomAccessVectorValues(vecs, dim); + var bsp = BuildScoreProvider.randomAccessScoreProvider(ravv, vsf); + var builder = new GraphIndexBuilder(bsp, dim, 4, 20, 1.2f, 1.2f, false, true, simdExecutor, parallelExecutor); + for (int i = 0; i < vecs.size(); i++) { + builder.addGraphNode(i, vecs.get(i)); + } + builder.cleanup(); + var graph = builder.getGraph(); + + var outputPath = testDirectory.resolve(name); + var identityMapper = new OrdinalMapper.IdentityMapper(vecs.size() - 1); + var writerBuilder = new OnDiskGraphIndexWriter.Builder(graph, outputPath); + writerBuilder.withMapper(identityMapper); + writerBuilder.with(new InlineVectors(dim)); + var writer = writerBuilder.build(); + + Map> writeSuppliers = new EnumMap<>(FeatureId.class); + writeSuppliers.put(FeatureId.INLINE_VECTORS, ordinal -> new InlineVectors.State(ravv.getVector(ordinal))); + + for (int node = 0; node < vecs.size(); node++) { + var stateMap = new EnumMap(FeatureId.class); + stateMap.put(FeatureId.INLINE_VECTORS, writeSuppliers.get(FeatureId.INLINE_VECTORS).apply(node)); + writer.writeInline(node, stateMap); + } + writer.write(writeSuppliers); + return outputPath; + } + + /** Creates a vector of the given dimension with value at index {@code hot} set to {@code val}, rest 0. */ + private VectorFloat makeVec(int dim, int hot, float val) { + VectorFloat v = vectorTypeSupport.createFloatVector(dim); + for (int d = 0; d < dim; d++) { + v.set(d, d == hot ? 
val : 0.0f); + } + return v; + } + + private void assertVecEquals(VectorFloat expected, VectorFloat actual, int ordinal) { + int dim = expected.length(); + assertEquals("dimension mismatch at ordinal " + ordinal, dim, actual.length()); + for (int d = 0; d < dim; d++) { + assertEquals(String.format("vector[%d] dim %d mismatch", ordinal, d), expected.get(d), actual.get(d), 0.0f); + } + } + + /** + * Tests that vectors are stored exactly at the expected global ordinals after compaction. + * Uses two small sources with simple, known float values and identity mapping. + */ + @Test + public void testExactVectorValuesAfterCompaction() throws Exception { + int dim = 4; + int n = 6; // nodes per source + VectorSimilarityFunction vsf = VectorSimilarityFunction.EUCLIDEAN; + + // Source 0: vectors with first dim varying by index + List> vecs0 = new ArrayList<>(); + for (int i = 0; i < n; i++) { + vecs0.add(makeVec(dim, 0, (float)(i + 1))); + } + // Source 1: vectors with second dim varying by index + List> vecs1 = new ArrayList<>(); + for (int i = 0; i < n; i++) { + vecs1.add(makeVec(dim, 1, (float)(i + 10))); + } + + Path path0 = buildSimpleSourceGraph(vecs0, dim, vsf, "simple_src_0"); + Path path1 = buildSimpleSourceGraph(vecs1, dim, vsf, "simple_src_1"); + + ReaderSupplier rs0 = ReaderSupplierFactory.open(path0); + ReaderSupplier rs1 = ReaderSupplierFactory.open(path1); + OnDiskGraphIndex g0 = OnDiskGraphIndex.load(rs0); + OnDiskGraphIndex g1 = OnDiskGraphIndex.load(rs1); + + // Identity remapping: source i -> global ordinals [i*n, (i+1)*n) + Map map0 = new HashMap<>(); + Map map1 = new HashMap<>(); + for (int i = 0; i < n; i++) { + map0.put(i, i); + map1.put(i, n + i); + } + + FixedBitSet live0 = new FixedBitSet(n); + live0.set(0, n); + FixedBitSet live1 = new FixedBitSet(n); + live1.set(0, n); + + var compactor = new OnDiskGraphIndexCompactor( + List.of(g0, g1), + List.of(live0, live1), + List.of(new OrdinalMapper.MapMapper(map0), new OrdinalMapper.MapMapper(map1)), + 
vsf, null); + + Path outPath = testDirectory.resolve("simple_compact_out"); + compactor.compact(outPath); + + ReaderSupplier rsOut = ReaderSupplierFactory.open(outPath); + OnDiskGraphIndex compacted = OnDiskGraphIndex.load(rsOut); + assertEquals(2 * n, compacted.size(0)); + + var view = compacted.getView(); + VectorFloat buf = vectorTypeSupport.createFloatVector(dim); + + // Source 0 vectors must be at ordinals 0..n-1 + for (int i = 0; i < n; i++) { + view.getVectorInto(i, buf, 0); + assertVecEquals(vecs0.get(i), buf, i); + } + // Source 1 vectors must be at ordinals n..2n-1 + for (int i = 0; i < n; i++) { + view.getVectorInto(n + i, buf, 0); + assertVecEquals(vecs1.get(i), buf, n + i); + } + } + + /** + * Tests that only live vectors appear after compaction, placed at the correct remapped ordinals. + * Deletes every other node from each source and verifies the compacted output exactly. + */ + @Test + public void testExactVectorValuesWithDeletions() throws Exception { + int dim = 4; + int n = 8; // nodes per source + VectorSimilarityFunction vsf = VectorSimilarityFunction.EUCLIDEAN; + + // Source 0: vectors [1,0,0,0] through [8,0,0,0] + List> vecs0 = new ArrayList<>(); + for (int i = 0; i < n; i++) { + vecs0.add(makeVec(dim, 0, (float)(i + 1))); + } + // Source 1: vectors [0,10,0,0] through [0,170,0,0] + List> vecs1 = new ArrayList<>(); + for (int i = 0; i < n; i++) { + vecs1.add(makeVec(dim, 1, (float)((i + 1) * 10))); + } + + Path path0 = buildSimpleSourceGraph(vecs0, dim, vsf, "del_src_0"); + Path path1 = buildSimpleSourceGraph(vecs1, dim, vsf, "del_src_1"); + + ReaderSupplier rs0 = ReaderSupplierFactory.open(path0); + ReaderSupplier rs1 = ReaderSupplierFactory.open(path1); + OnDiskGraphIndex g0 = OnDiskGraphIndex.load(rs0); + OnDiskGraphIndex g1 = OnDiskGraphIndex.load(rs1); + + // Keep only even-indexed nodes (0, 2, 4, 6) in both sources + FixedBitSet live0 = new FixedBitSet(n); + FixedBitSet live1 = new FixedBitSet(n); + Map map0 = new HashMap<>(); + Map map1 
= new HashMap<>(); + int globalOrdinal = 0; + for (int i = 0; i < n; i++) { + if (i % 2 == 0) { + live0.set(i); + map0.put(i, globalOrdinal++); + } + } + for (int i = 0; i < n; i++) { + if (i % 2 == 0) { + live1.set(i); + map1.put(i, globalOrdinal++); + } + } + int expectedTotal = globalOrdinal; + + var compactor = new OnDiskGraphIndexCompactor( + List.of(g0, g1), + List.of(live0, live1), + List.of(new OrdinalMapper.MapMapper(map0), new OrdinalMapper.MapMapper(map1)), + vsf, null); + + Path outPath = testDirectory.resolve("del_compact_out"); + compactor.compact(outPath); + + ReaderSupplier rsOut = ReaderSupplierFactory.open(outPath); + OnDiskGraphIndex compacted = OnDiskGraphIndex.load(rsOut); + assertEquals(expectedTotal, compacted.size(0)); + + var view = compacted.getView(); + VectorFloat buf = vectorTypeSupport.createFloatVector(dim); + + // Verify source 0 live nodes at their mapped ordinals + for (int i = 0; i < n; i++) { + if (i % 2 == 0) { + int ord = map0.get(i); + view.getVectorInto(ord, buf, 0); + assertVecEquals(vecs0.get(i), buf, ord); + } + } + // Verify source 1 live nodes at their mapped ordinals + for (int i = 0; i < n; i++) { + if (i % 2 == 0) { + int ord = map1.get(i); + view.getVectorInto(ord, buf, 0); + assertVecEquals(vecs1.get(i), buf, ord); + } + } + } + + /** + * Tests that vectors end up at the correct ordinals when a non-sequential remapping is used. + * Source 0 is mapped in reverse order; source 1 is mapped in forward order. + * Verifies exact vector values at every remapped position. 
+ */ + @Test + public void testExactVectorValuesWithCustomRemapping() throws Exception { + int dim = 4; + int n = 6; + VectorSimilarityFunction vsf = VectorSimilarityFunction.EUCLIDEAN; + + List> vecs0 = new ArrayList<>(); + for (int i = 0; i < n; i++) { + vecs0.add(makeVec(dim, 2, (float)(i + 1))); + } + List> vecs1 = new ArrayList<>(); + for (int i = 0; i < n; i++) { + vecs1.add(makeVec(dim, 3, (float)(i + 100))); + } + + Path path0 = buildSimpleSourceGraph(vecs0, dim, vsf, "remap_src_0"); + Path path1 = buildSimpleSourceGraph(vecs1, dim, vsf, "remap_src_1"); + + ReaderSupplier rs0 = ReaderSupplierFactory.open(path0); + ReaderSupplier rs1 = ReaderSupplierFactory.open(path1); + OnDiskGraphIndex g0 = OnDiskGraphIndex.load(rs0); + OnDiskGraphIndex g1 = OnDiskGraphIndex.load(rs1); + + // Source 0: reverse mapping (local 0 -> global n-1, local 1 -> global n-2, ...) + Map map0 = new HashMap<>(); + for (int i = 0; i < n; i++) { + map0.put(i, n - 1 - i); + } + // Source 1: forward mapping (local 0 -> global n, local 1 -> global n+1, ...) 
+ Map map1 = new HashMap<>(); + for (int i = 0; i < n; i++) { + map1.put(i, n + i); + } + + FixedBitSet live0 = new FixedBitSet(n); + live0.set(0, n); + FixedBitSet live1 = new FixedBitSet(n); + live1.set(0, n); + + var compactor = new OnDiskGraphIndexCompactor( + List.of(g0, g1), + List.of(live0, live1), + List.of(new OrdinalMapper.MapMapper(map0), new OrdinalMapper.MapMapper(map1)), + vsf, null); + + Path outPath = testDirectory.resolve("remap_compact_out"); + compactor.compact(outPath); + + ReaderSupplier rsOut = ReaderSupplierFactory.open(outPath); + OnDiskGraphIndex compacted = OnDiskGraphIndex.load(rsOut); + assertEquals(2 * n, compacted.size(0)); + + var view = compacted.getView(); + VectorFloat buf = vectorTypeSupport.createFloatVector(dim); + + for (int i = 0; i < n; i++) { + int ord = map0.get(i); + view.getVectorInto(ord, buf, 0); + assertVecEquals(vecs0.get(i), buf, ord); + } + for (int i = 0; i < n; i++) { + int ord = map1.get(i); + view.getVectorInto(ord, buf, 0); + assertVecEquals(vecs1.get(i), buf, ord); + } + } + + /** + * Tests basic compaction: merging multiple graphs without deletions. + * Verifies that compacted graph recall is comparable to golden graph. 
+ */ + @Test + public void testCompact() throws Exception { + List graphs = new ArrayList<>(); + List rss = new ArrayList<>(); + List liveNodes = new ArrayList<>(); + List remappers = new ArrayList<>(); + + // Load all source graphs + for(int i = 0; i < numSources; ++i) { + var outputPath = testDirectory.resolve("test_graph_" + i); + rss.add(ReaderSupplierFactory.open(outputPath.toAbsolutePath())); + var onDiskGraph = OnDiskGraphIndex.load(rss.get(i)); + graphs.add(onDiskGraph); + } + + // Create identity mapping and all nodes live + int globalOrdinal = 0; + for (int n = 0; n < numSources; n++) { + Map map = new HashMap<>(numVectorsPerGraph); + for (int i = 0; i < numVectorsPerGraph; i++) { + map.put(i, globalOrdinal++); + } + remappers.add(new OrdinalMapper.MapMapper(map)); + + var lives = new FixedBitSet(numVectorsPerGraph); + lives.set(0, numVectorsPerGraph); + liveNodes.add(lives); + } + + var compactor = new OnDiskGraphIndexCompactor(graphs, liveNodes, remappers, similarityFunction, null); + int topK = 10; + + // Select query vectors from the dataset + var outputPath = testDirectory.resolve("test_compact_graph_"); + List> queries = new ArrayList<>(); + for(int i = 0; i < numQueries; ++i) { + queries.add(allVecs.get(randomIntBetween(0, allVecs.size() - 1))); + } + + // Get golden results and ground truth + List goldenResults = searchFromAll(queries, topK); + List> groundTruth = buildGT(queries, topK); + + // Compact and test + compactor.compact(outputPath); + + ReaderSupplier rs = ReaderSupplierFactory.open(outputPath); + var compactGraph = OnDiskGraphIndex.load(rs); + + // Verify basic properties + assertEquals("Compacted graph should have all nodes", numSources * numVectorsPerGraph, compactGraph.size(0)); + + GraphSearcher searcher = new GraphSearcher(compactGraph); + List compactResults = new ArrayList<>(); + for(VectorFloat q: queries) { + SearchScoreProvider ssp = DefaultSearchScoreProvider.exact(q, similarityFunction, allravv); + 
compactResults.add(searcher.search(ssp, topK, Bits.ALL)); + } + + // Calculate recalls + double goldenRecall = AccuracyMetrics.recallFromSearchResults(groundTruth, goldenResults, topK, topK); + double compactRecall = AccuracyMetrics.recallFromSearchResults(groundTruth, compactResults, topK, topK); + + System.out.printf("Golden (built from scratch) Recall: %.4f%n", goldenRecall); + System.out.printf("Compacted Recall: %.4f%n", compactRecall); + System.out.printf("Recall difference: %.4f%n", Math.abs(goldenRecall - compactRecall)); + + // For random vectors with COSINE, both golden and compact should have similar recall + // The key is that they're comparable to each other, showing compaction preserves graph quality + double recallDifference = Math.abs(goldenRecall - compactRecall); + assertTrue(String.format("Compacted recall (%.4f) should be comparable to golden recall (%.4f), difference: %.4f", + compactRecall, goldenRecall, recallDifference), + recallDifference < 0.2); // Allow up to 20% difference for random vectors + + // Verify both are reasonable (not completely broken) + assertTrue(String.format("Golden recall should be at least 0.2, got %.4f", goldenRecall), + goldenRecall >= 0.2); + assertTrue(String.format("Compacted recall should be at least 0.2, got %.4f", compactRecall), + compactRecall >= 0.2); + + searcher.close(); + } + + /** + * Tests compaction with deleted nodes. + * Verifies that deleted nodes are properly excluded from the compacted graph. 
+ */ + @Test + public void testCompactWithDeletions() throws Exception { + List graphs = new ArrayList<>(); + List rss = new ArrayList<>(); + List liveNodes = new ArrayList<>(); + List remappers = new ArrayList<>(); + + for(int i = 0; i < numSources; ++i) { + var outputPath = testDirectory.resolve("test_graph_" + i); + rss.add(ReaderSupplierFactory.open(outputPath.toAbsolutePath())); + var onDiskGraph = OnDiskGraphIndex.load(rss.get(i)); + graphs.add(onDiskGraph); + } + + // Mark some nodes as deleted (not live) + int globalOrdinal = 0; + int totalLiveNodes = 0; + Set deletedGlobalOrdinals = new HashSet<>(); + + for (int n = 0; n < numSources; n++) { + Map map = new HashMap<>(); + var lives = new FixedBitSet(numVectorsPerGraph); + + // Delete every 5th node + for (int i = 0; i < numVectorsPerGraph; i++) { + int originalGlobalOrdinal = n * numVectorsPerGraph + i; + if (i % 5 != 0) { + lives.set(i); + map.put(i, globalOrdinal++); + totalLiveNodes++; + } else { + deletedGlobalOrdinals.add(originalGlobalOrdinal); + } + } + + remappers.add(new OrdinalMapper.MapMapper(map)); + liveNodes.add(lives); + } + + var compactor = new OnDiskGraphIndexCompactor(graphs, liveNodes, remappers, similarityFunction, null); + var outputPath = testDirectory.resolve("test_compact_with_deletions"); + + compactor.compact(outputPath); + + ReaderSupplier rs = ReaderSupplierFactory.open(outputPath); + var compactGraph = OnDiskGraphIndex.load(rs); + + // Verify the compacted graph has the correct size (excluding deleted nodes) + assertEquals("Compacted graph size should equal live nodes", totalLiveNodes, compactGraph.size(0)); + + // Verify search functionality still works + GraphSearcher searcher = new GraphSearcher(compactGraph); + var query = allVecs.get(randomIntBetween(0, allVecs.size() - 1)); + SearchScoreProvider ssp = DefaultSearchScoreProvider.exact(query, similarityFunction, allravv); + SearchResult result = searcher.search(ssp, 10, Bits.ALL); + + // Verify we get results and they're 
all valid + assertTrue("Should return some results", result.getNodes().length > 0); + + searcher.close(); + } + + /** + * Tests compaction with custom ordinal mappings. + * Verifies that vectors are correctly placed at their mapped ordinals. + */ + @Test + public void testOrdinalMapping() throws Exception { + List graphs = new ArrayList<>(); + List rss = new ArrayList<>(); + List liveNodes = new ArrayList<>(); + List remappers = new ArrayList<>(); + + for(int i = 0; i < numSources; ++i) { + var outputPath = testDirectory.resolve("test_graph_" + i); + rss.add(ReaderSupplierFactory.open(outputPath.toAbsolutePath())); + var onDiskGraph = OnDiskGraphIndex.load(rss.get(i)); + graphs.add(onDiskGraph); + } + + // Create custom ordinal mappings (non-sequential) + int globalOrdinal = 0; + List> mappingList = new ArrayList<>(); + + for (int n = 0; n < numSources; n++) { + Map map = new HashMap<>(); + // Use a custom mapping: reverse order for even sources, normal order for odd + if (n % 2 == 0) { + for (int i = 0; i < numVectorsPerGraph; i++) { + int newOrdinal = globalOrdinal + (numVectorsPerGraph - 1 - i); + map.put(i, newOrdinal); + } + globalOrdinal += numVectorsPerGraph; + } else { + for (int i = 0; i < numVectorsPerGraph; i++) { + map.put(i, globalOrdinal++); + } + } + mappingList.add(map); + remappers.add(new OrdinalMapper.MapMapper(map)); + + var lives = new FixedBitSet(numVectorsPerGraph); + lives.set(0, numVectorsPerGraph); + liveNodes.add(lives); + } + + var compactor = new OnDiskGraphIndexCompactor(graphs, liveNodes, remappers, similarityFunction, null); + var outputPath = testDirectory.resolve("test_compact_with_ordinal_mapping"); + + compactor.compact(outputPath); + + ReaderSupplier rs = ReaderSupplierFactory.open(outputPath); + var compactGraph = OnDiskGraphIndex.load(rs); + + // Verify the graph was created with correct ordinal mapping + assertEquals("Compacted graph should have all nodes", numSources * numVectorsPerGraph, compactGraph.size(0)); + + // Verify 
that the vectors are correctly mapped in the compacted graph + var compactView = compactGraph.getView(); + + // Check a few vectors to ensure they're at the correct ordinals + for (int sourceIdx = 0; sourceIdx < numSources; sourceIdx++) { + Map mapping = mappingList.get(sourceIdx); + // Check first, middle, and last nodes + int[] testIndices = {0, numVectorsPerGraph / 2, numVectorsPerGraph - 1}; + + for (int localIdx : testIndices) { + int expectedGlobalOrdinal = mapping.get(localIdx); + int originalVectorIdx = sourceIdx * numVectorsPerGraph + localIdx; + + VectorFloat originalVec = allVecs.get(originalVectorIdx); + VectorFloat compactVec = vectorTypeSupport.createFloatVector(dimension); + compactView.getVectorInto(expectedGlobalOrdinal, compactVec, 0); + + // Verify the vectors match (use similarity for normalized vectors) + float similarity = similarityFunction.compare(originalVec, compactVec); + assertTrue(String.format("Vector at ordinal %d should match (similarity=%.4f)", + expectedGlobalOrdinal, similarity), + similarity > 0.9999f); + } + } + } + + /** + * Tests compaction with both deletions and custom ordinal mappings combined. + * Verifies that both features work correctly together. 
+     */
+    @Test
+    public void testDeletionsAndOrdinalMapping() throws Exception {
+        List<OnDiskGraphIndex> graphs = new ArrayList<>();
+        List<ReaderSupplier> rss = new ArrayList<>();
+        List<FixedBitSet> liveNodes = new ArrayList<>();
+        List<OrdinalMapper> remappers = new ArrayList<>();
+
+        for (int i = 0; i < numSources; ++i) {
+            var outputPath = testDirectory.resolve("test_graph_" + i);
+            rss.add(ReaderSupplierFactory.open(outputPath.toAbsolutePath()));
+            var onDiskGraph = OnDiskGraphIndex.load(rss.get(i));
+            graphs.add(onDiskGraph);
+        }
+
+        // Combine deletions with custom ordinal mapping: live nodes receive consecutive
+        // global ordinals, deleted nodes get no mapping entry at all.
+        int globalOrdinal = 0;
+        int totalLiveNodes = 0;
+        List<Map<Integer, Integer>> mappingList = new ArrayList<>();
+
+        for (int n = 0; n < numSources; n++) {
+            Map<Integer, Integer> map = new HashMap<>();
+            var lives = new FixedBitSet(numVectorsPerGraph);
+
+            // Delete every 4th node
+            for (int i = 0; i < numVectorsPerGraph; i++) {
+                if (i % 4 != 0) {
+                    lives.set(i);
+                    map.put(i, globalOrdinal++);
+                    totalLiveNodes++;
+                }
+            }
+
+            mappingList.add(map);
+            remappers.add(new OrdinalMapper.MapMapper(map));
+            liveNodes.add(lives);
+        }
+
+        var compactor = new OnDiskGraphIndexCompactor(graphs, liveNodes, remappers, similarityFunction, null);
+        var outputPath = testDirectory.resolve("test_compact_deletions_and_mapping");
+
+        compactor.compact(outputPath);
+
+        ReaderSupplier rs = ReaderSupplierFactory.open(outputPath);
+        var compactGraph = OnDiskGraphIndex.load(rs);
+
+        // Verify correct size
+        assertEquals("Compacted graph should only contain live nodes", totalLiveNodes, compactGraph.size(0));
+
+        // Verify a sample of vectors from EVERY source. The sample counter is reset per source:
+        // a single shared counter would hit the cap on the first source and leave all later
+        // sources unchecked.
+        final int maxSamplesPerSource = 20;
+        var compactView = compactGraph.getView();
+        for (int sourceIdx = 0; sourceIdx < numSources; sourceIdx++) {
+            Map<Integer, Integer> mapping = mappingList.get(sourceIdx);
+            int samplesVerified = 0;
+
+            // Check a few live nodes per source
+            for (int localIdx = 1; localIdx < numVectorsPerGraph && samplesVerified < maxSamplesPerSource; localIdx++) {
+                if (localIdx % 4 == 0) continue; // Skip deleted nodes
+
+                int expectedGlobalOrdinal = mapping.get(localIdx);
+                int originalVectorIdx = sourceIdx * numVectorsPerGraph + localIdx;
+
+                VectorFloat<?> originalVec = allVecs.get(originalVectorIdx);
+                VectorFloat<?> compactVec = vectorTypeSupport.createFloatVector(dimension);
+                compactView.getVectorInto(expectedGlobalOrdinal, compactVec, 0);
+
+                // Verify the vectors match using similarity
+                float similarity = similarityFunction.compare(originalVec, compactVec);
+                assertTrue(String.format("Vector at ordinal %d should match (similarity=%.4f)",
+                                expectedGlobalOrdinal, similarity),
+                        similarity > 0.9999f);
+                samplesVerified++;
+            }
+        }
+
+        // Verify search functionality. try-with-resources closes the searcher even when an
+        // assertion fails.
+        // NOTE(review): scores are computed against allravv, which is indexed by pre-compaction
+        // ordinals, while the compacted graph uses remapped ordinals; this only proves that search
+        // returns something, not that the results are correct - confirm whether that is intended.
+        try (GraphSearcher searcher = new GraphSearcher(compactGraph)) {
+            var query = allVecs.get(randomIntBetween(0, allVecs.size() - 1));
+            SearchScoreProvider ssp = DefaultSearchScoreProvider.exact(query, similarityFunction, allravv);
+            SearchResult result = searcher.search(ssp, 10, Bits.ALL);
+
+            assertTrue("Search should return results", result.getNodes().length > 0);
+        }
+    }
+}
From 475ee063fefdaf5ac13cdf72c6ec161618f0e3b0 Mon Sep 17 00:00:00 2001
From: dian-lun-lin
Date: Tue, 14 Apr 2026 15:06:35 -0700
Subject: [PATCH 03/18] Add reporting and storage infrastructure for CompactorBenchmark

Add JFR recording, system stats collection, JSONL logging, git info capture, thread allocation tracking, dataset partitioning, and cloud storage layout utilities used by CompactorBenchmark. Switch jvector-examples logging from logback to log4j2 for consistency with benchmarks-jmh and to avoid duplicate SLF4J bindings in the fat jar.
--- jvector-examples/pom.xml | 13 +- .../jvector/example/reporting/GitInfo.java | 54 ++ .../example/reporting/JfrRecorder.java | 105 +++ .../example/reporting/JsonlWriter.java | 56 ++ .../reporting/SystemStatsCollector.java | 104 +++ .../example/reporting/ThreadAllocTracker.java | 203 +++++ .../example/util/DataSetPartitioner.java | 60 ++ .../util/storage/CloudStorageLayoutUtil.java | 331 +++++++++ .../util/storage/GcpStorageLayoutUtil.java | 695 ++++++++++++++++++ .../util/storage/LocalStorageLayoutUtil.java | 524 +++++++++++++ .../util/storage/StorageLayoutUtil.java | 584 +++++++++++++++ .../example/util/storage/package-info.java | 28 + .../example/yaml/TestDataPartition.java | 84 +++ .../src/main/resources/log4j2.xml | 14 + 14 files changed, 2851 insertions(+), 4 deletions(-) create mode 100644 jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/GitInfo.java create mode 100644 jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/JfrRecorder.java create mode 100644 jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/JsonlWriter.java create mode 100644 jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/SystemStatsCollector.java create mode 100644 jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/ThreadAllocTracker.java create mode 100644 jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/DataSetPartitioner.java create mode 100644 jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/CloudStorageLayoutUtil.java create mode 100644 jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/GcpStorageLayoutUtil.java create mode 100644 jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/LocalStorageLayoutUtil.java create mode 100644 jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/StorageLayoutUtil.java create mode 100644 
jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/package-info.java create mode 100644 jvector-examples/src/main/java/io/github/jbellis/jvector/example/yaml/TestDataPartition.java create mode 100644 jvector-examples/src/main/resources/log4j2.xml diff --git a/jvector-examples/pom.xml b/jvector-examples/pom.xml index 9daf7b8cf..731c1d769 100644 --- a/jvector-examples/pom.xml +++ b/jvector-examples/pom.xml @@ -85,16 +85,16 @@ gson 2.10.1 - + org.slf4j slf4j-api 2.0.9 - ch.qos.logback - logback-classic - 1.4.11 + org.apache.logging.log4j + log4j-slf4j2-impl + 2.24.3 software.amazon.awssdk @@ -112,6 +112,11 @@ aws-crt-client ${awssdk.version} + + software.amazon.awssdk + ec2 + ${awssdk.version} + software.amazon.awssdk s3 diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/GitInfo.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/GitInfo.java new file mode 100644 index 000000000..92979ac5d --- /dev/null +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/GitInfo.java @@ -0,0 +1,54 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.github.jbellis.jvector.example.reporting; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Detects the current git commit hash for tagging benchmark results. 
+ */
+public final class GitInfo {
+    private static final Logger log = LoggerFactory.getLogger(GitInfo.class);
+
+    /** Value reported when the commit hash cannot be determined. */
+    private static final String UNKNOWN = "unknown";
+
+    private GitInfo() {}
+
+    // Lazy holder pattern — computed once on first access
+    private static class Holder {
+        static final String SHORT_HASH = resolveShortHash();
+
+        /**
+         * Runs {@code git rev-parse HEAD} and shortens its output.
+         *
+         * stderr is merged into stdout, so the exit code must be checked: when git fails
+         * (for example outside a repository) the captured text is an error message, not a hash.
+         *
+         * @return the last 8 characters of the commit hash, or {@code "unknown"} if git could not
+         *         be run, exited with a non-zero status, or printed nothing
+         */
+        private static String resolveShortHash() {
+            try {
+                var process = new ProcessBuilder("git", "rev-parse", "HEAD").redirectErrorStream(true).start();
+                // Drain the output before waiting so a full pipe can never block the child
+                String output = new String(process.getInputStream().readAllBytes(),
+                        java.nio.charset.StandardCharsets.UTF_8).trim();
+                int exitCode = process.waitFor();
+                if (exitCode != 0 || output.isEmpty()) {
+                    log.warn("Could not determine git hash: git exited with code {} and output '{}'", exitCode, output);
+                    return UNKNOWN;
+                }
+                // NOTE(review): this keeps the hash SUFFIX; git's own abbreviated hashes are prefixes,
+                // so the value cannot be passed back to git. Kept because it is the documented behavior.
+                return output.length() >= 8 ? output.substring(output.length() - 8) : output;
+            } catch (InterruptedException e) {
+                // Preserve the interrupt for whoever triggered class initialization
+                Thread.currentThread().interrupt();
+                log.warn("Could not determine git hash", e);
+                return UNKNOWN;
+            } catch (Exception e) {
+                log.warn("Could not determine git hash", e);
+                return UNKNOWN;
+            }
+        }
+    }
+
+    /** Returns the last 8 characters of {@code git rev-parse HEAD}, or {@code "unknown"} on failure. */
+    public static String getShortHash() {
+        return Holder.SHORT_HASH;
+    }
+}
diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/JfrRecorder.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/JfrRecorder.java
new file mode 100644
index 000000000..ab5c4b581
--- /dev/null
+++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/JfrRecorder.java
@@ -0,0 +1,105 @@
+/*
+ * Copyright DataStax, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.github.jbellis.jvector.example.reporting;
+
+import jdk.jfr.Configuration;
+import jdk.jfr.Recording;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.text.ParseException;
+import java.time.Duration;
+
+/**
+ * Manages the lifecycle of a JFR (Java Flight Recorder) recording for benchmarks.
+ *
+ * At most one recording is held at a time: starting a new one first stops the previous one.
+ */
+public final class JfrRecorder {
+    private static final Logger log = LoggerFactory.getLogger(JfrRecorder.class);
+
+    private Recording recording;
+    private String fileName;
+
+    /**
+     * Creates the output directory, configures a "profile" recording, starts it, and returns the absolute path.
+     * Equivalent to {@code start(outputDir, fileName, false)}.
+     *
+     * @param outputDir directory to write the JFR file into
+     * @param fileName name of the JFR file (e.g. {@code "compactor-foo.jfr"})
+     * @return the absolute path of the recording file
+     * @throws IOException if the directory cannot be created
+     * @throws ParseException if the JFR "profile" configuration cannot be loaded
+     */
+    public Path start(Path outputDir, String fileName) throws IOException, ParseException {
+        return start(outputDir, fileName, false);
+    }
+
+    /**
+     * Creates the output directory, configures a "profile" recording, starts it, and returns the absolute path.
+     * A recording that is still active from an earlier call is stopped and closed first.
+     *
+     * @param outputDir directory to write the JFR file into
+     * @param fileName name of the JFR file
+     * @param objectCount whether to enable periodic 'jdk.ObjectCount' events
+     * @return the absolute path of the recording file
+     * @throws IOException if the directory cannot be created or the destination cannot be set
+     * @throws ParseException if the JFR "profile" configuration cannot be loaded
+     */
+    public Path start(Path outputDir, String fileName, boolean objectCount) throws IOException, ParseException {
+        // Without this, the previous Recording would keep running with no reference left to stop it
+        stop();
+
+        Files.createDirectories(outputDir);
+        Path jfrPath = outputDir.resolve(fileName).toAbsolutePath();
+        Recording newRecording = new Recording(Configuration.getConfiguration("profile"));
+        try {
+            newRecording.setToDisk(true);
+            newRecording.setDestination(jfrPath);
+
+            // Enable heap occupancy snapshots and old object sampling
+            var settings = newRecording.getSettings();
+            if (objectCount) {
+                settings.put("jdk.ObjectCount#enabled", "true");
+                settings.put("jdk.ObjectCount#period", "10s"); // Every 10 seconds
+            }
+            settings.put("jdk.OldObjectSample#enabled", "true");
+            // Flush to disk every minute so data is available for inspection during long benchmarks
+            // NOTE(review): "flush-interval" is not of the "<event>#<setting>" form used by the other
+            // keys; confirm that Recording#setSettings actually honors it.
+            settings.put("flush-interval", Duration.ofMinutes(1).toMillis() + "ms");
+            newRecording.setSettings(settings);
+            newRecording.start();
+        } catch (IOException | RuntimeException e) {
+            // Release the half-configured recording instead of leaking it
+            newRecording.close();
+            throw e;
+        }
+        this.recording = newRecording;
+        this.fileName = fileName;
+        System.out.println("JFR recording started, saving to: " + jfrPath);
+        log.info("JFR recording started, saving to: {}", jfrPath);
+        return jfrPath;
+    }
+
+    /** Stops and closes the recording, logging the saved path. Does nothing if no recording is active. */
+    public void stop() {
+        if (recording != null) {
+            Path jfrPath = recording.getDestination();
+            recording.stop();
+            recording.close();
+            recording = null;
+            System.out.println("JFR recording saved to: " + jfrPath);
+            log.info("JFR recording saved to: {}", jfrPath);
+        }
+    }
+
+    /** Returns {@code true} if a recording is currently in progress. */
+    public boolean isActive() {
+        return recording != null;
+    }
+
+    /** Returns the current file name, or {@code null} if no recording has been started. */
+    public String getFileName() {
+        return fileName;
+    }
+}
diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/JsonlWriter.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/JsonlWriter.java
new file mode 100644
index 000000000..a025dd207
--- /dev/null
+++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/JsonlWriter.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright DataStax, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.github.jbellis.jvector.example.reporting;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.Map;
+
+/**
+ * Append-only JSONL file writer that serializes one map per line using GSON.
+ */
+public final class JsonlWriter {
+    private static final Logger log = LoggerFactory.getLogger(JsonlWriter.class);
+    private static final Gson GSON = new GsonBuilder()
+            .disableHtmlEscaping()
+            .serializeNulls()
+            .create(); // No pretty printing for JSONL
+
+    private final Path outputFile;
+
+    public JsonlWriter(Path outputFile) {
+        this.outputFile = outputFile;
+    }
+
+    /** Serializes the map as a single JSON line and appends it to the output file.
*/
+    public void writeLine(Map<?, ?> result) {
+        // Wildcard parameter: any map Gson can serialize is accepted, so every caller that
+        // compiled against a more specific Map<K, V> signature keeps compiling.
+        // One JSON document per line; the trailing newline terminates the record.
+        String line = GSON.toJson(result) + "\n";
+        try {
+            // CREATE + APPEND: the file is created on first use and never truncated afterwards
+            Files.writeString(outputFile, line,
+                    StandardOpenOption.CREATE, StandardOpenOption.APPEND);
+        } catch (IOException e) {
+            // Best effort by design: a failed write is logged and the caller carries on
+            log.error("Failed to persist result to {}", outputFile, e);
+        }
+    }
+}
diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/SystemStatsCollector.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/SystemStatsCollector.java
new file mode 100644
index 000000000..3932958e3
--- /dev/null
+++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/SystemStatsCollector.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright DataStax, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.github.jbellis.jvector.example.reporting;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Background collector of {@code /proc} system metrics (CPU topology, load, memory, disk I/O).
+ * Spawns a bash process that appends JSONL lines to a file every 30 seconds.
+ */
+public final class SystemStatsCollector {
+    private static final Logger log = LoggerFactory.getLogger(SystemStatsCollector.class);
+
+    // Bash program run by the collector: reads the CPU topology once, then loops forever,
+    // printing one JSON object per iteration built from /proc/loadavg, /proc/meminfo and
+    // /proc/diskstats, and sleeping 30 seconds between iterations.
+    private static final String SCRIPT = String.join("\n",
+            "cpuThreads=$(grep -c '^processor' /proc/cpuinfo)",
+            "cpuSockets=$(awk '/^physical id/{print $NF}' /proc/cpuinfo | sort -u | wc -l)",
+            "[ \"$cpuSockets\" -eq 0 ] && cpuSockets=1",
+            "cpuCores=$(awk '/^physical id/{pid=$NF} /^core id/{print pid\"-\"$NF}' /proc/cpuinfo | sort -u | wc -l)",
+            "[ \"$cpuCores\" -eq 0 ] && cpuCores=$cpuThreads",
+            "while true; do",
+            " ts=$(date -u +%Y-%m-%dT%H:%M:%SZ)",
+            " read load1 load5 load15 runprocs rest < /proc/loadavg",
+            " IFS='/' read running total <<< \"$runprocs\"",
+            " memTotal=$(awk '/^MemTotal:/{print $2}' /proc/meminfo)",
+            " memFree=$(awk '/^MemFree:/{print $2}' /proc/meminfo)",
+            " memAvail=$(awk '/^MemAvailable:/{print $2}' /proc/meminfo)",
+            " buffers=$(awk '/^Buffers:/{print $2}' /proc/meminfo)",
+            " cached=$(awk '/^Cached:/{print $2}' /proc/meminfo)",
+            " swapTotal=$(awk '/^SwapTotal:/{print $2}' /proc/meminfo)",
+            " swapFree=$(awk '/^SwapFree:/{print $2}' /proc/meminfo)",
+            " disks=\"\"",
+            " while read maj min dev reads rmerged rsectors rtime writes wmerged wsectors wtime inprog iotime wiotime rest; do",
+            " if echo \"$dev\" | grep -qxE '(sd[a-z]+|nvme[0-9]+n[0-9]+|vd[a-z]+|xvd[a-z]+)'; then",
+            " [ -n \"$disks\" ] && disks=\"$disks,\"",
+            " disks=\"$disks{\\\"device\\\":\\\"$dev\\\",\\\"readsCompleted\\\":$reads,\\\"readsMerged\\\":$rmerged,\\\"sectorsRead\\\":$rsectors,\\\"readTimeMs\\\":$rtime,\\\"writesCompleted\\\":$writes,\\\"writesMerged\\\":$wmerged,\\\"sectorsWritten\\\":$wsectors,\\\"writeTimeMs\\\":$wtime,\\\"ioInProgress\\\":$inprog,\\\"ioTimeMs\\\":$iotime,\\\"weightedIoTimeMs\\\":$wiotime}\"",
+            " fi",
+            " done < /proc/diskstats",
+            " echo \"{\\\"timestamp\\\":\\\"$ts\\\",\\\"cpuSockets\\\":$cpuSockets,\\\"cpuCores\\\":$cpuCores,\\\"cpuThreads\\\":$cpuThreads,\\\"loadAvg1\\\":$load1,\\\"loadAvg5\\\":$load5,\\\"loadAvg15\\\":$load15,\\\"runningProcs\\\":$running,\\\"totalProcs\\\":$total,\\\"memTotalKB\\\":$memTotal,\\\"memFreeKB\\\":$memFree,\\\"memAvailableKB\\\":$memAvail,\\\"buffersKB\\\":$buffers,\\\"cachedKB\\\":$cached,\\\"swapTotalKB\\\":$swapTotal,\\\"swapFreeKB\\\":$swapFree,\\\"diskStats\\\":[$disks]}\"",
+            " sleep 30",
+            "done");
+
+    /** How long {@link #stop(Path)} waits for the process to exit after each termination request. */
+    private static final long STOP_TIMEOUT_SECONDS = 5;
+
+    private Process process;
+    private String fileName;
+
+    /**
+     * Creates the output directory, spawns the bash collector process, and returns the absolute path of the output file.
+     *
+     * @param outputDir directory to write the stats file into
+     * @param fileName name of the output JSONL file
+     * @return the absolute path of the stats file
+     * @throws IOException if the directory cannot be created or the process fails to start
+     */
+    public Path start(Path outputDir, String fileName) throws IOException {
+        Files.createDirectories(outputDir);
+        Path sysStatsPath = outputDir.resolve(fileName).toAbsolutePath();
+        var pb = new ProcessBuilder("bash", "-c", SCRIPT);
+        pb.redirectOutput(ProcessBuilder.Redirect.to(sysStatsPath.toFile()));
+        // stderr shares the output file, so shell errors end up in the JSONL stream
+        pb.redirectErrorStream(true);
+        process = pb.start();
+        this.fileName = fileName;
+        log.info("System stats collection started, saving to: {}", sysStatsPath);
+        return sysStatsPath;
+    }
+
+    /**
+     * Terminates the collector process and logs the saved path. Does nothing if no process was started.
+     *
+     * The process is first asked to terminate; if it has not exited within 5 seconds it is
+     * killed forcibly, so a collector that does not exit on the first request is not left running.
+     *
+     * @param outputDir directory passed to {@link #start(Path, String)}; only used for the log message
+     * @throws InterruptedException if the calling thread is interrupted while waiting for the process to exit
+     */
+    public void stop(Path outputDir) throws InterruptedException {
+        if (process != null) {
+            process.destroy();
+            if (!process.waitFor(STOP_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
+                log.warn("System stats collector did not exit within {}s, killing it", STOP_TIMEOUT_SECONDS);
+                process.destroyForcibly();
+                process.waitFor(STOP_TIMEOUT_SECONDS, TimeUnit.SECONDS);
+            }
+            process = null;
+            log.info("System stats collection stopped, saved to: {}", outputDir.resolve(fileName).toAbsolutePath());
+        }
+    }
+
+    /**
+     * Returns {@code true} between a successful {@link #start(Path, String)} and the completion of
+     * {@link #stop(Path)}. This does not check whether the bash process is still alive.
+     */
+    public boolean isActive() {
+        return process != null;
+    }
+
+    /** Returns the current file name, or {@code null} if collection has not been started. */
+    public String getFileName() {
+        return fileName;
+    }
+}
diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/ThreadAllocTracker.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/ThreadAllocTracker.java
new file mode 100644
index 000000000..207d67045
--- /dev/null
+++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/ThreadAllocTracker.java
@@ -0,0 +1,203 @@
+/*
+ * Copyright DataStax, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.github.jbellis.jvector.example.reporting;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.lang.management.ThreadInfo;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Instant;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Periodically samples per-thread heap allocation via
+ * {@link com.sun.management.ThreadMXBean#getThreadAllocatedBytes(long[])}
+ * and writes JSONL output with per-thread deltas and cumulative totals.
+ *
+ * Lifecycle mirrors {@link SystemStatsCollector}: {@link #start(Path, String)},
+ * {@link #stop()}, {@link #isActive()}, {@link #getFileName()}.
+ */
+public final class ThreadAllocTracker {
+    private static final Logger log = LoggerFactory.getLogger(ThreadAllocTracker.class);
+
+    private static final long DEFAULT_INTERVAL_SECONDS = 10;
+
+    private final com.sun.management.ThreadMXBean threadMXBean;
+    private final long intervalSeconds;
+
+    // Written by start()/stop() on the caller's thread and read by the sampler thread
+    private volatile Thread samplerThread;
+    private volatile boolean running;
+    private String fileName;
+
+    /// Creates a tracker with the default 10-second sampling interval.
+    public ThreadAllocTracker() {
+        this(DEFAULT_INTERVAL_SECONDS);
+    }
+
+    /// Creates a tracker with a custom sampling interval.
+    ///
+    /// @param intervalSeconds seconds between each sample
+    public ThreadAllocTracker(long intervalSeconds) {
+        this.threadMXBean = (com.sun.management.ThreadMXBean) ManagementFactory.getThreadMXBean();
+        this.intervalSeconds = intervalSeconds;
+    }
+
+    /// Creates the output directory, enables thread allocated memory tracking,
+    /// and spawns a daemon thread that periodically writes JSONL samples.
+    ///
+    /// @param outputDir directory to write the JSONL file into
+    /// @param fileName name of the output file
+    /// @return the absolute path of the output file
+    /// @throws IOException if the directory cannot be created
+    public Path start(Path outputDir, String fileName) throws IOException {
+        Files.createDirectories(outputDir);
+        Path outputPath = outputDir.resolve(fileName).toAbsolutePath();
+        this.fileName = fileName;
+
+        threadMXBean.setThreadAllocatedMemoryEnabled(true);
+
+        running = true;
+        samplerThread = new Thread(() -> sampleLoop(outputPath), "thread-alloc-tracker");
+        samplerThread.setDaemon(true);
+        samplerThread.start();
+
+        log.info("Thread allocation tracking started, saving to: {}", outputPath);
+        return outputPath;
+    }
+
+    /// Stops the sampler thread and writes a final cumulative summary line.
+    ///
+    /// Waits up to 5 seconds for the sampler thread to finish. Does nothing beyond clearing
+    /// the running flag if tracking was never started.
+    public void stop() {
+        running = false;
+        if (samplerThread != null) {
+            // Wake the sampler from its sleep so it writes the summary immediately
+            samplerThread.interrupt();
+            try {
+                samplerThread.join(5000);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+            }
+            samplerThread = null;
+            log.info("Thread allocation tracking stopped, saved to: {}", fileName);
+        }
+    }
+
+    /// Returns {@code true} if the sampler thread is currently running.
+    public boolean isActive() {
+        return samplerThread != null && running;
+    }
+
+    /// Returns the current file name, or {@code null} if tracking has not been started.
+    public String getFileName() {
+        return fileName;
+    }
+
+    /// Sampler thread body: writes one sample per interval until stopped, then a final summary line.
+    private void sampleLoop(Path outputPath) {
+        // Track cumulative allocations per thread (by id) for delta computation
+        var previousAllocations = new HashMap<Long, Long>();
+
+        try (var writer = Files.newBufferedWriter(outputPath)) {
+            while (running) {
+                try {
+                    Thread.sleep(intervalSeconds * 1000);
+                } catch (InterruptedException e) {
+                    // On interrupt (from stop()), write final summary and exit
+                    break;
+                }
+                writeSample(writer, previousAllocations, false);
+            }
+            // Write final summary with cumulative totals
+            writeSample(writer, previousAllocations, true);
+        } catch (IOException e) {
+            log.error("Failed to write thread allocation sample", e);
+        }
+    }
+
+    /// Appends one JSON line describing every thread for which both thread info and a
+    /// non-negative allocation counter are available.
+    ///
+    /// @param writer destination; flushed after the line is written
+    /// @param previousAllocations allocation counters seen at the previous sample, keyed by thread id;
+    ///                            updated in place with the current counters
+    /// @param isSummary whether to tag the line with {@code "event":"summary"}
+    /// @throws IOException if the line cannot be written
+    private void writeSample(BufferedWriter writer, Map<Long, Long> previousAllocations, boolean isSummary)
+            throws IOException {
+        long[] threadIds = threadMXBean.getAllThreadIds();
+        long[] allocatedBytes = threadMXBean.getThreadAllocatedBytes(threadIds);
+        ThreadInfo[] threadInfos = threadMXBean.getThreadInfo(threadIds);
+
+        var sb = new StringBuilder();
+        sb.append("{\"timestamp\":\"").append(Instant.now().toString()).append('"');
+        if (isSummary) {
+            sb.append(",\"event\":\"summary\"");
+        }
+        sb.append(",\"threads\":[");
+
+        long totalAllocated = 0;
+        long totalDelta = 0;
+        boolean first = true;
+
+        for (int i = 0; i < threadIds.length; i++) {
+            // Skip entries without thread info or with a negative allocation counter
+            if (threadInfos[i] == null || allocatedBytes[i] < 0) {
+                continue;
+            }
+            long id = threadIds[i];
+            long allocated = allocatedBytes[i];
+            // A thread seen for the first time reports its whole counter as the delta
+            long previous = previousAllocations.getOrDefault(id, 0L);
+            long delta = allocated - previous;
+            previousAllocations.put(id, allocated);
+
+            totalAllocated += allocated;
+            totalDelta += delta;
+
+            if (!first) {
+                sb.append(',');
+            }
+            first = false;
+
+            sb.append("{\"id\":").append(id)
+                    .append(",\"name\":\"").append(escapeJson(threadInfos[i].getThreadName())).append('"')
+                    .append(",\"allocatedBytes\":").append(allocated)
+                    .append(",\"deltaBytes\":").append(delta)
+                    .append('}');
+        }
+
+        sb.append("],\"totalAllocatedBytes\":").append(totalAllocated)
+                .append(",\"totalDeltaBytes\":").append(totalDelta)
+                .append('}');
+
+        writer.write(sb.toString());
+        writer.newLine();
+        writer.flush();
+    }
+
+    /// Escapes a string for embedding between double quotes in JSON.
+    ///
+    /// Quotes, backslashes and every control character below U+0020 are escaped, since JSON
+    /// does not allow raw control characters inside strings.
+    ///
+    /// @param value text to escape; {@code null} yields the empty string
+    /// @return the escaped text, without surrounding quotes
+    private static String escapeJson(String value) {
+        if (value == null) {
+            return "";
+        }
+        var sb = new StringBuilder(value.length());
+        for (int i = 0; i < value.length(); i++) {
+            char c = value.charAt(i);
+            switch (c) {
+                case '"': sb.append("\\\""); break;
+                case '\\': sb.append("\\\\"); break;
+                case '\n': sb.append("\\n"); break;
+                case '\r': sb.append("\\r"); break;
+                case '\t': sb.append("\\t"); break;
+                case '\b': sb.append("\\b"); break;
+                case '\f': sb.append("\\f"); break;
+                default:
+                    if (c < 0x20) {
+                        // Remaining control characters have no short form
+                        sb.append(String.format("\\u%04x", (int) c));
+                    } else {
+                        sb.append(c);
+                    }
+            }
+        }
+        return sb.toString();
+    }
+}
diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/DataSetPartitioner.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/DataSetPartitioner.java
new file mode 100644
index 000000000..1e6a83f40
--- /dev/null
+++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/DataSetPartitioner.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright DataStax, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.github.jbellis.jvector.example.util;
+
+import io.github.jbellis.jvector.example.benchmarks.datasets.DataSet;
+import io.github.jbellis.jvector.example.yaml.TestDataPartition;
+import io.github.jbellis.jvector.vector.types.VectorFloat;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Utility for partitioning a DataSet into multiple segments based on a distribution.
+ */
+public final class DataSetPartitioner {
+    private DataSetPartitioner() {}
+
+    /** Result of a partitioning: the vectors of each segment together with the segment sizes. */
+    public static final class PartitionedData {
+        /** One list of vectors per segment, in segment order. */
+        public final List<List<VectorFloat<?>>> vectors;
+        /** The split sizes the segments were cut from, in segment order. */
+        public final List<Integer> sizes;
+
+        public PartitionedData(List<List<VectorFloat<?>>> vectors, List<Integer> sizes) {
+            this.vectors = vectors;
+            this.sizes = sizes;
+        }
+    }
+
+    /**
+     * Partitions the base vectors of {@code ds}.
+     *
+     * @param ds data set whose base vectors are split
+     * @param numParts number of segments requested from the distribution
+     * @param distribution strategy that computes the size of each segment
+     * @return the segments and their sizes
+     */
+    public static PartitionedData partition(DataSet ds, int numParts, TestDataPartition.Distribution distribution) {
+        return partition(ds.getBaseVectors(), numParts, distribution);
+    }
+
+    /**
+     * Cuts {@code baseVectors} into consecutive segments whose sizes are given by
+     * {@code distribution.computeSplitSizes(baseVectors.size(), numParts)}.
+     *
+     * Segments are taken in order from the start of the list, and each one is copied into its
+     * own list, so later changes to {@code baseVectors} do not affect the result.
+     *
+     * @param baseVectors vectors to split
+     * @param numParts number of segments requested from the distribution
+     * @param distribution strategy that computes the size of each segment
+     * @return the segments and their sizes
+     */
+    public static PartitionedData partition(List<VectorFloat<?>> baseVectors, int numParts, TestDataPartition.Distribution distribution) {
+        List<Integer> sizes = distribution.computeSplitSizes(baseVectors.size(), numParts);
+        List<List<VectorFloat<?>>> segments = new ArrayList<>(numParts);
+
+        int from = 0;
+        for (int size : sizes) {
+            int to = from + size;
+            // Copy the view: subList alone would stay backed by baseVectors
+            segments.add(new ArrayList<>(baseVectors.subList(from, to)));
+            from = to;
+        }
+
+        return new PartitionedData(segments, sizes);
+    }
+}
diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/CloudStorageLayoutUtil.java
b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/CloudStorageLayoutUtil.java new file mode 100644 index 000000000..9530b8815 --- /dev/null +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/CloudStorageLayoutUtil.java @@ -0,0 +1,331 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.github.jbellis.jvector.example.util.storage; + +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Objects; + +/** + * Cloud wrapper that chooses AWS or GCP storage inspection and maps the provider-specific classes + * into cloud-agnostic storage tiers. 
+ */ +public final class CloudStorageLayoutUtil { + private CloudStorageLayoutUtil() { + } + + public enum CloudProvider { + AWS_EC2, + GCP_GCE, + LOCAL_OR_UNKNOWN + } + + public enum StorageClass { + BLOCK_HDD_COLD, + BLOCK_HDD_THROUGHPUT, + BLOCK_HDD_STANDARD, + BLOCK_SSD_BALANCED, + BLOCK_SSD_GENERAL, + BLOCK_SSD_HIGH_IOPS, + LOCAL_SSD, + LOCAL_NVME, + NETWORK_FILESYSTEM, + MEMORY_TMPFS, + PSEUDO_FILESYSTEM, + UNKNOWN + } + + public static final class StorageSnapshot { + private final T cloudSpecificSnapshot; + private final CloudProvider provider; + private final boolean runningInCloud; + private final String instanceId; + private final String instanceTypeOrMachineType; + private final String regionOrZone; + private final Map mountsByMountPoint; + + public StorageSnapshot(T cloudSpecificSnapshot, + CloudProvider provider, + boolean runningInCloud, + String instanceId, + String instanceTypeOrMachineType, + String regionOrZone, + Map mountsByMountPoint) { + this.cloudSpecificSnapshot = cloudSpecificSnapshot; + this.provider = Objects.requireNonNull(provider, "provider"); + this.runningInCloud = runningInCloud; + this.instanceId = instanceId; + this.instanceTypeOrMachineType = instanceTypeOrMachineType; + this.regionOrZone = regionOrZone; + this.mountsByMountPoint = Objects.requireNonNull(mountsByMountPoint, "mountsByMountPoint"); + } + + public T cloudSpecificSnapshot() { + return cloudSpecificSnapshot; + } + + public CloudProvider provider() { + return provider; + } + + public boolean runningInCloud() { + return runningInCloud; + } + + public String instanceId() { + return instanceId; + } + + public String instanceTypeOrMachineType() { + return instanceTypeOrMachineType; + } + + public String regionOrZone() { + return regionOrZone; + } + + public Map mountsByMountPoint() { + return mountsByMountPoint; + } + } + + public static final class MountStorageInfo { + private final String mountPoint; + private final String source; + private final String filesystemType; + 
private final StorageClass storageClass; + private final String providerSpecificClass; + + public MountStorageInfo(String mountPoint, + String source, + String filesystemType, + StorageClass storageClass, + String providerSpecificClass) { + this.mountPoint = mountPoint; + this.source = source; + this.filesystemType = filesystemType; + this.storageClass = Objects.requireNonNull(storageClass, "storageClass"); + this.providerSpecificClass = providerSpecificClass; + } + + public String mountPoint() { + return mountPoint; + } + + public String source() { + return source; + } + + public String filesystemType() { + return filesystemType; + } + + public StorageClass storageClass() { + return storageClass; + } + + public String providerSpecificClass() { + return providerSpecificClass; + } + } + + public static StorageSnapshot inspectStorage() { + var awsSnapshot = StorageLayoutUtil.inspectStorage(); + if (awsSnapshot.runningOnEc2()) { + return fromAws(awsSnapshot, CloudProvider.AWS_EC2, true); + } + + var gcpSnapshot = GcpStorageLayoutUtil.inspectStorage(); + if (gcpSnapshot.runningOnGcp()) { + return fromGcp(gcpSnapshot); + } + + // Not in a detected cloud environment. Use OS-specific local storage inspection. 
+ var localSnapshot = LocalStorageLayoutUtil.inspectStorage(); + return fromLocal(localSnapshot); + } + + public static Map storageClassByMountPoint() { + var snapshot = inspectStorage(); + var byMountPoint = new LinkedHashMap(snapshot.mountsByMountPoint().size()); + for (var entry : snapshot.mountsByMountPoint().entrySet()) { + byMountPoint.put(entry.getKey(), entry.getValue().storageClass()); + } + return Collections.unmodifiableMap(byMountPoint); + } + + private static StorageSnapshot fromAws(StorageLayoutUtil.StorageSnapshot snapshot, + CloudProvider provider, + boolean runningInCloud) { + var byMountPoint = new LinkedHashMap(snapshot.mountsByMountPoint().size()); + for (var entry : snapshot.mountsByMountPoint().entrySet()) { + var mount = entry.getValue(); + byMountPoint.put( + entry.getKey(), + new MountStorageInfo( + mount.mountPoint(), + mount.source(), + mount.filesystemType(), + mapAwsClass(mount.storageClass()), + mount.storageClass().name() + ) + ); + } + + return new StorageSnapshot<>( + snapshot, + provider, + runningInCloud, + snapshot.instanceId(), + snapshot.instanceType(), + snapshot.region(), + Collections.unmodifiableMap(byMountPoint) + ); + } + + private static StorageSnapshot fromGcp(GcpStorageLayoutUtil.StorageSnapshot snapshot) { + var byMountPoint = new LinkedHashMap(snapshot.mountsByMountPoint().size()); + for (var entry : snapshot.mountsByMountPoint().entrySet()) { + var mount = entry.getValue(); + byMountPoint.put( + entry.getKey(), + new MountStorageInfo( + mount.mountPoint(), + mount.source(), + mount.filesystemType(), + mapGcpClass(mount.storageClass()), + mount.storageClass().name() + ) + ); + } + + return new StorageSnapshot<>( + snapshot, + CloudProvider.GCP_GCE, + true, + snapshot.instanceId(), + snapshot.machineType(), + snapshot.zone(), + Collections.unmodifiableMap(byMountPoint) + ); + } + + private static StorageSnapshot fromLocal(LocalStorageLayoutUtil.StorageSnapshot snapshot) { + var byMountPoint = new 
LinkedHashMap(snapshot.mountsByMountPoint().size()); + for (var entry : snapshot.mountsByMountPoint().entrySet()) { + var mount = entry.getValue(); + byMountPoint.put( + entry.getKey(), + new MountStorageInfo( + mount.mountPoint(), + mount.source(), + mount.filesystemType(), + mapLocalClass(mount.storageClass()), + mount.storageClass().name() + ) + ); + } + + return new StorageSnapshot<>( + snapshot, + CloudProvider.LOCAL_OR_UNKNOWN, + false, + null, + snapshot.osName(), + snapshot.osName(), + Collections.unmodifiableMap(byMountPoint) + ); + } + + private static StorageClass mapAwsClass(StorageLayoutUtil.StorageClass storageClass) { + switch (storageClass) { + case EBS_COLD_HDD: + return StorageClass.BLOCK_HDD_COLD; + case EBS_THROUGHPUT_HDD: + return StorageClass.BLOCK_HDD_THROUGHPUT; + case EBS_MAGNETIC: + return StorageClass.BLOCK_HDD_STANDARD; + case EBS_GP2: + return StorageClass.BLOCK_SSD_BALANCED; + case EBS_GP3: + return StorageClass.BLOCK_SSD_GENERAL; + case EBS_PROVISIONED_IOPS_SSD: + return StorageClass.BLOCK_SSD_HIGH_IOPS; + case INSTANCE_STORE_SSD: + return StorageClass.LOCAL_SSD; + case INSTANCE_STORE_NVME: + return StorageClass.LOCAL_NVME; + case NETWORK_FILESYSTEM: + return StorageClass.NETWORK_FILESYSTEM; + case MEMORY_TMPFS: + return StorageClass.MEMORY_TMPFS; + case PSEUDO_FILESYSTEM: + return StorageClass.PSEUDO_FILESYSTEM; + case UNKNOWN: + default: + return StorageClass.UNKNOWN; + } + } + + private static StorageClass mapGcpClass(GcpStorageLayoutUtil.StorageClass storageClass) { + switch (storageClass) { + case PD_STANDARD_HDD: + return StorageClass.BLOCK_HDD_STANDARD; + case PD_THROUGHPUT_OPTIMIZED: + return StorageClass.BLOCK_HDD_THROUGHPUT; + case PD_BALANCED_SSD: + return StorageClass.BLOCK_SSD_BALANCED; + case PD_SSD: + return StorageClass.BLOCK_SSD_GENERAL; + case PD_EXTREME_SSD: + return StorageClass.BLOCK_SSD_HIGH_IOPS; + case LOCAL_SSD: + return StorageClass.LOCAL_SSD; + case LOCAL_NVME: + return StorageClass.LOCAL_NVME; + case 
NETWORK_FILESYSTEM: + return StorageClass.NETWORK_FILESYSTEM; + case MEMORY_TMPFS: + return StorageClass.MEMORY_TMPFS; + case PSEUDO_FILESYSTEM: + return StorageClass.PSEUDO_FILESYSTEM; + case UNKNOWN: + default: + return StorageClass.UNKNOWN; + } + } + + private static StorageClass mapLocalClass(LocalStorageLayoutUtil.StorageClass storageClass) { + switch (storageClass) { + case LOCAL_HDD: + return StorageClass.BLOCK_HDD_STANDARD; + case LOCAL_SSD: + return StorageClass.LOCAL_SSD; + case LOCAL_NVME: + return StorageClass.LOCAL_NVME; + case NETWORK_FILESYSTEM: + return StorageClass.NETWORK_FILESYSTEM; + case MEMORY_TMPFS: + return StorageClass.MEMORY_TMPFS; + case PSEUDO_FILESYSTEM: + return StorageClass.PSEUDO_FILESYSTEM; + case UNKNOWN: + default: + return StorageClass.UNKNOWN; + } + } +} diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/GcpStorageLayoutUtil.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/GcpStorageLayoutUtil.java new file mode 100644 index 000000000..a4ecb6d09 --- /dev/null +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/GcpStorageLayoutUtil.java @@ -0,0 +1,695 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Stream;

/**
 * Detects GCE runtime context via metadata service and classifies storage for each mounted filesystem.
 *
 * <p>All probing is best-effort: an unreachable metadata server, an unreadable mounts table or a
 * missing sysfs attribute yields an "unknown"/empty result instead of an exception.
 */
public final class GcpStorageLayoutUtil {
    private static final String GCE_METADATA_HOST_ENV = "GCE_METADATA_HOST";
    private static final String METADATA_HOST_DEFAULT = "metadata.google.internal";
    private static final String METADATA_PREFIX = "/computeMetadata/v1/";
    private static final String METADATA_FLAVOR_HEADER = "Metadata-Flavor";
    private static final String METADATA_FLAVOR_VALUE = "Google";
    private static final Duration METADATA_TIMEOUT = Duration.ofMillis(300);

    private static final Pattern NVME_PARTITION_SUFFIX = Pattern.compile("p\\d+$");
    private static final Pattern GENERIC_PARTITION_SUFFIX = Pattern.compile("\\d+$");
    // Partitions of letter-named disks: /dev/sda1, /dev/vdb2, /dev/xvdf1, /dev/hda3.
    private static final Pattern LETTER_NAMED_DISK_PARTITION =
            Pattern.compile("^/dev/(?:sd|vd|xvd|hd)[a-z]+\\d+$");
    // Partitions of index-named disks, which use a "p<N>" suffix: /dev/mmcblk0p1, /dev/md0p1, /dev/loop0p1.
    private static final Pattern INDEXED_DISK_PARTITION =
            Pattern.compile("^/dev/(?:mmcblk|loop|md|nbd)\\d+p\\d+$");
    private static final Set<String> NETWORK_FILESYSTEM_TYPES =
            Set.of("nfs", "nfs4", "efs", "cifs", "smbfs", "fuse.sshfs");

    private GcpStorageLayoutUtil() {
    }

    /** GCP-specific storage classification of a mount. */
    public enum StorageClass {
        PD_STANDARD_HDD,
        PD_THROUGHPUT_OPTIMIZED,
        PD_BALANCED_SSD,
        PD_SSD,
        PD_EXTREME_SSD,
        LOCAL_SSD,
        LOCAL_NVME,
        NETWORK_FILESYSTEM,
        MEMORY_TMPFS,
        PSEUDO_FILESYSTEM,
        UNKNOWN
    }

    /** Immutable result of one inspection: instance identity plus per-mount storage details. */
    public static final class StorageSnapshot {
        private final boolean runningOnGcp;
        private final String instanceId;
        private final String machineType;
        private final String zone;
        private final Map<String, MountStorageInfo> mountsByMountPoint;

        /**
         * @param runningOnGcp       whether the metadata server identified this host as a GCE instance
         * @param instanceId         instance id, may be null
         * @param machineType        machine type, may be null
         * @param zone               zone, may be null
         * @param mountsByMountPoint mount point to storage info; must not be null (stored as given)
         * @throws NullPointerException if {@code mountsByMountPoint} is null
         */
        public StorageSnapshot(boolean runningOnGcp,
                               String instanceId,
                               String machineType,
                               String zone,
                               Map<String, MountStorageInfo> mountsByMountPoint) {
            this.runningOnGcp = runningOnGcp;
            this.instanceId = instanceId;
            this.machineType = machineType;
            this.zone = zone;
            this.mountsByMountPoint = Objects.requireNonNull(mountsByMountPoint, "mountsByMountPoint");
        }

        public boolean runningOnGcp() {
            return runningOnGcp;
        }

        public String instanceId() {
            return instanceId;
        }

        public String machineType() {
            return machineType;
        }

        public String zone() {
            return zone;
        }

        public Map<String, MountStorageInfo> mountsByMountPoint() {
            return mountsByMountPoint;
        }
    }

    /** Storage details resolved for a single mounted filesystem. */
    public static final class MountStorageInfo {
        private final String mountPoint;
        private final String source;
        private final String filesystemType;
        private final StorageClass storageClass;
        private final String deviceName;
        private final String diskKind;
        private final String interfaceType;

        /**
         * @param mountPoint     where the filesystem is mounted
         * @param source         mount source as listed in the mounts table
         * @param filesystemType filesystem type as listed in the mounts table
         * @param storageClass   classification; must not be null
         * @param deviceName     metadata device name inferred for the mount, may be null
         * @param diskKind       metadata disk {@code type} (lower-cased), may be null
         * @param interfaceType  metadata disk {@code interface} (upper-cased), may be null
         * @throws NullPointerException if {@code storageClass} is null
         */
        public MountStorageInfo(String mountPoint,
                                String source,
                                String filesystemType,
                                StorageClass storageClass,
                                String deviceName,
                                String diskKind,
                                String interfaceType) {
            this.mountPoint = mountPoint;
            this.source = source;
            this.filesystemType = filesystemType;
            this.storageClass = Objects.requireNonNull(storageClass, "storageClass");
            this.deviceName = deviceName;
            this.diskKind = diskKind;
            this.interfaceType = interfaceType;
        }

        public String mountPoint() {
            return mountPoint;
        }

        public String source() {
            return source;
        }

        public String filesystemType() {
            return filesystemType;
        }

        public StorageClass storageClass() {
            return storageClass;
        }

        public String deviceName() {
            return deviceName;
        }

        public String diskKind() {
            return diskKind;
        }

        public String interfaceType() {
            return interfaceType;
        }
    }

    /**
     * Queries the metadata server and the mounts table and classifies every mount.
     *
     * @return snapshot whose mount map is unmodifiable and ordered by mount point
     */
    public static StorageSnapshot inspectStorage() {
        var identity = fetchGcpIdentity();
        var mounts = readMountEntries();
        var diskData = identity.map(GcpStorageLayoutUtil::fetchGcpDiskData).orElse(GcpDiskData.empty());

        mounts.sort(Comparator.comparing(MountEntry::mountPoint));
        var byMountPoint = new LinkedHashMap<String, MountStorageInfo>(mounts.size());
        for (var mount : mounts) {
            var diskResolution = resolveDisk(mount.source(), diskData);
            var storageClass = classify(mount, diskResolution);
            byMountPoint.put(
                    mount.mountPoint(),
                    new MountStorageInfo(
                            mount.mountPoint(),
                            mount.source(),
                            mount.filesystemType(),
                            storageClass,
                            diskResolution.deviceName(),
                            diskResolution.diskKind(),
                            diskResolution.interfaceType()
                    )
            );
        }

        return new StorageSnapshot(
                identity.isPresent(),
                identity.map(GcpIdentity::instanceId).orElse(null),
                identity.map(GcpIdentity::machineType).orElse(null),
                identity.map(GcpIdentity::zone).orElse(null),
                Collections.unmodifiableMap(byMountPoint)
        );
    }

    /**
     * Convenience projection of {@link #inspectStorage()}.
     *
     * @return unmodifiable map from mount point to storage class
     */
    public static Map<String, StorageClass> storageClassByMountPoint() {
        var snapshot = inspectStorage();
        var byMountPoint = new LinkedHashMap<String, StorageClass>(snapshot.mountsByMountPoint().size());
        for (var entry : snapshot.mountsByMountPoint().entrySet()) {
            byMountPoint.put(entry.getKey(), entry.getValue().storageClass());
        }
        return Collections.unmodifiableMap(byMountPoint);
    }

    /** Reads instance id, machine type and zone; empty when no usable instance id is returned. */
    private static Optional<GcpIdentity> fetchGcpIdentity() {
        var client = HttpClient.newBuilder()
                .connectTimeout(METADATA_TIMEOUT)
                .build();

        var instanceId = readMetadata(client, "instance/id");
        if (instanceId == null || instanceId.isBlank()) {
            return Optional.empty();
        }

        var machineType = parseLeafResource(readMetadata(client, "instance/machine-type"));
        var zone = parseLeafResource(readMetadata(client, "instance/zone"));
        return Optional.of(new GcpIdentity(instanceId.trim(), machineType, zone));
    }

    /** Collects attached-disk metadata and the local google-* by-id aliases. The identity is unused. */
    private static GcpDiskData fetchGcpDiskData(GcpIdentity ignoredIdentity) {
        var byDeviceName = fetchDisksByDeviceNameFromMetadata();
        var aliasesByNormalizedDevice = mapGoogleAliasesByNormalizedDevice();
        return new GcpDiskData(byDeviceName, aliasesByNormalizedDevice);
    }

    /** Walks {@code instance/disks/} and returns the disk attributes keyed by device name. */
    private static Map<String, GcpDiskInfo> fetchDisksByDeviceNameFromMetadata() {
        var client = HttpClient.newBuilder()
                .connectTimeout(METADATA_TIMEOUT)
                .build();

        var indexListing = readMetadata(client, "instance/disks/");
        if (indexListing == null || indexListing.isBlank()) {
            return Map.of();
        }

        var byDeviceName = new LinkedHashMap<String, GcpDiskInfo>();
        for (var rawLine : indexListing.split("\n")) {
            var line = rawLine.trim();
            if (line.isEmpty()) {
                continue;
            }
            var index = line.endsWith("/") ? line.substring(0, line.length() - 1) : line;
            var deviceName = readMetadata(client, "instance/disks/" + index + "/device-name");
            if (deviceName == null || deviceName.isBlank()) {
                continue;
            }

            // Trim every attribute: classification compares them with exact literals such as "scratch".
            var diskKind = safeLower(readMetadata(client, "instance/disks/" + index + "/type")).trim();
            var interfaceType = safeUpper(readMetadata(client, "instance/disks/" + index + "/interface"));
            var diskTypeHint = safeLower(readMetadata(client, "instance/disks/" + index + "/disk-type")).trim();
            byDeviceName.put(deviceName.trim(), new GcpDiskInfo(deviceName.trim(), diskKind, interfaceType, diskTypeHint));
        }
        return byDeviceName;
    }

    /**
     * Maps each normalized block device to the sorted {@code google-*} symlink names under
     * {@code /dev/disk/by-id} that resolve to it. Unresolvable links are skipped.
     */
    private static Map<String, List<String>> mapGoogleAliasesByNormalizedDevice() {
        var byIdDir = Path.of("/dev/disk/by-id");
        if (!Files.isDirectory(byIdDir)) {
            return Map.of();
        }

        var aliasesByDevice = new LinkedHashMap<String, List<String>>();
        try (Stream<Path> entries = Files.list(byIdDir)) {
            entries.filter(Files::isSymbolicLink).forEach(link -> {
                var alias = link.getFileName().toString();
                if (!alias.startsWith("google-")) {
                    return;
                }
                try {
                    var target = normalizeDevice(link.toRealPath().toString());
                    aliasesByDevice.computeIfAbsent(target, unused -> new ArrayList<>()).add(alias);
                } catch (IOException ignored) {
                    // Best effort: a dangling or unreadable link simply contributes no alias.
                }
            });
        } catch (IOException ignored) {
            return Map.of();
        }

        for (var aliases : aliasesByDevice.values()) {
            aliases.sort(String::compareTo);
        }
        return aliasesByDevice;
    }

    /** Links a mount source under {@code /dev/} to its metadata disk entry, if one can be found. */
    private static DiskResolution resolveDisk(String mountSource, GcpDiskData diskData) {
        if (mountSource == null || !mountSource.startsWith("/dev/")) {
            return DiskResolution.empty();
        }

        var normalized = normalizeDevice(mountSource);
        var aliases = diskData.aliasesByNormalizedDevice().getOrDefault(normalized, List.of());
        var primaryAlias = aliases.isEmpty() ? null : aliases.get(0);
        var inferredDeviceName = primaryAlias == null ? null : stripGooglePrefix(primaryAlias);
        GcpDiskInfo info = inferredDeviceName == null ? null : diskData.byDeviceName().get(inferredDeviceName);

        // Try all aliases in case the first one doesn't match a metadata device-name.
        if (info == null) {
            for (var alias : aliases) {
                var candidate = stripGooglePrefix(alias);
                if (candidate == null) {
                    continue;
                }
                info = diskData.byDeviceName().get(candidate);
                if (info != null) {
                    inferredDeviceName = candidate;
                    break;
                }
            }
        }

        var rotational = readRotationalFlag(normalized);
        if (info == null) {
            return new DiskResolution(normalized, inferredDeviceName, null, null, null, rotational);
        }
        return new DiskResolution(
                normalized,
                inferredDeviceName,
                info.diskKind(),
                info.interfaceType(),
                info.diskTypeHint(),
                rotational
        );
    }

    /** Classifies one mount from its filesystem type, its source and the resolved disk metadata. */
    private static StorageClass classify(MountEntry mount, DiskResolution diskResolution) {
        var fsType = safeLower(mount.filesystemType());
        var source = mount.source();
        var sourceLower = safeLower(source);

        if ("tmpfs".equals(fsType)) {
            return StorageClass.MEMORY_TMPFS;
        }
        if (NETWORK_FILESYSTEM_TYPES.contains(fsType)) {
            return StorageClass.NETWORK_FILESYSTEM;
        }
        if (isPseudoFileSystem(fsType, sourceLower)) {
            return StorageClass.PSEUDO_FILESYSTEM;
        }

        if ("scratch".equals(diskResolution.diskKind())) {
            if ("NVME".equals(diskResolution.interfaceType()) || sourceLower.contains("nvme")) {
                return StorageClass.LOCAL_NVME;
            }
            return StorageClass.LOCAL_SSD;
        }
        if ("persistent".equals(diskResolution.diskKind())) {
            return classifyPersistentDisk(diskResolution);
        }

        // Best-effort fallback based on device name hints and local block characteristics.
        var hints = safeLower(diskResolution.deviceName()) + " "
                + safeLower(diskResolution.diskTypeHint()) + " "
                + sourceLower;
        if (hints.contains("local-ssd")) {
            return sourceLower.contains("nvme") ? StorageClass.LOCAL_NVME : StorageClass.LOCAL_SSD;
        }
        if (source != null && source.startsWith("/dev/")) {
            if (sourceLower.contains("nvme")) {
                return StorageClass.LOCAL_NVME;
            }
            if (Boolean.TRUE.equals(diskResolution.rotational())) {
                return StorageClass.PD_STANDARD_HDD;
            }
            return StorageClass.LOCAL_SSD;
        }
        return StorageClass.UNKNOWN;
    }

    /** Picks the persistent-disk tier from name/type hints, falling back to the rotational flag. */
    private static StorageClass classifyPersistentDisk(DiskResolution diskResolution) {
        var hints = safeLower(diskResolution.deviceName()) + " " + safeLower(diskResolution.diskTypeHint());
        if (hints.contains("extreme")) {
            return StorageClass.PD_EXTREME_SSD;
        }
        if (hints.contains("throughput")) {
            return StorageClass.PD_THROUGHPUT_OPTIMIZED;
        }
        if (hints.contains("balanced")) {
            return StorageClass.PD_BALANCED_SSD;
        }
        if (hints.contains("pd-ssd") || hints.contains("ssd")) {
            return StorageClass.PD_SSD;
        }
        if (hints.contains("standard")) {
            return StorageClass.PD_STANDARD_HDD;
        }

        if (Boolean.TRUE.equals(diskResolution.rotational())) {
            return StorageClass.PD_STANDARD_HDD;
        }
        return StorageClass.PD_BALANCED_SSD;
    }

    /**
     * GETs one metadata path.
     *
     * @return the response body, or null on a non-200 status, a missing/incorrect
     *         {@code Metadata-Flavor} response header, an I/O failure or interruption
     */
    private static String readMetadata(HttpClient client, String relativePath) {
        var host = Optional.ofNullable(System.getenv(GCE_METADATA_HOST_ENV)).orElse(METADATA_HOST_DEFAULT);
        var uri = URI.create("http://" + host + METADATA_PREFIX + relativePath);
        try {
            var request = HttpRequest.newBuilder(uri)
                    .timeout(METADATA_TIMEOUT)
                    .header(METADATA_FLAVOR_HEADER, METADATA_FLAVOR_VALUE)
                    .GET()
                    .build();
            var response = client.send(request, HttpResponse.BodyHandlers.ofString());
            if (response.statusCode() != 200) {
                return null;
            }
            var flavorHeader = response.headers().firstValue(METADATA_FLAVOR_HEADER).orElse("");
            if (!METADATA_FLAVOR_VALUE.equalsIgnoreCase(flavorHeader)) {
                return null;
            }
            return response.body();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return null;
        } catch (IOException e) {
            return null;
        }
    }

    /** Returns the text after the last '/' of a resource path, or null for null/blank input. */
    private static String parseLeafResource(String value) {
        if (value == null) {
            return null;
        }
        var trimmed = value.trim();
        if (trimmed.isEmpty()) {
            return null;
        }
        var idx = trimmed.lastIndexOf('/');
        if (idx < 0 || idx == trimmed.length() - 1) {
            return trimmed;
        }
        return trimmed.substring(idx + 1);
    }

    /** Parses {@code /proc/self/mounts} (or {@code /proc/mounts}); returns a mutable, possibly empty list. */
    private static List<MountEntry> readMountEntries() {
        var mountsPath = Files.isReadable(Path.of("/proc/self/mounts"))
                ? Path.of("/proc/self/mounts")
                : Path.of("/proc/mounts");

        if (!Files.isReadable(mountsPath)) {
            return new ArrayList<>();
        }

        var entries = new ArrayList<MountEntry>();
        try (Stream<String> lines = Files.lines(mountsPath)) {
            lines.forEach(line -> {
                var parts = line.split(" ");
                if (parts.length < 3) {
                    return;
                }
                var source = decodeMountToken(parts[0]);
                var mountPoint = decodeMountToken(parts[1]);
                var filesystemType = decodeMountToken(parts[2]);
                entries.add(new MountEntry(source, mountPoint, filesystemType));
            });
        } catch (IOException ignored) {
            return new ArrayList<>();
        }
        return entries;
    }

    /** Reads the sysfs {@code queue/rotational} attribute; null when unavailable or not "0"/"1". */
    private static Boolean readRotationalFlag(String normalizedDevice) {
        if (normalizedDevice == null || !normalizedDevice.startsWith("/dev/")) {
            return null;
        }
        var blockName = normalizedDevice.substring("/dev/".length());
        var rotaPath = Path.of("/sys/class/block", blockName, "queue", "rotational");
        if (!Files.isReadable(rotaPath)) {
            return null;
        }
        try {
            var value = Files.readString(rotaPath).trim();
            if ("1".equals(value)) {
                return Boolean.TRUE;
            }
            if ("0".equals(value)) {
                return Boolean.FALSE;
            }
        } catch (IOException ignored) {
            return null;
        }
        return null;
    }

    /** True for kernel/virtual filesystems that are not backed by a storage device. */
    private static boolean isPseudoFileSystem(String fsType, String sourceLower) {
        return fsType.equals("proc")
                || fsType.equals("sysfs")
                || fsType.equals("devpts")
                || fsType.equals("devtmpfs")
                || fsType.equals("cgroup")
                || fsType.equals("cgroup2")
                || fsType.equals("autofs")
                || fsType.equals("mqueue")
                || fsType.equals("tracefs")
                || fsType.equals("pstore")
                || fsType.equals("securityfs")
                || fsType.equals("debugfs")
                || fsType.equals("configfs")
                || fsType.equals("fusectl")
                || fsType.equals("binfmt_misc")
                || fsType.equals("rpc_pipefs")
                || sourceLower.equals("proc")
                || sourceLower.equals("sysfs")
                || sourceLower.equals("tmpfs");
    }

    /**
     * Reduces a partition device to its parent disk ({@code /dev/sda1 -> /dev/sda},
     * {@code /dev/nvme0n1p2 -> /dev/nvme0n1}, {@code /dev/mmcblk0p1 -> /dev/mmcblk0}).
     *
     * <p>Only recognised partition naming schemes are rewritten. Names whose trailing digits are part
     * of the device itself ({@code /dev/dm-0}, {@code /dev/md0}, {@code /dev/loop0}) are returned
     * unchanged, so sysfs and by-id lookups keyed on the result still find the device.
     *
     * @return the normalized path; the input itself when it is null or not under {@code /dev/}
     */
    private static String normalizeDevice(String device) {
        if (device == null) {
            return null;
        }
        if (!device.startsWith("/dev/")) {
            return device;
        }
        if (device.startsWith("/dev/nvme") || INDEXED_DISK_PARTITION.matcher(device).matches()) {
            return NVME_PARTITION_SUFFIX.matcher(device).replaceAll("");
        }
        if (LETTER_NAMED_DISK_PARTITION.matcher(device).matches()) {
            return GENERIC_PARTITION_SUFFIX.matcher(device).replaceAll("");
        }
        return device;
    }

    /** Decodes the octal escapes used in the mounts table; the backslash escape is handled last. */
    private static String decodeMountToken(String token) {
        return token
                .replace("\\040", " ")
                .replace("\\011", "\t")
                .replace("\\012", "\n")
                .replace("\\134", "\\");
    }

    /** Strips the {@code google-} prefix; null when the alias is null, unprefixed or nothing follows. */
    private static String stripGooglePrefix(String alias) {
        if (alias == null || !alias.startsWith("google-") || alias.length() <= "google-".length()) {
            return null;
        }
        return alias.substring("google-".length());
    }

    /** Lower-cases with {@link Locale#ROOT}; null becomes the empty string. */
    private static String safeLower(String value) {
        return value == null ? "" : value.toLowerCase(Locale.ROOT);
    }

    /** Trims and upper-cases with {@link Locale#ROOT}; null stays null. */
    private static String safeUpper(String value) {
        return value == null ? null : value.trim().toUpperCase(Locale.ROOT);
    }

    /** One parsed row of the mounts table. */
    private static final class MountEntry {
        private final String source;
        private final String mountPoint;
        private final String filesystemType;

        private MountEntry(String source, String mountPoint, String filesystemType) {
            this.source = source;
            this.mountPoint = mountPoint;
            this.filesystemType = filesystemType;
        }

        private String source() {
            return source;
        }

        private String mountPoint() {
            return mountPoint;
        }

        private String filesystemType() {
            return filesystemType;
        }
    }

    /** Instance identity read from the metadata server. */
    private static final class GcpIdentity {
        private final String instanceId;
        private final String machineType;
        private final String zone;

        private GcpIdentity(String instanceId, String machineType, String zone) {
            this.instanceId = instanceId;
            this.machineType = machineType;
            this.zone = zone;
        }

        private String instanceId() {
            return instanceId;
        }

        private String machineType() {
            return machineType;
        }

        private String zone() {
            return zone;
        }
    }

    /** Attributes of one attached disk as reported by the metadata server. */
    private static final class GcpDiskInfo {
        private final String deviceName;
        private final String diskKind;
        private final String interfaceType;
        private final String diskTypeHint;

        private GcpDiskInfo(String deviceName, String diskKind, String interfaceType, String diskTypeHint) {
            this.deviceName = deviceName;
            this.diskKind = diskKind;
            this.interfaceType = interfaceType;
            this.diskTypeHint = diskTypeHint;
        }

        private String deviceName() {
            return deviceName;
        }

        private String diskKind() {
            return diskKind;
        }

        private String interfaceType() {
            return interfaceType;
        }

        private String diskTypeHint() {
            return diskTypeHint;
        }
    }

    /** Metadata disks keyed by device name, plus google-* aliases keyed by normalized device. */
    private static final class GcpDiskData {
        private final Map<String, GcpDiskInfo> byDeviceName;
        private final Map<String, List<String>> aliasesByNormalizedDevice;

        private GcpDiskData(Map<String, GcpDiskInfo> byDeviceName, Map<String, List<String>> aliasesByNormalizedDevice) {
            this.byDeviceName = Objects.requireNonNull(byDeviceName, "byDeviceName");
            this.aliasesByNormalizedDevice = Objects.requireNonNull(aliasesByNormalizedDevice, "aliasesByNormalizedDevice");
        }

        private Map<String, GcpDiskInfo> byDeviceName() {
            return byDeviceName;
        }

        private Map<String, List<String>> aliasesByNormalizedDevice() {
            return aliasesByNormalizedDevice;
        }

        private static GcpDiskData empty() {
            return new GcpDiskData(Map.of(), Map.of());
        }
    }

    /** Everything learned about the block device behind a mount; any field may be null. */
    private static final class DiskResolution {
        private final String normalizedDevice;
        private final String deviceName;
        private final String diskKind;
        private final String interfaceType;
        private final String diskTypeHint;
        private final Boolean rotational;

        private DiskResolution(String normalizedDevice,
                               String deviceName,
                               String diskKind,
                               String interfaceType,
                               String diskTypeHint,
                               Boolean rotational) {
            this.normalizedDevice = normalizedDevice;
            this.deviceName = deviceName;
            this.diskKind = diskKind;
            this.interfaceType = interfaceType;
            this.diskTypeHint = diskTypeHint;
            this.rotational = rotational;
        }

        private static DiskResolution empty() {
            return new DiskResolution(null, null, null, null, null, null);
        }

        private String normalizedDevice() {
            return normalizedDevice;
        }

        private String deviceName() {
            return deviceName;
        }

        private String diskKind() {
            return diskKind;
        }

        private String interfaceType() {
            return interfaceType;
        }

        private String diskTypeHint() {
            return diskTypeHint;
        }

        private Boolean rotational() {
            return rotational;
        }
    }
}
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.github.jbellis.jvector.example.util.storage; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.regex.Pattern; +import java.util.stream.Stream; + +/** + * Best-effort storage inspection utility for non-cloud environments. + * Supports Linux, macOS, and Windows using local OS signals and common mount metadata. 
+ */ +public final class LocalStorageLayoutUtil { + private static final Pattern LINUX_NVME_PARTITION_SUFFIX = Pattern.compile("p\\d+$"); + private static final Pattern GENERIC_PARTITION_SUFFIX = Pattern.compile("\\d+$"); + private static final Pattern MAC_MOUNT_PATTERN = Pattern.compile("^(.+) on (.+) \\((.+)\\)$"); + private static final Pattern MAC_DISK_SLICE_SUFFIX = Pattern.compile("s\\d+$"); + private static final Set NETWORK_FILESYSTEM_TYPES = + Set.of("nfs", "nfs4", "efs", "cifs", "smbfs", "fuse.sshfs", "afpfs", "webdav", "davfs"); + + private LocalStorageLayoutUtil() { + } + + public enum StorageClass { + LOCAL_HDD, + LOCAL_SSD, + LOCAL_NVME, + NETWORK_FILESYSTEM, + MEMORY_TMPFS, + PSEUDO_FILESYSTEM, + UNKNOWN + } + + public static final class StorageSnapshot { + private final String osName; + private final Map mountsByMountPoint; + + public StorageSnapshot(String osName, Map mountsByMountPoint) { + this.osName = osName; + this.mountsByMountPoint = Objects.requireNonNull(mountsByMountPoint, "mountsByMountPoint"); + } + + public String osName() { + return osName; + } + + public Map mountsByMountPoint() { + return mountsByMountPoint; + } + } + + public static final class MountStorageInfo { + private final String mountPoint; + private final String source; + private final String filesystemType; + private final StorageClass storageClass; + private final String osHint; + + public MountStorageInfo(String mountPoint, + String source, + String filesystemType, + StorageClass storageClass, + String osHint) { + this.mountPoint = mountPoint; + this.source = source; + this.filesystemType = filesystemType; + this.storageClass = Objects.requireNonNull(storageClass, "storageClass"); + this.osHint = osHint; + } + + public String mountPoint() { + return mountPoint; + } + + public String source() { + return source; + } + + public String filesystemType() { + return filesystemType; + } + + public StorageClass storageClass() { + return storageClass; + } + + public String 
osHint() { + return osHint; + } + } + + public static StorageSnapshot inspectStorage() { + var os = safeLower(System.getProperty("os.name")); + List mounts; + if (isLinux(os)) { + mounts = readLinuxMountEntries(); + } else if (isMac(os)) { + mounts = readMacMountEntries(); + } else if (isWindows(os)) { + mounts = readWindowsMountEntries(); + } else { + mounts = readGenericMountEntries(); + } + + mounts.sort(Comparator.comparing(MountEntry::mountPoint)); + var byMountPoint = new LinkedHashMap(mounts.size()); + for (var mount : mounts) { + StorageClass storageClass; + String osHint; + if (isLinux(os)) { + storageClass = classifyLinux(mount); + osHint = "linux"; + } else if (isMac(os)) { + storageClass = classifyMac(mount); + osHint = "macos"; + } else if (isWindows(os)) { + storageClass = classifyWindows(mount); + osHint = "windows"; + } else { + storageClass = classifyGeneric(mount); + osHint = "generic"; + } + + byMountPoint.put( + mount.mountPoint(), + new MountStorageInfo( + mount.mountPoint(), + mount.source(), + mount.filesystemType(), + storageClass, + osHint + ) + ); + } + + return new StorageSnapshot( + System.getProperty("os.name"), + Collections.unmodifiableMap(byMountPoint) + ); + } + + public static Map storageClassByMountPoint() { + var snapshot = inspectStorage(); + var byMountPoint = new LinkedHashMap(snapshot.mountsByMountPoint().size()); + for (var entry : snapshot.mountsByMountPoint().entrySet()) { + byMountPoint.put(entry.getKey(), entry.getValue().storageClass()); + } + return Collections.unmodifiableMap(byMountPoint); + } + + private static List readLinuxMountEntries() { + var mountsPath = Files.isReadable(Path.of("/proc/self/mounts")) + ? 
Path.of("/proc/self/mounts") + : Path.of("/proc/mounts"); + if (!Files.isReadable(mountsPath)) { + return new ArrayList<>(); + } + + var entries = new ArrayList(); + try (Stream lines = Files.lines(mountsPath)) { + lines.forEach(line -> { + var parts = line.split(" "); + if (parts.length < 3) { + return; + } + entries.add(new MountEntry( + decodeMountToken(parts[0]), + decodeMountToken(parts[1]), + decodeMountToken(parts[2]) + )); + }); + } catch (IOException ignored) { + return new ArrayList<>(); + } + return entries; + } + + private static List readMacMountEntries() { + var entries = new ArrayList(); + for (String line : runCommandLines("mount")) { + var matcher = MAC_MOUNT_PATTERN.matcher(line); + if (!matcher.matches()) { + continue; + } + var source = matcher.group(1).trim(); + var mountPoint = matcher.group(2).trim(); + var options = matcher.group(3).trim(); + var fsType = options.split(",")[0].trim(); + entries.add(new MountEntry(source, mountPoint, fsType)); + } + if (entries.isEmpty()) { + return readGenericMountEntries(); + } + return entries; + } + + private static List readWindowsMountEntries() { + var entries = new ArrayList(); + var roots = File.listRoots(); + if (roots == null) { + return entries; + } + for (var root : roots) { + if (root == null) { + continue; + } + var path = root.toPath(); + String fsType = "unknown"; + try { + fsType = Files.getFileStore(path).type(); + } catch (IOException ignored) { + // keep default + } + entries.add(new MountEntry(root.getPath(), root.getPath(), fsType)); + } + return entries; + } + + private static List readGenericMountEntries() { + var entries = new ArrayList(); + var roots = File.listRoots(); + if (roots == null) { + return entries; + } + for (var root : roots) { + if (root == null) { + continue; + } + String fsType = "unknown"; + try { + fsType = Files.getFileStore(root.toPath()).type(); + } catch (IOException ignored) { + // keep default + } + entries.add(new MountEntry(root.getPath(), root.getPath(), 
fsType)); + } + return entries; + } + + private static StorageClass classifyLinux(MountEntry mount) { + var fsType = safeLower(mount.filesystemType()); + var source = mount.source(); + var sourceLower = safeLower(source); + + if ("tmpfs".equals(fsType) || "ramfs".equals(fsType)) { + return StorageClass.MEMORY_TMPFS; + } + if (NETWORK_FILESYSTEM_TYPES.contains(fsType) || sourceLower.startsWith("//")) { + return StorageClass.NETWORK_FILESYSTEM; + } + if (isPseudoFileSystem(fsType, sourceLower)) { + return StorageClass.PSEUDO_FILESYSTEM; + } + + if (source != null && source.startsWith("/dev/")) { + var normalized = normalizeLinuxDevice(sourceLower); + if (normalized.contains("nvme")) { + return StorageClass.LOCAL_NVME; + } + + Boolean rotational = readLinuxRotationalFlag(normalized); + if (Boolean.TRUE.equals(rotational)) { + return StorageClass.LOCAL_HDD; + } + if (Boolean.FALSE.equals(rotational)) { + return StorageClass.LOCAL_SSD; + } + return StorageClass.UNKNOWN; + } + return StorageClass.UNKNOWN; + } + + private static StorageClass classifyMac(MountEntry mount) { + var fsType = safeLower(mount.filesystemType()); + var source = mount.source(); + var sourceLower = safeLower(source); + + if ("devfs".equals(fsType) || "autofs".equals(fsType) || "procfs".equals(fsType)) { + return StorageClass.PSEUDO_FILESYSTEM; + } + if ("tmpfs".equals(fsType) || "ramfs".equals(fsType)) { + return StorageClass.MEMORY_TMPFS; + } + if (NETWORK_FILESYSTEM_TYPES.contains(fsType) || sourceLower.startsWith("//")) { + return StorageClass.NETWORK_FILESYSTEM; + } + + if (source != null && source.startsWith("/dev/")) { + var diskInfo = readMacDiskInfo(source); + if (diskInfo.protocolNvme) { + return StorageClass.LOCAL_NVME; + } + if (diskInfo.solidState != null) { + return diskInfo.solidState ? 
StorageClass.LOCAL_SSD : StorageClass.LOCAL_HDD; + } + if (sourceLower.contains("nvme")) { + return StorageClass.LOCAL_NVME; + } + return StorageClass.UNKNOWN; + } + return StorageClass.UNKNOWN; + } + + private static StorageClass classifyWindows(MountEntry mount) { + var fsType = safeLower(mount.filesystemType()); + var source = mount.source(); + var sourceLower = safeLower(source); + + if (NETWORK_FILESYSTEM_TYPES.contains(fsType) + || fsType.contains("smb") + || fsType.contains("cifs") + || sourceLower.startsWith("\\\\")) { + return StorageClass.NETWORK_FILESYSTEM; + } + if (fsType.contains("tmp") || fsType.contains("ram")) { + return StorageClass.MEMORY_TMPFS; + } + + // Generic stub: fixed drives are treated as local SSD class when media specifics are unavailable. + if (source != null && source.matches("^[A-Za-z]:\\\\.*")) { + return StorageClass.LOCAL_SSD; + } + return StorageClass.UNKNOWN; + } + + private static StorageClass classifyGeneric(MountEntry mount) { + var fsType = safeLower(mount.filesystemType()); + if ("tmpfs".equals(fsType) || "ramfs".equals(fsType)) { + return StorageClass.MEMORY_TMPFS; + } + if (NETWORK_FILESYSTEM_TYPES.contains(fsType)) { + return StorageClass.NETWORK_FILESYSTEM; + } + return StorageClass.UNKNOWN; + } + + private static boolean isPseudoFileSystem(String fsType, String sourceLower) { + return fsType.equals("proc") + || fsType.equals("sysfs") + || fsType.equals("devpts") + || fsType.equals("devtmpfs") + || fsType.equals("cgroup") + || fsType.equals("cgroup2") + || fsType.equals("autofs") + || fsType.equals("mqueue") + || fsType.equals("tracefs") + || fsType.equals("pstore") + || fsType.equals("securityfs") + || fsType.equals("debugfs") + || fsType.equals("configfs") + || fsType.equals("fusectl") + || fsType.equals("binfmt_misc") + || fsType.equals("rpc_pipefs") + || sourceLower.equals("proc") + || sourceLower.equals("sysfs") + || sourceLower.equals("tmpfs"); + } + + private static String normalizeLinuxDevice(String device) { + 
if (!device.startsWith("/dev/")) { + return device; + } + if (device.startsWith("/dev/nvme")) { + return LINUX_NVME_PARTITION_SUFFIX.matcher(device).replaceAll(""); + } + return GENERIC_PARTITION_SUFFIX.matcher(device).replaceAll(""); + } + + private static Boolean readLinuxRotationalFlag(String normalizedDevice) { + if (normalizedDevice == null || !normalizedDevice.startsWith("/dev/")) { + return null; + } + var blockName = normalizedDevice.substring("/dev/".length()); + var rotaPath = Path.of("/sys/class/block", blockName, "queue", "rotational"); + if (!Files.isReadable(rotaPath)) { + return null; + } + try { + var value = Files.readString(rotaPath).trim(); + if ("1".equals(value)) { + return Boolean.TRUE; + } + if ("0".equals(value)) { + return Boolean.FALSE; + } + } catch (IOException ignored) { + return null; + } + return null; + } + + private static MacDiskInfo readMacDiskInfo(String sourceDevice) { + var base = sourceDevice; + var slash = sourceDevice.lastIndexOf('/'); + if (slash >= 0 && slash + 1 < sourceDevice.length()) { + var leaf = sourceDevice.substring(slash + 1); + if (leaf.startsWith("disk")) { + leaf = MAC_DISK_SLICE_SUFFIX.matcher(leaf).replaceAll(""); + base = "/dev/" + leaf; + } + } + + Boolean solidState = null; + boolean protocolNvme = false; + for (String line : runCommandLines("diskutil", "info", base)) { + var trimmed = line.trim(); + var lower = safeLower(trimmed); + if (lower.startsWith("solid state:")) { + solidState = lower.endsWith("yes"); + } else if (lower.startsWith("protocol:")) { + protocolNvme = lower.contains("nvme"); + } else if (lower.startsWith("device / media name:") && lower.contains("nvme")) { + protocolNvme = true; + } + } + return new MacDiskInfo(solidState, protocolNvme); + } + + private static List runCommandLines(String... 
command) { + var lines = new ArrayList(); + var pb = new ProcessBuilder(command); + pb.redirectErrorStream(true); + try { + var process = pb.start(); + try (var reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) { + String line; + while ((line = reader.readLine()) != null) { + lines.add(line); + } + } + process.waitFor(); + } catch (IOException | InterruptedException ignored) { + if (ignored instanceof InterruptedException) { + Thread.currentThread().interrupt(); + } + } + return lines; + } + + private static String decodeMountToken(String token) { + return token + .replace("\\040", " ") + .replace("\\011", "\t") + .replace("\\012", "\n") + .replace("\\134", "\\"); + } + + private static boolean isLinux(String osNameLower) { + return osNameLower.contains("linux"); + } + + private static boolean isMac(String osNameLower) { + return osNameLower.contains("mac") || osNameLower.contains("darwin"); + } + + private static boolean isWindows(String osNameLower) { + return osNameLower.contains("win"); + } + + private static String safeLower(String value) { + return value == null ? 
"" : value.toLowerCase(Locale.ROOT); + } + + private static final class MountEntry { + private final String source; + private final String mountPoint; + private final String filesystemType; + + private MountEntry(String source, String mountPoint, String filesystemType) { + this.source = source; + this.mountPoint = mountPoint; + this.filesystemType = filesystemType; + } + + private String source() { + return source; + } + + private String mountPoint() { + return mountPoint; + } + + private String filesystemType() { + return filesystemType; + } + } + + private static final class MacDiskInfo { + private final Boolean solidState; + private final boolean protocolNvme; + + private MacDiskInfo(Boolean solidState, boolean protocolNvme) { + this.solidState = solidState; + this.protocolNvme = protocolNvme; + } + } +} diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/StorageLayoutUtil.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/StorageLayoutUtil.java new file mode 100644 index 000000000..bfbaed234 --- /dev/null +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/StorageLayoutUtil.java @@ -0,0 +1,584 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.github.jbellis.jvector.example.util.storage; + +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.ec2.Ec2Client; +import software.amazon.awssdk.services.ec2.model.DescribeInstancesRequest; +import software.amazon.awssdk.services.ec2.model.DescribeVolumesRequest; +import software.amazon.awssdk.services.ec2.model.InstanceBlockDeviceMapping; +import software.amazon.awssdk.services.ec2.model.Volume; + +import java.io.IOException; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.regex.Pattern; +import java.util.stream.Stream; + +/** + * Detects EC2 runtime context via IMDSv2 and classifies storage for each mounted filesystem. 
+ */ +public final class StorageLayoutUtil { + private static final String AWS_EC2_METADATA_DISABLED = "AWS_EC2_METADATA_DISABLED"; + private static final URI IMDS_TOKEN_URI = URI.create("http://169.254.169.254/latest/api/token"); + private static final URI IMDS_IDENTITY_URI = URI.create("http://169.254.169.254/latest/dynamic/instance-identity/document"); + private static final Duration IMDS_TIMEOUT = Duration.ofMillis(300); + private static final String IMDS_TOKEN_HEADER = "X-aws-ec2-metadata-token"; + private static final String IMDS_TOKEN_TTL_HEADER = "X-aws-ec2-metadata-token-ttl-seconds"; + + private static final Pattern JSON_FIELD_PATTERN = Pattern.compile("\"([^\"]+)\"\\s*:\\s*\"([^\"]+)\""); + private static final Pattern VOL_ID_PATTERN = Pattern.compile("vol-?[0-9a-fA-F]+"); + private static final Pattern NVME_PARTITION_SUFFIX = Pattern.compile("p\\d+$"); + private static final Pattern GENERIC_PARTITION_SUFFIX = Pattern.compile("\\d+$"); + private static final Set NETWORK_FILESYSTEM_TYPES = Set.of("nfs", "nfs4", "efs", "cifs", "smbfs", "fuse.sshfs"); + + private StorageLayoutUtil() { + } + + public enum StorageClass { + // Slowest EBS tiers + EBS_COLD_HDD, + EBS_THROUGHPUT_HDD, + EBS_MAGNETIC, + + // Faster EBS SSD tiers + EBS_GP2, + EBS_GP3, + EBS_PROVISIONED_IOPS_SSD, + + // Local instance storage + INSTANCE_STORE_SSD, + INSTANCE_STORE_NVME, + + // Non-block storage + NETWORK_FILESYSTEM, + MEMORY_TMPFS, + PSEUDO_FILESYSTEM, + UNKNOWN + } + + public static final class StorageSnapshot { + private final boolean runningOnEc2; + private final String instanceId; + private final String instanceType; + private final String region; + private final Map mountsByMountPoint; + + public StorageSnapshot(boolean runningOnEc2, + String instanceId, + String instanceType, + String region, + Map mountsByMountPoint) { + this.runningOnEc2 = runningOnEc2; + this.instanceId = instanceId; + this.instanceType = instanceType; + this.region = region; + this.mountsByMountPoint = 
Objects.requireNonNull(mountsByMountPoint, "mountsByMountPoint"); + } + + public boolean runningOnEc2() { + return runningOnEc2; + } + + public String instanceId() { + return instanceId; + } + + public String instanceType() { + return instanceType; + } + + public String region() { + return region; + } + + public Map mountsByMountPoint() { + return mountsByMountPoint; + } + } + + public static final class MountStorageInfo { + private final String mountPoint; + private final String source; + private final String filesystemType; + private final StorageClass storageClass; + private final String volumeId; + private final String volumeType; + + public MountStorageInfo(String mountPoint, + String source, + String filesystemType, + StorageClass storageClass, + String volumeId, + String volumeType) { + this.mountPoint = mountPoint; + this.source = source; + this.filesystemType = filesystemType; + this.storageClass = Objects.requireNonNull(storageClass, "storageClass"); + this.volumeId = volumeId; + this.volumeType = volumeType; + } + + public String mountPoint() { + return mountPoint; + } + + public String source() { + return source; + } + + public String filesystemType() { + return filesystemType; + } + + public StorageClass storageClass() { + return storageClass; + } + + public String volumeId() { + return volumeId; + } + + public String volumeType() { + return volumeType; + } + } + + public static StorageSnapshot inspectStorage() { + var identity = fetchEc2Identity(); + var mounts = readMountEntries(); + var ec2Data = identity.map(StorageLayoutUtil::fetchEc2VolumeData).orElse(Ec2VolumeData.empty()); + + mounts.sort(Comparator.comparing(MountEntry::mountPoint)); + var byMountPoint = new LinkedHashMap(mounts.size()); + for (var mount : mounts) { + var resolvedVolumeId = resolveVolumeId(mount.source(), ec2Data); + var volumeType = resolvedVolumeId == null ? 
null : ec2Data.volumeTypeById().get(resolvedVolumeId); + var storageClass = classify(mount, resolvedVolumeId, volumeType); + byMountPoint.put( + mount.mountPoint(), + new MountStorageInfo( + mount.mountPoint(), + mount.source(), + mount.filesystemType(), + storageClass, + resolvedVolumeId, + volumeType + ) + ); + } + + return new StorageSnapshot( + identity.isPresent(), + identity.map(Ec2Identity::instanceId).orElse(null), + identity.map(Ec2Identity::instanceType).orElse(null), + identity.map(Ec2Identity::region).orElse(null), + Collections.unmodifiableMap(byMountPoint) + ); + } + + public static Map storageClassByMountPoint() { + var snapshot = inspectStorage(); + var byMountPoint = new LinkedHashMap(snapshot.mountsByMountPoint().size()); + for (var entry : snapshot.mountsByMountPoint().entrySet()) { + byMountPoint.put(entry.getKey(), entry.getValue().storageClass()); + } + return Collections.unmodifiableMap(byMountPoint); + } + + private static Optional fetchEc2Identity() { + var imdsDisabled = System.getenv(AWS_EC2_METADATA_DISABLED); + if (imdsDisabled != null && "true".equalsIgnoreCase(imdsDisabled)) { + return Optional.empty(); + } + + var client = HttpClient.newBuilder() + .connectTimeout(IMDS_TIMEOUT) + .build(); + try { + var tokenRequest = HttpRequest.newBuilder(IMDS_TOKEN_URI) + .timeout(IMDS_TIMEOUT) + .header(IMDS_TOKEN_TTL_HEADER, "60") + .method("PUT", HttpRequest.BodyPublishers.noBody()) + .build(); + var tokenResponse = client.send(tokenRequest, HttpResponse.BodyHandlers.ofString()); + if (tokenResponse.statusCode() != 200) { + return Optional.empty(); + } + + var token = tokenResponse.body(); + if (token == null || token.isBlank()) { + return Optional.empty(); + } + + var identityRequest = HttpRequest.newBuilder(IMDS_IDENTITY_URI) + .timeout(IMDS_TIMEOUT) + .header(IMDS_TOKEN_HEADER, token) + .GET() + .build(); + var identityResponse = client.send(identityRequest, HttpResponse.BodyHandlers.ofString()); + if (identityResponse.statusCode() != 200) { 
+ return Optional.empty(); + } + + return parseIdentity(identityResponse.body()); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return Optional.empty(); + } catch (IOException e) { + return Optional.empty(); + } + } + + private static Optional parseIdentity(String json) { + if (json == null || json.isBlank()) { + return Optional.empty(); + } + var values = new LinkedHashMap(); + var matcher = JSON_FIELD_PATTERN.matcher(json); + while (matcher.find()) { + values.put(matcher.group(1), matcher.group(2)); + } + + var instanceId = values.get("instanceId"); + var instanceType = values.get("instanceType"); + var region = values.get("region"); + if (instanceId == null || instanceType == null || region == null) { + return Optional.empty(); + } + return Optional.of(new Ec2Identity(instanceId, instanceType, region)); + } + + private static Ec2VolumeData fetchEc2VolumeData(Ec2Identity identity) { + var deviceNameToVolumeId = new LinkedHashMap(); + var volumeTypeById = new LinkedHashMap(); + var nvmeDeviceToVolumeId = mapNvmeDevicesToVolumeIds(); + + try (var ec2 = Ec2Client.builder().region(Region.of(identity.region())).build()) { + var instanceRequest = DescribeInstancesRequest.builder() + .instanceIds(identity.instanceId()) + .build(); + var instanceResponse = ec2.describeInstances(instanceRequest); + var reservations = instanceResponse.reservations(); + if (reservations != null) { + for (var reservation : reservations) { + for (var instance : reservation.instances()) { + for (InstanceBlockDeviceMapping mapping : instance.blockDeviceMappings()) { + if (mapping.ebs() == null || mapping.ebs().volumeId() == null || mapping.deviceName() == null) { + continue; + } + deviceNameToVolumeId.put(normalizeDevice(mapping.deviceName()), mapping.ebs().volumeId()); + } + } + } + } + + if (!deviceNameToVolumeId.isEmpty()) { + var volumeResponse = ec2.describeVolumes(DescribeVolumesRequest.builder() + .volumeIds(deviceNameToVolumeId.values()) + .build()); + for 
(Volume volume : volumeResponse.volumes()) { + if (volume.volumeId() != null && volume.volumeType() != null) { + volumeTypeById.put(volume.volumeId(), volume.volumeTypeAsString()); + } + } + } + } catch (RuntimeException ignored) { + // If IAM permissions or service calls fail, we still return mount classifications. + } + + return new Ec2VolumeData(deviceNameToVolumeId, nvmeDeviceToVolumeId, volumeTypeById); + } + + private static List readMountEntries() { + var mountsPath = Files.isReadable(Path.of("/proc/self/mounts")) + ? Path.of("/proc/self/mounts") + : Path.of("/proc/mounts"); + + if (!Files.isReadable(mountsPath)) { + return new ArrayList<>(); + } + + var entries = new ArrayList(); + try (Stream lines = Files.lines(mountsPath)) { + lines.forEach(line -> { + var parts = line.split(" "); + if (parts.length < 3) { + return; + } + var source = decodeMountToken(parts[0]); + var mountPoint = decodeMountToken(parts[1]); + var filesystemType = decodeMountToken(parts[2]); + entries.add(new MountEntry(source, mountPoint, filesystemType)); + }); + } catch (IOException ignored) { + return new ArrayList<>(); + } + return entries; + } + + private static Map mapNvmeDevicesToVolumeIds() { + var byIdDir = Path.of("/dev/disk/by-id"); + if (!Files.isDirectory(byIdDir)) { + return Map.of(); + } + + var mapping = new LinkedHashMap(); + try (Stream entries = Files.list(byIdDir)) { + entries.filter(Files::isSymbolicLink).forEach(link -> { + var name = link.getFileName().toString(); + if (!name.startsWith("nvme-Amazon_Elastic_Block_Store_")) { + return; + } + var volumeId = extractVolumeId(name); + if (volumeId == null) { + return; + } + + try { + var target = normalizeDevice(link.toRealPath().toString()); + mapping.put(target, volumeId); + } catch (IOException ignored) { + // continue + } + }); + } catch (IOException ignored) { + return Map.of(); + } + return mapping; + } + + private static String resolveVolumeId(String mountSource, Ec2VolumeData ec2Data) { + if (mountSource == 
null || !mountSource.startsWith("/dev/")) { + return null; + } + + var normalized = normalizeDevice(mountSource); + var byNvme = ec2Data.nvmeDeviceToVolumeId().get(normalized); + if (byNvme != null) { + return byNvme; + } + return ec2Data.deviceNameToVolumeId().get(normalized); + } + + private static StorageClass classify(MountEntry mount, String volumeId, String volumeType) { + var fsType = safeLower(mount.filesystemType()); + var source = mount.source(); + var sourceLower = safeLower(source); + + if ("tmpfs".equals(fsType)) { + return StorageClass.MEMORY_TMPFS; + } + if (NETWORK_FILESYSTEM_TYPES.contains(fsType)) { + return StorageClass.NETWORK_FILESYSTEM; + } + if (isPseudoFileSystem(fsType, sourceLower)) { + return StorageClass.PSEUDO_FILESYSTEM; + } + + if (volumeId != null) { + return mapEbsVolumeType(volumeType); + } + + if (source != null && source.startsWith("/dev/")) { + if (sourceLower.contains("nvme")) { + return StorageClass.INSTANCE_STORE_NVME; + } + return StorageClass.INSTANCE_STORE_SSD; + } + return StorageClass.UNKNOWN; + } + + private static StorageClass mapEbsVolumeType(String volumeType) { + if (volumeType == null) { + return StorageClass.EBS_GP3; + } + + switch (safeLower(volumeType)) { + case "sc1": + return StorageClass.EBS_COLD_HDD; + case "st1": + return StorageClass.EBS_THROUGHPUT_HDD; + case "standard": + return StorageClass.EBS_MAGNETIC; + case "io1": + case "io2": + return StorageClass.EBS_PROVISIONED_IOPS_SSD; + case "gp2": + return StorageClass.EBS_GP2; + case "gp3": + return StorageClass.EBS_GP3; + default: + return StorageClass.EBS_GP3; + } + } + + private static boolean isPseudoFileSystem(String fsType, String sourceLower) { + return fsType.equals("proc") + || fsType.equals("sysfs") + || fsType.equals("devpts") + || fsType.equals("devtmpfs") + || fsType.equals("cgroup") + || fsType.equals("cgroup2") + || fsType.equals("autofs") + || fsType.equals("mqueue") + || fsType.equals("tracefs") + || fsType.equals("pstore") + || 
fsType.equals("securityfs") + || fsType.equals("debugfs") + || fsType.equals("configfs") + || fsType.equals("fusectl") + || fsType.equals("binfmt_misc") + || fsType.equals("rpc_pipefs") + || sourceLower.equals("proc") + || sourceLower.equals("sysfs") + || sourceLower.equals("tmpfs"); + } + + private static String decodeMountToken(String token) { + return token + .replace("\\040", " ") + .replace("\\011", "\t") + .replace("\\012", "\n") + .replace("\\134", "\\"); + } + + private static String extractVolumeId(String value) { + var matcher = VOL_ID_PATTERN.matcher(value); + if (!matcher.find()) { + return null; + } + var raw = matcher.group(); + if (raw.startsWith("vol-")) { + return raw.toLowerCase(Locale.ROOT); + } + return "vol-" + raw.substring(3).toLowerCase(Locale.ROOT); + } + + private static String normalizeDevice(String device) { + if (device == null) { + return null; + } + if (!device.startsWith("/dev/")) { + return device; + } + + if (device.startsWith("/dev/nvme")) { + return NVME_PARTITION_SUFFIX.matcher(device).replaceAll(""); + } + return GENERIC_PARTITION_SUFFIX.matcher(device).replaceAll(""); + } + + private static String safeLower(String value) { + return value == null ? 
"" : value.toLowerCase(Locale.ROOT); + } + + private static final class MountEntry { + private final String source; + private final String mountPoint; + private final String filesystemType; + + private MountEntry(String source, String mountPoint, String filesystemType) { + this.source = source; + this.mountPoint = mountPoint; + this.filesystemType = filesystemType; + } + + private String source() { + return source; + } + + private String mountPoint() { + return mountPoint; + } + + private String filesystemType() { + return filesystemType; + } + } + + private static final class Ec2Identity { + private final String instanceId; + private final String instanceType; + private final String region; + + private Ec2Identity(String instanceId, String instanceType, String region) { + this.instanceId = instanceId; + this.instanceType = instanceType; + this.region = region; + } + + private String instanceId() { + return instanceId; + } + + private String instanceType() { + return instanceType; + } + + private String region() { + return region; + } + } + + private static final class Ec2VolumeData { + private final Map deviceNameToVolumeId; + private final Map nvmeDeviceToVolumeId; + private final Map volumeTypeById; + + private Ec2VolumeData(Map deviceNameToVolumeId, + Map nvmeDeviceToVolumeId, + Map volumeTypeById) { + Objects.requireNonNull(deviceNameToVolumeId, "deviceNameToVolumeId"); + Objects.requireNonNull(nvmeDeviceToVolumeId, "nvmeDeviceToVolumeId"); + Objects.requireNonNull(volumeTypeById, "volumeTypeById"); + this.deviceNameToVolumeId = deviceNameToVolumeId; + this.nvmeDeviceToVolumeId = nvmeDeviceToVolumeId; + this.volumeTypeById = volumeTypeById; + } + + private Map deviceNameToVolumeId() { + return deviceNameToVolumeId; + } + + private Map nvmeDeviceToVolumeId() { + return nvmeDeviceToVolumeId; + } + + private Map volumeTypeById() { + return volumeTypeById; + } + + private static Ec2VolumeData empty() { + return new Ec2VolumeData(Map.of(), Map.of(), Map.of()); + } 
+ } +} diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/package-info.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/package-info.java new file mode 100644 index 000000000..a553c8b23 --- /dev/null +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/storage/package-info.java @@ -0,0 +1,28 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Provides utilities for characterizing the underlying storage hardware and layout. + *

+ * This package contains logic to detect and classify storage tiers (e.g., Local SSD, + * Persistent Disk, Network Filesystem) across different environments including + * AWS, GCP, and local development machines. + *

+ * The primary entry point is {@link io.github.jbellis.jvector.example.util.storage.CloudStorageLayoutUtil}, + * which provides a unified view of the system's mount points and their corresponding + * {@link io.github.jbellis.jvector.example.util.storage.CloudStorageLayoutUtil.StorageClass}. + */ +package io.github.jbellis.jvector.example.util.storage; diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/yaml/TestDataPartition.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/yaml/TestDataPartition.java new file mode 100644 index 000000000..b7592f042 --- /dev/null +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/yaml/TestDataPartition.java @@ -0,0 +1,84 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.github.jbellis.jvector.example.yaml; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Specifically for defining how data is partitioned for testing compaction. 
+ */ +public class TestDataPartition { + public List numSplits; + public List splitDistribution; + + public TestDataPartition() { + this.numSplits = Collections.singletonList(1); + this.splitDistribution = Collections.singletonList(Distribution.UNIFORM); + } + + public TestDataPartition(int numSplits) { + this.numSplits = Collections.singletonList(numSplits); + this.splitDistribution = Collections.singletonList(Distribution.UNIFORM); + } + + public enum Distribution { + UNIFORM, + FIBONACCI, + LOG2N; + + public List computeSplitSizes(int total, int numSplits) { + int[] weights = new int[numSplits]; + switch (this) { + case UNIFORM: + for (int i = 0; i < numSplits; i++) weights[i] = 1; + break; + case FIBONACCI: + int a = 1, b = 2; + weights[0] = 1; + for (int i = 1; i < numSplits; i++) { + weights[i] = b; + int next = a + b; + a = b; + b = next; + } + break; + case LOG2N: + for (int i = 0; i < numSplits; i++) weights[i] = 1 << i; + break; + } + + long weightSum = 0; + for (int w : weights) weightSum += w; + + List sizes = new ArrayList<>(numSplits); + int assigned = 0; + for (int i = 0; i < numSplits; i++) { + int size; + if (i == numSplits - 1) { + size = total - assigned; + } else { + size = (int) (((long) weights[i] * total) / weightSum); + } + sizes.add(size); + assigned += size; + } + return sizes; + } + } +} diff --git a/jvector-examples/src/main/resources/log4j2.xml b/jvector-examples/src/main/resources/log4j2.xml new file mode 100644 index 000000000..83c77bced --- /dev/null +++ b/jvector-examples/src/main/resources/log4j2.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + From ce40c754f27cd9d398ca7e8ed2d36fb137a6874a Mon Sep 17 00:00:00 2001 From: dian-lun-lin Date: Tue, 14 Apr 2026 15:06:35 -0700 Subject: [PATCH 04/18] Add CompactorBenchmark and tooling JMH-based benchmark with configurable workload modes (PARTITION_AND_COMPACT, PARTITION_ONLY, COMPACT_ONLY, BUILD_FROM_SCRATCH), recall measurement, JFR recording, and JSONL result logging. 
Includes BenchmarkParamCounter for progress tracking, EventLogAnalyzer for post-run analysis, GHA workflow, and exec-maven-plugin integration. Add forced vectorization provider property to VectorizationProvider for benchmark reproducibility. --- .github/workflows/run-compaction.yml | 112 ++ benchmarks-jmh/pom.xml | 39 +- .../jvector/bench/CompactorBenchmark.java | 1036 +++++++++++++++++ .../jvector/bench/CompactorBenchmark.md | 139 +++ .../benchtools/BenchmarkParamCounter.java | 54 + .../bench/benchtools/EventLogAnalyzer.java | 470 ++++++++ .../bench/benchtools/package-info.java | 24 + benchmarks-jmh/src/main/resources/log4j2.xml | 8 +- .../jvector/vector/VectorizationProvider.java | 22 + 9 files changed, 1902 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/run-compaction.yml create mode 100644 benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.java create mode 100644 benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md create mode 100644 benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/benchtools/BenchmarkParamCounter.java create mode 100644 benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/benchtools/EventLogAnalyzer.java create mode 100644 benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/benchtools/package-info.java diff --git a/.github/workflows/run-compaction.yml b/.github/workflows/run-compaction.yml new file mode 100644 index 000000000..8d12ee161 --- /dev/null +++ b/.github/workflows/run-compaction.yml @@ -0,0 +1,112 @@ +name: Run Compaction Bench + +on: + workflow_dispatch: + inputs: + dataset: + description: 'Dataset name passed to CompactorBenchmark (-p datasetNames)' + required: false + default: 'ada002-100k' + branches: + description: 'Space-separated list of branches to benchmark' + required: false + default: 'main' + pull_request: + types: [opened, synchronize, ready_for_review] + branches: + - main + paths: + - '**/src/main/java/**' + - 
'pom.xml' + - '**/pom.xml' + +jobs: + # Job to generate the matrix configuration + generate-matrix: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - name: Generate matrix + id: set-matrix + run: | + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + BRANCHES='["main", "${{ github.head_ref }}"]' + elif [[ "${{ github.event_name }}" == "workflow_dispatch" && -n "${{ github.event.inputs.branches }}" ]]; then + BRANCHES_INPUT="${{ github.event.inputs.branches }}" + BRANCHES="[" + for branch in $BRANCHES_INPUT; do + if [[ "$BRANCHES" != "[" ]]; then + BRANCHES="$BRANCHES, " + fi + BRANCHES="$BRANCHES\"$branch\"" + done + BRANCHES="$BRANCHES]" + else + BRANCHES='["main"]' + fi + + echo "matrix={\"jdk\":[24],\"isa\":[\"isa-avx512f\"],\"branch\":$BRANCHES}" >> $GITHUB_OUTPUT + + test-compaction: + needs: generate-matrix + strategy: + matrix: ${{ fromJSON(needs.generate-matrix.outputs.matrix) }} + runs-on: ${{ matrix.isa }} + steps: + - name: Set up GCC + run: sudo apt install -y gcc + - uses: actions/checkout@v4 + - name: Set up JDK ${{ matrix.jdk }} + uses: actions/setup-java@v3 + with: + java-version: ${{ matrix.jdk }} + distribution: temurin + cache: maven + + - name: Checkout branch + uses: actions/checkout@v4 + with: + ref: ${{ matrix.branch }} + fetch-depth: 0 + + - name: Build branch + run: mvn -B -Punix-amd64-profile package --file pom.xml + + - name: Run CompactorBenchmark + id: run-benchmark + run: | + TOTAL_MEM_GB=$(free -g | awk '/^Mem:/ {print $2}') + if [[ -z "$TOTAL_MEM_GB" ]] || [[ "$TOTAL_MEM_GB" -le 0 ]]; then + TOTAL_MEM_GB=16 + fi + HALF_MEM_GB=$((TOTAL_MEM_GB / 2)) + if [[ "$HALF_MEM_GB" -lt 1 ]]; then + HALF_MEM_GB=1 + fi + + DATASET="${{ github.event.inputs.dataset }}" + if [[ -z "$DATASET" ]]; then + DATASET="ada002-100k" + fi + + SAFE_BRANCH=$(echo "${{ matrix.branch }}" | sed 's/[^A-Za-z0-9_-]/_/g') + echo "safe_branch=$SAFE_BRANCH" >> $GITHUB_OUTPUT + + java 
--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector \ + -Djvector.experimental.enable_native_vectorization=true \ + -Xmx${HALF_MEM_GB}g \ + -jar benchmarks-jmh/target/benchmarks-jmh-*.jar CompactorBenchmark \ + -p workloadMode=PARTITION_AND_COMPACT \ + -p datasetNames=$DATASET \ + -p numPartitions=4 \ + -p splitDistribution=FIBONACCI \ + -p indexPrecision=FUSEDPQ \ + -wi 0 -i 1 -f 1 + + - name: Upload compaction results + uses: actions/upload-artifact@v4 + with: + name: compaction-results-${{ matrix.isa }}-jdk${{ matrix.jdk }}-${{ steps.run-benchmark.outputs.safe_branch }} + path: target/benchmark-results/compactor-*/compactor-results.jsonl + if-no-files-found: warn diff --git a/benchmarks-jmh/pom.xml b/benchmarks-jmh/pom.xml index c82ee2707..05fe36793 100644 --- a/benchmarks-jmh/pom.xml +++ b/benchmarks-jmh/pom.xml @@ -15,6 +15,9 @@ UTF-8 22 1.37 + 2.21.10 + + @@ -53,6 +56,11 @@ log4j-slf4j2-impl 2.24.3 + + software.amazon.awssdk + ec2 + ${awssdk.version} + @@ -94,6 +102,35 @@ + + + org.codehaus.mojo + exec-maven-plugin + + + compactor + + exec + + + false + java + --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector -Djvector.experimental.enable_native_vectorization=true -cp %classpath io.github.jbellis.jvector.bench.CompactorBenchmark ${args} + + + + analyze + + exec + + + false + java + -cp %classpath io.github.jbellis.jvector.bench.benchtools.EventLogAnalyzer ${args} + + + + - \ No newline at end of file + diff --git a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.java b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.java new file mode 100644 index 000000000..597cfe40e --- /dev/null +++ b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.java @@ -0,0 +1,1036 @@ +/* + * Copyright DataStax, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.github.jbellis.jvector.bench; + +import io.github.jbellis.jvector.bench.benchtools.BenchmarkParamCounter; +import io.github.jbellis.jvector.disk.ReaderSupplier; +import io.github.jbellis.jvector.disk.ReaderSupplierFactory; +import io.github.jbellis.jvector.example.benchmarks.datasets.DataSet; +import io.github.jbellis.jvector.example.benchmarks.datasets.DataSetInfo; +import io.github.jbellis.jvector.example.benchmarks.datasets.DataSets; +import io.github.jbellis.jvector.example.reporting.GitInfo; +import io.github.jbellis.jvector.example.reporting.JfrRecorder; +import io.github.jbellis.jvector.example.reporting.JsonlWriter; +import io.github.jbellis.jvector.example.reporting.SystemStatsCollector; +import io.github.jbellis.jvector.example.reporting.ThreadAllocTracker; +import io.github.jbellis.jvector.example.util.AccuracyMetrics; +import io.github.jbellis.jvector.example.util.DataSetPartitioner; +import io.github.jbellis.jvector.example.util.storage.CloudStorageLayoutUtil; +import io.github.jbellis.jvector.example.yaml.TestDataPartition; +import io.github.jbellis.jvector.graph.*; +import io.github.jbellis.jvector.graph.disk.AbstractGraphIndexWriter; +import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex; +import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndexCompactor; +import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndexWriter; +import 
io.github.jbellis.jvector.graph.disk.OnDiskParallelGraphIndexWriter; +import io.github.jbellis.jvector.graph.disk.OrdinalMapper; +import io.github.jbellis.jvector.graph.disk.feature.Feature; +import io.github.jbellis.jvector.graph.disk.feature.FeatureId; +import io.github.jbellis.jvector.graph.disk.feature.FusedPQ; +import io.github.jbellis.jvector.graph.disk.feature.InlineVectors; +import io.github.jbellis.jvector.graph.similarity.BuildScoreProvider; +import io.github.jbellis.jvector.graph.similarity.DefaultSearchScoreProvider; +import io.github.jbellis.jvector.graph.similarity.SearchScoreProvider; +import io.github.jbellis.jvector.quantization.PQVectors; +import io.github.jbellis.jvector.quantization.ProductQuantization; +import io.github.jbellis.jvector.util.Bits; +import io.github.jbellis.jvector.util.FixedBitSet; +import io.github.jbellis.jvector.vector.VectorSimilarityFunction; +import io.github.jbellis.jvector.vector.VectorizationProvider; +import io.github.jbellis.jvector.vector.types.VectorFloat; +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.results.format.ResultFormatType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.OverlappingFileLockException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.time.Instant; +import java.util.*; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.IntFunction; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@State(Scope.Thread) +@Fork(1) +@Warmup(iterations = 0) +@Measurement(iterations = 1) +@Threads(1) +public class CompactorBenchmark { + + // RUN_DIR must be initialized before the 
Logger so log4j2's File appender + // can resolve ${sys:jvector.internal.runDir} + private static final Path RUN_DIR; + static { + String runDir = System.getProperty("jvector.internal.runDir"); + if (runDir == null) { + runDir = Path.of("target", "benchmark-results", "compactor-" + Instant.now().getEpochSecond()).toString(); + System.setProperty("jvector.internal.runDir", runDir); + } + RUN_DIR = Path.of(runDir); + try { + Files.createDirectories(RUN_DIR); + } catch (IOException e) { + throw new RuntimeException("Failed to create run directory: " + RUN_DIR, e); + } + } + + private static final Logger log = LoggerFactory.getLogger(CompactorBenchmark.class); + + public enum IndexPrecision { + FULLPRECISION, + FUSEDPQ + } + + public enum WorkloadMode { + /** + * Build per-source partitions and stop. (No compaction, no recall.) + */ + PARTITION_ONLY, + + /** + * Assume partitions exist on disk; compact them. + */ + COMPACT_ONLY, + + /** + * Assume partitions exist on disk; compact them, then run recall. + */ + COMPACT_AND_RECALL, + + /** + * Build a single graph for the whole dataset and write it. Then run recall. + */ + BUILD_FROM_SCRATCH, + /** + * (Default) Build partitions, compact them, then run recall. 
+ */ + PARTITION_AND_COMPACT + } + + private static final Path RESULTS_FILE = RUN_DIR.resolve("compactor-results.jsonl"); + private static final Path JFR_DIR = RUN_DIR.resolve("jfrs"); + private static final Path SYSTEM_DIR = RUN_DIR.resolve("system"); + private static final JsonlWriter jsonlWriter = new JsonlWriter(RESULTS_FILE); + + // In the forked JVM, main() passes the computed total via this internal property + private static final int TOTAL_TESTS = Integer.getInteger( + "jvector.internal.totalTests", + BenchmarkParamCounter.computeTotalTests(CompactorBenchmark.class, null) + ); + + private static final AtomicLong LAST_TEST_ID = new AtomicLong(0); + private static final String TEST_ID = generateTestId(); + + /** + * Generates a lexicographically sortable test ID: base36-encoded milliseconds + * followed by 2 base36 suffix chars (starting at "00"). Uses an atomic counter + * so that IDs generated within the same millisecond auto-increment instead of colliding. + */ + static String generateTestId() { + long candidate = System.currentTimeMillis() * 1296; // suffix starts at 00 + long actual = LAST_TEST_ID.updateAndGet(last -> Math.max(candidate, last + 1)); + return Long.toString(actual, 36); + } + + /** + * Returns a Bits instance representing randomly selected live nodes. 
+ * + */ + private static FixedBitSet randomLiveNodes(int size, double liveRate, long seed) { + FixedBitSet live = new FixedBitSet(size); + + if (liveRate >= 1.0) { + live.set(0, size); // all nodes live + return live; + } + + var rnd = new java.util.SplittableRandom(seed); + int liveCount = 0; + + for (int i = 0; i < size; i++) { + if (rnd.nextDouble() < liveRate) { + live.set(i); + liveCount++; + } + } + + // avoid degenerate case (all dead) + if (liveCount == 0 && size > 0) { + live.set(rnd.nextInt(size)); + } + + return live; + } + + private static final Path COUNTER_FILE = RUN_DIR.resolve("completed-count"); + private static final AtomicInteger completedTests = new AtomicInteger(readCompletedCount()); + + /** + * Read the completed test count from a dedicated counter file. + * Each JMH fork is a fresh JVM, so this file provides cross-fork continuity. + * Acquires an exclusive file lock and throws if another process holds it, + * since concurrent benchmark runs against the same RUN_DIR are not supported. + */ + private static int readCompletedCount() { + if (!Files.exists(COUNTER_FILE)) { + return 0; + } + try (var ch = FileChannel.open(COUNTER_FILE, StandardOpenOption.READ)) { + var lock = ch.tryLock(0, Long.MAX_VALUE, true); + if (lock == null) { + throw new IllegalStateException( + "Counter file is locked by another process — concurrent benchmark runs sharing " + + RUN_DIR + " are not supported"); + } + try { + return Integer.parseInt(Files.readString(COUNTER_FILE).trim()); + } finally { + lock.release(); + } + } catch (OverlappingFileLockException e) { + throw new IllegalStateException( + "Counter file is locked by another thread — concurrent benchmark runs sharing " + + RUN_DIR + " are not supported", e); + } catch (IllegalStateException e) { + throw e; + } catch (Exception e) { + // Fall back to 0 for parse errors, etc. 
+ return 0; + } + } + + private static void writeCompletedCount(int count) { + try (var ch = FileChannel.open(COUNTER_FILE, + StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING)) { + var lock = ch.tryLock(); + if (lock == null) { + throw new IllegalStateException( + "Counter file is locked by another process — concurrent benchmark runs sharing " + + RUN_DIR + " are not supported"); + } + try { + ch.write(ByteBuffer.wrap(String.valueOf(count).getBytes(StandardCharsets.UTF_8))); + } finally { + lock.release(); + } + } catch (OverlappingFileLockException e) { + throw new IllegalStateException( + "Counter file is locked by another thread — concurrent benchmark runs sharing " + + RUN_DIR + " are not supported", e); + } catch (IllegalStateException e) { + throw e; + } catch (IOException e) { + log.error("Failed to write completed count", e); + } + } + + private static final AtomicInteger workerCounter = new AtomicInteger(0); + + // ---------- Benchmark state ---------- + private RandomAccessVectorValues ravv; + private List> queryVectors; + private List> baseVectors; + private List> groundTruth; + private DataSet ds; + private VectorSimilarityFunction similarityFunction; + + private final List graphs = new ArrayList<>(); + private final List rss = new ArrayList<>(); + + private Path tempDir; + private List storagePaths; + private List vectorsPerSourceCount; + private String resolvedVectorizationProvider; + + // Paths used during execution + private Path partitionsBaseDir; // where per-source partitions are placed (or found) + private Path compactOutputPath; // where compacted graph is written + private Path scratchOutputPath; // where build-from-scratch graph is written + + // ---------- Params ---------- + @Param({"glove-100-angular"}) + public String datasetNames; + + @Param({"PARTITION_AND_COMPACT"}) + public WorkloadMode workloadMode; + + @Param({"4"}) // Default value, can be overridden via command line + public int 
numPartitions; + + @Param({"32"}) + public int graphDegree; + + @Param({"100"}) + public int beamWidth; + + /** + * liveNodesRate controls how many nodes are considered "live" per source partition + * in the live-node bitsets passed to the OnDiskGraphIndexCompactor constructor. + * + * - 1.0 => all nodes live (default; behaves like no deletions) + * - 0.8 => ~80% live, ~20% deleted (randomly selected) + */ + @Param({"1.0"}) + public double liveNodesRate; + + @Param({""}) + public String storageDirectories; + + @Param({""}) + public String storageClasses; + + @Param({"UNIFORM"}) + public TestDataPartition.Distribution splitDistribution; + + @Param({"FULLPRECISION"}) + public IndexPrecision indexPrecision; + + @Param({"1"}) + public int parallelWriteThreads; + + @Param({""}) + public String vectorizationProvider; + + @Param({"1.0"}) + public double datasetPortion; + + @Param({"false"}) + public boolean jfrPartitioning; + + @Param({"true"}) + public boolean jfrCompacting; + + @Param({"false"}) + public boolean jfrObjectCount; + + @Param({"true"}) + public boolean sysStatsEnabled; + + @Param({"false"}) + public boolean threadAllocTracking; + + private final JfrRecorder jfrPartitioningRecorder = new JfrRecorder(); + private final JfrRecorder jfrCompactingRecorder = new JfrRecorder(); + private final SystemStatsCollector sysStatsCollector = new SystemStatsCollector(); + private final ThreadAllocTracker threadAllocTracker = new ThreadAllocTracker(); + + private volatile boolean resultPersisted; + + @State(Scope.Thread) + @AuxCounters(AuxCounters.Type.EVENTS) + public static class RecallResult { + public double recall; + } + + private String jfrParamSuffix() { + return String.format("%s-w%s-n%d-d%d-bw%d-%s-%s-pw%d-%s-dp%.2f-live%.2f", + datasetNames, workloadMode, numPartitions, graphDegree, beamWidth, + splitDistribution, indexPrecision, parallelWriteThreads, resolvedVectorizationProvider, datasetPortion, liveNodesRate); + } + + @Setup(Level.Iteration) + public void setup() throws Exception { + try { +
resultPersisted = false; + Thread.currentThread().setName("compactor-" + workerCounter.incrementAndGet()); + + if (vectorizationProvider != null && !vectorizationProvider.isBlank()) { + System.setProperty("jvector.vectorization_provider", vectorizationProvider); + } + resolvedVectorizationProvider = VectorizationProvider.getInstance().getClass().getSimpleName(); + + if (sysStatsEnabled) { + String sysStatsFileName = String.format("sysstats-%s.jsonl", jfrParamSuffix()); + try { + sysStatsCollector.start(SYSTEM_DIR, sysStatsFileName); + } catch (Exception e) { + log.warn("Failed to start system stats collection", e); + } + } + + if (threadAllocTracking) { + String threadAllocFileName = String.format("threadalloc-%s.jsonl", jfrParamSuffix()); + try { + threadAllocTracker.start(SYSTEM_DIR, threadAllocFileName); + } catch (Exception e) { + log.warn("Failed to start thread allocation tracking", e); + } + } + + persistStarted(); + + validateParams(); + + int dimension; + + if (workloadMode == WorkloadMode.COMPACT_ONLY) { + ds = null; + queryVectors = null; + groundTruth = null; + ravv = null; + baseVectors = null; + dimension = -1; + + var datasetInfo = DataSets.loadDataSet(datasetNames); + similarityFunction = datasetInfo + .flatMap(DataSetInfo::similarityFunction) + .orElseGet(() -> { + log.warn("Could not determine similarity function for dataset '{}'; defaulting to COSINE", datasetNames); + return VectorSimilarityFunction.COSINE; + }); + + log.info("Skipping dataset load for COMPACT_ONLY mode without recall. 
Workload: {}, similarityFunction: {}, Live nodes rate: {}", + workloadMode, similarityFunction, liveNodesRate); + } else { + ds = DataSets.loadDataSet(datasetNames) + .orElseThrow(() -> new RuntimeException("Dataset not found: " + datasetNames)) + .getDataSet(); + + if (datasetPortion == 1.0) { + ravv = ds.getBaseRavv(); + baseVectors = ds.getBaseVectors(); + } else { + int totalVectors = ds.getBaseRavv().size(); + int portionedSize = (int) (totalVectors * datasetPortion); + if (portionedSize < Math.max(1, numPartitions)) { + throw new IllegalArgumentException( + "datasetPortion=" + datasetPortion + " yields " + portionedSize + + " vectors, fewer than numPartitions=" + numPartitions); + } + baseVectors = ds.getBaseVectors().subList(0, portionedSize); + ravv = new ListRandomAccessVectorValues(baseVectors, ds.getDimension()); + } + + queryVectors = ds.getQueryVectors(); + groundTruth = ds.getGroundTruth(); + similarityFunction = ds.getSimilarityFunction(); + dimension = ds.getDimension(); + + log.info("Dataset {} loaded with recall data. Base vectors: {} (portion {}), Query vectors: {}, Dim: {}, Similarity: {}, Workload: {}, Live nodes rate: {}", + datasetNames, ravv.size(), datasetPortion, queryVectors.size(), dimension, similarityFunction, workloadMode, liveNodesRate); + } + + // Resolve storagePaths + partitionsDir + storagePaths = resolveStoragePaths(); + partitionsBaseDir = resolvePartitionsBaseDir(storagePaths); + compactOutputPath = resolveCompactOutputPath(partitionsBaseDir); + scratchOutputPath = resolveScratchOutputPath(partitionsBaseDir); + + // Clean stale artifacts only if we're going to rebuild them. + if (workloadMode == WorkloadMode.COMPACT_ONLY || workloadMode == WorkloadMode.COMPACT_AND_RECALL) { + // For compact-only and compact-and-recall, ensure the partition files exist. 
verifyPartitionsExist(partitionsBaseDir, numPartitions); + } + + // Partition metadata for remapping (needed for compaction) + if (workloadMode == WorkloadMode.PARTITION_ONLY || workloadMode == WorkloadMode.PARTITION_AND_COMPACT) { + var partitionedData = DataSetPartitioner.partition(baseVectors, numPartitions, splitDistribution); + vectorsPerSourceCount = partitionedData.sizes; + } else { + vectorsPerSourceCount = null; + } + + // Build partitions during setup for the PARTITION_* modes (matches original benchmark structure) + if (workloadMode == WorkloadMode.PARTITION_ONLY || workloadMode == WorkloadMode.PARTITION_AND_COMPACT) { + if (jfrPartitioning) { + jfrPartitioningRecorder.start(JFR_DIR, "partitioning-" + jfrParamSuffix() + ".jfr", jfrObjectCount); + } + buildPartitions(ds, baseVectors); + if (jfrPartitioningRecorder.isActive()) { + jfrPartitioningRecorder.stop(); + } + } + + } catch (Exception e) { + persistError(e); + throw e; + } + } + + private void validateParams() { + if (workloadMode == WorkloadMode.BUILD_FROM_SCRATCH) { + log.warn("numPartitions={} ignored in BUILD_FROM_SCRATCH mode", numPartitions); + } + else { + if (numPartitions <= 1) throw new IllegalArgumentException("numPartitions must be larger than one"); + } + if (graphDegree <= 0) throw new IllegalArgumentException("graphDegree must be positive"); + if (beamWidth <= 0) throw new IllegalArgumentException("beamWidth must be positive"); + if (datasetPortion <= 0.0 || datasetPortion > 1.0) { + throw new IllegalArgumentException("datasetPortion must be in (0.0, 1.0]"); + } + if (liveNodesRate <= 0.0 || liveNodesRate > 1.0) { + throw new IllegalArgumentException("liveNodesRate must be in (0.0, 1.0]"); + } + } + + private List resolveStoragePaths() throws IOException { + // Priority: + // 1) storageDirectories (comma-separated) + // 2) temp dir + var paths = new ArrayList(); + + if (storageDirectories != null && !storageDirectories.isBlank()) { + for (String dir : storageDirectories.split(",")) { + Path
path = Path.of(dir.trim()); + if (!Files.exists(path)) Files.createDirectories(path); + if (!Files.isDirectory(path) || !Files.isWritable(path)) { + throw new IllegalArgumentException("Path is not a writable directory: " + dir); + } + paths.add(path); + } + } else { + tempDir = Files.createTempDirectory("compact-bench"); + paths.add(tempDir); + } + + // Handle storage class validation + if (storageClasses != null && !storageClasses.isBlank()) { + String[] classes = storageClasses.split(","); + if (classes.length != paths.size()) { + throw new IllegalArgumentException(String.format( + "Mismatch between number of storage classes (%d) and storage directories (%d). They must be pairwise 1:1.", + classes.length, paths.size())); + } + + var actualStorageClasses = CloudStorageLayoutUtil.storageClassByMountPoint(); + for (int i = 0; i < paths.size(); i++) { + Path path = paths.get(i).toAbsolutePath(); + CloudStorageLayoutUtil.StorageClass expected; + try { + expected = CloudStorageLayoutUtil.StorageClass.valueOf(classes[i].trim()); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Invalid StorageClass: " + classes[i], e); + } + + String bestMount = null; + for (String mountPoint : actualStorageClasses.keySet()) { + if (path.toString().startsWith(mountPoint)) { + if (bestMount == null || mountPoint.length() > bestMount.length()) { + bestMount = mountPoint; + } + } + } + + if (bestMount != null) { + CloudStorageLayoutUtil.StorageClass actual = actualStorageClasses.get(bestMount); + if (actual != expected) { + throw new IllegalStateException(String.format( + "Storage class mismatch for path %s: expected %s, found %s (mount: %s)", + path, expected, actual, bestMount)); + } + } else { + log.warn("Could not determine storage class for path {}. 
Skipping validation.", path); + } + } + } + + return paths; + } + + private Path resolvePartitionsBaseDir(List storagePaths) throws IOException { + Path p = storagePaths.get(0); + Files.createDirectories(p); + return p; + } + + private Path resolveCompactOutputPath(Path baseDir) { + return baseDir.resolve("compact-graph"); + } + + private Path resolveScratchOutputPath(Path baseDir) { + return baseDir.resolve("scratch-graph"); + } + + private void verifyPartitionsExist(Path partitionsDir, int numPartitions) { + for (int i = 0; i < numPartitions; i++) { + Path seg = partitionsDir.resolve("per-source-graph-" + i); + if (!Files.exists(seg)) { + throw new IllegalStateException("Missing partition file for COMPACT_ONLY or COMPACT_AND_RECALL: " + seg.toAbsolutePath()); + } + } + } + + private void buildPartitions(DataSet ds, List> baseVectors) throws Exception { + + var partitionedData = DataSetPartitioner.partition(baseVectors, numPartitions, splitDistribution); + vectorsPerSourceCount = partitionedData.sizes; + + log.info("Building {} partitions into {} (deg={}, bw={}, split={}, splitSizes={}, precision={}, pwThreads={}, vp={})", + numPartitions, partitionsBaseDir.toAbsolutePath(), graphDegree, beamWidth, splitDistribution, vectorsPerSourceCount, + indexPrecision, parallelWriteThreads, resolvedVectorizationProvider); + + int dimension = baseVectors.get(0).length(); + for (int i = 0; i < numPartitions; i++) { + List> vectorsPerSource = partitionedData.vectors.get(i); + + // Round-robin assignment of partition files to storage paths, but still keep canonical base dir name stable. 
+ Path baseDirForThisSegment = storagePaths.get(i % storagePaths.size()); + Path outputPath = baseDirForThisSegment.resolve("per-source-graph-" + i); + if (Files.exists(outputPath)) { + Files.delete(outputPath); + } + + log.info("Building partition {}/{}: vectors={} -> {}", + i + 1, numPartitions, vectorsPerSource.size(), outputPath.toAbsolutePath()); + + var ravvPerSource = new ListRandomAccessVectorValues(vectorsPerSource, dimension); + BuildScoreProvider bspPerSource; + ProductQuantization pq = null; + PQVectors pqVectors = null; + // TODO: should we build partitions by FUSEDPQ? + if (indexPrecision == IndexPrecision.FUSEDPQ) { + boolean centerData = similarityFunction == VectorSimilarityFunction.EUCLIDEAN; + pq = ProductQuantization.compute(ravvPerSource, dimension / 8, 256, centerData); + pqVectors = (PQVectors) pq.encodeAll(ravvPerSource); + bspPerSource = BuildScoreProvider.pqBuildScoreProvider(similarityFunction, pqVectors); + } + else { + bspPerSource = BuildScoreProvider.randomAccessScoreProvider(ravvPerSource, similarityFunction); + } + + var builder = new GraphIndexBuilder(bspPerSource, + dimension, + graphDegree, beamWidth, 1.2f, 1.2f, true); + var graph = builder.build(ravvPerSource); + + AbstractGraphIndexWriter.Builder writerBuilder; + if (parallelWriteThreads > 1) { + writerBuilder = new OnDiskParallelGraphIndexWriter.Builder(graph, outputPath) + .withParallelWorkerThreads(parallelWriteThreads); + } else { + writerBuilder = new OnDiskGraphIndexWriter.Builder(graph, outputPath); + } + + writerBuilder.with(new InlineVectors(dimension)); + + + if (indexPrecision == IndexPrecision.FUSEDPQ) { + writerBuilder.with(new FusedPQ(graph.maxDegree(), pq)); + } + + try (var writer = writerBuilder.build()) { + var suppliers = new EnumMap>(FeatureId.class); + suppliers.put(FeatureId.INLINE_VECTORS, ordinal -> new InlineVectors.State(ravvPerSource.getVector(ordinal))); + + if (indexPrecision == IndexPrecision.FUSEDPQ) { + var view = graph.getView(); + var 
finalPqVectors = pqVectors; + suppliers.put(FeatureId.FUSED_PQ, ordinal -> new FusedPQ.State(view, finalPqVectors, ordinal)); + } + + writer.write(suppliers); + } + } + + log.info("Done building partitions."); + } + + private long compactPartitions() throws Exception { + + // Load partitions (from round-robin storage paths, same naming) + for (int i = 0; i < numPartitions; i++) { + Path baseDir = storagePaths.get(i % storagePaths.size()); + Path segPath = baseDir.resolve("per-source-graph-" + i); + log.info("Loading partition {}/{} from {}", i + 1, numPartitions, segPath.toAbsolutePath()); + rss.add(ReaderSupplierFactory.open(segPath.toAbsolutePath())); + graphs.add(OnDiskGraphIndex.load(rss.get(i))); + } + + // Ensure output dir exists + if (compactOutputPath.getParent() != null) { + Files.createDirectories(compactOutputPath.getParent()); + } + + if (Files.exists(compactOutputPath)) { + Files.delete(compactOutputPath); + } + + log.info("Compacting {} partitions into {}", numPartitions, compactOutputPath.toAbsolutePath()); + + + List remappers = new ArrayList<>(numPartitions); + List liveNodes = new ArrayList<>(numPartitions); + // Remap ordinals: local [0..size-1] -> global increasing in partition order + int globalOrdinal = 0; + for (int n = 0; n < numPartitions; n++) { + int size = graphs.get(n).size(); + var remapper = new OrdinalMapper.OffsetMapper(globalOrdinal, size); + remappers.add(remapper); + liveNodes.add(randomLiveNodes(size, liveNodesRate, n)); + globalOrdinal += size; + } + var compactor = new OnDiskGraphIndexCompactor(graphs, liveNodes, remappers, similarityFunction, null); + + long startNanos = System.nanoTime(); + compactor.compact(compactOutputPath); + return TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos); + } + + private long buildFromScratch(List> baseVectors) throws Exception { + if (scratchOutputPath.getParent() != null) { + Files.createDirectories(scratchOutputPath.getParent()); + } + if (Files.exists(scratchOutputPath)) { + 
Files.delete(scratchOutputPath); + } + + int dimension = baseVectors.get(0).length(); + var full = new ListRandomAccessVectorValues(baseVectors, dimension); + ProductQuantization pq = null; + PQVectors pqVectors = null; + BuildScoreProvider bsp; + if (indexPrecision == IndexPrecision.FUSEDPQ) { + boolean centerData = similarityFunction == VectorSimilarityFunction.EUCLIDEAN; + pq = ProductQuantization.compute(full, dimension / 8, 256, centerData); + pqVectors = (PQVectors) pq.encodeAll(full); + bsp = BuildScoreProvider.pqBuildScoreProvider(similarityFunction, pqVectors); + } + else { + bsp = BuildScoreProvider.randomAccessScoreProvider(full, similarityFunction); + } + + log.info("Building from scratch: vectors={} dim={} sim={} deg={} bw={} precision={} pwThreads={} vp={} -> {}", + full.size(), dimension, similarityFunction, + graphDegree, beamWidth, indexPrecision, parallelWriteThreads, resolvedVectorizationProvider, + scratchOutputPath.toAbsolutePath()); + + var builder = new GraphIndexBuilder(bsp, dimension, graphDegree, beamWidth, 1.2f, 1.2f, true); + var graph = builder.build(full); + + AbstractGraphIndexWriter.Builder writerBuilder = + (parallelWriteThreads > 1) + ? 
new OnDiskParallelGraphIndexWriter.Builder(graph, scratchOutputPath) + .withParallelWorkerThreads(parallelWriteThreads) + : new OnDiskGraphIndexWriter.Builder(graph, scratchOutputPath); + + writerBuilder.with(new InlineVectors(dimension)); + +// ProductQuantization pq = null; +// PQVectors pqVectors = null; +// if (indexPrecision == IndexPrecision.FUSEDPQ) { +// boolean centerData = similarityFunction == VectorSimilarityFunction.EUCLIDEAN; +// pq = ProductQuantization.compute(full, dimension / 8, 256, centerData); +// pqVectors = (PQVectors) pq.encodeAll(full); +// writerBuilder.with(new FusedPQ(graph.maxDegree(), pq)); +// } + if (indexPrecision == IndexPrecision.FUSEDPQ) { + writerBuilder.with(new FusedPQ(graph.maxDegree(), pq)); + } + + long startNanos = System.nanoTime(); + try (var writer = writerBuilder.build()) { + var suppliers = new EnumMap>(FeatureId.class); + suppliers.put(FeatureId.INLINE_VECTORS, ord -> new InlineVectors.State(full.getVector(ord))); + + if (indexPrecision == IndexPrecision.FUSEDPQ) { + var view = graph.getView(); + var finalPQ = pqVectors; + suppliers.put(FeatureId.FUSED_PQ, ord -> new FusedPQ.State(view, finalPQ, ord)); + } + + writer.write(suppliers); + } + return TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos); + } + + @TearDown(Level.Iteration) + public void tearDown() throws IOException, InterruptedException { + if (threadAllocTracker.isActive()) { + threadAllocTracker.stop(); + } + + if (sysStatsCollector.isActive()) { + sysStatsCollector.stop(SYSTEM_DIR); + } + + if (jfrPartitioningRecorder.isActive()) { + jfrPartitioningRecorder.stop(); + } + if (jfrCompactingRecorder.isActive()) { + jfrCompactingRecorder.stop(); + } + + closeLoadedGraphs(); + } + + private void closeLoadedGraphs() { + for (var graph : graphs) { + try { + graph.close(); + } catch (Exception e) { + log.error("Failed to close graph", e); + } + } + graphs.clear(); + + for (var rs : rss) { + try { + rs.close(); + } catch (Exception e) { + 
log.error("Failed to close ReaderSupplier", e); + } + } + rss.clear(); + }

    /**
     * JMH benchmark entry point: executes the workload selected by {@code workloadMode} once.
     * <p>
     * Optionally starts the workload JFR recording, runs the workload, stores the measured recall in
     * {@code recallResult}, persists a "completed" event and feeds the duration to the blackhole.
     * If anything throws, an "error" event is persisted and the exception is rethrown. In every case
     * an active JFR recording is stopped and {@code closeLoadedGraphs()} is called.
     *
     * @param blackhole    JMH sink that consumes the measured duration
     * @param recallResult receives the recall value; stays at {@code -1} for modes that never call
     *                     {@link #runRecall(Path)}
     * @throws Exception whatever the selected workload throws
     */
    @Benchmark
    public void run(Blackhole blackhole, RecallResult recallResult) throws Exception {
        long durationMs = 0;
        // -1 is the "recall not measured" sentinel; it is persisted as-is by persistResult()
        double recall = -1;

        try {
            if (jfrCompacting) {
                try {
                    jfrCompactingRecorder.start(JFR_DIR, "workload-" + jfrParamSuffix() + ".jfr", jfrObjectCount);
                } catch (Exception e) {
                    // A failed recording must not fail the benchmark itself
                    log.warn("Failed to start workload JFR recording", e);
                }
            }

            // Execute workload
            switch (workloadMode) {
                case PARTITION_ONLY:
                    // nothing to do in the measured section
                    break;

                case COMPACT_ONLY:
                    durationMs = compactPartitions();
                    break;

                case COMPACT_AND_RECALL:
                    durationMs = compactPartitions();
                    recall = runRecall(compactOutputPath);
                    break;

                case BUILD_FROM_SCRATCH: {
                    durationMs = buildFromScratch(baseVectors);
                    recall = runRecall(scratchOutputPath);
                    break;
                }

                // Same measured work as COMPACT_AND_RECALL
                case PARTITION_AND_COMPACT:
                    durationMs = compactPartitions();
                    recall = runRecall(compactOutputPath);
                    break;

                default:
                    throw new IllegalStateException("Unknown workloadMode: " + workloadMode);
            }

            recallResult.recall = recall;
            persistResult(recall, durationMs);
            blackhole.consume(durationMs);

        } catch (Exception e) {
            persistError(e);
            throw e;
        } finally {
            if (jfrCompactingRecorder.isActive()) {
                jfrCompactingRecorder.stop();
            }
            closeLoadedGraphs();
        }
    }

    /**
     * Loads the on-disk index at {@code indexPath}, runs one search per query vector and returns the
     * recall reported by {@code AccuracyMetrics.recallFromSearchResults} against {@code groundTruth}.
     * <p>
     * For {@code FUSEDPQ} indexes the search uses the view's approximate score function plus reranker;
     * otherwise it scores exactly against {@code ravv}.
     *
     * @param indexPath path of the index file to open
     * @return the computed recall
     * @throws Exception if the index cannot be opened or searched
     */
    private double runRecall(Path indexPath) throws Exception {

        log.info("Loading and searching index at {}", indexPath.toAbsolutePath());
        try (var rs = ReaderSupplierFactory.open(indexPath)) {
            var graph = OnDiskGraphIndex.load(rs);
            // NOTE(review): the searcher is never closed here; confirm whether GraphSearcher holds
            // resources that should be released before the ReaderSupplier is closed.
            GraphSearcher searcher = new GraphSearcher(graph);
            var view = (ImmutableGraphIndex.ScoringView) searcher.getView();
            searcher.usePruning(false);
            // NOTE(review): raw List here (and the raw LinkedHashMap/ArrayList elsewhere in this class)
            // looks like type arguments were lost from this listing; confirm against the checked-in file.
            List retrieved = new ArrayList<>(queryVectors.size());
            for (int n = 0; n < queryVectors.size(); ++n) {
                SearchResult result;
                if(indexPrecision == IndexPrecision.FUSEDPQ) {
                    var asf = view.approximateScoreFunctionFor(queryVectors.get(n), similarityFunction);
                    var rerank = view.rerankerFor(queryVectors.get(n), similarityFunction);
                    SearchScoreProvider ssp = new DefaultSearchScoreProvider(asf, rerank);
                    // presumably (topK=10, rerankK=10, threshold=0, rerankFloor=0); verify against GraphSearcher
                    result = searcher.search(ssp, 10, 10, 0.0f, 0.0f, Bits.ALL);
                }
                else {
                    var ssp = DefaultSearchScoreProvider.exact(queryVectors.get(n), similarityFunction, ravv);
                    result = searcher.search(ssp, 10, 10, 0.0f, 0.0f, Bits.ALL);
                }
                retrieved.add(result);
            }

            double recall = AccuracyMetrics.recallFromSearchResults(groundTruth, retrieved, 10, 10);
            log.info("Recall [dataset={}, workloadMode={}, numPartitions={}, graphDegree={}, beamWidth={}, splitDistribution={}, indexPrecision={}, parallelWriteThreads={}, vectorizationProvider={}, datasetPortion={}]: {}",
                    datasetNames, workloadMode, numPartitions, graphDegree, beamWidth, splitDistribution, indexPrecision, parallelWriteThreads, resolvedVectorizationProvider, datasetPortion, recall);
            return recall;
        }
    }

    // ---------- result persistence ----------

    /**
     * Collects the benchmark parameters into an insertion-ordered map. {@link #baseResult(String)}
     * embeds this map under the {@code "params"} key of every event it builds.
     *
     * @return parameter name to current value, in a fixed order
     */
    private LinkedHashMap buildParams() {
        var params = new LinkedHashMap();
        params.put("dataset", datasetNames);
        params.put("workloadMode", workloadMode.name());
        params.put("numPartitions", numPartitions);
        params.put("graphDegree", graphDegree);
        params.put("beamWidth", beamWidth);
        params.put("storageDirectories", storageDirectories);
        params.put("storageClasses", storageClasses);
        params.put("splitDistribution", splitDistribution.name());
        params.put("indexPrecision", indexPrecision.name());
        params.put("parallelWriteThreads", parallelWriteThreads);
        params.put("vectorizationProvider", resolvedVectorizationProvider);
        params.put("datasetPortion", datasetPortion);
        params.put("jfrPartitioning", jfrPartitioning);
        params.put("jfrCompacting", jfrCompacting);
        params.put("jfrObjectCount", jfrObjectCount);
        params.put("sysStatsEnabled", sysStatsEnabled);
        params.put("threadAllocTracking", threadAllocTracking);
        params.put("liveNodesRate", liveNodesRate);
        return params;
    }

    /**
     * Builds the fields shared by every persisted event: test id, git hash, current timestamp,
     * event name, benchmark name and the parameter map.
     *
     * @param event event name, e.g. {@code "started"}, {@code "completed"} or {@code "error"}
     * @return a new insertion-ordered map the caller may extend before writing
     */
    private LinkedHashMap baseResult(String event) {
        var result = new LinkedHashMap();
        result.put("testId", TEST_ID);
        result.put("gitHash", GitInfo.getShortHash());
        result.put("timestamp", Instant.now().toString());
        result.put("event", event);
        result.put("benchmark", "run");
        result.put("params", buildParams());
        return result;
    }

    /**
     * Writes a "started" event carrying the current progress counters and logs which test is starting.
     */
    private void persistStarted() {
        var result = baseResult("started");
        result.put("completedTests", completedTests.get());
        result.put("totalTests", TOTAL_TESTS);
        jsonlWriter.writeLine(result);
        log.info("Starting test {}/{}", completedTests.get() + 1, TOTAL_TESTS);
    }

    /**
     * Writes the "completed" event for this test. Does nothing when {@code resultPersisted} is
     * already set, so the completed counter is bumped and the event written at most once per flag reset.
     *
     * @param recall     recall to record; {@code run()} passes {@code -1} when recall was not measured
     * @param durationMs duration of the measured phase in milliseconds
     */
    private void persistResult(double recall, long durationMs) {
        if (resultPersisted) return;
        resultPersisted = true;

        int completed = completedTests.incrementAndGet();
        writeCompletedCount(completed);

        var result = baseResult("completed");
        var results = new LinkedHashMap();
        results.put("durationMs", durationMs);

        // Only meaningful for recall-enabled workloads; run() passes -1 when recall was not measured
        results.put("recall", recall);

        // Optional attachments: only recorded when the corresponding collector produced something
        if (vectorsPerSourceCount != null) {
            results.put("splitSizes", vectorsPerSourceCount.toString());
        }
        if (jfrPartitioningRecorder.getFileName() != null) {
            results.put("jfrPartitioningFile", jfrPartitioningRecorder.getFileName());
        }
        if (jfrCompactingRecorder.getFileName() != null) {
            results.put("jfrWorkloadFile", jfrCompactingRecorder.getFileName());
        }
        if (sysStatsCollector.getFileName() != null) {
            results.put("sysStatsFile", sysStatsCollector.getFileName());
        }
        if (threadAllocTracker.getFileName() != null) {
            results.put("threadAllocFile", threadAllocTracker.getFileName());
        }

        result.put("results", results);
        result.put("completedTests", completed);
        result.put("totalTests", TOTAL_TESTS);

        jsonlWriter.writeLine(result);
        log.info("Completed test {}/{}", completed, TOTAL_TESTS);
    }

    /**
     * Best-effort write of an "error" event for a failed test. Any exception raised while building
     * or writing the event is logged and swallowed so that it cannot mask {@code e} in the caller.
     *
     * @param e the failure to record; its message is used, or its class name when the message is null
     */
    private void persistError(Exception e) {
        try {
            var result = baseResult("error");
            var results = new LinkedHashMap();
            results.put("errorMessage", e.getMessage() != null ? e.getMessage() : e.getClass().getName());
            result.put("results", results);
            result.put("completedTests", completedTests.get());
            result.put("totalTests", TOTAL_TESTS);
            jsonlWriter.writeLine(result);
        } catch (Exception inner) {
            log.error("Failed to persist error event", inner);
        }
    }

    /**
     * Launches the benchmark through the JMH {@code Runner} instead of the JMH command-line main.
     * <p>
     * Creates the run directory, counts the parameter combinations, assembles the system properties
     * the forked JVM needs (run directory, total test count, log4j2 configuration, ANSI setting and,
     * when given, the vectorization provider) and runs with a single fork and a single thread,
     * writing JMH results as JSON.
     *
     * @param args JMH command-line arguments; they are parsed and used as the parent options
     * @throws Exception if the run directory cannot be created or the JMH run fails
     */
    public static void main(String[] args) throws Exception {
        Files.createDirectories(RUN_DIR);
        String jmhResultFile = RUN_DIR.resolve("compactor-jmh.json").toString();
        log.info("Benchmark run directory: {}", RUN_DIR.toAbsolutePath());
        log.info("Progressive results will be written to: {}", RESULTS_FILE.toAbsolutePath());
        log.info("JMH results will be written to: {}", Path.of(jmhResultFile).toAbsolutePath());

        org.openjdk.jmh.runner.options.CommandLineOptions cmdOptions = new org.openjdk.jmh.runner.options.CommandLineOptions(args);
        int totalTests = BenchmarkParamCounter.computeTotalTests(CompactorBenchmark.class, cmdOptions);
        log.info("Total test combinations: {}", totalTests);

        // Resolve the log4j2 config so the forked JVM picks it up explicitly
        var log4j2Config = CompactorBenchmark.class.getClassLoader().getResource("log4j2.xml");
        String log4j2Arg = log4j2Config != null
                ? "-Dlog4j2.configurationFile=" + log4j2Config
                : "-Dlog4j2.configurationFile=classpath:log4j2.xml";

        // The forked JVM's stdout is piped through JMH, so System.console() returns null
        // and Log4j2 suppresses ANSI. Propagate the parent's TTY detection to the child.
        String disableAnsi = System.console() == null ? "true" : "false";

        // Collect all JVM args for the forked process in one list,
        // because jvmArgsAppend() replaces (not appends) on each call.
        var jvmArgs = new ArrayList();
        jvmArgs.add("-Djvector.internal.runDir=" + RUN_DIR);
        jvmArgs.add("-Djvector.internal.totalTests=" + totalTests);
        jvmArgs.add(log4j2Arg);
        jvmArgs.add("-Dcompactor.disableAnsi=" + disableAnsi);

        // Pass the vectorization provider if specified in command line options
        // (only the first value is forwarded to the forked JVM)
        var vpParam = cmdOptions.getParameter("vectorizationProvider");
        if (vpParam.hasValue()) {
            var vpValues = vpParam.get();
            if (!vpValues.isEmpty()) {
                jvmArgs.add("-Djvector.vectorization_provider=" + vpValues.iterator().next());
            }
        }

        var optBuilder = new org.openjdk.jmh.runner.options.OptionsBuilder();
        optBuilder.include(CompactorBenchmark.class.getSimpleName())
                .parent(cmdOptions)
                .forks(1)
                .threads(1)
                .shouldFailOnError(true)
                .jvmArgsAppend(jvmArgs.toArray(new String[0]))
                .resultFormat(ResultFormatType.JSON)
                .result(jmhResultFile);

        new org.openjdk.jmh.runner.Runner(optBuilder.build()).run();
    }

}
diff --git a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md new file mode 100644 index 000000000..bf8355de0 --- /dev/null +++ b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md @@ -0,0 +1,139 @@ + + +# CompactorBenchmark + +`CompactorBenchmark` evaluates the **performance, memory usage, and recall quality** of graph index compaction using `OnDiskGraphIndexCompactor`. + +--- + +# 1. Workload Modes + +| Mode | Description | +|------|-------------| +| `PARTITION_AND_COMPACT` | **(default)** Build partitions, compact them, then measure recall — all in one run | +| `PARTITION_ONLY` | Build N partition indexes and exit; no compaction | +| `COMPACT_ONLY` | Compact existing partitions without loading the dataset | +| `BUILD_FROM_SCRATCH` | Build a single index over the full dataset | + +--- + +# 2.
Quick Start + +## Default: partition and compact in one run + +The default mode builds partitions and immediately compacts them. Use this when you want a single-command end-to-end result. + +```bash +java -Xmx220g --add-modules jdk.incubator.vector \ + -jar benchmarks-jmh/target/benchmarks-jmh-*.jar CompactorBenchmark \ + -p workloadMode=PARTITION_AND_COMPACT \ + -p datasetNames=glove-100-angular \ + -p numPartitions=4 \ + -p splitDistribution=FIBONACCI \ + -p indexPrecision=FUSEDPQ \ + -wi 0 -i 1 -f 1 +``` + +--- + +# 3. Measuring Peak Heap During Compaction + +The two-step workflow (`PARTITION_ONLY` → `COMPACT_ONLY`) exists to isolate compaction's true memory footprint. In `PARTITION_AND_COMPACT` mode the dataset is still resident in heap during compaction, which inflates the apparent memory cost. `COMPACT_ONLY` skips dataset loading entirely, so the heap limit applies only to the compactor itself. + +This lets you prove that compaction can run on machines with very little RAM — e.g., `-Xmx5g` is sufficient even for large datasets. + +## Step 1: Build partitions + +Run with a large heap since the full dataset must be loaded into memory. + +```bash +java -Xmx220g --add-modules jdk.incubator.vector \ + -jar benchmarks-jmh/target/benchmarks-jmh-*.jar CompactorBenchmark \ + -p workloadMode=PARTITION_ONLY \ + -p datasetNames=glove-100-angular \ + -p numPartitions=4 \ + -p splitDistribution=FIBONACCI \ + -p indexPrecision=FUSEDPQ \ + -wi 0 -i 1 -f 1 +``` + +The partition files are written to disk and reused in the next step. + +## Step 2: Compact only (low-memory run) + +The dataset is **not** loaded in this mode. Use a small `-Xmx` to measure and prove the compactor's true peak heap. 
+ +```bash +java -Xmx5g --add-modules jdk.incubator.vector \ + -jar benchmarks-jmh/target/benchmarks-jmh-*.jar CompactorBenchmark \ + -p workloadMode=COMPACT_ONLY \ + -p datasetNames=glove-100-angular \ + -p numPartitions=4 \ + -p splitDistribution=FIBONACCI \ + -p indexPrecision=FUSEDPQ \ + -wi 0 -i 1 -f 1 +``` + +`durationMs` in the output records only the `compact()` call — not JVM startup or I/O setup. + +--- + +# 4. Key Parameters + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `datasetNames` | `glove-100-angular` | Dataset name | +| `workloadMode` | `PARTITION_AND_COMPACT` | Which phase(s) to run | +| `numPartitions` | `4` | Number of source partition indexes | +| `splitDistribution` | — | Data partitioning strategy (`UNIFORM`, `FIBONACCI`, …) | +| `indexPrecision` | — | `FULLPRECISION` (inline vectors only) or `FUSEDPQ` (inline + FusedPQ) | +| `storageDirectories` | *(temp dir)* | Comma-separated list of directories where partition files are written; partitions are distributed round-robin across them. Defaults to a JVM temp directory if unset. | + +--- + +# 5. Index Precision + +`indexPrecision` controls what features are written into each partition index. + +| Value | Written features | +|-------|-----------------| +| `FULLPRECISION` | `INLINE_VECTORS` only | +| `FUSEDPQ` | `INLINE_VECTORS` + `FUSED_PQ` — required for compressed compaction | + +--- + +# 6. Results + +Results are written as JSONL to: + +``` +target/benchmark-results/compactor-<epoch-seconds>/compactor-results.jsonl +``` + +Key fields: + +| Field | Description | +|-------|-------------| +| `durationMs` | Time spent in the measured phase only | +| `recall` | Recall@10 for workload modes that measure recall (e.g. `PARTITION_AND_COMPACT`); written as `-1` otherwise | +| `peakHeapMb` | Peak JVM heap observed during the run | + +--- + +# 7. Memory Footprint + +All datasets in the recall table (see `docs/compaction.md`) can be run under `COMPACT_ONLY` with `-Xmx5g`.
Compaction also successfully scales to a dataset with 2560 dimensions and 10M vectors under the same constraint. diff --git a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/benchtools/BenchmarkParamCounter.java b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/benchtools/BenchmarkParamCounter.java new file mode 100644 index 000000000..ab3035d9c --- /dev/null +++ b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/benchtools/BenchmarkParamCounter.java @@ -0,0 +1,54 @@
/*
 * Copyright DataStax, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.github.jbellis.jvector.bench.benchtools;

import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.runner.options.CommandLineOptions;

/**
 * Counts the total number of {@code @Param} combinations for a JMH benchmark class.
 */
public final class BenchmarkParamCounter {
    private BenchmarkParamCounter() {}

    /**
     * Computes the total number of benchmark parameter combinations as the cartesian product
     * of all {@code @Param} value sets. When {@code cmdOptions} is provided, command-line
     * {@code -p} overrides take precedence over the annotation defaults.
     * <p>
     * Only fields declared directly on {@code benchmarkClass} are inspected (it uses
     * {@link Class#getDeclaredFields()}); fields without a {@code @Param} annotation are ignored.
     *
     * @param benchmarkClass the JMH benchmark class to inspect
     * @param cmdOptions     parsed command-line options, or {@code null} to use annotation defaults only
     * @return the total number of parameter combinations; {@code 1} when the class declares no
     *         {@code @Param} fields
     * @throws ArithmeticException if the product does not fit in an {@code int}
     */
    public static int computeTotalTests(Class<?> benchmarkClass, CommandLineOptions cmdOptions) {
        int total = 1;
        for (var field : benchmarkClass.getDeclaredFields()) {
            var paramAnnotation = field.getAnnotation(Param.class);
            if (paramAnnotation == null) {
                continue; // not a JMH parameter
            }

            // Annotation defaults apply unless the command line supplies a non-empty override
            int valueCount = paramAnnotation.value().length;
            if (cmdOptions != null) {
                var cmdOverride = cmdOptions.getParameter(field.getName());
                if (cmdOverride.hasValue() && !cmdOverride.get().isEmpty()) {
                    valueCount = cmdOverride.get().size();
                }
            }

            // Fail loudly instead of silently wrapping around on absurdly large matrices
            total = Math.multiplyExact(total, valueCount);
        }
        return total;
    }
}
diff --git a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/benchtools/EventLogAnalyzer.java b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/benchtools/EventLogAnalyzer.java new file mode 100644 index 000000000..896a83af1 --- /dev/null +++ b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/benchtools/EventLogAnalyzer.java @@ -0,0 +1,470 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;

/**
 * CLI utility that reads a CompactorBenchmark JSONL event log and reports
 * max concurrency and the cartesian parameter matrix with testIds and results.
 * <p>
 * Every line of the log is one JSON object. Nested objects are flattened into
 * dot-separated keys (e.g. {@code params.dataset}), and all values are kept as strings.
 * Timestamps and testIds are compared as plain strings.
 */
public final class EventLogAnalyzer {

    /**
     * Flattened parameter keys that are inspected, in display order.
     * {@code params.numSources} is kept so logs that carry that key still display it.
     */
    private static final List<String> PARAM_FIELDS = List.of(
            "params.dataset", "params.workloadMode", "params.numPartitions", "params.numSources",
            "params.graphDegree", "params.beamWidth",
            "params.storageDirectories", "params.storageClasses", "params.splitDistribution",
            "params.indexPrecision", "params.parallelWriteThreads", "params.vectorizationProvider",
            "params.datasetPortion", "params.liveNodesRate"
    );

    /** Flattened result keys that are shown as columns when at least one test has a value. */
    private static final List<String> RESULT_FIELDS = List.of(
            "results.recall", "results.durationMs", "results.errorMessage"
    );

    private static final Path RESULTS_DIR = Path.of("target", "benchmark-results");
    private static final String RESULTS_FILENAME = "compactor-results.jsonl";

    private EventLogAnalyzer() {}

    /**
     * Finds the most recent compactor-results.jsonl under target/benchmark-results/
     * by selecting the compactor-* directory with the highest name (epoch-second suffix).
     *
     * @return the results file, or {@code null} when the directory is missing, unreadable,
     *         or contains no results file
     */
    private static Path findLatestResultsFile() {
        if (!Files.isDirectory(RESULTS_DIR)) {
            return null;
        }
        try (Stream<Path> dirs = Files.list(RESULTS_DIR)) {
            return dirs.filter(Files::isDirectory)
                    .filter(d -> d.getFileName().toString().startsWith("compactor-"))
                    .sorted(Comparator.reverseOrder())
                    .map(d -> d.resolve(RESULTS_FILENAME))
                    .filter(Files::exists)
                    .findFirst()
                    .orElse(null);
        } catch (IOException ignored) {
            // An unreadable directory is reported the same way as "nothing found";
            // the caller prints a message and the usage text.
            return null;
        }
    }

    /**
     * Entry point. Parses the options, loads the event log, and prints the max concurrency,
     * the static parameters, the varying parameters and a per-test table to standard output.
     *
     * @param args optional input file followed by range options; see {@link #printUsage()}
     * @throws IOException if the input file cannot be read
     */
    public static void main(String[] args) throws IOException {
        Path inputFile = null;
        String startingAt = null, endingAt = null, startingTestId = null, endingTestId = null;

        for (int i = 0; i < args.length; i++) {
            switch (args[i]) {
                case "--starting-at":     startingAt = requireValue(args, ++i); break;
                case "--ending-at":       endingAt = requireValue(args, ++i); break;
                case "--starting-testid": startingTestId = requireValue(args, ++i); break;
                case "--ending-testid":   endingTestId = requireValue(args, ++i); break;
                case "--help":
                case "-h":
                    printUsage();
                    System.exit(0);
                    break;
                default:
                    if (args[i].startsWith("--")) {
                        System.err.println("Unknown option: " + args[i]);
                        System.exit(1);
                    }
                    if (inputFile != null) {
                        System.err.println("Multiple input files specified");
                        System.exit(1);
                    }
                    inputFile = Path.of(args[i]);
            }
        }

        if (inputFile == null) {
            inputFile = findLatestResultsFile();
            if (inputFile == null) {
                System.err.println("No results file found under " + RESULTS_DIR.toAbsolutePath());
                printUsage();
                System.exit(1);
            }
        }

        System.err.println("Using: " + inputFile.toAbsolutePath());

        // Read and parse all lines. Lines that yield no fields at all (blank, garbage, or a
        // write that was cut off before the first value) are skipped so they cannot switch
        // the whole report into synthetic-id mode.
        List<Map<String, String>> allEvents = new ArrayList<>();
        for (String line : Files.readAllLines(inputFile)) {
            line = line.trim();
            if (line.isEmpty()) {
                continue;
            }
            Map<String, String> event = parseJsonLine(line);
            if (!event.isEmpty()) {
                allEvents.add(event);
            }
        }

        // Assign synthetic testIds where missing: each "started" event opens a new id that is
        // applied to the id-less events that follow it.
        boolean hasSyntheticIds = false;
        String currentSyntheticId = null;
        int syntheticCounter = 0;
        for (var event : allEvents) {
            if (!event.containsKey("testId") || event.get("testId").isEmpty()) {
                hasSyntheticIds = true;
                if ("started".equals(event.get("event"))) {
                    currentSyntheticId = String.format("#%03d", ++syntheticCounter);
                }
                if (currentSyntheticId != null) {
                    event.put("testId", currentSyntheticId);
                }
            }
        }

        // Group all events by testId
        Map<String, List<Map<String, String>>> allByTestId = new LinkedHashMap<>();
        for (var event : allEvents) {
            String tid = event.get("testId");
            if (tid != null) {
                allByTestId.computeIfAbsent(tid, k -> new ArrayList<>()).add(event);
            }
        }

        // Determine which testIds are in scope
        Set<String> inScopeTestIds = new LinkedHashSet<>();
        for (var entry : allByTestId.entrySet()) {
            String tid = entry.getKey();

            // testId range filter (skip for synthetic IDs)
            if (!hasSyntheticIds) {
                if (startingTestId != null && tid.compareTo(startingTestId) < 0) continue;
                if (endingTestId != null && tid.compareTo(endingTestId) > 0) continue;
            }

            // Timestamp range filter: test is in scope if ANY event is within range
            if (startingAt != null || endingAt != null) {
                boolean anyInRange = false;
                for (var event : entry.getValue()) {
                    String ts = event.get("timestamp");
                    if (ts == null) continue;
                    boolean afterStart = startingAt == null || ts.compareTo(startingAt) >= 0;
                    boolean beforeEnd = endingAt == null || ts.compareTo(endingAt) <= 0;
                    if (afterStart && beforeEnd) {
                        anyInRange = true;
                        break;
                    }
                }
                if (!anyInRange) continue;
            }

            inScopeTestIds.add(tid);
        }

        if (inScopeTestIds.isEmpty()) {
            System.out.println("No matching events found.");
            return;
        }

        // Collect all events for in-scope testIds
        Map<String, List<Map<String, String>>> byTestId = new LinkedHashMap<>();
        for (String tid : inScopeTestIds) {
            byTestId.put(tid, allByTestId.get(tid));
        }

        // Compute max concurrency by sweeping start/end intervals.
        // Interval = earliest "started" timestamp .. latest "completed"/"error" timestamp.
        List<String[]> intervals = new ArrayList<>();
        for (var entry : byTestId.entrySet()) {
            String startTs = null, endTs = null;
            for (var event : entry.getValue()) {
                String ev = event.get("event");
                String ts = event.get("timestamp");
                if (ts == null) continue;
                if ("started".equals(ev) && (startTs == null || ts.compareTo(startTs) < 0)) {
                    startTs = ts;
                }
                if (("completed".equals(ev) || "error".equals(ev))
                        && (endTs == null || ts.compareTo(endTs) > 0)) {
                    endTs = ts;
                }
            }
            if (startTs != null && endTs != null) {
                intervals.add(new String[]{startTs, endTs});
            }
        }

        System.out.println("Max concurrency: " + computeMaxConcurrency(intervals));
        System.out.println();

        // Extract params and results for each test.
        // Params come from the first event that carries any "params." key.
        // Results come from the "completed" or "error" event (which includes the "results" sub-object).
        Map<String, Map<String, String>> testParams = new LinkedHashMap<>();
        Map<String, Map<String, String>> testResults = new LinkedHashMap<>();
        Map<String, String> testStatus = new LinkedHashMap<>();
        for (var entry : byTestId.entrySet()) {
            String tid = entry.getKey();
            Map<String, String> params = null;
            Map<String, String> results = null;
            String status = "started";
            for (var event : entry.getValue()) {
                String ev = event.get("event");
                // Take params from whichever event has them (all events include params now)
                if (params == null && event.keySet().stream().anyMatch(k -> k.startsWith("params."))) {
                    params = event;
                }
                if ("completed".equals(ev)) {
                    status = "completed";
                    results = event;
                } else if ("error".equals(ev)) {
                    status = "error";
                    results = event;
                }
            }
            if (params != null) {
                testParams.put(tid, params);
            }
            testResults.put(tid, results != null ? results : Map.<String, String>of());
            testStatus.put(tid, status);
        }

        // Classify parameters as static (single value) or varying (multiple values)
        List<String> varyingParams = new ArrayList<>();
        Map<String, String> staticParams = new LinkedHashMap<>();
        for (String field : PARAM_FIELDS) {
            Set<String> values = new HashSet<>();
            for (var params : testParams.values()) {
                values.add(params.getOrDefault(field, ""));
            }
            if (values.size() > 1) {
                varyingParams.add(field);
            } else if (values.size() == 1) {
                String value = values.iterator().next();
                if (!value.isEmpty()) {
                    staticParams.put(stripPrefix(field), value);
                }
            }
        }

        // Display static parameters at the top
        if (!staticParams.isEmpty()) {
            System.out.println("Static parameters:");
            for (var entry : staticParams.entrySet()) {
                System.out.println(" " + entry.getKey() + " = " + entry.getValue());
            }
            System.out.println();
        }

        if (varyingParams.isEmpty()) {
            System.out.println("No varying parameters found.");
        } else {
            // Display without "params." prefix for readability
            List<String> displayNames = new ArrayList<>();
            for (String p : varyingParams) {
                displayNames.add(stripPrefix(p));
            }
            System.out.println("Varying parameters: " + String.join(", ", displayNames));
        }
        System.out.println();

        // Determine which result fields have any non-empty values
        List<String> activeResultFields = new ArrayList<>();
        for (String field : RESULT_FIELDS) {
            for (var results : testResults.values()) {
                if (!results.getOrDefault(field, "").isEmpty()) {
                    activeResultFields.add(field);
                    break;
                }
            }
        }

        // Build and print table sorted by testId
        List<String> sortedTestIds = new ArrayList<>(testParams.keySet());
        Collections.sort(sortedTestIds);

        // Columns: testId, varying params (without prefix), status, active result fields (without prefix)
        List<String> columns = new ArrayList<>();
        columns.add("testId");
        for (String p : varyingParams) {
            columns.add(stripPrefix(p));
        }
        columns.add("status");
        for (String r : activeResultFields) {
            columns.add(stripPrefix(r));
        }

        List<List<String>> rows = new ArrayList<>();
        for (String tid : sortedTestIds) {
            List<String> row = new ArrayList<>();
            row.add(hasSyntheticIds ? "n/a" : tid);
            var params = testParams.get(tid);
            for (String field : varyingParams) {
                row.add(params != null ? params.getOrDefault(field, "") : "");
            }
            row.add(testStatus.getOrDefault(tid, ""));
            var results = testResults.getOrDefault(tid, Map.<String, String>of());
            for (String field : activeResultFields) {
                row.add(results.getOrDefault(field, ""));
            }
            rows.add(row);
        }

        printTable(columns, rows);
    }

    /**
     * Returns the value that follows an option, or prints an error plus the usage text and
     * exits with status 1 when the option is the last argument.
     *
     * @param args       the full argument array
     * @param valueIndex index at which the option's value is expected
     * @return the option value
     */
    private static String requireValue(String[] args, int valueIndex) {
        if (valueIndex >= args.length) {
            System.err.println("Missing value for option: " + args[valueIndex - 1]);
            printUsage();
            System.exit(1);
        }
        return args[valueIndex];
    }

    /** Drops everything up to and including the first dot, e.g. {@code params.dataset -> dataset}. */
    private static String stripPrefix(String field) {
        int dot = field.indexOf('.');
        return dot >= 0 ? field.substring(dot + 1) : field;
    }

    /**
     * Prints a left-aligned text table whose column widths fit the widest cell.
     *
     * @param columns header names
     * @param rows    one list per row; each row has one cell per column
     */
    private static void printTable(List<String> columns, List<List<String>> rows) {
        int[] widths = new int[columns.size()];
        for (int i = 0; i < columns.size(); i++) {
            widths[i] = columns.get(i).length();
        }
        for (var row : rows) {
            for (int i = 0; i < row.size(); i++) {
                widths[i] = Math.max(widths[i], row.get(i).length());
            }
        }

        StringBuilder header = new StringBuilder();
        for (int i = 0; i < columns.size(); i++) {
            if (i > 0) header.append(" ");
            header.append(String.format("%-" + widths[i] + "s", columns.get(i)));
        }
        System.out.println(header);

        for (var row : rows) {
            StringBuilder line = new StringBuilder();
            for (int i = 0; i < row.size(); i++) {
                if (i > 0) line.append(" ");
                line.append(String.format("%-" + widths[i] + "s", row.get(i)));
            }
            System.out.println(line);
        }
    }

    /** Prints the command-line synopsis to standard error. */
    private static void printUsage() {
        System.err.println("Usage: EventLogAnalyzer [results.jsonl] [options]");
        System.err.println(" If no file is given, the latest results under target/benchmark-results/ are used.");
        System.err.println("Options:");
        System.err.println(" --starting-at <timestamp> Include tests with events at or after this timestamp");
        System.err.println(" --ending-at <timestamp> Include tests with events at or before this timestamp");
        System.err.println(" --starting-testid <testId> Include tests with testId >= this value");
        System.err.println(" --ending-testid <testId> Include tests with testId <= this value");
    }

    /**
     * Computes the largest number of intervals that overlap at any instant, using a sweep
     * over start (+1) and end (-1) events.
     * <p>
     * Events at the same timestamp are ordered ends-before-starts, so a test that starts at
     * the exact timestamp another one finished is not counted as running concurrently with it.
     * The deltas are compared numerically: as strings, {@code "+1"} sorts before {@code "-1"}
     * ('+' is 0x2B, '-' is 0x2D), which would process starts first.
     *
     * @param intervals {@code {startTimestamp, endTimestamp}} pairs
     * @return 0 for no intervals, otherwise the peak overlap (at least 1)
     */
    private static int computeMaxConcurrency(List<String[]> intervals) {
        if (intervals.isEmpty()) return 0;

        // Each sweep event is {timestamp, delta} where delta is +1 (start) or -1 (end)
        List<String[]> events = new ArrayList<>();
        for (var interval : intervals) {
            events.add(new String[]{interval[0], "+1"});
            events.add(new String[]{interval[1], "-1"});
        }
        // Sort by timestamp, then ends (-1) before starts (+1) at the same timestamp
        events.sort((a, b) -> {
            int cmp = a[0].compareTo(b[0]);
            if (cmp != 0) return cmp;
            return Integer.compare(Integer.parseInt(a[1]), Integer.parseInt(b[1]));
        });

        int max = 0, current = 0;
        for (var event : events) {
            current += Integer.parseInt(event[1]);
            max = Math.max(max, current);
        }
        // A zero-length interval is closed before it is opened by the ordering above;
        // any recorded interval still means at least one test ran.
        return Math.max(max, 1);
    }

    /**
     * Parses a JSON line into key-value string pairs.
     * Nested objects are flattened with dot-prefixed keys (e.g., "params.dataset").
     * Handles quoted strings (with backslash escapes), numbers, booleans and nested objects;
     * array values are kept as their raw JSON text. A line that ends prematurely yields the
     * fields that were complete before the cut.
     */
    private static Map<String, String> parseJsonLine(String line) {
        Map<String, String> result = new LinkedHashMap<>();
        parseObject(line, new int[]{0}, "", result);
        return result;
    }

    /**
     * Parses one JSON object starting at or after {@code pos[0]} and stores its fields in
     * {@code result}. On return {@code pos[0]} points just past the object's closing brace,
     * or at/after the end of the line when the input is truncated.
     *
     * @param line   the full JSON line
     * @param pos    single-element cursor shared across recursive calls
     * @param prefix dot-separated key prefix of the enclosing objects, empty at top level
     * @param result receives the flattened fields
     */
    private static void parseObject(String line, int[] pos, String prefix, Map<String, String> result) {
        int len = line.length();

        // Skip to opening brace
        while (pos[0] < len && line.charAt(pos[0]) != '{') pos[0]++;
        pos[0]++;

        while (pos[0] < len) {
            // Skip whitespace and commas
            while (pos[0] < len && (line.charAt(pos[0]) == ' ' || line.charAt(pos[0]) == ',' || line.charAt(pos[0]) == '\t')) pos[0]++;
            if (pos[0] >= len || line.charAt(pos[0]) == '}') {
                pos[0]++; // skip closing brace
                break;
            }

            // Parse key (quoted string)
            if (line.charAt(pos[0]) != '"') break;
            pos[0]++;
            int keyStart = pos[0];
            while (pos[0] < len && line.charAt(pos[0]) != '"') {
                if (line.charAt(pos[0]) == '\\') pos[0]++;
                pos[0]++;
            }
            if (pos[0] >= len) {
                break; // line was cut off inside the key: nothing more to read
            }
            String key = line.substring(keyStart, pos[0]);
            pos[0]++; // skip closing quote

            String fullKey = prefix.isEmpty() ? key : prefix + "." + key;

            // Skip colon and whitespace
            while (pos[0] < len && (line.charAt(pos[0]) == ':' || line.charAt(pos[0]) == ' ')) pos[0]++;
            if (pos[0] >= len) {
                break; // line was cut off before the value
            }

            // Parse value
            char first = line.charAt(pos[0]);
            if (first == '{') {
                // Nested object: recurse with dot-prefixed key
                parseObject(line, pos, fullKey, result);
            } else if (first == '[') {
                // Array: keep the raw text so the fields after it are still parsed
                int valStart = pos[0];
                skipArray(line, pos);
                result.put(fullKey, line.substring(valStart, pos[0]));
            } else if (first == '"') {
                // String value
                pos[0]++;
                StringBuilder sb = new StringBuilder();
                while (pos[0] < len && line.charAt(pos[0]) != '"') {
                    if (line.charAt(pos[0]) == '\\' && pos[0] + 1 < len) {
                        pos[0]++;
                        switch (line.charAt(pos[0])) {
                            case '"': sb.append('"'); break;
                            case '\\': sb.append('\\'); break;
                            case 'n': sb.append('\n'); break;
                            case 't': sb.append('\t'); break;
                            default: sb.append(line.charAt(pos[0])); break;
                        }
                    } else {
                        sb.append(line.charAt(pos[0]));
                    }
                    pos[0]++;
                }
                pos[0]++; // skip closing quote
                result.put(fullKey, sb.toString());
            } else {
                // Number, boolean, or null
                int valStart = pos[0];
                while (pos[0] < len && line.charAt(pos[0]) != ',' && line.charAt(pos[0]) != '}' && line.charAt(pos[0]) != ' ') pos[0]++;
                result.put(fullKey, line.substring(valStart, pos[0]));
            }
        }
    }

    /**
     * Advances {@code pos[0]} from an opening {@code '['} to just past the matching
     * {@code ']'}, honouring nested arrays and quoted strings. Stops at the end of the line
     * when the array is not closed.
     */
    private static void skipArray(String line, int[] pos) {
        int len = line.length();
        int depth = 0;
        boolean inString = false;
        while (pos[0] < len) {
            char c = line.charAt(pos[0]);
            if (inString) {
                if (c == '\\') {
                    pos[0]++; // skip the escaped character
                } else if (c == '"') {
                    inString = false;
                }
            } else if (c == '"') {
                inString = true;
            } else if (c == '[') {
                depth++;
            } else if (c == ']') {
                depth--;
                if (depth == 0) {
                    pos[0]++;
                    return;
                }
            }
            pos[0]++;
        }
        pos[0] = Math.min(pos[0], len);
    }
}
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Reusable benchmark infrastructure utilities. + *

+ * This package provides general-purpose tools for JMH benchmarks including + * {@code @Param} combination counting ({@link io.github.jbellis.jvector.bench.benchtools.BenchmarkParamCounter}) + * and JSONL event log analysis ({@link io.github.jbellis.jvector.bench.benchtools.EventLogAnalyzer}). + */ +package io.github.jbellis.jvector.bench.benchtools; diff --git a/benchmarks-jmh/src/main/resources/log4j2.xml b/benchmarks-jmh/src/main/resources/log4j2.xml index 823788261..75bb377fd 100644 --- a/benchmarks-jmh/src/main/resources/log4j2.xml +++ b/benchmarks-jmh/src/main/resources/log4j2.xml @@ -3,13 +3,19 @@ - + + + + + + \ No newline at end of file diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/vector/VectorizationProvider.java b/jvector-base/src/main/java/io/github/jbellis/jvector/vector/VectorizationProvider.java index 1ec46443d..5ab5664d1 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/vector/VectorizationProvider.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/vector/VectorizationProvider.java @@ -77,6 +77,28 @@ protected VectorizationProvider() { // visible for tests static VectorizationProvider lookup(boolean testMode) { + String forcedProvider = System.getProperty("jvector.vectorization_provider"); + if (forcedProvider != null) { + switch (forcedProvider.toLowerCase(Locale.ROOT)) { + case "default": + return new DefaultVectorizationProvider(); + case "panama": + try { + return (VectorizationProvider) Class.forName("io.github.jbellis.jvector.vector.PanamaVectorizationProvider").getConstructor().newInstance(); + } catch (Throwable e) { + throw new RuntimeException("Failed to load forced PanamaVectorizationProvider", e); + } + case "native": + try { + return (VectorizationProvider) Class.forName("io.github.jbellis.jvector.vector.NativeVectorizationProvider").getConstructor().newInstance(); + } catch (Throwable e) { + throw new RuntimeException("Failed to load forced NativeVectorizationProvider", e); + } +
default: + throw new IllegalArgumentException("Unknown vectorization provider: " + forcedProvider); + } + } + final int runtimeVersion = Runtime.version().feature(); if (runtimeVersion >= 20) { // is locale sane (only buggy in Java 20) From c75256afb46f0284cda6b8f5959126b0124c2068 Mon Sep 17 00:00:00 2001 From: dian-lun-lin Date: Tue, 14 Apr 2026 15:06:35 -0700 Subject: [PATCH 05/18] Update build config and project metadata for compaction Add result file patterns to .gitignore, update rat-excludes for the new compaction workflow and catalog cache files. --- .gitignore | 4 ++++ rat-excludes.txt | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 4b5599f84..70cede6f9 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,10 @@ local/ dataset_ **/local_datasets/** +### Testing Results +**results**.json +**results**.jsonl + ### Bench caches pq_cache/ index_cache/ diff --git a/rat-excludes.txt b/rat-excludes.txt index 436c97822..ccb3b13e5 100644 --- a/rat-excludes.txt +++ b/rat-excludes.txt @@ -7,6 +7,7 @@ CONTRIBUTIONS.md package.json .github/workflows/tag-release.yml .github/workflows/run-bench.yml +.github/workflows/run-compaction.yml .mvn/wrapper/maven-wrapper.properties .mvn/jvm.config README.md @@ -26,9 +27,9 @@ scripts/test_node_setup.sh scripts/jmh_results_formatter.py yaml-configs/**/*.yaml yaml-configs/**/*.yml +yaml-configs/**/.catalog-cache/** src/main/resources/logback.xml docs/**/*.md yaml-configs/**/*.md local_datasets/** **/datasets/** - From 415f907b088d3a410def4a7419cd840061fd81b6 Mon Sep 17 00:00:00 2001 From: dian-lun-lin Date: Tue, 14 Apr 2026 22:52:37 -0700 Subject: [PATCH 06/18] Fix JMH jar selection in run-compaction.yml The benchmarks-jmh-*.jar glob matched the -javadoc jar first, which has no Main-Class. Select the shaded JMH jar explicitly by excluding -javadoc and -sources jars. 
--- .github/workflows/run-compaction.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-compaction.yml b/.github/workflows/run-compaction.yml index 8d12ee161..b43d33e2c 100644 --- a/.github/workflows/run-compaction.yml +++ b/.github/workflows/run-compaction.yml @@ -93,10 +93,18 @@ jobs: SAFE_BRANCH=$(echo "${{ matrix.branch }}" | sed 's/[^A-Za-z0-9_-]/_/g') echo "safe_branch=$SAFE_BRANCH" >> $GITHUB_OUTPUT + # Select the shaded JMH jar (exclude -javadoc and -sources jars) + JMH_JAR=$(ls benchmarks-jmh/target/benchmarks-jmh-*.jar | grep -Ev -- '-(javadoc|sources)\.jar$' | head -1) + if [[ -z "$JMH_JAR" ]]; then + echo "ERROR: No JMH jar found under benchmarks-jmh/target/" + exit 1 + fi + echo "Using JMH jar: $JMH_JAR" + java --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector \ -Djvector.experimental.enable_native_vectorization=true \ -Xmx${HALF_MEM_GB}g \ - -jar benchmarks-jmh/target/benchmarks-jmh-*.jar CompactorBenchmark \ + -jar "$JMH_JAR" CompactorBenchmark \ -p workloadMode=PARTITION_AND_COMPACT \ -p datasetNames=$DATASET \ -p numPartitions=4 \ From 224a709a92af0c572dbdaf3f51a1434b28b0c27e Mon Sep 17 00:00:00 2001 From: dian-lun-lin Date: Wed, 15 Apr 2026 21:39:37 -0700 Subject: [PATCH 07/18] Fix CompactorBenchmark invocation in run-compaction.yml Use -cp with CompactorBenchmark.main() instead of -jar with JMH Main to avoid BenchmarkList discovery issues in CI's shaded jar. 
--- .github/workflows/run-compaction.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/run-compaction.yml b/.github/workflows/run-compaction.yml index b43d33e2c..42efe907d 100644 --- a/.github/workflows/run-compaction.yml +++ b/.github/workflows/run-compaction.yml @@ -93,23 +93,20 @@ jobs: SAFE_BRANCH=$(echo "${{ matrix.branch }}" | sed 's/[^A-Za-z0-9_-]/_/g') echo "safe_branch=$SAFE_BRANCH" >> $GITHUB_OUTPUT - # Select the shaded JMH jar (exclude -javadoc and -sources jars) JMH_JAR=$(ls benchmarks-jmh/target/benchmarks-jmh-*.jar | grep -Ev -- '-(javadoc|sources)\.jar$' | head -1) - if [[ -z "$JMH_JAR" ]]; then - echo "ERROR: No JMH jar found under benchmarks-jmh/target/" - exit 1 - fi echo "Using JMH jar: $JMH_JAR" java --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector \ -Djvector.experimental.enable_native_vectorization=true \ -Xmx${HALF_MEM_GB}g \ - -jar "$JMH_JAR" CompactorBenchmark \ + -cp "$JMH_JAR" \ + io.github.jbellis.jvector.bench.CompactorBenchmark \ -p workloadMode=PARTITION_AND_COMPACT \ -p datasetNames=$DATASET \ -p numPartitions=4 \ -p splitDistribution=FIBONACCI \ -p indexPrecision=FUSEDPQ \ + -jvmArgsPrepend "-Xmx${HALF_MEM_GB}g" \ -wi 0 -i 1 -f 1 - name: Upload compaction results From 191a40d2a751a2e0f6e801d271349187bc732b18 Mon Sep 17 00:00:00 2001 From: dian-lun-lin Date: Fri, 17 Apr 2026 14:41:47 -0700 Subject: [PATCH 08/18] Address PR review feedback - Extract CompactWriter into its own file to reduce OnDiskGraphIndexCompactor size - Rewrite SystemStatsCollector to read /proc files directly in Java instead of spawning bash - Clarify recall section description in docs/compaction.md --- docs/compaction.md | 6 +- .../jvector/graph/disk/CompactWriter.java | 241 ++++++++++++++++++ .../graph/disk/OnDiskGraphIndexCompactor.java | 236 +---------------- .../reporting/SystemStatsCollector.java | 192 ++++++++++---- 4 files changed, 388 insertions(+), 287 deletions(-) create mode 100644 
jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CompactWriter.java diff --git a/docs/compaction.md b/docs/compaction.md index 068c22fdf..f63697aa9 100644 --- a/docs/compaction.md +++ b/docs/compaction.md @@ -175,10 +175,10 @@ Results are written as JSONL to `target/benchmark-results/compactor-*/compactor- ## Recall -The table compares building from scratch with compaction under the following configurations (results averaged over three runs): +Recall comparison (results averaged over three runs): -- Build from scratch: build with PQ; search using FusedPQ with FP reranking. -- Compaction: build source partitions with PQ; compact using FusedPQ with FP rescoring; search using FusedPQ with FP reranking. +- Build from scratch: build one index over the full dataset with PQ scoring; search using FusedPQ with FP reranking. +- Compaction: partition the dataset into 4 source indexes (Fibonacci distribution), build each with PQ scoring, then compact into one index; search using FusedPQ with FP reranking. | Dataset | Dim | Build from Scratch | Compaction | Delta | |----------------------|-----:|-------------------:|-----------:|-------:| diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CompactWriter.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CompactWriter.java new file mode 100644 index 000000000..5a3091686 --- /dev/null +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CompactWriter.java @@ -0,0 +1,241 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.github.jbellis.jvector.graph.disk; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import io.github.jbellis.jvector.disk.BufferedRandomAccessWriter; +import io.github.jbellis.jvector.disk.RandomAccessWriter; +import io.github.jbellis.jvector.disk.ByteBufferIndexWriter; +import io.github.jbellis.jvector.graph.disk.feature.Feature; +import io.github.jbellis.jvector.graph.disk.feature.FeatureId; +import io.github.jbellis.jvector.graph.disk.feature.InlineVectors; +import io.github.jbellis.jvector.graph.disk.feature.FusedPQ; +import io.github.jbellis.jvector.quantization.ProductQuantization; +import io.github.jbellis.jvector.vector.VectorizationProvider; +import io.github.jbellis.jvector.vector.types.ByteSequence; +import io.github.jbellis.jvector.vector.types.VectorFloat; +import io.github.jbellis.jvector.vector.types.VectorTypeSupport; + +import static io.github.jbellis.jvector.graph.disk.OnDiskGraphIndexCompactor.SelectedVecCache; +import static io.github.jbellis.jvector.graph.disk.OnDiskGraphIndexCompactor.WriteResult; + +final class CompactWriter implements AutoCloseable { + + private static final VectorTypeSupport vectorTypeSupport = VectorizationProvider.getInstance().getVectorTypeSupport(); + + private static final int FOOTER_MAGIC = 0x4a564244; + private static final int FOOTER_OFFSET_SIZE = Long.BYTES; + private 
static final int FOOTER_MAGIC_SIZE = Integer.BYTES; + private static final int FOOTER_SIZE = FOOTER_MAGIC_SIZE + FOOTER_OFFSET_SIZE; + + private final RandomAccessWriter writer; + private final int recordSize; + private final long startOffset; + private final int headerSize; + private final Header header; + private final int version; + private final FusedPQ fusedPQFeature; + private final ProductQuantization pq; + private final int baseDegree; + private final int maxOrdinal; + private final ThreadLocal bufferPerThread; + private final ThreadLocal> zeroPQ; + private final boolean fusedPQEnabled; + private final Path outputPath; + private final List configuredLayerInfo; + private final List configuredLayerDegrees; + private final List level1FeatureRecords; + + CompactWriter(Path outputPath, + int maxOrdinal, + int numBaseLayerNodes, + long startOffset, + List layerInfo, + int entryNode, + int dimension, + List layerDegrees, + ProductQuantization pq, + int pqLength, + boolean fusedPQEnabled) + throws IOException { + this.fusedPQEnabled = fusedPQEnabled; + this.version = OnDiskGraphIndex.CURRENT_VERSION; + this.outputPath = outputPath; + this.writer = new BufferedRandomAccessWriter(outputPath); + this.startOffset = startOffset; + this.configuredLayerInfo = new ArrayList<>(layerInfo); + this.configuredLayerDegrees = new ArrayList<>(layerDegrees); + this.baseDegree = layerDegrees.get(0); + this.pq = pq; + this.maxOrdinal = maxOrdinal; + this.level1FeatureRecords = new ArrayList<>(); + + Map featureMap = new LinkedHashMap<>(); + InlineVectors inlineVectorFeature = new InlineVectors(dimension); + featureMap.put(FeatureId.INLINE_VECTORS, inlineVectorFeature); + if (fusedPQEnabled) { + this.fusedPQFeature = new FusedPQ(Collections.max(layerDegrees), pq); + featureMap.put(FeatureId.FUSED_PQ, this.fusedPQFeature); + } else { + this.fusedPQFeature = null; + } + + int rsize = Integer.BYTES + inlineVectorFeature.featureSize() + Integer.BYTES + baseDegree * Integer.BYTES; + if 
(fusedPQEnabled) { + rsize += fusedPQFeature.featureSize(); + } + this.recordSize = rsize; + + this.configuredLayerInfo.set(0, new CommonHeader.LayerInfo(numBaseLayerNodes, baseDegree)); + var commonHeader = new CommonHeader(this.version, dimension, entryNode, this.configuredLayerInfo, this.maxOrdinal + 1); + this.header = new Header(commonHeader, featureMap); + this.headerSize = header.size(); + + this.bufferPerThread = ThreadLocal.withInitial(() -> { + ByteBuffer buffer = ByteBuffer.allocate(recordSize); + buffer.order(ByteOrder.BIG_ENDIAN); + return buffer; + }); + this.zeroPQ = ThreadLocal.withInitial(() -> { + var vec = vectorTypeSupport.createByteSequence(pqLength > 0 ? pqLength : 1); + vec.zero(); + return vec; + }); + } + + public void writeHeader() throws IOException { + writer.seek(startOffset); + header.write(writer); + assert writer.position() == startOffset + headerSize : String.format("%d != %d", writer.position(), startOffset + headerSize); + writer.flush(); + } + + void writeFooter() throws IOException { + if (fusedPQEnabled && version == 6 && !level1FeatureRecords.isEmpty()) { + for (UpperLayerFeatureRecord record : level1FeatureRecords) { + writer.writeInt(record.ordinal); + vectorTypeSupport.writeByteSequence(writer, record.pqCode); + } + } + long headerOffset = writer.position(); + header.write(writer); + writer.writeLong(headerOffset); + writer.writeInt(FOOTER_MAGIC); + final long expectedPosition = headerOffset + headerSize + FOOTER_SIZE; + assert writer.position() == expectedPosition : String.format("%d != %d", writer.position(), expectedPosition); + } + + public void offsetAfterInline() throws IOException { + long offset = startOffset + headerSize + (long) (maxOrdinal + 1) * recordSize; + writer.seek(offset); + } + + public Path getOutputPath() { + return outputPath; + } + + public void writeUpperLayerNode(int level, int ordinal, int[] neighbors, ByteSequence level1PqCode) throws IOException { + writer.writeInt(ordinal); + 
writer.writeInt(neighbors.length); + int degree = configuredLayerDegrees.get(level); + int n = 0; + for (; n < neighbors.length; n++) { + writer.writeInt(neighbors[n]); + } + for (; n < degree; n++) { + writer.writeInt(-1); + } + if (fusedPQEnabled && version == 6 && level == 1 && level1PqCode != null) { + level1FeatureRecords.add(new UpperLayerFeatureRecord(ordinal, level1PqCode.copy())); + } + } + + public void close() throws IOException { + final var endOfGraphPosition = writer.position(); + writer.seek(endOfGraphPosition); + writer.flush(); + } + + public WriteResult writeInlineNodeRecord(int ordinal, VectorFloat vec, SelectedVecCache selectedCache, ByteSequence pqCode) throws IOException + { + var bwriter = new ByteBufferIndexWriter(bufferPerThread.get()); + + long fileOffset = startOffset + headerSize + (long) ordinal * recordSize; + bwriter.reset(); + bwriter.writeInt(ordinal); + + for(int i = 0; i < vec.length(); ++i) { + bwriter.writeFloat(vec.get(i)); + } + + // write fused PQ + // since we build a graph in a streaming way, + // we cannot use fusedPQfeature.writeInline + if (fusedPQEnabled) { + int k = 0; + for (; k < selectedCache.size; k++) { + pqCode.zero(); + pq.encodeTo(selectedCache.vecs[k], pqCode); + vectorTypeSupport.writeByteSequence(bwriter, pqCode); + } + for (; k < baseDegree; k++) { + vectorTypeSupport.writeByteSequence(bwriter, zeroPQ.get()); + } + } + + // write neighbors list + bwriter.writeInt(selectedCache.size); + int n = 0; + for (; n < selectedCache.size; n++) { + bwriter.writeInt(selectedCache.nodes[n]); + } + + // pad out to base layer degree + for (; n < baseDegree; n++) { + bwriter.writeInt(-1); + } + + if (bwriter.bytesWritten() != recordSize) { + throw new IllegalStateException( + String.format("Record size mismatch for ordinal %d: expected %d bytes, wrote %d bytes, base degree: %d", + ordinal, recordSize, bwriter.bytesWritten(), baseDegree)); + } + + ByteBuffer dataCopy = bwriter.cloneBuffer(); + + return new 
WriteResult(ordinal, fileOffset, dataCopy); + } + + static final class UpperLayerFeatureRecord { + final int ordinal; + final ByteSequence pqCode; + + UpperLayerFeatureRecord(int ordinal, ByteSequence pqCode) { + this.ordinal = ordinal; + this.pqCode = pqCode; + } + } +} diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java index 04d6c7c1d..9200b2763 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java @@ -18,7 +18,6 @@ import java.io.FileNotFoundException; import java.io.IOException; -import java.nio.ByteOrder; import java.nio.file.Path; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; @@ -26,14 +25,9 @@ import java.util.*; import java.util.concurrent.*; import java.util.stream.IntStream; - -import io.github.jbellis.jvector.disk.BufferedRandomAccessWriter; -import io.github.jbellis.jvector.disk.RandomAccessWriter; -import io.github.jbellis.jvector.disk.ByteBufferIndexWriter; import io.github.jbellis.jvector.graph.*; import io.github.jbellis.jvector.graph.disk.feature.Feature; import io.github.jbellis.jvector.graph.disk.feature.FeatureId; -import io.github.jbellis.jvector.graph.disk.feature.InlineVectors; import io.github.jbellis.jvector.graph.disk.feature.FusedPQ; import io.github.jbellis.jvector.graph.similarity.DefaultSearchScoreProvider; import io.github.jbellis.jvector.graph.similarity.SearchScoreProvider; @@ -1028,7 +1022,7 @@ private static int partition(int[] order, float[] score, int lo, int hi) { return i; } - private static final class WriteResult { + static final class WriteResult { final int newOrdinal; final long fileOffset; final ByteBuffer data; @@ -1183,236 +1177,10 @@ private boolean isDiverse(OnDiskGraphIndex.View cView, int 
cNode, VectorFloat } - /** - * Handles writing the compacted graph index to disk, managing header, node records, - * upper layers, and footer in the on-disk format. - */ - private static final class CompactWriter implements AutoCloseable { - - private static final int FOOTER_MAGIC = 0x4a564244; - private static final int FOOTER_OFFSET_SIZE = Long.BYTES; - private static final int FOOTER_MAGIC_SIZE = Integer.BYTES; - private static final int FOOTER_SIZE = FOOTER_MAGIC_SIZE + FOOTER_OFFSET_SIZE; - - private final RandomAccessWriter writer; - private final int recordSize; - private final long startOffset; - private final int headerSize; - private final Header header; - private final int version; - private final FusedPQ fusedPQFeature; - private final ProductQuantization pq; - private final int baseDegree; - private final int maxOrdinal; - private final ThreadLocal bufferPerThread; - private final ThreadLocal> zeroPQ; - private final boolean fusedPQEnabled; - private final Path outputPath; - private final List configuredLayerInfo; - private final List configuredLayerDegrees; - private final List level1FeatureRecords; - - /** - * Constructs a CompactWriter that will write the compacted index to the specified path. 
- */ - CompactWriter(Path outputPath, - int maxOrdinal, - int numBaseLayerNodes, - long startOffset, - List layerInfo, - int entryNode, - int dimension, - List layerDegrees, - ProductQuantization pq, - int pqLength, - boolean fusedPQEnabled) - throws IOException { - this.fusedPQEnabled = fusedPQEnabled; - this.version = OnDiskGraphIndex.CURRENT_VERSION; - this.outputPath = outputPath; - this.writer = new BufferedRandomAccessWriter(outputPath); - this.startOffset = startOffset; - this.configuredLayerInfo = new ArrayList<>(layerInfo); - this.configuredLayerDegrees = new ArrayList<>(layerDegrees); - this.baseDegree = layerDegrees.get(0); - this.pq = pq; - this.maxOrdinal = maxOrdinal; - this.level1FeatureRecords = new ArrayList<>(); - - Map featureMap = new LinkedHashMap<>(); - InlineVectors inlineVectorFeature = new InlineVectors(dimension); - featureMap.put(FeatureId.INLINE_VECTORS, inlineVectorFeature); - if (fusedPQEnabled) { - this.fusedPQFeature = new FusedPQ(Collections.max(layerDegrees), pq); - featureMap.put(FeatureId.FUSED_PQ, this.fusedPQFeature); - } else { - this.fusedPQFeature = null; - } - - int rsize = Integer.BYTES + inlineVectorFeature.featureSize() + Integer.BYTES + baseDegree * Integer.BYTES; - if (fusedPQEnabled) { - rsize += fusedPQFeature.featureSize(); - } - this.recordSize = rsize; - - this.configuredLayerInfo.set(0, new CommonHeader.LayerInfo(numBaseLayerNodes, baseDegree)); - var commonHeader = new CommonHeader(this.version, dimension, entryNode, this.configuredLayerInfo, this.maxOrdinal + 1); - this.header = new Header(commonHeader, featureMap); - this.headerSize = header.size(); - - this.bufferPerThread = ThreadLocal.withInitial(() -> { - ByteBuffer buffer = ByteBuffer.allocate(recordSize); - buffer.order(ByteOrder.BIG_ENDIAN); - return buffer; - }); - this.zeroPQ = ThreadLocal.withInitial(() -> { - var vec = vectorTypeSupport.createByteSequence(pqLength > 0 ? 
pqLength : 1); - vec.zero(); - return vec; - }); - } - - /** - * Writes the graph header at the start of the file. - */ - public void writeHeader() throws IOException { - writer.seek(startOffset); - header.write(writer); - assert writer.position() == startOffset + headerSize : String.format("%d != %d", writer.position(), startOffset + headerSize); - writer.flush(); - } - - /** - * Writes the footer containing upper layer features (if any), header copy, and magic number. - */ - void writeFooter() throws IOException { - if (fusedPQEnabled && version == 6 && !level1FeatureRecords.isEmpty()) { - for (UpperLayerFeatureRecord record : level1FeatureRecords) { - writer.writeInt(record.ordinal); - vectorTypeSupport.writeByteSequence(writer, record.pqCode); - } - } - long headerOffset = writer.position(); - header.write(writer); - writer.writeLong(headerOffset); - writer.writeInt(FOOTER_MAGIC); - final long expectedPosition = headerOffset + headerSize + FOOTER_SIZE; - assert writer.position() == expectedPosition : String.format("%d != %d", writer.position(), expectedPosition); - } - - /** - * Positions the writer after the inline (base layer) records section. - */ - public void offsetAfterInline() throws IOException { - long offset = startOffset + headerSize + (long) (maxOrdinal + 1) * recordSize; - writer.seek(offset); - } - - /** - * Returns the output file path. - */ - public Path getOutputPath() { - return outputPath; - } - - /** - * Writes an upper layer node's graph structure (ordinal and neighbors). - * Collects level 1 PQ codes for later writing in the footer. 
- */ - public void writeUpperLayerNode(int level, int ordinal, int[] neighbors, ByteSequence level1PqCode) throws IOException { - writer.writeInt(ordinal); - writer.writeInt(neighbors.length); - int degree = configuredLayerDegrees.get(level); - int n = 0; - for (; n < neighbors.length; n++) { - writer.writeInt(neighbors[n]); - } - for (; n < degree; n++) { - writer.writeInt(-1); - } - if (fusedPQEnabled && version == 6 && level == 1 && level1PqCode != null) { - level1FeatureRecords.add(new UpperLayerFeatureRecord(ordinal, level1PqCode.copy())); - } - } - - /** - * Flushes and closes the writer. - */ - public void close() throws IOException { - final var endOfGraphPosition = writer.position(); - writer.seek(endOfGraphPosition); - writer.flush(); - } - - /** - * Constructs and returns a write result for a base layer node containing the full record: - * ordinal, inline vector, PQ codes for neighbors, and neighbor list. - */ - public WriteResult writeInlineNodeRecord(int ordinal, VectorFloat vec, SelectedVecCache selectedCache, ByteSequence pqCode) throws IOException - { - var bwriter = new ByteBufferIndexWriter(bufferPerThread.get()); - - long fileOffset = startOffset + headerSize + (long) ordinal * recordSize; - bwriter.reset(); - bwriter.writeInt(ordinal); - - for(int i = 0; i < vec.length(); ++i) { - bwriter.writeFloat(vec.get(i)); - } - - // write fused PQ - // since we build a graph in a streaming way, - // we cannot use fusedPQfeature.writeInline - if (fusedPQEnabled) { - int k = 0; - for (; k < selectedCache.size; k++) { - pqCode.zero(); - pq.encodeTo(selectedCache.vecs[k], pqCode); - vectorTypeSupport.writeByteSequence(bwriter, pqCode); - } - for (; k < baseDegree; k++) { - vectorTypeSupport.writeByteSequence(bwriter, zeroPQ.get()); - } - } - - // write neighbors list - bwriter.writeInt(selectedCache.size); - int n = 0; - for (; n < selectedCache.size; n++) { - bwriter.writeInt(selectedCache.nodes[n]); - } - - // pad out to base layer degree - for (; n < 
baseDegree; n++) { - bwriter.writeInt(-1); - } - - if (bwriter.bytesWritten() != recordSize) { - throw new IllegalStateException( - String.format("Record size mismatch for ordinal %d: expected %d bytes, wrote %d bytes, base degree: %d", - ordinal, recordSize, bwriter.bytesWritten(), baseDegree)); - } - - ByteBuffer dataCopy = bwriter.cloneBuffer(); - - return new WriteResult(ordinal, fileOffset, dataCopy); - } - } - - private static final class UpperLayerFeatureRecord { - final int ordinal; - final ByteSequence pqCode; - - UpperLayerFeatureRecord(int ordinal, ByteSequence pqCode) { - this.ordinal = ordinal; - this.pqCode = pqCode; - } - } - /** * Cache for storing selected diverse neighbors along with their metadata and vector copies. */ - private static final class SelectedVecCache { + static final class SelectedVecCache { int[] sourceIdx; OnDiskGraphIndex.View[] views; int[] nodes; diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/SystemStatsCollector.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/SystemStatsCollector.java index 3932958e3..571782510 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/SystemStatsCollector.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/reporting/SystemStatsCollector.java @@ -19,86 +19,178 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.BufferedWriter; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.time.Instant; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.util.HashSet; +import java.util.List; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import java.util.regex.Pattern; /** * Background collector of {@code /proc} system metrics (CPU topology, load, 
memory, disk I/O). - * Spawns a bash process that appends JSONL lines to a file every 30 seconds. + * Reads /proc files directly in Java and appends JSONL lines to a file every 30 seconds. */ public final class SystemStatsCollector { private static final Logger log = LoggerFactory.getLogger(SystemStatsCollector.class); + private static final Path PROC_CPUINFO = Path.of("/proc/cpuinfo"); + private static final Path PROC_LOADAVG = Path.of("/proc/loadavg"); + private static final Path PROC_MEMINFO = Path.of("/proc/meminfo"); + private static final Path PROC_DISKSTATS = Path.of("/proc/diskstats"); + private static final Pattern DISK_DEVICE_PATTERN = Pattern.compile("sd[a-z]+|nvme[0-9]+n[0-9]+|vd[a-z]+|xvd[a-z]+"); + private static final DateTimeFormatter TS_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'").withZone(ZoneOffset.UTC); - private static final String SCRIPT = String.join("\n", - "cpuThreads=$(grep -c '^processor' /proc/cpuinfo)", - "cpuSockets=$(awk '/^physical id/{print $NF}' /proc/cpuinfo | sort -u | wc -l)", - "[ \"$cpuSockets\" -eq 0 ] && cpuSockets=1", - "cpuCores=$(awk '/^physical id/{pid=$NF} /^core id/{print pid\"-\"$NF}' /proc/cpuinfo | sort -u | wc -l)", - "[ \"$cpuCores\" -eq 0 ] && cpuCores=$cpuThreads", - "while true; do", - " ts=$(date -u +%Y-%m-%dT%H:%M:%SZ)", - " read load1 load5 load15 runprocs rest < /proc/loadavg", - " IFS='/' read running total <<< \"$runprocs\"", - " memTotal=$(awk '/^MemTotal:/{print $2}' /proc/meminfo)", - " memFree=$(awk '/^MemFree:/{print $2}' /proc/meminfo)", - " memAvail=$(awk '/^MemAvailable:/{print $2}' /proc/meminfo)", - " buffers=$(awk '/^Buffers:/{print $2}' /proc/meminfo)", - " cached=$(awk '/^Cached:/{print $2}' /proc/meminfo)", - " swapTotal=$(awk '/^SwapTotal:/{print $2}' /proc/meminfo)", - " swapFree=$(awk '/^SwapFree:/{print $2}' /proc/meminfo)", - " disks=\"\"", - " while read maj min dev reads rmerged rsectors rtime writes wmerged wsectors wtime inprog iotime wiotime rest; do", - " if 
echo \"$dev\" | grep -qxE '(sd[a-z]+|nvme[0-9]+n[0-9]+|vd[a-z]+|xvd[a-z]+)'; then", - " [ -n \"$disks\" ] && disks=\"$disks,\"", - " disks=\"$disks{\\\"device\\\":\\\"$dev\\\",\\\"readsCompleted\\\":$reads,\\\"readsMerged\\\":$rmerged,\\\"sectorsRead\\\":$rsectors,\\\"readTimeMs\\\":$rtime,\\\"writesCompleted\\\":$writes,\\\"writesMerged\\\":$wmerged,\\\"sectorsWritten\\\":$wsectors,\\\"writeTimeMs\\\":$wtime,\\\"ioInProgress\\\":$inprog,\\\"ioTimeMs\\\":$iotime,\\\"weightedIoTimeMs\\\":$wiotime}\"", - " fi", - " done < /proc/diskstats", - " echo \"{\\\"timestamp\\\":\\\"$ts\\\",\\\"cpuSockets\\\":$cpuSockets,\\\"cpuCores\\\":$cpuCores,\\\"cpuThreads\\\":$cpuThreads,\\\"loadAvg1\\\":$load1,\\\"loadAvg5\\\":$load5,\\\"loadAvg15\\\":$load15,\\\"runningProcs\\\":$running,\\\"totalProcs\\\":$total,\\\"memTotalKB\\\":$memTotal,\\\"memFreeKB\\\":$memFree,\\\"memAvailableKB\\\":$memAvail,\\\"buffersKB\\\":$buffers,\\\"cachedKB\\\":$cached,\\\"swapTotalKB\\\":$swapTotal,\\\"swapFreeKB\\\":$swapFree,\\\"diskStats\\\":[$disks]}\"", - " sleep 30", - "done"); - - private Process process; + private ScheduledExecutorService scheduler; + private BufferedWriter writer; private String fileName; + private int cpuSockets; + private int cpuCores; + private int cpuThreads; - /** - * Creates the output directory, spawns the bash collector process, and returns the absolute path of the output file. 
- * - * @param outputDir directory to write the stats file into - * @param fileName name of the output JSONL file - * @return the absolute path of the stats file - * @throws IOException if the directory cannot be created or the process fails to start - */ public Path start(Path outputDir, String fileName) throws IOException { + if (!Files.exists(PROC_CPUINFO)) { + log.warn("/proc filesystem not available (not Linux?), system stats collection disabled"); + return null; + } + Files.createDirectories(outputDir); Path sysStatsPath = outputDir.resolve(fileName).toAbsolutePath(); - var pb = new ProcessBuilder("bash", "-c", SCRIPT); - pb.redirectOutput(ProcessBuilder.Redirect.to(sysStatsPath.toFile())); - pb.redirectErrorStream(true); - process = pb.start(); this.fileName = fileName; + + parseCpuTopology(); + + this.writer = Files.newBufferedWriter(sysStatsPath, + StandardOpenOption.CREATE, StandardOpenOption.APPEND); + + scheduler = Executors.newSingleThreadScheduledExecutor(r -> { + Thread t = new Thread(r, "sys-stats-collector"); + t.setDaemon(true); + return t; + }); + scheduler.scheduleAtFixedRate(() -> { + try { + String line = collectSnapshot(); + writer.write(line); + writer.newLine(); + writer.flush(); + } catch (Exception e) { + log.warn("Failed to collect system stats", e); + } + }, 0, 30, TimeUnit.SECONDS); + log.info("System stats collection started, saving to: {}", sysStatsPath); return sysStatsPath; } - /** Destroys the process (with a 5-second wait) and logs the saved path. 
*/ public void stop(Path outputDir) throws InterruptedException { - if (process != null) { - process.destroy(); - process.waitFor(5, TimeUnit.SECONDS); - process = null; + if (scheduler != null) { + scheduler.shutdown(); + scheduler.awaitTermination(5, TimeUnit.SECONDS); + scheduler = null; + try { + if (writer != null) { + writer.close(); + writer = null; + } + } catch (IOException e) { + log.warn("Failed to close stats writer", e); + } log.info("System stats collection stopped, saved to: {}", outputDir.resolve(fileName).toAbsolutePath()); } } - /** Returns {@code true} if the background process is currently running. */ public boolean isActive() { - return process != null; + return scheduler != null && !scheduler.isShutdown(); } - /** Returns the current file name, or {@code null} if collection has not been started. */ public String getFileName() { return fileName; } + + private void parseCpuTopology() throws IOException { + List lines = Files.readAllLines(PROC_CPUINFO); + int threads = 0; + var physicalIds = new HashSet(); + var coreKeys = new HashSet(); + String currentPhysicalId = "0"; + + for (String line : lines) { + if (line.startsWith("processor")) { + threads++; + } else if (line.startsWith("physical id")) { + currentPhysicalId = line.substring(line.indexOf(':') + 1).trim(); + physicalIds.add(currentPhysicalId); + } else if (line.startsWith("core id")) { + String coreId = line.substring(line.indexOf(':') + 1).trim(); + coreKeys.add(currentPhysicalId + "-" + coreId); + } + } + + this.cpuThreads = threads; + this.cpuSockets = physicalIds.isEmpty() ? 1 : physicalIds.size(); + this.cpuCores = coreKeys.isEmpty() ? 
cpuThreads : coreKeys.size(); + } + + private String collectSnapshot() throws IOException { + String ts = TS_FORMAT.format(Instant.now()); + + // /proc/loadavg: "0.50 0.35 0.25 2/150 12345" + String loadLine = Files.readString(PROC_LOADAVG).trim(); + String[] loadParts = loadLine.split("\\s+"); + String load1 = loadParts[0]; + String load5 = loadParts[1]; + String load15 = loadParts[2]; + String[] runProcs = loadParts[3].split("/"); + String running = runProcs[0]; + String total = runProcs[1]; + + // /proc/meminfo + long memTotal = 0, memFree = 0, memAvail = 0, buffers = 0, cached = 0, swapTotal = 0, swapFree = 0; + for (String line : Files.readAllLines(PROC_MEMINFO)) { + if (line.startsWith("MemTotal:")) memTotal = parseMemValue(line); + else if (line.startsWith("MemFree:")) memFree = parseMemValue(line); + else if (line.startsWith("MemAvailable:")) memAvail = parseMemValue(line); + else if (line.startsWith("Buffers:")) buffers = parseMemValue(line); + else if (line.startsWith("Cached:")) cached = parseMemValue(line); + else if (line.startsWith("SwapTotal:")) swapTotal = parseMemValue(line); + else if (line.startsWith("SwapFree:")) swapFree = parseMemValue(line); + } + + // /proc/diskstats + StringBuilder disks = new StringBuilder(); + for (String line : Files.readAllLines(PROC_DISKSTATS)) { + String[] f = line.trim().split("\\s+"); + if (f.length < 14) continue; + String dev = f[2]; + if (!DISK_DEVICE_PATTERN.matcher(dev).matches()) continue; + if (disks.length() > 0) disks.append(','); + disks.append(String.format( + "{\"device\":\"%s\",\"readsCompleted\":%s,\"readsMerged\":%s,\"sectorsRead\":%s,\"readTimeMs\":%s," + + "\"writesCompleted\":%s,\"writesMerged\":%s,\"sectorsWritten\":%s,\"writeTimeMs\":%s," + + "\"ioInProgress\":%s,\"ioTimeMs\":%s,\"weightedIoTimeMs\":%s}", + dev, f[3], f[4], f[5], f[6], f[7], f[8], f[9], f[10], f[11], f[12], f[13])); + } + + return String.format( + "{\"timestamp\":\"%s\",\"cpuSockets\":%d,\"cpuCores\":%d,\"cpuThreads\":%d," + + 
"\"loadAvg1\":%s,\"loadAvg5\":%s,\"loadAvg15\":%s,\"runningProcs\":%s,\"totalProcs\":%s," + + "\"memTotalKB\":%d,\"memFreeKB\":%d,\"memAvailableKB\":%d,\"buffersKB\":%d,\"cachedKB\":%d," + + "\"swapTotalKB\":%d,\"swapFreeKB\":%d,\"diskStats\":[%s]}", + ts, cpuSockets, cpuCores, cpuThreads, + load1, load5, load15, running, total, + memTotal, memFree, memAvail, buffers, cached, swapTotal, swapFree, + disks); + } + + private static long parseMemValue(String line) { + String[] parts = line.split("\\s+"); + return Long.parseLong(parts[1]); + } } From 06fff1775e93304e9f8b91799d5a43a7c38dd39c Mon Sep 17 00:00:00 2001 From: dian-lun-lin Date: Sun, 19 Apr 2026 22:48:17 -0700 Subject: [PATCH 09/18] Fix benchmark invocation in docs and default dataset Use -cp instead of -jar in docs since the benchmarks-jmh-*.jar glob matches the -javadoc jar first. Change default dataset from glove-100-angular to ada002-100k. Note -Xmx should be adjusted to fit the dataset. --- .../jvector/bench/CompactorBenchmark.java | 2 +- .../jvector/bench/CompactorBenchmark.md | 19 +++++++++++-------- docs/compaction.md | 11 ++++++++--- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.java b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.java index 597cfe40e..85b16e314 100644 --- a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.java +++ b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.java @@ -272,7 +272,7 @@ private static void writeCompletedCount(int count) { private Path scratchOutputPath; // where build-from-scratch graph is written // ---------- Params ---------- - @Param({"glove-100-angular"}) + @Param({"ada002-100k"}) public String datasetNames; @Param({"PARTITION_AND_COMPACT"}) diff --git a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md 
b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md index bf8355de0..a10b0a27b 100644 --- a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md +++ b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md @@ -35,13 +35,14 @@ ## Default: partition and compact in one run -The default mode builds partitions and immediately compacts them. Use this when you want a single-command end-to-end result. +The default mode builds partitions and immediately compacts them. Use this when you want a single-command end-to-end result. Adjust `-Xmx` to fit the dataset in memory (e.g., 220g for large datasets). ```bash java -Xmx220g --add-modules jdk.incubator.vector \ - -jar benchmarks-jmh/target/benchmarks-jmh-*.jar CompactorBenchmark \ + -cp benchmarks-jmh/target/benchmarks-jmh-*.jar \ + io.github.jbellis.jvector.bench.CompactorBenchmark \ -p workloadMode=PARTITION_AND_COMPACT \ - -p datasetNames=glove-100-angular \ + -p datasetNames=ada002-100k \ -p numPartitions=4 \ -p splitDistribution=FIBONACCI \ -p indexPrecision=FUSEDPQ \ @@ -62,9 +63,10 @@ Run with a large heap since the full dataset must be loaded into memory. ```bash java -Xmx220g --add-modules jdk.incubator.vector \ - -jar benchmarks-jmh/target/benchmarks-jmh-*.jar CompactorBenchmark \ + -cp benchmarks-jmh/target/benchmarks-jmh-*.jar \ + io.github.jbellis.jvector.bench.CompactorBenchmark \ -p workloadMode=PARTITION_ONLY \ - -p datasetNames=glove-100-angular \ + -p datasetNames=ada002-100k \ -p numPartitions=4 \ -p splitDistribution=FIBONACCI \ -p indexPrecision=FUSEDPQ \ @@ -79,9 +81,10 @@ The dataset is **not** loaded in this mode. 
Use a small `-Xmx` to measure and pr ```bash java -Xmx5g --add-modules jdk.incubator.vector \ - -jar benchmarks-jmh/target/benchmarks-jmh-*.jar CompactorBenchmark \ + -cp benchmarks-jmh/target/benchmarks-jmh-*.jar \ + io.github.jbellis.jvector.bench.CompactorBenchmark \ -p workloadMode=COMPACT_ONLY \ - -p datasetNames=glove-100-angular \ + -p datasetNames=ada002-100k \ -p numPartitions=4 \ -p splitDistribution=FIBONACCI \ -p indexPrecision=FUSEDPQ \ @@ -96,7 +99,7 @@ java -Xmx5g --add-modules jdk.incubator.vector \ | Parameter | Default | Description | |-----------|---------|-------------| -| `datasetNames` | `glove-100-angular` | Dataset name | +| `datasetNames` | `ada002-100k` | Dataset name | | `workloadMode` | `PARTITION_AND_COMPACT` | Which phase(s) to run | | `numPartitions` | `4` | Number of source partition indexes | | `splitDistribution` | — | Data partitioning strategy (`UNIFORM`, `FIBONACCI`, …) | diff --git a/docs/compaction.md b/docs/compaction.md index f63697aa9..d0cdffcbc 100644 --- a/docs/compaction.md +++ b/docs/compaction.md @@ -118,9 +118,12 @@ Use `CompactorBenchmark` (in `benchmarks-jmh`) to measure compaction performance ### Default: partition and compact in one run +Adjust `-Xmx` to fit the dataset in memory (e.g., 220g for large datasets). 
+ ```bash java -Xmx220g --add-modules jdk.incubator.vector \ - -jar benchmarks-jmh/target/benchmarks-jmh-*.jar CompactorBenchmark \ + -cp benchmarks-jmh/target/benchmarks-jmh-*.jar \ + io.github.jbellis.jvector.bench.CompactorBenchmark \ -p workloadMode=PARTITION_AND_COMPACT \ -p datasetNames=<dataset> \ -p numPartitions=4 \ @@ -137,7 +140,8 @@ To measure how little RAM compaction actually needs — without the dataset occu ```bash java -Xmx220g --add-modules jdk.incubator.vector \ - -jar benchmarks-jmh/target/benchmarks-jmh-*.jar CompactorBenchmark \ + -cp benchmarks-jmh/target/benchmarks-jmh-*.jar \ + io.github.jbellis.jvector.bench.CompactorBenchmark \ -p workloadMode=PARTITION_ONLY \ -p datasetNames=<dataset> \ -p numPartitions=4 \ @@ -150,7 +154,8 @@ java -Xmx220g --add-modules jdk.incubator.vector \ ```bash java -Xmx5g --add-modules jdk.incubator.vector \ - -jar benchmarks-jmh/target/benchmarks-jmh-*.jar CompactorBenchmark \ + -cp benchmarks-jmh/target/benchmarks-jmh-*.jar \ + io.github.jbellis.jvector.bench.CompactorBenchmark \ -p workloadMode=COMPACT_ONLY \ -p datasetNames=<dataset> \ -p numPartitions=4 \ From 6178afa1f01e48d815661a7f0ecab161154b31f9 Mon Sep 17 00:00:00 2001 From: dian-lun-lin Date: Mon, 20 Apr 2026 13:51:28 -0700 Subject: [PATCH 10/18] Fix jar selection: use fixed output name compactor-benchmark.jar The benchmarks-jmh-*.jar glob expands to multiple jars (shaded + javadoc), causing -cp to misinterpret the second jar as the main class. Configure shade plugin outputFile to produce a fixed compactor-benchmark.jar name. Update docs and CI workflow.
--- .github/workflows/run-compaction.yml | 5 +---- benchmarks-jmh/pom.xml | 1 + .../io/github/jbellis/jvector/bench/CompactorBenchmark.md | 6 +++--- docs/compaction.md | 6 +++--- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/run-compaction.yml b/.github/workflows/run-compaction.yml index 42efe907d..49f72b64b 100644 --- a/.github/workflows/run-compaction.yml +++ b/.github/workflows/run-compaction.yml @@ -93,13 +93,10 @@ jobs: SAFE_BRANCH=$(echo "${{ matrix.branch }}" | sed 's/[^A-Za-z0-9_-]/_/g') echo "safe_branch=$SAFE_BRANCH" >> $GITHUB_OUTPUT - JMH_JAR=$(ls benchmarks-jmh/target/benchmarks-jmh-*.jar | grep -Ev -- '-(javadoc|sources)\.jar$' | head -1) - echo "Using JMH jar: $JMH_JAR" - java --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector \ -Djvector.experimental.enable_native_vectorization=true \ -Xmx${HALF_MEM_GB}g \ - -cp "$JMH_JAR" \ + -cp benchmarks-jmh/target/compactor-benchmark.jar \ io.github.jbellis.jvector.bench.CompactorBenchmark \ -p workloadMode=PARTITION_AND_COMPACT \ -p datasetNames=$DATASET \ diff --git a/benchmarks-jmh/pom.xml b/benchmarks-jmh/pom.xml index 05fe36793..d51f7fafa 100644 --- a/benchmarks-jmh/pom.xml +++ b/benchmarks-jmh/pom.xml @@ -93,6 +93,7 @@ shade + ${project.build.directory}/compactor-benchmark.jar org.openjdk.jmh.Main diff --git a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md index a10b0a27b..57bac26e0 100644 --- a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md +++ b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md @@ -39,7 +39,7 @@ The default mode builds partitions and immediately compacts them. 
Use this when ```bash java -Xmx220g --add-modules jdk.incubator.vector \ - -cp benchmarks-jmh/target/benchmarks-jmh-*.jar \ + -cp benchmarks-jmh/target/compactor-benchmark.jar \ io.github.jbellis.jvector.bench.CompactorBenchmark \ -p workloadMode=PARTITION_AND_COMPACT \ -p datasetNames=ada002-100k \ @@ -63,7 +63,7 @@ Run with a large heap since the full dataset must be loaded into memory. ```bash java -Xmx220g --add-modules jdk.incubator.vector \ - -cp benchmarks-jmh/target/benchmarks-jmh-*.jar \ + -cp benchmarks-jmh/target/compactor-benchmark.jar \ io.github.jbellis.jvector.bench.CompactorBenchmark \ -p workloadMode=PARTITION_ONLY \ -p datasetNames=ada002-100k \ @@ -81,7 +81,7 @@ The dataset is **not** loaded in this mode. Use a small `-Xmx` to measure and pr ```bash java -Xmx5g --add-modules jdk.incubator.vector \ - -cp benchmarks-jmh/target/benchmarks-jmh-*.jar \ + -cp benchmarks-jmh/target/compactor-benchmark.jar \ io.github.jbellis.jvector.bench.CompactorBenchmark \ -p workloadMode=COMPACT_ONLY \ -p datasetNames=ada002-100k \ diff --git a/docs/compaction.md b/docs/compaction.md index d0cdffcbc..371ea512f 100644 --- a/docs/compaction.md +++ b/docs/compaction.md @@ -122,7 +122,7 @@ Adjust `-Xmx` to fit the dataset in memory (e.g., 220g for large datasets). 
```bash java -Xmx220g --add-modules jdk.incubator.vector \ - -cp benchmarks-jmh/target/benchmarks-jmh-*.jar \ + -cp benchmarks-jmh/target/compactor-benchmark.jar \ io.github.jbellis.jvector.bench.CompactorBenchmark \ -p workloadMode=PARTITION_AND_COMPACT \ -p datasetNames=<dataset> \ @@ -140,7 +140,7 @@ To measure how little RAM compaction actually needs — without the dataset occu ```bash java -Xmx220g --add-modules jdk.incubator.vector \ - -cp benchmarks-jmh/target/benchmarks-jmh-*.jar \ + -cp benchmarks-jmh/target/compactor-benchmark.jar \ io.github.jbellis.jvector.bench.CompactorBenchmark \ -p workloadMode=PARTITION_ONLY \ -p datasetNames=<dataset> \ @@ -154,7 +154,7 @@ java -Xmx220g --add-modules jdk.incubator.vector \ ```bash java -Xmx5g --add-modules jdk.incubator.vector \ - -cp benchmarks-jmh/target/benchmarks-jmh-*.jar \ + -cp benchmarks-jmh/target/compactor-benchmark.jar \ io.github.jbellis.jvector.bench.CompactorBenchmark \ -p workloadMode=COMPACT_ONLY \ -p datasetNames=<dataset> \ From 0ab1deaf3694be15f16f6dd16dd748790304aa5a Mon Sep 17 00:00:00 2001 From: dian-lun-lin Date: Wed, 22 Apr 2026 14:55:44 -0700 Subject: [PATCH 11/18] Refactor workload modes and fix build-from-scratch timing Simplify WorkloadMode enum: PARTITION_ONLY/COMPACT_ONLY/COMPACT_AND_RECALL/ BUILD_FROM_SCRATCH collapsed into PARTITION/COMPACT/BUILD/PARTITION_AND_COMPACT plus a separate measureRecall flag. Fix buildFromScratch timing to include PQ computation and graph construction (previously only timed the write step). Add fair comparison guidelines to CompactorBenchmark.md.
--- .../jvector/bench/CompactorBenchmark.java | 135 +++++++++--------- .../jvector/bench/CompactorBenchmark.md | 45 ++++-- docs/compaction.md | 19 +-- 3 files changed, 114 insertions(+), 85 deletions(-) diff --git a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.java b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.java index 85b16e314..ffad5ffd0 100644 --- a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.java +++ b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.java @@ -107,26 +107,22 @@ public enum IndexPrecision { public enum WorkloadMode { /** - * Build per-source partitions and stop. (No compaction, no recall.) + * Build per-source partitions and stop. */ - PARTITION_ONLY, + PARTITION, /** * Assume partitions exist on disk; compact them. */ - COMPACT_ONLY, + COMPACT, /** - * Assume partitions exist on disk; compact them, then run recall. + * Build a single graph for the whole dataset and write it. */ - COMPACT_AND_RECALL, + BUILD, /** - * Build a single graph for the whole dataset and write it. Then run recall. - */ - BUILD_FROM_SCRATCH, - /** - * (Default) Build partitions, compact them, then run recall. + * (Default) Build partitions, compact them. 
*/ PARTITION_AND_COMPACT } @@ -278,6 +274,9 @@ private static void writeCompletedCount(int count) { @Param({"PARTITION_AND_COMPACT"}) public WorkloadMode workloadMode; + @Param({"true"}) + public boolean measureRecall; + @Param({"4"}) // Default value, can be overridden via command line public int numPartitions; @@ -387,25 +386,10 @@ public void setup() throws Exception { int dimension; - if (workloadMode == WorkloadMode.COMPACT_ONLY) { - ds = null; - queryVectors = null; - groundTruth = null; - ravv = null; - baseVectors = null; - dimension = -1; - - var datasetInfo = DataSets.loadDataSet(datasetNames); - similarityFunction = datasetInfo - .flatMap(DataSetInfo::similarityFunction) - .orElseGet(() -> { - log.warn("Could not determine similarity function for dataset '{}'; defaulting to COSINE", datasetNames); - return VectorSimilarityFunction.COSINE; - }); + boolean needsBaseVectors = workloadMode != WorkloadMode.COMPACT; + boolean needsRecallData = measureRecall && workloadMode != WorkloadMode.PARTITION; - log.info("Skipping dataset load for COMPACT_ONLY mode without recall. Workload: {}, similarityFunction: {}, Live nodes rate: {}", - workloadMode, similarityFunction, liveNodesRate); - } else { + if (needsBaseVectors) { ds = DataSets.loadDataSet(datasetNames) .orElseThrow(() -> new RuntimeException("Dataset not found: " + datasetNames)) .getDataSet(); @@ -425,13 +409,38 @@ public void setup() throws Exception { ravv = new ListRandomAccessVectorValues(baseVectors, ds.getDimension()); } - queryVectors = ds.getQueryVectors(); - groundTruth = ds.getGroundTruth(); similarityFunction = ds.getSimilarityFunction(); dimension = ds.getDimension(); - log.info("Dataset {} loaded with recall data. 
Base vectors: {} (portion {}), Query vectors: {}, Dim: {}, Similarity: {}, Workload: {}, Live nodes rate: {}", - datasetNames, ravv.size(), datasetPortion, queryVectors.size(), dimension, similarityFunction, workloadMode, liveNodesRate); + if (needsRecallData) { + queryVectors = ds.getQueryVectors(); + groundTruth = ds.getGroundTruth(); + log.info("Dataset {} loaded with recall data. Base vectors: {} (portion {}), Query vectors: {}, Dim: {}, Similarity: {}, Workload: {}, measureRecall: {}, Live nodes rate: {}", + datasetNames, ravv.size(), datasetPortion, queryVectors.size(), dimension, similarityFunction, workloadMode, measureRecall, liveNodesRate); + } else { + queryVectors = null; + groundTruth = null; + log.info("Dataset {} loaded (base vectors only). Base vectors: {} (portion {}), Dim: {}, Similarity: {}, Workload: {}, measureRecall: {}", + datasetNames, ravv.size(), datasetPortion, dimension, similarityFunction, workloadMode, measureRecall); + } + } else { + ds = null; + queryVectors = null; + groundTruth = null; + ravv = null; + baseVectors = null; + dimension = -1; + + var datasetInfo = DataSets.loadDataSet(datasetNames); + similarityFunction = datasetInfo + .flatMap(DataSetInfo::similarityFunction) + .orElseGet(() -> { + log.warn("Could not determine similarity function for dataset '{}'; defaulting to COSINE", datasetNames); + return VectorSimilarityFunction.COSINE; + }); + + log.info("Skipping dataset load for {} mode. similarityFunction: {}, Live nodes rate: {}", + workloadMode, similarityFunction, liveNodesRate); } // Resolve storagePaths + partitionsDir @@ -440,22 +449,18 @@ public void setup() throws Exception { compactOutputPath = resolveCompactOutputPath(partitionsBaseDir); scratchOutputPath = resolveScratchOutputPath(partitionsBaseDir); - // Clean stale artifacts only if we're going to rebuild them. 
- if (workloadMode == WorkloadMode.COMPACT_ONLY || workloadMode == WorkloadMode.COMPACT_AND_RECALL) { - // For compact-only and compact-and-recall, ensure the partition files exist. + if (workloadMode == WorkloadMode.COMPACT) { verifyPartitionsExist(partitionsBaseDir, numPartitions); } - // Partition metadata for remapping (needed for compaction) - if (workloadMode == WorkloadMode.PARTITION_ONLY || workloadMode == WorkloadMode.PARTITION_AND_COMPACT) { + if (workloadMode == WorkloadMode.PARTITION || workloadMode == WorkloadMode.PARTITION_AND_COMPACT) { var partitionedData = DataSetPartitioner.partition(baseVectors, numPartitions, splitDistribution); vectorsPerSourceCount = partitionedData.sizes; } else { vectorsPerSourceCount = null; } - // Build partitions during setup for SEGMENTS_* (matches original benchmark structure) - if (workloadMode == WorkloadMode.PARTITION_ONLY || workloadMode == WorkloadMode.PARTITION_AND_COMPACT) { + if (workloadMode == WorkloadMode.PARTITION || workloadMode == WorkloadMode.PARTITION_AND_COMPACT) { if (jfrPartitioning) { jfrPartitioningRecorder.start(JFR_DIR, "partitioning-" + jfrParamSuffix() + ".jfr", jfrObjectCount); } @@ -472,8 +477,8 @@ public void setup() throws Exception { } private void validateParams() { - if (workloadMode == WorkloadMode.BUILD_FROM_SCRATCH) { - log.warn("numPartitions={} ignored in BUILD_FROM_SCRATCH mode", numPartitions); + if (workloadMode == WorkloadMode.BUILD) { + log.warn("numPartitions={} ignored in BUILD mode", numPartitions); } else { if (numPartitions <= 1) throw new IllegalArgumentException("numPartitions must be larger than one"); @@ -570,7 +575,7 @@ private void verifyPartitionsExist(Path partitionsDir, int numPartitions) { for (int i = 0; i < numPartitions; i++) { Path seg = partitionsDir.resolve("per-source-graph-" + i); if (!Files.exists(seg)) { - throw new IllegalStateException("Missing partition file for COMPACT_ONLY or COMPACT_AND_RECALL: " + seg.toAbsolutePath()); + throw new 
IllegalStateException("Missing partition file for COMPACT mode: " + seg.toAbsolutePath()); } } } @@ -701,6 +706,14 @@ private long buildFromScratch(List> baseVectors) throws Exception int dimension = baseVectors.get(0).length(); var full = new ListRandomAccessVectorValues(baseVectors, dimension); + + log.info("Building from scratch: vectors={} dim={} sim={} deg={} bw={} precision={} pwThreads={} vp={} -> {}", + full.size(), dimension, similarityFunction, + graphDegree, beamWidth, indexPrecision, parallelWriteThreads, resolvedVectorizationProvider, + scratchOutputPath.toAbsolutePath()); + + long startNanos = System.nanoTime(); + ProductQuantization pq = null; PQVectors pqVectors = null; BuildScoreProvider bsp; @@ -714,11 +727,6 @@ private long buildFromScratch(List> baseVectors) throws Exception bsp = BuildScoreProvider.randomAccessScoreProvider(full, similarityFunction); } - log.info("Building from scratch: vectors={} dim={} sim={} deg={} bw={} precision={} pwThreads={} vp={} -> {}", - full.size(), dimension, similarityFunction, - graphDegree, beamWidth, indexPrecision, parallelWriteThreads, resolvedVectorizationProvider, - scratchOutputPath.toAbsolutePath()); - var builder = new GraphIndexBuilder(bsp, dimension, graphDegree, beamWidth, 1.2f, 1.2f, true); var graph = builder.build(full); @@ -730,19 +738,10 @@ private long buildFromScratch(List> baseVectors) throws Exception writerBuilder.with(new InlineVectors(dimension)); -// ProductQuantization pq = null; -// PQVectors pqVectors = null; -// if (indexPrecision == IndexPrecision.FUSEDPQ) { -// boolean centerData = similarityFunction == VectorSimilarityFunction.EUCLIDEAN; -// pq = ProductQuantization.compute(full, dimension / 8, 256, centerData); -// pqVectors = (PQVectors) pq.encodeAll(full); -// writerBuilder.with(new FusedPQ(graph.maxDegree(), pq)); -// } if (indexPrecision == IndexPrecision.FUSEDPQ) { writerBuilder.with(new FusedPQ(graph.maxDegree(), pq)); } - long startNanos = System.nanoTime(); try (var writer 
= writerBuilder.build()) { var suppliers = new EnumMap>(FeatureId.class); suppliers.put(FeatureId.INLINE_VECTORS, ord -> new InlineVectors.State(full.getVector(ord))); @@ -814,27 +813,28 @@ public void run(Blackhole blackhole, RecallResult recallResult) throws Exception // Execute workload switch (workloadMode) { - case PARTITION_ONLY: + case PARTITION: break; - case COMPACT_ONLY: + case COMPACT: durationMs = compactPartitions(); + if (measureRecall) { + recall = runRecall(compactOutputPath); + } break; - case COMPACT_AND_RECALL: - durationMs = compactPartitions(); - recall = runRecall(compactOutputPath); - break; - - case BUILD_FROM_SCRATCH: { + case BUILD: durationMs = buildFromScratch(baseVectors); - recall = runRecall(scratchOutputPath); + if (measureRecall) { + recall = runRecall(scratchOutputPath); + } break; - } case PARTITION_AND_COMPACT: durationMs = compactPartitions(); - recall = runRecall(compactOutputPath); + if (measureRecall) { + recall = runRecall(compactOutputPath); + } break; default: @@ -902,6 +902,7 @@ private LinkedHashMap buildParams() { params.put("parallelWriteThreads", parallelWriteThreads); params.put("vectorizationProvider", resolvedVectorizationProvider); params.put("datasetPortion", datasetPortion); + params.put("measureRecall", measureRecall); params.put("jfrPartitioning", jfrPartitioning); params.put("jfrCompacting", jfrCompacting); params.put("jfrObjectCount", jfrObjectCount); diff --git a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md index 57bac26e0..2fe0effab 100644 --- a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md +++ b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md @@ -24,10 +24,33 @@ | Mode | Description | |------|-------------| -| `PARTITION_AND_COMPACT` | **(default)** Build partitions, compact them, then measure recall — all in one 
run | -| `PARTITION_ONLY` | Build N partition indexes and exit; no compaction | -| `COMPACT_ONLY` | Compact existing partitions without loading the dataset | -| `BUILD_FROM_SCRATCH` | Build a single index over the full dataset | +| `PARTITION_AND_COMPACT` | **(default)** Build partitions, compact them | +| `PARTITION` | Build N partition indexes and exit; no compaction | +| `COMPACT` | Compact existing partitions | +| `BUILD` | Build a single index over the full dataset | + +## measureRecall + +Set `-p measureRecall=false` to skip recall measurement. For `COMPACT` mode this also +skips dataset loading entirely, since query vectors and ground truth are not needed. + +## Fair comparison guidelines + +**Recall**: use `PARTITION_AND_COMPACT` vs `BUILD`, both with `measureRecall=true` and +the same dataset, `indexPrecision`, `graphDegree`, and `beamWidth`. Both modes search +using FusedPQ with FP reranking. The `recall` field in the JSONL output is directly +comparable. + +**Build performance**: use `COMPACT` vs `BUILD`, both with `measureRecall=false`. +The `durationMs` field measures only the graph construction pipeline (PQ training + +graph build + write for `BUILD`; PQ retraining + neighbor gathering + write for +`COMPACT`). Dataset loading is excluded from `durationMs` in both modes. +Run `PARTITION` first to create the partition files needed by `COMPACT`. + +**Memory footprint**: run `COMPACT` with `measureRecall=false` and a small `-Xmx` +(e.g., 5g). Since `COMPACT` does not load the dataset into heap, the heap limit +reflects only the compactor's own memory usage. `BUILD` always requires the full +dataset in memory, so its heap requirement scales with dataset size. --- @@ -53,7 +76,7 @@ java -Xmx220g --add-modules jdk.incubator.vector \ # 3. Measuring Peak Heap During Compaction -The two-step workflow (`PARTITION_ONLY` → `COMPACT_ONLY`) exists to isolate compaction's true memory footprint. 
In `PARTITION_AND_COMPACT` mode the dataset is still resident in heap during compaction, which inflates the apparent memory cost. `COMPACT_ONLY` skips dataset loading entirely, so the heap limit applies only to the compactor itself. +The two-step workflow (`PARTITION` → `COMPACT` with `measureRecall=false`) exists to isolate compaction's true memory footprint. In `PARTITION_AND_COMPACT` mode the dataset is still resident in heap during compaction, which inflates the apparent memory cost. `COMPACT` with `measureRecall=false` skips dataset loading entirely, so the heap limit applies only to the compactor itself. This lets you prove that compaction can run on machines with very little RAM — e.g., `-Xmx5g` is sufficient even for large datasets. @@ -65,7 +88,7 @@ Run with a large heap since the full dataset must be loaded into memory. java -Xmx220g --add-modules jdk.incubator.vector \ -cp benchmarks-jmh/target/compactor-benchmark.jar \ io.github.jbellis.jvector.bench.CompactorBenchmark \ - -p workloadMode=PARTITION_ONLY \ + -p workloadMode=PARTITION \ -p datasetNames=ada002-100k \ -p numPartitions=4 \ -p splitDistribution=FIBONACCI \ @@ -83,7 +106,8 @@ The dataset is **not** loaded in this mode. 
Use a small `-Xmx` to measure and pr java -Xmx5g --add-modules jdk.incubator.vector \ -cp benchmarks-jmh/target/compactor-benchmark.jar \ io.github.jbellis.jvector.bench.CompactorBenchmark \ - -p workloadMode=COMPACT_ONLY \ + -p workloadMode=COMPACT \ + -p measureRecall=false \ -p datasetNames=ada002-100k \ -p numPartitions=4 \ -p splitDistribution=FIBONACCI \ @@ -100,7 +124,8 @@ java -Xmx5g --add-modules jdk.incubator.vector \ | Parameter | Default | Description | |-----------|---------|-------------| | `datasetNames` | `ada002-100k` | Dataset name | -| `workloadMode` | `PARTITION_AND_COMPACT` | Which phase(s) to run | +| `workloadMode` | `PARTITION_AND_COMPACT` | Which phase(s) to run (`PARTITION`, `COMPACT`, `BUILD`, `PARTITION_AND_COMPACT`) | +| `measureRecall` | `true` | Whether to run recall measurement after building/compacting | | `numPartitions` | `4` | Number of source partition indexes | | `splitDistribution` | — | Data partitioning strategy (`UNIFORM`, `FIBONACCI`, …) | | `indexPrecision` | — | `FULLPRECISION` (inline vectors only) or `FUSEDPQ` (inline + FusedPQ) | @@ -132,11 +157,11 @@ Key fields: | Field | Description | |-------|-------------| | `durationMs` | Time spent in the measured phase only | -| `recall` | Recall@10 (present when workload mode includes recall, e.g. `PARTITION_AND_COMPACT`) | +| `recall` | Recall@10 (present when `measureRecall=true`) | | `peakHeapMb` | Peak JVM heap observed during the run | --- # 7. Memory Footprint -All datasets in the recall table (see `docs/compaction.md`) can be run under `COMPACT_ONLY` with `-Xmx5g`. Compaction also successfully scales to a dataset with 2560 dimensions and 10M vectors under the same constraint. +All datasets in the recall table (see `docs/compaction.md`) can be run under `COMPACT` with `measureRecall=false` and `-Xmx5g`. Compaction also successfully scales to a dataset with 2560 dimensions and 10M vectors under the same constraint. 
diff --git a/docs/compaction.md b/docs/compaction.md index 371ea512f..c3e68c963 100644 --- a/docs/compaction.md +++ b/docs/compaction.md @@ -142,7 +142,7 @@ To measure how little RAM compaction actually needs — without the dataset occu java -Xmx220g --add-modules jdk.incubator.vector \ -cp benchmarks-jmh/target/compactor-benchmark.jar \ io.github.jbellis.jvector.bench.CompactorBenchmark \ - -p workloadMode=PARTITION_ONLY \ + -p workloadMode=PARTITION \ -p datasetNames=<dataset> \ -p numPartitions=4 \ -p splitDistribution=FIBONACCI \ @@ -156,7 +156,8 @@ java -Xmx220g --add-modules jdk.incubator.vector \ java -Xmx5g --add-modules jdk.incubator.vector \ -cp benchmarks-jmh/target/compactor-benchmark.jar \ io.github.jbellis.jvector.bench.CompactorBenchmark \ - -p workloadMode=COMPACT_ONLY \ + -p workloadMode=COMPACT \ + -p measureRecall=false \ -p datasetNames=<dataset> \ -p numPartitions=4 \ -p splitDistribution=FIBONACCI \ @@ -164,16 +165,18 @@ java -Xmx5g --add-modules jdk.incubator.vector \ -wi 0 -i 1 -f 1 ``` -`COMPACT_ONLY` skips dataset loading entirely, so `-Xmx5g` is sufficient even for large datasets. This lets you confirm that the compactor itself — not the dataset — is the memory bottleneck. +`COMPACT` with `measureRecall=false` skips dataset loading entirely, so `-Xmx5g` is sufficient even for large datasets. This lets you confirm that the compactor itself — not the dataset — is the memory bottleneck.
Key `workloadMode` values: | Mode | Description | |---|---| -| `PARTITION_AND_COMPACT` | **(default)** Build partitions, compact them, then measure recall | -| `PARTITION_ONLY` | Build N partition indexes and exit; use before `COMPACT_ONLY` | -| `COMPACT_ONLY` | Compact existing partitions without loading the dataset; `durationMs` = `compact()` time | -| `BUILD_FROM_SCRATCH` | Build one index over the full dataset; `durationMs` = `build()` time | +| `PARTITION_AND_COMPACT` | **(default)** Build partitions, compact them | +| `PARTITION` | Build N partition indexes and exit; use before `COMPACT` | +| `COMPACT` | Compact existing partitions | +| `BUILD` | Build one index over the full dataset | + +Set `-p measureRecall=false` to skip recall measurement (and dataset loading for `COMPACT` mode). Results are written as JSONL to `target/benchmark-results/compactor-*/compactor-results.jsonl`. The `durationMs` field records only the target function time (not dataset loading or JVM startup). @@ -200,5 +203,5 @@ Recall comparison (results averaged over three runs): # Memory footprint -All datasets above can be compacted under `COMPACT_ONLY` with `-Xmx5g`. In addition, compaction successfully scales to a dataset with 2560 dimensions and 10M vectors under the same memory constraint. +All datasets above can be compacted under `COMPACT` with `measureRecall=false` and `-Xmx5g`. In addition, compaction successfully scales to a dataset with 2560 dimensions and 10M vectors under the same memory constraint. From 3127043f3324af8fc8bacc3ce2e35f4036ae9d03 Mon Sep 17 00:00:00 2001 From: dian-lun-lin Date: Thu, 23 Apr 2026 00:30:40 -0700 Subject: [PATCH 12/18] Add TIERED_10_90 and TIERED_1_99 split distributions Support 10%/90% and 1%/99% partition splits for benchmarking compaction of a small new segment into a large existing index. Add split distribution reference table to CompactorBenchmark.md. 
--- .../jvector/bench/CompactorBenchmark.md | 24 +++++++++++++++---- .../example/yaml/TestDataPartition.java | 14 ++++++++++- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md index 2fe0effab..1d9247e65 100644 --- a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md +++ b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.md @@ -127,13 +127,29 @@ java -Xmx5g --add-modules jdk.incubator.vector \ | `workloadMode` | `PARTITION_AND_COMPACT` | Which phase(s) to run (`PARTITION`, `COMPACT`, `BUILD`, `PARTITION_AND_COMPACT`) | | `measureRecall` | `true` | Whether to run recall measurement after building/compacting | | `numPartitions` | `4` | Number of source partition indexes | -| `splitDistribution` | — | Data partitioning strategy (`UNIFORM`, `FIBONACCI`, …) | +| `splitDistribution` | — | Data partitioning strategy (see below) | | `indexPrecision` | — | `FULLPRECISION` (inline vectors only) or `FUSEDPQ` (inline + FusedPQ) | | `storageDirectories` | *(temp dir)* | Comma-separated list of directories where partition files are written; partitions are distributed round-robin across them. Defaults to a JVM temp directory if unset. | --- -# 5. Index Precision +# 5. Split Distributions + +`splitDistribution` controls how vectors are divided across partitions. 
+ +| Distribution | Weights (example: 4 partitions) | Description | +|---|---|---| +| `UNIFORM` | [1, 1, 1, 1] (25% each) | Equal-sized partitions | +| `FIBONACCI` | [1, 2, 3, 5] (9%, 18%, 27%, 45%) | Fibonacci-weighted; larger partitions grow progressively | +| `LOG2N` | [1, 2, 4, 8] (7%, 13%, 27%, 53%) | Power-of-two weighted | +| `TIERED_10_90` | [1, 9] (10%, 90%) | Small + large; simulates compacting a new segment into a large index | +| `TIERED_1_99` | [1, 99] (1%, 99%) | Very small + very large; extreme tiered compaction scenario | + +`TIERED_10_90` and `TIERED_1_99` are designed for 2-partition benchmarks (`-p numPartitions=2`). + +--- + +# 6. Index Precision `indexPrecision` controls what features are written into each partition index. @@ -144,7 +160,7 @@ java -Xmx5g --add-modules jdk.incubator.vector \ --- -# 6. Results +# 7. Results Results are written as JSONL to: @@ -162,6 +178,6 @@ Key fields: --- -# 7. Memory Footprint +# 8. Memory Footprint All datasets in the recall table (see `docs/compaction.md`) can be run under `COMPACT` with `measureRecall=false` and `-Xmx5g`. Compaction also successfully scales to a dataset with 2560 dimensions and 10M vectors under the same constraint. diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/yaml/TestDataPartition.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/yaml/TestDataPartition.java index b7592f042..648e4942a 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/yaml/TestDataPartition.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/yaml/TestDataPartition.java @@ -40,7 +40,11 @@ public TestDataPartition(int numSplits) { public enum Distribution { UNIFORM, FIBONACCI, - LOG2N; + LOG2N, + /** First partition gets 10%, last gets 90%. For N>2, middle partitions are empty. */ + TIERED_10_90, + /** First partition gets 1%, last gets 99%. For N>2, middle partitions are empty. 
*/ + TIERED_1_99; public List computeSplitSizes(int total, int numSplits) { int[] weights = new int[numSplits]; @@ -61,6 +65,14 @@ public List computeSplitSizes(int total, int numSplits) { case LOG2N: for (int i = 0; i < numSplits; i++) weights[i] = 1 << i; break; + case TIERED_10_90: + weights[0] = 1; + weights[numSplits - 1] = 9; + break; + case TIERED_1_99: + weights[0] = 1; + weights[numSplits - 1] = 99; + break; } long weightSum = 0; From 632bc76dd2b9602d322095355e05c4df27d1a829 Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Thu, 7 May 2026 16:40:29 -0400 Subject: [PATCH 13/18] fix for bug when fused pq is used with no hierarchy (#664) --- .../jvector/graph/disk/CompactWriter.java | 29 ++++++++++++--- .../graph/disk/OnDiskGraphIndexCompactor.java | 35 ++++++++++++++----- 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CompactWriter.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CompactWriter.java index 5a3091686..c0cc684e7 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CompactWriter.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CompactWriter.java @@ -61,6 +61,7 @@ final class CompactWriter implements AutoCloseable { private final ProductQuantization pq; private final int baseDegree; private final int maxOrdinal; + private final int entryNode; private final ThreadLocal bufferPerThread; private final ThreadLocal> zeroPQ; private final boolean fusedPQEnabled; @@ -68,6 +69,9 @@ final class CompactWriter implements AutoCloseable { private final List configuredLayerInfo; private final List configuredLayerDegrees; private final List level1FeatureRecords; + // PQ code for the entry node, required when hierarchy is disabled (no level 1). + // Mirrors what AbstractGraphIndexWriter.writeSparseLevels writes in the no-hierarchy branch. 
+ private ByteSequence entryNodePqCode; CompactWriter(Path outputPath, int maxOrdinal, @@ -91,7 +95,9 @@ final class CompactWriter implements AutoCloseable { this.baseDegree = layerDegrees.get(0); this.pq = pq; this.maxOrdinal = maxOrdinal; + this.entryNode = entryNode; this.level1FeatureRecords = new ArrayList<>(); + this.entryNodePqCode = null; Map featureMap = new LinkedHashMap<>(); InlineVectors inlineVectorFeature = new InlineVectors(dimension); @@ -134,10 +140,21 @@ public void writeHeader() throws IOException { } void writeFooter() throws IOException { - if (fusedPQEnabled && version == 6 && !level1FeatureRecords.isEmpty()) { - for (UpperLayerFeatureRecord record : level1FeatureRecords) { - writer.writeInt(record.ordinal); - vectorTypeSupport.writeByteSequence(writer, record.pqCode); + if (fusedPQEnabled && version == 6) { + if (!level1FeatureRecords.isEmpty()) { + // Hierarchy is enabled: write PQ source feature for every level-1 node. + // Mirrors AbstractGraphIndexWriter.writeSparseLevels (getMaxLevel >= 1 branch). + for (UpperLayerFeatureRecord record : level1FeatureRecords) { + writer.writeInt(record.ordinal); + vectorTypeSupport.writeByteSequence(writer, record.pqCode); + } + } else if (entryNodePqCode != null) { + // No hierarchy: write the entry node's own PQ code so that + // OnDiskGraphIndex.loadInMemoryFeatures can populate hierarchyCachedFeatures + // and GraphSearcher.initializeInternal can score the entry point. + // Mirrors AbstractGraphIndexWriter.writeSparseLevels (getMaxLevel == 0 branch). 
+ writer.writeInt(entryNode); + vectorTypeSupport.writeByteSequence(writer, entryNodePqCode); } } long headerOffset = writer.position(); @@ -157,6 +174,10 @@ public Path getOutputPath() { return outputPath; } + public void setEntryNodePqCode(ByteSequence code) { + this.entryNodePqCode = code; + } + public void writeUpperLayerNode(int level, int ordinal, int[] neighbors, ByteSequence level1PqCode) throws IOException { writer.writeInt(ordinal); writer.writeInt(neighbors.length); diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java index 9200b2763..ef1d8bcb5 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java @@ -223,13 +223,31 @@ public void compact(Path outputPath) throws FileNotFoundException { } List layerInfo = computeLayerInfoFromSources(); - int entryNode = resolveEntryNode(); + int[] entryNodeSource = resolveEntryNodeSource(); // {sourceIdx, originalOrdinal} + int entryNode = remappers.get(entryNodeSource[0]).oldToNew(entryNodeSource[1]); log.info("Writing compacted graph : {} total nodes, maxOrdinal={}, dimension={}, degree={}", numTotalNodes, maxOrdinal, dimension, maxDegrees.get(0)); try (CompactWriter writer = new CompactWriter(outputPath, maxOrdinal, numTotalNodes, 0, layerInfo, entryNode, dimension, maxDegrees, pq, pqLength, fusedPQEnabled)) { writer.writeHeader(); compactLevels(writer, similarityFunction, fusedPQEnabled, compressedPrecision, pq); + + // When FusedPQ is enabled and there is no hierarchy (only L0), the reader expects + // to find the entry node's own PQ code written after the L0 block, just as + // AbstractGraphIndexWriter.writeSparseLevels does in its getMaxLevel == 0 branch. 
+ // Without it, loadInMemoryFeatures reads garbage and hierarchyCachedFeatures is + // missing the entry node, causing "Node X is not in the hierarchy" on first search. + if (fusedPQEnabled && maxDegrees.size() == 1) { + try (var entryView = sources.get(entryNodeSource[0]).getView()) { + var entryVec = vectorTypeSupport.createFloatVector(dimension); + entryView.getVectorInto(entryNodeSource[1], entryVec, 0); + var entryPqCode = vectorTypeSupport.createByteSequence(pq.getSubspaceCount()); + entryPqCode.zero(); + pq.encodeTo(entryVec, entryPqCode); + writer.setEntryNodePqCode(entryPqCode); + } + } + writer.writeFooter(); log.info("Compaction complete: {}", outputPath); } catch (IOException | ExecutionException | InterruptedException e) { @@ -240,12 +258,13 @@ public void compact(Path outputPath) throws FileNotFoundException { } /** - * Resolves the entry node for the compacted graph. The chosen node must exist at maxLevel - * (since the on-disk format sets entryNode.level = maxLevel). Prefers the designated entry - * node of any source whose maxLevel equals the global maxLevel; if all such entry nodes - * are deleted, falls back to the first live node at maxLevel across all sources. + * Returns {sourceIdx, originalOrdinal} for the entry node of the compacted graph. + * The chosen node must exist at maxLevel (since the on-disk format sets entryNode.level = + * maxLevel). Prefers the designated entry node of any source whose maxLevel equals the global + * maxLevel; if all such entry nodes are deleted, falls back to the first live node at maxLevel + * across all sources. */ - private int resolveEntryNode() { + private int[] resolveEntryNodeSource() { int maxLevel = sources.stream().mapToInt(OnDiskGraphIndex::getMaxLevel).max().orElse(0); // The on-disk format sets entryNode.level = layerInfo.size() - 1 (i.e. maxLevel). 
@@ -256,7 +275,7 @@ private int resolveEntryNode() { if (sources.get(s).getMaxLevel() == maxLevel) { int originalEntry = sources.get(s).getView().entryNode().node; if (liveNodes.get(s).get(originalEntry)) { - return remappers.get(s).oldToNew(originalEntry); + return new int[]{s, originalEntry}; } } } @@ -268,7 +287,7 @@ private int resolveEntryNode() { while (it.hasNext()) { int node = it.next(); if (liveNodes.get(s).get(node)) { - return remappers.get(s).oldToNew(node); + return new int[]{s, node}; } } } From 6e97fc52790848b834467c263d0cd76894bd5a4d Mon Sep 17 00:00:00 2001 From: Mark Wolters Date: Tue, 12 May 2026 11:08:59 -0400 Subject: [PATCH 14/18] fix for hierarchy issue --- .../jvector/graph/disk/OnDiskGraphIndexCompactor.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java index ef1d8bcb5..1da50e6a4 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java @@ -153,8 +153,9 @@ private void validateInputSizes(List sources, */ private void validateLiveNodesBounds(List sources, List liveNodes) { for (int s = 0; s < sources.size(); ++s) { - if (liveNodes.get(s).length() != sources.get(s).size(0)) { - throw new IllegalArgumentException("source " + s + " out of bounds"); + if (liveNodes.get(s).length() != sources.get(s).getIdUpperBound()) { + throw new IllegalArgumentException("source " + s + " out of bounds: liveNodes length " + + liveNodes.get(s).length() + " != idUpperBound " + sources.get(s).getIdUpperBound()); } } } From e1eb1c0caff8fc956541a95c3c3db4975158dbce Mon Sep 17 00:00:00 2001 From: dian-lun-lin Date: Wed, 20 May 2026 11:38:12 -0700 Subject: [PATCH 15/18] Compaction: introduce 
QuantizationCompactionStrategy, accept non-fused sidecar I/O, and pre-encode PQ codes Refactor the compactor's quantization-aware logic behind a QuantizationCompactionStrategy abstraction with two concrete implementations (FusedCompactionStrategy and SidecarCompactionStrategy) selected via Feature/CompressedVectors factories, so the compactor body stays scheme-agnostic and adding a new quantization type (e.g. ASH) requires no new strategy classes. Add a non-fused compressed-sidecar I/O channel: compact(graphPath, compressedPath) accepts PQVectors-style inputs parallel to the source graphs, retrains the compressor on a balanced sample of merged source vectors, and writes a single merged CompressedVectors sidecar after the graph file is closed. Pre-encode every live node's PQ code once into a memory-mapped section appended past the projected output end (FusedCompactionStrategy.precomputeCodes). CompactWriter memcpies neighbor codes from that cache during inline writes instead of re-encoding per parent record; the cache is unmapped and the section truncated in onAfterClose. 
--- .../jvector/graph/disk/CompactWriter.java | 110 ++++-- .../jvector/graph/disk/CompactionContext.java | 62 ++++ .../graph/disk/FusedCompactionStrategy.java | 225 +++++++++++++ .../graph/disk/OnDiskGraphIndexCompactor.java | 224 ++++++++++--- .../jvector/graph/disk/PQRetrainer.java | 14 +- .../disk/QuantizationCompactionStrategy.java | 157 +++++++++ .../graph/disk/SidecarCompactionStrategy.java | 170 ++++++++++ .../graph/disk/VectorCompressorRetrainer.java | 33 ++ .../graph/disk/feature/FusedFeature.java | 42 +++ .../jvector/graph/disk/feature/FusedPQ.java | 35 ++ .../quantization/CompressedVectors.java | 44 +++ .../jvector/quantization/PQVectors.java | 33 ++ .../disk/TestOnDiskGraphIndexCompactor.java | 316 ++++++++++++++++++ 13 files changed, 1390 insertions(+), 75 deletions(-) create mode 100644 jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CompactionContext.java create mode 100644 jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/FusedCompactionStrategy.java create mode 100644 jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/QuantizationCompactionStrategy.java create mode 100644 jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/SidecarCompactionStrategy.java create mode 100644 jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/VectorCompressorRetrainer.java diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CompactWriter.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CompactWriter.java index c0cc684e7..d64abf553 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CompactWriter.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CompactWriter.java @@ -19,9 +19,9 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.MappedByteBuffer; import java.nio.file.Path; import java.util.ArrayList; -import java.util.Collections; import 
java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -31,9 +31,8 @@ import io.github.jbellis.jvector.disk.ByteBufferIndexWriter; import io.github.jbellis.jvector.graph.disk.feature.Feature; import io.github.jbellis.jvector.graph.disk.feature.FeatureId; +import io.github.jbellis.jvector.graph.disk.feature.FusedFeature; import io.github.jbellis.jvector.graph.disk.feature.InlineVectors; -import io.github.jbellis.jvector.graph.disk.feature.FusedPQ; -import io.github.jbellis.jvector.quantization.ProductQuantization; import io.github.jbellis.jvector.vector.VectorizationProvider; import io.github.jbellis.jvector.vector.types.ByteSequence; import io.github.jbellis.jvector.vector.types.VectorFloat; @@ -57,8 +56,7 @@ final class CompactWriter implements AutoCloseable { private final int headerSize; private final Header header; private final int version; - private final FusedPQ fusedPQFeature; - private final ProductQuantization pq; + private final FusedFeature fusedFeature; private final int baseDegree; private final int maxOrdinal; private final int entryNode; @@ -73,6 +71,14 @@ final class CompactWriter implements AutoCloseable { // Mirrors what AbstractGraphIndexWriter.writeSparseLevels writes in the no-hierarchy branch. private ByteSequence entryNodePqCode; + // Optional pre-computed PQ codes by new ordinal. When set, writeInlineNodeRecord copies + // codes from this buffer instead of calling pq.encodeTo per neighbor. Each worker thread + // gets its own duplicated view via pqCacheViewPerThread so positions don't race. 
+ private MappedByteBuffer pqCodeCache; + private int pqCodeSize; + private ThreadLocal pqCacheViewPerThread; + private ThreadLocal pqCodeBufPerThread; + CompactWriter(Path outputPath, int maxOrdinal, int numBaseLayerNodes, @@ -81,11 +87,10 @@ final class CompactWriter implements AutoCloseable { int entryNode, int dimension, List layerDegrees, - ProductQuantization pq, - int pqLength, - boolean fusedPQEnabled) + FusedFeature fusedFeature) throws IOException { - this.fusedPQEnabled = fusedPQEnabled; + this.fusedFeature = fusedFeature; + this.fusedPQEnabled = fusedFeature != null; this.version = OnDiskGraphIndex.CURRENT_VERSION; this.outputPath = outputPath; this.writer = new BufferedRandomAccessWriter(outputPath); @@ -93,7 +98,6 @@ final class CompactWriter implements AutoCloseable { this.configuredLayerInfo = new ArrayList<>(layerInfo); this.configuredLayerDegrees = new ArrayList<>(layerDegrees); this.baseDegree = layerDegrees.get(0); - this.pq = pq; this.maxOrdinal = maxOrdinal; this.entryNode = entryNode; this.level1FeatureRecords = new ArrayList<>(); @@ -102,16 +106,13 @@ final class CompactWriter implements AutoCloseable { Map featureMap = new LinkedHashMap<>(); InlineVectors inlineVectorFeature = new InlineVectors(dimension); featureMap.put(FeatureId.INLINE_VECTORS, inlineVectorFeature); - if (fusedPQEnabled) { - this.fusedPQFeature = new FusedPQ(Collections.max(layerDegrees), pq); - featureMap.put(FeatureId.FUSED_PQ, this.fusedPQFeature); - } else { - this.fusedPQFeature = null; + if (fusedFeature != null) { + featureMap.put(fusedFeature.id(), fusedFeature); } int rsize = Integer.BYTES + inlineVectorFeature.featureSize() + Integer.BYTES + baseDegree * Integer.BYTES; - if (fusedPQEnabled) { - rsize += fusedPQFeature.featureSize(); + if (fusedFeature != null) { + rsize += fusedFeature.featureSize(); } this.recordSize = rsize; @@ -120,18 +121,36 @@ final class CompactWriter implements AutoCloseable { this.header = new Header(commonHeader, featureMap); 
this.headerSize = header.size(); + final int codeSize = fusedFeature != null ? fusedFeature.codeSize() : 1; this.bufferPerThread = ThreadLocal.withInitial(() -> { ByteBuffer buffer = ByteBuffer.allocate(recordSize); buffer.order(ByteOrder.BIG_ENDIAN); return buffer; }); this.zeroPQ = ThreadLocal.withInitial(() -> { - var vec = vectorTypeSupport.createByteSequence(pqLength > 0 ? pqLength : 1); + var vec = vectorTypeSupport.createByteSequence(codeSize); vec.zero(); return vec; }); } + /** + * Enables the pre-computed PQ code cache. Must be called before any writeInlineNodeRecord + * call. Once enabled, neighbor PQ codes are copied from {@code cache} instead of being + * re-encoded per write. + * + * @param cache a buffer holding pqCodeSize bytes per new ordinal (length must be at + * least {@code (maxOrdinal + 1) * pqCodeSize}) + * @param pqCodeSize bytes per code (== FusedFeature.codeSize() of the source's feature) + */ + public void enablePqCodeCache(MappedByteBuffer cache, int pqCodeSize) { + this.pqCodeCache = cache; + this.pqCodeSize = pqCodeSize; + // Each worker thread gets its own ByteBuffer view so absolute-position seeks don't race. + this.pqCacheViewPerThread = ThreadLocal.withInitial(() -> cache.duplicate()); + this.pqCodeBufPerThread = ThreadLocal.withInitial(() -> new byte[pqCodeSize]); + } + public void writeHeader() throws IOException { writer.seek(startOffset); header.write(writer); @@ -178,6 +197,39 @@ public void setEntryNodePqCode(ByteSequence code) { this.entryNodePqCode = code; } + /** + * Returns the projected end-of-output offset, computed from layer counts and degrees. + * Used to place a transient PQ-code mmap section past the end of the output during + * compaction; the file is truncated back to this size when compaction completes. + */ + public long projectedOutputSize() { + long total = startOffset + headerSize; + // Level-0 records: one per ordinal slot. 
+ total += (long) (maxOrdinal + 1) * recordSize; + // Upper-layer records: written by writeUpperLayerNode as + // [ord int][count int][degree neighbor ints]. + for (int level = 1; level < configuredLayerDegrees.size(); level++) { + int degree = configuredLayerDegrees.get(level); + int count = configuredLayerInfo.get(level).size; + total += (long) count * (Integer.BYTES * 2L + (long) degree * Integer.BYTES); + } + // PQ feature records written at the start of writeFooter() when v6 + fused PQ: + // - hierarchy enabled: one [ord, code] record per level-1 node; + // - no hierarchy: one [entryOrd, code] record for the entry node only. + if (fusedPQEnabled && version == 6) { + int pqSize = fusedFeature.codeSize(); + if (configuredLayerInfo.size() > 1) { + int level1Count = configuredLayerInfo.get(1).size; + total += (long) level1Count * (Integer.BYTES + pqSize); + } else { + total += Integer.BYTES + pqSize; + } + } + // Footer trailer: header copy + headerOffset (long) + magic (int). + total += headerSize + FOOTER_SIZE; + return total; + } + public void writeUpperLayerNode(int level, int ordinal, int[] neighbors, ByteSequence level1PqCode) throws IOException { writer.writeInt(ordinal); writer.writeInt(neighbors.length); @@ -217,10 +269,24 @@ public WriteResult writeInlineNodeRecord(int ordinal, VectorFloat vec, Select // we cannot use fusedPQfeature.writeInline if (fusedPQEnabled) { int k = 0; - for (; k < selectedCache.size; k++) { - pqCode.zero(); - pq.encodeTo(selectedCache.vecs[k], pqCode); - vectorTypeSupport.writeByteSequence(bwriter, pqCode); + if (pqCodeCache != null) { + // Look up neighbors' codes from the pre-encoded mmap'd cache instead of re-encoding. 
+ ByteBuffer cacheView = pqCacheViewPerThread.get(); + byte[] codeBuf = pqCodeBufPerThread.get(); + for (; k < selectedCache.size; k++) { + int newOrd = selectedCache.nodes[k]; // already remapped before this call + int offset = newOrd * pqCodeSize; + cacheView.position(offset); + cacheView.get(codeBuf, 0, pqCodeSize); + bwriter.write(codeBuf, 0, pqCodeSize); + } + } else { + var compressor = fusedFeature.getCompressor(); + for (; k < selectedCache.size; k++) { + pqCode.zero(); + compressor.encodeTo(selectedCache.vecs[k], pqCode); + vectorTypeSupport.writeByteSequence(bwriter, pqCode); + } } for (; k < baseDegree; k++) { vectorTypeSupport.writeByteSequence(bwriter, zeroPQ.get()); diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CompactionContext.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CompactionContext.java new file mode 100644 index 000000000..b01901832 --- /dev/null +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CompactionContext.java @@ -0,0 +1,62 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.github.jbellis.jvector.graph.disk; + +import io.github.jbellis.jvector.quantization.CompressedVectors; +import io.github.jbellis.jvector.util.FixedBitSet; + +import java.util.Collections; +import java.util.List; +import java.util.concurrent.ForkJoinPool; + +/** + * Bundle of inputs that {@link QuantizationCompactionStrategy} implementations need to do their work + * during a single compaction run. Passed to {@code FusedFeature.createCompactionStrategy(...)} + * and {@code CompressedVectors.createCompactionStrategy(...)} so strategy implementations can + * capture exactly the pieces they need without the {@code OnDiskGraphIndexCompactor} leaking + * through. + */ +public final class CompactionContext { + public final List sources; + /** Parallel to {@link #sources}; {@code null} when no non-fused sidecar input is supplied. */ + public final List sourceCompressed; + public final List liveNodes; + public final List remappers; + public final int dimension; + public final int maxOrdinal; + public final ForkJoinPool executor; + public final int taskWindowSize; + + public CompactionContext( + List sources, + List sourceCompressed, + List liveNodes, + List remappers, + int dimension, + int maxOrdinal, + ForkJoinPool executor, + int taskWindowSize) { + this.sources = Collections.unmodifiableList(sources); + this.sourceCompressed = sourceCompressed == null ? 
null : Collections.unmodifiableList(sourceCompressed); + this.liveNodes = Collections.unmodifiableList(liveNodes); + this.remappers = Collections.unmodifiableList(remappers); + this.dimension = dimension; + this.maxOrdinal = maxOrdinal; + this.executor = executor; + this.taskWindowSize = taskWindowSize; + } +} diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/FusedCompactionStrategy.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/FusedCompactionStrategy.java new file mode 100644 index 000000000..eb594b058 --- /dev/null +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/FusedCompactionStrategy.java @@ -0,0 +1,225 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.github.jbellis.jvector.graph.disk; + +import io.github.jbellis.jvector.graph.disk.feature.FusedFeature; +import io.github.jbellis.jvector.quantization.VectorCompressor; +import io.github.jbellis.jvector.vector.VectorSimilarityFunction; +import io.github.jbellis.jvector.vector.VectorizationProvider; +import io.github.jbellis.jvector.vector.types.ByteSequence; +import io.github.jbellis.jvector.vector.types.VectorFloat; +import io.github.jbellis.jvector.vector.types.VectorTypeSupport; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; + +/** + * Generic compaction strategy for any {@link FusedFeature} (PQ today, ASH or other schemes + * later). Parameterized by: + *

+ * <ul>
+ *   <li>a {@link VectorCompressorRetrainer} producing the retrained compressor — the only
+ *       scheme-specific knowledge this strategy needs,</li>
+ *   <li>the source's {@link FusedFeature}, used as a factory ({@code withCompressor(...)})
+ *       to produce the merged output's fused feature.</li>
+ * </ul>
+ * The pre-encode mmap pass, entry-node-code tail write, and file truncation are all expressed + * against {@code VectorCompressor.encodeTo} and {@code FusedFeature.codeSize()} — no PQ or ASH + * specifics live here. + */ +public final class FusedCompactionStrategy extends QuantizationCompactionStrategy { + private static final Logger log = LoggerFactory.getLogger(FusedCompactionStrategy.class); + private static final VectorTypeSupport vectorTypeSupport = VectorizationProvider.getInstance().getVectorTypeSupport(); + + private final CompactionContext ctx; + private final FusedFeature sourceFusedFeature; + private final VectorCompressorRetrainer retrainer; + + private VectorCompressor> retrainedCompressor; + + // Transient pre-encode cache: lives in a memory-mapped section appended past the projected + // end of the output graph file. Truncated away in onAfterClose. Off-heap; single-mapping + // limit (2 GB) caps this at ~10M nodes for typical codeSize. + private MappedByteBuffer codeCache; + private int cacheCodeSize; + private long cacheTruncateAt; + + public FusedCompactionStrategy(CompactionContext ctx, + FusedFeature sourceFusedFeature, + VectorCompressorRetrainer retrainer) { + this.ctx = ctx; + this.sourceFusedFeature = sourceFusedFeature; + this.retrainer = retrainer; + } + + @Override + @SuppressWarnings("unchecked") + public void retrain(VectorSimilarityFunction vsf) { + log.info("Retraining fused-quantization compressor on merged sources"); + this.retrainedCompressor = (VectorCompressor>) (VectorCompressor) retrainer.retrain(vsf); + } + + @Override + public VectorCompressor compressor() { + return retrainedCompressor; + } + + @Override + public boolean writesCodesInline() { + return true; + } + + /** + * Returns the {@link FusedFeature} the compactor should put in the merged output graph. + * Constructed from the source's fused feature via {@link FusedFeature#withCompressor}, + * parameterized by the retrained compressor and the merged graph's max degree. 
+ */ + public FusedFeature outputFusedFeature(int maxDegree) { + if (retrainedCompressor == null) { + throw new IllegalStateException("retrain() must be called before outputFusedFeature()"); + } + return sourceFusedFeature.withCompressor(retrainedCompressor, maxDegree); + } + + @Override + public void onAfterHeader(CompactWriter writer) throws IOException { + if (retrainedCompressor == null) { + throw new IllegalStateException("retrain() must be called before onAfterHeader()"); + } + try { + precomputeCodes(writer); + if (codeCache != null) { + writer.enablePqCodeCache(codeCache, cacheCodeSize); + } + } catch (IOException e) { + log.warn("Code pre-encode failed, falling back to per-write encoding: {}", e.getMessage()); + } + } + + @Override + public void onAfterLevels(CompactWriter writer, int[] entryNodeSource, List maxDegrees) throws IOException { + // When fused features are present and there is no hierarchy (only L0), the reader expects + // to find the entry node's own code written after the L0 block, just as + // AbstractGraphIndexWriter.writeSparseLevels does in its getMaxLevel == 0 branch. Without + // it, loadInMemoryFeatures reads garbage and hierarchyCachedFeatures is missing the + // entry node, causing "Node X is not in the hierarchy" on first search. 
+ if (maxDegrees.size() != 1) { + return; + } + try (var entryView = ctx.sources.get(entryNodeSource[0]).getView()) { + var entryVec = vectorTypeSupport.createFloatVector(ctx.dimension); + entryView.getVectorInto(entryNodeSource[1], entryVec, 0); + var entryCode = vectorTypeSupport.createByteSequence(retrainedCompressor.compressedVectorSize()); + entryCode.zero(); + retrainedCompressor.encodeTo(entryVec, entryCode); + writer.setEntryNodePqCode(entryCode); + } + } + + @Override + public void onAfterClose(Path graphPath) { + if (cacheTruncateAt > 0) { + codeCache = null; + try (FileChannel fc = FileChannel.open(graphPath, StandardOpenOption.WRITE)) { + if (fc.size() > cacheTruncateAt) { + fc.truncate(cacheTruncateAt); + } + } catch (IOException e) { + log.warn("Failed to truncate code-cache section from output file {}: {}", + graphPath, e.getMessage()); + } + cacheTruncateAt = 0; + } + } + + /** Pre-encode every live node's code into a memory-mapped section past the projected output end. */ + private void precomputeCodes(CompactWriter writer) throws IOException { + cacheCodeSize = retrainedCompressor.compressedVectorSize(); + long tempSize = (long) (ctx.maxOrdinal + 1) * cacheCodeSize; + if (tempSize <= 0 || tempSize > Integer.MAX_VALUE) { + log.info("Code pre-encode skipped: required cache size {} bytes exceeds single-mapping limit", tempSize); + return; + } + + long tempOffset = writer.projectedOutputSize(); + cacheTruncateAt = tempOffset; + long totalSize = tempOffset + tempSize; + + try (FileChannel fc = FileChannel.open(writer.getOutputPath(), + StandardOpenOption.READ, StandardOpenOption.WRITE)) { + ByteBuffer pad = ByteBuffer.wrap(new byte[]{0}); + fc.write(pad, totalSize - 1); + codeCache = fc.map(FileChannel.MapMode.READ_WRITE, tempOffset, tempSize); + } + + final int cs = cacheCodeSize; + final VectorCompressor> compressor = retrainedCompressor; + List> tasks = new ArrayList<>(); + int targetTasks = Math.max(ctx.taskWindowSize * 4, 16); + for (int s = 0; s 
< ctx.sources.size(); s++) { + final int sIdx = s; + final var source = ctx.sources.get(s); + final var alive = ctx.liveNodes.get(s); + final int upper = alive.length(); + int chunkSize = Math.max(256, (upper + targetTasks - 1) / targetTasks); + for (int chunkStart = 0; chunkStart < upper; chunkStart += chunkSize) { + final int cStart = chunkStart; + final int cEnd = Math.min(chunkStart + chunkSize, upper); + tasks.add(() -> { + ByteSequence code = vectorTypeSupport.createByteSequence(cs); + VectorFloat vec = vectorTypeSupport.createFloatVector(ctx.dimension); + long count = 0; + try (var view = source.getView()) { + for (int oldOrd = cStart; oldOrd < cEnd; oldOrd++) { + if (!alive.get(oldOrd)) continue; + view.getVectorInto(oldOrd, vec, 0); + code.zero(); + compressor.encodeTo(vec, code); + int newOrd = ctx.remappers.get(sIdx).oldToNew(oldOrd); + int offset = newOrd * cs; + for (int i = 0; i < cs; i++) { + codeCache.put(offset + i, code.get(i)); + } + count++; + } + } + return count; + }); + } + } + try { + long total = 0; + for (Future f : ctx.executor.invokeAll(tasks)) { + total += f.get(); + } + log.info("Code pre-encode: {} nodes encoded into {} MB in-output cache (offset {})", + total, tempSize / (1024 * 1024), tempOffset); + } catch (InterruptedException | ExecutionException e) { + throw new IOException("Code pre-encode failed", e); + } + } +} diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java index 1da50e6a4..785dc8f9d 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java @@ -18,9 +18,9 @@ import java.io.FileNotFoundException; import java.io.IOException; -import java.nio.file.Path; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; +import 
java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.*; import java.util.concurrent.*; @@ -28,6 +28,7 @@ import io.github.jbellis.jvector.graph.*; import io.github.jbellis.jvector.graph.disk.feature.Feature; import io.github.jbellis.jvector.graph.disk.feature.FeatureId; +import io.github.jbellis.jvector.graph.disk.feature.FusedFeature; import io.github.jbellis.jvector.graph.disk.feature.FusedPQ; import io.github.jbellis.jvector.graph.similarity.DefaultSearchScoreProvider; import io.github.jbellis.jvector.graph.similarity.SearchScoreProvider; @@ -35,6 +36,7 @@ import io.github.jbellis.jvector.vector.VectorSimilarityFunction; import io.github.jbellis.jvector.graph.similarity.ScoreFunction; import io.github.jbellis.jvector.vector.types.VectorFloat; +import io.github.jbellis.jvector.quantization.CompressedVectors; import io.github.jbellis.jvector.quantization.ProductQuantization; import io.github.jbellis.jvector.vector.VectorizationProvider; import io.github.jbellis.jvector.vector.types.VectorTypeSupport; @@ -58,6 +60,10 @@ public final class OnDiskGraphIndexCompactor implements Accountable { private static final int SEARCH_TOP_K_MULTIPLIER = 2; private final List sources; + // Optional non-fused compressed sidecar, parallel to `sources`. Null when sources carry their + // quantization inline (FUSED_PQ) or have none. When non-null, compact(Path, Path) retrains the + // compressor on merged vectors and writes a single merged CompressedVectors to compressedPath. + private final List sourceCompressed; private final List liveNodes; private final List numLiveNodesPerSource; private final List remappers; @@ -71,17 +77,37 @@ public final class OnDiskGraphIndexCompactor implements Accountable { private final int taskWindowSize; private final VectorSimilarityFunction similarityFunction; + /** + * Constructs a new OnDiskGraphIndexCompactor for graphs without a non-fused compressed sidecar. 
+ * Equivalent to calling the 6-arg constructor with {@code sourceCompressed = null}. + */ + public OnDiskGraphIndexCompactor( + List sources, + List liveNodes, + List remappers, + VectorSimilarityFunction similarityFunction, + ForkJoinPool executor) { + this(sources, null, liveNodes, remappers, similarityFunction, executor); + } + /** * Constructs a new OnDiskGraphIndexCompactor to merge multiple graph indexes. * Initializes thread pool, validates inputs, and prepares metadata for compaction. + * + * @param sourceCompressed parallel to {@code sources}, supplying the non-fused compressed + * vectors (e.g. {@link io.github.jbellis.jvector.quantization.PQVectors}) + * that ship alongside each graph. Pass {@code null} when sources carry + * quantization inline (FUSED_PQ) or have none. Must not be combined + * with sources that carry the FUSED_PQ feature. */ public OnDiskGraphIndexCompactor( List sources, + List sourceCompressed, List liveNodes, List remappers, VectorSimilarityFunction similarityFunction, ForkJoinPool executor) { - checkBeforeCompact(sources, liveNodes, remappers); + checkBeforeCompact(sources, sourceCompressed, liveNodes, remappers); int threads = Runtime.getRuntime().availableProcessors(); if (executor != null) { @@ -93,6 +119,7 @@ public OnDiskGraphIndexCompactor( this.taskWindowSize = threads; this.sources = sources; + this.sourceCompressed = (sourceCompressed == null || sourceCompressed.isEmpty()) ? 
null : sourceCompressed; this.remappers = remappers; this.liveNodes = liveNodes; this.numLiveNodesPerSource = new ArrayList<>(this.sources.size()); @@ -120,12 +147,53 @@ public OnDiskGraphIndexCompactor( */ private void checkBeforeCompact( List sources, + List sourceCompressed, List liveNodes, List remappers) { validateInputSizes(sources, liveNodes, remappers); validateLiveNodesBounds(sources, liveNodes); validateGraphConfiguration(sources); validateFeatures(sources); + validateCompressed(sources, sourceCompressed); + } + + /** + * Validates that the optional non-fused compressed sidecar list is consistent with + * {@code sources}: same size, no nulls, identical compressor type across entries, and not + * combined with FUSED_PQ (which already carries codes inline). + */ + private void validateCompressed(List sources, List sourceCompressed) { + if (sourceCompressed == null || sourceCompressed.isEmpty()) { + return; + } + if (sourceCompressed.size() != sources.size()) { + throw new IllegalArgumentException("sourceCompressed must have the same size as sources"); + } + // Inline (fused) and sidecar are mutually exclusive ways to carry quantization codes. + // Check for any fused feature rather than hard-coding FUSED_PQ so future fused types + // (e.g. FUSED_ASH) are rejected here without further edits. 
+ for (var feature : sources.get(0).getFeatures().values()) { + if (feature.isFused()) { + throw new IllegalArgumentException( + "sourceCompressed cannot be combined with a fused feature (" + + feature.id() + "); choose one"); + } + } + Class compressorClass = null; + for (int s = 0; s < sourceCompressed.size(); s++) { + CompressedVectors cv = Objects.requireNonNull(sourceCompressed.get(s), + "sourceCompressed[" + s + "] is null"); + var compressor = Objects.requireNonNull(cv.getCompressor(), + "sourceCompressed[" + s + "].getCompressor() is null"); + if (compressorClass == null) { + compressorClass = compressor.getClass(); + } else if (compressorClass != compressor.getClass()) { + throw new IllegalArgumentException( + "sourceCompressed entries must all use the same compressor type; got " + + compressorClass.getSimpleName() + " and " + + compressor.getClass().getSimpleName()); + } + } } /** @@ -210,18 +278,98 @@ private void validateFeatures(List sources) { * specified path, handling PQ retraining if needed, and writing header, all layers, and footer. */ public void compact(Path outputPath) throws FileNotFoundException { - boolean fusedPQEnabled = hasFusedPQ(); - boolean compressedPrecision = fusedPQEnabled; + try { + QuantizationCompactionStrategy strategy = detectInlineStrategy(); + compactGraphImpl(outputPath, strategy); + } finally { + if (ownsExecutor) executor.shutdown(); + } + } - ProductQuantization pq; - int pqLength; - if (fusedPQEnabled) { - pq = resolvePQFromSources(similarityFunction); - pqLength = pq.compressedVectorSize(); - } else { - pq = null; - pqLength = -1; + /** + * Compaction entry point for graphs that ship a non-fused compressed sidecar (e.g. + * {@link io.github.jbellis.jvector.quantization.PQVectors}). Writes the merged graph to + * {@code graphPath} and the merged compressed vectors to {@code compressedPath}. + *

+ * The compressor is retrained on a balanced sample of merged source vectors, then every live + * node is re-encoded against the new codebook. Requires that {@code sourceCompressed} was + * supplied to the constructor. + */ + public void compact(Path graphPath, Path compressedPath) throws FileNotFoundException { + if (sourceCompressed == null) { + throw new IllegalStateException( + "compact(graphPath, compressedPath) requires sourceCompressed to be supplied to the constructor"); + } + Objects.requireNonNull(compressedPath, "compressedPath"); + + try { + // Graph compaction proceeds without fused-PQ retrain (validateCompressed forbids + // FUSED_PQ when sourceCompressed is set), then the sidecar is written below. + QuantizationCompactionStrategy inlineStrategy = detectInlineStrategy(); + QuantizationCompactionStrategy sidecarStrategy = detectSidecarStrategy(); + sidecarStrategy.retrain(similarityFunction); + compactGraphImpl(graphPath, inlineStrategy); + sidecarStrategy.writeSidecar(compressedPath); + } catch (IOException e) { + throw new RuntimeException("Sidecar compaction failed", e); + } finally { + if (ownsExecutor) executor.shutdown(); } + } + + /** + * Pick the inline-codes strategy by asking the source's fused feature (if any) for its + * compaction strategy. Returns {@link QuantizationCompactionStrategy#NONE} when no fused feature is + * present. New fused quantization types extend the compactor purely by implementing + * {@link FusedFeature#createCompactionStrategy}. + */ + private QuantizationCompactionStrategy detectInlineStrategy() { + for (var feature : sources.get(0).getFeatures().values()) { + if (feature instanceof FusedFeature) { + return ((FusedFeature) feature).createCompactionStrategy(buildContext()); + } + } + return QuantizationCompactionStrategy.NONE; + } + + /** + * Pick the sidecar-codes strategy by delegating to the first {@link CompressedVectors}' + * own factory. 
Returns {@link QuantizationCompactionStrategy#NONE} when no sidecar input was supplied + * to the constructor. New sidecar quantization types extend the compactor purely by + * implementing {@link CompressedVectors#createCompactionStrategy}. + */ + private QuantizationCompactionStrategy detectSidecarStrategy() { + if (sourceCompressed == null) { + return QuantizationCompactionStrategy.NONE; + } + return sourceCompressed.get(0).createCompactionStrategy(buildContext()); + } + + /** Snapshot the compactor's state into a {@link CompactionContext} for strategies to consume. */ + private CompactionContext buildContext() { + return new CompactionContext(sources, sourceCompressed, liveNodes, remappers, + dimension, maxOrdinal, executor, taskWindowSize); + } + + /** + * Internal graph-compaction body. Performs the full graph write but does not shut + * down {@link #executor}; the public {@code compact(...)} entry points own that lifecycle so + * follow-on passes (e.g. a sidecar write via {@link SidecarCompactionStrategy}) can keep using + * the executor. + *

+ * Quantization-aware steps (codebook retrain, pre-encode caches, entry-node tail records, + * mmap cleanup) are delegated to {@code strategy}. For sources with no inline quantization, + * pass {@link QuantizationCompactionStrategy#NONE} for a fully no-op strategy hook set. + */ + private void compactGraphImpl(Path outputPath, QuantizationCompactionStrategy strategy) throws FileNotFoundException { + strategy.retrain(similarityFunction); + + boolean fusedPQEnabled = strategy.writesCodesInline(); + ProductQuantization pq = strategy.compressorAsPQ(); + boolean compressedPrecision = fusedPQEnabled; + int maxBaseDegree = java.util.Collections.max(maxDegrees); + io.github.jbellis.jvector.graph.disk.feature.FusedFeature outputFusedFeature = + strategy.outputFusedFeature(maxBaseDegree); List layerInfo = computeLayerInfoFromSources(); int[] entryNodeSource = resolveEntryNodeSource(); // {sourceIdx, originalOrdinal} @@ -229,32 +377,22 @@ public void compact(Path outputPath) throws FileNotFoundException { log.info("Writing compacted graph : {} total nodes, maxOrdinal={}, dimension={}, degree={}", numTotalNodes, maxOrdinal, dimension, maxDegrees.get(0)); - try (CompactWriter writer = new CompactWriter(outputPath, maxOrdinal, numTotalNodes, 0, layerInfo, entryNode, dimension, maxDegrees, pq, pqLength, fusedPQEnabled)) { + try (CompactWriter writer = new CompactWriter(outputPath, maxOrdinal, numTotalNodes, 0, layerInfo, entryNode, dimension, maxDegrees, outputFusedFeature)) { + // Header has to be written first so the writer's position is past the header + // before any strategy that mmaps past the projected end of the output runs. 
writer.writeHeader(); + strategy.onAfterHeader(writer); + compactLevels(writer, similarityFunction, fusedPQEnabled, compressedPrecision, pq); - // When FusedPQ is enabled and there is no hierarchy (only L0), the reader expects - // to find the entry node's own PQ code written after the L0 block, just as - // AbstractGraphIndexWriter.writeSparseLevels does in its getMaxLevel == 0 branch. - // Without it, loadInMemoryFeatures reads garbage and hierarchyCachedFeatures is - // missing the entry node, causing "Node X is not in the hierarchy" on first search. - if (fusedPQEnabled && maxDegrees.size() == 1) { - try (var entryView = sources.get(entryNodeSource[0]).getView()) { - var entryVec = vectorTypeSupport.createFloatVector(dimension); - entryView.getVectorInto(entryNodeSource[1], entryVec, 0); - var entryPqCode = vectorTypeSupport.createByteSequence(pq.getSubspaceCount()); - entryPqCode.zero(); - pq.encodeTo(entryVec, entryPqCode); - writer.setEntryNodePqCode(entryPqCode); - } - } + strategy.onAfterLevels(writer, entryNodeSource, maxDegrees); writer.writeFooter(); log.info("Compaction complete: {}", outputPath); } catch (IOException | ExecutionException | InterruptedException e) { throw new RuntimeException(e); } finally { - if (ownsExecutor) executor.shutdown(); + strategy.onAfterClose(outputPath); } } @@ -821,22 +959,6 @@ private List computeLayerInfoFromSources() { return layerInfo; } - /** - * Trains a new Product Quantization codebook using balanced sampling across all source - * indexes. This ensures the PQ is optimized for the combined dataset. - */ - private ProductQuantization resolvePQFromSources(VectorSimilarityFunction similarityFunction) { - PQRetrainer retrainer = new PQRetrainer(sources, liveNodes, dimension); - return retrainer.retrain(similarityFunction); - } - - /** - * Checks if the source indexes have FusedPQ feature enabled. 
- */ - private boolean hasFusedPQ() { - return sources.get(0).getFeatures().containsKey(FeatureId.FUSED_PQ); - } - /** * Creates a score provider for searching across different source indexes. Uses approximate * PQ-based scoring if compressedPrecision is enabled, otherwise uses exact scoring. @@ -968,11 +1090,13 @@ private long estimateScratchSpacePerThread() { // Each GraphSearcher has internal state - rough estimate scratchSize += (long) sources.size() * (OH + 10L * REF); - // pqCode ByteSequence (if PQ enabled) - if (hasFusedPQ()) { - FusedPQ fpq = (FusedPQ) sources.get(0).getFeatures().get(FeatureId.FUSED_PQ); - int subspaceCount = fpq.getPQ().getSubspaceCount(); - scratchSize += OH + subspaceCount; // ByteSequence + // Per-thread scratch ByteSequence holding one code's worth of bytes, for each fused + // feature carried by the graph. Generalized over fused types so new quantizations + // (e.g. FUSED_ASH) don't need an edit here. + for (var feature : sources.get(0).getFeatures().values()) { + if (feature instanceof FusedFeature) { + scratchSize += OH + ((FusedFeature) feature).codeSize(); + } } return scratchSize; diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/PQRetrainer.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/PQRetrainer.java index a0438168e..280d75c9c 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/PQRetrainer.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/PQRetrainer.java @@ -71,10 +71,21 @@ public PQRetrainer(List sources, List liveNodes, /** * Trains a new Product Quantization codebook using balanced sampling across all source indexes. + * The base PQ parameters are taken from the FUSED_PQ feature on the first source. * All sampled vectors are read into memory up front, so ProductQuantization.compute() itself * performs no I/O. 
*/ public ProductQuantization retrain(VectorSimilarityFunction similarityFunction) { + FusedPQ fpq = (FusedPQ) sources.get(0).getFeatures().get(FeatureId.FUSED_PQ); + return retrain(similarityFunction, fpq.getPQ()); + } + + /** + * Trains a new Product Quantization codebook using balanced sampling across all source indexes + * and the supplied base PQ for subspace/cluster parameters. Used when the base PQ comes from a + * non-fused source (e.g. a sidecar {@code CompressedVectors}) rather than the FUSED_PQ feature. + */ + public ProductQuantization retrain(VectorSimilarityFunction similarityFunction, ProductQuantization basePQ) { log.info("Training PQ using balanced sampling across sources"); List samples = sampleBalanced(ProductQuantization.MAX_PQ_TRAINING_SET_SIZE); @@ -93,9 +104,6 @@ public ProductQuantization retrain(VectorSimilarityFunction similarityFunction) List> trainingVectors = extractVectorsSequential(samples); var ravv = new ListRandomAccessVectorValues(trainingVectors, dimension); - FusedPQ fpq = (FusedPQ) sources.get(0).getFeatures().get(FeatureId.FUSED_PQ); - ProductQuantization basePQ = fpq.getPQ(); - boolean center = similarityFunction == VectorSimilarityFunction.EUCLIDEAN; return ProductQuantization.compute( diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/QuantizationCompactionStrategy.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/QuantizationCompactionStrategy.java new file mode 100644 index 000000000..8eaf0a3a1 --- /dev/null +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/QuantizationCompactionStrategy.java @@ -0,0 +1,157 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.github.jbellis.jvector.graph.disk; + +import io.github.jbellis.jvector.graph.disk.feature.FusedFeature; +import io.github.jbellis.jvector.quantization.ProductQuantization; +import io.github.jbellis.jvector.quantization.VectorCompressor; +import io.github.jbellis.jvector.vector.VectorSimilarityFunction; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.List; + +/** + * Encapsulates the quantization-aware steps the compactor needs to run during a single + * {@code compact()} invocation. Pulling these behind a strategy lets the compactor body stay + * scheme-agnostic: it asks the strategy whether to write inline codes, hands it pre/post hooks, + * and (for sidecar strategies) defers the merged sidecar write to the strategy. + *

+ * <p>One strategy instance per compaction run. Strategies are stateful — they hold the retrained + * compressor produced by {@link #retrain} and any transient resources (e.g. memory-mapped + * pre-encode caches) until {@link #onAfterClose} releases them. + * <p>
+ * Two concrete implementations cover all quantization schemes: + *

+ * <ul>
+ *   <li>{@link FusedCompactionStrategy} — sources carry a {@link FusedFeature} with inline codes;
+ *       the strategy is parameterized by a {@link VectorCompressorRetrainer} and the source's
+ *       feature (used as a factory for the merged output's feature via
+ *       {@link FusedFeature#withCompressor}). No PQ- or ASH-specific code lives in the strategy.</li>
+ *   <li>{@link SidecarCompactionStrategy} — sources ship codes as a non-fused
+ *       {@code CompressedVectors} sidecar; the strategy is parameterized by a retrainer plus the
+ *       source's {@code CompressedVectors} (used as a format handle).</li>
+ * </ul>
+ * Adding a new quantization type (e.g. ASH) requires no strategy classes; the new {@code FusedASH} + * and {@code ASHVectors} just return appropriately-parameterized instances of these two strategies. + */ +public abstract class QuantizationCompactionStrategy { + + /** + * Singleton strategy for sources that ship no quantization at all (no FUSED_PQ, no sidecar). + * All hooks are no-ops and {@link #compressor()} returns {@code null}. + */ + public static final QuantizationCompactionStrategy NONE = new QuantizationCompactionStrategy() { + @Override + public void retrain(VectorSimilarityFunction vsf) { + // no-op + } + + @Override + public VectorCompressor compressor() { + return null; + } + + @Override + public String toString() { + return "QuantizationCompactionStrategy.NONE"; + } + }; + + /** + * Trains a fresh compressor on a balanced sample of merged source vectors. May be a no-op + * for strategies that don't carry a compressor (e.g. {@link #NONE}). After this call, + * {@link #compressor()} returns the retrained compressor. + */ + public abstract void retrain(VectorSimilarityFunction vsf); + + /** The retrained compressor produced by {@link #retrain}. {@code null} before retrain or for NONE. */ + public abstract VectorCompressor compressor(); + + /** + * Whether this strategy writes codes inline in the graph file (FusedPQ-style). + * {@code OnDiskGraphIndexCompactor} reads this flag once per run and uses it as both the + * inline-code switch and the compressed-precision switch for {@code compactLevels}, which + * also receives {@link #compressorAsPQ()}. The output feature itself reaches + * {@link CompactWriter} through {@link #outputFusedFeature}. + * + * @return {@code false} by default; inline strategies override + */ + public boolean writesCodesInline() { + return false; + } + + /** + * Whether this strategy writes codes to a separate sidecar file (PQVectors-style). When true, + * the strategy is expected to implement {@link #writeSidecar}. + * NOTE(review): {@code OnDiskGraphIndexCompactor.compact(graphPath, compressedPath)} calls + * {@link #writeSidecar} without consulting this flag — confirm whether it should. + * + * @return {@code false} by default; sidecar strategies override + */ + public boolean writesCodesSidecar() { + return false; + } + + /** + * Hook invoked once after {@link CompactWriter#writeHeader()} but before {@code compactLevels}.
+ * Inline strategies can use this to pre-encode every live node's code into a transient cache + * that the writer will copy from during inline writes. No-op by default. + */ + public void onAfterHeader(CompactWriter writer) throws IOException { + // no-op + } + + /** + * Hook invoked once after {@code compactLevels} returns but before + * {@link CompactWriter#writeFooter()}. Inline strategies that need to emit a per-graph tail + * record (e.g. the entry-node PQ code for FusedPQ when there is no hierarchy) do so here. + * No-op by default. + */ + public void onAfterLevels(CompactWriter writer, int[] entryNodeSource, List maxDegrees) throws IOException { + // no-op + } + + /** + * Hook invoked once after the graph file is closed (in {@code finally}). Strategies can + * release transient resources (e.g. unmap a pre-encode cache and truncate the output file + * back to its expected size). No-op by default. + */ + public void onAfterClose(Path graphPath) { + // no-op + } + + /** + * Writes the merged compressed-vectors sidecar file. Called by the compactor's + * {@code compact(graphPath, compressedPath)} entry point after the graph is fully written. + * Throws {@link UnsupportedOperationException} by default; sidecar strategies override. + */ + public void writeSidecar(Path compressedPath) throws IOException { + throw new UnsupportedOperationException(this + " does not write a sidecar"); + } + + /** + * Returns the {@link FusedFeature} the compactor should put in the merged output graph for + * an inline strategy. {@code null} for non-inline strategies (NONE and any sidecar strategy). + * Called after {@link #retrain} so the strategy can build the output feature from the + * retrained compressor. + */ + public FusedFeature outputFusedFeature(int maxDegree) { + return null; + } + + /** + * Convenience: returns {@link #compressor()} cast to {@link ProductQuantization}, or + * {@code null} if no compressor is held or the held compressor is not a {@code ProductQuantization}.
Kept for backward compat with code paths that still + * thread a typed {@code ProductQuantization} through {@link CompactWriter}. + */ + protected ProductQuantization compressorAsPQ() { + VectorCompressor c = compressor(); + return (c instanceof ProductQuantization) ? (ProductQuantization) c : null; + } +} diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/SidecarCompactionStrategy.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/SidecarCompactionStrategy.java new file mode 100644 index 000000000..1317ca88a --- /dev/null +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/SidecarCompactionStrategy.java @@ -0,0 +1,170 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package io.github.jbellis.jvector.graph.disk; + +import io.github.jbellis.jvector.disk.BufferedRandomAccessWriter; +import io.github.jbellis.jvector.quantization.CompressedVectors; +import io.github.jbellis.jvector.quantization.VectorCompressor; +import io.github.jbellis.jvector.vector.VectorSimilarityFunction; +import io.github.jbellis.jvector.vector.VectorizationProvider; +import io.github.jbellis.jvector.vector.types.ByteSequence; +import io.github.jbellis.jvector.vector.types.VectorFloat; +import io.github.jbellis.jvector.vector.types.VectorTypeSupport; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; + +/** + * Generic compaction strategy for any non-fused {@link CompressedVectors} sidecar. Parameterized + * by: + *
+ * <ul>
+ *   <li>a {@link VectorCompressorRetrainer} that produces the merged compressor on retrain (the
+ *       only scheme-specific knowledge this strategy carries),</li>
+ *   <li>a {@code formatHandle} {@link CompressedVectors} from the sources, used only to invoke
+ *       {@link CompressedVectors#writeSidecarHeader} and {@link CompressedVectors#sidecarVectorsPerChunk}
+ *       — the format hooks that decide the on-disk layout for the merged sidecar.</li>
+ * </ul>
+ * No PQ-specific (or ASH-specific) code lives here. Adding a new quantization type that ships + * a sidecar means implementing those two hooks on its {@code CompressedVectors} class plus a + * retrainer; this strategy and the compactor stay untouched. + */ +public final class SidecarCompactionStrategy extends QuantizationCompactionStrategy { + private static final Logger log = LoggerFactory.getLogger(SidecarCompactionStrategy.class); + private static final VectorTypeSupport vectorTypeSupport = VectorizationProvider.getInstance().getVectorTypeSupport(); + + private final CompactionContext ctx; + private final CompressedVectors formatHandle; + private final VectorCompressorRetrainer retrainer; + private VectorCompressor retrainedCompressor; + + public SidecarCompactionStrategy(CompactionContext ctx, + CompressedVectors formatHandle, + VectorCompressorRetrainer retrainer) { + this.ctx = ctx; + this.formatHandle = formatHandle; + this.retrainer = retrainer; + } + + @Override + public void retrain(VectorSimilarityFunction vsf) { + log.info("Retraining sidecar compressor ({}) on merged sources", + formatHandle.getClass().getSimpleName()); + this.retrainedCompressor = retrainer.retrain(vsf); + } + + @Override + public VectorCompressor compressor() { + return retrainedCompressor; + } + + @Override + public boolean writesCodesSidecar() { + return true; + } + + @Override + public void writeSidecar(Path compressedPath) throws IOException { + if (retrainedCompressor == null) { + throw new IllegalStateException("retrain() must be called before writeSidecar()"); + } + final int vectorsPerChunk = formatHandle.sidecarVectorsPerChunk(); + final int codeSize = retrainedCompressor.compressedVectorSize(); + final int count = ctx.maxOrdinal + 1; + final int chunkCount = (count + vectorsPerChunk - 1) / vectorsPerChunk; + + log.info("Streaming {} merged ordinals to {} ({} chunks of up to {} entries each)", + count, compressedPath, chunkCount, vectorsPerChunk); + + try (var out = new 
BufferedRandomAccessWriter(compressedPath)) { + formatHandle.writeSidecarHeader(out, retrainedCompressor, count); + + int parallelism = Math.max(ctx.taskWindowSize, 1); + for (int batchStart = 0; batchStart < chunkCount; batchStart += parallelism) { + int batchEnd = Math.min(batchStart + parallelism, chunkCount); + List>> tasks = new ArrayList<>(batchEnd - batchStart); + for (int c = batchStart; c < batchEnd; c++) { + final int chunkStart = c * vectorsPerChunk; + final int chunkEnd = Math.min(chunkStart + vectorsPerChunk, count); + tasks.add(() -> encodeChunk(chunkStart, chunkEnd, codeSize, retrainedCompressor)); + } + for (var f : ctx.executor.invokeAll(tasks)) { + vectorTypeSupport.writeByteSequence(out, f.get()); + } + } + } catch (InterruptedException | ExecutionException e) { + throw new IOException("Failed to write compressed sidecar to " + compressedPath, e); + } + log.info("Wrote compacted compressed sidecar to {}", compressedPath); + } + + @SuppressWarnings("unchecked") + private ByteSequence encodeChunk(int chunkStart, int chunkEnd, int codeSize, VectorCompressor compressor) throws IOException { + int chunkBytes = (chunkEnd - chunkStart) * codeSize; + ByteSequence chunk = vectorTypeSupport.createByteSequence(chunkBytes); + chunk.zero(); + + // Cast once; valid for all VectorCompressor implementations that produce ByteSequence codes + // (PQ, future ASH, etc.). VectorCompressor's encode/encodeTo contract guarantees T is the + // encoded type and for our supported quantization schemes T = ByteSequence. 
+ VectorCompressor> byteCompressor = (VectorCompressor>) compressor; + + OnDiskGraphIndex.View[] views = new OnDiskGraphIndex.View[ctx.sources.size()]; + try { + VectorFloat vec = vectorTypeSupport.createFloatVector(ctx.dimension); + ByteSequence code = vectorTypeSupport.createByteSequence(codeSize); + for (int newOrd = chunkStart; newOrd < chunkEnd; newOrd++) { + int[] resolved = resolveSourceForNewOrd(newOrd); + if (resolved == null) continue; // hole; slot stays zero + int srcIdx = resolved[0]; + int oldOrd = resolved[1]; + if (views[srcIdx] == null) { + views[srcIdx] = (OnDiskGraphIndex.View) ctx.sources.get(srcIdx).getView(); + } + views[srcIdx].getVectorInto(oldOrd, vec, 0); + code.zero(); + byteCompressor.encodeTo(vec, code); + int slotOffset = (newOrd - chunkStart) * codeSize; + for (int b = 0; b < codeSize; b++) { + chunk.set(slotOffset + b, code.get(b)); + } + } + } finally { + for (var v : views) { + if (v != null) { + try { v.close(); } catch (Exception ignore) {} + } + } + } + return chunk; + } + + private int[] resolveSourceForNewOrd(int newOrd) { + for (int s = 0; s < ctx.remappers.size(); s++) { + int oldOrd = ctx.remappers.get(s).newToOld(newOrd); + if (oldOrd != OrdinalMapper.OMITTED) { + return new int[]{s, oldOrd}; + } + } + return null; + } +} diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/VectorCompressorRetrainer.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/VectorCompressorRetrainer.java new file mode 100644 index 000000000..5110cbe52 --- /dev/null +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/VectorCompressorRetrainer.java @@ -0,0 +1,33 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.github.jbellis.jvector.graph.disk; + +import io.github.jbellis.jvector.quantization.VectorCompressor; +import io.github.jbellis.jvector.vector.VectorSimilarityFunction; + +/** + * Trains a fresh {@link VectorCompressor} on the merged source vectors during a compaction run. + *
<p>
+ * One implementation per quantization scheme (today: {@link PQRetrainer} wrapped behind a lambda; + * future: {@code ASHRetrainer}, etc.). Both fused and sidecar generic strategies receive a + * retrainer via their constructor and invoke it at {@code retrain(vsf)} time — the strategies + * stay quantization-agnostic and the retrainer encapsulates the scheme-specific training math. + */ +@FunctionalInterface +public interface VectorCompressorRetrainer { + VectorCompressor retrain(VectorSimilarityFunction vsf); +} diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/feature/FusedFeature.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/feature/FusedFeature.java index d54630999..64334310a 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/feature/FusedFeature.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/feature/FusedFeature.java @@ -18,7 +18,11 @@ import io.github.jbellis.jvector.disk.IndexWriter; import io.github.jbellis.jvector.disk.RandomAccessReader; +import io.github.jbellis.jvector.graph.disk.CompactionContext; +import io.github.jbellis.jvector.graph.disk.QuantizationCompactionStrategy; +import io.github.jbellis.jvector.quantization.VectorCompressor; import io.github.jbellis.jvector.util.Accountable; +import io.github.jbellis.jvector.vector.types.ByteSequence; import java.io.IOException; @@ -38,4 +42,42 @@ default boolean isFused() { interface InlineSource extends Accountable {} InlineSource loadSourceFeature(RandomAccessReader in) throws IOException; + + /** + * For compaction use: bytes occupied on disk by a single stored code (one neighbor's payload). + * For fused features {@code featureSize() == codeSize() * maxDegree}. Called by the compactor + * (and {@link io.github.jbellis.jvector.graph.disk.OnDiskGraphIndexCompactor#ramBytesUsed}) to + * size per-thread scratch buffers and by {@code FusedCompactionStrategy} to size the streaming + * pre-encode cache. 
+ */ + int codeSize(); + + /** + * For compaction use: returns the underlying compressor that produced the inline codes carried + * by this feature. Returned typed as {@code VectorCompressor>} so generic + * compaction code (the pre-encode pass, the per-write encoding fallback in {@code CompactWriter}) + * can call {@code encodeTo(VectorFloat, ByteSequence)} without knowing the concrete + * quantization scheme. + */ + VectorCompressor> getCompressor(); + + /** + * For compaction use: returns a fresh {@link FusedFeature} of this same scheme but + * parameterized by a new compressor and max degree. Called by + * {@code FusedCompactionStrategy.outputFusedFeature} to construct the merged output's fused + * feature from a retrained compressor — every {@link FusedFeature} implementation acts as a + * factory for itself in this way so the compactor never references concrete subtypes. + */ + FusedFeature withCompressor(VectorCompressor> newCompressor, int maxDegree); + + /** + * For compaction use: returns the {@link QuantizationCompactionStrategy} the compactor should + * run when merging graphs that carry this fused feature. One strategy instance per + * compaction; it owns any transient state (retrained codebook, pre-encode caches) until the + * compactor releases it via {@link QuantizationCompactionStrategy#onAfterClose}. + *
<p>
+ * Implementations must return a fresh strategy on every call — feature instances themselves + * are read-mostly objects that may be shared by concurrent readers of the source graph. + */ + QuantizationCompactionStrategy createCompactionStrategy(CompactionContext ctx); } diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/feature/FusedPQ.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/feature/FusedPQ.java index 840650ba5..7fd48ee27 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/feature/FusedPQ.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/feature/FusedPQ.java @@ -20,7 +20,9 @@ import io.github.jbellis.jvector.disk.RandomAccessReader; import io.github.jbellis.jvector.graph.ImmutableGraphIndex; import io.github.jbellis.jvector.graph.disk.CommonHeader; +import io.github.jbellis.jvector.graph.disk.CompactionContext; import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex; +import io.github.jbellis.jvector.graph.disk.QuantizationCompactionStrategy; import io.github.jbellis.jvector.graph.similarity.ScoreFunction; import io.github.jbellis.jvector.quantization.FusedPQDecoder; import io.github.jbellis.jvector.quantization.PQVectors; @@ -67,6 +69,23 @@ public ProductQuantization getPQ() { return pq; } + /** For compaction use. See {@link FusedFeature#getCompressor}. */ + @Override + @SuppressWarnings("unchecked") + public io.github.jbellis.jvector.quantization.VectorCompressor> getCompressor() { + return (io.github.jbellis.jvector.quantization.VectorCompressor>) (io.github.jbellis.jvector.quantization.VectorCompressor) pq; + } + + /** For compaction use. See {@link FusedFeature#withCompressor}. 
*/ + @Override + public FusedFeature withCompressor(io.github.jbellis.jvector.quantization.VectorCompressor> newCompressor, int maxDegree) { + if (!(newCompressor instanceof ProductQuantization)) { + throw new IllegalArgumentException( + "FusedPQ requires ProductQuantization; got " + newCompressor.getClass().getSimpleName()); + } + return new FusedPQ(maxDegree, (ProductQuantization) newCompressor); + } + @Override public int headerSize() { return pq.compressorSize(); @@ -77,6 +96,12 @@ public int featureSize() { return pq.compressedVectorSize() * maxDegree; } + /** For compaction use. See {@link FusedFeature#codeSize}. */ + @Override + public int codeSize() { + return pq.compressedVectorSize(); + } + static FusedPQ load(CommonHeader header, RandomAccessReader reader) { try { return new FusedPQ(header.layerInfo.get(0).degree, ProductQuantization.load(reader)); @@ -96,6 +121,16 @@ public ScoreFunction.ApproximateScoreFunction approximateScoreFunctionFor(Vector return FusedPQDecoder.newDecoder(neighbors, pq, hierarchyCachedFeatures, queryVector, reusableNeighborCodes.get(), reusableResults.get(), vsf, esf); } + /** For compaction use. See {@link FusedFeature#createCompactionStrategy}. 
*/ + @Override + public QuantizationCompactionStrategy createCompactionStrategy(CompactionContext ctx) { + ProductQuantization basePQ = this.pq; + io.github.jbellis.jvector.graph.disk.VectorCompressorRetrainer retrainer = + vsf -> new io.github.jbellis.jvector.graph.disk.PQRetrainer(ctx.sources, ctx.liveNodes, ctx.dimension) + .retrain(vsf, basePQ); + return new io.github.jbellis.jvector.graph.disk.FusedCompactionStrategy(ctx, this, retrainer); + } + @Override public void writeHeader(IndexWriter out) throws IOException { pq.write(out, OnDiskGraphIndex.CURRENT_VERSION); diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/quantization/CompressedVectors.java b/jvector-base/src/main/java/io/github/jbellis/jvector/quantization/CompressedVectors.java index 767659148..020397eba 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/quantization/CompressedVectors.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/quantization/CompressedVectors.java @@ -17,7 +17,9 @@ package io.github.jbellis.jvector.quantization; import io.github.jbellis.jvector.disk.IndexWriter; +import io.github.jbellis.jvector.graph.disk.CompactionContext; import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex; +import io.github.jbellis.jvector.graph.disk.QuantizationCompactionStrategy; import io.github.jbellis.jvector.graph.similarity.ScoreFunction; import io.github.jbellis.jvector.util.Accountable; import io.github.jbellis.jvector.vector.VectorSimilarityFunction; @@ -66,4 +68,46 @@ default ScoreFunction.ApproximateScoreFunction approximateScoreFunctionFor(Vecto /** the number of vectors */ int count(); + + /** + * For compaction use: returns the {@link QuantizationCompactionStrategy} the compactor should + * run when merging graphs whose non-fused compressed sidecars are this kind of + * {@code CompressedVectors}. One strategy instance per compaction; it retrains the compressor + * on the merged source vectors and streams the merged sidecar to disk. + *
<p>
+ * Called by {@code OnDiskGraphIndexCompactor.detectSidecarStrategy()}. Named to mirror + * {@code FusedFeature.createCompactionStrategy} — same verb, receiver type disambiguates + * whether the returned strategy drives the inline-fused or sidecar workflow. Default throws — + * implementations supporting compaction must override. + */ + default QuantizationCompactionStrategy createCompactionStrategy(CompactionContext ctx) { + throw new UnsupportedOperationException( + getClass().getSimpleName() + " does not support sidecar compaction"); + } + + // ---- For compaction use: sidecar-streaming-write hooks. Called by the generic + // SidecarCompactionStrategy to produce a merged-format-compatible sidecar without that + // strategy knowing the format. ---- + + /** + * For compaction use: writes the format-specific sidecar header (compressor params + vector + * count + any extras the reader expects between count and the chunk stream). Called once at + * the start of a streaming sidecar write by {@code SidecarCompactionStrategy.writeSidecar}, + * after which the strategy emits chunks of {@code sidecarVectorsPerChunk()} codes each. + * Default throws — implementations supporting sidecar compaction must override. + */ + default void writeSidecarHeader(IndexWriter out, VectorCompressor mergedCompressor, int count) throws IOException { + throw new UnsupportedOperationException( + getClass().getSimpleName() + " does not support sidecar compaction"); + } + + /** + * For compaction use: vectors per chunk for streaming sidecar writes. The chunk size must + * match the format the reader expects (e.g. {@code PQVectors} uses 1024 to align with + * {@code MutablePQVectors}'s on-disk layout). Read by + * {@code SidecarCompactionStrategy.writeSidecar} to size each emitted chunk. 
+ */ + default int sidecarVectorsPerChunk() { + return 1024; + } } diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/quantization/PQVectors.java b/jvector-base/src/main/java/io/github/jbellis/jvector/quantization/PQVectors.java index 42113b242..760eb38ee 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/quantization/PQVectors.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/quantization/PQVectors.java @@ -19,6 +19,8 @@ import io.github.jbellis.jvector.disk.IndexWriter; import io.github.jbellis.jvector.disk.RandomAccessReader; import io.github.jbellis.jvector.graph.RandomAccessVectorValues; +import io.github.jbellis.jvector.graph.disk.CompactionContext; +import io.github.jbellis.jvector.graph.disk.QuantizationCompactionStrategy; import io.github.jbellis.jvector.graph.similarity.ScoreFunction; import io.github.jbellis.jvector.util.RamUsageEstimator; import io.github.jbellis.jvector.vector.VectorSimilarityFunction; @@ -416,6 +418,37 @@ public ProductQuantization getCompressor() { return pq; } + /** For compaction use. See {@link CompressedVectors#createCompactionStrategy}. */ + @Override + public QuantizationCompactionStrategy createCompactionStrategy(CompactionContext ctx) { + ProductQuantization basePQ = this.pq; + io.github.jbellis.jvector.graph.disk.VectorCompressorRetrainer retrainer = + vsf -> new io.github.jbellis.jvector.graph.disk.PQRetrainer(ctx.sources, ctx.liveNodes, ctx.dimension) + .retrain(vsf, basePQ); + return new io.github.jbellis.jvector.graph.disk.SidecarCompactionStrategy(ctx, this, retrainer); + } + + /** For compaction use. See {@link CompressedVectors#writeSidecarHeader}. 
*/ + @Override + public void writeSidecarHeader(IndexWriter out, VectorCompressor mergedCompressor, int count) throws IOException { + if (!(mergedCompressor instanceof ProductQuantization)) { + throw new IllegalArgumentException( + "PQVectors sidecar header requires ProductQuantization; got " + + mergedCompressor.getClass().getSimpleName()); + } + ProductQuantization mergedPQ = (ProductQuantization) mergedCompressor; + mergedPQ.write(out, io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex.CURRENT_VERSION); + out.writeInt(count); + out.writeInt(mergedPQ.getSubspaceCount()); + } + + /** For compaction use. See {@link CompressedVectors#sidecarVectorsPerChunk}. */ + @Override + public int sidecarVectorsPerChunk() { + // Match MutablePQVectors so the on-disk layout is identical to what PQVectors.load reconstructs. + return 1024; + } + @Override public long ramBytesUsed() { int REF_BYTES = RamUsageEstimator.NUM_BYTES_OBJECT_REF; diff --git a/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/disk/TestOnDiskGraphIndexCompactor.java b/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/disk/TestOnDiskGraphIndexCompactor.java index 410b96d0e..3517c7c40 100644 --- a/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/disk/TestOnDiskGraphIndexCompactor.java +++ b/jvector-tests/src/test/java/io/github/jbellis/jvector/graph/disk/TestOnDiskGraphIndexCompactor.java @@ -771,4 +771,320 @@ public void testDeletionsAndOrdinalMapping() throws Exception { searcher.close(); } + + // ----------------------------------------------------------------------------------------- + // Tests for non-fused compressed-sidecar compaction (compact(graphPath, compressedPath)) + // ----------------------------------------------------------------------------------------- + + /** + * Happy path: merge two sources whose PQ codes are shipped as a non-fused {@link PQVectors} + * sidecar, and verify both outputs — graph and compressed sidecar — are produced correctly. 
+ * Asserts: + *

<ul>
+ * <li>merged graph has the expected node count and per-ordinal vector values,</li>
+ * <li>merged sidecar loads as PQVectors with the same {@code count}, subspace count, and + * cluster count as the inputs,</li>
+ * <li>each merged code decodes to a vector close to the original raw vector (within PQ + * reconstruction error).</li>
+ * </ul>
+ */ + @Test + public void testCompactWithCompressedSidecar() throws Exception { + int dim = 16; + int n = 32; // nodes per source + int M = 8; // PQ subspaces + int clusters = 16; // small for fast test + VectorSimilarityFunction vsf = VectorSimilarityFunction.EUCLIDEAN; + + List> vecs0 = createRandomVectors(n, dim); + List> vecs1 = createRandomVectors(n, dim); + + Path graph0 = buildSimpleSourceGraph(vecs0, dim, vsf, "sidecar_src_0"); + Path graph1 = buildSimpleSourceGraph(vecs1, dim, vsf, "sidecar_src_1"); + + ReaderSupplier rs0 = ReaderSupplierFactory.open(graph0); + ReaderSupplier rs1 = ReaderSupplierFactory.open(graph1); + OnDiskGraphIndex g0 = OnDiskGraphIndex.load(rs0); + OnDiskGraphIndex g1 = OnDiskGraphIndex.load(rs1); + + // Per-source PQVectors — the non-fused sidecar input. + RandomAccessVectorValues ravv0 = new ListRandomAccessVectorValues(vecs0, dim); + RandomAccessVectorValues ravv1 = new ListRandomAccessVectorValues(vecs1, dim); + ProductQuantization pq0 = ProductQuantization.compute(ravv0, M, clusters, true, UNWEIGHTED, simdExecutor, parallelExecutor); + ProductQuantization pq1 = ProductQuantization.compute(ravv1, M, clusters, true, UNWEIGHTED, simdExecutor, parallelExecutor); + PQVectors pqv0 = (PQVectors) pq0.encodeAll(ravv0, simdExecutor); + PQVectors pqv1 = (PQVectors) pq1.encodeAll(ravv1, simdExecutor); + + // Identity remapping: source 0 -> [0, n), source 1 -> [n, 2n) + Map map0 = new HashMap<>(); + Map map1 = new HashMap<>(); + for (int i = 0; i < n; i++) { + map0.put(i, i); + map1.put(i, n + i); + } + FixedBitSet live0 = new FixedBitSet(n); live0.set(0, n); + FixedBitSet live1 = new FixedBitSet(n); live1.set(0, n); + + var compactor = new OnDiskGraphIndexCompactor( + List.of(g0, g1), + List.of(pqv0, pqv1), + List.of(live0, live1), + List.of(new OrdinalMapper.MapMapper(map0), new OrdinalMapper.MapMapper(map1)), + vsf, null); + + Path graphOut = testDirectory.resolve("sidecar_graph_out"); + Path compressedOut = 
testDirectory.resolve("sidecar_pq_out"); + compactor.compact(graphOut, compressedOut); + + // ---- Verify merged graph ---- + ReaderSupplier rsOut = ReaderSupplierFactory.open(graphOut); + OnDiskGraphIndex compacted = OnDiskGraphIndex.load(rsOut); + assertEquals("compacted graph node count", 2 * n, compacted.size(0)); + + var view = compacted.getView(); + VectorFloat buf = vectorTypeSupport.createFloatVector(dim); + for (int i = 0; i < n; i++) { + view.getVectorInto(i, buf, 0); + assertVecEquals(vecs0.get(i), buf, i); + view.getVectorInto(n + i, buf, 0); + assertVecEquals(vecs1.get(i), buf, n + i); + } + + // ---- Verify merged compressed sidecar ---- + try (var rsCompressed = ReaderSupplierFactory.open(compressedOut); var reader = rsCompressed.get()) { + PQVectors mergedPqv = PQVectors.load(reader); + assertEquals("merged PQVectors count", 2 * n, mergedPqv.count()); + ProductQuantization mergedPQ = mergedPqv.getCompressor(); + assertEquals("merged PQ subspaceCount", M, mergedPQ.getSubspaceCount()); + assertEquals("merged PQ clusterCount", clusters, mergedPQ.getClusterCount()); + assertEquals("merged PQ compressedVectorSize", M, mergedPQ.compressedVectorSize()); + + // Each merged code should decode to a vector close to the original (PQ is lossy + // but with these params reconstruction error stays bounded). We check that the + // re-encoded code matches the stored code — i.e. encoding is consistent under the + // retrained codebook. 
+ VectorFloat reEncoded = vectorTypeSupport.createFloatVector(dim); + io.github.jbellis.jvector.vector.types.ByteSequence tmpCode = vectorTypeSupport.createByteSequence(M); + for (int i = 0; i < n; i++) { + mergedPQ.encodeTo(vecs0.get(i), tmpCode); + io.github.jbellis.jvector.vector.types.ByteSequence stored = mergedPqv.get(i); + for (int b = 0; b < M; b++) { + assertEquals("ord " + i + " code byte " + b, tmpCode.get(b), stored.get(b)); + } + mergedPQ.encodeTo(vecs1.get(i), tmpCode); + stored = mergedPqv.get(n + i); + for (int b = 0; b < M; b++) { + assertEquals("ord " + (n + i) + " code byte " + b, tmpCode.get(b), stored.get(b)); + } + } + } + } + + /** + * Validation: combining {@code sourceCompressed} with sources that already carry FUSED_PQ + * inline must throw, since the two are mutually exclusive ways to ship PQ codes. + */ + @Test + public void testCompactCompressedSidecarRejectsFusedPQ() throws Exception { + // Reuse the FusedPQ sources built by setup(). + ReaderSupplier rs0 = ReaderSupplierFactory.open(testDirectory.resolve("test_graph_0")); + ReaderSupplier rs1 = ReaderSupplierFactory.open(testDirectory.resolve("test_graph_1")); + OnDiskGraphIndex g0 = OnDiskGraphIndex.load(rs0); + OnDiskGraphIndex g1 = OnDiskGraphIndex.load(rs1); + + // Throwaway PQVectors just to exercise the validation; values don't matter. 
+ var ravv = new ListRandomAccessVectorValues(allVecs.subList(0, numVectorsPerGraph), dimension); + ProductQuantization pq = ProductQuantization.compute(ravv, 8, 16, true, UNWEIGHTED, simdExecutor, parallelExecutor); + PQVectors pqv0 = (PQVectors) pq.encodeAll(ravv, simdExecutor); + PQVectors pqv1 = (PQVectors) pq.encodeAll(ravv, simdExecutor); + + Map map0 = new HashMap<>(); + Map map1 = new HashMap<>(); + for (int i = 0; i < numVectorsPerGraph; i++) { + map0.put(i, i); + map1.put(i, numVectorsPerGraph + i); + } + FixedBitSet live0 = new FixedBitSet(numVectorsPerGraph); live0.set(0, numVectorsPerGraph); + FixedBitSet live1 = new FixedBitSet(numVectorsPerGraph); live1.set(0, numVectorsPerGraph); + + try { + new OnDiskGraphIndexCompactor( + List.of(g0, g1), + List.of(pqv0, pqv1), + List.of(live0, live1), + List.of(new OrdinalMapper.MapMapper(map0), new OrdinalMapper.MapMapper(map1)), + similarityFunction, null); + org.junit.Assert.fail("expected IllegalArgumentException for FUSED_PQ + sourceCompressed"); + } catch (IllegalArgumentException expected) { + assertTrue("error message mentions FUSED_PQ", + expected.getMessage().toLowerCase().contains("fused_pq") + || expected.getMessage().toLowerCase().contains("fused pq")); + } + } + + /** + * Validation: {@code sourceCompressed.size()} must equal {@code sources.size()}. 
+ */ + @Test + public void testCompactCompressedSidecarRejectsSizeMismatch() throws Exception { + int dim = 8; + int n = 32; // need >= clusters for k-means training + int clusters = 16; + VectorSimilarityFunction vsf = VectorSimilarityFunction.EUCLIDEAN; + + List> vecs0 = createRandomVectors(n, dim); + List> vecs1 = createRandomVectors(n, dim); + + Path graph0 = buildSimpleSourceGraph(vecs0, dim, vsf, "size_src_0"); + Path graph1 = buildSimpleSourceGraph(vecs1, dim, vsf, "size_src_1"); + + OnDiskGraphIndex g0 = OnDiskGraphIndex.load(ReaderSupplierFactory.open(graph0)); + OnDiskGraphIndex g1 = OnDiskGraphIndex.load(ReaderSupplierFactory.open(graph1)); + + RandomAccessVectorValues ravv0 = new ListRandomAccessVectorValues(vecs0, dim); + ProductQuantization pq = ProductQuantization.compute(ravv0, 4, clusters, true, UNWEIGHTED, simdExecutor, parallelExecutor); + PQVectors pqv0 = (PQVectors) pq.encodeAll(ravv0, simdExecutor); + + Map map0 = new HashMap<>(); + Map map1 = new HashMap<>(); + for (int i = 0; i < n; i++) { map0.put(i, i); map1.put(i, n + i); } + FixedBitSet live = new FixedBitSet(n); live.set(0, n); + + try { + new OnDiskGraphIndexCompactor( + List.of(g0, g1), + List.of(pqv0), // size 1 vs sources size 2 + List.of(live, live), + List.of(new OrdinalMapper.MapMapper(map0), new OrdinalMapper.MapMapper(map1)), + vsf, null); + org.junit.Assert.fail("expected IllegalArgumentException for size mismatch"); + } catch (IllegalArgumentException expected) { + assertTrue("error message mentions size", + expected.getMessage().toLowerCase().contains("size")); + } + } + + /** + * Calling the two-arg compact() without supplying {@code sourceCompressed} must fail — + * there is no source for the merged sidecar. 
+ */ + @Test + public void testCompactTwoArgRequiresSourceCompressed() throws Exception { + int dim = 8; + int n = 8; + VectorSimilarityFunction vsf = VectorSimilarityFunction.EUCLIDEAN; + + List> vecs0 = createRandomVectors(n, dim); + List> vecs1 = createRandomVectors(n, dim); + Path graph0 = buildSimpleSourceGraph(vecs0, dim, vsf, "noarg_src_0"); + Path graph1 = buildSimpleSourceGraph(vecs1, dim, vsf, "noarg_src_1"); + + OnDiskGraphIndex g0 = OnDiskGraphIndex.load(ReaderSupplierFactory.open(graph0)); + OnDiskGraphIndex g1 = OnDiskGraphIndex.load(ReaderSupplierFactory.open(graph1)); + + Map map0 = new HashMap<>(); + Map map1 = new HashMap<>(); + for (int i = 0; i < n; i++) { map0.put(i, i); map1.put(i, n + i); } + FixedBitSet live = new FixedBitSet(n); live.set(0, n); + + // Use the legacy 5-arg constructor — sourceCompressed defaults to null. + var compactor = new OnDiskGraphIndexCompactor( + List.of(g0, g1), + List.of(live, live), + List.of(new OrdinalMapper.MapMapper(map0), new OrdinalMapper.MapMapper(map1)), + vsf, null); + + Path graphOut = testDirectory.resolve("noarg_graph_out"); + Path compressedOut = testDirectory.resolve("noarg_pq_out"); + try { + compactor.compact(graphOut, compressedOut); + org.junit.Assert.fail("expected IllegalStateException without sourceCompressed"); + } catch (IllegalStateException expected) { + assertTrue("error message mentions sourceCompressed", + expected.getMessage().toLowerCase().contains("sourcecompressed")); + } + } + + /** + * Compaction with deletions: only live nodes appear in the merged sidecar at their remapped + * ordinals, and the count matches the number of live nodes (dense merged ordinal range). 
+ */ + @Test + public void testCompactCompressedSidecarWithDeletions() throws Exception { + int dim = 16; + int n = 16; + int M = 8; + int clusters = 16; + VectorSimilarityFunction vsf = VectorSimilarityFunction.EUCLIDEAN; + + List> vecs0 = createRandomVectors(n, dim); + List> vecs1 = createRandomVectors(n, dim); + Path graph0 = buildSimpleSourceGraph(vecs0, dim, vsf, "delsidecar_src_0"); + Path graph1 = buildSimpleSourceGraph(vecs1, dim, vsf, "delsidecar_src_1"); + + OnDiskGraphIndex g0 = OnDiskGraphIndex.load(ReaderSupplierFactory.open(graph0)); + OnDiskGraphIndex g1 = OnDiskGraphIndex.load(ReaderSupplierFactory.open(graph1)); + + RandomAccessVectorValues ravv0 = new ListRandomAccessVectorValues(vecs0, dim); + RandomAccessVectorValues ravv1 = new ListRandomAccessVectorValues(vecs1, dim); + ProductQuantization pq0 = ProductQuantization.compute(ravv0, M, clusters, true, UNWEIGHTED, simdExecutor, parallelExecutor); + ProductQuantization pq1 = ProductQuantization.compute(ravv1, M, clusters, true, UNWEIGHTED, simdExecutor, parallelExecutor); + PQVectors pqv0 = (PQVectors) pq0.encodeAll(ravv0, simdExecutor); + PQVectors pqv1 = (PQVectors) pq1.encodeAll(ravv1, simdExecutor); + + // Keep even nodes live; map them densely. 
+ FixedBitSet live0 = new FixedBitSet(n); + FixedBitSet live1 = new FixedBitSet(n); + Map map0 = new HashMap<>(); + Map map1 = new HashMap<>(); + int next = 0; + for (int i = 0; i < n; i++) { + if (i % 2 == 0) { live0.set(i); map0.put(i, next++); } + } + int firstSourceCount = next; + for (int i = 0; i < n; i++) { + if (i % 2 == 0) { live1.set(i); map1.put(i, next++); } + } + int totalLive = next; + + var compactor = new OnDiskGraphIndexCompactor( + List.of(g0, g1), + List.of(pqv0, pqv1), + List.of(live0, live1), + List.of(new OrdinalMapper.MapMapper(map0), new OrdinalMapper.MapMapper(map1)), + vsf, null); + + Path graphOut = testDirectory.resolve("delsidecar_graph_out"); + Path compressedOut = testDirectory.resolve("delsidecar_pq_out"); + compactor.compact(graphOut, compressedOut); + + // Verify graph + OnDiskGraphIndex compacted = OnDiskGraphIndex.load(ReaderSupplierFactory.open(graphOut)); + assertEquals("compacted graph live count", totalLive, compacted.size(0)); + + // Verify sidecar: count matches dense live total; each live ordinal's code matches a + // fresh re-encoding of the corresponding raw vector under the retrained codebook. 
+ try (var rsCompressed = ReaderSupplierFactory.open(compressedOut); var reader = rsCompressed.get()) { + PQVectors mergedPqv = PQVectors.load(reader); + assertEquals("merged sidecar count", totalLive, mergedPqv.count()); + + ProductQuantization mergedPQ = mergedPqv.getCompressor(); + io.github.jbellis.jvector.vector.types.ByteSequence tmp = vectorTypeSupport.createByteSequence(M); + for (int i = 0; i < n; i++) { + if (i % 2 != 0) continue; + mergedPQ.encodeTo(vecs0.get(i), tmp); + io.github.jbellis.jvector.vector.types.ByteSequence stored = mergedPqv.get(map0.get(i)); + for (int b = 0; b < M; b++) { + assertEquals("source 0 ord " + i + " byte " + b, tmp.get(b), stored.get(b)); + } + mergedPQ.encodeTo(vecs1.get(i), tmp); + stored = mergedPqv.get(map1.get(i)); + for (int b = 0; b < M; b++) { + assertEquals("source 1 ord " + i + " byte " + b, tmp.get(b), stored.get(b)); + } + } + // sanity check on dense layout + assertEquals("first-source live count", firstSourceCount, n / 2); + } + } } From b8c448dd44b21afc8d620ae70fd65aa5df99b93d Mon Sep 17 00:00:00 2001 From: dian-lun-lin Date: Wed, 20 May 2026 11:53:25 -0700 Subject: [PATCH 16/18] Compaction: add in-place L0 neighbor refinement pass after merge After compactGraphImpl writes the merged graph, run a second pass that mirrors GraphIndexBuilder.cleanup()'s improveConnections: for each L1 node (or all L0 nodes when no hierarchy), descend the upper layers, beam-search L0, union with existing edges, run Vamana diversity selection, and rewrite the L0 record (and inline fused-PQ codes) in place. Fused-PQ outputs memcpy codes from the pre-encode cache by new ordinal; onAfterClose is deferred to the public compact() entry points so the cache stays mapped during refinement. 
QuantizationCompactionStrategy exposes the code cache via getCodeCache()/getCacheCodeSize() (FusedCompactionStrategy overrides), plus releaseSources() so the compactor can drop source-graph refs (and the strategy's CompactionContext hold) between compactGraphImpl and refineCompactedGraph. OnDiskGraphIndex.offsetFor()/neighborsOffsetFor() become package-private so the compactor can address L0 records for in-place rewrites. Refinement reads via SimpleReader (no mmap). SEARCH_TOP_K_MULTIPLIER=4 widens the refinement candidate pool that the diversity prune selects baseDegree from -- written degree unchanged. --- .../graph/disk/FusedCompactionStrategy.java | 22 +- .../jvector/graph/disk/OnDiskGraphIndex.java | 6 +- .../graph/disk/OnDiskGraphIndexCompactor.java | 422 +++++++++++++++++- .../disk/QuantizationCompactionStrategy.java | 29 ++ 4 files changed, 465 insertions(+), 14 deletions(-) diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/FusedCompactionStrategy.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/FusedCompactionStrategy.java index eb594b058..50715f0f5 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/FusedCompactionStrategy.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/FusedCompactionStrategy.java @@ -55,7 +55,9 @@ public final class FusedCompactionStrategy extends QuantizationCompactionStrateg private static final Logger log = LoggerFactory.getLogger(FusedCompactionStrategy.class); private static final VectorTypeSupport vectorTypeSupport = VectorizationProvider.getInstance().getVectorTypeSupport(); - private final CompactionContext ctx; + // Non-final: nulled by releaseSources() after compactGraphImpl so the source graphs reachable + // through ctx.sources can be GC'd before refinement. onAfterClose must not touch ctx. 
+ private CompactionContext ctx; private final FusedFeature sourceFusedFeature; private final VectorCompressorRetrainer retrainer; @@ -88,11 +90,29 @@ public VectorCompressor compressor() { return retrainedCompressor; } + @Override + public MappedByteBuffer getCodeCache() { + return codeCache; + } + + @Override + public int getCacheCodeSize() { + return cacheCodeSize; + } + @Override public boolean writesCodesInline() { return true; } + @Override + public void releaseSources() { + // ctx is only needed during onAfterHeader/onAfterLevels (pre-encode + entry-node code), + // which run inside compactGraphImpl. onAfterClose uses only cacheTruncateAt/codeCache. + // Safe to drop here so ctx.sources' in-heap layers/features are reclaimable before refine. + ctx = null; + } + /** * Returns the {@link FusedFeature} the compactor should put in the merged output graph. * Constructed from the source's fused feature via {@link FusedFeature#withCompressor}, diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndex.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndex.java index 9ab122392..3fb69d967 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndex.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndex.java @@ -491,7 +491,8 @@ public RandomAccessVectorValues copy() { throw new UnsupportedOperationException(); // need to copy reader } - private long offsetFor(int node, FeatureId featureId) { + // package-private: OnDiskGraphIndexCompactor uses this for in-place neighbor refinement + long offsetFor(int node, FeatureId featureId) { Feature feature = features.get(featureId); // Separated features are just global offset + node offset @@ -506,7 +507,8 @@ private long offsetFor(int node, FeatureId featureId) { return baseNodeOffsetFor(node) + skipInNode; } - private long neighborsOffsetFor(int level, int node) { + // package-private: 
OnDiskGraphIndexCompactor uses this for in-place neighbor refinement + long neighborsOffsetFor(int level, int node) { assert level == 0; // higher layers are in memory // skip node ID + inline features diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java index 785dc8f9d..b8fc7f210 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexCompactor.java @@ -18,6 +18,7 @@ import java.io.FileNotFoundException; import java.io.IOException; +import java.io.UncheckedIOException; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.file.Path; @@ -36,8 +37,10 @@ import io.github.jbellis.jvector.vector.VectorSimilarityFunction; import io.github.jbellis.jvector.graph.similarity.ScoreFunction; import io.github.jbellis.jvector.vector.types.VectorFloat; +import io.github.jbellis.jvector.disk.SimpleReader; import io.github.jbellis.jvector.quantization.CompressedVectors; import io.github.jbellis.jvector.quantization.ProductQuantization; +import io.github.jbellis.jvector.quantization.VectorCompressor; import io.github.jbellis.jvector.vector.VectorizationProvider; import io.github.jbellis.jvector.vector.types.VectorTypeSupport; import io.github.jbellis.jvector.vector.types.ByteSequence; @@ -57,16 +60,21 @@ public final class OnDiskGraphIndexCompactor implements Accountable { private static final int TARGET_BATCHES_PER_SOURCE = 40; private static final int TARGET_NODES_PER_BATCH = 128; private static final int MIN_SEARCH_TOP_K = 2; - private static final int SEARCH_TOP_K_MULTIPLIER = 2; + private static final int SEARCH_TOP_K_MULTIPLIER = 4; - private final List sources; + + // Non-final so releaseSourcesBeforeRefine() can drop the strong reference once compactGraphImpl + 
// has consumed them, letting the source graphs' in-heap upper-layer adjacency + feature buffers + // be reclaimed before refineCompactedGraph loads a second full graph. Read only during + // compaction (validation, compactGraphImpl, cost estimation) — never after refinement starts. + private List sources; // Optional non-fused compressed sidecar, parallel to `sources`. Null when sources carry their // quantization inline (FUSED_PQ) or have none. When non-null, compact(Path, Path) retrains the // compressor on merged vectors and writes a single merged CompressedVectors to compressedPath. private final List sourceCompressed; - private final List liveNodes; + private List liveNodes; private final List numLiveNodesPerSource; - private final List remappers; + private List remappers; private final List maxDegrees; private final int dimension; @@ -278,10 +286,15 @@ private void validateFeatures(List sources) { * specified path, handling PQ retraining if needed, and writing header, all layers, and footer. */ public void compact(Path outputPath) throws FileNotFoundException { + QuantizationCompactionStrategy strategy = detectInlineStrategy(); try { - QuantizationCompactionStrategy strategy = detectInlineStrategy(); compactGraphImpl(outputPath, strategy); + releaseSourcesBeforeRefine(strategy); + refineCompactedGraph(outputPath, strategy); } finally { + // Delayed until after refinement so refineCompactedGraph can read from the pre-encoded + // code cache appended past the projected EOF; onAfterClose unmaps it and truncates. + strategy.onAfterClose(outputPath); if (ownsExecutor) executor.shutdown(); } } @@ -302,21 +315,41 @@ public void compact(Path graphPath, Path compressedPath) throws FileNotFoundExce } Objects.requireNonNull(compressedPath, "compressedPath"); + // Graph compaction proceeds without fused-PQ retrain (validateCompressed forbids + // FUSED_PQ when sourceCompressed is set), then the sidecar is written below. 
+ QuantizationCompactionStrategy inlineStrategy = detectInlineStrategy(); + QuantizationCompactionStrategy sidecarStrategy = detectSidecarStrategy(); try { - // Graph compaction proceeds without fused-PQ retrain (validateCompressed forbids - // FUSED_PQ when sourceCompressed is set), then the sidecar is written below. - QuantizationCompactionStrategy inlineStrategy = detectInlineStrategy(); - QuantizationCompactionStrategy sidecarStrategy = detectSidecarStrategy(); sidecarStrategy.retrain(similarityFunction); compactGraphImpl(graphPath, inlineStrategy); + refineCompactedGraph(graphPath, inlineStrategy); sidecarStrategy.writeSidecar(compressedPath); } catch (IOException e) { throw new RuntimeException("Sidecar compaction failed", e); } finally { + inlineStrategy.onAfterClose(graphPath); if (ownsExecutor) executor.shutdown(); } } + /** + * For compaction use. Drops the compactor's strong references to the source graphs and their + * per-source live-node / remapper sidecars, and tells the strategy to release its + * {@link CompactionContext} hold on the same. Called between {@code compactGraphImpl} and + * {@code refineCompactedGraph} so the source graphs' in-heap upper-layer adjacency and feature + * buffers become GC-eligible before refinement loads a second full graph — the peak that was + * OOM-ing on memory-tight hosts. The underlying {@code ReaderSupplier}s are still owned and + * closed by the caller (per {@link OnDiskGraphIndex#close()}'s contract), so we only drop + * references, never close. Not used by the sidecar {@code compact(graphPath, compressedPath)} + * path: {@code SidecarCompactionStrategy.writeSidecar} re-reads source vectors after refinement. + */ + private void releaseSourcesBeforeRefine(QuantizationCompactionStrategy strategy) { + strategy.releaseSources(); + sources = null; + liveNodes = null; + remappers = null; + } + /** * Pick the inline-codes strategy by asking the source's fused feature (if any) for its * compaction strategy. 
Returns {@link QuantizationCompactionStrategy#NONE} when no fused feature is @@ -391,8 +424,375 @@ private void compactGraphImpl(Path outputPath, QuantizationCompactionStrategy st log.info("Compaction complete: {}", outputPath); } catch (IOException | ExecutionException | InterruptedException e) { throw new RuntimeException(e); - } finally { - strategy.onAfterClose(outputPath); + } + // strategy.onAfterClose is deferred to the public compact() entry points so refinement + // can read from the still-mapped pre-encode cache section past the projected EOF. + } + + /** + * Second pass over the just-written compacted graph. Mirrors + * {@link io.github.jbellis.jvector.graph.GraphIndexBuilder}'s {@code cleanup()} refinement + * step: when the merged graph has a hierarchy, iterates only level-1 nodes (which are also + * in L0); for each node, descends greedily through upper layers and beam-searches level 0 + * carrying entry points layer-to-layer, then rewrites the L0 neighbor list (and the inline + * per-neighbor PQ codes for fused-PQ outputs) in place. When the merged graph has no + * hierarchy, falls back to iterating all live L0 nodes. + *
<p>
+ * The refinement search uses approximate PQ scoring with an exact reranker when fused-PQ is + * available (matching the cross-source path in {@code compactLevels}); otherwise it falls + * back to exact-only scoring backed by inline vectors. + *
<p>
+ * For fused-PQ outputs the per-neighbor code write is a memcpy from the + * {@link QuantizationCompactionStrategy#getCodeCache() pre-encode cache} keyed by new + * ordinal; if no cache was built, codes are re-encoded via {@code encodeTo}. The cache lives in + * the same file past the projected EOF and is truncated away by {@code onAfterClose} once refinement returns. + *
<p>
+ * Only L0 records are written. Upper-layer neighbor lists live in an in-memory map after + * load and have no addressable file offset, so they're left as written by compactLevels. + */ + private void refineCompactedGraph(Path outputPath, QuantizationCompactionStrategy strategy) { + log.info("Refining compacted graph: {}", outputPath); + long t0 = System.nanoTime(); + + final int baseDegree = maxDegrees.get(0); + final boolean hasFusedPQ = strategy.writesCodesInline(); + @SuppressWarnings("unchecked") + final VectorCompressor> compressor = + hasFusedPQ ? (VectorCompressor>) (VectorCompressor) strategy.compressor() : null; + final int pqCodeSize = hasFusedPQ ? compressor.compressedVectorSize() : 0; + + final int searchTopK = Math.max(MIN_SEARCH_TOP_K, + baseDegree * SEARCH_TOP_K_MULTIPLIER); + final int beamWidth = Math.max(baseDegree, searchTopK) * BEAM_WIDTH_MULTIPLIER; + + // Code cache may or may not be present; capture once so refineOneNode can take the fast path. + // The cache is shared across threads; refineOneNode duplicates per call (cheap; no per-thread + // state to track and the duplicates are tiny GC-friendly ByteBuffer wrappers). + final java.nio.MappedByteBuffer codeCache = hasFusedPQ ? strategy.getCodeCache() : null; + final int cacheCodeSize = hasFusedPQ ? strategy.getCacheCodeSize() : 0; + + try (var supplier = new SimpleReader.Supplier(outputPath); + FileChannel fc = FileChannel.open(outputPath, StandardOpenOption.WRITE, StandardOpenOption.READ)) { + + // useFooter=false because the file's logical EOF (where the v6 footer trailer sits) is + // before the still-attached pre-encode cache section. loadFromFooter() would seek to + // the actual file length and read garbage as the magic. + OnDiskGraphIndex mergedGraph = OnDiskGraphIndex.load(supplier, 0, false); + + // Pick the iteration set: when there's a hierarchy, refine only L1 nodes (each also + // lives in L0, so their L0 record is what we rewrite). 
Mirrors GraphIndexBuilder's + // cleanup() which gates improveConnections() on `graph.getMaxLevel() > 0` and iterates + // `nodeStream(1)`. When there's no hierarchy, fall back to all L0 nodes. + int[] liveOrdinals; + int iterationLevel = mergedGraph.getMaxLevel() > 0 ? 1 : 0; + try (var collectView = mergedGraph.getView()) { + NodesIterator it = mergedGraph.getNodes(iterationLevel); + liveOrdinals = new int[it.size()]; + int n = 0; + while (it.hasNext()) liveOrdinals[n++] = it.next(); + } + + final ThreadLocal tls = ThreadLocal.withInitial(() -> + new RefineScratch(mergedGraph, baseDegree, dimension, searchTopK, pqCodeSize)); + + ExecutorCompletionService ecs = new ExecutorCompletionService<>(executor); + + int total = liveOrdinals.length; + int targetBatches = Math.max(taskWindowSize * 4, 16); + int batchSize = Math.max(1, (total + targetBatches - 1) / targetBatches); + + final int[] ords = liveOrdinals; + final boolean fpq = hasFusedPQ; + final int codeSize = pqCodeSize; + final VectorCompressor> cmp = compressor; + final int bw = beamWidth; + final java.nio.MappedByteBuffer cache = codeCache; + final int cacheSz = cacheCodeSize; + final OnDiskGraphIndex graphRef = mergedGraph; + + log.info("Refining {} live nodes at level {} (hierarchy maxLevel={}, fusedPQ={}, codeCache={})", + total, iterationLevel, mergedGraph.getMaxLevel(), fpq, cache != null); + + int submitted = 0; + for (int start = 0; start < total; start += batchSize) { + final int s = start; + final int e = Math.min(start + batchSize, total); + ecs.submit(() -> { + RefineScratch scratch = tls.get(); + for (int i = s; i < e; i++) { + int node = ords[i]; + refineOneNode(node, scratch, fc, baseDegree, fpq, codeSize, cmp, bw, + graphRef, cache, cacheSz); + } + return e - s; + }); + submitted++; + } + + int completed = 0; + int nodesDone = 0; + int progressStep = Math.max(1, total / 10); + int nextProgress = progressStep; + while (completed < submitted) { + nodesDone += ecs.take().get(); + completed++; + if 
(nodesDone >= nextProgress) { + log.info("Refinement progress: {}/{} nodes", nodesDone, total); + nextProgress += progressStep; + } + } + + // Per-thread scratches live in worker-thread ThreadLocals; closing the supplier in + // try-with-resources tears down the underlying mapping, so any later access would + // fail anyway. The references will be GC'd when the worker threads die. + } catch (IOException | InterruptedException | ExecutionException e) { + throw new RuntimeException("Refinement failed", e); + } + + log.info("Refinement complete in {} ms", (System.nanoTime() - t0) / 1_000_000); + } + + /** + * Refines a single node by mirroring {@code GraphIndexBuilder.improveConnections}: + * descend greedily through upper layers carrying entry points layer-to-layer, then beam + * search at L0. Diversity selection + in-place L0 record rewrite happen at the end. + *
<p>
+ * The {@code SearchScoreProvider} uses approximate PQ scoring with an exact reranker when + * fused-PQ is available; otherwise exact-only via the inline-vector reranker. Diversity + * always runs over exact scores (so we rescore approximate results after the L0 beam). + */ + private void refineOneNode(int node, + RefineScratch scratch, + FileChannel fc, + int baseDegree, + boolean hasFusedPQ, + int pqCodeSize, + VectorCompressor> compressor, + int beamWidth, + OnDiskGraphIndex mergedGraph, + java.nio.MappedByteBuffer codeCache, + int cacheCodeSize) { + OnDiskGraphIndex.View view = scratch.view; + view.getVectorInto(node, scratch.queryVec, 0); + + // Build score provider for this query. Reranker reads the candidate's inline FP vector + // (via view.getVectorInto into a worker-private tmp) and computes exact similarity. + ScoreFunction.ExactScoreFunction reranker = node2 -> { + view.getVectorInto(node2, scratch.tmpVec, 0); + return similarityFunction.compare(scratch.queryVec, scratch.tmpVec); + }; + SearchScoreProvider ssp; + if (hasFusedPQ) { + FusedPQ fpq = (FusedPQ) mergedGraph.getFeatures().get(FeatureId.FUSED_PQ); + var asf = fpq.approximateScoreFunctionFor(scratch.queryVec, similarityFunction, view, reranker); + ssp = new DefaultSearchScoreProvider(asf, reranker); + } else { + ssp = new DefaultSearchScoreProvider(reranker); + } + + Bits excludeSelf = idx -> idx != node; + + // Per-layer descent. Mirrors GraphSearcher.internalSearch: greedy single-best through + // each upper layer, then a beam search at layer 0. Entry points carry forward via + // setEntryPointsFromPreviousLayer so the L0 beam starts from the best-known region + // rather than the global entry node — much cheaper than the previous full search(). 
+ GraphSearcher gs = scratch.searcher; + var entry = view.entryNode(); + gs.initializeInternal(ssp, entry, excludeSelf); + for (int lvl = entry.level; lvl > 0; lvl--) { + gs.searchOneLayer(ssp, 1, 0f, lvl, excludeSelf); + gs.setEntryPointsFromPreviousLayer(); + } + gs.searchOneLayer(ssp, beamWidth, 0f, 0, excludeSelf); + + // Collect candidates. Start with the node's existing L0 edges (rescored exact) so + // refinement never drops an edge that the search happened to miss — matches the + // existing+search union pattern from GraphIndexBuilder.insertDiverse. + scratch.candSize = 0; + var existing = view.getNeighborsIterator(0, node); + while (existing.hasNext()) { + int nb = existing.nextInt(); + if (nb == node) continue; + view.getVectorInto(nb, scratch.tmpVec, 0); + scratch.candNode[scratch.candSize] = nb; + scratch.candScore[scratch.candSize] = similarityFunction.compare(scratch.queryVec, scratch.tmpVec); + scratch.candSize++; + } + // Pull search results from approximateResults. When fused-PQ is on the scores there are + // approximate; rescore exact for correct diversity comparison against existing edges. + final boolean rescore = hasFusedPQ; + gs.approximateResults.foreach((nb, approxScore) -> { + if (nb == node) return; + for (int k = 0; k < scratch.candSize; k++) { + if (scratch.candNode[k] == nb) return; // de-dupe against existing edges + } + if (scratch.candSize >= scratch.candNode.length) return; + float s; + if (rescore) { + view.getVectorInto(nb, scratch.tmpVec, 0); + s = similarityFunction.compare(scratch.queryVec, scratch.tmpVec); + } else { + s = approxScore; + } + scratch.candNode[scratch.candSize] = nb; + scratch.candScore[scratch.candSize] = s; + scratch.candSize++; + }); + + if (scratch.candSize == 0) { + // No live neighbors found — leave the existing record alone. + return; + } + + // Sort candidates by descending score. 
+ int[] order = scratch.order; + for (int k = 0; k < scratch.candSize; k++) order[k] = k; + sortOrderByScoreDesc(order, scratch.candScore, scratch.candSize); + + // Vamana diversity selection with progressively-relaxed alpha. + int selectedSize = retainDiverseSingleSource( + view, order, scratch.candNode, scratch.candScore, scratch.candSize, + baseDegree, scratch.selectedNodes, scratch.selectedVecs, scratch.tmpVec); + + // Build the trailing-section bytes (PQ codes block — if any — followed by count + neighbors). + ByteBuffer rec = scratch.recordBuffer; + rec.clear(); + + long writeOffset; + if (hasFusedPQ) { + // PQ codes block sits between the inline vector and the neighbor count. + writeOffset = view.offsetFor(node, FeatureId.FUSED_PQ); + if (codeCache != null) { + // Memcpy from the pre-encoded cache (indexed by new ordinal). Avoids one FP + // vector read AND one PQ encode per selected neighbor. duplicate() gives this + // call its own position cursor without racing other workers. + ByteBuffer cacheView = codeCache.duplicate(); + byte[] codeBuf = scratch.pqCodeBytes; + for (int k = 0; k < selectedSize; k++) { + int newOrd = scratch.selectedNodes[k]; + cacheView.position(newOrd * cacheCodeSize); + cacheView.get(codeBuf, 0, cacheCodeSize); + rec.put(codeBuf, 0, cacheCodeSize); + } + } else { + // Fallback: re-encode from the selected neighbor's inline vector. Same as before + // the cache-reuse optimization. Used when the cache wasn't built (graph too large + // for a single mapping, or pre-encode failure). + ByteSequence codeOut = scratch.pqCode; + for (int k = 0; k < selectedSize; k++) { + view.getVectorInto(scratch.selectedNodes[k], scratch.tmpVec, 0); + codeOut.zero(); + compressor.encodeTo(scratch.tmpVec, codeOut); + for (int b = 0; b < pqCodeSize; b++) { + rec.put(codeOut.get(b)); + } + } + } + // Pad remaining slots with zero codes (matches CompactWriter's zeroPQ behavior). 
+ int padSlots = baseDegree - selectedSize; + for (int s = 0; s < padSlots; s++) { + for (int b = 0; b < pqCodeSize; b++) rec.put((byte) 0); + } + } else { + writeOffset = view.neighborsOffsetFor(0, node); + } + + // Neighbor count + ordinals (-1 padding for unused slots). + rec.putInt(selectedSize); + for (int k = 0; k < selectedSize; k++) rec.putInt(scratch.selectedNodes[k]); + for (int k = selectedSize; k < baseDegree; k++) rec.putInt(-1); + + rec.flip(); + try { + while (rec.hasRemaining()) { + int n = fc.write(rec, writeOffset); + writeOffset += n; + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + /** + * Single-source Vamana diversity selection. Mirrors {@link CompactVamanaDiversityProvider} + * but operates on one merged graph rather than per-source views, so candidates are bare + * (node, score) pairs. + * + * @return the number of selected neighbors written into {@code selectedNodes}. + */ + private int retainDiverseSingleSource(OnDiskGraphIndex.View view, + int[] order, int[] candNode, float[] candScore, int candSize, + int maxDegree, int[] selectedNodes, + VectorFloat[] selectedVecs, VectorFloat tmp) { + if (candSize == 0) return 0; + int nSelected = 0; + float currentAlpha = 1.0f; + final float alpha = 1.2f; + while (currentAlpha <= alpha + 1E-6 && nSelected < maxDegree) { + for (int i = 0; i < candSize && nSelected < maxDegree; i++) { + int ci = order[i]; + int cNode = candNode[ci]; + float cScore = candScore[ci]; + + view.getVectorInto(cNode, tmp, 0); + + boolean diverse = true; + for (int j = 0; j < nSelected; j++) { + if (selectedNodes[j] == cNode) { diverse = false; break; } + if (similarityFunction.compare(tmp, selectedVecs[j]) > cScore * currentAlpha) { + diverse = false; + break; + } + } + if (diverse) { + selectedNodes[nSelected] = cNode; + selectedVecs[nSelected].copyFrom(tmp, 0, 0, tmp.length()); + nSelected++; + } + } + currentAlpha += DIVERSITY_ALPHA_STEP; + } + return nSelected; + } + + /** Per-thread scratch 
space for refinement. One per worker thread, populated lazily via ThreadLocal. */ + private static final class RefineScratch { + final OnDiskGraphIndex.View view; + final GraphSearcher searcher; + final VectorFloat queryVec; + final VectorFloat tmpVec; + final int[] candNode; + final float[] candScore; + final int[] order; + int candSize; + final int[] selectedNodes; + final VectorFloat[] selectedVecs; + final ByteSequence pqCode; + // Heap byte buffer for memcpy from the precomputed code cache into the record buffer. + final byte[] pqCodeBytes; + final ByteBuffer recordBuffer; + + RefineScratch(OnDiskGraphIndex mergedGraph, int baseDegree, int dimension, int searchTopK, int pqCodeSize) { + this.view = mergedGraph.getView(); + this.searcher = new GraphSearcher(mergedGraph); + this.searcher.usePruning(false); + this.queryVec = vectorTypeSupport.createFloatVector(dimension); + this.tmpVec = vectorTypeSupport.createFloatVector(dimension); + // Candidates = existing neighbors (up to baseDegree) ∪ search results (up to searchTopK). + int cap = searchTopK + baseDegree + 16; + this.candNode = new int[cap]; + this.candScore = new float[cap]; + this.order = new int[cap]; + this.selectedNodes = new int[baseDegree]; + this.selectedVecs = new VectorFloat[baseDegree]; + for (int i = 0; i < baseDegree; i++) { + this.selectedVecs[i] = vectorTypeSupport.createFloatVector(dimension); + } + this.pqCode = pqCodeSize > 0 ? vectorTypeSupport.createByteSequence(pqCodeSize) : null; + this.pqCodeBytes = pqCodeSize > 0 ? new byte[pqCodeSize] : null; + // Trailing section to rewrite: optional PQ codes block + count + neighbor ids. + int recordBytes = (pqCodeSize > 0 ? 
baseDegree * pqCodeSize : 0) + Integer.BYTES + baseDegree * Integer.BYTES; + this.recordBuffer = ByteBuffer.allocate(recordBytes).order(java.nio.ByteOrder.BIG_ENDIAN); } } diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/QuantizationCompactionStrategy.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/QuantizationCompactionStrategy.java index 8eaf0a3a1..5d9c7311c 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/QuantizationCompactionStrategy.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/QuantizationCompactionStrategy.java @@ -22,6 +22,7 @@ import io.github.jbellis.jvector.vector.VectorSimilarityFunction; import java.io.IOException; +import java.nio.MappedByteBuffer; import java.nio.file.Path; import java.util.List; @@ -145,6 +146,34 @@ public FusedFeature outputFusedFeature(int maxDegree) { return null; } + /** + * For compaction use. Returns the precomputed code cache built by {@link #onAfterHeader}, + * indexed by new ordinal so refinement can memcpy neighbor codes instead of re-encoding them. + * Returns {@code null} when no cache is held (non-fused strategy, NONE, or graph too large for + * a single mapping). The returned buffer is shared; callers must {@code .duplicate()} per + * thread before using. + */ + public MappedByteBuffer getCodeCache() { + return null; + } + + /** For compaction use. Bytes per code in {@link #getCodeCache()}, or {@code 0} when no cache. */ + public int getCacheCodeSize() { + return 0; + } + + /** + * For compaction use. Drops the strategy's hold on the {@link CompactionContext} (and thus the + * source graphs) once {@code compactGraphImpl} no longer needs it, so the source graphs become + * GC-eligible before the refinement pass loads a second full graph. Called only from the + * non-sidecar {@code compact(Path)} path. No-op by default; strategies that retain a context + * override. 
Must not be called before {@code onAfterHeader}/{@code onAfterLevels} have run, and + * implementations must keep {@code onAfterClose} working without the context. + */ + public void releaseSources() { + // no-op + } + /** * Convenience: returns {@link #compressor()} cast to {@link ProductQuantization}, or * {@code null} if no compressor is held. Kept for backward compat with code paths that still From 780c0eb442f2bc67bfe402df1207dabe0253afb3 Mon Sep 17 00:00:00 2001 From: dian-lun-lin Date: Fri, 15 May 2026 21:21:20 -0700 Subject: [PATCH 17/18] Bench: load recall data for COMPACT workload when measureRecall CompactorBenchmark.run() calls runRecall() whenever measureRecall is set, but queryVectors/groundTruth were only loaded inside the needsBaseVectors block. WorkloadMode.COMPACT has needsBaseVectors=false, so that block was skipped and the else forced queryVectors=null even though needsRecallData was true -> runRecall NPE'd on queryVectors.size(). In the skip-base-vectors else branch, when needsRecallData is true, load the dataset's queryVectors/groundTruth/similarityFunction (base vectors still skipped). Covers the FUSEDPQ recall path, which does not use ravv. The non-FUSEDPQ exact path still needs ravv (base vectors), which COMPACT mode deliberately skips -- that combination remains unsupported. 
--- .../jvector/bench/CompactorBenchmark.java | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.java b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.java index ffad5ffd0..30599b0ac 100644 --- a/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.java +++ b/benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/CompactorBenchmark.java @@ -424,23 +424,35 @@ public void setup() throws Exception { datasetNames, ravv.size(), datasetPortion, dimension, similarityFunction, workloadMode, measureRecall); } } else { - ds = null; - queryVectors = null; - groundTruth = null; ravv = null; baseVectors = null; dimension = -1; - var datasetInfo = DataSets.loadDataSet(datasetNames); - similarityFunction = datasetInfo - .flatMap(DataSetInfo::similarityFunction) - .orElseGet(() -> { - log.warn("Could not determine similarity function for dataset '{}'; defaulting to COSINE", datasetNames); - return VectorSimilarityFunction.COSINE; - }); + if (needsRecallData) { + ds = DataSets.loadDataSet(datasetNames) + .orElseThrow(() -> new RuntimeException("Dataset not found: " + datasetNames)) + .getDataSet(); + queryVectors = ds.getQueryVectors(); + groundTruth = ds.getGroundTruth(); + similarityFunction = ds.getSimilarityFunction(); + log.info("Dataset {} loaded for {} mode with recall data (base vectors skipped). Query vectors: {}, Similarity: {}, Live nodes rate: {}", + datasetNames, workloadMode, queryVectors.size(), similarityFunction, liveNodesRate); + } else { + ds = null; + queryVectors = null; + groundTruth = null; - log.info("Skipping dataset load for {} mode. 
similarityFunction: {}, Live nodes rate: {}", - workloadMode, similarityFunction, liveNodesRate); + var datasetInfo = DataSets.loadDataSet(datasetNames); + similarityFunction = datasetInfo + .flatMap(DataSetInfo::similarityFunction) + .orElseGet(() -> { + log.warn("Could not determine similarity function for dataset '{}'; defaulting to COSINE", datasetNames); + return VectorSimilarityFunction.COSINE; + }); + + log.info("Skipping dataset load for {} mode. similarityFunction: {}, Live nodes rate: {}", + workloadMode, similarityFunction, liveNodesRate); + } } // Resolve storagePaths + partitionsDir From a5bba85b06264f25e91cdf9d83bf9e904d72e5f4 Mon Sep 17 00:00:00 2001 From: dian-lun-lin Date: Wed, 27 May 2026 16:11:17 -0700 Subject: [PATCH 18/18] Compaction: unmap fused-PQ code cache before truncate (Windows fix) --- .../graph/disk/FusedCompactionStrategy.java | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/FusedCompactionStrategy.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/FusedCompactionStrategy.java index 50715f0f5..f759bc702 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/FusedCompactionStrategy.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/FusedCompactionStrategy.java @@ -25,8 +25,10 @@ import io.github.jbellis.jvector.vector.types.VectorTypeSupport; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import sun.misc.Unsafe; import java.io.IOException; +import java.lang.reflect.Field; import java.nio.ByteBuffer; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; @@ -54,6 +56,18 @@ public final class FusedCompactionStrategy extends QuantizationCompactionStrategy { private static final Logger log = LoggerFactory.getLogger(FusedCompactionStrategy.class); private static final VectorTypeSupport vectorTypeSupport = 
VectorizationProvider.getInstance().getVectorTypeSupport(); + private static final Unsafe UNSAFE = getUnsafe(); + + private static Unsafe getUnsafe() { + try { + Field f = Unsafe.class.getDeclaredField("theUnsafe"); + f.setAccessible(true); + return (Unsafe) f.get(null); + } catch (Exception e) { + log.warn("FusedCompactionStrategy can't acquire needed Unsafe access; code-cache will not be explicitly unmapped"); + return null; + } + } // Non-final: nulled by releaseSources() after compactGraphImpl so the source graphs reachable // through ctx.sources can be GC'd before refinement. onAfterClose must not touch ctx. @@ -163,14 +177,20 @@ public void onAfterLevels(CompactWriter writer, int[] entryNodeSource, List 0) { + if (codeCache != null && UNSAFE != null) { + try { + UNSAFE.invokeCleaner(codeCache); + } catch (IllegalArgumentException ignored) { + // duplicated/indirect buffer; not cleanable + } + } codeCache = null; try (FileChannel fc = FileChannel.open(graphPath, StandardOpenOption.WRITE)) { if (fc.size() > cacheTruncateAt) { fc.truncate(cacheTruncateAt); } } catch (IOException e) { - log.warn("Failed to truncate code-cache section from output file {}: {}", - graphPath, e.getMessage()); + throw new RuntimeException("Failed to truncate code-cache section from output file " + graphPath, e); } cacheTruncateAt = 0; }