Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
7c6ccd9
Add on-disk graph index compaction algorithm
dian-lun-lin Apr 14, 2026
52e7217
Add compaction unit tests
dian-lun-lin Apr 14, 2026
475ee06
Add reporting and storage infrastructure for CompactorBenchmark
dian-lun-lin Apr 14, 2026
ce40c75
Add CompactorBenchmark and tooling
dian-lun-lin Apr 14, 2026
c75256a
Update build config and project metadata for compaction
dian-lun-lin Apr 14, 2026
415f907
Fix JMH jar selection in run-compaction.yml
dian-lun-lin Apr 15, 2026
224a709
Fix CompactorBenchmark invocation in run-compaction.yml
dian-lun-lin Apr 16, 2026
191a40d
Address PR review feedback
dian-lun-lin Apr 17, 2026
06fff17
Fix benchmark invocation in docs and default dataset
dian-lun-lin Apr 20, 2026
6178afa
Fix jar selection: use fixed output name compactor-benchmark.jar
dian-lun-lin Apr 20, 2026
0ab1dea
Refactor workload modes and fix build-from-scratch timing
dian-lun-lin Apr 22, 2026
3127043
Add TIERED_10_90 and TIERED_1_99 split distributions
dian-lun-lin Apr 23, 2026
632bc76
fix for bug when fused pq is used with no hierarchy (#664)
MarkWolters May 7, 2026
6e97fc5
fix for hierarchy issue
MarkWolters May 12, 2026
e1eb1c0
Compaction: introduce QuantizationCompactionStrategy, accept non-fuse…
dian-lun-lin May 20, 2026
b8c448d
Compaction: add in-place L0 neighbor refinement pass after merge
dian-lun-lin May 20, 2026
780c0eb
Bench: load recall data for COMPACT workload when measureRecall
dian-lun-lin May 16, 2026
a5bba85
Compaction: unmap fused-PQ code cache before truncate (Windows fix)
dian-lun-lin May 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 117 additions & 0 deletions .github/workflows/run-compaction.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Runs CompactorBenchmark either on demand or for pull requests against main.
name: Run Compaction Bench

on:
  # Manual trigger: the caller may choose the dataset and the branches to run.
  workflow_dispatch:
    inputs:
      dataset:
        description: 'Dataset name passed to CompactorBenchmark (-p datasetNames)'
        required: false
        default: 'ada002-100k'
      branches:
        description: 'Space-separated list of branches to benchmark'
        required: false
        default: 'main'

  # Automatic trigger: pull requests targeting main that touch main Java
  # sources or any Maven build file.
  pull_request:
    types:
      - opened
      - synchronize
      - ready_for_review
    branches:
      - main
    paths:
      - '**/src/main/java/**'
      - 'pom.xml'
      - '**/pom.xml'

jobs:
  # Produces the JSON matrix (JDK version, runner label, branch list) that
  # test-compaction fans out over.
  generate-matrix:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Generate matrix
        id: set-matrix
        # Event data is handed to the script through environment variables
        # instead of inline ${{ }} expansion, so a crafted branch name or
        # dispatch input cannot inject shell commands.
        env:
          EVENT_NAME: ${{ github.event_name }}
          HEAD_REF: ${{ github.head_ref }}
          BRANCHES_INPUT: ${{ github.event.inputs.branches }}
        run: |
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            # Pull requests benchmark main and the PR head branch.
            BRANCHES=$(jq -cn --arg head "$HEAD_REF" '["main", $head]')
          elif [[ "$EVENT_NAME" == "workflow_dispatch" && -n "$BRANCHES_INPUT" ]]; then
            # Split the whitespace-separated input one name per line and let
            # jq do the JSON escaping; no shell word-splitting or globbing.
            BRANCHES=$(tr -s '[:space:]' '\n' <<< "$BRANCHES_INPUT" \
              | jq -Rnc '[inputs | select(length > 0)]')
          else
            BRANCHES='["main"]'
          fi

          echo "matrix={\"jdk\":[24],\"isa\":[\"isa-avx512f\"],\"branch\":$BRANCHES}" >> "$GITHUB_OUTPUT"

  # Builds each branch in the matrix and runs CompactorBenchmark against it.
  test-compaction:
    needs: generate-matrix
    strategy:
      matrix: ${{ fromJSON(needs.generate-matrix.outputs.matrix) }}
    # The matrix "isa" value doubles as the runner label.
    runs-on: ${{ matrix.isa }}
    steps:
      - name: Set up GCC
        # Refresh the package index first so the install does not fail on a
        # stale cache; apt-get is the script-stable front end.
        run: |
          sudo apt-get update
          sudo apt-get install -y gcc

      # Single checkout of the branch under test. It runs before setup-java
      # so the Maven cache key is computed from this branch's pom files.
      - name: Checkout branch
        uses: actions/checkout@v4
        with:
          ref: ${{ matrix.branch }}
          fetch-depth: 0

      - name: Set up JDK ${{ matrix.jdk }}
        uses: actions/setup-java@v4
        with:
          java-version: ${{ matrix.jdk }}
          distribution: temurin
          cache: maven

      - name: Build branch
        run: mvn -B -Punix-amd64-profile package --file pom.xml

      - name: Run CompactorBenchmark
        id: run-benchmark
        # Untrusted values (dispatch input, branch name) enter the script as
        # environment variables, never as inline ${{ }} text.
        env:
          DATASET_INPUT: ${{ github.event.inputs.dataset }}
          MATRIX_BRANCH: ${{ matrix.branch }}
        run: |
          # Size the heap at half of physical memory (minimum 1 GB); assume
          # 16 GB total when `free` reports nothing usable.
          TOTAL_MEM_GB=$(free -g | awk '/^Mem:/ {print $2}')
          if [[ -z "$TOTAL_MEM_GB" ]] || [[ "$TOTAL_MEM_GB" -le 0 ]]; then
            TOTAL_MEM_GB=16
          fi
          HALF_MEM_GB=$((TOTAL_MEM_GB / 2))
          if [[ "$HALF_MEM_GB" -lt 1 ]]; then
            HALF_MEM_GB=1
          fi

          # The dataset input is empty on pull_request events; use the default.
          DATASET="$DATASET_INPUT"
          if [[ -z "$DATASET" ]]; then
            DATASET="ada002-100k"
          fi

          # Branch names may contain '/' and other characters that are not
          # valid in artifact names; replace them with '_'.
          SAFE_BRANCH=$(printf '%s\n' "$MATRIX_BRANCH" | sed 's/[^A-Za-z0-9_-]/_/g')
          echo "safe_branch=$SAFE_BRANCH" >> "$GITHUB_OUTPUT"

          # Pick the first benchmarks-jmh jar that is not a javadoc/sources jar.
          JMH_JAR=$(ls benchmarks-jmh/target/benchmarks-jmh-*.jar | grep -Ev -- '-(javadoc|sources)\.jar$' | head -1)
          if [[ -z "$JMH_JAR" ]]; then
            echo "::error::No benchmarks-jmh jar found under benchmarks-jmh/target"
            exit 1
          fi
          echo "Using JMH jar: $JMH_JAR"

          # JMH flags: -wi 0 = no warmup iterations, -i 1 = one measurement
          # iteration, -f 1 = one fork; -jvmArgsPrepend sets the fork's heap.
          java --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector \
            -Djvector.experimental.enable_native_vectorization=true \
            -Xmx${HALF_MEM_GB}g \
            -cp "$JMH_JAR" \
            io.github.jbellis.jvector.bench.CompactorBenchmark \
            -p workloadMode=PARTITION_AND_COMPACT \
            -p "datasetNames=$DATASET" \
            -p numPartitions=4 \
            -p splitDistribution=FIBONACCI \
            -p indexPrecision=FUSEDPQ \
            -jvmArgsPrepend "-Xmx${HALF_MEM_GB}g" \
            -wi 0 -i 1 -f 1

      - name: Upload compaction results
        uses: actions/upload-artifact@v4
        with:
          name: compaction-results-${{ matrix.isa }}-jdk${{ matrix.jdk }}-${{ steps.run-benchmark.outputs.safe_branch }}
          path: target/benchmark-results/compactor-*/compactor-results.jsonl
          if-no-files-found: warn
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ local/
dataset_
**/local_datasets/**

### Testing Results
**results**.json
**results**.jsonl

### Bench caches
pq_cache/
index_cache/
Expand Down
39 changes: 38 additions & 1 deletion benchmarks-jmh/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.release>22</maven.compiler.release>
<jmh.version>1.37</jmh.version>
<awssdk.version>2.21.10</awssdk.version>
<!-- Default benchmark arguments (empty) -->
<args></args>
</properties>

<dependencies>
Expand Down Expand Up @@ -53,6 +56,11 @@
<artifactId>log4j-slf4j2-impl</artifactId>
<version>2.24.3</version>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>ec2</artifactId>
<version>${awssdk.version}</version>
</dependency>

</dependencies>

Expand Down Expand Up @@ -94,6 +102,35 @@
</execution>
</executions>
</plugin>

<!-- Launchers for the benchmark tooling. Neither execution declares a <phase>
     here; each passes the ${args} Maven property through to the Java program. -->
<plugin>
  <groupId>org.codehaus.mojo</groupId>
  <artifactId>exec-maven-plugin</artifactId>
  <executions>
    <!-- "compactor": starts the java executable on the project classpath and
         runs CompactorBenchmark with native access, the incubator vector
         module and native vectorization enabled. -->
    <execution>
      <id>compactor</id>
      <goals>
        <goal>exec</goal>
      </goals>
      <configuration>
        <skip>false</skip>
        <executable>java</executable>
        <commandlineArgs>--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector -Djvector.experimental.enable_native_vectorization=true -cp %classpath io.github.jbellis.jvector.bench.CompactorBenchmark ${args}</commandlineArgs>
      </configuration>
    </execution>
    <!-- "analyze": starts the java executable on the project classpath and
         runs EventLogAnalyzer. -->
    <execution>
      <id>analyze</id>
      <goals>
        <goal>exec</goal>
      </goals>
      <configuration>
        <skip>false</skip>
        <executable>java</executable>
        <commandlineArgs>-cp %classpath io.github.jbellis.jvector.bench.benchtools.EventLogAnalyzer ${args}</commandlineArgs>
      </configuration>
    </execution>
  </executions>
</plugin>
</plugins>
</build>
</project>
</project>
Loading
Loading