Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions .config/nextest.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# nextest configuration. Run with: cargo nextest run --all-features
#
# Why nextest over `cargo test`:
# - Each test runs in its own process → no in-process state contention.
#   Integration tests that spawn 3-node clusters used to hang under
#   `cargo test`'s default within-binary parallelism because multiple
#   clusters in the same process exhausted ports / file descriptors.
# - Per-test timeouts make hangs fail fast instead of stalling CI.
# - Better failure output, retry support, and JUnit XML for CI.

[profile.default]
# Hard ceiling per test. Anything above this is a bug, not a slow test.
slow-timeout = { period = "30s", terminate-after = 4 }

# Use every available core for cheap unit tests. Heavy cluster tests
# are kept from starving by `threads-required` overrides below — they
# claim ALL slots so nothing else runs alongside them, regardless of
# whether you're on a 24-core dev box or a 2-core CI runner.
test-threads = "num-cpus"

# Heavy cluster tests: each one brings up 3 servers + per-node Tokio
# runtimes. Two things keep them stable across machine sizes:
#
#   1. `test-group = "cluster"` with `max-threads = 1` ensures at
#      most ONE cluster test runs at a time (no two clusters share
#      ports / file descriptors / thread pools).
#   2. `threads-required = "num-test-threads"` makes the running
#      cluster test claim every available test slot, which evicts
#      every other test from the run-queue while it's executing.
#      That's what prevents a 24-core dev box from scheduling 23
#      unit tests alongside the cluster and starving its Raft
#      heartbeats.
#
# The combined effect: cluster tests run strictly serially AND
# strictly alone, and the rest of the suite gets full parallelism
# the moment the cluster test finishes.
[[profile.default.overrides]]
filter = '''
binary(/cluster/)
| binary(/cross_node/)
| binary(/_lease_/)
| binary(descriptor_lease_drain)
| binary(descriptor_lease_forwarding_and_renewal)
| binary(descriptor_lease_planner_integration)
| binary(descriptor_versioning_cross_node)
| binary(prepared_cache_invalidation)
| binary(sql_cluster_cross_node_dml)
'''
test-group = 'cluster'
threads-required = 'num-test-threads'
# Cluster tests bring up real Raft nodes and racy multi-node
# convergence checks. They're flaky enough that a couple of retries
# (count = 2 → up to three attempts) catch legitimate startup jitter
# without hiding real regressions — a genuinely broken test fails
# three times in a row.
retries = { backoff = "fixed", count = 2, delay = "1s" }

[test-groups]
cluster = { max-threads = 1 }

[profile.ci]
# CI inherits the default profile (cluster group, threads-required,
# slow-timeout) and adds:
#   - more retries: CI runners are ~2× slower per-core than dev
#     workstations, so the cluster tests' in-test `wait_for`
#     budgets are proportionally tighter. Three retries (four total
#     attempts) buys headroom for jitter without papering over real
#     regressions — a genuinely broken test fails four times in a row.
#   - JUnit XML: picked up by the workflow's artifact upload.
#
# NOTE: we deliberately do NOT bump `slow-timeout` here. The
# slow-timeout only controls when nextest gives up on a stuck
# *process*; it does NOT extend the test's internal `wait_for`
# budgets. Once a `wait_for` panics, the test has already failed —
# making nextest wait longer just wastes CI minutes on cleanup.
retries = { backoff = "fixed", count = 3, delay = "2s" }
fail-fast = false

# nextest resolves per-test settings in this order: current-profile
# overrides → default-profile overrides → current-profile settings →
# default-profile settings. Without this block, the default profile's
# cluster override (`count = 2`) would silently take precedence over
# the `retries = 3` above for exactly the tests the bump is meant for.
# Only `retries` is restated here; `test-group` and `threads-required`
# still resolve from the default-profile override.
[[profile.ci.overrides]]
filter = '''
binary(/cluster/)
| binary(/cross_node/)
| binary(/_lease_/)
| binary(descriptor_lease_drain)
| binary(descriptor_lease_forwarding_and_renewal)
| binary(descriptor_lease_planner_integration)
| binary(descriptor_versioning_cross_node)
| binary(prepared_cache_invalidation)
| binary(sql_cluster_cross_node_dml)
'''
retries = { backoff = "fixed", count = 3, delay = "2s" }

[profile.ci.junit]
path = "junit.xml"
18 changes: 17 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,21 @@ jobs:
sudo apt-get install -y --no-install-recommends \
cmake clang libclang-dev pkg-config protobuf-compiler perl \
libcurl4-openssl-dev libsasl2-dev
# nextest is required — `.config/nextest.toml` defines the
# `cluster` test-group that serializes 3-node integration tests
# and the `ci` profile that retries flaky cluster tests and
# writes a JUnit report. Plain `cargo test` ignores all of that
# and will hang/fail on the cluster suite.
- name: Install cargo-nextest
uses: taiki-e/install-action@v2
with:
tool: nextest
- name: Run tests
run: cargo test --all-features --profile ci
run: cargo nextest run --all-features --cargo-profile ci --profile ci
- name: Upload JUnit report
if: always()
uses: actions/upload-artifact@v4
with:
name: junit-report
path: target/nextest/ci/junit.xml
if-no-files-found: ignore
13 changes: 11 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@ FROM debian:bookworm-slim AS runtime

# ca-certificates: needed for JWKS fetch, OTLP export, S3 archival
# curl: needed for HEALTHCHECK
# gosu: drop privileges from root after fixing data-dir ownership in entrypoint
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
curl \
gosu \
&& rm -rf /var/lib/apt/lists/*

# Non-root user
Expand All @@ -51,12 +53,18 @@ RUN mkdir -p /var/lib/nodedb /etc/nodedb \

COPY --from=builder /build/target/release/nodedb /usr/local/bin/nodedb

# Entrypoint: when started as root, fix data-dir ownership and drop to the
# nodedb user. When already started as a non-root user (e.g. `--user 10001`),
# exec directly. This makes `-v <named-volume>:/var/lib/nodedb` work even
# when Docker initialises the volume as root-owned (common on Linux hosts).
COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
RUN chmod +x /usr/local/bin/docker-entrypoint.sh

# Bind to all interfaces (required for Docker port mapping)
# Point data dir at the declared volume
ENV NODEDB_HOST=0.0.0.0 \
NODEDB_DATA_DIR=/var/lib/nodedb

USER nodedb
WORKDIR /var/lib/nodedb

# pgwire | native protocol | HTTP API | WebSocket sync | OTLP gRPC | OTLP HTTP
Expand All @@ -67,4 +75,5 @@ VOLUME ["/var/lib/nodedb"]
HEALTHCHECK --interval=10s --timeout=3s --start-period=5s \
CMD curl -f http://localhost:6480/health || exit 1

ENTRYPOINT ["/usr/local/bin/nodedb"]
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
CMD ["/usr/local/bin/nodedb"]
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ For development or contributing:
git clone https://github.com/NodeDB-Lab/nodedb.git
cd nodedb
cargo build --release
cargo test --all-features
cargo install cargo-nextest --locked # one-time
cargo nextest run --all-features
```

## Status
Expand Down
47 changes: 47 additions & 0 deletions docker-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/sh
# NodeDB container entrypoint.
#
# When invoked as root (the default for `docker run` with no --user), fix
# ownership of NODEDB_DATA_DIR and drop privileges to the unprivileged
# `nodedb` user before exec'ing the server. When invoked as any other UID
# (e.g. `--user 10001` or via Kubernetes runAsUser), exec directly and
# leave the data directory's ownership alone.
#
# This makes `-v <named-volume>:/var/lib/nodedb` work even when Docker
# initialises the named volume as root-owned (common on Linux hosts where
# the volume is created out-of-band before the container's first run).

set -e

DATA_DIR="${NODEDB_DATA_DIR:-/var/lib/nodedb}"

if [ "$(id -u)" = "0" ]; then
    # Running as root: ensure the data dir exists and is owned by nodedb,
    # then drop privileges. mkdir is a no-op for the declared VOLUME but
    # protects against custom NODEDB_DATA_DIR overrides.
    mkdir -p "$DATA_DIR"
    chown -R nodedb:nodedb "$DATA_DIR"
    exec gosu nodedb "$@"
fi

# Already non-root: first try to create the data dir in case a custom
# NODEDB_DATA_DIR points somewhere that doesn't exist yet (the root path
# above gets this via its own mkdir). Failure is tolerated here — if the
# parent isn't writable either, the -w check below reports it with the
# right message instead of `mkdir` aborting the script via `set -e`.
mkdir -p "$DATA_DIR" 2>/dev/null || true

# Ensure we can actually write to the data dir, otherwise fail fast with
# a clear message instead of the cryptic WAL "Permission denied
# (os error 13)" the user sees on a misconfigured volume mount.
if [ ! -w "$DATA_DIR" ]; then
    cat >&2 <<EOF
nodedb: data directory $DATA_DIR is not writable by uid=$(id -u) gid=$(id -g).

This usually means a host volume was mounted with root ownership while
NodeDB is configured to run as a non-root user. Fixes:

  1. Let the entrypoint fix it: drop the explicit --user flag so the
     container starts as root and chowns the volume on first boot.
  2. Pre-create the volume with the right ownership on the host, e.g.
       chown -R 10001:10001 /path/to/host/dir
  3. Run as root explicitly: docker run --user 0:0 ...

EOF
    exit 1
fi

exec "$@"
Loading
Loading