Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ spec:
labels:
{{- include "dra-driver-nvidia-gpu.templateLabels" . | nindent 8 }}
{{- include "dra-driver-nvidia-gpu.selectorLabels" (dict "context" . "componentName" "controller") | nindent 8 }}
nvidia-dra-driver-gpu-component: controller
spec:
{{- if .Values.controller.priorityClassName }}
priorityClassName: {{ .Values.controller.priorityClassName }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ spec:
labels:
{{- include "dra-driver-nvidia-gpu.templateLabels" . | nindent 8 }}
{{- include "dra-driver-nvidia-gpu.selectorLabels" (dict "context" . "componentName" "kubelet-plugin") | nindent 8 }}
nvidia-dra-driver-gpu-component: kubelet-plugin
spec:
{{- if .Values.kubeletPlugin.priorityClassName }}
priorityClassName: {{ .Values.kubeletPlugin.priorityClassName }}
Expand Down
4 changes: 2 additions & 2 deletions tests/bats/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ TEST_CHART_REPO ?= "oci://gcr.io/k8s-staging-nvidia/charts/dra-driver-nvidia-gpu
TEST_CHART_VERSION ?= "$(VERSION_STAGING_CHART)"

# The baseline Helm chart to test upgrades from and downgrades to.
TEST_CHART_LASTSTABLE_REPO ?= "oci://ghcr.io/nvidia/k8s-dra-driver-gpu"
TEST_CHART_LASTSTABLE_VERSION ?= "25.12.0-0882da87-chart"
TEST_CHART_LASTSTABLE_REPO ?= "oci://registry.k8s.io/dra-driver-nvidia/charts/dra-driver-nvidia-gpu"
TEST_CHART_LASTSTABLE_VERSION ?= "0.4.0"

# If not "false": the to-be-tested Helm chart is installed from the local
# filesystem (from `deployments/helm/dra-driver-nvidia-gpu`). Make sure
Expand Down
6 changes: 2 additions & 4 deletions tests/bats/helpers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,16 @@ iupgrade_wait() {
# not natively supported by `kubectl wait`, hence this must be something of
# the shape
# `kubectl get pods ... | xargs -I{} kubectl wait --for=condition=Ready pod/{} `
# TODO: change `nvidia-dra-driver-gpu-component` when last stable supports both, the
# new and the old label key
sleep 1
kubectl wait --for=condition=READY pods -A -l nvidia-dra-driver-gpu-component=kubelet-plugin --timeout=15s
kubectl wait --for=condition=READY pods -A -l dra-driver-nvidia-gpu-component=kubelet-plugin --timeout=15s

# Again, log current state.
kubectl get pods -n dra-driver-nvidia-gpu

# The controller pods should be ready by now, but make that guarantee explicit
# for consuming tests. Skip when compute domains are disabled (no controller deployment).
if [ "${DISABLE_COMPUTE_DOMAINS:-}" != "true" ]; then
kubectl wait --for=condition=READY pods -A -l nvidia-dra-driver-gpu-component=controller --timeout=10s
kubectl wait --for=condition=READY pods -A -l dra-driver-nvidia-gpu-component=controller --timeout=10s
fi
# TODO: maybe check the version on labels (to confirm that we set labels correctly).
log "iupgrade_wait: done"
Expand Down
8 changes: 4 additions & 4 deletions tests/bats/test_gpu_updowngrade.bats
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ bats::on_failure() {
_node=$(kubectl get pod "${_podname}" -o jsonpath='{.spec.nodeName}')
log "workload runs on node: ${_node}"
_kpod=$(kubectl get pods -n dra-driver-nvidia-gpu \
-l nvidia-dra-driver-gpu-component=kubelet-plugin \
-l dra-driver-nvidia-gpu-component=kubelet-plugin \
--field-selector spec.nodeName="${_node}" \
-o jsonpath='{.items[0].metadata.name}')
log "kubelet-plugin pod on that node: ${_kpod}"
Expand Down Expand Up @@ -119,7 +119,7 @@ bats::on_failure() {
_node=$(kubectl get pod "${_podname}" -o jsonpath='{.spec.nodeName}')
log "workload runs on node: ${_node}"
_kpod=$(kubectl get pods -n dra-driver-nvidia-gpu \
-l nvidia-dra-driver-gpu-component=kubelet-plugin \
-l dra-driver-nvidia-gpu-component=kubelet-plugin \
--field-selector spec.nodeName="${_node}" \
-o jsonpath='{.items[0].metadata.name}')
log "kubelet-plugin pod on that node: ${_kpod}"
Expand Down Expand Up @@ -172,7 +172,7 @@ bats::on_failure() {
# Stage 2: pick any kubelet plugin pod.
local _kpod _node
_kpod=$(kubectl get pods -n dra-driver-nvidia-gpu \
-l nvidia-dra-driver-gpu-component=kubelet-plugin \
-l dra-driver-nvidia-gpu-component=kubelet-plugin \
-o jsonpath='{.items[0].metadata.name}')
_node=$(kubectl get pod -n dra-driver-nvidia-gpu "${_kpod}" -o jsonpath='{.spec.nodeName}')
log "targeting plugin pod ${_kpod} on node ${_node}"
Expand Down Expand Up @@ -200,7 +200,7 @@ bats::on_failure() {
local _newkpod="" _deadline=$((SECONDS + 60))
while true; do
_newkpod=$(kubectl get pods -n dra-driver-nvidia-gpu \
-l nvidia-dra-driver-gpu-component=kubelet-plugin \
-l dra-driver-nvidia-gpu-component=kubelet-plugin \
--field-selector spec.nodeName="${_node}" \
-o jsonpath='{.items[0].metadata.name}' 2>/dev/null) || true
if [ -n "${_newkpod}" ] && [ "${_newkpod}" != "${_kpod}" ]; then
Expand Down
Loading