From 0be6bc5b435903cfc56d0d7b787300c7a15cb3bd Mon Sep 17 00:00:00 2001 From: Paul Coignet Date: Tue, 19 May 2026 11:02:40 +0300 Subject: [PATCH 1/5] [FA] Gate start task DONE on DaemonSet rollout readiness - Gate start task completion on DaemonSet rollout (all pods updated and ready) so the DONE state reflects actual rollout progress rather than just task dispatch. - Report rollout progress in Task.Error.Message while RUNNING so the RC backend can surface it to the user. - Populate RunningVersion in PackageState RC state; use proto.Clone in setTaskState so all fields are preserved without manual enumeration. - Revert INVALID_STATE for promote precondition failures back to ERROR (INVALID_STATE is reserved for RC config state mismatches). - Bump datadog-agent/pkg/proto to v0.77.0. --- pkg/fleet/daemon.go | 39 +++++++--- pkg/fleet/daemon_test.go | 146 +++++++++++++++++++++++++++++++++++++ pkg/fleet/daemon_worker.go | 25 ++++++- 3 files changed, 199 insertions(+), 11 deletions(-) diff --git a/pkg/fleet/daemon.go b/pkg/fleet/daemon.go index 682fa96149..e27ae3108a 100644 --- a/pkg/fleet/daemon.go +++ b/pkg/fleet/daemon.go @@ -7,6 +7,7 @@ package fleet import ( "context" + "encoding/json" "errors" "fmt" "sync" @@ -19,6 +20,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/manager" + v2alpha1 "github.com/DataDog/datadog-operator/api/datadoghq/v2alpha1" "github.com/DataDog/datadog-operator/pkg/remoteconfig" ) @@ -111,7 +113,8 @@ func (d *Daemon) handleTask(ctx context.Context, req remoteAPIRequest) error { d.taskMu.Lock() pending, err := d.handleRemoteAPIRequest(ctx, req) if err != nil { - // Expected and current stable/experiment configs don't match. + // stateDoesntMatchError covers RC config state mismatches (verifyExpectedState) + // and annotation slot conflicts (guardPendingOperationSlot). var stateErr *stateDoesntMatchError if errors.As(err, &stateErr) { d.setTaskState(req.Package, req.ID, pbgo.TaskState_INVALID_STATE, err) @@ -227,20 +230,16 @@ func (d *Daemon) setTaskState(pkgName, taskID string, taskState pbgo.TaskState, found := false for _, pkg := range current { if pkg.GetPackage() == pkgName { - updated = append(updated, &pbgo.PackageState{ - Package: pkg.GetPackage(), - StableVersion: pkg.GetStableVersion(), - ExperimentVersion: pkg.GetExperimentVersion(), - Task: task, - StableConfigVersion: pkg.GetStableConfigVersion(), - ExperimentConfigVersion: pkg.GetExperimentConfigVersion(), - }) + cloned := proto.Clone(pkg).(*pbgo.PackageState) + cloned.Task = task + updated = append(updated, cloned) found = true } else { updated = append(updated, pkg) } } if !found { + // Package not yet in state: no existing fields to preserve. updated = append(updated, &pbgo.PackageState{ Package: pkgName, Task: task, @@ -319,3 +318,25 @@ func (d *Daemon) logInstallerState(caller string) { ) } } + +type rolloutProgress struct { + TargetVersion string `json:"targetVersion,omitempty"` + Desired int32 `json:"desired,omitempty"` + UpToDate int32 `json:"upToDate,omitempty"` + Ready int32 `json:"ready,omitempty"` +} + +// rolloutProgressJSON encodes DaemonSet rollout counters and the target config +// version as a JSON string. Returns "" when agent is nil. +func rolloutProgressJSON(agent *v2alpha1.DaemonSetStatus, targetVersion string) string { + if agent == nil { + return "" + } + b, _ := json.Marshal(rolloutProgress{ + TargetVersion: targetVersion, + Desired: agent.Desired, + UpToDate: agent.UpToDate, + Ready: agent.Ready, + }) + return string(b) +} diff --git a/pkg/fleet/daemon_test.go b/pkg/fleet/daemon_test.go index 7b1a687148..40aa821e13 100644 --- a/pkg/fleet/daemon_test.go +++ b/pkg/fleet/daemon_test.go @@ -136,6 +136,10 @@ func testInstallerConfigWithDDA() map[string]installerConfig { } } +func testCompletedAgentStatus() *v2alpha1.DaemonSetStatus { + return &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 3, Ready: 3} +} + func testStartRequest() remoteAPIRequest { return remoteAPIRequest{ ID: "exp-abc", @@ -1048,6 +1052,26 @@ func TestSetTaskState_PreservesOtherPackages(t *testing.T) { assert.Equal(t, pbgo.TaskState_DONE, rc.state[1].Task.State) } +func TestSetTaskState_PreservesAllFields(t *testing.T) { + d, rc := testDaemonWithRC([]*pbgo.PackageState{ + { + Package: "datadog-operator", + StableVersion: "1.0.0", + ExperimentVersion: "2.0.0", + StableConfigVersion: "cfg-stable", + ExperimentConfigVersion: "cfg-exp", + }, + }) + d.setTaskState("datadog-operator", "task-1", pbgo.TaskState_DONE, nil) + + require.Len(t, rc.state, 1) + pkg := rc.state[0] + assert.Equal(t, "1.0.0", pkg.StableVersion) + assert.Equal(t, "2.0.0", pkg.ExperimentVersion) + assert.Equal(t, "cfg-stable", pkg.StableConfigVersion) + assert.Equal(t, "cfg-exp", pkg.ExperimentConfigVersion) +} + func TestSetTaskState_NilClient(t *testing.T) { d := &Daemon{} // Must not panic when rcClient is nil. @@ -1276,6 +1300,7 @@ func TestRunPendingOperationWorker_StartTerminalPhaseSetsError(t *testing.T) { func TestRunPendingOperationWorker_CompletesStatusUpdateForNonDefaultPackage(t *testing.T) { dda := testDDAObject(v2alpha1.ExperimentPhaseRunning) + dda.Status.Agent = testCompletedAgentStatus() dda.Annotations[v2alpha1.AnnotationPendingTaskID] = "task-1" dda.Annotations[v2alpha1.AnnotationPendingAction] = string(pendingIntentStart) dda.Annotations[v2alpha1.AnnotationPendingExperimentID] = testExperimentID @@ -1445,6 +1470,7 @@ func TestStartDatadogAgentExperiment_OverwritesStalePendingResultVersion(t *test func TestRunPendingOperationWorker_RecoversPendingOperationFromAnnotations(t *testing.T) { dda := testDDAObject(v2alpha1.ExperimentPhaseRunning) + dda.Status.Agent = testCompletedAgentStatus() dda.Annotations[v2alpha1.AnnotationPendingTaskID] = "task-1" dda.Annotations[v2alpha1.AnnotationPendingAction] = string(pendingIntentStart) dda.Annotations[v2alpha1.AnnotationPendingExperimentID] = testExperimentID @@ -1503,3 +1529,123 @@ func TestRunPendingOperationWorker_RecoversPromoteResultVersionFromAnnotations(t require.NoError(t, c.Get(context.Background(), testDDANSN, got)) assert.Empty(t, got.Annotations[v2alpha1.AnnotationPendingResultVersion]) } + +// --- isDaemonSetRolloutComplete tests --- + +func TestIsDaemonSetRolloutComplete(t *testing.T) { + tests := []struct { + name string + agent *v2alpha1.DaemonSetStatus + want bool + }{ + { + name: "nil agent — rollout not yet reported", + agent: nil, + want: false, + }, + { + name: "zero desired — trivially complete (all nodes tainted/cordoned)", + agent: &v2alpha1.DaemonSetStatus{Desired: 0}, + want: true, + }, + { + name: "rolling out — some pods not yet updated", + agent: &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 1, Ready: 1}, + want: false, + }, + { + name: "updated but none ready", + agent: &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 3, Ready: 0}, + want: false, + }, + { + name: "updated but only partially ready", + agent: &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 3, Ready: 2}, + want: false, + }, + { + name: "all updated and all ready", + agent: &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 3, Ready: 3}, + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, isDaemonSetRolloutComplete(tt.agent)) + }) + } +} + +// --- Worker start gating tests --- + +func TestRunPendingOperationWorker_StartWaitsForRollout_NilAgent(t *testing.T) { + dda := testDDAObject(v2alpha1.ExperimentPhaseRunning) + // status.Agent deliberately left nil — rollout not yet reflected in status + dda.Annotations[v2alpha1.AnnotationPendingTaskID] = "task-1" + dda.Annotations[v2alpha1.AnnotationPendingAction] = string(pendingIntentStart) + dda.Annotations[v2alpha1.AnnotationPendingExperimentID] = testExperimentID + dda.Annotations[v2alpha1.AnnotationPendingPackage] = "datadog-operator" + + d, _ := testDaemon(dda, testInstallerConfigWithDDA()) + d.rcClient = &mockRCClient{state: []*pbgo.PackageState{ + {Package: "datadog-operator", StableConfigVersion: "stable-1"}, + }} + + tracker := newOperationTracker(d) + tracker.onStatusUpdate(context.Background(), newDDAStatusSnapshot(dda)) + + rc := d.rcClient.(*mockRCClient) + require.NotNil(t, rc.state[0].Task) + assert.Equal(t, pbgo.TaskState_RUNNING, rc.state[0].Task.State) + assert.Nil(t, rc.state[0].Task.Error) +} + +func TestRunPendingOperationWorker_StartWaitsForRollout_PartialUpdate(t *testing.T) { + dda := testDDAObject(v2alpha1.ExperimentPhaseRunning) + dda.Status.Agent = &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 1, Ready: 1} + dda.Annotations[v2alpha1.AnnotationPendingTaskID] = "task-1" + dda.Annotations[v2alpha1.AnnotationPendingAction] = string(pendingIntentStart) + dda.Annotations[v2alpha1.AnnotationPendingExperimentID] = testExperimentID + dda.Annotations[v2alpha1.AnnotationPendingPackage] = "datadog-operator" + + d, _ := testDaemon(dda, testInstallerConfigWithDDA()) + d.rcClient = &mockRCClient{state: []*pbgo.PackageState{ + {Package: "datadog-operator", StableConfigVersion: "stable-1"}, + }} + + tracker := newOperationTracker(d) + tracker.onStatusUpdate(context.Background(), newDDAStatusSnapshot(dda)) + + rc := d.rcClient.(*mockRCClient) + require.NotNil(t, rc.state[0].Task) + assert.Equal(t, pbgo.TaskState_RUNNING, rc.state[0].Task.State) + require.NotNil(t, rc.state[0].Task.Error) + assert.Equal(t, `{"targetVersion":"test-config","desired":3,"upToDate":1,"ready":1}`, rc.state[0].Task.Error.Message) +} + +func TestRunPendingOperationWorker_StartDoneAfterRolloutComplete(t *testing.T) { + dda := testDDAObject(v2alpha1.ExperimentPhaseRunning) + dda.Status.Agent = testCompletedAgentStatus() + dda.Annotations[v2alpha1.AnnotationPendingTaskID] = "task-1" + dda.Annotations[v2alpha1.AnnotationPendingAction] = string(pendingIntentStart) + dda.Annotations[v2alpha1.AnnotationPendingExperimentID] = testExperimentID + dda.Annotations[v2alpha1.AnnotationPendingPackage] = "datadog-operator" + + d, c := testDaemon(dda, testInstallerConfigWithDDA()) + d.rcClient = &mockRCClient{state: []*pbgo.PackageState{ + {Package: "datadog-operator", StableConfigVersion: "stable-1"}, + }} + + tracker := newOperationTracker(d) + tracker.onStatusUpdate(context.Background(), newDDAStatusSnapshot(dda)) + + rc := d.rcClient.(*mockRCClient) + require.NotNil(t, rc.state[0].Task) + assert.Equal(t, pbgo.TaskState_DONE, rc.state[0].Task.State) + assert.Nil(t, rc.state[0].Task.Error) + assert.Equal(t, testExperimentID, rc.state[0].ExperimentConfigVersion) + + got := &v2alpha1.DatadogAgent{} + require.NoError(t, c.Get(context.Background(), testDDANSN, got)) + assert.Empty(t, got.Annotations[v2alpha1.AnnotationPendingTaskID]) +} diff --git a/pkg/fleet/daemon_worker.go b/pkg/fleet/daemon_worker.go index 9324ea25df..3061f541cf 100644 --- a/pkg/fleet/daemon_worker.go +++ b/pkg/fleet/daemon_worker.go @@ -8,6 +8,7 @@ package fleet import ( "context" "encoding/json" + "errors" "fmt" "maps" @@ -27,6 +28,7 @@ type ddaStatusSnapshot struct { nsn types.NamespacedName annotations map[string]string experiment *v2alpha1.ExperimentStatus + agent *v2alpha1.DaemonSetStatus } type pendingOperation struct { @@ -98,7 +100,11 @@ func (t *operationTracker) onStatusUpdate(ctx context.Context, snapshot ddaStatu done, resultErr := evaluatePendingTask(snapshot, op) if !done { t.daemon.taskMu.Lock() - t.daemon.setTaskState(op.packageName, op.taskID, pbgo.TaskState_RUNNING, nil) + var progressErr error + if progress := rolloutProgressJSON(snapshot.agent, op.experimentID); progress != "" { + progressErr = errors.New(progress) + } + t.daemon.setTaskState(op.packageName, op.taskID, pbgo.TaskState_RUNNING, progressErr) t.daemon.taskMu.Unlock() return } @@ -145,6 +151,9 @@ func newDDAStatusSnapshot(dda *v2alpha1.DatadogAgent) ddaStatusSnapshot { if dda.Status.Experiment != nil { snapshot.experiment = dda.Status.Experiment.DeepCopy() } + if dda.Status.Agent != nil { + snapshot.agent = dda.Status.Agent.DeepCopy() + } return snapshot } @@ -160,7 +169,7 @@ func evaluatePendingTask(snapshot ddaStatusSnapshot, task pendingOperation) (boo switch task.intent { case pendingIntentStart: if phase == v2alpha1.ExperimentPhaseRunning { - return true, nil + return isDaemonSetRolloutComplete(snapshot.agent), nil } case pendingIntentStop: return isTerminalPhase(phase), nil @@ -177,6 +186,18 @@ func evaluatePendingTask(snapshot ddaStatusSnapshot, task pendingOperation) (boo return false, nil } +func isDaemonSetRolloutComplete(agent *v2alpha1.DaemonSetStatus) bool { + if agent == nil { + return false + } + // Desired == 0 is trivially complete: all nodes are tainted/cordoned so + // there is nothing to roll out. This is safe because experiments update an + // existing DaemonSet rather than create a new one, so Desired never + // transiently passes through 0 mid-rollout — it stays at its current node + // count throughout the update. + return agent.UpToDate == agent.Desired && agent.Ready == agent.Desired +} + // finishPendingOperation writes the final RC state for a task. // // RC is updated before DDA annotations are cleared. If the daemon crashes in From 6d7403b13ad4cba92635d852d7633a87e4c81083 Mon Sep 17 00:00:00 2001 From: Paul Coignet Date: Tue, 19 May 2026 15:02:05 +0300 Subject: [PATCH 2/5] Remove state --- pkg/fleet/daemon.go | 24 ------------------------ pkg/fleet/daemon_test.go | 3 +-- pkg/fleet/daemon_worker.go | 7 +------ 3 files changed, 2 insertions(+), 32 deletions(-) diff --git a/pkg/fleet/daemon.go b/pkg/fleet/daemon.go index e27ae3108a..7adbd19faa 100644 --- a/pkg/fleet/daemon.go +++ b/pkg/fleet/daemon.go @@ -7,7 +7,6 @@ package fleet import ( "context" - "encoding/json" "errors" "fmt" "sync" @@ -20,7 +19,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/manager" - v2alpha1 "github.com/DataDog/datadog-operator/api/datadoghq/v2alpha1" "github.com/DataDog/datadog-operator/pkg/remoteconfig" ) @@ -318,25 +316,3 @@ func (d *Daemon) logInstallerState(caller string) { ) } } - -type rolloutProgress struct { - TargetVersion string `json:"targetVersion,omitempty"` - Desired int32 `json:"desired,omitempty"` - UpToDate int32 `json:"upToDate,omitempty"` - Ready int32 `json:"ready,omitempty"` -} - -// rolloutProgressJSON encodes DaemonSet rollout counters and the target config -// version as a JSON string. Returns "" when agent is nil. -func rolloutProgressJSON(agent *v2alpha1.DaemonSetStatus, targetVersion string) string { - if agent == nil { - return "" - } - b, _ := json.Marshal(rolloutProgress{ - TargetVersion: targetVersion, - Desired: agent.Desired, - UpToDate: agent.UpToDate, - Ready: agent.Ready, - }) - return string(b) -} diff --git a/pkg/fleet/daemon_test.go b/pkg/fleet/daemon_test.go index 40aa821e13..37810f3485 100644 --- a/pkg/fleet/daemon_test.go +++ b/pkg/fleet/daemon_test.go @@ -1619,8 +1619,7 @@ func TestRunPendingOperationWorker_StartWaitsForRollout_PartialUpdate(t *testing rc := d.rcClient.(*mockRCClient) require.NotNil(t, rc.state[0].Task) assert.Equal(t, pbgo.TaskState_RUNNING, rc.state[0].Task.State) - require.NotNil(t, rc.state[0].Task.Error) - assert.Equal(t, `{"targetVersion":"test-config","desired":3,"upToDate":1,"ready":1}`, rc.state[0].Task.Error.Message) + assert.Nil(t, rc.state[0].Task.Error) } func TestRunPendingOperationWorker_StartDoneAfterRolloutComplete(t *testing.T) { diff --git a/pkg/fleet/daemon_worker.go b/pkg/fleet/daemon_worker.go index 3061f541cf..e17a361f7a 100644 --- a/pkg/fleet/daemon_worker.go +++ b/pkg/fleet/daemon_worker.go @@ -8,7 +8,6 @@ package fleet import ( "context" "encoding/json" - "errors" "fmt" "maps" @@ -100,11 +99,7 @@ func (t *operationTracker) onStatusUpdate(ctx context.Context, snapshot ddaStatu done, resultErr := evaluatePendingTask(snapshot, op) if !done { t.daemon.taskMu.Lock() - var progressErr error - if progress := rolloutProgressJSON(snapshot.agent, op.experimentID); progress != "" { - progressErr = errors.New(progress) - } - t.daemon.setTaskState(op.packageName, op.taskID, pbgo.TaskState_RUNNING, progressErr) + t.daemon.setTaskState(op.packageName, op.taskID, pbgo.TaskState_RUNNING, nil) t.daemon.taskMu.Unlock() return } From 8703f83ad297255d2bb52ea47a833f45ca91cfaa Mon Sep 17 00:00:00 2001 From: Paul Coignet Date: Tue, 19 May 2026 16:07:51 +0300 Subject: [PATCH 3/5] Add log --- pkg/fleet/daemon_worker.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/fleet/daemon_worker.go b/pkg/fleet/daemon_worker.go index e17a361f7a..96d659e300 100644 --- a/pkg/fleet/daemon_worker.go +++ b/pkg/fleet/daemon_worker.go @@ -216,8 +216,10 @@ func (d *Daemon) finishPendingOperation(ctx context.Context, task pendingOperati } } if resultErr != nil { + ctrl.Log.Info("Task finished with error", "taskID", task.taskID, "package", task.packageName, "intent", task.intent, "error", resultErr) d.setTaskState(task.packageName, task.taskID, pbgo.TaskState_ERROR, resultErr) } else { + ctrl.Log.Info("Task finished successfully", "taskID", task.taskID, "package", task.packageName, "intent", task.intent) d.setTaskState(task.packageName, task.taskID, pbgo.TaskState_DONE, nil) } d.taskMu.Unlock() From a2a84af182e59cc57f1d9880bda7b4a489293fd6 Mon Sep 17 00:00:00 2001 From: Paul Coignet Date: Tue, 19 May 2026 16:17:10 +0300 Subject: [PATCH 4/5] Revert unrelated daemon.go changes from PR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove proto.Clone refactor and comment update — these are independent of the DONE state gating feature and belong in a separate PR. --- pkg/fleet/daemon.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pkg/fleet/daemon.go b/pkg/fleet/daemon.go index 7adbd19faa..682fa96149 100644 --- a/pkg/fleet/daemon.go +++ b/pkg/fleet/daemon.go @@ -111,8 +111,7 @@ func (d *Daemon) handleTask(ctx context.Context, req remoteAPIRequest) error { d.taskMu.Lock() pending, err := d.handleRemoteAPIRequest(ctx, req) if err != nil { - // stateDoesntMatchError covers RC config state mismatches (verifyExpectedState) - // and annotation slot conflicts (guardPendingOperationSlot). + // Expected and current stable/experiment configs don't match. var stateErr *stateDoesntMatchError if errors.As(err, &stateErr) { d.setTaskState(req.Package, req.ID, pbgo.TaskState_INVALID_STATE, err) @@ -228,16 +227,20 @@ func (d *Daemon) setTaskState(pkgName, taskID string, taskState pbgo.TaskState, found := false for _, pkg := range current { if pkg.GetPackage() == pkgName { - cloned := proto.Clone(pkg).(*pbgo.PackageState) - cloned.Task = task - updated = append(updated, cloned) + updated = append(updated, &pbgo.PackageState{ + Package: pkg.GetPackage(), + StableVersion: pkg.GetStableVersion(), + ExperimentVersion: pkg.GetExperimentVersion(), + Task: task, + StableConfigVersion: pkg.GetStableConfigVersion(), + ExperimentConfigVersion: pkg.GetExperimentConfigVersion(), + }) found = true } else { updated = append(updated, pkg) } } if !found { - // Package not yet in state: no existing fields to preserve. updated = append(updated, &pbgo.PackageState{ Package: pkgName, Task: task, From 435014c3c400e8913a05a5e204c71ab435e210ad Mon Sep 17 00:00:00 2001 From: Paul Coignet Date: Thu, 21 May 2026 10:06:40 +0300 Subject: [PATCH 5/5] Move done --- pkg/fleet/daemon_operations.go | 8 + pkg/fleet/daemon_test.go | 310 +++++++++++++++++++++++++++++++-- pkg/fleet/daemon_worker.go | 57 ++++-- 3 files changed, 352 insertions(+), 23 deletions(-) diff --git a/pkg/fleet/daemon_operations.go b/pkg/fleet/daemon_operations.go index df08b42291..8065fa9d8c 100644 --- a/pkg/fleet/daemon_operations.go +++ b/pkg/fleet/daemon_operations.go @@ -148,6 +148,11 @@ func (d *Daemon) applyOperation(ctx context.Context, nsn types.NamespacedName, s // when they are omitted. annotations[v2alpha1.AnnotationPendingResultVersion] = nil } + if pending.preExperimentHash != "" { + annotations[v2alpha1.AnnotationPendingPreExperimentHash] = pending.preExperimentHash + } else { + annotations[v2alpha1.AnnotationPendingPreExperimentHash] = nil + } var err error patch, err = json.Marshal(patchMap) @@ -238,6 +243,9 @@ func (d *Daemon) planStart(ctx context.Context, req remoteAPIRequest, op resolve if err := d.client.Get(ctx, op.NamespacedName, dda); err != nil { return nil, nil, fmt.Errorf("start DatadogAgent experiment: failed to get DatadogAgent: %w", err) } + if dda.Status.Agent != nil { + pending.preExperimentHash = dda.Status.Agent.CurrentHash + } if experimentHasPhase(dda, experimentID, v2alpha1.ExperimentPhaseRunning) { // The controller already started this experiment. Update RC now and let // handleTask mark the task done. diff --git a/pkg/fleet/daemon_test.go b/pkg/fleet/daemon_test.go index 37810f3485..5908a64a46 100644 --- a/pkg/fleet/daemon_test.go +++ b/pkg/fleet/daemon_test.go @@ -137,7 +137,7 @@ func testInstallerConfigWithDDA() map[string]installerConfig { } func testCompletedAgentStatus() *v2alpha1.DaemonSetStatus { - return &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 3, Ready: 3} + return &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 3, Ready: 3, Available: 3} } func testStartRequest() remoteAPIRequest { @@ -1314,10 +1314,14 @@ func TestRunPendingOperationWorker_CompletesStatusUpdateForNonDefaultPackage(t * ctx, cancel := context.WithCancel(context.Background()) defer cancel() tracker := newOperationTracker(d) - tracker.onStatusUpdate(ctx, newDDAStatusSnapshot(dda)) + snap := newDDAStatusSnapshot(dda) + tracker.onStatusUpdate(ctx, snap) rc := d.rcClient.(*mockRCClient) require.NotNil(t, rc.state[0].Task) + assert.Equal(t, pbgo.TaskState_RUNNING, rc.state[0].Task.State) + + tracker.onStatusUpdate(ctx, snap) assert.Equal(t, "task-1", rc.state[0].Task.Id) assert.Equal(t, pbgo.TaskState_DONE, rc.state[0].Task.State) assert.Equal(t, testExperimentID, rc.state[0].ExperimentConfigVersion) @@ -1484,10 +1488,14 @@ func TestRunPendingOperationWorker_RecoversPendingOperationFromAnnotations(t *te ctx, cancel := context.WithCancel(context.Background()) defer cancel() tracker := newOperationTracker(d) - tracker.onStatusUpdate(ctx, newDDAStatusSnapshot(dda)) + snap := newDDAStatusSnapshot(dda) + tracker.onStatusUpdate(ctx, snap) rc := d.rcClient.(*mockRCClient) require.NotNil(t, rc.state[0].Task) + assert.Equal(t, pbgo.TaskState_RUNNING, rc.state[0].Task.State) + + tracker.onStatusUpdate(ctx, snap) assert.Equal(t, "task-1", rc.state[0].Task.Id) assert.Equal(t, pbgo.TaskState_DONE, rc.state[0].Task.State) assert.Equal(t, testExperimentID, rc.state[0].ExperimentConfigVersion) @@ -1550,22 +1558,27 @@ func TestIsDaemonSetRolloutComplete(t *testing.T) { }, { name: "rolling out — some pods not yet updated", - agent: &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 1, Ready: 1}, + agent: &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 1, Ready: 1, Available: 1}, want: false, }, { - name: "updated but none ready", - agent: &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 3, Ready: 0}, + name: "updated but none available", + agent: &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 3, Ready: 3, Available: 0}, want: false, }, { - name: "updated but only partially ready", - agent: &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 3, Ready: 2}, + name: "updated but only partially available", + agent: &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 3, Ready: 3, Available: 2}, want: false, }, { - name: "all updated and all ready", - agent: &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 3, Ready: 3}, + name: "updated and ready but minReadySeconds not yet elapsed (Available < Ready)", + agent: &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 3, Ready: 3, Available: 0}, + want: false, + }, + { + name: "all updated and all available", + agent: &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 3, Ready: 3, Available: 3}, want: true, }, } @@ -1636,10 +1649,14 @@ func TestRunPendingOperationWorker_StartDoneAfterRolloutComplete(t *testing.T) { }} tracker := newOperationTracker(d) - tracker.onStatusUpdate(context.Background(), newDDAStatusSnapshot(dda)) + snap := newDDAStatusSnapshot(dda) + tracker.onStatusUpdate(context.Background(), snap) rc := d.rcClient.(*mockRCClient) require.NotNil(t, rc.state[0].Task) + assert.Equal(t, pbgo.TaskState_RUNNING, rc.state[0].Task.State) + + tracker.onStatusUpdate(context.Background(), snap) assert.Equal(t, pbgo.TaskState_DONE, rc.state[0].Task.State) assert.Nil(t, rc.state[0].Task.Error) assert.Equal(t, testExperimentID, rc.state[0].ExperimentConfigVersion) @@ -1647,4 +1664,275 @@ func TestRunPendingOperationWorker_StartDoneAfterRolloutComplete(t *testing.T) { got := &v2alpha1.DatadogAgent{} require.NoError(t, c.Get(context.Background(), testDDANSN, got)) assert.Empty(t, got.Annotations[v2alpha1.AnnotationPendingTaskID]) + assert.Empty(t, got.Annotations[v2alpha1.AnnotationPendingPreExperimentHash]) +} + +// --- Pre-experiment hash gate tests --- + +// TestRunPendingOperationWorker_StartBlockedByHashGate verifies that a start +// task is NOT completed when the DaemonSet status still carries the +// pre-experiment hash, even if counts look healthy. This is the race described +// by Codex: the DDAI controller hasn't yet processed the new spec. +func TestRunPendingOperationWorker_StartBlockedByHashGate(t *testing.T) { + dda := testDDAObject(v2alpha1.ExperimentPhaseRunning) + dda.Status.Agent = &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 3, Ready: 3, Available: 3, CurrentHash: "hash-before"} + dda.Annotations[v2alpha1.AnnotationPendingTaskID] = "task-1" + dda.Annotations[v2alpha1.AnnotationPendingAction] = string(pendingIntentStart) + dda.Annotations[v2alpha1.AnnotationPendingExperimentID] = testExperimentID + dda.Annotations[v2alpha1.AnnotationPendingPackage] = "datadog-operator" + dda.Annotations[v2alpha1.AnnotationPendingPreExperimentHash] = "hash-before" + + d, _ := testDaemon(dda, testInstallerConfigWithDDA()) + d.rcClient = &mockRCClient{state: []*pbgo.PackageState{ + {Package: "datadog-operator", StableConfigVersion: "stable-1"}, + }} + + tracker := newOperationTracker(d) + tracker.onStatusUpdate(context.Background(), newDDAStatusSnapshot(dda)) + + rc := d.rcClient.(*mockRCClient) + require.NotNil(t, rc.state[0].Task) + assert.Equal(t, pbgo.TaskState_RUNNING, rc.state[0].Task.State) + assert.Nil(t, rc.state[0].Task.Error) +} + +// TestRunPendingOperationWorker_StartDoneAfterHashChanges verifies that a +// start task completes once the DaemonSet hash has changed from the +// pre-experiment value and the rollout counts are satisfied. +func TestRunPendingOperationWorker_StartDoneAfterHashChanges(t *testing.T) { + dda := testDDAObject(v2alpha1.ExperimentPhaseRunning) + dda.Status.Agent = &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 3, Ready: 3, Available: 3, CurrentHash: "hash-after"} + dda.Annotations[v2alpha1.AnnotationPendingTaskID] = "task-1" + dda.Annotations[v2alpha1.AnnotationPendingAction] = string(pendingIntentStart) + dda.Annotations[v2alpha1.AnnotationPendingExperimentID] = testExperimentID + dda.Annotations[v2alpha1.AnnotationPendingPackage] = "datadog-operator" + dda.Annotations[v2alpha1.AnnotationPendingPreExperimentHash] = "hash-before" + + d, c := testDaemon(dda, testInstallerConfigWithDDA()) + d.rcClient = &mockRCClient{state: []*pbgo.PackageState{ + {Package: "datadog-operator", StableConfigVersion: "stable-1"}, + }} + + tracker := newOperationTracker(d) + snap := newDDAStatusSnapshot(dda) + + tracker.onStatusUpdate(context.Background(), snap) + rc := d.rcClient.(*mockRCClient) + require.NotNil(t, rc.state[0].Task) + assert.Equal(t, pbgo.TaskState_RUNNING, rc.state[0].Task.State) + + tracker.onStatusUpdate(context.Background(), snap) + assert.Equal(t, pbgo.TaskState_DONE, rc.state[0].Task.State) + assert.Nil(t, rc.state[0].Task.Error) + + got := &v2alpha1.DatadogAgent{} + require.NoError(t, c.Get(context.Background(), testDDANSN, got)) + assert.Empty(t, got.Annotations[v2alpha1.AnnotationPendingPreExperimentHash]) +} + +// --- Skip-once guard tests --- + +// TestRunPendingOperationWorker_StartSkipsFirstDoneOncePerExperiment verifies +// that the first DONE observation for a start task is skipped (reported as +// RUNNING) and the second observation reaches finishPendingOperation. This +// mirrors the stale-UpToDate race: DDAI sets CurrentHash before the kube +// DaemonSet controller has decremented UpdatedNumberScheduled. +func TestRunPendingOperationWorker_StartSkipsFirstDoneOncePerExperiment(t *testing.T) { + dda := testDDAObject(v2alpha1.ExperimentPhaseRunning) + dda.Status.Agent = testCompletedAgentStatus() + dda.Annotations[v2alpha1.AnnotationPendingTaskID] = "task-1" + dda.Annotations[v2alpha1.AnnotationPendingAction] = string(pendingIntentStart) + dda.Annotations[v2alpha1.AnnotationPendingExperimentID] = testExperimentID + dda.Annotations[v2alpha1.AnnotationPendingPackage] = "datadog-operator" + + d, c := testDaemon(dda, testInstallerConfigWithDDA()) + d.rcClient = &mockRCClient{state: []*pbgo.PackageState{ + {Package: "datadog-operator", StableConfigVersion: "stable-1"}, + }} + + tracker := newOperationTracker(d) + snap := newDDAStatusSnapshot(dda) + + tracker.onStatusUpdate(context.Background(), snap) + rc := d.rcClient.(*mockRCClient) + require.NotNil(t, rc.state[0].Task) + assert.Equal(t, pbgo.TaskState_RUNNING, rc.state[0].Task.State, "first DONE must be skipped") + + got := &v2alpha1.DatadogAgent{} + require.NoError(t, c.Get(context.Background(), testDDANSN, got)) + assert.NotEmpty(t, got.Annotations[v2alpha1.AnnotationPendingTaskID], "annotations must not be cleared after first skip") + + tracker.onStatusUpdate(context.Background(), snap) + assert.Equal(t, pbgo.TaskState_DONE, rc.state[0].Task.State, "second DONE must be reported") + + require.NoError(t, c.Get(context.Background(), testDDANSN, got)) + assert.Empty(t, got.Annotations[v2alpha1.AnnotationPendingTaskID], "annotations must be cleared after DONE") +} + +// TestRunPendingOperationWorker_StartSkipOnceConsumedAcrossRunningSnapshots +// verifies that not-done snapshots do not consume the skip; only the first +// done=true snapshot does. Simulates the normal rollout: not-done → skip +// consumed on first done → DONE on second done. +func TestRunPendingOperationWorker_StartSkipOnceConsumedAcrossRunningSnapshots(t *testing.T) { + dda := testDDAObject(v2alpha1.ExperimentPhaseRunning) + dda.Annotations[v2alpha1.AnnotationPendingTaskID] = "task-1" + dda.Annotations[v2alpha1.AnnotationPendingAction] = string(pendingIntentStart) + dda.Annotations[v2alpha1.AnnotationPendingExperimentID] = testExperimentID + dda.Annotations[v2alpha1.AnnotationPendingPackage] = "datadog-operator" + + d, _ := testDaemon(dda, testInstallerConfigWithDDA()) + d.rcClient = &mockRCClient{state: []*pbgo.PackageState{ + {Package: "datadog-operator", StableConfigVersion: "stable-1"}, + }} + + // Snapshot with rollout in progress (not done). + ddaInProgress := testDDAObject(v2alpha1.ExperimentPhaseRunning) + ddaInProgress.Status.Agent = &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 1, Ready: 1} + ddaInProgress.Annotations = dda.Annotations + + // Snapshot with rollout complete (done). + ddaComplete := testDDAObject(v2alpha1.ExperimentPhaseRunning) + ddaComplete.Status.Agent = testCompletedAgentStatus() + ddaComplete.Annotations = dda.Annotations + + tracker := newOperationTracker(d) + rc := d.rcClient.(*mockRCClient) + + tracker.onStatusUpdate(context.Background(), newDDAStatusSnapshot(ddaInProgress)) + require.NotNil(t, rc.state[0].Task) + assert.Equal(t, pbgo.TaskState_RUNNING, rc.state[0].Task.State, "in-progress snapshot: RUNNING") + + tracker.onStatusUpdate(context.Background(), newDDAStatusSnapshot(ddaComplete)) + assert.Equal(t, pbgo.TaskState_RUNNING, rc.state[0].Task.State, "first done snapshot: skip consumed, still RUNNING") + + tracker.onStatusUpdate(context.Background(), newDDAStatusSnapshot(ddaComplete)) + assert.Equal(t, pbgo.TaskState_DONE, rc.state[0].Task.State, "second done snapshot: DONE") +} + +// TestRunPendingOperationWorker_StartTerminalPhaseBypassesSkipOnce verifies +// that a start task that reaches a terminal phase with an error (unexpected +// state) is reported immediately without consuming the skip. +func TestRunPendingOperationWorker_StartTerminalPhaseBypassesSkipOnce(t *testing.T) { + dda := testDDAObject(v2alpha1.ExperimentPhaseTerminated) + dda.Annotations[v2alpha1.AnnotationPendingTaskID] = "task-1" + dda.Annotations[v2alpha1.AnnotationPendingAction] = string(pendingIntentStart) + dda.Annotations[v2alpha1.AnnotationPendingExperimentID] = testExperimentID + dda.Annotations[v2alpha1.AnnotationPendingPackage] = "datadog-operator" + + d, _ := testDaemon(dda, testInstallerConfigWithDDA()) + d.rcClient = &mockRCClient{state: []*pbgo.PackageState{ + {Package: "datadog-operator", StableConfigVersion: "stable-1"}, + }} + + tracker := newOperationTracker(d) + tracker.onStatusUpdate(context.Background(), newDDAStatusSnapshot(dda)) + + rc := d.rcClient.(*mockRCClient) + require.NotNil(t, rc.state[0].Task) + assert.Equal(t, pbgo.TaskState_ERROR, rc.state[0].Task.State, "terminal-phase error must not be skipped") +} + +// TestRunPendingOperationWorker_StartSkipPerExperimentNotShared verifies that +// each experiment ID has its own independent skip budget: the second experiment +// also skips its first DONE and requires a second call. +func TestRunPendingOperationWorker_StartSkipPerExperimentNotShared(t *testing.T) { + // Build a snapshot where both Status.Experiment.ID and the pending annotation + // use the same expID so evaluatePendingTask does not short-circuit. + makeSnap := func(expID string) ddaStatusSnapshot { + dda := testDDAObject(v2alpha1.ExperimentPhaseRunning) + dda.Status.Experiment = &v2alpha1.ExperimentStatus{Phase: v2alpha1.ExperimentPhaseRunning, ID: expID} + dda.Status.Agent = testCompletedAgentStatus() + dda.Annotations[v2alpha1.AnnotationPendingTaskID] = "task-" + expID + dda.Annotations[v2alpha1.AnnotationPendingAction] = string(pendingIntentStart) + dda.Annotations[v2alpha1.AnnotationPendingExperimentID] = expID + dda.Annotations[v2alpha1.AnnotationPendingPackage] = "datadog-operator" + return newDDAStatusSnapshot(dda) + } + + dda := testDDAObject(v2alpha1.ExperimentPhaseRunning) + d, _ := testDaemon(dda, testInstallerConfigWithDDA()) + d.rcClient = &mockRCClient{state: []*pbgo.PackageState{ + {Package: "datadog-operator", StableConfigVersion: "stable-1"}, + }} + + tracker := newOperationTracker(d) + rc := d.rcClient.(*mockRCClient) + + // First experiment: skip on first call, DONE on second. + snap1 := makeSnap("exp-1") + tracker.onStatusUpdate(context.Background(), snap1) + require.NotNil(t, rc.state[0].Task) + assert.Equal(t, pbgo.TaskState_RUNNING, rc.state[0].Task.State, "exp-1 first call: RUNNING") + tracker.onStatusUpdate(context.Background(), snap1) + assert.Equal(t, pbgo.TaskState_DONE, rc.state[0].Task.State, "exp-1 second call: DONE") + + // Second experiment with a different ID — skip budget is fresh. + snap2 := makeSnap("exp-2") + tracker.onStatusUpdate(context.Background(), snap2) + assert.Equal(t, pbgo.TaskState_RUNNING, rc.state[0].Task.State, "exp-2 first call: RUNNING (new skip)") + tracker.onStatusUpdate(context.Background(), snap2) + assert.Equal(t, pbgo.TaskState_DONE, rc.state[0].Task.State, "exp-2 second call: DONE") +} + +// TestRunPendingOperationWorker_StopAndPromoteNotAffectedBySkipOnce verifies +// that stop and promote tasks reach DONE on a single onStatusUpdate call. +func TestRunPendingOperationWorker_StopAndPromoteNotAffectedBySkipOnce(t *testing.T) { + t.Run("stop", func(t *testing.T) { + dda := testDDAObject(v2alpha1.ExperimentPhaseTerminated) + dda.Annotations[v2alpha1.AnnotationPendingTaskID] = "task-1" + dda.Annotations[v2alpha1.AnnotationPendingAction] = string(pendingIntentStop) + dda.Annotations[v2alpha1.AnnotationPendingExperimentID] = testExperimentID + dda.Annotations[v2alpha1.AnnotationPendingPackage] = "datadog-operator" + + d, _ := testDaemon(dda, testInstallerConfigWithDDA()) + d.rcClient = &mockRCClient{state: []*pbgo.PackageState{ + {Package: "datadog-operator", StableConfigVersion: "stable-1", ExperimentConfigVersion: testExperimentID}, + }} + + tracker := newOperationTracker(d) + tracker.onStatusUpdate(context.Background(), newDDAStatusSnapshot(dda)) + + rc := d.rcClient.(*mockRCClient) + require.NotNil(t, rc.state[0].Task) + assert.Equal(t, pbgo.TaskState_DONE, rc.state[0].Task.State, "stop must reach DONE on first call") + }) + + t.Run("promote", func(t *testing.T) { + dda := testDDAObject(v2alpha1.ExperimentPhasePromoted) + dda.Annotations[v2alpha1.AnnotationPendingTaskID] = "task-1" + dda.Annotations[v2alpha1.AnnotationPendingAction] = string(pendingIntentPromote) + dda.Annotations[v2alpha1.AnnotationPendingExperimentID] = testExperimentID + dda.Annotations[v2alpha1.AnnotationPendingPackage] = "datadog-operator" + dda.Annotations[v2alpha1.AnnotationPendingResultVersion] = "exp-v1" + + d, _ := testDaemon(dda, testInstallerConfigWithDDA()) + d.rcClient = &mockRCClient{state: []*pbgo.PackageState{ + {Package: "datadog-operator", StableConfigVersion: "stable-1", ExperimentConfigVersion: "exp-v1"}, + }} + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + tracker := newOperationTracker(d) + tracker.onStatusUpdate(ctx, newDDAStatusSnapshot(dda)) + + rc := d.rcClient.(*mockRCClient) + require.NotNil(t, rc.state[0].Task) + assert.Equal(t, pbgo.TaskState_DONE, rc.state[0].Task.State, "promote must reach DONE on first call") + }) +} + +// TestStartDatadogAgentExperiment_WritesPreExperimentHash verifies that when +// the DDA already has an agent status with a CurrentHash, the daemon records +// that hash in the pending-pre-experiment-hash annotation so the rollout gate +// can detect a stale status after restart. +func TestStartDatadogAgentExperiment_WritesPreExperimentHash(t *testing.T) { + dda := testDDAObject("") + dda.Status.Agent = &v2alpha1.DaemonSetStatus{Desired: 3, UpToDate: 3, Ready: 3, CurrentHash: "hash-before"} + d, c := testDaemon(dda, testInstallerConfigWithDDA()) + req := testStartRequest() + requireStartQueued(t, d, req) + + got := &v2alpha1.DatadogAgent{} + require.NoError(t, c.Get(context.Background(), testDDANSN, got)) + assert.Equal(t, "hash-before", got.Annotations[v2alpha1.AnnotationPendingPreExperimentHash]) } diff --git a/pkg/fleet/daemon_worker.go b/pkg/fleet/daemon_worker.go index 96d659e300..ab3be90296 100644 --- a/pkg/fleet/daemon_worker.go +++ b/pkg/fleet/daemon_worker.go @@ -42,6 +42,10 @@ type pendingOperation struct { experimentID string // resultVersion is only used by promote. It becomes stable_config on success. resultVersion string + // preExperimentHash is the agent DaemonSetStatus.CurrentHash captured when + // the start signal was written. The rollout gate requires the hash to change + // before declaring DONE, ensuring the DDAI controller has applied the new spec. + preExperimentHash string } type pendingIntent string @@ -63,6 +67,14 @@ func (op pendingOperation) matches(other pendingOperation) bool { type operationTracker struct { daemon *Daemon + // skipExperimentID is the experiment ID for which a single potential + // false-positive DONE on a start task has been consumed. DDAI can flip + // CurrentHash before the kube DaemonSet controller decrements + // UpdatedNumberScheduled, making isDaemonSetRolloutComplete return true + // against a stale UpToDate. Requiring a second confirming snapshot + // eliminates the race without new annotations. Single-goroutine field + // (only written/read from the run() loop) — no lock needed. + skipExperimentID string } func newOperationTracker(d *Daemon) *operationTracker { @@ -104,6 +116,19 @@ func (t *operationTracker) onStatusUpdate(ctx context.Context, snapshot ddaStatu return } + // Skip-once guard: only for successful start completions. DDAI sets + // CurrentHash before the kube DaemonSet controller updates UpToDate, so + // the first DONE observation may be against a stale rollout status. + // Requiring a second confirming snapshot eliminates the race. Errors and + // non-start intents bypass the guard and are reported immediately. + if op.intent == pendingIntentStart && resultErr == nil && t.skipExperimentID != op.experimentID { + t.skipExperimentID = op.experimentID + t.daemon.taskMu.Lock() + t.daemon.setTaskState(op.packageName, op.taskID, pbgo.TaskState_RUNNING, nil) + t.daemon.taskMu.Unlock() + return + } + t.daemon.finishPendingOperation(ctx, op, resultErr) } @@ -164,6 +189,12 @@ func evaluatePendingTask(snapshot ddaStatusSnapshot, task pendingOperation) (boo switch task.intent { case pendingIntentStart: if phase == v2alpha1.ExperimentPhaseRunning { + if task.preExperimentHash != "" && snapshot.agent != nil && + snapshot.agent.CurrentHash == task.preExperimentHash { + // The DDAI controller has not yet applied the new spec: the agent + // status still carries the pre-experiment hash. + return false, nil + } return isDaemonSetRolloutComplete(snapshot.agent), nil } case pendingIntentStop: @@ -190,7 +221,7 @@ func isDaemonSetRolloutComplete(agent *v2alpha1.DaemonSetStatus) bool { // existing DaemonSet rather than create a new one, so Desired never // transiently passes through 0 mid-rollout — it stays at its current node // count throughout the update. - return agent.UpToDate == agent.Desired && agent.Ready == agent.Desired + return agent.UpToDate == agent.Desired && agent.Available == agent.Desired } // finishPendingOperation writes the final RC state for a task. @@ -259,12 +290,13 @@ func (d *Daemon) reconcileTimedOutExperiment(ctx context.Context, snapshot ddaSt // Missing fields or an unknown action mean there is no task to track. func pendingOperationFromAnnotations(nsn types.NamespacedName, annotations map[string]string) (pendingOperation, bool) { op := pendingOperation{ - intent: pendingIntent(annotations[v2alpha1.AnnotationPendingAction]), - taskID: annotations[v2alpha1.AnnotationPendingTaskID], - packageName: annotations[v2alpha1.AnnotationPendingPackage], - nsn: nsn, - experimentID: annotations[v2alpha1.AnnotationPendingExperimentID], - resultVersion: annotations[v2alpha1.AnnotationPendingResultVersion], + intent: pendingIntent(annotations[v2alpha1.AnnotationPendingAction]), + taskID: annotations[v2alpha1.AnnotationPendingTaskID], + packageName: annotations[v2alpha1.AnnotationPendingPackage], + nsn: nsn, + experimentID: annotations[v2alpha1.AnnotationPendingExperimentID], + resultVersion: annotations[v2alpha1.AnnotationPendingResultVersion], + preExperimentHash: annotations[v2alpha1.AnnotationPendingPreExperimentHash], } if op.taskID == "" || op.intent == "" || op.experimentID == "" || op.packageName == "" { return pendingOperation{}, false @@ -293,11 +325,12 @@ func (d *Daemon) clearPendingAnnotationsIfCurrent(ctx context.Context, task pend patch, err := json.Marshal(map[string]any{ "metadata": map[string]any{ "annotations": map[string]any{ - v2alpha1.AnnotationPendingTaskID: nil, - v2alpha1.AnnotationPendingAction: nil, - v2alpha1.AnnotationPendingExperimentID: nil, - v2alpha1.AnnotationPendingPackage: nil, - v2alpha1.AnnotationPendingResultVersion: nil, + v2alpha1.AnnotationPendingTaskID: nil, + v2alpha1.AnnotationPendingAction: nil, + v2alpha1.AnnotationPendingExperimentID: nil, + v2alpha1.AnnotationPendingPackage: nil, + v2alpha1.AnnotationPendingResultVersion: nil, + v2alpha1.AnnotationPendingPreExperimentHash: nil, }, }, })