Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func (w *legacyMonitorTests) EvaluateTestsFromConstructedIntervals(ctx context.C
if err != nil || level == unknownUpgradeLevel {
return nil, fmt.Errorf("failed to determine upgrade level: %w", err)
}
junits = append(junits, testUpgradeOperatorProgressingStateTransitions(finalIntervals, level == patchUpgradeLevel)...)
junits = append(junits, testUpgradeOperatorProgressingStateTransitions(finalIntervals, level == patchUpgradeLevel, w.adminRESTConfig)...)
} else {
junits = append(junits, testStableSystemOperatorStateTransitions(finalIntervals, w.adminRESTConfig)...)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,14 @@ func getControlPlaneTopology(clientConfig *rest.Config) (configv1.TopologyMode,
return *topo, nil
}

// isTNFJobClusterOperatorReason reports whether reason has the shape
// tnf-<workflow>_JobRunning.
//
// These are the ClusterOperator condition Reason values surfaced on etcd's ClusterOperator
// by the cluster-etcd-operator while two-node fencing (TNF) batch Jobs are active in
// openshift-etcd. Some Jobs carry a per-job hash in the workflow portion, for example
// tnf-auth-job-master-0-64736551_JobRunning, so only the fixed prefix and suffix are checked.
func isTNFJobClusterOperatorReason(reason string) bool {
	const (
		tnfJobPrefix     = "tnf-"
		jobRunningSuffix = "_JobRunning"
	)

	// Anything that does not start with the TNF job prefix cannot match.
	if !strings.HasPrefix(reason, tnfJobPrefix) {
		return false
	}
	return strings.HasSuffix(reason, jobRunningSuffix)
}

// isInUpgradeWindow determines if the given eventInterval falls within an upgrade window.
// UpgradeStart and UpgradeRollback events start upgrade windows and can end an already started upgrade window.
// UpgradeComplete and UpgradeFailed events end upgrade windows; if there was not an already started upgrade window,
Expand Down Expand Up @@ -290,6 +298,11 @@ func testUpgradeOperatorStateTransitions(events monitorapi.Intervals, clientConf
strings.Contains(condition.Message, `Waiting for Deployment`) {
return "csi snapshot controller is allowed to have Available=False due to CSI webhook test on two node"
}
case "etcd":
if condition.Type == configv1.OperatorAvailable && condition.Status == configv1.ConditionFalse &&
isTNFJobClusterOperatorReason(condition.Reason) {
return "clusteroperator/etcd may report Available=False while a TNF batch Job is running on dual-replica topology (CEO JobRunning condition reasons)"
}
}
}

Expand Down Expand Up @@ -600,11 +613,21 @@ func testOperatorStateTransitions(events monitorapi.Intervals, conditionTypes []
return ret
}

func testUpgradeOperatorProgressingStateTransitions(events monitorapi.Intervals, isPatchLevelUpgrade bool) []*junitapi.JUnitTestCase {
func testUpgradeOperatorProgressingStateTransitions(events monitorapi.Intervals, isPatchLevelUpgrade bool, clientConfig *rest.Config) []*junitapi.JUnitTestCase {
var ret []*junitapi.JUnitTestCase
upgradeWindows := getUpgradeWindows(events)
multiUpgrades := platformidentification.UpgradeNumberDuringCollection(events, time.Time{}, time.Time{}) > 1

isTwoNode := false
if clientConfig != nil {
topology, err := getControlPlaneTopology(clientConfig)
if err != nil {
logrus.Warnf("Error checking for ControlPlaneTopology configuration for MCO co-progressing monitor (unable to apply two-node TNF exceptions): %v", err)
} else {
isTwoNode = topology == configv1.HighlyAvailableArbiterMode || topology == configv1.DualReplicaTopologyMode
}
}

var machineConfigProgressingStart time.Time
var eventsInUpgradeWindows monitorapi.Intervals

Expand Down Expand Up @@ -711,6 +734,10 @@ func testUpgradeOperatorProgressingStateTransitions(events monitorapi.Intervals,

except = func(co string, reason string) string {
switch co {
case "etcd":
if isTwoNode && isTNFJobClusterOperatorReason(reason) {
return "clusteroperator/etcd may report Progressing=True while a TNF batch Job is running during DualReplica topology upgrades (CEO JobRunning condition reasons)"
}
case "console":
if reason == "SyncLoopRefresh_InProgress" {
return "https://issues.redhat.com/browse/OCPBUGS-64688"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -370,3 +370,26 @@ func Test_patchUpgradeWithConfigClient(t *testing.T) {
})
}
}

func TestIsTNFJobClusterOperatorReason(t *testing.T) {
tests := []struct {
reason string
want bool
}{
{"tnf-setup-job_JobRunning", true},
{"tnf-fencing-job_JobRunning", true},
{"tnf-auth-job-master-0-64736551_JobRunning", true},
{"tnf-update-setup-job-master-1-abc12345_JobRunning", true},
{"tnf-after-setup-job-master-0-deadbeef_JobRunning", true},
{"EtcdMembersProgressing", false},
{"NodeInstaller_InstallerPodRunning", false},
{"tnf-setup-job_JobComplete", false},
{"setup-job_JobRunning", false},
{"", false},
}
for _, tt := range tests {
t.Run(tt.reason, func(t *testing.T) {
assert.Equal(t, tt.want, isTNFJobClusterOperatorReason(tt.reason))
})
}
}