diff --git a/test/extended/node/additional_artifact_stores.go b/test/extended/node/additional_artifact_stores.go new file mode 100644 index 000000000000..01d102d7af8b --- /dev/null +++ b/test/extended/node/additional_artifact_stores.go @@ -0,0 +1,737 @@ +package node + +import ( + "context" + "fmt" + "strings" + "time" + + g "github.com/onsi/ginkgo/v2" + o "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/kubernetes/test/e2e/framework" + e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" + + configv1 "github.com/openshift/api/config/v1" + machineconfigv1 "github.com/openshift/api/machineconfiguration/v1" + mcclient "github.com/openshift/client-go/machineconfiguration/clientset/versioned" + exutil "github.com/openshift/origin/test/extended/util" +) + +const ( + additionalArtifactStorePath = "/var/lib/additional-artifacts" + additionalArtifactStoreTestName = "additional-artifactstore-test" + maxArtifactStoresCount = 10 +) + +// Non-disruptive API validation tests - can run in parallel +var _ = g.Describe("[Jira:Node/CRI-O][sig-node][Feature:AdditionalStorageSupport][Suite:openshift/disruptive-longrunning] Additional Artifact Stores API Validation", func() { + defer g.GinkgoRecover() + + var oc = exutil.NewCLI("additional-artifact-stores-api") + + g.BeforeEach(func(ctx context.Context) { + g.By("Checking TechPreviewNoUpgrade feature set is enabled") + featureGate, err := oc.AdminConfigClient().ConfigV1().FeatureGates().Get(ctx, "cluster", metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + if featureGate.Spec.FeatureSet != configv1.TechPreviewNoUpgrade { + g.Skip("Skipping test: TechPreviewNoUpgrade feature set is required for additionalArtifactStores") + } + }) + + // TC1: Validate Path Format Restrictions + g.It("should reject invalid path formats for additionalArtifactStores [TC1]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + invalidPaths := []struct { + name string + path string + description string + }{ + {"relative-path", "relative/path", "relative path without leading slash"}, + {"empty-path", "", "empty path"}, + } + + for _, tc := range invalidPaths { + g.By(fmt.Sprintf("Testing invalid path: %s (%s)", tc.path, tc.description)) + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("artifact-invalid-path-test-%s", tc.name), + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath(tc.path)}, + }, + }, + }, + } + + _, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + if err != nil { + framework.Logf("Path '%s' correctly rejected at API level: %v", tc.path, err) + } else { + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + framework.Logf("Path '%s' accepted at API level, checking MCO validation", tc.path) + } + } + + framework.Logf("Test PASSED: Invalid path formats handled correctly") + }) + + // TC2: Validate Count Limits (max 10 artifact stores) + g.It("should reject more than 10 additionalArtifactStores [TC2]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with 11 artifact stores (exceeds max of 10)") + artifactStores := make([]machineconfigv1.AdditionalArtifactStore, 11) + for i := 0; i < 11; i++ { + artifactStores[i] = machineconfigv1.AdditionalArtifactStore{Path: machineconfigv1.StorePath(fmt.Sprintf("/mnt/artifactstore-%d", i))} + } + + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "artifact-exceed-limit-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalArtifactStores: artifactStores, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + if err != nil { + o.Expect(err.Error()).To(o.ContainSubstring("must have at most 10 items"), "Error should mention maximum limit of 10 items") + framework.Logf("Test PASSED: 11 artifact stores correctly rejected: %v", err) + } else { + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + framework.Logf("Warning: 11 artifact stores accepted at API level, checking MCO status") + + err = wait.PollUntilContextTimeout(ctx, 5*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { + cfg, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Get(ctx, ctrcfg.Name, metav1.GetOptions{}) + if err != nil { + return false, err + } + for _, condition := range cfg.Status.Conditions { + if condition.Type == machineconfigv1.ContainerRuntimeConfigFailure && + condition.Status == corev1.ConditionTrue { + framework.Logf("MCO rejected config: %s", condition.Message) + return true, nil + } + } + return cfg.Status.ObservedGeneration == cfg.Generation, nil + }) + } + }) + + // TC3: Validate Path Uniqueness Within Store Type + g.It("should reject duplicate paths in additionalArtifactStores [TC3]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with duplicate paths") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "artifact-duplicate-path-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath("/mnt/shared-artifacts")}, + {Path: machineconfigv1.StorePath("/mnt/shared-artifacts")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + if err != nil { + framework.Logf("Duplicate paths correctly rejected at API level: %v", err) + } else { + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + framework.Logf("Duplicate paths accepted at API level, checking MCO validation") + } + + framework.Logf("Test completed: Duplicate path validation checked") + }) + + // TC4: Path contains spaces + g.It("should reject additionalArtifactStores path containing spaces [TC4]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with path containing spaces") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "artifact-path-spaces-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath("/var/lib/artifact store")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).To(o.HaveOccurred()) + o.Expect(err.Error()).To(o.ContainSubstring("alphanumeric")) + framework.Logf("Test PASSED: Path with spaces correctly rejected: %v", err) + }) + + // TC5: Path contains invalid characters + g.It("should reject additionalArtifactStores path containing invalid characters [TC5]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + invalidChars := []struct { + name string + path string + char string + }{ + {"at-symbol", "/var/lib/artifact@store", "@"}, + {"exclamation", "/var/lib/artifact!store", "!"}, + {"hash", "/var/lib/artifact#store", "#"}, + {"dollar", "/var/lib/artifact$store", "$"}, + {"percent", "/var/lib/artifact%store", "%"}, + } + + for _, tc := range invalidChars { + g.By(fmt.Sprintf("Testing path with invalid character: %s", tc.char)) + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("artifact-invalid-char-%s-test", tc.name), + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath(tc.path)}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + if err != nil { + framework.Logf("Path with '%s' correctly rejected: %v", tc.char, err) + } else { + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + framework.Logf("Warning: Path with '%s' accepted at API level", tc.char) + } + } + framework.Logf("Test completed: Invalid character validation checked") + }) + + // TC6: Path too long (>256 bytes) + g.It("should reject additionalArtifactStores path exceeding 256 characters [TC6]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + longPath := "/" + strings.Repeat("a", 256) + g.By(fmt.Sprintf("Creating ContainerRuntimeConfig with path of %d characters", len(longPath))) + + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "artifact-long-path-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath(longPath)}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + if err != nil { + o.Expect(err.Error()).To(o.Or(o.ContainSubstring("256"), o.ContainSubstring("long"))) + framework.Logf("Test PASSED: Long path correctly rejected: %v", err) + } else { + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + framework.Logf("Warning: Long path accepted at API level") + } + }) + + // TC7: Path contains consecutive forward slashes + g.It("should reject additionalArtifactStores path with consecutive forward slashes [TC7]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with consecutive forward slashes") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "artifact-consecutive-slashes-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath("/var/lib//artifacts")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + if err != nil { + o.Expect(err.Error()).To(o.ContainSubstring("consecutive")) + framework.Logf("Test PASSED: Consecutive slashes correctly rejected: %v", err) + } else { + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + framework.Logf("Warning: Consecutive slashes accepted at API level") + } + }) + + // TC8: Single artifact store creation (P1 Basic) + g.It("should successfully create ContainerRuntimeConfig with single additionalArtifactStore [TC8]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with single artifact store") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "artifact-single-store-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath("/var/lib/artifact-single")}, + }, + }, + }, + } + + created, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + o.Expect(created.Name).To(o.Equal(ctrcfg.Name)) + o.Expect(created.Spec.ContainerRuntimeConfig.AdditionalArtifactStores).To(o.HaveLen(1)) + + framework.Logf("Test PASSED: Single artifact store created successfully") + }) + + // TC9: Same path across store types (P2) + g.It("should accept same path across different storage types [TC9]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with same path for layer, image, and artifact stores") + sharedPath := "/mnt/shared-storage" + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "artifact-same-path-cross-type-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath(sharedPath)}, + }, + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath(sharedPath)}, + }, + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath(sharedPath)}, + }, + }, + }, + } + + created, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + o.Expect(string(created.Spec.ContainerRuntimeConfig.AdditionalLayerStores[0].Path)).To(o.Equal(sharedPath)) + o.Expect(string(created.Spec.ContainerRuntimeConfig.AdditionalImageStores[0].Path)).To(o.Equal(sharedPath)) + o.Expect(string(created.Spec.ContainerRuntimeConfig.AdditionalArtifactStores[0].Path)).To(o.Equal(sharedPath)) + + framework.Logf("Test PASSED: Same path accepted across different storage types") + }) +}) + +// Disruptive E2E tests - must run serially +var _ = g.Describe("[Jira:Node/CRI-O][sig-node][Feature:AdditionalStorageSupport][Serial][Disruptive][Suite:openshift/disruptive-longrunning] Additional Artifact Stores E2E", func() { + defer g.GinkgoRecover() + + var oc = exutil.NewCLI("additional-artifact-stores") + + g.BeforeEach(func(ctx context.Context) { + g.By("Checking TechPreviewNoUpgrade feature set is enabled") + featureGate, err := oc.AdminConfigClient().ConfigV1().FeatureGates().Get(ctx, "cluster", metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + if featureGate.Spec.FeatureSet != configv1.TechPreviewNoUpgrade { + g.Skip("Skipping test: TechPreviewNoUpgrade feature set is required for additionalArtifactStores") + } + + infra, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(ctx, "cluster", metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + if infra.Status.PlatformStatus != nil && infra.Status.PlatformStatus.Type == configv1.AzurePlatformType { + g.Skip("Skipping test on Microsoft Azure cluster") + } + }) + + // TC10: Comprehensive E2E test - Configure and Verify storage.conf + g.It("should configure additionalArtifactStores and generate correct CRI-O config [TC10]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(len(workerNodes)).To(o.BeNumerically(">", 0)) + pureWorkers := getPureWorkerNodes(workerNodes) + if len(pureWorkers) < 1 { + e2eskipper.Skipf("Need at least 1 worker node for this test") + } + + // PHASE 1: Setup - Create shared directory on worker nodes + g.By("PHASE 1: Creating shared artifact directory on worker nodes") + artifactDirs := []string{additionalArtifactStorePath} + err = createDirectoriesOnNodes(oc, pureWorkers, artifactDirs) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupDirectoriesOnNodes(oc, pureWorkers, artifactDirs) + + // PHASE 2: Create ContainerRuntimeConfig and verify MCO processing + g.By("PHASE 2: Creating ContainerRuntimeConfig with additionalArtifactStores") + ctrcfg := createAdditionalArtifactStoresCTRCfg(additionalArtifactStoreTestName, additionalArtifactStorePath) + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + framework.Logf("ContainerRuntimeConfig %s created", ctrcfg.Name) + + g.By("Verifying ContainerRuntimeConfig resource created") + createdCfg, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Get(ctx, ctrcfg.Name, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(createdCfg.Name).To(o.Equal(ctrcfg.Name)) + o.Expect(createdCfg.Spec.ContainerRuntimeConfig.AdditionalArtifactStores).To(o.HaveLen(1)) + framework.Logf("ContainerRuntimeConfig resource verified") + + g.By("Waiting for ContainerRuntimeConfig to be processed by MCO") + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("ContainerRuntimeConfig processed by MCO") + + g.By("Verifying MachineConfig generated from ContainerRuntimeConfig") + mcList, err := mcClient.MachineconfigurationV1().MachineConfigs().List(ctx, metav1.ListOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + foundCtrcfgMC := false + for _, mc := range mcList.Items { + if strings.Contains(mc.Name, "containerruntime") || strings.Contains(mc.Name, "ctrcfg") { + framework.Logf("Found generated MachineConfig: %s", mc.Name) + foundCtrcfgMC = true + break + } + } + o.Expect(foundCtrcfgMC).To(o.BeTrue(), "Should find MachineConfig generated from ContainerRuntimeConfig") + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool started updating") + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool rollout completed") + + // PHASE 3: Verify CRI-O config on all nodes + g.By("PHASE 3: Verifying CRI-O config contains additionalArtifactStores on all worker nodes") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/crio/crio.conf.d/01-ctrcfg-additionalArtifactStores") + o.Expect(err).NotTo(o.HaveOccurred()) + + // Verify exact format: additional_artifact_stores = ["/var/lib/additional-artifacts"] + expectedConfig := fmt.Sprintf(`additional_artifact_stores = ["%s"]`, additionalArtifactStorePath) + o.Expect(output).To(o.ContainSubstring(expectedConfig), + "CRI-O config should contain '%s' on node %s", expectedConfig, node.Name) + framework.Logf("Node %s: CRI-O config verified with additionalArtifactStores = [\"%s\"]", node.Name, additionalArtifactStorePath) + } + + g.By("Verifying CRI-O is running with new configuration") + for _, node := range pureWorkers { + crioStatus, err := ExecOnNodeWithChroot(oc, node.Name, "systemctl", "is-active", "crio") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(strings.TrimSpace(crioStatus)).To(o.Equal("active"), + "CRI-O should be active on node %s", node.Name) + framework.Logf("Node %s: CRI-O is active", node.Name) + } + + g.By("Verifying nodes are Ready") + for _, node := range pureWorkers { + nodeObj, err := oc.AdminKubeClient().CoreV1().Nodes().Get(ctx, node.Name, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(isNodeInReadyState(nodeObj)).To(o.BeTrue(), + "Node %s should be Ready", node.Name) + framework.Logf("Node %s: Ready", node.Name) + } + + // ===================================================================== + // PHASE 5: Delete ContainerRuntimeConfig and verify cleanup + // ===================================================================== + g.By("PHASE 5: Deleting ContainerRuntimeConfig") + err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Delete(ctx, ctrcfg.Name, metav1.DeleteOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("ContainerRuntimeConfig %s deleted", ctrcfg.Name) + + g.By("Waiting for MachineConfigPool to start updating after deletion") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool started updating after CRC deletion") + + g.By("Waiting for MachineConfigPool rollout to complete after deletion") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool rollout completed after deletion") + + // ===================================================================== + // PHASE 6: Verify CRI-O config cleanup + // ===================================================================== + g.By("PHASE 6: Verifying CRI-O config file is removed after CRC deletion") + for _, node := range pureWorkers { + _, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/crio/crio.conf.d/01-ctrcfg-additionalArtifactStores") + o.Expect(err).To(o.HaveOccurred(), + "CRI-O config file should be removed after ContainerRuntimeConfig deletion on node %s", node.Name) + framework.Logf("Node %s: CRI-O config file removed successfully", node.Name) + } + + // Final Summary + framework.Logf("========================================") + framework.Logf("TEST RESULTS SUMMARY") + framework.Logf("========================================") + framework.Logf("Phase 1: Directory creation - PASSED") + framework.Logf("Phase 2: ContainerRuntimeConfig creation - PASSED") + framework.Logf("Phase 3: CRI-O config verification - PASSED") + framework.Logf("Phase 4: CRI-O and node status - PASSED") + framework.Logf("Phase 5: ContainerRuntimeConfig deletion - PASSED") + framework.Logf("Phase 6: CRI-O config cleanup - PASSED") + framework.Logf("========================================") + framework.Logf("Test PASSED: Comprehensive additionalArtifactStores E2E lifecycle verification complete") + }) + + // TC11: Update Existing Configuration + g.It("should update additionalArtifactStores when ContainerRuntimeConfig is modified [TC11]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + pureWorkers := getPureWorkerNodes(workerNodes) + + g.By("Creating shared artifact directories on worker nodes") + artifactDirs := []string{"/var/lib/artifactstore-1", "/var/lib/artifactstore-2", "/var/lib/artifactstore-3"} + err = createDirectoriesOnNodes(oc, pureWorkers, artifactDirs) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupDirectoriesOnNodes(oc, pureWorkers, artifactDirs) + + g.By("Creating initial ContainerRuntimeConfig with one artifact store") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "artifact-update-test-ctrcfg", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath("/var/lib/artifactstore-1")}, + }, + }, + }, + } + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Verifying initial configuration in CRI-O config") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/crio/crio.conf.d/01-ctrcfg-additionalArtifactStores") + o.Expect(err).NotTo(o.HaveOccurred()) + expectedConfig := `additional_artifact_stores = ["/var/lib/artifactstore-1"]` + o.Expect(output).To(o.ContainSubstring(expectedConfig), + "CRI-O config should contain '%s' on node %s", expectedConfig, node.Name) + } + + g.By("Updating ContainerRuntimeConfig to add second artifact store") + currentCfg, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Get(ctx, ctrcfg.Name, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + + currentCfg.Spec.ContainerRuntimeConfig.AdditionalArtifactStores = []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath("/var/lib/artifactstore-1")}, + {Path: machineconfigv1.StorePath("/var/lib/artifactstore-2")}, + } + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Update(ctx, currentCfg, metav1.UpdateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Verifying updated configuration includes both stores in CRI-O config") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/crio/crio.conf.d/01-ctrcfg-additionalArtifactStores") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(output).To(o.ContainSubstring("\"/var/lib/artifactstore-1\""), + "CRI-O config should contain /var/lib/artifactstore-1 on node %s", node.Name) + o.Expect(output).To(o.ContainSubstring("\"/var/lib/artifactstore-2\""), + "CRI-O config should contain /var/lib/artifactstore-2 on node %s", node.Name) + framework.Logf("Node %s: Both artifact stores configured after update", node.Name) + } + + framework.Logf("Test PASSED: ContainerRuntimeConfig update applied successfully") + }) + + // TC12: Multiple Storage Paths + g.It("should configure multiple additionalArtifactStores paths [TC12]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + pureWorkers := getPureWorkerNodes(workerNodes) + + g.By("Creating multiple shared artifact directories on worker nodes") + artifactDirs := []string{"/var/lib/artifactstore-1", "/var/lib/artifactstore-2", "/var/lib/artifactstore-3"} + err = createDirectoriesOnNodes(oc, pureWorkers, artifactDirs) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupDirectoriesOnNodes(oc, pureWorkers, artifactDirs) + + g.By("Creating ContainerRuntimeConfig with multiple artifact stores") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "multi-artifactstore-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath("/var/lib/artifactstore-1")}, + {Path: machineconfigv1.StorePath("/var/lib/artifactstore-2")}, + {Path: machineconfigv1.StorePath("/var/lib/artifactstore-3")}, + }, + }, + }, + } + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Verifying all artifact stores configured in CRI-O config on nodes") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/crio/crio.conf.d/01-ctrcfg-additionalArtifactStores") + o.Expect(err).NotTo(o.HaveOccurred()) + + // Verify all 3 paths are in the array + o.Expect(output).To(o.ContainSubstring("\"/var/lib/artifactstore-1\""), + "CRI-O config should contain /var/lib/artifactstore-1 on node %s", node.Name) + o.Expect(output).To(o.ContainSubstring("\"/var/lib/artifactstore-2\""), + "CRI-O config should contain /var/lib/artifactstore-2 on node %s", node.Name) + o.Expect(output).To(o.ContainSubstring("\"/var/lib/artifactstore-3\""), + "CRI-O config should contain /var/lib/artifactstore-3 on node %s", node.Name) + framework.Logf("Node %s: All 3 artifact stores configured", node.Name) + } + + framework.Logf("Test PASSED: Multiple additionalArtifactStores configured successfully") + }) + +}) + +func createAdditionalArtifactStoresCTRCfg(testName, storePath string) *machineconfigv1.ContainerRuntimeConfig { + return &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: testName, + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath(storePath)}, + }, + }, + }, + } +} diff --git a/test/extended/node/additional_image_stores.go b/test/extended/node/additional_image_stores.go new file mode 100644 index 000000000000..06ff8106012f --- /dev/null +++ b/test/extended/node/additional_image_stores.go @@ -0,0 +1,865 @@ +package node + +import ( + "context" + "fmt" + "strings" + "time" + + g "github.com/onsi/ginkgo/v2" + o "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/kubernetes/test/e2e/framework" + e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" + + configv1 "github.com/openshift/api/config/v1" + machineconfigv1 "github.com/openshift/api/machineconfiguration/v1" + mcclient "github.com/openshift/client-go/machineconfiguration/clientset/versioned" + exutil "github.com/openshift/origin/test/extended/util" +) + +const ( + additionalImageStorePath = "/var/lib/additional-images" + additionalImageStoreTestName = "additional-imagestore-test" + testImageDefault = "quay.io/bgudi/test-image-6gb:v1.0" + maxImageStoresCount = 10 +) + +// Non-disruptive API validation tests - can run in parallel +var _ = g.Describe("[Jira:Node/CRI-O][sig-node][Feature:AdditionalStorageSupport][Suite:openshift/disruptive-longrunning] Additional Image Stores API Validation", func() { + defer g.GinkgoRecover() + + var oc = exutil.NewCLI("additional-image-stores-api") + + g.BeforeEach(func(ctx context.Context) { + g.By("Checking TechPreviewNoUpgrade feature set is enabled") + featureGate, err := oc.AdminConfigClient().ConfigV1().FeatureGates().Get(ctx, "cluster", metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + if featureGate.Spec.FeatureSet != configv1.TechPreviewNoUpgrade { + g.Skip("Skipping test: TechPreviewNoUpgrade feature set is required for additionalImageStores") + } + }) + + // TC1: Validate Path Format Restrictions + g.It("should reject invalid path formats for additionalImageStores [TC1]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + invalidPaths := []struct { + name string + path string + description string + }{ + {"relative-path", "relative/path", "relative path without leading slash"}, + {"empty-path", "", "empty path"}, + } + + for _, tc := range invalidPaths { + g.By(fmt.Sprintf("Testing invalid path: %s (%s)", tc.path, tc.description)) + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("invalid-path-test-%s", tc.name), + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath(tc.path)}, + }, + }, + }, + } + + _, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + if err != nil { + framework.Logf("Path '%s' correctly rejected at API level: %v", tc.path, err) + } else { + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + framework.Logf("Path '%s' accepted at API level, checking MCO validation", tc.path) + } + } + + framework.Logf("Test PASSED: Invalid path formats handled correctly") + }) + + // TC2: Validate Count Limits (max 10 image stores) + g.It("should reject more than 10 additionalImageStores [TC2]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with 11 image stores (exceeds max of 10)") + imageStores := make([]machineconfigv1.AdditionalImageStore, 11) + for i := 0; i < 11; i++ { + imageStores[i] = machineconfigv1.AdditionalImageStore{Path: machineconfigv1.StorePath(fmt.Sprintf("/mnt/imagestore-%d", i))} + } + + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "exceed-limit-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalImageStores: imageStores, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + if err != nil { + o.Expect(err.Error()).To(o.ContainSubstring("must have at most 10 items"), "Error should mention maximum limit of 10 items") + framework.Logf("Test PASSED: 11 image stores correctly rejected: %v", err) + } else { + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + framework.Logf("Warning: 11 image stores accepted at API level, checking MCO status") + + err = wait.PollUntilContextTimeout(ctx, 5*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { + cfg, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Get(ctx, ctrcfg.Name, metav1.GetOptions{}) + if err != nil { + return false, err + } + for _, condition := range cfg.Status.Conditions { + if condition.Type == machineconfigv1.ContainerRuntimeConfigFailure && + condition.Status == corev1.ConditionTrue { + framework.Logf("MCO rejected config: %s", condition.Message) + return true, nil + } + } + return cfg.Status.ObservedGeneration == cfg.Generation, nil + }) + } + }) + + // TC3: Validate Path Uniqueness Within Store Type + g.It("should reject duplicate paths in additionalImageStores [TC3]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with duplicate paths") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "duplicate-path-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath("/mnt/shared-images")}, + {Path: machineconfigv1.StorePath("/mnt/shared-images")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + if err != nil { + framework.Logf("Duplicate paths correctly rejected at API level: %v", err) + } else { + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + framework.Logf("Duplicate paths accepted at API level, checking MCO validation") + } + + framework.Logf("Test completed: Duplicate path validation checked") + }) + + // TC4: Path contains spaces + g.It("should reject additionalImageStores path containing spaces [TC4]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with path containing spaces") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "imagestore-path-spaces-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath("/var/lib/image store")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).To(o.HaveOccurred()) + o.Expect(err.Error()).To(o.ContainSubstring("alphanumeric")) + framework.Logf("Test PASSED: Path with spaces correctly rejected: %v", err) + }) + + // TC5: Path contains invalid characters + g.It("should reject additionalImageStores path containing invalid characters [TC5]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + invalidChars := []struct { + name string + path string + char string + }{ + {"at-symbol", "/var/lib/image@store", "@"}, + {"exclamation", "/var/lib/image!store", "!"}, + {"hash", "/var/lib/image#store", "#"}, + {"dollar", "/var/lib/image$store", "$"}, + {"percent", "/var/lib/image%store", "%"}, + } + + for _, tc := range invalidChars { + g.By(fmt.Sprintf("Testing path with invalid character: %s", tc.char)) + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("imagestore-invalid-char-%s-test", tc.name), + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath(tc.path)}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + if err != nil { + framework.Logf("Path with '%s' correctly rejected: %v", tc.char, err) + } else { + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + framework.Logf("Warning: Path with '%s' accepted at API level", tc.char) + } + } + framework.Logf("Test completed: Invalid character validation checked") + }) + + // TC6: Path too long (>256 bytes) + g.It("should reject additionalImageStores path exceeding 256 characters [TC6]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + longPath := "/" + strings.Repeat("a", 256) + g.By(fmt.Sprintf("Creating ContainerRuntimeConfig with path of %d characters", len(longPath))) + + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "imagestore-long-path-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath(longPath)}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + if err != nil { + o.Expect(err.Error()).To(o.Or(o.ContainSubstring("256"), o.ContainSubstring("long"))) + framework.Logf("Test PASSED: Long path correctly rejected: %v", err) + } else { + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + framework.Logf("Warning: Long path accepted at API level") + } + }) + + // TC7: Path contains consecutive forward slashes + g.It("should reject additionalImageStores path with consecutive forward slashes [TC7]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with consecutive forward slashes") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "imagestore-consecutive-slashes-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath("/var/lib//images")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + if err != nil { + o.Expect(err.Error()).To(o.ContainSubstring("consecutive")) + framework.Logf("Test PASSED: Consecutive slashes correctly rejected: %v", err) + } else { + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + framework.Logf("Warning: Consecutive slashes accepted at API level") + } + }) + + // TC8: Single image store creation (P1 Basic) + g.It("should successfully create ContainerRuntimeConfig with single additionalImageStore [TC8]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with single image store") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "imagestore-single-store-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath("/var/lib/imagestore-single")}, + }, + }, + }, + } + + created, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + g.By("Verifying resource created successfully") + o.Expect(created.Name).To(o.Equal(ctrcfg.Name)) + o.Expect(created.Spec.ContainerRuntimeConfig.AdditionalImageStores).To(o.HaveLen(1)) + + framework.Logf("Test PASSED: Single image store created successfully") + }) + + // TC9: Same path across store types (P2) + g.It("should accept same path across different storage types [TC9]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with same path for layer, image, and artifact stores") + sharedPath := "/mnt/shared-storage" + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "imagestore-same-path-cross-type-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath(sharedPath)}, + }, + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath(sharedPath)}, + }, + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath(sharedPath)}, + }, + }, + }, + } + + created, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + g.By("Verifying same path accepted across different store types") + o.Expect(string(created.Spec.ContainerRuntimeConfig.AdditionalLayerStores[0].Path)).To(o.Equal(sharedPath)) + o.Expect(string(created.Spec.ContainerRuntimeConfig.AdditionalImageStores[0].Path)).To(o.Equal(sharedPath)) + o.Expect(string(created.Spec.ContainerRuntimeConfig.AdditionalArtifactStores[0].Path)).To(o.Equal(sharedPath)) + + framework.Logf("Test PASSED: Same path accepted across different storage types") + }) +}) + +// Disruptive E2E tests - must run serially +var _ = g.Describe("[Jira:Node/CRI-O][sig-node][Feature:AdditionalStorageSupport][Serial][Disruptive][Suite:openshift/disruptive-longrunning] Additional Image Stores E2E", func() { + defer g.GinkgoRecover() + + var oc = exutil.NewCLI("additional-image-stores") + + g.BeforeEach(func(ctx context.Context) { + g.By("Checking TechPreviewNoUpgrade feature set is enabled") + featureGate, err := oc.AdminConfigClient().ConfigV1().FeatureGates().Get(ctx, "cluster", metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + if featureGate.Spec.FeatureSet != configv1.TechPreviewNoUpgrade { + g.Skip("Skipping test: TechPreviewNoUpgrade feature set is required for additionalImageStores") + } + + infra, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(ctx, "cluster", metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + if infra.Status.PlatformStatus != nil && infra.Status.PlatformStatus.Type == configv1.AzurePlatformType { + g.Skip("Skipping test on Microsoft Azure cluster") + } + }) + + // TC10: Comprehensive E2E test - Configure, Verify storage.conf, and Verify Pod Deployment + g.It("should perform complete E2E lifecycle test with prepopulated images and fallback validation [TC10]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(len(workerNodes)).To(o.BeNumerically(">", 0)) + pureWorkers := getPureWorkerNodes(workerNodes) + if len(pureWorkers) < 1 { + e2eskipper.Skipf("Need at least 1 worker node for this test") + } + testNode := pureWorkers[0].Name + + // ===================================================================== + // PHASE 1: Setup - Create directory on worker nodes + // ===================================================================== + g.By("PHASE 1: Creating directory on worker nodes") + imageDirs := []string{additionalImageStorePath} + err = createDirectoriesOnNodes(oc, pureWorkers, imageDirs) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupDirectoriesOnNodes(oc, pureWorkers, imageDirs) + framework.Logf("Directory %s created on all workers", additionalImageStorePath) + + // ===================================================================== + // PHASE 2: Create ContainerRuntimeConfig and verify MCO processing + // ===================================================================== + g.By("PHASE 2: Creating ContainerRuntimeConfig with additionalImageStores") + ctrcfg := createAdditionalImageStoresCTRCfg(additionalImageStoreTestName, additionalImageStorePath) + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + framework.Logf("ContainerRuntimeConfig %s created", ctrcfg.Name) + + g.By("Verifying ContainerRuntimeConfig resource created") + createdCfg, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Get(ctx, ctrcfg.Name, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(createdCfg.Name).To(o.Equal(ctrcfg.Name)) + o.Expect(createdCfg.Spec.ContainerRuntimeConfig.AdditionalImageStores).To(o.HaveLen(1)) + framework.Logf("ContainerRuntimeConfig resource verified") + + g.By("Waiting for ContainerRuntimeConfig to be processed by MCO") + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("ContainerRuntimeConfig processed by MCO") + + g.By("Verifying MachineConfig generated from ContainerRuntimeConfig") + mcList, err := mcClient.MachineconfigurationV1().MachineConfigs().List(ctx, metav1.ListOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + foundCtrcfgMC := false + var generatedMCName string + for _, mc := range mcList.Items { + if strings.Contains(mc.Name, "containerruntime") || strings.Contains(mc.Name, "ctrcfg") { + framework.Logf("Found generated MachineConfig: %s", mc.Name) + generatedMCName = mc.Name + foundCtrcfgMC = true + } + } + o.Expect(foundCtrcfgMC).To(o.BeTrue(), "Should find MachineConfig generated from ContainerRuntimeConfig") + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool started updating") + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool rollout completed") + + // ===================================================================== + // PHASE 3: Verify storage.conf on all nodes + // ===================================================================== + g.By("PHASE 3: Verifying storage.conf contains additionalImageStores on all worker nodes") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(output).To(o.ContainSubstring("additionalimagestores"), + "storage.conf should contain additionalimagestores on node %s", node.Name) + o.Expect(output).To(o.ContainSubstring(additionalImageStorePath), + "storage.conf should contain path %s on node %s", additionalImageStorePath, node.Name) + framework.Logf("Node %s: storage.conf verified with additionalImageStores", node.Name) + } + + g.By("Verifying CRI-O is running with new configuration") + for _, node := range pureWorkers { + crioStatus, err := ExecOnNodeWithChroot(oc, node.Name, "systemctl", "is-active", "crio") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(strings.TrimSpace(crioStatus)).To(o.Equal("active"), + "CRI-O should be active on node %s", node.Name) + framework.Logf("Node %s: CRI-O is active", node.Name) + } + + g.By("Verifying nodes are Ready") + for _, node := range pureWorkers { + nodeObj, err := oc.AdminKubeClient().CoreV1().Nodes().Get(ctx, node.Name, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(isNodeInReadyState(nodeObj)).To(o.BeTrue(), + "Node %s should be Ready", node.Name) + framework.Logf("Node %s: Ready", node.Name) + } + + // ===================================================================== + // PHASE 4: Test pre-populated image functionality + // ===================================================================== + g.By("PHASE 4: Pre-populating image in shared storage") + err = prepopulateImageOnNode(ctx, oc, testNode, testImageDefault, additionalImageStorePath) + o.Expect(err).NotTo(o.HaveOccurred(), "Failed to prepopulate image - this is required for TC10 to validate additionalImageStores functionality") + + g.By("Deploying test pod using pre-populated image") + testPod := createTestPod("imagestore-test-pod", testImageDefault, testNode) + startTime := time.Now() + _, err = oc.AdminKubeClient().CoreV1().Pods(oc.Namespace()).Create(ctx, testPod, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer oc.AdminKubeClient().CoreV1().Pods(oc.Namespace()).Delete(ctx, testPod.Name, metav1.DeleteOptions{}) + + g.By("Waiting for pod to be running") + err = waitForPodRunning(ctx, oc, testPod.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + podStartupTime := time.Since(startTime) + framework.Logf("Pod started in %v", podStartupTime) + + g.By("Verifying pod events and image pull behavior") + events, err := oc.AdminKubeClient().CoreV1().Events(oc.Namespace()).List(ctx, metav1.ListOptions{ + FieldSelector: fmt.Sprintf("involvedObject.name=%s", testPod.Name), + }) + o.Expect(err).NotTo(o.HaveOccurred()) + + var foundAlreadyPresentEvent bool + for _, event := range events.Items { + if event.Reason == "Pulled" { + framework.Logf("Image pulled event: %s", event.Message) + // Check if the event indicates image was already present on machine + // This is the expected message when image is loaded from additionalImageStores + if strings.Contains(event.Message, "already present on machine and can be accessed by the pod") { + foundAlreadyPresentEvent = true + framework.Logf("SUCCESS: Image was loaded from additionalImageStore - event message: %s", event.Message) + break + } + } + } + + // Validation: Verify image was loaded from additional storage + // Expected event message: "Container image ... already present on machine and can be accessed by the pod" + o.Expect(foundAlreadyPresentEvent).To(o.BeTrue(), + "Image should have been loaded from additionalImageStore (%s). "+ + "Expected event message containing 'already present on machine and can be accessed by the pod' but did not find it. "+ + "This indicates the image was not pre-populated correctly or not loaded from additional storage.", additionalImageStorePath) + framework.Logf("Verified: Image was loaded from additional storage at %s", additionalImageStorePath) + + // ===================================================================== + // PHASE 5: Test fallback behavior when image is not in additional storage + // ===================================================================== + g.By("PHASE 5: Testing fallback to registry when image not in additional storage") + + g.By("Deleting first pod") + err = oc.AdminKubeClient().CoreV1().Pods(oc.Namespace()).Delete(ctx, testPod.Name, metav1.DeleteOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("First pod deleted") + + g.By("Removing image from additional storage to test fallback") + removeCmd := fmt.Sprintf("podman --root %s rmi %s", additionalImageStorePath, testImageDefault) + removeOutput, err := ExecOnNodeWithChroot(oc, testNode, "sh", "-c", removeCmd) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("Image removed from additional storage: %s", removeOutput) + + g.By("Creating second pod to test fallback to registry") + testPod2 := createTestPod("imagestore-fallback-pod", testImageDefault, testNode) + startTime2 := time.Now() + _, err = oc.AdminKubeClient().CoreV1().Pods(oc.Namespace()).Create(ctx, testPod2, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer oc.AdminKubeClient().CoreV1().Pods(oc.Namespace()).Delete(ctx, testPod2.Name, metav1.DeleteOptions{}) + + g.By("Waiting for second pod to be running") + err = waitForPodRunning(ctx, oc, testPod2.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + pod2StartupTime := time.Since(startTime2) + framework.Logf("Second pod started in %v", pod2StartupTime) + + g.By("Verifying second pod pulled from registry (fallback behavior)") + events2, err := oc.AdminKubeClient().CoreV1().Events(oc.Namespace()).List(ctx, metav1.ListOptions{ + FieldSelector: fmt.Sprintf("involvedObject.name=%s", testPod2.Name), + }) + o.Expect(err).NotTo(o.HaveOccurred()) + + var foundSuccessfullyPulledEvent bool + for _, event := range events2.Items { + if event.Reason == "Pulled" { + framework.Logf("Second pod pull event: %s", event.Message) + // Should see "Successfully pulled" since image is not in additional storage + if strings.Contains(event.Message, "Successfully pulled") { + foundSuccessfullyPulledEvent = true + framework.Logf("SUCCESS: Image was pulled from registry (fallback) - event message: %s", event.Message) + break + } + } + } + + o.Expect(foundSuccessfullyPulledEvent).To(o.BeTrue(), + "Image should have been pulled from registry since it was removed from additionalImageStore. "+ + "Expected event message containing 'Successfully pulled' but did not find it.") + framework.Logf("Verified: Fallback to registry works when image not in additional storage") + + g.By("Verifying performance improvement with additionalImageStores") + framework.Logf("Performance comparison:") + framework.Logf(" Pod 1 (prepopulated from additionalImageStore): %v", podStartupTime) + framework.Logf(" Pod 2 (pulled from registry): %v", pod2StartupTime) + speedup := float64(pod2StartupTime) / float64(podStartupTime) + framework.Logf(" Speedup: %.2fx faster with additionalImageStores", speedup) + + // Verify that prepopulated image is significantly faster than registry pull + // For a 6GB image, prepopulated should be at least 2x faster + o.Expect(podStartupTime).To(o.BeNumerically("<", pod2StartupTime/2), + "Pod using prepopulated image from additionalImageStore should be significantly faster. "+ + "Expected pod1 (%v) to be at least 2x faster than pod2 (%v) pulling from registry.", + podStartupTime, pod2StartupTime) + framework.Logf("Performance improvement verified: Prepopulated image is %.2fx faster", speedup) + + g.By("Deleting second pod") + err = oc.AdminKubeClient().CoreV1().Pods(oc.Namespace()).Delete(ctx, testPod2.Name, metav1.DeleteOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("Second pod deleted") + + // ===================================================================== + // PHASE 6: Cleanup - Remove ContainerRuntimeConfig and verify removal + // ===================================================================== + g.By("PHASE 6: Removing ContainerRuntimeConfig and verifying cleanup") + + g.By("Deleting ContainerRuntimeConfig") + err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Delete(ctx, ctrcfg.Name, metav1.DeleteOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("ContainerRuntimeConfig deleted") + + g.By("Waiting for MachineConfigPool to start updating after deletion") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MCP started updating after deletion") + + g.By("Waiting for MachineConfigPool rollout to complete after deletion") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MCP rollout completed after deletion") + + g.By("Verifying additionalImageStores removed from storage.conf") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(output).NotTo(o.ContainSubstring(additionalImageStorePath), + "storage.conf should not contain %s after ContainerRuntimeConfig deletion on node %s", + additionalImageStorePath, node.Name) + framework.Logf("Node %s: storage.conf verified - additionalImageStores removed", node.Name) + } + + // ===================================================================== + // PHASE 7: Final Summary + // ===================================================================== + framework.Logf("========================================") + framework.Logf("COMPREHENSIVE E2E TEST RESULTS SUMMARY") + framework.Logf("========================================") + framework.Logf("Phase 1: Directory creation - PASSED") + framework.Logf("Phase 2: ContainerRuntimeConfig creation - PASSED") + framework.Logf(" - ContainerRuntimeConfig: %s", ctrcfg.Name) + framework.Logf(" - Generated MachineConfig: %s", generatedMCName) + framework.Logf(" - MCP rollout: COMPLETED") + framework.Logf("Phase 3: storage.conf verification - PASSED") + framework.Logf(" - storage.conf updated: YES") + framework.Logf(" - CRI-O active: YES") + framework.Logf(" - All nodes Ready: YES") + framework.Logf("Phase 4: Prepopulated image test - PASSED") + framework.Logf(" - Pod startup time: %v", podStartupTime) + framework.Logf(" - Image source: ADDITIONAL STORAGE (verified by 'already present on machine' event)") + framework.Logf("Phase 5: Fallback to registry test - PASSED") + framework.Logf(" - Image removed from additional storage") + framework.Logf(" - Pod successfully pulled from registry (fallback verified)") + framework.Logf(" - Pod 2 startup time: %v", pod2StartupTime) + speedupFinal := float64(pod2StartupTime) / float64(podStartupTime) + framework.Logf(" - Performance improvement: %.2fx faster with additionalImageStores", speedupFinal) + framework.Logf("Phase 6: ContainerRuntimeConfig removal - PASSED") + framework.Logf(" - ContainerRuntimeConfig deleted") + framework.Logf(" - MCP rollout after deletion: COMPLETED") + framework.Logf(" - storage.conf cleanup: VERIFIED") + framework.Logf("========================================") + framework.Logf("Test PASSED: Comprehensive additionalImageStores E2E lifecycle verification complete") + }) + + // TC12: Update Existing Configuration + g.It("should update additionalImageStores when ContainerRuntimeConfig is modified [TC11]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + pureWorkers := getPureWorkerNodes(workerNodes) + + g.By("Creating shared image directories on worker nodes") + imageDirs := []string{"/var/lib/imagestore-1", "/var/lib/imagestore-2", "/var/lib/imagestore-3"} + err = createDirectoriesOnNodes(oc, pureWorkers, imageDirs) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupDirectoriesOnNodes(oc, pureWorkers, imageDirs) + + g.By("Creating initial ContainerRuntimeConfig with one image store") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "update-test-ctrcfg", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath("/var/lib/imagestore-1")}, + }, + }, + }, + } + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Verifying initial configuration") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(output).To(o.ContainSubstring("/var/lib/imagestore-1")) + } + + g.By("Updating ContainerRuntimeConfig to add second image store") + currentCfg, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Get(ctx, ctrcfg.Name, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + + currentCfg.Spec.ContainerRuntimeConfig.AdditionalImageStores = []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath("/var/lib/imagestore-1")}, + {Path: machineconfigv1.StorePath("/var/lib/imagestore-2")}, + } + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Update(ctx, currentCfg, metav1.UpdateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating after modification") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool rollout to complete after modification") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Verifying updated configuration includes both stores") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(output).To(o.ContainSubstring("/var/lib/imagestore-1")) + o.Expect(output).To(o.ContainSubstring("/var/lib/imagestore-2")) + framework.Logf("Node %s: Both image stores configured after update", node.Name) + } + + framework.Logf("Test PASSED: ContainerRuntimeConfig update applied successfully") + }) + + // TC17: Multiple Storage Types Combined + g.It("should configure multiple additionalImageStores paths [TC12]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + pureWorkers := getPureWorkerNodes(workerNodes) + + g.By("Creating multiple shared image directories on worker nodes") + imageDirs := []string{"/var/lib/imagestore-1", "/var/lib/imagestore-2", "/var/lib/imagestore-3"} + err = createDirectoriesOnNodes(oc, pureWorkers, imageDirs) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupDirectoriesOnNodes(oc, pureWorkers, imageDirs) + + g.By("Creating ContainerRuntimeConfig with multiple image stores") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "multi-imagestore-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath("/var/lib/imagestore-1")}, + {Path: machineconfigv1.StorePath("/var/lib/imagestore-2")}, + {Path: machineconfigv1.StorePath("/var/lib/imagestore-3")}, + }, + }, + }, + } + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Verifying all image stores configured on nodes") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(output).To(o.ContainSubstring("/var/lib/imagestore-1")) + o.Expect(output).To(o.ContainSubstring("/var/lib/imagestore-2")) + o.Expect(output).To(o.ContainSubstring("/var/lib/imagestore-3")) + framework.Logf("Node %s: All 3 image stores configured", node.Name) + } + + framework.Logf("Test PASSED: Multiple additionalImageStores configured successfully") + }) + +}) diff --git a/test/extended/node/additional_layer_stores.go b/test/extended/node/additional_layer_stores.go new file mode 100644 index 000000000000..dfe36468e871 --- /dev/null +++ b/test/extended/node/additional_layer_stores.go @@ -0,0 +1,1228 @@ +package node + +import ( + "context" + "fmt" + "strings" + "time" + + g "github.com/onsi/ginkgo/v2" + o "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/kubernetes/test/e2e/framework" + e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" + + configv1 "github.com/openshift/api/config/v1" + machineconfigv1 "github.com/openshift/api/machineconfiguration/v1" + mcclient "github.com/openshift/client-go/machineconfiguration/clientset/versioned" + exutil "github.com/openshift/origin/test/extended/util" +) + +const ( + additionalLayerStorePath = "/var/lib/additional-layers" + additionalLayerStoreTestName = "additional-layerstore-test" + maxLayerStoresCount = 5 +) + +// Non-disruptive API validation tests - can run in parallel +var _ = g.Describe("[Jira:Node/CRI-O][sig-node][Feature:AdditionalStorageSupport] Additional Layer Stores API Validation", func() { + defer g.GinkgoRecover() + + var oc = exutil.NewCLI("additional-layer-stores-api") + + g.BeforeEach(func(ctx context.Context) { + g.By("Checking TechPreviewNoUpgrade feature set is enabled") + featureGate, err := oc.AdminConfigClient().ConfigV1().FeatureGates().Get(ctx, "cluster", metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + if featureGate.Spec.FeatureSet != configv1.TechPreviewNoUpgrade { + g.Skip("Skipping test: TechPreviewNoUpgrade feature set is required for additionalLayerStores") + } + }) + + // TC1: Should be able to create ContainerRuntimeConfig with multiple additionalLayerStores + g.It("should accept multiple valid additionalLayerStores paths [TC1]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with multiple valid layer store paths") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "layer-multi-path-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath("/var/lib/stargz-store")}, + {Path: machineconfigv1.StorePath("/mnt/nydus-store")}, + {Path: machineconfigv1.StorePath("/opt/layer_store-v1.0")}, + }, + }, + }, + } + + created, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + o.Expect(created.Spec.ContainerRuntimeConfig.AdditionalLayerStores).To(o.HaveLen(3)) + framework.Logf("Test PASSED: Multiple valid layer store paths accepted") + }) + + // TC2: Should fail if additionalLayerStores path is empty + // Note: Go API returns "Required value" while YAML returns "at least 1 chars long" + g.It("should reject empty path for additionalLayerStores [TC2]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with empty path") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "layer-empty-path-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath("")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).To(o.HaveOccurred()) + framework.Logf("Expected substring: 'Required value' (Go API) or 'at least 1 chars long' (YAML)") + framework.Logf("Actual error: %v", err) + o.Expect(err.Error()).To(o.ContainSubstring("Required value")) + framework.Logf("Test PASSED: Empty path correctly rejected") + }) + + // TC3: Should fail if additionalLayerStores path is not absolute + g.It("should reject relative path for additionalLayerStores [TC3]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with relative path") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "layer-relative-path-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath("var/lib/stargz-store")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).To(o.HaveOccurred()) + o.Expect(err.Error()).To(o.ContainSubstring("path must be absolute and contain only alphanumeric characters")) + framework.Logf("Test PASSED: Relative path correctly rejected: %v", err) + }) + + // TC4: Should fail if additionalLayerStores path contains spaces + g.It("should reject path with spaces for additionalLayerStores [TC4]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with path containing spaces") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "layer-path-spaces-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath("/var/lib/stargz store")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).To(o.HaveOccurred()) + o.Expect(err.Error()).To(o.ContainSubstring("path must be absolute and contain only alphanumeric characters")) + framework.Logf("Test PASSED: Path with spaces correctly rejected: %v", err) + }) + + // TC5: Should fail if additionalLayerStores path contains invalid characters + g.It("should reject path with invalid characters for additionalLayerStores [TC5]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + invalidChars := []struct { + name string + path string + char string + }{ + {"at-symbol", "/var/lib/stargz@store", "@"}, + {"exclamation", "/var/lib/stargz!store", "!"}, + {"hash", "/var/lib/stargz#store", "#"}, + {"dollar", "/var/lib/stargz$store", "$"}, + {"percent", "/var/lib/stargz%store", "%"}, + } + + for _, tc := range invalidChars { + g.By(fmt.Sprintf("Testing path with invalid character: %s", tc.char)) + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("layer-invalid-char-%s-test", tc.name), + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath(tc.path)}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + if err != nil { + framework.Logf("Path with '%s' correctly rejected: %v", tc.char, err) + } else { + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + framework.Logf("Warning: Path with '%s' accepted at API level", tc.char) + } + } + framework.Logf("Test completed: Invalid character validation checked") + }) + + // TC6: Should fail if additionalLayerStores path is too long (>256 bytes) + g.It("should reject path exceeding 256 characters for additionalLayerStores [TC6]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + longPath := "/" + strings.Repeat("a", 256) + g.By(fmt.Sprintf("Creating ContainerRuntimeConfig with path of %d characters", len(longPath))) + + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "layer-long-path-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath(longPath)}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).To(o.HaveOccurred()) + o.Expect(err.Error()).To(o.Or(o.ContainSubstring("256"), o.ContainSubstring("Too long"))) + framework.Logf("Test PASSED: Long path correctly rejected: %v", err) + }) + + // TC7: Should fail if additionalLayerStores exceeds maximum of 5 items + g.It("should reject more than 5 additionalLayerStores [TC7]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with 6 layer stores (exceeds max of 5)") + layerStores := make([]machineconfigv1.AdditionalLayerStore, 6) + for i := 0; i < 6; i++ { + layerStores[i] = machineconfigv1.AdditionalLayerStore{Path: machineconfigv1.StorePath(fmt.Sprintf("/var/lib/store%d", i))} + } + + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "layer-exceed-limit-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: layerStores, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).To(o.HaveOccurred()) + o.Expect(err.Error()).To(o.ContainSubstring("must have at most 5 items")) + framework.Logf("Test PASSED: 6 layer stores correctly rejected: %v", err) + }) + + // TC8: Should fail if additionalLayerStores path contains consecutive forward slashes + g.It("should reject path with consecutive forward slashes for additionalLayerStores [TC8]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with consecutive forward slashes") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "layer-consecutive-slashes-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath("/var/lib//stargz-store")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).To(o.HaveOccurred()) + o.Expect(err.Error()).To(o.ContainSubstring("consecutive")) + framework.Logf("Test PASSED: Consecutive slashes correctly rejected: %v", err) + }) + + // TC9: Should fail if additionalLayerStores contains duplicate paths + g.It("should reject duplicate paths in additionalLayerStores [TC9]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with duplicate paths") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "layer-duplicate-path-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath("/var/lib/stargz-store")}, + {Path: machineconfigv1.StorePath("/var/lib/stargz-store")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).To(o.HaveOccurred()) + o.Expect(err.Error()).To(o.ContainSubstring("duplicate")) + framework.Logf("Test PASSED: Duplicate paths correctly rejected: %v", err) + }) +}) + +// Disruptive E2E tests - must run serially +var _ = g.Describe("[Jira:Node/CRI-O][sig-node][Feature:AdditionalStorageSupport][Serial][Disruptive] Additional Layer Stores E2E", func() { + defer g.GinkgoRecover() + + var oc = exutil.NewCLI("additional-layer-stores") + + g.BeforeEach(func(ctx context.Context) { + g.By("Checking TechPreviewNoUpgrade feature set is enabled") + featureGate, err := oc.AdminConfigClient().ConfigV1().FeatureGates().Get(ctx, "cluster", metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + if featureGate.Spec.FeatureSet != configv1.TechPreviewNoUpgrade { + g.Skip("Skipping test: TechPreviewNoUpgrade feature set is required for additionalLayerStores") + } + + infra, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(ctx, "cluster", metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + if infra.Status.PlatformStatus != nil && infra.Status.PlatformStatus.Type == configv1.AzurePlatformType { + g.Skip("Skipping test on Microsoft Azure cluster") + } + }) + + // TC10: Comprehensive E2E test - Stargz-store setup, CRC, verification, lazy pulling, and cleanup + g.It("should configure additionalLayerStores with stargz-store and verify lazy pulling [TC10]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(len(workerNodes)).To(o.BeNumerically(">=", 1)) + pureWorkers := getPureWorkerNodes(workerNodes) + if len(pureWorkers) < 1 { + e2eskipper.Skipf("Need at least 1 worker node for this test") + } + testNode := pureWorkers[0] + testNamespace := oc.Namespace() + eStargzImage := "quay.io/bgudi/test-small:estargz" + + // ===================================================================== + // PHASE 1: Deploy stargz-store on worker nodes + // ===================================================================== + g.By("PHASE 1: Deploying stargz-store on worker nodes") + stargzSetup := NewStargzStoreSetup(oc) + err = stargzSetup.Deploy(ctx) + o.Expect(err).NotTo(o.HaveOccurred()) + defer func() { + if stargzSetup.IsDeployed() { + stargzSetup.Cleanup(ctx) + } + }() + framework.Logf("stargz-store deployed successfully") + + g.By("Verifying stargz-store service is active on all workers") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "systemctl", "is-active", "stargz-store") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(strings.TrimSpace(output)).To(o.Equal("active"), + "stargz-store should be active on node %s", node.Name) + framework.Logf("Node %s: stargz-store service active", node.Name) + } + + // ===================================================================== + // PHASE 2: Create ContainerRuntimeConfig with stargz-store path + // ===================================================================== + g.By("PHASE 2: Creating ContainerRuntimeConfig with stargz-store path") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "stargz-comprehensive-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath(stargzSetup.GetStorePath())}, + }, + }, + }, + } + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("ContainerRuntimeConfig %s created with path: %s", ctrcfg.Name, stargzSetup.GetStorePath()) + + g.By("Waiting for ContainerRuntimeConfig to be processed by MCO") + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("ContainerRuntimeConfig processed by MCO") + + // ===================================================================== + // PHASE 3: Verify MCP rollout and nodes Ready + // ===================================================================== + g.By("PHASE 3: Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool started updating") + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool rollout completed") + + g.By("Verifying all nodes are Ready") + for _, node := range pureWorkers { + nodeObj, err := oc.AdminKubeClient().CoreV1().Nodes().Get(ctx, node.Name, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(isNodeInReadyState(nodeObj)).To(o.BeTrue(), + "Node %s should be Ready after MCP rollout", node.Name) + framework.Logf("Node %s: Ready", node.Name) + } + + // ===================================================================== + // PHASE 4: Verify storage.conf contains path with :ref suffix (MCO added) + // ===================================================================== + g.By("PHASE 4: Verifying storage.conf contains stargz-store path with :ref suffix") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + + // MCO automatically appends :ref suffix to all additionalLayerStores paths + expectedPathWithRef := fmt.Sprintf("%s:ref", stargzSetup.GetStorePath()) + o.Expect(output).To(o.ContainSubstring("additionallayerstores"), + "storage.conf should contain additionallayerstores on node %s", node.Name) + o.Expect(output).To(o.ContainSubstring(expectedPathWithRef), + "storage.conf should contain %s with :ref suffix on node %s", stargzSetup.GetStorePath(), node.Name) + framework.Logf("Node %s: storage.conf verified with path %s (MCO added :ref)", node.Name, expectedPathWithRef) + } + + g.By("Verifying CRI-O is active with new configuration") + crioStatus, err := ExecOnNodeWithChroot(oc, testNode.Name, "systemctl", "is-active", "crio") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(strings.TrimSpace(crioStatus)).To(o.Equal("active")) + framework.Logf("CRI-O is active") + + // ===================================================================== + // PHASE 5: Create first pod with eStargz image + // ===================================================================== + g.By("PHASE 5: Getting initial snapshot count in stargz-store") + initialSnapshots := getStargzSnapshotCount(oc, testNode.Name) + framework.Logf("Initial snapshot count: %d", initialSnapshots) + + g.By("Creating first pod with eStargz format image") + pod1Name := "stargz-test-pod-1" + pod1 := createTestPodSpec(pod1Name, testNamespace, eStargzImage, testNode.Name) + _, err = oc.AdminKubeClient().CoreV1().Pods(testNamespace).Create(ctx, pod1, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer deletePodAndWait(ctx, oc, testNamespace, pod1Name) + + g.By("Waiting for first pod to be running") + err = waitForPodRunning(ctx, oc, pod1Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("First pod %s is running", pod1Name) + + // ===================================================================== + // PHASE 6: Verify snapshot created in layer store path + // ===================================================================== + g.By("PHASE 6: Verifying snapshot is created in stargz-store") + time.Sleep(10 * time.Second) // Allow time for lazy pull and snapshot creation + + storeOutput, err := ExecOnNodeWithChroot(oc, testNode.Name, "ls", "-lRt", "/var/lib/stargz-store/store/") + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("stargz-store contents:\n%s", storeOutput) + + o.Expect(storeOutput).To(o.ContainSubstring("sha256:"), + "stargz-store should contain layer directories with sha256 digests") + + snapshotsAfterPod1 := getStargzSnapshotCount(oc, testNode.Name) + framework.Logf("Snapshot count after first pod: %d", snapshotsAfterPod1) + o.Expect(snapshotsAfterPod1).To(o.BeNumerically(">", initialSnapshots), + "Snapshots should be created after pulling eStargz image") + + // ===================================================================== + // PHASE 7: Create second pod with same image + // ===================================================================== + g.By("PHASE 7: Creating second pod with same eStargz image") + pod2Name := "stargz-test-pod-2" + pod2 := createTestPodSpec(pod2Name, testNamespace, eStargzImage, testNode.Name) + + startTime := time.Now() + _, err = oc.AdminKubeClient().CoreV1().Pods(testNamespace).Create(ctx, pod2, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer deletePodAndWait(ctx, oc, testNamespace, pod2Name) + + g.By("Waiting for second pod to be running") + err = waitForPodRunning(ctx, oc, pod2Name, 3*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + pullDuration := time.Since(startTime) + framework.Logf("Second pod %s started in %v (should be fast due to layer sharing)", pod2Name, pullDuration) + + // ===================================================================== + // PHASE 8: Verify second pod used existing snapshot (no new layers) + // ===================================================================== + g.By("PHASE 8: Verifying second pod used existing snapshot") + pod2Events, _ := oc.Run("describe").Args("pod", pod2Name).Output() + framework.Logf("Second pod events: %s", pod2Events) + + snapshotsAfterPod2 := getStargzSnapshotCount(oc, testNode.Name) + framework.Logf("Snapshot count after second pod: %d", snapshotsAfterPod2) + o.Expect(snapshotsAfterPod2).To(o.Equal(snapshotsAfterPod1), + "Snapshot count should remain same when using shared layers") + + // ===================================================================== + // PHASE 9: Verify through stargz-store and crio logs + // ===================================================================== + g.By("PHASE 9: Verifying through stargz-store logs") + stargzLogs, _ := ExecOnNodeWithChroot(oc, testNode.Name, "journalctl", "-u", "stargz-store", "--since", "5 minutes ago", "-n", "50") + framework.Logf("Recent stargz-store logs:\n%s", stargzLogs) + + g.By("Verifying through CRI-O logs") + crioLogs, _ := ExecOnNodeWithChroot(oc, testNode.Name, "journalctl", "-u", "crio", "--since", "5 minutes ago", "--grep", eStargzImage, "-n", "20") + framework.Logf("Recent CRI-O logs for image:\n%s", crioLogs) + + // ===================================================================== + // PHASE 10: Remove pods + // ===================================================================== + g.By("PHASE 10: Removing test pods") + err = oc.AdminKubeClient().CoreV1().Pods(testNamespace).Delete(ctx, pod1Name, metav1.DeleteOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + err = oc.AdminKubeClient().CoreV1().Pods(testNamespace).Delete(ctx, pod2Name, metav1.DeleteOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + time.Sleep(15 * time.Second) // Wait for pods to terminate + framework.Logf("Test pods removed") + + // ===================================================================== + // PHASE 11: Delete ContainerRuntimeConfig + // ===================================================================== + g.By("PHASE 11: Deleting ContainerRuntimeConfig") + err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Delete(ctx, ctrcfg.Name, metav1.DeleteOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("ContainerRuntimeConfig %s deleted", ctrcfg.Name) + + g.By("Waiting for MachineConfigPool to start updating after deletion") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool started updating after CRC deletion") + + g.By("Waiting for MachineConfigPool rollout to complete after deletion") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool rollout completed after deletion") + + // ===================================================================== + // PHASE 12: Verify storage.conf cleanup (path removed) + // ===================================================================== + g.By("PHASE 12: Verifying storage.conf cleanup after CRC deletion") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(output).NotTo(o.ContainSubstring(stargzSetup.GetStorePath()), + "storage.conf should not contain stargz-store path after ContainerRuntimeConfig deletion on node %s", + node.Name) + framework.Logf("Node %s: stargz-store path removed from storage.conf", node.Name) + } + + // Final Summary + framework.Logf("========================================") + framework.Logf("COMPREHENSIVE TEST RESULTS SUMMARY") + framework.Logf("========================================") + framework.Logf("1. stargz-store deployed: YES") + framework.Logf("2. stargz-store service active: YES") + framework.Logf("3. ContainerRuntimeConfig applied: YES") + framework.Logf("4. MCO/MCP rollout completed: YES") + framework.Logf("5. storage.conf updated with :ref: YES") + framework.Logf("6. CRI-O active: YES") + framework.Logf("7. All nodes Ready: YES") + framework.Logf("8. First pod with eStargz created: YES") + framework.Logf("9. Snapshots created: YES (count: %d -> %d)", initialSnapshots, snapshotsAfterPod1) + framework.Logf("10. Second pod layer sharing: VERIFIED (snapshot count unchanged)") + framework.Logf("11. stargz-store logs verified: YES") + framework.Logf("12. CRI-O logs verified: YES") + framework.Logf("13. Pods removed: YES") + framework.Logf("14. CRC deleted: YES") + framework.Logf("15. storage.conf cleanup: YES") + framework.Logf("========================================") + framework.Logf("Image: %s", eStargzImage) + framework.Logf("Test Node: %s", testNode.Name) + framework.Logf("========================================") + framework.Logf("Test PASSED: Comprehensive additionalLayerStores E2E verification complete") + }) + + // TC11: Update Existing Configuration + g.It("should update additionalLayerStores when ContainerRuntimeConfig is modified [TC11]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + pureWorkers := getPureWorkerNodes(workerNodes) + + g.By("Creating shared layer directories on worker nodes") + layerDirs := []string{"/var/lib/layerstore-1", "/var/lib/layerstore-2", "/var/lib/layerstore-3"} + err = createDirectoriesOnNodes(oc, pureWorkers, layerDirs) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupDirectoriesOnNodes(oc, pureWorkers, layerDirs) + + g.By("Creating initial ContainerRuntimeConfig with one layer store") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "layer-update-test-ctrcfg", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath("/var/lib/layerstore-1")}, + }, + }, + }, + } + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool started updating") + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Verifying initial configuration with :ref suffix") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + // MCO automatically appends :ref suffix + o.Expect(output).To(o.ContainSubstring("/var/lib/layerstore-1:ref"), + "storage.conf should contain /var/lib/layerstore-1:ref on node %s", node.Name) + } + + g.By("Updating ContainerRuntimeConfig to add second layer store") + currentCfg, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Get(ctx, ctrcfg.Name, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + + currentCfg.Spec.ContainerRuntimeConfig.AdditionalLayerStores = []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath("/var/lib/layerstore-1")}, + {Path: machineconfigv1.StorePath("/var/lib/layerstore-2")}, + } + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Update(ctx, currentCfg, metav1.UpdateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating after update") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool started updating") + + g.By("Waiting for MachineConfigPool rollout to complete after update") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Verifying updated configuration includes both stores with :ref suffix") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + // MCO automatically appends :ref suffix to all paths + o.Expect(output).To(o.ContainSubstring("/var/lib/layerstore-1:ref"), + "storage.conf should contain /var/lib/layerstore-1:ref on node %s", node.Name) + o.Expect(output).To(o.ContainSubstring("/var/lib/layerstore-2:ref"), + "storage.conf should contain /var/lib/layerstore-2:ref on node %s", node.Name) + framework.Logf("Node %s: Both layer stores configured with :ref suffix after update", node.Name) + } + + framework.Logf("Test PASSED: ContainerRuntimeConfig update applied successfully") + }) + + // TC16: Missing Storage Path Handling + + // TC12: Multiple Storage Paths (up to max 5) + g.It("should configure multiple additionalLayerStores paths up to maximum [TC12]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + pureWorkers := getPureWorkerNodes(workerNodes) + + g.By("Creating multiple shared layer directories on worker nodes (max 5)") + layerDirs := []string{ + "/var/lib/layerstore-1", + "/var/lib/layerstore-2", + "/var/lib/layerstore-3", + "/var/lib/layerstore-4", + "/var/lib/layerstore-5", + } + err = createDirectoriesOnNodes(oc, pureWorkers, layerDirs) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupDirectoriesOnNodes(oc, pureWorkers, layerDirs) + + g.By("Creating ContainerRuntimeConfig with 5 layer stores (maximum allowed)") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "multi-layerstore-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath("/var/lib/layerstore-1")}, + {Path: machineconfigv1.StorePath("/var/lib/layerstore-2")}, + {Path: machineconfigv1.StorePath("/var/lib/layerstore-3")}, + {Path: machineconfigv1.StorePath("/var/lib/layerstore-4")}, + {Path: machineconfigv1.StorePath("/var/lib/layerstore-5")}, + }, + }, + }, + } + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool started updating") + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Verifying all 5 layer stores configured with :ref suffix on nodes") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + // MCO automatically appends :ref suffix to all paths + for _, dir := range layerDirs { + expectedPathWithRef := fmt.Sprintf("%s:ref", dir) + o.Expect(output).To(o.ContainSubstring(expectedPathWithRef), + "storage.conf should contain %s with :ref suffix on node %s", dir, node.Name) + } + framework.Logf("Node %s: All 5 layer stores configured with :ref suffix", node.Name) + } + + framework.Logf("Test PASSED: Multiple additionalLayerStores (max 5) configured successfully") + }) + + // TC13: Fallback when non-eStargz image is used + g.It("should fallback to standard pull when non-eStargz image is used [TC13]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + pureWorkers := getPureWorkerNodes(workerNodes) + if len(pureWorkers) < 1 { + e2eskipper.Skipf("Need at least 1 worker node for this test") + } + testNode := pureWorkers[0] + + g.By("Phase 1: Deploying stargz-store for additionalLayerStores") + stargzSetup := NewStargzStoreSetup(oc) + err = stargzSetup.Deploy(ctx) + o.Expect(err).NotTo(o.HaveOccurred()) + defer stargzSetup.Cleanup(ctx) + framework.Logf("stargz-store deployed successfully") + + g.By("Phase 2: Creating ContainerRuntimeConfig with additionalLayerStores") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "layer-fallback-nonstargz-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath(stargzSetup.GetStorePath())}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool started updating") + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MCP rollout completed") + + g.By("Phase 3: Verifying stargz-store is running") + err = stargzSetup.VerifyStorageConfContainsStargz(ctx) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Phase 4: Testing fallback with non-eStargz image (standard OCI image)") + // Use a standard OCI image (NOT eStargz format) + standardImage := "quay.io/openshifttest/busybox@sha256:c3839dd800b9eb7603340509769c43e146a74c63dca3045a8e7dc8ee07e53966" + framework.Logf("Pulling standard OCI image (non-eStargz): %s", standardImage) + + initialSnapshots := getStargzSnapshotCount(oc, testNode.Name) + framework.Logf("Initial stargz snapshots: %d", initialSnapshots) + + // Create pod with standard OCI image + podName := "fallback-standard-oci-pod" + pod := createTestPodSpec(podName, "default", standardImage, testNode.Name) + _, err = oc.AdminKubeClient().CoreV1().Pods("default").Create(ctx, pod, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer deletePodAndWait(ctx, oc, "default", podName) + + g.By("Waiting for pod to start with standard image") + err = waitForPodRunning(ctx, oc, podName, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("Pod started successfully with standard OCI image") + + // Verify stargz snapshots did NOT increase (fallback to standard pull) + snapshotsAfterStandard := getStargzSnapshotCount(oc, testNode.Name) + framework.Logf("Snapshots after standard image: %d", snapshotsAfterStandard) + + snapshotDiff := snapshotsAfterStandard - initialSnapshots + framework.Logf("Snapshot difference: %d", snapshotDiff) + + // Standard images should NOT create stargz snapshots (or very minimal) + // The image should be pulled normally using standard OCI pull mechanism + o.Expect(snapshotDiff).To(o.BeNumerically("<=", 1), + "Standard OCI image should not create significant stargz snapshots (fallback to standard pull)") + + g.By("Verifying pod is running and healthy") + podObj, err := oc.AdminKubeClient().CoreV1().Pods("default").Get(ctx, podName, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(podObj.Status.Phase).To(o.Equal(corev1.PodRunning)) + + deletePodAndWait(ctx, oc, "default", podName) + + framework.Logf("Test PASSED: Non-eStargz image successfully used standard pull mechanism (fallback)") + }) + + // TC14: Fallback when stargz-store is stopped + g.It("should fallback to standard pull when stargz-store service is stopped [TC14]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + pureWorkers := getPureWorkerNodes(workerNodes) + if len(pureWorkers) < 1 { + e2eskipper.Skipf("Need at least 1 worker node for this test") + } + testNode := pureWorkers[0] + + g.By("Phase 1: Deploying stargz-store for additionalLayerStores") + stargzSetup := NewStargzStoreSetup(oc) + err = stargzSetup.Deploy(ctx) + o.Expect(err).NotTo(o.HaveOccurred()) + defer stargzSetup.Cleanup(ctx) + framework.Logf("stargz-store deployed successfully") + + g.By("Phase 2: Creating ContainerRuntimeConfig with additionalLayerStores") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "layer-fallback-stopped-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath(stargzSetup.GetStorePath())}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool started updating") + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MCP rollout completed") + + g.By("Phase 3: Verifying stargz-store is running") + err = stargzSetup.VerifyStorageConfContainsStargz(ctx) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Phase 4: Testing eStargz image works when stargz-store is running") + eStargzImage := "quay.io/bgudi/test-small:estargz" + framework.Logf("Pulling eStargz image with stargz-store running: %s", eStargzImage) + + initialSnapshots := getStargzSnapshotCount(oc, testNode.Name) + framework.Logf("Initial stargz snapshots: %d", initialSnapshots) + + // First pod - stargz-store running + pod1Name := "fallback-estargz-running-pod" + pod1 := createTestPodSpec(pod1Name, "default", eStargzImage, testNode.Name) + _, err = oc.AdminKubeClient().CoreV1().Pods("default").Create(ctx, pod1, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer deletePodAndWait(ctx, oc, "default", pod1Name) + + err = waitForPodRunning(ctx, oc, pod1Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("First pod running with stargz-store active") + + snapshotsAfterPod1 := getStargzSnapshotCount(oc, testNode.Name) + framework.Logf("Snapshots after first pod: %d", snapshotsAfterPod1) + o.Expect(snapshotsAfterPod1).To(o.BeNumerically(">", initialSnapshots), + "eStargz image should create snapshots when stargz-store is running") + + deletePodAndWait(ctx, oc, "default", pod1Name) + time.Sleep(10 * time.Second) + + g.By("Phase 5: Stopping stargz-store service on test node") + stopOutput, err := ExecOnNodeWithChroot(oc, testNode.Name, "systemctl", "stop", "stargz-store") + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("stargz-store stopped: %s", stopOutput) + defer func() { + // Restart stargz-store at cleanup + framework.Logf("Restarting stargz-store service") + ExecOnNodeWithChroot(oc, testNode.Name, "systemctl", "start", "stargz-store") + }() + + // Verify service is stopped + statusOutput, err := ExecOnNodeWithChroot(oc, testNode.Name, "systemctl", "is-active", "stargz-store") + framework.Logf("stargz-store status after stop: %s", strings.TrimSpace(statusOutput)) + o.Expect(strings.TrimSpace(statusOutput)).NotTo(o.Equal("active")) + + g.By("Phase 6: Testing fallback when stargz-store is stopped") + framework.Logf("Pulling eStargz image with stargz-store stopped (should fallback to standard pull)") + + // Second pod - stargz-store stopped, should fallback to standard pull + pod2Name := "fallback-estargz-stopped-pod" + pod2 := createTestPodSpec(pod2Name, "default", eStargzImage, testNode.Name) + _, err = oc.AdminKubeClient().CoreV1().Pods("default").Create(ctx, pod2, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer deletePodAndWait(ctx, oc, "default", pod2Name) + + err = waitForPodRunning(ctx, oc, pod2Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("Second pod running successfully with stargz-store stopped (fallback to standard pull)") + + // Verify pod is healthy + pod2Obj, err := oc.AdminKubeClient().CoreV1().Pods("default").Get(ctx, pod2Name, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(pod2Obj.Status.Phase).To(o.Equal(corev1.PodRunning)) + + // Verify snapshots did not change significantly (stargz-store was stopped) + snapshotsAfterPod2 := getStargzSnapshotCount(oc, testNode.Name) + framework.Logf("Snapshots after second pod (stargz stopped): %d", snapshotsAfterPod2) + + snapshotDiffAfterStop := snapshotsAfterPod2 - snapshotsAfterPod1 + framework.Logf("New snapshots created with stargz-store stopped: %d", snapshotDiffAfterStop) + + // When stargz-store is stopped, eStargz images should fallback to standard pull + // No new stargz snapshots should be created + o.Expect(snapshotDiffAfterStop).To(o.BeNumerically("<=", 1), + "When stargz-store is stopped, eStargz images should fallback to standard pull (no new snapshots)") + + deletePodAndWait(ctx, oc, "default", pod2Name) + + framework.Logf("Test PASSED: eStargz image successfully fell back to standard pull when stargz-store was stopped") + }) + + // TC15: Comprehensive Nydus layer store test + g.It("should successfully configure and use nydus-store as additional layer store [TC15][Disruptive]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + const ( + nydusNamespace = "nydus-store" + nydusSetupYAML = "/home/bgudi/work/src/github.com/openshift/epic/additionalArtifactsStore/testfiles/nydus-store-setup.yaml" + nydusStorePath = "/var/lib/nydus-store" + nydusTestPodImage = "quay.io/bgudi/test-small:oci" + nydusTestName = "test-nydus-layerstore" + ) + + g.By("Phase 1: Deploying nydus-store DaemonSet") + framework.Logf("Applying nydus-store setup from: %s", nydusSetupYAML) + + // Apply the nydus-store setup DaemonSet + output, err := oc.AsAdmin().Run("apply").Args("-f", nydusSetupYAML).Output() + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("Setup DaemonSet applied:\n%s", output) + + // Grant privileged SCC to the service account + g.By("Granting privileged SCC to nydus-store-installer") + output, err = oc.AsAdmin().Run("adm").Args("policy", "add-scc-to-user", "privileged", "-z", "nydus-store-installer", "-n", nydusNamespace).Output() + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("Privileged SCC granted:\n%s", output) + + // Ensure cleanup on exit + defer func() { + framework.Logf("Cleaning up nydus-store DaemonSet") + oc.AsAdmin().Run("delete").Args("-f", nydusSetupYAML, "--ignore-not-found=true").Execute() + }() + + g.By("Phase 2: Waiting for nydus-store installer pods to be Running") + framework.Logf("Waiting for all nydus-store installer pods to complete build and be running...") + + // Wait for DaemonSet pods to be ready (build takes 1-2 minutes per node) + err = waitForDaemonSetReady(ctx, oc, nydusNamespace, "nydus-store-installer", 10*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("All nydus-store installer pods are running") + + // Get logs from one of the pods to verify installation + podList, err := oc.AdminKubeClient().CoreV1().Pods(nydusNamespace).List(ctx, metav1.ListOptions{ + LabelSelector: "app=nydus-store-installer", + }) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(len(podList.Items)).To(o.BeNumerically(">", 0)) + + samplePod := podList.Items[0].Name + logs, err := oc.AsAdmin().Run("logs").Args("-n", nydusNamespace, samplePod, "--tail=20").Output() + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("Sample installation logs from pod %s:\n%s", samplePod, logs) + o.Expect(logs).To(o.ContainSubstring("Setup complete!")) + + g.By("Phase 3: Verifying nydus-store service on worker nodes") + framework.Logf("Checking nydus-store service status on all worker nodes") + + // Get all worker nodes + nodes, err := oc.AdminKubeClient().CoreV1().Nodes().List(ctx, metav1.ListOptions{ + LabelSelector: "node-role.kubernetes.io/worker=", + }) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(len(nodes.Items)).To(o.BeNumerically(">", 0)) + + // Verify service on each node + for _, node := range nodes.Items { + framework.Logf("Verifying nydus-store service on node: %s", node.Name) + statusOutput, err := ExecOnNodeWithChroot(oc, node.Name, "systemctl", "status", "nydus-store", "--no-pager") + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("nydus-store status on %s:\n%s", node.Name, statusOutput) + o.Expect(statusOutput).To(o.ContainSubstring("active (running)")) + } + + g.By("Phase 4: Creating ContainerRuntimeConfig with nydus layer store") + ctrcfg := createAdditionalLayerStoresCTRCfg(nydusTestName, nydusStorePath) + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("Created ContainerRuntimeConfig: %s with additionalLayerStores: %s", nydusTestName, nydusStorePath) + + defer func() { + framework.Logf("Cleaning up ContainerRuntimeConfig: %s", nydusTestName) + mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Delete(ctx, nydusTestName, metav1.DeleteOptions{}) + framework.Logf("Waiting for MachineConfigPool to start updating after CRC deletion") + waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + framework.Logf("Waiting for MachineConfigPool rollout after cleanup") + waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + }() + + // Wait for ContainerRuntimeConfig to be successful + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, nydusTestName, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool started updating") + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool rollout completed successfully") + + g.By("Phase 5: Verifying storage.conf configuration on worker nodes") + // Pick first worker node for verification + testNode := nodes.Items[0] + framework.Logf("Verifying storage.conf on node: %s", testNode.Name) + + storageConf, err := ExecOnNodeWithChroot(oc, testNode.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("storage.conf content:\n%s", storageConf) + + // Verify additionallayerstores field exists and contains nydus path with :ref + o.Expect(strings.ToLower(storageConf)).To(o.ContainSubstring("additionallayerstores")) + expectedPath := nydusStorePath + ":ref" + o.Expect(storageConf).To(o.ContainSubstring(expectedPath)) + framework.Logf("Verified: storage.conf contains additionallayerstores with path: %s", expectedPath) + + g.By("Phase 6: Deploying test pod with OCI image") + framework.Logf("Creating test pod with image: %s", nydusTestPodImage) + + podName := "nydus-test-pod" + pod := createTestPodSpec(podName, "default", nydusTestPodImage, testNode.Name) + _, err = oc.AdminKubeClient().CoreV1().Pods("default").Create(ctx, pod, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer deletePodAndWait(ctx, oc, "default", podName) + + // Wait for pod to be running + err = waitForPodRunning(ctx, oc, podName, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("Test pod is running successfully") + + // Verify pod is healthy + podObj, err := oc.AdminKubeClient().CoreV1().Pods("default").Get(ctx, podName, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(podObj.Status.Phase).To(o.Equal(corev1.PodRunning)) + framework.Logf("Pod phase: %s", podObj.Status.Phase) + + g.By("Phase 7: Verifying nydus-store data directory") + // Check nydus-store directory for data + nydusStoreContent, err := ExecOnNodeWithChroot(oc, testNode.Name, "ls", "-la", nydusStorePath) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("nydus-store directory content:\n%s", nydusStoreContent) + + // Should contain pool/ and store/ subdirectories + o.Expect(nydusStoreContent).To(o.Or( + o.ContainSubstring("pool"), + o.ContainSubstring("store"), + )) + + g.By("Phase 8: Checking nydus-store service logs") + serviceLogs, err := ExecOnNodeWithChroot(oc, testNode.Name, "journalctl", "-u", "nydus-store", "--since", "5 minutes ago", "-n", "30") + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("nydus-store service logs (last 30 lines):\n%s", serviceLogs) + + // Delete the test pod + deletePodAndWait(ctx, oc, "default", podName) + + framework.Logf("Test PASSED: Successfully configured and used nydus-store as additional layer store") + }) + + // TC18: Default resolution unchanged (P1 Regression) + + // TC19: Permission denied handling (P2) + + // TC20: Config merge (multiple CRCs) (P2) + + // TC21: Existing stores still work (P2) +}) + +func createAdditionalLayerStoresCTRCfg(testName, storePath string) *machineconfigv1.ContainerRuntimeConfig { + return &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: testName, + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath(storePath)}, + }, + }, + }, + } +} + +// Stargz-store E2E tests - tests with actual stargz-store daemon diff --git a/test/extended/node/additional_stores_combined.go b/test/extended/node/additional_stores_combined.go new file mode 100644 index 000000000000..a89011e45314 --- /dev/null +++ b/test/extended/node/additional_stores_combined.go @@ -0,0 +1,833 @@ +package node + +import ( + "context" + "fmt" + "strings" + "time" + + g "github.com/onsi/ginkgo/v2" + o "github.com/onsi/gomega" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/kubernetes/test/e2e/framework" + e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" + + configv1 "github.com/openshift/api/config/v1" + machineconfigv1 "github.com/openshift/api/machineconfiguration/v1" + mcclient "github.com/openshift/client-go/machineconfiguration/clientset/versioned" + exutil "github.com/openshift/origin/test/extended/util" +) + +// Non-disruptive API validation tests for combined storage types +var _ = g.Describe("[Jira:Node/CRI-O][sig-node][Feature:AdditionalStorageSupport] Combined Additional Stores API Validation", func() { + defer g.GinkgoRecover() + + var oc = exutil.NewCLI("combined-stores-api") + + g.BeforeEach(func(ctx context.Context) { + g.By("Checking TechPreviewNoUpgrade feature set is enabled") + featureGate, err := oc.AdminConfigClient().ConfigV1().FeatureGates().Get(ctx, "cluster", metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + if featureGate.Spec.FeatureSet != configv1.TechPreviewNoUpgrade { + g.Skip("Skipping test: TechPreviewNoUpgrade feature set is required for additional storage configuration") + } + }) + + // TC1: Reject if any store type has invalid path + g.It("should reject if any store type has invalid path in combined config [TC1]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with valid layer/artifact but invalid image path") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "combined-invalid-image-path-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath("/var/lib/stargz-store")}, + }, + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath("relative/invalid/path")}, + }, + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath("/mnt/ssd-artifacts")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).To(o.HaveOccurred()) + framework.Logf("Test PASSED: Combined config with invalid image path rejected: %v", err) + }) + + // TC2: Reject if layer stores exceed max while other stores are valid + g.It("should reject if layer stores exceed max even with valid image/artifact stores [TC2]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with 6 layer stores (exceeds max of 5)") + layerStores := make([]machineconfigv1.AdditionalLayerStore, 6) + for i := 0; i < 6; i++ { + layerStores[i] = machineconfigv1.AdditionalLayerStore{ + Path: machineconfigv1.StorePath(fmt.Sprintf("/var/lib/layer-store-%d", i)), + } + } + + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "combined-exceed-layer-max-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: layerStores, + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath("/mnt/nfs-images")}, + }, + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath("/mnt/ssd-artifacts")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).To(o.HaveOccurred()) + o.Expect(err.Error()).To(o.ContainSubstring("must have at most 5 items")) + framework.Logf("Test PASSED: Exceeding layer store max rejected: %v", err) + }) + + // TC3: Reject duplicate paths within same store type in combined config + g.It("should reject duplicate paths within same store type in combined config [TC3]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Creating ContainerRuntimeConfig with duplicate paths in image stores") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "combined-duplicate-image-path-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath("/var/lib/stargz-store")}, + }, + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath("/mnt/nfs-images")}, + {Path: machineconfigv1.StorePath("/mnt/nfs-images")}, + }, + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath("/mnt/ssd-artifacts")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).To(o.HaveOccurred()) + o.Expect(err.Error()).To(o.ContainSubstring("duplicate")) + framework.Logf("Test PASSED: Duplicate paths in same store type rejected: %v", err) + }) +}) + +// Disruptive E2E tests for combined storage types +var _ = g.Describe("[Jira:Node/CRI-O][sig-node][Feature:AdditionalStorageSupport][Serial][Disruptive] Combined Additional Stores E2E", func() { + defer g.GinkgoRecover() + + var oc = exutil.NewCLI("combined-stores-e2e") + + g.BeforeEach(func(ctx context.Context) { + g.By("Checking TechPreviewNoUpgrade feature set is enabled") + featureGate, err := oc.AdminConfigClient().ConfigV1().FeatureGates().Get(ctx, "cluster", metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + if featureGate.Spec.FeatureSet != configv1.TechPreviewNoUpgrade { + g.Skip("Skipping test: TechPreviewNoUpgrade feature set is required for additional storage configuration") + } + + infra, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(ctx, "cluster", metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + if infra.Status.PlatformStatus != nil && infra.Status.PlatformStatus.Type == configv1.AzurePlatformType { + g.Skip("Skipping test on Microsoft Azure cluster") + } + }) + + // TC4: All three storage types with LogLevel/PidsLimit and verify storage.conf + g.It("should configure all three storage types with CRI-O fields and generate correct storage.conf [TC4]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(len(workerNodes)).To(o.BeNumerically(">", 0)) + pureWorkers := getPureWorkerNodes(workerNodes) + if len(pureWorkers) < 1 { + e2eskipper.Skipf("Need at least 1 worker node for this test") + } + + g.By("Creating shared directories on worker nodes") + allDirs := []string{ + "/var/lib/combined-layers", + "/var/lib/combined-images", + "/var/lib/combined-artifacts", + } + err = createDirectoriesOnNodes(oc, pureWorkers, allDirs) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupDirectoriesOnNodes(oc, pureWorkers, allDirs) + + g.By("Creating ContainerRuntimeConfig with all three storage types and CRI-O fields") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "combined-e2e-all-stores-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + LogLevel: "info", + PidsLimit: int64Ptr(4096), + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath("/var/lib/combined-layers")}, + }, + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath("/var/lib/combined-images")}, + }, + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath("/var/lib/combined-artifacts")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + g.By("Waiting for ContainerRuntimeConfig to be processed by MCO") + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("MachineConfigPool started updating") + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Verifying storage.conf contains all store types on all worker nodes") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + + o.Expect(output).To(o.ContainSubstring("/var/lib/combined-layers"), + "storage.conf should contain layer store path on node %s", node.Name) + o.Expect(output).To(o.ContainSubstring("/var/lib/combined-images"), + "storage.conf should contain image store path on node %s", node.Name) + o.Expect(output).To(o.ContainSubstring("/var/lib/combined-artifacts"), + "storage.conf should contain artifact store path on node %s", node.Name) + + framework.Logf("Node %s: All three storage types verified in storage.conf", node.Name) + } + + g.By("Verifying CRI-O is running and has correct config") + for _, node := range pureWorkers { + crioStatus, err := ExecOnNodeWithChroot(oc, node.Name, "systemctl", "is-active", "crio") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(strings.TrimSpace(crioStatus)).To(o.Equal("active")) + } + + framework.Logf("Test PASSED: All three storage types with CRI-O fields configured successfully") + }) + + // TC5: Maximum stores for each type (5 layer, 10 image, 10 artifact) + g.It("should configure maximum stores for each type [TC5]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + pureWorkers := getPureWorkerNodes(workerNodes) + + g.By("Creating directories for maximum stores") + var allDirs []string + for i := 0; i < 5; i++ { + allDirs = append(allDirs, fmt.Sprintf("/var/lib/layer-store-%d", i)) + } + for i := 0; i < 10; i++ { + allDirs = append(allDirs, fmt.Sprintf("/var/lib/image-store-%d", i)) + } + for i := 0; i < 10; i++ { + allDirs = append(allDirs, fmt.Sprintf("/var/lib/artifact-store-%d", i)) + } + err = createDirectoriesOnNodes(oc, pureWorkers, allDirs) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupDirectoriesOnNodes(oc, pureWorkers, allDirs) + + g.By("Creating ContainerRuntimeConfig with max stores for each type") + layerStores := make([]machineconfigv1.AdditionalLayerStore, 5) + for i := 0; i < 5; i++ { + layerStores[i] = machineconfigv1.AdditionalLayerStore{ + Path: machineconfigv1.StorePath(fmt.Sprintf("/var/lib/layer-store-%d", i)), + } + } + + imageStores := make([]machineconfigv1.AdditionalImageStore, 10) + for i := 0; i < 10; i++ { + imageStores[i] = machineconfigv1.AdditionalImageStore{ + Path: machineconfigv1.StorePath(fmt.Sprintf("/var/lib/image-store-%d", i)), + } + } + + artifactStores := make([]machineconfigv1.AdditionalArtifactStore, 10) + for i := 0; i < 10; i++ { + artifactStores[i] = machineconfigv1.AdditionalArtifactStore{ + Path: machineconfigv1.StorePath(fmt.Sprintf("/var/lib/artifact-store-%d", i)), + } + } + + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "combined-max-stores-e2e-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: layerStores, + AdditionalImageStores: imageStores, + AdditionalArtifactStores: artifactStores, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Verifying storage.conf contains all stores") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + + // Verify at least some of the stores are present + o.Expect(output).To(o.ContainSubstring("/var/lib/layer-store-0")) + o.Expect(output).To(o.ContainSubstring("/var/lib/layer-store-4")) + o.Expect(output).To(o.ContainSubstring("/var/lib/image-store-0")) + o.Expect(output).To(o.ContainSubstring("/var/lib/image-store-9")) + o.Expect(output).To(o.ContainSubstring("/var/lib/artifact-store-0")) + o.Expect(output).To(o.ContainSubstring("/var/lib/artifact-store-9")) + + framework.Logf("Node %s: Maximum stores verified in storage.conf", node.Name) + } + + framework.Logf("Test PASSED: Maximum stores for all types configured (5 layer, 10 image, 10 artifact)") + }) + + // TC6: Same path across different store types + g.It("should allow same path across different store types if API accepts [TC6]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + pureWorkers := getPureWorkerNodes(workerNodes) + + g.By("Creating shared directory") + allDirs := []string{"/mnt/shared-storage"} + err = createDirectoriesOnNodes(oc, pureWorkers, allDirs) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupDirectoriesOnNodes(oc, pureWorkers, allDirs) + + g.By("Creating ContainerRuntimeConfig with same path in different store types") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "combined-same-path-e2e-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath("/mnt/shared-storage")}, + }, + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath("/mnt/shared-storage")}, + }, + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath("/mnt/shared-storage")}, + }, + }, + }, + } + + created, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + if err != nil { + framework.Logf("API rejected same path across different store types (expected): %v", err) + g.Skip("API does not allow same path across different store types") + } + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + o.Expect(created.Spec.ContainerRuntimeConfig.AdditionalLayerStores).To(o.HaveLen(1)) + o.Expect(created.Spec.ContainerRuntimeConfig.AdditionalImageStores).To(o.HaveLen(1)) + o.Expect(created.Spec.ContainerRuntimeConfig.AdditionalArtifactStores).To(o.HaveLen(1)) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Verifying storage.conf contains shared path for all types") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + + o.Expect(output).To(o.ContainSubstring("/mnt/shared-storage")) + framework.Logf("Node %s: Shared storage path verified in storage.conf", node.Name) + } + + framework.Logf("Test PASSED: Same path across different store types configured successfully") + }) + + // TC7: Update combined config - add more stores to each type + g.It("should update combined config by adding stores to each type [TC7]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + pureWorkers := getPureWorkerNodes(workerNodes) + + g.By("Creating shared directories") + allDirs := []string{ + "/var/lib/layer-1", "/var/lib/layer-2", + "/var/lib/image-1", "/var/lib/image-2", + "/var/lib/artifact-1", "/var/lib/artifact-2", + } + err = createDirectoriesOnNodes(oc, pureWorkers, allDirs) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupDirectoriesOnNodes(oc, pureWorkers, allDirs) + + g.By("Creating initial ContainerRuntimeConfig with one store per type") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "combined-update-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath("/var/lib/layer-1")}, + }, + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath("/var/lib/image-1")}, + }, + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath("/var/lib/artifact-1")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Updating ContainerRuntimeConfig to add second store to each type") + currentCfg, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Get(ctx, ctrcfg.Name, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + + currentCfg.Spec.ContainerRuntimeConfig.AdditionalLayerStores = []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath("/var/lib/layer-1")}, + {Path: machineconfigv1.StorePath("/var/lib/layer-2")}, + } + currentCfg.Spec.ContainerRuntimeConfig.AdditionalImageStores = []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath("/var/lib/image-1")}, + {Path: machineconfigv1.StorePath("/var/lib/image-2")}, + } + currentCfg.Spec.ContainerRuntimeConfig.AdditionalArtifactStores = []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath("/var/lib/artifact-1")}, + {Path: machineconfigv1.StorePath("/var/lib/artifact-2")}, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Update(ctx, currentCfg, metav1.UpdateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Verifying updated configuration has all stores") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + + for _, dir := range allDirs { + o.Expect(output).To(o.ContainSubstring(dir), + "storage.conf should contain %s on node %s", dir, node.Name) + } + framework.Logf("Node %s: All stores verified after update", node.Name) + } + + framework.Logf("Test PASSED: Combined config update applied successfully") + }) + + // TC8: Remove one store type while keeping others + g.It("should remove one store type while keeping others [TC8]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + pureWorkers := getPureWorkerNodes(workerNodes) + + g.By("Creating shared directories") + allDirs := []string{"/var/lib/layers", "/var/lib/images", "/var/lib/artifacts"} + err = createDirectoriesOnNodes(oc, pureWorkers, allDirs) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupDirectoriesOnNodes(oc, pureWorkers, allDirs) + + g.By("Creating ContainerRuntimeConfig with all three store types") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "combined-remove-type-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath("/var/lib/layers")}, + }, + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath("/var/lib/images")}, + }, + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath("/var/lib/artifacts")}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Updating ContainerRuntimeConfig to remove layer stores") + currentCfg, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Get(ctx, ctrcfg.Name, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + + currentCfg.Spec.ContainerRuntimeConfig.AdditionalLayerStores = nil + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Update(ctx, currentCfg, metav1.UpdateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Verifying layer stores removed but image/artifact stores remain") + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(oc, node.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + + o.Expect(output).NotTo(o.ContainSubstring("/var/lib/layers"), + "storage.conf should not contain layer store path on node %s", node.Name) + o.Expect(output).To(o.ContainSubstring("/var/lib/images"), + "storage.conf should still contain image store path on node %s", node.Name) + o.Expect(output).To(o.ContainSubstring("/var/lib/artifacts"), + "storage.conf should still contain artifact store path on node %s", node.Name) + + framework.Logf("Node %s: Layer stores removed, image/artifact stores remain", node.Name) + } + + framework.Logf("Test PASSED: Partial removal of store types successful") + }) + + // TC9: Comprehensive functional test - verify all three storage types actually work + g.It("should functionally verify all three storage types work together [TC9]", func(ctx context.Context) { + mcClient, err := mcclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + + workerNodes, err := getNodesByLabel(ctx, oc, "node-role.kubernetes.io/worker") + o.Expect(err).NotTo(o.HaveOccurred()) + pureWorkers := getPureWorkerNodes(workerNodes) + if len(pureWorkers) < 1 { + e2eskipper.Skipf("Need at least 1 worker node for this test") + } + testNode := pureWorkers[0] + + // Phase 1: Deploy stargz-store for layer stores + g.By("Phase 1: Deploying stargz-store for additionalLayerStores") + stargzSetup := NewStargzStoreSetup(oc) + err = stargzSetup.Deploy(ctx) + o.Expect(err).NotTo(o.HaveOccurred()) + defer stargzSetup.Cleanup(ctx) + + // Phase 2: Pre-populate image store + g.By("Phase 2: Pre-populating additionalImageStores") + imageStorePath := "/var/lib/combined-imagestore" + allDirs := []string{imageStorePath} + + // Also create artifact store directory + artifactStorePath := "/var/lib/combined-artifactstore" + allDirs = append(allDirs, artifactStorePath) + + err = createDirectoriesOnNodes(oc, pureWorkers, allDirs) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupDirectoriesOnNodes(oc, pureWorkers, allDirs) + + // Pre-populate test image in image store + testImage := "quay.io/openshifttest/busybox@sha256:c3839dd800b9eb7603340509769c43e146a74c63dca3045a8e7dc8ee07e53966" + framework.Logf("Pre-populating image %s to %s on node %s", testImage, imageStorePath, testNode.Name) + + // Use skopeo to copy image to additional image store + skopeoCmd := fmt.Sprintf("skopeo copy docker://%s dir:%s/prepopulated-image", testImage, imageStorePath) + _, err = ExecOnNodeWithChroot(oc, testNode.Name, "bash", "-c", skopeoCmd) + o.Expect(err).NotTo(o.HaveOccurred()) + + // Verify image was copied + lsOutput, err := ExecOnNodeWithChroot(oc, testNode.Name, "ls", "-la", imageStorePath+"/prepopulated-image") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(lsOutput).To(o.ContainSubstring("manifest.json")) + framework.Logf("Image pre-populated successfully: %s", lsOutput) + + // Phase 3: Create ContainerRuntimeConfig with all three storage types + g.By("Phase 3: Creating ContainerRuntimeConfig with all three storage types") + ctrcfg := &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "combined-functional-test", + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalLayerStores: []machineconfigv1.AdditionalLayerStore{ + {Path: machineconfigv1.StorePath(stargzSetup.GetStorePath())}, + }, + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath(imageStorePath)}, + }, + AdditionalArtifactStores: []machineconfigv1.AdditionalArtifactStore{ + {Path: machineconfigv1.StorePath(artifactStorePath)}, + }, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Create(ctx, ctrcfg, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer cleanupContainerRuntimeConfig(ctx, mcClient, ctrcfg.Name) + + err = waitForContainerRuntimeConfigSuccess(ctx, mcClient, ctrcfg.Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool to start updating") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Waiting for MachineConfigPool rollout to complete") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + // Phase 4: Verify storage.conf has all three storage types + g.By("Phase 4: Verifying storage.conf contains all three storage types") + storageConfOutput, err := ExecOnNodeWithChroot(oc, testNode.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + + expectedLayerPath := fmt.Sprintf("%s:ref", stargzSetup.GetStorePath()) + o.Expect(storageConfOutput).To(o.ContainSubstring(expectedLayerPath)) + o.Expect(storageConfOutput).To(o.ContainSubstring(imageStorePath)) + o.Expect(storageConfOutput).To(o.ContainSubstring(artifactStorePath)) + framework.Logf("All three storage types verified in storage.conf") + + // Phase 5: Verify stargz-store is running + g.By("Phase 5: Verifying stargz-store service is active") + err = stargzSetup.VerifyStorageConfContainsStargz(ctx) + o.Expect(err).NotTo(o.HaveOccurred()) + + // Phase 6: Test layer store functionality - stargz lazy pulling + g.By("Phase 6: Testing additionalLayerStores - stargz lazy pulling") + estargzImage := "quay.io/bgudi/test-small:estargz" + + // Get initial snapshot count + initialSnapshots := getStargzSnapshotCount(oc, testNode.Name) + framework.Logf("Initial stargz snapshots: %d", initialSnapshots) + + // Pull eStargz image first time + framework.Logf("Pulling eStargz image for first time: %s", estargzImage) + pod1Name := "combined-test-estargz-pod1" + pod1 := createTestPodSpec(pod1Name, "default", estargzImage, testNode.Name) + _, err = oc.AdminKubeClient().CoreV1().Pods("default").Create(ctx, pod1, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer deletePodAndWait(ctx, oc, "default", pod1Name) + + err = waitForPodRunning(ctx, oc, pod1Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("First pod running successfully") + + // Get snapshot count after first pull + snapshotsAfterFirst := getStargzSnapshotCount(oc, testNode.Name) + framework.Logf("Snapshots after first pull: %d", snapshotsAfterFirst) + o.Expect(snapshotsAfterFirst).To(o.BeNumerically(">", initialSnapshots)) + + // Delete first pod + deletePodAndWait(ctx, oc, "default", pod1Name) + time.Sleep(10 * time.Second) + + // Pull same image second time - should reuse snapshots (lazy pulling) + framework.Logf("Pulling same eStargz image second time - should reuse snapshots") + pod2Name := "combined-test-estargz-pod2" + pod2 := createTestPodSpec(pod2Name, "default", estargzImage, testNode.Name) + _, err = oc.AdminKubeClient().CoreV1().Pods("default").Create(ctx, pod2, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + defer deletePodAndWait(ctx, oc, "default", pod2Name) + + err = waitForPodRunning(ctx, oc, pod2Name, 5*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("Second pod running successfully") + + // Get snapshot count after second pull + snapshotsAfterSecond := getStargzSnapshotCount(oc, testNode.Name) + framework.Logf("Snapshots after second pull: %d", snapshotsAfterSecond) + + // Verify lazy pulling - snapshot count should not increase significantly + newSnapshots := snapshotsAfterSecond - snapshotsAfterFirst + framework.Logf("New snapshots created on second pull: %d", newSnapshots) + o.Expect(newSnapshots).To(o.BeNumerically("<=", 2), "Lazy pulling should reuse existing snapshots") + + deletePodAndWait(ctx, oc, "default", pod2Name) + framework.Logf("Layer store (stargz) lazy pulling verified successfully") + + // Phase 7: Test image store functionality + g.By("Phase 7: Testing additionalImageStores - verify pre-populated image accessible") + + // First verify the image exists in the additional store + imageCheckOutput, err := ExecOnNodeWithChroot(oc, testNode.Name, "ls", "-la", imageStorePath+"/prepopulated-image") + o.Expect(err).NotTo(o.HaveOccurred()) + framework.Logf("Pre-populated image still exists in additional store: %s", imageCheckOutput) + + // Check storage.conf has the image store path + storageConf, err := ExecOnNodeWithChroot(oc, testNode.Name, "cat", "/etc/containers/storage.conf") + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(storageConf).To(o.ContainSubstring(imageStorePath)) + framework.Logf("Image store path verified in storage.conf") + + // Phase 8: Test artifact store functionality + g.By("Phase 8: Testing additionalArtifactStores - verify path configured") + + // Verify artifact store path in storage.conf + o.Expect(storageConf).To(o.ContainSubstring(artifactStorePath)) + framework.Logf("Artifact store path verified in storage.conf") + + // Create a test artifact file + artifactTestFile := artifactStorePath + "/test-artifact.txt" + createArtifactCmd := fmt.Sprintf("echo 'test artifact content' > %s", artifactTestFile) + _, err = ExecOnNodeWithChroot(oc, testNode.Name, "bash", "-c", createArtifactCmd) + o.Expect(err).NotTo(o.HaveOccurred()) + + // Verify artifact file exists + artifactCheck, err := ExecOnNodeWithChroot(oc, testNode.Name, "cat", artifactTestFile) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(artifactCheck).To(o.ContainSubstring("test artifact content")) + framework.Logf("Artifact store verified - can read/write artifacts") + + // Phase 9: Final verification + g.By("Phase 9: Final verification - all three storage types functional") + framework.Logf("✓ Layer stores: stargz lazy pulling works (reused snapshots)") + framework.Logf("✓ Image stores: pre-populated images accessible") + framework.Logf("✓ Artifact stores: can read/write artifacts") + + framework.Logf("Test PASSED: All three storage types verified functionally") + }) +}) diff --git a/test/extended/node/node_utils.go b/test/extended/node/node_utils.go index 8af84984b0d9..7eaaa1c15346 100644 --- a/test/extended/node/node_utils.go +++ b/test/extended/node/node_utils.go @@ -9,10 +9,12 @@ import ( "strings" "time" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/wait" kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1" @@ -516,6 +518,46 @@ func waitForHyperConvergedReady(ctx context.Context, oc *exutil.CLI) error { }) } +// waitForMCPToStartUpdating waits for MCP to acknowledge a config change and start updating +// This prevents race conditions where we check MCP status before MCO processes the new config +func waitForMCPToStartUpdating(ctx context.Context, mcClient *machineconfigclient.Clientset, poolName string, timeout time.Duration) error { + framework.Logf("Waiting for MCP %s to start updating (timeout: %v)...", poolName, timeout) + + // First get current rendered config to detect change + initialMCP, err := mcClient.MachineconfigurationV1().MachineConfigPools().Get(ctx, poolName, metav1.GetOptions{}) + if err != nil { + return err + } + initialConfig := initialMCP.Status.Configuration.Name + framework.Logf("Initial rendered config: %s", initialConfig) + + return wait.PollUntilContextTimeout(ctx, 5*time.Second, timeout, true, func(ctx context.Context) (bool, error) { + mcp, err := mcClient.MachineconfigurationV1().MachineConfigPools().Get(ctx, poolName, metav1.GetOptions{}) + if err != nil { + return false, err + } + + // Check if config changed or MCP is updating + configChanged := mcp.Status.Configuration.Name != initialConfig + updating := false + for _, condition := range mcp.Status.Conditions { + if condition.Type == "Updating" && condition.Status == corev1.ConditionTrue { + updating = true + break + } + } + + if configChanged || updating { + framework.Logf("MCP %s started updating: configChanged=%v, updating=%v, newConfig=%s", + poolName, configChanged, updating, mcp.Status.Configuration.Name) + return true, nil + } + + framework.Logf("MCP %s not yet updating, waiting for MCO to process config change...", poolName) + return false, nil + }) +} + // waitForMCP waits for a MachineConfigPool to be ready (not updating, updated, and all machines ready) // Returns error immediately if the MCP becomes degraded func waitForMCP(ctx context.Context, mcClient *machineconfigclient.Clientset, poolName string, timeout time.Duration) error { @@ -741,3 +783,376 @@ func ensureDropInDirectoryExists(ctx context.Context, oc *exutil.CLI, dirPath st return nil } + +// createDirectoriesOnNodes creates specified directories on the given nodes +func createDirectoriesOnNodes(oc *exutil.CLI, nodes []corev1.Node, dirs []string) error { + for _, node := range nodes { + for _, dir := range dirs { + _, err := ExecOnNodeWithChroot(oc, node.Name, "mkdir", "-p", dir) + if err != nil { + return fmt.Errorf("failed to create directory %s on node %s: %v", dir, node.Name, err) + } + framework.Logf("Node %s: directory %s created", node.Name, dir) + } + } + return nil +} + +// createDirectoryMachineConfig creates a MachineConfig that creates a directory via Ignition +// storage.directories. This ensures the directory exists before any services start. +func createDirectoryMachineConfig(ctx context.Context, mcClient *machineconfigclient.Clientset, name, dirPath string) error { + // Use Ignition storage.directories to create the directory during early boot + // This runs before any systemd services start + ignitionConfig := map[string]interface{}{ + "ignition": map[string]string{ + "version": "3.2.0", + }, + "storage": map[string]interface{}{ + "directories": []map[string]interface{}{ + { + "path": dirPath, + "mode": 0755, + }, + }, + }, + } + + ignitionJSON, err := json.Marshal(ignitionConfig) + if err != nil { + return fmt.Errorf("failed to marshal ignition config: %v", err) + } + + mc := &machineconfigv1.MachineConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: map[string]string{ + "machineconfiguration.openshift.io/role": "worker", + }, + }, + Spec: machineconfigv1.MachineConfigSpec{ + Config: runtime.RawExtension{ + Raw: ignitionJSON, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().MachineConfigs().Create(ctx, mc, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create MachineConfig %s: %v", name, err) + } + framework.Logf("Created MachineConfig %s for directory %s", name, dirPath) + return nil +} + +// cleanupMachineConfig deletes a MachineConfig +func cleanupMachineConfig(ctx context.Context, mcClient *machineconfigclient.Clientset, name string) { + err := mcClient.MachineconfigurationV1().MachineConfigs().Delete(ctx, name, metav1.DeleteOptions{}) + if err != nil { + framework.Logf("Warning: failed to delete MachineConfig %s: %v", name, err) + } else { + framework.Logf("Deleted MachineConfig %s", name) + } +} + +// createDirectoriesMachineConfig creates a MachineConfig that creates multiple directories via Ignition +// storage.directories. This ensures directories exist before any services start. +func createDirectoriesMachineConfig(ctx context.Context, mcClient *machineconfigclient.Clientset, name string, dirPaths []string) error { + // Build directory entries for Ignition storage.directories + directories := make([]map[string]interface{}, 0, len(dirPaths)) + for _, dirPath := range dirPaths { + directories = append(directories, map[string]interface{}{ + "path": dirPath, + "mode": 0755, + }) + } + + // Create Ignition config JSON using storage.directories + // This runs during Ignition phase, before any systemd services start + ignitionConfig := map[string]interface{}{ + "ignition": map[string]string{ + "version": "3.2.0", + }, + "storage": map[string]interface{}{ + "directories": directories, + }, + } + + ignitionJSON, err := json.Marshal(ignitionConfig) + if err != nil { + return fmt.Errorf("failed to marshal ignition config: %v", err) + } + + mc := &machineconfigv1.MachineConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: map[string]string{ + "machineconfiguration.openshift.io/role": "worker", + }, + }, + Spec: machineconfigv1.MachineConfigSpec{ + Config: runtime.RawExtension{ + Raw: ignitionJSON, + }, + }, + } + + _, err = mcClient.MachineconfigurationV1().MachineConfigs().Create(ctx, mc, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create MachineConfig %s: %v", name, err) + } + framework.Logf("Created MachineConfig %s for directories: %v", name, dirPaths) + return nil +} + +// setupDirectoriesViaMachineConfig creates directories via MachineConfig and waits for MCP rollout +// Returns the MachineConfig name for cleanup +func setupDirectoriesViaMachineConfig(ctx context.Context, mcClient *machineconfigclient.Clientset, mcName string, dirPaths []string) error { + // Create MachineConfig for directories + err := createDirectoriesMachineConfig(ctx, mcClient, mcName, dirPaths) + if err != nil { + return err + } + + // Wait for MCP to start updating + framework.Logf("Waiting for MCP to start updating after directory MachineConfig...") + err = waitForMCPToStartUpdating(ctx, mcClient, "worker", 5*time.Minute) + if err != nil { + return fmt.Errorf("MCP did not start updating: %v", err) + } + + // Wait for MCP rollout to complete + framework.Logf("Waiting for MCP rollout to complete...") + err = waitForMCP(ctx, mcClient, "worker", 25*time.Minute) + if err != nil { + return fmt.Errorf("MCP rollout failed: %v", err) + } + + framework.Logf("Directories %v now exist on all worker nodes", dirPaths) + return nil +} + +// cleanupDirectoriesOnNodes removes specified directories from the given nodes +func cleanupDirectoriesOnNodes(oc *exutil.CLI, nodes []corev1.Node, dirs []string) { + for _, node := range nodes { + for _, dir := range dirs { + _, err := ExecOnNodeWithChroot(oc, node.Name, "rm", "-rf", dir) + if err != nil { + framework.Logf("Warning: failed to cleanup directory %s on node %s: %v", dir, node.Name, err) + } + } + } +} + +// ============================================================================ +// Additional Image Stores Helper Functions +// ============================================================================ + +func waitForDaemonSetReady(ctx context.Context, oc *exutil.CLI, namespace, name string, timeout time.Duration) error { + return wait.PollUntilContextTimeout(ctx, 10*time.Second, timeout, true, func(ctx context.Context) (bool, error) { + ds, err := oc.AdminKubeClient().AppsV1().DaemonSets(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return false, nil + } + return ds.Status.NumberReady == ds.Status.DesiredNumberScheduled && + ds.Status.DesiredNumberScheduled > 0, nil + }) +} + +func int64Ptr(i int64) *int64 { + return &i +} + +func boolPtr(b bool) *bool { + return &b +} + +func createAdditionalImageStoresCTRCfg(testName, storePath string) *machineconfigv1.ContainerRuntimeConfig { + return &machineconfigv1.ContainerRuntimeConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: testName, + }, + Spec: machineconfigv1.ContainerRuntimeConfigSpec{ + MachineConfigPoolSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "pools.operator.machineconfiguration.openshift.io/worker": "", + }, + }, + ContainerRuntimeConfig: &machineconfigv1.ContainerRuntimeConfiguration{ + AdditionalImageStores: []machineconfigv1.AdditionalImageStore{ + {Path: machineconfigv1.StorePath(storePath)}, + }, + }, + }, + } +} + +func createTestPod(name, image, nodeName string) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: corev1.PodSpec{ + NodeName: nodeName, + SecurityContext: &corev1.PodSecurityContext{ + RunAsUser: int64Ptr(1000), + RunAsNonRoot: boolPtr(true), + SeccompProfile: &corev1.SeccompProfile{ + Type: corev1.SeccompProfileTypeRuntimeDefault, + }, + }, + Containers: []corev1.Container{ + { + Name: "test", + Image: image, + Command: []string{"sleep", "3600"}, + SecurityContext: &corev1.SecurityContext{ + AllowPrivilegeEscalation: boolPtr(false), + Capabilities: &corev1.Capabilities{ + Drop: []corev1.Capability{"ALL"}, + }, + RunAsNonRoot: boolPtr(true), + SeccompProfile: &corev1.SeccompProfile{ + Type: corev1.SeccompProfileTypeRuntimeDefault, + }, + }, + }, + }, + RestartPolicy: corev1.RestartPolicyNever, + }, + } +} + +func waitForContainerRuntimeConfigSuccess(ctx context.Context, mcClient *machineconfigclient.Clientset, name string, timeout time.Duration) error { + return wait.PollUntilContextTimeout(ctx, 10*time.Second, timeout, true, func(ctx context.Context) (bool, error) { + ctrcfg, err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return false, err + } + + if ctrcfg.Status.ObservedGeneration != ctrcfg.Generation { + return false, nil + } + + for _, condition := range ctrcfg.Status.Conditions { + if condition.Type == machineconfigv1.ContainerRuntimeConfigSuccess && + condition.Status == corev1.ConditionTrue { + return true, nil + } + if condition.Type == machineconfigv1.ContainerRuntimeConfigFailure && + condition.Status == corev1.ConditionTrue { + return false, fmt.Errorf("ContainerRuntimeConfig failed: %s", condition.Message) + } + } + return false, nil + }) +} + +func waitForPodRunning(ctx context.Context, oc *exutil.CLI, podName string, timeout time.Duration) error { + return wait.PollUntilContextTimeout(ctx, 5*time.Second, timeout, true, func(ctx context.Context) (bool, error) { + pod, err := oc.AdminKubeClient().CoreV1().Pods(oc.Namespace()).Get(ctx, podName, metav1.GetOptions{}) + if err != nil { + framework.Logf("Error getting pod %s: %v", podName, err) + return false, nil + } + + framework.Logf("Pod %s status: Phase=%s, ContainerStatuses=%d", podName, pod.Status.Phase, len(pod.Status.ContainerStatuses)) + + // Check if pod is in Running phase + if pod.Status.Phase == corev1.PodRunning { + return true, nil + } + + // Also consider pod as running if all containers are running, even if phase hasn't updated + if len(pod.Status.ContainerStatuses) > 0 { + allRunning := true + for _, cs := range pod.Status.ContainerStatuses { + if cs.State.Running == nil { + allRunning = false + framework.Logf("Container %s not yet running: %+v", cs.Name, cs.State) + break + } + } + if allRunning { + framework.Logf("All containers running, considering pod as running") + return true, nil + } + } + + return false, nil + }) +} + +func waitForPodDeleted(ctx context.Context, oc *exutil.CLI, podName string, timeout time.Duration) error { + return wait.PollUntilContextTimeout(ctx, 5*time.Second, timeout, true, func(ctx context.Context) (bool, error) { + _, err := oc.AdminKubeClient().CoreV1().Pods(oc.Namespace()).Get(ctx, podName, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + return true, nil + } + return false, nil + }) +} + +func prepopulateImageOnNode(ctx context.Context, oc *exutil.CLI, nodeName, image, storePath string) error { + // Pull image directly to the additional storage using podman + // Note: Don't use --runroot to avoid database configuration mismatch errors + framework.Logf("Pulling image %s to additional storage at %s on node %s", image, storePath, nodeName) + pullCmd := fmt.Sprintf("podman --root %s pull %s", storePath, image) + pullOutput, err := ExecOnNodeWithChroot(oc, nodeName, "sh", "-c", pullCmd) + if err != nil { + return fmt.Errorf("failed to pull image to additional storage: %w, output: %s", err, pullOutput) + } + framework.Logf("Image pulled successfully to additional storage") + + // Verify image exists in additional storage + framework.Logf("Verifying image in additional storage") + verifyCmd := fmt.Sprintf("podman --root %s images %s", storePath, image) + verifyOutput, err := ExecOnNodeWithChroot(oc, nodeName, "sh", "-c", verifyCmd) + if err != nil { + return fmt.Errorf("failed to verify image in additional storage: %w, output: %s", err, verifyOutput) + } + framework.Logf("Image verified in additional storage: %s", verifyOutput) + + return nil +} + +func cleanupContainerRuntimeConfig(ctx context.Context, mcClient *machineconfigclient.Clientset, name string) { + err := mcClient.MachineconfigurationV1().ContainerRuntimeConfigs().Delete(ctx, name, metav1.DeleteOptions{}) + if err != nil { + framework.Logf("Warning: failed to cleanup ContainerRuntimeConfig %s: %v", name, err) + } +} + +func createTestDaemonSet(name, namespace, image string) *appsv1.DaemonSet { + return &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": name, + }, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": name, + }, + }, + Spec: corev1.PodSpec{ + NodeSelector: map[string]string{ + "node-role.kubernetes.io/worker": "", + }, + Containers: []corev1.Container{ + { + Name: "app", + Image: image, + Command: []string{"sleep", "3600"}, + }, + }, + }, + }, + }, + } +} diff --git a/test/extended/node/stargz_store_setup.go b/test/extended/node/stargz_store_setup.go new file mode 100644 index 000000000000..e70f3252a2bf --- /dev/null +++ b/test/extended/node/stargz_store_setup.go @@ -0,0 +1,603 @@ +package node + +import ( + "context" + "fmt" + "strings" + "time" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/kubernetes/test/e2e/framework" + "k8s.io/utils/ptr" + + exutil "github.com/openshift/origin/test/extended/util" +) + +const ( + stargzStoreNamespace = "stargz-store" + stargzStoreDaemonSetName = "stargz-store-installer" + stargzStoreServiceAccount = "stargz-store" + stargzStorePath = "/var/lib/stargz-store/store" // MCO automatically adds :ref suffix + stargzStoreVersion = "v0.18.2" +) + +// StargzStoreSetup manages the lifecycle of stargz-store on cluster nodes +type StargzStoreSetup struct { + oc *exutil.CLI + namespace string + deployed bool +} + +// NewStargzStoreSetup creates a new StargzStoreSetup instance +func NewStargzStoreSetup(oc *exutil.CLI) *StargzStoreSetup { + return &StargzStoreSetup{ + oc: oc, + namespace: stargzStoreNamespace, + deployed: false, + } +} + +// Deploy installs stargz-store on all worker nodes using a DaemonSet +func (s *StargzStoreSetup) Deploy(ctx context.Context) error { + framework.Logf("Deploying stargz-store to cluster...") + + // Create namespace + if err := s.createNamespace(ctx); err != nil { + return fmt.Errorf("failed to create namespace: %w", err) + } + + // Create ServiceAccount + if err := s.createServiceAccount(ctx); err != nil { + return fmt.Errorf("failed to create serviceaccount: %w", err) + } + + // Grant privileged SCC to ServiceAccount + if err := s.grantPrivilegedSCC(ctx); err != nil { + return fmt.Errorf("failed to grant privileged SCC: %w", err) + } + + // Create ConfigMap with stargz-store config and systemd service + if err := s.createConfigMap(ctx); err != nil { + return fmt.Errorf("failed to create configmap: %w", err) + } + + // Create DaemonSet + if err := s.createDaemonSet(ctx); err != nil { + return fmt.Errorf("failed to create daemonset: %w", err) + } + + // Wait for DaemonSet to be ready + if err := s.waitForDaemonSetReady(ctx, 10*time.Minute); err != nil { + return fmt.Errorf("failed waiting for daemonset: %w", err) + } + + // Verify stargz-store is running on nodes + if err := s.verifyStargzStoreRunning(ctx); err != nil { + return fmt.Errorf("stargz-store verification failed: %w", err) + } + + s.deployed = true + framework.Logf("stargz-store deployed successfully") + return nil +} + +// Cleanup removes stargz-store from all nodes +func (s *StargzStoreSetup) Cleanup(ctx context.Context) error { + if !s.deployed { + return nil + } + + framework.Logf("Cleaning up stargz-store...") + + // Delete namespace (cascades to all resources) + err := s.oc.AdminKubeClient().CoreV1().Namespaces().Delete(ctx, s.namespace, metav1.DeleteOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + framework.Logf("Warning: failed to delete namespace %s: %v", s.namespace, err) + } + + // Wait for namespace to be deleted + err = wait.PollUntilContextTimeout(ctx, 5*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { + _, err := s.oc.AdminKubeClient().CoreV1().Namespaces().Get(ctx, s.namespace, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + return true, nil + } + return false, nil + }) + + if err != nil { + framework.Logf("Warning: namespace deletion timed out: %v", err) + } + + s.deployed = false + framework.Logf("stargz-store cleanup completed") + return nil +} + +// GetStorePath returns the path to use in ContainerRuntimeConfig for stargz layer store +func (s *StargzStoreSetup) GetStorePath() string { + return stargzStorePath +} + +// IsDeployed returns true if stargz-store has been deployed +func (s *StargzStoreSetup) IsDeployed() bool { + return s.deployed +} + +func (s *StargzStoreSetup) createNamespace(ctx context.Context) error { + ns := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: s.namespace, + Labels: map[string]string{ + "app": "stargz-store", + "pod-security.kubernetes.io/enforce": "privileged", + "pod-security.kubernetes.io/audit": "privileged", + "pod-security.kubernetes.io/warn": "privileged", + "security.openshift.io/scc.podSecurityLabelSync": "false", + }, + }, + } + + _, err := s.oc.AdminKubeClient().CoreV1().Namespaces().Create(ctx, ns, metav1.CreateOptions{}) + if err != nil && !apierrors.IsAlreadyExists(err) { + return err + } + framework.Logf("Namespace %s created/exists", s.namespace) + return nil +} + +func (s *StargzStoreSetup) createServiceAccount(ctx context.Context) error { + sa := &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: stargzStoreServiceAccount, + Namespace: s.namespace, + }, + } + + _, err := s.oc.AdminKubeClient().CoreV1().ServiceAccounts(s.namespace).Create(ctx, sa, metav1.CreateOptions{}) + if err != nil && !apierrors.IsAlreadyExists(err) { + return err + } + framework.Logf("ServiceAccount %s created/exists", stargzStoreServiceAccount) + return nil +} + +func (s *StargzStoreSetup) grantPrivilegedSCC(ctx context.Context) error { + // Get the privileged SCC + scc, err := s.oc.AdminSecurityClient().SecurityV1().SecurityContextConstraints().Get(ctx, "privileged", metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get privileged SCC: %w", err) + } + + // Add ServiceAccount to SCC users list + saUser := fmt.Sprintf("system:serviceaccount:%s:%s", s.namespace, stargzStoreServiceAccount) + + // Check if already added + for _, user := range scc.Users { + if user == saUser { + framework.Logf("ServiceAccount %s already has privileged SCC", stargzStoreServiceAccount) + return nil + } + } + + // Add to users list + scc.Users = append(scc.Users, saUser) + + // Update SCC + _, err = s.oc.AdminSecurityClient().SecurityV1().SecurityContextConstraints().Update(ctx, scc, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update privileged SCC: %w", err) + } + + framework.Logf("Granted privileged SCC to ServiceAccount %s", stargzStoreServiceAccount) + return nil +} + +func (s *StargzStoreSetup) createConfigMap(ctx context.Context) error { + configToml := `# Stargz-store configuration for CRI-O +# Registry resolver config +[[resolver.host."quay.io".mirrors]] +host = "quay.io" + +[[resolver.host."docker.io".mirrors]] +host = "registry-1.docker.io" + +[[resolver.host."gcr.io".mirrors]] +host = "gcr.io" + +[[resolver.host."ghcr.io".mirrors]] +host = "ghcr.io" + +[[resolver.host."registry.redhat.io".mirrors]] +host = "registry.redhat.io" +` + + serviceFile := `[Unit] +Description=stargz store +After=network.target +Before=crio.service + +[Service] +Type=notify +Environment=HOME=/root +ExecStart=/usr/local/bin/stargz-store --log-level=debug --config=/etc/stargz-store/config.toml /var/lib/stargz-store/store +ExecStopPost=umount /var/lib/stargz-store/store +Restart=always +RestartSec=1 + +[Install] +WantedBy=multi-user.target +` + + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "stargz-store-config", + Namespace: s.namespace, + }, + Data: map[string]string{ + "config.toml": configToml, + "stargz-store.service": serviceFile, + }, + } + + _, err := s.oc.AdminKubeClient().CoreV1().ConfigMaps(s.namespace).Create(ctx, cm, metav1.CreateOptions{}) + if err != nil && !apierrors.IsAlreadyExists(err) { + return err + } + framework.Logf("ConfigMap stargz-store-config created/exists") + return nil +} + +func (s *StargzStoreSetup) createDaemonSet(ctx context.Context) error { + installerScript := fmt.Sprintf(`set -e + +echo "=== Stargz-store Installer ===" +echo "Node: $(hostname)" + +# Check if already installed and running +if nsenter -t 1 -m -u -i -n -p -- systemctl is-active stargz-store &>/dev/null; then + echo "stargz-store already running on this node" + nsenter -t 1 -m -u -i -n -p -- mount | grep stargz || true + echo "Sleeping to keep pod running..." + sleep infinity +fi + +echo "Installing stargz-store..." + +# Unlock ostree for modifications +echo "Unlocking ostree..." +nsenter -t 1 -m -u -i -n -p -- ostree admin unlock --hotfix || echo "ostree unlock failed or already unlocked" + +# Download stargz-snapshotter release +echo "Downloading stargz-snapshotter %s..." +curl -L -o /tmp/stargz.tar.gz \ + https://github.com/containerd/stargz-snapshotter/releases/download/%s/stargz-snapshotter-%s-linux-amd64.tar.gz + +# Extract to host +echo "Extracting binary to /usr/local/bin..." +tar -xzf /tmp/stargz.tar.gz -C /tmp/ +cp /tmp/stargz-store /host/usr/local/bin/ +chmod +x /host/usr/local/bin/stargz-store + +# Verify binary +echo "Verifying binary..." +nsenter -t 1 -m -u -i -n -p -- /usr/local/bin/stargz-store --version || echo "Version check skipped" + +# Create directories +echo "Creating directories..." +mkdir -p /host/etc/stargz-store +mkdir -p /host/var/lib/stargz-store/store + +# Copy config file +echo "Copying config.toml..." +cp /config/config.toml /host/etc/stargz-store/config.toml + +# Copy service file +echo "Copying systemd service..." +cp /config/stargz-store.service /host/etc/systemd/system/stargz-store.service + +# Reload systemd and enable service +echo "Enabling stargz-store service..." +nsenter -t 1 -m -u -i -n -p -- systemctl daemon-reload +nsenter -t 1 -m -u -i -n -p -- systemctl enable stargz-store +nsenter -t 1 -m -u -i -n -p -- systemctl start stargz-store + +# Wait for service to be ready +echo "Waiting for stargz-store to be ready..." +sleep 5 + +# Verify service is running +echo "Verifying stargz-store service..." +nsenter -t 1 -m -u -i -n -p -- systemctl status stargz-store --no-pager || true + +# Verify FUSE mount +echo "Verifying FUSE mount..." +nsenter -t 1 -m -u -i -n -p -- mount | grep stargz || echo "WARNING: stargz mount not found" + +# Restart CRI-O to pick up the new layer store +echo "Restarting CRI-O..." +nsenter -t 1 -m -u -i -n -p -- systemctl restart crio + +echo "=== Setup complete! ===" +echo "stargz-store is now running on $(hostname)" +echo "" +echo "To use in ContainerRuntimeConfig, set additionalLayerStores path to:" +echo " /var/lib/stargz-store/store:ref" +echo "" + +# Keep pod running +sleep infinity +`, stargzStoreVersion, stargzStoreVersion, stargzStoreVersion) + + ds := &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: stargzStoreDaemonSetName, + Namespace: s.namespace, + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "stargz-store-installer", + }, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": "stargz-store-installer", + }, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: stargzStoreServiceAccount, + NodeSelector: map[string]string{ + "node-role.kubernetes.io/worker": "", + }, + HostPID: true, + HostNetwork: true, + Tolerations: []corev1.Toleration{ + { + Operator: corev1.TolerationOpExists, + }, + }, + Containers: []corev1.Container{ + { + Name: "installer", + Image: "registry.access.redhat.com/ubi9/ubi:latest", + SecurityContext: &corev1.SecurityContext{ + Privileged: ptr.To(true), + }, + Command: []string{"/bin/bash", "-c", installerScript}, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "host-root", + MountPath: "/host", + }, + { + Name: "config", + MountPath: "/config", + ReadOnly: true, + }, + }, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceMemory: resourceMustParse("64Mi"), + corev1.ResourceCPU: resourceMustParse("100m"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceMemory: resourceMustParse("256Mi"), + corev1.ResourceCPU: resourceMustParse("500m"), + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "host-root", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/", + Type: hostPathTypePtr(corev1.HostPathDirectory), + }, + }, + }, + { + Name: "config", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "stargz-store-config", + }, + }, + }, + }, + }, + }, + }, + }, + } + + _, err := s.oc.AdminKubeClient().AppsV1().DaemonSets(s.namespace).Create(ctx, ds, metav1.CreateOptions{}) + if err != nil && !apierrors.IsAlreadyExists(err) { + return err + } + framework.Logf("DaemonSet %s created/exists", stargzStoreDaemonSetName) + return nil +} + +func (s *StargzStoreSetup) waitForDaemonSetReady(ctx context.Context, timeout time.Duration) error { + framework.Logf("Waiting for stargz-store DaemonSet to be ready (timeout: %v)...", timeout) + + return wait.PollUntilContextTimeout(ctx, 10*time.Second, timeout, true, func(ctx context.Context) (bool, error) { + ds, err := s.oc.AdminKubeClient().AppsV1().DaemonSets(s.namespace).Get(ctx, stargzStoreDaemonSetName, metav1.GetOptions{}) + if err != nil { + return false, nil + } + + framework.Logf("DaemonSet status: desired=%d, ready=%d, available=%d", + ds.Status.DesiredNumberScheduled, ds.Status.NumberReady, ds.Status.NumberAvailable) + + if ds.Status.DesiredNumberScheduled == 0 { + return false, nil + } + + if ds.Status.NumberReady == ds.Status.DesiredNumberScheduled && + ds.Status.NumberAvailable == ds.Status.DesiredNumberScheduled { + return true, nil + } + + return false, nil + }) +} + +func (s *StargzStoreSetup) verifyStargzStoreRunning(ctx context.Context) error { + framework.Logf("Verifying stargz-store is running on worker nodes...") + + workerNodes, err := getNodesByLabel(ctx, s.oc, "node-role.kubernetes.io/worker") + if err != nil { + return fmt.Errorf("failed to get worker nodes: %w", err) + } + + pureWorkers := getPureWorkerNodes(workerNodes) + if len(pureWorkers) == 0 { + return fmt.Errorf("no pure worker nodes found") + } + + for _, node := range pureWorkers { + // Check stargz-store service status + output, err := ExecOnNodeWithChroot(s.oc, node.Name, "systemctl", "is-active", "stargz-store") + if err != nil { + framework.Logf("Warning: failed to check stargz-store status on node %s: %v", node.Name, err) + continue + } + + if strings.TrimSpace(output) != "active" { + return fmt.Errorf("stargz-store is not active on node %s (status: %s)", node.Name, strings.TrimSpace(output)) + } + + // Check FUSE mount + mountOutput, err := ExecOnNodeWithChroot(s.oc, node.Name, "mount") + if err != nil { + framework.Logf("Warning: failed to check mounts on node %s: %v", node.Name, err) + continue + } + + if !strings.Contains(mountOutput, "stargz") { + framework.Logf("Warning: stargz FUSE mount not found on node %s", node.Name) + } + + framework.Logf("Node %s: stargz-store is active and mounted", node.Name) + } + + return nil +} + +// VerifyStorageConfContainsStargz checks if storage.conf contains stargz-store path +func (s *StargzStoreSetup) VerifyStorageConfContainsStargz(ctx context.Context) error { + workerNodes, err := getNodesByLabel(ctx, s.oc, "node-role.kubernetes.io/worker") + if err != nil { + return fmt.Errorf("failed to get worker nodes: %w", err) + } + + pureWorkers := getPureWorkerNodes(workerNodes) + for _, node := range pureWorkers { + output, err := ExecOnNodeWithChroot(s.oc, node.Name, "cat", "/etc/containers/storage.conf") + if err != nil { + return fmt.Errorf("failed to read storage.conf on node %s: %w", node.Name, err) + } + + if !strings.Contains(output, "/var/lib/stargz-store/store") { + return fmt.Errorf("storage.conf on node %s does not contain stargz-store path", node.Name) + } + + framework.Logf("Node %s: storage.conf contains stargz-store path", node.Name) + } + + return nil +} + +// Helper functions +func hostPathTypePtr(t corev1.HostPathType) *corev1.HostPathType { + return &t +} + +func resourceMustParse(s string) resource.Quantity { + q := resource.MustParse(s) + return q +} + +// getStargzSnapshotCount returns the number of snapshots in stargz-store +func getStargzSnapshotCount(oc *exutil.CLI, nodeName string) int { + // List contents of stargz-store to count snapshots/layers + output, err := ExecOnNodeWithChroot(oc, nodeName, "find", "/var/lib/stargz-store/store", "-type", "d", "-mindepth", "1") + if err != nil { + framework.Logf("Warning: failed to count snapshots: %v", err) + return 0 + } + + lines := strings.Split(strings.TrimSpace(output), "\n") + count := 0 + for _, line := range lines { + if line != "" { + count++ + } + } + return count +} + +// createTestPodSpec creates a simple pod spec for testing +func createTestPodSpec(name, namespace, image, nodeName string) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: corev1.PodSpec{ + NodeName: nodeName, + SecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: boolPtr(true), + SeccompProfile: &corev1.SeccompProfile{ + Type: corev1.SeccompProfileTypeRuntimeDefault, + }, + }, + Containers: []corev1.Container{ + { + Name: "test-container", + Image: image, + Command: []string{"/bin/sh", "-c", "sleep 3600"}, + SecurityContext: &corev1.SecurityContext{ + AllowPrivilegeEscalation: boolPtr(false), + Capabilities: &corev1.Capabilities{ + Drop: []corev1.Capability{"ALL"}, + }, + RunAsNonRoot: boolPtr(true), + SeccompProfile: &corev1.SeccompProfile{ + Type: corev1.SeccompProfileTypeRuntimeDefault, + }, + }, + }, + }, + RestartPolicy: corev1.RestartPolicyNever, + }, + } +} + +// deletePodAndWait deletes a pod and waits for it to be gone +func deletePodAndWait(ctx context.Context, oc *exutil.CLI, namespace, podName string) { + err := oc.AdminKubeClient().CoreV1().Pods(namespace).Delete(ctx, podName, metav1.DeleteOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + framework.Logf("Warning: failed to delete pod %s: %v", podName, err) + return + } + + // Wait for pod to be deleted + wait.PollUntilContextTimeout(ctx, 2*time.Second, 1*time.Minute, true, func(ctx context.Context) (bool, error) { + _, err := oc.AdminKubeClient().CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + return true, nil + } + return false, nil + }) +}