diff --git a/config/dr-cluster/rbac/role.yaml b/config/dr-cluster/rbac/role.yaml index 1a41b0a57a..1a9d57b93a 100644 --- a/config/dr-cluster/rbac/role.yaml +++ b/config/dr-cluster/rbac/role.yaml @@ -8,8 +8,13 @@ rules: - "" resources: - configmaps + - persistentvolumes verbs: + - create + - get - list + - patch + - update - watch - apiGroups: - "" @@ -33,17 +38,6 @@ rules: - patch - update - watch -- apiGroups: - - "" - resources: - - persistentvolumes - verbs: - - create - - get - - list - - patch - - update - - watch - apiGroups: - "" resources: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 89e76bda16..e1aa678104 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -8,8 +8,13 @@ rules: - "" resources: - configmaps + - persistentvolumes verbs: + - create + - get - list + - patch + - update - watch - apiGroups: - "" @@ -43,17 +48,6 @@ rules: - patch - update - watch -- apiGroups: - - "" - resources: - - persistentvolumes - verbs: - - create - - get - - list - - patch - - update - - watch - apiGroups: - "" resources: diff --git a/internal/controller/controllers_utils_test.go b/internal/controller/controllers_utils_test.go index 81fda02fb1..8fca8cf6b9 100644 --- a/internal/controller/controllers_utils_test.go +++ b/internal/controller/controllers_utils_test.go @@ -341,11 +341,6 @@ func drclusterConditionExpect( } func validateClusterManifest(apiReader client.Reader, drcluster *ramen.DRCluster, disabled bool) { - expectedCount := 9 - if disabled { - expectedCount = 4 - } - clusterName := drcluster.Name key := types.NamespacedName{ @@ -356,12 +351,35 @@ func validateClusterManifest(apiReader client.Reader, drcluster *ramen.DRCluster manifestWork := &workv1.ManifestWork{} Eventually( - func(g Gomega) []workv1.Manifest { + func(g Gomega) { g.Expect(apiReader.Get(context.TODO(), key, manifestWork)).To(Succeed()) - return manifestWork.Spec.Workload.Manifests + // Validate base ClusterRoles are always present (5 manifests) + g.Expect(manifestWork.Spec.Workload.Manifests).To(ContainElement( + HaveField("RawExtension.Raw", ContainSubstring("volrepgroup-edit")))) + g.Expect(manifestWork.Spec.Workload.Manifests).To(ContainElement( + HaveField("RawExtension.Raw", ContainSubstring("mmode-edit")))) + g.Expect(manifestWork.Spec.Workload.Manifests).To(ContainElement( + HaveField("RawExtension.Raw", ContainSubstring("drclusterconfig-edit")))) + g.Expect(manifestWork.Spec.Workload.Manifests).To(ContainElement( + HaveField("RawExtension.Raw", ContainSubstring("networkfence-edit")))) + g.Expect(manifestWork.Spec.Workload.Manifests).To(ContainElement( + HaveField("RawExtension.Raw", ContainSubstring("recipe-edit")))) + + if !disabled { + // When deployment automation is enabled, validate additional manifests (5 more) + g.Expect(manifestWork.Spec.Workload.Manifests).To(ContainElement( + HaveField("RawExtension.Raw", ContainSubstring("olm-edit")))) + g.Expect(manifestWork.Spec.Workload.Manifests).To(ContainElement( + HaveField("RawExtension.Raw", ContainSubstring("OperatorGroup")))) + g.Expect(manifestWork.Spec.Workload.Manifests).To(ContainElement( + HaveField("RawExtension.Raw", ContainSubstring("Subscription")))) + // Two Namespace manifests + g.Expect(manifestWork.Spec.Workload.Manifests).To(ContainElement( + HaveField("RawExtension.Raw", And(ContainSubstring("Namespace"), ContainSubstring("ramen"))))) + } }, timeout, interval, - ).Should(HaveLen(expectedCount)) + ).Should(Succeed()) Expect(manifestWork.GetAnnotations()[controllers.DRClusterNameAnnotation]).Should(Equal(clusterName)) // TODO: Validate fencing status diff --git a/internal/controller/drplacementcontrol.go b/internal/controller/drplacementcontrol.go index 821a90f228..77c74986a9 100644 --- a/internal/controller/drplacementcontrol.go +++ b/internal/controller/drplacementcontrol.go @@ -14,11 +14,13 @@ import ( "time" "github.com/go-logr/logr" + recipev1 "github.com/ramendr/recipe/api/v1alpha1" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" clrapiv1beta1 "open-cluster-management.io/api/cluster/v1beta1" + ocmworkv1 "open-cluster-management.io/api/work/v1" "sigs.k8s.io/controller-runtime/pkg/client" rmn "github.com/ramendr/ramen/api/v1alpha1" @@ -3158,3 +3160,99 @@ func (d *DRPCInstance) filterSCCAnnotations(annotations map[string]string) (map[ return filteredAnnotations, nil } + +func (d *DRPCInstance) ensureRecipeManifestWork(srcCluster, dstCluster string) error { + if d.instance.Spec.KubeObjectProtection == nil || d.instance.Spec.KubeObjectProtection.RecipeRef == nil { + return nil + } + + recipeName := d.instance.Spec.KubeObjectProtection.RecipeRef.Name + recipeNamespace := d.instance.Spec.KubeObjectProtection.RecipeRef.Namespace + + // Always fetch the latest recipe from source cluster + recipe, err := d.reconciler.MCVGetter.GetRecipeFromManagedCluster(srcCluster, recipeName, recipeNamespace) + if err != nil { + return fmt.Errorf("failed to get recipe %s/%s in cluster %s: %w", recipeNamespace, recipeName, srcCluster, err) + } + + // Check if recipe MW exists on destination + existingMW, err := d.mwu.FindManifestWorkByType(rmnutil.MWTypeRecipe, dstCluster) + if err != nil { + if !k8serrors.IsNotFound(err) { + return fmt.Errorf("failed to get recipe MW in cluster %s: %w", dstCluster, err) + } + // MW doesn't exist, create it + return d.mwu.CreateOrUpdateRecipeManifestWork(recipe, dstCluster) + } + + // MW exists, check if recipe needs update by comparing Generation or ResourceVersion + // Extract existing recipe from ManifestWork and compare + if needsUpdate, err := d.recipeNeedsUpdate(existingMW, recipe); err != nil { + return fmt.Errorf("failed to check if recipe needs update: %w", err) + } else if needsUpdate { + d.log.Info("Recipe has been updated, updating ManifestWork", + "recipe", recipeName, + "namespace", recipeNamespace, + "srcCluster", srcCluster, + "dstCluster", dstCluster, + "generation", recipe.Generation, + "resourceVersion", recipe.ResourceVersion) + + return d.mwu.CreateOrUpdateRecipeManifestWork(recipe, dstCluster) + } + + return nil +} + +// recipeNeedsUpdate compares the recipe in the ManifestWork with the source recipe +// Returns true if the recipe needs to be updated +func (d *DRPCInstance) recipeNeedsUpdate(mw *ocmworkv1.ManifestWork, sourceRecipe *recipev1.Recipe) (bool, error) { + if mw == nil || len(mw.Spec.Workload.Manifests) == 0 { + return true, nil + } + + // Extract recipe from ManifestWork + existingRecipe := &recipev1.Recipe{} + if err := rmnutil.ExtractResourceFromManifestWork(mw, existingRecipe, + schema.GroupVersionKind{ + Group: recipev1.GroupVersion.Group, + Version: recipev1.GroupVersion.Version, + Kind: "Recipe", + }); err != nil { + return false, fmt.Errorf("failed to extract recipe from ManifestWork: %w", err) + } + + // Compare Spec - this is the most important part + if !reflect.DeepEqual(existingRecipe.Spec, sourceRecipe.Spec) { + d.log.V(1).Info("Recipe Spec has changed", "recipe", sourceRecipe.Name) + + return true, nil + } + + // Compare Labels - only if both have labels + // Handle nil vs empty map equivalence + if !mapsEqual(existingRecipe.Labels, sourceRecipe.Labels) { + d.log.V(1).Info("Recipe Labels have changed", "recipe", sourceRecipe.Name) + + return true, nil + } + + // Compare Annotations - only if both have annotations + // Handle nil vs empty map equivalence + if !mapsEqual(existingRecipe.Annotations, sourceRecipe.Annotations) { + d.log.V(1).Info("Recipe Annotations have changed", "recipe", sourceRecipe.Name) + + return true, nil + } + + return false, nil +} + +// mapsEqual compares two string maps, treating nil and empty maps as equivalent +func mapsEqual(a, b map[string]string) bool { + if len(a) == 0 && len(b) == 0 { + return true + } + + return reflect.DeepEqual(a, b) +} diff --git a/internal/controller/drplacementcontrol_controller.go b/internal/controller/drplacementcontrol_controller.go index 9cb0963158..aacefd4a25 100644 --- a/internal/controller/drplacementcontrol_controller.go +++ b/internal/controller/drplacementcontrol_controller.go @@ -68,6 +68,11 @@ const ( var ErrInitialWaitTimeForDRPCPlacementRule = errors.New("waiting for DRPC Placement to produces placement decision") +// Extend this metric to other DR progression(rmn.ProgressionState) states by adding them to the `states` slice. +var trackedDRProgressionStates = []string{ + string(rmn.ProgressionWaitOnUserToCleanUp), +} + // ProgressCallback of function type type ProgressCallback func(string, string) @@ -419,15 +424,10 @@ func (r *DRPlacementControlReconciler) setDRProgressionStateMetric(drpc *rmn.DRP log.Info(fmt.Sprintf("setting metric: (%s)", DRProgressionState)) - // Extend this metric to other DR progression(rmn.ProgressionState) states by adding them to the `states` slice. - states := []string{ - string(rmn.ProgressionWaitOnUserToCleanUp), - } - currentState := string(drpc.Status.Progression) - for _, state := range states { - labels := DRProgressionStateMetricLabels(drpc) - if state == currentState { + for _, trackedState := range trackedDRProgressionStates { + labels := DRProgressionStateMetricLabels(drpc, trackedState) + if trackedState == currentState { drpcProgressionState.With(labels).Set(1) } else { drpcProgressionState.With(labels).Set(0) @@ -556,17 +556,6 @@ func (r *DRPlacementControlReconciler) createCGEnabledMetricsInstance( } } -func (r *DRPlacementControlReconciler) createDRProgressionStateMetricsInstance( - drpc *rmn.DRPlacementControl, -) *DRProgressionStateMetrics { - drProgressionStateLabels := DRProgressionStateMetricLabels(drpc) - drProgressionStateMetrics := NewDRPCProgressionStateMetric(drProgressionStateLabels) - - return &DRProgressionStateMetrics{ - DRProgressionState: drProgressionStateMetrics.DRProgressionState, - } -} - func (r *DRPlacementControlReconciler) createGlobalActionMetricsInstance( drpc *rmn.DRPlacementControl, ) *GlobalActionMetrics { @@ -852,8 +841,10 @@ func (r *DRPlacementControlReconciler) finalizeDRPC(ctx context.Context, drpc *r cgEnabledMetricLabels := CGEnabledMetricLabels(drpc) DeleteCGEnabledMetric(cgEnabledMetricLabels) - DRProgressionStateMetricsLabels := DRProgressionStateMetricLabels(drpc) - DeleteDRPCProgressionStateMetric(DRProgressionStateMetricsLabels) + for _, trackedState := range trackedDRProgressionStates { + labels := DRProgressionStateMetricLabels(drpc, trackedState) + DeleteDRPCProgressionStateMetric(labels) + } globalActionLabels := GlobalActionLabels(drpc) DeleteGlobalActionMetric(globalActionLabels) @@ -1769,8 +1760,7 @@ func (r *DRPlacementControlReconciler) setDRPCMetrics(ctx context.Context, r.setLastSyncBytesMetric(&syncMetrics.SyncDataBytesMetrics, drpc.Status.LastGroupSyncBytes, log) } - appDRCleanupMetrics := r.createDRProgressionStateMetricsInstance(drpc) - r.setDRProgressionStateMetric(drpc, appDRCleanupMetrics, log) + r.setDRProgressionStateMetric(drpc, &DRProgressionStateMetrics{}, log) return nil } diff --git a/internal/controller/drplacementcontrol_controller_test.go b/internal/controller/drplacementcontrol_controller_test.go index 25782cb8cb..21e93cb01b 100644 --- a/internal/controller/drplacementcontrol_controller_test.go +++ b/internal/controller/drplacementcontrol_controller_test.go @@ -1060,7 +1060,7 @@ func verifyVRGManifestWorkCreatedAsPrimary(namespace, managedCluster string) { return err == nil }, timeout, interval).Should(BeTrue()) - Expect(len(createdVRGRolesManifest.Spec.Workload.Manifests)).To(Equal(9)) + Expect(len(createdVRGRolesManifest.Spec.Workload.Manifests)).To(Equal(10)) vrgClusterRoleManifest := createdVRGRolesManifest.Spec.Workload.Manifests[0] Expect(vrgClusterRoleManifest).ToNot(BeNil()) diff --git a/internal/controller/drplacementcontrolvolsync.go b/internal/controller/drplacementcontrolvolsync.go index a21e8a0e2a..5b4c3f5fd5 100644 --- a/internal/controller/drplacementcontrolvolsync.go +++ b/internal/controller/drplacementcontrolvolsync.go @@ -291,6 +291,13 @@ func (d *DRPCInstance) createOrUpdateSecondaryManifestWork(srcCluster string) (c d.instance.Namespace, dstCluster) } + err = d.ensureRecipeManifestWork(srcCluster, dstCluster) + if err != nil { + return ctrlutil.OperationResultNone, + fmt.Errorf("creating ManifestWork couldn't ensure recipe on cluster %s exists", + dstCluster) + } + annotations := make(map[string]string) annotations[DRPCNameAnnotation] = d.instance.Name diff --git a/internal/controller/fake_mcv_test.go b/internal/controller/fake_mcv_test.go index 56d2297df1..692c2ab2c7 100644 --- a/internal/controller/fake_mcv_test.go +++ b/internal/controller/fake_mcv_test.go @@ -12,6 +12,7 @@ import ( volrep "github.com/csi-addons/kubernetes-csi-addons/api/replication.storage/v1alpha1" "github.com/go-logr/logr" snapv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" + recipev1 "github.com/ramendr/recipe/api/v1alpha1" groupsnapv1beta1 "github.com/red-hat-storage/external-snapshotter/client/v8/apis/volumegroupsnapshot/v1beta1" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" @@ -202,3 +203,9 @@ func (f FakeMCVGetter) DeleteManagedClusterView(clusterName, mcvName string, log func (f FakeMCVGetter) GetNSFromManagedCluster(managedCluster, resourceName string) (*corev1.Namespace, error) { return nil, nil } + +func (f FakeMCVGetter) GetRecipeFromManagedCluster(managedCluster, resourceName, + resourceNamespace string, +) (*recipev1.Recipe, error) { + return nil, nil +} diff --git a/internal/controller/kubeobjects/velero/exclude_resources.go b/internal/controller/kubeobjects/velero/exclude_resources.go new file mode 100644 index 0000000000..28a7ed4a49 --- /dev/null +++ b/internal/controller/kubeobjects/velero/exclude_resources.go @@ -0,0 +1,296 @@ +// SPDX-FileCopyrightText: The RamenDR authors +// SPDX-License-Identifier: Apache-2.0 + +package velero + +import ( + "context" + "encoding/json" + "fmt" + "strings" + + "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + // DefaultExcludedResourcesConfigMapName is the name of the ConfigMap that contains + // the default list of resources to exclude from Velero backups + DefaultExcludedResourcesConfigMapName = "default-excluded-resources" + + // ExcludedResourcesKey is the key in the ConfigMap data that contains the excluded resources list + ExcludedResourcesKey = "resources" +) + +// getDefaultExcludedResources returns the hardcoded default list of resources to exclude from Velero backups. +// These defaults are used when creating the ConfigMap for the first time or as a fallback. +func getDefaultExcludedResources() []string { + return []string{ + // Exclude VRs from Backup so VRG can create them: see https://github.com/RamenDR/ramen/issues/884 + "volumereplications.replication.storage.openshift.io", + "volumegroupreplications.replication.storage.openshift.io", + // Exclude VolSync resources as they are managed by VRG + "replicationsources.volsync.backube", + "replicationdestinations.volsync.backube", + // Exclude PVCs and PVs as they are handled separately + "persistentvolumeclaims", + "persistentvolumes", + // Exclude EndpointSlices/Endpoints to prevent Submariner conflicts: see https://github.com/RamenDR/ramen/issues/1889 + "endpointslices.discovery.k8s.io", + "endpoints", + // Exclude VolumeSnapshots and VolumeGroupSnapshots from backup + "volumesnapshots.snapshot.storage.k8s.io", + "volumegroupsnapshots.groupsnapshot.storage.k8s.io", + } +} + +// getCriticalExcludedResources returns resources that should always be excluded +// to prevent breaking Ramen's internal logic. These will generate warnings if removed from ConfigMap. +func getCriticalExcludedResources() []string { + return []string{ + "volumereplications.replication.storage.openshift.io", + "volumegroupreplications.replication.storage.openshift.io", + "replicationsources.volsync.backube", + "replicationdestinations.volsync.backube", + } +} + +// ExcludedResourcesManager manages the default excluded resources ConfigMap +type ExcludedResourcesManager struct { + client client.Client + namespace string + log logr.Logger + // Cached excluded resources list + cachedExclusions []string +} + +// NewExcludedResourcesManager creates a new ExcludedResourcesManager +func NewExcludedResourcesManager(client client.Client, namespace string, log logr.Logger) *ExcludedResourcesManager { + return &ExcludedResourcesManager{ + client: client, + namespace: namespace, + log: log, + } +} + +// EnsureConfigMap ensures the default excluded resources ConfigMap exists. +// If it doesn't exist, it creates one with the default hardcoded values. +// Returns the list of excluded resources. +func (m *ExcludedResourcesManager) EnsureConfigMap(ctx context.Context) ([]string, error) { + configMap := &corev1.ConfigMap{} + configMapKey := types.NamespacedName{ + Namespace: m.namespace, + Name: DefaultExcludedResourcesConfigMapName, + } + + err := m.client.Get(ctx, configMapKey, configMap) + if err != nil { + if !errors.IsNotFound(err) { + return nil, fmt.Errorf("failed to get ConfigMap %s: %w", configMapKey, err) + } + + // ConfigMap doesn't exist, create it with default values + m.log.Info("ConfigMap not found, creating with default excluded resources", + "configMap", DefaultExcludedResourcesConfigMapName, + "namespace", m.namespace) + + configMap, err = m.createDefaultConfigMap(ctx) + if err != nil { + return nil, fmt.Errorf("failed to create default ConfigMap: %w", err) + } + } + + // Parse and return the excluded resources + exclusions, err := m.parseExcludedResources(configMap) + if err != nil { + m.log.Error(err, "Failed to parse excluded resources from ConfigMap, using hardcoded defaults") + + return getDefaultExcludedResources(), nil + } + + // Validate and warn about missing critical exclusions + m.validateCriticalExclusions(exclusions) + + // Cache the exclusions + m.cachedExclusions = exclusions + + m.log.Info("Loaded excluded resources from ConfigMap", + "count", len(exclusions), + "configMap", DefaultExcludedResourcesConfigMapName) + + return exclusions, nil +} + +// createDefaultConfigMap creates a ConfigMap with the default excluded resources +func (m *ExcludedResourcesManager) createDefaultConfigMap(ctx context.Context) (*corev1.ConfigMap, error) { + defaults := getDefaultExcludedResources() + + // Convert to JSON for better structure and validation + resourcesJSON, err := json.Marshal(defaults) + if err != nil { + return nil, fmt.Errorf("failed to marshal default excluded resources: %w", err) + } + + configMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: DefaultExcludedResourcesConfigMapName, + Namespace: m.namespace, + Labels: map[string]string{ + "app.kubernetes.io/name": "ramen-dr-cluster-operator", + "app.kubernetes.io/component": "kubeobjects-protection", + "app.kubernetes.io/managed-by": "ramen-dr-cluster-operator", + }, + Annotations: map[string]string{ + "ramen.openshift.io/description": "Default list of resources to exclude from Velero backups. " + + "You can modify this ConfigMap to customize the exclusions. " + + "Changes will be picked up on the next reconciliation.", + "ramen.openshift.io/version": "v1", + }, + }, + Data: map[string]string{ + ExcludedResourcesKey: string(resourcesJSON), + }, + } + + if err := m.client.Create(ctx, configMap); err != nil { + return nil, fmt.Errorf("failed to create ConfigMap: %w", err) + } + + m.log.Info("Created default excluded resources ConfigMap", + "configMap", DefaultExcludedResourcesConfigMapName, + "namespace", m.namespace, + "resources", len(defaults)) + + return configMap, nil +} + +// parseExcludedResources parses the excluded resources from the ConfigMap +func (m *ExcludedResourcesManager) parseExcludedResources(configMap *corev1.ConfigMap) ([]string, error) { + resourcesData, ok := configMap.Data[ExcludedResourcesKey] + if !ok { + return nil, fmt.Errorf("ConfigMap missing key %s", ExcludedResourcesKey) + } + + if resourcesData == "" { + m.log.Info("ConfigMap has empty excluded resources list") + + return []string{}, nil + } + + var exclusions []string + + // Try to parse as JSON first + err := json.Unmarshal([]byte(resourcesData), &exclusions) + if err != nil { + // Fallback to comma-separated format for backwards compatibility + m.log.Info("ConfigMap data is not JSON, trying comma-separated format") + exclusions = m.parseCommaSeparated(resourcesData) + } + + // Trim whitespace from all entries + for i := range exclusions { + exclusions[i] = strings.TrimSpace(exclusions[i]) + } + + // Remove empty entries + filtered := make([]string, 0, len(exclusions)) + for _, resource := range exclusions { + if resource != "" { + filtered = append(filtered, resource) + } + } + + return filtered, nil +} + +// parseCommaSeparated parses a comma-separated list of resources +func (m *ExcludedResourcesManager) parseCommaSeparated(data string) []string { + parts := strings.Split(data, ",") + result := make([]string, 0, len(parts)) + + for _, part := range parts { + trimmed := strings.TrimSpace(part) + if trimmed != "" { + result = append(result, trimmed) + } + } + + return result +} + +// validateCriticalExclusions checks if critical resources are in the exclusion list +// and logs warnings if they're missing +func (m *ExcludedResourcesManager) validateCriticalExclusions(exclusions []string) { + criticalResources := getCriticalExcludedResources() + + for _, critical := range criticalResources { + found := false + + for _, excluded := range exclusions { + if excluded == critical { + found = true + + break + } + } + + if !found { + m.log.Info("WARNING: Critical resource not in exclusions list, this may cause issues", + "resource", critical, + "reason", "Required by Ramen for proper VRG operation") + } + } +} + +// GetExcludedResources returns the cached excluded resources. +// If not cached, it loads from ConfigMap. +func (m *ExcludedResourcesManager) GetExcludedResources(ctx context.Context) ([]string, error) { + if m.cachedExclusions != nil { + return m.cachedExclusions, nil + } + + return m.EnsureConfigMap(ctx) +} + +// ReloadExcludedResources reloads the excluded resources from the ConfigMap. +// This should be called when the ConfigMap is updated. +func (m *ExcludedResourcesManager) ReloadExcludedResources(ctx context.Context) ([]string, error) { + m.log.Info("Reloading excluded resources from ConfigMap") + + configMap := &corev1.ConfigMap{} + configMapKey := types.NamespacedName{ + Namespace: m.namespace, + Name: DefaultExcludedResourcesConfigMapName, + } + + err := m.client.Get(ctx, configMapKey, configMap) + if err != nil { + if errors.IsNotFound(err) { + // ConfigMap was deleted, recreate with defaults + m.log.Info("ConfigMap was deleted, recreating with defaults") + + return m.EnsureConfigMap(ctx) + } + + return nil, fmt.Errorf("failed to reload ConfigMap: %w", err) + } + + exclusions, err := m.parseExcludedResources(configMap) + if err != nil { + m.log.Error(err, "Failed to parse excluded resources after reload, using previous cache") + + return m.cachedExclusions, nil + } + + m.validateCriticalExclusions(exclusions) + m.cachedExclusions = exclusions + + m.log.Info("Reloaded excluded resources from ConfigMap", + "count", len(exclusions)) + + return exclusions, nil +} diff --git a/internal/controller/kubeobjects/velero/exclude_resources_test.go b/internal/controller/kubeobjects/velero/exclude_resources_test.go new file mode 100644 index 0000000000..d3de3337c7 --- /dev/null +++ b/internal/controller/kubeobjects/velero/exclude_resources_test.go @@ -0,0 +1,503 @@ +// SPDX-FileCopyrightText: The RamenDR authors +// SPDX-License-Identifier: Apache-2.0 + +package velero_test + +import ( + "context" + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + "github.com/ramendr/ramen/internal/controller/kubeobjects/velero" +) + +const ( + testNamespace = "test-namespace" +) + +// setupFakeClient creates a fake client with corev1 scheme +func setupFakeClient(t *testing.T, objects ...client.Object) client.Client { + t.Helper() + + scheme := runtime.NewScheme() + err := corev1.AddToScheme(scheme) + require.NoError(t, err) + + return fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build() +} + +// createConfigMapWithJSON creates a ConfigMap with JSON-formatted excluded resources +func createConfigMapWithJSON(namespace string, resources []string) *corev1.ConfigMap { + resourcesJSON, err := json.Marshal(resources) + if err != nil { + return nil + } + + return &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: velero.DefaultExcludedResourcesConfigMapName, + Namespace: namespace, + }, + Data: map[string]string{ + velero.ExcludedResourcesKey: string(resourcesJSON), + }, + } +} + +// createConfigMapWithCommaSeparated creates a ConfigMap with comma-separated excluded resources +func createConfigMapWithCommaSeparated(namespace string, resourcesStr string) *corev1.ConfigMap { + return &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: velero.DefaultExcludedResourcesConfigMapName, + Namespace: namespace, + }, + Data: map[string]string{ + velero.ExcludedResourcesKey: resourcesStr, + }, + } +} + +// TestEnsureConfigMap_CreatesConfigMapWhenNotExists tests that the manager creates +// a ConfigMap with default resources when it doesn't exist +func TestEnsureConfigMap_CreatesConfigMapWhenNotExists(t *testing.T) { + fakeClient := setupFakeClient(t) + logger := zap.New(zap.UseDevMode(true)) + manager := velero.NewExcludedResourcesManager(fakeClient, testNamespace, logger) + + ctx := context.Background() + exclusions, err := manager.EnsureConfigMap(ctx) + + require.NoError(t, err) + assert.NotEmpty(t, exclusions) + + // Verify ConfigMap was created + configMap := &corev1.ConfigMap{} + configMapKey := types.NamespacedName{ + Namespace: testNamespace, + Name: velero.DefaultExcludedResourcesConfigMapName, + } + err = fakeClient.Get(ctx, configMapKey, configMap) + require.NoError(t, err) + + // Verify it has the expected labels and annotations + assert.Equal(t, "ramen-dr-cluster-operator", configMap.Labels["app.kubernetes.io/name"]) + assert.Equal(t, "kubeobjects-protection", configMap.Labels["app.kubernetes.io/component"]) + assert.Contains(t, configMap.Annotations["ramen.openshift.io/description"], "Default list of resources") + + // Verify the data is valid JSON + var parsedResources []string + + err = json.Unmarshal([]byte(configMap.Data[velero.ExcludedResourcesKey]), &parsedResources) + require.NoError(t, err) + assert.NotEmpty(t, parsedResources) + + // Verify it includes critical resources + assert.Contains(t, parsedResources, "volumereplications.replication.storage.openshift.io") + assert.Contains(t, parsedResources, "replicationsources.volsync.backube") +} + +// TestEnsureConfigMap_ReturnsExistingConfigMapData tests that the manager returns +// existing ConfigMap data when it already exists +func TestEnsureConfigMap_ReturnsExistingConfigMapData(t *testing.T) { + customResources := []string{ + "volumereplications.replication.storage.openshift.io", + "customresource.example.com", + } + existingConfigMap := createConfigMapWithJSON(testNamespace, customResources) + fakeClient := setupFakeClient(t, existingConfigMap) + logger := zap.New(zap.UseDevMode(true)) + manager := velero.NewExcludedResourcesManager(fakeClient, testNamespace, logger) + + ctx := context.Background() + exclusions, err := manager.EnsureConfigMap(ctx) + + require.NoError(t, err) + assert.Equal(t, customResources, exclusions) +} + +// TestEnsureConfigMap_ParsesJSONFormat tests that the manager correctly parses +// JSON-formatted excluded resources +func TestEnsureConfigMap_ParsesJSONFormat(t *testing.T) { + expectedResources := []string{ + "resource1.group.io", + "resource2.group.io", + "persistentvolumeclaims", + } + existingConfigMap := createConfigMapWithJSON(testNamespace, expectedResources) + fakeClient := setupFakeClient(t, existingConfigMap) + logger := zap.New(zap.UseDevMode(true)) + manager := velero.NewExcludedResourcesManager(fakeClient, testNamespace, logger) + + ctx := context.Background() + exclusions, err := manager.EnsureConfigMap(ctx) + + require.NoError(t, err) + assert.Equal(t, expectedResources, exclusions) +} + +// TestEnsureConfigMap_ParsesCommaSeparatedFormat tests backward compatibility +// with comma-separated format +func TestEnsureConfigMap_ParsesCommaSeparatedFormat(t *testing.T) { + resourcesStr := "resource1.group.io, resource2.group.io, persistentvolumeclaims" + expectedResources := []string{ + "resource1.group.io", + "resource2.group.io", + "persistentvolumeclaims", + } + existingConfigMap := createConfigMapWithCommaSeparated(testNamespace, resourcesStr) + fakeClient := setupFakeClient(t, existingConfigMap) + logger := zap.New(zap.UseDevMode(true)) + manager := velero.NewExcludedResourcesManager(fakeClient, testNamespace, logger) + + ctx := context.Background() + exclusions, err := manager.EnsureConfigMap(ctx) + + require.NoError(t, err) + assert.Equal(t, expectedResources, exclusions) +} + +// TestEnsureConfigMap_HandlesEmptyConfigMap tests that empty ConfigMap returns empty list +func TestEnsureConfigMap_HandlesEmptyConfigMap(t *testing.T) { + existingConfigMap := createConfigMapWithJSON(testNamespace, []string{}) + fakeClient := setupFakeClient(t, existingConfigMap) + logger := zap.New(zap.UseDevMode(true)) + manager := velero.NewExcludedResourcesManager(fakeClient, testNamespace, logger) + + ctx := context.Background() + exclusions, err := manager.EnsureConfigMap(ctx) + + require.NoError(t, err) + assert.Empty(t, exclusions) +} + +// TestEnsureConfigMap_HandlesWhitespaceInCommaSeparated tests that whitespace +// is properly trimmed in comma-separated format +func TestEnsureConfigMap_HandlesWhitespaceInCommaSeparated(t *testing.T) { + resourcesStr := " resource1.group.io , resource2.group.io , , persistentvolumeclaims " + expectedResources := []string{ + "resource1.group.io", + "resource2.group.io", + "persistentvolumeclaims", + } + existingConfigMap := createConfigMapWithCommaSeparated(testNamespace, resourcesStr) + fakeClient := setupFakeClient(t, existingConfigMap) + logger := zap.New(zap.UseDevMode(true)) + manager := velero.NewExcludedResourcesManager(fakeClient, testNamespace, logger) + + ctx := context.Background() + exclusions, err := manager.EnsureConfigMap(ctx) + + require.NoError(t, err) + assert.Equal(t, expectedResources, exclusions) +} + +// TestEnsureConfigMap_HandlesMissingResourcesKey tests fallback to defaults +// when ConfigMap exists but missing the resources key +func TestEnsureConfigMap_HandlesMissingResourcesKey(t *testing.T) { + existingConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: velero.DefaultExcludedResourcesConfigMapName, + Namespace: testNamespace, + }, + Data: map[string]string{ + "other-key": "other-value", + }, + } + fakeClient := setupFakeClient(t, existingConfigMap) + logger := zap.New(zap.UseDevMode(true)) + manager := velero.NewExcludedResourcesManager(fakeClient, testNamespace, logger) + + ctx := context.Background() + exclusions, err := manager.EnsureConfigMap(ctx) + + // Should not error, but fall back to defaults + require.NoError(t, err) + assert.NotEmpty(t, exclusions) + // Should include critical resources from defaults + assert.Contains(t, exclusions, "volumereplications.replication.storage.openshift.io") +} + +// TestEnsureConfigMap_HandlesInvalidJSON tests fallback to comma-separated +// when JSON parsing fails +func TestEnsureConfigMap_HandlesInvalidJSON(t *testing.T) { + existingConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: velero.DefaultExcludedResourcesConfigMapName, + Namespace: testNamespace, + }, + Data: map[string]string{ + velero.ExcludedResourcesKey: "resource1, resource2", // Not JSON + }, + } + fakeClient := setupFakeClient(t, existingConfigMap) + logger := zap.New(zap.UseDevMode(true)) + manager := velero.NewExcludedResourcesManager(fakeClient, testNamespace, logger) + + ctx := context.Background() + exclusions, err := manager.EnsureConfigMap(ctx) + + require.NoError(t, err) + assert.Equal(t, []string{"resource1", "resource2"}, exclusions) +} + +// TestGetExcludedResources_UsesCachedValue tests that GetExcludedResources +// returns cached value without hitting the API +func TestGetExcludedResources_UsesCachedValue(t *testing.T) { + customResources := []string{"cached-resource1", "cached-resource2"} + existingConfigMap := createConfigMapWithJSON(testNamespace, customResources) + fakeClient := setupFakeClient(t, existingConfigMap) + logger := zap.New(zap.UseDevMode(true)) + manager := velero.NewExcludedResourcesManager(fakeClient, testNamespace, logger) + + ctx := context.Background() + + // First call to populate cache + exclusions1, err := manager.EnsureConfigMap(ctx) + require.NoError(t, err) + assert.Equal(t, customResources, exclusions1) + + // Delete the ConfigMap to ensure we're using cache + err = fakeClient.Delete(ctx, existingConfigMap) + require.NoError(t, err) + + // GetExcludedResources should return cached value + exclusions2, err := manager.GetExcludedResources(ctx) + require.NoError(t, err) + assert.Equal(t, customResources, exclusions2) +} + +// TestGetExcludedResources_LoadsWhenNotCached tests that GetExcludedResources +// loads from ConfigMap when cache is empty +func TestGetExcludedResources_LoadsWhenNotCached(t *testing.T) { + customResources := []string{"resource1", "resource2"} + existingConfigMap := createConfigMapWithJSON(testNamespace, customResources) + fakeClient := setupFakeClient(t, existingConfigMap) + logger := zap.New(zap.UseDevMode(true)) + manager := velero.NewExcludedResourcesManager(fakeClient, testNamespace, logger) + + ctx := context.Background() + + // Call GetExcludedResources without EnsureConfigMap first + exclusions, err := manager.GetExcludedResources(ctx) + require.NoError(t, err) + assert.Equal(t, customResources, exclusions) +} + +// TestReloadExcludedResources_UpdatesCache tests that ReloadExcludedResources +// refreshes the cache from the ConfigMap +func TestReloadExcludedResources_UpdatesCache(t *testing.T) { + initialResources := []string{"resource1", "resource2"} + existingConfigMap := createConfigMapWithJSON(testNamespace, initialResources) + fakeClient := setupFakeClient(t, existingConfigMap) + logger := zap.New(zap.UseDevMode(true)) + manager := velero.NewExcludedResourcesManager(fakeClient, testNamespace, logger) + + ctx := context.Background() + + // First load + exclusions1, err := manager.EnsureConfigMap(ctx) + require.NoError(t, err) + assert.Equal(t, initialResources, exclusions1) + + // Update ConfigMap + updatedResources := []string{"updated-resource1", "updated-resource2", "updated-resource3"} + + updatedJSON, err := json.Marshal(updatedResources) + if err != nil { + require.NoError(t, err) + } + + existingConfigMap.Data[velero.ExcludedResourcesKey] = string(updatedJSON) + err = fakeClient.Update(ctx, existingConfigMap) + require.NoError(t, err) + + // Reload and verify updated resources + exclusions2, err := manager.ReloadExcludedResources(ctx) + require.NoError(t, err) + assert.Equal(t, updatedResources, exclusions2) + + // Verify cache was updated + exclusions3, err := manager.GetExcludedResources(ctx) + require.NoError(t, err) + assert.Equal(t, updatedResources, exclusions3) +} + +// TestReloadExcludedResources_RecreatesDeletedConfigMap tests that +// ReloadExcludedResources recreates ConfigMap if it was deleted +func TestReloadExcludedResources_RecreatesDeletedConfigMap(t *testing.T) { + initialResources := []string{"resource1", "resource2"} + existingConfigMap := createConfigMapWithJSON(testNamespace, initialResources) + fakeClient := setupFakeClient(t, existingConfigMap) + logger := zap.New(zap.UseDevMode(true)) + manager := velero.NewExcludedResourcesManager(fakeClient, testNamespace, logger) + + ctx := context.Background() + + // First load + exclusions1, err := manager.EnsureConfigMap(ctx) + require.NoError(t, err) + assert.Equal(t, initialResources, exclusions1) + + // Delete ConfigMap + err = fakeClient.Delete(ctx, existingConfigMap) + require.NoError(t, err) + + // Verify ConfigMap is deleted + configMapKey := types.NamespacedName{ + Namespace: testNamespace, + Name: velero.DefaultExcludedResourcesConfigMapName, + } + err = fakeClient.Get(ctx, configMapKey, &corev1.ConfigMap{}) + assert.True(t, errors.IsNotFound(err)) + + // Reload should recreate with defaults + exclusions2, err := manager.ReloadExcludedResources(ctx) + require.NoError(t, err) + assert.NotEmpty(t, exclusions2) + + // Verify ConfigMap was recreated + recreatedConfigMap := &corev1.ConfigMap{} + err = fakeClient.Get(ctx, configMapKey, recreatedConfigMap) + require.NoError(t, err) + assert.NotNil(t, recreatedConfigMap) +} + +// TestReloadExcludedResources_FallbackOnParseError tests that ReloadExcludedResources +// falls back to cached value when parse error occurs +func TestReloadExcludedResources_FallbackOnParseError(t *testing.T) { + initialResources := []string{"resource1", "resource2"} + existingConfigMap := createConfigMapWithJSON(testNamespace, initialResources) + fakeClient := setupFakeClient(t, existingConfigMap) + logger := zap.New(zap.UseDevMode(true)) + manager := velero.NewExcludedResourcesManager(fakeClient, testNamespace, logger) + + ctx := context.Background() + + // First load to populate cache + exclusions1, err := manager.EnsureConfigMap(ctx) + require.NoError(t, err) + assert.Equal(t, initialResources, exclusions1) + + // Update ConfigMap to remove the resources key (will cause parse error) + existingConfigMap.Data = map[string]string{ + "wrong-key": "wrong-value", + } + err = fakeClient.Update(ctx, existingConfigMap) + require.NoError(t, err) + + // Reload should return cached value on parse error + exclusions2, err := manager.ReloadExcludedResources(ctx) + require.NoError(t, err) + assert.Equal(t, initialResources, exclusions2) +} + +// TestEnsureConfigMap_ValidatesCriticalResources tests that missing critical +// resources are logged (warning behavior) +func TestEnsureConfigMap_ValidatesCriticalResources(t *testing.T) { + // ConfigMap without critical resources + resourcesWithoutCritical := []string{ + "persistentvolumeclaims", + "persistentvolumes", + } + existingConfigMap := createConfigMapWithJSON(testNamespace, resourcesWithoutCritical) + fakeClient := setupFakeClient(t, existingConfigMap) + logger := zap.New(zap.UseDevMode(true)) + manager := velero.NewExcludedResourcesManager(fakeClient, testNamespace, logger) + + ctx := context.Background() + + // This should succeed but log warnings + exclusions, err := manager.EnsureConfigMap(ctx) + require.NoError(t, err) + assert.Equal(t, resourcesWithoutCritical, exclusions) + // Note: We can't easily test log output without a custom logger, + // but at least verify it doesn't error out +} + +// TestEnsureConfigMap_ConfigMapWithCriticalResources tests that all critical +// resources are properly recognized +func TestEnsureConfigMap_ConfigMapWithCriticalResources(t *testing.T) { + resourcesWithCritical := []string{ + "volumereplications.replication.storage.openshift.io", + "volumegroupreplications.replication.storage.openshift.io", + "replicationsources.volsync.backube", + "replicationdestinations.volsync.backube", + "persistentvolumeclaims", + } + existingConfigMap := createConfigMapWithJSON(testNamespace, resourcesWithCritical) + fakeClient := setupFakeClient(t, existingConfigMap) + logger := zap.New(zap.UseDevMode(true)) + manager := velero.NewExcludedResourcesManager(fakeClient, testNamespace, logger) + + ctx := context.Background() + + exclusions, err := manager.EnsureConfigMap(ctx) + require.NoError(t, err) + assert.Equal(t, resourcesWithCritical, exclusions) + // All critical resources are present, so no warnings +} + +// TestEnsureConfigMap_MultipleNamespaces tests that managers with different +// namespaces operate independently +func TestEnsureConfigMap_MultipleNamespaces(t *testing.T) { + namespace1 := "namespace1" + namespace2 := "namespace2" + + resources1 := []string{"resource1"} + resources2 := []string{"resource2"} + + configMap1 := createConfigMapWithJSON(namespace1, resources1) + configMap2 := createConfigMapWithJSON(namespace2, resources2) + + fakeClient := setupFakeClient(t, configMap1, configMap2) + logger := zap.New(zap.UseDevMode(true)) + + manager1 := velero.NewExcludedResourcesManager(fakeClient, namespace1, logger) + manager2 := velero.NewExcludedResourcesManager(fakeClient, namespace2, logger) + + ctx := context.Background() + + exclusions1, err := manager1.EnsureConfigMap(ctx) + require.NoError(t, err) + assert.Equal(t, resources1, exclusions1) + + exclusions2, err := manager2.EnsureConfigMap(ctx) + require.NoError(t, err) + assert.Equal(t, resources2, exclusions2) +} + +// TestParseExcludedResources_FiltersEmptyEntries tests that empty entries +// from JSON arrays are filtered out +func TestParseExcludedResources_FiltersEmptyEntries(t *testing.T) { + // JSON with empty strings + resourcesJSON := `["resource1", "", "resource2", " ", "resource3"]` + existingConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: velero.DefaultExcludedResourcesConfigMapName, + Namespace: testNamespace, + }, + Data: map[string]string{ + velero.ExcludedResourcesKey: resourcesJSON, + }, + } + fakeClient := setupFakeClient(t, existingConfigMap) + logger := zap.New(zap.UseDevMode(true)) + manager := velero.NewExcludedResourcesManager(fakeClient, testNamespace, logger) + + ctx := context.Background() + exclusions, err := manager.EnsureConfigMap(ctx) + + require.NoError(t, err) + assert.Equal(t, []string{"resource1", "resource2", "resource3"}, exclusions) +} diff --git a/internal/controller/kubeobjects/velero/requests.go b/internal/controller/kubeobjects/velero/requests.go index 1f5c5ddb00..8603cedc31 100644 --- a/internal/controller/kubeobjects/velero/requests.go +++ b/internal/controller/kubeobjects/velero/requests.go @@ -435,14 +435,13 @@ func getBackupSpecFromObjectsSpec(objectsSpec kubeobjects.Spec) velero.BackupSpe return velero.BackupSpec{ IncludedNamespaces: objectsSpec.IncludedNamespaces, IncludedResources: objectsSpec.IncludedResources, - // exclude VRs from Backup so VRG can create them: see https://github.com/RamenDR/ramen/issues/884 - // exclude EndpointSlices/Endpoints to prevent Submariner conflicts: see https://github.com/RamenDR/ramen/issues/1889 - // exclude VolumeSnapshots and VolumeGroupSnapshots from backup - ExcludedResources: append(objectsSpec.ExcludedResources, "volumereplications.replication.storage.openshift.io", - "volumegroupreplications.replication.storage.openshift.io", "replicationsources.volsync.backube", - "replicationdestinations.volsync.backube", "PersistentVolumeClaims", "PersistentVolumes", - "endpointslices.discovery.k8s.io", "endpoints", "volumesnapshots.snapshot.storage.k8s.io", - "volumegroupsnapshots.groupsnapshot.storage.k8s.io"), + // ExcludedResources are now managed via the default-excluded-resources ConfigMap + // and merged with recipe-level exclusions in vrg_kubeobjects.go + // The objectsSpec.ExcludedResources already contains the merged list: + // - Default exclusions from ConfigMap (bootstrapped with hardcoded defaults if not present) + // - Recipe group-level exclusions + // See: internal/controller/exclude_resources.go and vrg_kubeobjects.go:mergeExcludedResources() + ExcludedResources: objectsSpec.ExcludedResources, LabelSelector: newLabelSelector, OrLabelSelectors: objectsSpec.OrLabelSelectors, TTL: metav1.Duration{}, // TODO: set default here diff --git a/internal/controller/metrics.go b/internal/controller/metrics.go index 8bbb1c7b8b..c3899c03ff 100644 --- a/internal/controller/metrics.go +++ b/internal/controller/metrics.go @@ -375,12 +375,13 @@ func DeleteInvalidCIDRsDetectedMetric(labels prometheus.Labels) bool { } func DRProgressionStateMetricLabels(drpc *rmn.DRPlacementControl, + state string, ) prometheus.Labels { return prometheus.Labels{ ObjType: "DRPlacementControl", ObjName: drpc.Name, ObjNamespace: drpc.Namespace, - ProgressionStateLabel: string(drpc.Status.Progression), + ProgressionStateLabel: state, } } diff --git a/internal/controller/util/mcv_util.go b/internal/controller/util/mcv_util.go index edec94fbb2..2d1ae7f110 100644 --- a/internal/controller/util/mcv_util.go +++ b/internal/controller/util/mcv_util.go @@ -14,6 +14,7 @@ import ( volrep "github.com/csi-addons/kubernetes-csi-addons/api/replication.storage/v1alpha1" "github.com/go-logr/logr" snapv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" + recipev1 "github.com/ramendr/recipe/api/v1alpha1" groupsnapv1beta1 "github.com/red-hat-storage/external-snapshotter/client/v8/apis/volumegroupsnapshot/v1beta1" corev1 "k8s.io/api/core/v1" storagev1 "k8s.io/api/storage/v1" @@ -91,6 +92,9 @@ type ManagedClusterViewGetter interface { GetNSFromManagedCluster( managedCluster, resourceName string) (*corev1.Namespace, error) + GetRecipeFromManagedCluster( + managedCluster, resourceName, resourceNamespace string) (*recipev1.Recipe, error) + ListVGRClassMCVs(managedCluster string) (*viewv1beta1.ManagedClusterViewList, error) GetResource(mcv *viewv1beta1.ManagedClusterView, resource interface{}) error @@ -418,6 +422,26 @@ func (m ManagedClusterViewGetterImpl) GetNSFromManagedCluster(cluster, resourceN return ns, err } +func (m ManagedClusterViewGetterImpl) GetRecipeFromManagedCluster(cluster, resourceName, + resourceNamespace string, +) (*recipev1.Recipe, error) { + recipe := &recipev1.Recipe{} + + err := m.getResourceFromManagedCluster( + resourceName, + resourceNamespace, + cluster, + map[string]string{}, + map[string]string{}, + BuildManagedClusterViewName(resourceName, resourceNamespace, "recipe"), + "Recipe", + recipev1.GroupVersion.Group, + recipev1.GroupVersion.Version, + recipe) + + return recipe, err +} + func (m ManagedClusterViewGetterImpl) ListVRClassMCVs(cluster string) (*viewv1beta1.ManagedClusterViewList, error) { return m.listMCVsWithLabel(cluster, map[string]string{VRClassLabel: ""}) } diff --git a/internal/controller/util/mw_util.go b/internal/controller/util/mw_util.go index 9c04bc1439..f4069df8a2 100644 --- a/internal/controller/util/mw_util.go +++ b/internal/controller/util/mw_util.go @@ -11,6 +11,7 @@ import ( csiaddonsv1alpha1 "github.com/csi-addons/kubernetes-csi-addons/api/csiaddons/v1alpha1" "github.com/go-logr/logr" + recipev1 "github.com/ramendr/recipe/api/v1alpha1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" @@ -53,6 +54,7 @@ const ( MWTypeVRClass string = "vrc" MWTypeVGRClass string = "vgrc" MWTypeDRCConfig string = "drcconfig" + MWTypeRecipe string = "recipe" ) type MWUtil struct { @@ -393,6 +395,44 @@ func Namespace(name string) *corev1.Namespace { } } +// Recipe MW creation +func (mwu *MWUtil) CreateOrUpdateRecipeManifestWork( + recipe *recipev1.Recipe, managedClusterNamespace string, +) error { + manifest, err := mwu.GenerateManifest(prepareRecipeForMW(recipe)) + if err != nil { + return err + } + + manifests := []ocmworkv1.Manifest{*manifest} + mwName := fmt.Sprintf(ManifestWorkNameFormat, mwu.InstName, mwu.TargetNamespace, MWTypeRecipe) + manifestWork := mwu.newManifestWork( + mwName, + managedClusterNamespace, + map[string]string{ + "recipe": recipe.Name, + }, + manifests, + map[string]string{}, + ) + manifestWork.Spec.DeleteOption = &ocmworkv1.DeleteOption{ + PropagationPolicy: ocmworkv1.DeletePropagationPolicyTypeOrphan, + } + _, err = mwu.createOrUpdateManifestWork(manifestWork, managedClusterNamespace) + + return err +} + +func prepareRecipeForMW(recipe *recipev1.Recipe) *recipev1.Recipe { + recipeCopy := &recipev1.Recipe{ + TypeMeta: metav1.TypeMeta{Kind: "Recipe", APIVersion: recipev1.GroupVersion.String()}, + ObjectMeta: ObjectMetaEmbedded(&recipe.ObjectMeta), + } + recipeCopy.Spec = recipe.Spec + + return recipeCopy +} + func ExtractResourceFromManifestWork( mw *ocmworkv1.ManifestWork, object client.Object, @@ -459,6 +499,7 @@ func (mwu *MWUtil) CreateOrUpdateDrClusterManifestWork( mModeClusterRole, drClusterConfigRole, networkFenceClusterRole, + recipeClusterRole, }, objectsToAppend..., ) @@ -557,6 +598,23 @@ var ( }, }, } + + recipeClusterRole = &rbacv1.ClusterRole{ + TypeMeta: metav1.TypeMeta{Kind: "ClusterRole", APIVersion: "rbac.authorization.k8s.io/v1"}, + ObjectMeta: metav1.ObjectMeta{ + Name: "open-cluster-management:klusterlet-work-sa:agent:recipe-edit", + Labels: map[string]string{ + ClusterRoleAggregateLabel: "true", + }, + }, + Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{recipev1.GroupVersion.Group}, + Resources: []string{"recipes"}, + Verbs: []string{"create", "get", "list", "update"}, + }, + }, + } ) func (mwu *MWUtil) GenerateManifest(obj interface{}) (*ocmworkv1.Manifest, error) { diff --git a/internal/controller/volsync/vshandler.go b/internal/controller/volsync/vshandler.go index 618c06fe77..4d276e8a2c 100644 --- a/internal/controller/volsync/vshandler.go +++ b/internal/controller/volsync/vshandler.go @@ -1000,13 +1000,42 @@ func (v *VSHandler) configureReplicationSourceSpec(rs *volsyncv1alpha1.Replicati return nil } +// cleanupMountJobForFinalSync deletes the mount job and waits for deletion to complete. +// This must be called before PVC cleanup to release the kubernetes.io/pvc-protection finalizer. +func (v *VSHandler) cleanupMountJobForFinalSync(tmpPVCName, pvcNamespace string) error { + if err := v.DeleteMountJob(tmpPVCName, pvcNamespace); err != nil { + return fmt.Errorf("failed to delete mount job for %s/%s: %w", pvcNamespace, tmpPVCName, err) + } + + deleted, err := v.WaitForMountJobDeletion(tmpPVCName, pvcNamespace) + if err != nil { + return fmt.Errorf("error waiting for mount job deletion for %s/%s: %w", pvcNamespace, tmpPVCName, err) + } + + if !deleted { + return fmt.Errorf("waiting for mount job deletion to complete for %s/%s", pvcNamespace, tmpPVCName) + } + + v.log.V(1).Info("Mount job cleanup completed", "tmpPVCName", tmpPVCName) + + return nil +} + //nolint:cyclop,funlen func (v *VSHandler) UndoAfterFinalSync(pvcName, pvcNamespace string) error { v.log.V(1).Info("Undo after final sync", "pvcName", pvcName) + + tmpPVCName := util.GetTmpPVCNameForFinalSync(pvcName) + + // Clean up mount job first to release the PVC protection finalizer + if err := v.cleanupMountJobForFinalSync(tmpPVCName, pvcNamespace); err != nil { + return err + } + // Remove claimRef and reset the original PVC claimRef (without uid) tmpPVC, err := v.getPVC(types.NamespacedName{ Namespace: pvcNamespace, - Name: util.GetTmpPVCNameForFinalSync(pvcName), + Name: tmpPVCName, }) if err == nil { err2 := v.client.Delete(v.ctx, tmpPVC) @@ -3379,6 +3408,71 @@ func (v *VSHandler) EnsureMountJobForUnmountedPVC(rsSpec *ramendrv1alpha1.VolSyn return v.handleMountJobResult(job, pvcNamespacedName, log) } +// DeleteMountJob deletes the mount job for a given PVC if it exists +func (v *VSHandler) DeleteMountJob(pvcName, pvcNamespace string) error { + jobName := util.GetJobName(VolSyncMountJobNamePrefix, pvcName) + + job := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: jobName, + Namespace: pvcNamespace, + }, + } + + v.log.V(1).Info("Deleting mount job", "jobName", jobName, "namespace", pvcNamespace) + + err := v.client.Delete(v.ctx, job, client.PropagationPolicy(metav1.DeletePropagationForeground)) + if err != nil { + if errors.IsNotFound(err) { + v.log.V(1).Info("Mount job not found, already deleted", "jobName", jobName) + + return nil + } + + v.log.Error(err, "Failed to delete mount job", "jobName", jobName) + + return fmt.Errorf("failed to delete mount job %s/%s: %w", pvcNamespace, jobName, err) + } + + v.log.Info("Successfully deleted mount job", "jobName", jobName, "namespace", pvcNamespace) + + return nil +} + +// WaitForMountJobDeletion waits for the mount job and its pods to be fully deleted +func (v *VSHandler) WaitForMountJobDeletion(pvcName, pvcNamespace string) (bool, error) { + jobName := util.GetJobName(VolSyncMountJobNamePrefix, pvcName) + + job := &batchv1.Job{} + + err := v.client.Get(v.ctx, types.NamespacedName{ + Name: jobName, + Namespace: pvcNamespace, + }, job) + if err != nil { + if errors.IsNotFound(err) { + v.log.V(1).Info("Mount job fully deleted", "jobName", jobName) + + return true, nil // Job is gone, deletion complete + } + + return false, fmt.Errorf("failed to check mount job status %s/%s: %w", pvcNamespace, jobName, err) + } + + // Job still exists, check if it's being deleted + if job.DeletionTimestamp != nil { + v.log.V(1).Info("Mount job deletion in progress", "jobName", jobName, + "deletionTimestamp", job.DeletionTimestamp) + + return false, nil // Still deleting, need to wait + } + + // Job exists but not marked for deletion - this shouldn't happen + v.log.Info("Mount job exists but not marked for deletion", "jobName", jobName) + + return false, fmt.Errorf("mount job %s/%s exists but not marked for deletion", pvcNamespace, jobName) +} + func (v *VSHandler) mountJobRequired( pvcNamespacedName types.NamespacedName, log logr.Logger, diff --git a/internal/controller/volumereplicationgroup_controller.go b/internal/controller/volumereplicationgroup_controller.go index 506311f49d..69784ac8be 100644 --- a/internal/controller/volumereplicationgroup_controller.go +++ b/internal/controller/volumereplicationgroup_controller.go @@ -51,15 +51,18 @@ import ( // VolumeReplicationGroupReconciler reconciles a VolumeReplicationGroup object type VolumeReplicationGroupReconciler struct { client.Client - APIReader client.Reader - Log logr.Logger - ObjStoreGetter ObjectStoreGetter - Scheme *runtime.Scheme - eventRecorder *util.EventReporter - kubeObjects kubeobjects.RequestsManager - RateLimiter *workqueue.TypedRateLimiter[reconcile.Request] - veleroCRsAreWatched bool - recipeRetries sync.Map + APIReader client.Reader + Log logr.Logger + ObjStoreGetter ObjectStoreGetter + Scheme *runtime.Scheme + eventRecorder *util.EventReporter + kubeObjects kubeobjects.RequestsManager + RateLimiter *workqueue.TypedRateLimiter[reconcile.Request] + veleroCRsAreWatched bool + recipeRetries sync.Map + excludedResourcesMgr *velero.ExcludedResourcesManager + excludedResourcesMutex sync.RWMutex + cachedExcludedResources []string } // SetupWithManager sets up the controller with the Manager. @@ -123,6 +126,13 @@ func (r *VolumeReplicationGroupReconciler) SetupWithManager( if !ramenConfig.KubeObjectProtection.Disabled { ctrlBuilder = r.addKubeObjectsOwnsAndWatches(ctrlBuilder) + + // Initialize the excluded resources manager for kube object protection + r.excludedResourcesMgr = velero.NewExcludedResourcesManager( + r.Client, + RamenOperatorNamespace(), + r.Log.WithName("excluded-resources"), + ) } else { r.Log.Info("Kube object protection disabled; don't watch kube objects requests") } @@ -154,6 +164,30 @@ func (r *VolumeReplicationGroupReconciler) configMapFun( ) []reconcile.Request { log := ctrl.Log.WithName("configmap").WithName("VolumeReplicationGroup") + // Handle default-excluded-resources ConfigMap + if configmap.GetName() == velero.DefaultExcludedResourcesConfigMapName && + configmap.GetNamespace() == RamenOperatorNamespace() { + log.Info("Update in default-excluded-resources ConfigMap, reloading") + + // Reload the excluded resources + if r.excludedResourcesMgr != nil { + if excludedResources, err := r.excludedResourcesMgr.ReloadExcludedResources(ctx); err != nil { + log.Error(err, "Failed to reload excluded resources") + } else { + // Update the cached excluded resources in the reconciler + r.excludedResourcesMutex.Lock() + r.cachedExcludedResources = excludedResources + r.excludedResourcesMutex.Unlock() + log.Info("Updated cached excluded resources", "count", len(excludedResources)) + } + } + + // No need to reconcile all VRGs for this change + // The new exclusions will be used in the next backup + return []reconcile.Request{} + } + + // Handle ramen operator config ConfigMap if configmap.GetName() != DrClusterOperatorConfigMapName || configmap.GetNamespace() != RamenOperatorNamespace() { return []reconcile.Request{} } @@ -400,7 +434,7 @@ func filterPVC(reader client.Reader, pvc *corev1.PersistentVolumeClaim, log logr // +kubebuilder:rbac:groups=core,resources=events,verbs=get;create;patch;update // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=ramendr.openshift.io,resources=recipes,verbs=get;list;watch -// +kubebuilder:rbac:groups="",resources=configmaps,verbs=list;watch +// +kubebuilder:rbac:groups="",resources=configmaps,verbs=list;watch;get;create;patch;update // +kubebuilder:rbac:groups="apiextensions.k8s.io",resources=customresourcedefinitions,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=pods/exec,verbs=create // +kubebuilder:rbac:groups="kubevirt.io",resources=virtualmachines,verbs=get;list;watch;patch;update;delete @@ -467,6 +501,8 @@ func (r *VolumeReplicationGroupReconciler) Reconcile(ctx context.Context, req ct "Please install velero/oadp and restart the operator", v.instance.Namespace, v.instance.Name) } + r.ensureExcludedResourcesCM(ctx, ramenConfig, log) + v.volSyncHandler = volsync.NewVSHandler(ctx, r.Client, log, v.instance, v.instance.Spec.Async, cephFSCSIDriverNameOrDefault(v.ramenConfig), volSyncDestinationCopyMethodOrDefault(v.ramenConfig), adminNamespaceVRG) @@ -488,6 +524,28 @@ func (r *VolumeReplicationGroupReconciler) Reconcile(ctx context.Context, req ct return res, nil } +func (r *VolumeReplicationGroupReconciler) ensureExcludedResourcesCM(ctx context.Context, + ramenConfig *ramendrv1alpha1.RamenConfig, log logr.Logger, +) { + // Ensure default-excluded-resources ConfigMap exists on first reconciliation + if !ramenConfig.KubeObjectProtection.Disabled && r.excludedResourcesMgr != nil { + r.excludedResourcesMutex.RLock() + needsInit := len(r.cachedExcludedResources) == 0 + r.excludedResourcesMutex.RUnlock() + + if needsInit { + if excludedResources, err := r.excludedResourcesMgr.EnsureConfigMap(ctx); err != nil { + log.Error(err, "Failed to ensure excluded resources ConfigMap") + } else { + r.excludedResourcesMutex.Lock() + r.cachedExcludedResources = excludedResources + r.excludedResourcesMutex.Unlock() + log.Info("Initialized excluded resources from ConfigMap", "count", len(excludedResources)) + } + } + } +} + type cachedObjectStorer struct { storer ObjectStorer err error diff --git a/internal/controller/vrg_kubeobjects.go b/internal/controller/vrg_kubeobjects.go index 5ba751d139..9d411e211f 100644 --- a/internal/controller/vrg_kubeobjects.go +++ b/internal/controller/vrg_kubeobjects.go @@ -377,11 +377,14 @@ func (v *VRGInstance) kubeObjectsGroupCapture( request, ok := requests[requestName] if !ok { + // Merge ConfigMap default exclusions with recipe-level exclusions + captureSpec := v.mergeExcludedResources(captureGroup.Spec) + if _, err := v.reconciler.kubeObjects.ProtectRequestCreate( v.ctx, v.reconciler.Client, v.log, s3StoreAccessor.S3CompatibleEndpoint, s3StoreAccessor.S3Bucket, s3StoreAccessor.S3Region, pathName, s3StoreAccessor.VeleroNamespaceSecretKeyRef, s3StoreAccessor.CACertificates, - captureGroup.Spec, veleroNamespaceName, requestName, + captureSpec, veleroNamespaceName, requestName, labels, annotations, ); err != nil { log1.Error(err, "Kube objects group capture request submit error") @@ -942,6 +945,43 @@ func (v *VRGInstance) kubeObjectsProtectionDelete(result *ctrl.Result) error { ) } +// mergeExcludedResources merges ConfigMap default exclusions with recipe-level exclusions. +// Returns a new Spec with the merged exclusions. +func (v *VRGInstance) mergeExcludedResources(spec kubeobjects.Spec) kubeobjects.Spec { + // Get default exclusions from ConfigMap + v.reconciler.excludedResourcesMutex.RLock() + defaultExclusions := v.reconciler.cachedExcludedResources + v.reconciler.excludedResourcesMutex.RUnlock() + + if len(defaultExclusions) == 0 { + // No default exclusions, return original spec + return spec + } + + // Create a new spec with merged exclusions + // ConfigMap defaults + Recipe group exclusions + mergedExclusions := make([]string, 0, len(defaultExclusions)+len(spec.ExcludedResources)) + mergedExclusions = append(mergedExclusions, defaultExclusions...) + mergedExclusions = append(mergedExclusions, spec.ExcludedResources...) + + // Remove duplicates + seen := make(map[string]bool, len(mergedExclusions)) + uniqueExclusions := make([]string, 0, len(mergedExclusions)) + + for _, resource := range mergedExclusions { + if !seen[resource] { + seen[resource] = true + uniqueExclusions = append(uniqueExclusions, resource) + } + } + + // Create a copy of the spec with merged exclusions + mergedSpec := spec + mergedSpec.ExcludedResources = uniqueExclusions + + return mergedSpec +} + func kubeObjectsRequestsWatch( b *builder.Builder, scheme *runtime.Scheme, kubeObjects kubeobjects.RequestsManager, ) *builder.Builder {