From 0947280cf44f523d5ab719ee5b381725887ff777 Mon Sep 17 00:00:00 2001 From: Jathavedhan M Date: Mon, 8 Jun 2026 19:15:51 +0530 Subject: [PATCH] [cinder-csi-plugin] Fix volume from snapshot AZ selection When CreateVolume restores from a snapshot and the availability zone is derived from topology, clear the topology-derived AZ and let Cinder place the volume in the snapshot's AZ. This avoids cross-AZ Cinder restore failures in multi-AZ clusters using WaitForFirstConsumer. Cinder requires that a volume created from a snapshot resides in the same AZ as the snapshot. When the scheduler picks a node in a different AZ, the topology-derived AZ conflicts with this constraint, causing HTTP 400: "Volume must be in the same availability zone as the snapshot". By omitting the explicit AZ, Cinder automatically selects the correct zone. Signed-off-by: Jathavedhan M --- pkg/csi/cinder/controllerserver.go | 6 ++ pkg/csi/cinder/controllerserver_test.go | 97 +++++++++++++++++++++++++ 2 files changed, 103 insertions(+) diff --git a/pkg/csi/cinder/controllerserver.go b/pkg/csi/cinder/controllerserver.go index 98864a9bfc..e9eb679e1a 100644 --- a/pkg/csi/cinder/controllerserver.go +++ b/pkg/csi/cinder/controllerserver.go @@ -94,10 +94,12 @@ func (cs *controllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol // First check if volAvailability is already specified, if not get preferred from Topology // Required, in case vol AZ is different from node AZ var volAvailability string + volAvailabilityFromTopology := false if volParams["availability"] != "" { volAvailability = volParams["availability"] } else if cs.Driver.withTopology && accessibleTopologyReq != nil { volAvailability = sharedcsi.GetAZFromTopology(topologyKey, accessibleTopologyReq) + volAvailabilityFromTopology = true } // get the PVC annotation @@ -157,6 +159,10 @@ func (cs *controllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol if err == nil && snap.Status != "available" { return nil, status.Errorf(codes.Unavailable, "VolumeContentSource Snapshot %s is not yet available. status: %s", snapshotID, snap.Status) } + if err == nil && volAvailabilityFromTopology { + klog.V(4).Infof("CreateVolume: clearing topology-derived availability zone %q for snapshot %s to let Cinder select the correct zone", volAvailability, snapshotID) + volAvailability = "" + } // In case a snapshot is not found // check if a Backup with the same ID exists diff --git a/pkg/csi/cinder/controllerserver_test.go b/pkg/csi/cinder/controllerserver_test.go index df608d1b2c..a2d7ed009f 100644 --- a/pkg/csi/cinder/controllerserver_test.go +++ b/pkg/csi/cinder/controllerserver_test.go @@ -417,6 +417,103 @@ func TestCreateVolumeFromSnapshot(t *testing.T) { assert.Equal(FakeSnapshotID, actualRes.Volume.ContentSource.GetSnapshot().SnapshotId) } +func TestCreateVolumeFromSnapshotWithTopology(t *testing.T) { + fakeCs, osmock := fakeControllerServer() + + properties := map[string]string{cinderCSIClusterIDKey: FakeCluster} + // Expect empty AZ: the driver should clear the topology-derived AZ + // and let Cinder place the volume in the snapshot's AZ. + osmock.On("CreateVolume", FakeVolName, mock.AnythingOfType("int"), FakeVolType, "", FakeSnapshotID, "", "", properties).Return(&FakeVolFromSnapshot, nil) + osmock.On("GetVolumesByName", FakeVolName).Return(FakeVolListEmpty, nil) + osmock.On("GetBlockStorageOpts").Return(openstack.BlockStorageOpts{}) + + assert := assert.New(t) + + src := &csi.VolumeContentSource{ + Type: &csi.VolumeContentSource_Snapshot{ + Snapshot: &csi.VolumeContentSource_SnapshotSource{ + SnapshotId: FakeSnapshotID, + }, + }, + } + + fakeReq := &csi.CreateVolumeRequest{ + Name: FakeVolName, + VolumeCapabilities: []*csi.VolumeCapability{ + { + AccessMode: &csi.VolumeCapability_AccessMode{ + Mode: csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER, + }, + }, + }, + VolumeContentSource: src, + AccessibilityRequirements: &csi.TopologyRequirement{ + Preferred: []*csi.Topology{ + { + Segments: map[string]string{topologyKey: "az-2"}, + }, + }, + }, + } + + actualRes, err := fakeCs.CreateVolume(FakeCtx, fakeReq) + if err != nil { + t.Errorf("failed to CreateVolume: %v", err) + } + + assert.NotNil(actualRes.Volume) + assert.Equal(FakeSnapshotID, actualRes.Volume.ContentSource.GetSnapshot().SnapshotId) +} + +func TestCreateVolumeFromSnapshotWithExplicitAvailability(t *testing.T) { + fakeCs, osmock := fakeControllerServer() + + properties := map[string]string{cinderCSIClusterIDKey: FakeCluster} + osmock.On("CreateVolume", FakeVolName, mock.AnythingOfType("int"), FakeVolType, "explicit-az", FakeSnapshotID, "", "", properties).Return(&FakeVolFromSnapshot, nil) + osmock.On("GetVolumesByName", FakeVolName).Return(FakeVolListEmpty, nil) + osmock.On("GetBlockStorageOpts").Return(openstack.BlockStorageOpts{}) + + assert := assert.New(t) + + src := &csi.VolumeContentSource{ + Type: &csi.VolumeContentSource_Snapshot{ + Snapshot: &csi.VolumeContentSource_SnapshotSource{ + SnapshotId: FakeSnapshotID, + }, + }, + } + + fakeReq := &csi.CreateVolumeRequest{ + Name: FakeVolName, + Parameters: map[string]string{ + "availability": "explicit-az", + }, + VolumeCapabilities: []*csi.VolumeCapability{ + { + AccessMode: &csi.VolumeCapability_AccessMode{ + Mode: csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER, + }, + }, + }, + VolumeContentSource: src, + AccessibilityRequirements: &csi.TopologyRequirement{ + Preferred: []*csi.Topology{ + { + Segments: map[string]string{topologyKey: "az-2"}, + }, + }, + }, + } + + actualRes, err := fakeCs.CreateVolume(FakeCtx, fakeReq) + if err != nil { + t.Errorf("failed to CreateVolume: %v", err) + } + + assert.NotNil(actualRes.Volume) + assert.Equal(FakeSnapshotID, actualRes.Volume.ContentSource.GetSnapshot().SnapshotId) +} + func TestCreateVolumeFromSourceVolume(t *testing.T) { fakeCs, osmock := fakeControllerServer()