Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/cinder-csi-plugin/using-cinder-csi-plugin.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ These configuration options pertain to block storage and should appear in the `[
Optional. Set to `true` if your set of Block Storage (Cinder) AZs does not match your set of Compute (Nova) AZs and you are manually setting the `topology` parameter on your Storage Class(es). For more information, refer to [When trying to use the topology feature, pods are not able to schedule](./troubleshooting.md#when-trying-to-use-the-topology-feature-pods-are-not-able-to-schedule). Defaults to `false`.
* `ignore-volume-microversion`
Optional. Set to `true` only when your cinder microversion is older than 3.34. This might cause some features to not work as expected, but aims to allow basic operations like creating a volume. Defaults to `false`
* `volume-status-poll-delay`
Optional. The interval in seconds between volume status polls when waiting for a volume to become available before attaching. Value must be a positive integer representing whole seconds (e.g. `3`, `5`, `10`). Defaults to `3`.

### Metadata
These configuration options pertain to metadata and should appear in the `[Metadata]` section of the `$CLOUD_CONFIG` file.
Expand Down
67 changes: 65 additions & 2 deletions pkg/csi/cinder/controllerserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"google.golang.org/grpc/status"
"google.golang.org/protobuf/types/known/timestamppb"

"k8s.io/apimachinery/pkg/util/wait"
sharedcsi "k8s.io/cloud-provider-openstack/pkg/csi"
"k8s.io/cloud-provider-openstack/pkg/csi/cinder/openstack"
"k8s.io/cloud-provider-openstack/pkg/util"
Expand Down Expand Up @@ -313,12 +314,34 @@ func (cs *controllerServer) ControllerPublishVolume(ctx context.Context, req *cs
return nil, status.Error(codes.InvalidArgument, "[ControllerPublishVolume] Volume capability must be provided")
}

_, err := cloud.GetVolume(ctx, volumeID)
// Wait for the volume to reach a state valid for attachment.
// Cinder rejects attachment requests for volumes not in "available"
// (or "in-use" for multiattach) state. Without this wait, the driver
// would immediately hit a 409 Conflict if CreateVolume has just been
// called and the volume is still being provisioned on the backend.
vol, err := cs.waitForVolumeAttachable(ctx, cloud, volumeID)
if err != nil {
if cpoerrors.IsNotFound(err) {
return nil, status.Errorf(codes.NotFound, "[ControllerPublishVolume] Volume %s not found", volumeID)
}
return nil, status.Errorf(codes.Internal, "[ControllerPublishVolume] get volume failed with error %v", err)
klog.Errorf("Failed waiting for volume %s to become attachable: %v", volumeID, err)
return nil, status.Errorf(codes.FailedPrecondition, "[ControllerPublishVolume] Volume %s is not available for attach: %v", volumeID, err)
}

// Check if the volume is already attached to this instance (idempotent).
// This uses the volume object returned by waitForVolumeAttachable to
// avoid a redundant volumes.Get API call.
for _, att := range vol.Attachments {
if instanceID == att.ServerID {
klog.V(4).Infof("Volume %s is already attached to instance %s", volumeID, instanceID)
devicePath, err := cloud.GetAttachmentDiskPath(ctx, instanceID, volumeID)
if err != nil {
klog.Errorf("Failed to GetAttachmentDiskPath: %v", err)
return nil, status.Errorf(codes.Internal, "[ControllerPublishVolume] failed to get device path of attached volume: %v", err)
}
pvInfo := map[string]string{"DevicePath": devicePath}
return &csi.ControllerPublishVolumeResponse{PublishContext: pvInfo}, nil
}
}

_, err = cloud.GetInstanceByID(ctx, instanceID)
Comment thread
kayrus marked this conversation as resolved.
Expand Down Expand Up @@ -359,6 +382,46 @@ func (cs *controllerServer) ControllerPublishVolume(ctx context.Context, req *cs
}, nil
}

// waitForVolumeAttachable polls the volume status until it reaches a state
// that allows attachment. For non-multiattach volumes, only "available" is
// accepted. For multiattach volumes, "in-use" is also valid.
// The wait is bounded by the context deadline (typically the gRPC RPC timeout).
func (cs *controllerServer) waitForVolumeAttachable(ctx context.Context, cloud openstack.IOpenStack, volumeID string) (*volumes.Volume, error) {
bsOpts := cloud.GetBlockStorageOpts()

var lastVol *volumes.Volume
err := wait.PollUntilContextCancel(ctx, bsOpts.VolumeStatusPollInterval(), true, func(ctx context.Context) (bool, error) {
vol, err := cloud.GetVolume(ctx, volumeID)
if err != nil {
return false, err
}
lastVol = vol

if vol.Status == openstack.VolumeAvailableStatus {
return true, nil
}
if vol.Status == openstack.VolumeInUseStatus {
if vol.Multiattach {
return true, nil
}
// If a non-multiattach volume is already in-use, it cannot be
// attached again - fail immediately rather than waiting.
return false, fmt.Errorf("volume %s is already in-use and does not support multiattach", volumeID)
}

if slices.Contains(openstack.VolumeErrorStates[:], vol.Status) {
return false, fmt.Errorf("volume %s is in error state: %s", volumeID, vol.Status)
}
return false, nil
})

if err != nil && ctx.Err() != nil {
return lastVol, fmt.Errorf("timeout waiting for volume %s to become available: %w", volumeID, err)
}

return lastVol, err
}

func (cs *controllerServer) ControllerUnpublishVolume(ctx context.Context, req *csi.ControllerUnpublishVolumeRequest) (*csi.ControllerUnpublishVolumeResponse, error) {
klog.V(4).Infof("ControllerUnpublishVolume: called with args %+v", protosanitizer.StripSecrets(req))

Expand Down
1 change: 1 addition & 0 deletions pkg/csi/cinder/controllerserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,7 @@ func TestDeleteVolume(t *testing.T) {
func TestControllerPublishVolume(t *testing.T) {
fakeCs, osmock := fakeControllerServer()

osmock.On("GetBlockStorageOpts").Return(openstack.BlockStorageOpts{})
osmock.On("AttachVolume", FakeNodeID, FakeVolID).Return(FakeVolID, nil)
osmock.On("WaitDiskAttached", FakeNodeID, FakeVolID).Return(nil)
osmock.On("GetAttachmentDiskPath", FakeNodeID, FakeVolID).Return(FakeDevicePath, nil)
Expand Down
17 changes: 17 additions & 0 deletions pkg/csi/cinder/openstack/openstack.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"net/http"
"os"
"sync"
"time"

"github.com/gophercloud/gophercloud/v2"
"github.com/gophercloud/gophercloud/v2/openstack"
Expand Down Expand Up @@ -91,6 +92,22 @@ type BlockStorageOpts struct {
RescanOnResize bool `gcfg:"rescan-on-resize"`
IgnoreVolumeAZ bool `gcfg:"ignore-volume-az"`
IgnoreVolumeMicroversion bool `gcfg:"ignore-volume-microversion"`
// VolumeStatusPollDelay is the interval in seconds between volume status
// polls when waiting for a volume to become available before attaching.
// Must be a positive integer representing whole seconds (e.g. 3, 5, 10).
// Defaults to 3 seconds if not set or zero.
VolumeStatusPollDelay int `gcfg:"volume-status-poll-delay"`
}

const defaultVolumeStatusPollInterval = 3 * time.Second

// VolumeStatusPollInterval returns the configured poll interval for volume
// status checks, or the default (3s) if not configured.
func (opts BlockStorageOpts) VolumeStatusPollInterval() time.Duration {
if opts.VolumeStatusPollDelay > 0 {
return time.Duration(opts.VolumeStatusPollDelay) * time.Second
}
return defaultVolumeStatusPollInterval
}

type Config struct {
Expand Down
9 changes: 4 additions & 5 deletions pkg/csi/cinder/openstack/openstack_volumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"fmt"
"net/http"
"net/url"
"slices"
"strings"
"time"

Expand Down Expand Up @@ -56,7 +57,7 @@ const (
volumeDescription = "Created by OpenStack Cinder CSI driver"
)

var volumeErrorStates = [...]string{"error", "error_extending", "error_deleting"}
var VolumeErrorStates = [...]string{"error", "error_extending", "error_deleting"}

// CreateVolume creates a volume of given size
func (os *OpenStack) CreateVolume(ctx context.Context, opts *volumes.CreateOpts, schedulerHints volumes.SchedulerHintOptsBuilder) (*volumes.Volume, error) {
Expand Down Expand Up @@ -283,10 +284,8 @@ func (os *OpenStack) WaitVolumeTargetStatus(ctx context.Context, volumeID string
return true, nil
}
}
for _, eState := range volumeErrorStates {
if vol.Status == eState {
return false, fmt.Errorf("volume is in Error State : %s", vol.Status)
}
if slices.Contains(VolumeErrorStates[:], vol.Status) {
return false, fmt.Errorf("volume is in Error State : %s", vol.Status)
}
return false, nil
})
Expand Down