diff --git a/include/sys/zio.h b/include/sys/zio.h index c3a199ce813c..4e7f81fa35cc 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -225,25 +225,26 @@ typedef uint64_t zio_flag_t; #define ZIO_FLAG_TRYHARD (1ULL << 17) #define ZIO_FLAG_OPTIONAL (1ULL << 18) #define ZIO_FLAG_DIO_READ (1ULL << 19) +#define ZIO_FLAG_BYPASSED_QUEUE (1ULL << 20) /* set when zio skips vdev queue */ #define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1) /* * Flags not inherited by any children. */ -#define ZIO_FLAG_DONT_QUEUE (1ULL << 20) /* must be first for INHERIT */ -#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 21) -#define ZIO_FLAG_IO_BYPASS (1ULL << 22) -#define ZIO_FLAG_IO_REWRITE (1ULL << 23) -#define ZIO_FLAG_RAW_COMPRESS (1ULL << 24) -#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 25) -#define ZIO_FLAG_GANG_CHILD (1ULL << 26) -#define ZIO_FLAG_DDT_CHILD (1ULL << 27) -#define ZIO_FLAG_GODFATHER (1ULL << 28) -#define ZIO_FLAG_NOPWRITE (1ULL << 29) -#define ZIO_FLAG_REEXECUTED (1ULL << 30) -#define ZIO_FLAG_DELEGATED (1ULL << 31) -#define ZIO_FLAG_PREALLOCATED (1ULL << 32) -#define ZIO_FLAG_POSTREAD (1ULL << 33) +#define ZIO_FLAG_DONT_QUEUE (1ULL << 21) /* must be first for INHERIT */ +#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 22) +#define ZIO_FLAG_IO_BYPASS (1ULL << 23) +#define ZIO_FLAG_IO_REWRITE (1ULL << 24) +#define ZIO_FLAG_RAW_COMPRESS (1ULL << 25) +#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 26) +#define ZIO_FLAG_GANG_CHILD (1ULL << 27) +#define ZIO_FLAG_DDT_CHILD (1ULL << 28) +#define ZIO_FLAG_GODFATHER (1ULL << 29) +#define ZIO_FLAG_NOPWRITE (1ULL << 30) +#define ZIO_FLAG_REEXECUTED (1ULL << 31) +#define ZIO_FLAG_DELEGATED (1ULL << 32) +#define ZIO_FLAG_PREALLOCATED (1ULL << 33) +#define ZIO_FLAG_POSTREAD (1ULL << 34) #define ZIO_ALLOCATOR_NONE (-1) #define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE) diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index 11b6c622f8ec..c9cca83d01b6 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -1701,6 +1701,14 @@ itself, it will be clamped. 
Setting it to zero will cause the kernel's ideal size to be used. This parameter only applies on Linux. . +.It Sy zfs_vdev_disk_calling_thread_io Ns = Ns Sy 1 Ns | Ns 0 Pq uint +Enables calling-thread I/O, in which the submitting thread waits for a +zio to complete; this parameter only enables the capability, and the +wait happens only for zios that bypassed the vdev queue. +May improve performance when enabled if the backing vdev devices +are fast and low latency. +This parameter currently only applies on Linux. +. .It Sy zfs_expire_snapshot Ns = Ns Sy 300 Ns s Pq int Time before expiring .Pa .zfs/snapshot . diff --git a/man/man8/zpool-events.8 b/man/man8/zpool-events.8 index 12a110580729..d9cc1634a555 100644 --- a/man/man8/zpool-events.8 +++ b/man/man8/zpool-events.8 @@ -517,21 +517,24 @@ ZIO_FLAG_IO_RETRY:0x00008000 ZIO_FLAG_PROBE:0x00010000 ZIO_FLAG_TRYHARD:0x00020000 ZIO_FLAG_OPTIONAL:0x00040000 +ZIO_FLAG_DIO_READ:0x00080000 +ZIO_FLAG_BYPASSED_QUEUE:0x00100000 -ZIO_FLAG_DONT_QUEUE:0x00080000 -ZIO_FLAG_DONT_PROPAGATE:0x00100000 -ZIO_FLAG_IO_BYPASS:0x00200000 -ZIO_FLAG_IO_REWRITE:0x00400000 -ZIO_FLAG_RAW_COMPRESS:0x00800000 -ZIO_FLAG_RAW_ENCRYPT:0x01000000 +ZIO_FLAG_DONT_QUEUE:0x00200000 +ZIO_FLAG_DONT_PROPAGATE:0x00400000 +ZIO_FLAG_IO_BYPASS:0x00800000 +ZIO_FLAG_IO_REWRITE:0x01000000 +ZIO_FLAG_RAW_COMPRESS:0x02000000 +ZIO_FLAG_RAW_ENCRYPT:0x04000000 -ZIO_FLAG_GANG_CHILD:0x02000000 -ZIO_FLAG_DDT_CHILD:0x04000000 -ZIO_FLAG_GODFATHER:0x08000000 -ZIO_FLAG_NOPWRITE:0x10000000 -ZIO_FLAG_REEXECUTED:0x20000000 -ZIO_FLAG_DELEGATED:0x40000000 -ZIO_FLAG_FASTWRITE:0x80000000 +ZIO_FLAG_GANG_CHILD:0x08000000 +ZIO_FLAG_DDT_CHILD:0x10000000 +ZIO_FLAG_GODFATHER:0x20000000 +ZIO_FLAG_NOPWRITE:0x40000000 +ZIO_FLAG_REEXECUTED:0x80000000 +ZIO_FLAG_DELEGATED:0x100000000 +ZIO_FLAG_PREALLOCATED:0x200000000 +ZIO_FLAG_POSTREAD:0x400000000 .TE . 
.Sh I/O TYPES diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index 7cc19fe5afb7..130983ea02b7 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -106,6 +106,14 @@ static uint_t zfs_vdev_open_timeout_ms = 1000; static unsigned int zfs_vdev_failfast_mask = 1; +/* + * When nonzero, a zio that bypassed the vdev queue is submitted + * with submit_bio_wait() and completed by the calling thread. + * May improve performance when the backing vdev devices are fast + * and low latency. + */ +static unsigned int zfs_vdev_disk_calling_thread_io = 1; + /* * Convert SPA mode flags into bdev open mode flags. */ @@ -604,6 +612,15 @@ vdev_submit_bio(struct bio *bio) current->bio_list = bio_list; } +static inline void +vdev_submit_bio_wait(struct bio *bio) +{ + struct bio_list *bio_list = current->bio_list; + current->bio_list = NULL; + (void) submit_bio_wait(bio); + current->bio_list = bio_list; +} + static inline struct bio * vdev_bio_alloc(struct block_device *bdev, gfp_t gfp_mask, unsigned short nr_vecs) @@ -678,6 +695,7 @@ typedef struct { struct bio *vbio_bio; /* pointer to the current bio */ int vbio_flags; /* bio flags */ + boolean_t vbio_wait; /* wait for completion */ } vbio_t; static vbio_t * @@ -694,6 +712,7 @@ vbio_alloc(zio_t *zio, struct block_device *bdev, int flags) vbio->vbio_offset = zio->io_offset; vbio->vbio_bio = NULL; vbio->vbio_flags = flags; + vbio->vbio_wait = B_FALSE; return (vbio); } @@ -779,16 +798,20 @@ vbio_submit(vbio_t *vbio, abd_t *abd, uint64_t size) (void) abd_iterate_page_func(abd, 0, size, vbio_fill_cb, vbio); ASSERT(vbio->vbio_bio); - vbio->vbio_bio->bi_end_io = vbio_completion; - vbio->vbio_bio->bi_private = vbio; - /* * Once submitted, vbio_bio now owns vbio (through bi_private) and we * can't touch it again. The bio may complete and vbio_completion() be * called and free the vbio before this task is run again, so we must * consider it invalid from this point. 
*/ - vdev_submit_bio(vbio->vbio_bio); + + if (vbio->vbio_wait) { + vdev_submit_bio_wait(vbio->vbio_bio); + } else { + vbio->vbio_bio->bi_end_io = vbio_completion; + vbio->vbio_bio->bi_private = vbio; + vdev_submit_bio(vbio->vbio_bio); + } blk_finish_plug(&plug); } @@ -820,7 +843,12 @@ vbio_completion(struct bio *bio) ASSERT0P(zio->io_bio); zio->io_bio = vbio; - zio_delay_interrupt(zio); + /* Calling-thread I/O: execute the zio inline, not via dispatch. */ + if (vbio->vbio_wait) + zio_execute(zio); + else + zio_delay_interrupt(zio); + } /* @@ -978,8 +1006,19 @@ vdev_disk_io_rw(zio_t *zio) if (abd != zio->io_abd) vbio->vbio_abd = abd; + boolean_t bio_wait = B_FALSE; + if (zfs_vdev_disk_calling_thread_io && + (zio->io_flags & ZIO_FLAG_BYPASSED_QUEUE)) { + vbio->vbio_wait = bio_wait = B_TRUE; + } /* Fill it with data pages and submit it to the kernel */ vbio_submit(vbio, abd, zio->io_size); + + if (bio_wait) { + vbio->vbio_bio->bi_private = vbio; + vbio_completion(vbio->vbio_bio); + } + return (0); } @@ -1370,3 +1409,6 @@ ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, failfast_mask, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_vdev_disk, zfs_vdev_disk_, max_segs, UINT, ZMOD_RW, "Maximum number of data segments to add to an IO request (min 4)"); + +ZFS_MODULE_PARAM(zfs_vdev_disk, zfs_vdev_disk_, calling_thread_io, UINT, + ZMOD_RW, "Enable calling thread io"); diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c index 43e5f15934ac..77e82d0ce449 100644 --- a/module/zfs/vdev_queue.c +++ b/module/zfs/vdev_queue.c @@ -956,6 +956,7 @@ vdev_queue_io(zio_t *zio) if (!vdev_should_queue_io(zio)) { zio->io_queue_state = ZIO_QS_NONE; + zio->io_flags |= ZIO_FLAG_BYPASSED_QUEUE; return (zio); }