diff options
Diffstat (limited to 'engines')
-rw-r--r-- | engines/binject.c | 457 | ||||
-rw-r--r-- | engines/cpu.c | 122 | ||||
-rw-r--r-- | engines/dev-dax.c | 348 | ||||
-rw-r--r-- | engines/e4defrag.c | 218 | ||||
-rw-r--r-- | engines/falloc.c | 114 | ||||
-rw-r--r-- | engines/ftruncate.c | 56 | ||||
-rw-r--r-- | engines/fusion-aw.c | 183 | ||||
-rw-r--r-- | engines/gfapi.h | 22 | ||||
-rw-r--r-- | engines/glusterfs.c | 306 | ||||
-rw-r--r-- | engines/glusterfs_async.c | 191 | ||||
-rw-r--r-- | engines/glusterfs_sync.c | 98 | ||||
-rw-r--r-- | engines/guasi.c | 269 | ||||
-rw-r--r-- | engines/libaio.c | 396 | ||||
-rw-r--r-- | engines/libhdfs.c | 420 | ||||
-rw-r--r-- | engines/mmap.c | 272 | ||||
-rw-r--r-- | engines/mtd.c | 209 | ||||
-rw-r--r-- | engines/net.c | 1468 | ||||
-rw-r--r-- | engines/null.c | 157 | ||||
-rw-r--r-- | engines/pmemblk.c | 445 | ||||
-rw-r--r-- | engines/posixaio.c | 266 | ||||
-rw-r--r-- | engines/rbd.c | 689 | ||||
-rw-r--r-- | engines/rdma.c | 1372 | ||||
-rw-r--r-- | engines/sg.c | 856 | ||||
-rw-r--r-- | engines/skeleton_external.c | 143 | ||||
-rw-r--r-- | engines/solarisaio.c | 234 | ||||
-rw-r--r-- | engines/splice.c | 311 | ||||
-rw-r--r-- | engines/sync.c | 472 | ||||
-rw-r--r-- | engines/windowsaio.c | 449 |
28 files changed, 0 insertions, 10543 deletions
diff --git a/engines/binject.c b/engines/binject.c deleted file mode 100644 index 932534a0..00000000 --- a/engines/binject.c +++ /dev/null @@ -1,457 +0,0 @@ -/* - * binject engine - * - * IO engine that uses the Linux binject interface to directly inject - * bio's to block devices. - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <assert.h> -#include <string.h> -#include <sys/poll.h> -#include <sys/types.h> -#include <sys/stat.h> - -#include "../fio.h" - -#ifdef FIO_HAVE_BINJECT - -struct binject_data { - struct b_user_cmd *cmds; - struct io_u **events; - struct pollfd *pfds; - int *fd_flags; -}; - -struct binject_file { - unsigned int bs; - int minor; - int fd; -}; - -static void binject_buc_init(struct binject_data *bd, struct io_u *io_u) -{ - struct b_user_cmd *buc = &io_u->buc; - - memset(buc, 0, sizeof(*buc)); - binject_buc_set_magic(buc); - - buc->buf = (unsigned long) io_u->xfer_buf; - buc->len = io_u->xfer_buflen; - buc->offset = io_u->offset; - buc->usr_ptr = (unsigned long) io_u; - - buc->flags = B_FLAG_NOIDLE | B_FLAG_UNPLUG; - assert(buc->buf); -} - -static int pollin_events(struct pollfd *pfds, int fds) -{ - int i; - - for (i = 0; i < fds; i++) - if (pfds[i].revents & POLLIN) - return 1; - - return 0; -} - -static unsigned int binject_read_commands(struct thread_data *td, void *p, - int left, int *err) -{ - struct fio_file *f; - int i, ret, events; - -one_more: - events = 0; - for_each_file(td, f, i) { - struct binject_file *bf = FILE_ENG_DATA(f); - - ret = read(bf->fd, p, left * sizeof(struct b_user_cmd)); - if (ret < 0) { - if (errno == EAGAIN) - continue; - *err = -errno; - td_verror(td, errno, "read"); - break; - } else if (ret) { - p += ret; - events += ret / sizeof(struct b_user_cmd); - } - } - - if (*err || events) - return events; - - usleep(1000); - goto one_more; -} - -static int fio_binject_getevents(struct thread_data *td, unsigned int min, - unsigned int max, - const struct timespec fio_unused *t) -{ - struct binject_data *bd = td->io_ops_data; - int left = max, ret, r = 0, ev_index = 0; - void *buf = bd->cmds; - unsigned int i, events; - struct fio_file *f; - - /* - * Fill in the file descriptors - */ - for_each_file(td, f, i) { - struct binject_file *bf = FILE_ENG_DATA(f); - - /* - * don't block for min events == 0 - */ - if (!min) - bd->fd_flags[i] = fio_set_fd_nonblocking(bf->fd, "binject"); - else - bd->fd_flags[i] = -1; - - bd->pfds[i].fd = bf->fd; - bd->pfds[i].events = POLLIN; - } - - while (left) { - while (!min) { - ret = poll(bd->pfds, td->o.nr_files, -1); - if (ret < 0) { - if (!r) - r = -errno; - td_verror(td, errno, "poll"); - break; - } else if (!ret) - continue; - - if (pollin_events(bd->pfds, td->o.nr_files)) - break; - } - - if (r < 0) - break; - - events = binject_read_commands(td, buf, left, &r); - - if (r < 0) - break; - - left -= events; - r += events; - - for (i = 0; i < events; i++) { - struct b_user_cmd *buc = (struct b_user_cmd *) buf + i; - - bd->events[ev_index] = (struct io_u *) (unsigned long) buc->usr_ptr; - ev_index++; - } - } - - if (!min) { - for_each_file(td, f, i) { - struct binject_file *bf = FILE_ENG_DATA(f); - - if (bd->fd_flags[i] == -1) - continue; - - if (fcntl(bf->fd, F_SETFL, bd->fd_flags[i]) < 0) - log_err("fio: binject failed to restore fcntl flags: %s\n", strerror(errno)); - } - } - - if (r > 0) - assert(ev_index == r); - - return r; -} - -static int fio_binject_doio(struct thread_data *td, struct io_u *io_u) -{ - struct b_user_cmd *buc = &io_u->buc; - struct binject_file *bf = FILE_ENG_DATA(io_u->file); - int ret; - - ret = write(bf->fd, buc, sizeof(*buc)); - if (ret < 0) - return ret; - - return FIO_Q_QUEUED; -} - -static int fio_binject_prep(struct thread_data *td, struct io_u *io_u) -{ - struct binject_data *bd = td->io_ops_data; - struct b_user_cmd *buc = &io_u->buc; - struct binject_file *bf = FILE_ENG_DATA(io_u->file); - - if (io_u->xfer_buflen & (bf->bs - 1)) { - log_err("read/write not sector aligned\n"); - return EINVAL; - } - - if (io_u->ddir == DDIR_READ) { - binject_buc_init(bd, io_u); - buc->type = B_TYPE_READ; - } else if (io_u->ddir == DDIR_WRITE) { - binject_buc_init(bd, io_u); - if (io_u->flags & IO_U_F_BARRIER) - buc->type = B_TYPE_WRITEBARRIER; - else - buc->type = B_TYPE_WRITE; - } else if (io_u->ddir == DDIR_TRIM) { - binject_buc_init(bd, io_u); - buc->type = B_TYPE_DISCARD; - } else { - assert(0); - } - - return 0; -} - -static int fio_binject_queue(struct thread_data *td, struct io_u *io_u) -{ - int ret; - - fio_ro_check(td, io_u); - - ret = fio_binject_doio(td, io_u); - - if (ret < 0) - io_u->error = errno; - - if (io_u->error) { - td_verror(td, io_u->error, "xfer"); - return FIO_Q_COMPLETED; - } - - return ret; -} - -static struct io_u *fio_binject_event(struct thread_data *td, int event) -{ - struct binject_data *bd = td->io_ops_data; - - return bd->events[event]; -} - -static int binject_open_ctl(struct thread_data *td) -{ - int fd; - - fd = open("/dev/binject-ctl", O_RDWR); - if (fd < 0) - td_verror(td, errno, "open binject-ctl"); - - return fd; -} - -static void binject_unmap_dev(struct thread_data *td, struct binject_file *bf) -{ - struct b_ioctl_cmd bic; - int fdb; - - if (bf->fd >= 0) { - close(bf->fd); - bf->fd = -1; - } - - fdb = binject_open_ctl(td); - if (fdb < 0) - return; - - bic.minor = bf->minor; - - if (ioctl(fdb, B_IOCTL_DEL, &bic) < 0) - td_verror(td, errno, "binject dev unmap"); - - close(fdb); -} - -static int binject_map_dev(struct thread_data *td, struct binject_file *bf, - int fd) -{ - struct b_ioctl_cmd bic; - char name[80]; - struct stat sb; - int fdb, dev_there, loops; - - fdb = binject_open_ctl(td); - if (fdb < 0) - return 1; - - bic.fd = fd; - - if (ioctl(fdb, B_IOCTL_ADD, &bic) < 0) { - td_verror(td, errno, "binject dev map"); - close(fdb); - return 1; - } - - bf->minor = bic.minor; - - sprintf(name, "/dev/binject%u", bf->minor); - - /* - * Wait for udev to create the node... - */ - dev_there = loops = 0; - do { - if (!stat(name, &sb)) { - dev_there = 1; - break; - } - - usleep(10000); - } while (++loops < 100); - - close(fdb); - - if (!dev_there) { - log_err("fio: timed out waiting for binject dev\n"); - goto err_unmap; - } - - bf->fd = open(name, O_RDWR); - if (bf->fd < 0) { - td_verror(td, errno, "binject dev open"); -err_unmap: - binject_unmap_dev(td, bf); - return 1; - } - - return 0; -} - -static int fio_binject_close_file(struct thread_data *td, struct fio_file *f) -{ - struct binject_file *bf = FILE_ENG_DATA(f); - - if (bf) { - binject_unmap_dev(td, bf); - free(bf); - FILE_SET_ENG_DATA(f, NULL); - return generic_close_file(td, f); - } - - return 0; -} - -static int fio_binject_open_file(struct thread_data *td, struct fio_file *f) -{ - struct binject_file *bf; - unsigned int bs; - int ret; - - ret = generic_open_file(td, f); - if (ret) - return 1; - - if (f->filetype != FIO_TYPE_BLOCK) { - log_err("fio: binject only works with block devices\n"); - goto err_close; - } - if (ioctl(f->fd, BLKSSZGET, &bs) < 0) { - td_verror(td, errno, "BLKSSZGET"); - goto err_close; - } - - bf = malloc(sizeof(*bf)); - bf->bs = bs; - bf->minor = bf->fd = -1; - FILE_SET_ENG_DATA(f, bf); - - if (binject_map_dev(td, bf, f->fd)) { -err_close: - ret = generic_close_file(td, f); - return 1; - } - - return 0; -} - -static void fio_binject_cleanup(struct thread_data *td) -{ - struct binject_data *bd = td->io_ops_data; - - if (bd) { - free(bd->events); - free(bd->cmds); - free(bd->fd_flags); - free(bd->pfds); - free(bd); - } -} - -static int fio_binject_init(struct thread_data *td) -{ - struct binject_data *bd; - - bd = malloc(sizeof(*bd)); - memset(bd, 0, sizeof(*bd)); - - bd->cmds = malloc(td->o.iodepth * sizeof(struct b_user_cmd)); - memset(bd->cmds, 0, td->o.iodepth * sizeof(struct b_user_cmd)); - - bd->events = malloc(td->o.iodepth * sizeof(struct io_u *)); - memset(bd->events, 0, td->o.iodepth * sizeof(struct io_u *)); - - bd->pfds = malloc(sizeof(struct pollfd) * td->o.nr_files); - memset(bd->pfds, 0, sizeof(struct pollfd) * td->o.nr_files); - - bd->fd_flags = malloc(sizeof(int) * td->o.nr_files); - memset(bd->fd_flags, 0, sizeof(int) * td->o.nr_files); - - td->io_ops_data = bd; - return 0; -} - -static struct ioengine_ops ioengine = { - .name = "binject", - .version = FIO_IOOPS_VERSION, - .init = fio_binject_init, - .prep = fio_binject_prep, - .queue = fio_binject_queue, - .getevents = fio_binject_getevents, - .event = fio_binject_event, - .cleanup = fio_binject_cleanup, - .open_file = fio_binject_open_file, - .close_file = fio_binject_close_file, - .get_file_size = generic_get_file_size, - .flags = FIO_RAWIO | FIO_BARRIER | FIO_MEMALIGN, -}; - -#else /* FIO_HAVE_BINJECT */ - -/* - * When we have a proper configure system in place, we simply wont build - * and install this io engine. For now install a crippled version that - * just complains and fails to load. - */ -static int fio_binject_init(struct thread_data fio_unused *td) -{ - log_err("fio: ioengine binject not available\n"); - return 1; -} - -static struct ioengine_ops ioengine = { - .name = "binject", - .version = FIO_IOOPS_VERSION, - .init = fio_binject_init, -}; - -#endif - -static void fio_init fio_binject_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_binject_unregister(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/cpu.c b/engines/cpu.c deleted file mode 100644 index d0b4a895..00000000 --- a/engines/cpu.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * CPU engine - * - * Doesn't transfer any data, merely burns CPU cycles according to - * the settings. - * - */ -#include "../fio.h" -#include "../optgroup.h" - -struct cpu_options { - void *pad; - unsigned int cpuload; - unsigned int cpucycle; - unsigned int exit_io_done; -}; - -static struct fio_option options[] = { - { - .name = "cpuload", - .lname = "CPU load", - .type = FIO_OPT_INT, - .off1 = offsetof(struct cpu_options, cpuload), - .help = "Use this percentage of CPU", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_INVALID, - }, - { - .name = "cpuchunks", - .lname = "CPU chunk", - .type = FIO_OPT_INT, - .off1 = offsetof(struct cpu_options, cpucycle), - .help = "Length of the CPU burn cycles (usecs)", - .def = "50000", - .parent = "cpuload", - .hide = 1, - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_INVALID, - }, - { - .name = "exit_on_io_done", - .lname = "Exit when IO threads are done", - .type = FIO_OPT_BOOL, - .off1 = offsetof(struct cpu_options, exit_io_done), - .help = "Exit when IO threads finish", - .def = "0", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_INVALID, - }, - { - .name = NULL, - }, -}; - - -static int fio_cpuio_queue(struct thread_data *td, struct io_u fio_unused *io_u) -{ - struct cpu_options *co = td->eo; - - if (co->exit_io_done && !fio_running_or_pending_io_threads()) { - td->done = 1; - return FIO_Q_BUSY; - } - - usec_spin(co->cpucycle); - return FIO_Q_COMPLETED; -} - -static int fio_cpuio_init(struct thread_data *td) -{ - struct thread_options *o = &td->o; - struct cpu_options *co = td->eo; - - if (!co->cpuload) { - td_vmsg(td, EINVAL, "cpu thread needs rate (cpuload=)","cpuio"); - return 1; - } - - if (co->cpuload > 100) - co->cpuload = 100; - - /* - * set thinktime_sleep and thinktime_spin appropriately - */ - o->thinktime_blocks = 1; - o->thinktime_spin = 0; - o->thinktime = (co->cpucycle * (100 - co->cpuload)) / co->cpuload; - - o->nr_files = o->open_files = 1; - - log_info("%s: ioengine=%s, cpuload=%u, cpucycle=%u\n", - td->o.name, td->io_ops->name, co->cpuload, co->cpucycle); - - return 0; -} - -static int fio_cpuio_open(struct thread_data fio_unused *td, - struct fio_file fio_unused *f) -{ - return 0; -} - -static struct ioengine_ops ioengine = { - .name = "cpuio", - .version = FIO_IOOPS_VERSION, - .queue = fio_cpuio_queue, - .init = fio_cpuio_init, - .open_file = fio_cpuio_open, - .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NOIO, - .options = options, - .option_struct_size = sizeof(struct cpu_options), -}; - -static void fio_init fio_cpuio_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_cpuio_unregister(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/dev-dax.c b/engines/dev-dax.c deleted file mode 100644 index 235a31e6..00000000 --- a/engines/dev-dax.c +++ /dev/null @@ -1,348 +0,0 @@ -/* - * device DAX engine - * - * IO engine that reads/writes from files by doing memcpy to/from - * a memory mapped region of DAX enabled device. - * - * Copyright (C) 2016 Intel Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License, - * version 2 as published by the Free Software Foundation.. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -/* - * device dax engine - * IO engine that access a DAX device directly for read and write data - * - * To use: - * ioengine=dev-dax - * - * Other relevant settings: - * iodepth=1 - * direct=0 REQUIRED - * filename=/dev/daxN.N - * bs=2m - * - * direct should be left to 0. Using dev-dax implies that memory access - * is direct. However, dev-dax does not support O_DIRECT flag by design - * since it is not necessary. - * - * bs should adhere to the device dax alignment at minimally. - * - * libpmem.so - * By default, the dev-dax engine will let the system find the libpmem.so - * that it uses. You can use an alternative libpmem by setting the - * FIO_PMEM_LIB environment variable to the full path to the desired - * libpmem.so. - */ - -#include <stdio.h> -#include <limits.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <sys/mman.h> -#include <sys/stat.h> -#include <sys/sysmacros.h> -#include <libgen.h> -#include <libpmem.h> - -#include "../fio.h" -#include "../verify.h" - -/* - * Limits us to 1GiB of mapped files in total to model after - * mmap engine behavior - */ -#define MMAP_TOTAL_SZ (1 * 1024 * 1024 * 1024UL) - -struct fio_devdax_data { - void *devdax_ptr; - size_t devdax_sz; - off_t devdax_off; -}; - -static int fio_devdax_file(struct thread_data *td, struct fio_file *f, - size_t length, off_t off) -{ - struct fio_devdax_data *fdd = FILE_ENG_DATA(f); - int flags = 0; - - if (td_rw(td)) - flags = PROT_READ | PROT_WRITE; - else if (td_write(td)) { - flags = PROT_WRITE; - - if (td->o.verify != VERIFY_NONE) - flags |= PROT_READ; - } else - flags = PROT_READ; - - fdd->devdax_ptr = mmap(NULL, length, flags, MAP_SHARED, f->fd, off); - if (fdd->devdax_ptr == MAP_FAILED) { - fdd->devdax_ptr = NULL; - td_verror(td, errno, "mmap"); - } - - if (td->error && fdd->devdax_ptr) - munmap(fdd->devdax_ptr, length); - - return td->error; -} - -/* - * Just mmap an appropriate portion, we cannot mmap the full extent - */ -static int fio_devdax_prep_limited(struct thread_data *td, struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - struct fio_devdax_data *fdd = FILE_ENG_DATA(f); - - if (io_u->buflen > f->real_file_size) { - log_err("dev-dax: bs too big for dev-dax engine\n"); - return EIO; - } - - fdd->devdax_sz = min(MMAP_TOTAL_SZ, f->real_file_size); - if (fdd->devdax_sz > f->io_size) - fdd->devdax_sz = f->io_size; - - fdd->devdax_off = io_u->offset; - - return fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off); -} - -/* - * Attempt to mmap the entire file - */ -static int fio_devdax_prep_full(struct thread_data *td, struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - struct fio_devdax_data *fdd = FILE_ENG_DATA(f); - int ret; - - if (fio_file_partial_mmap(f)) - return EINVAL; - - if (io_u->offset != (size_t) io_u->offset || - f->io_size != (size_t) f->io_size) { - fio_file_set_partial_mmap(f); - return EINVAL; - } - - fdd->devdax_sz = f->io_size; - fdd->devdax_off = 0; - - ret = fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off); - if (ret) - fio_file_set_partial_mmap(f); - - return ret; -} - -static int fio_devdax_prep(struct thread_data *td, struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - struct fio_devdax_data *fdd = FILE_ENG_DATA(f); - int ret; - - /* - * It fits within existing mapping, use it - */ - if (io_u->offset >= fdd->devdax_off && - io_u->offset + io_u->buflen < fdd->devdax_off + fdd->devdax_sz) - goto done; - - /* - * unmap any existing mapping - */ - if (fdd->devdax_ptr) { - if (munmap(fdd->devdax_ptr, fdd->devdax_sz) < 0) - return errno; - fdd->devdax_ptr = NULL; - } - - if (fio_devdax_prep_full(td, io_u)) { - td_clear_error(td); - ret = fio_devdax_prep_limited(td, io_u); - if (ret) - return ret; - } - -done: - io_u->mmap_data = fdd->devdax_ptr + io_u->offset - fdd->devdax_off - - f->file_offset; - return 0; -} - -static int fio_devdax_queue(struct thread_data *td, struct io_u *io_u) -{ - fio_ro_check(td, io_u); - io_u->error = 0; - - switch (io_u->ddir) { - case DDIR_READ: - memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen); - break; - case DDIR_WRITE: - pmem_memcpy_persist(io_u->mmap_data, io_u->xfer_buf, - io_u->xfer_buflen); - break; - case DDIR_SYNC: - case DDIR_DATASYNC: - case DDIR_SYNC_FILE_RANGE: - break; - default: - io_u->error = EINVAL; - break; - } - - return FIO_Q_COMPLETED; -} - -static int fio_devdax_init(struct thread_data *td) -{ - struct thread_options *o = &td->o; - - if ((o->rw_min_bs & page_mask) && - (o->fsync_blocks || o->fdatasync_blocks)) { - log_err("dev-dax: mmap options dictate a minimum block size of %llu bytes\n", - (unsigned long long) page_size); - return 1; - } - - return 0; -} - -static int fio_devdax_open_file(struct thread_data *td, struct fio_file *f) -{ - struct fio_devdax_data *fdd; - int ret; - - ret = generic_open_file(td, f); - if (ret) - return ret; - - fdd = calloc(1, sizeof(*fdd)); - if (!fdd) { - int fio_unused __ret; - __ret = generic_close_file(td, f); - return 1; - } - - FILE_SET_ENG_DATA(f, fdd); - - return 0; -} - -static int fio_devdax_close_file(struct thread_data *td, struct fio_file *f) -{ - struct fio_devdax_data *fdd = FILE_ENG_DATA(f); - - FILE_SET_ENG_DATA(f, NULL); - free(fdd); - fio_file_clear_partial_mmap(f); - - return generic_close_file(td, f); -} - -static int -fio_devdax_get_file_size(struct thread_data *td, struct fio_file *f) -{ - char spath[PATH_MAX]; - char npath[PATH_MAX]; - char *rpath; - FILE *sfile; - uint64_t size; - struct stat st; - int rc; - - if (fio_file_size_known(f)) - return 0; - - if (f->filetype != FIO_TYPE_CHAR) - return -EINVAL; - - rc = stat(f->file_name, &st); - if (rc < 0) { - log_err("%s: failed to stat file %s (%s)\n", - td->o.name, f->file_name, strerror(errno)); - return -errno; - } - - snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/subsystem", - major(st.st_rdev), minor(st.st_rdev)); - - rpath = realpath(spath, npath); - if (!rpath) { - log_err("%s: realpath on %s failed (%s)\n", - td->o.name, spath, strerror(errno)); - return -errno; - } - - /* check if DAX device */ - if (strcmp("/sys/class/dax", rpath)) { - log_err("%s: %s not a DAX device!\n", - td->o.name, f->file_name); - } - - snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/size", - major(st.st_rdev), minor(st.st_rdev)); - - sfile = fopen(spath, "r"); - if (!sfile) { - log_err("%s: fopen on %s failed (%s)\n", - td->o.name, spath, strerror(errno)); - return 1; - } - - rc = fscanf(sfile, "%lu", &size); - if (rc < 0) { - log_err("%s: fscanf on %s failed (%s)\n", - td->o.name, spath, strerror(errno)); - return 1; - } - - f->real_file_size = size; - - fclose(sfile); - - if (f->file_offset > f->real_file_size) { - log_err("%s: offset extends end (%llu > %llu)\n", td->o.name, - (unsigned long long) f->file_offset, - (unsigned long long) f->real_file_size); - return 1; - } - - fio_file_set_size_known(f); - return 0; -} - -static struct ioengine_ops ioengine = { - .name = "dev-dax", - .version = FIO_IOOPS_VERSION, - .init = fio_devdax_init, - .prep = fio_devdax_prep, - .queue = fio_devdax_queue, - .open_file = fio_devdax_open_file, - .close_file = fio_devdax_close_file, - .get_file_size = fio_devdax_get_file_size, - .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NOEXTEND | FIO_NODISKUTIL, -}; - -static void fio_init fio_devdax_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_devdax_unregister(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/e4defrag.c b/engines/e4defrag.c deleted file mode 100644 index 4b444888..00000000 --- a/engines/e4defrag.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - * ioe_e4defrag: ioengine for git://git.kernel.dk/fio.git - * - * IO engine that does regular EXT4_IOC_MOVE_EXT ioctls to simulate - * defragment activity - * - */ - -#include <sys/types.h> -#include <sys/stat.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <sys/uio.h> -#include <errno.h> -#include <assert.h> -#include <fcntl.h> - -#include "../fio.h" -#include "../optgroup.h" - -#ifndef EXT4_IOC_MOVE_EXT -#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) -struct move_extent { - __u32 reserved; /* should be zero */ - __u32 donor_fd; /* donor file descriptor */ - __u64 orig_start; /* logical start offset in block for orig */ - __u64 donor_start; /* logical start offset in block for donor */ - __u64 len; /* block length to be moved */ - __u64 moved_len; /* moved block length */ -}; -#endif - -struct e4defrag_data { - int donor_fd; - int bsz; -}; - -struct e4defrag_options { - void *pad; - unsigned int inplace; - char * donor_name; -}; - -static struct fio_option options[] = { - { - .name = "donorname", - .lname = "Donor Name", - .type = FIO_OPT_STR_STORE, - .off1 = offsetof(struct e4defrag_options, donor_name), - .help = "File used as a block donor", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_E4DEFRAG, - }, - { - .name = "inplace", - .lname = "In Place", - .type = FIO_OPT_INT, - .off1 = offsetof(struct e4defrag_options, inplace), - .minval = 0, - .maxval = 1, - .help = "Alloc and free space inside defrag event", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_E4DEFRAG, - }, - { - .name = NULL, - }, -}; - -static int fio_e4defrag_init(struct thread_data *td) -{ - int r, len = 0; - struct e4defrag_options *o = td->eo; - struct e4defrag_data *ed; - struct stat stub; - char donor_name[PATH_MAX]; - - if (!strlen(o->donor_name)) { - log_err("'donorname' options required\n"); - return 1; - } - - ed = malloc(sizeof(*ed)); - if (!ed) { - td_verror(td, ENOMEM, "io_queue_init"); - return 1; - } - memset(ed, 0 ,sizeof(*ed)); - - if (td->o.directory) - len = sprintf(donor_name, "%s/", td->o.directory); - sprintf(donor_name + len, "%s", o->donor_name); - - ed->donor_fd = open(donor_name, O_CREAT|O_WRONLY, 0644); - if (ed->donor_fd < 0) { - td_verror(td, errno, "io_queue_init"); - log_err("Can't open donor file %s err:%d\n", donor_name, ed->donor_fd); - free(ed); - return 1; - } - - if (!o->inplace) { - long long __len = td->o.file_size_high - td->o.start_offset; - r = fallocate(ed->donor_fd, 0, td->o.start_offset, __len); - if (r) - goto err; - } - r = fstat(ed->donor_fd, &stub); - if (r) - goto err; - - ed->bsz = stub.st_blksize; - td->io_ops_data = ed; - return 0; -err: - td_verror(td, errno, "io_queue_init"); - close(ed->donor_fd); - free(ed); - return 1; -} - -static void fio_e4defrag_cleanup(struct thread_data *td) -{ - struct e4defrag_data *ed = td->io_ops_data; - if (ed) { - if (ed->donor_fd >= 0) - close(ed->donor_fd); - free(ed); - } -} - - -static int fio_e4defrag_queue(struct thread_data *td, struct io_u *io_u) -{ - - int ret; - unsigned long long len; - struct move_extent me; - struct fio_file *f = io_u->file; - struct e4defrag_data *ed = td->io_ops_data; - struct e4defrag_options *o = td->eo; - - fio_ro_check(td, io_u); - - /* Theoretically defragmentation should not change data, but it - * changes data layout. So this function handle only DDIR_WRITE - * in order to satisfy strict read only access pattern - */ - if (io_u->ddir != DDIR_WRITE) { - io_u->error = EINVAL; - return FIO_Q_COMPLETED; - } - - if (o->inplace) { - ret = fallocate(ed->donor_fd, 0, io_u->offset, io_u->xfer_buflen); - if (ret) - goto out; - } - - memset(&me, 0, sizeof(me)); - me.donor_fd = ed->donor_fd; - me.orig_start = io_u->offset / ed->bsz; - me.donor_start = me.orig_start; - len = (io_u->offset + io_u->xfer_buflen + ed->bsz -1); - me.len = len / ed->bsz - me.orig_start; - - ret = ioctl(f->fd, EXT4_IOC_MOVE_EXT, &me); - len = me.moved_len * ed->bsz; - - if (len > io_u->xfer_buflen) - len = io_u->xfer_buflen; - - if (len != io_u->xfer_buflen) { - if (len) { - io_u->resid = io_u->xfer_buflen - len; - io_u->error = 0; - } else { - /* access beyond i_size */ - io_u->error = EINVAL; - } - } - if (ret) - io_u->error = errno; - - if (o->inplace) - ret = ftruncate(ed->donor_fd, 0); -out: - if (ret && !io_u->error) - io_u->error = errno; - - return FIO_Q_COMPLETED; -} - -static struct ioengine_ops ioengine = { - .name = "e4defrag", - .version = FIO_IOOPS_VERSION, - .init = fio_e4defrag_init, - .queue = fio_e4defrag_queue, - .open_file = generic_open_file, - .close_file = generic_close_file, - .get_file_size = generic_get_file_size, - .flags = FIO_SYNCIO, - .cleanup = fio_e4defrag_cleanup, - .options = options, - .option_struct_size = sizeof(struct e4defrag_options), - -}; - -static void fio_init fio_syncio_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_syncio_unregister(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/falloc.c b/engines/falloc.c deleted file mode 100644 index 2b00d525..00000000 --- a/engines/falloc.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * falloc: ioengine for git://git.kernel.dk/fio.git - * - * IO engine that does regular fallocate to simulate data transfer - * as fio ioengine. - * DDIR_READ does fallocate(,mode = FALLOC_FL_KEEP_SIZE,) - * DDIR_WRITE does fallocate(,mode = 0) : fallocate with size extension - * DDIR_TRIM does fallocate(,mode = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE) - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <sys/uio.h> -#include <errno.h> -#include <assert.h> -#include <fcntl.h> - -#include "../fio.h" -#include "../filehash.h" - -/* - * generic_open_file is not appropriate because does not allow to perform - * TRIM in to file - */ -static int open_file(struct thread_data *td, struct fio_file *f) -{ - int from_hash = 0; - - dprint(FD_FILE, "fd open %s\n", f->file_name); - - if (f->filetype != FIO_TYPE_FILE) { - log_err("fio: only files are supported fallocate \n"); - return 1; - } - if (!strcmp(f->file_name, "-")) { - log_err("fio: can't read/write to stdin/out\n"); - return 1; - } - -open_again: - from_hash = file_lookup_open(f, O_CREAT|O_RDWR); - - if (f->fd == -1) { - char buf[FIO_VERROR_SIZE]; - int e = errno; - - snprintf(buf, sizeof(buf), "open(%s)", f->file_name); - td_verror(td, e, buf); - } - - if (!from_hash && f->fd != -1) { - if (add_file_hash(f)) { - int fio_unused ret; - - /* - * OK to ignore, we haven't done anything with it - */ - ret = generic_close_file(td, f); - goto open_again; - } - } - - return 0; -} - -#ifndef FALLOC_FL_KEEP_SIZE -#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */ -#endif -#ifndef FALLOC_FL_PUNCH_HOLE -#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */ -#endif -static int fio_fallocate_queue(struct thread_data *td, struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - int ret; - int flags = 0; - - fio_ro_check(td, io_u); - - if (io_u->ddir == DDIR_READ) - flags = FALLOC_FL_KEEP_SIZE; - else if (io_u->ddir == DDIR_WRITE) - flags = 0; - else if (io_u->ddir == DDIR_TRIM) - flags = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; - - ret = fallocate(f->fd, flags, io_u->offset, io_u->xfer_buflen); - - if (ret) - io_u->error = errno; - - return FIO_Q_COMPLETED; -} - -static struct ioengine_ops ioengine = { - .name = "falloc", - .version = FIO_IOOPS_VERSION, - .queue = fio_fallocate_queue, - .open_file = open_file, - .close_file = generic_close_file, - .get_file_size = generic_get_file_size, - .flags = FIO_SYNCIO -}; - -static void fio_init fio_syncio_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_syncio_unregister(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/ftruncate.c b/engines/ftruncate.c deleted file mode 100644 index e86dbac0..00000000 --- a/engines/ftruncate.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * ftruncate: ioengine for git://git.kernel.dk/fio.git - * - * IO engine that does regular truncates to simulate data transfer - * as fio ioengine. - * DDIR_WRITE does ftruncate - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <sys/uio.h> -#include <errno.h> -#include <assert.h> -#include <fcntl.h> - -#include "../fio.h" -#include "../filehash.h" - -static int fio_ftruncate_queue(struct thread_data *td, struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - int ret; - fio_ro_check(td, io_u); - - if (io_u->ddir != DDIR_WRITE) { - io_u->error = EINVAL; - return FIO_Q_COMPLETED; - } - ret = ftruncate(f->fd, io_u->offset); - - if (ret) - io_u->error = errno; - - return FIO_Q_COMPLETED; -} - -static struct ioengine_ops ioengine = { - .name = "ftruncate", - .version = FIO_IOOPS_VERSION, - .queue = fio_ftruncate_queue, - .open_file = generic_open_file, - .close_file = generic_close_file, - .get_file_size = generic_get_file_size, - .flags = FIO_SYNCIO | FIO_FAKEIO -}; - -static void fio_init fio_syncio_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_syncio_unregister(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/fusion-aw.c b/engines/fusion-aw.c deleted file mode 100644 index 77844ffe..00000000 --- a/engines/fusion-aw.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Custom fio(1) engine that submits synchronous atomic writes to file. - * - * Copyright (C) 2013 Fusion-io, Inc. - * Author: Santhosh Kumar Koundinya (skoundinya@fusionio.com). - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; under version 2 of the License. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License version - * 2 for more details. - * - * You should have received a copy of the GNU General Public License Version 2 - * along with this program; if not see <http://www.gnu.org/licenses/> - */ - -#include <stdlib.h> -#include <stdint.h> - -#include "../fio.h" - -#include <nvm/nvm_primitives.h> - -#define NUM_ATOMIC_CAPABILITIES (5) - -struct fas_data { - nvm_handle_t nvm_handle; - size_t xfer_buf_align; - size_t xfer_buflen_align; - size_t xfer_buflen_max; - size_t sector_size; -}; - -static int queue(struct thread_data *td, struct io_u *io_u) -{ - struct fas_data *d = FILE_ENG_DATA(io_u->file); - int rc; - - if (io_u->ddir != DDIR_WRITE) { - td_vmsg(td, EINVAL, "only writes supported", "io_u->ddir"); - rc = -EINVAL; - goto out; - } - - if ((size_t) io_u->xfer_buf % d->xfer_buf_align) { - td_vmsg(td, EINVAL, "unaligned data buffer", "io_u->xfer_buf"); - rc = -EINVAL; - goto out; - } - - if (io_u->xfer_buflen % d->xfer_buflen_align) { - td_vmsg(td, EINVAL, "unaligned data size", "io_u->xfer_buflen"); - rc = -EINVAL; - goto out; - } - - if (io_u->xfer_buflen > d->xfer_buflen_max) { - td_vmsg(td, EINVAL, "data too big", "io_u->xfer_buflen"); - rc = -EINVAL; - goto out; - } - - rc = nvm_atomic_write(d->nvm_handle, (uint64_t) io_u->xfer_buf, - io_u->xfer_buflen, io_u->offset / d->sector_size); - if (rc == -1) { - td_verror(td, errno, "nvm_atomic_write"); - rc = -errno; - goto out; - } - rc = FIO_Q_COMPLETED; -out: - if (rc < 0) - io_u->error = -rc; - - return rc; -} - -static int open_file(struct thread_data *td, struct fio_file *f) -{ - int rc; - int fio_unused close_file_rc; - struct fas_data *d; - nvm_version_t nvm_version; - nvm_capability_t nvm_capability[NUM_ATOMIC_CAPABILITIES]; - - - d = malloc(sizeof(*d)); - if (!d) { - td_verror(td, ENOMEM, "malloc"); - rc = ENOMEM; - goto error; - } - d->nvm_handle = -1; - FILE_SET_ENG_DATA(f, d); - - rc = generic_open_file(td, f); - - if (rc) - goto free_engine_data; - - /* Set the version of the library as seen when engine is compiled */ - nvm_version.major = NVM_PRIMITIVES_API_MAJOR; - nvm_version.minor = NVM_PRIMITIVES_API_MINOR; - nvm_version.micro = NVM_PRIMITIVES_API_MICRO; - - d->nvm_handle = nvm_get_handle(f->fd, &nvm_version); - if (d->nvm_handle == -1) { - td_vmsg(td, errno, "nvm_get_handle failed", "nvm_get_handle"); - rc = errno; - goto close_file; - } - - nvm_capability[0].cap_id = NVM_CAP_ATOMIC_WRITE_START_ALIGN_ID; - nvm_capability[1].cap_id = NVM_CAP_ATOMIC_WRITE_MULTIPLICITY_ID; - nvm_capability[2].cap_id = NVM_CAP_ATOMIC_WRITE_MAX_VECTOR_SIZE_ID; - nvm_capability[3].cap_id = NVM_CAP_SECTOR_SIZE_ID; - nvm_capability[4].cap_id = NVM_CAP_ATOMIC_MAX_IOV_ID; - rc = nvm_get_capabilities(d->nvm_handle, nvm_capability, - NUM_ATOMIC_CAPABILITIES, false); - if (rc == -1) { - td_vmsg(td, errno, "error in getting atomic write capabilities", "nvm_get_capabilities"); - rc = errno; - goto close_file; - } else if (rc < NUM_ATOMIC_CAPABILITIES) { - td_vmsg(td, EINVAL, "couldn't get all the atomic write capabilities" , "nvm_get_capabilities"); - rc = ECANCELED; - goto close_file; - } - /* Reset rc to 0 because we got all capabilities we needed */ - rc = 0; - d->xfer_buf_align = nvm_capability[0].cap_value; - d->xfer_buflen_align = nvm_capability[1].cap_value; - d->xfer_buflen_max = d->xfer_buflen_align * nvm_capability[2].cap_value * nvm_capability[4].cap_value; - d->sector_size = nvm_capability[3].cap_value; - -out: - return rc; -close_file: - close_file_rc = generic_close_file(td, f); -free_engine_data: - free(d); -error: - f->fd = -1; - FILE_SET_ENG_DATA(f, NULL); - goto out; -} - -static int close_file(struct thread_data *td, struct fio_file *f) -{ - struct fas_data *d = FILE_ENG_DATA(f); - - if (d) { - if (d->nvm_handle != -1) - nvm_release_handle(d->nvm_handle); - free(d); - FILE_SET_ENG_DATA(f, NULL); - } - - return generic_close_file(td, f); -} - -static struct ioengine_ops ioengine = { - .name = "fusion-aw-sync", - .version = FIO_IOOPS_VERSION, - .queue = queue, - .open_file = open_file, - .close_file = close_file, - .get_file_size = generic_get_file_size, - .flags = FIO_SYNCIO | FIO_RAWIO | FIO_MEMALIGN -}; - -static void fio_init fio_fusion_aw_init(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_fusion_aw_exit(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/gfapi.h b/engines/gfapi.h deleted file mode 100644 index 10284314..00000000 --- a/engines/gfapi.h +++ /dev/null @@ -1,22 +0,0 @@ -#include <glusterfs/api/glfs.h> -#include "../fio.h" - -struct gf_options { - void *pad; - char *gf_vol; - char *gf_brick; -}; - -struct gf_data { - glfs_t *fs; - glfs_fd_t *fd; - struct io_u **aio_events; -}; - -extern struct fio_option gfapi_options[]; -extern int fio_gf_setup(struct thread_data *td); -extern void fio_gf_cleanup(struct thread_data *td); -extern int fio_gf_get_file_size(struct thread_data *td, struct fio_file *f); -extern int fio_gf_open_file(struct thread_data *td, struct fio_file *f); -extern int fio_gf_close_file(struct thread_data *td, struct fio_file *f); -extern int fio_gf_unlink_file(struct thread_data *td, struct fio_file *f); diff --git a/engines/glusterfs.c b/engines/glusterfs.c deleted file mode 100644 index 2abc283f..00000000 --- a/engines/glusterfs.c +++ /dev/null @@ -1,306 +0,0 @@ -/* - * glusterfs engine - * - * common Glusterfs's gfapi interface - * - */ - -#include "gfapi.h" -#include "../optgroup.h" - -struct fio_option gfapi_options[] = { - { - .name = "volume", - .lname = "Glusterfs volume", - .type = FIO_OPT_STR_STORE, - .help = "Name of the Glusterfs volume", - .off1 = offsetof(struct gf_options, gf_vol), - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_GFAPI, - }, - { - .name = "brick", - .lname = "Glusterfs brick name", - .type = FIO_OPT_STR_STORE, - .help = "Name of the Glusterfs brick to connect", - .off1 = offsetof(struct gf_options, gf_brick), - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_GFAPI, - }, - { - .name = NULL, - }, -}; - -int fio_gf_setup(struct thread_data *td) -{ - int r = 0; - struct gf_data *g = NULL; - struct gf_options *opt = td->eo; - struct stat sb = { 0, }; - - dprint(FD_IO, "fio setup\n"); - - if (td->io_ops_data) - return 0; - - g = malloc(sizeof(struct gf_data)); - if (!g) { - log_err("malloc failed.\n"); - return -ENOMEM; - } - g->fs = NULL; - g->fd = NULL; - g->aio_events = NULL; - - g->fs = glfs_new(opt->gf_vol); - if (!g->fs) { - log_err("glfs_new failed.\n"); - goto cleanup; - } - glfs_set_logging(g->fs, "/tmp/fio_gfapi.log", 7); - /* default to tcp */ - r = glfs_set_volfile_server(g->fs, "tcp", opt->gf_brick, 0); - if (r) { - log_err("glfs_set_volfile_server failed.\n"); - goto cleanup; - } - r = glfs_init(g->fs); - if (r) { - log_err("glfs_init failed. Is glusterd running on brick?\n"); - goto cleanup; - } - sleep(2); - r = glfs_lstat(g->fs, ".", &sb); - if (r) { - log_err("glfs_lstat failed.\n"); - goto cleanup; - } - dprint(FD_FILE, "fio setup %p\n", g->fs); - td->io_ops_data = g; - return 0; -cleanup: - if (g->fs) - glfs_fini(g->fs); - free(g); - td->io_ops_data = NULL; - return r; -} - -void fio_gf_cleanup(struct thread_data *td) -{ - struct gf_data *g = td->io_ops_data; - - if (g) { - if (g->aio_events) - free(g->aio_events); - if (g->fd) - glfs_close(g->fd); - if (g->fs) - glfs_fini(g->fs); - free(g); - td->io_ops_data = NULL; - } -} - -int fio_gf_get_file_size(struct thread_data *td, struct fio_file *f) -{ - struct stat buf; - int ret; - struct gf_data *g = td->io_ops_data; - - dprint(FD_FILE, "get file size %s\n", f->file_name); - - if (!g || !g->fs) { - return 0; - } - if (fio_file_size_known(f)) - return 0; - - ret = glfs_lstat(g->fs, f->file_name, &buf); - if (ret < 0) { - log_err("glfs_lstat failed.\n"); - return ret; - } - - f->real_file_size = buf.st_size; - fio_file_set_size_known(f); - - return 0; - -} - -int fio_gf_open_file(struct thread_data *td, struct fio_file *f) -{ - - int flags = 0; - int ret = 0; - struct gf_data *g = td->io_ops_data; - struct stat sb = { 0, }; - - if (td_write(td)) { - if (!read_only) - flags = O_RDWR; - } else if (td_read(td)) { - if (!read_only) - flags = O_RDWR; - else - flags = O_RDONLY; - } - - if (td->o.odirect) - flags |= OS_O_DIRECT; - if (td->o.sync_io) - flags |= O_SYNC; - - dprint(FD_FILE, "fio file %s open mode %s td rw %s\n", f->file_name, - flags & O_RDONLY ? "ro" : "rw", td_read(td) ? "read" : "write"); - g->fd = glfs_creat(g->fs, f->file_name, flags, 0644); - if (!g->fd) { - ret = errno; - log_err("glfs_creat failed.\n"); - return ret; - } - /* file for read doesn't exist or shorter than required, create/extend it */ - if (td_read(td)) { - if (glfs_lstat(g->fs, f->file_name, &sb) - || sb.st_size < f->real_file_size) { - dprint(FD_FILE, "fio extend file %s from %ld to %ld\n", - f->file_name, sb.st_size, f->real_file_size); - ret = glfs_ftruncate(g->fd, f->real_file_size); - if (ret) { - log_err("failed fio extend file %s to %ld\n", - f->file_name, f->real_file_size); - } else { - unsigned long long left; - unsigned int bs; - char *b; - int r; - - /* fill the file, copied from extend_file */ - b = malloc(td->o.max_bs[DDIR_WRITE]); - - left = f->real_file_size; - while (left && !td->terminate) { - bs = td->o.max_bs[DDIR_WRITE]; - if (bs > left) - bs = left; - - fill_io_buffer(td, b, bs, bs); - - r = glfs_write(g->fd, b, bs, 0); - dprint(FD_IO, - "fio write %d of %ld file %s\n", - r, f->real_file_size, - f->file_name); - - if (r > 0) { - left -= r; - continue; - } else { - if (r < 0) { - int __e = errno; - - if (__e == ENOSPC) { - if (td->o. - fill_device) - break; - log_info - ("fio: ENOSPC on laying out " - "file, stopping\n"); - break; - } - td_verror(td, errno, - "write"); - } else - td_verror(td, EIO, - "write"); - - break; - } - } - - if (b) - free(b); - glfs_lseek(g->fd, 0, SEEK_SET); - - if (td->terminate && td->o.unlink) { - dprint(FD_FILE, "terminate unlink %s\n", - f->file_name); - glfs_unlink(g->fs, f->file_name); - } else if (td->o.create_fsync) { - if (glfs_fsync(g->fd) < 0) { - dprint(FD_FILE, - "failed to sync, close %s\n", - f->file_name); - td_verror(td, errno, "fsync"); - glfs_close(g->fd); - g->fd = NULL; - return 1; - } - } - } - } - } -#if defined(GFAPI_USE_FADVISE) - { - int r = 0; - if (td_random(td)) { - r = glfs_fadvise(g->fd, 0, f->real_file_size, - POSIX_FADV_RANDOM); - } else { - r = glfs_fadvise(g->fd, 0, f->real_file_size, - POSIX_FADV_SEQUENTIAL); - } - if (r) { - dprint(FD_FILE, "fio %p fadvise %s status %d\n", g->fs, - f->file_name, r); - } - } -#endif - dprint(FD_FILE, "fio %p created %s\n", g->fs, f->file_name); - f->fd = -1; - f->shadow_fd = -1; - td->o.open_files ++; - return ret; -} - -int fio_gf_close_file(struct thread_data *td, struct fio_file *f) -{ - int ret = 0; - struct gf_data *g = td->io_ops_data; - - dprint(FD_FILE, "fd close %s\n", f->file_name); - - if (g) { - if (g->fd && glfs_close(g->fd) < 0) - ret = errno; - g->fd = NULL; - } - - return ret; -} - -int fio_gf_unlink_file(struct thread_data *td, struct fio_file *f) -{ - int ret = 0; - struct gf_data *g = td->io_ops_data; - - dprint(FD_FILE, "fd unlink %s\n", f->file_name); - - if (g) { - if (g->fd && glfs_close(g->fd) < 0) - ret = errno; - - glfs_unlink(g->fs, f->file_name); - - if (g->fs) - glfs_fini(g->fs); - - g->fd = NULL; - free(g); - } - td->io_ops_data = NULL; - - return ret; -} diff --git a/engines/glusterfs_async.c b/engines/glusterfs_async.c deleted file mode 100644 index f46cb263..00000000 --- a/engines/glusterfs_async.c +++ /dev/null @@ -1,191 +0,0 @@ -/* - * glusterfs engine - * - * IO engine using Glusterfs's gfapi async interface - * - */ -#include "gfapi.h" -#define NOT_YET 1 -struct fio_gf_iou { - struct io_u *io_u; - int io_complete; -}; - -static struct io_u *fio_gf_event(struct thread_data *td, int event) -{ - struct gf_data *gf_data = td->io_ops_data; - - dprint(FD_IO, "%s\n", __FUNCTION__); - return gf_data->aio_events[event]; -} - -static int fio_gf_getevents(struct thread_data *td, unsigned int min, - unsigned int max, const struct timespec *t) -{ - struct gf_data *g = td->io_ops_data; - unsigned int events = 0; - struct io_u *io_u; - int i; - - dprint(FD_IO, "%s\n", __FUNCTION__); - do { - io_u_qiter(&td->io_u_all, io_u, i) { - struct fio_gf_iou *io; - - if (!(io_u->flags & IO_U_F_FLIGHT)) - continue; - - io = io_u->engine_data; - if (io->io_complete) { - io->io_complete = 0; - g->aio_events[events] = io_u; - events++; - - if (events >= max) - break; - } - - } - if (events < min) - usleep(100); - else - break; - - } while (1); - - return events; -} - -static void fio_gf_io_u_free(struct thread_data *td, struct io_u *io_u) -{ - struct fio_gf_iou *io = io_u->engine_data; - - if (io) { - if (io->io_complete) - log_err("incomplete IO found.\n"); - io_u->engine_data = NULL; - free(io); - } -} - -static int fio_gf_io_u_init(struct thread_data *td, struct io_u *io_u) -{ - dprint(FD_FILE, "%s\n", __FUNCTION__); - - if (!io_u->engine_data) { - struct fio_gf_iou *io; - - io = malloc(sizeof(struct fio_gf_iou)); - if (!io) { - td_verror(td, errno, "malloc"); - return 1; - } - io->io_complete = 0; - io->io_u = io_u; - io_u->engine_data = io; - } - return 0; -} - -static void gf_async_cb(glfs_fd_t * fd, ssize_t ret, void *data) -{ - struct io_u *io_u = data; - struct fio_gf_iou *iou = io_u->engine_data; - - dprint(FD_IO, "%s ret %lu\n", __FUNCTION__, ret); - iou->io_complete = 1; -} - -static int fio_gf_async_queue(struct thread_data fio_unused * td, - struct io_u *io_u) -{ - struct gf_data *g = td->io_ops_data; - int r; - - dprint(FD_IO, "%s op %s\n", __FUNCTION__, io_ddir_name(io_u->ddir)); - - fio_ro_check(td, io_u); - - if (io_u->ddir == DDIR_READ) - r = glfs_pread_async(g->fd, io_u->xfer_buf, io_u->xfer_buflen, - io_u->offset, 0, gf_async_cb, io_u); - else if (io_u->ddir == DDIR_WRITE) - r = glfs_pwrite_async(g->fd, io_u->xfer_buf, io_u->xfer_buflen, - io_u->offset, 0, gf_async_cb, io_u); -#if defined(CONFIG_GF_TRIM) - else if (io_u->ddir == DDIR_TRIM) - r = glfs_discard_async(g->fd, io_u->offset, io_u->xfer_buflen, - gf_async_cb, io_u); -#endif - else if (io_u->ddir == DDIR_DATASYNC) - r = glfs_fdatasync_async(g->fd, gf_async_cb, io_u); - else if (io_u->ddir == DDIR_SYNC) - r = glfs_fsync_async(g->fd, gf_async_cb, io_u); - else - r = EINVAL; - - if (r) { - log_err("glfs queue failed.\n"); - io_u->error = r; - goto failed; - } - return FIO_Q_QUEUED; - -failed: - io_u->error = r; - td_verror(td, io_u->error, "xfer"); - return FIO_Q_COMPLETED; -} - -static int fio_gf_async_setup(struct thread_data *td) -{ - struct gf_data *g; - int r; - -#if defined(NOT_YET) - log_err("the async interface is still very experimental...\n"); -#endif - r = fio_gf_setup(td); - if (r) - return r; - - td->o.use_thread = 1; - g = td->io_ops_data; - g->aio_events = calloc(td->o.iodepth, sizeof(struct io_u *)); - if (!g->aio_events) { - r = -ENOMEM; - fio_gf_cleanup(td); - return r; - } - - return r; -} - -static struct ioengine_ops ioengine = { - .name = "gfapi_async", - .version = FIO_IOOPS_VERSION, - .init = fio_gf_async_setup, - .cleanup = fio_gf_cleanup, - .queue = fio_gf_async_queue, - .open_file = fio_gf_open_file, - .close_file = fio_gf_close_file, - .unlink_file = fio_gf_unlink_file, - .get_file_size = fio_gf_get_file_size, - .getevents = fio_gf_getevents, - .event = fio_gf_event, - .io_u_init = fio_gf_io_u_init, - .io_u_free = fio_gf_io_u_free, - .options = gfapi_options, - .option_struct_size = sizeof(struct gf_options), - .flags = FIO_DISKLESSIO, -}; - -static void fio_init fio_gf_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_gf_unregister(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/glusterfs_sync.c b/engines/glusterfs_sync.c deleted file mode 100644 index 25d05b25..00000000 --- a/engines/glusterfs_sync.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * glusterfs engine - * - * IO engine using Glusterfs's gfapi sync interface - * - */ - -#include "gfapi.h" - -#define LAST_POS(f) ((f)->engine_pos) -static int fio_gf_prep(struct thread_data *td, struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - struct gf_data *g = td->io_ops_data; - - dprint(FD_FILE, "fio prep\n"); - - if (!ddir_rw(io_u->ddir)) - return 0; - - if (LAST_POS(f) != -1ULL && LAST_POS(f) == io_u->offset) - return 0; - - if (glfs_lseek(g->fd, io_u->offset, SEEK_SET) < 0) { - td_verror(td, errno, "lseek"); - return 1; - } - - return 0; -} - -static int fio_gf_queue(struct thread_data *td, struct io_u *io_u) -{ - struct gf_data *g = td->io_ops_data; - int ret = 0; - - dprint(FD_FILE, "fio queue len %lu\n", io_u->xfer_buflen); - fio_ro_check(td, io_u); - - if (io_u->ddir == DDIR_READ) - ret = glfs_read(g->fd, io_u->xfer_buf, io_u->xfer_buflen, 0); - else if (io_u->ddir == DDIR_WRITE) - ret = glfs_write(g->fd, io_u->xfer_buf, io_u->xfer_buflen, 0); - else if (io_u->ddir == DDIR_SYNC) - ret = glfs_fsync(g->fd); - else if (io_u->ddir == DDIR_DATASYNC) - ret = glfs_fdatasync(g->fd); - else { - log_err("unsupported operation.\n"); - return -EINVAL; - } - dprint(FD_FILE, "fio len %lu ret %d\n", io_u->xfer_buflen, ret); - if (io_u->file && ret >= 0 && ddir_rw(io_u->ddir)) - LAST_POS(io_u->file) = io_u->offset + ret; - - if (ret != (int)io_u->xfer_buflen) { - if (ret >= 0) { - io_u->resid = io_u->xfer_buflen - ret; - io_u->error = 0; - return FIO_Q_COMPLETED; - } else - io_u->error = errno; - } - - if (io_u->error) { - log_err("IO failed.\n"); - td_verror(td, io_u->error, "xfer"); - } - - return FIO_Q_COMPLETED; - -} - -static struct ioengine_ops ioengine = { - .name = "gfapi", - .version = FIO_IOOPS_VERSION, - .init = fio_gf_setup, - .cleanup = fio_gf_cleanup, - .prep = fio_gf_prep, - .queue = fio_gf_queue, - .open_file = fio_gf_open_file, - .close_file = fio_gf_close_file, - .unlink_file = fio_gf_unlink_file, - .get_file_size = fio_gf_get_file_size, - .options = gfapi_options, - .option_struct_size = sizeof(struct gf_options), - .flags = FIO_SYNCIO | FIO_DISKLESSIO, -}; - -static void fio_init fio_gf_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_gf_unregister(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/guasi.c b/engines/guasi.c deleted file mode 100644 index eb12c899..00000000 --- a/engines/guasi.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - * guasi engine - * - * IO engine using the GUASI library. - * - * Before running make. You'll need the GUASI lib as well: - * - * http://www.xmailserver.org/guasi-lib.html - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <assert.h> - -#include "../fio.h" - -#define GFIO_MIN_THREADS 32 -#ifndef GFIO_MAX_THREADS -#define GFIO_MAX_THREADS 2000 -#endif - -#include <guasi.h> -#include <guasi_syscalls.h> - -#ifdef GFIO_DEBUG -#define GDBG_PRINT(a) printf a -#else -#define GDBG_PRINT(a) (void) 0 -#endif - -struct guasi_data { - guasi_t hctx; - int max_reqs; - guasi_req_t *reqs; - struct io_u **io_us; - int queued_nr; - int reqs_nr; -}; - -static int fio_guasi_prep(struct thread_data fio_unused *td, struct io_u *io_u) -{ - - GDBG_PRINT(("fio_guasi_prep(%p)\n", io_u)); - io_u->greq = NULL; - - return 0; -} - -static struct io_u *fio_guasi_event(struct thread_data *td, int event) -{ - struct guasi_data *ld = td->io_ops_data; - struct io_u *io_u; - struct guasi_reqinfo rinf; - - GDBG_PRINT(("fio_guasi_event(%d)\n", event)); - if (guasi_req_info(ld->reqs[event], &rinf) < 0) { - log_err("guasi_req_info(%d) FAILED!\n", event); - return NULL; - } - io_u = rinf.asid; - io_u->error = EINPROGRESS; - GDBG_PRINT(("fio_guasi_event(%d) -> %p\n", event, io_u)); - if (rinf.status == GUASI_STATUS_COMPLETE) { - io_u->error = rinf.result; - if (io_u->ddir == DDIR_READ || - io_u->ddir == DDIR_WRITE) { - io_u->error = 0; - if (rinf.result != (long) io_u->xfer_buflen) { - if (rinf.result >= 0) - io_u->resid = io_u->xfer_buflen - rinf.result; - else - io_u->error = rinf.error; - } - } - } - - return io_u; -} - -static int fio_guasi_getevents(struct thread_data *td, unsigned int min, - unsigned int max, const struct timespec *t) -{ - struct guasi_data *ld = td->io_ops_data; - int n, r; - long timeo = -1; - - GDBG_PRINT(("fio_guasi_getevents(%d, %d)\n", min, max)); - if (min > ld->max_reqs) - min = ld->max_reqs; - if (max > ld->max_reqs) - max = ld->max_reqs; - if (t) - timeo = t->tv_sec * 1000L + t->tv_nsec / 1000000L; - for (n = 0; n < ld->reqs_nr; n++) - guasi_req_free(ld->reqs[n]); - n = 0; - do { - r = guasi_fetch(ld->hctx, ld->reqs + n, min - n, - max - n, timeo); - if (r < 0) { - log_err("guasi_fetch() FAILED! (%d)\n", r); - break; - } - n += r; - if (n >= min) - break; - } while (1); - ld->reqs_nr = n; - GDBG_PRINT(("fio_guasi_getevents() -> %d\n", n)); - - return n; -} - -static int fio_guasi_queue(struct thread_data *td, struct io_u *io_u) -{ - struct guasi_data *ld = td->io_ops_data; - - fio_ro_check(td, io_u); - - GDBG_PRINT(("fio_guasi_queue(%p)\n", io_u)); - if (ld->queued_nr == (int) td->o.iodepth) - return FIO_Q_BUSY; - - ld->io_us[ld->queued_nr] = io_u; - ld->queued_nr++; - return FIO_Q_QUEUED; -} - -static void fio_guasi_queued(struct thread_data *td, struct io_u **io_us, int nr) -{ - int i; - struct io_u *io_u; - struct timeval now; - - if (!fio_fill_issue_time(td)) - return; - - io_u_mark_submit(td, nr); - fio_gettime(&now, NULL); - for (i = 0; i < nr; i++) { - io_u = io_us[i]; - memcpy(&io_u->issue_time, &now, sizeof(now)); - io_u_queued(td, io_u); - } -} - -static int fio_guasi_commit(struct thread_data *td) -{ - struct guasi_data *ld = td->io_ops_data; - int i; - struct io_u *io_u; - struct fio_file *f; - - GDBG_PRINT(("fio_guasi_commit(%d)\n", ld->queued_nr)); - for (i = 0; i < ld->queued_nr; i++) { - io_u = ld->io_us[i]; - GDBG_PRINT(("fio_guasi_commit(%d) --> %p\n", i, io_u)); - f = io_u->file; - io_u->greq = NULL; - if (io_u->ddir == DDIR_READ) - io_u->greq = guasi__pread(ld->hctx, ld, io_u, 0, - f->fd, io_u->xfer_buf, io_u->xfer_buflen, - io_u->offset); - else if (io_u->ddir == DDIR_WRITE) - io_u->greq = guasi__pwrite(ld->hctx, ld, io_u, 0, - f->fd, io_u->xfer_buf, io_u->xfer_buflen, - io_u->offset); - else if (ddir_sync(io_u->ddir)) - io_u->greq = guasi__fsync(ld->hctx, ld, io_u, 0, f->fd); - else { - log_err("fio_guasi_commit() FAILED: unknow request %d\n", - io_u->ddir); - } - if (io_u->greq == NULL) { - log_err("fio_guasi_commit() FAILED: submit failed (%s)\n", - strerror(errno)); - return -1; - } - } - fio_guasi_queued(td, ld->io_us, i); - ld->queued_nr = 0; - GDBG_PRINT(("fio_guasi_commit() -> %d\n", i)); - - return 0; -} - -static int fio_guasi_cancel(struct thread_data fio_unused *td, - struct io_u *io_u) -{ - GDBG_PRINT(("fio_guasi_cancel(%p) req=%p\n", io_u, io_u->greq)); - if (io_u->greq != NULL) - guasi_req_cancel(io_u->greq); - - return 0; -} - -static void fio_guasi_cleanup(struct thread_data *td) -{ - struct guasi_data *ld = td->io_ops_data; - int n; - - GDBG_PRINT(("fio_guasi_cleanup(%p)\n", ld)); - if (ld) { - for (n = 0; n < ld->reqs_nr; n++) - guasi_req_free(ld->reqs[n]); - guasi_free(ld->hctx); - free(ld->reqs); - free(ld->io_us); - free(ld); - } - GDBG_PRINT(("fio_guasi_cleanup(%p) DONE\n", ld)); -} - -static int fio_guasi_init(struct thread_data *td) -{ - int maxthr; - struct guasi_data *ld = malloc(sizeof(*ld)); - - GDBG_PRINT(("fio_guasi_init(): depth=%d\n", td->o.iodepth)); - memset(ld, 0, sizeof(*ld)); - maxthr = td->o.iodepth > GFIO_MIN_THREADS ? td->o.iodepth: GFIO_MIN_THREADS; - if (maxthr > GFIO_MAX_THREADS) - maxthr = GFIO_MAX_THREADS; - if ((ld->hctx = guasi_create(GFIO_MIN_THREADS, maxthr, 1)) == NULL) { - td_verror(td, errno, "guasi_create"); - free(ld); - return 1; - } - ld->max_reqs = td->o.iodepth; - ld->reqs = malloc(ld->max_reqs * sizeof(guasi_req_t)); - ld->io_us = malloc(ld->max_reqs * sizeof(struct io_u *)); - memset(ld->io_us, 0, ld->max_reqs * sizeof(struct io_u *)); - ld->queued_nr = 0; - ld->reqs_nr = 0; - - td->io_ops_data = ld; - GDBG_PRINT(("fio_guasi_init(): depth=%d -> %p\n", td->o.iodepth, ld)); - - return 0; -} - -static struct ioengine_ops ioengine = { - .name = "guasi", - .version = FIO_IOOPS_VERSION, - .init = fio_guasi_init, - .prep = fio_guasi_prep, - .queue = fio_guasi_queue, - .commit = fio_guasi_commit, - .cancel = fio_guasi_cancel, - .getevents = fio_guasi_getevents, - .event = fio_guasi_event, - .cleanup = fio_guasi_cleanup, - .open_file = generic_open_file, - .close_file = generic_close_file, - .get_file_size = generic_get_file_size, -}; - -static void fio_init fio_guasi_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_guasi_unregister(void) -{ - unregister_ioengine(&ioengine); -} - diff --git a/engines/libaio.c b/engines/libaio.c deleted file mode 100644 index e15c519e..00000000 --- a/engines/libaio.c +++ /dev/null @@ -1,396 +0,0 @@ -/* - * libaio engine - * - * IO engine using the Linux native aio interface. - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <assert.h> -#include <libaio.h> - -#include "../fio.h" -#include "../lib/pow2.h" -#include "../optgroup.h" - -static int fio_libaio_commit(struct thread_data *td); - -struct libaio_data { - io_context_t aio_ctx; - struct io_event *aio_events; - struct iocb **iocbs; - struct io_u **io_us; - - /* - * Basic ring buffer. 'head' is incremented in _queue(), and - * 'tail' is incremented in _commit(). We keep 'queued' so - * that we know if the ring is full or empty, when - * 'head' == 'tail'. 'entries' is the ring size, and - * 'is_pow2' is just an optimization to use AND instead of - * modulus to get the remainder on ring increment. - */ - int is_pow2; - unsigned int entries; - unsigned int queued; - unsigned int head; - unsigned int tail; -}; - -struct libaio_options { - void *pad; - unsigned int userspace_reap; -}; - -static struct fio_option options[] = { - { - .name = "userspace_reap", - .lname = "Libaio userspace reaping", - .type = FIO_OPT_STR_SET, - .off1 = offsetof(struct libaio_options, userspace_reap), - .help = "Use alternative user-space reap implementation", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_LIBAIO, - }, - { - .name = NULL, - }, -}; - -static inline void ring_inc(struct libaio_data *ld, unsigned int *val, - unsigned int add) -{ - if (ld->is_pow2) - *val = (*val + add) & (ld->entries - 1); - else - *val = (*val + add) % ld->entries; -} - -static int fio_libaio_prep(struct thread_data fio_unused *td, struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - - if (io_u->ddir == DDIR_READ) - io_prep_pread(&io_u->iocb, f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset); - else if (io_u->ddir == DDIR_WRITE) - io_prep_pwrite(&io_u->iocb, f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset); - else if (ddir_sync(io_u->ddir)) - io_prep_fsync(&io_u->iocb, f->fd); - - return 0; -} - -static struct io_u *fio_libaio_event(struct thread_data *td, int event) -{ - struct libaio_data *ld = td->io_ops_data; - struct io_event *ev; - struct io_u *io_u; - - ev = ld->aio_events + event; - io_u = container_of(ev->obj, struct io_u, iocb); - - if (ev->res != io_u->xfer_buflen) { - if (ev->res > io_u->xfer_buflen) - io_u->error = -ev->res; - else - io_u->resid = io_u->xfer_buflen - ev->res; - } else - io_u->error = 0; - - return io_u; -} - -struct aio_ring { - unsigned id; /** kernel internal index number */ - unsigned nr; /** number of io_events */ - unsigned head; - unsigned tail; - - unsigned magic; - unsigned compat_features; - unsigned incompat_features; - unsigned header_length; /** size of aio_ring */ - - struct io_event events[0]; -}; - -#define AIO_RING_MAGIC 0xa10a10a1 - -static int user_io_getevents(io_context_t aio_ctx, unsigned int max, - struct io_event *events) -{ - long i = 0; - unsigned head; - struct aio_ring *ring = (struct aio_ring*) aio_ctx; - - while (i < max) { - head = ring->head; - - if (head == ring->tail) { - /* There are no more completions */ - break; - } else { - /* There is another completion to reap */ - events[i] = ring->events[head]; - read_barrier(); - ring->head = (head + 1) % ring->nr; - i++; - } - } - - return i; -} - -static int fio_libaio_getevents(struct thread_data *td, unsigned int min, - unsigned int max, const struct timespec *t) -{ - struct libaio_data *ld = td->io_ops_data; - struct libaio_options *o = td->eo; - unsigned actual_min = td->o.iodepth_batch_complete_min == 0 ? 0 : min; - struct timespec __lt, *lt = NULL; - int r, events = 0; - - if (t) { - __lt = *t; - lt = &__lt; - } - - do { - if (o->userspace_reap == 1 - && actual_min == 0 - && ((struct aio_ring *)(ld->aio_ctx))->magic - == AIO_RING_MAGIC) { - r = user_io_getevents(ld->aio_ctx, max, - ld->aio_events + events); - } else { - r = io_getevents(ld->aio_ctx, actual_min, - max, ld->aio_events + events, lt); - } - if (r > 0) - events += r; - else if ((min && r == 0) || r == -EAGAIN) { - fio_libaio_commit(td); - usleep(100); - } else if (r != -EINTR) - break; - } while (events < min); - - return r < 0 ? r : events; -} - -static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct libaio_data *ld = td->io_ops_data; - - fio_ro_check(td, io_u); - - if (ld->queued == td->o.iodepth) - return FIO_Q_BUSY; - - /* - * fsync is tricky, since it can fail and we need to do it - * serialized with other io. the reason is that linux doesn't - * support aio fsync yet. So return busy for the case where we - * have pending io, to let fio complete those first. - */ - if (ddir_sync(io_u->ddir)) { - if (ld->queued) - return FIO_Q_BUSY; - - do_io_u_sync(td, io_u); - return FIO_Q_COMPLETED; - } - - if (io_u->ddir == DDIR_TRIM) { - if (ld->queued) - return FIO_Q_BUSY; - - do_io_u_trim(td, io_u); - return FIO_Q_COMPLETED; - } - - ld->iocbs[ld->head] = &io_u->iocb; - ld->io_us[ld->head] = io_u; - ring_inc(ld, &ld->head, 1); - ld->queued++; - return FIO_Q_QUEUED; -} - -static void fio_libaio_queued(struct thread_data *td, struct io_u **io_us, - unsigned int nr) -{ - struct timeval now; - unsigned int i; - - if (!fio_fill_issue_time(td)) - return; - - fio_gettime(&now, NULL); - - for (i = 0; i < nr; i++) { - struct io_u *io_u = io_us[i]; - - memcpy(&io_u->issue_time, &now, sizeof(now)); - io_u_queued(td, io_u); - } -} - -static int fio_libaio_commit(struct thread_data *td) -{ - struct libaio_data *ld = td->io_ops_data; - struct iocb **iocbs; - struct io_u **io_us; - struct timeval tv; - int ret, wait_start = 0; - - if (!ld->queued) - return 0; - - do { - long nr = ld->queued; - - nr = min((unsigned int) nr, ld->entries - ld->tail); - io_us = ld->io_us + ld->tail; - iocbs = ld->iocbs + ld->tail; - - ret = io_submit(ld->aio_ctx, nr, iocbs); - if (ret > 0) { - fio_libaio_queued(td, io_us, ret); - io_u_mark_submit(td, ret); - - ld->queued -= ret; - ring_inc(ld, &ld->tail, ret); - ret = 0; - wait_start = 0; - } else if (ret == -EINTR || !ret) { - if (!ret) - io_u_mark_submit(td, ret); - wait_start = 0; - continue; - } else if (ret == -EAGAIN) { - /* - * If we get EAGAIN, we should break out without - * error and let the upper layer reap some - * events for us. If we have no queued IO, we - * must loop here. If we loop for more than 30s, - * just error out, something must be buggy in the - * IO path. - */ - if (ld->queued) { - ret = 0; - break; - } - if (!wait_start) { - fio_gettime(&tv, NULL); - wait_start = 1; - } else if (mtime_since_now(&tv) > 30000) { - log_err("fio: aio appears to be stalled, giving up\n"); - break; - } - usleep(1); - continue; - } else if (ret == -ENOMEM) { - /* - * If we get -ENOMEM, reap events if we can. If - * we cannot, treat it as a fatal event since there's - * nothing we can do about it. - */ - if (ld->queued) - ret = 0; - break; - } else - break; - } while (ld->queued); - - return ret; -} - -static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u) -{ - struct libaio_data *ld = td->io_ops_data; - - return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events); -} - -static void fio_libaio_cleanup(struct thread_data *td) -{ - struct libaio_data *ld = td->io_ops_data; - - if (ld) { - /* - * Work-around to avoid huge RCU stalls at exit time. If we - * don't do this here, then it'll be torn down by exit_aio(). - * But for that case we can parallellize the freeing, thus - * speeding it up a lot. - */ - if (!(td->flags & TD_F_CHILD)) - io_destroy(ld->aio_ctx); - free(ld->aio_events); - free(ld->iocbs); - free(ld->io_us); - free(ld); - } -} - -static int fio_libaio_init(struct thread_data *td) -{ - struct libaio_options *o = td->eo; - struct libaio_data *ld; - int err = 0; - - ld = calloc(1, sizeof(*ld)); - - /* - * First try passing in 0 for queue depth, since we don't - * care about the user ring. If that fails, the kernel is too old - * and we need the right depth. - */ - if (!o->userspace_reap) - err = io_queue_init(INT_MAX, &ld->aio_ctx); - if (o->userspace_reap || err == -EINVAL) - err = io_queue_init(td->o.iodepth, &ld->aio_ctx); - if (err) { - td_verror(td, -err, "io_queue_init"); - log_err("fio: check /proc/sys/fs/aio-max-nr\n"); - free(ld); - return 1; - } - - ld->entries = td->o.iodepth; - ld->is_pow2 = is_power_of_2(ld->entries); - ld->aio_events = calloc(ld->entries, sizeof(struct io_event)); - ld->iocbs = calloc(ld->entries, sizeof(struct iocb *)); - ld->io_us = calloc(ld->entries, sizeof(struct io_u *)); - - td->io_ops_data = ld; - return 0; -} - -static struct ioengine_ops ioengine = { - .name = "libaio", - .version = FIO_IOOPS_VERSION, - .init = fio_libaio_init, - .prep = fio_libaio_prep, - .queue = fio_libaio_queue, - .commit = fio_libaio_commit, - .cancel = fio_libaio_cancel, - .getevents = fio_libaio_getevents, - .event = fio_libaio_event, - .cleanup = fio_libaio_cleanup, - .open_file = generic_open_file, - .close_file = generic_close_file, - .get_file_size = generic_get_file_size, - .options = options, - .option_struct_size = sizeof(struct libaio_options), -}; - -static void fio_init fio_libaio_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_libaio_unregister(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/libhdfs.c b/engines/libhdfs.c deleted file mode 100644 index 96a0871d..00000000 --- a/engines/libhdfs.c +++ /dev/null @@ -1,420 +0,0 @@ -/* - * libhdfs engine - * - * this engine helps perform read/write operations on hdfs cluster using - * libhdfs. hdfs doesnot support modification of data once file is created. - * - * so to mimic that create many files of small size (e.g 256k), and this - * engine select a file based on the offset generated by fio. - * - * thus, random reads and writes can also be achieved with this logic. - * - */ - -#include <math.h> -#include <hdfs.h> - -#include "../fio.h" -#include "../optgroup.h" - -#define CHUNCK_NAME_LENGTH_MAX 80 -#define CHUNCK_CREATION_BUFFER_SIZE 65536 - -struct hdfsio_data { - hdfsFS fs; - hdfsFile fp; - uint64_t curr_file_id; -}; - -struct hdfsio_options { - void *pad; /* needed because offset can't be 0 for a option defined used offsetof */ - char *host; - char *directory; - unsigned int port; - unsigned int chunck_size; - unsigned int single_instance; - unsigned int use_direct; -}; - -static struct fio_option options[] = { - { - .name = "namenode", - .lname = "hfds namenode", - .type = FIO_OPT_STR_STORE, - .off1 = offsetof(struct hdfsio_options, host), - .def = "localhost", - .help = "Namenode of the HDFS cluster", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_HDFS, - }, - { - .name = "hostname", - .lname = "hfds namenode", - .type = FIO_OPT_STR_STORE, - .off1 = offsetof(struct hdfsio_options, host), - .def = "localhost", - .help = "Namenode of the HDFS cluster", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_HDFS, - }, - { - .name = "port", - .lname = "hdfs namenode port", - .type = FIO_OPT_INT, - .off1 = offsetof(struct hdfsio_options, port), - .def = "9000", - .minval = 1, - .maxval = 65535, - .help = "Port used by the HDFS cluster namenode", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_HDFS, - }, - { - .name = "hdfsdirectory", - .lname = "hfds directory", - .type = FIO_OPT_STR_STORE, - .off1 = offsetof(struct hdfsio_options, directory), - .def = "/", - .help = "The HDFS directory where fio will create chuncks", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_HDFS, - }, - { - .name = "chunk_size", - .alias = "chunck_size", - .lname = "Chunk size", - .type = FIO_OPT_INT, - .off1 = offsetof(struct hdfsio_options, chunck_size), - .def = "1048576", - .help = "Size of individual chunck", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_HDFS, - }, - { - .name = "single_instance", - .lname = "Single Instance", - .type = FIO_OPT_BOOL, - .off1 = offsetof(struct hdfsio_options, single_instance), - .def = "1", - .help = "Use a single instance", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_HDFS, - }, - { - .name = "hdfs_use_direct", - .lname = "HDFS Use Direct", - .type = FIO_OPT_BOOL, - .off1 = offsetof(struct hdfsio_options, use_direct), - .def = "0", - .help = "Use readDirect instead of hdfsRead", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_HDFS, - }, - { - .name = NULL, - }, -}; - - -static int get_chunck_name(char *dest, char *file_name, uint64_t chunk_id) { - return snprintf(dest, CHUNCK_NAME_LENGTH_MAX, "%s_%lu", file_name, chunk_id); -} - -static int fio_hdfsio_prep(struct thread_data *td, struct io_u *io_u) -{ - struct hdfsio_options *options = td->eo; - struct hdfsio_data *hd = td->io_ops_data; - unsigned long f_id; - char fname[CHUNCK_NAME_LENGTH_MAX]; - int open_flags; - - /* find out file id based on the offset generated by fio */ - f_id = floor(io_u->offset / options-> chunck_size); - - if (f_id == hd->curr_file_id) { - /* file is already open */ - return 0; - } - - if (hd->curr_file_id != -1) { - if ( hdfsCloseFile(hd->fs, hd->fp) == -1) { - log_err("hdfs: unable to close file: %s\n", strerror(errno)); - return errno; - } - hd->curr_file_id = -1; - } - - if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_SYNC) { - open_flags = O_RDONLY; - } else if (io_u->ddir == DDIR_WRITE) { - open_flags = O_WRONLY; - } else { - log_err("hdfs: Invalid I/O Operation\n"); - return 0; - } - - get_chunck_name(fname, io_u->file->file_name, f_id); - hd->fp = hdfsOpenFile(hd->fs, fname, open_flags, 0, 0, - options->chunck_size); - if(hd->fp == NULL) { - log_err("hdfs: unable to open file: %s: %d\n", fname, strerror(errno)); - return errno; - } - hd->curr_file_id = f_id; - - return 0; -} - -static int fio_hdfsio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct hdfsio_data *hd = td->io_ops_data; - struct hdfsio_options *options = td->eo; - int ret; - unsigned long offset; - - offset = io_u->offset % options->chunck_size; - - if( (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) && - hdfsTell(hd->fs, hd->fp) != offset && hdfsSeek(hd->fs, hd->fp, offset) != 0 ) { - log_err("hdfs: seek failed: %s, are you doing random write smaller than chunck size ?\n", strerror(errno)); - io_u->error = errno; - return FIO_Q_COMPLETED; - }; - - // do the IO - if (io_u->ddir == DDIR_READ) { - if (options->use_direct) { - ret = readDirect(hd->fs, hd->fp, io_u->xfer_buf, io_u->xfer_buflen); - } else { - ret = hdfsRead(hd->fs, hd->fp, io_u->xfer_buf, io_u->xfer_buflen); - } - } else if (io_u->ddir == DDIR_WRITE) { - ret = hdfsWrite(hd->fs, hd->fp, io_u->xfer_buf, - io_u->xfer_buflen); - } else if (io_u->ddir == DDIR_SYNC) { - ret = hdfsFlush(hd->fs, hd->fp); - } else { - log_err("hdfs: Invalid I/O Operation: %d\n", io_u->ddir); - ret = EINVAL; - } - - // Check if the IO went fine, or is incomplete - if (ret != (int)io_u->xfer_buflen) { - if (ret >= 0) { - io_u->resid = io_u->xfer_buflen - ret; - io_u->error = 0; - return FIO_Q_COMPLETED; - } else { - io_u->error = errno; - } - } - - if (io_u->error) - td_verror(td, io_u->error, "xfer"); - - return FIO_Q_COMPLETED; -} - -int fio_hdfsio_open_file(struct thread_data *td, struct fio_file *f) -{ - if (td->o.odirect) { - td->error = EINVAL; - return 0; - } - - return 0; -} - -int fio_hdfsio_close_file(struct thread_data *td, struct fio_file *f) -{ - struct hdfsio_data *hd = td->io_ops_data; - - if (hd->curr_file_id != -1) { - if ( hdfsCloseFile(hd->fs, hd->fp) == -1) { - log_err("hdfs: unable to close file: %s\n", strerror(errno)); - return errno; - } - hd->curr_file_id = -1; - } - return 0; -} - -static int fio_hdfsio_init(struct thread_data *td) -{ - struct hdfsio_options *options = td->eo; - struct hdfsio_data *hd = td->io_ops_data; - struct fio_file *f; - uint64_t j,k; - int i, failure = 0; - uint8_t buffer[CHUNCK_CREATION_BUFFER_SIZE]; - uint64_t bytes_left; - char fname[CHUNCK_NAME_LENGTH_MAX]; - hdfsFile fp; - hdfsFileInfo *fi; - tOffset fi_size; - - for_each_file(td, f, i) { - k = 0; - for(j=0; j < f->real_file_size; j += options->chunck_size) { - get_chunck_name(fname, f->file_name, k++); - fi = hdfsGetPathInfo(hd->fs, fname); - fi_size = fi ? fi->mSize : 0; - // fill exist and is big enough, nothing to do - if( fi && fi_size >= options->chunck_size) { - continue; - } - fp = hdfsOpenFile(hd->fs, fname, O_WRONLY, 0, 0, - options->chunck_size); - if(fp == NULL) { - failure = errno; - log_err("hdfs: unable to prepare file chunk %s: %s\n", fname, strerror(errno)); - break; - } - bytes_left = options->chunck_size; - memset(buffer, 0, CHUNCK_CREATION_BUFFER_SIZE); - while( bytes_left > CHUNCK_CREATION_BUFFER_SIZE) { - if( hdfsWrite(hd->fs, fp, buffer, CHUNCK_CREATION_BUFFER_SIZE) - != CHUNCK_CREATION_BUFFER_SIZE) { - failure = errno; - log_err("hdfs: unable to prepare file chunk %s: %s\n", fname, strerror(errno)); - break; - }; - bytes_left -= CHUNCK_CREATION_BUFFER_SIZE; - } - if(bytes_left > 0) { - if( hdfsWrite(hd->fs, fp, buffer, bytes_left) - != bytes_left) { - failure = errno; - break; - }; - } - if( hdfsCloseFile(hd->fs, fp) != 0) { - failure = errno; - log_err("hdfs: unable to prepare file chunk %s: %s\n", fname, strerror(errno)); - break; - } - } - if(failure) { - break; - } - } - - if( !failure ) { - fio_file_set_size_known(f); - } - - return failure; -} - -static int fio_hdfsio_setup(struct thread_data *td) -{ - struct hdfsio_data *hd; - struct fio_file *f; - int i; - uint64_t file_size, total_file_size; - - if (!td->io_ops_data) { - hd = malloc(sizeof(*hd)); - memset(hd, 0, sizeof(*hd)); - - hd->curr_file_id = -1; - - td->io_ops_data = hd; - } - - total_file_size = 0; - file_size = 0; - - for_each_file(td, f, i) { - if(!td->o.file_size_low) { - file_size = floor(td->o.size / td->o.nr_files); - total_file_size += file_size; - } - else if (td->o.file_size_low == td->o.file_size_high) - file_size = td->o.file_size_low; - else { - file_size = get_rand_file_size(td); - } - f->real_file_size = file_size; - } - /* If the size doesn't divide nicely with the chunck size, - * make the last files bigger. - * Used only if filesize was not explicitely given - */ - if (!td->o.file_size_low && total_file_size < td->o.size) { - f->real_file_size += (td->o.size - total_file_size); - } - - return 0; -} - -static int fio_hdfsio_io_u_init(struct thread_data *td, struct io_u *io_u) -{ - struct hdfsio_data *hd = td->io_ops_data; - struct hdfsio_options *options = td->eo; - int failure; - struct hdfsBuilder *bld; - - if (options->host == NULL || options->port == 0) { - log_err("hdfs: server not defined\n"); - return EINVAL; - } - - bld = hdfsNewBuilder(); - if (!bld) { - failure = errno; - log_err("hdfs: unable to allocate connect builder\n"); - return failure; - } - hdfsBuilderSetNameNode(bld, options->host); - hdfsBuilderSetNameNodePort(bld, options->port); - if(! options->single_instance) { - hdfsBuilderSetForceNewInstance(bld); - } - hd->fs = hdfsBuilderConnect(bld); - - /* hdfsSetWorkingDirectory succeed on non existend directory */ - if (hdfsExists(hd->fs, options->directory) < 0 || hdfsSetWorkingDirectory(hd->fs, options->directory) < 0) { - failure = errno; - log_err("hdfs: invalid working directory %s: %s\n", options->directory, strerror(errno)); - return failure; - } - - return 0; -} - -static void fio_hdfsio_io_u_free(struct thread_data *td, struct io_u *io_u) -{ - struct hdfsio_data *hd = td->io_ops_data; - - if (hd->fs && hdfsDisconnect(hd->fs) < 0) { - log_err("hdfs: disconnect failed: %d\n", errno); - } -} - -static struct ioengine_ops ioengine_hdfs = { - .name = "libhdfs", - .version = FIO_IOOPS_VERSION, - .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NODISKUTIL, - .setup = fio_hdfsio_setup, - .init = fio_hdfsio_init, - .prep = fio_hdfsio_prep, - .queue = fio_hdfsio_queue, - .open_file = fio_hdfsio_open_file, - .close_file = fio_hdfsio_close_file, - .io_u_init = fio_hdfsio_io_u_init, - .io_u_free = fio_hdfsio_io_u_free, - .option_struct_size = sizeof(struct hdfsio_options), - .options = options, -}; - - -static void fio_init fio_hdfsio_register(void) -{ - register_ioengine(&ioengine_hdfs); -} - -static void fio_exit fio_hdfsio_unregister(void) -{ - unregister_ioengine(&ioengine_hdfs); -} diff --git a/engines/mmap.c b/engines/mmap.c deleted file mode 100644 index bc038f4f..00000000 --- a/engines/mmap.c +++ /dev/null @@ -1,272 +0,0 @@ -/* - * mmap engine - * - * IO engine that reads/writes from files by doing memcpy to/from - * a memory mapped region of the file. - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <sys/mman.h> - -#include "../fio.h" -#include "../verify.h" - -/* - * Limits us to 1GiB of mapped files in total - */ -#define MMAP_TOTAL_SZ (1 * 1024 * 1024 * 1024UL) - -static unsigned long mmap_map_size; - -struct fio_mmap_data { - void *mmap_ptr; - size_t mmap_sz; - off_t mmap_off; -}; - -static int fio_mmap_file(struct thread_data *td, struct fio_file *f, - size_t length, off_t off) -{ - struct fio_mmap_data *fmd = FILE_ENG_DATA(f); - int flags = 0; - - if (td_rw(td)) - flags = PROT_READ | PROT_WRITE; - else if (td_write(td)) { - flags = PROT_WRITE; - - if (td->o.verify != VERIFY_NONE) - flags |= PROT_READ; - } else - flags = PROT_READ; - - fmd->mmap_ptr = mmap(NULL, length, flags, MAP_SHARED, f->fd, off); - if (fmd->mmap_ptr == MAP_FAILED) { - fmd->mmap_ptr = NULL; - td_verror(td, errno, "mmap"); - goto err; - } - - if (!td_random(td)) { - if (posix_madvise(fmd->mmap_ptr, length, POSIX_MADV_SEQUENTIAL) < 0) { - td_verror(td, errno, "madvise"); - goto err; - } - } else { - if (posix_madvise(fmd->mmap_ptr, length, POSIX_MADV_RANDOM) < 0) { - td_verror(td, errno, "madvise"); - goto err; - } - } - if (posix_madvise(fmd->mmap_ptr, length, POSIX_MADV_DONTNEED) < 0) { - td_verror(td, errno, "madvise"); - goto err; - } - -#ifdef FIO_MADV_FREE - if (f->filetype == FIO_TYPE_BLOCK) - (void) posix_madvise(fmd->mmap_ptr, fmd->mmap_sz, FIO_MADV_FREE); -#endif - -err: - if (td->error && fmd->mmap_ptr) - munmap(fmd->mmap_ptr, length); - - return td->error; -} - -/* - * Just mmap an appropriate portion, we cannot mmap the full extent - */ -static int fio_mmapio_prep_limited(struct thread_data *td, struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - struct fio_mmap_data *fmd = FILE_ENG_DATA(f); - - if (io_u->buflen > mmap_map_size) { - log_err("fio: bs too big for mmap engine\n"); - return EIO; - } - - fmd->mmap_sz = mmap_map_size; - if (fmd->mmap_sz > f->io_size) - fmd->mmap_sz = f->io_size; - - fmd->mmap_off = io_u->offset; - - return fio_mmap_file(td, f, fmd->mmap_sz, fmd->mmap_off); -} - -/* - * Attempt to mmap the entire file - */ -static int fio_mmapio_prep_full(struct thread_data *td, struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - struct fio_mmap_data *fmd = FILE_ENG_DATA(f); - int ret; - - if (fio_file_partial_mmap(f)) - return EINVAL; - if (io_u->offset != (size_t) io_u->offset || - f->io_size != (size_t) f->io_size) { - fio_file_set_partial_mmap(f); - return EINVAL; - } - - fmd->mmap_sz = f->io_size; - fmd->mmap_off = 0; - - ret = fio_mmap_file(td, f, fmd->mmap_sz, fmd->mmap_off); - if (ret) - fio_file_set_partial_mmap(f); - - return ret; -} - -static int fio_mmapio_prep(struct thread_data *td, struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - struct fio_mmap_data *fmd = FILE_ENG_DATA(f); - int ret; - - /* - * It fits within existing mapping, use it - */ - if (io_u->offset >= fmd->mmap_off && - io_u->offset + io_u->buflen < fmd->mmap_off + fmd->mmap_sz) - goto done; - - /* - * unmap any existing mapping - */ - if (fmd->mmap_ptr) { - if (munmap(fmd->mmap_ptr, fmd->mmap_sz) < 0) - return errno; - fmd->mmap_ptr = NULL; - } - - if (fio_mmapio_prep_full(td, io_u)) { - td_clear_error(td); - ret = fio_mmapio_prep_limited(td, io_u); - if (ret) - return ret; - } - -done: - io_u->mmap_data = fmd->mmap_ptr + io_u->offset - fmd->mmap_off - - f->file_offset; - return 0; -} - -static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - struct fio_mmap_data *fmd = FILE_ENG_DATA(f); - - fio_ro_check(td, io_u); - - if (io_u->ddir == DDIR_READ) - memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen); - else if (io_u->ddir == DDIR_WRITE) - memcpy(io_u->mmap_data, io_u->xfer_buf, io_u->xfer_buflen); - else if (ddir_sync(io_u->ddir)) { - if (msync(fmd->mmap_ptr, fmd->mmap_sz, MS_SYNC)) { - io_u->error = errno; - td_verror(td, io_u->error, "msync"); - } - } else if (io_u->ddir == DDIR_TRIM) { - int ret = do_io_u_trim(td, io_u); - - if (!ret) - td_verror(td, io_u->error, "trim"); - } - - - /* - * not really direct, but should drop the pages from the cache - */ - if (td->o.odirect && ddir_rw(io_u->ddir)) { - if (msync(io_u->mmap_data, io_u->xfer_buflen, MS_SYNC) < 0) { - io_u->error = errno; - td_verror(td, io_u->error, "msync"); - } - if (posix_madvise(io_u->mmap_data, io_u->xfer_buflen, POSIX_MADV_DONTNEED) < 0) { - io_u->error = errno; - td_verror(td, io_u->error, "madvise"); - } - } - - return FIO_Q_COMPLETED; -} - -static int fio_mmapio_init(struct thread_data *td) -{ - struct thread_options *o = &td->o; - - if ((o->rw_min_bs & page_mask) && - (o->odirect || o->fsync_blocks || o->fdatasync_blocks)) { - log_err("fio: mmap options dictate a minimum block size of " - "%llu bytes\n", (unsigned long long) page_size); - return 1; - } - - mmap_map_size = MMAP_TOTAL_SZ / o->nr_files; - return 0; -} - -static int fio_mmapio_open_file(struct thread_data *td, struct fio_file *f) -{ - struct fio_mmap_data *fmd; - int ret; - - ret = generic_open_file(td, f); - if (ret) - return ret; - - fmd = calloc(1, sizeof(*fmd)); - if (!fmd) { - int fio_unused __ret; - __ret = generic_close_file(td, f); - return 1; - } - - FILE_SET_ENG_DATA(f, fmd); - return 0; -} - -static int fio_mmapio_close_file(struct thread_data *td, struct fio_file *f) -{ - struct fio_mmap_data *fmd = FILE_ENG_DATA(f); - - FILE_SET_ENG_DATA(f, NULL); - free(fmd); - fio_file_clear_partial_mmap(f); - - return generic_close_file(td, f); -} - -static struct ioengine_ops ioengine = { - .name = "mmap", - .version = FIO_IOOPS_VERSION, - .init = fio_mmapio_init, - .prep = fio_mmapio_prep, - .queue = fio_mmapio_queue, - .open_file = fio_mmapio_open_file, - .close_file = fio_mmapio_close_file, - .get_file_size = generic_get_file_size, - .flags = FIO_SYNCIO | FIO_NOEXTEND, -}; - -static void fio_init fio_mmapio_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_mmapio_unregister(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/mtd.c b/engines/mtd.c deleted file mode 100644 index 3c22a1b1..00000000 --- a/engines/mtd.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - * MTD engine - * - * IO engine that reads/writes from MTD character devices. - * - */ -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <sys/ioctl.h> -#include <mtd/mtd-user.h> - -#include "../fio.h" -#include "../verify.h" -#include "../oslib/libmtd.h" - -static libmtd_t desc; - -struct fio_mtd_data { - struct mtd_dev_info info; -}; - -static int fio_mtd_maybe_mark_bad(struct thread_data *td, - struct fio_mtd_data *fmd, - struct io_u *io_u, int eb) -{ - int ret; - if (errno == EIO) { - ret = mtd_mark_bad(&fmd->info, io_u->file->fd, eb); - if (ret != 0) { - io_u->error = errno; - td_verror(td, errno, "mtd_mark_bad"); - return -1; - } - } - return 0; -} - -static int fio_mtd_is_bad(struct thread_data *td, - struct fio_mtd_data *fmd, - struct io_u *io_u, int eb) -{ - int ret = mtd_is_bad(&fmd->info, io_u->file->fd, eb); - if (ret == -1) { - io_u->error = errno; - td_verror(td, errno, "mtd_is_bad"); - } else if (ret == 1) - io_u->error = EIO; /* Silent failure--don't flood stderr */ - return ret; -} - -static int fio_mtd_queue(struct thread_data *td, struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - struct fio_mtd_data *fmd = FILE_ENG_DATA(f); - int local_offs = 0; - int ret; - - fio_ro_check(td, io_u); - - /* - * Errors tend to pertain to particular erase blocks, so divide up - * I/O to erase block size. - * If an error is encountered, log it and keep going onto the next - * block because the error probably just pertains to that block. - * TODO(dehrenberg): Divide up reads and writes into page-sized - * operations to get more fine-grained information about errors. - */ - while (local_offs < io_u->buflen) { - int eb = (io_u->offset + local_offs) / fmd->info.eb_size; - int eb_offs = (io_u->offset + local_offs) % fmd->info.eb_size; - /* The length is the smaller of the length remaining in the - * buffer and the distance to the end of the erase block */ - int len = min((int)io_u->buflen - local_offs, - (int)fmd->info.eb_size - eb_offs); - char *buf = ((char *)io_u->buf) + local_offs; - - if (td->o.skip_bad) { - ret = fio_mtd_is_bad(td, fmd, io_u, eb); - if (ret == -1) - break; - else if (ret == 1) - goto next; - } - if (io_u->ddir == DDIR_READ) { - ret = mtd_read(&fmd->info, f->fd, eb, eb_offs, buf, len); - if (ret != 0) { - io_u->error = errno; - td_verror(td, errno, "mtd_read"); - if (fio_mtd_maybe_mark_bad(td, fmd, io_u, eb)) - break; - } - } else if (io_u->ddir == DDIR_WRITE) { - ret = mtd_write(desc, &fmd->info, f->fd, eb, - eb_offs, buf, len, NULL, 0, 0); - if (ret != 0) { - io_u->error = errno; - td_verror(td, errno, "mtd_write"); - if (fio_mtd_maybe_mark_bad(td, fmd, io_u, eb)) - break; - } - } else if (io_u->ddir == DDIR_TRIM) { - if (eb_offs != 0 || len != fmd->info.eb_size) { - io_u->error = EINVAL; - td_verror(td, EINVAL, - "trim on MTD must be erase block-aligned"); - } - ret = mtd_erase(desc, &fmd->info, f->fd, eb); - if (ret != 0) { - io_u->error = errno; - td_verror(td, errno, "mtd_erase"); - if (fio_mtd_maybe_mark_bad(td, fmd, io_u, eb)) - break; - } - } else { - io_u->error = ENOTSUP; - td_verror(td, io_u->error, "operation not supported on mtd"); - } - -next: - local_offs += len; - } - - return FIO_Q_COMPLETED; -} - -static int fio_mtd_open_file(struct thread_data *td, struct fio_file *f) -{ - struct fio_mtd_data *fmd; - int ret; - - ret = generic_open_file(td, f); - if (ret) - return ret; - - fmd = calloc(1, sizeof(*fmd)); - if (!fmd) - goto err_close; - - ret = mtd_get_dev_info(desc, f->file_name, &fmd->info); - if (ret != 0) { - td_verror(td, errno, "mtd_get_dev_info"); - goto err_free; - } - - FILE_SET_ENG_DATA(f, fmd); - return 0; - -err_free: - free(fmd); -err_close: - { - int fio_unused __ret; - __ret = generic_close_file(td, f); - return 1; - } -} - -static int fio_mtd_close_file(struct thread_data *td, struct fio_file *f) -{ - struct fio_mtd_data *fmd = FILE_ENG_DATA(f); - - FILE_SET_ENG_DATA(f, NULL); - free(fmd); - - return generic_close_file(td, f); -} - -static int fio_mtd_get_file_size(struct thread_data *td, struct fio_file *f) -{ - struct mtd_dev_info info; - - int ret = mtd_get_dev_info(desc, f->file_name, &info); - if (ret != 0) { - td_verror(td, errno, "mtd_get_dev_info"); - return errno; - } - f->real_file_size = info.size; - - return 0; -} - -static struct ioengine_ops ioengine = { - .name = "mtd", - .version = FIO_IOOPS_VERSION, - .queue = fio_mtd_queue, - .open_file = fio_mtd_open_file, - .close_file = fio_mtd_close_file, - .get_file_size = fio_mtd_get_file_size, - .flags = FIO_SYNCIO | FIO_NOEXTEND, -}; - -static void fio_init fio_mtd_register(void) -{ - desc = libmtd_open(); - register_ioengine(&ioengine); -} - -static void fio_exit fio_mtd_unregister(void) -{ - unregister_ioengine(&ioengine); - libmtd_close(desc); - desc = NULL; -} - - - diff --git a/engines/net.c b/engines/net.c deleted file mode 100644 index 37d44fd8..00000000 --- a/engines/net.c +++ /dev/null @@ -1,1468 +0,0 @@ -/* - * net engine - * - * IO engine that reads/writes to/from sockets. - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <signal.h> -#include <errno.h> -#include <assert.h> -#include <netinet/in.h> -#include <netinet/tcp.h> -#include <arpa/inet.h> -#include <netdb.h> -#include <sys/poll.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/socket.h> -#include <sys/un.h> - -#include "../fio.h" -#include "../verify.h" -#include "../optgroup.h" - -struct netio_data { - int listenfd; - int use_splice; - int seq_off; - int pipes[2]; - struct sockaddr_in addr; - struct sockaddr_in6 addr6; - struct sockaddr_un addr_un; - uint64_t udp_send_seq; - uint64_t udp_recv_seq; -}; - -struct netio_options { - struct thread_data *td; - unsigned int port; - unsigned int proto; - unsigned int listen; - unsigned int pingpong; - unsigned int nodelay; - unsigned int ttl; - unsigned int window_size; - unsigned int mss; - char *intfc; -}; - -struct udp_close_msg { - uint32_t magic; - uint32_t cmd; -}; - -struct udp_seq { - uint64_t magic; - uint64_t seq; - uint64_t bs; -}; - -enum { - FIO_LINK_CLOSE = 0x89, - FIO_LINK_OPEN_CLOSE_MAGIC = 0x6c696e6b, - FIO_LINK_OPEN = 0x98, - FIO_UDP_SEQ_MAGIC = 0x657375716e556563ULL, - - FIO_TYPE_TCP = 1, - FIO_TYPE_UDP = 2, - FIO_TYPE_UNIX = 3, - FIO_TYPE_TCP_V6 = 4, - FIO_TYPE_UDP_V6 = 5, -}; - -static int str_hostname_cb(void *data, const char *input); -static struct fio_option options[] = { - { - .name = "hostname", - .lname = "net engine hostname", - .type = FIO_OPT_STR_STORE, - .cb = str_hostname_cb, - .help = "Hostname for net IO engine", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_NETIO, - }, - { - .name = "port", - .lname = "net engine port", - .type = FIO_OPT_INT, - .off1 = offsetof(struct netio_options, port), - .minval = 1, - .maxval = 65535, - .help = "Port to use for TCP or UDP net connections", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_NETIO, - }, - { - .name = "protocol", - .lname = "net engine protocol", - .alias = "proto", - .type = FIO_OPT_STR, - .off1 = offsetof(struct netio_options, proto), - .help = "Network protocol to use", - .def = "tcp", - .posval = { - { .ival = "tcp", - .oval = FIO_TYPE_TCP, - .help = "Transmission Control Protocol", - }, -#ifdef CONFIG_IPV6 - { .ival = "tcpv6", - .oval = FIO_TYPE_TCP_V6, - .help = "Transmission Control Protocol V6", - }, -#endif - { .ival = "udp", - .oval = FIO_TYPE_UDP, - .help = "User Datagram Protocol", - }, -#ifdef CONFIG_IPV6 - { .ival = "udpv6", - .oval = FIO_TYPE_UDP_V6, - .help = "User Datagram Protocol V6", - }, -#endif - { .ival = "unix", - .oval = FIO_TYPE_UNIX, - .help = "UNIX domain socket", - }, - }, - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_NETIO, - }, -#ifdef CONFIG_TCP_NODELAY - { - .name = "nodelay", - .lname = "No Delay", - .type = FIO_OPT_BOOL, - .off1 = offsetof(struct netio_options, nodelay), - .help = "Use TCP_NODELAY on TCP connections", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_NETIO, - }, -#endif - { - .name = "listen", - .lname = "net engine listen", - .type = FIO_OPT_STR_SET, - .off1 = offsetof(struct netio_options, listen), - .help = "Listen for incoming TCP connections", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_NETIO, - }, - { - .name = "pingpong", - .lname = "Ping Pong", - .type = FIO_OPT_STR_SET, - .off1 = offsetof(struct netio_options, pingpong), - .help = "Ping-pong IO requests", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_NETIO, - }, - { - .name = "interface", - .lname = "net engine interface", - .type = FIO_OPT_STR_STORE, - .off1 = offsetof(struct netio_options, intfc), - .help = "Network interface to use", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_NETIO, - }, - { - .name = "ttl", - .lname = "net engine multicast ttl", - .type = FIO_OPT_INT, - .off1 = offsetof(struct netio_options, ttl), - .def = "1", - .minval = 0, - .help = "Time-to-live value for outgoing UDP multicast packets", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_NETIO, - }, -#ifdef CONFIG_NET_WINDOWSIZE - { - .name = "window_size", - .lname = "Window Size", - .type = FIO_OPT_INT, - .off1 = offsetof(struct netio_options, window_size), - .minval = 0, - .help = "Set socket buffer window size", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_NETIO, - }, -#endif -#ifdef CONFIG_NET_MSS - { - .name = "mss", - .lname = "Maximum segment size", - .type = FIO_OPT_INT, - .off1 = offsetof(struct netio_options, mss), - .minval = 0, - .help = "Set TCP maximum segment size", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_NETIO, - }, -#endif - { - .name = NULL, - }, -}; - -static inline int is_udp(struct netio_options *o) -{ - return o->proto == FIO_TYPE_UDP || o->proto == FIO_TYPE_UDP_V6; -} - -static inline int is_tcp(struct netio_options *o) -{ - return o->proto == FIO_TYPE_TCP || o->proto == FIO_TYPE_TCP_V6; -} - -static inline int is_ipv6(struct netio_options *o) -{ - return o->proto == FIO_TYPE_UDP_V6 || o->proto == FIO_TYPE_TCP_V6; -} - -static int set_window_size(struct thread_data *td, int fd) -{ -#ifdef CONFIG_NET_WINDOWSIZE - struct netio_options *o = td->eo; - unsigned int wss; - int snd, rcv, ret; - - if (!o->window_size) - return 0; - - rcv = o->listen || o->pingpong; - snd = !o->listen || o->pingpong; - wss = o->window_size; - ret = 0; - - if (rcv) { - ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, (void *) &wss, - sizeof(wss)); - if (ret < 0) - td_verror(td, errno, "rcvbuf window size"); - } - if (snd && !ret) { - ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, (void *) &wss, - sizeof(wss)); - if (ret < 0) - td_verror(td, errno, "sndbuf window size"); - } - - return ret; -#else - td_verror(td, -EINVAL, "setsockopt window size"); - return -1; -#endif -} - -static int set_mss(struct thread_data *td, int fd) -{ -#ifdef CONFIG_NET_MSS - struct netio_options *o = td->eo; - unsigned int mss; - int ret; - - if (!o->mss || !is_tcp(o)) - return 0; - - mss = o->mss; - ret = setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, (void *) &mss, - sizeof(mss)); - if (ret < 0) - td_verror(td, errno, "setsockopt TCP_MAXSEG"); - - return ret; -#else - td_verror(td, -EINVAL, "setsockopt TCP_MAXSEG"); - return -1; -#endif -} - - -/* - * Return -1 for error and 'nr events' for a positive number - * of events - */ -static int poll_wait(struct thread_data *td, int fd, short events) -{ - struct pollfd pfd; - int ret; - - while (!td->terminate) { - pfd.fd = fd; - pfd.events = events; - ret = poll(&pfd, 1, -1); - if (ret < 0) { - if (errno == EINTR) - break; - - td_verror(td, errno, "poll"); - return -1; - } else if (!ret) - continue; - - break; - } - - if (pfd.revents & events) - return 1; - - return -1; -} - -static int fio_netio_is_multicast(const char *mcaddr) -{ - in_addr_t addr = inet_network(mcaddr); - if (addr == -1) - return 0; - - if (inet_network("224.0.0.0") <= addr && - inet_network("239.255.255.255") >= addr) - return 1; - - return 0; -} - - -static int fio_netio_prep(struct thread_data *td, struct io_u *io_u) -{ - struct netio_options *o = td->eo; - - /* - * Make sure we don't see spurious reads to a receiver, and vice versa - */ - if (is_tcp(o)) - return 0; - - if ((o->listen && io_u->ddir == DDIR_WRITE) || - (!o->listen && io_u->ddir == DDIR_READ)) { - td_verror(td, EINVAL, "bad direction"); - return 1; - } - - return 0; -} - -#ifdef CONFIG_LINUX_SPLICE -static int splice_io_u(int fdin, int fdout, unsigned int len) -{ - int bytes = 0; - - while (len) { - int ret = splice(fdin, NULL, fdout, NULL, len, 0); - - if (ret < 0) { - if (!bytes) - bytes = ret; - - break; - } else if (!ret) - break; - - bytes += ret; - len -= ret; - } - - return bytes; -} - -/* - * Receive bytes from a socket and fill them into the internal pipe - */ -static int splice_in(struct thread_data *td, struct io_u *io_u) -{ - struct netio_data *nd = td->io_ops_data; - - return splice_io_u(io_u->file->fd, nd->pipes[1], io_u->xfer_buflen); -} - -/* - * Transmit 'len' bytes from the internal pipe - */ -static int splice_out(struct thread_data *td, struct io_u *io_u, - unsigned int len) -{ - struct netio_data *nd = td->io_ops_data; - - return splice_io_u(nd->pipes[0], io_u->file->fd, len); -} - -static int vmsplice_io_u(struct io_u *io_u, int fd, unsigned int len) -{ - struct iovec iov = { - .iov_base = io_u->xfer_buf, - .iov_len = len, - }; - int bytes = 0; - - while (iov.iov_len) { - int ret = vmsplice(fd, &iov, 1, SPLICE_F_MOVE); - - if (ret < 0) { - if (!bytes) - bytes = ret; - break; - } else if (!ret) - break; - - iov.iov_len -= ret; - iov.iov_base += ret; - bytes += ret; - } - - return bytes; - -} - -/* - * vmsplice() pipe to io_u buffer - */ -static int vmsplice_io_u_out(struct thread_data *td, struct io_u *io_u, - unsigned int len) -{ - struct netio_data *nd = td->io_ops_data; - - return vmsplice_io_u(io_u, nd->pipes[0], len); -} - -/* - * vmsplice() io_u to pipe - */ -static int vmsplice_io_u_in(struct thread_data *td, struct io_u *io_u) -{ - struct netio_data *nd = td->io_ops_data; - - return vmsplice_io_u(io_u, nd->pipes[1], io_u->xfer_buflen); -} - -/* - * splice receive - transfer socket data into a pipe using splice, then map - * that pipe data into the io_u using vmsplice. - */ -static int fio_netio_splice_in(struct thread_data *td, struct io_u *io_u) -{ - int ret; - - ret = splice_in(td, io_u); - if (ret > 0) - return vmsplice_io_u_out(td, io_u, ret); - - return ret; -} - -/* - * splice transmit - map data from the io_u into a pipe by using vmsplice, - * then transfer that pipe to a socket using splice. - */ -static int fio_netio_splice_out(struct thread_data *td, struct io_u *io_u) -{ - int ret; - - ret = vmsplice_io_u_in(td, io_u); - if (ret > 0) - return splice_out(td, io_u, ret); - - return ret; -} -#else -static int fio_netio_splice_in(struct thread_data *td, struct io_u *io_u) -{ - errno = EOPNOTSUPP; - return -1; -} - -static int fio_netio_splice_out(struct thread_data *td, struct io_u *io_u) -{ - errno = EOPNOTSUPP; - return -1; -} -#endif - -static void store_udp_seq(struct netio_data *nd, struct io_u *io_u) -{ - struct udp_seq *us; - - if (io_u->xfer_buflen < sizeof(*us)) - return; - - us = io_u->xfer_buf + io_u->xfer_buflen - sizeof(*us); - us->magic = cpu_to_le64((uint64_t) FIO_UDP_SEQ_MAGIC); - us->bs = cpu_to_le64((uint64_t) io_u->xfer_buflen); - us->seq = cpu_to_le64(nd->udp_send_seq++); -} - -static void verify_udp_seq(struct thread_data *td, struct netio_data *nd, - struct io_u *io_u) -{ - struct udp_seq *us; - uint64_t seq; - - if (io_u->xfer_buflen < sizeof(*us)) - return; - - if (nd->seq_off) - return; - - us = io_u->xfer_buf + io_u->xfer_buflen - sizeof(*us); - if (le64_to_cpu(us->magic) != FIO_UDP_SEQ_MAGIC) - return; - if (le64_to_cpu(us->bs) != io_u->xfer_buflen) { - nd->seq_off = 1; - return; - } - - seq = le64_to_cpu(us->seq); - - if (seq != nd->udp_recv_seq) - td->ts.drop_io_u[io_u->ddir] += seq - nd->udp_recv_seq; - - nd->udp_recv_seq = seq + 1; -} - -static int fio_netio_send(struct thread_data *td, struct io_u *io_u) -{ - struct netio_data *nd = td->io_ops_data; - struct netio_options *o = td->eo; - int ret, flags = 0; - - do { - if (is_udp(o)) { - const struct sockaddr *to; - socklen_t len; - - if (is_ipv6(o)) { - to = (struct sockaddr *) &nd->addr6; - len = sizeof(nd->addr6); - } else { - to = (struct sockaddr *) &nd->addr; - len = sizeof(nd->addr); - } - - if (td->o.verify == VERIFY_NONE) - store_udp_seq(nd, io_u); - - ret = sendto(io_u->file->fd, io_u->xfer_buf, - io_u->xfer_buflen, flags, to, len); - } else { - /* - * if we are going to write more, set MSG_MORE - */ -#ifdef MSG_MORE - if ((td->this_io_bytes[DDIR_WRITE] + io_u->xfer_buflen < - td->o.size) && !o->pingpong) - flags |= MSG_MORE; -#endif - ret = send(io_u->file->fd, io_u->xfer_buf, - io_u->xfer_buflen, flags); - } - if (ret > 0) - break; - - ret = poll_wait(td, io_u->file->fd, POLLOUT); - if (ret <= 0) - break; - } while (1); - - return ret; -} - -static int is_close_msg(struct io_u *io_u, int len) -{ - struct udp_close_msg *msg; - - if (len != sizeof(struct udp_close_msg)) - return 0; - - msg = io_u->xfer_buf; - if (le32_to_cpu(msg->magic) != FIO_LINK_OPEN_CLOSE_MAGIC) - return 0; - if (le32_to_cpu(msg->cmd) != FIO_LINK_CLOSE) - return 0; - - return 1; -} - -static int fio_netio_recv(struct thread_data *td, struct io_u *io_u) -{ - struct netio_data *nd = td->io_ops_data; - struct netio_options *o = td->eo; - int ret, flags = 0; - - do { - if (is_udp(o)) { - struct sockaddr *from; - socklen_t l, *len = &l; - - if (o->listen) { - if (!is_ipv6(o)) { - from = (struct sockaddr *) &nd->addr; - *len = sizeof(nd->addr); - } else { - from = (struct sockaddr *) &nd->addr6; - *len = sizeof(nd->addr6); - } - } else { - from = NULL; - len = NULL; - } - - ret = recvfrom(io_u->file->fd, io_u->xfer_buf, - io_u->xfer_buflen, flags, from, len); - - if (is_close_msg(io_u, ret)) { - td->done = 1; - return 0; - } - } else { - ret = recv(io_u->file->fd, io_u->xfer_buf, - io_u->xfer_buflen, flags); - - if (is_close_msg(io_u, ret)) { - td->done = 1; - return 0; - } - } - if (ret > 0) - break; - else if (!ret && (flags & MSG_WAITALL)) - break; - - ret = poll_wait(td, io_u->file->fd, POLLIN); - if (ret <= 0) - break; - flags |= MSG_WAITALL; - } while (1); - - if (is_udp(o) && td->o.verify == VERIFY_NONE) - verify_udp_seq(td, nd, io_u); - - return ret; -} - -static int __fio_netio_queue(struct thread_data *td, struct io_u *io_u, - enum fio_ddir ddir) -{ - struct netio_data *nd = td->io_ops_data; - struct netio_options *o = td->eo; - int ret; - - if (ddir == DDIR_WRITE) { - if (!nd->use_splice || is_udp(o) || - o->proto == FIO_TYPE_UNIX) - ret = fio_netio_send(td, io_u); - else - ret = fio_netio_splice_out(td, io_u); - } else if (ddir == DDIR_READ) { - if (!nd->use_splice || is_udp(o) || - o->proto == FIO_TYPE_UNIX) - ret = fio_netio_recv(td, io_u); - else - ret = fio_netio_splice_in(td, io_u); - } else - ret = 0; /* must be a SYNC */ - - if (ret != (int) io_u->xfer_buflen) { - if (ret > 0) { - io_u->resid = io_u->xfer_buflen - ret; - io_u->error = 0; - return FIO_Q_COMPLETED; - } else if (!ret) - return FIO_Q_BUSY; - else { - int err = errno; - - if (ddir == DDIR_WRITE && err == EMSGSIZE) - return FIO_Q_BUSY; - - io_u->error = err; - } - } - - if (io_u->error) - td_verror(td, io_u->error, "xfer"); - - return FIO_Q_COMPLETED; -} - -static int fio_netio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct netio_options *o = td->eo; - int ret; - - fio_ro_check(td, io_u); - - ret = __fio_netio_queue(td, io_u, io_u->ddir); - if (!o->pingpong || ret != FIO_Q_COMPLETED) - return ret; - - /* - * For ping-pong mode, receive or send reply as needed - */ - if (td_read(td) && io_u->ddir == DDIR_READ) - ret = __fio_netio_queue(td, io_u, DDIR_WRITE); - else if (td_write(td) && io_u->ddir == DDIR_WRITE) - ret = __fio_netio_queue(td, io_u, DDIR_READ); - - return ret; -} - -static int fio_netio_connect(struct thread_data *td, struct fio_file *f) -{ - struct netio_data *nd = td->io_ops_data; - struct netio_options *o = td->eo; - int type, domain; - - if (o->proto == FIO_TYPE_TCP) { - domain = AF_INET; - type = SOCK_STREAM; - } else if (o->proto == FIO_TYPE_TCP_V6) { - domain = AF_INET6; - type = SOCK_STREAM; - } else if (o->proto == FIO_TYPE_UDP) { - domain = AF_INET; - type = SOCK_DGRAM; - } else if (o->proto == FIO_TYPE_UDP_V6) { - domain = AF_INET6; - type = SOCK_DGRAM; - } else if (o->proto == FIO_TYPE_UNIX) { - domain = AF_UNIX; - type = SOCK_STREAM; - } else { - log_err("fio: bad network type %d\n", o->proto); - f->fd = -1; - return 1; - } - - f->fd = socket(domain, type, 0); - if (f->fd < 0) { - td_verror(td, errno, "socket"); - return 1; - } - -#ifdef CONFIG_TCP_NODELAY - if (o->nodelay && is_tcp(o)) { - int optval = 1; - - if (setsockopt(f->fd, IPPROTO_TCP, TCP_NODELAY, (void *) &optval, sizeof(int)) < 0) { - log_err("fio: cannot set TCP_NODELAY option on socket (%s), disable with 'nodelay=0'\n", strerror(errno)); - return 1; - } - } -#endif - - if (set_window_size(td, f->fd)) { - close(f->fd); - return 1; - } - if (set_mss(td, f->fd)) { - close(f->fd); - return 1; - } - - if (is_udp(o)) { - if (!fio_netio_is_multicast(td->o.filename)) - return 0; - if (is_ipv6(o)) { - log_err("fio: multicast not supported on IPv6\n"); - close(f->fd); - return 1; - } - - if (o->intfc) { - struct in_addr interface_addr; - - if (inet_aton(o->intfc, &interface_addr) == 0) { - log_err("fio: interface not valid interface IP\n"); - close(f->fd); - return 1; - } - if (setsockopt(f->fd, IPPROTO_IP, IP_MULTICAST_IF, (const char*)&interface_addr, sizeof(interface_addr)) < 0) { - td_verror(td, errno, "setsockopt IP_MULTICAST_IF"); - close(f->fd); - return 1; - } - } - if (setsockopt(f->fd, IPPROTO_IP, IP_MULTICAST_TTL, (const char*)&o->ttl, sizeof(o->ttl)) < 0) { - td_verror(td, errno, "setsockopt IP_MULTICAST_TTL"); - close(f->fd); - return 1; - } - return 0; - } else if (o->proto == FIO_TYPE_TCP) { - socklen_t len = sizeof(nd->addr); - - if (connect(f->fd, (struct sockaddr *) &nd->addr, len) < 0) { - td_verror(td, errno, "connect"); - close(f->fd); - return 1; - } - } else if (o->proto == FIO_TYPE_TCP_V6) { - socklen_t len = sizeof(nd->addr6); - - if (connect(f->fd, (struct sockaddr *) &nd->addr6, len) < 0) { - td_verror(td, errno, "connect"); - close(f->fd); - return 1; - } - - } else { - struct sockaddr_un *addr = &nd->addr_un; - socklen_t len; - - len = sizeof(addr->sun_family) + strlen(addr->sun_path) + 1; - - if (connect(f->fd, (struct sockaddr *) addr, len) < 0) { - td_verror(td, errno, "connect"); - close(f->fd); - return 1; - } - } - - return 0; -} - -static int fio_netio_accept(struct thread_data *td, struct fio_file *f) -{ - struct netio_data *nd = td->io_ops_data; - struct netio_options *o = td->eo; - socklen_t socklen; - int state; - - if (is_udp(o)) { - f->fd = nd->listenfd; - return 0; - } - - state = td->runstate; - td_set_runstate(td, TD_SETTING_UP); - - log_info("fio: waiting for connection\n"); - - if (poll_wait(td, nd->listenfd, POLLIN) < 0) - goto err; - - if (o->proto == FIO_TYPE_TCP) { - socklen = sizeof(nd->addr); - f->fd = accept(nd->listenfd, (struct sockaddr *) &nd->addr, &socklen); - } else { - socklen = sizeof(nd->addr6); - f->fd = accept(nd->listenfd, (struct sockaddr *) &nd->addr6, &socklen); - } - - if (f->fd < 0) { - td_verror(td, errno, "accept"); - goto err; - } - -#ifdef CONFIG_TCP_NODELAY - if (o->nodelay && is_tcp(o)) { - int optval = 1; - - if (setsockopt(f->fd, IPPROTO_TCP, TCP_NODELAY, (void *) &optval, sizeof(int)) < 0) { - log_err("fio: cannot set TCP_NODELAY option on socket (%s), disable with 'nodelay=0'\n", strerror(errno)); - return 1; - } - } -#endif - - reset_all_stats(td); - td_set_runstate(td, state); - return 0; -err: - td_set_runstate(td, state); - return 1; -} - -static void fio_netio_send_close(struct thread_data *td, struct fio_file *f) -{ - struct netio_data *nd = td->io_ops_data; - struct netio_options *o = td->eo; - struct udp_close_msg msg; - struct sockaddr *to; - socklen_t len; - int ret; - - if (is_ipv6(o)) { - to = (struct sockaddr *) &nd->addr6; - len = sizeof(nd->addr6); - } else { - to = (struct sockaddr *) &nd->addr; - len = sizeof(nd->addr); - } - - msg.magic = cpu_to_le32((uint32_t) FIO_LINK_OPEN_CLOSE_MAGIC); - msg.cmd = cpu_to_le32((uint32_t) FIO_LINK_CLOSE); - - ret = sendto(f->fd, (void *) &msg, sizeof(msg), MSG_WAITALL, to, len); - if (ret < 0) - td_verror(td, errno, "sendto udp link close"); -} - -static int fio_netio_close_file(struct thread_data *td, struct fio_file *f) -{ - /* - * Notify the receiver that we are closing down the link - */ - fio_netio_send_close(td, f); - - return generic_close_file(td, f); -} - -static int fio_netio_udp_recv_open(struct thread_data *td, struct fio_file *f) -{ - struct netio_data *nd = td->io_ops_data; - struct netio_options *o = td->eo; - struct udp_close_msg msg; - struct sockaddr *to; - socklen_t len; - int ret; - - if (is_ipv6(o)) { - len = sizeof(nd->addr6); - to = (struct sockaddr *) &nd->addr6; - } else { - len = sizeof(nd->addr); - to = (struct sockaddr *) &nd->addr; - } - - ret = recvfrom(f->fd, (void *) &msg, sizeof(msg), MSG_WAITALL, to, &len); - if (ret < 0) { - td_verror(td, errno, "recvfrom udp link open"); - return ret; - } - - if (ntohl(msg.magic) != FIO_LINK_OPEN_CLOSE_MAGIC || - ntohl(msg.cmd) != FIO_LINK_OPEN) { - log_err("fio: bad udp open magic %x/%x\n", ntohl(msg.magic), - ntohl(msg.cmd)); - return -1; - } - - fio_gettime(&td->start, NULL); - return 0; -} - -static int fio_netio_send_open(struct thread_data *td, struct fio_file *f) -{ - struct netio_data *nd = td->io_ops_data; - struct netio_options *o = td->eo; - struct udp_close_msg msg; - struct sockaddr *to; - socklen_t len; - int ret; - - if (is_ipv6(o)) { - len = sizeof(nd->addr6); - to = (struct sockaddr *) &nd->addr6; - } else { - len = sizeof(nd->addr); - to = (struct sockaddr *) &nd->addr; - } - - msg.magic = htonl(FIO_LINK_OPEN_CLOSE_MAGIC); - msg.cmd = htonl(FIO_LINK_OPEN); - - ret = sendto(f->fd, (void *) &msg, sizeof(msg), MSG_WAITALL, to, len); - if (ret < 0) { - td_verror(td, errno, "sendto udp link open"); - return ret; - } - - return 0; -} - -static int fio_netio_open_file(struct thread_data *td, struct fio_file *f) -{ - int ret; - struct netio_options *o = td->eo; - - if (o->listen) - ret = fio_netio_accept(td, f); - else - ret = fio_netio_connect(td, f); - - if (ret) { - f->fd = -1; - return ret; - } - - if (is_udp(o)) { - if (td_write(td)) - ret = fio_netio_send_open(td, f); - else { - int state; - - state = td->runstate; - td_set_runstate(td, TD_SETTING_UP); - ret = fio_netio_udp_recv_open(td, f); - td_set_runstate(td, state); - } - } - - if (ret) - fio_netio_close_file(td, f); - - return ret; -} - -static int fio_fill_addr(struct thread_data *td, const char *host, int af, - void *dst, struct addrinfo **res) -{ - struct netio_options *o = td->eo; - struct addrinfo hints; - int ret; - - if (inet_pton(af, host, dst)) - return 0; - - memset(&hints, 0, sizeof(hints)); - - if (is_tcp(o)) - hints.ai_socktype = SOCK_STREAM; - else - hints.ai_socktype = SOCK_DGRAM; - - if (is_ipv6(o)) - hints.ai_family = AF_INET6; - else - hints.ai_family = AF_INET; - - ret = getaddrinfo(host, NULL, &hints, res); - if (ret) { - int e = EINVAL; - char str[128]; - - if (ret == EAI_SYSTEM) - e = errno; - - snprintf(str, sizeof(str), "getaddrinfo: %s", gai_strerror(ret)); - td_verror(td, e, str); - return 1; - } - - return 0; -} - -static int fio_netio_setup_connect_inet(struct thread_data *td, - const char *host, unsigned short port) -{ - struct netio_data *nd = td->io_ops_data; - struct netio_options *o = td->eo; - struct addrinfo *res = NULL; - void *dst, *src; - int af, len; - - if (!host) { - log_err("fio: connect with no host to connect to.\n"); - if (td_read(td)) - log_err("fio: did you forget to set 'listen'?\n"); - - td_verror(td, EINVAL, "no hostname= set"); - return 1; - } - - nd->addr.sin_family = AF_INET; - nd->addr.sin_port = htons(port); - nd->addr6.sin6_family = AF_INET6; - nd->addr6.sin6_port = htons(port); - - if (is_ipv6(o)) { - af = AF_INET6; - dst = &nd->addr6.sin6_addr; - } else { - af = AF_INET; - dst = &nd->addr.sin_addr; - } - - if (fio_fill_addr(td, host, af, dst, &res)) - return 1; - - if (!res) - return 0; - - if (is_ipv6(o)) { - len = sizeof(nd->addr6.sin6_addr); - src = &((struct sockaddr_in6 *) res->ai_addr)->sin6_addr; - } else { - len = sizeof(nd->addr.sin_addr); - src = &((struct sockaddr_in *) res->ai_addr)->sin_addr; - } - - memcpy(dst, src, len); - freeaddrinfo(res); - return 0; -} - -static int fio_netio_setup_connect_unix(struct thread_data *td, - const char *path) -{ - struct netio_data *nd = td->io_ops_data; - struct sockaddr_un *soun = &nd->addr_un; - - soun->sun_family = AF_UNIX; - memset(soun->sun_path, 0, sizeof(soun->sun_path)); - strncpy(soun->sun_path, path, sizeof(soun->sun_path) - 1); - return 0; -} - -static int fio_netio_setup_connect(struct thread_data *td) -{ - struct netio_options *o = td->eo; - - if (is_udp(o) || is_tcp(o)) - return fio_netio_setup_connect_inet(td, td->o.filename,o->port); - else - return fio_netio_setup_connect_unix(td, td->o.filename); -} - -static int fio_netio_setup_listen_unix(struct thread_data *td, const char *path) -{ - struct netio_data *nd = td->io_ops_data; - struct sockaddr_un *addr = &nd->addr_un; - mode_t mode; - int len, fd; - - fd = socket(AF_UNIX, SOCK_STREAM, 0); - if (fd < 0) { - log_err("fio: socket: %s\n", strerror(errno)); - return -1; - } - - mode = umask(000); - - memset(addr, 0, sizeof(*addr)); - addr->sun_family = AF_UNIX; - strncpy(addr->sun_path, path, sizeof(addr->sun_path) - 1); - unlink(path); - - len = sizeof(addr->sun_family) + strlen(path) + 1; - - if (bind(fd, (struct sockaddr *) addr, len) < 0) { - log_err("fio: bind: %s\n", strerror(errno)); - close(fd); - return -1; - } - - umask(mode); - nd->listenfd = fd; - return 0; -} - -static int fio_netio_setup_listen_inet(struct thread_data *td, short port) -{ - struct netio_data *nd = td->io_ops_data; - struct netio_options *o = td->eo; - struct ip_mreq mr; - struct sockaddr_in sin; - struct sockaddr *saddr; - int fd, opt, type, domain; - socklen_t len; - - memset(&sin, 0, sizeof(sin)); - - if (o->proto == FIO_TYPE_TCP) { - type = SOCK_STREAM; - domain = AF_INET; - } else if (o->proto == FIO_TYPE_TCP_V6) { - type = SOCK_STREAM; - domain = AF_INET6; - } else if (o->proto == FIO_TYPE_UDP) { - type = SOCK_DGRAM; - domain = AF_INET; - } else if (o->proto == FIO_TYPE_UDP_V6) { - type = SOCK_DGRAM; - domain = AF_INET6; - } else { - log_err("fio: unknown proto %d\n", o->proto); - return 1; - } - - fd = socket(domain, type, 0); - if (fd < 0) { - td_verror(td, errno, "socket"); - return 1; - } - - opt = 1; - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (void *) &opt, sizeof(opt)) < 0) { - td_verror(td, errno, "setsockopt"); - close(fd); - return 1; - } -#ifdef SO_REUSEPORT - if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, (void *) &opt, sizeof(opt)) < 0) { - td_verror(td, errno, "setsockopt"); - close(fd); - return 1; - } -#endif - - if (set_window_size(td, fd)) { - close(fd); - return 1; - } - if (set_mss(td, fd)) { - close(fd); - return 1; - } - - if (td->o.filename) { - if (!is_udp(o) || !fio_netio_is_multicast(td->o.filename)) { - log_err("fio: hostname not valid for non-multicast inbound network IO\n"); - close(fd); - return 1; - } - if (is_ipv6(o)) { - log_err("fio: IPv6 not supported for multicast network IO\n"); - close(fd); - return 1; - } - - inet_aton(td->o.filename, &sin.sin_addr); - - mr.imr_multiaddr = sin.sin_addr; - if (o->intfc) { - if (inet_aton(o->intfc, &mr.imr_interface) == 0) { - log_err("fio: interface not valid interface IP\n"); - close(fd); - return 1; - } - } else { - mr.imr_interface.s_addr = htonl(INADDR_ANY); - } - - if (setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, (const char*)&mr, sizeof(mr)) < 0) { - td_verror(td, errno, "setsockopt IP_ADD_MEMBERSHIP"); - close(fd); - return 1; - } - } - - if (!is_ipv6(o)) { - saddr = (struct sockaddr *) &nd->addr; - len = sizeof(nd->addr); - - nd->addr.sin_family = AF_INET; - nd->addr.sin_addr.s_addr = sin.sin_addr.s_addr ? sin.sin_addr.s_addr : htonl(INADDR_ANY); - nd->addr.sin_port = htons(port); - } else { - saddr = (struct sockaddr *) &nd->addr6; - len = sizeof(nd->addr6); - - nd->addr6.sin6_family = AF_INET6; - nd->addr6.sin6_addr = in6addr_any; - nd->addr6.sin6_port = htons(port); - } - - if (bind(fd, saddr, len) < 0) { - close(fd); - td_verror(td, errno, "bind"); - return 1; - } - - nd->listenfd = fd; - return 0; -} - -static int fio_netio_setup_listen(struct thread_data *td) -{ - struct netio_data *nd = td->io_ops_data; - struct netio_options *o = td->eo; - int ret; - - if (is_udp(o) || is_tcp(o)) - ret = fio_netio_setup_listen_inet(td, o->port); - else - ret = fio_netio_setup_listen_unix(td, td->o.filename); - - if (ret) - return ret; - if (is_udp(o)) - return 0; - - if (listen(nd->listenfd, 10) < 0) { - td_verror(td, errno, "listen"); - nd->listenfd = -1; - return 1; - } - - return 0; -} - -static int fio_netio_init(struct thread_data *td) -{ - struct netio_options *o = td->eo; - int ret; - -#ifdef WIN32 - WSADATA wsd; - WSAStartup(MAKEWORD(2,2), &wsd); -#endif - - if (td_random(td)) { - log_err("fio: network IO can't be random\n"); - return 1; - } - - if (o->proto == FIO_TYPE_UNIX && o->port) { - log_err("fio: network IO port not valid with unix socket\n"); - return 1; - } else if (o->proto != FIO_TYPE_UNIX && !o->port) { - log_err("fio: network IO requires port for tcp or udp\n"); - return 1; - } - - o->port += td->subjob_number; - - if (!is_tcp(o)) { - if (o->listen) { - log_err("fio: listen only valid for TCP proto IO\n"); - return 1; - } - if (td_rw(td)) { - log_err("fio: datagram network connections must be" - " read OR write\n"); - return 1; - } - if (o->proto == FIO_TYPE_UNIX && !td->o.filename) { - log_err("fio: UNIX sockets need host/filename\n"); - return 1; - } - o->listen = td_read(td); - } - - if (o->listen) - ret = fio_netio_setup_listen(td); - else - ret = fio_netio_setup_connect(td); - - return ret; -} - -static void fio_netio_cleanup(struct thread_data *td) -{ - struct netio_data *nd = td->io_ops_data; - - if (nd) { - if (nd->listenfd != -1) - close(nd->listenfd); - if (nd->pipes[0] != -1) - close(nd->pipes[0]); - if (nd->pipes[1] != -1) - close(nd->pipes[1]); - - free(nd); - } -} - -static int fio_netio_setup(struct thread_data *td) -{ - struct netio_data *nd; - - if (!td->files_index) { - add_file(td, td->o.filename ?: "net", 0, 0); - td->o.nr_files = td->o.nr_files ?: 1; - td->o.open_files++; - } - - if (!td->io_ops_data) { - nd = malloc(sizeof(*nd)); - - memset(nd, 0, sizeof(*nd)); - nd->listenfd = -1; - nd->pipes[0] = nd->pipes[1] = -1; - td->io_ops_data = nd; - } - - return 0; -} - -static void fio_netio_terminate(struct thread_data *td) -{ - kill(td->pid, SIGTERM); -} - -#ifdef CONFIG_LINUX_SPLICE -static int fio_netio_setup_splice(struct thread_data *td) -{ - struct netio_data *nd; - - fio_netio_setup(td); - - nd = td->io_ops_data; - if (nd) { - if (pipe(nd->pipes) < 0) - return 1; - - nd->use_splice = 1; - return 0; - } - - return 1; -} - -static struct ioengine_ops ioengine_splice = { - .name = "netsplice", - .version = FIO_IOOPS_VERSION, - .prep = fio_netio_prep, - .queue = fio_netio_queue, - .setup = fio_netio_setup_splice, - .init = fio_netio_init, - .cleanup = fio_netio_cleanup, - .open_file = fio_netio_open_file, - .close_file = fio_netio_close_file, - .terminate = fio_netio_terminate, - .options = options, - .option_struct_size = sizeof(struct netio_options), - .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_UNIDIR | - FIO_PIPEIO, -}; -#endif - -static struct ioengine_ops ioengine_rw = { - .name = "net", - .version = FIO_IOOPS_VERSION, - .prep = fio_netio_prep, - .queue = fio_netio_queue, - .setup = fio_netio_setup, - .init = fio_netio_init, - .cleanup = fio_netio_cleanup, - .open_file = fio_netio_open_file, - .close_file = fio_netio_close_file, - .terminate = fio_netio_terminate, - .options = options, - .option_struct_size = sizeof(struct netio_options), - .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_UNIDIR | - FIO_PIPEIO | FIO_BIT_BASED, -}; - -static int str_hostname_cb(void *data, const char *input) -{ - struct netio_options *o = data; - - if (o->td->o.filename) - free(o->td->o.filename); - o->td->o.filename = strdup(input); - return 0; -} - -static void fio_init fio_netio_register(void) -{ - register_ioengine(&ioengine_rw); -#ifdef CONFIG_LINUX_SPLICE - register_ioengine(&ioengine_splice); -#endif -} - -static void fio_exit fio_netio_unregister(void) -{ - unregister_ioengine(&ioengine_rw); -#ifdef CONFIG_LINUX_SPLICE - unregister_ioengine(&ioengine_splice); -#endif -} diff --git a/engines/null.c b/engines/null.c deleted file mode 100644 index 812cadfe..00000000 --- a/engines/null.c +++ /dev/null @@ -1,157 +0,0 @@ -/* - * null engine - * - * IO engine that doesn't do any real IO transfers, it just pretends to. - * The main purpose is to test fio itself. - * - * It also can act as external C++ engine - compiled with: - * - * g++ -O2 -g -shared -rdynamic -fPIC -o null.so null.c -DFIO_EXTERNAL_ENGINE - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <assert.h> - -#include "../fio.h" - -struct null_data { - struct io_u **io_us; - int queued; - int events; -}; - -static struct io_u *fio_null_event(struct thread_data *td, int event) -{ - struct null_data *nd = (struct null_data *) td->io_ops_data; - - return nd->io_us[event]; -} - -static int fio_null_getevents(struct thread_data *td, unsigned int min_events, - unsigned int fio_unused max, - const struct timespec fio_unused *t) -{ - struct null_data *nd = (struct null_data *) td->io_ops_data; - int ret = 0; - - if (min_events) { - ret = nd->events; - nd->events = 0; - } - - return ret; -} - -static int fio_null_commit(struct thread_data *td) -{ - struct null_data *nd = (struct null_data *) td->io_ops_data; - - if (!nd->events) { -#ifndef FIO_EXTERNAL_ENGINE - io_u_mark_submit(td, nd->queued); -#endif - nd->events = nd->queued; - nd->queued = 0; - } - - return 0; -} - -static int fio_null_queue(struct thread_data *td, struct io_u *io_u) -{ - struct null_data *nd = (struct null_data *) td->io_ops_data; - - fio_ro_check(td, io_u); - - if (td->io_ops->flags & FIO_SYNCIO) - return FIO_Q_COMPLETED; - if (nd->events) - return FIO_Q_BUSY; - - nd->io_us[nd->queued++] = io_u; - return FIO_Q_QUEUED; -} - -static int fio_null_open(struct thread_data fio_unused *td, - struct fio_file fio_unused *f) -{ - return 0; -} - -static void fio_null_cleanup(struct thread_data *td) -{ - struct null_data *nd = (struct null_data *) td->io_ops_data; - - if (nd) { - free(nd->io_us); - free(nd); - } -} - -static int fio_null_init(struct thread_data *td) -{ - struct null_data *nd = (struct null_data *) malloc(sizeof(*nd)); - - memset(nd, 0, sizeof(*nd)); - - if (td->o.iodepth != 1) { - nd->io_us = (struct io_u **) malloc(td->o.iodepth * sizeof(struct io_u *)); - memset(nd->io_us, 0, td->o.iodepth * sizeof(struct io_u *)); - } else - td->io_ops->flags |= FIO_SYNCIO; - - td->io_ops_data = nd; - return 0; -} - -#ifndef __cplusplus -static struct ioengine_ops ioengine = { - .name = "null", - .version = FIO_IOOPS_VERSION, - .queue = fio_null_queue, - .commit = fio_null_commit, - .getevents = fio_null_getevents, - .event = fio_null_event, - .init = fio_null_init, - .cleanup = fio_null_cleanup, - .open_file = fio_null_open, - .flags = FIO_DISKLESSIO | FIO_FAKEIO, -}; - -static void fio_init fio_null_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_null_unregister(void) -{ - unregister_ioengine(&ioengine); -} - -#else - -#ifdef FIO_EXTERNAL_ENGINE -extern "C" { -static struct ioengine_ops ioengine; -void get_ioengine(struct ioengine_ops **ioengine_ptr) -{ - *ioengine_ptr = &ioengine; - - ioengine.name = "cpp_null"; - ioengine.version = FIO_IOOPS_VERSION; - ioengine.queue = fio_null_queue; - ioengine.commit = fio_null_commit; - ioengine.getevents = fio_null_getevents; - ioengine.event = fio_null_event; - ioengine.init = fio_null_init; - ioengine.cleanup = fio_null_cleanup; - ioengine.open_file = fio_null_open; - ioengine.flags = FIO_DISKLESSIO | FIO_FAKEIO; -} -} -#endif /* FIO_EXTERNAL_ENGINE */ - -#endif /* __cplusplus */ diff --git a/engines/pmemblk.c b/engines/pmemblk.c deleted file mode 100644 index 52af9eda..00000000 --- a/engines/pmemblk.c +++ /dev/null @@ -1,445 +0,0 @@ -/* - * pmemblk: IO engine that uses NVML libpmemblk to read and write data - * - * Copyright (C) 2016 Hewlett Packard Enterprise Development LP - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License, - * version 2 as published by the Free Software Foundation.. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the Free - * Software Foundation, Inc., 59 Temple Place, Suite 330, - * Boston, MA 02111-1307 USA - */ - -/* - * pmemblk engine - * - * IO engine that uses libpmemblk to read and write data - * - * To use: - * ioengine=pmemblk - * - * Other relevant settings: - * thread=1 REQUIRED - * iodepth=1 - * direct=1 - * unlink=1 - * filename=/mnt/pmem0/fiotestfile,BSIZE,FSIZEMiB - * - * thread must be set to 1 for pmemblk as multiple processes cannot - * open the same block pool file. - * - * iodepth should be set to 1 as pmemblk is always synchronous. - * Use numjobs to scale up. - * - * direct=1 is implied as pmemblk is always direct. A warning message - * is printed if this is not specified. - * - * unlink=1 removes the block pool file after testing, and is optional. - * - * The pmem device must have a DAX-capable filesystem and be mounted - * with DAX enabled. filename must point to a file on that filesystem. - * - * Example: - * mkfs.xfs /dev/pmem0 - * mkdir /mnt/pmem0 - * mount -o dax /dev/pmem0 /mnt/pmem0 - * - * When specifying the filename, if the block pool file does not already - * exist, then the pmemblk engine creates the pool file if you specify - * the block and file sizes. BSIZE is the block size in bytes. - * FSIZEMB is the pool file size in MiB. - * - * See examples/pmemblk.fio for more. - * - */ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <sys/uio.h> -#include <errno.h> -#include <assert.h> -#include <string.h> -#include <libpmem.h> -#include <libpmemblk.h> - -#include "../fio.h" - -/* - * libpmemblk - */ -typedef struct fio_pmemblk_file *fio_pmemblk_file_t; - -struct fio_pmemblk_file { - fio_pmemblk_file_t pmb_next; - char *pmb_filename; - uint64_t pmb_refcnt; - PMEMblkpool *pmb_pool; - size_t pmb_bsize; - size_t pmb_nblocks; -}; - -static fio_pmemblk_file_t Cache; - -static pthread_mutex_t CacheLock = PTHREAD_MUTEX_INITIALIZER; - -#define PMB_CREATE (0x0001) /* should create file */ - -fio_pmemblk_file_t fio_pmemblk_cache_lookup(const char *filename) -{ - fio_pmemblk_file_t i; - - for (i = Cache; i != NULL; i = i->pmb_next) - if (!strcmp(filename, i->pmb_filename)) - return i; - - return NULL; -} - -static void fio_pmemblk_cache_insert(fio_pmemblk_file_t pmb) -{ - pmb->pmb_next = Cache; - Cache = pmb; -} - -static void fio_pmemblk_cache_remove(fio_pmemblk_file_t pmb) -{ - fio_pmemblk_file_t i; - - if (pmb == Cache) { - Cache = Cache->pmb_next; - pmb->pmb_next = NULL; - return; - } - - for (i = Cache; i != NULL; i = i->pmb_next) - if (pmb == i->pmb_next) { - i->pmb_next = i->pmb_next->pmb_next; - pmb->pmb_next = NULL; - return; - } -} - -/* - * to control block size and gross file size at the libpmemblk - * level, we allow the block size and file size to be appended - * to the file name: - * - * path[,bsize,fsizemib] - * - * note that we do not use the fio option "filesize" to dictate - * the file size because we can only give libpmemblk the gross - * file size, which is different from the net or usable file - * size (which is probably what fio wants). - * - * the final path without the parameters is returned in ppath. - * the block size and file size are returned in pbsize and fsize. - * - * note that the user specifies the file size in MiB, but - * we return bytes from here. - */ -static void pmb_parse_path(const char *pathspec, char **ppath, uint64_t *pbsize, - uint64_t *pfsize) -{ - char *path; - char *s; - uint64_t bsize; - uint64_t fsizemib; - - path = strdup(pathspec); - if (!path) { - *ppath = NULL; - return; - } - - /* extract sizes, if given */ - s = strrchr(path, ','); - if (s && (fsizemib = strtoull(s + 1, NULL, 10))) { - *s = 0; - s = strrchr(path, ','); - if (s && (bsize = strtoull(s + 1, NULL, 10))) { - *s = 0; - *ppath = path; - *pbsize = bsize; - *pfsize = fsizemib << 20; - return; - } - } - - /* size specs not found */ - strcpy(path, pathspec); - *ppath = path; - *pbsize = 0; - *pfsize = 0; -} - -static fio_pmemblk_file_t pmb_open(const char *pathspec, int flags) -{ - fio_pmemblk_file_t pmb; - char *path = NULL; - uint64_t bsize = 0; - uint64_t fsize = 0; - - pmb_parse_path(pathspec, &path, &bsize, &fsize); - if (!path) - return NULL; - - pthread_mutex_lock(&CacheLock); - - pmb = fio_pmemblk_cache_lookup(path); - if (!pmb) { - pmb = malloc(sizeof(*pmb)); - if (!pmb) - goto error; - - /* try opening existing first, create it if needed */ - pmb->pmb_pool = pmemblk_open(path, bsize); - if (!pmb->pmb_pool && (errno == ENOENT) && - (flags & PMB_CREATE) && (0 < fsize) && (0 < bsize)) { - pmb->pmb_pool = - pmemblk_create(path, bsize, fsize, 0644); - } - if (!pmb->pmb_pool) { - log_err("pmemblk: unable to open pmemblk pool file %s (%s)\n", - path, strerror(errno)); - goto error; - } - - pmb->pmb_filename = path; - pmb->pmb_next = NULL; - pmb->pmb_refcnt = 0; - pmb->pmb_bsize = pmemblk_bsize(pmb->pmb_pool); - pmb->pmb_nblocks = pmemblk_nblock(pmb->pmb_pool); - - fio_pmemblk_cache_insert(pmb); - } - - pmb->pmb_refcnt += 1; - - pthread_mutex_unlock(&CacheLock); - - return pmb; - -error: - if (pmb) { - if (pmb->pmb_pool) - pmemblk_close(pmb->pmb_pool); - pmb->pmb_pool = NULL; - pmb->pmb_filename = NULL; - free(pmb); - } - if (path) - free(path); - - pthread_mutex_unlock(&CacheLock); - return NULL; -} - -static void pmb_close(fio_pmemblk_file_t pmb, const bool keep) -{ - pthread_mutex_lock(&CacheLock); - - pmb->pmb_refcnt--; - - if (!keep && !pmb->pmb_refcnt) { - pmemblk_close(pmb->pmb_pool); - pmb->pmb_pool = NULL; - free(pmb->pmb_filename); - pmb->pmb_filename = NULL; - fio_pmemblk_cache_remove(pmb); - free(pmb); - } - - pthread_mutex_unlock(&CacheLock); -} - -static int pmb_get_flags(struct thread_data *td, uint64_t *pflags) -{ - static int thread_warned = 0; - static int odirect_warned = 0; - - uint64_t flags = 0; - - if (!td->o.use_thread) { - if (!thread_warned) { - thread_warned = 1; - log_err("pmemblk: must set thread=1 for pmemblk engine\n"); - } - return 1; - } - - if (!td->o.odirect && !odirect_warned) { - odirect_warned = 1; - log_info("pmemblk: direct == 0, but pmemblk is always direct\n"); - } - - if (td->o.allow_create) - flags |= PMB_CREATE; - - (*pflags) = flags; - return 0; -} - -static int fio_pmemblk_open_file(struct thread_data *td, struct fio_file *f) -{ - uint64_t flags = 0; - fio_pmemblk_file_t pmb; - - if (pmb_get_flags(td, &flags)) - return 1; - - pmb = pmb_open(f->file_name, flags); - if (!pmb) - return 1; - - FILE_SET_ENG_DATA(f, pmb); - return 0; -} - -static int fio_pmemblk_close_file(struct thread_data fio_unused *td, - struct fio_file *f) -{ - fio_pmemblk_file_t pmb = FILE_ENG_DATA(f); - - if (pmb) - pmb_close(pmb, false); - - FILE_SET_ENG_DATA(f, NULL); - return 0; -} - -static int fio_pmemblk_get_file_size(struct thread_data *td, struct fio_file *f) -{ - uint64_t flags = 0; - fio_pmemblk_file_t pmb = FILE_ENG_DATA(f); - - if (fio_file_size_known(f)) - return 0; - - if (!pmb) { - if (pmb_get_flags(td, &flags)) - return 1; - pmb = pmb_open(f->file_name, flags); - if (!pmb) - return 1; - } - - f->real_file_size = pmb->pmb_bsize * pmb->pmb_nblocks; - - fio_file_set_size_known(f); - - if (!FILE_ENG_DATA(f)) - pmb_close(pmb, true); - - return 0; -} - -static int fio_pmemblk_queue(struct thread_data *td, struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - fio_pmemblk_file_t pmb = FILE_ENG_DATA(f); - - unsigned long long off; - unsigned long len; - void *buf; - - fio_ro_check(td, io_u); - - switch (io_u->ddir) { - case DDIR_READ: - case DDIR_WRITE: - off = io_u->offset; - len = io_u->xfer_buflen; - - io_u->error = EINVAL; - if (off % pmb->pmb_bsize) - break; - if (len % pmb->pmb_bsize) - break; - if ((off + len) / pmb->pmb_bsize > pmb->pmb_nblocks) - break; - - io_u->error = 0; - buf = io_u->xfer_buf; - off /= pmb->pmb_bsize; - len /= pmb->pmb_bsize; - while (0 < len) { - if (io_u->ddir == DDIR_READ && - 0 != pmemblk_read(pmb->pmb_pool, buf, off)) { - io_u->error = errno; - break; - } else if (0 != pmemblk_write(pmb->pmb_pool, buf, off)) { - io_u->error = errno; - break; - } - buf += pmb->pmb_bsize; - off++; - len--; - } - off *= pmb->pmb_bsize; - len *= pmb->pmb_bsize; - io_u->resid = io_u->xfer_buflen - (off - io_u->offset); - break; - case DDIR_SYNC: - case DDIR_DATASYNC: - case DDIR_SYNC_FILE_RANGE: - /* we're always sync'd */ - io_u->error = 0; - break; - default: - io_u->error = EINVAL; - break; - } - - return FIO_Q_COMPLETED; -} - -static int fio_pmemblk_unlink_file(struct thread_data *td, struct fio_file *f) -{ - char *path = NULL; - uint64_t bsize = 0; - uint64_t fsize = 0; - - /* - * we need our own unlink in case the user has specified - * the block and file sizes in the path name. we parse - * the file_name to determine the file name we actually used. - */ - - pmb_parse_path(f->file_name, &path, &bsize, &fsize); - if (!path) - return ENOENT; - - unlink(path); - free(path); - return 0; -} - -static struct ioengine_ops ioengine = { - .name = "pmemblk", - .version = FIO_IOOPS_VERSION, - .queue = fio_pmemblk_queue, - .open_file = fio_pmemblk_open_file, - .close_file = fio_pmemblk_close_file, - .get_file_size = fio_pmemblk_get_file_size, - .unlink_file = fio_pmemblk_unlink_file, - .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NOEXTEND | FIO_NODISKUTIL, -}; - -static void fio_init fio_pmemblk_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_pmemblk_unregister(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/posixaio.c b/engines/posixaio.c deleted file mode 100644 index bddb1ec3..00000000 --- a/engines/posixaio.c +++ /dev/null @@ -1,266 +0,0 @@ -/* - * posixaio engine - * - * IO engine that uses the posix defined aio interface. - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <fcntl.h> - -#include "../fio.h" - -struct posixaio_data { - struct io_u **aio_events; - unsigned int queued; -}; - -static int fill_timespec(struct timespec *ts) -{ -#ifdef CONFIG_CLOCK_GETTIME -#ifdef CONFIG_CLOCK_MONOTONIC - clockid_t clk = CLOCK_MONOTONIC; -#else - clockid_t clk = CLOCK_REALTIME; -#endif - if (!clock_gettime(clk, ts)) - return 0; - - perror("clock_gettime"); - return 1; -#else - struct timeval tv; - - gettimeofday(&tv, NULL); - ts->tv_sec = tv.tv_sec; - ts->tv_nsec = tv.tv_usec * 1000; - return 0; -#endif -} - -static unsigned long long ts_utime_since_now(struct timespec *t) -{ - long long sec, nsec; - struct timespec now; - - if (fill_timespec(&now)) - return 0; - - sec = now.tv_sec - t->tv_sec; - nsec = now.tv_nsec - t->tv_nsec; - if (sec > 0 && nsec < 0) { - sec--; - nsec += 1000000000; - } - - sec *= 1000000; - nsec /= 1000; - return sec + nsec; -} - -static int fio_posixaio_cancel(struct thread_data fio_unused *td, - struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - int r = aio_cancel(f->fd, &io_u->aiocb); - - if (r == AIO_ALLDONE || r == AIO_CANCELED) - return 0; - - return 1; -} - -static int fio_posixaio_prep(struct thread_data fio_unused *td, - struct io_u *io_u) -{ - os_aiocb_t *aiocb = &io_u->aiocb; - struct fio_file *f = io_u->file; - - aiocb->aio_fildes = f->fd; - aiocb->aio_buf = io_u->xfer_buf; - aiocb->aio_nbytes = io_u->xfer_buflen; - aiocb->aio_offset = io_u->offset; - aiocb->aio_sigevent.sigev_notify = SIGEV_NONE; - - io_u->seen = 0; - return 0; -} - -#define SUSPEND_ENTRIES 8 - -static int fio_posixaio_getevents(struct thread_data *td, unsigned int min, - unsigned int max, const struct timespec *t) -{ - struct posixaio_data *pd = td->io_ops_data; - os_aiocb_t *suspend_list[SUSPEND_ENTRIES]; - struct timespec start; - int have_timeout = 0; - int suspend_entries; - struct io_u *io_u; - unsigned int r; - int i; - - if (t && !fill_timespec(&start)) - have_timeout = 1; - else - memset(&start, 0, sizeof(start)); - - r = 0; -restart: - memset(suspend_list, 0, sizeof(suspend_list)); - suspend_entries = 0; - io_u_qiter(&td->io_u_all, io_u, i) { - int err; - - if (io_u->seen || !(io_u->flags & IO_U_F_FLIGHT)) - continue; - - err = aio_error(&io_u->aiocb); - if (err == EINPROGRESS) { - if (suspend_entries < SUSPEND_ENTRIES) { - suspend_list[suspend_entries] = &io_u->aiocb; - suspend_entries++; - } - continue; - } - - io_u->seen = 1; - pd->queued--; - pd->aio_events[r++] = io_u; - - if (err == ECANCELED) - io_u->resid = io_u->xfer_buflen; - else if (!err) { - ssize_t retval = aio_return(&io_u->aiocb); - - io_u->resid = io_u->xfer_buflen - retval; - } else - io_u->error = err; - } - - if (r >= min) - return r; - - if (have_timeout) { - unsigned long long usec; - - usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000); - if (ts_utime_since_now(&start) > usec) - return r; - } - - /* - * must have some in-flight, wait for at least one - */ - aio_suspend((const os_aiocb_t * const *)suspend_list, - suspend_entries, t); - goto restart; -} - -static struct io_u *fio_posixaio_event(struct thread_data *td, int event) -{ - struct posixaio_data *pd = td->io_ops_data; - - return pd->aio_events[event]; -} - -static int fio_posixaio_queue(struct thread_data *td, - struct io_u *io_u) -{ - struct posixaio_data *pd = td->io_ops_data; - os_aiocb_t *aiocb = &io_u->aiocb; - int ret; - - fio_ro_check(td, io_u); - - if (io_u->ddir == DDIR_READ) - ret = aio_read(aiocb); - else if (io_u->ddir == DDIR_WRITE) - ret = aio_write(aiocb); - else if (io_u->ddir == DDIR_TRIM) { - if (pd->queued) - return FIO_Q_BUSY; - - do_io_u_trim(td, io_u); - return FIO_Q_COMPLETED; - } else { -#ifdef CONFIG_POSIXAIO_FSYNC - ret = aio_fsync(O_SYNC, aiocb); -#else - if (pd->queued) - return FIO_Q_BUSY; - - do_io_u_sync(td, io_u); - return FIO_Q_COMPLETED; -#endif - } - - if (ret) { - int aio_err = errno; - - /* - * At least OSX has a very low limit on the number of pending - * IOs, so if it returns EAGAIN, we are out of resources - * to queue more. Just return FIO_Q_BUSY to naturally - * drop off at this depth. - */ - if (aio_err == EAGAIN) - return FIO_Q_BUSY; - - io_u->error = aio_err; - td_verror(td, io_u->error, "xfer"); - return FIO_Q_COMPLETED; - } - - pd->queued++; - return FIO_Q_QUEUED; -} - -static void fio_posixaio_cleanup(struct thread_data *td) -{ - struct posixaio_data *pd = td->io_ops_data; - - if (pd) { - free(pd->aio_events); - free(pd); - } -} - -static int fio_posixaio_init(struct thread_data *td) -{ - struct posixaio_data *pd = malloc(sizeof(*pd)); - - memset(pd, 0, sizeof(*pd)); - pd->aio_events = malloc(td->o.iodepth * sizeof(struct io_u *)); - memset(pd->aio_events, 0, td->o.iodepth * sizeof(struct io_u *)); - - td->io_ops_data = pd; - return 0; -} - -static struct ioengine_ops ioengine = { - .name = "posixaio", - .version = FIO_IOOPS_VERSION, - .init = fio_posixaio_init, - .prep = fio_posixaio_prep, - .queue = fio_posixaio_queue, - .cancel = fio_posixaio_cancel, - .getevents = fio_posixaio_getevents, - .event = fio_posixaio_event, - .cleanup = fio_posixaio_cleanup, - .open_file = generic_open_file, - .close_file = generic_close_file, - .get_file_size = generic_get_file_size, -}; - -static void fio_init fio_posixaio_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_posixaio_unregister(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/rbd.c b/engines/rbd.c deleted file mode 100644 index 4bae425c..00000000 --- a/engines/rbd.c +++ /dev/null @@ -1,689 +0,0 @@ -/* - * rbd engine - * - * IO engine using Ceph's librbd to test RADOS Block Devices. - * - */ - -#include <rbd/librbd.h> - -#include "../fio.h" -#include "../optgroup.h" -#ifdef CONFIG_RBD_BLKIN -#include <zipkin_c.h> -#endif - -#ifdef CONFIG_RBD_POLL -/* add for poll */ -#include <poll.h> -#include <sys/eventfd.h> -#endif - -struct fio_rbd_iou { - struct io_u *io_u; - rbd_completion_t completion; - int io_seen; - int io_complete; -#ifdef CONFIG_RBD_BLKIN - struct blkin_trace_info info; -#endif -}; - -struct rbd_data { - rados_t cluster; - rados_ioctx_t io_ctx; - rbd_image_t image; - struct io_u **aio_events; - struct io_u **sort_events; - int fd; /* add for poll */ - bool connected; -}; - -struct rbd_options { - void *pad; - char *cluster_name; - char *rbd_name; - char *pool_name; - char *client_name; - int busy_poll; -}; - -static struct fio_option options[] = { - { - .name = "clustername", - .lname = "ceph cluster name", - .type = FIO_OPT_STR_STORE, - .help = "Cluster name for ceph", - .off1 = offsetof(struct rbd_options, cluster_name), - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_RBD, - }, - { - .name = "rbdname", - .lname = "rbd engine rbdname", - .type = FIO_OPT_STR_STORE, - .help = "RBD name for RBD engine", - .off1 = offsetof(struct rbd_options, rbd_name), - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_RBD, - }, - { - .name = "pool", - .lname = "rbd engine pool", - .type = FIO_OPT_STR_STORE, - .help = "Name of the pool hosting the RBD for the RBD engine", - .off1 = offsetof(struct rbd_options, pool_name), - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_RBD, - }, - { - .name = "clientname", - .lname = "rbd engine clientname", - .type = FIO_OPT_STR_STORE, - .help = "Name of the ceph client to access the RBD for the RBD engine", - .off1 = offsetof(struct rbd_options, client_name), - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_RBD, - }, - { - .name = "busy_poll", - .lname = "Busy poll", - .type = FIO_OPT_BOOL, - .help = "Busy poll for completions instead of sleeping", - .off1 = offsetof(struct rbd_options, busy_poll), - .def = "0", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_RBD, - }, - { - .name = NULL, - }, -}; - -static int _fio_setup_rbd_data(struct thread_data *td, - struct rbd_data **rbd_data_ptr) -{ - struct rbd_data *rbd; - - if (td->io_ops_data) - return 0; - - rbd = calloc(1, sizeof(struct rbd_data)); - if (!rbd) - goto failed; - - rbd->connected = false; - - /* add for poll, init fd: -1 */ - rbd->fd = -1; - - rbd->aio_events = calloc(td->o.iodepth, sizeof(struct io_u *)); - if (!rbd->aio_events) - goto failed; - - rbd->sort_events = calloc(td->o.iodepth, sizeof(struct io_u *)); - if (!rbd->sort_events) - goto failed; - - *rbd_data_ptr = rbd; - return 0; - -failed: - if (rbd) { - if (rbd->aio_events) - free(rbd->aio_events); - if (rbd->sort_events) - free(rbd->sort_events); - free(rbd); - } - return 1; - -} - -#ifdef CONFIG_RBD_POLL -static bool _fio_rbd_setup_poll(struct rbd_data *rbd) -{ - int r; - - /* add for rbd poll */ - rbd->fd = eventfd(0, EFD_NONBLOCK); - if (rbd->fd < 0) { - log_err("eventfd failed.\n"); - return false; - } - - r = rbd_set_image_notification(rbd->image, rbd->fd, EVENT_TYPE_EVENTFD); - if (r < 0) { - log_err("rbd_set_image_notification failed.\n"); - close(rbd->fd); - rbd->fd = -1; - return false; - } - - return true; -} -#else -static bool _fio_rbd_setup_poll(struct rbd_data *rbd) -{ - return true; -} -#endif - -static int _fio_rbd_connect(struct thread_data *td) -{ - struct rbd_data *rbd = td->io_ops_data; - struct rbd_options *o = td->eo; - int r; - - if (o->cluster_name) { - char *client_name = NULL; - - /* - * If we specify cluser name, the rados_create2 - * will not assume 'client.'. name is considered - * as a full type.id namestr - */ - if (o->client_name) { - if (!index(o->client_name, '.')) { - client_name = calloc(1, strlen("client.") + - strlen(o->client_name) + 1); - strcat(client_name, "client."); - strcat(client_name, o->client_name); - } else { - client_name = o->client_name; - } - } - - r = rados_create2(&rbd->cluster, o->cluster_name, - client_name, 0); - - if (client_name && !index(o->client_name, '.')) - free(client_name); - } else - r = rados_create(&rbd->cluster, o->client_name); - - if (r < 0) { - log_err("rados_create failed.\n"); - goto failed_early; - } - - r = rados_conf_read_file(rbd->cluster, NULL); - if (r < 0) { - log_err("rados_conf_read_file failed.\n"); - goto failed_early; - } - - r = rados_connect(rbd->cluster); - if (r < 0) { - log_err("rados_connect failed.\n"); - goto failed_shutdown; - } - - r = rados_ioctx_create(rbd->cluster, o->pool_name, &rbd->io_ctx); - if (r < 0) { - log_err("rados_ioctx_create failed.\n"); - goto failed_shutdown; - } - - r = rbd_open(rbd->io_ctx, o->rbd_name, &rbd->image, NULL /*snap */ ); - if (r < 0) { - log_err("rbd_open failed.\n"); - goto failed_open; - } - - if (!_fio_rbd_setup_poll(rbd)) - goto failed_poll; - - return 0; - -failed_poll: - rbd_close(rbd->image); - rbd->image = NULL; -failed_open: - rados_ioctx_destroy(rbd->io_ctx); - rbd->io_ctx = NULL; -failed_shutdown: - rados_shutdown(rbd->cluster); - rbd->cluster = NULL; -failed_early: - return 1; -} - -static void _fio_rbd_disconnect(struct rbd_data *rbd) -{ - if (!rbd) - return; - - /* close eventfd */ - if (rbd->fd != -1) { - close(rbd->fd); - rbd->fd = -1; - } - - /* shutdown everything */ - if (rbd->image) { - rbd_close(rbd->image); - rbd->image = NULL; - } - - if (rbd->io_ctx) { - rados_ioctx_destroy(rbd->io_ctx); - rbd->io_ctx = NULL; - } - - if (rbd->cluster) { - rados_shutdown(rbd->cluster); - rbd->cluster = NULL; - } -} - -static void _fio_rbd_finish_aiocb(rbd_completion_t comp, void *data) -{ - struct fio_rbd_iou *fri = data; - struct io_u *io_u = fri->io_u; - ssize_t ret; - - /* - * Looks like return value is 0 for success, or < 0 for - * a specific error. So we have to assume that it can't do - * partial completions. - */ - ret = rbd_aio_get_return_value(fri->completion); - if (ret < 0) { - io_u->error = -ret; - io_u->resid = io_u->xfer_buflen; - } else - io_u->error = 0; - - fri->io_complete = 1; -} - -static struct io_u *fio_rbd_event(struct thread_data *td, int event) -{ - struct rbd_data *rbd = td->io_ops_data; - - return rbd->aio_events[event]; -} - -static inline int fri_check_complete(struct rbd_data *rbd, struct io_u *io_u, - unsigned int *events) -{ - struct fio_rbd_iou *fri = io_u->engine_data; - - if (fri->io_complete) { - fri->io_seen = 1; - rbd->aio_events[*events] = io_u; - (*events)++; - - rbd_aio_release(fri->completion); - return 1; - } - - return 0; -} - -static inline int rbd_io_u_seen(struct io_u *io_u) -{ - struct fio_rbd_iou *fri = io_u->engine_data; - - return fri->io_seen; -} - -static void rbd_io_u_wait_complete(struct io_u *io_u) -{ - struct fio_rbd_iou *fri = io_u->engine_data; - - rbd_aio_wait_for_complete(fri->completion); -} - -static int rbd_io_u_cmp(const void *p1, const void *p2) -{ - const struct io_u **a = (const struct io_u **) p1; - const struct io_u **b = (const struct io_u **) p2; - uint64_t at, bt; - - at = utime_since_now(&(*a)->start_time); - bt = utime_since_now(&(*b)->start_time); - - if (at < bt) - return -1; - else if (at == bt) - return 0; - else - return 1; -} - -static int rbd_iter_events(struct thread_data *td, unsigned int *events, - unsigned int min_evts, int wait) -{ - struct rbd_data *rbd = td->io_ops_data; - unsigned int this_events = 0; - struct io_u *io_u; - int i, sidx = 0; - -#ifdef CONFIG_RBD_POLL - int ret = 0; - int event_num = 0; - struct fio_rbd_iou *fri = NULL; - rbd_completion_t comps[min_evts]; - - struct pollfd pfd; - pfd.fd = rbd->fd; - pfd.events = POLLIN; - - ret = poll(&pfd, 1, -1); - if (ret <= 0) - return 0; - - assert(pfd.revents & POLLIN); - - event_num = rbd_poll_io_events(rbd->image, comps, min_evts); - - for (i = 0; i < event_num; i++) { - fri = rbd_aio_get_arg(comps[i]); - io_u = fri->io_u; -#else - io_u_qiter(&td->io_u_all, io_u, i) { -#endif - if (!(io_u->flags & IO_U_F_FLIGHT)) - continue; - if (rbd_io_u_seen(io_u)) - continue; - - if (fri_check_complete(rbd, io_u, events)) - this_events++; - else if (wait) - rbd->sort_events[sidx++] = io_u; - } - - if (!wait || !sidx) - return this_events; - - /* - * Sort events, oldest issue first, then wait on as many as we - * need in order of age. If we have enough events, stop waiting, - * and just check if any of the older ones are done. - */ - if (sidx > 1) - qsort(rbd->sort_events, sidx, sizeof(struct io_u *), rbd_io_u_cmp); - - for (i = 0; i < sidx; i++) { - io_u = rbd->sort_events[i]; - - if (fri_check_complete(rbd, io_u, events)) { - this_events++; - continue; - } - - /* - * Stop waiting when we have enough, but continue checking - * all pending IOs if they are complete. - */ - if (*events >= min_evts) - continue; - - rbd_io_u_wait_complete(io_u); - - if (fri_check_complete(rbd, io_u, events)) - this_events++; - } - - return this_events; -} - -static int fio_rbd_getevents(struct thread_data *td, unsigned int min, - unsigned int max, const struct timespec *t) -{ - unsigned int this_events, events = 0; - struct rbd_options *o = td->eo; - int wait = 0; - - do { - this_events = rbd_iter_events(td, &events, min, wait); - - if (events >= min) - break; - if (this_events) - continue; - - if (!o->busy_poll) - wait = 1; - else - nop; - } while (1); - - return events; -} - -static int fio_rbd_queue(struct thread_data *td, struct io_u *io_u) -{ - struct rbd_data *rbd = td->io_ops_data; - struct fio_rbd_iou *fri = io_u->engine_data; - int r = -1; - - fio_ro_check(td, io_u); - - fri->io_seen = 0; - fri->io_complete = 0; - - r = rbd_aio_create_completion(fri, _fio_rbd_finish_aiocb, - &fri->completion); - if (r < 0) { - log_err("rbd_aio_create_completion failed.\n"); - goto failed; - } - - if (io_u->ddir == DDIR_WRITE) { -#ifdef CONFIG_RBD_BLKIN - blkin_init_trace_info(&fri->info); - r = rbd_aio_write_traced(rbd->image, io_u->offset, io_u->xfer_buflen, - io_u->xfer_buf, fri->completion, &fri->info); -#else - r = rbd_aio_write(rbd->image, io_u->offset, io_u->xfer_buflen, - io_u->xfer_buf, fri->completion); -#endif - if (r < 0) { - log_err("rbd_aio_write failed.\n"); - goto failed_comp; - } - - } else if (io_u->ddir == DDIR_READ) { -#ifdef CONFIG_RBD_BLKIN - blkin_init_trace_info(&fri->info); - r = rbd_aio_read_traced(rbd->image, io_u->offset, io_u->xfer_buflen, - io_u->xfer_buf, fri->completion, &fri->info); -#else - r = rbd_aio_read(rbd->image, io_u->offset, io_u->xfer_buflen, - io_u->xfer_buf, fri->completion); -#endif - - if (r < 0) { - log_err("rbd_aio_read failed.\n"); - goto failed_comp; - } - } else if (io_u->ddir == DDIR_TRIM) { - r = rbd_aio_discard(rbd->image, io_u->offset, - io_u->xfer_buflen, fri->completion); - if (r < 0) { - log_err("rbd_aio_discard failed.\n"); - goto failed_comp; - } - } else if (io_u->ddir == DDIR_SYNC) { - r = rbd_aio_flush(rbd->image, fri->completion); - if (r < 0) { - log_err("rbd_flush failed.\n"); - goto failed_comp; - } - } else { - dprint(FD_IO, "%s: Warning: unhandled ddir: %d\n", __func__, - io_u->ddir); - goto failed_comp; - } - - return FIO_Q_QUEUED; -failed_comp: - rbd_aio_release(fri->completion); -failed: - io_u->error = -r; - td_verror(td, io_u->error, "xfer"); - return FIO_Q_COMPLETED; -} - -static int fio_rbd_init(struct thread_data *td) -{ - int r; - struct rbd_data *rbd = td->io_ops_data; - - if (rbd->connected) - return 0; - - r = _fio_rbd_connect(td); - if (r) { - log_err("fio_rbd_connect failed, return code: %d .\n", r); - goto failed; - } - - return 0; - -failed: - return 1; -} - -static void fio_rbd_cleanup(struct thread_data *td) -{ - struct rbd_data *rbd = td->io_ops_data; - - if (rbd) { - _fio_rbd_disconnect(rbd); - free(rbd->aio_events); - free(rbd->sort_events); - free(rbd); - } -} - -static int fio_rbd_setup(struct thread_data *td) -{ - rbd_image_info_t info; - struct fio_file *f; - struct rbd_data *rbd = NULL; - int r; - - /* allocate engine specific structure to deal with librbd. */ - r = _fio_setup_rbd_data(td, &rbd); - if (r) { - log_err("fio_setup_rbd_data failed.\n"); - goto cleanup; - } - td->io_ops_data = rbd; - - /* librbd does not allow us to run first in the main thread and later - * in a fork child. It needs to be the same process context all the - * time. - */ - td->o.use_thread = 1; - - /* connect in the main thread to determine to determine - * the size of the given RADOS block device. And disconnect - * later on. - */ - r = _fio_rbd_connect(td); - if (r) { - log_err("fio_rbd_connect failed.\n"); - goto cleanup; - } - rbd->connected = true; - - /* get size of the RADOS block device */ - r = rbd_stat(rbd->image, &info, sizeof(info)); - if (r < 0) { - log_err("rbd_status failed.\n"); - goto cleanup; - } else if (info.size == 0) { - log_err("image size should be larger than zero.\n"); - r = -EINVAL; - goto cleanup; - } - - dprint(FD_IO, "rbd-engine: image size: %lu\n", info.size); - - /* taken from "net" engine. Pretend we deal with files, - * even if we do not have any ideas about files. - * The size of the RBD is set instead of a artificial file. - */ - if (!td->files_index) { - add_file(td, td->o.filename ? : "rbd", 0, 0); - td->o.nr_files = td->o.nr_files ? : 1; - td->o.open_files++; - } - f = td->files[0]; - f->real_file_size = info.size; - - return 0; - -cleanup: - fio_rbd_cleanup(td); - return r; -} - -static int fio_rbd_open(struct thread_data *td, struct fio_file *f) -{ - return 0; -} - -static int fio_rbd_invalidate(struct thread_data *td, struct fio_file *f) -{ -#if defined(CONFIG_RBD_INVAL) - struct rbd_data *rbd = td->io_ops_data; - - return rbd_invalidate_cache(rbd->image); -#else - return 0; -#endif -} - -static void fio_rbd_io_u_free(struct thread_data *td, struct io_u *io_u) -{ - struct fio_rbd_iou *fri = io_u->engine_data; - - if (fri) { - io_u->engine_data = NULL; - free(fri); - } -} - -static int fio_rbd_io_u_init(struct thread_data *td, struct io_u *io_u) -{ - struct fio_rbd_iou *fri; - - fri = calloc(1, sizeof(*fri)); - fri->io_u = io_u; - io_u->engine_data = fri; - return 0; -} - -static struct ioengine_ops ioengine = { - .name = "rbd", - .version = FIO_IOOPS_VERSION, - .setup = fio_rbd_setup, - .init = fio_rbd_init, - .queue = fio_rbd_queue, - .getevents = fio_rbd_getevents, - .event = fio_rbd_event, - .cleanup = fio_rbd_cleanup, - .open_file = fio_rbd_open, - .invalidate = fio_rbd_invalidate, - .options = options, - .io_u_init = fio_rbd_io_u_init, - .io_u_free = fio_rbd_io_u_free, - .option_struct_size = sizeof(struct rbd_options), -}; - -static void fio_init fio_rbd_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_rbd_unregister(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/rdma.c b/engines/rdma.c deleted file mode 100644 index 10e60dc8..00000000 --- a/engines/rdma.c +++ /dev/null @@ -1,1372 +0,0 @@ -/* - * RDMA I/O engine - * - * RDMA I/O engine based on the IB verbs and RDMA/CM user space libraries. - * Supports both RDMA memory semantics and channel semantics - * for the InfiniBand, RoCE and iWARP protocols. - * - * You will need the Linux RDMA software installed, either - * from your Linux distributor or directly from openfabrics.org: - * - * http://www.openfabrics.org/downloads/OFED/ - * - * Exchanging steps of RDMA ioengine control messages: - * 1. client side sends test mode (RDMA_WRITE/RDMA_READ/SEND) - * to server side. - * 2. server side parses test mode, and sends back confirmation - * to client side. In RDMA WRITE/READ test, this confirmation - * includes memory information, such as rkey, address. - * 3. client side initiates test loop. - * 4. In RDMA WRITE/READ test, client side sends a completion - * notification to server side. Server side updates its - * td->done as true. - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <assert.h> -#include <netinet/in.h> -#include <arpa/inet.h> -#include <netdb.h> -#include <sys/poll.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/time.h> -#include <sys/resource.h> - -#include <pthread.h> -#include <inttypes.h> - -#include "../fio.h" -#include "../hash.h" -#include "../optgroup.h" - -#include <rdma/rdma_cma.h> -#include <infiniband/arch.h> - -#define FIO_RDMA_MAX_IO_DEPTH 512 - -enum rdma_io_mode { - FIO_RDMA_UNKNOWN = 0, - FIO_RDMA_MEM_WRITE, - FIO_RDMA_MEM_READ, - FIO_RDMA_CHA_SEND, - FIO_RDMA_CHA_RECV -}; - -struct rdmaio_options { - struct thread_data *td; - unsigned int port; - enum rdma_io_mode verb; -}; - -static int str_hostname_cb(void *data, const char *input) -{ - struct rdmaio_options *o = data; - - if (o->td->o.filename) - free(o->td->o.filename); - o->td->o.filename = strdup(input); - return 0; -} - -static struct fio_option options[] = { - { - .name = "hostname", - .lname = "rdma engine hostname", - .type = FIO_OPT_STR_STORE, - .cb = str_hostname_cb, - .help = "Hostname for RDMA IO engine", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_RDMA, - }, - { - .name = "port", - .lname = "rdma engine port", - .type = FIO_OPT_INT, - .off1 = offsetof(struct rdmaio_options, port), - .minval = 1, - .maxval = 65535, - .help = "Port to use for RDMA connections", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_RDMA, - }, - { - .name = "verb", - .lname = "RDMA engine verb", - .alias = "proto", - .type = FIO_OPT_STR, - .off1 = offsetof(struct rdmaio_options, verb), - .help = "RDMA engine verb", - .def = "write", - .posval = { - { .ival = "write", - .oval = FIO_RDMA_MEM_WRITE, - .help = "Memory Write", - }, - { .ival = "read", - .oval = FIO_RDMA_MEM_READ, - .help = "Memory Read", - }, - { .ival = "send", - .oval = FIO_RDMA_CHA_SEND, - .help = "Posted Send", - }, - { .ival = "recv", - .oval = FIO_RDMA_CHA_RECV, - .help = "Posted Receive", - }, - }, - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_RDMA, - }, - { - .name = NULL, - }, -}; - -struct remote_u { - uint64_t buf; - uint32_t rkey; - uint32_t size; -}; - -struct rdma_info_blk { - uint32_t mode; /* channel semantic or memory semantic */ - uint32_t nr; /* client: io depth - server: number of records for memory semantic - */ - uint32_t max_bs; /* maximum block size */ - struct remote_u rmt_us[FIO_RDMA_MAX_IO_DEPTH]; -}; - -struct rdma_io_u_data { - uint64_t wr_id; - struct ibv_send_wr sq_wr; - struct ibv_recv_wr rq_wr; - struct ibv_sge rdma_sgl; -}; - -struct rdmaio_data { - int is_client; - enum rdma_io_mode rdma_protocol; - char host[64]; - struct sockaddr_in addr; - - struct ibv_recv_wr rq_wr; - struct ibv_sge recv_sgl; - struct rdma_info_blk recv_buf; - struct ibv_mr *recv_mr; - - struct ibv_send_wr sq_wr; - struct ibv_sge send_sgl; - struct rdma_info_blk send_buf; - struct ibv_mr *send_mr; - - struct ibv_comp_channel *channel; - struct ibv_cq *cq; - struct ibv_pd *pd; - struct ibv_qp *qp; - - pthread_t cmthread; - struct rdma_event_channel *cm_channel; - struct rdma_cm_id *cm_id; - struct rdma_cm_id *child_cm_id; - - int cq_event_num; - - struct remote_u *rmt_us; - int rmt_nr; - struct io_u **io_us_queued; - int io_u_queued_nr; - struct io_u **io_us_flight; - int io_u_flight_nr; - struct io_u **io_us_completed; - int io_u_completed_nr; - - struct frand_state rand_state; -}; - -static int client_recv(struct thread_data *td, struct ibv_wc *wc) -{ - struct rdmaio_data *rd = td->io_ops_data; - unsigned int max_bs; - - if (wc->byte_len != sizeof(rd->recv_buf)) { - log_err("Received bogus data, size %d\n", wc->byte_len); - return 1; - } - - max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]); - if (max_bs > ntohl(rd->recv_buf.max_bs)) { - log_err("fio: Server's block size (%d) must be greater than or " - "equal to the client's block size (%d)!\n", - ntohl(rd->recv_buf.max_bs), max_bs); - return 1; - } - - /* store mr info for MEMORY semantic */ - if ((rd->rdma_protocol == FIO_RDMA_MEM_WRITE) || - (rd->rdma_protocol == FIO_RDMA_MEM_READ)) { - /* struct flist_head *entry; */ - int i = 0; - - rd->rmt_nr = ntohl(rd->recv_buf.nr); - - for (i = 0; i < rd->rmt_nr; i++) { - rd->rmt_us[i].buf = ntohll(rd->recv_buf.rmt_us[i].buf); - rd->rmt_us[i].rkey = ntohl(rd->recv_buf.rmt_us[i].rkey); - rd->rmt_us[i].size = ntohl(rd->recv_buf.rmt_us[i].size); - - dprint(FD_IO, - "fio: Received rkey %x addr %" PRIx64 - " len %d from peer\n", rd->rmt_us[i].rkey, - rd->rmt_us[i].buf, rd->rmt_us[i].size); - } - } - - return 0; -} - -static int server_recv(struct thread_data *td, struct ibv_wc *wc) -{ - struct rdmaio_data *rd = td->io_ops_data; - unsigned int max_bs; - - if (wc->wr_id == FIO_RDMA_MAX_IO_DEPTH) { - rd->rdma_protocol = ntohl(rd->recv_buf.mode); - - /* CHANNEL semantic, do nothing */ - if (rd->rdma_protocol == FIO_RDMA_CHA_SEND) - rd->rdma_protocol = FIO_RDMA_CHA_RECV; - - max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]); - if (max_bs < ntohl(rd->recv_buf.max_bs)) { - log_err("fio: Server's block size (%d) must be greater than or " - "equal to the client's block size (%d)!\n", - ntohl(rd->recv_buf.max_bs), max_bs); - return 1; - } - - } - - return 0; -} - -static int cq_event_handler(struct thread_data *td, enum ibv_wc_opcode opcode) -{ - struct rdmaio_data *rd = td->io_ops_data; - struct ibv_wc wc; - struct rdma_io_u_data *r_io_u_d; - int ret; - int compevnum = 0; - int i; - - while ((ret = ibv_poll_cq(rd->cq, 1, &wc)) == 1) { - ret = 0; - compevnum++; - - if (wc.status) { - log_err("fio: cq completion status %d(%s)\n", - wc.status, ibv_wc_status_str(wc.status)); - return -1; - } - - switch (wc.opcode) { - - case IBV_WC_RECV: - if (rd->is_client == 1) - ret = client_recv(td, &wc); - else - ret = server_recv(td, &wc); - - if (ret) - return -1; - - if (wc.wr_id == FIO_RDMA_MAX_IO_DEPTH) - break; - - for (i = 0; i < rd->io_u_flight_nr; i++) { - r_io_u_d = rd->io_us_flight[i]->engine_data; - - if (wc.wr_id == r_io_u_d->rq_wr.wr_id) { - rd->io_us_flight[i]->resid = - rd->io_us_flight[i]->buflen - - wc.byte_len; - - rd->io_us_flight[i]->error = 0; - - rd->io_us_completed[rd-> - io_u_completed_nr] - = rd->io_us_flight[i]; - rd->io_u_completed_nr++; - break; - } - } - if (i == rd->io_u_flight_nr) - log_err("fio: recv wr %" PRId64 " not found\n", - wc.wr_id); - else { - /* put the last one into middle of the list */ - rd->io_us_flight[i] = - rd->io_us_flight[rd->io_u_flight_nr - 1]; - rd->io_u_flight_nr--; - } - - break; - - case IBV_WC_SEND: - case IBV_WC_RDMA_WRITE: - case IBV_WC_RDMA_READ: - if (wc.wr_id == FIO_RDMA_MAX_IO_DEPTH) - break; - - for (i = 0; i < rd->io_u_flight_nr; i++) { - r_io_u_d = rd->io_us_flight[i]->engine_data; - - if (wc.wr_id == r_io_u_d->sq_wr.wr_id) { - rd->io_us_completed[rd-> - io_u_completed_nr] - = rd->io_us_flight[i]; - rd->io_u_completed_nr++; - break; - } - } - if (i == rd->io_u_flight_nr) - log_err("fio: send wr %" PRId64 " not found\n", - wc.wr_id); - else { - /* put the last one into middle of the list */ - rd->io_us_flight[i] = - rd->io_us_flight[rd->io_u_flight_nr - 1]; - rd->io_u_flight_nr--; - } - - break; - - default: - log_info("fio: unknown completion event %d\n", - wc.opcode); - return -1; - } - rd->cq_event_num++; - } - - if (ret) { - log_err("fio: poll error %d\n", ret); - return 1; - } - - return compevnum; -} - -/* - * Return -1 for error and 'nr events' for a positive number - * of events - */ -static int rdma_poll_wait(struct thread_data *td, enum ibv_wc_opcode opcode) -{ - struct rdmaio_data *rd = td->io_ops_data; - struct ibv_cq *ev_cq; - void *ev_ctx; - int ret; - - if (rd->cq_event_num > 0) { /* previous left */ - rd->cq_event_num--; - return 0; - } - -again: - if (ibv_get_cq_event(rd->channel, &ev_cq, &ev_ctx) != 0) { - log_err("fio: Failed to get cq event!\n"); - return -1; - } - if (ev_cq != rd->cq) { - log_err("fio: Unknown CQ!\n"); - return -1; - } - if (ibv_req_notify_cq(rd->cq, 0) != 0) { - log_err("fio: Failed to set notify!\n"); - return -1; - } - - ret = cq_event_handler(td, opcode); - if (ret == 0) - goto again; - - ibv_ack_cq_events(rd->cq, ret); - - rd->cq_event_num--; - - return ret; -} - -static int fio_rdmaio_setup_qp(struct thread_data *td) -{ - struct rdmaio_data *rd = td->io_ops_data; - struct ibv_qp_init_attr init_attr; - int qp_depth = td->o.iodepth * 2; /* 2 times of io depth */ - - if (rd->is_client == 0) - rd->pd = ibv_alloc_pd(rd->child_cm_id->verbs); - else - rd->pd = ibv_alloc_pd(rd->cm_id->verbs); - - if (rd->pd == NULL) { - log_err("fio: ibv_alloc_pd fail: %m\n"); - return 1; - } - - if (rd->is_client == 0) - rd->channel = ibv_create_comp_channel(rd->child_cm_id->verbs); - else - rd->channel = ibv_create_comp_channel(rd->cm_id->verbs); - if (rd->channel == NULL) { - log_err("fio: ibv_create_comp_channel fail: %m\n"); - goto err1; - } - - if (qp_depth < 16) - qp_depth = 16; - - if (rd->is_client == 0) - rd->cq = ibv_create_cq(rd->child_cm_id->verbs, - qp_depth, rd, rd->channel, 0); - else - rd->cq = ibv_create_cq(rd->cm_id->verbs, - qp_depth, rd, rd->channel, 0); - if (rd->cq == NULL) { - log_err("fio: ibv_create_cq failed: %m\n"); - goto err2; - } - - if (ibv_req_notify_cq(rd->cq, 0) != 0) { - log_err("fio: ibv_req_notify_cq failed: %m\n"); - goto err3; - } - - /* create queue pair */ - memset(&init_attr, 0, sizeof(init_attr)); - init_attr.cap.max_send_wr = qp_depth; - init_attr.cap.max_recv_wr = qp_depth; - init_attr.cap.max_recv_sge = 1; - init_attr.cap.max_send_sge = 1; - init_attr.qp_type = IBV_QPT_RC; - init_attr.send_cq = rd->cq; - init_attr.recv_cq = rd->cq; - - if (rd->is_client == 0) { - if (rdma_create_qp(rd->child_cm_id, rd->pd, &init_attr) != 0) { - log_err("fio: rdma_create_qp failed: %m\n"); - goto err3; - } - rd->qp = rd->child_cm_id->qp; - } else { - if (rdma_create_qp(rd->cm_id, rd->pd, &init_attr) != 0) { - log_err("fio: rdma_create_qp failed: %m\n"); - goto err3; - } - rd->qp = rd->cm_id->qp; - } - - return 0; - -err3: - ibv_destroy_cq(rd->cq); -err2: - ibv_destroy_comp_channel(rd->channel); -err1: - ibv_dealloc_pd(rd->pd); - - return 1; -} - -static int fio_rdmaio_setup_control_msg_buffers(struct thread_data *td) -{ - struct rdmaio_data *rd = td->io_ops_data; - - rd->recv_mr = ibv_reg_mr(rd->pd, &rd->recv_buf, sizeof(rd->recv_buf), - IBV_ACCESS_LOCAL_WRITE); - if (rd->recv_mr == NULL) { - log_err("fio: recv_buf reg_mr failed: %m\n"); - return 1; - } - - rd->send_mr = ibv_reg_mr(rd->pd, &rd->send_buf, sizeof(rd->send_buf), - 0); - if (rd->send_mr == NULL) { - log_err("fio: send_buf reg_mr failed: %m\n"); - ibv_dereg_mr(rd->recv_mr); - return 1; - } - - /* setup work request */ - /* recv wq */ - rd->recv_sgl.addr = (uint64_t) (unsigned long)&rd->recv_buf; - rd->recv_sgl.length = sizeof(rd->recv_buf); - rd->recv_sgl.lkey = rd->recv_mr->lkey; - rd->rq_wr.sg_list = &rd->recv_sgl; - rd->rq_wr.num_sge = 1; - rd->rq_wr.wr_id = FIO_RDMA_MAX_IO_DEPTH; - - /* send wq */ - rd->send_sgl.addr = (uint64_t) (unsigned long)&rd->send_buf; - rd->send_sgl.length = sizeof(rd->send_buf); - rd->send_sgl.lkey = rd->send_mr->lkey; - - rd->sq_wr.opcode = IBV_WR_SEND; - rd->sq_wr.send_flags = IBV_SEND_SIGNALED; - rd->sq_wr.sg_list = &rd->send_sgl; - rd->sq_wr.num_sge = 1; - rd->sq_wr.wr_id = FIO_RDMA_MAX_IO_DEPTH; - - return 0; -} - -static int get_next_channel_event(struct thread_data *td, - struct rdma_event_channel *channel, - enum rdma_cm_event_type wait_event) -{ - struct rdmaio_data *rd = td->io_ops_data; - struct rdma_cm_event *event; - int ret; - - ret = rdma_get_cm_event(channel, &event); - if (ret) { - log_err("fio: rdma_get_cm_event: %d\n", ret); - return 1; - } - - if (event->event != wait_event) { - log_err("fio: event is %s instead of %s\n", - rdma_event_str(event->event), - rdma_event_str(wait_event)); - return 1; - } - - switch (event->event) { - case RDMA_CM_EVENT_CONNECT_REQUEST: - rd->child_cm_id = event->id; - break; - default: - break; - } - - rdma_ack_cm_event(event); - - return 0; -} - -static int fio_rdmaio_prep(struct thread_data *td, struct io_u *io_u) -{ - struct rdmaio_data *rd = td->io_ops_data; - struct rdma_io_u_data *r_io_u_d; - - r_io_u_d = io_u->engine_data; - - switch (rd->rdma_protocol) { - case FIO_RDMA_MEM_WRITE: - case FIO_RDMA_MEM_READ: - r_io_u_d->rdma_sgl.addr = (uint64_t) (unsigned long)io_u->buf; - r_io_u_d->rdma_sgl.lkey = io_u->mr->lkey; - r_io_u_d->sq_wr.wr_id = r_io_u_d->wr_id; - r_io_u_d->sq_wr.send_flags = IBV_SEND_SIGNALED; - r_io_u_d->sq_wr.sg_list = &r_io_u_d->rdma_sgl; - r_io_u_d->sq_wr.num_sge = 1; - break; - case FIO_RDMA_CHA_SEND: - r_io_u_d->rdma_sgl.addr = (uint64_t) (unsigned long)io_u->buf; - r_io_u_d->rdma_sgl.lkey = io_u->mr->lkey; - r_io_u_d->rdma_sgl.length = io_u->buflen; - r_io_u_d->sq_wr.wr_id = r_io_u_d->wr_id; - r_io_u_d->sq_wr.opcode = IBV_WR_SEND; - r_io_u_d->sq_wr.send_flags = IBV_SEND_SIGNALED; - r_io_u_d->sq_wr.sg_list = &r_io_u_d->rdma_sgl; - r_io_u_d->sq_wr.num_sge = 1; - break; - case FIO_RDMA_CHA_RECV: - r_io_u_d->rdma_sgl.addr = (uint64_t) (unsigned long)io_u->buf; - r_io_u_d->rdma_sgl.lkey = io_u->mr->lkey; - r_io_u_d->rdma_sgl.length = io_u->buflen; - r_io_u_d->rq_wr.wr_id = r_io_u_d->wr_id; - r_io_u_d->rq_wr.sg_list = &r_io_u_d->rdma_sgl; - r_io_u_d->rq_wr.num_sge = 1; - break; - default: - log_err("fio: unknown rdma protocol - %d\n", rd->rdma_protocol); - break; - } - - return 0; -} - -static struct io_u *fio_rdmaio_event(struct thread_data *td, int event) -{ - struct rdmaio_data *rd = td->io_ops_data; - struct io_u *io_u; - int i; - - io_u = rd->io_us_completed[0]; - for (i = 0; i < rd->io_u_completed_nr - 1; i++) - rd->io_us_completed[i] = rd->io_us_completed[i + 1]; - - rd->io_u_completed_nr--; - - dprint_io_u(io_u, "fio_rdmaio_event"); - - return io_u; -} - -static int fio_rdmaio_getevents(struct thread_data *td, unsigned int min, - unsigned int max, const struct timespec *t) -{ - struct rdmaio_data *rd = td->io_ops_data; - enum ibv_wc_opcode comp_opcode; - struct ibv_cq *ev_cq; - void *ev_ctx; - int ret, r = 0; - comp_opcode = IBV_WC_RDMA_WRITE; - - switch (rd->rdma_protocol) { - case FIO_RDMA_MEM_WRITE: - comp_opcode = IBV_WC_RDMA_WRITE; - break; - case FIO_RDMA_MEM_READ: - comp_opcode = IBV_WC_RDMA_READ; - break; - case FIO_RDMA_CHA_SEND: - comp_opcode = IBV_WC_SEND; - break; - case FIO_RDMA_CHA_RECV: - comp_opcode = IBV_WC_RECV; - break; - default: - log_err("fio: unknown rdma protocol - %d\n", rd->rdma_protocol); - break; - } - - if (rd->cq_event_num > 0) { /* previous left */ - rd->cq_event_num--; - return 0; - } - -again: - if (ibv_get_cq_event(rd->channel, &ev_cq, &ev_ctx) != 0) { - log_err("fio: Failed to get cq event!\n"); - return -1; - } - if (ev_cq != rd->cq) { - log_err("fio: Unknown CQ!\n"); - return -1; - } - if (ibv_req_notify_cq(rd->cq, 0) != 0) { - log_err("fio: Failed to set notify!\n"); - return -1; - } - - ret = cq_event_handler(td, comp_opcode); - if (ret < 1) - goto again; - - ibv_ack_cq_events(rd->cq, ret); - - r += ret; - if (r < min) - goto again; - - rd->cq_event_num -= r; - - return r; -} - -static int fio_rdmaio_send(struct thread_data *td, struct io_u **io_us, - unsigned int nr) -{ - struct rdmaio_data *rd = td->io_ops_data; - struct ibv_send_wr *bad_wr; -#if 0 - enum ibv_wc_opcode comp_opcode; - comp_opcode = IBV_WC_RDMA_WRITE; -#endif - int i; - long index; - struct rdma_io_u_data *r_io_u_d; - - r_io_u_d = NULL; - - for (i = 0; i < nr; i++) { - /* RDMA_WRITE or RDMA_READ */ - switch (rd->rdma_protocol) { - case FIO_RDMA_MEM_WRITE: - /* compose work request */ - r_io_u_d = io_us[i]->engine_data; - index = __rand(&rd->rand_state) % rd->rmt_nr; - r_io_u_d->sq_wr.opcode = IBV_WR_RDMA_WRITE; - r_io_u_d->sq_wr.wr.rdma.rkey = rd->rmt_us[index].rkey; - r_io_u_d->sq_wr.wr.rdma.remote_addr = \ - rd->rmt_us[index].buf; - r_io_u_d->sq_wr.sg_list->length = io_us[i]->buflen; - break; - case FIO_RDMA_MEM_READ: - /* compose work request */ - r_io_u_d = io_us[i]->engine_data; - index = __rand(&rd->rand_state) % rd->rmt_nr; - r_io_u_d->sq_wr.opcode = IBV_WR_RDMA_READ; - r_io_u_d->sq_wr.wr.rdma.rkey = rd->rmt_us[index].rkey; - r_io_u_d->sq_wr.wr.rdma.remote_addr = \ - rd->rmt_us[index].buf; - r_io_u_d->sq_wr.sg_list->length = io_us[i]->buflen; - break; - case FIO_RDMA_CHA_SEND: - r_io_u_d = io_us[i]->engine_data; - r_io_u_d->sq_wr.opcode = IBV_WR_SEND; - r_io_u_d->sq_wr.send_flags = IBV_SEND_SIGNALED; - break; - default: - log_err("fio: unknown rdma protocol - %d\n", - rd->rdma_protocol); - break; - } - - if (ibv_post_send(rd->qp, &r_io_u_d->sq_wr, &bad_wr) != 0) { - log_err("fio: ibv_post_send fail: %m\n"); - return -1; - } - - dprint_io_u(io_us[i], "fio_rdmaio_send"); - } - - /* wait for completion - rdma_poll_wait(td, comp_opcode); */ - - return i; -} - -static int fio_rdmaio_recv(struct thread_data *td, struct io_u **io_us, - unsigned int nr) -{ - struct rdmaio_data *rd = td->io_ops_data; - struct ibv_recv_wr *bad_wr; - struct rdma_io_u_data *r_io_u_d; - int i; - - i = 0; - if (rd->rdma_protocol == FIO_RDMA_CHA_RECV) { - /* post io_u into recv queue */ - for (i = 0; i < nr; i++) { - r_io_u_d = io_us[i]->engine_data; - if (ibv_post_recv(rd->qp, &r_io_u_d->rq_wr, &bad_wr) != - 0) { - log_err("fio: ibv_post_recv fail: %m\n"); - return 1; - } - } - } else if ((rd->rdma_protocol == FIO_RDMA_MEM_READ) - || (rd->rdma_protocol == FIO_RDMA_MEM_WRITE)) { - /* re-post the rq_wr */ - if (ibv_post_recv(rd->qp, &rd->rq_wr, &bad_wr) != 0) { - log_err("fio: ibv_post_recv fail: %m\n"); - return 1; - } - - rdma_poll_wait(td, IBV_WC_RECV); - - dprint(FD_IO, "fio: recv FINISH message\n"); - td->done = 1; - return 0; - } - - return i; -} - -static int fio_rdmaio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct rdmaio_data *rd = td->io_ops_data; - - fio_ro_check(td, io_u); - - if (rd->io_u_queued_nr == (int)td->o.iodepth) - return FIO_Q_BUSY; - - rd->io_us_queued[rd->io_u_queued_nr] = io_u; - rd->io_u_queued_nr++; - - dprint_io_u(io_u, "fio_rdmaio_queue"); - - return FIO_Q_QUEUED; -} - -static void fio_rdmaio_queued(struct thread_data *td, struct io_u **io_us, - unsigned int nr) -{ - struct rdmaio_data *rd = td->io_ops_data; - struct timeval now; - unsigned int i; - - if (!fio_fill_issue_time(td)) - return; - - fio_gettime(&now, NULL); - - for (i = 0; i < nr; i++) { - struct io_u *io_u = io_us[i]; - - /* queued -> flight */ - rd->io_us_flight[rd->io_u_flight_nr] = io_u; - rd->io_u_flight_nr++; - - memcpy(&io_u->issue_time, &now, sizeof(now)); - io_u_queued(td, io_u); - } -} - -static int fio_rdmaio_commit(struct thread_data *td) -{ - struct rdmaio_data *rd = td->io_ops_data; - struct io_u **io_us; - int ret; - - if (!rd->io_us_queued) - return 0; - - io_us = rd->io_us_queued; - do { - /* RDMA_WRITE or RDMA_READ */ - if (rd->is_client) - ret = fio_rdmaio_send(td, io_us, rd->io_u_queued_nr); - else if (!rd->is_client) - ret = fio_rdmaio_recv(td, io_us, rd->io_u_queued_nr); - else - ret = 0; /* must be a SYNC */ - - if (ret > 0) { - fio_rdmaio_queued(td, io_us, ret); - io_u_mark_submit(td, ret); - rd->io_u_queued_nr -= ret; - io_us += ret; - ret = 0; - } else - break; - } while (rd->io_u_queued_nr); - - return ret; -} - -static int fio_rdmaio_connect(struct thread_data *td, struct fio_file *f) -{ - struct rdmaio_data *rd = td->io_ops_data; - struct rdma_conn_param conn_param; - struct ibv_send_wr *bad_wr; - - memset(&conn_param, 0, sizeof(conn_param)); - conn_param.responder_resources = 1; - conn_param.initiator_depth = 1; - conn_param.retry_count = 10; - - if (rdma_connect(rd->cm_id, &conn_param) != 0) { - log_err("fio: rdma_connect fail: %m\n"); - return 1; - } - - if (get_next_channel_event - (td, rd->cm_channel, RDMA_CM_EVENT_ESTABLISHED) != 0) { - log_err("fio: wait for RDMA_CM_EVENT_ESTABLISHED\n"); - return 1; - } - - /* send task request */ - rd->send_buf.mode = htonl(rd->rdma_protocol); - rd->send_buf.nr = htonl(td->o.iodepth); - - if (ibv_post_send(rd->qp, &rd->sq_wr, &bad_wr) != 0) { - log_err("fio: ibv_post_send fail: %m\n"); - return 1; - } - - if (rdma_poll_wait(td, IBV_WC_SEND) < 0) - return 1; - - /* wait for remote MR info from server side */ - if (rdma_poll_wait(td, IBV_WC_RECV) < 0) - return 1; - - /* In SEND/RECV test, it's a good practice to setup the iodepth of - * of the RECV side deeper than that of the SEND side to - * avoid RNR (receiver not ready) error. The - * SEND side may send so many unsolicited message before - * RECV side commits sufficient recv buffers into recv queue. - * This may lead to RNR error. Here, SEND side pauses for a while - * during which RECV side commits sufficient recv buffers. - */ - usleep(500000); - - return 0; -} - -static int fio_rdmaio_accept(struct thread_data *td, struct fio_file *f) -{ - struct rdmaio_data *rd = td->io_ops_data; - struct rdma_conn_param conn_param; - struct ibv_send_wr *bad_wr; - int ret = 0; - - /* rdma_accept() - then wait for accept success */ - memset(&conn_param, 0, sizeof(conn_param)); - conn_param.responder_resources = 1; - conn_param.initiator_depth = 1; - - if (rdma_accept(rd->child_cm_id, &conn_param) != 0) { - log_err("fio: rdma_accept: %m\n"); - return 1; - } - - if (get_next_channel_event - (td, rd->cm_channel, RDMA_CM_EVENT_ESTABLISHED) != 0) { - log_err("fio: wait for RDMA_CM_EVENT_ESTABLISHED\n"); - return 1; - } - - /* wait for request */ - ret = rdma_poll_wait(td, IBV_WC_RECV) < 0; - - if (ibv_post_send(rd->qp, &rd->sq_wr, &bad_wr) != 0) { - log_err("fio: ibv_post_send fail: %m\n"); - return 1; - } - - if (rdma_poll_wait(td, IBV_WC_SEND) < 0) - return 1; - - return ret; -} - -static int fio_rdmaio_open_file(struct thread_data *td, struct fio_file *f) -{ - if (td_read(td)) - return fio_rdmaio_accept(td, f); - else - return fio_rdmaio_connect(td, f); -} - -static int fio_rdmaio_close_file(struct thread_data *td, struct fio_file *f) -{ - struct rdmaio_data *rd = td->io_ops_data; - struct ibv_send_wr *bad_wr; - - /* unregister rdma buffer */ - - /* - * Client sends notification to the server side - */ - /* refer to: http://linux.die.net/man/7/rdma_cm */ - if ((rd->is_client == 1) && ((rd->rdma_protocol == FIO_RDMA_MEM_WRITE) - || (rd->rdma_protocol == - FIO_RDMA_MEM_READ))) { - if (ibv_post_send(rd->qp, &rd->sq_wr, &bad_wr) != 0) { - log_err("fio: ibv_post_send fail: %m\n"); - return 1; - } - - dprint(FD_IO, "fio: close information sent success\n"); - rdma_poll_wait(td, IBV_WC_SEND); - } - - if (rd->is_client == 1) - rdma_disconnect(rd->cm_id); - else { - rdma_disconnect(rd->child_cm_id); -#if 0 - rdma_disconnect(rd->cm_id); -#endif - } - -#if 0 - if (get_next_channel_event(td, rd->cm_channel, RDMA_CM_EVENT_DISCONNECTED) != 0) { - log_err("fio: wait for RDMA_CM_EVENT_DISCONNECTED\n"); - return 1; - } -#endif - - ibv_destroy_cq(rd->cq); - ibv_destroy_qp(rd->qp); - - if (rd->is_client == 1) - rdma_destroy_id(rd->cm_id); - else { - rdma_destroy_id(rd->child_cm_id); - rdma_destroy_id(rd->cm_id); - } - - ibv_destroy_comp_channel(rd->channel); - ibv_dealloc_pd(rd->pd); - - return 0; -} - -static int fio_rdmaio_setup_connect(struct thread_data *td, const char *host, - unsigned short port) -{ - struct rdmaio_data *rd = td->io_ops_data; - struct ibv_recv_wr *bad_wr; - int err; - - rd->addr.sin_family = AF_INET; - rd->addr.sin_port = htons(port); - - if (inet_aton(host, &rd->addr.sin_addr) != 1) { - struct hostent *hent; - - hent = gethostbyname(host); - if (!hent) { - td_verror(td, errno, "gethostbyname"); - return 1; - } - - memcpy(&rd->addr.sin_addr, hent->h_addr, 4); - } - - /* resolve route */ - err = rdma_resolve_addr(rd->cm_id, NULL, (struct sockaddr *)&rd->addr, 2000); - if (err != 0) { - log_err("fio: rdma_resolve_addr: %d\n", err); - return 1; - } - - err = get_next_channel_event(td, rd->cm_channel, RDMA_CM_EVENT_ADDR_RESOLVED); - if (err != 0) { - log_err("fio: get_next_channel_event: %d\n", err); - return 1; - } - - /* resolve route */ - err = rdma_resolve_route(rd->cm_id, 2000); - if (err != 0) { - log_err("fio: rdma_resolve_route: %d\n", err); - return 1; - } - - err = get_next_channel_event(td, rd->cm_channel, RDMA_CM_EVENT_ROUTE_RESOLVED); - if (err != 0) { - log_err("fio: get_next_channel_event: %d\n", err); - return 1; - } - - /* create qp and buffer */ - if (fio_rdmaio_setup_qp(td) != 0) - return 1; - - if (fio_rdmaio_setup_control_msg_buffers(td) != 0) - return 1; - - /* post recv buf */ - err = ibv_post_recv(rd->qp, &rd->rq_wr, &bad_wr); - if (err != 0) { - log_err("fio: ibv_post_recv fail: %d\n", err); - return 1; - } - - return 0; -} - -static int fio_rdmaio_setup_listen(struct thread_data *td, short port) -{ - struct rdmaio_data *rd = td->io_ops_data; - struct ibv_recv_wr *bad_wr; - int state = td->runstate; - - td_set_runstate(td, TD_SETTING_UP); - - rd->addr.sin_family = AF_INET; - rd->addr.sin_addr.s_addr = htonl(INADDR_ANY); - rd->addr.sin_port = htons(port); - - /* rdma_listen */ - if (rdma_bind_addr(rd->cm_id, (struct sockaddr *)&rd->addr) != 0) { - log_err("fio: rdma_bind_addr fail: %m\n"); - return 1; - } - - if (rdma_listen(rd->cm_id, 3) != 0) { - log_err("fio: rdma_listen fail: %m\n"); - return 1; - } - - log_info("fio: waiting for connection\n"); - - /* wait for CONNECT_REQUEST */ - if (get_next_channel_event - (td, rd->cm_channel, RDMA_CM_EVENT_CONNECT_REQUEST) != 0) { - log_err("fio: wait for RDMA_CM_EVENT_CONNECT_REQUEST\n"); - return 1; - } - - if (fio_rdmaio_setup_qp(td) != 0) - return 1; - - if (fio_rdmaio_setup_control_msg_buffers(td) != 0) - return 1; - - /* post recv buf */ - if (ibv_post_recv(rd->qp, &rd->rq_wr, &bad_wr) != 0) { - log_err("fio: ibv_post_recv fail: %m\n"); - return 1; - } - - td_set_runstate(td, state); - return 0; -} - -static int check_set_rlimits(struct thread_data *td) -{ -#ifdef CONFIG_RLIMIT_MEMLOCK - struct rlimit rl; - - /* check RLIMIT_MEMLOCK */ - if (getrlimit(RLIMIT_MEMLOCK, &rl) != 0) { - log_err("fio: getrlimit fail: %d(%s)\n", - errno, strerror(errno)); - return 1; - } - - /* soft limit */ - if ((rl.rlim_cur != RLIM_INFINITY) - && (rl.rlim_cur < td->orig_buffer_size)) { - log_err("fio: soft RLIMIT_MEMLOCK is: %" PRId64 "\n", - rl.rlim_cur); - log_err("fio: total block size is: %zd\n", - td->orig_buffer_size); - /* try to set larger RLIMIT_MEMLOCK */ - rl.rlim_cur = rl.rlim_max; - if (setrlimit(RLIMIT_MEMLOCK, &rl) != 0) { - log_err("fio: setrlimit fail: %d(%s)\n", - errno, strerror(errno)); - log_err("fio: you may try enlarge MEMLOCK by root\n"); - log_err("# ulimit -l unlimited\n"); - return 1; - } - } -#endif - - return 0; -} - -static int compat_options(struct thread_data *td) -{ - // The original RDMA engine had an ugly / seperator - // on the filename for it's options. This function - // retains backwards compatibility with it.100 - - struct rdmaio_options *o = td->eo; - char *modep, *portp; - char *filename = td->o.filename; - - if (!filename) - return 0; - - portp = strchr(filename, '/'); - if (portp == NULL) - return 0; - - *portp = '\0'; - portp++; - - o->port = strtol(portp, NULL, 10); - if (!o->port || o->port > 65535) - goto bad_host; - - modep = strchr(portp, '/'); - if (modep != NULL) { - *modep = '\0'; - modep++; - } - - if (modep) { - if (!strncmp("rdma_write", modep, strlen(modep)) || - !strncmp("RDMA_WRITE", modep, strlen(modep))) - o->verb = FIO_RDMA_MEM_WRITE; - else if (!strncmp("rdma_read", modep, strlen(modep)) || - !strncmp("RDMA_READ", modep, strlen(modep))) - o->verb = FIO_RDMA_MEM_READ; - else if (!strncmp("send", modep, strlen(modep)) || - !strncmp("SEND", modep, strlen(modep))) - o->verb = FIO_RDMA_CHA_SEND; - else - goto bad_host; - } else - o->verb = FIO_RDMA_MEM_WRITE; - - - return 0; - -bad_host: - log_err("fio: bad rdma host/port/protocol: %s\n", td->o.filename); - return 1; -} - -static int fio_rdmaio_init(struct thread_data *td) -{ - struct rdmaio_data *rd = td->io_ops_data; - struct rdmaio_options *o = td->eo; - unsigned int max_bs; - int ret, i; - - if (td_rw(td)) { - log_err("fio: rdma connections must be read OR write\n"); - return 1; - } - if (td_random(td)) { - log_err("fio: RDMA network IO can't be random\n"); - return 1; - } - - if (compat_options(td)) - return 1; - - if (!o->port) { - log_err("fio: no port has been specified which is required " - "for the rdma engine\n"); - return 1; - } - - if (check_set_rlimits(td)) - return 1; - - rd->rdma_protocol = o->verb; - rd->cq_event_num = 0; - - rd->cm_channel = rdma_create_event_channel(); - if (!rd->cm_channel) { - log_err("fio: rdma_create_event_channel fail: %m\n"); - return 1; - } - - ret = rdma_create_id(rd->cm_channel, &rd->cm_id, rd, RDMA_PS_TCP); - if (ret) { - log_err("fio: rdma_create_id fail: %m\n"); - return 1; - } - - if ((rd->rdma_protocol == FIO_RDMA_MEM_WRITE) || - (rd->rdma_protocol == FIO_RDMA_MEM_READ)) { - rd->rmt_us = - malloc(FIO_RDMA_MAX_IO_DEPTH * sizeof(struct remote_u)); - memset(rd->rmt_us, 0, - FIO_RDMA_MAX_IO_DEPTH * sizeof(struct remote_u)); - rd->rmt_nr = 0; - } - - rd->io_us_queued = malloc(td->o.iodepth * sizeof(struct io_u *)); - memset(rd->io_us_queued, 0, td->o.iodepth * sizeof(struct io_u *)); - rd->io_u_queued_nr = 0; - - rd->io_us_flight = malloc(td->o.iodepth * sizeof(struct io_u *)); - memset(rd->io_us_flight, 0, td->o.iodepth * sizeof(struct io_u *)); - rd->io_u_flight_nr = 0; - - rd->io_us_completed = malloc(td->o.iodepth * sizeof(struct io_u *)); - memset(rd->io_us_completed, 0, td->o.iodepth * sizeof(struct io_u *)); - rd->io_u_completed_nr = 0; - - if (td_read(td)) { /* READ as the server */ - rd->is_client = 0; - td->flags |= TD_F_NO_PROGRESS; - /* server rd->rdma_buf_len will be setup after got request */ - ret = fio_rdmaio_setup_listen(td, o->port); - } else { /* WRITE as the client */ - rd->is_client = 1; - ret = fio_rdmaio_setup_connect(td, td->o.filename, o->port); - } - - max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]); - rd->send_buf.max_bs = htonl(max_bs); - - /* register each io_u in the free list */ - for (i = 0; i < td->io_u_freelist.nr; i++) { - struct io_u *io_u = td->io_u_freelist.io_us[i]; - - io_u->engine_data = malloc(sizeof(struct rdma_io_u_data)); - memset(io_u->engine_data, 0, sizeof(struct rdma_io_u_data)); - ((struct rdma_io_u_data *)io_u->engine_data)->wr_id = i; - - io_u->mr = ibv_reg_mr(rd->pd, io_u->buf, max_bs, - IBV_ACCESS_LOCAL_WRITE | - IBV_ACCESS_REMOTE_READ | - IBV_ACCESS_REMOTE_WRITE); - if (io_u->mr == NULL) { - log_err("fio: ibv_reg_mr io_u failed: %m\n"); - return 1; - } - - rd->send_buf.rmt_us[i].buf = - htonll((uint64_t) (unsigned long)io_u->buf); - rd->send_buf.rmt_us[i].rkey = htonl(io_u->mr->rkey); - rd->send_buf.rmt_us[i].size = htonl(max_bs); - -#if 0 - log_info("fio: Send rkey %x addr %" PRIx64 " len %d to client\n", io_u->mr->rkey, io_u->buf, max_bs); */ -#endif - } - - rd->send_buf.nr = htonl(i); - - return ret; -} - -static void fio_rdmaio_cleanup(struct thread_data *td) -{ - struct rdmaio_data *rd = td->io_ops_data; - - if (rd) - free(rd); -} - -static int fio_rdmaio_setup(struct thread_data *td) -{ - struct rdmaio_data *rd; - - if (!td->files_index) { - add_file(td, td->o.filename ?: "rdma", 0, 0); - td->o.nr_files = td->o.nr_files ?: 1; - td->o.open_files++; - } - - if (!td->io_ops_data) { - rd = malloc(sizeof(*rd)); - - memset(rd, 0, sizeof(*rd)); - init_rand_seed(&rd->rand_state, (unsigned int) GOLDEN_RATIO_PRIME, 0); - td->io_ops_data = rd; - } - - return 0; -} - -static struct ioengine_ops ioengine_rw = { - .name = "rdma", - .version = FIO_IOOPS_VERSION, - .setup = fio_rdmaio_setup, - .init = fio_rdmaio_init, - .prep = fio_rdmaio_prep, - .queue = fio_rdmaio_queue, - .commit = fio_rdmaio_commit, - .getevents = fio_rdmaio_getevents, - .event = fio_rdmaio_event, - .cleanup = fio_rdmaio_cleanup, - .open_file = fio_rdmaio_open_file, - .close_file = fio_rdmaio_close_file, - .flags = FIO_DISKLESSIO | FIO_UNIDIR | FIO_PIPEIO, - .options = options, - .option_struct_size = sizeof(struct rdmaio_options), -}; - -static void fio_init fio_rdmaio_register(void) -{ - register_ioengine(&ioengine_rw); -} - -static void fio_exit fio_rdmaio_unregister(void) -{ - unregister_ioengine(&ioengine_rw); -} diff --git a/engines/sg.c b/engines/sg.c deleted file mode 100644 index 2148e87c..00000000 --- a/engines/sg.c +++ /dev/null @@ -1,856 +0,0 @@ -/* - * sg engine - * - * IO engine that uses the Linux SG v3 interface to talk to SCSI devices - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <assert.h> -#include <sys/poll.h> - -#include "../fio.h" - -#ifdef FIO_HAVE_SGIO - -#define MAX_10B_LBA 0xFFFFFFFFULL -#define SCSI_TIMEOUT_MS 30000 // 30 second timeout; currently no method to override -#define MAX_SB 64 // sense block maximum return size - -struct sgio_cmd { - unsigned char cdb[16]; // enhanced from 10 to support 16 byte commands - unsigned char sb[MAX_SB]; // add sense block to commands - int nr; -}; - -struct sgio_data { - struct sgio_cmd *cmds; - struct io_u **events; - struct pollfd *pfds; - int *fd_flags; - void *sgbuf; - unsigned int bs; - int type_checked; -}; - -static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr, - struct io_u *io_u, int fs) -{ - struct sgio_cmd *sc = &sd->cmds[io_u->index]; - - memset(hdr, 0, sizeof(*hdr)); - memset(sc->cdb, 0, sizeof(sc->cdb)); - - hdr->interface_id = 'S'; - hdr->cmdp = sc->cdb; - hdr->cmd_len = sizeof(sc->cdb); - hdr->sbp = sc->sb; - hdr->mx_sb_len = sizeof(sc->sb); - hdr->pack_id = io_u->index; - hdr->usr_ptr = io_u; - - if (fs) { - hdr->dxferp = io_u->xfer_buf; - hdr->dxfer_len = io_u->xfer_buflen; - } -} - -static int pollin_events(struct pollfd *pfds, int fds) -{ - int i; - - for (i = 0; i < fds; i++) - if (pfds[i].revents & POLLIN) - return 1; - - return 0; -} - -static int sg_fd_read(int fd, void *data, size_t size) -{ - int err = 0; - - while (size) { - ssize_t ret; - - ret = read(fd, data, size); - if (ret < 0) { - if (errno == EAGAIN || errno == EINTR) - continue; - err = errno; - break; - } else if (!ret) - break; - else { - data += ret; - size -= ret; - } - } - - if (err) - return err; - if (size) - return EAGAIN; - - return 0; -} - -static int fio_sgio_getevents(struct thread_data *td, unsigned int min, - unsigned int max, - const struct timespec fio_unused *t) -{ - struct sgio_data *sd = td->io_ops_data; - int left = max, eventNum, ret, r = 0; - void *buf = sd->sgbuf; - unsigned int i, events; - struct fio_file *f; - - /* - * Fill in the file descriptors - */ - for_each_file(td, f, i) { - /* - * don't block for min events == 0 - */ - if (!min) - sd->fd_flags[i] = fio_set_fd_nonblocking(f->fd, "sg"); - else - sd->fd_flags[i] = -1; - - sd->pfds[i].fd = f->fd; - sd->pfds[i].events = POLLIN; - } - - while (left) { - void *p; - - dprint(FD_IO, "sgio_getevents: sd %p: left=%d\n", sd, left); - - do { - if (!min) - break; - - ret = poll(sd->pfds, td->o.nr_files, -1); - if (ret < 0) { - if (!r) - r = -errno; - td_verror(td, errno, "poll"); - break; - } else if (!ret) - continue; - - if (pollin_events(sd->pfds, td->o.nr_files)) - break; - } while (1); - - if (r < 0) - break; - -re_read: - p = buf; - events = 0; - for_each_file(td, f, i) { - for (eventNum = 0; eventNum < left; eventNum++) { - ret = sg_fd_read(f->fd, p, sizeof(struct sg_io_hdr)); - dprint(FD_IO, "sgio_getevents: ret: %d\n", ret); - if (ret) { - r = -ret; - td_verror(td, r, "sg_read"); - break; - } - p += sizeof(struct sg_io_hdr); - events++; - dprint(FD_IO, "sgio_getevents: events: %d\n", events); - } - } - - if (r < 0 && !events) - break; - if (!events) { - usleep(1000); - goto re_read; - } - - left -= events; - r += events; - - for (i = 0; i < events; i++) { - struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i; - sd->events[i] = hdr->usr_ptr; - - /* record if an io error occurred, ignore resid */ - if (hdr->info & SG_INFO_CHECK) { - struct io_u *io_u; - io_u = (struct io_u *)(hdr->usr_ptr); - memcpy((void*)&(io_u->hdr), (void*)hdr, sizeof(struct sg_io_hdr)); - sd->events[i]->error = EIO; - } - } - } - - if (!min) { - for_each_file(td, f, i) { - if (sd->fd_flags[i] == -1) - continue; - - if (fcntl(f->fd, F_SETFL, sd->fd_flags[i]) < 0) - log_err("fio: sg failed to restore fcntl flags: %s\n", strerror(errno)); - } - } - - return r; -} - -static int fio_sgio_ioctl_doio(struct thread_data *td, - struct fio_file *f, struct io_u *io_u) -{ - struct sgio_data *sd = td->io_ops_data; - struct sg_io_hdr *hdr = &io_u->hdr; - int ret; - - sd->events[0] = io_u; - - ret = ioctl(f->fd, SG_IO, hdr); - if (ret < 0) - return ret; - - /* record if an io error occurred */ - if (hdr->info & SG_INFO_CHECK) - io_u->error = EIO; - - return FIO_Q_COMPLETED; -} - -static int fio_sgio_rw_doio(struct fio_file *f, struct io_u *io_u, int do_sync) -{ - struct sg_io_hdr *hdr = &io_u->hdr; - int ret; - - ret = write(f->fd, hdr, sizeof(*hdr)); - if (ret < 0) - return ret; - - if (do_sync) { - ret = read(f->fd, hdr, sizeof(*hdr)); - if (ret < 0) - return ret; - - /* record if an io error occurred */ - if (hdr->info & SG_INFO_CHECK) - io_u->error = EIO; - - return FIO_Q_COMPLETED; - } - - return FIO_Q_QUEUED; -} - -static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int do_sync) -{ - struct fio_file *f = io_u->file; - int ret; - - if (f->filetype == FIO_TYPE_BLOCK) { - ret = fio_sgio_ioctl_doio(td, f, io_u); - td->error = io_u->error; - } else { - ret = fio_sgio_rw_doio(f, io_u, do_sync); - if (do_sync) - td->error = io_u->error; - } - - return ret; -} - -static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u) -{ - struct sg_io_hdr *hdr = &io_u->hdr; - struct sgio_data *sd = td->io_ops_data; - long long nr_blocks, lba; - - if (io_u->xfer_buflen & (sd->bs - 1)) { - log_err("read/write not sector aligned\n"); - return EINVAL; - } - - nr_blocks = io_u->xfer_buflen / sd->bs; - lba = io_u->offset / sd->bs; - - if (io_u->ddir == DDIR_READ) { - sgio_hdr_init(sd, hdr, io_u, 1); - - hdr->dxfer_direction = SG_DXFER_FROM_DEV; - if (lba < MAX_10B_LBA) - hdr->cmdp[0] = 0x28; // read(10) - else - hdr->cmdp[0] = 0x88; // read(16) - } else if (io_u->ddir == DDIR_WRITE) { - sgio_hdr_init(sd, hdr, io_u, 1); - - hdr->dxfer_direction = SG_DXFER_TO_DEV; - if (lba < MAX_10B_LBA) - hdr->cmdp[0] = 0x2a; // write(10) - else - hdr->cmdp[0] = 0x8a; // write(16) - } else { - sgio_hdr_init(sd, hdr, io_u, 0); - hdr->dxfer_direction = SG_DXFER_NONE; - if (lba < MAX_10B_LBA) - hdr->cmdp[0] = 0x35; // synccache(10) - else - hdr->cmdp[0] = 0x91; // synccache(16) - } - - /* - * for synccache, we leave lba and length to 0 to sync all - * blocks on medium. - */ - if (hdr->dxfer_direction != SG_DXFER_NONE) { - if (lba < MAX_10B_LBA) { - hdr->cmdp[2] = (unsigned char) ((lba >> 24) & 0xff); - hdr->cmdp[3] = (unsigned char) ((lba >> 16) & 0xff); - hdr->cmdp[4] = (unsigned char) ((lba >> 8) & 0xff); - hdr->cmdp[5] = (unsigned char) (lba & 0xff); - hdr->cmdp[7] = (unsigned char) ((nr_blocks >> 8) & 0xff); - hdr->cmdp[8] = (unsigned char) (nr_blocks & 0xff); - } else { - hdr->cmdp[2] = (unsigned char) ((lba >> 56) & 0xff); - hdr->cmdp[3] = (unsigned char) ((lba >> 48) & 0xff); - hdr->cmdp[4] = (unsigned char) ((lba >> 40) & 0xff); - hdr->cmdp[5] = (unsigned char) ((lba >> 32) & 0xff); - hdr->cmdp[6] = (unsigned char) ((lba >> 24) & 0xff); - hdr->cmdp[7] = (unsigned char) ((lba >> 16) & 0xff); - hdr->cmdp[8] = (unsigned char) ((lba >> 8) & 0xff); - hdr->cmdp[9] = (unsigned char) (lba & 0xff); - hdr->cmdp[10] = (unsigned char) ((nr_blocks >> 32) & 0xff); - hdr->cmdp[11] = (unsigned char) ((nr_blocks >> 16) & 0xff); - hdr->cmdp[12] = (unsigned char) ((nr_blocks >> 8) & 0xff); - hdr->cmdp[13] = (unsigned char) (nr_blocks & 0xff); - } - } - - hdr->timeout = SCSI_TIMEOUT_MS; - return 0; -} - -static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct sg_io_hdr *hdr = &io_u->hdr; - int ret, do_sync = 0; - - fio_ro_check(td, io_u); - - if (td->o.sync_io || td->o.odirect || ddir_sync(io_u->ddir)) - do_sync = 1; - - ret = fio_sgio_doio(td, io_u, do_sync); - - if (ret < 0) - io_u->error = errno; - else if (hdr->status) { - io_u->resid = hdr->resid; - io_u->error = EIO; - } - - if (io_u->error) { - td_verror(td, io_u->error, "xfer"); - return FIO_Q_COMPLETED; - } - - return ret; -} - -static struct io_u *fio_sgio_event(struct thread_data *td, int event) -{ - struct sgio_data *sd = td->io_ops_data; - - return sd->events[event]; -} - -static int fio_sgio_read_capacity(struct thread_data *td, unsigned int *bs, - unsigned long long *max_lba) -{ - /* - * need to do read capacity operation w/o benefit of sd or - * io_u structures, which are not initialized until later. - */ - struct sg_io_hdr hdr; - unsigned char cmd[16]; - unsigned char sb[64]; - unsigned char buf[32]; // read capacity return - int ret; - int fd = -1; - - struct fio_file *f = td->files[0]; - - /* open file independent of rest of application */ - fd = open(f->file_name, O_RDONLY); - if (fd < 0) - return -errno; - - memset(&hdr, 0, sizeof(hdr)); - memset(cmd, 0, sizeof(cmd)); - memset(sb, 0, sizeof(sb)); - memset(buf, 0, sizeof(buf)); - - /* First let's try a 10 byte read capacity. */ - hdr.interface_id = 'S'; - hdr.cmdp = cmd; - hdr.cmd_len = 10; - hdr.sbp = sb; - hdr.mx_sb_len = sizeof(sb); - hdr.timeout = SCSI_TIMEOUT_MS; - hdr.cmdp[0] = 0x25; // Read Capacity(10) - hdr.dxfer_direction = SG_DXFER_FROM_DEV; - hdr.dxferp = buf; - hdr.dxfer_len = sizeof(buf); - - ret = ioctl(fd, SG_IO, &hdr); - if (ret < 0) { - close(fd); - return ret; - } - - *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7]; - *max_lba = ((buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]) & MAX_10B_LBA; // for some reason max_lba is being sign extended even though unsigned. - - /* - * If max lba masked by MAX_10B_LBA equals MAX_10B_LBA, - * then need to retry with 16 byte Read Capacity command. - */ - if (*max_lba == MAX_10B_LBA) { - hdr.cmd_len = 16; - hdr.cmdp[0] = 0x9e; // service action - hdr.cmdp[1] = 0x10; // Read Capacity(16) - hdr.cmdp[10] = (unsigned char) ((sizeof(buf) >> 24) & 0xff); - hdr.cmdp[11] = (unsigned char) ((sizeof(buf) >> 16) & 0xff); - hdr.cmdp[12] = (unsigned char) ((sizeof(buf) >> 8) & 0xff); - hdr.cmdp[13] = (unsigned char) (sizeof(buf) & 0xff); - - hdr.dxfer_direction = SG_DXFER_FROM_DEV; - hdr.dxferp = buf; - hdr.dxfer_len = sizeof(buf); - - ret = ioctl(fd, SG_IO, &hdr); - if (ret < 0) { - close(fd); - return ret; - } - - /* record if an io error occurred */ - if (hdr.info & SG_INFO_CHECK) - td_verror(td, EIO, "fio_sgio_read_capacity"); - - *bs = (buf[8] << 24) | (buf[9] << 16) | (buf[10] << 8) | buf[11]; - *max_lba = ((unsigned long long)buf[0] << 56) | - ((unsigned long long)buf[1] << 48) | - ((unsigned long long)buf[2] << 40) | - ((unsigned long long)buf[3] << 32) | - ((unsigned long long)buf[4] << 24) | - ((unsigned long long)buf[5] << 16) | - ((unsigned long long)buf[6] << 8) | - (unsigned long long)buf[7]; - } - - close(fd); - return 0; -} - -static void fio_sgio_cleanup(struct thread_data *td) -{ - struct sgio_data *sd = td->io_ops_data; - - if (sd) { - free(sd->events); - free(sd->cmds); - free(sd->fd_flags); - free(sd->pfds); - free(sd->sgbuf); - free(sd); - } -} - -static int fio_sgio_init(struct thread_data *td) -{ - struct sgio_data *sd; - - sd = malloc(sizeof(*sd)); - memset(sd, 0, sizeof(*sd)); - sd->cmds = malloc(td->o.iodepth * sizeof(struct sgio_cmd)); - memset(sd->cmds, 0, td->o.iodepth * sizeof(struct sgio_cmd)); - sd->events = malloc(td->o.iodepth * sizeof(struct io_u *)); - memset(sd->events, 0, td->o.iodepth * sizeof(struct io_u *)); - sd->pfds = malloc(sizeof(struct pollfd) * td->o.nr_files); - memset(sd->pfds, 0, sizeof(struct pollfd) * td->o.nr_files); - sd->fd_flags = malloc(sizeof(int) * td->o.nr_files); - memset(sd->fd_flags, 0, sizeof(int) * td->o.nr_files); - sd->sgbuf = malloc(sizeof(struct sg_io_hdr) * td->o.iodepth); - memset(sd->sgbuf, 0, sizeof(struct sg_io_hdr) * td->o.iodepth); - sd->type_checked = 0; - td->io_ops_data = sd; - - /* - * we want to do it, regardless of whether odirect is set or not - */ - td->o.override_sync = 1; - return 0; -} - -static int fio_sgio_type_check(struct thread_data *td, struct fio_file *f) -{ - struct sgio_data *sd = td->io_ops_data; - unsigned int bs = 0; - unsigned long long max_lba = 0; - - if (f->filetype == FIO_TYPE_BLOCK) { - if (ioctl(f->fd, BLKSSZGET, &bs) < 0) { - td_verror(td, errno, "ioctl"); - return 1; - } - } else if (f->filetype == FIO_TYPE_CHAR) { - int version, ret; - - if (ioctl(f->fd, SG_GET_VERSION_NUM, &version) < 0) { - td_verror(td, errno, "ioctl"); - return 1; - } - - ret = fio_sgio_read_capacity(td, &bs, &max_lba); - if (ret) { - td_verror(td, td->error, "fio_sgio_read_capacity"); - log_err("ioengine sg unable to read capacity successfully\n"); - return 1; - } - } else { - td_verror(td, EINVAL, "wrong file type"); - log_err("ioengine sg only works on block or character devices\n"); - return 1; - } - - sd->bs = bs; - // Determine size of commands needed based on max_lba - if (max_lba >= MAX_10B_LBA) { - dprint(FD_IO, "sgio_type_check: using 16 byte read/write " - "commands for lba above 0x%016llx/0x%016llx\n", - MAX_10B_LBA, max_lba); - } - - if (f->filetype == FIO_TYPE_BLOCK) { - td->io_ops->getevents = NULL; - td->io_ops->event = NULL; - } - sd->type_checked = 1; - - return 0; -} - -static int fio_sgio_open(struct thread_data *td, struct fio_file *f) -{ - struct sgio_data *sd = td->io_ops_data; - int ret; - - ret = generic_open_file(td, f); - if (ret) - return ret; - - if (sd && !sd->type_checked && fio_sgio_type_check(td, f)) { - ret = generic_close_file(td, f); - return 1; - } - - return 0; -} - -/* - * Build an error string with details about the driver, host or scsi - * error contained in the sg header Caller will use as necessary. - */ -static char *fio_sgio_errdetails(struct io_u *io_u) -{ - struct sg_io_hdr *hdr = &io_u->hdr; -#define MAXERRDETAIL 1024 -#define MAXMSGCHUNK 128 - char *msg, msgchunk[MAXMSGCHUNK], *ret = NULL; - int i; - - msg = calloc(1, MAXERRDETAIL); - - /* - * can't seem to find sg_err.h, so I'll just echo the define values - * so others can search on internet to find clearer clues of meaning. - */ - if (hdr->info & SG_INFO_CHECK) { - ret = msg; - if (hdr->host_status) { - snprintf(msgchunk, MAXMSGCHUNK, "SG Host Status: 0x%02x; ", hdr->host_status); - strlcat(msg, msgchunk, MAXERRDETAIL); - switch (hdr->host_status) { - case 0x01: - strlcat(msg, "SG_ERR_DID_NO_CONNECT", MAXERRDETAIL); - break; - case 0x02: - strlcat(msg, "SG_ERR_DID_BUS_BUSY", MAXERRDETAIL); - break; - case 0x03: - strlcat(msg, "SG_ERR_DID_TIME_OUT", MAXERRDETAIL); - break; - case 0x04: - strlcat(msg, "SG_ERR_DID_BAD_TARGET", MAXERRDETAIL); - break; - case 0x05: - strlcat(msg, "SG_ERR_DID_ABORT", MAXERRDETAIL); - break; - case 0x06: - strlcat(msg, "SG_ERR_DID_PARITY", MAXERRDETAIL); - break; - case 0x07: - strlcat(msg, "SG_ERR_DID_ERROR (internal error)", MAXERRDETAIL); - break; - case 0x08: - strlcat(msg, "SG_ERR_DID_RESET", MAXERRDETAIL); - break; - case 0x09: - strlcat(msg, "SG_ERR_DID_BAD_INTR (unexpected)", MAXERRDETAIL); - break; - case 0x0a: - strlcat(msg, "SG_ERR_DID_PASSTHROUGH", MAXERRDETAIL); - break; - case 0x0b: - strlcat(msg, "SG_ERR_DID_SOFT_ERROR (driver retry?)", MAXERRDETAIL); - break; - case 0x0c: - strlcat(msg, "SG_ERR_DID_IMM_RETRY", MAXERRDETAIL); - break; - case 0x0d: - strlcat(msg, "SG_ERR_DID_REQUEUE", MAXERRDETAIL); - break; - case 0x0e: - strlcat(msg, "SG_ERR_DID_TRANSPORT_DISRUPTED", MAXERRDETAIL); - break; - case 0x0f: - strlcat(msg, "SG_ERR_DID_TRANSPORT_FAILFAST", MAXERRDETAIL); - break; - case 0x10: - strlcat(msg, "SG_ERR_DID_TARGET_FAILURE", MAXERRDETAIL); - break; - case 0x11: - strlcat(msg, "SG_ERR_DID_NEXUS_FAILURE", MAXERRDETAIL); - break; - case 0x12: - strlcat(msg, "SG_ERR_DID_ALLOC_FAILURE", MAXERRDETAIL); - break; - case 0x13: - strlcat(msg, "SG_ERR_DID_MEDIUM_ERROR", MAXERRDETAIL); - break; - default: - strlcat(msg, "Unknown", MAXERRDETAIL); - break; - } - strlcat(msg, ". ", MAXERRDETAIL); - } - if (hdr->driver_status) { - snprintf(msgchunk, MAXMSGCHUNK, "SG Driver Status: 0x%02x; ", hdr->driver_status); - strlcat(msg, msgchunk, MAXERRDETAIL); - switch (hdr->driver_status & 0x0F) { - case 0x01: - strlcat(msg, "SG_ERR_DRIVER_BUSY", MAXERRDETAIL); - break; - case 0x02: - strlcat(msg, "SG_ERR_DRIVER_SOFT", MAXERRDETAIL); - break; - case 0x03: - strlcat(msg, "SG_ERR_DRIVER_MEDIA", MAXERRDETAIL); - break; - case 0x04: - strlcat(msg, "SG_ERR_DRIVER_ERROR", MAXERRDETAIL); - break; - case 0x05: - strlcat(msg, "SG_ERR_DRIVER_INVALID", MAXERRDETAIL); - break; - case 0x06: - strlcat(msg, "SG_ERR_DRIVER_TIMEOUT", MAXERRDETAIL); - break; - case 0x07: - strlcat(msg, "SG_ERR_DRIVER_HARD", MAXERRDETAIL); - break; - case 0x08: - strlcat(msg, "SG_ERR_DRIVER_SENSE", MAXERRDETAIL); - break; - default: - strlcat(msg, "Unknown", MAXERRDETAIL); - break; - } - strlcat(msg, "; ", MAXERRDETAIL); - switch (hdr->driver_status & 0xF0) { - case 0x10: - strlcat(msg, "SG_ERR_SUGGEST_RETRY", MAXERRDETAIL); - break; - case 0x20: - strlcat(msg, "SG_ERR_SUGGEST_ABORT", MAXERRDETAIL); - break; - case 0x30: - strlcat(msg, "SG_ERR_SUGGEST_REMAP", MAXERRDETAIL); - break; - case 0x40: - strlcat(msg, "SG_ERR_SUGGEST_DIE", MAXERRDETAIL); - break; - case 0x80: - strlcat(msg, "SG_ERR_SUGGEST_SENSE", MAXERRDETAIL); - break; - } - strlcat(msg, ". ", MAXERRDETAIL); - } - if (hdr->status) { - snprintf(msgchunk, MAXMSGCHUNK, "SG SCSI Status: 0x%02x; ", hdr->status); - strlcat(msg, msgchunk, MAXERRDETAIL); - // SCSI 3 status codes - switch (hdr->status) { - case 0x02: - strlcat(msg, "CHECK_CONDITION", MAXERRDETAIL); - break; - case 0x04: - strlcat(msg, "CONDITION_MET", MAXERRDETAIL); - break; - case 0x08: - strlcat(msg, "BUSY", MAXERRDETAIL); - break; - case 0x10: - strlcat(msg, "INTERMEDIATE", MAXERRDETAIL); - break; - case 0x14: - strlcat(msg, "INTERMEDIATE_CONDITION_MET", MAXERRDETAIL); - break; - case 0x18: - strlcat(msg, "RESERVATION_CONFLICT", MAXERRDETAIL); - break; - case 0x22: - strlcat(msg, "COMMAND_TERMINATED", MAXERRDETAIL); - break; - case 0x28: - strlcat(msg, "TASK_SET_FULL", MAXERRDETAIL); - break; - case 0x30: - strlcat(msg, "ACA_ACTIVE", MAXERRDETAIL); - break; - case 0x40: - strlcat(msg, "TASK_ABORTED", MAXERRDETAIL); - break; - default: - strlcat(msg, "Unknown", MAXERRDETAIL); - break; - } - strlcat(msg, ". ", MAXERRDETAIL); - } - if (hdr->sb_len_wr) { - snprintf(msgchunk, MAXMSGCHUNK, "Sense Data (%d bytes):", hdr->sb_len_wr); - strlcat(msg, msgchunk, MAXERRDETAIL); - for (i = 0; i < hdr->sb_len_wr; i++) { - snprintf(msgchunk, MAXMSGCHUNK, " %02x", hdr->sbp[i]); - strlcat(msg, msgchunk, MAXERRDETAIL); - } - strlcat(msg, ". ", MAXERRDETAIL); - } - if (hdr->resid != 0) { - snprintf(msgchunk, MAXMSGCHUNK, "SG Driver: %d bytes out of %d not transferred. ", hdr->resid, hdr->dxfer_len); - strlcat(msg, msgchunk, MAXERRDETAIL); - ret = msg; - } - } - - if (!ret) - ret = strdup("SG Driver did not report a Host, Driver or Device check"); - - return ret; -} - -/* - * get max file size from read capacity. - */ -static int fio_sgio_get_file_size(struct thread_data *td, struct fio_file *f) -{ - /* - * get_file_size is being called even before sgio_init is - * called, so none of the sg_io structures are - * initialized in the thread_data yet. So we need to do the - * ReadCapacity without any of those helpers. One of the effects - * is that ReadCapacity may get called 4 times on each open: - * readcap(10) followed by readcap(16) if needed - just to get - * the file size after the init occurs - it will be called - * again when "type_check" is called during structure - * initialization I'm not sure how to prevent this little - * inefficiency. - */ - unsigned int bs = 0; - unsigned long long max_lba = 0; - int ret; - - if (fio_file_size_known(f)) - return 0; - - if (f->filetype != FIO_TYPE_BLOCK && f->filetype != FIO_TYPE_CHAR) { - td_verror(td, EINVAL, "wrong file type"); - log_err("ioengine sg only works on block or character devices\n"); - return 1; - } - - ret = fio_sgio_read_capacity(td, &bs, &max_lba); - if (ret ) { - td_verror(td, td->error, "fio_sgio_read_capacity"); - log_err("ioengine sg unable to successfully execute read capacity to get block size and maximum lba\n"); - return 1; - } - - f->real_file_size = (max_lba + 1) * bs; - fio_file_set_size_known(f); - return 0; -} - - -static struct ioengine_ops ioengine = { - .name = "sg", - .version = FIO_IOOPS_VERSION, - .init = fio_sgio_init, - .prep = fio_sgio_prep, - .queue = fio_sgio_queue, - .getevents = fio_sgio_getevents, - .errdetails = fio_sgio_errdetails, - .event = fio_sgio_event, - .cleanup = fio_sgio_cleanup, - .open_file = fio_sgio_open, - .close_file = generic_close_file, - .get_file_size = fio_sgio_get_file_size, - .flags = FIO_SYNCIO | FIO_RAWIO, -}; - -#else /* FIO_HAVE_SGIO */ - -/* - * When we have a proper configure system in place, we simply wont build - * and install this io engine. For now install a crippled version that - * just complains and fails to load. - */ -static int fio_sgio_init(struct thread_data fio_unused *td) -{ - log_err("fio: ioengine sg not available\n"); - return 1; -} - -static struct ioengine_ops ioengine = { - .name = "sg", - .version = FIO_IOOPS_VERSION, - .init = fio_sgio_init, -}; - -#endif - -static void fio_init fio_sgio_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_sgio_unregister(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/skeleton_external.c b/engines/skeleton_external.c deleted file mode 100644 index 4bebcc45..00000000 --- a/engines/skeleton_external.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Skeleton for a sample external io engine - * - * Should be compiled with: - * - * gcc -Wall -O2 -g -shared -rdynamic -fPIC -o engine.o engine.c - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <assert.h> - -#include "../fio.h" - -/* - * The core of the module is identical to the ones included with fio, - * read those. You cannot use register_ioengine() and unregister_ioengine() - * for external modules, they should be gotten through dlsym() - */ - -/* - * The ->event() hook is called to match an event number with an io_u. - * After the core has called ->getevents() and it has returned eg 3, - * the ->event() hook must return the 3 events that have completed for - * subsequent calls to ->event() with [0-2]. Required. - */ -static struct io_u *fio_skeleton_event(struct thread_data *td, int event) -{ - return NULL; -} - -/* - * The ->getevents() hook is used to reap completion events from an async - * io engine. It returns the number of completed events since the last call, - * which may then be retrieved by calling the ->event() hook with the event - * numbers. Required. - */ -static int fio_skeleton_getevents(struct thread_data *td, unsigned int min, - unsigned int max, const struct timespec *t) -{ - return 0; -} - -/* - * The ->cancel() hook attempts to cancel the io_u. Only relevant for - * async io engines, and need not be supported. - */ -static int fio_skeleton_cancel(struct thread_data *td, struct io_u *io_u) -{ - return 0; -} - -/* - * The ->queue() hook is responsible for initiating io on the io_u - * being passed in. If the io engine is a synchronous one, io may complete - * before ->queue() returns. Required. - * - * The io engine must transfer in the direction noted by io_u->ddir - * to the buffer pointed to by io_u->xfer_buf for as many bytes as - * io_u->xfer_buflen. Residual data count may be set in io_u->resid - * for a short read/write. - */ -static int fio_skeleton_queue(struct thread_data *td, struct io_u *io_u) -{ - /* - * Double sanity check to catch errant write on a readonly setup - */ - fio_ro_check(td, io_u); - - /* - * Could return FIO_Q_QUEUED for a queued request, - * FIO_Q_COMPLETED for a completed request, and FIO_Q_BUSY - * if we could queue no more at this point (you'd have to - * define ->commit() to handle that. - */ - return FIO_Q_COMPLETED; -} - -/* - * The ->prep() function is called for each io_u prior to being submitted - * with ->queue(). This hook allows the io engine to perform any - * preparatory actions on the io_u, before being submitted. Not required. - */ -static int fio_skeleton_prep(struct thread_data *td, struct io_u *io_u) -{ - return 0; -} - -/* - * The init function is called once per thread/process, and should set up - * any structures that this io engine requires to keep track of io. Not - * required. - */ -static int fio_skeleton_init(struct thread_data *td) -{ - return 0; -} - -/* - * This is paired with the ->init() function and is called when a thread is - * done doing io. Should tear down anything setup by the ->init() function. - * Not required. - */ -static void fio_skeleton_cleanup(struct thread_data *td) -{ -} - -/* - * Hook for opening the given file. Unless the engine has special - * needs, it usually just provides generic_open_file() as the handler. - */ -static int fio_skeleton_open(struct thread_data *td, struct fio_file *f) -{ - return generic_open_file(td, f); -} - -/* - * Hook for closing a file. See fio_skeleton_open(). - */ -static int fio_skeleton_close(struct thread_data *td, struct fio_file *f) -{ - return generic_close_file(td, f); -} - -/* - * Note that the structure is exported, so that fio can get it via - * dlsym(..., "ioengine"); for (and only for) external engines. - */ -struct ioengine_ops ioengine = { - .name = "engine_name", - .version = FIO_IOOPS_VERSION, - .init = fio_skeleton_init, - .prep = fio_skeleton_prep, - .queue = fio_skeleton_queue, - .cancel = fio_skeleton_cancel, - .getevents = fio_skeleton_getevents, - .event = fio_skeleton_event, - .cleanup = fio_skeleton_cleanup, - .open_file = fio_skeleton_open, - .close_file = fio_skeleton_close, -}; diff --git a/engines/solarisaio.c b/engines/solarisaio.c deleted file mode 100644 index 151f31d4..00000000 --- a/engines/solarisaio.c +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Native Solaris async IO engine - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <signal.h> -#include <errno.h> - -#include "../fio.h" - -#include <sys/asynch.h> - -struct solarisaio_data { - struct io_u **aio_events; - unsigned int aio_pending; - unsigned int nr; - unsigned int max_depth; -}; - -static int fio_solarisaio_cancel(struct thread_data fio_unused *td, - struct io_u *io_u) -{ - return aiocancel(&io_u->resultp); -} - -static int fio_solarisaio_prep(struct thread_data fio_unused *td, - struct io_u *io_u) -{ - struct solarisaio_data *sd = td->io_ops_data; - - io_u->resultp.aio_return = AIO_INPROGRESS; - io_u->engine_data = sd; - return 0; -} - -static void wait_for_event(struct timeval *tv) -{ - struct solarisaio_data *sd; - struct io_u *io_u; - aio_result_t *res; - - res = aiowait(tv); - if (res == (aio_result_t *) -1) { - int err = errno; - - if (err != EINVAL) { - log_err("fio: solarisaio got %d in aiowait\n", err); - exit(err); - } - return; - } else if (!res) - return; - - io_u = container_of(res, struct io_u, resultp); - sd = io_u->engine_data; - - if (io_u->resultp.aio_return >= 0) { - io_u->resid = io_u->xfer_buflen - io_u->resultp.aio_return; - io_u->error = 0; - } else - io_u->error = io_u->resultp.aio_errno; - - /* - * For SIGIO, we need a write barrier between the two, so that - * the ->aio_pending store is seen after the ->aio_events store - */ - sd->aio_events[sd->aio_pending] = io_u; - write_barrier(); - sd->aio_pending++; - sd->nr--; -} - -static int fio_solarisaio_getevents(struct thread_data *td, unsigned int min, - unsigned int max, const struct timespec *t) -{ - struct solarisaio_data *sd = td->io_ops_data; - struct timeval tv; - int ret; - - if (!min || !t) { - tv.tv_sec = 0; - tv.tv_usec = 0; - } else { - tv.tv_sec = t->tv_sec; - tv.tv_usec = t->tv_nsec / 1000; - } - - while (sd->aio_pending < min) - wait_for_event(&tv); - - /* - * should be OK without locking, as int operations should be atomic - */ - ret = sd->aio_pending; - sd->aio_pending -= ret; - return ret; -} - -static struct io_u *fio_solarisaio_event(struct thread_data *td, int event) -{ - struct solarisaio_data *sd = td->io_ops_data; - - return sd->aio_events[event]; -} - -static int fio_solarisaio_queue(struct thread_data fio_unused *td, - struct io_u *io_u) -{ - struct solarisaio_data *sd = td->io_ops_data; - struct fio_file *f = io_u->file; - off_t off; - int ret; - - fio_ro_check(td, io_u); - - if (io_u->ddir == DDIR_SYNC) { - if (sd->nr) - return FIO_Q_BUSY; - if (fsync(f->fd) < 0) - io_u->error = errno; - - return FIO_Q_COMPLETED; - } - - if (io_u->ddir == DDIR_DATASYNC) { - if (sd->nr) - return FIO_Q_BUSY; - if (fdatasync(f->fd) < 0) - io_u->error = errno; - - return FIO_Q_COMPLETED; - } - - if (sd->nr == sd->max_depth) - return FIO_Q_BUSY; - - off = io_u->offset; - if (io_u->ddir == DDIR_READ) - ret = aioread(f->fd, io_u->xfer_buf, io_u->xfer_buflen, off, - SEEK_SET, &io_u->resultp); - else - ret = aiowrite(f->fd, io_u->xfer_buf, io_u->xfer_buflen, off, - SEEK_SET, &io_u->resultp); - if (ret) { - io_u->error = errno; - td_verror(td, io_u->error, "xfer"); - return FIO_Q_COMPLETED; - } - - sd->nr++; - return FIO_Q_QUEUED; -} - -static void fio_solarisaio_cleanup(struct thread_data *td) -{ - struct solarisaio_data *sd = td->io_ops_data; - - if (sd) { - free(sd->aio_events); - free(sd); - } -} - -/* - * Set USE_SIGNAL_COMPLETIONS to use SIGIO as completion events. - */ -#ifdef USE_SIGNAL_COMPLETIONS -static void fio_solarisaio_sigio(int sig) -{ - wait_for_event(NULL); -} - -static void fio_solarisaio_init_sigio(void) -{ - struct sigaction act; - - memset(&act, 0, sizeof(act)); - act.sa_handler = fio_solarisaio_sigio; - act.sa_flags = SA_RESTART; - sigaction(SIGIO, &act, NULL); -} -#endif - -static int fio_solarisaio_init(struct thread_data *td) -{ - struct solarisaio_data *sd = malloc(sizeof(*sd)); - unsigned int max_depth; - - max_depth = td->o.iodepth; - if (max_depth > MAXASYNCHIO) { - max_depth = MAXASYNCHIO; - log_info("fio: lower depth to %d due to OS constraints\n", - max_depth); - } - - memset(sd, 0, sizeof(*sd)); - sd->aio_events = malloc(max_depth * sizeof(struct io_u *)); - memset(sd->aio_events, 0, max_depth * sizeof(struct io_u *)); - sd->max_depth = max_depth; - -#ifdef USE_SIGNAL_COMPLETIONS - fio_solarisaio_init_sigio(); -#endif - - td->io_ops_data = sd; - return 0; -} - -static struct ioengine_ops ioengine = { - .name = "solarisaio", - .version = FIO_IOOPS_VERSION, - .init = fio_solarisaio_init, - .prep = fio_solarisaio_prep, - .queue = fio_solarisaio_queue, - .cancel = fio_solarisaio_cancel, - .getevents = fio_solarisaio_getevents, - .event = fio_solarisaio_event, - .cleanup = fio_solarisaio_cleanup, - .open_file = generic_open_file, - .close_file = generic_close_file, - .get_file_size = generic_get_file_size, -}; - -static void fio_init fio_solarisaio_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_solarisaio_unregister(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/splice.c b/engines/splice.c deleted file mode 100644 index eba093e8..00000000 --- a/engines/splice.c +++ /dev/null @@ -1,311 +0,0 @@ -/* - * splice engine - * - * IO engine that transfers data by doing splices to/from pipes and - * the files. - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <assert.h> -#include <sys/poll.h> -#include <sys/mman.h> - -#include "../fio.h" - -struct spliceio_data { - int pipe[2]; - int vmsplice_to_user; - int vmsplice_to_user_map; -}; - -/* - * vmsplice didn't use to support splicing to user space, this is the old - * variant of getting that job done. Doesn't make a lot of sense, but it - * uses splices to move data from the source into a pipe. - */ -static int fio_splice_read_old(struct thread_data *td, struct io_u *io_u) -{ - struct spliceio_data *sd = td->io_ops_data; - struct fio_file *f = io_u->file; - int ret, ret2, buflen; - off_t offset; - void *p; - - offset = io_u->offset; - buflen = io_u->xfer_buflen; - p = io_u->xfer_buf; - while (buflen) { - int this_len = buflen; - - if (this_len > SPLICE_DEF_SIZE) - this_len = SPLICE_DEF_SIZE; - - ret = splice(f->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE); - if (ret < 0) { - if (errno == ENODATA || errno == EAGAIN) - continue; - - return -errno; - } - - buflen -= ret; - - while (ret) { - ret2 = read(sd->pipe[0], p, ret); - if (ret2 < 0) - return -errno; - - ret -= ret2; - p += ret2; - } - } - - return io_u->xfer_buflen; -} - -/* - * We can now vmsplice into userspace, so do the transfer by splicing into - * a pipe and vmsplicing that into userspace. - */ -static int fio_splice_read(struct thread_data *td, struct io_u *io_u) -{ - struct spliceio_data *sd = td->io_ops_data; - struct fio_file *f = io_u->file; - struct iovec iov; - int ret , buflen, mmap_len; - off_t offset; - void *p, *map; - - ret = 0; - offset = io_u->offset; - mmap_len = buflen = io_u->xfer_buflen; - - if (sd->vmsplice_to_user_map) { - map = mmap(io_u->xfer_buf, buflen, PROT_READ, MAP_PRIVATE|OS_MAP_ANON, 0, 0); - if (map == MAP_FAILED) { - td_verror(td, errno, "mmap io_u"); - return -1; - } - - p = map; - } else { - map = NULL; - p = io_u->xfer_buf; - } - - while (buflen) { - int this_len = buflen; - int flags = 0; - - if (this_len > SPLICE_DEF_SIZE) { - this_len = SPLICE_DEF_SIZE; - flags = SPLICE_F_MORE; - } - - ret = splice(f->fd, &offset, sd->pipe[1], NULL, this_len,flags); - if (ret < 0) { - if (errno == ENODATA || errno == EAGAIN) - continue; - - td_verror(td, errno, "splice-from-fd"); - break; - } - - buflen -= ret; - iov.iov_base = p; - iov.iov_len = ret; - - while (iov.iov_len) { - ret = vmsplice(sd->pipe[0], &iov, 1, SPLICE_F_MOVE); - if (ret < 0) { - if (errno == EFAULT && - sd->vmsplice_to_user_map) { - sd->vmsplice_to_user_map = 0; - munmap(map, mmap_len); - map = NULL; - p = io_u->xfer_buf; - iov.iov_base = p; - continue; - } - if (errno == EBADF) { - ret = -EBADF; - break; - } - td_verror(td, errno, "vmsplice"); - break; - } else if (!ret) { - td_verror(td, ENODATA, "vmsplice"); - ret = -1; - break; - } - - iov.iov_len -= ret; - iov.iov_base += ret; - p += ret; - } - if (ret < 0) - break; - } - - if (sd->vmsplice_to_user_map && munmap(map, mmap_len) < 0) { - td_verror(td, errno, "munnap io_u"); - return -1; - } - if (ret < 0) - return ret; - - return io_u->xfer_buflen; -} - -/* - * For splice writing, we can vmsplice our data buffer directly into a - * pipe and then splice that to a file. - */ -static int fio_splice_write(struct thread_data *td, struct io_u *io_u) -{ - struct spliceio_data *sd = td->io_ops_data; - struct iovec iov = { - .iov_base = io_u->xfer_buf, - .iov_len = io_u->xfer_buflen, - }; - struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, }; - struct fio_file *f = io_u->file; - off_t off = io_u->offset; - int ret, ret2; - - while (iov.iov_len) { - if (poll(&pfd, 1, -1) < 0) - return errno; - - ret = vmsplice(sd->pipe[1], &iov, 1, SPLICE_F_NONBLOCK); - if (ret < 0) - return -errno; - - iov.iov_len -= ret; - iov.iov_base += ret; - - while (ret) { - ret2 = splice(sd->pipe[0], NULL, f->fd, &off, ret, 0); - if (ret2 < 0) - return -errno; - - ret -= ret2; - } - } - - return io_u->xfer_buflen; -} - -static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct spliceio_data *sd = td->io_ops_data; - int ret = 0; - - fio_ro_check(td, io_u); - - if (io_u->ddir == DDIR_READ) { - if (sd->vmsplice_to_user) { - ret = fio_splice_read(td, io_u); - /* - * This kernel doesn't support vmsplice to user - * space. Reset the vmsplice_to_user flag, so that - * we retry below and don't hit this path again. - */ - if (ret == -EBADF) - sd->vmsplice_to_user = 0; - } - if (!sd->vmsplice_to_user) - ret = fio_splice_read_old(td, io_u); - } else if (io_u->ddir == DDIR_WRITE) - ret = fio_splice_write(td, io_u); - else if (io_u->ddir == DDIR_TRIM) - ret = do_io_u_trim(td, io_u); - else - ret = do_io_u_sync(td, io_u); - - if (ret != (int) io_u->xfer_buflen) { - if (ret >= 0) { - io_u->resid = io_u->xfer_buflen - ret; - io_u->error = 0; - return FIO_Q_COMPLETED; - } else - io_u->error = errno; - } - - if (io_u->error) { - td_verror(td, io_u->error, "xfer"); - if (io_u->error == EINVAL) - log_err("fio: looks like splice doesn't work on this" - " file system\n"); - } - - return FIO_Q_COMPLETED; -} - -static void fio_spliceio_cleanup(struct thread_data *td) -{ - struct spliceio_data *sd = td->io_ops_data; - - if (sd) { - close(sd->pipe[0]); - close(sd->pipe[1]); - free(sd); - } -} - -static int fio_spliceio_init(struct thread_data *td) -{ - struct spliceio_data *sd = malloc(sizeof(*sd)); - - if (pipe(sd->pipe) < 0) { - td_verror(td, errno, "pipe"); - free(sd); - return 1; - } - - /* - * Assume this work, we'll reset this if it doesn't - */ - sd->vmsplice_to_user = 1; - - /* - * Works with "real" vmsplice to user, eg mapping pages directly. - * Reset if we fail. - */ - sd->vmsplice_to_user_map = 1; - - /* - * And if vmsplice_to_user works, we definitely need aligned - * buffers. Just set ->odirect to force that. - */ - if (td_read(td)) - td->o.mem_align = 1; - - td->io_ops_data = sd; - return 0; -} - -static struct ioengine_ops ioengine = { - .name = "splice", - .version = FIO_IOOPS_VERSION, - .init = fio_spliceio_init, - .queue = fio_spliceio_queue, - .cleanup = fio_spliceio_cleanup, - .open_file = generic_open_file, - .close_file = generic_close_file, - .get_file_size = generic_get_file_size, - .flags = FIO_SYNCIO | FIO_PIPEIO, -}; - -static void fio_init fio_spliceio_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_spliceio_unregister(void) -{ - unregister_ioengine(&ioengine); -} diff --git a/engines/sync.c b/engines/sync.c deleted file mode 100644 index e76bbbb4..00000000 --- a/engines/sync.c +++ /dev/null @@ -1,472 +0,0 @@ -/* - * sync/psync engine - * - * IO engine that does regular read(2)/write(2) with lseek(2) to transfer - * data and IO engine that does regular pread(2)/pwrite(2) to transfer data. - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <sys/uio.h> -#include <errno.h> -#include <assert.h> - -#include "../fio.h" -#include "../optgroup.h" - -/* - * Sync engine uses engine_data to store last offset - */ -#define LAST_POS(f) ((f)->engine_pos) - -struct syncio_data { - struct iovec *iovecs; - struct io_u **io_us; - unsigned int queued; - unsigned int events; - unsigned long queued_bytes; - - unsigned long long last_offset; - struct fio_file *last_file; - enum fio_ddir last_ddir; -}; - -#ifdef FIO_HAVE_PWRITEV2 -struct psyncv2_options { - void *pad; - unsigned int hipri; -}; - -static struct fio_option options[] = { - { - .name = "hipri", - .lname = "RWF_HIPRI", - .type = FIO_OPT_STR_SET, - .off1 = offsetof(struct psyncv2_options, hipri), - .help = "Set RWF_HIPRI for pwritev2/preadv2", - .category = FIO_OPT_C_ENGINE, - .group = FIO_OPT_G_INVALID, - }, - { - .name = NULL, - }, -}; -#endif - -static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - - if (!ddir_rw(io_u->ddir)) - return 0; - - if (LAST_POS(f) != -1ULL && LAST_POS(f) == io_u->offset) - return 0; - - if (lseek(f->fd, io_u->offset, SEEK_SET) == -1) { - td_verror(td, errno, "lseek"); - return 1; - } - - return 0; -} - -static int fio_io_end(struct thread_data *td, struct io_u *io_u, int ret) -{ - if (io_u->file && ret >= 0 && ddir_rw(io_u->ddir)) - LAST_POS(io_u->file) = io_u->offset + ret; - - if (ret != (int) io_u->xfer_buflen) { - if (ret >= 0) { - io_u->resid = io_u->xfer_buflen - ret; - io_u->error = 0; - return FIO_Q_COMPLETED; - } else - io_u->error = errno; - } - - if (io_u->error) { - io_u_log_error(td, io_u); - td_verror(td, io_u->error, "xfer"); - } - - return FIO_Q_COMPLETED; -} - -#ifdef CONFIG_PWRITEV -static int fio_pvsyncio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct syncio_data *sd = td->io_ops_data; - struct iovec *iov = &sd->iovecs[0]; - struct fio_file *f = io_u->file; - int ret; - - fio_ro_check(td, io_u); - - iov->iov_base = io_u->xfer_buf; - iov->iov_len = io_u->xfer_buflen; - - if (io_u->ddir == DDIR_READ) - ret = preadv(f->fd, iov, 1, io_u->offset); - else if (io_u->ddir == DDIR_WRITE) - ret = pwritev(f->fd, iov, 1, io_u->offset); - else if (io_u->ddir == DDIR_TRIM) { - do_io_u_trim(td, io_u); - return FIO_Q_COMPLETED; - } else - ret = do_io_u_sync(td, io_u); - - return fio_io_end(td, io_u, ret); -} -#endif - -#ifdef FIO_HAVE_PWRITEV2 -static int fio_pvsyncio2_queue(struct thread_data *td, struct io_u *io_u) -{ - struct syncio_data *sd = td->io_ops_data; - struct psyncv2_options *o = td->eo; - struct iovec *iov = &sd->iovecs[0]; - struct fio_file *f = io_u->file; - int ret, flags = 0; - - fio_ro_check(td, io_u); - - if (o->hipri) - flags |= RWF_HIPRI; - - iov->iov_base = io_u->xfer_buf; - iov->iov_len = io_u->xfer_buflen; - - if (io_u->ddir == DDIR_READ) - ret = preadv2(f->fd, iov, 1, io_u->offset, flags); - else if (io_u->ddir == DDIR_WRITE) - ret = pwritev2(f->fd, iov, 1, io_u->offset, flags); - else if (io_u->ddir == DDIR_TRIM) { - do_io_u_trim(td, io_u); - return FIO_Q_COMPLETED; - } else - ret = do_io_u_sync(td, io_u); - - return fio_io_end(td, io_u, ret); -} -#endif - - -static int fio_psyncio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - int ret; - - fio_ro_check(td, io_u); - - if (io_u->ddir == DDIR_READ) - ret = pread(f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset); - else if (io_u->ddir == DDIR_WRITE) - ret = pwrite(f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset); - else if (io_u->ddir == DDIR_TRIM) { - do_io_u_trim(td, io_u); - return FIO_Q_COMPLETED; - } else - ret = do_io_u_sync(td, io_u); - - return fio_io_end(td, io_u, ret); -} - -static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct fio_file *f = io_u->file; - int ret; - - fio_ro_check(td, io_u); - - if (io_u->ddir == DDIR_READ) - ret = read(f->fd, io_u->xfer_buf, io_u->xfer_buflen); - else if (io_u->ddir == DDIR_WRITE) - ret = write(f->fd, io_u->xfer_buf, io_u->xfer_buflen); - else if (io_u->ddir == DDIR_TRIM) { - do_io_u_trim(td, io_u); - return FIO_Q_COMPLETED; - } else - ret = do_io_u_sync(td, io_u); - - return fio_io_end(td, io_u, ret); -} - -static int fio_vsyncio_getevents(struct thread_data *td, unsigned int min, - unsigned int max, - const struct timespec fio_unused *t) -{ - struct syncio_data *sd = td->io_ops_data; - int ret; - - if (min) { - ret = sd->events; - sd->events = 0; - } else - ret = 0; - - dprint(FD_IO, "vsyncio_getevents: min=%d,max=%d: %d\n", min, max, ret); - return ret; -} - -static struct io_u *fio_vsyncio_event(struct thread_data *td, int event) -{ - struct syncio_data *sd = td->io_ops_data; - - return sd->io_us[event]; -} - -static int fio_vsyncio_append(struct thread_data *td, struct io_u *io_u) -{ - struct syncio_data *sd = td->io_ops_data; - - if (ddir_sync(io_u->ddir)) - return 0; - - if (io_u->offset == sd->last_offset && io_u->file == sd->last_file && - io_u->ddir == sd->last_ddir) - return 1; - - return 0; -} - -static void fio_vsyncio_set_iov(struct syncio_data *sd, struct io_u *io_u, - int idx) -{ - sd->io_us[idx] = io_u; - sd->iovecs[idx].iov_base = io_u->xfer_buf; - sd->iovecs[idx].iov_len = io_u->xfer_buflen; - sd->last_offset = io_u->offset + io_u->xfer_buflen; - sd->last_file = io_u->file; - sd->last_ddir = io_u->ddir; - sd->queued_bytes += io_u->xfer_buflen; - sd->queued++; -} - -static int fio_vsyncio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct syncio_data *sd = td->io_ops_data; - - fio_ro_check(td, io_u); - - if (!fio_vsyncio_append(td, io_u)) { - dprint(FD_IO, "vsyncio_queue: no append (%d)\n", sd->queued); - /* - * If we can't append and have stuff queued, tell fio to - * commit those first and then retry this io - */ - if (sd->queued) - return FIO_Q_BUSY; - if (ddir_sync(io_u->ddir)) { - int ret = do_io_u_sync(td, io_u); - - return fio_io_end(td, io_u, ret); - } - - sd->queued = 0; - sd->queued_bytes = 0; - fio_vsyncio_set_iov(sd, io_u, 0); - } else { - if (sd->queued == td->o.iodepth) { - dprint(FD_IO, "vsyncio_queue: max depth %d\n", sd->queued); - return FIO_Q_BUSY; - } - - dprint(FD_IO, "vsyncio_queue: append\n"); - fio_vsyncio_set_iov(sd, io_u, sd->queued); - } - - dprint(FD_IO, "vsyncio_queue: depth now %d\n", sd->queued); - return FIO_Q_QUEUED; -} - -/* - * Check that we transferred all bytes, or saw an error, etc - */ -static int fio_vsyncio_end(struct thread_data *td, ssize_t bytes) -{ - struct syncio_data *sd = td->io_ops_data; - struct io_u *io_u; - unsigned int i; - int err; - - /* - * transferred everything, perfect - */ - if (bytes == sd->queued_bytes) - return 0; - - err = errno; - for (i = 0; i < sd->queued; i++) { - io_u = sd->io_us[i]; - - if (bytes == -1) { - io_u->error = err; - } else { - unsigned int this_io; - - this_io = bytes; - if (this_io > io_u->xfer_buflen) - this_io = io_u->xfer_buflen; - - io_u->resid = io_u->xfer_buflen - this_io; - io_u->error = 0; - bytes -= this_io; - } - } - - if (bytes == -1) { - td_verror(td, err, "xfer vsync"); - return -err; - } - - return 0; -} - -static int fio_vsyncio_commit(struct thread_data *td) -{ - struct syncio_data *sd = td->io_ops_data; - struct fio_file *f; - ssize_t ret; - - if (!sd->queued) - return 0; - - io_u_mark_submit(td, sd->queued); - f = sd->last_file; - - if (lseek(f->fd, sd->io_us[0]->offset, SEEK_SET) == -1) { - int err = -errno; - - td_verror(td, errno, "lseek"); - return err; - } - - if (sd->last_ddir == DDIR_READ) - ret = readv(f->fd, sd->iovecs, sd->queued); - else - ret = writev(f->fd, sd->iovecs, sd->queued); - - dprint(FD_IO, "vsyncio_commit: %d\n", (int) ret); - sd->events = sd->queued; - sd->queued = 0; - return fio_vsyncio_end(td, ret); -} - -static int fio_vsyncio_init(struct thread_data *td) -{ - struct syncio_data *sd; - - sd = malloc(sizeof(*sd)); - memset(sd, 0, sizeof(*sd)); - sd->last_offset = -1ULL; - sd->iovecs = malloc(td->o.iodepth * sizeof(struct iovec)); - sd->io_us = malloc(td->o.iodepth * sizeof(struct io_u *)); - - td->io_ops_data = sd; - return 0; -} - -static void fio_vsyncio_cleanup(struct thread_data *td) -{ - struct syncio_data *sd = td->io_ops_data; - - if (sd) { - free(sd->iovecs); - free(sd->io_us); - free(sd); - } -} - -static struct ioengine_ops ioengine_rw = { - .name = "sync", - .version = FIO_IOOPS_VERSION, - .prep = fio_syncio_prep, - .queue = fio_syncio_queue, - .open_file = generic_open_file, - .close_file = generic_close_file, - .get_file_size = generic_get_file_size, - .flags = FIO_SYNCIO, -}; - -static struct ioengine_ops ioengine_prw = { - .name = "psync", - .version = FIO_IOOPS_VERSION, - .queue = fio_psyncio_queue, - .open_file = generic_open_file, - .close_file = generic_close_file, - .get_file_size = generic_get_file_size, - .flags = FIO_SYNCIO, -}; - -static struct ioengine_ops ioengine_vrw = { - .name = "vsync", - .version = FIO_IOOPS_VERSION, - .init = fio_vsyncio_init, - .cleanup = fio_vsyncio_cleanup, - .queue = fio_vsyncio_queue, - .commit = fio_vsyncio_commit, - .event = fio_vsyncio_event, - .getevents = fio_vsyncio_getevents, - .open_file = generic_open_file, - .close_file = generic_close_file, - .get_file_size = generic_get_file_size, - .flags = FIO_SYNCIO, -}; - -#ifdef CONFIG_PWRITEV -static struct ioengine_ops ioengine_pvrw = { - .name = "pvsync", - .version = FIO_IOOPS_VERSION, - .init = fio_vsyncio_init, - .cleanup = fio_vsyncio_cleanup, - .queue = fio_pvsyncio_queue, - .open_file = generic_open_file, - .close_file = generic_close_file, - .get_file_size = generic_get_file_size, - .flags = FIO_SYNCIO, -}; -#endif - -#ifdef FIO_HAVE_PWRITEV2 -static struct ioengine_ops ioengine_pvrw2 = { - .name = "pvsync2", - .version = FIO_IOOPS_VERSION, - .init = fio_vsyncio_init, - .cleanup = fio_vsyncio_cleanup, - .queue = fio_pvsyncio2_queue, - .open_file = generic_open_file, - .close_file = generic_close_file, - .get_file_size = generic_get_file_size, - .flags = FIO_SYNCIO, - .options = options, - .option_struct_size = sizeof(struct psyncv2_options), -}; -#endif - -static void fio_init fio_syncio_register(void) -{ - register_ioengine(&ioengine_rw); - register_ioengine(&ioengine_prw); - register_ioengine(&ioengine_vrw); -#ifdef CONFIG_PWRITEV - register_ioengine(&ioengine_pvrw); -#endif -#ifdef FIO_HAVE_PWRITEV2 - register_ioengine(&ioengine_pvrw2); -#endif -} - -static void fio_exit fio_syncio_unregister(void) -{ - unregister_ioengine(&ioengine_rw); - unregister_ioengine(&ioengine_prw); - unregister_ioengine(&ioengine_vrw); -#ifdef CONFIG_PWRITEV - unregister_ioengine(&ioengine_pvrw); -#endif -#ifdef FIO_HAVE_PWRITEV2 - unregister_ioengine(&ioengine_pvrw2); -#endif -} diff --git a/engines/windowsaio.c b/engines/windowsaio.c deleted file mode 100644 index f5cb0483..00000000 --- a/engines/windowsaio.c +++ /dev/null @@ -1,449 +0,0 @@ -/* - * windowsaio engine - * - * IO engine using Windows IO Completion Ports. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <signal.h> -#include <errno.h> - -#include "../fio.h" - -typedef BOOL (WINAPI *CANCELIOEX)(HANDLE hFile, LPOVERLAPPED lpOverlapped); - -int geterrno_from_win_error (DWORD code, int deferrno); - -struct fio_overlapped { - OVERLAPPED o; - struct io_u *io_u; - BOOL io_complete; -}; - -struct windowsaio_data { - struct io_u **aio_events; - HANDLE iocp; - HANDLE iothread; - HANDLE iocomplete_event; - BOOL iothread_running; -}; - -struct thread_ctx { - HANDLE iocp; - struct windowsaio_data *wd; -}; - -static BOOL timeout_expired(DWORD start_count, DWORD end_count); -static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min, - unsigned int max, const struct timespec *t); -static struct io_u *fio_windowsaio_event(struct thread_data *td, int event); -static int fio_windowsaio_queue(struct thread_data *td, - struct io_u *io_u); -static void fio_windowsaio_cleanup(struct thread_data *td); -static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter); -static int fio_windowsaio_init(struct thread_data *td); -static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f); -static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f); - -static int fio_windowsaio_init(struct thread_data *td) -{ - struct windowsaio_data *wd; - int rc = 0; - - wd = calloc(1, sizeof(struct windowsaio_data)); - if (wd == NULL) { - log_err("windowsaio: failed to allocate memory for engine data\n"); - rc = 1; - } - - if (!rc) { - wd->aio_events = malloc(td->o.iodepth * sizeof(struct io_u*)); - if (wd->aio_events == NULL) { - log_err("windowsaio: failed to allocate memory for aio events list\n"); - rc = 1; - } - } - - if (!rc) { - /* Create an auto-reset event */ - wd->iocomplete_event = CreateEvent(NULL, FALSE, FALSE, NULL); - if (wd->iocomplete_event == NULL) { - log_err("windowsaio: failed to create io complete event handle\n"); - rc = 1; - } - } - - if (rc) { - if (wd != NULL) { - if (wd->aio_events != NULL) - free(wd->aio_events); - - free(wd); - } - } - - td->io_ops_data = wd; - - if (!rc) { - struct thread_ctx *ctx; - struct windowsaio_data *wd; - HANDLE hFile; - - hFile = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); - if (hFile == INVALID_HANDLE_VALUE) { - log_err("windowsaio: failed to create io completion port\n"); - rc = 1; - } - - wd = td->io_ops_data; - wd->iothread_running = TRUE; - wd->iocp = hFile; - - if (!rc) - ctx = malloc(sizeof(struct thread_ctx)); - - if (!rc && ctx == NULL) - { - log_err("windowsaio: failed to allocate memory for thread context structure\n"); - CloseHandle(hFile); - rc = 1; - } - - if (!rc) - { - DWORD threadid; - - ctx->iocp = hFile; - ctx->wd = wd; - wd->iothread = CreateThread(NULL, 0, IoCompletionRoutine, ctx, 0, &threadid); - - if (wd->iothread != NULL) - fio_setaffinity(threadid, td->o.cpumask); - else - log_err("windowsaio: failed to create io completion thread\n"); - } - - if (rc || wd->iothread == NULL) - rc = 1; - } - - return rc; -} - -static void fio_windowsaio_cleanup(struct thread_data *td) -{ - struct windowsaio_data *wd; - - wd = td->io_ops_data; - - if (wd != NULL) { - wd->iothread_running = FALSE; - WaitForSingleObject(wd->iothread, INFINITE); - - CloseHandle(wd->iothread); - CloseHandle(wd->iocomplete_event); - - free(wd->aio_events); - free(wd); - - td->io_ops_data = NULL; - } -} - - -static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f) -{ - int rc = 0; - DWORD flags = FILE_FLAG_POSIX_SEMANTICS | FILE_FLAG_OVERLAPPED; - DWORD sharemode = FILE_SHARE_READ | FILE_SHARE_WRITE; - DWORD openmode = OPEN_ALWAYS; - DWORD access; - - dprint(FD_FILE, "fd open %s\n", f->file_name); - - if (f->filetype == FIO_TYPE_PIPE) { - log_err("windowsaio: pipes are not supported\n"); - return 1; - } - - if (!strcmp(f->file_name, "-")) { - log_err("windowsaio: can't read/write to stdin/out\n"); - return 1; - } - - if (td->o.odirect) - flags |= FILE_FLAG_NO_BUFFERING; - if (td->o.sync_io) - flags |= FILE_FLAG_WRITE_THROUGH; - - /* - * Inform Windows whether we're going to be doing sequential or - * random io so it can tune the Cache Manager - */ - if (td->o.td_ddir == TD_DDIR_READ || - td->o.td_ddir == TD_DDIR_WRITE) - flags |= FILE_FLAG_SEQUENTIAL_SCAN; - else - flags |= FILE_FLAG_RANDOM_ACCESS; - - if (!td_write(td) || read_only) - access = GENERIC_READ; - else - access = (GENERIC_READ | GENERIC_WRITE); - - if (td->o.create_on_open) - openmode = OPEN_ALWAYS; - else - openmode = OPEN_EXISTING; - - f->hFile = CreateFile(f->file_name, access, sharemode, - NULL, openmode, flags, NULL); - - if (f->hFile == INVALID_HANDLE_VALUE) { - log_err("windowsaio: failed to open file \"%s\"\n", f->file_name); - rc = 1; - } - - /* Only set up the completion port and thread if we're not just - * querying the device size */ - if (!rc && td->io_ops_data != NULL) { - struct windowsaio_data *wd; - - wd = td->io_ops_data; - - if (CreateIoCompletionPort(f->hFile, wd->iocp, 0, 0) == NULL) { - log_err("windowsaio: failed to create io completion port\n"); - rc = 1; - } - } - - return rc; -} - -static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f) -{ - int rc = 0; - - dprint(FD_FILE, "fd close %s\n", f->file_name); - - if (f->hFile != INVALID_HANDLE_VALUE) { - if (!CloseHandle(f->hFile)) { - log_info("windowsaio: failed to close file handle for \"%s\"\n", f->file_name); - rc = 1; - } - } - - f->hFile = INVALID_HANDLE_VALUE; - return rc; -} - -static BOOL timeout_expired(DWORD start_count, DWORD end_count) -{ - BOOL expired = FALSE; - DWORD current_time; - - current_time = GetTickCount(); - - if ((end_count > start_count) && current_time >= end_count) - expired = TRUE; - else if (current_time < start_count && current_time > end_count) - expired = TRUE; - - return expired; -} - -static struct io_u* fio_windowsaio_event(struct thread_data *td, int event) -{ - struct windowsaio_data *wd = td->io_ops_data; - return wd->aio_events[event]; -} - -static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min, - unsigned int max, - const struct timespec *t) -{ - struct windowsaio_data *wd = td->io_ops_data; - unsigned int dequeued = 0; - struct io_u *io_u; - int i; - struct fio_overlapped *fov; - DWORD start_count = 0; - DWORD end_count = 0; - DWORD status; - DWORD mswait = 250; - - if (t != NULL) { - mswait = (t->tv_sec * 1000) + (t->tv_nsec / 1000000); - start_count = GetTickCount(); - end_count = start_count + (t->tv_sec * 1000) + (t->tv_nsec / 1000000); - } - - do { - io_u_qiter(&td->io_u_all, io_u, i) { - if (!(io_u->flags & IO_U_F_FLIGHT)) - continue; - - fov = (struct fio_overlapped*)io_u->engine_data; - - if (fov->io_complete) { - fov->io_complete = FALSE; - wd->aio_events[dequeued] = io_u; - dequeued++; - } - - } - if (dequeued >= min) - break; - - if (dequeued < min) { - status = WaitForSingleObject(wd->iocomplete_event, mswait); - if (status != WAIT_OBJECT_0 && dequeued >= min) - break; - } - - if (dequeued >= min || (t != NULL && timeout_expired(start_count, end_count))) - break; - } while (1); - - return dequeued; -} - -static int fio_windowsaio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct fio_overlapped *o = io_u->engine_data; - LPOVERLAPPED lpOvl = &o->o; - BOOL success = FALSE; - int rc = FIO_Q_COMPLETED; - - fio_ro_check(td, io_u); - - lpOvl->Internal = 0; - lpOvl->InternalHigh = 0; - lpOvl->Offset = io_u->offset & 0xFFFFFFFF; - lpOvl->OffsetHigh = io_u->offset >> 32; - - switch (io_u->ddir) { - case DDIR_WRITE: - success = WriteFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, NULL, lpOvl); - break; - case DDIR_READ: - success = ReadFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, NULL, lpOvl); - break; - case DDIR_SYNC: - case DDIR_DATASYNC: - case DDIR_SYNC_FILE_RANGE: - success = FlushFileBuffers(io_u->file->hFile); - if (!success) { - log_err("windowsaio: failed to flush file buffers\n"); - io_u->error = win_to_posix_error(GetLastError()); - } - - return FIO_Q_COMPLETED; - break; - case DDIR_TRIM: - log_err("windowsaio: manual TRIM isn't supported on Windows\n"); - io_u->error = 1; - io_u->resid = io_u->xfer_buflen; - return FIO_Q_COMPLETED; - break; - default: - assert(0); - break; - } - - if (success || GetLastError() == ERROR_IO_PENDING) - rc = FIO_Q_QUEUED; - else { - io_u->error = win_to_posix_error(GetLastError()); - io_u->resid = io_u->xfer_buflen; - } - - return rc; -} - -/* Runs as a thread and waits for queued IO to complete */ -static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter) -{ - OVERLAPPED *ovl; - struct fio_overlapped *fov; - struct io_u *io_u; - struct windowsaio_data *wd; - struct thread_ctx *ctx; - ULONG_PTR ulKey = 0; - DWORD bytes; - - ctx = (struct thread_ctx*)lpParameter; - wd = ctx->wd; - - do { - if (!GetQueuedCompletionStatus(ctx->iocp, &bytes, &ulKey, &ovl, 250) && ovl == NULL) - continue; - - fov = CONTAINING_RECORD(ovl, struct fio_overlapped, o); - io_u = fov->io_u; - - if (ovl->Internal == ERROR_SUCCESS) { - io_u->resid = io_u->xfer_buflen - ovl->InternalHigh; - io_u->error = 0; - } else { - io_u->resid = io_u->xfer_buflen; - io_u->error = win_to_posix_error(GetLastError()); - } - - fov->io_complete = TRUE; - SetEvent(wd->iocomplete_event); - } while (ctx->wd->iothread_running); - - CloseHandle(ctx->iocp); - free(ctx); - return 0; -} - -static void fio_windowsaio_io_u_free(struct thread_data *td, struct io_u *io_u) -{ - struct fio_overlapped *o = io_u->engine_data; - - if (o) { - io_u->engine_data = NULL; - free(o); - } -} - -static int fio_windowsaio_io_u_init(struct thread_data *td, struct io_u *io_u) -{ - struct fio_overlapped *o; - - o = malloc(sizeof(*o)); - o->io_complete = FALSE; - o->io_u = io_u; - o->o.hEvent = NULL; - io_u->engine_data = o; - return 0; -} - -static struct ioengine_ops ioengine = { - .name = "windowsaio", - .version = FIO_IOOPS_VERSION, - .init = fio_windowsaio_init, - .queue = fio_windowsaio_queue, - .getevents = fio_windowsaio_getevents, - .event = fio_windowsaio_event, - .cleanup = fio_windowsaio_cleanup, - .open_file = fio_windowsaio_open_file, - .close_file = fio_windowsaio_close_file, - .get_file_size = generic_get_file_size, - .io_u_init = fio_windowsaio_io_u_init, - .io_u_free = fio_windowsaio_io_u_free, -}; - -static void fio_init fio_windowsaio_register(void) -{ - register_ioengine(&ioengine); -} - -static void fio_exit fio_windowsaio_unregister(void) -{ - unregister_ioengine(&ioengine); -} |