summaryrefslogtreecommitdiff
path: root/engines
diff options
context:
space:
mode:
Diffstat (limited to 'engines')
-rw-r--r--engines/binject.c457
-rw-r--r--engines/cpu.c122
-rw-r--r--engines/dev-dax.c348
-rw-r--r--engines/e4defrag.c218
-rw-r--r--engines/falloc.c114
-rw-r--r--engines/ftruncate.c56
-rw-r--r--engines/fusion-aw.c183
-rw-r--r--engines/gfapi.h22
-rw-r--r--engines/glusterfs.c306
-rw-r--r--engines/glusterfs_async.c191
-rw-r--r--engines/glusterfs_sync.c98
-rw-r--r--engines/guasi.c269
-rw-r--r--engines/libaio.c396
-rw-r--r--engines/libhdfs.c420
-rw-r--r--engines/mmap.c272
-rw-r--r--engines/mtd.c209
-rw-r--r--engines/net.c1468
-rw-r--r--engines/null.c157
-rw-r--r--engines/pmemblk.c445
-rw-r--r--engines/posixaio.c266
-rw-r--r--engines/rbd.c689
-rw-r--r--engines/rdma.c1372
-rw-r--r--engines/sg.c856
-rw-r--r--engines/skeleton_external.c143
-rw-r--r--engines/solarisaio.c234
-rw-r--r--engines/splice.c311
-rw-r--r--engines/sync.c472
-rw-r--r--engines/windowsaio.c449
28 files changed, 0 insertions, 10543 deletions
diff --git a/engines/binject.c b/engines/binject.c
deleted file mode 100644
index 932534a0..00000000
--- a/engines/binject.c
+++ /dev/null
@@ -1,457 +0,0 @@
-/*
- * binject engine
- *
- * IO engine that uses the Linux binject interface to directly inject
- * bio's to block devices.
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <assert.h>
-#include <string.h>
-#include <sys/poll.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "../fio.h"
-
-#ifdef FIO_HAVE_BINJECT
-
-struct binject_data {
- struct b_user_cmd *cmds;
- struct io_u **events;
- struct pollfd *pfds;
- int *fd_flags;
-};
-
-struct binject_file {
- unsigned int bs;
- int minor;
- int fd;
-};
-
-static void binject_buc_init(struct binject_data *bd, struct io_u *io_u)
-{
- struct b_user_cmd *buc = &io_u->buc;
-
- memset(buc, 0, sizeof(*buc));
- binject_buc_set_magic(buc);
-
- buc->buf = (unsigned long) io_u->xfer_buf;
- buc->len = io_u->xfer_buflen;
- buc->offset = io_u->offset;
- buc->usr_ptr = (unsigned long) io_u;
-
- buc->flags = B_FLAG_NOIDLE | B_FLAG_UNPLUG;
- assert(buc->buf);
-}
-
-static int pollin_events(struct pollfd *pfds, int fds)
-{
- int i;
-
- for (i = 0; i < fds; i++)
- if (pfds[i].revents & POLLIN)
- return 1;
-
- return 0;
-}
-
-static unsigned int binject_read_commands(struct thread_data *td, void *p,
- int left, int *err)
-{
- struct fio_file *f;
- int i, ret, events;
-
-one_more:
- events = 0;
- for_each_file(td, f, i) {
- struct binject_file *bf = FILE_ENG_DATA(f);
-
- ret = read(bf->fd, p, left * sizeof(struct b_user_cmd));
- if (ret < 0) {
- if (errno == EAGAIN)
- continue;
- *err = -errno;
- td_verror(td, errno, "read");
- break;
- } else if (ret) {
- p += ret;
- events += ret / sizeof(struct b_user_cmd);
- }
- }
-
- if (*err || events)
- return events;
-
- usleep(1000);
- goto one_more;
-}
-
-static int fio_binject_getevents(struct thread_data *td, unsigned int min,
- unsigned int max,
- const struct timespec fio_unused *t)
-{
- struct binject_data *bd = td->io_ops_data;
- int left = max, ret, r = 0, ev_index = 0;
- void *buf = bd->cmds;
- unsigned int i, events;
- struct fio_file *f;
-
- /*
- * Fill in the file descriptors
- */
- for_each_file(td, f, i) {
- struct binject_file *bf = FILE_ENG_DATA(f);
-
- /*
- * don't block for min events == 0
- */
- if (!min)
- bd->fd_flags[i] = fio_set_fd_nonblocking(bf->fd, "binject");
- else
- bd->fd_flags[i] = -1;
-
- bd->pfds[i].fd = bf->fd;
- bd->pfds[i].events = POLLIN;
- }
-
- while (left) {
- while (!min) {
- ret = poll(bd->pfds, td->o.nr_files, -1);
- if (ret < 0) {
- if (!r)
- r = -errno;
- td_verror(td, errno, "poll");
- break;
- } else if (!ret)
- continue;
-
- if (pollin_events(bd->pfds, td->o.nr_files))
- break;
- }
-
- if (r < 0)
- break;
-
- events = binject_read_commands(td, buf, left, &r);
-
- if (r < 0)
- break;
-
- left -= events;
- r += events;
-
- for (i = 0; i < events; i++) {
- struct b_user_cmd *buc = (struct b_user_cmd *) buf + i;
-
- bd->events[ev_index] = (struct io_u *) (unsigned long) buc->usr_ptr;
- ev_index++;
- }
- }
-
- if (!min) {
- for_each_file(td, f, i) {
- struct binject_file *bf = FILE_ENG_DATA(f);
-
- if (bd->fd_flags[i] == -1)
- continue;
-
- if (fcntl(bf->fd, F_SETFL, bd->fd_flags[i]) < 0)
- log_err("fio: binject failed to restore fcntl flags: %s\n", strerror(errno));
- }
- }
-
- if (r > 0)
- assert(ev_index == r);
-
- return r;
-}
-
-static int fio_binject_doio(struct thread_data *td, struct io_u *io_u)
-{
- struct b_user_cmd *buc = &io_u->buc;
- struct binject_file *bf = FILE_ENG_DATA(io_u->file);
- int ret;
-
- ret = write(bf->fd, buc, sizeof(*buc));
- if (ret < 0)
- return ret;
-
- return FIO_Q_QUEUED;
-}
-
-static int fio_binject_prep(struct thread_data *td, struct io_u *io_u)
-{
- struct binject_data *bd = td->io_ops_data;
- struct b_user_cmd *buc = &io_u->buc;
- struct binject_file *bf = FILE_ENG_DATA(io_u->file);
-
- if (io_u->xfer_buflen & (bf->bs - 1)) {
- log_err("read/write not sector aligned\n");
- return EINVAL;
- }
-
- if (io_u->ddir == DDIR_READ) {
- binject_buc_init(bd, io_u);
- buc->type = B_TYPE_READ;
- } else if (io_u->ddir == DDIR_WRITE) {
- binject_buc_init(bd, io_u);
- if (io_u->flags & IO_U_F_BARRIER)
- buc->type = B_TYPE_WRITEBARRIER;
- else
- buc->type = B_TYPE_WRITE;
- } else if (io_u->ddir == DDIR_TRIM) {
- binject_buc_init(bd, io_u);
- buc->type = B_TYPE_DISCARD;
- } else {
- assert(0);
- }
-
- return 0;
-}
-
-static int fio_binject_queue(struct thread_data *td, struct io_u *io_u)
-{
- int ret;
-
- fio_ro_check(td, io_u);
-
- ret = fio_binject_doio(td, io_u);
-
- if (ret < 0)
- io_u->error = errno;
-
- if (io_u->error) {
- td_verror(td, io_u->error, "xfer");
- return FIO_Q_COMPLETED;
- }
-
- return ret;
-}
-
-static struct io_u *fio_binject_event(struct thread_data *td, int event)
-{
- struct binject_data *bd = td->io_ops_data;
-
- return bd->events[event];
-}
-
-static int binject_open_ctl(struct thread_data *td)
-{
- int fd;
-
- fd = open("/dev/binject-ctl", O_RDWR);
- if (fd < 0)
- td_verror(td, errno, "open binject-ctl");
-
- return fd;
-}
-
-static void binject_unmap_dev(struct thread_data *td, struct binject_file *bf)
-{
- struct b_ioctl_cmd bic;
- int fdb;
-
- if (bf->fd >= 0) {
- close(bf->fd);
- bf->fd = -1;
- }
-
- fdb = binject_open_ctl(td);
- if (fdb < 0)
- return;
-
- bic.minor = bf->minor;
-
- if (ioctl(fdb, B_IOCTL_DEL, &bic) < 0)
- td_verror(td, errno, "binject dev unmap");
-
- close(fdb);
-}
-
-static int binject_map_dev(struct thread_data *td, struct binject_file *bf,
- int fd)
-{
- struct b_ioctl_cmd bic;
- char name[80];
- struct stat sb;
- int fdb, dev_there, loops;
-
- fdb = binject_open_ctl(td);
- if (fdb < 0)
- return 1;
-
- bic.fd = fd;
-
- if (ioctl(fdb, B_IOCTL_ADD, &bic) < 0) {
- td_verror(td, errno, "binject dev map");
- close(fdb);
- return 1;
- }
-
- bf->minor = bic.minor;
-
- sprintf(name, "/dev/binject%u", bf->minor);
-
- /*
- * Wait for udev to create the node...
- */
- dev_there = loops = 0;
- do {
- if (!stat(name, &sb)) {
- dev_there = 1;
- break;
- }
-
- usleep(10000);
- } while (++loops < 100);
-
- close(fdb);
-
- if (!dev_there) {
- log_err("fio: timed out waiting for binject dev\n");
- goto err_unmap;
- }
-
- bf->fd = open(name, O_RDWR);
- if (bf->fd < 0) {
- td_verror(td, errno, "binject dev open");
-err_unmap:
- binject_unmap_dev(td, bf);
- return 1;
- }
-
- return 0;
-}
-
-static int fio_binject_close_file(struct thread_data *td, struct fio_file *f)
-{
- struct binject_file *bf = FILE_ENG_DATA(f);
-
- if (bf) {
- binject_unmap_dev(td, bf);
- free(bf);
- FILE_SET_ENG_DATA(f, NULL);
- return generic_close_file(td, f);
- }
-
- return 0;
-}
-
-static int fio_binject_open_file(struct thread_data *td, struct fio_file *f)
-{
- struct binject_file *bf;
- unsigned int bs;
- int ret;
-
- ret = generic_open_file(td, f);
- if (ret)
- return 1;
-
- if (f->filetype != FIO_TYPE_BLOCK) {
- log_err("fio: binject only works with block devices\n");
- goto err_close;
- }
- if (ioctl(f->fd, BLKSSZGET, &bs) < 0) {
- td_verror(td, errno, "BLKSSZGET");
- goto err_close;
- }
-
- bf = malloc(sizeof(*bf));
- bf->bs = bs;
- bf->minor = bf->fd = -1;
- FILE_SET_ENG_DATA(f, bf);
-
- if (binject_map_dev(td, bf, f->fd)) {
-err_close:
- ret = generic_close_file(td, f);
- return 1;
- }
-
- return 0;
-}
-
-static void fio_binject_cleanup(struct thread_data *td)
-{
- struct binject_data *bd = td->io_ops_data;
-
- if (bd) {
- free(bd->events);
- free(bd->cmds);
- free(bd->fd_flags);
- free(bd->pfds);
- free(bd);
- }
-}
-
-static int fio_binject_init(struct thread_data *td)
-{
- struct binject_data *bd;
-
- bd = malloc(sizeof(*bd));
- memset(bd, 0, sizeof(*bd));
-
- bd->cmds = malloc(td->o.iodepth * sizeof(struct b_user_cmd));
- memset(bd->cmds, 0, td->o.iodepth * sizeof(struct b_user_cmd));
-
- bd->events = malloc(td->o.iodepth * sizeof(struct io_u *));
- memset(bd->events, 0, td->o.iodepth * sizeof(struct io_u *));
-
- bd->pfds = malloc(sizeof(struct pollfd) * td->o.nr_files);
- memset(bd->pfds, 0, sizeof(struct pollfd) * td->o.nr_files);
-
- bd->fd_flags = malloc(sizeof(int) * td->o.nr_files);
- memset(bd->fd_flags, 0, sizeof(int) * td->o.nr_files);
-
- td->io_ops_data = bd;
- return 0;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "binject",
- .version = FIO_IOOPS_VERSION,
- .init = fio_binject_init,
- .prep = fio_binject_prep,
- .queue = fio_binject_queue,
- .getevents = fio_binject_getevents,
- .event = fio_binject_event,
- .cleanup = fio_binject_cleanup,
- .open_file = fio_binject_open_file,
- .close_file = fio_binject_close_file,
- .get_file_size = generic_get_file_size,
- .flags = FIO_RAWIO | FIO_BARRIER | FIO_MEMALIGN,
-};
-
-#else /* FIO_HAVE_BINJECT */
-
-/*
- * When we have a proper configure system in place, we simply wont build
- * and install this io engine. For now install a crippled version that
- * just complains and fails to load.
- */
-static int fio_binject_init(struct thread_data fio_unused *td)
-{
- log_err("fio: ioengine binject not available\n");
- return 1;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "binject",
- .version = FIO_IOOPS_VERSION,
- .init = fio_binject_init,
-};
-
-#endif
-
-static void fio_init fio_binject_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_binject_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/cpu.c b/engines/cpu.c
deleted file mode 100644
index d0b4a895..00000000
--- a/engines/cpu.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * CPU engine
- *
- * Doesn't transfer any data, merely burns CPU cycles according to
- * the settings.
- *
- */
-#include "../fio.h"
-#include "../optgroup.h"
-
-struct cpu_options {
- void *pad;
- unsigned int cpuload;
- unsigned int cpucycle;
- unsigned int exit_io_done;
-};
-
-static struct fio_option options[] = {
- {
- .name = "cpuload",
- .lname = "CPU load",
- .type = FIO_OPT_INT,
- .off1 = offsetof(struct cpu_options, cpuload),
- .help = "Use this percentage of CPU",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_INVALID,
- },
- {
- .name = "cpuchunks",
- .lname = "CPU chunk",
- .type = FIO_OPT_INT,
- .off1 = offsetof(struct cpu_options, cpucycle),
- .help = "Length of the CPU burn cycles (usecs)",
- .def = "50000",
- .parent = "cpuload",
- .hide = 1,
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_INVALID,
- },
- {
- .name = "exit_on_io_done",
- .lname = "Exit when IO threads are done",
- .type = FIO_OPT_BOOL,
- .off1 = offsetof(struct cpu_options, exit_io_done),
- .help = "Exit when IO threads finish",
- .def = "0",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_INVALID,
- },
- {
- .name = NULL,
- },
-};
-
-
-static int fio_cpuio_queue(struct thread_data *td, struct io_u fio_unused *io_u)
-{
- struct cpu_options *co = td->eo;
-
- if (co->exit_io_done && !fio_running_or_pending_io_threads()) {
- td->done = 1;
- return FIO_Q_BUSY;
- }
-
- usec_spin(co->cpucycle);
- return FIO_Q_COMPLETED;
-}
-
-static int fio_cpuio_init(struct thread_data *td)
-{
- struct thread_options *o = &td->o;
- struct cpu_options *co = td->eo;
-
- if (!co->cpuload) {
- td_vmsg(td, EINVAL, "cpu thread needs rate (cpuload=)","cpuio");
- return 1;
- }
-
- if (co->cpuload > 100)
- co->cpuload = 100;
-
- /*
- * set thinktime_sleep and thinktime_spin appropriately
- */
- o->thinktime_blocks = 1;
- o->thinktime_spin = 0;
- o->thinktime = (co->cpucycle * (100 - co->cpuload)) / co->cpuload;
-
- o->nr_files = o->open_files = 1;
-
- log_info("%s: ioengine=%s, cpuload=%u, cpucycle=%u\n",
- td->o.name, td->io_ops->name, co->cpuload, co->cpucycle);
-
- return 0;
-}
-
-static int fio_cpuio_open(struct thread_data fio_unused *td,
- struct fio_file fio_unused *f)
-{
- return 0;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "cpuio",
- .version = FIO_IOOPS_VERSION,
- .queue = fio_cpuio_queue,
- .init = fio_cpuio_init,
- .open_file = fio_cpuio_open,
- .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NOIO,
- .options = options,
- .option_struct_size = sizeof(struct cpu_options),
-};
-
-static void fio_init fio_cpuio_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_cpuio_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/dev-dax.c b/engines/dev-dax.c
deleted file mode 100644
index 235a31e6..00000000
--- a/engines/dev-dax.c
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * device DAX engine
- *
- * IO engine that reads/writes from files by doing memcpy to/from
- * a memory mapped region of DAX enabled device.
- *
- * Copyright (C) 2016 Intel Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License,
- * version 2 as published by the Free Software Foundation..
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-/*
- * device dax engine
- * IO engine that access a DAX device directly for read and write data
- *
- * To use:
- * ioengine=dev-dax
- *
- * Other relevant settings:
- * iodepth=1
- * direct=0 REQUIRED
- * filename=/dev/daxN.N
- * bs=2m
- *
- * direct should be left to 0. Using dev-dax implies that memory access
- * is direct. However, dev-dax does not support O_DIRECT flag by design
- * since it is not necessary.
- *
- * bs should adhere to the device dax alignment at minimally.
- *
- * libpmem.so
- * By default, the dev-dax engine will let the system find the libpmem.so
- * that it uses. You can use an alternative libpmem by setting the
- * FIO_PMEM_LIB environment variable to the full path to the desired
- * libpmem.so.
- */
-
-#include <stdio.h>
-#include <limits.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/sysmacros.h>
-#include <libgen.h>
-#include <libpmem.h>
-
-#include "../fio.h"
-#include "../verify.h"
-
-/*
- * Limits us to 1GiB of mapped files in total to model after
- * mmap engine behavior
- */
-#define MMAP_TOTAL_SZ (1 * 1024 * 1024 * 1024UL)
-
-struct fio_devdax_data {
- void *devdax_ptr;
- size_t devdax_sz;
- off_t devdax_off;
-};
-
-static int fio_devdax_file(struct thread_data *td, struct fio_file *f,
- size_t length, off_t off)
-{
- struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
- int flags = 0;
-
- if (td_rw(td))
- flags = PROT_READ | PROT_WRITE;
- else if (td_write(td)) {
- flags = PROT_WRITE;
-
- if (td->o.verify != VERIFY_NONE)
- flags |= PROT_READ;
- } else
- flags = PROT_READ;
-
- fdd->devdax_ptr = mmap(NULL, length, flags, MAP_SHARED, f->fd, off);
- if (fdd->devdax_ptr == MAP_FAILED) {
- fdd->devdax_ptr = NULL;
- td_verror(td, errno, "mmap");
- }
-
- if (td->error && fdd->devdax_ptr)
- munmap(fdd->devdax_ptr, length);
-
- return td->error;
-}
-
-/*
- * Just mmap an appropriate portion, we cannot mmap the full extent
- */
-static int fio_devdax_prep_limited(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
- struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
-
- if (io_u->buflen > f->real_file_size) {
- log_err("dev-dax: bs too big for dev-dax engine\n");
- return EIO;
- }
-
- fdd->devdax_sz = min(MMAP_TOTAL_SZ, f->real_file_size);
- if (fdd->devdax_sz > f->io_size)
- fdd->devdax_sz = f->io_size;
-
- fdd->devdax_off = io_u->offset;
-
- return fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off);
-}
-
-/*
- * Attempt to mmap the entire file
- */
-static int fio_devdax_prep_full(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
- struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
- int ret;
-
- if (fio_file_partial_mmap(f))
- return EINVAL;
-
- if (io_u->offset != (size_t) io_u->offset ||
- f->io_size != (size_t) f->io_size) {
- fio_file_set_partial_mmap(f);
- return EINVAL;
- }
-
- fdd->devdax_sz = f->io_size;
- fdd->devdax_off = 0;
-
- ret = fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off);
- if (ret)
- fio_file_set_partial_mmap(f);
-
- return ret;
-}
-
-static int fio_devdax_prep(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
- struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
- int ret;
-
- /*
- * It fits within existing mapping, use it
- */
- if (io_u->offset >= fdd->devdax_off &&
- io_u->offset + io_u->buflen < fdd->devdax_off + fdd->devdax_sz)
- goto done;
-
- /*
- * unmap any existing mapping
- */
- if (fdd->devdax_ptr) {
- if (munmap(fdd->devdax_ptr, fdd->devdax_sz) < 0)
- return errno;
- fdd->devdax_ptr = NULL;
- }
-
- if (fio_devdax_prep_full(td, io_u)) {
- td_clear_error(td);
- ret = fio_devdax_prep_limited(td, io_u);
- if (ret)
- return ret;
- }
-
-done:
- io_u->mmap_data = fdd->devdax_ptr + io_u->offset - fdd->devdax_off -
- f->file_offset;
- return 0;
-}
-
-static int fio_devdax_queue(struct thread_data *td, struct io_u *io_u)
-{
- fio_ro_check(td, io_u);
- io_u->error = 0;
-
- switch (io_u->ddir) {
- case DDIR_READ:
- memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen);
- break;
- case DDIR_WRITE:
- pmem_memcpy_persist(io_u->mmap_data, io_u->xfer_buf,
- io_u->xfer_buflen);
- break;
- case DDIR_SYNC:
- case DDIR_DATASYNC:
- case DDIR_SYNC_FILE_RANGE:
- break;
- default:
- io_u->error = EINVAL;
- break;
- }
-
- return FIO_Q_COMPLETED;
-}
-
-static int fio_devdax_init(struct thread_data *td)
-{
- struct thread_options *o = &td->o;
-
- if ((o->rw_min_bs & page_mask) &&
- (o->fsync_blocks || o->fdatasync_blocks)) {
- log_err("dev-dax: mmap options dictate a minimum block size of %llu bytes\n",
- (unsigned long long) page_size);
- return 1;
- }
-
- return 0;
-}
-
-static int fio_devdax_open_file(struct thread_data *td, struct fio_file *f)
-{
- struct fio_devdax_data *fdd;
- int ret;
-
- ret = generic_open_file(td, f);
- if (ret)
- return ret;
-
- fdd = calloc(1, sizeof(*fdd));
- if (!fdd) {
- int fio_unused __ret;
- __ret = generic_close_file(td, f);
- return 1;
- }
-
- FILE_SET_ENG_DATA(f, fdd);
-
- return 0;
-}
-
-static int fio_devdax_close_file(struct thread_data *td, struct fio_file *f)
-{
- struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
-
- FILE_SET_ENG_DATA(f, NULL);
- free(fdd);
- fio_file_clear_partial_mmap(f);
-
- return generic_close_file(td, f);
-}
-
-static int
-fio_devdax_get_file_size(struct thread_data *td, struct fio_file *f)
-{
- char spath[PATH_MAX];
- char npath[PATH_MAX];
- char *rpath;
- FILE *sfile;
- uint64_t size;
- struct stat st;
- int rc;
-
- if (fio_file_size_known(f))
- return 0;
-
- if (f->filetype != FIO_TYPE_CHAR)
- return -EINVAL;
-
- rc = stat(f->file_name, &st);
- if (rc < 0) {
- log_err("%s: failed to stat file %s (%s)\n",
- td->o.name, f->file_name, strerror(errno));
- return -errno;
- }
-
- snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/subsystem",
- major(st.st_rdev), minor(st.st_rdev));
-
- rpath = realpath(spath, npath);
- if (!rpath) {
- log_err("%s: realpath on %s failed (%s)\n",
- td->o.name, spath, strerror(errno));
- return -errno;
- }
-
- /* check if DAX device */
- if (strcmp("/sys/class/dax", rpath)) {
- log_err("%s: %s not a DAX device!\n",
- td->o.name, f->file_name);
- }
-
- snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/size",
- major(st.st_rdev), minor(st.st_rdev));
-
- sfile = fopen(spath, "r");
- if (!sfile) {
- log_err("%s: fopen on %s failed (%s)\n",
- td->o.name, spath, strerror(errno));
- return 1;
- }
-
- rc = fscanf(sfile, "%lu", &size);
- if (rc < 0) {
- log_err("%s: fscanf on %s failed (%s)\n",
- td->o.name, spath, strerror(errno));
- return 1;
- }
-
- f->real_file_size = size;
-
- fclose(sfile);
-
- if (f->file_offset > f->real_file_size) {
- log_err("%s: offset extends end (%llu > %llu)\n", td->o.name,
- (unsigned long long) f->file_offset,
- (unsigned long long) f->real_file_size);
- return 1;
- }
-
- fio_file_set_size_known(f);
- return 0;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "dev-dax",
- .version = FIO_IOOPS_VERSION,
- .init = fio_devdax_init,
- .prep = fio_devdax_prep,
- .queue = fio_devdax_queue,
- .open_file = fio_devdax_open_file,
- .close_file = fio_devdax_close_file,
- .get_file_size = fio_devdax_get_file_size,
- .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NOEXTEND | FIO_NODISKUTIL,
-};
-
-static void fio_init fio_devdax_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_devdax_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/e4defrag.c b/engines/e4defrag.c
deleted file mode 100644
index 4b444888..00000000
--- a/engines/e4defrag.c
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * ioe_e4defrag: ioengine for git://git.kernel.dk/fio.git
- *
- * IO engine that does regular EXT4_IOC_MOVE_EXT ioctls to simulate
- * defragment activity
- *
- */
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/uio.h>
-#include <errno.h>
-#include <assert.h>
-#include <fcntl.h>
-
-#include "../fio.h"
-#include "../optgroup.h"
-
-#ifndef EXT4_IOC_MOVE_EXT
-#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
-struct move_extent {
- __u32 reserved; /* should be zero */
- __u32 donor_fd; /* donor file descriptor */
- __u64 orig_start; /* logical start offset in block for orig */
- __u64 donor_start; /* logical start offset in block for donor */
- __u64 len; /* block length to be moved */
- __u64 moved_len; /* moved block length */
-};
-#endif
-
-struct e4defrag_data {
- int donor_fd;
- int bsz;
-};
-
-struct e4defrag_options {
- void *pad;
- unsigned int inplace;
- char * donor_name;
-};
-
-static struct fio_option options[] = {
- {
- .name = "donorname",
- .lname = "Donor Name",
- .type = FIO_OPT_STR_STORE,
- .off1 = offsetof(struct e4defrag_options, donor_name),
- .help = "File used as a block donor",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_E4DEFRAG,
- },
- {
- .name = "inplace",
- .lname = "In Place",
- .type = FIO_OPT_INT,
- .off1 = offsetof(struct e4defrag_options, inplace),
- .minval = 0,
- .maxval = 1,
- .help = "Alloc and free space inside defrag event",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_E4DEFRAG,
- },
- {
- .name = NULL,
- },
-};
-
-static int fio_e4defrag_init(struct thread_data *td)
-{
- int r, len = 0;
- struct e4defrag_options *o = td->eo;
- struct e4defrag_data *ed;
- struct stat stub;
- char donor_name[PATH_MAX];
-
- if (!strlen(o->donor_name)) {
- log_err("'donorname' options required\n");
- return 1;
- }
-
- ed = malloc(sizeof(*ed));
- if (!ed) {
- td_verror(td, ENOMEM, "io_queue_init");
- return 1;
- }
- memset(ed, 0 ,sizeof(*ed));
-
- if (td->o.directory)
- len = sprintf(donor_name, "%s/", td->o.directory);
- sprintf(donor_name + len, "%s", o->donor_name);
-
- ed->donor_fd = open(donor_name, O_CREAT|O_WRONLY, 0644);
- if (ed->donor_fd < 0) {
- td_verror(td, errno, "io_queue_init");
- log_err("Can't open donor file %s err:%d\n", donor_name, ed->donor_fd);
- free(ed);
- return 1;
- }
-
- if (!o->inplace) {
- long long __len = td->o.file_size_high - td->o.start_offset;
- r = fallocate(ed->donor_fd, 0, td->o.start_offset, __len);
- if (r)
- goto err;
- }
- r = fstat(ed->donor_fd, &stub);
- if (r)
- goto err;
-
- ed->bsz = stub.st_blksize;
- td->io_ops_data = ed;
- return 0;
-err:
- td_verror(td, errno, "io_queue_init");
- close(ed->donor_fd);
- free(ed);
- return 1;
-}
-
-static void fio_e4defrag_cleanup(struct thread_data *td)
-{
- struct e4defrag_data *ed = td->io_ops_data;
- if (ed) {
- if (ed->donor_fd >= 0)
- close(ed->donor_fd);
- free(ed);
- }
-}
-
-
-static int fio_e4defrag_queue(struct thread_data *td, struct io_u *io_u)
-{
-
- int ret;
- unsigned long long len;
- struct move_extent me;
- struct fio_file *f = io_u->file;
- struct e4defrag_data *ed = td->io_ops_data;
- struct e4defrag_options *o = td->eo;
-
- fio_ro_check(td, io_u);
-
- /* Theoretically defragmentation should not change data, but it
- * changes data layout. So this function handle only DDIR_WRITE
- * in order to satisfy strict read only access pattern
- */
- if (io_u->ddir != DDIR_WRITE) {
- io_u->error = EINVAL;
- return FIO_Q_COMPLETED;
- }
-
- if (o->inplace) {
- ret = fallocate(ed->donor_fd, 0, io_u->offset, io_u->xfer_buflen);
- if (ret)
- goto out;
- }
-
- memset(&me, 0, sizeof(me));
- me.donor_fd = ed->donor_fd;
- me.orig_start = io_u->offset / ed->bsz;
- me.donor_start = me.orig_start;
- len = (io_u->offset + io_u->xfer_buflen + ed->bsz -1);
- me.len = len / ed->bsz - me.orig_start;
-
- ret = ioctl(f->fd, EXT4_IOC_MOVE_EXT, &me);
- len = me.moved_len * ed->bsz;
-
- if (len > io_u->xfer_buflen)
- len = io_u->xfer_buflen;
-
- if (len != io_u->xfer_buflen) {
- if (len) {
- io_u->resid = io_u->xfer_buflen - len;
- io_u->error = 0;
- } else {
- /* access beyond i_size */
- io_u->error = EINVAL;
- }
- }
- if (ret)
- io_u->error = errno;
-
- if (o->inplace)
- ret = ftruncate(ed->donor_fd, 0);
-out:
- if (ret && !io_u->error)
- io_u->error = errno;
-
- return FIO_Q_COMPLETED;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "e4defrag",
- .version = FIO_IOOPS_VERSION,
- .init = fio_e4defrag_init,
- .queue = fio_e4defrag_queue,
- .open_file = generic_open_file,
- .close_file = generic_close_file,
- .get_file_size = generic_get_file_size,
- .flags = FIO_SYNCIO,
- .cleanup = fio_e4defrag_cleanup,
- .options = options,
- .option_struct_size = sizeof(struct e4defrag_options),
-
-};
-
-static void fio_init fio_syncio_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_syncio_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/falloc.c b/engines/falloc.c
deleted file mode 100644
index 2b00d525..00000000
--- a/engines/falloc.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * falloc: ioengine for git://git.kernel.dk/fio.git
- *
- * IO engine that does regular fallocate to simulate data transfer
- * as fio ioengine.
- * DDIR_READ does fallocate(,mode = FALLOC_FL_KEEP_SIZE,)
- * DDIR_WRITE does fallocate(,mode = 0) : fallocate with size extension
- * DDIR_TRIM does fallocate(,mode = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/uio.h>
-#include <errno.h>
-#include <assert.h>
-#include <fcntl.h>
-
-#include "../fio.h"
-#include "../filehash.h"
-
-/*
- * generic_open_file is not appropriate because does not allow to perform
- * TRIM in to file
- */
-static int open_file(struct thread_data *td, struct fio_file *f)
-{
- int from_hash = 0;
-
- dprint(FD_FILE, "fd open %s\n", f->file_name);
-
- if (f->filetype != FIO_TYPE_FILE) {
- log_err("fio: only files are supported fallocate \n");
- return 1;
- }
- if (!strcmp(f->file_name, "-")) {
- log_err("fio: can't read/write to stdin/out\n");
- return 1;
- }
-
-open_again:
- from_hash = file_lookup_open(f, O_CREAT|O_RDWR);
-
- if (f->fd == -1) {
- char buf[FIO_VERROR_SIZE];
- int e = errno;
-
- snprintf(buf, sizeof(buf), "open(%s)", f->file_name);
- td_verror(td, e, buf);
- }
-
- if (!from_hash && f->fd != -1) {
- if (add_file_hash(f)) {
- int fio_unused ret;
-
- /*
- * OK to ignore, we haven't done anything with it
- */
- ret = generic_close_file(td, f);
- goto open_again;
- }
- }
-
- return 0;
-}
-
-#ifndef FALLOC_FL_KEEP_SIZE
-#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
-#endif
-#ifndef FALLOC_FL_PUNCH_HOLE
-#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
-#endif
-static int fio_fallocate_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
- int ret;
- int flags = 0;
-
- fio_ro_check(td, io_u);
-
- if (io_u->ddir == DDIR_READ)
- flags = FALLOC_FL_KEEP_SIZE;
- else if (io_u->ddir == DDIR_WRITE)
- flags = 0;
- else if (io_u->ddir == DDIR_TRIM)
- flags = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
-
- ret = fallocate(f->fd, flags, io_u->offset, io_u->xfer_buflen);
-
- if (ret)
- io_u->error = errno;
-
- return FIO_Q_COMPLETED;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "falloc",
- .version = FIO_IOOPS_VERSION,
- .queue = fio_fallocate_queue,
- .open_file = open_file,
- .close_file = generic_close_file,
- .get_file_size = generic_get_file_size,
- .flags = FIO_SYNCIO
-};
-
-static void fio_init fio_syncio_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_syncio_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/ftruncate.c b/engines/ftruncate.c
deleted file mode 100644
index e86dbac0..00000000
--- a/engines/ftruncate.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * ftruncate: ioengine for git://git.kernel.dk/fio.git
- *
- * IO engine that does regular truncates to simulate data transfer
- * as fio ioengine.
- * DDIR_WRITE does ftruncate
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/uio.h>
-#include <errno.h>
-#include <assert.h>
-#include <fcntl.h>
-
-#include "../fio.h"
-#include "../filehash.h"
-
-static int fio_ftruncate_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
- int ret;
- fio_ro_check(td, io_u);
-
- if (io_u->ddir != DDIR_WRITE) {
- io_u->error = EINVAL;
- return FIO_Q_COMPLETED;
- }
- ret = ftruncate(f->fd, io_u->offset);
-
- if (ret)
- io_u->error = errno;
-
- return FIO_Q_COMPLETED;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "ftruncate",
- .version = FIO_IOOPS_VERSION,
- .queue = fio_ftruncate_queue,
- .open_file = generic_open_file,
- .close_file = generic_close_file,
- .get_file_size = generic_get_file_size,
- .flags = FIO_SYNCIO | FIO_FAKEIO
-};
-
-static void fio_init fio_syncio_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_syncio_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/fusion-aw.c b/engines/fusion-aw.c
deleted file mode 100644
index 77844ffe..00000000
--- a/engines/fusion-aw.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Custom fio(1) engine that submits synchronous atomic writes to file.
- *
- * Copyright (C) 2013 Fusion-io, Inc.
- * Author: Santhosh Kumar Koundinya (skoundinya@fusionio.com).
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; under version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License version
- * 2 for more details.
- *
- * You should have received a copy of the GNU General Public License Version 2
- * along with this program; if not see <http://www.gnu.org/licenses/>
- */
-
-#include <stdlib.h>
-#include <stdint.h>
-
-#include "../fio.h"
-
-#include <nvm/nvm_primitives.h>
-
-#define NUM_ATOMIC_CAPABILITIES (5)
-
-struct fas_data {
- nvm_handle_t nvm_handle;
- size_t xfer_buf_align;
- size_t xfer_buflen_align;
- size_t xfer_buflen_max;
- size_t sector_size;
-};
-
-static int queue(struct thread_data *td, struct io_u *io_u)
-{
- struct fas_data *d = FILE_ENG_DATA(io_u->file);
- int rc;
-
- if (io_u->ddir != DDIR_WRITE) {
- td_vmsg(td, EINVAL, "only writes supported", "io_u->ddir");
- rc = -EINVAL;
- goto out;
- }
-
- if ((size_t) io_u->xfer_buf % d->xfer_buf_align) {
- td_vmsg(td, EINVAL, "unaligned data buffer", "io_u->xfer_buf");
- rc = -EINVAL;
- goto out;
- }
-
- if (io_u->xfer_buflen % d->xfer_buflen_align) {
- td_vmsg(td, EINVAL, "unaligned data size", "io_u->xfer_buflen");
- rc = -EINVAL;
- goto out;
- }
-
- if (io_u->xfer_buflen > d->xfer_buflen_max) {
- td_vmsg(td, EINVAL, "data too big", "io_u->xfer_buflen");
- rc = -EINVAL;
- goto out;
- }
-
- rc = nvm_atomic_write(d->nvm_handle, (uint64_t) io_u->xfer_buf,
- io_u->xfer_buflen, io_u->offset / d->sector_size);
- if (rc == -1) {
- td_verror(td, errno, "nvm_atomic_write");
- rc = -errno;
- goto out;
- }
- rc = FIO_Q_COMPLETED;
-out:
- if (rc < 0)
- io_u->error = -rc;
-
- return rc;
-}
-
-static int open_file(struct thread_data *td, struct fio_file *f)
-{
- int rc;
- int fio_unused close_file_rc;
- struct fas_data *d;
- nvm_version_t nvm_version;
- nvm_capability_t nvm_capability[NUM_ATOMIC_CAPABILITIES];
-
-
- d = malloc(sizeof(*d));
- if (!d) {
- td_verror(td, ENOMEM, "malloc");
- rc = ENOMEM;
- goto error;
- }
- d->nvm_handle = -1;
- FILE_SET_ENG_DATA(f, d);
-
- rc = generic_open_file(td, f);
-
- if (rc)
- goto free_engine_data;
-
- /* Set the version of the library as seen when engine is compiled */
- nvm_version.major = NVM_PRIMITIVES_API_MAJOR;
- nvm_version.minor = NVM_PRIMITIVES_API_MINOR;
- nvm_version.micro = NVM_PRIMITIVES_API_MICRO;
-
- d->nvm_handle = nvm_get_handle(f->fd, &nvm_version);
- if (d->nvm_handle == -1) {
- td_vmsg(td, errno, "nvm_get_handle failed", "nvm_get_handle");
- rc = errno;
- goto close_file;
- }
-
- nvm_capability[0].cap_id = NVM_CAP_ATOMIC_WRITE_START_ALIGN_ID;
- nvm_capability[1].cap_id = NVM_CAP_ATOMIC_WRITE_MULTIPLICITY_ID;
- nvm_capability[2].cap_id = NVM_CAP_ATOMIC_WRITE_MAX_VECTOR_SIZE_ID;
- nvm_capability[3].cap_id = NVM_CAP_SECTOR_SIZE_ID;
- nvm_capability[4].cap_id = NVM_CAP_ATOMIC_MAX_IOV_ID;
- rc = nvm_get_capabilities(d->nvm_handle, nvm_capability,
- NUM_ATOMIC_CAPABILITIES, false);
- if (rc == -1) {
- td_vmsg(td, errno, "error in getting atomic write capabilities", "nvm_get_capabilities");
- rc = errno;
- goto close_file;
- } else if (rc < NUM_ATOMIC_CAPABILITIES) {
- td_vmsg(td, EINVAL, "couldn't get all the atomic write capabilities" , "nvm_get_capabilities");
- rc = ECANCELED;
- goto close_file;
- }
- /* Reset rc to 0 because we got all capabilities we needed */
- rc = 0;
- d->xfer_buf_align = nvm_capability[0].cap_value;
- d->xfer_buflen_align = nvm_capability[1].cap_value;
- d->xfer_buflen_max = d->xfer_buflen_align * nvm_capability[2].cap_value * nvm_capability[4].cap_value;
- d->sector_size = nvm_capability[3].cap_value;
-
-out:
- return rc;
-close_file:
- close_file_rc = generic_close_file(td, f);
-free_engine_data:
- free(d);
-error:
- f->fd = -1;
- FILE_SET_ENG_DATA(f, NULL);
- goto out;
-}
-
-static int close_file(struct thread_data *td, struct fio_file *f)
-{
- struct fas_data *d = FILE_ENG_DATA(f);
-
- if (d) {
- if (d->nvm_handle != -1)
- nvm_release_handle(d->nvm_handle);
- free(d);
- FILE_SET_ENG_DATA(f, NULL);
- }
-
- return generic_close_file(td, f);
-}
-
-static struct ioengine_ops ioengine = {
- .name = "fusion-aw-sync",
- .version = FIO_IOOPS_VERSION,
- .queue = queue,
- .open_file = open_file,
- .close_file = close_file,
- .get_file_size = generic_get_file_size,
- .flags = FIO_SYNCIO | FIO_RAWIO | FIO_MEMALIGN
-};
-
-static void fio_init fio_fusion_aw_init(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_fusion_aw_exit(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/gfapi.h b/engines/gfapi.h
deleted file mode 100644
index 10284314..00000000
--- a/engines/gfapi.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#include <glusterfs/api/glfs.h>
-#include "../fio.h"
-
-struct gf_options {
- void *pad;
- char *gf_vol;
- char *gf_brick;
-};
-
-struct gf_data {
- glfs_t *fs;
- glfs_fd_t *fd;
- struct io_u **aio_events;
-};
-
-extern struct fio_option gfapi_options[];
-extern int fio_gf_setup(struct thread_data *td);
-extern void fio_gf_cleanup(struct thread_data *td);
-extern int fio_gf_get_file_size(struct thread_data *td, struct fio_file *f);
-extern int fio_gf_open_file(struct thread_data *td, struct fio_file *f);
-extern int fio_gf_close_file(struct thread_data *td, struct fio_file *f);
-extern int fio_gf_unlink_file(struct thread_data *td, struct fio_file *f);
diff --git a/engines/glusterfs.c b/engines/glusterfs.c
deleted file mode 100644
index 2abc283f..00000000
--- a/engines/glusterfs.c
+++ /dev/null
@@ -1,306 +0,0 @@
-/*
- * glusterfs engine
- *
- * common Glusterfs's gfapi interface
- *
- */
-
-#include "gfapi.h"
-#include "../optgroup.h"
-
-struct fio_option gfapi_options[] = {
- {
- .name = "volume",
- .lname = "Glusterfs volume",
- .type = FIO_OPT_STR_STORE,
- .help = "Name of the Glusterfs volume",
- .off1 = offsetof(struct gf_options, gf_vol),
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_GFAPI,
- },
- {
- .name = "brick",
- .lname = "Glusterfs brick name",
- .type = FIO_OPT_STR_STORE,
- .help = "Name of the Glusterfs brick to connect",
- .off1 = offsetof(struct gf_options, gf_brick),
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_GFAPI,
- },
- {
- .name = NULL,
- },
-};
-
-int fio_gf_setup(struct thread_data *td)
-{
- int r = 0;
- struct gf_data *g = NULL;
- struct gf_options *opt = td->eo;
- struct stat sb = { 0, };
-
- dprint(FD_IO, "fio setup\n");
-
- if (td->io_ops_data)
- return 0;
-
- g = malloc(sizeof(struct gf_data));
- if (!g) {
- log_err("malloc failed.\n");
- return -ENOMEM;
- }
- g->fs = NULL;
- g->fd = NULL;
- g->aio_events = NULL;
-
- g->fs = glfs_new(opt->gf_vol);
- if (!g->fs) {
- log_err("glfs_new failed.\n");
- goto cleanup;
- }
- glfs_set_logging(g->fs, "/tmp/fio_gfapi.log", 7);
- /* default to tcp */
- r = glfs_set_volfile_server(g->fs, "tcp", opt->gf_brick, 0);
- if (r) {
- log_err("glfs_set_volfile_server failed.\n");
- goto cleanup;
- }
- r = glfs_init(g->fs);
- if (r) {
- log_err("glfs_init failed. Is glusterd running on brick?\n");
- goto cleanup;
- }
- sleep(2);
- r = glfs_lstat(g->fs, ".", &sb);
- if (r) {
- log_err("glfs_lstat failed.\n");
- goto cleanup;
- }
- dprint(FD_FILE, "fio setup %p\n", g->fs);
- td->io_ops_data = g;
- return 0;
-cleanup:
- if (g->fs)
- glfs_fini(g->fs);
- free(g);
- td->io_ops_data = NULL;
- return r;
-}
-
-void fio_gf_cleanup(struct thread_data *td)
-{
- struct gf_data *g = td->io_ops_data;
-
- if (g) {
- if (g->aio_events)
- free(g->aio_events);
- if (g->fd)
- glfs_close(g->fd);
- if (g->fs)
- glfs_fini(g->fs);
- free(g);
- td->io_ops_data = NULL;
- }
-}
-
-int fio_gf_get_file_size(struct thread_data *td, struct fio_file *f)
-{
- struct stat buf;
- int ret;
- struct gf_data *g = td->io_ops_data;
-
- dprint(FD_FILE, "get file size %s\n", f->file_name);
-
- if (!g || !g->fs) {
- return 0;
- }
- if (fio_file_size_known(f))
- return 0;
-
- ret = glfs_lstat(g->fs, f->file_name, &buf);
- if (ret < 0) {
- log_err("glfs_lstat failed.\n");
- return ret;
- }
-
- f->real_file_size = buf.st_size;
- fio_file_set_size_known(f);
-
- return 0;
-
-}
-
-int fio_gf_open_file(struct thread_data *td, struct fio_file *f)
-{
-
- int flags = 0;
- int ret = 0;
- struct gf_data *g = td->io_ops_data;
- struct stat sb = { 0, };
-
- if (td_write(td)) {
- if (!read_only)
- flags = O_RDWR;
- } else if (td_read(td)) {
- if (!read_only)
- flags = O_RDWR;
- else
- flags = O_RDONLY;
- }
-
- if (td->o.odirect)
- flags |= OS_O_DIRECT;
- if (td->o.sync_io)
- flags |= O_SYNC;
-
- dprint(FD_FILE, "fio file %s open mode %s td rw %s\n", f->file_name,
- flags & O_RDONLY ? "ro" : "rw", td_read(td) ? "read" : "write");
- g->fd = glfs_creat(g->fs, f->file_name, flags, 0644);
- if (!g->fd) {
- ret = errno;
- log_err("glfs_creat failed.\n");
- return ret;
- }
- /* file for read doesn't exist or shorter than required, create/extend it */
- if (td_read(td)) {
- if (glfs_lstat(g->fs, f->file_name, &sb)
- || sb.st_size < f->real_file_size) {
- dprint(FD_FILE, "fio extend file %s from %ld to %ld\n",
- f->file_name, sb.st_size, f->real_file_size);
- ret = glfs_ftruncate(g->fd, f->real_file_size);
- if (ret) {
- log_err("failed fio extend file %s to %ld\n",
- f->file_name, f->real_file_size);
- } else {
- unsigned long long left;
- unsigned int bs;
- char *b;
- int r;
-
- /* fill the file, copied from extend_file */
- b = malloc(td->o.max_bs[DDIR_WRITE]);
-
- left = f->real_file_size;
- while (left && !td->terminate) {
- bs = td->o.max_bs[DDIR_WRITE];
- if (bs > left)
- bs = left;
-
- fill_io_buffer(td, b, bs, bs);
-
- r = glfs_write(g->fd, b, bs, 0);
- dprint(FD_IO,
- "fio write %d of %ld file %s\n",
- r, f->real_file_size,
- f->file_name);
-
- if (r > 0) {
- left -= r;
- continue;
- } else {
- if (r < 0) {
- int __e = errno;
-
- if (__e == ENOSPC) {
- if (td->o.
- fill_device)
- break;
- log_info
- ("fio: ENOSPC on laying out "
- "file, stopping\n");
- break;
- }
- td_verror(td, errno,
- "write");
- } else
- td_verror(td, EIO,
- "write");
-
- break;
- }
- }
-
- if (b)
- free(b);
- glfs_lseek(g->fd, 0, SEEK_SET);
-
- if (td->terminate && td->o.unlink) {
- dprint(FD_FILE, "terminate unlink %s\n",
- f->file_name);
- glfs_unlink(g->fs, f->file_name);
- } else if (td->o.create_fsync) {
- if (glfs_fsync(g->fd) < 0) {
- dprint(FD_FILE,
- "failed to sync, close %s\n",
- f->file_name);
- td_verror(td, errno, "fsync");
- glfs_close(g->fd);
- g->fd = NULL;
- return 1;
- }
- }
- }
- }
- }
-#if defined(GFAPI_USE_FADVISE)
- {
- int r = 0;
- if (td_random(td)) {
- r = glfs_fadvise(g->fd, 0, f->real_file_size,
- POSIX_FADV_RANDOM);
- } else {
- r = glfs_fadvise(g->fd, 0, f->real_file_size,
- POSIX_FADV_SEQUENTIAL);
- }
- if (r) {
- dprint(FD_FILE, "fio %p fadvise %s status %d\n", g->fs,
- f->file_name, r);
- }
- }
-#endif
- dprint(FD_FILE, "fio %p created %s\n", g->fs, f->file_name);
- f->fd = -1;
- f->shadow_fd = -1;
- td->o.open_files ++;
- return ret;
-}
-
-int fio_gf_close_file(struct thread_data *td, struct fio_file *f)
-{
- int ret = 0;
- struct gf_data *g = td->io_ops_data;
-
- dprint(FD_FILE, "fd close %s\n", f->file_name);
-
- if (g) {
- if (g->fd && glfs_close(g->fd) < 0)
- ret = errno;
- g->fd = NULL;
- }
-
- return ret;
-}
-
-int fio_gf_unlink_file(struct thread_data *td, struct fio_file *f)
-{
- int ret = 0;
- struct gf_data *g = td->io_ops_data;
-
- dprint(FD_FILE, "fd unlink %s\n", f->file_name);
-
- if (g) {
- if (g->fd && glfs_close(g->fd) < 0)
- ret = errno;
-
- glfs_unlink(g->fs, f->file_name);
-
- if (g->fs)
- glfs_fini(g->fs);
-
- g->fd = NULL;
- free(g);
- }
- td->io_ops_data = NULL;
-
- return ret;
-}
diff --git a/engines/glusterfs_async.c b/engines/glusterfs_async.c
deleted file mode 100644
index f46cb263..00000000
--- a/engines/glusterfs_async.c
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * glusterfs engine
- *
- * IO engine using Glusterfs's gfapi async interface
- *
- */
-#include "gfapi.h"
-#define NOT_YET 1
-struct fio_gf_iou {
- struct io_u *io_u;
- int io_complete;
-};
-
-static struct io_u *fio_gf_event(struct thread_data *td, int event)
-{
- struct gf_data *gf_data = td->io_ops_data;
-
- dprint(FD_IO, "%s\n", __FUNCTION__);
- return gf_data->aio_events[event];
-}
-
-static int fio_gf_getevents(struct thread_data *td, unsigned int min,
- unsigned int max, const struct timespec *t)
-{
- struct gf_data *g = td->io_ops_data;
- unsigned int events = 0;
- struct io_u *io_u;
- int i;
-
- dprint(FD_IO, "%s\n", __FUNCTION__);
- do {
- io_u_qiter(&td->io_u_all, io_u, i) {
- struct fio_gf_iou *io;
-
- if (!(io_u->flags & IO_U_F_FLIGHT))
- continue;
-
- io = io_u->engine_data;
- if (io->io_complete) {
- io->io_complete = 0;
- g->aio_events[events] = io_u;
- events++;
-
- if (events >= max)
- break;
- }
-
- }
- if (events < min)
- usleep(100);
- else
- break;
-
- } while (1);
-
- return events;
-}
-
-static void fio_gf_io_u_free(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_gf_iou *io = io_u->engine_data;
-
- if (io) {
- if (io->io_complete)
- log_err("incomplete IO found.\n");
- io_u->engine_data = NULL;
- free(io);
- }
-}
-
-static int fio_gf_io_u_init(struct thread_data *td, struct io_u *io_u)
-{
- dprint(FD_FILE, "%s\n", __FUNCTION__);
-
- if (!io_u->engine_data) {
- struct fio_gf_iou *io;
-
- io = malloc(sizeof(struct fio_gf_iou));
- if (!io) {
- td_verror(td, errno, "malloc");
- return 1;
- }
- io->io_complete = 0;
- io->io_u = io_u;
- io_u->engine_data = io;
- }
- return 0;
-}
-
-static void gf_async_cb(glfs_fd_t * fd, ssize_t ret, void *data)
-{
- struct io_u *io_u = data;
- struct fio_gf_iou *iou = io_u->engine_data;
-
- dprint(FD_IO, "%s ret %lu\n", __FUNCTION__, ret);
- iou->io_complete = 1;
-}
-
-static int fio_gf_async_queue(struct thread_data fio_unused * td,
- struct io_u *io_u)
-{
- struct gf_data *g = td->io_ops_data;
- int r;
-
- dprint(FD_IO, "%s op %s\n", __FUNCTION__, io_ddir_name(io_u->ddir));
-
- fio_ro_check(td, io_u);
-
- if (io_u->ddir == DDIR_READ)
- r = glfs_pread_async(g->fd, io_u->xfer_buf, io_u->xfer_buflen,
- io_u->offset, 0, gf_async_cb, io_u);
- else if (io_u->ddir == DDIR_WRITE)
- r = glfs_pwrite_async(g->fd, io_u->xfer_buf, io_u->xfer_buflen,
- io_u->offset, 0, gf_async_cb, io_u);
-#if defined(CONFIG_GF_TRIM)
- else if (io_u->ddir == DDIR_TRIM)
- r = glfs_discard_async(g->fd, io_u->offset, io_u->xfer_buflen,
- gf_async_cb, io_u);
-#endif
- else if (io_u->ddir == DDIR_DATASYNC)
- r = glfs_fdatasync_async(g->fd, gf_async_cb, io_u);
- else if (io_u->ddir == DDIR_SYNC)
- r = glfs_fsync_async(g->fd, gf_async_cb, io_u);
- else
- r = EINVAL;
-
- if (r) {
- log_err("glfs queue failed.\n");
- io_u->error = r;
- goto failed;
- }
- return FIO_Q_QUEUED;
-
-failed:
- io_u->error = r;
- td_verror(td, io_u->error, "xfer");
- return FIO_Q_COMPLETED;
-}
-
-static int fio_gf_async_setup(struct thread_data *td)
-{
- struct gf_data *g;
- int r;
-
-#if defined(NOT_YET)
- log_err("the async interface is still very experimental...\n");
-#endif
- r = fio_gf_setup(td);
- if (r)
- return r;
-
- td->o.use_thread = 1;
- g = td->io_ops_data;
- g->aio_events = calloc(td->o.iodepth, sizeof(struct io_u *));
- if (!g->aio_events) {
- r = -ENOMEM;
- fio_gf_cleanup(td);
- return r;
- }
-
- return r;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "gfapi_async",
- .version = FIO_IOOPS_VERSION,
- .init = fio_gf_async_setup,
- .cleanup = fio_gf_cleanup,
- .queue = fio_gf_async_queue,
- .open_file = fio_gf_open_file,
- .close_file = fio_gf_close_file,
- .unlink_file = fio_gf_unlink_file,
- .get_file_size = fio_gf_get_file_size,
- .getevents = fio_gf_getevents,
- .event = fio_gf_event,
- .io_u_init = fio_gf_io_u_init,
- .io_u_free = fio_gf_io_u_free,
- .options = gfapi_options,
- .option_struct_size = sizeof(struct gf_options),
- .flags = FIO_DISKLESSIO,
-};
-
-static void fio_init fio_gf_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_gf_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/glusterfs_sync.c b/engines/glusterfs_sync.c
deleted file mode 100644
index 25d05b25..00000000
--- a/engines/glusterfs_sync.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * glusterfs engine
- *
- * IO engine using Glusterfs's gfapi sync interface
- *
- */
-
-#include "gfapi.h"
-
-#define LAST_POS(f) ((f)->engine_pos)
-static int fio_gf_prep(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
- struct gf_data *g = td->io_ops_data;
-
- dprint(FD_FILE, "fio prep\n");
-
- if (!ddir_rw(io_u->ddir))
- return 0;
-
- if (LAST_POS(f) != -1ULL && LAST_POS(f) == io_u->offset)
- return 0;
-
- if (glfs_lseek(g->fd, io_u->offset, SEEK_SET) < 0) {
- td_verror(td, errno, "lseek");
- return 1;
- }
-
- return 0;
-}
-
-static int fio_gf_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct gf_data *g = td->io_ops_data;
- int ret = 0;
-
- dprint(FD_FILE, "fio queue len %lu\n", io_u->xfer_buflen);
- fio_ro_check(td, io_u);
-
- if (io_u->ddir == DDIR_READ)
- ret = glfs_read(g->fd, io_u->xfer_buf, io_u->xfer_buflen, 0);
- else if (io_u->ddir == DDIR_WRITE)
- ret = glfs_write(g->fd, io_u->xfer_buf, io_u->xfer_buflen, 0);
- else if (io_u->ddir == DDIR_SYNC)
- ret = glfs_fsync(g->fd);
- else if (io_u->ddir == DDIR_DATASYNC)
- ret = glfs_fdatasync(g->fd);
- else {
- log_err("unsupported operation.\n");
- return -EINVAL;
- }
- dprint(FD_FILE, "fio len %lu ret %d\n", io_u->xfer_buflen, ret);
- if (io_u->file && ret >= 0 && ddir_rw(io_u->ddir))
- LAST_POS(io_u->file) = io_u->offset + ret;
-
- if (ret != (int)io_u->xfer_buflen) {
- if (ret >= 0) {
- io_u->resid = io_u->xfer_buflen - ret;
- io_u->error = 0;
- return FIO_Q_COMPLETED;
- } else
- io_u->error = errno;
- }
-
- if (io_u->error) {
- log_err("IO failed.\n");
- td_verror(td, io_u->error, "xfer");
- }
-
- return FIO_Q_COMPLETED;
-
-}
-
-static struct ioengine_ops ioengine = {
- .name = "gfapi",
- .version = FIO_IOOPS_VERSION,
- .init = fio_gf_setup,
- .cleanup = fio_gf_cleanup,
- .prep = fio_gf_prep,
- .queue = fio_gf_queue,
- .open_file = fio_gf_open_file,
- .close_file = fio_gf_close_file,
- .unlink_file = fio_gf_unlink_file,
- .get_file_size = fio_gf_get_file_size,
- .options = gfapi_options,
- .option_struct_size = sizeof(struct gf_options),
- .flags = FIO_SYNCIO | FIO_DISKLESSIO,
-};
-
-static void fio_init fio_gf_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_gf_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/guasi.c b/engines/guasi.c
deleted file mode 100644
index eb12c899..00000000
--- a/engines/guasi.c
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * guasi engine
- *
- * IO engine using the GUASI library.
- *
- * Before running make. You'll need the GUASI lib as well:
- *
- * http://www.xmailserver.org/guasi-lib.html
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <assert.h>
-
-#include "../fio.h"
-
-#define GFIO_MIN_THREADS 32
-#ifndef GFIO_MAX_THREADS
-#define GFIO_MAX_THREADS 2000
-#endif
-
-#include <guasi.h>
-#include <guasi_syscalls.h>
-
-#ifdef GFIO_DEBUG
-#define GDBG_PRINT(a) printf a
-#else
-#define GDBG_PRINT(a) (void) 0
-#endif
-
-struct guasi_data {
- guasi_t hctx;
- int max_reqs;
- guasi_req_t *reqs;
- struct io_u **io_us;
- int queued_nr;
- int reqs_nr;
-};
-
-static int fio_guasi_prep(struct thread_data fio_unused *td, struct io_u *io_u)
-{
-
- GDBG_PRINT(("fio_guasi_prep(%p)\n", io_u));
- io_u->greq = NULL;
-
- return 0;
-}
-
-static struct io_u *fio_guasi_event(struct thread_data *td, int event)
-{
- struct guasi_data *ld = td->io_ops_data;
- struct io_u *io_u;
- struct guasi_reqinfo rinf;
-
- GDBG_PRINT(("fio_guasi_event(%d)\n", event));
- if (guasi_req_info(ld->reqs[event], &rinf) < 0) {
- log_err("guasi_req_info(%d) FAILED!\n", event);
- return NULL;
- }
- io_u = rinf.asid;
- io_u->error = EINPROGRESS;
- GDBG_PRINT(("fio_guasi_event(%d) -> %p\n", event, io_u));
- if (rinf.status == GUASI_STATUS_COMPLETE) {
- io_u->error = rinf.result;
- if (io_u->ddir == DDIR_READ ||
- io_u->ddir == DDIR_WRITE) {
- io_u->error = 0;
- if (rinf.result != (long) io_u->xfer_buflen) {
- if (rinf.result >= 0)
- io_u->resid = io_u->xfer_buflen - rinf.result;
- else
- io_u->error = rinf.error;
- }
- }
- }
-
- return io_u;
-}
-
-static int fio_guasi_getevents(struct thread_data *td, unsigned int min,
- unsigned int max, const struct timespec *t)
-{
- struct guasi_data *ld = td->io_ops_data;
- int n, r;
- long timeo = -1;
-
- GDBG_PRINT(("fio_guasi_getevents(%d, %d)\n", min, max));
- if (min > ld->max_reqs)
- min = ld->max_reqs;
- if (max > ld->max_reqs)
- max = ld->max_reqs;
- if (t)
- timeo = t->tv_sec * 1000L + t->tv_nsec / 1000000L;
- for (n = 0; n < ld->reqs_nr; n++)
- guasi_req_free(ld->reqs[n]);
- n = 0;
- do {
- r = guasi_fetch(ld->hctx, ld->reqs + n, min - n,
- max - n, timeo);
- if (r < 0) {
- log_err("guasi_fetch() FAILED! (%d)\n", r);
- break;
- }
- n += r;
- if (n >= min)
- break;
- } while (1);
- ld->reqs_nr = n;
- GDBG_PRINT(("fio_guasi_getevents() -> %d\n", n));
-
- return n;
-}
-
-static int fio_guasi_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct guasi_data *ld = td->io_ops_data;
-
- fio_ro_check(td, io_u);
-
- GDBG_PRINT(("fio_guasi_queue(%p)\n", io_u));
- if (ld->queued_nr == (int) td->o.iodepth)
- return FIO_Q_BUSY;
-
- ld->io_us[ld->queued_nr] = io_u;
- ld->queued_nr++;
- return FIO_Q_QUEUED;
-}
-
-static void fio_guasi_queued(struct thread_data *td, struct io_u **io_us, int nr)
-{
- int i;
- struct io_u *io_u;
- struct timeval now;
-
- if (!fio_fill_issue_time(td))
- return;
-
- io_u_mark_submit(td, nr);
- fio_gettime(&now, NULL);
- for (i = 0; i < nr; i++) {
- io_u = io_us[i];
- memcpy(&io_u->issue_time, &now, sizeof(now));
- io_u_queued(td, io_u);
- }
-}
-
-static int fio_guasi_commit(struct thread_data *td)
-{
- struct guasi_data *ld = td->io_ops_data;
- int i;
- struct io_u *io_u;
- struct fio_file *f;
-
- GDBG_PRINT(("fio_guasi_commit(%d)\n", ld->queued_nr));
- for (i = 0; i < ld->queued_nr; i++) {
- io_u = ld->io_us[i];
- GDBG_PRINT(("fio_guasi_commit(%d) --> %p\n", i, io_u));
- f = io_u->file;
- io_u->greq = NULL;
- if (io_u->ddir == DDIR_READ)
- io_u->greq = guasi__pread(ld->hctx, ld, io_u, 0,
- f->fd, io_u->xfer_buf, io_u->xfer_buflen,
- io_u->offset);
- else if (io_u->ddir == DDIR_WRITE)
- io_u->greq = guasi__pwrite(ld->hctx, ld, io_u, 0,
- f->fd, io_u->xfer_buf, io_u->xfer_buflen,
- io_u->offset);
- else if (ddir_sync(io_u->ddir))
- io_u->greq = guasi__fsync(ld->hctx, ld, io_u, 0, f->fd);
- else {
- log_err("fio_guasi_commit() FAILED: unknow request %d\n",
- io_u->ddir);
- }
- if (io_u->greq == NULL) {
- log_err("fio_guasi_commit() FAILED: submit failed (%s)\n",
- strerror(errno));
- return -1;
- }
- }
- fio_guasi_queued(td, ld->io_us, i);
- ld->queued_nr = 0;
- GDBG_PRINT(("fio_guasi_commit() -> %d\n", i));
-
- return 0;
-}
-
-static int fio_guasi_cancel(struct thread_data fio_unused *td,
- struct io_u *io_u)
-{
- GDBG_PRINT(("fio_guasi_cancel(%p) req=%p\n", io_u, io_u->greq));
- if (io_u->greq != NULL)
- guasi_req_cancel(io_u->greq);
-
- return 0;
-}
-
-static void fio_guasi_cleanup(struct thread_data *td)
-{
- struct guasi_data *ld = td->io_ops_data;
- int n;
-
- GDBG_PRINT(("fio_guasi_cleanup(%p)\n", ld));
- if (ld) {
- for (n = 0; n < ld->reqs_nr; n++)
- guasi_req_free(ld->reqs[n]);
- guasi_free(ld->hctx);
- free(ld->reqs);
- free(ld->io_us);
- free(ld);
- }
- GDBG_PRINT(("fio_guasi_cleanup(%p) DONE\n", ld));
-}
-
-static int fio_guasi_init(struct thread_data *td)
-{
- int maxthr;
- struct guasi_data *ld = malloc(sizeof(*ld));
-
- GDBG_PRINT(("fio_guasi_init(): depth=%d\n", td->o.iodepth));
- memset(ld, 0, sizeof(*ld));
- maxthr = td->o.iodepth > GFIO_MIN_THREADS ? td->o.iodepth: GFIO_MIN_THREADS;
- if (maxthr > GFIO_MAX_THREADS)
- maxthr = GFIO_MAX_THREADS;
- if ((ld->hctx = guasi_create(GFIO_MIN_THREADS, maxthr, 1)) == NULL) {
- td_verror(td, errno, "guasi_create");
- free(ld);
- return 1;
- }
- ld->max_reqs = td->o.iodepth;
- ld->reqs = malloc(ld->max_reqs * sizeof(guasi_req_t));
- ld->io_us = malloc(ld->max_reqs * sizeof(struct io_u *));
- memset(ld->io_us, 0, ld->max_reqs * sizeof(struct io_u *));
- ld->queued_nr = 0;
- ld->reqs_nr = 0;
-
- td->io_ops_data = ld;
- GDBG_PRINT(("fio_guasi_init(): depth=%d -> %p\n", td->o.iodepth, ld));
-
- return 0;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "guasi",
- .version = FIO_IOOPS_VERSION,
- .init = fio_guasi_init,
- .prep = fio_guasi_prep,
- .queue = fio_guasi_queue,
- .commit = fio_guasi_commit,
- .cancel = fio_guasi_cancel,
- .getevents = fio_guasi_getevents,
- .event = fio_guasi_event,
- .cleanup = fio_guasi_cleanup,
- .open_file = generic_open_file,
- .close_file = generic_close_file,
- .get_file_size = generic_get_file_size,
-};
-
-static void fio_init fio_guasi_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_guasi_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
-
diff --git a/engines/libaio.c b/engines/libaio.c
deleted file mode 100644
index e15c519e..00000000
--- a/engines/libaio.c
+++ /dev/null
@@ -1,396 +0,0 @@
-/*
- * libaio engine
- *
- * IO engine using the Linux native aio interface.
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <assert.h>
-#include <libaio.h>
-
-#include "../fio.h"
-#include "../lib/pow2.h"
-#include "../optgroup.h"
-
-static int fio_libaio_commit(struct thread_data *td);
-
-struct libaio_data {
- io_context_t aio_ctx;
- struct io_event *aio_events;
- struct iocb **iocbs;
- struct io_u **io_us;
-
- /*
- * Basic ring buffer. 'head' is incremented in _queue(), and
- * 'tail' is incremented in _commit(). We keep 'queued' so
- * that we know if the ring is full or empty, when
- * 'head' == 'tail'. 'entries' is the ring size, and
- * 'is_pow2' is just an optimization to use AND instead of
- * modulus to get the remainder on ring increment.
- */
- int is_pow2;
- unsigned int entries;
- unsigned int queued;
- unsigned int head;
- unsigned int tail;
-};
-
-struct libaio_options {
- void *pad;
- unsigned int userspace_reap;
-};
-
-static struct fio_option options[] = {
- {
- .name = "userspace_reap",
- .lname = "Libaio userspace reaping",
- .type = FIO_OPT_STR_SET,
- .off1 = offsetof(struct libaio_options, userspace_reap),
- .help = "Use alternative user-space reap implementation",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_LIBAIO,
- },
- {
- .name = NULL,
- },
-};
-
-static inline void ring_inc(struct libaio_data *ld, unsigned int *val,
- unsigned int add)
-{
- if (ld->is_pow2)
- *val = (*val + add) & (ld->entries - 1);
- else
- *val = (*val + add) % ld->entries;
-}
-
-static int fio_libaio_prep(struct thread_data fio_unused *td, struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
-
- if (io_u->ddir == DDIR_READ)
- io_prep_pread(&io_u->iocb, f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
- else if (io_u->ddir == DDIR_WRITE)
- io_prep_pwrite(&io_u->iocb, f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
- else if (ddir_sync(io_u->ddir))
- io_prep_fsync(&io_u->iocb, f->fd);
-
- return 0;
-}
-
-static struct io_u *fio_libaio_event(struct thread_data *td, int event)
-{
- struct libaio_data *ld = td->io_ops_data;
- struct io_event *ev;
- struct io_u *io_u;
-
- ev = ld->aio_events + event;
- io_u = container_of(ev->obj, struct io_u, iocb);
-
- if (ev->res != io_u->xfer_buflen) {
- if (ev->res > io_u->xfer_buflen)
- io_u->error = -ev->res;
- else
- io_u->resid = io_u->xfer_buflen - ev->res;
- } else
- io_u->error = 0;
-
- return io_u;
-}
-
-struct aio_ring {
- unsigned id; /** kernel internal index number */
- unsigned nr; /** number of io_events */
- unsigned head;
- unsigned tail;
-
- unsigned magic;
- unsigned compat_features;
- unsigned incompat_features;
- unsigned header_length; /** size of aio_ring */
-
- struct io_event events[0];
-};
-
-#define AIO_RING_MAGIC 0xa10a10a1
-
-static int user_io_getevents(io_context_t aio_ctx, unsigned int max,
- struct io_event *events)
-{
- long i = 0;
- unsigned head;
- struct aio_ring *ring = (struct aio_ring*) aio_ctx;
-
- while (i < max) {
- head = ring->head;
-
- if (head == ring->tail) {
- /* There are no more completions */
- break;
- } else {
- /* There is another completion to reap */
- events[i] = ring->events[head];
- read_barrier();
- ring->head = (head + 1) % ring->nr;
- i++;
- }
- }
-
- return i;
-}
-
-static int fio_libaio_getevents(struct thread_data *td, unsigned int min,
- unsigned int max, const struct timespec *t)
-{
- struct libaio_data *ld = td->io_ops_data;
- struct libaio_options *o = td->eo;
- unsigned actual_min = td->o.iodepth_batch_complete_min == 0 ? 0 : min;
- struct timespec __lt, *lt = NULL;
- int r, events = 0;
-
- if (t) {
- __lt = *t;
- lt = &__lt;
- }
-
- do {
- if (o->userspace_reap == 1
- && actual_min == 0
- && ((struct aio_ring *)(ld->aio_ctx))->magic
- == AIO_RING_MAGIC) {
- r = user_io_getevents(ld->aio_ctx, max,
- ld->aio_events + events);
- } else {
- r = io_getevents(ld->aio_ctx, actual_min,
- max, ld->aio_events + events, lt);
- }
- if (r > 0)
- events += r;
- else if ((min && r == 0) || r == -EAGAIN) {
- fio_libaio_commit(td);
- usleep(100);
- } else if (r != -EINTR)
- break;
- } while (events < min);
-
- return r < 0 ? r : events;
-}
-
-static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct libaio_data *ld = td->io_ops_data;
-
- fio_ro_check(td, io_u);
-
- if (ld->queued == td->o.iodepth)
- return FIO_Q_BUSY;
-
- /*
- * fsync is tricky, since it can fail and we need to do it
- * serialized with other io. the reason is that linux doesn't
- * support aio fsync yet. So return busy for the case where we
- * have pending io, to let fio complete those first.
- */
- if (ddir_sync(io_u->ddir)) {
- if (ld->queued)
- return FIO_Q_BUSY;
-
- do_io_u_sync(td, io_u);
- return FIO_Q_COMPLETED;
- }
-
- if (io_u->ddir == DDIR_TRIM) {
- if (ld->queued)
- return FIO_Q_BUSY;
-
- do_io_u_trim(td, io_u);
- return FIO_Q_COMPLETED;
- }
-
- ld->iocbs[ld->head] = &io_u->iocb;
- ld->io_us[ld->head] = io_u;
- ring_inc(ld, &ld->head, 1);
- ld->queued++;
- return FIO_Q_QUEUED;
-}
-
-static void fio_libaio_queued(struct thread_data *td, struct io_u **io_us,
- unsigned int nr)
-{
- struct timeval now;
- unsigned int i;
-
- if (!fio_fill_issue_time(td))
- return;
-
- fio_gettime(&now, NULL);
-
- for (i = 0; i < nr; i++) {
- struct io_u *io_u = io_us[i];
-
- memcpy(&io_u->issue_time, &now, sizeof(now));
- io_u_queued(td, io_u);
- }
-}
-
-static int fio_libaio_commit(struct thread_data *td)
-{
- struct libaio_data *ld = td->io_ops_data;
- struct iocb **iocbs;
- struct io_u **io_us;
- struct timeval tv;
- int ret, wait_start = 0;
-
- if (!ld->queued)
- return 0;
-
- do {
- long nr = ld->queued;
-
- nr = min((unsigned int) nr, ld->entries - ld->tail);
- io_us = ld->io_us + ld->tail;
- iocbs = ld->iocbs + ld->tail;
-
- ret = io_submit(ld->aio_ctx, nr, iocbs);
- if (ret > 0) {
- fio_libaio_queued(td, io_us, ret);
- io_u_mark_submit(td, ret);
-
- ld->queued -= ret;
- ring_inc(ld, &ld->tail, ret);
- ret = 0;
- wait_start = 0;
- } else if (ret == -EINTR || !ret) {
- if (!ret)
- io_u_mark_submit(td, ret);
- wait_start = 0;
- continue;
- } else if (ret == -EAGAIN) {
- /*
- * If we get EAGAIN, we should break out without
- * error and let the upper layer reap some
- * events for us. If we have no queued IO, we
- * must loop here. If we loop for more than 30s,
- * just error out, something must be buggy in the
- * IO path.
- */
- if (ld->queued) {
- ret = 0;
- break;
- }
- if (!wait_start) {
- fio_gettime(&tv, NULL);
- wait_start = 1;
- } else if (mtime_since_now(&tv) > 30000) {
- log_err("fio: aio appears to be stalled, giving up\n");
- break;
- }
- usleep(1);
- continue;
- } else if (ret == -ENOMEM) {
- /*
- * If we get -ENOMEM, reap events if we can. If
- * we cannot, treat it as a fatal event since there's
- * nothing we can do about it.
- */
- if (ld->queued)
- ret = 0;
- break;
- } else
- break;
- } while (ld->queued);
-
- return ret;
-}
-
-static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
-{
- struct libaio_data *ld = td->io_ops_data;
-
- return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events);
-}
-
-static void fio_libaio_cleanup(struct thread_data *td)
-{
- struct libaio_data *ld = td->io_ops_data;
-
- if (ld) {
- /*
- * Work-around to avoid huge RCU stalls at exit time. If we
- * don't do this here, then it'll be torn down by exit_aio().
- * But for that case we can parallellize the freeing, thus
- * speeding it up a lot.
- */
- if (!(td->flags & TD_F_CHILD))
- io_destroy(ld->aio_ctx);
- free(ld->aio_events);
- free(ld->iocbs);
- free(ld->io_us);
- free(ld);
- }
-}
-
-static int fio_libaio_init(struct thread_data *td)
-{
- struct libaio_options *o = td->eo;
- struct libaio_data *ld;
- int err = 0;
-
- ld = calloc(1, sizeof(*ld));
-
- /*
- * First try passing in 0 for queue depth, since we don't
- * care about the user ring. If that fails, the kernel is too old
- * and we need the right depth.
- */
- if (!o->userspace_reap)
- err = io_queue_init(INT_MAX, &ld->aio_ctx);
- if (o->userspace_reap || err == -EINVAL)
- err = io_queue_init(td->o.iodepth, &ld->aio_ctx);
- if (err) {
- td_verror(td, -err, "io_queue_init");
- log_err("fio: check /proc/sys/fs/aio-max-nr\n");
- free(ld);
- return 1;
- }
-
- ld->entries = td->o.iodepth;
- ld->is_pow2 = is_power_of_2(ld->entries);
- ld->aio_events = calloc(ld->entries, sizeof(struct io_event));
- ld->iocbs = calloc(ld->entries, sizeof(struct iocb *));
- ld->io_us = calloc(ld->entries, sizeof(struct io_u *));
-
- td->io_ops_data = ld;
- return 0;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "libaio",
- .version = FIO_IOOPS_VERSION,
- .init = fio_libaio_init,
- .prep = fio_libaio_prep,
- .queue = fio_libaio_queue,
- .commit = fio_libaio_commit,
- .cancel = fio_libaio_cancel,
- .getevents = fio_libaio_getevents,
- .event = fio_libaio_event,
- .cleanup = fio_libaio_cleanup,
- .open_file = generic_open_file,
- .close_file = generic_close_file,
- .get_file_size = generic_get_file_size,
- .options = options,
- .option_struct_size = sizeof(struct libaio_options),
-};
-
-static void fio_init fio_libaio_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_libaio_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/libhdfs.c b/engines/libhdfs.c
deleted file mode 100644
index 96a0871d..00000000
--- a/engines/libhdfs.c
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * libhdfs engine
- *
- * this engine helps perform read/write operations on hdfs cluster using
- * libhdfs. hdfs doesnot support modification of data once file is created.
- *
- * so to mimic that create many files of small size (e.g 256k), and this
- * engine select a file based on the offset generated by fio.
- *
- * thus, random reads and writes can also be achieved with this logic.
- *
- */
-
-#include <math.h>
-#include <hdfs.h>
-
-#include "../fio.h"
-#include "../optgroup.h"
-
-#define CHUNCK_NAME_LENGTH_MAX 80
-#define CHUNCK_CREATION_BUFFER_SIZE 65536
-
-struct hdfsio_data {
- hdfsFS fs;
- hdfsFile fp;
- uint64_t curr_file_id;
-};
-
-struct hdfsio_options {
- void *pad; /* needed because offset can't be 0 for a option defined used offsetof */
- char *host;
- char *directory;
- unsigned int port;
- unsigned int chunck_size;
- unsigned int single_instance;
- unsigned int use_direct;
-};
-
-static struct fio_option options[] = {
- {
- .name = "namenode",
- .lname = "hfds namenode",
- .type = FIO_OPT_STR_STORE,
- .off1 = offsetof(struct hdfsio_options, host),
- .def = "localhost",
- .help = "Namenode of the HDFS cluster",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_HDFS,
- },
- {
- .name = "hostname",
- .lname = "hfds namenode",
- .type = FIO_OPT_STR_STORE,
- .off1 = offsetof(struct hdfsio_options, host),
- .def = "localhost",
- .help = "Namenode of the HDFS cluster",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_HDFS,
- },
- {
- .name = "port",
- .lname = "hdfs namenode port",
- .type = FIO_OPT_INT,
- .off1 = offsetof(struct hdfsio_options, port),
- .def = "9000",
- .minval = 1,
- .maxval = 65535,
- .help = "Port used by the HDFS cluster namenode",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_HDFS,
- },
- {
- .name = "hdfsdirectory",
- .lname = "hfds directory",
- .type = FIO_OPT_STR_STORE,
- .off1 = offsetof(struct hdfsio_options, directory),
- .def = "/",
- .help = "The HDFS directory where fio will create chuncks",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_HDFS,
- },
- {
- .name = "chunk_size",
- .alias = "chunck_size",
- .lname = "Chunk size",
- .type = FIO_OPT_INT,
- .off1 = offsetof(struct hdfsio_options, chunck_size),
- .def = "1048576",
- .help = "Size of individual chunck",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_HDFS,
- },
- {
- .name = "single_instance",
- .lname = "Single Instance",
- .type = FIO_OPT_BOOL,
- .off1 = offsetof(struct hdfsio_options, single_instance),
- .def = "1",
- .help = "Use a single instance",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_HDFS,
- },
- {
- .name = "hdfs_use_direct",
- .lname = "HDFS Use Direct",
- .type = FIO_OPT_BOOL,
- .off1 = offsetof(struct hdfsio_options, use_direct),
- .def = "0",
- .help = "Use readDirect instead of hdfsRead",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_HDFS,
- },
- {
- .name = NULL,
- },
-};
-
-
-static int get_chunck_name(char *dest, char *file_name, uint64_t chunk_id) {
- return snprintf(dest, CHUNCK_NAME_LENGTH_MAX, "%s_%lu", file_name, chunk_id);
-}
-
-static int fio_hdfsio_prep(struct thread_data *td, struct io_u *io_u)
-{
- struct hdfsio_options *options = td->eo;
- struct hdfsio_data *hd = td->io_ops_data;
- unsigned long f_id;
- char fname[CHUNCK_NAME_LENGTH_MAX];
- int open_flags;
-
- /* find out file id based on the offset generated by fio */
- f_id = floor(io_u->offset / options-> chunck_size);
-
- if (f_id == hd->curr_file_id) {
- /* file is already open */
- return 0;
- }
-
- if (hd->curr_file_id != -1) {
- if ( hdfsCloseFile(hd->fs, hd->fp) == -1) {
- log_err("hdfs: unable to close file: %s\n", strerror(errno));
- return errno;
- }
- hd->curr_file_id = -1;
- }
-
- if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_SYNC) {
- open_flags = O_RDONLY;
- } else if (io_u->ddir == DDIR_WRITE) {
- open_flags = O_WRONLY;
- } else {
- log_err("hdfs: Invalid I/O Operation\n");
- return 0;
- }
-
- get_chunck_name(fname, io_u->file->file_name, f_id);
- hd->fp = hdfsOpenFile(hd->fs, fname, open_flags, 0, 0,
- options->chunck_size);
- if(hd->fp == NULL) {
- log_err("hdfs: unable to open file: %s: %d\n", fname, strerror(errno));
- return errno;
- }
- hd->curr_file_id = f_id;
-
- return 0;
-}
-
-static int fio_hdfsio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct hdfsio_data *hd = td->io_ops_data;
- struct hdfsio_options *options = td->eo;
- int ret;
- unsigned long offset;
-
- offset = io_u->offset % options->chunck_size;
-
- if( (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) &&
- hdfsTell(hd->fs, hd->fp) != offset && hdfsSeek(hd->fs, hd->fp, offset) != 0 ) {
- log_err("hdfs: seek failed: %s, are you doing random write smaller than chunck size ?\n", strerror(errno));
- io_u->error = errno;
- return FIO_Q_COMPLETED;
- };
-
- // do the IO
- if (io_u->ddir == DDIR_READ) {
- if (options->use_direct) {
- ret = readDirect(hd->fs, hd->fp, io_u->xfer_buf, io_u->xfer_buflen);
- } else {
- ret = hdfsRead(hd->fs, hd->fp, io_u->xfer_buf, io_u->xfer_buflen);
- }
- } else if (io_u->ddir == DDIR_WRITE) {
- ret = hdfsWrite(hd->fs, hd->fp, io_u->xfer_buf,
- io_u->xfer_buflen);
- } else if (io_u->ddir == DDIR_SYNC) {
- ret = hdfsFlush(hd->fs, hd->fp);
- } else {
- log_err("hdfs: Invalid I/O Operation: %d\n", io_u->ddir);
- ret = EINVAL;
- }
-
- // Check if the IO went fine, or is incomplete
- if (ret != (int)io_u->xfer_buflen) {
- if (ret >= 0) {
- io_u->resid = io_u->xfer_buflen - ret;
- io_u->error = 0;
- return FIO_Q_COMPLETED;
- } else {
- io_u->error = errno;
- }
- }
-
- if (io_u->error)
- td_verror(td, io_u->error, "xfer");
-
- return FIO_Q_COMPLETED;
-}
-
-int fio_hdfsio_open_file(struct thread_data *td, struct fio_file *f)
-{
- if (td->o.odirect) {
- td->error = EINVAL;
- return 0;
- }
-
- return 0;
-}
-
-int fio_hdfsio_close_file(struct thread_data *td, struct fio_file *f)
-{
- struct hdfsio_data *hd = td->io_ops_data;
-
- if (hd->curr_file_id != -1) {
- if ( hdfsCloseFile(hd->fs, hd->fp) == -1) {
- log_err("hdfs: unable to close file: %s\n", strerror(errno));
- return errno;
- }
- hd->curr_file_id = -1;
- }
- return 0;
-}
-
-static int fio_hdfsio_init(struct thread_data *td)
-{
- struct hdfsio_options *options = td->eo;
- struct hdfsio_data *hd = td->io_ops_data;
- struct fio_file *f;
- uint64_t j,k;
- int i, failure = 0;
- uint8_t buffer[CHUNCK_CREATION_BUFFER_SIZE];
- uint64_t bytes_left;
- char fname[CHUNCK_NAME_LENGTH_MAX];
- hdfsFile fp;
- hdfsFileInfo *fi;
- tOffset fi_size;
-
- for_each_file(td, f, i) {
- k = 0;
- for(j=0; j < f->real_file_size; j += options->chunck_size) {
- get_chunck_name(fname, f->file_name, k++);
- fi = hdfsGetPathInfo(hd->fs, fname);
- fi_size = fi ? fi->mSize : 0;
- // fill exist and is big enough, nothing to do
- if( fi && fi_size >= options->chunck_size) {
- continue;
- }
- fp = hdfsOpenFile(hd->fs, fname, O_WRONLY, 0, 0,
- options->chunck_size);
- if(fp == NULL) {
- failure = errno;
- log_err("hdfs: unable to prepare file chunk %s: %s\n", fname, strerror(errno));
- break;
- }
- bytes_left = options->chunck_size;
- memset(buffer, 0, CHUNCK_CREATION_BUFFER_SIZE);
- while( bytes_left > CHUNCK_CREATION_BUFFER_SIZE) {
- if( hdfsWrite(hd->fs, fp, buffer, CHUNCK_CREATION_BUFFER_SIZE)
- != CHUNCK_CREATION_BUFFER_SIZE) {
- failure = errno;
- log_err("hdfs: unable to prepare file chunk %s: %s\n", fname, strerror(errno));
- break;
- };
- bytes_left -= CHUNCK_CREATION_BUFFER_SIZE;
- }
- if(bytes_left > 0) {
- if( hdfsWrite(hd->fs, fp, buffer, bytes_left)
- != bytes_left) {
- failure = errno;
- break;
- };
- }
- if( hdfsCloseFile(hd->fs, fp) != 0) {
- failure = errno;
- log_err("hdfs: unable to prepare file chunk %s: %s\n", fname, strerror(errno));
- break;
- }
- }
- if(failure) {
- break;
- }
- }
-
- if( !failure ) {
- fio_file_set_size_known(f);
- }
-
- return failure;
-}
-
-static int fio_hdfsio_setup(struct thread_data *td)
-{
- struct hdfsio_data *hd;
- struct fio_file *f;
- int i;
- uint64_t file_size, total_file_size;
-
- if (!td->io_ops_data) {
- hd = malloc(sizeof(*hd));
- memset(hd, 0, sizeof(*hd));
-
- hd->curr_file_id = -1;
-
- td->io_ops_data = hd;
- }
-
- total_file_size = 0;
- file_size = 0;
-
- for_each_file(td, f, i) {
- if(!td->o.file_size_low) {
- file_size = floor(td->o.size / td->o.nr_files);
- total_file_size += file_size;
- }
- else if (td->o.file_size_low == td->o.file_size_high)
- file_size = td->o.file_size_low;
- else {
- file_size = get_rand_file_size(td);
- }
- f->real_file_size = file_size;
- }
- /* If the size doesn't divide nicely with the chunck size,
- * make the last files bigger.
- * Used only if filesize was not explicitely given
- */
- if (!td->o.file_size_low && total_file_size < td->o.size) {
- f->real_file_size += (td->o.size - total_file_size);
- }
-
- return 0;
-}
-
-static int fio_hdfsio_io_u_init(struct thread_data *td, struct io_u *io_u)
-{
- struct hdfsio_data *hd = td->io_ops_data;
- struct hdfsio_options *options = td->eo;
- int failure;
- struct hdfsBuilder *bld;
-
- if (options->host == NULL || options->port == 0) {
- log_err("hdfs: server not defined\n");
- return EINVAL;
- }
-
- bld = hdfsNewBuilder();
- if (!bld) {
- failure = errno;
- log_err("hdfs: unable to allocate connect builder\n");
- return failure;
- }
- hdfsBuilderSetNameNode(bld, options->host);
- hdfsBuilderSetNameNodePort(bld, options->port);
- if(! options->single_instance) {
- hdfsBuilderSetForceNewInstance(bld);
- }
- hd->fs = hdfsBuilderConnect(bld);
-
- /* hdfsSetWorkingDirectory succeed on non existend directory */
- if (hdfsExists(hd->fs, options->directory) < 0 || hdfsSetWorkingDirectory(hd->fs, options->directory) < 0) {
- failure = errno;
- log_err("hdfs: invalid working directory %s: %s\n", options->directory, strerror(errno));
- return failure;
- }
-
- return 0;
-}
-
-static void fio_hdfsio_io_u_free(struct thread_data *td, struct io_u *io_u)
-{
- struct hdfsio_data *hd = td->io_ops_data;
-
- if (hd->fs && hdfsDisconnect(hd->fs) < 0) {
- log_err("hdfs: disconnect failed: %d\n", errno);
- }
-}
-
-static struct ioengine_ops ioengine_hdfs = {
- .name = "libhdfs",
- .version = FIO_IOOPS_VERSION,
- .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NODISKUTIL,
- .setup = fio_hdfsio_setup,
- .init = fio_hdfsio_init,
- .prep = fio_hdfsio_prep,
- .queue = fio_hdfsio_queue,
- .open_file = fio_hdfsio_open_file,
- .close_file = fio_hdfsio_close_file,
- .io_u_init = fio_hdfsio_io_u_init,
- .io_u_free = fio_hdfsio_io_u_free,
- .option_struct_size = sizeof(struct hdfsio_options),
- .options = options,
-};
-
-
-static void fio_init fio_hdfsio_register(void)
-{
- register_ioengine(&ioengine_hdfs);
-}
-
-static void fio_exit fio_hdfsio_unregister(void)
-{
- unregister_ioengine(&ioengine_hdfs);
-}
diff --git a/engines/mmap.c b/engines/mmap.c
deleted file mode 100644
index bc038f4f..00000000
--- a/engines/mmap.c
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- * mmap engine
- *
- * IO engine that reads/writes from files by doing memcpy to/from
- * a memory mapped region of the file.
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sys/mman.h>
-
-#include "../fio.h"
-#include "../verify.h"
-
-/*
- * Limits us to 1GiB of mapped files in total
- */
-#define MMAP_TOTAL_SZ (1 * 1024 * 1024 * 1024UL)
-
-static unsigned long mmap_map_size;
-
-struct fio_mmap_data {
- void *mmap_ptr;
- size_t mmap_sz;
- off_t mmap_off;
-};
-
-static int fio_mmap_file(struct thread_data *td, struct fio_file *f,
- size_t length, off_t off)
-{
- struct fio_mmap_data *fmd = FILE_ENG_DATA(f);
- int flags = 0;
-
- if (td_rw(td))
- flags = PROT_READ | PROT_WRITE;
- else if (td_write(td)) {
- flags = PROT_WRITE;
-
- if (td->o.verify != VERIFY_NONE)
- flags |= PROT_READ;
- } else
- flags = PROT_READ;
-
- fmd->mmap_ptr = mmap(NULL, length, flags, MAP_SHARED, f->fd, off);
- if (fmd->mmap_ptr == MAP_FAILED) {
- fmd->mmap_ptr = NULL;
- td_verror(td, errno, "mmap");
- goto err;
- }
-
- if (!td_random(td)) {
- if (posix_madvise(fmd->mmap_ptr, length, POSIX_MADV_SEQUENTIAL) < 0) {
- td_verror(td, errno, "madvise");
- goto err;
- }
- } else {
- if (posix_madvise(fmd->mmap_ptr, length, POSIX_MADV_RANDOM) < 0) {
- td_verror(td, errno, "madvise");
- goto err;
- }
- }
- if (posix_madvise(fmd->mmap_ptr, length, POSIX_MADV_DONTNEED) < 0) {
- td_verror(td, errno, "madvise");
- goto err;
- }
-
-#ifdef FIO_MADV_FREE
- if (f->filetype == FIO_TYPE_BLOCK)
- (void) posix_madvise(fmd->mmap_ptr, fmd->mmap_sz, FIO_MADV_FREE);
-#endif
-
-err:
- if (td->error && fmd->mmap_ptr)
- munmap(fmd->mmap_ptr, length);
-
- return td->error;
-}
-
-/*
- * Just mmap an appropriate portion, we cannot mmap the full extent
- */
-static int fio_mmapio_prep_limited(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
- struct fio_mmap_data *fmd = FILE_ENG_DATA(f);
-
- if (io_u->buflen > mmap_map_size) {
- log_err("fio: bs too big for mmap engine\n");
- return EIO;
- }
-
- fmd->mmap_sz = mmap_map_size;
- if (fmd->mmap_sz > f->io_size)
- fmd->mmap_sz = f->io_size;
-
- fmd->mmap_off = io_u->offset;
-
- return fio_mmap_file(td, f, fmd->mmap_sz, fmd->mmap_off);
-}
-
-/*
- * Attempt to mmap the entire file
- */
-static int fio_mmapio_prep_full(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
- struct fio_mmap_data *fmd = FILE_ENG_DATA(f);
- int ret;
-
- if (fio_file_partial_mmap(f))
- return EINVAL;
- if (io_u->offset != (size_t) io_u->offset ||
- f->io_size != (size_t) f->io_size) {
- fio_file_set_partial_mmap(f);
- return EINVAL;
- }
-
- fmd->mmap_sz = f->io_size;
- fmd->mmap_off = 0;
-
- ret = fio_mmap_file(td, f, fmd->mmap_sz, fmd->mmap_off);
- if (ret)
- fio_file_set_partial_mmap(f);
-
- return ret;
-}
-
-static int fio_mmapio_prep(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
- struct fio_mmap_data *fmd = FILE_ENG_DATA(f);
- int ret;
-
- /*
- * It fits within existing mapping, use it
- */
- if (io_u->offset >= fmd->mmap_off &&
- io_u->offset + io_u->buflen < fmd->mmap_off + fmd->mmap_sz)
- goto done;
-
- /*
- * unmap any existing mapping
- */
- if (fmd->mmap_ptr) {
- if (munmap(fmd->mmap_ptr, fmd->mmap_sz) < 0)
- return errno;
- fmd->mmap_ptr = NULL;
- }
-
- if (fio_mmapio_prep_full(td, io_u)) {
- td_clear_error(td);
- ret = fio_mmapio_prep_limited(td, io_u);
- if (ret)
- return ret;
- }
-
-done:
- io_u->mmap_data = fmd->mmap_ptr + io_u->offset - fmd->mmap_off -
- f->file_offset;
- return 0;
-}
-
-static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
- struct fio_mmap_data *fmd = FILE_ENG_DATA(f);
-
- fio_ro_check(td, io_u);
-
- if (io_u->ddir == DDIR_READ)
- memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen);
- else if (io_u->ddir == DDIR_WRITE)
- memcpy(io_u->mmap_data, io_u->xfer_buf, io_u->xfer_buflen);
- else if (ddir_sync(io_u->ddir)) {
- if (msync(fmd->mmap_ptr, fmd->mmap_sz, MS_SYNC)) {
- io_u->error = errno;
- td_verror(td, io_u->error, "msync");
- }
- } else if (io_u->ddir == DDIR_TRIM) {
- int ret = do_io_u_trim(td, io_u);
-
- if (!ret)
- td_verror(td, io_u->error, "trim");
- }
-
-
- /*
- * not really direct, but should drop the pages from the cache
- */
- if (td->o.odirect && ddir_rw(io_u->ddir)) {
- if (msync(io_u->mmap_data, io_u->xfer_buflen, MS_SYNC) < 0) {
- io_u->error = errno;
- td_verror(td, io_u->error, "msync");
- }
- if (posix_madvise(io_u->mmap_data, io_u->xfer_buflen, POSIX_MADV_DONTNEED) < 0) {
- io_u->error = errno;
- td_verror(td, io_u->error, "madvise");
- }
- }
-
- return FIO_Q_COMPLETED;
-}
-
-static int fio_mmapio_init(struct thread_data *td)
-{
- struct thread_options *o = &td->o;
-
- if ((o->rw_min_bs & page_mask) &&
- (o->odirect || o->fsync_blocks || o->fdatasync_blocks)) {
- log_err("fio: mmap options dictate a minimum block size of "
- "%llu bytes\n", (unsigned long long) page_size);
- return 1;
- }
-
- mmap_map_size = MMAP_TOTAL_SZ / o->nr_files;
- return 0;
-}
-
-static int fio_mmapio_open_file(struct thread_data *td, struct fio_file *f)
-{
- struct fio_mmap_data *fmd;
- int ret;
-
- ret = generic_open_file(td, f);
- if (ret)
- return ret;
-
- fmd = calloc(1, sizeof(*fmd));
- if (!fmd) {
- int fio_unused __ret;
- __ret = generic_close_file(td, f);
- return 1;
- }
-
- FILE_SET_ENG_DATA(f, fmd);
- return 0;
-}
-
-static int fio_mmapio_close_file(struct thread_data *td, struct fio_file *f)
-{
- struct fio_mmap_data *fmd = FILE_ENG_DATA(f);
-
- FILE_SET_ENG_DATA(f, NULL);
- free(fmd);
- fio_file_clear_partial_mmap(f);
-
- return generic_close_file(td, f);
-}
-
-static struct ioengine_ops ioengine = {
- .name = "mmap",
- .version = FIO_IOOPS_VERSION,
- .init = fio_mmapio_init,
- .prep = fio_mmapio_prep,
- .queue = fio_mmapio_queue,
- .open_file = fio_mmapio_open_file,
- .close_file = fio_mmapio_close_file,
- .get_file_size = generic_get_file_size,
- .flags = FIO_SYNCIO | FIO_NOEXTEND,
-};
-
-static void fio_init fio_mmapio_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_mmapio_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/mtd.c b/engines/mtd.c
deleted file mode 100644
index 3c22a1b1..00000000
--- a/engines/mtd.c
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * MTD engine
- *
- * IO engine that reads/writes from MTD character devices.
- *
- */
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sys/ioctl.h>
-#include <mtd/mtd-user.h>
-
-#include "../fio.h"
-#include "../verify.h"
-#include "../oslib/libmtd.h"
-
-static libmtd_t desc;
-
-struct fio_mtd_data {
- struct mtd_dev_info info;
-};
-
-static int fio_mtd_maybe_mark_bad(struct thread_data *td,
- struct fio_mtd_data *fmd,
- struct io_u *io_u, int eb)
-{
- int ret;
- if (errno == EIO) {
- ret = mtd_mark_bad(&fmd->info, io_u->file->fd, eb);
- if (ret != 0) {
- io_u->error = errno;
- td_verror(td, errno, "mtd_mark_bad");
- return -1;
- }
- }
- return 0;
-}
-
-static int fio_mtd_is_bad(struct thread_data *td,
- struct fio_mtd_data *fmd,
- struct io_u *io_u, int eb)
-{
- int ret = mtd_is_bad(&fmd->info, io_u->file->fd, eb);
- if (ret == -1) {
- io_u->error = errno;
- td_verror(td, errno, "mtd_is_bad");
- } else if (ret == 1)
- io_u->error = EIO; /* Silent failure--don't flood stderr */
- return ret;
-}
-
-static int fio_mtd_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
- struct fio_mtd_data *fmd = FILE_ENG_DATA(f);
- int local_offs = 0;
- int ret;
-
- fio_ro_check(td, io_u);
-
- /*
- * Errors tend to pertain to particular erase blocks, so divide up
- * I/O to erase block size.
- * If an error is encountered, log it and keep going onto the next
- * block because the error probably just pertains to that block.
- * TODO(dehrenberg): Divide up reads and writes into page-sized
- * operations to get more fine-grained information about errors.
- */
- while (local_offs < io_u->buflen) {
- int eb = (io_u->offset + local_offs) / fmd->info.eb_size;
- int eb_offs = (io_u->offset + local_offs) % fmd->info.eb_size;
- /* The length is the smaller of the length remaining in the
- * buffer and the distance to the end of the erase block */
- int len = min((int)io_u->buflen - local_offs,
- (int)fmd->info.eb_size - eb_offs);
- char *buf = ((char *)io_u->buf) + local_offs;
-
- if (td->o.skip_bad) {
- ret = fio_mtd_is_bad(td, fmd, io_u, eb);
- if (ret == -1)
- break;
- else if (ret == 1)
- goto next;
- }
- if (io_u->ddir == DDIR_READ) {
- ret = mtd_read(&fmd->info, f->fd, eb, eb_offs, buf, len);
- if (ret != 0) {
- io_u->error = errno;
- td_verror(td, errno, "mtd_read");
- if (fio_mtd_maybe_mark_bad(td, fmd, io_u, eb))
- break;
- }
- } else if (io_u->ddir == DDIR_WRITE) {
- ret = mtd_write(desc, &fmd->info, f->fd, eb,
- eb_offs, buf, len, NULL, 0, 0);
- if (ret != 0) {
- io_u->error = errno;
- td_verror(td, errno, "mtd_write");
- if (fio_mtd_maybe_mark_bad(td, fmd, io_u, eb))
- break;
- }
- } else if (io_u->ddir == DDIR_TRIM) {
- if (eb_offs != 0 || len != fmd->info.eb_size) {
- io_u->error = EINVAL;
- td_verror(td, EINVAL,
- "trim on MTD must be erase block-aligned");
- }
- ret = mtd_erase(desc, &fmd->info, f->fd, eb);
- if (ret != 0) {
- io_u->error = errno;
- td_verror(td, errno, "mtd_erase");
- if (fio_mtd_maybe_mark_bad(td, fmd, io_u, eb))
- break;
- }
- } else {
- io_u->error = ENOTSUP;
- td_verror(td, io_u->error, "operation not supported on mtd");
- }
-
-next:
- local_offs += len;
- }
-
- return FIO_Q_COMPLETED;
-}
-
-static int fio_mtd_open_file(struct thread_data *td, struct fio_file *f)
-{
- struct fio_mtd_data *fmd;
- int ret;
-
- ret = generic_open_file(td, f);
- if (ret)
- return ret;
-
- fmd = calloc(1, sizeof(*fmd));
- if (!fmd)
- goto err_close;
-
- ret = mtd_get_dev_info(desc, f->file_name, &fmd->info);
- if (ret != 0) {
- td_verror(td, errno, "mtd_get_dev_info");
- goto err_free;
- }
-
- FILE_SET_ENG_DATA(f, fmd);
- return 0;
-
-err_free:
- free(fmd);
-err_close:
- {
- int fio_unused __ret;
- __ret = generic_close_file(td, f);
- return 1;
- }
-}
-
-static int fio_mtd_close_file(struct thread_data *td, struct fio_file *f)
-{
- struct fio_mtd_data *fmd = FILE_ENG_DATA(f);
-
- FILE_SET_ENG_DATA(f, NULL);
- free(fmd);
-
- return generic_close_file(td, f);
-}
-
-static int fio_mtd_get_file_size(struct thread_data *td, struct fio_file *f)
-{
- struct mtd_dev_info info;
-
- int ret = mtd_get_dev_info(desc, f->file_name, &info);
- if (ret != 0) {
- td_verror(td, errno, "mtd_get_dev_info");
- return errno;
- }
- f->real_file_size = info.size;
-
- return 0;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "mtd",
- .version = FIO_IOOPS_VERSION,
- .queue = fio_mtd_queue,
- .open_file = fio_mtd_open_file,
- .close_file = fio_mtd_close_file,
- .get_file_size = fio_mtd_get_file_size,
- .flags = FIO_SYNCIO | FIO_NOEXTEND,
-};
-
-static void fio_init fio_mtd_register(void)
-{
- desc = libmtd_open();
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_mtd_unregister(void)
-{
- unregister_ioengine(&ioengine);
- libmtd_close(desc);
- desc = NULL;
-}
-
-
-
diff --git a/engines/net.c b/engines/net.c
deleted file mode 100644
index 37d44fd8..00000000
--- a/engines/net.c
+++ /dev/null
@@ -1,1468 +0,0 @@
-/*
- * net engine
- *
- * IO engine that reads/writes to/from sockets.
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <signal.h>
-#include <errno.h>
-#include <assert.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <sys/poll.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-
-#include "../fio.h"
-#include "../verify.h"
-#include "../optgroup.h"
-
-struct netio_data {
- int listenfd;
- int use_splice;
- int seq_off;
- int pipes[2];
- struct sockaddr_in addr;
- struct sockaddr_in6 addr6;
- struct sockaddr_un addr_un;
- uint64_t udp_send_seq;
- uint64_t udp_recv_seq;
-};
-
-struct netio_options {
- struct thread_data *td;
- unsigned int port;
- unsigned int proto;
- unsigned int listen;
- unsigned int pingpong;
- unsigned int nodelay;
- unsigned int ttl;
- unsigned int window_size;
- unsigned int mss;
- char *intfc;
-};
-
-struct udp_close_msg {
- uint32_t magic;
- uint32_t cmd;
-};
-
-struct udp_seq {
- uint64_t magic;
- uint64_t seq;
- uint64_t bs;
-};
-
-enum {
- FIO_LINK_CLOSE = 0x89,
- FIO_LINK_OPEN_CLOSE_MAGIC = 0x6c696e6b,
- FIO_LINK_OPEN = 0x98,
- FIO_UDP_SEQ_MAGIC = 0x657375716e556563ULL,
-
- FIO_TYPE_TCP = 1,
- FIO_TYPE_UDP = 2,
- FIO_TYPE_UNIX = 3,
- FIO_TYPE_TCP_V6 = 4,
- FIO_TYPE_UDP_V6 = 5,
-};
-
-static int str_hostname_cb(void *data, const char *input);
-static struct fio_option options[] = {
- {
- .name = "hostname",
- .lname = "net engine hostname",
- .type = FIO_OPT_STR_STORE,
- .cb = str_hostname_cb,
- .help = "Hostname for net IO engine",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_NETIO,
- },
- {
- .name = "port",
- .lname = "net engine port",
- .type = FIO_OPT_INT,
- .off1 = offsetof(struct netio_options, port),
- .minval = 1,
- .maxval = 65535,
- .help = "Port to use for TCP or UDP net connections",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_NETIO,
- },
- {
- .name = "protocol",
- .lname = "net engine protocol",
- .alias = "proto",
- .type = FIO_OPT_STR,
- .off1 = offsetof(struct netio_options, proto),
- .help = "Network protocol to use",
- .def = "tcp",
- .posval = {
- { .ival = "tcp",
- .oval = FIO_TYPE_TCP,
- .help = "Transmission Control Protocol",
- },
-#ifdef CONFIG_IPV6
- { .ival = "tcpv6",
- .oval = FIO_TYPE_TCP_V6,
- .help = "Transmission Control Protocol V6",
- },
-#endif
- { .ival = "udp",
- .oval = FIO_TYPE_UDP,
- .help = "User Datagram Protocol",
- },
-#ifdef CONFIG_IPV6
- { .ival = "udpv6",
- .oval = FIO_TYPE_UDP_V6,
- .help = "User Datagram Protocol V6",
- },
-#endif
- { .ival = "unix",
- .oval = FIO_TYPE_UNIX,
- .help = "UNIX domain socket",
- },
- },
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_NETIO,
- },
-#ifdef CONFIG_TCP_NODELAY
- {
- .name = "nodelay",
- .lname = "No Delay",
- .type = FIO_OPT_BOOL,
- .off1 = offsetof(struct netio_options, nodelay),
- .help = "Use TCP_NODELAY on TCP connections",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_NETIO,
- },
-#endif
- {
- .name = "listen",
- .lname = "net engine listen",
- .type = FIO_OPT_STR_SET,
- .off1 = offsetof(struct netio_options, listen),
- .help = "Listen for incoming TCP connections",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_NETIO,
- },
- {
- .name = "pingpong",
- .lname = "Ping Pong",
- .type = FIO_OPT_STR_SET,
- .off1 = offsetof(struct netio_options, pingpong),
- .help = "Ping-pong IO requests",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_NETIO,
- },
- {
- .name = "interface",
- .lname = "net engine interface",
- .type = FIO_OPT_STR_STORE,
- .off1 = offsetof(struct netio_options, intfc),
- .help = "Network interface to use",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_NETIO,
- },
- {
- .name = "ttl",
- .lname = "net engine multicast ttl",
- .type = FIO_OPT_INT,
- .off1 = offsetof(struct netio_options, ttl),
- .def = "1",
- .minval = 0,
- .help = "Time-to-live value for outgoing UDP multicast packets",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_NETIO,
- },
-#ifdef CONFIG_NET_WINDOWSIZE
- {
- .name = "window_size",
- .lname = "Window Size",
- .type = FIO_OPT_INT,
- .off1 = offsetof(struct netio_options, window_size),
- .minval = 0,
- .help = "Set socket buffer window size",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_NETIO,
- },
-#endif
-#ifdef CONFIG_NET_MSS
- {
- .name = "mss",
- .lname = "Maximum segment size",
- .type = FIO_OPT_INT,
- .off1 = offsetof(struct netio_options, mss),
- .minval = 0,
- .help = "Set TCP maximum segment size",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_NETIO,
- },
-#endif
- {
- .name = NULL,
- },
-};
-
-static inline int is_udp(struct netio_options *o)
-{
- return o->proto == FIO_TYPE_UDP || o->proto == FIO_TYPE_UDP_V6;
-}
-
-static inline int is_tcp(struct netio_options *o)
-{
- return o->proto == FIO_TYPE_TCP || o->proto == FIO_TYPE_TCP_V6;
-}
-
-static inline int is_ipv6(struct netio_options *o)
-{
- return o->proto == FIO_TYPE_UDP_V6 || o->proto == FIO_TYPE_TCP_V6;
-}
-
-static int set_window_size(struct thread_data *td, int fd)
-{
-#ifdef CONFIG_NET_WINDOWSIZE
- struct netio_options *o = td->eo;
- unsigned int wss;
- int snd, rcv, ret;
-
- if (!o->window_size)
- return 0;
-
- rcv = o->listen || o->pingpong;
- snd = !o->listen || o->pingpong;
- wss = o->window_size;
- ret = 0;
-
- if (rcv) {
- ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, (void *) &wss,
- sizeof(wss));
- if (ret < 0)
- td_verror(td, errno, "rcvbuf window size");
- }
- if (snd && !ret) {
- ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, (void *) &wss,
- sizeof(wss));
- if (ret < 0)
- td_verror(td, errno, "sndbuf window size");
- }
-
- return ret;
-#else
- td_verror(td, -EINVAL, "setsockopt window size");
- return -1;
-#endif
-}
-
-static int set_mss(struct thread_data *td, int fd)
-{
-#ifdef CONFIG_NET_MSS
- struct netio_options *o = td->eo;
- unsigned int mss;
- int ret;
-
- if (!o->mss || !is_tcp(o))
- return 0;
-
- mss = o->mss;
- ret = setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, (void *) &mss,
- sizeof(mss));
- if (ret < 0)
- td_verror(td, errno, "setsockopt TCP_MAXSEG");
-
- return ret;
-#else
- td_verror(td, -EINVAL, "setsockopt TCP_MAXSEG");
- return -1;
-#endif
-}
-
-
-/*
- * Return -1 for error and 'nr events' for a positive number
- * of events
- */
-static int poll_wait(struct thread_data *td, int fd, short events)
-{
- struct pollfd pfd;
- int ret;
-
- while (!td->terminate) {
- pfd.fd = fd;
- pfd.events = events;
- ret = poll(&pfd, 1, -1);
- if (ret < 0) {
- if (errno == EINTR)
- break;
-
- td_verror(td, errno, "poll");
- return -1;
- } else if (!ret)
- continue;
-
- break;
- }
-
- if (pfd.revents & events)
- return 1;
-
- return -1;
-}
-
-static int fio_netio_is_multicast(const char *mcaddr)
-{
- in_addr_t addr = inet_network(mcaddr);
- if (addr == -1)
- return 0;
-
- if (inet_network("224.0.0.0") <= addr &&
- inet_network("239.255.255.255") >= addr)
- return 1;
-
- return 0;
-}
-
-
-static int fio_netio_prep(struct thread_data *td, struct io_u *io_u)
-{
- struct netio_options *o = td->eo;
-
- /*
- * Make sure we don't see spurious reads to a receiver, and vice versa
- */
- if (is_tcp(o))
- return 0;
-
- if ((o->listen && io_u->ddir == DDIR_WRITE) ||
- (!o->listen && io_u->ddir == DDIR_READ)) {
- td_verror(td, EINVAL, "bad direction");
- return 1;
- }
-
- return 0;
-}
-
-#ifdef CONFIG_LINUX_SPLICE
-static int splice_io_u(int fdin, int fdout, unsigned int len)
-{
- int bytes = 0;
-
- while (len) {
- int ret = splice(fdin, NULL, fdout, NULL, len, 0);
-
- if (ret < 0) {
- if (!bytes)
- bytes = ret;
-
- break;
- } else if (!ret)
- break;
-
- bytes += ret;
- len -= ret;
- }
-
- return bytes;
-}
-
-/*
- * Receive bytes from a socket and fill them into the internal pipe
- */
-static int splice_in(struct thread_data *td, struct io_u *io_u)
-{
- struct netio_data *nd = td->io_ops_data;
-
- return splice_io_u(io_u->file->fd, nd->pipes[1], io_u->xfer_buflen);
-}
-
-/*
- * Transmit 'len' bytes from the internal pipe
- */
-static int splice_out(struct thread_data *td, struct io_u *io_u,
- unsigned int len)
-{
- struct netio_data *nd = td->io_ops_data;
-
- return splice_io_u(nd->pipes[0], io_u->file->fd, len);
-}
-
-static int vmsplice_io_u(struct io_u *io_u, int fd, unsigned int len)
-{
- struct iovec iov = {
- .iov_base = io_u->xfer_buf,
- .iov_len = len,
- };
- int bytes = 0;
-
- while (iov.iov_len) {
- int ret = vmsplice(fd, &iov, 1, SPLICE_F_MOVE);
-
- if (ret < 0) {
- if (!bytes)
- bytes = ret;
- break;
- } else if (!ret)
- break;
-
- iov.iov_len -= ret;
- iov.iov_base += ret;
- bytes += ret;
- }
-
- return bytes;
-
-}
-
-/*
- * vmsplice() pipe to io_u buffer
- */
-static int vmsplice_io_u_out(struct thread_data *td, struct io_u *io_u,
- unsigned int len)
-{
- struct netio_data *nd = td->io_ops_data;
-
- return vmsplice_io_u(io_u, nd->pipes[0], len);
-}
-
-/*
- * vmsplice() io_u to pipe
- */
-static int vmsplice_io_u_in(struct thread_data *td, struct io_u *io_u)
-{
- struct netio_data *nd = td->io_ops_data;
-
- return vmsplice_io_u(io_u, nd->pipes[1], io_u->xfer_buflen);
-}
-
-/*
- * splice receive - transfer socket data into a pipe using splice, then map
- * that pipe data into the io_u using vmsplice.
- */
-static int fio_netio_splice_in(struct thread_data *td, struct io_u *io_u)
-{
- int ret;
-
- ret = splice_in(td, io_u);
- if (ret > 0)
- return vmsplice_io_u_out(td, io_u, ret);
-
- return ret;
-}
-
-/*
- * splice transmit - map data from the io_u into a pipe by using vmsplice,
- * then transfer that pipe to a socket using splice.
- */
-static int fio_netio_splice_out(struct thread_data *td, struct io_u *io_u)
-{
- int ret;
-
- ret = vmsplice_io_u_in(td, io_u);
- if (ret > 0)
- return splice_out(td, io_u, ret);
-
- return ret;
-}
-#else
-static int fio_netio_splice_in(struct thread_data *td, struct io_u *io_u)
-{
- errno = EOPNOTSUPP;
- return -1;
-}
-
-static int fio_netio_splice_out(struct thread_data *td, struct io_u *io_u)
-{
- errno = EOPNOTSUPP;
- return -1;
-}
-#endif
-
-static void store_udp_seq(struct netio_data *nd, struct io_u *io_u)
-{
- struct udp_seq *us;
-
- if (io_u->xfer_buflen < sizeof(*us))
- return;
-
- us = io_u->xfer_buf + io_u->xfer_buflen - sizeof(*us);
- us->magic = cpu_to_le64((uint64_t) FIO_UDP_SEQ_MAGIC);
- us->bs = cpu_to_le64((uint64_t) io_u->xfer_buflen);
- us->seq = cpu_to_le64(nd->udp_send_seq++);
-}
-
-static void verify_udp_seq(struct thread_data *td, struct netio_data *nd,
- struct io_u *io_u)
-{
- struct udp_seq *us;
- uint64_t seq;
-
- if (io_u->xfer_buflen < sizeof(*us))
- return;
-
- if (nd->seq_off)
- return;
-
- us = io_u->xfer_buf + io_u->xfer_buflen - sizeof(*us);
- if (le64_to_cpu(us->magic) != FIO_UDP_SEQ_MAGIC)
- return;
- if (le64_to_cpu(us->bs) != io_u->xfer_buflen) {
- nd->seq_off = 1;
- return;
- }
-
- seq = le64_to_cpu(us->seq);
-
- if (seq != nd->udp_recv_seq)
- td->ts.drop_io_u[io_u->ddir] += seq - nd->udp_recv_seq;
-
- nd->udp_recv_seq = seq + 1;
-}
-
-static int fio_netio_send(struct thread_data *td, struct io_u *io_u)
-{
- struct netio_data *nd = td->io_ops_data;
- struct netio_options *o = td->eo;
- int ret, flags = 0;
-
- do {
- if (is_udp(o)) {
- const struct sockaddr *to;
- socklen_t len;
-
- if (is_ipv6(o)) {
- to = (struct sockaddr *) &nd->addr6;
- len = sizeof(nd->addr6);
- } else {
- to = (struct sockaddr *) &nd->addr;
- len = sizeof(nd->addr);
- }
-
- if (td->o.verify == VERIFY_NONE)
- store_udp_seq(nd, io_u);
-
- ret = sendto(io_u->file->fd, io_u->xfer_buf,
- io_u->xfer_buflen, flags, to, len);
- } else {
- /*
- * if we are going to write more, set MSG_MORE
- */
-#ifdef MSG_MORE
- if ((td->this_io_bytes[DDIR_WRITE] + io_u->xfer_buflen <
- td->o.size) && !o->pingpong)
- flags |= MSG_MORE;
-#endif
- ret = send(io_u->file->fd, io_u->xfer_buf,
- io_u->xfer_buflen, flags);
- }
- if (ret > 0)
- break;
-
- ret = poll_wait(td, io_u->file->fd, POLLOUT);
- if (ret <= 0)
- break;
- } while (1);
-
- return ret;
-}
-
-static int is_close_msg(struct io_u *io_u, int len)
-{
- struct udp_close_msg *msg;
-
- if (len != sizeof(struct udp_close_msg))
- return 0;
-
- msg = io_u->xfer_buf;
- if (le32_to_cpu(msg->magic) != FIO_LINK_OPEN_CLOSE_MAGIC)
- return 0;
- if (le32_to_cpu(msg->cmd) != FIO_LINK_CLOSE)
- return 0;
-
- return 1;
-}
-
-static int fio_netio_recv(struct thread_data *td, struct io_u *io_u)
-{
- struct netio_data *nd = td->io_ops_data;
- struct netio_options *o = td->eo;
- int ret, flags = 0;
-
- do {
- if (is_udp(o)) {
- struct sockaddr *from;
- socklen_t l, *len = &l;
-
- if (o->listen) {
- if (!is_ipv6(o)) {
- from = (struct sockaddr *) &nd->addr;
- *len = sizeof(nd->addr);
- } else {
- from = (struct sockaddr *) &nd->addr6;
- *len = sizeof(nd->addr6);
- }
- } else {
- from = NULL;
- len = NULL;
- }
-
- ret = recvfrom(io_u->file->fd, io_u->xfer_buf,
- io_u->xfer_buflen, flags, from, len);
-
- if (is_close_msg(io_u, ret)) {
- td->done = 1;
- return 0;
- }
- } else {
- ret = recv(io_u->file->fd, io_u->xfer_buf,
- io_u->xfer_buflen, flags);
-
- if (is_close_msg(io_u, ret)) {
- td->done = 1;
- return 0;
- }
- }
- if (ret > 0)
- break;
- else if (!ret && (flags & MSG_WAITALL))
- break;
-
- ret = poll_wait(td, io_u->file->fd, POLLIN);
- if (ret <= 0)
- break;
- flags |= MSG_WAITALL;
- } while (1);
-
- if (is_udp(o) && td->o.verify == VERIFY_NONE)
- verify_udp_seq(td, nd, io_u);
-
- return ret;
-}
-
-static int __fio_netio_queue(struct thread_data *td, struct io_u *io_u,
- enum fio_ddir ddir)
-{
- struct netio_data *nd = td->io_ops_data;
- struct netio_options *o = td->eo;
- int ret;
-
- if (ddir == DDIR_WRITE) {
- if (!nd->use_splice || is_udp(o) ||
- o->proto == FIO_TYPE_UNIX)
- ret = fio_netio_send(td, io_u);
- else
- ret = fio_netio_splice_out(td, io_u);
- } else if (ddir == DDIR_READ) {
- if (!nd->use_splice || is_udp(o) ||
- o->proto == FIO_TYPE_UNIX)
- ret = fio_netio_recv(td, io_u);
- else
- ret = fio_netio_splice_in(td, io_u);
- } else
- ret = 0; /* must be a SYNC */
-
- if (ret != (int) io_u->xfer_buflen) {
- if (ret > 0) {
- io_u->resid = io_u->xfer_buflen - ret;
- io_u->error = 0;
- return FIO_Q_COMPLETED;
- } else if (!ret)
- return FIO_Q_BUSY;
- else {
- int err = errno;
-
- if (ddir == DDIR_WRITE && err == EMSGSIZE)
- return FIO_Q_BUSY;
-
- io_u->error = err;
- }
- }
-
- if (io_u->error)
- td_verror(td, io_u->error, "xfer");
-
- return FIO_Q_COMPLETED;
-}
-
-static int fio_netio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct netio_options *o = td->eo;
- int ret;
-
- fio_ro_check(td, io_u);
-
- ret = __fio_netio_queue(td, io_u, io_u->ddir);
- if (!o->pingpong || ret != FIO_Q_COMPLETED)
- return ret;
-
- /*
- * For ping-pong mode, receive or send reply as needed
- */
- if (td_read(td) && io_u->ddir == DDIR_READ)
- ret = __fio_netio_queue(td, io_u, DDIR_WRITE);
- else if (td_write(td) && io_u->ddir == DDIR_WRITE)
- ret = __fio_netio_queue(td, io_u, DDIR_READ);
-
- return ret;
-}
-
-static int fio_netio_connect(struct thread_data *td, struct fio_file *f)
-{
- struct netio_data *nd = td->io_ops_data;
- struct netio_options *o = td->eo;
- int type, domain;
-
- if (o->proto == FIO_TYPE_TCP) {
- domain = AF_INET;
- type = SOCK_STREAM;
- } else if (o->proto == FIO_TYPE_TCP_V6) {
- domain = AF_INET6;
- type = SOCK_STREAM;
- } else if (o->proto == FIO_TYPE_UDP) {
- domain = AF_INET;
- type = SOCK_DGRAM;
- } else if (o->proto == FIO_TYPE_UDP_V6) {
- domain = AF_INET6;
- type = SOCK_DGRAM;
- } else if (o->proto == FIO_TYPE_UNIX) {
- domain = AF_UNIX;
- type = SOCK_STREAM;
- } else {
- log_err("fio: bad network type %d\n", o->proto);
- f->fd = -1;
- return 1;
- }
-
- f->fd = socket(domain, type, 0);
- if (f->fd < 0) {
- td_verror(td, errno, "socket");
- return 1;
- }
-
-#ifdef CONFIG_TCP_NODELAY
- if (o->nodelay && is_tcp(o)) {
- int optval = 1;
-
- if (setsockopt(f->fd, IPPROTO_TCP, TCP_NODELAY, (void *) &optval, sizeof(int)) < 0) {
- log_err("fio: cannot set TCP_NODELAY option on socket (%s), disable with 'nodelay=0'\n", strerror(errno));
- return 1;
- }
- }
-#endif
-
- if (set_window_size(td, f->fd)) {
- close(f->fd);
- return 1;
- }
- if (set_mss(td, f->fd)) {
- close(f->fd);
- return 1;
- }
-
- if (is_udp(o)) {
- if (!fio_netio_is_multicast(td->o.filename))
- return 0;
- if (is_ipv6(o)) {
- log_err("fio: multicast not supported on IPv6\n");
- close(f->fd);
- return 1;
- }
-
- if (o->intfc) {
- struct in_addr interface_addr;
-
- if (inet_aton(o->intfc, &interface_addr) == 0) {
- log_err("fio: interface not valid interface IP\n");
- close(f->fd);
- return 1;
- }
- if (setsockopt(f->fd, IPPROTO_IP, IP_MULTICAST_IF, (const char*)&interface_addr, sizeof(interface_addr)) < 0) {
- td_verror(td, errno, "setsockopt IP_MULTICAST_IF");
- close(f->fd);
- return 1;
- }
- }
- if (setsockopt(f->fd, IPPROTO_IP, IP_MULTICAST_TTL, (const char*)&o->ttl, sizeof(o->ttl)) < 0) {
- td_verror(td, errno, "setsockopt IP_MULTICAST_TTL");
- close(f->fd);
- return 1;
- }
- return 0;
- } else if (o->proto == FIO_TYPE_TCP) {
- socklen_t len = sizeof(nd->addr);
-
- if (connect(f->fd, (struct sockaddr *) &nd->addr, len) < 0) {
- td_verror(td, errno, "connect");
- close(f->fd);
- return 1;
- }
- } else if (o->proto == FIO_TYPE_TCP_V6) {
- socklen_t len = sizeof(nd->addr6);
-
- if (connect(f->fd, (struct sockaddr *) &nd->addr6, len) < 0) {
- td_verror(td, errno, "connect");
- close(f->fd);
- return 1;
- }
-
- } else {
- struct sockaddr_un *addr = &nd->addr_un;
- socklen_t len;
-
- len = sizeof(addr->sun_family) + strlen(addr->sun_path) + 1;
-
- if (connect(f->fd, (struct sockaddr *) addr, len) < 0) {
- td_verror(td, errno, "connect");
- close(f->fd);
- return 1;
- }
- }
-
- return 0;
-}
-
-static int fio_netio_accept(struct thread_data *td, struct fio_file *f)
-{
- struct netio_data *nd = td->io_ops_data;
- struct netio_options *o = td->eo;
- socklen_t socklen;
- int state;
-
- if (is_udp(o)) {
- f->fd = nd->listenfd;
- return 0;
- }
-
- state = td->runstate;
- td_set_runstate(td, TD_SETTING_UP);
-
- log_info("fio: waiting for connection\n");
-
- if (poll_wait(td, nd->listenfd, POLLIN) < 0)
- goto err;
-
- if (o->proto == FIO_TYPE_TCP) {
- socklen = sizeof(nd->addr);
- f->fd = accept(nd->listenfd, (struct sockaddr *) &nd->addr, &socklen);
- } else {
- socklen = sizeof(nd->addr6);
- f->fd = accept(nd->listenfd, (struct sockaddr *) &nd->addr6, &socklen);
- }
-
- if (f->fd < 0) {
- td_verror(td, errno, "accept");
- goto err;
- }
-
-#ifdef CONFIG_TCP_NODELAY
- if (o->nodelay && is_tcp(o)) {
- int optval = 1;
-
- if (setsockopt(f->fd, IPPROTO_TCP, TCP_NODELAY, (void *) &optval, sizeof(int)) < 0) {
- log_err("fio: cannot set TCP_NODELAY option on socket (%s), disable with 'nodelay=0'\n", strerror(errno));
- return 1;
- }
- }
-#endif
-
- reset_all_stats(td);
- td_set_runstate(td, state);
- return 0;
-err:
- td_set_runstate(td, state);
- return 1;
-}
-
-static void fio_netio_send_close(struct thread_data *td, struct fio_file *f)
-{
- struct netio_data *nd = td->io_ops_data;
- struct netio_options *o = td->eo;
- struct udp_close_msg msg;
- struct sockaddr *to;
- socklen_t len;
- int ret;
-
- if (is_ipv6(o)) {
- to = (struct sockaddr *) &nd->addr6;
- len = sizeof(nd->addr6);
- } else {
- to = (struct sockaddr *) &nd->addr;
- len = sizeof(nd->addr);
- }
-
- msg.magic = cpu_to_le32((uint32_t) FIO_LINK_OPEN_CLOSE_MAGIC);
- msg.cmd = cpu_to_le32((uint32_t) FIO_LINK_CLOSE);
-
- ret = sendto(f->fd, (void *) &msg, sizeof(msg), MSG_WAITALL, to, len);
- if (ret < 0)
- td_verror(td, errno, "sendto udp link close");
-}
-
-static int fio_netio_close_file(struct thread_data *td, struct fio_file *f)
-{
- /*
- * Notify the receiver that we are closing down the link
- */
- fio_netio_send_close(td, f);
-
- return generic_close_file(td, f);
-}
-
-static int fio_netio_udp_recv_open(struct thread_data *td, struct fio_file *f)
-{
- struct netio_data *nd = td->io_ops_data;
- struct netio_options *o = td->eo;
- struct udp_close_msg msg;
- struct sockaddr *to;
- socklen_t len;
- int ret;
-
- if (is_ipv6(o)) {
- len = sizeof(nd->addr6);
- to = (struct sockaddr *) &nd->addr6;
- } else {
- len = sizeof(nd->addr);
- to = (struct sockaddr *) &nd->addr;
- }
-
- ret = recvfrom(f->fd, (void *) &msg, sizeof(msg), MSG_WAITALL, to, &len);
- if (ret < 0) {
- td_verror(td, errno, "recvfrom udp link open");
- return ret;
- }
-
- if (ntohl(msg.magic) != FIO_LINK_OPEN_CLOSE_MAGIC ||
- ntohl(msg.cmd) != FIO_LINK_OPEN) {
- log_err("fio: bad udp open magic %x/%x\n", ntohl(msg.magic),
- ntohl(msg.cmd));
- return -1;
- }
-
- fio_gettime(&td->start, NULL);
- return 0;
-}
-
-static int fio_netio_send_open(struct thread_data *td, struct fio_file *f)
-{
- struct netio_data *nd = td->io_ops_data;
- struct netio_options *o = td->eo;
- struct udp_close_msg msg;
- struct sockaddr *to;
- socklen_t len;
- int ret;
-
- if (is_ipv6(o)) {
- len = sizeof(nd->addr6);
- to = (struct sockaddr *) &nd->addr6;
- } else {
- len = sizeof(nd->addr);
- to = (struct sockaddr *) &nd->addr;
- }
-
- msg.magic = htonl(FIO_LINK_OPEN_CLOSE_MAGIC);
- msg.cmd = htonl(FIO_LINK_OPEN);
-
- ret = sendto(f->fd, (void *) &msg, sizeof(msg), MSG_WAITALL, to, len);
- if (ret < 0) {
- td_verror(td, errno, "sendto udp link open");
- return ret;
- }
-
- return 0;
-}
-
-static int fio_netio_open_file(struct thread_data *td, struct fio_file *f)
-{
- int ret;
- struct netio_options *o = td->eo;
-
- if (o->listen)
- ret = fio_netio_accept(td, f);
- else
- ret = fio_netio_connect(td, f);
-
- if (ret) {
- f->fd = -1;
- return ret;
- }
-
- if (is_udp(o)) {
- if (td_write(td))
- ret = fio_netio_send_open(td, f);
- else {
- int state;
-
- state = td->runstate;
- td_set_runstate(td, TD_SETTING_UP);
- ret = fio_netio_udp_recv_open(td, f);
- td_set_runstate(td, state);
- }
- }
-
- if (ret)
- fio_netio_close_file(td, f);
-
- return ret;
-}
-
-static int fio_fill_addr(struct thread_data *td, const char *host, int af,
- void *dst, struct addrinfo **res)
-{
- struct netio_options *o = td->eo;
- struct addrinfo hints;
- int ret;
-
- if (inet_pton(af, host, dst))
- return 0;
-
- memset(&hints, 0, sizeof(hints));
-
- if (is_tcp(o))
- hints.ai_socktype = SOCK_STREAM;
- else
- hints.ai_socktype = SOCK_DGRAM;
-
- if (is_ipv6(o))
- hints.ai_family = AF_INET6;
- else
- hints.ai_family = AF_INET;
-
- ret = getaddrinfo(host, NULL, &hints, res);
- if (ret) {
- int e = EINVAL;
- char str[128];
-
- if (ret == EAI_SYSTEM)
- e = errno;
-
- snprintf(str, sizeof(str), "getaddrinfo: %s", gai_strerror(ret));
- td_verror(td, e, str);
- return 1;
- }
-
- return 0;
-}
-
-static int fio_netio_setup_connect_inet(struct thread_data *td,
- const char *host, unsigned short port)
-{
- struct netio_data *nd = td->io_ops_data;
- struct netio_options *o = td->eo;
- struct addrinfo *res = NULL;
- void *dst, *src;
- int af, len;
-
- if (!host) {
- log_err("fio: connect with no host to connect to.\n");
- if (td_read(td))
- log_err("fio: did you forget to set 'listen'?\n");
-
- td_verror(td, EINVAL, "no hostname= set");
- return 1;
- }
-
- nd->addr.sin_family = AF_INET;
- nd->addr.sin_port = htons(port);
- nd->addr6.sin6_family = AF_INET6;
- nd->addr6.sin6_port = htons(port);
-
- if (is_ipv6(o)) {
- af = AF_INET6;
- dst = &nd->addr6.sin6_addr;
- } else {
- af = AF_INET;
- dst = &nd->addr.sin_addr;
- }
-
- if (fio_fill_addr(td, host, af, dst, &res))
- return 1;
-
- if (!res)
- return 0;
-
- if (is_ipv6(o)) {
- len = sizeof(nd->addr6.sin6_addr);
- src = &((struct sockaddr_in6 *) res->ai_addr)->sin6_addr;
- } else {
- len = sizeof(nd->addr.sin_addr);
- src = &((struct sockaddr_in *) res->ai_addr)->sin_addr;
- }
-
- memcpy(dst, src, len);
- freeaddrinfo(res);
- return 0;
-}
-
-static int fio_netio_setup_connect_unix(struct thread_data *td,
- const char *path)
-{
- struct netio_data *nd = td->io_ops_data;
- struct sockaddr_un *soun = &nd->addr_un;
-
- soun->sun_family = AF_UNIX;
- memset(soun->sun_path, 0, sizeof(soun->sun_path));
- strncpy(soun->sun_path, path, sizeof(soun->sun_path) - 1);
- return 0;
-}
-
-static int fio_netio_setup_connect(struct thread_data *td)
-{
- struct netio_options *o = td->eo;
-
- if (is_udp(o) || is_tcp(o))
- return fio_netio_setup_connect_inet(td, td->o.filename,o->port);
- else
- return fio_netio_setup_connect_unix(td, td->o.filename);
-}
-
-static int fio_netio_setup_listen_unix(struct thread_data *td, const char *path)
-{
- struct netio_data *nd = td->io_ops_data;
- struct sockaddr_un *addr = &nd->addr_un;
- mode_t mode;
- int len, fd;
-
- fd = socket(AF_UNIX, SOCK_STREAM, 0);
- if (fd < 0) {
- log_err("fio: socket: %s\n", strerror(errno));
- return -1;
- }
-
- mode = umask(000);
-
- memset(addr, 0, sizeof(*addr));
- addr->sun_family = AF_UNIX;
- strncpy(addr->sun_path, path, sizeof(addr->sun_path) - 1);
- unlink(path);
-
- len = sizeof(addr->sun_family) + strlen(path) + 1;
-
- if (bind(fd, (struct sockaddr *) addr, len) < 0) {
- log_err("fio: bind: %s\n", strerror(errno));
- close(fd);
- return -1;
- }
-
- umask(mode);
- nd->listenfd = fd;
- return 0;
-}
-
-static int fio_netio_setup_listen_inet(struct thread_data *td, short port)
-{
- struct netio_data *nd = td->io_ops_data;
- struct netio_options *o = td->eo;
- struct ip_mreq mr;
- struct sockaddr_in sin;
- struct sockaddr *saddr;
- int fd, opt, type, domain;
- socklen_t len;
-
- memset(&sin, 0, sizeof(sin));
-
- if (o->proto == FIO_TYPE_TCP) {
- type = SOCK_STREAM;
- domain = AF_INET;
- } else if (o->proto == FIO_TYPE_TCP_V6) {
- type = SOCK_STREAM;
- domain = AF_INET6;
- } else if (o->proto == FIO_TYPE_UDP) {
- type = SOCK_DGRAM;
- domain = AF_INET;
- } else if (o->proto == FIO_TYPE_UDP_V6) {
- type = SOCK_DGRAM;
- domain = AF_INET6;
- } else {
- log_err("fio: unknown proto %d\n", o->proto);
- return 1;
- }
-
- fd = socket(domain, type, 0);
- if (fd < 0) {
- td_verror(td, errno, "socket");
- return 1;
- }
-
- opt = 1;
- if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (void *) &opt, sizeof(opt)) < 0) {
- td_verror(td, errno, "setsockopt");
- close(fd);
- return 1;
- }
-#ifdef SO_REUSEPORT
- if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, (void *) &opt, sizeof(opt)) < 0) {
- td_verror(td, errno, "setsockopt");
- close(fd);
- return 1;
- }
-#endif
-
- if (set_window_size(td, fd)) {
- close(fd);
- return 1;
- }
- if (set_mss(td, fd)) {
- close(fd);
- return 1;
- }
-
- if (td->o.filename) {
- if (!is_udp(o) || !fio_netio_is_multicast(td->o.filename)) {
- log_err("fio: hostname not valid for non-multicast inbound network IO\n");
- close(fd);
- return 1;
- }
- if (is_ipv6(o)) {
- log_err("fio: IPv6 not supported for multicast network IO\n");
- close(fd);
- return 1;
- }
-
- inet_aton(td->o.filename, &sin.sin_addr);
-
- mr.imr_multiaddr = sin.sin_addr;
- if (o->intfc) {
- if (inet_aton(o->intfc, &mr.imr_interface) == 0) {
- log_err("fio: interface not valid interface IP\n");
- close(fd);
- return 1;
- }
- } else {
- mr.imr_interface.s_addr = htonl(INADDR_ANY);
- }
-
- if (setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, (const char*)&mr, sizeof(mr)) < 0) {
- td_verror(td, errno, "setsockopt IP_ADD_MEMBERSHIP");
- close(fd);
- return 1;
- }
- }
-
- if (!is_ipv6(o)) {
- saddr = (struct sockaddr *) &nd->addr;
- len = sizeof(nd->addr);
-
- nd->addr.sin_family = AF_INET;
- nd->addr.sin_addr.s_addr = sin.sin_addr.s_addr ? sin.sin_addr.s_addr : htonl(INADDR_ANY);
- nd->addr.sin_port = htons(port);
- } else {
- saddr = (struct sockaddr *) &nd->addr6;
- len = sizeof(nd->addr6);
-
- nd->addr6.sin6_family = AF_INET6;
- nd->addr6.sin6_addr = in6addr_any;
- nd->addr6.sin6_port = htons(port);
- }
-
- if (bind(fd, saddr, len) < 0) {
- close(fd);
- td_verror(td, errno, "bind");
- return 1;
- }
-
- nd->listenfd = fd;
- return 0;
-}
-
-static int fio_netio_setup_listen(struct thread_data *td)
-{
- struct netio_data *nd = td->io_ops_data;
- struct netio_options *o = td->eo;
- int ret;
-
- if (is_udp(o) || is_tcp(o))
- ret = fio_netio_setup_listen_inet(td, o->port);
- else
- ret = fio_netio_setup_listen_unix(td, td->o.filename);
-
- if (ret)
- return ret;
- if (is_udp(o))
- return 0;
-
- if (listen(nd->listenfd, 10) < 0) {
- td_verror(td, errno, "listen");
- nd->listenfd = -1;
- return 1;
- }
-
- return 0;
-}
-
-static int fio_netio_init(struct thread_data *td)
-{
- struct netio_options *o = td->eo;
- int ret;
-
-#ifdef WIN32
- WSADATA wsd;
- WSAStartup(MAKEWORD(2,2), &wsd);
-#endif
-
- if (td_random(td)) {
- log_err("fio: network IO can't be random\n");
- return 1;
- }
-
- if (o->proto == FIO_TYPE_UNIX && o->port) {
- log_err("fio: network IO port not valid with unix socket\n");
- return 1;
- } else if (o->proto != FIO_TYPE_UNIX && !o->port) {
- log_err("fio: network IO requires port for tcp or udp\n");
- return 1;
- }
-
- o->port += td->subjob_number;
-
- if (!is_tcp(o)) {
- if (o->listen) {
- log_err("fio: listen only valid for TCP proto IO\n");
- return 1;
- }
- if (td_rw(td)) {
- log_err("fio: datagram network connections must be"
- " read OR write\n");
- return 1;
- }
- if (o->proto == FIO_TYPE_UNIX && !td->o.filename) {
- log_err("fio: UNIX sockets need host/filename\n");
- return 1;
- }
- o->listen = td_read(td);
- }
-
- if (o->listen)
- ret = fio_netio_setup_listen(td);
- else
- ret = fio_netio_setup_connect(td);
-
- return ret;
-}
-
-static void fio_netio_cleanup(struct thread_data *td)
-{
- struct netio_data *nd = td->io_ops_data;
-
- if (nd) {
- if (nd->listenfd != -1)
- close(nd->listenfd);
- if (nd->pipes[0] != -1)
- close(nd->pipes[0]);
- if (nd->pipes[1] != -1)
- close(nd->pipes[1]);
-
- free(nd);
- }
-}
-
-static int fio_netio_setup(struct thread_data *td)
-{
- struct netio_data *nd;
-
- if (!td->files_index) {
- add_file(td, td->o.filename ?: "net", 0, 0);
- td->o.nr_files = td->o.nr_files ?: 1;
- td->o.open_files++;
- }
-
- if (!td->io_ops_data) {
- nd = malloc(sizeof(*nd));
-
- memset(nd, 0, sizeof(*nd));
- nd->listenfd = -1;
- nd->pipes[0] = nd->pipes[1] = -1;
- td->io_ops_data = nd;
- }
-
- return 0;
-}
-
-static void fio_netio_terminate(struct thread_data *td)
-{
- kill(td->pid, SIGTERM);
-}
-
-#ifdef CONFIG_LINUX_SPLICE
-static int fio_netio_setup_splice(struct thread_data *td)
-{
- struct netio_data *nd;
-
- fio_netio_setup(td);
-
- nd = td->io_ops_data;
- if (nd) {
- if (pipe(nd->pipes) < 0)
- return 1;
-
- nd->use_splice = 1;
- return 0;
- }
-
- return 1;
-}
-
-static struct ioengine_ops ioengine_splice = {
- .name = "netsplice",
- .version = FIO_IOOPS_VERSION,
- .prep = fio_netio_prep,
- .queue = fio_netio_queue,
- .setup = fio_netio_setup_splice,
- .init = fio_netio_init,
- .cleanup = fio_netio_cleanup,
- .open_file = fio_netio_open_file,
- .close_file = fio_netio_close_file,
- .terminate = fio_netio_terminate,
- .options = options,
- .option_struct_size = sizeof(struct netio_options),
- .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_UNIDIR |
- FIO_PIPEIO,
-};
-#endif
-
-static struct ioengine_ops ioengine_rw = {
- .name = "net",
- .version = FIO_IOOPS_VERSION,
- .prep = fio_netio_prep,
- .queue = fio_netio_queue,
- .setup = fio_netio_setup,
- .init = fio_netio_init,
- .cleanup = fio_netio_cleanup,
- .open_file = fio_netio_open_file,
- .close_file = fio_netio_close_file,
- .terminate = fio_netio_terminate,
- .options = options,
- .option_struct_size = sizeof(struct netio_options),
- .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_UNIDIR |
- FIO_PIPEIO | FIO_BIT_BASED,
-};
-
-static int str_hostname_cb(void *data, const char *input)
-{
- struct netio_options *o = data;
-
- if (o->td->o.filename)
- free(o->td->o.filename);
- o->td->o.filename = strdup(input);
- return 0;
-}
-
-static void fio_init fio_netio_register(void)
-{
- register_ioengine(&ioengine_rw);
-#ifdef CONFIG_LINUX_SPLICE
- register_ioengine(&ioengine_splice);
-#endif
-}
-
-static void fio_exit fio_netio_unregister(void)
-{
- unregister_ioengine(&ioengine_rw);
-#ifdef CONFIG_LINUX_SPLICE
- unregister_ioengine(&ioengine_splice);
-#endif
-}
diff --git a/engines/null.c b/engines/null.c
deleted file mode 100644
index 812cadfe..00000000
--- a/engines/null.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * null engine
- *
- * IO engine that doesn't do any real IO transfers, it just pretends to.
- * The main purpose is to test fio itself.
- *
- * It also can act as external C++ engine - compiled with:
- *
- * g++ -O2 -g -shared -rdynamic -fPIC -o null.so null.c -DFIO_EXTERNAL_ENGINE
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <assert.h>
-
-#include "../fio.h"
-
-struct null_data {
- struct io_u **io_us;
- int queued;
- int events;
-};
-
-static struct io_u *fio_null_event(struct thread_data *td, int event)
-{
- struct null_data *nd = (struct null_data *) td->io_ops_data;
-
- return nd->io_us[event];
-}
-
-static int fio_null_getevents(struct thread_data *td, unsigned int min_events,
- unsigned int fio_unused max,
- const struct timespec fio_unused *t)
-{
- struct null_data *nd = (struct null_data *) td->io_ops_data;
- int ret = 0;
-
- if (min_events) {
- ret = nd->events;
- nd->events = 0;
- }
-
- return ret;
-}
-
-static int fio_null_commit(struct thread_data *td)
-{
- struct null_data *nd = (struct null_data *) td->io_ops_data;
-
- if (!nd->events) {
-#ifndef FIO_EXTERNAL_ENGINE
- io_u_mark_submit(td, nd->queued);
-#endif
- nd->events = nd->queued;
- nd->queued = 0;
- }
-
- return 0;
-}
-
-static int fio_null_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct null_data *nd = (struct null_data *) td->io_ops_data;
-
- fio_ro_check(td, io_u);
-
- if (td->io_ops->flags & FIO_SYNCIO)
- return FIO_Q_COMPLETED;
- if (nd->events)
- return FIO_Q_BUSY;
-
- nd->io_us[nd->queued++] = io_u;
- return FIO_Q_QUEUED;
-}
-
-static int fio_null_open(struct thread_data fio_unused *td,
- struct fio_file fio_unused *f)
-{
- return 0;
-}
-
-static void fio_null_cleanup(struct thread_data *td)
-{
- struct null_data *nd = (struct null_data *) td->io_ops_data;
-
- if (nd) {
- free(nd->io_us);
- free(nd);
- }
-}
-
-static int fio_null_init(struct thread_data *td)
-{
- struct null_data *nd = (struct null_data *) malloc(sizeof(*nd));
-
- memset(nd, 0, sizeof(*nd));
-
- if (td->o.iodepth != 1) {
- nd->io_us = (struct io_u **) malloc(td->o.iodepth * sizeof(struct io_u *));
- memset(nd->io_us, 0, td->o.iodepth * sizeof(struct io_u *));
- } else
- td->io_ops->flags |= FIO_SYNCIO;
-
- td->io_ops_data = nd;
- return 0;
-}
-
-#ifndef __cplusplus
-static struct ioengine_ops ioengine = {
- .name = "null",
- .version = FIO_IOOPS_VERSION,
- .queue = fio_null_queue,
- .commit = fio_null_commit,
- .getevents = fio_null_getevents,
- .event = fio_null_event,
- .init = fio_null_init,
- .cleanup = fio_null_cleanup,
- .open_file = fio_null_open,
- .flags = FIO_DISKLESSIO | FIO_FAKEIO,
-};
-
-static void fio_init fio_null_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_null_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
-
-#else
-
-#ifdef FIO_EXTERNAL_ENGINE
-extern "C" {
-static struct ioengine_ops ioengine;
-void get_ioengine(struct ioengine_ops **ioengine_ptr)
-{
- *ioengine_ptr = &ioengine;
-
- ioengine.name = "cpp_null";
- ioengine.version = FIO_IOOPS_VERSION;
- ioengine.queue = fio_null_queue;
- ioengine.commit = fio_null_commit;
- ioengine.getevents = fio_null_getevents;
- ioengine.event = fio_null_event;
- ioengine.init = fio_null_init;
- ioengine.cleanup = fio_null_cleanup;
- ioengine.open_file = fio_null_open;
- ioengine.flags = FIO_DISKLESSIO | FIO_FAKEIO;
-}
-}
-#endif /* FIO_EXTERNAL_ENGINE */
-
-#endif /* __cplusplus */
diff --git a/engines/pmemblk.c b/engines/pmemblk.c
deleted file mode 100644
index 52af9eda..00000000
--- a/engines/pmemblk.c
+++ /dev/null
@@ -1,445 +0,0 @@
-/*
- * pmemblk: IO engine that uses NVML libpmemblk to read and write data
- *
- * Copyright (C) 2016 Hewlett Packard Enterprise Development LP
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License,
- * version 2 as published by the Free Software Foundation..
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the Free
- * Software Foundation, Inc., 59 Temple Place, Suite 330,
- * Boston, MA 02111-1307 USA
- */
-
-/*
- * pmemblk engine
- *
- * IO engine that uses libpmemblk to read and write data
- *
- * To use:
- * ioengine=pmemblk
- *
- * Other relevant settings:
- * thread=1 REQUIRED
- * iodepth=1
- * direct=1
- * unlink=1
- * filename=/mnt/pmem0/fiotestfile,BSIZE,FSIZEMiB
- *
- * thread must be set to 1 for pmemblk as multiple processes cannot
- * open the same block pool file.
- *
- * iodepth should be set to 1 as pmemblk is always synchronous.
- * Use numjobs to scale up.
- *
- * direct=1 is implied as pmemblk is always direct. A warning message
- * is printed if this is not specified.
- *
- * unlink=1 removes the block pool file after testing, and is optional.
- *
- * The pmem device must have a DAX-capable filesystem and be mounted
- * with DAX enabled. filename must point to a file on that filesystem.
- *
- * Example:
- * mkfs.xfs /dev/pmem0
- * mkdir /mnt/pmem0
- * mount -o dax /dev/pmem0 /mnt/pmem0
- *
- * When specifying the filename, if the block pool file does not already
- * exist, then the pmemblk engine creates the pool file if you specify
- * the block and file sizes. BSIZE is the block size in bytes.
- * FSIZEMB is the pool file size in MiB.
- *
- * See examples/pmemblk.fio for more.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/uio.h>
-#include <errno.h>
-#include <assert.h>
-#include <string.h>
-#include <libpmem.h>
-#include <libpmemblk.h>
-
-#include "../fio.h"
-
-/*
- * libpmemblk
- */
-typedef struct fio_pmemblk_file *fio_pmemblk_file_t;
-
-struct fio_pmemblk_file {
- fio_pmemblk_file_t pmb_next;
- char *pmb_filename;
- uint64_t pmb_refcnt;
- PMEMblkpool *pmb_pool;
- size_t pmb_bsize;
- size_t pmb_nblocks;
-};
-
-static fio_pmemblk_file_t Cache;
-
-static pthread_mutex_t CacheLock = PTHREAD_MUTEX_INITIALIZER;
-
-#define PMB_CREATE (0x0001) /* should create file */
-
-fio_pmemblk_file_t fio_pmemblk_cache_lookup(const char *filename)
-{
- fio_pmemblk_file_t i;
-
- for (i = Cache; i != NULL; i = i->pmb_next)
- if (!strcmp(filename, i->pmb_filename))
- return i;
-
- return NULL;
-}
-
-static void fio_pmemblk_cache_insert(fio_pmemblk_file_t pmb)
-{
- pmb->pmb_next = Cache;
- Cache = pmb;
-}
-
-static void fio_pmemblk_cache_remove(fio_pmemblk_file_t pmb)
-{
- fio_pmemblk_file_t i;
-
- if (pmb == Cache) {
- Cache = Cache->pmb_next;
- pmb->pmb_next = NULL;
- return;
- }
-
- for (i = Cache; i != NULL; i = i->pmb_next)
- if (pmb == i->pmb_next) {
- i->pmb_next = i->pmb_next->pmb_next;
- pmb->pmb_next = NULL;
- return;
- }
-}
-
-/*
- * to control block size and gross file size at the libpmemblk
- * level, we allow the block size and file size to be appended
- * to the file name:
- *
- * path[,bsize,fsizemib]
- *
- * note that we do not use the fio option "filesize" to dictate
- * the file size because we can only give libpmemblk the gross
- * file size, which is different from the net or usable file
- * size (which is probably what fio wants).
- *
- * the final path without the parameters is returned in ppath.
- * the block size and file size are returned in pbsize and fsize.
- *
- * note that the user specifies the file size in MiB, but
- * we return bytes from here.
- */
-static void pmb_parse_path(const char *pathspec, char **ppath, uint64_t *pbsize,
- uint64_t *pfsize)
-{
- char *path;
- char *s;
- uint64_t bsize;
- uint64_t fsizemib;
-
- path = strdup(pathspec);
- if (!path) {
- *ppath = NULL;
- return;
- }
-
- /* extract sizes, if given */
- s = strrchr(path, ',');
- if (s && (fsizemib = strtoull(s + 1, NULL, 10))) {
- *s = 0;
- s = strrchr(path, ',');
- if (s && (bsize = strtoull(s + 1, NULL, 10))) {
- *s = 0;
- *ppath = path;
- *pbsize = bsize;
- *pfsize = fsizemib << 20;
- return;
- }
- }
-
- /* size specs not found */
- strcpy(path, pathspec);
- *ppath = path;
- *pbsize = 0;
- *pfsize = 0;
-}
-
-static fio_pmemblk_file_t pmb_open(const char *pathspec, int flags)
-{
- fio_pmemblk_file_t pmb;
- char *path = NULL;
- uint64_t bsize = 0;
- uint64_t fsize = 0;
-
- pmb_parse_path(pathspec, &path, &bsize, &fsize);
- if (!path)
- return NULL;
-
- pthread_mutex_lock(&CacheLock);
-
- pmb = fio_pmemblk_cache_lookup(path);
- if (!pmb) {
- pmb = malloc(sizeof(*pmb));
- if (!pmb)
- goto error;
-
- /* try opening existing first, create it if needed */
- pmb->pmb_pool = pmemblk_open(path, bsize);
- if (!pmb->pmb_pool && (errno == ENOENT) &&
- (flags & PMB_CREATE) && (0 < fsize) && (0 < bsize)) {
- pmb->pmb_pool =
- pmemblk_create(path, bsize, fsize, 0644);
- }
- if (!pmb->pmb_pool) {
- log_err("pmemblk: unable to open pmemblk pool file %s (%s)\n",
- path, strerror(errno));
- goto error;
- }
-
- pmb->pmb_filename = path;
- pmb->pmb_next = NULL;
- pmb->pmb_refcnt = 0;
- pmb->pmb_bsize = pmemblk_bsize(pmb->pmb_pool);
- pmb->pmb_nblocks = pmemblk_nblock(pmb->pmb_pool);
-
- fio_pmemblk_cache_insert(pmb);
- }
-
- pmb->pmb_refcnt += 1;
-
- pthread_mutex_unlock(&CacheLock);
-
- return pmb;
-
-error:
- if (pmb) {
- if (pmb->pmb_pool)
- pmemblk_close(pmb->pmb_pool);
- pmb->pmb_pool = NULL;
- pmb->pmb_filename = NULL;
- free(pmb);
- }
- if (path)
- free(path);
-
- pthread_mutex_unlock(&CacheLock);
- return NULL;
-}
-
-static void pmb_close(fio_pmemblk_file_t pmb, const bool keep)
-{
- pthread_mutex_lock(&CacheLock);
-
- pmb->pmb_refcnt--;
-
- if (!keep && !pmb->pmb_refcnt) {
- pmemblk_close(pmb->pmb_pool);
- pmb->pmb_pool = NULL;
- free(pmb->pmb_filename);
- pmb->pmb_filename = NULL;
- fio_pmemblk_cache_remove(pmb);
- free(pmb);
- }
-
- pthread_mutex_unlock(&CacheLock);
-}
-
-static int pmb_get_flags(struct thread_data *td, uint64_t *pflags)
-{
- static int thread_warned = 0;
- static int odirect_warned = 0;
-
- uint64_t flags = 0;
-
- if (!td->o.use_thread) {
- if (!thread_warned) {
- thread_warned = 1;
- log_err("pmemblk: must set thread=1 for pmemblk engine\n");
- }
- return 1;
- }
-
- if (!td->o.odirect && !odirect_warned) {
- odirect_warned = 1;
- log_info("pmemblk: direct == 0, but pmemblk is always direct\n");
- }
-
- if (td->o.allow_create)
- flags |= PMB_CREATE;
-
- (*pflags) = flags;
- return 0;
-}
-
-static int fio_pmemblk_open_file(struct thread_data *td, struct fio_file *f)
-{
- uint64_t flags = 0;
- fio_pmemblk_file_t pmb;
-
- if (pmb_get_flags(td, &flags))
- return 1;
-
- pmb = pmb_open(f->file_name, flags);
- if (!pmb)
- return 1;
-
- FILE_SET_ENG_DATA(f, pmb);
- return 0;
-}
-
-static int fio_pmemblk_close_file(struct thread_data fio_unused *td,
- struct fio_file *f)
-{
- fio_pmemblk_file_t pmb = FILE_ENG_DATA(f);
-
- if (pmb)
- pmb_close(pmb, false);
-
- FILE_SET_ENG_DATA(f, NULL);
- return 0;
-}
-
-static int fio_pmemblk_get_file_size(struct thread_data *td, struct fio_file *f)
-{
- uint64_t flags = 0;
- fio_pmemblk_file_t pmb = FILE_ENG_DATA(f);
-
- if (fio_file_size_known(f))
- return 0;
-
- if (!pmb) {
- if (pmb_get_flags(td, &flags))
- return 1;
- pmb = pmb_open(f->file_name, flags);
- if (!pmb)
- return 1;
- }
-
- f->real_file_size = pmb->pmb_bsize * pmb->pmb_nblocks;
-
- fio_file_set_size_known(f);
-
- if (!FILE_ENG_DATA(f))
- pmb_close(pmb, true);
-
- return 0;
-}
-
-static int fio_pmemblk_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
- fio_pmemblk_file_t pmb = FILE_ENG_DATA(f);
-
- unsigned long long off;
- unsigned long len;
- void *buf;
-
- fio_ro_check(td, io_u);
-
- switch (io_u->ddir) {
- case DDIR_READ:
- case DDIR_WRITE:
- off = io_u->offset;
- len = io_u->xfer_buflen;
-
- io_u->error = EINVAL;
- if (off % pmb->pmb_bsize)
- break;
- if (len % pmb->pmb_bsize)
- break;
- if ((off + len) / pmb->pmb_bsize > pmb->pmb_nblocks)
- break;
-
- io_u->error = 0;
- buf = io_u->xfer_buf;
- off /= pmb->pmb_bsize;
- len /= pmb->pmb_bsize;
- while (0 < len) {
- if (io_u->ddir == DDIR_READ &&
- 0 != pmemblk_read(pmb->pmb_pool, buf, off)) {
- io_u->error = errno;
- break;
- } else if (0 != pmemblk_write(pmb->pmb_pool, buf, off)) {
- io_u->error = errno;
- break;
- }
- buf += pmb->pmb_bsize;
- off++;
- len--;
- }
- off *= pmb->pmb_bsize;
- len *= pmb->pmb_bsize;
- io_u->resid = io_u->xfer_buflen - (off - io_u->offset);
- break;
- case DDIR_SYNC:
- case DDIR_DATASYNC:
- case DDIR_SYNC_FILE_RANGE:
- /* we're always sync'd */
- io_u->error = 0;
- break;
- default:
- io_u->error = EINVAL;
- break;
- }
-
- return FIO_Q_COMPLETED;
-}
-
-static int fio_pmemblk_unlink_file(struct thread_data *td, struct fio_file *f)
-{
- char *path = NULL;
- uint64_t bsize = 0;
- uint64_t fsize = 0;
-
- /*
- * we need our own unlink in case the user has specified
- * the block and file sizes in the path name. we parse
- * the file_name to determine the file name we actually used.
- */
-
- pmb_parse_path(f->file_name, &path, &bsize, &fsize);
- if (!path)
- return ENOENT;
-
- unlink(path);
- free(path);
- return 0;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "pmemblk",
- .version = FIO_IOOPS_VERSION,
- .queue = fio_pmemblk_queue,
- .open_file = fio_pmemblk_open_file,
- .close_file = fio_pmemblk_close_file,
- .get_file_size = fio_pmemblk_get_file_size,
- .unlink_file = fio_pmemblk_unlink_file,
- .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NOEXTEND | FIO_NODISKUTIL,
-};
-
-static void fio_init fio_pmemblk_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_pmemblk_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/posixaio.c b/engines/posixaio.c
deleted file mode 100644
index bddb1ec3..00000000
--- a/engines/posixaio.c
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * posixaio engine
- *
- * IO engine that uses the posix defined aio interface.
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <fcntl.h>
-
-#include "../fio.h"
-
-struct posixaio_data {
- struct io_u **aio_events;
- unsigned int queued;
-};
-
-static int fill_timespec(struct timespec *ts)
-{
-#ifdef CONFIG_CLOCK_GETTIME
-#ifdef CONFIG_CLOCK_MONOTONIC
- clockid_t clk = CLOCK_MONOTONIC;
-#else
- clockid_t clk = CLOCK_REALTIME;
-#endif
- if (!clock_gettime(clk, ts))
- return 0;
-
- perror("clock_gettime");
- return 1;
-#else
- struct timeval tv;
-
- gettimeofday(&tv, NULL);
- ts->tv_sec = tv.tv_sec;
- ts->tv_nsec = tv.tv_usec * 1000;
- return 0;
-#endif
-}
-
-static unsigned long long ts_utime_since_now(struct timespec *t)
-{
- long long sec, nsec;
- struct timespec now;
-
- if (fill_timespec(&now))
- return 0;
-
- sec = now.tv_sec - t->tv_sec;
- nsec = now.tv_nsec - t->tv_nsec;
- if (sec > 0 && nsec < 0) {
- sec--;
- nsec += 1000000000;
- }
-
- sec *= 1000000;
- nsec /= 1000;
- return sec + nsec;
-}
-
-static int fio_posixaio_cancel(struct thread_data fio_unused *td,
- struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
- int r = aio_cancel(f->fd, &io_u->aiocb);
-
- if (r == AIO_ALLDONE || r == AIO_CANCELED)
- return 0;
-
- return 1;
-}
-
-static int fio_posixaio_prep(struct thread_data fio_unused *td,
- struct io_u *io_u)
-{
- os_aiocb_t *aiocb = &io_u->aiocb;
- struct fio_file *f = io_u->file;
-
- aiocb->aio_fildes = f->fd;
- aiocb->aio_buf = io_u->xfer_buf;
- aiocb->aio_nbytes = io_u->xfer_buflen;
- aiocb->aio_offset = io_u->offset;
- aiocb->aio_sigevent.sigev_notify = SIGEV_NONE;
-
- io_u->seen = 0;
- return 0;
-}
-
-#define SUSPEND_ENTRIES 8
-
-static int fio_posixaio_getevents(struct thread_data *td, unsigned int min,
- unsigned int max, const struct timespec *t)
-{
- struct posixaio_data *pd = td->io_ops_data;
- os_aiocb_t *suspend_list[SUSPEND_ENTRIES];
- struct timespec start;
- int have_timeout = 0;
- int suspend_entries;
- struct io_u *io_u;
- unsigned int r;
- int i;
-
- if (t && !fill_timespec(&start))
- have_timeout = 1;
- else
- memset(&start, 0, sizeof(start));
-
- r = 0;
-restart:
- memset(suspend_list, 0, sizeof(suspend_list));
- suspend_entries = 0;
- io_u_qiter(&td->io_u_all, io_u, i) {
- int err;
-
- if (io_u->seen || !(io_u->flags & IO_U_F_FLIGHT))
- continue;
-
- err = aio_error(&io_u->aiocb);
- if (err == EINPROGRESS) {
- if (suspend_entries < SUSPEND_ENTRIES) {
- suspend_list[suspend_entries] = &io_u->aiocb;
- suspend_entries++;
- }
- continue;
- }
-
- io_u->seen = 1;
- pd->queued--;
- pd->aio_events[r++] = io_u;
-
- if (err == ECANCELED)
- io_u->resid = io_u->xfer_buflen;
- else if (!err) {
- ssize_t retval = aio_return(&io_u->aiocb);
-
- io_u->resid = io_u->xfer_buflen - retval;
- } else
- io_u->error = err;
- }
-
- if (r >= min)
- return r;
-
- if (have_timeout) {
- unsigned long long usec;
-
- usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
- if (ts_utime_since_now(&start) > usec)
- return r;
- }
-
- /*
- * must have some in-flight, wait for at least one
- */
- aio_suspend((const os_aiocb_t * const *)suspend_list,
- suspend_entries, t);
- goto restart;
-}
-
-static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
-{
- struct posixaio_data *pd = td->io_ops_data;
-
- return pd->aio_events[event];
-}
-
-static int fio_posixaio_queue(struct thread_data *td,
- struct io_u *io_u)
-{
- struct posixaio_data *pd = td->io_ops_data;
- os_aiocb_t *aiocb = &io_u->aiocb;
- int ret;
-
- fio_ro_check(td, io_u);
-
- if (io_u->ddir == DDIR_READ)
- ret = aio_read(aiocb);
- else if (io_u->ddir == DDIR_WRITE)
- ret = aio_write(aiocb);
- else if (io_u->ddir == DDIR_TRIM) {
- if (pd->queued)
- return FIO_Q_BUSY;
-
- do_io_u_trim(td, io_u);
- return FIO_Q_COMPLETED;
- } else {
-#ifdef CONFIG_POSIXAIO_FSYNC
- ret = aio_fsync(O_SYNC, aiocb);
-#else
- if (pd->queued)
- return FIO_Q_BUSY;
-
- do_io_u_sync(td, io_u);
- return FIO_Q_COMPLETED;
-#endif
- }
-
- if (ret) {
- int aio_err = errno;
-
- /*
- * At least OSX has a very low limit on the number of pending
- * IOs, so if it returns EAGAIN, we are out of resources
- * to queue more. Just return FIO_Q_BUSY to naturally
- * drop off at this depth.
- */
- if (aio_err == EAGAIN)
- return FIO_Q_BUSY;
-
- io_u->error = aio_err;
- td_verror(td, io_u->error, "xfer");
- return FIO_Q_COMPLETED;
- }
-
- pd->queued++;
- return FIO_Q_QUEUED;
-}
-
-static void fio_posixaio_cleanup(struct thread_data *td)
-{
- struct posixaio_data *pd = td->io_ops_data;
-
- if (pd) {
- free(pd->aio_events);
- free(pd);
- }
-}
-
-static int fio_posixaio_init(struct thread_data *td)
-{
- struct posixaio_data *pd = malloc(sizeof(*pd));
-
- memset(pd, 0, sizeof(*pd));
- pd->aio_events = malloc(td->o.iodepth * sizeof(struct io_u *));
- memset(pd->aio_events, 0, td->o.iodepth * sizeof(struct io_u *));
-
- td->io_ops_data = pd;
- return 0;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "posixaio",
- .version = FIO_IOOPS_VERSION,
- .init = fio_posixaio_init,
- .prep = fio_posixaio_prep,
- .queue = fio_posixaio_queue,
- .cancel = fio_posixaio_cancel,
- .getevents = fio_posixaio_getevents,
- .event = fio_posixaio_event,
- .cleanup = fio_posixaio_cleanup,
- .open_file = generic_open_file,
- .close_file = generic_close_file,
- .get_file_size = generic_get_file_size,
-};
-
-static void fio_init fio_posixaio_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_posixaio_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/rbd.c b/engines/rbd.c
deleted file mode 100644
index 4bae425c..00000000
--- a/engines/rbd.c
+++ /dev/null
@@ -1,689 +0,0 @@
-/*
- * rbd engine
- *
- * IO engine using Ceph's librbd to test RADOS Block Devices.
- *
- */
-
-#include <rbd/librbd.h>
-
-#include "../fio.h"
-#include "../optgroup.h"
-#ifdef CONFIG_RBD_BLKIN
-#include <zipkin_c.h>
-#endif
-
-#ifdef CONFIG_RBD_POLL
-/* add for poll */
-#include <poll.h>
-#include <sys/eventfd.h>
-#endif
-
-struct fio_rbd_iou {
- struct io_u *io_u;
- rbd_completion_t completion;
- int io_seen;
- int io_complete;
-#ifdef CONFIG_RBD_BLKIN
- struct blkin_trace_info info;
-#endif
-};
-
-struct rbd_data {
- rados_t cluster;
- rados_ioctx_t io_ctx;
- rbd_image_t image;
- struct io_u **aio_events;
- struct io_u **sort_events;
- int fd; /* add for poll */
- bool connected;
-};
-
-struct rbd_options {
- void *pad;
- char *cluster_name;
- char *rbd_name;
- char *pool_name;
- char *client_name;
- int busy_poll;
-};
-
-static struct fio_option options[] = {
- {
- .name = "clustername",
- .lname = "ceph cluster name",
- .type = FIO_OPT_STR_STORE,
- .help = "Cluster name for ceph",
- .off1 = offsetof(struct rbd_options, cluster_name),
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_RBD,
- },
- {
- .name = "rbdname",
- .lname = "rbd engine rbdname",
- .type = FIO_OPT_STR_STORE,
- .help = "RBD name for RBD engine",
- .off1 = offsetof(struct rbd_options, rbd_name),
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_RBD,
- },
- {
- .name = "pool",
- .lname = "rbd engine pool",
- .type = FIO_OPT_STR_STORE,
- .help = "Name of the pool hosting the RBD for the RBD engine",
- .off1 = offsetof(struct rbd_options, pool_name),
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_RBD,
- },
- {
- .name = "clientname",
- .lname = "rbd engine clientname",
- .type = FIO_OPT_STR_STORE,
- .help = "Name of the ceph client to access the RBD for the RBD engine",
- .off1 = offsetof(struct rbd_options, client_name),
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_RBD,
- },
- {
- .name = "busy_poll",
- .lname = "Busy poll",
- .type = FIO_OPT_BOOL,
- .help = "Busy poll for completions instead of sleeping",
- .off1 = offsetof(struct rbd_options, busy_poll),
- .def = "0",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_RBD,
- },
- {
- .name = NULL,
- },
-};
-
-static int _fio_setup_rbd_data(struct thread_data *td,
- struct rbd_data **rbd_data_ptr)
-{
- struct rbd_data *rbd;
-
- if (td->io_ops_data)
- return 0;
-
- rbd = calloc(1, sizeof(struct rbd_data));
- if (!rbd)
- goto failed;
-
- rbd->connected = false;
-
- /* add for poll, init fd: -1 */
- rbd->fd = -1;
-
- rbd->aio_events = calloc(td->o.iodepth, sizeof(struct io_u *));
- if (!rbd->aio_events)
- goto failed;
-
- rbd->sort_events = calloc(td->o.iodepth, sizeof(struct io_u *));
- if (!rbd->sort_events)
- goto failed;
-
- *rbd_data_ptr = rbd;
- return 0;
-
-failed:
- if (rbd) {
- if (rbd->aio_events)
- free(rbd->aio_events);
- if (rbd->sort_events)
- free(rbd->sort_events);
- free(rbd);
- }
- return 1;
-
-}
-
-#ifdef CONFIG_RBD_POLL
-static bool _fio_rbd_setup_poll(struct rbd_data *rbd)
-{
- int r;
-
- /* add for rbd poll */
- rbd->fd = eventfd(0, EFD_NONBLOCK);
- if (rbd->fd < 0) {
- log_err("eventfd failed.\n");
- return false;
- }
-
- r = rbd_set_image_notification(rbd->image, rbd->fd, EVENT_TYPE_EVENTFD);
- if (r < 0) {
- log_err("rbd_set_image_notification failed.\n");
- close(rbd->fd);
- rbd->fd = -1;
- return false;
- }
-
- return true;
-}
-#else
-static bool _fio_rbd_setup_poll(struct rbd_data *rbd)
-{
- return true;
-}
-#endif
-
-static int _fio_rbd_connect(struct thread_data *td)
-{
- struct rbd_data *rbd = td->io_ops_data;
- struct rbd_options *o = td->eo;
- int r;
-
- if (o->cluster_name) {
- char *client_name = NULL;
-
- /*
- * If we specify cluster name, the rados_create2
- * will not assume 'client.'. name is considered
- * as a full type.id namestr
- */
- if (o->client_name) {
- if (!index(o->client_name, '.')) {
- client_name = calloc(1, strlen("client.") +
- strlen(o->client_name) + 1);
- strcat(client_name, "client.");
- strcat(client_name, o->client_name);
- } else {
- client_name = o->client_name;
- }
- }
-
- r = rados_create2(&rbd->cluster, o->cluster_name,
- client_name, 0);
-
- if (client_name && !index(o->client_name, '.'))
- free(client_name);
- } else
- r = rados_create(&rbd->cluster, o->client_name);
-
- if (r < 0) {
- log_err("rados_create failed.\n");
- goto failed_early;
- }
-
- r = rados_conf_read_file(rbd->cluster, NULL);
- if (r < 0) {
- log_err("rados_conf_read_file failed.\n");
- goto failed_early;
- }
-
- r = rados_connect(rbd->cluster);
- if (r < 0) {
- log_err("rados_connect failed.\n");
- goto failed_shutdown;
- }
-
- r = rados_ioctx_create(rbd->cluster, o->pool_name, &rbd->io_ctx);
- if (r < 0) {
- log_err("rados_ioctx_create failed.\n");
- goto failed_shutdown;
- }
-
- r = rbd_open(rbd->io_ctx, o->rbd_name, &rbd->image, NULL /*snap */ );
- if (r < 0) {
- log_err("rbd_open failed.\n");
- goto failed_open;
- }
-
- if (!_fio_rbd_setup_poll(rbd))
- goto failed_poll;
-
- return 0;
-
-failed_poll:
- rbd_close(rbd->image);
- rbd->image = NULL;
-failed_open:
- rados_ioctx_destroy(rbd->io_ctx);
- rbd->io_ctx = NULL;
-failed_shutdown:
- rados_shutdown(rbd->cluster);
- rbd->cluster = NULL;
-failed_early:
- return 1;
-}
-
-static void _fio_rbd_disconnect(struct rbd_data *rbd)
-{
- if (!rbd)
- return;
-
- /* close eventfd */
- if (rbd->fd != -1) {
- close(rbd->fd);
- rbd->fd = -1;
- }
-
- /* shutdown everything */
- if (rbd->image) {
- rbd_close(rbd->image);
- rbd->image = NULL;
- }
-
- if (rbd->io_ctx) {
- rados_ioctx_destroy(rbd->io_ctx);
- rbd->io_ctx = NULL;
- }
-
- if (rbd->cluster) {
- rados_shutdown(rbd->cluster);
- rbd->cluster = NULL;
- }
-}
-
-static void _fio_rbd_finish_aiocb(rbd_completion_t comp, void *data)
-{
- struct fio_rbd_iou *fri = data;
- struct io_u *io_u = fri->io_u;
- ssize_t ret;
-
- /*
- * Looks like return value is 0 for success, or < 0 for
- * a specific error. So we have to assume that it can't do
- * partial completions.
- */
- ret = rbd_aio_get_return_value(fri->completion);
- if (ret < 0) {
- io_u->error = -ret;
- io_u->resid = io_u->xfer_buflen;
- } else
- io_u->error = 0;
-
- fri->io_complete = 1;
-}
-
-static struct io_u *fio_rbd_event(struct thread_data *td, int event)
-{
- struct rbd_data *rbd = td->io_ops_data;
-
- return rbd->aio_events[event];
-}
-
-static inline int fri_check_complete(struct rbd_data *rbd, struct io_u *io_u,
- unsigned int *events)
-{
- struct fio_rbd_iou *fri = io_u->engine_data;
-
- if (fri->io_complete) {
- fri->io_seen = 1;
- rbd->aio_events[*events] = io_u;
- (*events)++;
-
- rbd_aio_release(fri->completion);
- return 1;
- }
-
- return 0;
-}
-
-static inline int rbd_io_u_seen(struct io_u *io_u)
-{
- struct fio_rbd_iou *fri = io_u->engine_data;
-
- return fri->io_seen;
-}
-
-static void rbd_io_u_wait_complete(struct io_u *io_u)
-{
- struct fio_rbd_iou *fri = io_u->engine_data;
-
- rbd_aio_wait_for_complete(fri->completion);
-}
-
-static int rbd_io_u_cmp(const void *p1, const void *p2)
-{
- const struct io_u **a = (const struct io_u **) p1;
- const struct io_u **b = (const struct io_u **) p2;
- uint64_t at, bt;
-
- at = utime_since_now(&(*a)->start_time);
- bt = utime_since_now(&(*b)->start_time);
-
- if (at < bt)
- return -1;
- else if (at == bt)
- return 0;
- else
- return 1;
-}
-
-static int rbd_iter_events(struct thread_data *td, unsigned int *events,
- unsigned int min_evts, int wait)
-{
- struct rbd_data *rbd = td->io_ops_data;
- unsigned int this_events = 0;
- struct io_u *io_u;
- int i, sidx = 0;
-
-#ifdef CONFIG_RBD_POLL
- int ret = 0;
- int event_num = 0;
- struct fio_rbd_iou *fri = NULL;
- rbd_completion_t comps[min_evts];
-
- struct pollfd pfd;
- pfd.fd = rbd->fd;
- pfd.events = POLLIN;
-
- ret = poll(&pfd, 1, -1);
- if (ret <= 0)
- return 0;
-
- assert(pfd.revents & POLLIN);
-
- event_num = rbd_poll_io_events(rbd->image, comps, min_evts);
-
- for (i = 0; i < event_num; i++) {
- fri = rbd_aio_get_arg(comps[i]);
- io_u = fri->io_u;
-#else
- io_u_qiter(&td->io_u_all, io_u, i) {
-#endif
- if (!(io_u->flags & IO_U_F_FLIGHT))
- continue;
- if (rbd_io_u_seen(io_u))
- continue;
-
- if (fri_check_complete(rbd, io_u, events))
- this_events++;
- else if (wait)
- rbd->sort_events[sidx++] = io_u;
- }
-
- if (!wait || !sidx)
- return this_events;
-
- /*
- * Sort events, oldest issue first, then wait on as many as we
- * need in order of age. If we have enough events, stop waiting,
- * and just check if any of the older ones are done.
- */
- if (sidx > 1)
- qsort(rbd->sort_events, sidx, sizeof(struct io_u *), rbd_io_u_cmp);
-
- for (i = 0; i < sidx; i++) {
- io_u = rbd->sort_events[i];
-
- if (fri_check_complete(rbd, io_u, events)) {
- this_events++;
- continue;
- }
-
- /*
- * Stop waiting when we have enough, but continue checking
- * all pending IOs if they are complete.
- */
- if (*events >= min_evts)
- continue;
-
- rbd_io_u_wait_complete(io_u);
-
- if (fri_check_complete(rbd, io_u, events))
- this_events++;
- }
-
- return this_events;
-}
-
-static int fio_rbd_getevents(struct thread_data *td, unsigned int min,
- unsigned int max, const struct timespec *t)
-{
- unsigned int this_events, events = 0;
- struct rbd_options *o = td->eo;
- int wait = 0;
-
- do {
- this_events = rbd_iter_events(td, &events, min, wait);
-
- if (events >= min)
- break;
- if (this_events)
- continue;
-
- if (!o->busy_poll)
- wait = 1;
- else
- nop;
- } while (1);
-
- return events;
-}
-
-static int fio_rbd_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct rbd_data *rbd = td->io_ops_data;
- struct fio_rbd_iou *fri = io_u->engine_data;
- int r = -1;
-
- fio_ro_check(td, io_u);
-
- fri->io_seen = 0;
- fri->io_complete = 0;
-
- r = rbd_aio_create_completion(fri, _fio_rbd_finish_aiocb,
- &fri->completion);
- if (r < 0) {
- log_err("rbd_aio_create_completion failed.\n");
- goto failed;
- }
-
- if (io_u->ddir == DDIR_WRITE) {
-#ifdef CONFIG_RBD_BLKIN
- blkin_init_trace_info(&fri->info);
- r = rbd_aio_write_traced(rbd->image, io_u->offset, io_u->xfer_buflen,
- io_u->xfer_buf, fri->completion, &fri->info);
-#else
- r = rbd_aio_write(rbd->image, io_u->offset, io_u->xfer_buflen,
- io_u->xfer_buf, fri->completion);
-#endif
- if (r < 0) {
- log_err("rbd_aio_write failed.\n");
- goto failed_comp;
- }
-
- } else if (io_u->ddir == DDIR_READ) {
-#ifdef CONFIG_RBD_BLKIN
- blkin_init_trace_info(&fri->info);
- r = rbd_aio_read_traced(rbd->image, io_u->offset, io_u->xfer_buflen,
- io_u->xfer_buf, fri->completion, &fri->info);
-#else
- r = rbd_aio_read(rbd->image, io_u->offset, io_u->xfer_buflen,
- io_u->xfer_buf, fri->completion);
-#endif
-
- if (r < 0) {
- log_err("rbd_aio_read failed.\n");
- goto failed_comp;
- }
- } else if (io_u->ddir == DDIR_TRIM) {
- r = rbd_aio_discard(rbd->image, io_u->offset,
- io_u->xfer_buflen, fri->completion);
- if (r < 0) {
- log_err("rbd_aio_discard failed.\n");
- goto failed_comp;
- }
- } else if (io_u->ddir == DDIR_SYNC) {
- r = rbd_aio_flush(rbd->image, fri->completion);
- if (r < 0) {
- log_err("rbd_flush failed.\n");
- goto failed_comp;
- }
- } else {
- dprint(FD_IO, "%s: Warning: unhandled ddir: %d\n", __func__,
- io_u->ddir);
- goto failed_comp;
- }
-
- return FIO_Q_QUEUED;
-failed_comp:
- rbd_aio_release(fri->completion);
-failed:
- io_u->error = -r;
- td_verror(td, io_u->error, "xfer");
- return FIO_Q_COMPLETED;
-}
-
-static int fio_rbd_init(struct thread_data *td)
-{
- int r;
- struct rbd_data *rbd = td->io_ops_data;
-
- if (rbd->connected)
- return 0;
-
- r = _fio_rbd_connect(td);
- if (r) {
- log_err("fio_rbd_connect failed, return code: %d .\n", r);
- goto failed;
- }
-
- return 0;
-
-failed:
- return 1;
-}
-
-static void fio_rbd_cleanup(struct thread_data *td)
-{
- struct rbd_data *rbd = td->io_ops_data;
-
- if (rbd) {
- _fio_rbd_disconnect(rbd);
- free(rbd->aio_events);
- free(rbd->sort_events);
- free(rbd);
- }
-}
-
-static int fio_rbd_setup(struct thread_data *td)
-{
- rbd_image_info_t info;
- struct fio_file *f;
- struct rbd_data *rbd = NULL;
- int r;
-
- /* allocate engine specific structure to deal with librbd. */
- r = _fio_setup_rbd_data(td, &rbd);
- if (r) {
- log_err("fio_setup_rbd_data failed.\n");
- goto cleanup;
- }
- td->io_ops_data = rbd;
-
- /* librbd does not allow us to run first in the main thread and later
- * in a fork child. It needs to be the same process context all the
- * time.
- */
- td->o.use_thread = 1;
-
- /* connect in the main thread to determine
- * the size of the given RADOS block device. And disconnect
- * later on.
- */
- r = _fio_rbd_connect(td);
- if (r) {
- log_err("fio_rbd_connect failed.\n");
- goto cleanup;
- }
- rbd->connected = true;
-
- /* get size of the RADOS block device */
- r = rbd_stat(rbd->image, &info, sizeof(info));
- if (r < 0) {
- log_err("rbd_status failed.\n");
- goto cleanup;
- } else if (info.size == 0) {
- log_err("image size should be larger than zero.\n");
- r = -EINVAL;
- goto cleanup;
- }
-
- dprint(FD_IO, "rbd-engine: image size: %lu\n", info.size);
-
- /* taken from "net" engine. Pretend we deal with files,
- * even if we do not have any ideas about files.
- * The size of the RBD is set instead of a artificial file.
- */
- if (!td->files_index) {
- add_file(td, td->o.filename ? : "rbd", 0, 0);
- td->o.nr_files = td->o.nr_files ? : 1;
- td->o.open_files++;
- }
- f = td->files[0];
- f->real_file_size = info.size;
-
- return 0;
-
-cleanup:
- fio_rbd_cleanup(td);
- return r;
-}
-
-static int fio_rbd_open(struct thread_data *td, struct fio_file *f)
-{
- return 0;
-}
-
-static int fio_rbd_invalidate(struct thread_data *td, struct fio_file *f)
-{
-#if defined(CONFIG_RBD_INVAL)
- struct rbd_data *rbd = td->io_ops_data;
-
- return rbd_invalidate_cache(rbd->image);
-#else
- return 0;
-#endif
-}
-
-static void fio_rbd_io_u_free(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_rbd_iou *fri = io_u->engine_data;
-
- if (fri) {
- io_u->engine_data = NULL;
- free(fri);
- }
-}
-
-static int fio_rbd_io_u_init(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_rbd_iou *fri;
-
- fri = calloc(1, sizeof(*fri));
- fri->io_u = io_u;
- io_u->engine_data = fri;
- return 0;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "rbd",
- .version = FIO_IOOPS_VERSION,
- .setup = fio_rbd_setup,
- .init = fio_rbd_init,
- .queue = fio_rbd_queue,
- .getevents = fio_rbd_getevents,
- .event = fio_rbd_event,
- .cleanup = fio_rbd_cleanup,
- .open_file = fio_rbd_open,
- .invalidate = fio_rbd_invalidate,
- .options = options,
- .io_u_init = fio_rbd_io_u_init,
- .io_u_free = fio_rbd_io_u_free,
- .option_struct_size = sizeof(struct rbd_options),
-};
-
-static void fio_init fio_rbd_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_rbd_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/rdma.c b/engines/rdma.c
deleted file mode 100644
index 10e60dc8..00000000
--- a/engines/rdma.c
+++ /dev/null
@@ -1,1372 +0,0 @@
-/*
- * RDMA I/O engine
- *
- * RDMA I/O engine based on the IB verbs and RDMA/CM user space libraries.
- * Supports both RDMA memory semantics and channel semantics
- * for the InfiniBand, RoCE and iWARP protocols.
- *
- * You will need the Linux RDMA software installed, either
- * from your Linux distributor or directly from openfabrics.org:
- *
- * http://www.openfabrics.org/downloads/OFED/
- *
- * Exchanging steps of RDMA ioengine control messages:
- * 1. client side sends test mode (RDMA_WRITE/RDMA_READ/SEND)
- * to server side.
- * 2. server side parses test mode, and sends back confirmation
- * to client side. In RDMA WRITE/READ test, this confirmation
- * includes memory information, such as rkey, address.
- * 3. client side initiates test loop.
- * 4. In RDMA WRITE/READ test, client side sends a completion
- * notification to server side. Server side updates its
- * td->done as true.
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <assert.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <sys/poll.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-
-#include <pthread.h>
-#include <inttypes.h>
-
-#include "../fio.h"
-#include "../hash.h"
-#include "../optgroup.h"
-
-#include <rdma/rdma_cma.h>
-#include <infiniband/arch.h>
-
-#define FIO_RDMA_MAX_IO_DEPTH 512
-
-enum rdma_io_mode {
- FIO_RDMA_UNKNOWN = 0,
- FIO_RDMA_MEM_WRITE,
- FIO_RDMA_MEM_READ,
- FIO_RDMA_CHA_SEND,
- FIO_RDMA_CHA_RECV
-};
-
-struct rdmaio_options {
- struct thread_data *td;
- unsigned int port;
- enum rdma_io_mode verb;
-};
-
-static int str_hostname_cb(void *data, const char *input)
-{
- struct rdmaio_options *o = data;
-
- if (o->td->o.filename)
- free(o->td->o.filename);
- o->td->o.filename = strdup(input);
- return 0;
-}
-
-static struct fio_option options[] = {
- {
- .name = "hostname",
- .lname = "rdma engine hostname",
- .type = FIO_OPT_STR_STORE,
- .cb = str_hostname_cb,
- .help = "Hostname for RDMA IO engine",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_RDMA,
- },
- {
- .name = "port",
- .lname = "rdma engine port",
- .type = FIO_OPT_INT,
- .off1 = offsetof(struct rdmaio_options, port),
- .minval = 1,
- .maxval = 65535,
- .help = "Port to use for RDMA connections",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_RDMA,
- },
- {
- .name = "verb",
- .lname = "RDMA engine verb",
- .alias = "proto",
- .type = FIO_OPT_STR,
- .off1 = offsetof(struct rdmaio_options, verb),
- .help = "RDMA engine verb",
- .def = "write",
- .posval = {
- { .ival = "write",
- .oval = FIO_RDMA_MEM_WRITE,
- .help = "Memory Write",
- },
- { .ival = "read",
- .oval = FIO_RDMA_MEM_READ,
- .help = "Memory Read",
- },
- { .ival = "send",
- .oval = FIO_RDMA_CHA_SEND,
- .help = "Posted Send",
- },
- { .ival = "recv",
- .oval = FIO_RDMA_CHA_RECV,
- .help = "Posted Receive",
- },
- },
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_RDMA,
- },
- {
- .name = NULL,
- },
-};
-
-struct remote_u {
- uint64_t buf;
- uint32_t rkey;
- uint32_t size;
-};
-
-struct rdma_info_blk {
- uint32_t mode; /* channel semantic or memory semantic */
- uint32_t nr; /* client: io depth
- server: number of records for memory semantic
- */
- uint32_t max_bs; /* maximum block size */
- struct remote_u rmt_us[FIO_RDMA_MAX_IO_DEPTH];
-};
-
-struct rdma_io_u_data {
- uint64_t wr_id;
- struct ibv_send_wr sq_wr;
- struct ibv_recv_wr rq_wr;
- struct ibv_sge rdma_sgl;
-};
-
-struct rdmaio_data {
- int is_client;
- enum rdma_io_mode rdma_protocol;
- char host[64];
- struct sockaddr_in addr;
-
- struct ibv_recv_wr rq_wr;
- struct ibv_sge recv_sgl;
- struct rdma_info_blk recv_buf;
- struct ibv_mr *recv_mr;
-
- struct ibv_send_wr sq_wr;
- struct ibv_sge send_sgl;
- struct rdma_info_blk send_buf;
- struct ibv_mr *send_mr;
-
- struct ibv_comp_channel *channel;
- struct ibv_cq *cq;
- struct ibv_pd *pd;
- struct ibv_qp *qp;
-
- pthread_t cmthread;
- struct rdma_event_channel *cm_channel;
- struct rdma_cm_id *cm_id;
- struct rdma_cm_id *child_cm_id;
-
- int cq_event_num;
-
- struct remote_u *rmt_us;
- int rmt_nr;
- struct io_u **io_us_queued;
- int io_u_queued_nr;
- struct io_u **io_us_flight;
- int io_u_flight_nr;
- struct io_u **io_us_completed;
- int io_u_completed_nr;
-
- struct frand_state rand_state;
-};
-
-static int client_recv(struct thread_data *td, struct ibv_wc *wc)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- unsigned int max_bs;
-
- if (wc->byte_len != sizeof(rd->recv_buf)) {
- log_err("Received bogus data, size %d\n", wc->byte_len);
- return 1;
- }
-
- max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]);
- if (max_bs > ntohl(rd->recv_buf.max_bs)) {
- log_err("fio: Server's block size (%d) must be greater than or "
- "equal to the client's block size (%d)!\n",
- ntohl(rd->recv_buf.max_bs), max_bs);
- return 1;
- }
-
- /* store mr info for MEMORY semantic */
- if ((rd->rdma_protocol == FIO_RDMA_MEM_WRITE) ||
- (rd->rdma_protocol == FIO_RDMA_MEM_READ)) {
- /* struct flist_head *entry; */
- int i = 0;
-
- rd->rmt_nr = ntohl(rd->recv_buf.nr);
-
- for (i = 0; i < rd->rmt_nr; i++) {
- rd->rmt_us[i].buf = ntohll(rd->recv_buf.rmt_us[i].buf);
- rd->rmt_us[i].rkey = ntohl(rd->recv_buf.rmt_us[i].rkey);
- rd->rmt_us[i].size = ntohl(rd->recv_buf.rmt_us[i].size);
-
- dprint(FD_IO,
- "fio: Received rkey %x addr %" PRIx64
- " len %d from peer\n", rd->rmt_us[i].rkey,
- rd->rmt_us[i].buf, rd->rmt_us[i].size);
- }
- }
-
- return 0;
-}
-
-static int server_recv(struct thread_data *td, struct ibv_wc *wc)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- unsigned int max_bs;
-
- if (wc->wr_id == FIO_RDMA_MAX_IO_DEPTH) {
- rd->rdma_protocol = ntohl(rd->recv_buf.mode);
-
- /* CHANNEL semantic, do nothing */
- if (rd->rdma_protocol == FIO_RDMA_CHA_SEND)
- rd->rdma_protocol = FIO_RDMA_CHA_RECV;
-
- max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]);
- if (max_bs < ntohl(rd->recv_buf.max_bs)) {
- log_err("fio: Server's block size (%d) must be greater than or "
- "equal to the client's block size (%d)!\n",
- ntohl(rd->recv_buf.max_bs), max_bs);
- return 1;
- }
-
- }
-
- return 0;
-}
-
-static int cq_event_handler(struct thread_data *td, enum ibv_wc_opcode opcode)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- struct ibv_wc wc;
- struct rdma_io_u_data *r_io_u_d;
- int ret;
- int compevnum = 0;
- int i;
-
- while ((ret = ibv_poll_cq(rd->cq, 1, &wc)) == 1) {
- ret = 0;
- compevnum++;
-
- if (wc.status) {
- log_err("fio: cq completion status %d(%s)\n",
- wc.status, ibv_wc_status_str(wc.status));
- return -1;
- }
-
- switch (wc.opcode) {
-
- case IBV_WC_RECV:
- if (rd->is_client == 1)
- ret = client_recv(td, &wc);
- else
- ret = server_recv(td, &wc);
-
- if (ret)
- return -1;
-
- if (wc.wr_id == FIO_RDMA_MAX_IO_DEPTH)
- break;
-
- for (i = 0; i < rd->io_u_flight_nr; i++) {
- r_io_u_d = rd->io_us_flight[i]->engine_data;
-
- if (wc.wr_id == r_io_u_d->rq_wr.wr_id) {
- rd->io_us_flight[i]->resid =
- rd->io_us_flight[i]->buflen
- - wc.byte_len;
-
- rd->io_us_flight[i]->error = 0;
-
- rd->io_us_completed[rd->
- io_u_completed_nr]
- = rd->io_us_flight[i];
- rd->io_u_completed_nr++;
- break;
- }
- }
- if (i == rd->io_u_flight_nr)
- log_err("fio: recv wr %" PRId64 " not found\n",
- wc.wr_id);
- else {
- /* put the last one into middle of the list */
- rd->io_us_flight[i] =
- rd->io_us_flight[rd->io_u_flight_nr - 1];
- rd->io_u_flight_nr--;
- }
-
- break;
-
- case IBV_WC_SEND:
- case IBV_WC_RDMA_WRITE:
- case IBV_WC_RDMA_READ:
- if (wc.wr_id == FIO_RDMA_MAX_IO_DEPTH)
- break;
-
- for (i = 0; i < rd->io_u_flight_nr; i++) {
- r_io_u_d = rd->io_us_flight[i]->engine_data;
-
- if (wc.wr_id == r_io_u_d->sq_wr.wr_id) {
- rd->io_us_completed[rd->
- io_u_completed_nr]
- = rd->io_us_flight[i];
- rd->io_u_completed_nr++;
- break;
- }
- }
- if (i == rd->io_u_flight_nr)
- log_err("fio: send wr %" PRId64 " not found\n",
- wc.wr_id);
- else {
- /* put the last one into middle of the list */
- rd->io_us_flight[i] =
- rd->io_us_flight[rd->io_u_flight_nr - 1];
- rd->io_u_flight_nr--;
- }
-
- break;
-
- default:
- log_info("fio: unknown completion event %d\n",
- wc.opcode);
- return -1;
- }
- rd->cq_event_num++;
- }
-
- if (ret) {
- log_err("fio: poll error %d\n", ret);
- return 1;
- }
-
- return compevnum;
-}
-
-/*
- * Return -1 for error and 'nr events' for a positive number
- * of events
- */
-static int rdma_poll_wait(struct thread_data *td, enum ibv_wc_opcode opcode)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- struct ibv_cq *ev_cq;
- void *ev_ctx;
- int ret;
-
- if (rd->cq_event_num > 0) { /* previous left */
- rd->cq_event_num--;
- return 0;
- }
-
-again:
- if (ibv_get_cq_event(rd->channel, &ev_cq, &ev_ctx) != 0) {
- log_err("fio: Failed to get cq event!\n");
- return -1;
- }
- if (ev_cq != rd->cq) {
- log_err("fio: Unknown CQ!\n");
- return -1;
- }
- if (ibv_req_notify_cq(rd->cq, 0) != 0) {
- log_err("fio: Failed to set notify!\n");
- return -1;
- }
-
- ret = cq_event_handler(td, opcode);
- if (ret == 0)
- goto again;
-
- ibv_ack_cq_events(rd->cq, ret);
-
- rd->cq_event_num--;
-
- return ret;
-}
-
-static int fio_rdmaio_setup_qp(struct thread_data *td)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- struct ibv_qp_init_attr init_attr;
- int qp_depth = td->o.iodepth * 2; /* 2 times of io depth */
-
- if (rd->is_client == 0)
- rd->pd = ibv_alloc_pd(rd->child_cm_id->verbs);
- else
- rd->pd = ibv_alloc_pd(rd->cm_id->verbs);
-
- if (rd->pd == NULL) {
- log_err("fio: ibv_alloc_pd fail: %m\n");
- return 1;
- }
-
- if (rd->is_client == 0)
- rd->channel = ibv_create_comp_channel(rd->child_cm_id->verbs);
- else
- rd->channel = ibv_create_comp_channel(rd->cm_id->verbs);
- if (rd->channel == NULL) {
- log_err("fio: ibv_create_comp_channel fail: %m\n");
- goto err1;
- }
-
- if (qp_depth < 16)
- qp_depth = 16;
-
- if (rd->is_client == 0)
- rd->cq = ibv_create_cq(rd->child_cm_id->verbs,
- qp_depth, rd, rd->channel, 0);
- else
- rd->cq = ibv_create_cq(rd->cm_id->verbs,
- qp_depth, rd, rd->channel, 0);
- if (rd->cq == NULL) {
- log_err("fio: ibv_create_cq failed: %m\n");
- goto err2;
- }
-
- if (ibv_req_notify_cq(rd->cq, 0) != 0) {
- log_err("fio: ibv_req_notify_cq failed: %m\n");
- goto err3;
- }
-
- /* create queue pair */
- memset(&init_attr, 0, sizeof(init_attr));
- init_attr.cap.max_send_wr = qp_depth;
- init_attr.cap.max_recv_wr = qp_depth;
- init_attr.cap.max_recv_sge = 1;
- init_attr.cap.max_send_sge = 1;
- init_attr.qp_type = IBV_QPT_RC;
- init_attr.send_cq = rd->cq;
- init_attr.recv_cq = rd->cq;
-
- if (rd->is_client == 0) {
- if (rdma_create_qp(rd->child_cm_id, rd->pd, &init_attr) != 0) {
- log_err("fio: rdma_create_qp failed: %m\n");
- goto err3;
- }
- rd->qp = rd->child_cm_id->qp;
- } else {
- if (rdma_create_qp(rd->cm_id, rd->pd, &init_attr) != 0) {
- log_err("fio: rdma_create_qp failed: %m\n");
- goto err3;
- }
- rd->qp = rd->cm_id->qp;
- }
-
- return 0;
-
-err3:
- ibv_destroy_cq(rd->cq);
-err2:
- ibv_destroy_comp_channel(rd->channel);
-err1:
- ibv_dealloc_pd(rd->pd);
-
- return 1;
-}
-
-static int fio_rdmaio_setup_control_msg_buffers(struct thread_data *td)
-{
- struct rdmaio_data *rd = td->io_ops_data;
-
- rd->recv_mr = ibv_reg_mr(rd->pd, &rd->recv_buf, sizeof(rd->recv_buf),
- IBV_ACCESS_LOCAL_WRITE);
- if (rd->recv_mr == NULL) {
- log_err("fio: recv_buf reg_mr failed: %m\n");
- return 1;
- }
-
- rd->send_mr = ibv_reg_mr(rd->pd, &rd->send_buf, sizeof(rd->send_buf),
- 0);
- if (rd->send_mr == NULL) {
- log_err("fio: send_buf reg_mr failed: %m\n");
- ibv_dereg_mr(rd->recv_mr);
- return 1;
- }
-
- /* setup work request */
- /* recv wq */
- rd->recv_sgl.addr = (uint64_t) (unsigned long)&rd->recv_buf;
- rd->recv_sgl.length = sizeof(rd->recv_buf);
- rd->recv_sgl.lkey = rd->recv_mr->lkey;
- rd->rq_wr.sg_list = &rd->recv_sgl;
- rd->rq_wr.num_sge = 1;
- rd->rq_wr.wr_id = FIO_RDMA_MAX_IO_DEPTH;
-
- /* send wq */
- rd->send_sgl.addr = (uint64_t) (unsigned long)&rd->send_buf;
- rd->send_sgl.length = sizeof(rd->send_buf);
- rd->send_sgl.lkey = rd->send_mr->lkey;
-
- rd->sq_wr.opcode = IBV_WR_SEND;
- rd->sq_wr.send_flags = IBV_SEND_SIGNALED;
- rd->sq_wr.sg_list = &rd->send_sgl;
- rd->sq_wr.num_sge = 1;
- rd->sq_wr.wr_id = FIO_RDMA_MAX_IO_DEPTH;
-
- return 0;
-}
-
-static int get_next_channel_event(struct thread_data *td,
- struct rdma_event_channel *channel,
- enum rdma_cm_event_type wait_event)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- struct rdma_cm_event *event;
- int ret;
-
- ret = rdma_get_cm_event(channel, &event);
- if (ret) {
- log_err("fio: rdma_get_cm_event: %d\n", ret);
- return 1;
- }
-
- if (event->event != wait_event) {
- log_err("fio: event is %s instead of %s\n",
- rdma_event_str(event->event),
- rdma_event_str(wait_event));
- return 1;
- }
-
- switch (event->event) {
- case RDMA_CM_EVENT_CONNECT_REQUEST:
- rd->child_cm_id = event->id;
- break;
- default:
- break;
- }
-
- rdma_ack_cm_event(event);
-
- return 0;
-}
-
-static int fio_rdmaio_prep(struct thread_data *td, struct io_u *io_u)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- struct rdma_io_u_data *r_io_u_d;
-
- r_io_u_d = io_u->engine_data;
-
- switch (rd->rdma_protocol) {
- case FIO_RDMA_MEM_WRITE:
- case FIO_RDMA_MEM_READ:
- r_io_u_d->rdma_sgl.addr = (uint64_t) (unsigned long)io_u->buf;
- r_io_u_d->rdma_sgl.lkey = io_u->mr->lkey;
- r_io_u_d->sq_wr.wr_id = r_io_u_d->wr_id;
- r_io_u_d->sq_wr.send_flags = IBV_SEND_SIGNALED;
- r_io_u_d->sq_wr.sg_list = &r_io_u_d->rdma_sgl;
- r_io_u_d->sq_wr.num_sge = 1;
- break;
- case FIO_RDMA_CHA_SEND:
- r_io_u_d->rdma_sgl.addr = (uint64_t) (unsigned long)io_u->buf;
- r_io_u_d->rdma_sgl.lkey = io_u->mr->lkey;
- r_io_u_d->rdma_sgl.length = io_u->buflen;
- r_io_u_d->sq_wr.wr_id = r_io_u_d->wr_id;
- r_io_u_d->sq_wr.opcode = IBV_WR_SEND;
- r_io_u_d->sq_wr.send_flags = IBV_SEND_SIGNALED;
- r_io_u_d->sq_wr.sg_list = &r_io_u_d->rdma_sgl;
- r_io_u_d->sq_wr.num_sge = 1;
- break;
- case FIO_RDMA_CHA_RECV:
- r_io_u_d->rdma_sgl.addr = (uint64_t) (unsigned long)io_u->buf;
- r_io_u_d->rdma_sgl.lkey = io_u->mr->lkey;
- r_io_u_d->rdma_sgl.length = io_u->buflen;
- r_io_u_d->rq_wr.wr_id = r_io_u_d->wr_id;
- r_io_u_d->rq_wr.sg_list = &r_io_u_d->rdma_sgl;
- r_io_u_d->rq_wr.num_sge = 1;
- break;
- default:
- log_err("fio: unknown rdma protocol - %d\n", rd->rdma_protocol);
- break;
- }
-
- return 0;
-}
-
-static struct io_u *fio_rdmaio_event(struct thread_data *td, int event)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- struct io_u *io_u;
- int i;
-
- io_u = rd->io_us_completed[0];
- for (i = 0; i < rd->io_u_completed_nr - 1; i++)
- rd->io_us_completed[i] = rd->io_us_completed[i + 1];
-
- rd->io_u_completed_nr--;
-
- dprint_io_u(io_u, "fio_rdmaio_event");
-
- return io_u;
-}
-
-static int fio_rdmaio_getevents(struct thread_data *td, unsigned int min,
- unsigned int max, const struct timespec *t)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- enum ibv_wc_opcode comp_opcode;
- struct ibv_cq *ev_cq;
- void *ev_ctx;
- int ret, r = 0;
- comp_opcode = IBV_WC_RDMA_WRITE;
-
- switch (rd->rdma_protocol) {
- case FIO_RDMA_MEM_WRITE:
- comp_opcode = IBV_WC_RDMA_WRITE;
- break;
- case FIO_RDMA_MEM_READ:
- comp_opcode = IBV_WC_RDMA_READ;
- break;
- case FIO_RDMA_CHA_SEND:
- comp_opcode = IBV_WC_SEND;
- break;
- case FIO_RDMA_CHA_RECV:
- comp_opcode = IBV_WC_RECV;
- break;
- default:
- log_err("fio: unknown rdma protocol - %d\n", rd->rdma_protocol);
- break;
- }
-
- if (rd->cq_event_num > 0) { /* previous left */
- rd->cq_event_num--;
- return 0;
- }
-
-again:
- if (ibv_get_cq_event(rd->channel, &ev_cq, &ev_ctx) != 0) {
- log_err("fio: Failed to get cq event!\n");
- return -1;
- }
- if (ev_cq != rd->cq) {
- log_err("fio: Unknown CQ!\n");
- return -1;
- }
- if (ibv_req_notify_cq(rd->cq, 0) != 0) {
- log_err("fio: Failed to set notify!\n");
- return -1;
- }
-
- ret = cq_event_handler(td, comp_opcode);
- if (ret < 1)
- goto again;
-
- ibv_ack_cq_events(rd->cq, ret);
-
- r += ret;
- if (r < min)
- goto again;
-
- rd->cq_event_num -= r;
-
- return r;
-}
-
-static int fio_rdmaio_send(struct thread_data *td, struct io_u **io_us,
- unsigned int nr)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- struct ibv_send_wr *bad_wr;
-#if 0
- enum ibv_wc_opcode comp_opcode;
- comp_opcode = IBV_WC_RDMA_WRITE;
-#endif
- int i;
- long index;
- struct rdma_io_u_data *r_io_u_d;
-
- r_io_u_d = NULL;
-
- for (i = 0; i < nr; i++) {
- /* RDMA_WRITE or RDMA_READ */
- switch (rd->rdma_protocol) {
- case FIO_RDMA_MEM_WRITE:
- /* compose work request */
- r_io_u_d = io_us[i]->engine_data;
- index = __rand(&rd->rand_state) % rd->rmt_nr;
- r_io_u_d->sq_wr.opcode = IBV_WR_RDMA_WRITE;
- r_io_u_d->sq_wr.wr.rdma.rkey = rd->rmt_us[index].rkey;
- r_io_u_d->sq_wr.wr.rdma.remote_addr = \
- rd->rmt_us[index].buf;
- r_io_u_d->sq_wr.sg_list->length = io_us[i]->buflen;
- break;
- case FIO_RDMA_MEM_READ:
- /* compose work request */
- r_io_u_d = io_us[i]->engine_data;
- index = __rand(&rd->rand_state) % rd->rmt_nr;
- r_io_u_d->sq_wr.opcode = IBV_WR_RDMA_READ;
- r_io_u_d->sq_wr.wr.rdma.rkey = rd->rmt_us[index].rkey;
- r_io_u_d->sq_wr.wr.rdma.remote_addr = \
- rd->rmt_us[index].buf;
- r_io_u_d->sq_wr.sg_list->length = io_us[i]->buflen;
- break;
- case FIO_RDMA_CHA_SEND:
- r_io_u_d = io_us[i]->engine_data;
- r_io_u_d->sq_wr.opcode = IBV_WR_SEND;
- r_io_u_d->sq_wr.send_flags = IBV_SEND_SIGNALED;
- break;
- default:
- log_err("fio: unknown rdma protocol - %d\n",
- rd->rdma_protocol);
- break;
- }
-
- if (ibv_post_send(rd->qp, &r_io_u_d->sq_wr, &bad_wr) != 0) {
- log_err("fio: ibv_post_send fail: %m\n");
- return -1;
- }
-
- dprint_io_u(io_us[i], "fio_rdmaio_send");
- }
-
- /* wait for completion
- rdma_poll_wait(td, comp_opcode); */
-
- return i;
-}
-
-static int fio_rdmaio_recv(struct thread_data *td, struct io_u **io_us,
- unsigned int nr)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- struct ibv_recv_wr *bad_wr;
- struct rdma_io_u_data *r_io_u_d;
- int i;
-
- i = 0;
- if (rd->rdma_protocol == FIO_RDMA_CHA_RECV) {
- /* post io_u into recv queue */
- for (i = 0; i < nr; i++) {
- r_io_u_d = io_us[i]->engine_data;
- if (ibv_post_recv(rd->qp, &r_io_u_d->rq_wr, &bad_wr) !=
- 0) {
- log_err("fio: ibv_post_recv fail: %m\n");
- return 1;
- }
- }
- } else if ((rd->rdma_protocol == FIO_RDMA_MEM_READ)
- || (rd->rdma_protocol == FIO_RDMA_MEM_WRITE)) {
- /* re-post the rq_wr */
- if (ibv_post_recv(rd->qp, &rd->rq_wr, &bad_wr) != 0) {
- log_err("fio: ibv_post_recv fail: %m\n");
- return 1;
- }
-
- rdma_poll_wait(td, IBV_WC_RECV);
-
- dprint(FD_IO, "fio: recv FINISH message\n");
- td->done = 1;
- return 0;
- }
-
- return i;
-}
-
-static int fio_rdmaio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct rdmaio_data *rd = td->io_ops_data;
-
- fio_ro_check(td, io_u);
-
- if (rd->io_u_queued_nr == (int)td->o.iodepth)
- return FIO_Q_BUSY;
-
- rd->io_us_queued[rd->io_u_queued_nr] = io_u;
- rd->io_u_queued_nr++;
-
- dprint_io_u(io_u, "fio_rdmaio_queue");
-
- return FIO_Q_QUEUED;
-}
-
-static void fio_rdmaio_queued(struct thread_data *td, struct io_u **io_us,
- unsigned int nr)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- struct timeval now;
- unsigned int i;
-
- if (!fio_fill_issue_time(td))
- return;
-
- fio_gettime(&now, NULL);
-
- for (i = 0; i < nr; i++) {
- struct io_u *io_u = io_us[i];
-
- /* queued -> flight */
- rd->io_us_flight[rd->io_u_flight_nr] = io_u;
- rd->io_u_flight_nr++;
-
- memcpy(&io_u->issue_time, &now, sizeof(now));
- io_u_queued(td, io_u);
- }
-}
-
-static int fio_rdmaio_commit(struct thread_data *td)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- struct io_u **io_us;
- int ret;
-
- if (!rd->io_us_queued)
- return 0;
-
- io_us = rd->io_us_queued;
- do {
- /* RDMA_WRITE or RDMA_READ */
- if (rd->is_client)
- ret = fio_rdmaio_send(td, io_us, rd->io_u_queued_nr);
- else if (!rd->is_client)
- ret = fio_rdmaio_recv(td, io_us, rd->io_u_queued_nr);
- else
- ret = 0; /* must be a SYNC */
-
- if (ret > 0) {
- fio_rdmaio_queued(td, io_us, ret);
- io_u_mark_submit(td, ret);
- rd->io_u_queued_nr -= ret;
- io_us += ret;
- ret = 0;
- } else
- break;
- } while (rd->io_u_queued_nr);
-
- return ret;
-}
-
-static int fio_rdmaio_connect(struct thread_data *td, struct fio_file *f)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- struct rdma_conn_param conn_param;
- struct ibv_send_wr *bad_wr;
-
- memset(&conn_param, 0, sizeof(conn_param));
- conn_param.responder_resources = 1;
- conn_param.initiator_depth = 1;
- conn_param.retry_count = 10;
-
- if (rdma_connect(rd->cm_id, &conn_param) != 0) {
- log_err("fio: rdma_connect fail: %m\n");
- return 1;
- }
-
- if (get_next_channel_event
- (td, rd->cm_channel, RDMA_CM_EVENT_ESTABLISHED) != 0) {
- log_err("fio: wait for RDMA_CM_EVENT_ESTABLISHED\n");
- return 1;
- }
-
- /* send task request */
- rd->send_buf.mode = htonl(rd->rdma_protocol);
- rd->send_buf.nr = htonl(td->o.iodepth);
-
- if (ibv_post_send(rd->qp, &rd->sq_wr, &bad_wr) != 0) {
- log_err("fio: ibv_post_send fail: %m\n");
- return 1;
- }
-
- if (rdma_poll_wait(td, IBV_WC_SEND) < 0)
- return 1;
-
- /* wait for remote MR info from server side */
- if (rdma_poll_wait(td, IBV_WC_RECV) < 0)
- return 1;
-
- /* In SEND/RECV test, it's a good practice to setup the iodepth of
- * of the RECV side deeper than that of the SEND side to
- * avoid RNR (receiver not ready) error. The
- * SEND side may send so many unsolicited message before
- * RECV side commits sufficient recv buffers into recv queue.
- * This may lead to RNR error. Here, SEND side pauses for a while
- * during which RECV side commits sufficient recv buffers.
- */
- usleep(500000);
-
- return 0;
-}
-
-static int fio_rdmaio_accept(struct thread_data *td, struct fio_file *f)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- struct rdma_conn_param conn_param;
- struct ibv_send_wr *bad_wr;
- int ret = 0;
-
- /* rdma_accept() - then wait for accept success */
- memset(&conn_param, 0, sizeof(conn_param));
- conn_param.responder_resources = 1;
- conn_param.initiator_depth = 1;
-
- if (rdma_accept(rd->child_cm_id, &conn_param) != 0) {
- log_err("fio: rdma_accept: %m\n");
- return 1;
- }
-
- if (get_next_channel_event
- (td, rd->cm_channel, RDMA_CM_EVENT_ESTABLISHED) != 0) {
- log_err("fio: wait for RDMA_CM_EVENT_ESTABLISHED\n");
- return 1;
- }
-
- /* wait for request */
- ret = rdma_poll_wait(td, IBV_WC_RECV) < 0;
-
- if (ibv_post_send(rd->qp, &rd->sq_wr, &bad_wr) != 0) {
- log_err("fio: ibv_post_send fail: %m\n");
- return 1;
- }
-
- if (rdma_poll_wait(td, IBV_WC_SEND) < 0)
- return 1;
-
- return ret;
-}
-
-static int fio_rdmaio_open_file(struct thread_data *td, struct fio_file *f)
-{
- if (td_read(td))
- return fio_rdmaio_accept(td, f);
- else
- return fio_rdmaio_connect(td, f);
-}
-
-static int fio_rdmaio_close_file(struct thread_data *td, struct fio_file *f)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- struct ibv_send_wr *bad_wr;
-
- /* unregister rdma buffer */
-
- /*
- * Client sends notification to the server side
- */
- /* refer to: http://linux.die.net/man/7/rdma_cm */
- if ((rd->is_client == 1) && ((rd->rdma_protocol == FIO_RDMA_MEM_WRITE)
- || (rd->rdma_protocol ==
- FIO_RDMA_MEM_READ))) {
- if (ibv_post_send(rd->qp, &rd->sq_wr, &bad_wr) != 0) {
- log_err("fio: ibv_post_send fail: %m\n");
- return 1;
- }
-
- dprint(FD_IO, "fio: close information sent success\n");
- rdma_poll_wait(td, IBV_WC_SEND);
- }
-
- if (rd->is_client == 1)
- rdma_disconnect(rd->cm_id);
- else {
- rdma_disconnect(rd->child_cm_id);
-#if 0
- rdma_disconnect(rd->cm_id);
-#endif
- }
-
-#if 0
- if (get_next_channel_event(td, rd->cm_channel, RDMA_CM_EVENT_DISCONNECTED) != 0) {
- log_err("fio: wait for RDMA_CM_EVENT_DISCONNECTED\n");
- return 1;
- }
-#endif
-
- ibv_destroy_cq(rd->cq);
- ibv_destroy_qp(rd->qp);
-
- if (rd->is_client == 1)
- rdma_destroy_id(rd->cm_id);
- else {
- rdma_destroy_id(rd->child_cm_id);
- rdma_destroy_id(rd->cm_id);
- }
-
- ibv_destroy_comp_channel(rd->channel);
- ibv_dealloc_pd(rd->pd);
-
- return 0;
-}
-
-static int fio_rdmaio_setup_connect(struct thread_data *td, const char *host,
- unsigned short port)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- struct ibv_recv_wr *bad_wr;
- int err;
-
- rd->addr.sin_family = AF_INET;
- rd->addr.sin_port = htons(port);
-
- if (inet_aton(host, &rd->addr.sin_addr) != 1) {
- struct hostent *hent;
-
- hent = gethostbyname(host);
- if (!hent) {
- td_verror(td, errno, "gethostbyname");
- return 1;
- }
-
- memcpy(&rd->addr.sin_addr, hent->h_addr, 4);
- }
-
- /* resolve route */
- err = rdma_resolve_addr(rd->cm_id, NULL, (struct sockaddr *)&rd->addr, 2000);
- if (err != 0) {
- log_err("fio: rdma_resolve_addr: %d\n", err);
- return 1;
- }
-
- err = get_next_channel_event(td, rd->cm_channel, RDMA_CM_EVENT_ADDR_RESOLVED);
- if (err != 0) {
- log_err("fio: get_next_channel_event: %d\n", err);
- return 1;
- }
-
- /* resolve route */
- err = rdma_resolve_route(rd->cm_id, 2000);
- if (err != 0) {
- log_err("fio: rdma_resolve_route: %d\n", err);
- return 1;
- }
-
- err = get_next_channel_event(td, rd->cm_channel, RDMA_CM_EVENT_ROUTE_RESOLVED);
- if (err != 0) {
- log_err("fio: get_next_channel_event: %d\n", err);
- return 1;
- }
-
- /* create qp and buffer */
- if (fio_rdmaio_setup_qp(td) != 0)
- return 1;
-
- if (fio_rdmaio_setup_control_msg_buffers(td) != 0)
- return 1;
-
- /* post recv buf */
- err = ibv_post_recv(rd->qp, &rd->rq_wr, &bad_wr);
- if (err != 0) {
- log_err("fio: ibv_post_recv fail: %d\n", err);
- return 1;
- }
-
- return 0;
-}
-
-static int fio_rdmaio_setup_listen(struct thread_data *td, short port)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- struct ibv_recv_wr *bad_wr;
- int state = td->runstate;
-
- td_set_runstate(td, TD_SETTING_UP);
-
- rd->addr.sin_family = AF_INET;
- rd->addr.sin_addr.s_addr = htonl(INADDR_ANY);
- rd->addr.sin_port = htons(port);
-
- /* rdma_listen */
- if (rdma_bind_addr(rd->cm_id, (struct sockaddr *)&rd->addr) != 0) {
- log_err("fio: rdma_bind_addr fail: %m\n");
- return 1;
- }
-
- if (rdma_listen(rd->cm_id, 3) != 0) {
- log_err("fio: rdma_listen fail: %m\n");
- return 1;
- }
-
- log_info("fio: waiting for connection\n");
-
- /* wait for CONNECT_REQUEST */
- if (get_next_channel_event
- (td, rd->cm_channel, RDMA_CM_EVENT_CONNECT_REQUEST) != 0) {
- log_err("fio: wait for RDMA_CM_EVENT_CONNECT_REQUEST\n");
- return 1;
- }
-
- if (fio_rdmaio_setup_qp(td) != 0)
- return 1;
-
- if (fio_rdmaio_setup_control_msg_buffers(td) != 0)
- return 1;
-
- /* post recv buf */
- if (ibv_post_recv(rd->qp, &rd->rq_wr, &bad_wr) != 0) {
- log_err("fio: ibv_post_recv fail: %m\n");
- return 1;
- }
-
- td_set_runstate(td, state);
- return 0;
-}
-
-static int check_set_rlimits(struct thread_data *td)
-{
-#ifdef CONFIG_RLIMIT_MEMLOCK
- struct rlimit rl;
-
- /* check RLIMIT_MEMLOCK */
- if (getrlimit(RLIMIT_MEMLOCK, &rl) != 0) {
- log_err("fio: getrlimit fail: %d(%s)\n",
- errno, strerror(errno));
- return 1;
- }
-
- /* soft limit */
- if ((rl.rlim_cur != RLIM_INFINITY)
- && (rl.rlim_cur < td->orig_buffer_size)) {
- log_err("fio: soft RLIMIT_MEMLOCK is: %" PRId64 "\n",
- rl.rlim_cur);
- log_err("fio: total block size is: %zd\n",
- td->orig_buffer_size);
- /* try to set larger RLIMIT_MEMLOCK */
- rl.rlim_cur = rl.rlim_max;
- if (setrlimit(RLIMIT_MEMLOCK, &rl) != 0) {
- log_err("fio: setrlimit fail: %d(%s)\n",
- errno, strerror(errno));
- log_err("fio: you may try enlarge MEMLOCK by root\n");
- log_err("# ulimit -l unlimited\n");
- return 1;
- }
- }
-#endif
-
- return 0;
-}
-
-static int compat_options(struct thread_data *td)
-{
- // The original RDMA engine had an ugly / seperator
- // on the filename for it's options. This function
- // retains backwards compatibility with it.100
-
- struct rdmaio_options *o = td->eo;
- char *modep, *portp;
- char *filename = td->o.filename;
-
- if (!filename)
- return 0;
-
- portp = strchr(filename, '/');
- if (portp == NULL)
- return 0;
-
- *portp = '\0';
- portp++;
-
- o->port = strtol(portp, NULL, 10);
- if (!o->port || o->port > 65535)
- goto bad_host;
-
- modep = strchr(portp, '/');
- if (modep != NULL) {
- *modep = '\0';
- modep++;
- }
-
- if (modep) {
- if (!strncmp("rdma_write", modep, strlen(modep)) ||
- !strncmp("RDMA_WRITE", modep, strlen(modep)))
- o->verb = FIO_RDMA_MEM_WRITE;
- else if (!strncmp("rdma_read", modep, strlen(modep)) ||
- !strncmp("RDMA_READ", modep, strlen(modep)))
- o->verb = FIO_RDMA_MEM_READ;
- else if (!strncmp("send", modep, strlen(modep)) ||
- !strncmp("SEND", modep, strlen(modep)))
- o->verb = FIO_RDMA_CHA_SEND;
- else
- goto bad_host;
- } else
- o->verb = FIO_RDMA_MEM_WRITE;
-
-
- return 0;
-
-bad_host:
- log_err("fio: bad rdma host/port/protocol: %s\n", td->o.filename);
- return 1;
-}
-
-static int fio_rdmaio_init(struct thread_data *td)
-{
- struct rdmaio_data *rd = td->io_ops_data;
- struct rdmaio_options *o = td->eo;
- unsigned int max_bs;
- int ret, i;
-
- if (td_rw(td)) {
- log_err("fio: rdma connections must be read OR write\n");
- return 1;
- }
- if (td_random(td)) {
- log_err("fio: RDMA network IO can't be random\n");
- return 1;
- }
-
- if (compat_options(td))
- return 1;
-
- if (!o->port) {
- log_err("fio: no port has been specified which is required "
- "for the rdma engine\n");
- return 1;
- }
-
- if (check_set_rlimits(td))
- return 1;
-
- rd->rdma_protocol = o->verb;
- rd->cq_event_num = 0;
-
- rd->cm_channel = rdma_create_event_channel();
- if (!rd->cm_channel) {
- log_err("fio: rdma_create_event_channel fail: %m\n");
- return 1;
- }
-
- ret = rdma_create_id(rd->cm_channel, &rd->cm_id, rd, RDMA_PS_TCP);
- if (ret) {
- log_err("fio: rdma_create_id fail: %m\n");
- return 1;
- }
-
- if ((rd->rdma_protocol == FIO_RDMA_MEM_WRITE) ||
- (rd->rdma_protocol == FIO_RDMA_MEM_READ)) {
- rd->rmt_us =
- malloc(FIO_RDMA_MAX_IO_DEPTH * sizeof(struct remote_u));
- memset(rd->rmt_us, 0,
- FIO_RDMA_MAX_IO_DEPTH * sizeof(struct remote_u));
- rd->rmt_nr = 0;
- }
-
- rd->io_us_queued = malloc(td->o.iodepth * sizeof(struct io_u *));
- memset(rd->io_us_queued, 0, td->o.iodepth * sizeof(struct io_u *));
- rd->io_u_queued_nr = 0;
-
- rd->io_us_flight = malloc(td->o.iodepth * sizeof(struct io_u *));
- memset(rd->io_us_flight, 0, td->o.iodepth * sizeof(struct io_u *));
- rd->io_u_flight_nr = 0;
-
- rd->io_us_completed = malloc(td->o.iodepth * sizeof(struct io_u *));
- memset(rd->io_us_completed, 0, td->o.iodepth * sizeof(struct io_u *));
- rd->io_u_completed_nr = 0;
-
- if (td_read(td)) { /* READ as the server */
- rd->is_client = 0;
- td->flags |= TD_F_NO_PROGRESS;
- /* server rd->rdma_buf_len will be setup after got request */
- ret = fio_rdmaio_setup_listen(td, o->port);
- } else { /* WRITE as the client */
- rd->is_client = 1;
- ret = fio_rdmaio_setup_connect(td, td->o.filename, o->port);
- }
-
- max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]);
- rd->send_buf.max_bs = htonl(max_bs);
-
- /* register each io_u in the free list */
- for (i = 0; i < td->io_u_freelist.nr; i++) {
- struct io_u *io_u = td->io_u_freelist.io_us[i];
-
- io_u->engine_data = malloc(sizeof(struct rdma_io_u_data));
- memset(io_u->engine_data, 0, sizeof(struct rdma_io_u_data));
- ((struct rdma_io_u_data *)io_u->engine_data)->wr_id = i;
-
- io_u->mr = ibv_reg_mr(rd->pd, io_u->buf, max_bs,
- IBV_ACCESS_LOCAL_WRITE |
- IBV_ACCESS_REMOTE_READ |
- IBV_ACCESS_REMOTE_WRITE);
- if (io_u->mr == NULL) {
- log_err("fio: ibv_reg_mr io_u failed: %m\n");
- return 1;
- }
-
- rd->send_buf.rmt_us[i].buf =
- htonll((uint64_t) (unsigned long)io_u->buf);
- rd->send_buf.rmt_us[i].rkey = htonl(io_u->mr->rkey);
- rd->send_buf.rmt_us[i].size = htonl(max_bs);
-
-#if 0
- log_info("fio: Send rkey %x addr %" PRIx64 " len %d to client\n", io_u->mr->rkey, io_u->buf, max_bs); */
-#endif
- }
-
- rd->send_buf.nr = htonl(i);
-
- return ret;
-}
-
-static void fio_rdmaio_cleanup(struct thread_data *td)
-{
- struct rdmaio_data *rd = td->io_ops_data;
-
- if (rd)
- free(rd);
-}
-
-static int fio_rdmaio_setup(struct thread_data *td)
-{
- struct rdmaio_data *rd;
-
- if (!td->files_index) {
- add_file(td, td->o.filename ?: "rdma", 0, 0);
- td->o.nr_files = td->o.nr_files ?: 1;
- td->o.open_files++;
- }
-
- if (!td->io_ops_data) {
- rd = malloc(sizeof(*rd));
-
- memset(rd, 0, sizeof(*rd));
- init_rand_seed(&rd->rand_state, (unsigned int) GOLDEN_RATIO_PRIME, 0);
- td->io_ops_data = rd;
- }
-
- return 0;
-}
-
-static struct ioengine_ops ioengine_rw = {
- .name = "rdma",
- .version = FIO_IOOPS_VERSION,
- .setup = fio_rdmaio_setup,
- .init = fio_rdmaio_init,
- .prep = fio_rdmaio_prep,
- .queue = fio_rdmaio_queue,
- .commit = fio_rdmaio_commit,
- .getevents = fio_rdmaio_getevents,
- .event = fio_rdmaio_event,
- .cleanup = fio_rdmaio_cleanup,
- .open_file = fio_rdmaio_open_file,
- .close_file = fio_rdmaio_close_file,
- .flags = FIO_DISKLESSIO | FIO_UNIDIR | FIO_PIPEIO,
- .options = options,
- .option_struct_size = sizeof(struct rdmaio_options),
-};
-
-static void fio_init fio_rdmaio_register(void)
-{
- register_ioengine(&ioengine_rw);
-}
-
-static void fio_exit fio_rdmaio_unregister(void)
-{
- unregister_ioengine(&ioengine_rw);
-}
diff --git a/engines/sg.c b/engines/sg.c
deleted file mode 100644
index 2148e87c..00000000
--- a/engines/sg.c
+++ /dev/null
@@ -1,856 +0,0 @@
-/*
- * sg engine
- *
- * IO engine that uses the Linux SG v3 interface to talk to SCSI devices
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/poll.h>
-
-#include "../fio.h"
-
-#ifdef FIO_HAVE_SGIO
-
-#define MAX_10B_LBA 0xFFFFFFFFULL
-#define SCSI_TIMEOUT_MS 30000 // 30 second timeout; currently no method to override
-#define MAX_SB 64 // sense block maximum return size
-
-struct sgio_cmd {
- unsigned char cdb[16]; // enhanced from 10 to support 16 byte commands
- unsigned char sb[MAX_SB]; // add sense block to commands
- int nr;
-};
-
-struct sgio_data {
- struct sgio_cmd *cmds;
- struct io_u **events;
- struct pollfd *pfds;
- int *fd_flags;
- void *sgbuf;
- unsigned int bs;
- int type_checked;
-};
-
-static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
- struct io_u *io_u, int fs)
-{
- struct sgio_cmd *sc = &sd->cmds[io_u->index];
-
- memset(hdr, 0, sizeof(*hdr));
- memset(sc->cdb, 0, sizeof(sc->cdb));
-
- hdr->interface_id = 'S';
- hdr->cmdp = sc->cdb;
- hdr->cmd_len = sizeof(sc->cdb);
- hdr->sbp = sc->sb;
- hdr->mx_sb_len = sizeof(sc->sb);
- hdr->pack_id = io_u->index;
- hdr->usr_ptr = io_u;
-
- if (fs) {
- hdr->dxferp = io_u->xfer_buf;
- hdr->dxfer_len = io_u->xfer_buflen;
- }
-}
-
-static int pollin_events(struct pollfd *pfds, int fds)
-{
- int i;
-
- for (i = 0; i < fds; i++)
- if (pfds[i].revents & POLLIN)
- return 1;
-
- return 0;
-}
-
-static int sg_fd_read(int fd, void *data, size_t size)
-{
- int err = 0;
-
- while (size) {
- ssize_t ret;
-
- ret = read(fd, data, size);
- if (ret < 0) {
- if (errno == EAGAIN || errno == EINTR)
- continue;
- err = errno;
- break;
- } else if (!ret)
- break;
- else {
- data += ret;
- size -= ret;
- }
- }
-
- if (err)
- return err;
- if (size)
- return EAGAIN;
-
- return 0;
-}
-
-static int fio_sgio_getevents(struct thread_data *td, unsigned int min,
- unsigned int max,
- const struct timespec fio_unused *t)
-{
- struct sgio_data *sd = td->io_ops_data;
- int left = max, eventNum, ret, r = 0;
- void *buf = sd->sgbuf;
- unsigned int i, events;
- struct fio_file *f;
-
- /*
- * Fill in the file descriptors
- */
- for_each_file(td, f, i) {
- /*
- * don't block for min events == 0
- */
- if (!min)
- sd->fd_flags[i] = fio_set_fd_nonblocking(f->fd, "sg");
- else
- sd->fd_flags[i] = -1;
-
- sd->pfds[i].fd = f->fd;
- sd->pfds[i].events = POLLIN;
- }
-
- while (left) {
- void *p;
-
- dprint(FD_IO, "sgio_getevents: sd %p: left=%d\n", sd, left);
-
- do {
- if (!min)
- break;
-
- ret = poll(sd->pfds, td->o.nr_files, -1);
- if (ret < 0) {
- if (!r)
- r = -errno;
- td_verror(td, errno, "poll");
- break;
- } else if (!ret)
- continue;
-
- if (pollin_events(sd->pfds, td->o.nr_files))
- break;
- } while (1);
-
- if (r < 0)
- break;
-
-re_read:
- p = buf;
- events = 0;
- for_each_file(td, f, i) {
- for (eventNum = 0; eventNum < left; eventNum++) {
- ret = sg_fd_read(f->fd, p, sizeof(struct sg_io_hdr));
- dprint(FD_IO, "sgio_getevents: ret: %d\n", ret);
- if (ret) {
- r = -ret;
- td_verror(td, r, "sg_read");
- break;
- }
- p += sizeof(struct sg_io_hdr);
- events++;
- dprint(FD_IO, "sgio_getevents: events: %d\n", events);
- }
- }
-
- if (r < 0 && !events)
- break;
- if (!events) {
- usleep(1000);
- goto re_read;
- }
-
- left -= events;
- r += events;
-
- for (i = 0; i < events; i++) {
- struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
- sd->events[i] = hdr->usr_ptr;
-
- /* record if an io error occurred, ignore resid */
- if (hdr->info & SG_INFO_CHECK) {
- struct io_u *io_u;
- io_u = (struct io_u *)(hdr->usr_ptr);
- memcpy((void*)&(io_u->hdr), (void*)hdr, sizeof(struct sg_io_hdr));
- sd->events[i]->error = EIO;
- }
- }
- }
-
- if (!min) {
- for_each_file(td, f, i) {
- if (sd->fd_flags[i] == -1)
- continue;
-
- if (fcntl(f->fd, F_SETFL, sd->fd_flags[i]) < 0)
- log_err("fio: sg failed to restore fcntl flags: %s\n", strerror(errno));
- }
- }
-
- return r;
-}
-
-static int fio_sgio_ioctl_doio(struct thread_data *td,
- struct fio_file *f, struct io_u *io_u)
-{
- struct sgio_data *sd = td->io_ops_data;
- struct sg_io_hdr *hdr = &io_u->hdr;
- int ret;
-
- sd->events[0] = io_u;
-
- ret = ioctl(f->fd, SG_IO, hdr);
- if (ret < 0)
- return ret;
-
- /* record if an io error occurred */
- if (hdr->info & SG_INFO_CHECK)
- io_u->error = EIO;
-
- return FIO_Q_COMPLETED;
-}
-
-static int fio_sgio_rw_doio(struct fio_file *f, struct io_u *io_u, int do_sync)
-{
- struct sg_io_hdr *hdr = &io_u->hdr;
- int ret;
-
- ret = write(f->fd, hdr, sizeof(*hdr));
- if (ret < 0)
- return ret;
-
- if (do_sync) {
- ret = read(f->fd, hdr, sizeof(*hdr));
- if (ret < 0)
- return ret;
-
- /* record if an io error occurred */
- if (hdr->info & SG_INFO_CHECK)
- io_u->error = EIO;
-
- return FIO_Q_COMPLETED;
- }
-
- return FIO_Q_QUEUED;
-}
-
-static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int do_sync)
-{
- struct fio_file *f = io_u->file;
- int ret;
-
- if (f->filetype == FIO_TYPE_BLOCK) {
- ret = fio_sgio_ioctl_doio(td, f, io_u);
- td->error = io_u->error;
- } else {
- ret = fio_sgio_rw_doio(f, io_u, do_sync);
- if (do_sync)
- td->error = io_u->error;
- }
-
- return ret;
-}
-
-static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
-{
- struct sg_io_hdr *hdr = &io_u->hdr;
- struct sgio_data *sd = td->io_ops_data;
- long long nr_blocks, lba;
-
- if (io_u->xfer_buflen & (sd->bs - 1)) {
- log_err("read/write not sector aligned\n");
- return EINVAL;
- }
-
- nr_blocks = io_u->xfer_buflen / sd->bs;
- lba = io_u->offset / sd->bs;
-
- if (io_u->ddir == DDIR_READ) {
- sgio_hdr_init(sd, hdr, io_u, 1);
-
- hdr->dxfer_direction = SG_DXFER_FROM_DEV;
- if (lba < MAX_10B_LBA)
- hdr->cmdp[0] = 0x28; // read(10)
- else
- hdr->cmdp[0] = 0x88; // read(16)
- } else if (io_u->ddir == DDIR_WRITE) {
- sgio_hdr_init(sd, hdr, io_u, 1);
-
- hdr->dxfer_direction = SG_DXFER_TO_DEV;
- if (lba < MAX_10B_LBA)
- hdr->cmdp[0] = 0x2a; // write(10)
- else
- hdr->cmdp[0] = 0x8a; // write(16)
- } else {
- sgio_hdr_init(sd, hdr, io_u, 0);
- hdr->dxfer_direction = SG_DXFER_NONE;
- if (lba < MAX_10B_LBA)
- hdr->cmdp[0] = 0x35; // synccache(10)
- else
- hdr->cmdp[0] = 0x91; // synccache(16)
- }
-
- /*
- * for synccache, we leave lba and length to 0 to sync all
- * blocks on medium.
- */
- if (hdr->dxfer_direction != SG_DXFER_NONE) {
- if (lba < MAX_10B_LBA) {
- hdr->cmdp[2] = (unsigned char) ((lba >> 24) & 0xff);
- hdr->cmdp[3] = (unsigned char) ((lba >> 16) & 0xff);
- hdr->cmdp[4] = (unsigned char) ((lba >> 8) & 0xff);
- hdr->cmdp[5] = (unsigned char) (lba & 0xff);
- hdr->cmdp[7] = (unsigned char) ((nr_blocks >> 8) & 0xff);
- hdr->cmdp[8] = (unsigned char) (nr_blocks & 0xff);
- } else {
- hdr->cmdp[2] = (unsigned char) ((lba >> 56) & 0xff);
- hdr->cmdp[3] = (unsigned char) ((lba >> 48) & 0xff);
- hdr->cmdp[4] = (unsigned char) ((lba >> 40) & 0xff);
- hdr->cmdp[5] = (unsigned char) ((lba >> 32) & 0xff);
- hdr->cmdp[6] = (unsigned char) ((lba >> 24) & 0xff);
- hdr->cmdp[7] = (unsigned char) ((lba >> 16) & 0xff);
- hdr->cmdp[8] = (unsigned char) ((lba >> 8) & 0xff);
- hdr->cmdp[9] = (unsigned char) (lba & 0xff);
- hdr->cmdp[10] = (unsigned char) ((nr_blocks >> 32) & 0xff);
- hdr->cmdp[11] = (unsigned char) ((nr_blocks >> 16) & 0xff);
- hdr->cmdp[12] = (unsigned char) ((nr_blocks >> 8) & 0xff);
- hdr->cmdp[13] = (unsigned char) (nr_blocks & 0xff);
- }
- }
-
- hdr->timeout = SCSI_TIMEOUT_MS;
- return 0;
-}
-
-static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct sg_io_hdr *hdr = &io_u->hdr;
- int ret, do_sync = 0;
-
- fio_ro_check(td, io_u);
-
- if (td->o.sync_io || td->o.odirect || ddir_sync(io_u->ddir))
- do_sync = 1;
-
- ret = fio_sgio_doio(td, io_u, do_sync);
-
- if (ret < 0)
- io_u->error = errno;
- else if (hdr->status) {
- io_u->resid = hdr->resid;
- io_u->error = EIO;
- }
-
- if (io_u->error) {
- td_verror(td, io_u->error, "xfer");
- return FIO_Q_COMPLETED;
- }
-
- return ret;
-}
-
-static struct io_u *fio_sgio_event(struct thread_data *td, int event)
-{
- struct sgio_data *sd = td->io_ops_data;
-
- return sd->events[event];
-}
-
-static int fio_sgio_read_capacity(struct thread_data *td, unsigned int *bs,
- unsigned long long *max_lba)
-{
- /*
- * need to do read capacity operation w/o benefit of sd or
- * io_u structures, which are not initialized until later.
- */
- struct sg_io_hdr hdr;
- unsigned char cmd[16];
- unsigned char sb[64];
- unsigned char buf[32]; // read capacity return
- int ret;
- int fd = -1;
-
- struct fio_file *f = td->files[0];
-
- /* open file independent of rest of application */
- fd = open(f->file_name, O_RDONLY);
- if (fd < 0)
- return -errno;
-
- memset(&hdr, 0, sizeof(hdr));
- memset(cmd, 0, sizeof(cmd));
- memset(sb, 0, sizeof(sb));
- memset(buf, 0, sizeof(buf));
-
- /* First let's try a 10 byte read capacity. */
- hdr.interface_id = 'S';
- hdr.cmdp = cmd;
- hdr.cmd_len = 10;
- hdr.sbp = sb;
- hdr.mx_sb_len = sizeof(sb);
- hdr.timeout = SCSI_TIMEOUT_MS;
- hdr.cmdp[0] = 0x25; // Read Capacity(10)
- hdr.dxfer_direction = SG_DXFER_FROM_DEV;
- hdr.dxferp = buf;
- hdr.dxfer_len = sizeof(buf);
-
- ret = ioctl(fd, SG_IO, &hdr);
- if (ret < 0) {
- close(fd);
- return ret;
- }
-
- *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
- *max_lba = ((buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]) & MAX_10B_LBA; // for some reason max_lba is being sign extended even though unsigned.
-
- /*
- * If max lba masked by MAX_10B_LBA equals MAX_10B_LBA,
- * then need to retry with 16 byte Read Capacity command.
- */
- if (*max_lba == MAX_10B_LBA) {
- hdr.cmd_len = 16;
- hdr.cmdp[0] = 0x9e; // service action
- hdr.cmdp[1] = 0x10; // Read Capacity(16)
- hdr.cmdp[10] = (unsigned char) ((sizeof(buf) >> 24) & 0xff);
- hdr.cmdp[11] = (unsigned char) ((sizeof(buf) >> 16) & 0xff);
- hdr.cmdp[12] = (unsigned char) ((sizeof(buf) >> 8) & 0xff);
- hdr.cmdp[13] = (unsigned char) (sizeof(buf) & 0xff);
-
- hdr.dxfer_direction = SG_DXFER_FROM_DEV;
- hdr.dxferp = buf;
- hdr.dxfer_len = sizeof(buf);
-
- ret = ioctl(fd, SG_IO, &hdr);
- if (ret < 0) {
- close(fd);
- return ret;
- }
-
- /* record if an io error occurred */
- if (hdr.info & SG_INFO_CHECK)
- td_verror(td, EIO, "fio_sgio_read_capacity");
-
- *bs = (buf[8] << 24) | (buf[9] << 16) | (buf[10] << 8) | buf[11];
- *max_lba = ((unsigned long long)buf[0] << 56) |
- ((unsigned long long)buf[1] << 48) |
- ((unsigned long long)buf[2] << 40) |
- ((unsigned long long)buf[3] << 32) |
- ((unsigned long long)buf[4] << 24) |
- ((unsigned long long)buf[5] << 16) |
- ((unsigned long long)buf[6] << 8) |
- (unsigned long long)buf[7];
- }
-
- close(fd);
- return 0;
-}
-
-static void fio_sgio_cleanup(struct thread_data *td)
-{
- struct sgio_data *sd = td->io_ops_data;
-
- if (sd) {
- free(sd->events);
- free(sd->cmds);
- free(sd->fd_flags);
- free(sd->pfds);
- free(sd->sgbuf);
- free(sd);
- }
-}
-
-static int fio_sgio_init(struct thread_data *td)
-{
- struct sgio_data *sd;
-
- sd = malloc(sizeof(*sd));
- memset(sd, 0, sizeof(*sd));
- sd->cmds = malloc(td->o.iodepth * sizeof(struct sgio_cmd));
- memset(sd->cmds, 0, td->o.iodepth * sizeof(struct sgio_cmd));
- sd->events = malloc(td->o.iodepth * sizeof(struct io_u *));
- memset(sd->events, 0, td->o.iodepth * sizeof(struct io_u *));
- sd->pfds = malloc(sizeof(struct pollfd) * td->o.nr_files);
- memset(sd->pfds, 0, sizeof(struct pollfd) * td->o.nr_files);
- sd->fd_flags = malloc(sizeof(int) * td->o.nr_files);
- memset(sd->fd_flags, 0, sizeof(int) * td->o.nr_files);
- sd->sgbuf = malloc(sizeof(struct sg_io_hdr) * td->o.iodepth);
- memset(sd->sgbuf, 0, sizeof(struct sg_io_hdr) * td->o.iodepth);
- sd->type_checked = 0;
- td->io_ops_data = sd;
-
- /*
- * we want to do it, regardless of whether odirect is set or not
- */
- td->o.override_sync = 1;
- return 0;
-}
-
-static int fio_sgio_type_check(struct thread_data *td, struct fio_file *f)
-{
- struct sgio_data *sd = td->io_ops_data;
- unsigned int bs = 0;
- unsigned long long max_lba = 0;
-
- if (f->filetype == FIO_TYPE_BLOCK) {
- if (ioctl(f->fd, BLKSSZGET, &bs) < 0) {
- td_verror(td, errno, "ioctl");
- return 1;
- }
- } else if (f->filetype == FIO_TYPE_CHAR) {
- int version, ret;
-
- if (ioctl(f->fd, SG_GET_VERSION_NUM, &version) < 0) {
- td_verror(td, errno, "ioctl");
- return 1;
- }
-
- ret = fio_sgio_read_capacity(td, &bs, &max_lba);
- if (ret) {
- td_verror(td, td->error, "fio_sgio_read_capacity");
- log_err("ioengine sg unable to read capacity successfully\n");
- return 1;
- }
- } else {
- td_verror(td, EINVAL, "wrong file type");
- log_err("ioengine sg only works on block or character devices\n");
- return 1;
- }
-
- sd->bs = bs;
- // Determine size of commands needed based on max_lba
- if (max_lba >= MAX_10B_LBA) {
- dprint(FD_IO, "sgio_type_check: using 16 byte read/write "
- "commands for lba above 0x%016llx/0x%016llx\n",
- MAX_10B_LBA, max_lba);
- }
-
- if (f->filetype == FIO_TYPE_BLOCK) {
- td->io_ops->getevents = NULL;
- td->io_ops->event = NULL;
- }
- sd->type_checked = 1;
-
- return 0;
-}
-
-static int fio_sgio_open(struct thread_data *td, struct fio_file *f)
-{
- struct sgio_data *sd = td->io_ops_data;
- int ret;
-
- ret = generic_open_file(td, f);
- if (ret)
- return ret;
-
- if (sd && !sd->type_checked && fio_sgio_type_check(td, f)) {
- ret = generic_close_file(td, f);
- return 1;
- }
-
- return 0;
-}
-
-/*
- * Build an error string with details about the driver, host or scsi
- * error contained in the sg header Caller will use as necessary.
- */
-static char *fio_sgio_errdetails(struct io_u *io_u)
-{
- struct sg_io_hdr *hdr = &io_u->hdr;
-#define MAXERRDETAIL 1024
-#define MAXMSGCHUNK 128
- char *msg, msgchunk[MAXMSGCHUNK], *ret = NULL;
- int i;
-
- msg = calloc(1, MAXERRDETAIL);
-
- /*
- * can't seem to find sg_err.h, so I'll just echo the define values
- * so others can search on internet to find clearer clues of meaning.
- */
- if (hdr->info & SG_INFO_CHECK) {
- ret = msg;
- if (hdr->host_status) {
- snprintf(msgchunk, MAXMSGCHUNK, "SG Host Status: 0x%02x; ", hdr->host_status);
- strlcat(msg, msgchunk, MAXERRDETAIL);
- switch (hdr->host_status) {
- case 0x01:
- strlcat(msg, "SG_ERR_DID_NO_CONNECT", MAXERRDETAIL);
- break;
- case 0x02:
- strlcat(msg, "SG_ERR_DID_BUS_BUSY", MAXERRDETAIL);
- break;
- case 0x03:
- strlcat(msg, "SG_ERR_DID_TIME_OUT", MAXERRDETAIL);
- break;
- case 0x04:
- strlcat(msg, "SG_ERR_DID_BAD_TARGET", MAXERRDETAIL);
- break;
- case 0x05:
- strlcat(msg, "SG_ERR_DID_ABORT", MAXERRDETAIL);
- break;
- case 0x06:
- strlcat(msg, "SG_ERR_DID_PARITY", MAXERRDETAIL);
- break;
- case 0x07:
- strlcat(msg, "SG_ERR_DID_ERROR (internal error)", MAXERRDETAIL);
- break;
- case 0x08:
- strlcat(msg, "SG_ERR_DID_RESET", MAXERRDETAIL);
- break;
- case 0x09:
- strlcat(msg, "SG_ERR_DID_BAD_INTR (unexpected)", MAXERRDETAIL);
- break;
- case 0x0a:
- strlcat(msg, "SG_ERR_DID_PASSTHROUGH", MAXERRDETAIL);
- break;
- case 0x0b:
- strlcat(msg, "SG_ERR_DID_SOFT_ERROR (driver retry?)", MAXERRDETAIL);
- break;
- case 0x0c:
- strlcat(msg, "SG_ERR_DID_IMM_RETRY", MAXERRDETAIL);
- break;
- case 0x0d:
- strlcat(msg, "SG_ERR_DID_REQUEUE", MAXERRDETAIL);
- break;
- case 0x0e:
- strlcat(msg, "SG_ERR_DID_TRANSPORT_DISRUPTED", MAXERRDETAIL);
- break;
- case 0x0f:
- strlcat(msg, "SG_ERR_DID_TRANSPORT_FAILFAST", MAXERRDETAIL);
- break;
- case 0x10:
- strlcat(msg, "SG_ERR_DID_TARGET_FAILURE", MAXERRDETAIL);
- break;
- case 0x11:
- strlcat(msg, "SG_ERR_DID_NEXUS_FAILURE", MAXERRDETAIL);
- break;
- case 0x12:
- strlcat(msg, "SG_ERR_DID_ALLOC_FAILURE", MAXERRDETAIL);
- break;
- case 0x13:
- strlcat(msg, "SG_ERR_DID_MEDIUM_ERROR", MAXERRDETAIL);
- break;
- default:
- strlcat(msg, "Unknown", MAXERRDETAIL);
- break;
- }
- strlcat(msg, ". ", MAXERRDETAIL);
- }
- if (hdr->driver_status) {
- snprintf(msgchunk, MAXMSGCHUNK, "SG Driver Status: 0x%02x; ", hdr->driver_status);
- strlcat(msg, msgchunk, MAXERRDETAIL);
- switch (hdr->driver_status & 0x0F) {
- case 0x01:
- strlcat(msg, "SG_ERR_DRIVER_BUSY", MAXERRDETAIL);
- break;
- case 0x02:
- strlcat(msg, "SG_ERR_DRIVER_SOFT", MAXERRDETAIL);
- break;
- case 0x03:
- strlcat(msg, "SG_ERR_DRIVER_MEDIA", MAXERRDETAIL);
- break;
- case 0x04:
- strlcat(msg, "SG_ERR_DRIVER_ERROR", MAXERRDETAIL);
- break;
- case 0x05:
- strlcat(msg, "SG_ERR_DRIVER_INVALID", MAXERRDETAIL);
- break;
- case 0x06:
- strlcat(msg, "SG_ERR_DRIVER_TIMEOUT", MAXERRDETAIL);
- break;
- case 0x07:
- strlcat(msg, "SG_ERR_DRIVER_HARD", MAXERRDETAIL);
- break;
- case 0x08:
- strlcat(msg, "SG_ERR_DRIVER_SENSE", MAXERRDETAIL);
- break;
- default:
- strlcat(msg, "Unknown", MAXERRDETAIL);
- break;
- }
- strlcat(msg, "; ", MAXERRDETAIL);
- switch (hdr->driver_status & 0xF0) {
- case 0x10:
- strlcat(msg, "SG_ERR_SUGGEST_RETRY", MAXERRDETAIL);
- break;
- case 0x20:
- strlcat(msg, "SG_ERR_SUGGEST_ABORT", MAXERRDETAIL);
- break;
- case 0x30:
- strlcat(msg, "SG_ERR_SUGGEST_REMAP", MAXERRDETAIL);
- break;
- case 0x40:
- strlcat(msg, "SG_ERR_SUGGEST_DIE", MAXERRDETAIL);
- break;
- case 0x80:
- strlcat(msg, "SG_ERR_SUGGEST_SENSE", MAXERRDETAIL);
- break;
- }
- strlcat(msg, ". ", MAXERRDETAIL);
- }
- if (hdr->status) {
- snprintf(msgchunk, MAXMSGCHUNK, "SG SCSI Status: 0x%02x; ", hdr->status);
- strlcat(msg, msgchunk, MAXERRDETAIL);
- // SCSI 3 status codes
- switch (hdr->status) {
- case 0x02:
- strlcat(msg, "CHECK_CONDITION", MAXERRDETAIL);
- break;
- case 0x04:
- strlcat(msg, "CONDITION_MET", MAXERRDETAIL);
- break;
- case 0x08:
- strlcat(msg, "BUSY", MAXERRDETAIL);
- break;
- case 0x10:
- strlcat(msg, "INTERMEDIATE", MAXERRDETAIL);
- break;
- case 0x14:
- strlcat(msg, "INTERMEDIATE_CONDITION_MET", MAXERRDETAIL);
- break;
- case 0x18:
- strlcat(msg, "RESERVATION_CONFLICT", MAXERRDETAIL);
- break;
- case 0x22:
- strlcat(msg, "COMMAND_TERMINATED", MAXERRDETAIL);
- break;
- case 0x28:
- strlcat(msg, "TASK_SET_FULL", MAXERRDETAIL);
- break;
- case 0x30:
- strlcat(msg, "ACA_ACTIVE", MAXERRDETAIL);
- break;
- case 0x40:
- strlcat(msg, "TASK_ABORTED", MAXERRDETAIL);
- break;
- default:
- strlcat(msg, "Unknown", MAXERRDETAIL);
- break;
- }
- strlcat(msg, ". ", MAXERRDETAIL);
- }
- if (hdr->sb_len_wr) {
- snprintf(msgchunk, MAXMSGCHUNK, "Sense Data (%d bytes):", hdr->sb_len_wr);
- strlcat(msg, msgchunk, MAXERRDETAIL);
- for (i = 0; i < hdr->sb_len_wr; i++) {
- snprintf(msgchunk, MAXMSGCHUNK, " %02x", hdr->sbp[i]);
- strlcat(msg, msgchunk, MAXERRDETAIL);
- }
- strlcat(msg, ". ", MAXERRDETAIL);
- }
- if (hdr->resid != 0) {
- snprintf(msgchunk, MAXMSGCHUNK, "SG Driver: %d bytes out of %d not transferred. ", hdr->resid, hdr->dxfer_len);
- strlcat(msg, msgchunk, MAXERRDETAIL);
- ret = msg;
- }
- }
-
- if (!ret)
- ret = strdup("SG Driver did not report a Host, Driver or Device check");
-
- return ret;
-}
-
-/*
- * get max file size from read capacity.
- */
-static int fio_sgio_get_file_size(struct thread_data *td, struct fio_file *f)
-{
- /*
- * get_file_size is being called even before sgio_init is
- * called, so none of the sg_io structures are
- * initialized in the thread_data yet. So we need to do the
- * ReadCapacity without any of those helpers. One of the effects
- * is that ReadCapacity may get called 4 times on each open:
- * readcap(10) followed by readcap(16) if needed - just to get
- * the file size after the init occurs - it will be called
- * again when "type_check" is called during structure
- * initialization I'm not sure how to prevent this little
- * inefficiency.
- */
- unsigned int bs = 0;
- unsigned long long max_lba = 0;
- int ret;
-
- if (fio_file_size_known(f))
- return 0;
-
- if (f->filetype != FIO_TYPE_BLOCK && f->filetype != FIO_TYPE_CHAR) {
- td_verror(td, EINVAL, "wrong file type");
- log_err("ioengine sg only works on block or character devices\n");
- return 1;
- }
-
- ret = fio_sgio_read_capacity(td, &bs, &max_lba);
- if (ret ) {
- td_verror(td, td->error, "fio_sgio_read_capacity");
- log_err("ioengine sg unable to successfully execute read capacity to get block size and maximum lba\n");
- return 1;
- }
-
- f->real_file_size = (max_lba + 1) * bs;
- fio_file_set_size_known(f);
- return 0;
-}
-
-
-static struct ioengine_ops ioengine = {
- .name = "sg",
- .version = FIO_IOOPS_VERSION,
- .init = fio_sgio_init,
- .prep = fio_sgio_prep,
- .queue = fio_sgio_queue,
- .getevents = fio_sgio_getevents,
- .errdetails = fio_sgio_errdetails,
- .event = fio_sgio_event,
- .cleanup = fio_sgio_cleanup,
- .open_file = fio_sgio_open,
- .close_file = generic_close_file,
- .get_file_size = fio_sgio_get_file_size,
- .flags = FIO_SYNCIO | FIO_RAWIO,
-};
-
-#else /* FIO_HAVE_SGIO */
-
-/*
- * When we have a proper configure system in place, we simply wont build
- * and install this io engine. For now install a crippled version that
- * just complains and fails to load.
- */
-static int fio_sgio_init(struct thread_data fio_unused *td)
-{
- log_err("fio: ioengine sg not available\n");
- return 1;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "sg",
- .version = FIO_IOOPS_VERSION,
- .init = fio_sgio_init,
-};
-
-#endif
-
-static void fio_init fio_sgio_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_sgio_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/skeleton_external.c b/engines/skeleton_external.c
deleted file mode 100644
index 4bebcc45..00000000
--- a/engines/skeleton_external.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Skeleton for a sample external io engine
- *
- * Should be compiled with:
- *
- * gcc -Wall -O2 -g -shared -rdynamic -fPIC -o engine.o engine.c
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <assert.h>
-
-#include "../fio.h"
-
-/*
- * The core of the module is identical to the ones included with fio,
- * read those. You cannot use register_ioengine() and unregister_ioengine()
- * for external modules, they should be gotten through dlsym()
- */
-
-/*
- * The ->event() hook is called to match an event number with an io_u.
- * After the core has called ->getevents() and it has returned eg 3,
- * the ->event() hook must return the 3 events that have completed for
- * subsequent calls to ->event() with [0-2]. Required.
- */
-static struct io_u *fio_skeleton_event(struct thread_data *td, int event)
-{
- return NULL;
-}
-
-/*
- * The ->getevents() hook is used to reap completion events from an async
- * io engine. It returns the number of completed events since the last call,
- * which may then be retrieved by calling the ->event() hook with the event
- * numbers. Required.
- */
-static int fio_skeleton_getevents(struct thread_data *td, unsigned int min,
- unsigned int max, const struct timespec *t)
-{
- return 0;
-}
-
-/*
- * The ->cancel() hook attempts to cancel the io_u. Only relevant for
- * async io engines, and need not be supported.
- */
-static int fio_skeleton_cancel(struct thread_data *td, struct io_u *io_u)
-{
- return 0;
-}
-
-/*
- * The ->queue() hook is responsible for initiating io on the io_u
- * being passed in. If the io engine is a synchronous one, io may complete
- * before ->queue() returns. Required.
- *
- * The io engine must transfer in the direction noted by io_u->ddir
- * to the buffer pointed to by io_u->xfer_buf for as many bytes as
- * io_u->xfer_buflen. Residual data count may be set in io_u->resid
- * for a short read/write.
- */
-static int fio_skeleton_queue(struct thread_data *td, struct io_u *io_u)
-{
- /*
- * Double sanity check to catch errant write on a readonly setup
- */
- fio_ro_check(td, io_u);
-
- /*
- * Could return FIO_Q_QUEUED for a queued request,
- * FIO_Q_COMPLETED for a completed request, and FIO_Q_BUSY
- * if we could queue no more at this point (you'd have to
- * define ->commit() to handle that.
- */
- return FIO_Q_COMPLETED;
-}
-
-/*
- * The ->prep() function is called for each io_u prior to being submitted
- * with ->queue(). This hook allows the io engine to perform any
- * preparatory actions on the io_u, before being submitted. Not required.
- */
-static int fio_skeleton_prep(struct thread_data *td, struct io_u *io_u)
-{
- return 0;
-}
-
-/*
- * The init function is called once per thread/process, and should set up
- * any structures that this io engine requires to keep track of io. Not
- * required.
- */
-static int fio_skeleton_init(struct thread_data *td)
-{
- return 0;
-}
-
-/*
- * This is paired with the ->init() function and is called when a thread is
- * done doing io. Should tear down anything setup by the ->init() function.
- * Not required.
- */
-static void fio_skeleton_cleanup(struct thread_data *td)
-{
-}
-
-/*
- * Hook for opening the given file. Unless the engine has special
- * needs, it usually just provides generic_open_file() as the handler.
- */
-static int fio_skeleton_open(struct thread_data *td, struct fio_file *f)
-{
- return generic_open_file(td, f);
-}
-
-/*
- * Hook for closing a file. See fio_skeleton_open().
- */
-static int fio_skeleton_close(struct thread_data *td, struct fio_file *f)
-{
- return generic_close_file(td, f);
-}
-
-/*
- * Note that the structure is exported, so that fio can get it via
- * dlsym(..., "ioengine"); for (and only for) external engines.
- */
-struct ioengine_ops ioengine = {
- .name = "engine_name",
- .version = FIO_IOOPS_VERSION,
- .init = fio_skeleton_init,
- .prep = fio_skeleton_prep,
- .queue = fio_skeleton_queue,
- .cancel = fio_skeleton_cancel,
- .getevents = fio_skeleton_getevents,
- .event = fio_skeleton_event,
- .cleanup = fio_skeleton_cleanup,
- .open_file = fio_skeleton_open,
- .close_file = fio_skeleton_close,
-};
diff --git a/engines/solarisaio.c b/engines/solarisaio.c
deleted file mode 100644
index 151f31d4..00000000
--- a/engines/solarisaio.c
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Native Solaris async IO engine
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <signal.h>
-#include <errno.h>
-
-#include "../fio.h"
-
-#include <sys/asynch.h>
-
-struct solarisaio_data {
- struct io_u **aio_events;
- unsigned int aio_pending;
- unsigned int nr;
- unsigned int max_depth;
-};
-
-static int fio_solarisaio_cancel(struct thread_data fio_unused *td,
- struct io_u *io_u)
-{
- return aiocancel(&io_u->resultp);
-}
-
-static int fio_solarisaio_prep(struct thread_data fio_unused *td,
- struct io_u *io_u)
-{
- struct solarisaio_data *sd = td->io_ops_data;
-
- io_u->resultp.aio_return = AIO_INPROGRESS;
- io_u->engine_data = sd;
- return 0;
-}
-
-static void wait_for_event(struct timeval *tv)
-{
- struct solarisaio_data *sd;
- struct io_u *io_u;
- aio_result_t *res;
-
- res = aiowait(tv);
- if (res == (aio_result_t *) -1) {
- int err = errno;
-
- if (err != EINVAL) {
- log_err("fio: solarisaio got %d in aiowait\n", err);
- exit(err);
- }
- return;
- } else if (!res)
- return;
-
- io_u = container_of(res, struct io_u, resultp);
- sd = io_u->engine_data;
-
- if (io_u->resultp.aio_return >= 0) {
- io_u->resid = io_u->xfer_buflen - io_u->resultp.aio_return;
- io_u->error = 0;
- } else
- io_u->error = io_u->resultp.aio_errno;
-
- /*
- * For SIGIO, we need a write barrier between the two, so that
- * the ->aio_pending store is seen after the ->aio_events store
- */
- sd->aio_events[sd->aio_pending] = io_u;
- write_barrier();
- sd->aio_pending++;
- sd->nr--;
-}
-
-static int fio_solarisaio_getevents(struct thread_data *td, unsigned int min,
- unsigned int max, const struct timespec *t)
-{
- struct solarisaio_data *sd = td->io_ops_data;
- struct timeval tv;
- int ret;
-
- if (!min || !t) {
- tv.tv_sec = 0;
- tv.tv_usec = 0;
- } else {
- tv.tv_sec = t->tv_sec;
- tv.tv_usec = t->tv_nsec / 1000;
- }
-
- while (sd->aio_pending < min)
- wait_for_event(&tv);
-
- /*
- * should be OK without locking, as int operations should be atomic
- */
- ret = sd->aio_pending;
- sd->aio_pending -= ret;
- return ret;
-}
-
-static struct io_u *fio_solarisaio_event(struct thread_data *td, int event)
-{
- struct solarisaio_data *sd = td->io_ops_data;
-
- return sd->aio_events[event];
-}
-
-static int fio_solarisaio_queue(struct thread_data fio_unused *td,
- struct io_u *io_u)
-{
- struct solarisaio_data *sd = td->io_ops_data;
- struct fio_file *f = io_u->file;
- off_t off;
- int ret;
-
- fio_ro_check(td, io_u);
-
- if (io_u->ddir == DDIR_SYNC) {
- if (sd->nr)
- return FIO_Q_BUSY;
- if (fsync(f->fd) < 0)
- io_u->error = errno;
-
- return FIO_Q_COMPLETED;
- }
-
- if (io_u->ddir == DDIR_DATASYNC) {
- if (sd->nr)
- return FIO_Q_BUSY;
- if (fdatasync(f->fd) < 0)
- io_u->error = errno;
-
- return FIO_Q_COMPLETED;
- }
-
- if (sd->nr == sd->max_depth)
- return FIO_Q_BUSY;
-
- off = io_u->offset;
- if (io_u->ddir == DDIR_READ)
- ret = aioread(f->fd, io_u->xfer_buf, io_u->xfer_buflen, off,
- SEEK_SET, &io_u->resultp);
- else
- ret = aiowrite(f->fd, io_u->xfer_buf, io_u->xfer_buflen, off,
- SEEK_SET, &io_u->resultp);
- if (ret) {
- io_u->error = errno;
- td_verror(td, io_u->error, "xfer");
- return FIO_Q_COMPLETED;
- }
-
- sd->nr++;
- return FIO_Q_QUEUED;
-}
-
-static void fio_solarisaio_cleanup(struct thread_data *td)
-{
- struct solarisaio_data *sd = td->io_ops_data;
-
- if (sd) {
- free(sd->aio_events);
- free(sd);
- }
-}
-
-/*
- * Set USE_SIGNAL_COMPLETIONS to use SIGIO as completion events.
- */
-#ifdef USE_SIGNAL_COMPLETIONS
-static void fio_solarisaio_sigio(int sig)
-{
- wait_for_event(NULL);
-}
-
-static void fio_solarisaio_init_sigio(void)
-{
- struct sigaction act;
-
- memset(&act, 0, sizeof(act));
- act.sa_handler = fio_solarisaio_sigio;
- act.sa_flags = SA_RESTART;
- sigaction(SIGIO, &act, NULL);
-}
-#endif
-
-static int fio_solarisaio_init(struct thread_data *td)
-{
- struct solarisaio_data *sd = malloc(sizeof(*sd));
- unsigned int max_depth;
-
- max_depth = td->o.iodepth;
- if (max_depth > MAXASYNCHIO) {
- max_depth = MAXASYNCHIO;
- log_info("fio: lower depth to %d due to OS constraints\n",
- max_depth);
- }
-
- memset(sd, 0, sizeof(*sd));
- sd->aio_events = malloc(max_depth * sizeof(struct io_u *));
- memset(sd->aio_events, 0, max_depth * sizeof(struct io_u *));
- sd->max_depth = max_depth;
-
-#ifdef USE_SIGNAL_COMPLETIONS
- fio_solarisaio_init_sigio();
-#endif
-
- td->io_ops_data = sd;
- return 0;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "solarisaio",
- .version = FIO_IOOPS_VERSION,
- .init = fio_solarisaio_init,
- .prep = fio_solarisaio_prep,
- .queue = fio_solarisaio_queue,
- .cancel = fio_solarisaio_cancel,
- .getevents = fio_solarisaio_getevents,
- .event = fio_solarisaio_event,
- .cleanup = fio_solarisaio_cleanup,
- .open_file = generic_open_file,
- .close_file = generic_close_file,
- .get_file_size = generic_get_file_size,
-};
-
-static void fio_init fio_solarisaio_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_solarisaio_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/splice.c b/engines/splice.c
deleted file mode 100644
index eba093e8..00000000
--- a/engines/splice.c
+++ /dev/null
@@ -1,311 +0,0 @@
-/*
- * splice engine
- *
- * IO engine that transfers data by doing splices to/from pipes and
- * the files.
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/poll.h>
-#include <sys/mman.h>
-
-#include "../fio.h"
-
-struct spliceio_data {
- int pipe[2];
- int vmsplice_to_user;
- int vmsplice_to_user_map;
-};
-
-/*
- * vmsplice didn't use to support splicing to user space, this is the old
- * variant of getting that job done. Doesn't make a lot of sense, but it
- * uses splices to move data from the source into a pipe.
- */
-static int fio_splice_read_old(struct thread_data *td, struct io_u *io_u)
-{
- struct spliceio_data *sd = td->io_ops_data;
- struct fio_file *f = io_u->file;
- int ret, ret2, buflen;
- off_t offset;
- void *p;
-
- offset = io_u->offset;
- buflen = io_u->xfer_buflen;
- p = io_u->xfer_buf;
- while (buflen) {
- int this_len = buflen;
-
- if (this_len > SPLICE_DEF_SIZE)
- this_len = SPLICE_DEF_SIZE;
-
- ret = splice(f->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE);
- if (ret < 0) {
- if (errno == ENODATA || errno == EAGAIN)
- continue;
-
- return -errno;
- }
-
- buflen -= ret;
-
- while (ret) {
- ret2 = read(sd->pipe[0], p, ret);
- if (ret2 < 0)
- return -errno;
-
- ret -= ret2;
- p += ret2;
- }
- }
-
- return io_u->xfer_buflen;
-}
-
-/*
- * We can now vmsplice into userspace, so do the transfer by splicing into
- * a pipe and vmsplicing that into userspace.
- */
-static int fio_splice_read(struct thread_data *td, struct io_u *io_u)
-{
- struct spliceio_data *sd = td->io_ops_data;
- struct fio_file *f = io_u->file;
- struct iovec iov;
- int ret , buflen, mmap_len;
- off_t offset;
- void *p, *map;
-
- ret = 0;
- offset = io_u->offset;
- mmap_len = buflen = io_u->xfer_buflen;
-
- if (sd->vmsplice_to_user_map) {
- map = mmap(io_u->xfer_buf, buflen, PROT_READ, MAP_PRIVATE|OS_MAP_ANON, 0, 0);
- if (map == MAP_FAILED) {
- td_verror(td, errno, "mmap io_u");
- return -1;
- }
-
- p = map;
- } else {
- map = NULL;
- p = io_u->xfer_buf;
- }
-
- while (buflen) {
- int this_len = buflen;
- int flags = 0;
-
- if (this_len > SPLICE_DEF_SIZE) {
- this_len = SPLICE_DEF_SIZE;
- flags = SPLICE_F_MORE;
- }
-
- ret = splice(f->fd, &offset, sd->pipe[1], NULL, this_len,flags);
- if (ret < 0) {
- if (errno == ENODATA || errno == EAGAIN)
- continue;
-
- td_verror(td, errno, "splice-from-fd");
- break;
- }
-
- buflen -= ret;
- iov.iov_base = p;
- iov.iov_len = ret;
-
- while (iov.iov_len) {
- ret = vmsplice(sd->pipe[0], &iov, 1, SPLICE_F_MOVE);
- if (ret < 0) {
- if (errno == EFAULT &&
- sd->vmsplice_to_user_map) {
- sd->vmsplice_to_user_map = 0;
- munmap(map, mmap_len);
- map = NULL;
- p = io_u->xfer_buf;
- iov.iov_base = p;
- continue;
- }
- if (errno == EBADF) {
- ret = -EBADF;
- break;
- }
- td_verror(td, errno, "vmsplice");
- break;
- } else if (!ret) {
- td_verror(td, ENODATA, "vmsplice");
- ret = -1;
- break;
- }
-
- iov.iov_len -= ret;
- iov.iov_base += ret;
- p += ret;
- }
- if (ret < 0)
- break;
- }
-
- if (sd->vmsplice_to_user_map && munmap(map, mmap_len) < 0) {
- td_verror(td, errno, "munnap io_u");
- return -1;
- }
- if (ret < 0)
- return ret;
-
- return io_u->xfer_buflen;
-}
-
-/*
- * For splice writing, we can vmsplice our data buffer directly into a
- * pipe and then splice that to a file.
- */
-static int fio_splice_write(struct thread_data *td, struct io_u *io_u)
-{
- struct spliceio_data *sd = td->io_ops_data;
- struct iovec iov = {
- .iov_base = io_u->xfer_buf,
- .iov_len = io_u->xfer_buflen,
- };
- struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, };
- struct fio_file *f = io_u->file;
- off_t off = io_u->offset;
- int ret, ret2;
-
- while (iov.iov_len) {
- if (poll(&pfd, 1, -1) < 0)
- return errno;
-
- ret = vmsplice(sd->pipe[1], &iov, 1, SPLICE_F_NONBLOCK);
- if (ret < 0)
- return -errno;
-
- iov.iov_len -= ret;
- iov.iov_base += ret;
-
- while (ret) {
- ret2 = splice(sd->pipe[0], NULL, f->fd, &off, ret, 0);
- if (ret2 < 0)
- return -errno;
-
- ret -= ret2;
- }
- }
-
- return io_u->xfer_buflen;
-}
-
-static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct spliceio_data *sd = td->io_ops_data;
- int ret = 0;
-
- fio_ro_check(td, io_u);
-
- if (io_u->ddir == DDIR_READ) {
- if (sd->vmsplice_to_user) {
- ret = fio_splice_read(td, io_u);
- /*
- * This kernel doesn't support vmsplice to user
- * space. Reset the vmsplice_to_user flag, so that
- * we retry below and don't hit this path again.
- */
- if (ret == -EBADF)
- sd->vmsplice_to_user = 0;
- }
- if (!sd->vmsplice_to_user)
- ret = fio_splice_read_old(td, io_u);
- } else if (io_u->ddir == DDIR_WRITE)
- ret = fio_splice_write(td, io_u);
- else if (io_u->ddir == DDIR_TRIM)
- ret = do_io_u_trim(td, io_u);
- else
- ret = do_io_u_sync(td, io_u);
-
- if (ret != (int) io_u->xfer_buflen) {
- if (ret >= 0) {
- io_u->resid = io_u->xfer_buflen - ret;
- io_u->error = 0;
- return FIO_Q_COMPLETED;
- } else
- io_u->error = errno;
- }
-
- if (io_u->error) {
- td_verror(td, io_u->error, "xfer");
- if (io_u->error == EINVAL)
- log_err("fio: looks like splice doesn't work on this"
- " file system\n");
- }
-
- return FIO_Q_COMPLETED;
-}
-
-static void fio_spliceio_cleanup(struct thread_data *td)
-{
- struct spliceio_data *sd = td->io_ops_data;
-
- if (sd) {
- close(sd->pipe[0]);
- close(sd->pipe[1]);
- free(sd);
- }
-}
-
-static int fio_spliceio_init(struct thread_data *td)
-{
- struct spliceio_data *sd = malloc(sizeof(*sd));
-
- if (pipe(sd->pipe) < 0) {
- td_verror(td, errno, "pipe");
- free(sd);
- return 1;
- }
-
- /*
- * Assume this work, we'll reset this if it doesn't
- */
- sd->vmsplice_to_user = 1;
-
- /*
- * Works with "real" vmsplice to user, eg mapping pages directly.
- * Reset if we fail.
- */
- sd->vmsplice_to_user_map = 1;
-
- /*
- * And if vmsplice_to_user works, we definitely need aligned
- * buffers. Just set ->odirect to force that.
- */
- if (td_read(td))
- td->o.mem_align = 1;
-
- td->io_ops_data = sd;
- return 0;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "splice",
- .version = FIO_IOOPS_VERSION,
- .init = fio_spliceio_init,
- .queue = fio_spliceio_queue,
- .cleanup = fio_spliceio_cleanup,
- .open_file = generic_open_file,
- .close_file = generic_close_file,
- .get_file_size = generic_get_file_size,
- .flags = FIO_SYNCIO | FIO_PIPEIO,
-};
-
-static void fio_init fio_spliceio_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_spliceio_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}
diff --git a/engines/sync.c b/engines/sync.c
deleted file mode 100644
index e76bbbb4..00000000
--- a/engines/sync.c
+++ /dev/null
@@ -1,472 +0,0 @@
-/*
- * sync/psync engine
- *
- * IO engine that does regular read(2)/write(2) with lseek(2) to transfer
- * data and IO engine that does regular pread(2)/pwrite(2) to transfer data.
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/uio.h>
-#include <errno.h>
-#include <assert.h>
-
-#include "../fio.h"
-#include "../optgroup.h"
-
-/*
- * Sync engine uses engine_data to store last offset
- */
-#define LAST_POS(f) ((f)->engine_pos)
-
-struct syncio_data {
- struct iovec *iovecs;
- struct io_u **io_us;
- unsigned int queued;
- unsigned int events;
- unsigned long queued_bytes;
-
- unsigned long long last_offset;
- struct fio_file *last_file;
- enum fio_ddir last_ddir;
-};
-
-#ifdef FIO_HAVE_PWRITEV2
-struct psyncv2_options {
- void *pad;
- unsigned int hipri;
-};
-
-static struct fio_option options[] = {
- {
- .name = "hipri",
- .lname = "RWF_HIPRI",
- .type = FIO_OPT_STR_SET,
- .off1 = offsetof(struct psyncv2_options, hipri),
- .help = "Set RWF_HIPRI for pwritev2/preadv2",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_INVALID,
- },
- {
- .name = NULL,
- },
-};
-#endif
-
-static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
-
- if (!ddir_rw(io_u->ddir))
- return 0;
-
- if (LAST_POS(f) != -1ULL && LAST_POS(f) == io_u->offset)
- return 0;
-
- if (lseek(f->fd, io_u->offset, SEEK_SET) == -1) {
- td_verror(td, errno, "lseek");
- return 1;
- }
-
- return 0;
-}
-
-static int fio_io_end(struct thread_data *td, struct io_u *io_u, int ret)
-{
- if (io_u->file && ret >= 0 && ddir_rw(io_u->ddir))
- LAST_POS(io_u->file) = io_u->offset + ret;
-
- if (ret != (int) io_u->xfer_buflen) {
- if (ret >= 0) {
- io_u->resid = io_u->xfer_buflen - ret;
- io_u->error = 0;
- return FIO_Q_COMPLETED;
- } else
- io_u->error = errno;
- }
-
- if (io_u->error) {
- io_u_log_error(td, io_u);
- td_verror(td, io_u->error, "xfer");
- }
-
- return FIO_Q_COMPLETED;
-}
-
-#ifdef CONFIG_PWRITEV
-static int fio_pvsyncio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct syncio_data *sd = td->io_ops_data;
- struct iovec *iov = &sd->iovecs[0];
- struct fio_file *f = io_u->file;
- int ret;
-
- fio_ro_check(td, io_u);
-
- iov->iov_base = io_u->xfer_buf;
- iov->iov_len = io_u->xfer_buflen;
-
- if (io_u->ddir == DDIR_READ)
- ret = preadv(f->fd, iov, 1, io_u->offset);
- else if (io_u->ddir == DDIR_WRITE)
- ret = pwritev(f->fd, iov, 1, io_u->offset);
- else if (io_u->ddir == DDIR_TRIM) {
- do_io_u_trim(td, io_u);
- return FIO_Q_COMPLETED;
- } else
- ret = do_io_u_sync(td, io_u);
-
- return fio_io_end(td, io_u, ret);
-}
-#endif
-
-#ifdef FIO_HAVE_PWRITEV2
-static int fio_pvsyncio2_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct syncio_data *sd = td->io_ops_data;
- struct psyncv2_options *o = td->eo;
- struct iovec *iov = &sd->iovecs[0];
- struct fio_file *f = io_u->file;
- int ret, flags = 0;
-
- fio_ro_check(td, io_u);
-
- if (o->hipri)
- flags |= RWF_HIPRI;
-
- iov->iov_base = io_u->xfer_buf;
- iov->iov_len = io_u->xfer_buflen;
-
- if (io_u->ddir == DDIR_READ)
- ret = preadv2(f->fd, iov, 1, io_u->offset, flags);
- else if (io_u->ddir == DDIR_WRITE)
- ret = pwritev2(f->fd, iov, 1, io_u->offset, flags);
- else if (io_u->ddir == DDIR_TRIM) {
- do_io_u_trim(td, io_u);
- return FIO_Q_COMPLETED;
- } else
- ret = do_io_u_sync(td, io_u);
-
- return fio_io_end(td, io_u, ret);
-}
-#endif
-
-
-static int fio_psyncio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
- int ret;
-
- fio_ro_check(td, io_u);
-
- if (io_u->ddir == DDIR_READ)
- ret = pread(f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
- else if (io_u->ddir == DDIR_WRITE)
- ret = pwrite(f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
- else if (io_u->ddir == DDIR_TRIM) {
- do_io_u_trim(td, io_u);
- return FIO_Q_COMPLETED;
- } else
- ret = do_io_u_sync(td, io_u);
-
- return fio_io_end(td, io_u, ret);
-}
-
-static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_file *f = io_u->file;
- int ret;
-
- fio_ro_check(td, io_u);
-
- if (io_u->ddir == DDIR_READ)
- ret = read(f->fd, io_u->xfer_buf, io_u->xfer_buflen);
- else if (io_u->ddir == DDIR_WRITE)
- ret = write(f->fd, io_u->xfer_buf, io_u->xfer_buflen);
- else if (io_u->ddir == DDIR_TRIM) {
- do_io_u_trim(td, io_u);
- return FIO_Q_COMPLETED;
- } else
- ret = do_io_u_sync(td, io_u);
-
- return fio_io_end(td, io_u, ret);
-}
-
-static int fio_vsyncio_getevents(struct thread_data *td, unsigned int min,
- unsigned int max,
- const struct timespec fio_unused *t)
-{
- struct syncio_data *sd = td->io_ops_data;
- int ret;
-
- if (min) {
- ret = sd->events;
- sd->events = 0;
- } else
- ret = 0;
-
- dprint(FD_IO, "vsyncio_getevents: min=%d,max=%d: %d\n", min, max, ret);
- return ret;
-}
-
-static struct io_u *fio_vsyncio_event(struct thread_data *td, int event)
-{
- struct syncio_data *sd = td->io_ops_data;
-
- return sd->io_us[event];
-}
-
-static int fio_vsyncio_append(struct thread_data *td, struct io_u *io_u)
-{
- struct syncio_data *sd = td->io_ops_data;
-
- if (ddir_sync(io_u->ddir))
- return 0;
-
- if (io_u->offset == sd->last_offset && io_u->file == sd->last_file &&
- io_u->ddir == sd->last_ddir)
- return 1;
-
- return 0;
-}
-
-static void fio_vsyncio_set_iov(struct syncio_data *sd, struct io_u *io_u,
- int idx)
-{
- sd->io_us[idx] = io_u;
- sd->iovecs[idx].iov_base = io_u->xfer_buf;
- sd->iovecs[idx].iov_len = io_u->xfer_buflen;
- sd->last_offset = io_u->offset + io_u->xfer_buflen;
- sd->last_file = io_u->file;
- sd->last_ddir = io_u->ddir;
- sd->queued_bytes += io_u->xfer_buflen;
- sd->queued++;
-}
-
-static int fio_vsyncio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct syncio_data *sd = td->io_ops_data;
-
- fio_ro_check(td, io_u);
-
- if (!fio_vsyncio_append(td, io_u)) {
- dprint(FD_IO, "vsyncio_queue: no append (%d)\n", sd->queued);
- /*
- * If we can't append and have stuff queued, tell fio to
- * commit those first and then retry this io
- */
- if (sd->queued)
- return FIO_Q_BUSY;
- if (ddir_sync(io_u->ddir)) {
- int ret = do_io_u_sync(td, io_u);
-
- return fio_io_end(td, io_u, ret);
- }
-
- sd->queued = 0;
- sd->queued_bytes = 0;
- fio_vsyncio_set_iov(sd, io_u, 0);
- } else {
- if (sd->queued == td->o.iodepth) {
- dprint(FD_IO, "vsyncio_queue: max depth %d\n", sd->queued);
- return FIO_Q_BUSY;
- }
-
- dprint(FD_IO, "vsyncio_queue: append\n");
- fio_vsyncio_set_iov(sd, io_u, sd->queued);
- }
-
- dprint(FD_IO, "vsyncio_queue: depth now %d\n", sd->queued);
- return FIO_Q_QUEUED;
-}
-
-/*
- * Check that we transferred all bytes, or saw an error, etc
- */
-static int fio_vsyncio_end(struct thread_data *td, ssize_t bytes)
-{
- struct syncio_data *sd = td->io_ops_data;
- struct io_u *io_u;
- unsigned int i;
- int err;
-
- /*
- * transferred everything, perfect
- */
- if (bytes == sd->queued_bytes)
- return 0;
-
- err = errno;
- for (i = 0; i < sd->queued; i++) {
- io_u = sd->io_us[i];
-
- if (bytes == -1) {
- io_u->error = err;
- } else {
- unsigned int this_io;
-
- this_io = bytes;
- if (this_io > io_u->xfer_buflen)
- this_io = io_u->xfer_buflen;
-
- io_u->resid = io_u->xfer_buflen - this_io;
- io_u->error = 0;
- bytes -= this_io;
- }
- }
-
- if (bytes == -1) {
- td_verror(td, err, "xfer vsync");
- return -err;
- }
-
- return 0;
-}
-
-static int fio_vsyncio_commit(struct thread_data *td)
-{
- struct syncio_data *sd = td->io_ops_data;
- struct fio_file *f;
- ssize_t ret;
-
- if (!sd->queued)
- return 0;
-
- io_u_mark_submit(td, sd->queued);
- f = sd->last_file;
-
- if (lseek(f->fd, sd->io_us[0]->offset, SEEK_SET) == -1) {
- int err = -errno;
-
- td_verror(td, errno, "lseek");
- return err;
- }
-
- if (sd->last_ddir == DDIR_READ)
- ret = readv(f->fd, sd->iovecs, sd->queued);
- else
- ret = writev(f->fd, sd->iovecs, sd->queued);
-
- dprint(FD_IO, "vsyncio_commit: %d\n", (int) ret);
- sd->events = sd->queued;
- sd->queued = 0;
- return fio_vsyncio_end(td, ret);
-}
-
-static int fio_vsyncio_init(struct thread_data *td)
-{
- struct syncio_data *sd;
-
- sd = malloc(sizeof(*sd));
- memset(sd, 0, sizeof(*sd));
- sd->last_offset = -1ULL;
- sd->iovecs = malloc(td->o.iodepth * sizeof(struct iovec));
- sd->io_us = malloc(td->o.iodepth * sizeof(struct io_u *));
-
- td->io_ops_data = sd;
- return 0;
-}
-
-static void fio_vsyncio_cleanup(struct thread_data *td)
-{
- struct syncio_data *sd = td->io_ops_data;
-
- if (sd) {
- free(sd->iovecs);
- free(sd->io_us);
- free(sd);
- }
-}
-
-static struct ioengine_ops ioengine_rw = {
- .name = "sync",
- .version = FIO_IOOPS_VERSION,
- .prep = fio_syncio_prep,
- .queue = fio_syncio_queue,
- .open_file = generic_open_file,
- .close_file = generic_close_file,
- .get_file_size = generic_get_file_size,
- .flags = FIO_SYNCIO,
-};
-
-static struct ioengine_ops ioengine_prw = {
- .name = "psync",
- .version = FIO_IOOPS_VERSION,
- .queue = fio_psyncio_queue,
- .open_file = generic_open_file,
- .close_file = generic_close_file,
- .get_file_size = generic_get_file_size,
- .flags = FIO_SYNCIO,
-};
-
-static struct ioengine_ops ioengine_vrw = {
- .name = "vsync",
- .version = FIO_IOOPS_VERSION,
- .init = fio_vsyncio_init,
- .cleanup = fio_vsyncio_cleanup,
- .queue = fio_vsyncio_queue,
- .commit = fio_vsyncio_commit,
- .event = fio_vsyncio_event,
- .getevents = fio_vsyncio_getevents,
- .open_file = generic_open_file,
- .close_file = generic_close_file,
- .get_file_size = generic_get_file_size,
- .flags = FIO_SYNCIO,
-};
-
-#ifdef CONFIG_PWRITEV
-static struct ioengine_ops ioengine_pvrw = {
- .name = "pvsync",
- .version = FIO_IOOPS_VERSION,
- .init = fio_vsyncio_init,
- .cleanup = fio_vsyncio_cleanup,
- .queue = fio_pvsyncio_queue,
- .open_file = generic_open_file,
- .close_file = generic_close_file,
- .get_file_size = generic_get_file_size,
- .flags = FIO_SYNCIO,
-};
-#endif
-
-#ifdef FIO_HAVE_PWRITEV2
-static struct ioengine_ops ioengine_pvrw2 = {
- .name = "pvsync2",
- .version = FIO_IOOPS_VERSION,
- .init = fio_vsyncio_init,
- .cleanup = fio_vsyncio_cleanup,
- .queue = fio_pvsyncio2_queue,
- .open_file = generic_open_file,
- .close_file = generic_close_file,
- .get_file_size = generic_get_file_size,
- .flags = FIO_SYNCIO,
- .options = options,
- .option_struct_size = sizeof(struct psyncv2_options),
-};
-#endif
-
-static void fio_init fio_syncio_register(void)
-{
- register_ioengine(&ioengine_rw);
- register_ioengine(&ioengine_prw);
- register_ioengine(&ioengine_vrw);
-#ifdef CONFIG_PWRITEV
- register_ioengine(&ioengine_pvrw);
-#endif
-#ifdef FIO_HAVE_PWRITEV2
- register_ioengine(&ioengine_pvrw2);
-#endif
-}
-
-static void fio_exit fio_syncio_unregister(void)
-{
- unregister_ioengine(&ioengine_rw);
- unregister_ioengine(&ioengine_prw);
- unregister_ioengine(&ioengine_vrw);
-#ifdef CONFIG_PWRITEV
- unregister_ioengine(&ioengine_pvrw);
-#endif
-#ifdef FIO_HAVE_PWRITEV2
- unregister_ioengine(&ioengine_pvrw2);
-#endif
-}
diff --git a/engines/windowsaio.c b/engines/windowsaio.c
deleted file mode 100644
index f5cb0483..00000000
--- a/engines/windowsaio.c
+++ /dev/null
@@ -1,449 +0,0 @@
-/*
- * windowsaio engine
- *
- * IO engine using Windows IO Completion Ports.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <signal.h>
-#include <errno.h>
-
-#include "../fio.h"
-
-typedef BOOL (WINAPI *CANCELIOEX)(HANDLE hFile, LPOVERLAPPED lpOverlapped);
-
-int geterrno_from_win_error (DWORD code, int deferrno);
-
-struct fio_overlapped {
- OVERLAPPED o;
- struct io_u *io_u;
- BOOL io_complete;
-};
-
-struct windowsaio_data {
- struct io_u **aio_events;
- HANDLE iocp;
- HANDLE iothread;
- HANDLE iocomplete_event;
- BOOL iothread_running;
-};
-
-struct thread_ctx {
- HANDLE iocp;
- struct windowsaio_data *wd;
-};
-
-static BOOL timeout_expired(DWORD start_count, DWORD end_count);
-static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min,
- unsigned int max, const struct timespec *t);
-static struct io_u *fio_windowsaio_event(struct thread_data *td, int event);
-static int fio_windowsaio_queue(struct thread_data *td,
- struct io_u *io_u);
-static void fio_windowsaio_cleanup(struct thread_data *td);
-static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter);
-static int fio_windowsaio_init(struct thread_data *td);
-static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f);
-static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f);
-
-static int fio_windowsaio_init(struct thread_data *td)
-{
- struct windowsaio_data *wd;
- int rc = 0;
-
- wd = calloc(1, sizeof(struct windowsaio_data));
- if (wd == NULL) {
- log_err("windowsaio: failed to allocate memory for engine data\n");
- rc = 1;
- }
-
- if (!rc) {
- wd->aio_events = malloc(td->o.iodepth * sizeof(struct io_u*));
- if (wd->aio_events == NULL) {
- log_err("windowsaio: failed to allocate memory for aio events list\n");
- rc = 1;
- }
- }
-
- if (!rc) {
- /* Create an auto-reset event */
- wd->iocomplete_event = CreateEvent(NULL, FALSE, FALSE, NULL);
- if (wd->iocomplete_event == NULL) {
- log_err("windowsaio: failed to create io complete event handle\n");
- rc = 1;
- }
- }
-
- if (rc) {
- if (wd != NULL) {
- if (wd->aio_events != NULL)
- free(wd->aio_events);
-
- free(wd);
- }
- }
-
- td->io_ops_data = wd;
-
- if (!rc) {
- struct thread_ctx *ctx;
- struct windowsaio_data *wd;
- HANDLE hFile;
-
- hFile = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
- if (hFile == INVALID_HANDLE_VALUE) {
- log_err("windowsaio: failed to create io completion port\n");
- rc = 1;
- }
-
- wd = td->io_ops_data;
- wd->iothread_running = TRUE;
- wd->iocp = hFile;
-
- if (!rc)
- ctx = malloc(sizeof(struct thread_ctx));
-
- if (!rc && ctx == NULL)
- {
- log_err("windowsaio: failed to allocate memory for thread context structure\n");
- CloseHandle(hFile);
- rc = 1;
- }
-
- if (!rc)
- {
- DWORD threadid;
-
- ctx->iocp = hFile;
- ctx->wd = wd;
- wd->iothread = CreateThread(NULL, 0, IoCompletionRoutine, ctx, 0, &threadid);
-
- if (wd->iothread != NULL)
- fio_setaffinity(threadid, td->o.cpumask);
- else
- log_err("windowsaio: failed to create io completion thread\n");
- }
-
- if (rc || wd->iothread == NULL)
- rc = 1;
- }
-
- return rc;
-}
-
-static void fio_windowsaio_cleanup(struct thread_data *td)
-{
- struct windowsaio_data *wd;
-
- wd = td->io_ops_data;
-
- if (wd != NULL) {
- wd->iothread_running = FALSE;
- WaitForSingleObject(wd->iothread, INFINITE);
-
- CloseHandle(wd->iothread);
- CloseHandle(wd->iocomplete_event);
-
- free(wd->aio_events);
- free(wd);
-
- td->io_ops_data = NULL;
- }
-}
-
-
-static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f)
-{
- int rc = 0;
- DWORD flags = FILE_FLAG_POSIX_SEMANTICS | FILE_FLAG_OVERLAPPED;
- DWORD sharemode = FILE_SHARE_READ | FILE_SHARE_WRITE;
- DWORD openmode = OPEN_ALWAYS;
- DWORD access;
-
- dprint(FD_FILE, "fd open %s\n", f->file_name);
-
- if (f->filetype == FIO_TYPE_PIPE) {
- log_err("windowsaio: pipes are not supported\n");
- return 1;
- }
-
- if (!strcmp(f->file_name, "-")) {
- log_err("windowsaio: can't read/write to stdin/out\n");
- return 1;
- }
-
- if (td->o.odirect)
- flags |= FILE_FLAG_NO_BUFFERING;
- if (td->o.sync_io)
- flags |= FILE_FLAG_WRITE_THROUGH;
-
- /*
- * Inform Windows whether we're going to be doing sequential or
- * random io so it can tune the Cache Manager
- */
- if (td->o.td_ddir == TD_DDIR_READ ||
- td->o.td_ddir == TD_DDIR_WRITE)
- flags |= FILE_FLAG_SEQUENTIAL_SCAN;
- else
- flags |= FILE_FLAG_RANDOM_ACCESS;
-
- if (!td_write(td) || read_only)
- access = GENERIC_READ;
- else
- access = (GENERIC_READ | GENERIC_WRITE);
-
- if (td->o.create_on_open)
- openmode = OPEN_ALWAYS;
- else
- openmode = OPEN_EXISTING;
-
- f->hFile = CreateFile(f->file_name, access, sharemode,
- NULL, openmode, flags, NULL);
-
- if (f->hFile == INVALID_HANDLE_VALUE) {
- log_err("windowsaio: failed to open file \"%s\"\n", f->file_name);
- rc = 1;
- }
-
- /* Only set up the completion port and thread if we're not just
- * querying the device size */
- if (!rc && td->io_ops_data != NULL) {
- struct windowsaio_data *wd;
-
- wd = td->io_ops_data;
-
- if (CreateIoCompletionPort(f->hFile, wd->iocp, 0, 0) == NULL) {
- log_err("windowsaio: failed to create io completion port\n");
- rc = 1;
- }
- }
-
- return rc;
-}
-
-static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f)
-{
- int rc = 0;
-
- dprint(FD_FILE, "fd close %s\n", f->file_name);
-
- if (f->hFile != INVALID_HANDLE_VALUE) {
- if (!CloseHandle(f->hFile)) {
- log_info("windowsaio: failed to close file handle for \"%s\"\n", f->file_name);
- rc = 1;
- }
- }
-
- f->hFile = INVALID_HANDLE_VALUE;
- return rc;
-}
-
-static BOOL timeout_expired(DWORD start_count, DWORD end_count)
-{
- BOOL expired = FALSE;
- DWORD current_time;
-
- current_time = GetTickCount();
-
- if ((end_count > start_count) && current_time >= end_count)
- expired = TRUE;
- else if (current_time < start_count && current_time > end_count)
- expired = TRUE;
-
- return expired;
-}
-
-static struct io_u* fio_windowsaio_event(struct thread_data *td, int event)
-{
- struct windowsaio_data *wd = td->io_ops_data;
- return wd->aio_events[event];
-}
-
-static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min,
- unsigned int max,
- const struct timespec *t)
-{
- struct windowsaio_data *wd = td->io_ops_data;
- unsigned int dequeued = 0;
- struct io_u *io_u;
- int i;
- struct fio_overlapped *fov;
- DWORD start_count = 0;
- DWORD end_count = 0;
- DWORD status;
- DWORD mswait = 250;
-
- if (t != NULL) {
- mswait = (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
- start_count = GetTickCount();
- end_count = start_count + (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
- }
-
- do {
- io_u_qiter(&td->io_u_all, io_u, i) {
- if (!(io_u->flags & IO_U_F_FLIGHT))
- continue;
-
- fov = (struct fio_overlapped*)io_u->engine_data;
-
- if (fov->io_complete) {
- fov->io_complete = FALSE;
- wd->aio_events[dequeued] = io_u;
- dequeued++;
- }
-
- }
- if (dequeued >= min)
- break;
-
- if (dequeued < min) {
- status = WaitForSingleObject(wd->iocomplete_event, mswait);
- if (status != WAIT_OBJECT_0 && dequeued >= min)
- break;
- }
-
- if (dequeued >= min || (t != NULL && timeout_expired(start_count, end_count)))
- break;
- } while (1);
-
- return dequeued;
-}
-
-static int fio_windowsaio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_overlapped *o = io_u->engine_data;
- LPOVERLAPPED lpOvl = &o->o;
- BOOL success = FALSE;
- int rc = FIO_Q_COMPLETED;
-
- fio_ro_check(td, io_u);
-
- lpOvl->Internal = 0;
- lpOvl->InternalHigh = 0;
- lpOvl->Offset = io_u->offset & 0xFFFFFFFF;
- lpOvl->OffsetHigh = io_u->offset >> 32;
-
- switch (io_u->ddir) {
- case DDIR_WRITE:
- success = WriteFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, NULL, lpOvl);
- break;
- case DDIR_READ:
- success = ReadFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, NULL, lpOvl);
- break;
- case DDIR_SYNC:
- case DDIR_DATASYNC:
- case DDIR_SYNC_FILE_RANGE:
- success = FlushFileBuffers(io_u->file->hFile);
- if (!success) {
- log_err("windowsaio: failed to flush file buffers\n");
- io_u->error = win_to_posix_error(GetLastError());
- }
-
- return FIO_Q_COMPLETED;
- break;
- case DDIR_TRIM:
- log_err("windowsaio: manual TRIM isn't supported on Windows\n");
- io_u->error = 1;
- io_u->resid = io_u->xfer_buflen;
- return FIO_Q_COMPLETED;
- break;
- default:
- assert(0);
- break;
- }
-
- if (success || GetLastError() == ERROR_IO_PENDING)
- rc = FIO_Q_QUEUED;
- else {
- io_u->error = win_to_posix_error(GetLastError());
- io_u->resid = io_u->xfer_buflen;
- }
-
- return rc;
-}
-
-/* Runs as a thread and waits for queued IO to complete */
-static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter)
-{
- OVERLAPPED *ovl;
- struct fio_overlapped *fov;
- struct io_u *io_u;
- struct windowsaio_data *wd;
- struct thread_ctx *ctx;
- ULONG_PTR ulKey = 0;
- DWORD bytes;
-
- ctx = (struct thread_ctx*)lpParameter;
- wd = ctx->wd;
-
- do {
- if (!GetQueuedCompletionStatus(ctx->iocp, &bytes, &ulKey, &ovl, 250) && ovl == NULL)
- continue;
-
- fov = CONTAINING_RECORD(ovl, struct fio_overlapped, o);
- io_u = fov->io_u;
-
- if (ovl->Internal == ERROR_SUCCESS) {
- io_u->resid = io_u->xfer_buflen - ovl->InternalHigh;
- io_u->error = 0;
- } else {
- io_u->resid = io_u->xfer_buflen;
- io_u->error = win_to_posix_error(GetLastError());
- }
-
- fov->io_complete = TRUE;
- SetEvent(wd->iocomplete_event);
- } while (ctx->wd->iothread_running);
-
- CloseHandle(ctx->iocp);
- free(ctx);
- return 0;
-}
-
-static void fio_windowsaio_io_u_free(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_overlapped *o = io_u->engine_data;
-
- if (o) {
- io_u->engine_data = NULL;
- free(o);
- }
-}
-
-static int fio_windowsaio_io_u_init(struct thread_data *td, struct io_u *io_u)
-{
- struct fio_overlapped *o;
-
- o = malloc(sizeof(*o));
- o->io_complete = FALSE;
- o->io_u = io_u;
- o->o.hEvent = NULL;
- io_u->engine_data = o;
- return 0;
-}
-
-static struct ioengine_ops ioengine = {
- .name = "windowsaio",
- .version = FIO_IOOPS_VERSION,
- .init = fio_windowsaio_init,
- .queue = fio_windowsaio_queue,
- .getevents = fio_windowsaio_getevents,
- .event = fio_windowsaio_event,
- .cleanup = fio_windowsaio_cleanup,
- .open_file = fio_windowsaio_open_file,
- .close_file = fio_windowsaio_close_file,
- .get_file_size = generic_get_file_size,
- .io_u_init = fio_windowsaio_io_u_init,
- .io_u_free = fio_windowsaio_io_u_free,
-};
-
-static void fio_init fio_windowsaio_register(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_windowsaio_unregister(void)
-{
- unregister_ioengine(&ioengine);
-}