mirror of https://github.com/xemu-project/xemu.git
Implement an fd pool to get real AIO with posix-aio
This patch implements a simple fd pool to allow many AIO requests with posix-aio. The result is significantly improved performance (identical to that reported for linux-aio) for both cache=on and cache=off. The fundamental problem with posix-aio is that it limits itself to one thread per file descriptor. I don't know why this is, but this patch provides a simple mechanism to work around this (duplicating the file descriptor). This isn't a great solution, but it seems like a reasonable intermediate step between posix-aio and a custom thread-pool to replace it. Ryan Harper will be posting some performance analysis he did comparing posix-aio with fd pooling against linux-aio. The size of the posix-aio thread pool and the fd pool were largely determined by him based on this analysis. Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5323 c046a42c-6fe2-441c-8c8c-71466251a162
This commit is contained in:
parent
997306fc22
commit
5353872545
|
@ -84,10 +84,16 @@
|
||||||
reopen it to see if the disk has been changed */
|
reopen it to see if the disk has been changed */
|
||||||
#define FD_OPEN_TIMEOUT 1000
|
#define FD_OPEN_TIMEOUT 1000
|
||||||
|
|
||||||
|
/* posix-aio doesn't allow multiple outstanding requests to a single file
|
||||||
|
* descriptor. we implement a pool of dup()'d file descriptors to work
|
||||||
|
* around this */
|
||||||
|
#define RAW_FD_POOL_SIZE 64
|
||||||
|
|
||||||
typedef struct BDRVRawState {
|
typedef struct BDRVRawState {
|
||||||
int fd;
|
int fd;
|
||||||
int type;
|
int type;
|
||||||
unsigned int lseek_err_cnt;
|
unsigned int lseek_err_cnt;
|
||||||
|
int fd_pool[RAW_FD_POOL_SIZE];
|
||||||
#if defined(__linux__)
|
#if defined(__linux__)
|
||||||
/* linux floppy specific */
|
/* linux floppy specific */
|
||||||
int fd_open_flags;
|
int fd_open_flags;
|
||||||
|
@ -109,6 +115,7 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
|
||||||
{
|
{
|
||||||
BDRVRawState *s = bs->opaque;
|
BDRVRawState *s = bs->opaque;
|
||||||
int fd, open_flags, ret;
|
int fd, open_flags, ret;
|
||||||
|
int i;
|
||||||
|
|
||||||
posix_aio_init();
|
posix_aio_init();
|
||||||
|
|
||||||
|
@ -138,6 +145,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
s->fd = fd;
|
s->fd = fd;
|
||||||
|
for (i = 0; i < RAW_FD_POOL_SIZE; i++)
|
||||||
|
s->fd_pool[i] = -1;
|
||||||
#if defined(O_DIRECT)
|
#if defined(O_DIRECT)
|
||||||
s->aligned_buf = NULL;
|
s->aligned_buf = NULL;
|
||||||
if (flags & BDRV_O_DIRECT) {
|
if (flags & BDRV_O_DIRECT) {
|
||||||
|
@ -436,6 +445,7 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset,
|
||||||
|
|
||||||
typedef struct RawAIOCB {
|
typedef struct RawAIOCB {
|
||||||
BlockDriverAIOCB common;
|
BlockDriverAIOCB common;
|
||||||
|
int fd;
|
||||||
struct aiocb aiocb;
|
struct aiocb aiocb;
|
||||||
struct RawAIOCB *next;
|
struct RawAIOCB *next;
|
||||||
int ret;
|
int ret;
|
||||||
|
@ -447,6 +457,38 @@ typedef struct PosixAioState
|
||||||
RawAIOCB *first_aio;
|
RawAIOCB *first_aio;
|
||||||
} PosixAioState;
|
} PosixAioState;
|
||||||
|
|
||||||
|
/*
 * Pick the file descriptor a new AIO request should be issued on.
 *
 * Scans s->fd_pool for the first free slot (value -1), fills it with a
 * dup() of the main descriptor s->fd and returns the duplicate.
 *
 * Returns s->fd itself when no duplicate can be handed out: every slot is
 * taken, dup() failed, or s->fd is not open.  In that last case the value
 * returned is the negative s->fd, exactly as before.
 */
static int raw_fd_pool_get(BDRVRawState *s)
{
    int i;

    /* nothing to duplicate while the main descriptor is closed; calling
     * dup() would only fail once per free slot */
    if (s->fd < 0)
        return s->fd;

    for (i = 0; i < RAW_FD_POOL_SIZE; i++) {
        /* already in use */
        if (s->fd_pool[i] != -1)
            continue;

        /* try to dup file descriptor */
        s->fd_pool[i] = dup(s->fd);
        if (s->fd_pool[i] != -1)
            return s->fd_pool[i];

        /* dup() failed and left the slot at -1; the remaining free slots
         * would hit the same failure, so stop instead of retrying */
        break;
    }

    /* we couldn't dup the file descriptor so just use the main one */
    return s->fd;
}
|
||||||
|
|
||||||
|
/*
 * Give back the descriptor that an AIO request was issued on.
 *
 * If acb->fd is one of the duplicates stored in the owning
 * BDRVRawState's fd_pool, it is closed and its slot is marked free (-1).
 * A request that ran on the main descriptor owns no slot, so nothing is
 * released for it.
 */
static void raw_fd_pool_put(RawAIOCB *acb)
{
    BDRVRawState *s = acb->common.bs->opaque;
    int i;

    /* A negative fd would compare equal to every free slot (-1) and make
     * us call close(-1) on each of them; the main descriptor is never
     * stored in the pool.  Either way there is nothing to release. */
    if (acb->fd < 0 || acb->fd == s->fd)
        return;

    for (i = 0; i < RAW_FD_POOL_SIZE; i++) {
        if (s->fd_pool[i] == acb->fd) {
            close(s->fd_pool[i]);
            s->fd_pool[i] = -1;
            /* each open duplicate occupies exactly one slot */
            break;
        }
    }
}
|
||||||
|
|
||||||
static void posix_aio_read(void *opaque)
|
static void posix_aio_read(void *opaque)
|
||||||
{
|
{
|
||||||
PosixAioState *s = opaque;
|
PosixAioState *s = opaque;
|
||||||
|
@ -487,6 +529,7 @@ static void posix_aio_read(void *opaque)
|
||||||
if (ret == ECANCELED) {
|
if (ret == ECANCELED) {
|
||||||
/* remove the request */
|
/* remove the request */
|
||||||
*pacb = acb->next;
|
*pacb = acb->next;
|
||||||
|
raw_fd_pool_put(acb);
|
||||||
qemu_aio_release(acb);
|
qemu_aio_release(acb);
|
||||||
} else if (ret != EINPROGRESS) {
|
} else if (ret != EINPROGRESS) {
|
||||||
/* end of aio */
|
/* end of aio */
|
||||||
|
@ -503,6 +546,7 @@ static void posix_aio_read(void *opaque)
|
||||||
*pacb = acb->next;
|
*pacb = acb->next;
|
||||||
/* call the callback */
|
/* call the callback */
|
||||||
acb->common.cb(acb->common.opaque, ret);
|
acb->common.cb(acb->common.opaque, ret);
|
||||||
|
raw_fd_pool_put(acb);
|
||||||
qemu_aio_release(acb);
|
qemu_aio_release(acb);
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
|
@ -577,7 +621,8 @@ static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
|
||||||
acb = qemu_aio_get(bs, cb, opaque);
|
acb = qemu_aio_get(bs, cb, opaque);
|
||||||
if (!acb)
|
if (!acb)
|
||||||
return NULL;
|
return NULL;
|
||||||
acb->aiocb.aio_fildes = s->fd;
|
acb->fd = raw_fd_pool_get(s);
|
||||||
|
acb->aiocb.aio_fildes = acb->fd;
|
||||||
acb->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
|
acb->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
|
||||||
acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
|
acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
|
||||||
acb->aiocb.aio_buf = buf;
|
acb->aiocb.aio_buf = buf;
|
||||||
|
@ -684,6 +729,7 @@ static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
|
||||||
break;
|
break;
|
||||||
} else if (*pacb == acb) {
|
} else if (*pacb == acb) {
|
||||||
*pacb = acb->next;
|
*pacb = acb->next;
|
||||||
|
raw_fd_pool_put(acb);
|
||||||
qemu_aio_release(acb);
|
qemu_aio_release(acb);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -697,6 +743,18 @@ static int posix_aio_init(void)
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_AIO */
|
#endif /* CONFIG_AIO */
|
||||||
|
|
||||||
|
/*
 * Close every duplicated descriptor still held in s->fd_pool and mark
 * each of those slots free (-1).  The main descriptor s->fd is not
 * touched here.
 */
static void raw_close_fd_pool(BDRVRawState *s)
{
    int slot = 0;

    while (slot < RAW_FD_POOL_SIZE) {
        int *entry = &s->fd_pool[slot++];

        /* free slots have nothing to close */
        if (*entry == -1)
            continue;

        close(*entry);
        *entry = -1;
    }
}
|
||||||
|
|
||||||
static void raw_close(BlockDriverState *bs)
|
static void raw_close(BlockDriverState *bs)
|
||||||
{
|
{
|
||||||
BDRVRawState *s = bs->opaque;
|
BDRVRawState *s = bs->opaque;
|
||||||
|
@ -708,6 +766,7 @@ static void raw_close(BlockDriverState *bs)
|
||||||
qemu_free(s->aligned_buf);
|
qemu_free(s->aligned_buf);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
raw_close_fd_pool(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int raw_truncate(BlockDriverState *bs, int64_t offset)
|
static int raw_truncate(BlockDriverState *bs, int64_t offset)
|
||||||
|
@ -898,7 +957,7 @@ kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex ma
|
||||||
static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
|
static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
|
||||||
{
|
{
|
||||||
BDRVRawState *s = bs->opaque;
|
BDRVRawState *s = bs->opaque;
|
||||||
int fd, open_flags, ret;
|
int fd, open_flags, ret, i;
|
||||||
|
|
||||||
posix_aio_init();
|
posix_aio_init();
|
||||||
|
|
||||||
|
@ -963,6 +1022,8 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
s->fd = fd;
|
s->fd = fd;
|
||||||
|
for (i = 0; i < RAW_FD_POOL_SIZE; i++)
|
||||||
|
s->fd_pool[i] = -1;
|
||||||
#if defined(__linux__)
|
#if defined(__linux__)
|
||||||
/* close fd so that we can reopen it as needed */
|
/* close fd so that we can reopen it as needed */
|
||||||
if (s->type == FTYPE_FD) {
|
if (s->type == FTYPE_FD) {
|
||||||
|
@ -975,7 +1036,6 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__linux__)
|
#if defined(__linux__)
|
||||||
|
|
||||||
/* Note: we do not have a reliable method to detect if the floppy is
|
/* Note: we do not have a reliable method to detect if the floppy is
|
||||||
present. The current method is to try to open the floppy at every
|
present. The current method is to try to open the floppy at every
|
||||||
I/O and to keep it opened during a few hundreds of ms. */
|
I/O and to keep it opened during a few hundreds of ms. */
|
||||||
|
@ -991,6 +1051,7 @@ static int fd_open(BlockDriverState *bs)
|
||||||
(qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
|
(qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
|
||||||
close(s->fd);
|
close(s->fd);
|
||||||
s->fd = -1;
|
s->fd = -1;
|
||||||
|
raw_close_fd_pool(s);
|
||||||
#ifdef DEBUG_FLOPPY
|
#ifdef DEBUG_FLOPPY
|
||||||
printf("Floppy closed\n");
|
printf("Floppy closed\n");
|
||||||
#endif
|
#endif
|
||||||
|
@ -1091,6 +1152,7 @@ static int raw_eject(BlockDriverState *bs, int eject_flag)
|
||||||
if (s->fd >= 0) {
|
if (s->fd >= 0) {
|
||||||
close(s->fd);
|
close(s->fd);
|
||||||
s->fd = -1;
|
s->fd = -1;
|
||||||
|
raw_close_fd_pool(s);
|
||||||
}
|
}
|
||||||
fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK);
|
fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK);
|
||||||
if (fd >= 0) {
|
if (fd >= 0) {
|
||||||
|
|
Loading…
Reference in New Issue