X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=fs%2Faio.c;h=9acfd0708d922545b68ce5938b3f118422790793;hb=67d4abc84c535bd88ace5fbc8b8752736771a33f;hp=e29ec485af255822b8414be128fc8ef66da9a204;hpb=eed631e0d741d1a1067cfc6d709fdf2363126f9c;p=pandora-kernel.git diff --git a/fs/aio.c b/fs/aio.c index e29ec485af25..9acfd0708d92 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -228,12 +228,6 @@ static void __put_ioctx(struct kioctx *ctx) call_rcu(&ctx->rcu_head, ctx_rcu_free); } -static inline void get_ioctx(struct kioctx *kioctx) -{ - BUG_ON(atomic_read(&kioctx->users) <= 0); - atomic_inc(&kioctx->users); -} - static inline int try_get_ioctx(struct kioctx *kioctx) { return atomic_inc_not_zero(&kioctx->users); @@ -273,7 +267,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) mm = ctx->mm = current->mm; atomic_inc(&mm->mm_count); - atomic_set(&ctx->users, 1); + atomic_set(&ctx->users, 2); spin_lock_init(&ctx->ctx_lock); spin_lock_init(&ctx->ring_info.ring_lock); init_waitqueue_head(&ctx->wait); @@ -440,8 +434,6 @@ void exit_aio(struct mm_struct *mm) static struct kiocb *__aio_get_req(struct kioctx *ctx) { struct kiocb *req = NULL; - struct aio_ring *ring; - int okay = 0; req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL); if (unlikely(!req)) @@ -459,39 +451,123 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx) INIT_LIST_HEAD(&req->ki_run_list); req->ki_eventfd = NULL; - /* Check if the completion queue has enough free space to - * accept an event from this io. - */ + return req; +} + +/* + * struct kiocb's are allocated in batches to reduce the number of + * times the ctx lock is acquired and released. + */ +#define KIOCB_BATCH_SIZE 32L +struct kiocb_batch { + struct list_head head; + long count; /* number of requests left to allocate */ +}; + +static void kiocb_batch_init(struct kiocb_batch *batch, long total) +{ + INIT_LIST_HEAD(&batch->head); + batch->count = total; +} + +static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch) +{ + struct kiocb *req, *n; + + if (list_empty(&batch->head)) + return; + spin_lock_irq(&ctx->ctx_lock); - ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0); - if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) { - list_add(&req->ki_list, &ctx->active_reqs); - ctx->reqs_active++; - okay = 1; + list_for_each_entry_safe(req, n, &batch->head, ki_batch) { + list_del(&req->ki_batch); + list_del(&req->ki_list); + kmem_cache_free(kiocb_cachep, req); + ctx->reqs_active--; } - kunmap_atomic(ring, KM_USER0); + if (unlikely(!ctx->reqs_active && ctx->dead)) + wake_up_all(&ctx->wait); spin_unlock_irq(&ctx->ctx_lock); +} - if (!okay) { - kmem_cache_free(kiocb_cachep, req); - req = NULL; +/* + * Allocate a batch of kiocbs. This avoids taking and dropping the + * context lock a lot during setup. 
+ */ +static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch) +{ + unsigned short allocated, to_alloc; + long avail; + bool called_fput = false; + struct kiocb *req, *n; + struct aio_ring *ring; + + to_alloc = min(batch->count, KIOCB_BATCH_SIZE); + for (allocated = 0; allocated < to_alloc; allocated++) { + req = __aio_get_req(ctx); + if (!req) + /* allocation failed, go with what we've got */ + break; + list_add(&req->ki_batch, &batch->head); } - return req; + if (allocated == 0) + goto out; + +retry: + spin_lock_irq(&ctx->ctx_lock); + ring = kmap_atomic(ctx->ring_info.ring_pages[0]); + + avail = aio_ring_avail(&ctx->ring_info, ring) - ctx->reqs_active; + BUG_ON(avail < 0); + if (avail == 0 && !called_fput) { + /* + * Handle a potential starvation case. It is possible that + * we hold the last reference on a struct file, causing us + * to delay the final fput to non-irq context. In this case, + * ctx->reqs_active is artificially high. Calling the fput + * routine here may free up a slot in the event completion + * ring, allowing this allocation to succeed. + */ + kunmap_atomic(ring); + spin_unlock_irq(&ctx->ctx_lock); + aio_fput_routine(NULL); + called_fput = true; + goto retry; + } + + if (avail < allocated) { + /* Trim back the number of requests. */ + list_for_each_entry_safe(req, n, &batch->head, ki_batch) { + list_del(&req->ki_batch); + kmem_cache_free(kiocb_cachep, req); + if (--allocated <= avail) + break; + } + } + + batch->count -= allocated; + list_for_each_entry(req, &batch->head, ki_batch) { + list_add(&req->ki_list, &ctx->active_reqs); + ctx->reqs_active++; + } + + kunmap_atomic(ring); + spin_unlock_irq(&ctx->ctx_lock); + +out: + return allocated; } -static inline struct kiocb *aio_get_req(struct kioctx *ctx) +static inline struct kiocb *aio_get_req(struct kioctx *ctx, + struct kiocb_batch *batch) { struct kiocb *req; - /* Handle a potential starvation case -- should be exceedingly rare as - * requests will be stuck on fput_head only if the aio_fput_routine is - * delayed and the requests were the last user of the struct file. - */ - req = __aio_get_req(ctx); - if (unlikely(NULL == req)) { - aio_fput_routine(NULL); - req = __aio_get_req(ctx); - } + + if (list_empty(&batch->head)) + if (kiocb_batch_refill(ctx, batch) == 0) + return NULL; + req = list_first_entry(&batch->head, struct kiocb, ki_batch); + list_del(&req->ki_batch); return req; } @@ -527,11 +603,16 @@ static void aio_fput_routine(struct work_struct *data) fput(req->ki_filp); /* Link the iocb into the context's free list */ + rcu_read_lock(); spin_lock_irq(&ctx->ctx_lock); really_put_req(ctx, req); + /* + * at that point ctx might've been killed, but actual + * freeing is RCU'd + */ spin_unlock_irq(&ctx->ctx_lock); + rcu_read_unlock(); - put_ioctx(ctx); spin_lock_irq(&fput_lock); } spin_unlock_irq(&fput_lock); @@ -562,7 +643,6 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) * this function will be executed w/out any aio kthread wakeup. */ if (unlikely(!fput_atomic(req->ki_filp))) { - get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); spin_unlock(&fput_lock); @@ -1022,6 +1102,13 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent) head = ring->head % info->nr; if (head != ring->tail) { struct io_event *evp = aio_ring_event(info, head, KM_USER1); + + /* + * Ensure that once we've read the current tail pointer, that + * we also see the events that were stored up to the tail. 
+	 */
+	smp_rmb();
+
 	*ent = *evp;
 	head = (head + 1) % info->nr;
 	smp_mb(); /* finish reading the event before updatng the head */
@@ -1032,9 +1119,9 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
 	spin_unlock(&info->ring_lock);
 
 out:
-	kunmap_atomic(ring, KM_USER0);
 	dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret,
 		 (unsigned long)ring->head, (unsigned long)ring->tail);
+	kunmap_atomic(ring, KM_USER0);
 	return ret;
 }
 
@@ -1256,10 +1343,10 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 	ret = PTR_ERR(ioctx);
 	if (!IS_ERR(ioctx)) {
 		ret = put_user(ioctx->user_id, ctxp);
-		if (!ret)
+		if (!ret) {
+			put_ioctx(ioctx);
 			return 0;
-
-		get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
+		}
 		io_destroy(ioctx);
 	}
 
@@ -1387,13 +1474,17 @@ static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat)
 		ret = compat_rw_copy_check_uvector(type,
 				(struct compat_iovec __user *)kiocb->ki_buf,
 				kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
-				&kiocb->ki_iovec);
+				&kiocb->ki_iovec, 1);
 	else
 #endif
 		ret = rw_copy_check_uvector(type,
 				(struct iovec __user *)kiocb->ki_buf,
 				kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
-				&kiocb->ki_iovec);
+				&kiocb->ki_iovec, 1);
+	if (ret < 0)
+		goto out;
+
+	ret = rw_verify_area(type, kiocb->ki_filp, &kiocb->ki_pos, ret);
 	if (ret < 0)
 		goto out;
 
@@ -1408,11 +1499,17 @@ out:
 	return ret;
 }
 
-static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
+static ssize_t aio_setup_single_vector(int type, struct file * file, struct kiocb *kiocb)
 {
+	int bytes;
+
+	bytes = rw_verify_area(type, file, &kiocb->ki_pos, kiocb->ki_left);
+	if (bytes < 0)
+		return bytes;
+
 	kiocb->ki_iovec = &kiocb->ki_inline_vec;
 	kiocb->ki_iovec->iov_base = kiocb->ki_buf;
-	kiocb->ki_iovec->iov_len = kiocb->ki_left;
+	kiocb->ki_iovec->iov_len = bytes;
 	kiocb->ki_nr_segs = 1;
 	kiocb->ki_cur_seg = 0;
 	return 0;
@@ -1437,10 +1534,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
 		if (unlikely(!access_ok(VERIFY_WRITE, kiocb->ki_buf,
 			kiocb->ki_left)))
 			break;
-		ret = security_file_permission(file, MAY_READ);
-		if (unlikely(ret))
-			break;
-		ret = aio_setup_single_vector(kiocb);
+		ret = aio_setup_single_vector(READ, file, kiocb);
 		if (ret)
 			break;
 		ret = -EINVAL;
@@ -1455,10 +1549,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
 		if (unlikely(!access_ok(VERIFY_READ, kiocb->ki_buf,
 			kiocb->ki_left)))
 			break;
-		ret = security_file_permission(file, MAY_WRITE);
-		if (unlikely(ret))
-			break;
-		ret = aio_setup_single_vector(kiocb);
+		ret = aio_setup_single_vector(WRITE, file, kiocb);
 		if (ret)
 			break;
 		ret = -EINVAL;
@@ -1469,9 +1560,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
 		ret = -EBADF;
 		if (unlikely(!(file->f_mode & FMODE_READ)))
 			break;
-		ret = security_file_permission(file, MAY_READ);
-		if (unlikely(ret))
-			break;
 		ret = aio_setup_vectored_rw(READ, kiocb, compat);
 		if (ret)
 			break;
@@ -1483,9 +1571,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
 		ret = -EBADF;
 		if (unlikely(!(file->f_mode & FMODE_WRITE)))
 			break;
-		ret = security_file_permission(file, MAY_WRITE);
-		if (unlikely(ret))
-			break;
 		ret = aio_setup_vectored_rw(WRITE, kiocb, compat);
 		if (ret)
 			break;
@@ -1515,7 +1600,8 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
 }
 
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-			 struct iocb *iocb, bool compat)
+			 struct iocb *iocb, struct kiocb_batch *batch,
+			 bool compat)
 {
 	struct kiocb *req;
 	struct file *file;
@@ -1541,7 +1627,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	if (unlikely(!file))
 		return -EBADF;
 
-	req = aio_get_req(ctx);		/* returns with 2 references to req */
+	req = aio_get_req(ctx, batch);	/* returns with 2 references to req */
 	if (unlikely(!req)) {
 		fput(file);
 		return -EAGAIN;
@@ -1621,8 +1707,9 @@ long do_io_submit(aio_context_t ctx_id, long nr,
 {
 	struct kioctx *ctx;
 	long ret = 0;
-	int i;
+	int i = 0;
 	struct blk_plug plug;
+	struct kiocb_batch batch;
 
 	if (unlikely(nr < 0))
 		return -EINVAL;
@@ -1639,6 +1726,8 @@ long do_io_submit(aio_context_t ctx_id, long nr,
 		return -EINVAL;
 	}
 
+	kiocb_batch_init(&batch, nr);
+
 	blk_start_plug(&plug);
 
 	/*
@@ -1659,12 +1748,13 @@ long do_io_submit(aio_context_t ctx_id, long nr,
 			break;
 		}
 
-		ret = io_submit_one(ctx, user_iocb, &tmp, compat);
+		ret = io_submit_one(ctx, user_iocb, &tmp, &batch, compat);
 		if (ret)
 			break;
 	}
 
 	blk_finish_plug(&plug);
+	kiocb_batch_free(ctx, &batch);
 	put_ioctx(ctx);
 	return i ? i : ret;
 }
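
For reference, below is a minimal userspace sketch of the submission path this change optimizes: a single io_submit() call carrying several iocbs, which lets do_io_submit() size its kiocb_batch to nr and take ctx_lock once per batch refill instead of once per request. This is illustrative only, not part of the patch; it calls the raw AIO syscalls directly (no libaio), and the file name, request count, and buffer sizes are arbitrary placeholders.

/* example-aio-batch.c: illustrative only, not part of fs/aio.c */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/aio_abi.h>

#define NR_REQS 8

/* glibc provides no wrappers for the native AIO syscalls, so invoke them directly. */
static long io_setup(unsigned nr, aio_context_t *ctx)
{
	return syscall(SYS_io_setup, nr, ctx);
}

static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	return syscall(SYS_io_submit, ctx, nr, iocbpp);
}

static long io_getevents(aio_context_t ctx, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	return syscall(SYS_io_getevents, ctx, min_nr, nr, events, timeout);
}

static long io_destroy(aio_context_t ctx)
{
	return syscall(SYS_io_destroy, ctx);
}

int main(void)
{
	aio_context_t ctx = 0;
	struct iocb cbs[NR_REQS], *cbps[NR_REQS];
	struct io_event events[NR_REQS];
	static char bufs[NR_REQS][4096];
	long submitted, reaped;
	int fd, i;

	fd = open("/etc/hostname", O_RDONLY);	/* arbitrary readable file */
	if (fd < 0 || io_setup(NR_REQS, &ctx) < 0) {
		perror("setup");
		return 1;
	}

	memset(cbs, 0, sizeof(cbs));
	for (i = 0; i < NR_REQS; i++) {
		cbs[i].aio_lio_opcode = IOCB_CMD_PREAD;
		cbs[i].aio_fildes = fd;
		cbs[i].aio_buf = (__u64)(unsigned long)bufs[i];
		cbs[i].aio_nbytes = sizeof(bufs[i]);
		cbs[i].aio_offset = 0;
		cbps[i] = &cbs[i];
	}

	/*
	 * One syscall, NR_REQS iocbs: do_io_submit() initializes its
	 * kiocb_batch with nr, so the kiocbs come from a single refill
	 * rather than one ctx_lock round trip per request.
	 */
	submitted = io_submit(ctx, NR_REQS, cbps);
	if (submitted < 0) {
		perror("io_submit");
	} else {
		reaped = io_getevents(ctx, submitted, NR_REQS, events, NULL);
		printf("submitted %ld, reaped %ld events\n", submitted, reaped);
	}

	io_destroy(ctx);
	close(fd);
	return 0;
}

Note that on regular files opened without O_DIRECT the reads typically complete synchronously inside io_submit(); the point here is only to show several iocbs travelling through one do_io_submit() call and therefore one kiocb_batch.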