#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
- #include <linux/aio.h>
+ #include <linux/uio.h>
#include <asm/ebcdic.h>
#include "hypfs.h"
parent = dentry->d_parent;
mutex_lock(&parent->d_inode->i_mutex);
if (hypfs_positive(dentry)) {
- if (S_ISDIR(dentry->d_inode->i_mode))
+ if (d_is_dir(dentry))
simple_rmdir(parent->d_inode, dentry);
else
simple_unlink(parent->d_inode, dentry);
return nonseekable_open(inode, filp);
}
-static ssize_t hypfs_aio_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t offset)
+static ssize_t hypfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
- char *data;
- ssize_t ret;
- struct file *filp = iocb->ki_filp;
- /* XXX: temporary */
- char __user *buf = iov[0].iov_base;
- size_t count = iov[0].iov_len;
-
- if (nr_segs != 1)
- return -EINVAL;
-
- data = filp->private_data;
- ret = simple_read_from_buffer(buf, count, &offset, data, strlen(data));
- if (ret <= 0)
- return ret;
+ struct file *file = iocb->ki_filp;
+ char *data = file->private_data;
+ size_t available = strlen(data);
+ loff_t pos = iocb->ki_pos;
+ size_t count;
- iocb->ki_pos += ret;
- file_accessed(filp);
-
- return ret;
+ if (pos < 0)
+ return -EINVAL;
+ if (pos >= available || !iov_iter_count(to))
+ return 0;
+ count = copy_to_iter(data + pos, available - pos, to);
+ if (!count)
+ return -EFAULT;
+ iocb->ki_pos = pos + count;
+ file_accessed(file);
+ return count;
}
-static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t offset)
+
+static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
int rc;
struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;
struct hypfs_sb_info *fs_info = sb->s_fs_info;
- size_t count = iov_length(iov, nr_segs);
+ size_t count = iov_iter_count(from);
/*
* Currently we only allow one update per second for two reasons:
}
hypfs_update_update(sb);
rc = count;
+ iov_iter_advance(from, count);
out:
mutex_unlock(&fs_info->lock);
return rc;
static const struct file_operations hypfs_file_ops = {
.open = hypfs_open,
.release = hypfs_release,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = hypfs_aio_read,
- .aio_write = hypfs_aio_write,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = hypfs_read_iter,
+ .write_iter = hypfs_write_iter,
.llseek = no_llseek,
};
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/io.h>
- #include <linux/aio.h>
#include <linux/jiffies.h>
#include <asm/pgtable.h>
#include <linux/delay.h>
* unless perhaps the user has mpin'ed the pages
* themselves.
*/
- qib_devinfo(dd->pcidev,
- "Failed to lock addr %p, %u pages: "
- "errno %d\n", (void *) vaddr, cnt, -ret);
+ qib_devinfo(
+ dd->pcidev,
+ "Failed to lock addr %p, %u pages: errno %d\n",
+ (void *) vaddr, cnt, -ret);
goto done;
}
for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
goto cleanup;
}
if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
- tidmap, sizeof tidmap)) {
+ tidmap, sizeof(tidmap))) {
ret = -EFAULT;
goto cleanup;
}
}
if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
- sizeof tidmap)) {
+ sizeof(tidmap))) {
ret = -EFAULT;
goto done;
}
/* rcvegrbufs are read-only on the slave */
if (vma->vm_flags & VM_WRITE) {
qib_devinfo(dd->pcidev,
- "Can't map eager buffers as "
- "writable (flags=%lx)\n", vma->vm_flags);
+ "Can't map eager buffers as writable (flags=%lx)\n",
+ vma->vm_flags);
ret = -EPERM;
goto bail;
}
*/
if (weight >= qib_cpulist_count) {
int cpu;
+
cpu = find_first_zero_bit(qib_cpulist,
qib_cpulist_count);
if (cpu == qib_cpulist_count)
if (!qib_compatible_subctxts(uinfo->spu_userversion >> 16,
uinfo->spu_userversion & 0xffff)) {
qib_devinfo(dd->pcidev,
- "Mismatched user version (%d.%d) and driver "
- "version (%d.%d) while context sharing. Ensure "
- "that driver and library are from the same "
- "release.\n",
+ "Mismatched user version (%d.%d) and driver version (%d.%d) while context sharing. Ensure that driver and library are from the same release.\n",
(int) (uinfo->spu_userversion >> 16),
(int) (uinfo->spu_userversion & 0xffff),
QIB_USER_SWMAJOR, QIB_USER_SWMINOR);
}
if (!ppd) {
u32 pidx = ctxt % dd->num_pports;
+
if (usable(dd->pport + pidx))
ppd = dd->pport + pidx;
else {
if (alg == QIB_PORT_ALG_ACROSS) {
unsigned inuse = ~0U;
+
/* find device (with ACTIVE ports) with fewest ctxts in use */
for (ndev = 0; ndev < devmax; ndev++) {
struct qib_devdata *dd = qib_lookup(ndev);
unsigned cused = 0, cfree = 0, pusable = 0;
+
if (!dd)
continue;
if (port && port <= dd->num_pports &&
} else {
for (ndev = 0; ndev < devmax; ndev++) {
struct qib_devdata *dd = qib_lookup(ndev);
+
if (dd) {
ret = choose_port_ctxt(fp, dd, port, uinfo);
if (!ret)
}
for (ndev = 0; ndev < devmax; ndev++) {
struct qib_devdata *dd = qib_lookup(ndev);
+
if (dd) {
if (pcibus_to_node(dd->pcidev->bus) < 0) {
ret = -EINVAL;
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
- #include <linux/aio.h>
#include <linux/errno.h>
#include <linux/mtio.h>
#include <linux/ioctl.h>
#include <linux/mutex.h>
#include <linux/atomic.h>
#include <linux/ratelimit.h>
+ #include <linux/uio.h>
#include "scsi.h"
#include <scsi/scsi_dbg.h>
sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp)
{
sg_io_hdr_t *hp = &srp->header;
- int err = 0;
+ int err = 0, err2;
int len;
if (count < SZ_SG_IO_HDR) {
goto err_out;
}
err_out:
- err = sg_finish_rem_req(srp);
- return (0 == err) ? count : err;
+ err2 = sg_finish_rem_req(srp);
+ return err ? : err2 ? : count;
}
static ssize_t
}
/* Rely on write phase to clean out srp status values, so no "else" */
+ /*
+ * Free the request as soon as it is complete so that its resources
+ * can be reused without waiting for userspace to read() the
+ * result. But keep the associated bio (if any) around until
+ * blk_rq_unmap_user() can be called from user context.
+ */
+ srp->rq = NULL;
+ if (rq->cmd != rq->__cmd)
+ kfree(rq->cmd);
+ __blk_put_request(rq->q, rq);
+
write_lock_irqsave(&sfp->rq_list_lock, iflags);
if (unlikely(srp->orphan)) {
if (sfp->keep_orphan)
return -ENOMEM;
}
- rq = blk_get_request(q, rw, GFP_ATOMIC);
+ /*
+ * NOTE
+ *
+ * With scsi-mq enabled, there are a fixed number of preallocated
+ * requests equal in number to shost->can_queue. If all of the
+ * preallocated requests are already in use, then using GFP_ATOMIC with
+ * blk_get_request() will return -EWOULDBLOCK, whereas using GFP_KERNEL
+ * will cause blk_get_request() to sleep until an active command
+ * completes, freeing up a request. Neither option is ideal, but
+ * GFP_KERNEL is the better choice to prevent userspace from getting an
+ * unexpected EWOULDBLOCK.
+ *
+ * With scsi-mq disabled, blk_get_request() with GFP_KERNEL usually
+ * does not sleep except under memory pressure.
+ */
+ rq = blk_get_request(q, rw, GFP_KERNEL);
if (IS_ERR(rq)) {
kfree(long_cmdp);
return PTR_ERR(rq);
SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sfp->parentdp,
"sg_finish_rem_req: res_used=%d\n",
(int) srp->res_used));
- if (srp->rq) {
- if (srp->bio)
- ret = blk_rq_unmap_user(srp->bio);
+ if (srp->bio)
+ ret = blk_rq_unmap_user(srp->bio);
+ if (srp->rq) {
if (srp->rq->cmd != srp->rq->__cmd)
kfree(srp->rq->cmd);
blk_put_request(srp->rq);
unsigned id;
};
+ /*
+ * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
+ * cancelled or completed (this makes a certain amount of sense because
+ * successful cancellation - io_cancel() - does deliver the completion to
+ * userspace).
+ *
+ * And since most things don't implement kiocb cancellation and we'd really like
+ * kiocb completion to be lockless when possible, we use ki_cancel to
+ * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
+ * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
+ */
+ #define KIOCB_CANCELLED ((void *) (~0ULL))
+
+ struct aio_kiocb {
+ struct kiocb common;
+
+ struct kioctx *ki_ctx;
+ kiocb_cancel_fn *ki_cancel;
+
+ struct iocb __user *ki_user_iocb; /* user's aiocb */
+ __u64 ki_user_data; /* user's data for completion */
+
+ struct list_head ki_list; /* the aio core uses this
+ * for cancellation */
+
+ /*
+ * If the aio_resfd field of the userspace iocb is not zero,
+ * this is the underlying eventfd context to deliver events to.
+ */
+ struct eventfd_ctx *ki_eventfd;
+ };
+
/*------ sysctl variables----*/
static DEFINE_SPINLOCK(aio_nr_lock);
unsigned long aio_nr; /* current system wide number of aio requests */
if (IS_ERR(aio_mnt))
panic("Failed to create aio fs mount.");
- kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+ kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
- void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
+ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
{
+ struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
struct kioctx *ctx = req->ki_ctx;
unsigned long flags;
}
EXPORT_SYMBOL(kiocb_set_cancel_fn);
- static int kiocb_cancel(struct kiocb *kiocb)
+ static int kiocb_cancel(struct aio_kiocb *kiocb)
{
kiocb_cancel_fn *old, *cancel;
cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
} while (cancel != old);
- return cancel(kiocb);
+ return cancel(&kiocb->common);
}
static void free_ioctx(struct work_struct *work)
static void free_ioctx_users(struct percpu_ref *ref)
{
struct kioctx *ctx = container_of(ref, struct kioctx, users);
- struct kiocb *req;
+ struct aio_kiocb *req;
spin_lock_irq(&ctx->ctx_lock);
while (!list_empty(&ctx->active_reqs)) {
req = list_first_entry(&ctx->active_reqs,
- struct kiocb, ki_list);
+ struct aio_kiocb, ki_list);
list_del_init(&req->ki_list);
kiocb_cancel(req);
return 0;
}
- /* wait_on_sync_kiocb:
- * Waits on the given sync kiocb to complete.
- */
- ssize_t wait_on_sync_kiocb(struct kiocb *req)
- {
- while (!req->ki_ctx) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (req->ki_ctx)
- break;
- io_schedule();
- }
- __set_current_state(TASK_RUNNING);
- return req->ki_user_data;
- }
- EXPORT_SYMBOL(wait_on_sync_kiocb);
-
/*
* exit_aio: called when the last user of mm goes away. At this point, there is
* no way for any new requests to be submited or any of the io_* syscalls to be
* Allocate a slot for an aio request.
* Returns NULL if no requests are free.
*/
- static inline struct kiocb *aio_get_req(struct kioctx *ctx)
+ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
{
- struct kiocb *req;
+ struct aio_kiocb *req;
if (!get_reqs_available(ctx)) {
user_refill_reqs_available(ctx);
return NULL;
}
- static void kiocb_free(struct kiocb *req)
+ static void kiocb_free(struct aio_kiocb *req)
{
- if (req->ki_filp)
- fput(req->ki_filp);
+ if (req->common.ki_filp)
+ fput(req->common.ki_filp);
if (req->ki_eventfd != NULL)
eventfd_ctx_put(req->ki_eventfd);
kmem_cache_free(kiocb_cachep, req);
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
- void aio_complete(struct kiocb *iocb, long res, long res2)
+ static void aio_complete(struct kiocb *kiocb, long res, long res2)
{
+ struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
struct kioctx *ctx = iocb->ki_ctx;
struct aio_ring *ring;
struct io_event *ev_page, *event;
* ref, no other paths have a way to get another ref
* - the sync task helpfully left a reference to itself in the iocb
*/
- if (is_sync_kiocb(iocb)) {
- iocb->ki_user_data = res;
- smp_wmb();
- iocb->ki_ctx = ERR_PTR(-EXDEV);
- wake_up_process(iocb->ki_obj.tsk);
- return;
- }
+ BUG_ON(is_sync_kiocb(kiocb));
if (iocb->ki_list.next) {
unsigned long flags;
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
- event->obj = (u64)(unsigned long)iocb->ki_obj.user;
+ event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
event->data = iocb->ki_user_data;
event->res = res;
event->res2 = res2;
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
- ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
+ ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
res, res2);
/* after flagging the request as done, we
percpu_ref_put(&ctx->reqs);
}
- EXPORT_SYMBOL(aio_complete);
/* aio_read_events_ring
* Pull an event off of the ioctx's event ring. Returns the number of
ret = -EINVAL;
if (unlikely(ctx || nr_events == 0)) {
- pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
+ pr_debug("EINVAL: ctx %lu nr_events %u\n",
ctx, nr_events);
goto out;
}
return ret;
}
- pr_debug("EINVAL: io_destroy: invalid context id\n");
+ pr_debug("EINVAL: invalid context id\n");
return -EINVAL;
}
static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
int rw, char __user *buf,
unsigned long *nr_segs,
+ size_t *len,
struct iovec **iovec,
bool compat)
{
ssize_t ret;
- *nr_segs = kiocb->ki_nbytes;
+ *nr_segs = *len;
#ifdef CONFIG_COMPAT
if (compat)
if (ret < 0)
return ret;
- /* ki_nbytes now reflect bytes instead of segs */
- kiocb->ki_nbytes = ret;
+ /* len now reflect bytes instead of segs */
+ *len = ret;
return 0;
}
static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
int rw, char __user *buf,
unsigned long *nr_segs,
+ size_t len,
struct iovec *iovec)
{
- if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
+ if (unlikely(!access_ok(!rw, buf, len)))
return -EFAULT;
iovec->iov_base = buf;
- iovec->iov_len = kiocb->ki_nbytes;
+ iovec->iov_len = len;
*nr_segs = 1;
return 0;
}
* Performs the initial checks and io submission.
*/
static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
- char __user *buf, bool compat)
+ char __user *buf, size_t len, bool compat)
{
struct file *file = req->ki_filp;
ssize_t ret;
if (!rw_op && !iter_op)
return -EINVAL;
- ret = (opcode == IOCB_CMD_PREADV ||
- opcode == IOCB_CMD_PWRITEV)
- ? aio_setup_vectored_rw(req, rw, buf, &nr_segs,
- &iovec, compat)
- : aio_setup_single_vector(req, rw, buf, &nr_segs,
- iovec);
+ if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
+ ret = aio_setup_vectored_rw(req, rw, buf, &nr_segs,
+ &len, &iovec, compat);
+ else
+ ret = aio_setup_single_vector(req, rw, buf, &nr_segs,
+ len, iovec);
if (!ret)
- ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
+ ret = rw_verify_area(rw, file, &req->ki_pos, len);
if (ret < 0) {
if (iovec != inline_vecs)
kfree(iovec);
return ret;
}
- req->ki_nbytes = ret;
+ len = ret;
/* XXX: move/kill - rw_verify_area()? */
/* This matches the pread()/pwrite() logic */
file_start_write(file);
if (iter_op) {
- iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
+ iov_iter_init(&iter, rw, iovec, nr_segs, len);
ret = iter_op(req, &iter);
} else {
ret = rw_op(req, iovec, nr_segs, req->ki_pos);
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
struct iocb *iocb, bool compat)
{
- struct kiocb *req;
+ struct aio_kiocb *req;
ssize_t ret;
/* enforce forwards compatibility on users */
(iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
((ssize_t)iocb->aio_nbytes < 0)
)) {
- pr_debug("EINVAL: io_submit: overflow check\n");
+ pr_debug("EINVAL: overflow check\n");
return -EINVAL;
}
if (unlikely(!req))
return -EAGAIN;
- req->ki_filp = fget(iocb->aio_fildes);
- if (unlikely(!req->ki_filp)) {
+ req->common.ki_filp = fget(iocb->aio_fildes);
+ if (unlikely(!req->common.ki_filp)) {
ret = -EBADF;
goto out_put_req;
}
+ req->common.ki_pos = iocb->aio_offset;
+ req->common.ki_complete = aio_complete;
+ req->common.ki_flags = 0;
if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
req->ki_eventfd = NULL;
goto out_put_req;
}
+
+ req->common.ki_flags |= IOCB_EVENTFD;
}
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
goto out_put_req;
}
- req->ki_obj.user = user_iocb;
+ req->ki_user_iocb = user_iocb;
req->ki_user_data = iocb->aio_data;
- req->ki_pos = iocb->aio_offset;
- req->ki_nbytes = iocb->aio_nbytes;
- ret = aio_run_iocb(req, iocb->aio_lio_opcode,
+ ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
(char __user *)(unsigned long)iocb->aio_buf,
+ iocb->aio_nbytes,
compat);
if (ret)
goto out_put_req;
/* lookup_kiocb
* Finds a given iocb for cancellation.
*/
- static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
- u32 key)
+ static struct aio_kiocb *
+ lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
{
- struct list_head *pos;
+ struct aio_kiocb *kiocb;
assert_spin_locked(&ctx->ctx_lock);
return NULL;
/* TODO: use a hash or array, this sucks. */
- list_for_each(pos, &ctx->active_reqs) {
- struct kiocb *kiocb = list_kiocb(pos);
- if (kiocb->ki_obj.user == iocb)
+ list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+ if (kiocb->ki_user_iocb == iocb)
return kiocb;
}
return NULL;
struct io_event __user *, result)
{
struct kioctx *ctx;
- struct kiocb *kiocb;
+ struct aio_kiocb *kiocb;
u32 key;
int ret;
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
- #include <linux/aio.h>
#include <linux/falloc.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/btrfs.h>
+ #include <linux/uio.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
mutex_unlock(&inode->i_mutex);
/*
- * we want to make sure fsync finds this change
- * but we haven't joined a transaction running right now.
- *
- * Later on, someone is sure to update the inode and get the
- * real transid recorded.
- *
- * We set last_trans now to the fs_info generation + 1,
- * this will either be one more than the running transaction
- * or the generation used for the next transaction if there isn't
- * one running right now.
- *
* We also have to set last_sub_trans to the current log transid,
* otherwise subsequent syncs to a file that's been synced in this
* transaction will appear to have already occured.
*/
- BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
BTRFS_I(inode)->last_sub_trans = root->log_transid;
if (num_written > 0) {
err = generic_write_sync(file, pos, num_written);
atomic_inc(&root->log_batch);
/*
- * check the transaction that last modified this inode
- * and see if its already been committed
- */
- if (!BTRFS_I(inode)->last_trans) {
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
-
- /*
- * if the last transaction that changed this file was before
- * the current transaction, we can bail out now without any
- * syncing
+ * If the last transaction that changed this file was before the current
+ * transaction and we have the full sync flag set in our inode, we can
+ * bail out now without any syncing.
+ *
+ * Note that we can't bail out if the full sync flag isn't set. This is
+ * because when the full sync flag is set we start all ordered extents
+ * and wait for them to fully complete - when they complete they update
+ * the inode's last_trans field through:
+ *
+ * btrfs_finish_ordered_io() ->
+ * btrfs_update_inode_fallback() ->
+ * btrfs_update_inode() ->
+ * btrfs_set_inode_last_trans()
+ *
+ * So we are sure that last_trans is up to date and can do this check to
+ * bail out safely. For the fast path, when the full sync flag is not
+ * set in our inode, we can not do it because we start only our ordered
+ * extents and don't wait for them to complete (that is when
+ * btrfs_finish_ordered_io runs), so here at this point their last_trans
+ * value might be less than or equals to fs_info->last_trans_committed,
+ * and setting a speculative last_trans for an inode when a buffered
+ * write is made (such as fs_info->generation + 1 for example) would not
+ * be reliable since after setting the value and before fsync is called
+ * any number of transactions can start and commit (transaction kthread
+ * commits the current transaction periodically), and a transaction
+ * commit does not start nor waits for ordered extents to complete.
*/
smp_mb();
if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
- BTRFS_I(inode)->last_trans <=
- root->fs_info->last_trans_committed) {
- BTRFS_I(inode)->last_trans = 0;
-
+ (full_sync && BTRFS_I(inode)->last_trans <=
+ root->fs_info->last_trans_committed)) {
/*
* We'v had everything committed since the last time we were
* modified so clear this flag in case it was set for whatever
bool same_page;
bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
u64 ino_size;
+ bool truncated_page = false;
+ bool updated_inode = false;
ret = btrfs_wait_ordered_range(inode, offset, len);
if (ret)
* entire page.
*/
if (same_page && len < PAGE_CACHE_SIZE) {
- if (offset < ino_size)
+ if (offset < ino_size) {
+ truncated_page = true;
ret = btrfs_truncate_page(inode, offset, len, 0);
+ } else {
+ ret = 0;
+ }
goto out_only_mutex;
}
/* zero back part of the first page */
if (offset < ino_size) {
+ truncated_page = true;
ret = btrfs_truncate_page(inode, offset, 0, 0);
if (ret) {
mutex_unlock(&inode->i_mutex);
if (!ret) {
/* zero the front end of the last page */
if (tail_start + tail_len < ino_size) {
+ truncated_page = true;
ret = btrfs_truncate_page(inode,
tail_start + tail_len, 0, 1);
if (ret)
}
if (lockend < lockstart) {
- mutex_unlock(&inode->i_mutex);
- return 0;
+ ret = 0;
+ goto out_only_mutex;
}
while (1) {
trans->block_rsv = &root->fs_info->trans_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
+ updated_inode = true;
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
out_free:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS);
out_only_mutex:
+ if (!updated_inode && truncated_page && !ret && !err) {
+ /*
+ * If we only end up zeroing part of a page, we still need to
+ * update the inode item, so that all the time fields are
+ * updated as well as the necessary btrfs inode in memory fields
+ * for detecting, at fsync time, if the inode isn't yet in the
+ * log tree or it's there but not up to date.
+ */
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ } else {
+ err = btrfs_update_inode(trans, root, inode);
+ ret = btrfs_end_transaction(trans, root);
+ }
+ }
mutex_unlock(&inode->i_mutex);
if (ret && !err)
err = ret;
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
- #include <linux/aio.h>
#include <linux/bit_spinlock.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/btrfs.h>
#include <linux/blkdev.h>
#include <linux/posix_acl_xattr.h>
+ #include <linux/uio.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
static int btrfs_dirty_inode(struct inode *inode);
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_inode_set_ops(struct inode *inode)
+{
+ BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+}
+#endif
+
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir,
const struct qstr *qstr)
static void btrfs_split_extent_hook(struct inode *inode,
struct extent_state *orig, u64 split)
{
+ u64 size;
+
/* not delalloc, ignore it */
if (!(orig->state & EXTENT_DELALLOC))
return;
+ size = orig->end - orig->start + 1;
+ if (size > BTRFS_MAX_EXTENT_SIZE) {
+ u64 num_extents;
+ u64 new_size;
+
+ /*
+ * See the explanation in btrfs_merge_extent_hook, the same
+ * applies here, just in reverse.
+ */
+ new_size = orig->end - split + 1;
+ num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+ BTRFS_MAX_EXTENT_SIZE);
+ new_size = split - orig->start;
+ num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+ BTRFS_MAX_EXTENT_SIZE);
+ if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
+ BTRFS_MAX_EXTENT_SIZE) >= num_extents)
+ return;
+ }
+
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
struct extent_state *new,
struct extent_state *other)
{
+ u64 new_size, old_size;
+ u64 num_extents;
+
/* not delalloc, ignore it */
if (!(other->state & EXTENT_DELALLOC))
return;
+ if (new->start > other->start)
+ new_size = new->end - other->start + 1;
+ else
+ new_size = other->end - new->start + 1;
+
+ /* we're not bigger than the max, unreserve the space and go */
+ if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents--;
+ spin_unlock(&BTRFS_I(inode)->lock);
+ return;
+ }
+
+ /*
+ * We have to add up either side to figure out how many extents were
+ * accounted for before we merged into one big extent. If the number of
+ * extents we accounted for is <= the amount we need for the new range
+ * then we can return, otherwise drop. Think of it like this
+ *
+ * [ 4k][MAX_SIZE]
+ *
+ * So we've grown the extent by a MAX_SIZE extent, this would mean we
+ * need 2 outstanding extents, on one side we have 1 and the other side
+ * we have 1 so they are == and we can return. But in this case
+ *
+ * [MAX_SIZE+4k][MAX_SIZE+4k]
+ *
+ * Each range on their own accounts for 2 extents, but merged together
+ * they are only 3 extents worth of accounting, so we need to drop in
+ * this case.
+ */
+ old_size = other->end - other->start + 1;
+ num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
+ BTRFS_MAX_EXTENT_SIZE);
+ old_size = new->end - new->start + 1;
+ num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
+ BTRFS_MAX_EXTENT_SIZE);
+
+ if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+ BTRFS_MAX_EXTENT_SIZE) >= num_extents)
+ return;
+
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->lock);
* have pending delalloc work to be done.
*/
static void btrfs_set_bit_hook(struct inode *inode,
- struct extent_state *state, unsigned long *bits)
+ struct extent_state *state, unsigned *bits)
{
if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
spin_unlock(&BTRFS_I(inode)->lock);
}
+ /* For sanity tests */
+ if (btrfs_test_is_dummy_root(root))
+ return;
+
__percpu_counter_add(&root->fs_info->delalloc_bytes, len,
root->fs_info->delalloc_batch);
spin_lock(&BTRFS_I(inode)->lock);
*/
static void btrfs_clear_bit_hook(struct inode *inode,
struct extent_state *state,
- unsigned long *bits)
+ unsigned *bits)
{
u64 len = state->end + 1 - state->start;
+ u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE -1,
+ BTRFS_MAX_EXTENT_SIZE);
spin_lock(&BTRFS_I(inode)->lock);
if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
*bits &= ~EXTENT_FIRST_DELALLOC;
} else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents--;
+ BTRFS_I(inode)->outstanding_extents -= num_extents;
spin_unlock(&BTRFS_I(inode)->lock);
}
root != root->fs_info->tree_root)
btrfs_delalloc_release_metadata(inode, len);
+ /* For sanity tests. */
+ if (btrfs_test_is_dummy_root(root))
+ return;
+
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
&& do_list && !(state->state & EXTENT_NORESERVE))
btrfs_free_reserved_data_space(inode, len);
return 0;
zeroit:
if (__ratelimit(&_rs))
- btrfs_info(BTRFS_I(inode)->root->fs_info,
+ btrfs_warn(BTRFS_I(inode)->root->fs_info,
"csum failed ino %llu off %llu csum %u expected csum %u",
btrfs_ino(inode), start, csum, csum_expected);
memset(kaddr + pgoff, 1, len);
out:
if (ret)
- btrfs_crit(root->fs_info,
+ btrfs_err(root->fs_info,
"could not do orphan cleanup %d", ret);
btrfs_free_path(path);
return ret;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_inode_item *inode_item;
- struct btrfs_timespec *tspec;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key location;
unsigned long ptr;
i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
- tspec = btrfs_inode_atime(inode_item);
- inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
- inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
+ inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
+ inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);
- tspec = btrfs_inode_mtime(inode_item);
- inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
- inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
+ inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
+ inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);
- tspec = btrfs_inode_ctime(inode_item);
- inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
- inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
+ inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
+ inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);
+
+ BTRFS_I(inode)->i_otime.tv_sec =
+ btrfs_timespec_sec(leaf, &inode_item->otime);
+ BTRFS_I(inode)->i_otime.tv_nsec =
+ btrfs_timespec_nsec(leaf, &inode_item->otime);
inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
- btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
+ btrfs_set_token_timespec_sec(leaf, &item->atime,
inode->i_atime.tv_sec, &token);
- btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
+ btrfs_set_token_timespec_nsec(leaf, &item->atime,
inode->i_atime.tv_nsec, &token);
- btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
+ btrfs_set_token_timespec_sec(leaf, &item->mtime,
inode->i_mtime.tv_sec, &token);
- btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
+ btrfs_set_token_timespec_nsec(leaf, &item->mtime,
inode->i_mtime.tv_nsec, &token);
- btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
+ btrfs_set_token_timespec_sec(leaf, &item->ctime,
inode->i_ctime.tv_sec, &token);
- btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
+ btrfs_set_token_timespec_nsec(leaf, &item->ctime,
inode->i_ctime.tv_nsec, &token);
+ btrfs_set_token_timespec_sec(leaf, &item->otime,
+ BTRFS_I(inode)->i_otime.tv_sec, &token);
+ btrfs_set_token_timespec_nsec(leaf, &item->otime,
+ BTRFS_I(inode)->i_otime.tv_nsec, &token);
+
btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
&token);
btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
struct btrfs_root *new_root;
struct btrfs_root_ref *ref;
struct extent_buffer *leaf;
+ struct btrfs_key key;
int ret;
int err = 0;
}
err = -ENOENT;
- ret = btrfs_find_item(root->fs_info->tree_root, path,
- BTRFS_I(dir)->root->root_key.objectid,
- location->objectid, BTRFS_ROOT_REF_KEY, NULL);
+ key.objectid = BTRFS_I(dir)->root->root_key.objectid;
+ key.type = BTRFS_ROOT_REF_KEY;
+ key.offset = location->objectid;
+
+ ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, path,
+ 0, 0);
if (ret) {
if (ret < 0)
err = ret;
inode->i_op = &btrfs_dir_ro_inode_operations;
inode->i_fop = &simple_dir_operations;
inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ inode->i_mtime = CURRENT_TIME;
+ inode->i_atime = inode->i_mtime;
+ inode->i_ctime = inode->i_mtime;
+ BTRFS_I(inode)->i_otime = inode->i_mtime;
return inode;
}
inode_init_owner(inode, dir, mode);
inode_set_bytes(inode, 0);
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+
+ inode->i_mtime = CURRENT_TIME;
+ inode->i_atime = inode->i_mtime;
+ inode->i_ctime = inode->i_mtime;
+ BTRFS_I(inode)->i_otime = inode->i_mtime;
+
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item,
u64 start = iblock << inode->i_blkbits;
u64 lockstart, lockend;
u64 len = bh_result->b_size;
+ u64 *outstanding_extents = NULL;
int unlock_bits = EXTENT_LOCKED;
int ret = 0;
if (create)
- unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
+ unlock_bits |= EXTENT_DIRTY;
else
len = min_t(u64, len, root->sectorsize);
lockstart = start;
lockend = start + len - 1;
+ if (current->journal_info) {
+ /*
+ * Need to pull our outstanding extents and set journal_info to NULL so
+ * that anything that needs to check if there's a transction doesn't get
+ * confused.
+ */
+ outstanding_extents = current->journal_info;
+ current->journal_info = NULL;
+ }
+
/*
* If this errors out it's because we couldn't invalidate pagecache for
* this range and we need to fallback to buffered.
((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
em->block_start != EXTENT_MAP_HOLE)) {
int type;
- int ret;
u64 block_start, orig_start, orig_block_len, ram_bytes;
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
if (start + len > i_size_read(inode))
i_size_write(inode, start + len);
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents++;
- spin_unlock(&BTRFS_I(inode)->lock);
+ /*
+ * If we have an outstanding_extents count still set then we're
+ * within our reservation, otherwise we need to adjust our inode
+ * counter appropriately.
+ */
+ if (*outstanding_extents) {
+ (*outstanding_extents)--;
+ } else {
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+ spin_unlock(&BTRFS_I(inode)->lock);
+ }
- ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
- lockstart + len - 1, EXTENT_DELALLOC, NULL,
- &cached_state, GFP_NOFS);
- BUG_ON(ret);
+ current->journal_info = outstanding_extents;
+ btrfs_free_reserved_data_space(inode, len);
}
/*
unlock_err:
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+ if (outstanding_extents)
+ current->journal_info = outstanding_extents;
return ret;
}
}
/* async crcs make it difficult to collect full stripe writes. */
- if (btrfs_get_alloc_profile(root, 1) &
- (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))
+ if (btrfs_get_alloc_profile(root, 1) & BTRFS_BLOCK_GROUP_RAID56_MASK)
async_submit = 0;
else
async_submit = 1;
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
+ u64 outstanding_extents = 0;
size_t count = 0;
int flags = 0;
bool wakeup = true;
ret = btrfs_delalloc_reserve_space(inode, count);
if (ret)
goto out;
+ outstanding_extents = div64_u64(count +
+ BTRFS_MAX_EXTENT_SIZE - 1,
+ BTRFS_MAX_EXTENT_SIZE);
+
+ /*
+ * We need to know how many extents we reserved so that we can
+ * do the accounting properly if we go over the number we
+ * originally calculated. Abuse current->journal_info for this.
+ */
+ current->journal_info = &outstanding_extents;
} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags)) {
inode_dio_done(inode);
iter, offset, btrfs_get_blocks_direct, NULL,
btrfs_submit_direct, flags);
if (rw & WRITE) {
+ current->journal_info = NULL;
if (ret < 0 && ret != -EIOCBQUEUED)
btrfs_delalloc_release_space(inode, count);
else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode,
count - (size_t)ret);
- else
- btrfs_delalloc_release_metadata(inode, 0);
}
out:
if (wakeup)
ei->delayed_node = NULL;
+ ei->i_otime.tv_sec = 0;
+ ei->i_otime.tv_nsec = 0;
+
inode = &ei->vfs_inode;
extent_map_tree_init(&ei->extent_tree);
extent_io_tree_init(&ei->io_tree, &inode->i_data);
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/writeback.h>
- #include <linux/aio.h>
#include <linux/falloc.h>
#include "super.h"
err = ceph_mdsc_do_request(mdsc,
(flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
req);
+ err = ceph_handle_snapdir(req, dentry, err);
if (err)
goto out_req;
- err = ceph_handle_snapdir(req, dentry, err);
if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
}
if (err)
goto out_req;
- if (dn || dentry->d_inode == NULL || S_ISLNK(dentry->d_inode->i_mode)) {
+ if (dn || dentry->d_inode == NULL || d_is_symlink(dentry)) {
/* make vfs retry on splice, ENOENT, or symlink */
dout("atomic_open finish_no_open on dn %p\n", dn);
err = finish_no_open(file, dn);
if (ret >= 0) {
int didpages;
if (was_short && (pos + ret < inode->i_size)) {
- u64 tmp = min(this_len - ret,
- inode->i_size - pos - ret);
+ int zlen = min(this_len - ret,
+ inode->i_size - pos - ret);
+ int zoff = (o_direct ? buf_align : io_align) +
+ read + ret;
dout(" zero gap %llu to %llu\n",
- pos + ret, pos + ret + tmp);
- ceph_zero_page_vector_range(page_align + read + ret,
- tmp, pages);
- ret += tmp;
+ pos + ret, pos + ret + zlen);
+ ceph_zero_page_vector_range(zoff, zlen, pages);
+ ret += zlen;
}
didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
{
struct file *filp = iocb->ki_filp;
struct ceph_file_info *fi = filp->private_data;
- size_t len = iocb->ki_nbytes;
+ size_t len = iov_iter_count(to);
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
struct page *pinned_page = NULL;
i_size = i_size_read(inode);
if (retry_op == READ_INLINE) {
- /* does not support inline data > PAGE_SIZE */
- if (i_size > PAGE_CACHE_SIZE) {
- ret = -EIO;
- } else if (iocb->ki_pos < i_size) {
+ BUG_ON(ret > 0 || read > 0);
+ if (iocb->ki_pos < i_size &&
+ iocb->ki_pos < PAGE_CACHE_SIZE) {
loff_t end = min_t(loff_t, i_size,
iocb->ki_pos + len);
+ end = min_t(loff_t, end, PAGE_CACHE_SIZE);
if (statret < end)
zero_user_segment(page, statret, end);
ret = copy_page_to_iter(page,
iocb->ki_pos & ~PAGE_MASK,
end - iocb->ki_pos, to);
iocb->ki_pos += ret;
- } else {
- ret = 0;
+ read += ret;
+ }
+ if (iocb->ki_pos < i_size && read < len) {
+ size_t zlen = min_t(size_t, len - read,
+ i_size - iocb->ki_pos);
+ ret = iov_iter_zero(zlen, to);
+ iocb->ki_pos += ret;
+ read += ret;
}
__free_pages(page, 0);
- return ret;
+ return read;
}
/* hit EOF or hole? */
if (retry_op == CHECK_EOF && iocb->ki_pos < i_size &&
- ret < len) {
+ ret < len) {
dout("sync_read hit hole, ppos %lld < size %lld"
", reading more\n", iocb->ki_pos,
inode->i_size);
#include <linux/security.h>
#include <linux/compat.h>
#include <linux/fs_stack.h>
- #include <linux/aio.h>
#include "ecryptfs_kernel.h"
/**
struct file *file = iocb->ki_filp;
rc = generic_file_read_iter(iocb, to);
- /*
- * Even though this is a async interface, we need to wait
- * for IO to finish to update atime
- */
- if (-EIOCBQUEUED == rc)
- rc = wait_on_sync_kiocb(iocb);
if (rc >= 0) {
path = ecryptfs_dentry_to_lower_path(file->f_path.dentry);
touch_atime(path);
}
ecryptfs_set_file_lower(
file, ecryptfs_inode_to_private(inode)->lower_file);
- if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
+ if (d_is_dir(ecryptfs_dentry)) {
ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
mutex_lock(&crypt_stat->cs_mutex);
crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
struct file *lower_file = ecryptfs_file_to_lower(file);
long rc = -ENOTTY;
- if (lower_file->f_op->unlocked_ioctl)
+ if (!lower_file->f_op->unlocked_ioctl)
+ return rc;
+
+ switch (cmd) {
+ case FITRIM:
+ case FS_IOC_GETFLAGS:
+ case FS_IOC_SETFLAGS:
+ case FS_IOC_GETVERSION:
+ case FS_IOC_SETVERSION:
rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
- return rc;
+ fsstack_copy_attr_all(file_inode(file), file_inode(lower_file));
+
+ return rc;
+ default:
+ return rc;
+ }
}
#ifdef CONFIG_COMPAT
struct file *lower_file = ecryptfs_file_to_lower(file);
long rc = -ENOIOCTLCMD;
- if (lower_file->f_op->compat_ioctl)
+ if (!lower_file->f_op->compat_ioctl)
+ return rc;
+
+ switch (cmd) {
+ case FITRIM:
+ case FS_IOC32_GETFLAGS:
+ case FS_IOC32_SETFLAGS:
+ case FS_IOC32_GETVERSION:
+ case FS_IOC32_SETVERSION:
rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
- return rc;
+ fsstack_copy_attr_all(file_inode(file), file_inode(lower_file));
+
+ return rc;
+ default:
+ return rc;
+ }
}
#endif
* (sct@redhat.com), 1993, 1998
*/
- #include <linux/aio.h>
#include "ext4_jbd2.h"
#include "truncate.h"
+ #include <linux/uio.h>
#include <trace/events/ext4.h>
* to free. Everything was covered by the start
* of the range.
*/
- return 0;
- } else {
- /* Shared branch grows from an indirect block */
- partial2--;
+ goto do_indirects;
}
} else {
/*
/* Punch happened within the same level (n == n2) */
partial = ext4_find_shared(inode, n, offsets, chain, &nr);
partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
- /*
- * ext4_find_shared returns Indirect structure which
- * points to the last element which should not be
- * removed by truncate. But this is end of the range
- * in punch_hole so we need to point to the next element
- */
- partial2->p++;
- while ((partial > chain) || (partial2 > chain2)) {
- /* We're at the same block, so we're almost finished */
- if ((partial->bh && partial2->bh) &&
- (partial->bh->b_blocknr == partial2->bh->b_blocknr)) {
- if ((partial > chain) && (partial2 > chain2)) {
+
+ /* Free top, but only if partial2 isn't its subtree. */
+ if (nr) {
+ int level = min(partial - chain, partial2 - chain2);
+ int i;
+ int subtree = 1;
+
+ for (i = 0; i <= level; i++) {
+ if (offsets[i] != offsets2[i]) {
+ subtree = 0;
+ break;
+ }
+ }
+
+ if (!subtree) {
+ if (partial == chain) {
+ /* Shared branch grows from the inode */
+ ext4_free_branches(handle, inode, NULL,
+ &nr, &nr+1,
+ (chain+n-1) - partial);
+ *partial->p = 0;
+ } else {
+ /* Shared branch grows from an indirect block */
+ BUFFER_TRACE(partial->bh, "get_write_access");
ext4_free_branches(handle, inode, partial->bh,
- partial->p + 1,
- partial2->p,
+ partial->p,
+ partial->p+1,
(chain+n-1) - partial);
- BUFFER_TRACE(partial->bh, "call brelse");
- brelse(partial->bh);
- BUFFER_TRACE(partial2->bh, "call brelse");
- brelse(partial2->bh);
}
- return 0;
}
+ }
+
+ if (!nr2) {
/*
- * Clear the ends of indirect blocks on the shared branch
- * at the start of the range
+ * ext4_find_shared returns Indirect structure which
+ * points to the last element which should not be
+ * removed by truncate. But this is end of the range
+ * in punch_hole so we need to point to the next element
*/
- if (partial > chain) {
+ partial2->p++;
+ }
+
+ while (partial > chain || partial2 > chain2) {
+ int depth = (chain+n-1) - partial;
+ int depth2 = (chain2+n2-1) - partial2;
+
+ if (partial > chain && partial2 > chain2 &&
+ partial->bh->b_blocknr == partial2->bh->b_blocknr) {
+ /*
+ * We've converged on the same block. Clear the range,
+ * then we're done.
+ */
ext4_free_branches(handle, inode, partial->bh,
- partial->p + 1,
- (__le32 *)partial->bh->b_data+addr_per_block,
- (chain+n-1) - partial);
+ partial->p + 1,
+ partial2->p,
+ (chain+n-1) - partial);
BUFFER_TRACE(partial->bh, "call brelse");
brelse(partial->bh);
- partial--;
+ BUFFER_TRACE(partial2->bh, "call brelse");
+ brelse(partial2->bh);
+ return 0;
}
+
/*
- * Clear the ends of indirect blocks on the shared branch
- * at the end of the range
+ * The start and end partial branches may not be at the same
+ * level even though the punch happened within one level. So, we
+ * give them a chance to arrive at the same level, then walk
+ * them in step with each other until we converge on the same
+ * block.
*/
- if (partial2 > chain2) {
+ if (partial > chain && depth <= depth2) {
+ ext4_free_branches(handle, inode, partial->bh,
+ partial->p + 1,
+ (__le32 *)partial->bh->b_data+addr_per_block,
+ (chain+n-1) - partial);
+ BUFFER_TRACE(partial->bh, "call brelse");
+ brelse(partial->bh);
+ partial--;
+ }
+ if (partial2 > chain2 && depth2 <= depth) {
ext4_free_branches(handle, inode, partial2->bh,
(__le32 *)partial2->bh->b_data,
partial2->p,
- (chain2+n-1) - partial2);
+ (chain2+n2-1) - partial2);
BUFFER_TRACE(partial2->bh, "call brelse");
brelse(partial2->bh);
partial2--;
}
}
+ return 0;
do_indirects:
/* Kill the remaining (whole) subtrees */
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
- #include <linux/aio.h>
#include <linux/bitops.h>
#include "ext4_jbd2.h"
{
handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host;
+ loff_t old_size = inode->i_size;
int ret = 0, ret2;
int i_size_changed = 0;
unlock_page(page);
page_cache_release(page);
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
/*
* Don't mark the inode dirty under page lock. First, it unnecessarily
* makes the holding time of page lock longer. Second, it forces lock
{
handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host;
+ loff_t old_size = inode->i_size;
int ret = 0, ret2;
int partial = 0;
unsigned from, to;
unlock_page(page);
page_cache_release(page);
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
+
if (size_changed) {
ret2 = ext4_mark_inode_dirty(handle, inode);
if (!ret)
#include <linux/pipe_fs_i.h>
#include <linux/swap.h>
#include <linux/splice.h>
- #include <linux/aio.h>
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");
newpage = buf->page;
- if (WARN_ON(!PageUptodate(newpage)))
- return -EIO;
+ if (!PageUptodate(newpage))
+ SetPageUptodate(newpage);
ClearPageMappedToDisk(newpage);
return err;
}
+static int fuse_dev_open(struct inode *inode, struct file *file)
+{
+ /*
+ * The fuse device's file's private_data is used to hold
+ * the fuse_conn(ection) when it is mounted, and is used to
+ * keep track of whether the file has been mounted already.
+ */
+ file->private_data = NULL;
+ return 0;
+}
+
static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
unsigned int size, struct fuse_copy_state *cs)
{
+ /* Don't try to move pages (yet) */
+ cs->move_pages = 0;
+
switch (code) {
case FUSE_NOTIFY_POLL:
return fuse_notify_poll(fc, size, cs);
const struct file_operations fuse_dev_operations = {
.owner = THIS_MODULE,
+ .open = fuse_dev_open,
.llseek = no_llseek,
.read = do_sync_read,
.aio_read = fuse_dev_read,
return -EINVAL;
#else
- VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
+ VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
if (rw == READ)
return nfs_file_direct_read(iocb, iter, pos);
void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
struct nfs_direct_req *dreq)
{
- cinfo->lock = &dreq->lock;
+ cinfo->lock = &dreq->inode->i_lock;
cinfo->mds = &dreq->mds_cinfo;
cinfo->ds = &dreq->ds_cinfo;
cinfo->dreq = dreq;
long res = (long) dreq->error;
if (!res)
res = (long) dreq->count;
- aio_complete(dreq->iocb, res, 0);
+ dreq->iocb->ki_complete(dreq->iocb, res, 0);
}
complete_all(&dreq->completion);
#include <linux/nfs_mount.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
- #include <linux/aio.h>
#include <linux/gfp.h>
#include <linux/swap.h>
iocb->ki_filp,
iov_iter_count(to), (unsigned long) iocb->ki_pos);
- result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
+ result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping);
if (!result) {
result = generic_file_read_iter(iocb, to);
if (result > 0)
dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
filp, (unsigned long) count, (unsigned long long) *ppos);
- res = nfs_revalidate_mapping(inode, filp->f_mapping);
+ res = nfs_revalidate_mapping_protected(inode, filp->f_mapping);
if (!res) {
res = generic_file_splice_read(filp, ppos, pipe, count, flags);
if (res > 0)
nfs_wait_bit_killable, TASK_KILLABLE);
if (ret)
return ret;
+ /*
+ * Wait for O_DIRECT to complete
+ */
+ nfs_inode_dio_wait(mapping->host);
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
/* make sure the cache has finished storing the page */
nfs_fscache_wait_on_page_write(NFS_I(inode), page);
+ wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
+
lock_page(page);
mapping = page_file_mapping(page);
if (mapping != inode->i_mapping)
/*
* file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
+ * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc.
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/writeback.h>
- #include <linux/aio.h>
#include <asm/page.h>
#include <asm/uaccess.h>
return err;
}
-/**
- * ntfs_fault_in_pages_readable -
- *
- * Fault a number of userspace pages into pagetables.
- *
- * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes
- * with more than two userspace pages as well as handling the single page case
- * elegantly.
- *
- * If you find this difficult to understand, then think of the while loop being
- * the following code, except that we do without the integer variable ret:
- *
- * do {
- * ret = __get_user(c, uaddr);
- * uaddr += PAGE_SIZE;
- * } while (!ret && uaddr < end);
- *
- * Note, the final __get_user() may well run out-of-bounds of the user buffer,
- * but _not_ out-of-bounds of the page the user buffer belongs to, and since
- * this is only a read and not a write, and since it is still in the same page,
- * it should not matter and this makes the code much simpler.
- */
-static inline void ntfs_fault_in_pages_readable(const char __user *uaddr,
- int bytes)
+static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos,
+ size_t *count)
{
- const char __user *end;
- volatile char c;
-
- /* Set @end to the first byte outside the last page we care about. */
- end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes);
-
- while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end))
- ;
-}
-
-/**
- * ntfs_fault_in_pages_readable_iovec -
- *
- * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs.
- */
-static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
- size_t iov_ofs, int bytes)
-{
- do {
- const char __user *buf;
- unsigned len;
+ loff_t pos;
+ s64 end, ll;
+ ssize_t err;
+ unsigned long flags;
+ struct inode *vi = file_inode(file);
+ ntfs_inode *base_ni, *ni = NTFS_I(vi);
+ ntfs_volume *vol = ni->vol;
- buf = iov->iov_base + iov_ofs;
- len = iov->iov_len - iov_ofs;
- if (len > bytes)
- len = bytes;
- ntfs_fault_in_pages_readable(buf, len);
- bytes -= len;
- iov++;
- iov_ofs = 0;
- } while (bytes);
+ ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
+ "0x%llx, count 0x%lx.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type),
+ (unsigned long long)*ppos, (unsigned long)*count);
+ /* We can write back this queue in page reclaim. */
+ current->backing_dev_info = inode_to_bdi(vi);
+ err = generic_write_checks(file, ppos, count, S_ISBLK(vi->i_mode));
+ if (unlikely(err))
+ goto out;
+ /*
+ * All checks have passed. Before we start doing any writing we want
+ * to abort any totally illegal writes.
+ */
+ BUG_ON(NInoMstProtected(ni));
+ BUG_ON(ni->type != AT_DATA);
+ /* If file is encrypted, deny access, just like NT4. */
+ if (NInoEncrypted(ni)) {
+ /* Only $DATA attributes can be encrypted. */
+ /*
+ * Reminder for later: Encrypted files are _always_
+ * non-resident so that the content can always be encrypted.
+ */
+ ntfs_debug("Denying write access to encrypted file.");
+ err = -EACCES;
+ goto out;
+ }
+ if (NInoCompressed(ni)) {
+ /* Only unnamed $DATA attribute can be compressed. */
+ BUG_ON(ni->name_len);
+ /*
+ * Reminder for later: If resident, the data is not actually
+ * compressed. Only on the switch to non-resident does
+ * compression kick in. This is in contrast to encrypted files
+ * (see above).
+ */
+ ntfs_error(vi->i_sb, "Writing to compressed files is not "
+ "implemented yet. Sorry.");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ if (*count == 0)
+ goto out;
+ base_ni = ni;
+ if (NInoAttr(ni))
+ base_ni = ni->ext.base_ntfs_ino;
+ err = file_remove_suid(file);
+ if (unlikely(err))
+ goto out;
+ /*
+ * Our ->update_time method always succeeds thus file_update_time()
+ * cannot fail either so there is no need to check the return code.
+ */
+ file_update_time(file);
+ pos = *ppos;
+ /* The first byte after the last cluster being written to. */
+ end = (pos + *count + vol->cluster_size_mask) &
+ ~(u64)vol->cluster_size_mask;
+ /*
+ * If the write goes beyond the allocated size, extend the allocation
+ * to cover the whole of the write, rounded up to the nearest cluster.
+ */
+ read_lock_irqsave(&ni->size_lock, flags);
+ ll = ni->allocated_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ if (end > ll) {
+ /*
+ * Extend the allocation without changing the data size.
+ *
+ * Note we ensure the allocation is big enough to at least
+ * write some data but we do not require the allocation to be
+ * complete, i.e. it may be partial.
+ */
+ ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
+ if (likely(ll >= 0)) {
+ BUG_ON(pos >= ll);
+ /* If the extension was partial truncate the write. */
+ if (end > ll) {
+ ntfs_debug("Truncating write to inode 0x%lx, "
+ "attribute type 0x%x, because "
+ "the allocation was only "
+ "partially extended.",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type));
+ *count = ll - pos;
+ }
+ } else {
+ err = ll;
+ read_lock_irqsave(&ni->size_lock, flags);
+ ll = ni->allocated_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ /* Perform a partial write if possible or fail. */
+ if (pos < ll) {
+ ntfs_debug("Truncating write to inode 0x%lx "
+ "attribute type 0x%x, because "
+ "extending the allocation "
+ "failed (error %d).",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type),
+ (int)-err);
+ *count = ll - pos;
+ } else {
+ if (err != -ENOSPC)
+ ntfs_error(vi->i_sb, "Cannot perform "
+ "write to inode "
+ "0x%lx, attribute "
+ "type 0x%x, because "
+ "extending the "
+ "allocation failed "
+ "(error %ld).",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type),
+ (long)-err);
+ else
+ ntfs_debug("Cannot perform write to "
+ "inode 0x%lx, "
+ "attribute type 0x%x, "
+ "because there is not "
+ "space left.",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type));
+ goto out;
+ }
+ }
+ }
+ /*
+ * If the write starts beyond the initialized size, extend it up to the
+ * beginning of the write and initialize all non-sparse space between
+ * the old initialized size and the new one. This automatically also
+ * increments the vfs inode->i_size to keep it above or equal to the
+ * initialized_size.
+ */
+ read_lock_irqsave(&ni->size_lock, flags);
+ ll = ni->initialized_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ if (pos > ll) {
+ /*
+ * Wait for ongoing direct i/o to complete before proceeding.
+ * New direct i/o cannot start as we hold i_mutex.
+ */
+ inode_dio_wait(vi);
+ err = ntfs_attr_extend_initialized(ni, pos);
+ if (unlikely(err < 0))
+ ntfs_error(vi->i_sb, "Cannot perform write to inode "
+ "0x%lx, attribute type 0x%x, because "
+ "extending the initialized size "
+ "failed (error %d).", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type),
+ (int)-err);
+ }
+out:
+ return err;
}
/**
goto err_out;
}
}
- err = add_to_page_cache_lru(*cached_page, mapping, index,
- GFP_KERNEL);
+ err = add_to_page_cache_lru(*cached_page, mapping,
+ index, GFP_KERNEL);
if (unlikely(err)) {
if (err == -EEXIST)
continue;
return err;
}
-/*
- * Copy as much as we can into the pages and return the number of bytes which
- * were successfully copied. If a fault is encountered then clear the pages
- * out to (ofs + bytes) and return the number of bytes which were copied.
- */
-static inline size_t ntfs_copy_from_user(struct page **pages,
- unsigned nr_pages, unsigned ofs, const char __user *buf,
- size_t bytes)
-{
- struct page **last_page = pages + nr_pages;
- char *addr;
- size_t total = 0;
- unsigned len;
- int left;
-
- do {
- len = PAGE_CACHE_SIZE - ofs;
- if (len > bytes)
- len = bytes;
- addr = kmap_atomic(*pages);
- left = __copy_from_user_inatomic(addr + ofs, buf, len);
- kunmap_atomic(addr);
- if (unlikely(left)) {
- /* Do it the slow way. */
- addr = kmap(*pages);
- left = __copy_from_user(addr + ofs, buf, len);
- kunmap(*pages);
- if (unlikely(left))
- goto err_out;
- }
- total += len;
- bytes -= len;
- if (!bytes)
- break;
- buf += len;
- ofs = 0;
- } while (++pages < last_page);
-out:
- return total;
-err_out:
- total += len - left;
- /* Zero the rest of the target like __copy_from_user(). */
- while (++pages < last_page) {
- bytes -= len;
- if (!bytes)
- break;
- len = PAGE_CACHE_SIZE;
- if (len > bytes)
- len = bytes;
- zero_user(*pages, 0, len);
- }
- goto out;
-}
-
-static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr,
- const struct iovec *iov, size_t iov_ofs, size_t bytes)
-{
- size_t total = 0;
-
- while (1) {
- const char __user *buf = iov->iov_base + iov_ofs;
- unsigned len;
- size_t left;
-
- len = iov->iov_len - iov_ofs;
- if (len > bytes)
- len = bytes;
- left = __copy_from_user_inatomic(vaddr, buf, len);
- total += len;
- bytes -= len;
- vaddr += len;
- if (unlikely(left)) {
- total -= left;
- break;
- }
- if (!bytes)
- break;
- iov++;
- iov_ofs = 0;
- }
- return total;
-}
-
-static inline void ntfs_set_next_iovec(const struct iovec **iovp,
- size_t *iov_ofsp, size_t bytes)
-{
- const struct iovec *iov = *iovp;
- size_t iov_ofs = *iov_ofsp;
-
- while (bytes) {
- unsigned len;
-
- len = iov->iov_len - iov_ofs;
- if (len > bytes)
- len = bytes;
- bytes -= len;
- iov_ofs += len;
- if (iov->iov_len == iov_ofs) {
- iov++;
- iov_ofs = 0;
- }
- }
- *iovp = iov;
- *iov_ofsp = iov_ofs;
-}
-
-/*
- * This has the same side-effects and return value as ntfs_copy_from_user().
- * The difference is that on a fault we need to memset the remainder of the
- * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
- * single-segment behaviour.
- *
- * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when
- * atomic and when not atomic. This is ok because it calls
- * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In
- * fact, the only difference between __copy_from_user_inatomic() and
- * __copy_from_user() is that the latter calls might_sleep() and the former
- * should not zero the tail of the buffer on error. And on many architectures
- * __copy_from_user_inatomic() is just defined to __copy_from_user() so it
- * makes no difference at all on those architectures.
- */
-static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
- unsigned nr_pages, unsigned ofs, const struct iovec **iov,
- size_t *iov_ofs, size_t bytes)
-{
- struct page **last_page = pages + nr_pages;
- char *addr;
- size_t copied, len, total = 0;
-
- do {
- len = PAGE_CACHE_SIZE - ofs;
- if (len > bytes)
- len = bytes;
- addr = kmap_atomic(*pages);
- copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
- *iov, *iov_ofs, len);
- kunmap_atomic(addr);
- if (unlikely(copied != len)) {
- /* Do it the slow way. */
- addr = kmap(*pages);
- copied = __ntfs_copy_from_user_iovec_inatomic(addr +
- ofs, *iov, *iov_ofs, len);
- if (unlikely(copied != len))
- goto err_out;
- kunmap(*pages);
- }
- total += len;
- ntfs_set_next_iovec(iov, iov_ofs, len);
- bytes -= len;
- if (!bytes)
- break;
- ofs = 0;
- } while (++pages < last_page);
-out:
- return total;
-err_out:
- BUG_ON(copied > len);
- /* Zero the rest of the target like __copy_from_user(). */
- memset(addr + ofs + copied, 0, len - copied);
- kunmap(*pages);
- total += copied;
- ntfs_set_next_iovec(iov, iov_ofs, copied);
- while (++pages < last_page) {
- bytes -= len;
- if (!bytes)
- break;
- len = PAGE_CACHE_SIZE;
- if (len > bytes)
- len = bytes;
- zero_user(*pages, 0, len);
- }
- goto out;
-}
-
static inline void ntfs_flush_dcache_pages(struct page **pages,
unsigned nr_pages)
{
return err;
}
-static void ntfs_write_failed(struct address_space *mapping, loff_t to)
+/*
+ * Copy as much as we can into the pages and return the number of bytes which
+ * were successfully copied. If a fault is encountered then clear the pages
+ * out to (ofs + bytes) and return the number of bytes which were copied.
+ */
+static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
+ unsigned ofs, struct iov_iter *i, size_t bytes)
{
- struct inode *inode = mapping->host;
+ struct page **last_page = pages + nr_pages;
+ size_t total = 0;
+ struct iov_iter data = *i;
+ unsigned len, copied;
- if (to > inode->i_size) {
- truncate_pagecache(inode, inode->i_size);
- ntfs_truncate_vfs(inode);
- }
+ do {
+ len = PAGE_CACHE_SIZE - ofs;
+ if (len > bytes)
+ len = bytes;
+ copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs,
+ len);
+ total += copied;
+ bytes -= copied;
+ if (!bytes)
+ break;
+ iov_iter_advance(&data, copied);
+ if (copied < len)
+ goto err;
+ ofs = 0;
+ } while (++pages < last_page);
+out:
+ return total;
+err:
+ /* Zero the rest of the target like __copy_from_user(). */
+ len = PAGE_CACHE_SIZE - copied;
+ do {
+ if (len > bytes)
+ len = bytes;
+ zero_user(*pages, copied, len);
+ bytes -= len;
+ copied = 0;
+ len = PAGE_CACHE_SIZE;
+ } while (++pages < last_page);
+ goto out;
}
/**
- * ntfs_file_buffered_write -
- *
- * Locking: The vfs is holding ->i_mutex on the inode.
+ * ntfs_perform_write - perform buffered write to a file
+ * @file: file to write to
+ * @i: iov_iter with data to write
+ * @pos: byte offset in file at which to begin writing to
*/
-static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
- const struct iovec *iov, unsigned long nr_segs,
- loff_t pos, loff_t *ppos, size_t count)
+static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
+ loff_t pos)
{
- struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *vi = mapping->host;
ntfs_inode *ni = NTFS_I(vi);
ntfs_volume *vol = ni->vol;
struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
struct page *cached_page = NULL;
- char __user *buf = NULL;
- s64 end, ll;
VCN last_vcn;
LCN lcn;
- unsigned long flags;
- size_t bytes, iov_ofs = 0; /* Offset in the current iovec. */
- ssize_t status, written;
+ size_t bytes;
+ ssize_t status, written = 0;
unsigned nr_pages;
- int err;
- ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
- "pos 0x%llx, count 0x%lx.",
- vi->i_ino, (unsigned)le32_to_cpu(ni->type),
- (unsigned long long)pos, (unsigned long)count);
- if (unlikely(!count))
- return 0;
- BUG_ON(NInoMstProtected(ni));
- /*
- * If the attribute is not an index root and it is encrypted or
- * compressed, we cannot write to it yet. Note we need to check for
- * AT_INDEX_ALLOCATION since this is the type of both directory and
- * index inodes.
- */
- if (ni->type != AT_INDEX_ALLOCATION) {
- /* If file is encrypted, deny access, just like NT4. */
- if (NInoEncrypted(ni)) {
- /*
- * Reminder for later: Encrypted files are _always_
- * non-resident so that the content can always be
- * encrypted.
- */
- ntfs_debug("Denying write access to encrypted file.");
- return -EACCES;
- }
- if (NInoCompressed(ni)) {
- /* Only unnamed $DATA attribute can be compressed. */
- BUG_ON(ni->type != AT_DATA);
- BUG_ON(ni->name_len);
- /*
- * Reminder for later: If resident, the data is not
- * actually compressed. Only on the switch to non-
- * resident does compression kick in. This is in
- * contrast to encrypted files (see above).
- */
- ntfs_error(vi->i_sb, "Writing to compressed files is "
- "not implemented yet. Sorry.");
- return -EOPNOTSUPP;
- }
- }
+ ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
+ "0x%llx, count 0x%lx.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type),
+ (unsigned long long)pos,
+ (unsigned long)iov_iter_count(i));
/*
* If a previous ntfs_truncate() failed, repeat it and abort if it
* fails again.
*/
if (unlikely(NInoTruncateFailed(ni))) {
+ int err;
+
inode_dio_wait(vi);
err = ntfs_truncate(vi);
if (err || NInoTruncateFailed(ni)) {
return err;
}
}
- /* The first byte after the write. */
- end = pos + count;
- /*
- * If the write goes beyond the allocated size, extend the allocation
- * to cover the whole of the write, rounded up to the nearest cluster.
- */
- read_lock_irqsave(&ni->size_lock, flags);
- ll = ni->allocated_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (end > ll) {
- /* Extend the allocation without changing the data size. */
- ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
- if (likely(ll >= 0)) {
- BUG_ON(pos >= ll);
- /* If the extension was partial truncate the write. */
- if (end > ll) {
- ntfs_debug("Truncating write to inode 0x%lx, "
- "attribute type 0x%x, because "
- "the allocation was only "
- "partially extended.",
- vi->i_ino, (unsigned)
- le32_to_cpu(ni->type));
- end = ll;
- count = ll - pos;
- }
- } else {
- err = ll;
- read_lock_irqsave(&ni->size_lock, flags);
- ll = ni->allocated_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- /* Perform a partial write if possible or fail. */
- if (pos < ll) {
- ntfs_debug("Truncating write to inode 0x%lx, "
- "attribute type 0x%x, because "
- "extending the allocation "
- "failed (error code %i).",
- vi->i_ino, (unsigned)
- le32_to_cpu(ni->type), err);
- end = ll;
- count = ll - pos;
- } else {
- ntfs_error(vol->sb, "Cannot perform write to "
- "inode 0x%lx, attribute type "
- "0x%x, because extending the "
- "allocation failed (error "
- "code %i).", vi->i_ino,
- (unsigned)
- le32_to_cpu(ni->type), err);
- return err;
- }
- }
- }
- written = 0;
- /*
- * If the write starts beyond the initialized size, extend it up to the
- * beginning of the write and initialize all non-sparse space between
- * the old initialized size and the new one. This automatically also
- * increments the vfs inode->i_size to keep it above or equal to the
- * initialized_size.
- */
- read_lock_irqsave(&ni->size_lock, flags);
- ll = ni->initialized_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (pos > ll) {
- err = ntfs_attr_extend_initialized(ni, pos);
- if (err < 0) {
- ntfs_error(vol->sb, "Cannot perform write to inode "
- "0x%lx, attribute type 0x%x, because "
- "extending the initialized size "
- "failed (error code %i).", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type), err);
- status = err;
- goto err_out;
- }
- }
/*
* Determine the number of pages per cluster for non-resident
* attributes.
nr_pages = 1;
if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni))
nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT;
- /* Finally, perform the actual write. */
last_vcn = -1;
- if (likely(nr_segs == 1))
- buf = iov->iov_base;
do {
VCN vcn;
pgoff_t idx, start_idx;
vol->cluster_size_bits, false);
up_read(&ni->runlist.lock);
if (unlikely(lcn < LCN_HOLE)) {
- status = -EIO;
if (lcn == LCN_ENOMEM)
status = -ENOMEM;
- else
+ else {
+ status = -EIO;
ntfs_error(vol->sb, "Cannot "
"perform write to "
"inode 0x%lx, "
"is corrupt.",
vi->i_ino, (unsigned)
le32_to_cpu(ni->type));
+ }
break;
}
if (lcn == LCN_HOLE) {
}
}
}
- if (bytes > count)
- bytes = count;
+ if (bytes > iov_iter_count(i))
+ bytes = iov_iter_count(i);
+again:
/*
* Bring in the user page(s) that we will copy from _first_.
* Otherwise there is a nasty deadlock on copying from the same
* pages being swapped out between us bringing them into memory
* and doing the actual copying.
*/
- if (likely(nr_segs == 1))
- ntfs_fault_in_pages_readable(buf, bytes);
- else
- ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes);
+ if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) {
+ status = -EFAULT;
+ break;
+ }
/* Get and lock @do_pages starting at index @start_idx. */
status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
pages, &cached_page);
status = ntfs_prepare_pages_for_non_resident_write(
pages, do_pages, pos, bytes);
if (unlikely(status)) {
- loff_t i_size;
-
do {
unlock_page(pages[--do_pages]);
page_cache_release(pages[do_pages]);
} while (do_pages);
- /*
- * The write preparation may have instantiated
- * allocated space outside i_size. Trim this
- * off again. We can ignore any errors in this
- * case as we will just be waisting a bit of
- * allocated space, which is not a disaster.
- */
- i_size = i_size_read(vi);
- if (pos + bytes > i_size) {
- ntfs_write_failed(mapping, pos + bytes);
- }
break;
}
}
u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index;
- if (likely(nr_segs == 1)) {
- copied = ntfs_copy_from_user(pages + u, do_pages - u,
- ofs, buf, bytes);
- buf += copied;
- } else
- copied = ntfs_copy_from_user_iovec(pages + u,
- do_pages - u, ofs, &iov, &iov_ofs,
- bytes);
+ copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
+ i, bytes);
ntfs_flush_dcache_pages(pages + u, do_pages - u);
- status = ntfs_commit_pages_after_write(pages, do_pages, pos,
- bytes);
- if (likely(!status)) {
- written += copied;
- count -= copied;
- pos += copied;
- if (unlikely(copied != bytes))
- status = -EFAULT;
+ status = 0;
+ if (likely(copied == bytes)) {
+ status = ntfs_commit_pages_after_write(pages, do_pages,
+ pos, bytes);
+ if (!status)
+ status = bytes;
}
do {
unlock_page(pages[--do_pages]);
page_cache_release(pages[do_pages]);
} while (do_pages);
- if (unlikely(status))
+ if (unlikely(status < 0))
break;
- balance_dirty_pages_ratelimited(mapping);
+ copied = status;
cond_resched();
- } while (count);
-err_out:
- *ppos = pos;
+ if (unlikely(!copied)) {
+ size_t sc;
+
+ /*
+ * We failed to copy anything. Fall back to single
+ * segment length write.
+ *
+ * This is needed to avoid possible livelock in the
+ * case that all segments in the iov cannot be copied
+ * at once without a pagefault.
+ */
+ sc = iov_iter_single_seg_count(i);
+ if (bytes > sc)
+ bytes = sc;
+ goto again;
+ }
+ iov_iter_advance(i, copied);
+ pos += copied;
+ written += copied;
+ balance_dirty_pages_ratelimited(mapping);
+ if (fatal_signal_pending(current)) {
+ status = -EINTR;
+ break;
+ }
+ } while (iov_iter_count(i));
if (cached_page)
page_cache_release(cached_page);
ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
}
/**
- * ntfs_file_aio_write_nolock -
+ * ntfs_file_write_iter_nolock - write data to a file
+ * @iocb: IO state structure (file, offset, etc.)
+ * @from: iov_iter with data to write
+ *
+ * Basically the same as __generic_file_write_iter() except that it ends
+ * up calling ntfs_perform_write() instead of generic_perform_write() and that
+ * O_DIRECT is not implemented.
*/
-static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
- const struct iovec *iov, unsigned long nr_segs, loff_t *ppos)
+static ssize_t ntfs_file_write_iter_nolock(struct kiocb *iocb,
+ struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- loff_t pos;
- size_t count; /* after file limit checks */
- ssize_t written, err;
+ loff_t pos = iocb->ki_pos;
+ ssize_t written = 0;
+ ssize_t err;
+ size_t count = iov_iter_count(from);
- count = iov_length(iov, nr_segs);
- pos = *ppos;
- /* We can write back this queue in page reclaim. */
- current->backing_dev_info = inode_to_bdi(inode);
- written = 0;
- err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
- if (err)
- goto out;
- if (!count)
- goto out;
- err = file_remove_suid(file);
- if (err)
- goto out;
- err = file_update_time(file);
- if (err)
- goto out;
- written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
- count);
-out:
+ err = ntfs_prepare_file_for_write(file, &pos, &count);
+ if (count && !err) {
+ iov_iter_truncate(from, count);
+ written = ntfs_perform_write(file, from, pos);
+ if (likely(written >= 0))
+ iocb->ki_pos = pos + written;
+ }
current->backing_dev_info = NULL;
return written ? written : err;
}
/**
- * ntfs_file_aio_write -
+ * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock()
+ * @iocb: IO state structure
+ * @from: iov_iter with data to write
+ *
+ * Basically the same as generic_file_write_iter() except that it ends up
+ * calling ntfs_file_write_iter_nolock() instead of
+ * __generic_file_write_iter().
*/
-static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
+ struct inode *vi = file_inode(file);
ssize_t ret;
- BUG_ON(iocb->ki_pos != pos);
-
- mutex_lock(&inode->i_mutex);
- ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
- mutex_unlock(&inode->i_mutex);
+ mutex_lock(&vi->i_mutex);
+ ret = ntfs_file_write_iter_nolock(iocb, from);
+ mutex_unlock(&vi->i_mutex);
if (ret > 0) {
- int err = generic_write_sync(file, iocb->ki_pos - ret, ret);
+ ssize_t err;
+
+ err = generic_write_sync(file, iocb->ki_pos - ret, ret);
if (err < 0)
ret = err;
}
#endif /* NTFS_RW */
const struct file_operations ntfs_file_ops = {
- .llseek = generic_file_llseek, /* Seek inside file. */
- .read = new_sync_read, /* Read from file. */
- .read_iter = generic_file_read_iter, /* Async read from file. */
+ .llseek = generic_file_llseek,
+ .read = new_sync_read,
+ .read_iter = generic_file_read_iter,
#ifdef NTFS_RW
- .write = do_sync_write, /* Write to file. */
- .aio_write = ntfs_file_aio_write, /* Async write to file. */
- /*.release = ,*/ /* Last file is closed. See
- fs/ext2/file.c::
- ext2_release_file() for
- how to use this to discard
- preallocated space for
- write opened files. */
- .fsync = ntfs_file_fsync, /* Sync a file to disk. */
- /*.aio_fsync = ,*/ /* Sync all outstanding async
- i/o operations on a
- kiocb. */
+ .write = new_sync_write,
+ .write_iter = ntfs_file_write_iter,
+ .fsync = ntfs_file_fsync,
#endif /* NTFS_RW */
- /*.ioctl = ,*/ /* Perform function on the
- mounted filesystem. */
- .mmap = generic_file_mmap, /* Mmap file. */
- .open = ntfs_file_open, /* Open file. */
- .splice_read = generic_file_splice_read /* Zero-copy data send with
- the data source being on
- the ntfs partition. We do
- not need to care about the
- data destination. */
- /*.sendpage = ,*/ /* Zero-copy data send with
- the data destination being
- on the ntfs partition. We
- do not need to care about
- the data source. */
+ .mmap = generic_file_mmap,
+ .open = ntfs_file_open,
+ .splice_read = generic_file_splice_read,
};
const struct inode_operations ntfs_file_inode_ops = {
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_icache.h"
+#include "xfs_pnfs.h"
- #include <linux/aio.h>
#include <linux/dcache.h>
#include <linux/falloc.h>
#include <linux/pagevec.h>
xfs_zero_last_block(
struct xfs_inode *ip,
xfs_fsize_t offset,
- xfs_fsize_t isize)
+ xfs_fsize_t isize,
+ bool *did_zeroing)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize);
zero_len = mp->m_sb.sb_blocksize - zero_offset;
if (isize + zero_len > offset)
zero_len = offset - isize;
+ *did_zeroing = true;
return xfs_iozero(ip, isize, zero_len);
}
xfs_zero_eof(
struct xfs_inode *ip,
xfs_off_t offset, /* starting I/O offset */
- xfs_fsize_t isize) /* current inode size */
+ xfs_fsize_t isize, /* current inode size */
+ bool *did_zeroing)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t start_zero_fsb;
* We only zero a part of that block so it is handled specially.
*/
if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
- error = xfs_zero_last_block(ip, offset, isize);
+ error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
if (error)
return error;
}
if (error)
return error;
+ *did_zeroing = true;
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
}
if (error)
return error;
+ error = xfs_break_layouts(inode, iolock);
+ if (error)
+ return error;
+
/*
* If the offset is beyond the size of the file, we need to zero any
* blocks that fall between the existing EOF and the start of this
* having to redo all checks before.
*/
if (*pos > i_size_read(inode)) {
+ bool zero = false;
+
if (*iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, *iolock);
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, *iolock);
goto restart;
}
- error = xfs_zero_eof(ip, *pos, i_size_read(inode));
+ error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero);
if (error)
return error;
}
struct xfs_inode *ip = XFS_I(inode);
long error;
enum xfs_prealloc_flags flags = 0;
+ uint iolock = XFS_IOLOCK_EXCL;
loff_t new_size = 0;
if (!S_ISREG(inode->i_mode))
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
return -EOPNOTSUPP;
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
+ xfs_ilock(ip, iolock);
+ error = xfs_break_layouts(inode, &iolock);
+ if (error)
+ goto out_unlock;
+
if (mode & FALLOC_FL_PUNCH_HOLE) {
error = xfs_free_file_space(ip, offset, len);
if (error)
}
out_unlock:
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ xfs_iunlock(ip, iolock);
return error;
}
struct address_space;
struct writeback_control;
+ #define IOCB_EVENTFD (1 << 0)
+
+ struct kiocb {
+ struct file *ki_filp;
+ loff_t ki_pos;
+ void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
+ void *private;
+ int ki_flags;
+ };
+
+ static inline bool is_sync_kiocb(struct kiocb *kiocb)
+ {
+ return kiocb->ki_complete == NULL;
+ }
+
+ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
+ {
+ *kiocb = (struct kiocb) {
+ .ki_filp = filp,
+ };
+ }
+
/*
* "descriptor" for what we're up to with a read.
* This allows us to use the same read code yet
struct list_head flc_flock;
struct list_head flc_posix;
struct list_head flc_lease;
- int flc_flock_cnt;
- int flc_posix_cnt;
- int flc_lease_cnt;
};
/* The following constant reflects the upper bound of the file/locking space */
const __user char *uptr; /* original userland pointer */
struct audit_names *aname;
int refcnt;
- bool separate; /* should "name" be freed? */
+ const char iname[];
};
extern long vfs_truncate(struct path *, loff_t);
#include <linux/security.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
- #include <linux/aio.h>
#include <linux/syscalls.h>
#include <linux/kexec.h>
#include <linux/kdb.h>
#include <linux/irq_work.h>
#include <linux/utsname.h>
#include <linux/ctype.h>
+ #include <linux/uio.h>
#include <asm/uaccess.h>
int i;
int level = default_message_loglevel;
int facility = 1; /* LOG_USER */
- size_t len = iocb->ki_nbytes;
+ size_t len = iov_iter_count(from);
ssize_t ret = len;
if (len > LOG_LINE_MAX)
#ifdef CONFIG_KGDB_KDB
if (unlikely(kdb_trap_printk)) {
- r = vkdb_printf(fmt, args);
+ r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
return r;
}
#endif
for (i = 0, c = console_cmdline;
i < MAX_CMDLINECONSOLES && c->name[0];
i++, c++) {
+ BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name));
if (strcmp(c->name, newcon->name) != 0)
continue;
if (newcon->index >= 0 &&
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/swap.h>
- #include <linux/aio.h>
+ #include <linux/uio.h>
static struct vfsmount *shm_mnt;
bool shmem_mapping(struct address_space *mapping)
{
+ if (!mapping->host)
+ return false;
+
return mapping->host->i_sb->s_op == &shmem_ops;
}
static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
{
- bool old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
- bool new_is_dir = S_ISDIR(new_dentry->d_inode->i_mode);
+ bool old_is_dir = d_is_dir(old_dentry);
+ bool new_is_dir = d_is_dir(new_dentry);
if (old_dir != new_dir && old_is_dir != new_is_dir) {
if (old_is_dir) {
init_sync_kiocb(&iocb, NULL);
ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) :
__sock_sendmsg(&iocb, sock, msg, size);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
+ BUG_ON(ret == -EIOCBQUEUED);
return ret;
}
init_sync_kiocb(&iocb, NULL);
ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
+ BUG_ON(ret == -EIOCBQUEUED);
return ret;
}
EXPORT_SYMBOL(sock_recvmsg);
init_sync_kiocb(&iocb, NULL);
ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&iocb);
+ BUG_ON(ret == -EIOCBQUEUED);
return ret;
}
if (iocb->ki_pos != 0)
return -ESPIPE;
- if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
+ if (!iov_iter_count(to)) /* Match SYS5 behaviour */
return 0;
res = __sock_recvmsg(iocb, sock, &msg,
- iocb->ki_nbytes, msg.msg_flags);
+ iov_iter_count(to), msg.msg_flags);
*to = msg.msg_iter;
return res;
}
if (sock->type == SOCK_SEQPACKET)
msg.msg_flags |= MSG_EOR;
- res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes);
+ res = __sock_sendmsg(iocb, sock, &msg, iov_iter_count(from));
*from = msg.msg_iter;
return res;
}
if (len > INT_MAX)
len = INT_MAX;
+ if (unlikely(!access_ok(VERIFY_READ, buff, len)))
+ return -EFAULT;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
if (size > INT_MAX)
size = INT_MAX;
+ if (unlikely(!access_ok(VERIFY_WRITE, ubuf, size)))
+ return -EFAULT;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/pm_qos.h>
- #include <linux/aio.h>
#include <linux/io.h>
#include <linux/dma-mapping.h>
#include <sound/core.h>
#include <sound/pcm_params.h>
#include <sound/timer.h>
#include <sound/minors.h>
+ #include <linux/uio.h>
/*
* Compatibility
if (! snd_pcm_playback_empty(substream)) {
snd_pcm_do_start(substream, SNDRV_PCM_STATE_DRAINING);
snd_pcm_post_start(substream, SNDRV_PCM_STATE_DRAINING);
+ } else {
+ runtime->status->state = SNDRV_PCM_STATE_SETUP;
}
break;
case SNDRV_PCM_STATE_RUNNING: