diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index ea966b3..ae66278 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -37,6 +37,8 @@ enum {
        VHOST_MEMORY_F_LOG = 0x1,
 };
 
+static unsigned vhost_zcopy_mask __read_mostly;
+
 #define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num])
 #define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num])
 
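Note: vhost_zcopy_mask added above is a module-wide bitmask; bit n set means virtqueue n is allowed to use zero-copy transmission. It is only written through vhost_enable_zcopy(), introduced later in this patch, and tested in vhost_dev_alloc_iovecs(). A minimal sketch of how a backend might opt in at module init; the vhost-net side is not part of this file, so the names below are assumptions:

/* Sketch only: a backend such as vhost-net opting its TX queue into
 * zero copy before any vhost device is opened.  experimental_zcopytx,
 * VHOST_NET_VQ_TX and vhost_net_misc are assumptions, not in this diff.
 */
static bool experimental_zcopytx;
module_param(experimental_zcopytx, bool, 0444);

static int __init vhost_net_init(void)
{
	if (experimental_zcopytx)
		vhost_enable_zcopy(VHOST_NET_VQ_TX);
	return misc_register(&vhost_net_misc);
}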
@@ -179,6 +181,9 @@ static void vhost_vq_reset(struct vhost_dev *dev,
        vq->call_ctx = NULL;
        vq->call = NULL;
        vq->log_ctx = NULL;
+       vq->upend_idx = 0;
+       vq->done_idx = 0;
+       vq->ubufs = NULL;
 }
 
 static int vhost_worker(void *data)
@@ -217,6 +222,8 @@ static int vhost_worker(void *data)
                if (work) {
                        __set_current_state(TASK_RUNNING);
                        work->fn(work);
+                       if (need_resched())
+                               schedule();
                } else
                        schedule();
 
@@ -225,10 +232,28 @@ static int vhost_worker(void *data)
        return 0;
 }
 
+static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
+{
+       kfree(vq->indirect);
+       vq->indirect = NULL;
+       kfree(vq->log);
+       vq->log = NULL;
+       kfree(vq->heads);
+       vq->heads = NULL;
+       kfree(vq->ubuf_info);
+       vq->ubuf_info = NULL;
+}
+
+void vhost_enable_zcopy(int vq)
+{
+       vhost_zcopy_mask |= 0x1 << vq;
+}
+
 /* Helper to allocate iovec buffers for all vqs. */
 static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
 {
        int i;
+       bool zcopy;
 
        for (i = 0; i < dev->nvqs; ++i) {
                dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect *
@@ -237,19 +262,21 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
                                          GFP_KERNEL);
                dev->vqs[i].heads = kmalloc(sizeof *dev->vqs[i].heads *
                                            UIO_MAXIOV, GFP_KERNEL);
-
+               zcopy = vhost_zcopy_mask & (0x1 << i);
+               if (zcopy)
+                       dev->vqs[i].ubuf_info =
+                               kmalloc(sizeof *dev->vqs[i].ubuf_info *
+                                       UIO_MAXIOV, GFP_KERNEL);
                if (!dev->vqs[i].indirect || !dev->vqs[i].log ||
-                       !dev->vqs[i].heads)
+                       !dev->vqs[i].heads ||
+                       (zcopy && !dev->vqs[i].ubuf_info))
                        goto err_nomem;
        }
        return 0;
 
 err_nomem:
-       for (; i >= 0; --i) {
-               kfree(dev->vqs[i].indirect);
-               kfree(dev->vqs[i].log);
-               kfree(dev->vqs[i].heads);
-       }
+       for (; i >= 0; --i)
+               vhost_vq_free_iovecs(&dev->vqs[i]);
        return -ENOMEM;
 }
 
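Note: the error path now reuses vhost_vq_free_iovecs() instead of open-coding the kfree() calls. This is safe even for the vq whose allocation failed partway, because kfree(NULL) is a no-op and the helper resets every pointer, so a later vhost_dev_free_iovecs() stays harmless. Illustration for a non-zcopy vq that failed allocating heads:

/* Illustrative state when the kmalloc of heads failed and zcopy is off:
 *
 *	kfree(vq->indirect);	frees the successful allocation
 *	kfree(vq->log);		frees the successful allocation
 *	kfree(vq->heads);	kfree(NULL): no-op
 *	kfree(vq->ubuf_info);	never allocated for non-zcopy vqs: no-op
 *
 * Every pointer is then set back to NULL, so freeing again later is
 * harmless.
 */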
@@ -257,14 +284,8 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev)
 {
        int i;
 
-       for (i = 0; i < dev->nvqs; ++i) {
-               kfree(dev->vqs[i].indirect);
-               dev->vqs[i].indirect = NULL;
-               kfree(dev->vqs[i].log);
-               dev->vqs[i].log = NULL;
-               kfree(dev->vqs[i].heads);
-               dev->vqs[i].heads = NULL;
-       }
+       for (i = 0; i < dev->nvqs; ++i)
+               vhost_vq_free_iovecs(&dev->vqs[i]);
 }
 
 long vhost_dev_init(struct vhost_dev *dev,
@@ -287,6 +308,7 @@ long vhost_dev_init(struct vhost_dev *dev,
                dev->vqs[i].log = NULL;
                dev->vqs[i].indirect = NULL;
                dev->vqs[i].heads = NULL;
+               dev->vqs[i].ubuf_info = NULL;
                dev->vqs[i].dev = dev;
                mutex_init(&dev->vqs[i].mutex);
                vhost_vq_reset(dev, dev->vqs + i);
@@ -390,6 +412,30 @@ long vhost_dev_reset_owner(struct vhost_dev *dev)
        return 0;
 }
 
+/* In case of DMA done not in order in lower device driver for some reason.
+ * upend_idx is used to track end of used idx, done_idx is used to track head
+ * of used idx. Once lower device DMA done contiguously, we will signal KVM
+ * guest used idx.
+ */
+int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq)
+{
+       int i;
+       int j = 0;
+
+       for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
+               if ((vq->heads[i].len == VHOST_DMA_DONE_LEN)) {
+                       vq->heads[i].len = VHOST_DMA_CLEAR_LEN;
+                       vhost_add_used_and_signal(vq->dev, vq,
+                                                 vq->heads[i].id, 0);
+                       ++j;
+               } else
+                       break;
+       }
+       if (j)
+               vq->done_idx = i;
+       return j;
+}
+
 /* Caller should have device mutex */
 void vhost_dev_cleanup(struct vhost_dev *dev)
 {
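A worked example of the done_idx/upend_idx bookkeeping in vhost_zerocopy_signal_used() above, with illustrative index values:

/* done_idx == 5, upend_idx == 9; DMA completions arrived out of order:
 *
 *	heads[5].len == VHOST_DMA_DONE_LEN	completed
 *	heads[6].len == VHOST_DMA_DONE_LEN	completed
 *	heads[7].len			still in flight
 *	heads[8].len == VHOST_DMA_DONE_LEN	completed, blocked by 7
 *
 * vhost_zerocopy_signal_used() adds heads 5 and 6 to the used ring,
 * marks them VHOST_DMA_CLEAR_LEN, advances done_idx to 7 and returns 2.
 * Slot 8 is only reported on a later call, once 7 completes; indices
 * wrap modulo UIO_MAXIOV.
 */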
@@ -400,6 +446,13 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
                        vhost_poll_stop(&dev->vqs[i].poll);
                        vhost_poll_flush(&dev->vqs[i].poll);
                }
+               /* Wait for all lower device DMAs done. */
+               if (dev->vqs[i].ubufs)
+                       vhost_ubuf_put_and_wait(dev->vqs[i].ubufs);
+
+               /* Signal guest as appropriate. */
+               vhost_zerocopy_signal_used(&dev->vqs[i]);
+
                if (dev->vqs[i].error_ctx)
                        eventfd_ctx_put(dev->vqs[i].error_ctx);
                if (dev->vqs[i].error)
@@ -578,17 +631,6 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
        return 0;
 }
 
-static int init_used(struct vhost_virtqueue *vq,
-                    struct vring_used __user *used)
-{
-       int r = put_user(vq->used_flags, &used->flags);
-
-       if (r)
-               return r;
-       vq->signalled_used_valid = false;
-       return get_user(vq->last_used_idx, &used->idx);
-}
-
 static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
 {
        struct file *eventfp, *filep = NULL,
@@ -701,10 +743,6 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
                        }
                }
 
-               r = init_used(vq, (struct vring_used __user *)(unsigned long)
-                             a.used_user_addr);
-               if (r)
-                       break;
                vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG));
                vq->desc = (void __user *)(unsigned long)a.desc_user_addr;
                vq->avail = (void __user *)(unsigned long)a.avail_user_addr;
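Note: with init_used() removed from the VHOST_SET_VRING_ADDR path, initialising the used ring becomes the backend's responsibility via the new vhost_init_used() added later in this patch, which also returns early while no backend (private_data) is attached. A hedged sketch of the expected call site; the backend side (e.g. a SET_BACKEND-style ioctl in vhost-net) is not in this diff:

/* Sketch only: install the backend, then (re)initialise the used ring.
 * Error handling and locking are elided. */
vq->private_data = sock;
r = vhost_init_used(vq);
if (r)
	goto err_used;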
@@ -959,6 +997,57 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
        return 0;
 }
 
+static int vhost_update_used_flags(struct vhost_virtqueue *vq)
+{
+       void __user *used;
+       if (__put_user(vq->used_flags, &vq->used->flags) < 0)
+               return -EFAULT;
+       if (unlikely(vq->log_used)) {
+               /* Make sure the flag is seen before log. */
+               smp_wmb();
+               /* Log used flag write. */
+               used = &vq->used->flags;
+               log_write(vq->log_base, vq->log_addr +
+                         (used - (void __user *)vq->used),
+                         sizeof vq->used->flags);
+               if (vq->log_ctx)
+                       eventfd_signal(vq->log_ctx, 1);
+       }
+       return 0;
+}
+
+static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
+{
+       if (__put_user(vq->avail_idx, vhost_avail_event(vq)))
+               return -EFAULT;
+       if (unlikely(vq->log_used)) {
+               void __user *used;
+               /* Make sure the event is seen before log. */
+               smp_wmb();
+               /* Log avail event write */
+               used = vhost_avail_event(vq);
+               log_write(vq->log_base, vq->log_addr +
+                         (used - (void __user *)vq->used),
+                         sizeof *vhost_avail_event(vq));
+               if (vq->log_ctx)
+                       eventfd_signal(vq->log_ctx, 1);
+       }
+       return 0;
+}
+
+int vhost_init_used(struct vhost_virtqueue *vq)
+{
+       int r;
+       if (!vq->private_data)
+               return 0;
+
+       r = vhost_update_used_flags(vq);
+       if (r)
+               return r;
+       vq->signalled_used_valid = false;
+       return get_user(vq->last_used_idx, &vq->used->idx);
+}
+
 static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
                          struct iovec iov[], int iov_size)
 {
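Note: both new helpers log the dirty page by taking the offset of the written field within the used ring and adding it to log_addr, replacing the open-coded logging removed from vhost_enable_notify() below. For reference, the layout being indexed is the standard vring_used from the virtio ring ABI:

/* From the virtio ring ABI (include/linux/virtio_ring.h):
 *
 *	struct vring_used {
 *		__u16 flags;			offset 0
 *		__u16 idx;			offset 2
 *		struct vring_used_elem ring[];	offset 4
 *	};
 *
 * vhost_update_used_flags() therefore logs log_addr + 0, while
 * vhost_update_avail_event() logs the two bytes at &used->ring[vq->num],
 * the slot the vhost_avail_event() macro at the top of this file
 * points to.
 */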
@@ -1430,34 +1519,20 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
                return false;
        vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
        if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
-               r = put_user(vq->used_flags, &vq->used->flags);
+               r = vhost_update_used_flags(vq);
                if (r) {
                        vq_err(vq, "Failed to enable notification at %p: %d\n",
                               &vq->used->flags, r);
                        return false;
                }
        } else {
-               r = put_user(vq->avail_idx, vhost_avail_event(vq));
+               r = vhost_update_avail_event(vq, vq->avail_idx);
                if (r) {
                        vq_err(vq, "Failed to update avail event index at %p: %d\n",
                               vhost_avail_event(vq), r);
                        return false;
                }
        }
-       if (unlikely(vq->log_used)) {
-               void __user *used;
-               /* Make sure data is seen before log. */
-               smp_wmb();
-               used = vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX) ?
-                       &vq->used->flags : vhost_avail_event(vq);
-               /* Log used flags or event index entry write. Both are 16 bit
-                * fields. */
-               log_write(vq->log_base, vq->log_addr +
-                          (used - (void __user *)vq->used),
-                         sizeof(u16));
-               if (vq->log_ctx)
-                       eventfd_signal(vq->log_ctx, 1);
-       }
        /* They could have slipped one in as we were doing that: make
         * sure it's written, then check again. */
        smp_mb();
@@ -1480,9 +1555,55 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
                return;
        vq->used_flags |= VRING_USED_F_NO_NOTIFY;
        if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
-               r = put_user(vq->used_flags, &vq->used->flags);
+               r = vhost_update_used_flags(vq);
                if (r)
                        vq_err(vq, "Failed to enable notification at %p: %d\n",
                               &vq->used->flags, r);
        }
 }
+
+static void vhost_zerocopy_done_signal(struct kref *kref)
+{
+       struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref,
+                                                   kref);
+       wake_up(&ubufs->wait);
+}
+
+struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq,
+                                       bool zcopy)
+{
+       struct vhost_ubuf_ref *ubufs;
+       /* No zero copy backend? Nothing to count. */
+       if (!zcopy)
+               return NULL;
+       ubufs = kmalloc(sizeof *ubufs, GFP_KERNEL);
+       if (!ubufs)
+               return ERR_PTR(-ENOMEM);
+       kref_init(&ubufs->kref);
+       init_waitqueue_head(&ubufs->wait);
+       ubufs->vq = vq;
+       return ubufs;
+}
+
+void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs)
+{
+       kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
+}
+
+void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
+{
+       kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
+       wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
+       kfree(ubufs);
+}
+
+void vhost_zerocopy_callback(void *arg)
+{
+       struct ubuf_info *ubuf = arg;
+       struct vhost_ubuf_ref *ubufs = ubuf->arg;
+       struct vhost_virtqueue *vq = ubufs->vq;
+
+       /* set len = 1 to mark this desc buffers done DMA */
+       vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
+       kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
+}
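
Note: the vhost_ubuf_ref object ties the zero-copy pieces together. The backend allocates one per virtqueue when a zero-copy capable backend is attached, takes a kref per buffer handed to the lower device, and vhost_zerocopy_callback(), run on DMA completion, marks the head done and drops that reference; vhost_ubuf_put_and_wait() then lets vhost_dev_cleanup() block until everything in flight has drained. A rough sketch of the intended usage; the submit side lives in the backend (not in this diff), so those names are assumptions:

/* Sketch only: how a backend is expected to drive the ubuf lifecycle. */
struct vhost_ubuf_ref *ubufs = vhost_ubuf_alloc(vq, zcopy);
if (IS_ERR(ubufs))
	return PTR_ERR(ubufs);
vq->ubufs = ubufs;

/* per zero-copy packet handed to the lower device */
kref_get(&ubufs->kref);
ubuf->callback = vhost_zerocopy_callback;	/* invoked on DMA completion */
ubuf->arg = ubufs;
ubuf->desc = vq->upend_idx;

/* teardown (also done in vhost_dev_cleanup() above) */
vhost_ubuf_put_and_wait(vq->ubufs);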