9p/trans_virtio: discard zero-length reply
[pandora-kernel.git] / net / 9p / trans_virtio.c
index e317583..a107265 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/inet.h>
 #include <linux/idr.h>
 #include <linux/file.h>
+#include <linux/highmem.h>
 #include <linux/slab.h>
 #include <net/9p/9p.h>
 #include <linux/parser.h>
@@ -150,12 +151,10 @@ static void req_done(struct virtqueue *vq)
        while (1) {
                spin_lock_irqsave(&chan->lock, flags);
                rc = virtqueue_get_buf(chan->vq, &len);
-
                if (rc == NULL) {
                        spin_unlock_irqrestore(&chan->lock, flags);
                        break;
                }
-
                chan->ring_bufs_avail = 1;
                spin_unlock_irqrestore(&chan->lock, flags);
                /* Wakeup if anyone waiting for VirtIO ring space. */
@@ -163,19 +162,9 @@ static void req_done(struct virtqueue *vq)
                P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
                P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
                req = p9_tag_lookup(chan->client, rc->tag);
-               if (req->tc->private) {
-                       struct trans_rpage_info *rp = req->tc->private;
-                       int p = rp->rp_nr_pages;
-                       /*Release pages */
-                       p9_release_req_pages(rp);
-                       atomic_sub(p, &vp_pinned);
-                       wake_up(&vp_wq);
-                       if (rp->rp_alloc)
-                               kfree(rp);
-                       req->tc->private = NULL;
-               }
                req->status = REQ_STATUS_RCVD;
-               p9_client_cb(chan->client, req);
+               if (len)
+                       p9_client_cb(chan->client, req);
        }
 }
 
@@ -193,9 +182,8 @@ static void req_done(struct virtqueue *vq)
  *
  */
 
-static int
-pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
-                                                               int count)
+static int pack_sg_list(struct scatterlist *sg, int start,
+                       int limit, char *data, int count)
 {
        int s;
        int index = start;
@@ -224,31 +212,36 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
  * this takes a list of pages.
  * @sg: scatter/gather list to pack into
  * @start: which segment of the sg_list to start at
- * @pdata_off: Offset into the first page
  * @**pdata: a list of pages to add into sg.
+ * @nr_pages: number of pages to pack into the scatter/gather list
+ * @data: data to pack into scatter/gather list
  * @count: amount of data to pack into the scatter/gather list
  */
 static int
-pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
-               struct page **pdata, int count)
+pack_sg_list_p(struct scatterlist *sg, int start, int limit,
+              struct page **pdata, int nr_pages, char *data, int count)
 {
-       int s;
-       int i = 0;
+       int i = 0, s;
+       int data_off;
        int index = start;
 
-       if (pdata_off) {
-               s = min((int)(PAGE_SIZE - pdata_off), count);
-               sg_set_page(&sg[index++], pdata[i++], s, pdata_off);
-               count -= s;
-       }
-
-       while (count) {
-               BUG_ON(index > limit);
-               s = min((int)PAGE_SIZE, count);
-               sg_set_page(&sg[index++], pdata[i++], s, 0);
+       BUG_ON(nr_pages > (limit - start));
+       /*
+        * if the first page doesn't start at
+        * page boundary find the offset
+        */
+       data_off = offset_in_page(data);
+       while (nr_pages) {
+               s = rest_of_page(data);
+               if (s > count)
+                       s = count;
+               sg_set_page(&sg[index++], pdata[i++], s, data_off);
+               data_off = 0;
+               data += s;
                count -= s;
+               nr_pages--;
        }
-       return index-start;
+       return index - start;
 }
 
 /**
@@ -261,143 +254,211 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
 static int
 p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 {
-       int in, out, inp, outp;
-       struct virtio_chan *chan = client->trans;
+       int err;
+       int in, out;
        unsigned long flags;
-       size_t pdata_off = 0;
-       struct trans_rpage_info *rpinfo = NULL;
-       int err, pdata_len = 0;
+       struct virtio_chan *chan = client->trans;
 
        P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
 
        req->status = REQ_STATUS_SENT;
+req_retry:
+       spin_lock_irqsave(&chan->lock, flags);
 
-       if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) {
-               int nr_pages = p9_nr_pages(req);
-               int rpinfo_size = sizeof(struct trans_rpage_info) +
-                       sizeof(struct page *) * nr_pages;
+       /* Handle out VirtIO ring buffers */
+       out = pack_sg_list(chan->sg, 0,
+                          VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
 
-               if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
-                       err = wait_event_interruptible(vp_wq,
-                               atomic_read(&vp_pinned) < chan->p9_max_pages);
+       in = pack_sg_list(chan->sg, out,
+                         VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
+
+       err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
+       if (err < 0) {
+               if (err == -ENOSPC) {
+                       chan->ring_bufs_avail = 0;
+                       spin_unlock_irqrestore(&chan->lock, flags);
+                       err = wait_event_killable(*chan->vc_wq,
+                                                 chan->ring_bufs_avail);
                        if (err  == -ERESTARTSYS)
                                return err;
-                       P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n");
-               }
 
-               if (rpinfo_size <= (req->tc->capacity - req->tc->size)) {
-                       /* We can use sdata */
-                       req->tc->private = req->tc->sdata + req->tc->size;
-                       rpinfo = (struct trans_rpage_info *)req->tc->private;
-                       rpinfo->rp_alloc = 0;
+                       P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
+                       goto req_retry;
                } else {
-                       req->tc->private = kmalloc(rpinfo_size, GFP_NOFS);
-                       if (!req->tc->private) {
-                               P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: "
-                                       "private kmalloc returned NULL");
-                               return -ENOMEM;
-                       }
-                       rpinfo = (struct trans_rpage_info *)req->tc->private;
-                       rpinfo->rp_alloc = 1;
+                       spin_unlock_irqrestore(&chan->lock, flags);
+                       P9_DPRINTK(P9_DEBUG_TRANS,
+                                       "9p debug: "
+                                       "virtio rpc add_buf returned failure");
+                       return -EIO;
                }
+       }
+       virtqueue_kick(chan->vq);
+       spin_unlock_irqrestore(&chan->lock, flags);
+
+       P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
+       return 0;
+}
 
-               err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages,
-                               req->tc->id == P9_TREAD ? 1 : 0);
-               if (err < 0) {
-                       if (rpinfo->rp_alloc)
-                               kfree(rpinfo);
+static int p9_get_mapped_pages(struct virtio_chan *chan,
+                              struct page **pages, char *data,
+                              int nr_pages, int write, int kern_buf)
+{
+       int err;
+       if (!kern_buf) {
+               /*
+                * We allow only p9_max_pages pinned. We wait for
+                * other zc requests to finish here.
+                */
+               if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
+                       err = wait_event_killable(vp_wq,
+                             (atomic_read(&vp_pinned) < chan->p9_max_pages));
+                       if (err == -ERESTARTSYS)
+                               return err;
+               }
+               err = p9_payload_gup(data, &nr_pages, pages, write);
+               if (err < 0)
                        return err;
-               } else {
-                       atomic_add(rpinfo->rp_nr_pages, &vp_pinned);
+               atomic_add(nr_pages, &vp_pinned);
+       } else {
+               /* kernel buffer, no need to pin pages */
+               int s, index = 0;
+               int count = nr_pages;
+               while (nr_pages) {
+                       s = rest_of_page(data);
+                       pages[index++] = kmap_to_page(data);
+                       data += s;
+                       nr_pages--;
                }
+               nr_pages = count;
        }
+       return nr_pages;
+}
 
-req_retry_pinned:
-       spin_lock_irqsave(&chan->lock, flags);
+/**
+ * p9_virtio_zc_request - issue a zero copy request
+ * @client: client instance issuing the request
+ * @req: request to be issued
+ * @uidata: user buffer that should be used for zero copy read
+ * @uodata: user buffer that should be used for zero copy write
+ * @inlen: read buffer size
+ * @outlen: write buffer size
+ * @in_hdr_len: read header size, i.e. size of the response protocol data
+ * @kern_buf: non-zero if the buffers are kernel buffers, which are not pinned
+ */
+static int
+p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
+                    char *uidata, char *uodata, int inlen,
+                    int outlen, int in_hdr_len, int kern_buf)
+{
+       int in, out, err;
+       unsigned long flags;
+       int in_nr_pages = 0, out_nr_pages = 0;
+       struct page **in_pages = NULL, **out_pages = NULL;
+       struct virtio_chan *chan = client->trans;
 
-       /* Handle out VirtIO ring buffers */
-       out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
-                       req->tc->size);
-
-       if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) {
-               /* We have additional write payload buffer to take care */
-               if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
-                       outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
-                                       pdata_off, rpinfo->rp_data, pdata_len);
-               } else {
-                       char *pbuf;
-                       if (req->tc->pubuf)
-                               pbuf = (__force char *) req->tc->pubuf;
-                       else
-                               pbuf = req->tc->pkbuf;
-                       outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf,
-                                       req->tc->pbuf_size);
+       P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
+
+       if (uodata) {
+               out_nr_pages = p9_nr_pages(uodata, outlen);
+               out_pages = kmalloc(sizeof(struct page *) * out_nr_pages,
+                                   GFP_NOFS);
+               if (!out_pages) {
+                       err = -ENOMEM;
+                       goto err_out;
+               }
+               out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata,
+                                                  out_nr_pages, 0, kern_buf);
+               if (out_nr_pages < 0) {
+                       err = out_nr_pages;
+                       kfree(out_pages);
+                       out_pages = NULL;
+                       goto err_out;
                }
-               out += outp;
        }
-
-       /* Handle in VirtIO ring buffers */
-       if (req->tc->pbuf_size &&
-               ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) {
-               /*
-                * Take care of additional Read payload.
-                * 11 is the read/write header = PDU Header(7) + IO Size (4).
-                * Arrange in such a way that server places header in the
-                * alloced memory and payload onto the user buffer.
-                */
-               inp = pack_sg_list(chan->sg, out,
-                                  VIRTQUEUE_NUM, req->rc->sdata, 11);
-               /*
-                * Running executables in the filesystem may result in
-                * a read request with kernel buffer as opposed to user buffer.
-                */
-               if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
-                       in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM,
-                                       pdata_off, rpinfo->rp_data, pdata_len);
-               } else {
-                       char *pbuf;
-                       if (req->tc->pubuf)
-                               pbuf = (__force char *) req->tc->pubuf;
-                       else
-                               pbuf = req->tc->pkbuf;
-
-                       in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM,
-                                       pbuf, req->tc->pbuf_size);
+       if (uidata) {
+               in_nr_pages = p9_nr_pages(uidata, inlen);
+               in_pages = kmalloc(sizeof(struct page *) * in_nr_pages,
+                                  GFP_NOFS);
+               if (!in_pages) {
+                       err = -ENOMEM;
+                       goto err_out;
+               }
+               in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata,
+                                                 in_nr_pages, 1, kern_buf);
+               if (in_nr_pages < 0) {
+                       err = in_nr_pages;
+                       kfree(in_pages);
+                       in_pages = NULL;
+                       goto err_out;
                }
-               in += inp;
-       } else {
-               in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM,
-                                 req->rc->sdata, req->rc->capacity);
        }
+       req->status = REQ_STATUS_SENT;
+req_retry_pinned:
+       spin_lock_irqsave(&chan->lock, flags);
+       /* out data */
+       out = pack_sg_list(chan->sg, 0,
+                          VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+
+       if (out_pages)
+               out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
+                                     out_pages, out_nr_pages, uodata, outlen);
+       /*
+        * Take care of in data.
+        * For example, TREAD has 11.
+        * 11 is the read/write header = PDU Header(7) + IO Size (4).
+        * Arrange in such a way that server places header in the
+        * alloced memory and payload onto the user buffer.
+        */
+       in = pack_sg_list(chan->sg, out,
+                         VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
+       if (in_pages)
+               in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
+                                    in_pages, in_nr_pages, uidata, inlen);
 
        err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
        if (err < 0) {
                if (err == -ENOSPC) {
                        chan->ring_bufs_avail = 0;
                        spin_unlock_irqrestore(&chan->lock, flags);
-                       err = wait_event_interruptible(*chan->vc_wq,
-                                                       chan->ring_bufs_avail);
+                       err = wait_event_killable(*chan->vc_wq,
+                                                 chan->ring_bufs_avail);
                        if (err  == -ERESTARTSYS)
-                               return err;
+                               goto err_out;
 
                        P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
                        goto req_retry_pinned;
                } else {
                        spin_unlock_irqrestore(&chan->lock, flags);
                        P9_DPRINTK(P9_DEBUG_TRANS,
-                                       "9p debug: "
-                                       "virtio rpc add_buf returned failure");
-                       if (rpinfo && rpinfo->rp_alloc)
-                               kfree(rpinfo);
-                       return -EIO;
+                                  "9p debug: "
+                                  "virtio rpc add_buf returned failure");
+                       err = -EIO;
+                       goto err_out;
                }
        }
-
        virtqueue_kick(chan->vq);
        spin_unlock_irqrestore(&chan->lock, flags);
-
        P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
-       return 0;
+       err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD);
+       /*
+        * Non kernel buffers are pinned, unpin them
+        */
+err_out:
+       if (!kern_buf) {
+               if (in_pages) {
+                       p9_release_pages(in_pages, in_nr_pages);
+                       atomic_sub(in_nr_pages, &vp_pinned);
+               }
+               if (out_pages) {
+                       p9_release_pages(out_pages, out_nr_pages);
+                       atomic_sub(out_nr_pages, &vp_pinned);
+               }
+               /* wakeup anybody waiting for slots to pin pages */
+               wake_up(&vp_wq);
+       }
+       kfree(in_pages);
+       kfree(out_pages);
+       return err;
 }
 
 static ssize_t p9_mount_tag_show(struct device *dev,
@@ -591,8 +652,8 @@ static struct p9_trans_module p9_virtio_trans = {
        .create = p9_virtio_create,
        .close = p9_virtio_close,
        .request = p9_virtio_request,
+       .zc_request = p9_virtio_zc_request,
        .cancel = p9_virtio_cancel,
-
        /*
         * We leave one entry for input and one entry for response
         * headers. We also skip one more entry to accomodate, address
@@ -600,7 +661,6 @@ static struct p9_trans_module p9_virtio_trans = {
         * page in zero copy.
         */
        .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3),
-       .pref = P9_TRANS_PREF_PAYLOAD_SEP,
        .def = 0,
        .owner = THIS_MODULE,
 };