xen-blkback: don't leak stack data via response ring
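
make_response() used to assemble a struct blkif_response on the kernel
stack and memcpy() it into the shared response ring. The compiler never
initializes the padding bytes inside that structure, so adjacent stack
data leaked to the frontend, which may be an untrusted guest (XSA-216).
The fix stores the three response fields directly into the ring slot
instead. Note that the diff shown on this pandora-kernel.git page also
spans earlier backported work: BLKIF_OP_DISCARD support, WRITE_BARRIER
emulation via I/O draining, and a sanity check on frontend-supplied
ring indexes.

A minimal user-space sketch of why the old code could leak, assuming
the native response layout from xen/interface/io/blkif.h and a 64-bit
build (the struct mirror below is illustrative, not the kernel's own
definition):

    #include <stdint.h>
    #include <stdio.h>

    /* Mirror of the native struct blkif_response field layout. */
    struct blkif_response {
            uint64_t id;        /* offset 0                         */
            uint8_t  operation; /* offset 8; 1 padding byte follows */
            int16_t  status;    /* offset 10; 4 tail padding bytes  */
    };

    int main(void)
    {
            /* 11 payload bytes, but sizeof() is 16: the other 5 bytes
             * are uninitialized padding that a memcpy() of a stack-local
             * instance copies verbatim into the guest-visible ring. */
            printf("sizeof=%zu payload=%zu\n",
                   sizeof(struct blkif_response),
                   sizeof(uint64_t) + sizeof(uint8_t) + sizeof(int16_t));
            return 0;
    }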
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 1540792..347aabc 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -39,6 +39,9 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
+#include <linux/loop.h>
+#include <linux/falloc.h>
+#include <linux/fs.h>
 
 #include <xen/events.h>
 #include <xen/page.h>
@@ -258,19 +261,23 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
 
 static void print_stats(struct xen_blkif *blkif)
 {
-       pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d\n",
+       pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d"
+                "  |  ds %4d\n",
                 current->comm, blkif->st_oo_req,
-                blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
+                blkif->st_rd_req, blkif->st_wr_req,
+                blkif->st_f_req, blkif->st_ds_req);
        blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
        blkif->st_rd_req = 0;
        blkif->st_wr_req = 0;
        blkif->st_oo_req = 0;
+       blkif->st_ds_req = 0;
 }
 
 int xen_blkif_schedule(void *arg)
 {
        struct xen_blkif *blkif = arg;
        struct xen_vbd *vbd = &blkif->vbd;
+       int ret;
 
        xen_blkif_get(blkif);
 
@@ -291,8 +298,12 @@ int xen_blkif_schedule(void *arg)
                blkif->waiting_reqs = 0;
                smp_mb(); /* clear flag *before* checking for work */
 
-               if (do_block_io_op(blkif))
+               ret = do_block_io_op(blkif);
+               if (ret > 0)
                        blkif->waiting_reqs = 1;
+               if (ret == -EACCES)
+                       wait_event_interruptible(blkif->shutdown_wq,
+                                                kthread_should_stop());
 
                if (log_stats && time_after(jiffies, blkif->st_print))
                        print_stats(blkif);
@@ -410,6 +421,59 @@ static int xen_blkbk_map(struct blkif_request *req,
        return ret;
 }
 
+static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
+{
+       int err = 0;
+       int status = BLKIF_RSP_OKAY;
+       struct block_device *bdev = blkif->vbd.bdev;
+
+       if (blkif->blk_backend_type == BLKIF_BACKEND_PHY)
+               /* just forward the discard request */
+               err = blkdev_issue_discard(bdev,
+                               req->u.discard.sector_number,
+                               req->u.discard.nr_sectors,
+                               GFP_KERNEL, 0);
+       else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
+               /* punch a hole in the backing file */
+               struct loop_device *lo = bdev->bd_disk->private_data;
+               struct file *file = lo->lo_backing_file;
+
+               if (file->f_op->fallocate)
+                       err = file->f_op->fallocate(file,
+                               FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+                               req->u.discard.sector_number << 9,
+                               req->u.discard.nr_sectors << 9);
+               else
+                       err = -EOPNOTSUPP;
+       } else
+               err = -EOPNOTSUPP;
+
+       if (err == -EOPNOTSUPP) {
+               pr_debug(DRV_PFX "discard op failed, not supported\n");
+               status = BLKIF_RSP_EOPNOTSUPP;
+       } else if (err)
+               status = BLKIF_RSP_ERROR;
+
+       make_response(blkif, req->id, req->operation, status);
+}
+
+static void xen_blk_drain_io(struct xen_blkif *blkif)
+{
+       atomic_set(&blkif->drain, 1);
+       do {
+               /* The initial value is one, and one refcnt is taken at
+                * the start of the xen_blkif_schedule thread. */
+               if (atomic_read(&blkif->refcnt) <= 2)
+                       break;
+               wait_for_completion_interruptible_timeout(
+                               &blkif->drain_complete, HZ);
+
+               if (!atomic_read(&blkif->drain))
+                       break;
+       } while (!kthread_should_stop());
+       atomic_set(&blkif->drain, 0);
+}
+
 /*
  * Completion callback on the bio's. Called as bh->b_end_io()
  */
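
Two helpers are added above. xen_blk_discard() services BLKIF_OP_DISCARD
either by forwarding to blkdev_issue_discard() for a physical backend,
or by punching a hole in the backing file through the fallocate file
operation when the device is loop-mounted. xen_blk_drain_io() parks the
dispatcher until all in-flight requests have completed: the interface
holds one base reference plus the one taken by the xen_blkif_schedule()
thread, so a refcount of two means nothing is outstanding.

The "<< 9" shifts convert the request's 512-byte sector units into the
byte offset and length that fallocate() expects; a tiny sketch with
made-up example values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t sector_number = 2048, nr_sectors = 16; /* examples */

            /* 512-byte sectors -> bytes, as in xen_blk_discard() */
            printf("punch %llu bytes at offset %llu\n",
                   (unsigned long long)(nr_sectors << 9),
                   (unsigned long long)(sector_number << 9));
            return 0;
    }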
@@ -422,6 +486,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
                pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
                xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
                pending_req->status = BLKIF_RSP_EOPNOTSUPP;
+       } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
+                   (error == -EOPNOTSUPP)) {
+               pr_debug(DRV_PFX "write barrier op failed, not supported\n");
+               xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
+               pending_req->status = BLKIF_RSP_EOPNOTSUPP;
        } else if (error) {
                pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
                         " error=%d\n", error);
@@ -438,6 +507,10 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
                make_response(pending_req->blkif, pending_req->id,
                              pending_req->operation, pending_req->status);
                xen_blkif_put(pending_req->blkif);
+               if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
+                       if (atomic_read(&pending_req->blkif->drain))
+                               complete(&pending_req->blkif->drain_complete);
+               }
                free_req(pending_req);
        }
 }
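
The complete() added here is the other half of the drain handshake: when
the last in-flight request finishes and only the two baseline references
remain, the barrier writer sleeping in xen_blk_drain_io() is woken.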
@@ -471,6 +544,12 @@ __do_block_io_op(struct xen_blkif *blkif)
        rp = blk_rings->common.sring->req_prod;
        rmb(); /* Ensure we see queued requests up to 'rp'. */
 
+       if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) {
+               rc = blk_rings->common.rsp_prod_pvt;
+               pr_warn(DRV_PFX "Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n",
+                       rp, rc, rp - rc, blkif->vbd.pdevice);
+               return -EACCES;
+       }
        while (rc != rp) {
 
                if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
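
This check stops a malicious or buggy frontend from claiming more
outstanding requests than the shared ring can hold; returning -EACCES
makes xen_blkif_schedule() (first hunk above) park on shutdown_wq rather
than spin on fabricated work. A user-space sketch of the index
arithmetic that the RING_REQUEST_PROD_OVERFLOW() macro from
xen/interface/io/ring.h performs (the 32-slot ring size is assumed for
the example):

    #include <stdint.h>
    #include <stdio.h>

    #define RING_SLOTS 32 /* assumed ring size for the sketch */

    /* Unsigned subtraction keeps this correct across index wraparound. */
    static int request_prod_overflow(uint32_t req_prod, uint32_t rsp_prod_pvt)
    {
            return (req_prod - rsp_prod_pvt) > RING_SLOTS;
    }

    int main(void)
    {
            printf("%d\n", request_prod_overflow(50, 10)); /* 1: bogus */
            printf("%d\n", request_prod_overflow(20, 10)); /* 0: sane  */
            return 0;
    }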
@@ -532,7 +611,6 @@ do_block_io_op(struct xen_blkif *blkif)
 
        return more_to_do;
 }
-
 /*
  * Transmutation of the 'struct blkif_request' to a proper 'struct bio'
  * and call the 'submit_bio' to pass it to the underlying storage.
@@ -549,6 +627,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
        int i, nbio = 0;
        int operation;
        struct blk_plug plug;
+       bool drain = false;
 
        switch (req->operation) {
        case BLKIF_OP_READ:
@@ -559,11 +638,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
                blkif->st_wr_req++;
                operation = WRITE_ODIRECT;
                break;
+       case BLKIF_OP_WRITE_BARRIER:
+               drain = true; /* fall through to the flush below */
        case BLKIF_OP_FLUSH_DISKCACHE:
                blkif->st_f_req++;
                operation = WRITE_FLUSH;
                break;
-       case BLKIF_OP_WRITE_BARRIER:
+       case BLKIF_OP_DISCARD:
+               blkif->st_ds_req++;
+               operation = REQ_DISCARD;
+               break;
        default:
                operation = 0; /* make gcc happy */
                goto fail_response;
@@ -572,7 +656,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 
        /* Check that the number of segments is sane. */
        nseg = req->nr_segments;
-       if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
+       if (unlikely(nseg == 0 && operation != WRITE_FLUSH &&
+                               operation != REQ_DISCARD) ||
            unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
                pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
                         nseg);
@@ -621,16 +706,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
                }
        }
 
+       /* Wait on all outstanding I/O and, once it has completed,
+        * issue the WRITE_FLUSH.
+        */
+       if (drain)
+               xen_blk_drain_io(pending_req->blkif);
+
        /*
         * If we have failed at this point, we need to undo the M2P override,
         * set gnttab_set_unmap_op on all of the grant references and perform
         * the hypercall to unmap the grants - that is all done in
         * xen_blkbk_unmap.
         */
-       if (xen_blkbk_map(req, pending_req, seg))
+       if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg))
                goto fail_flush;
 
-       /* This corresponding xen_blkif_put is done in __end_block_io_op */
+       /*
+        * The corresponding xen_blkif_put() is done in __end_block_io_op(),
+        * or below (in the "!bio" branch) when handling a BLKIF_OP_DISCARD.
+        */
        xen_blkif_get(blkif);
 
        for (i = 0; i < nseg; i++) {
@@ -654,27 +748,28 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
                preq.sector_number += seg[i].nsec;
        }
 
-       /* This will be hit if the operation was a flush. */
+       /* This will be hit if the operation was a flush or discard. */
        if (!bio) {
-               BUG_ON(operation != WRITE_FLUSH);
+               BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD);
 
-               bio = bio_alloc(GFP_KERNEL, 0);
-               if (unlikely(bio == NULL))
-                       goto fail_put_bio;
+               if (operation == WRITE_FLUSH) {
+                       bio = bio_alloc(GFP_KERNEL, 0);
+                       if (unlikely(bio == NULL))
+                               goto fail_put_bio;
 
-               biolist[nbio++] = bio;
-               bio->bi_bdev    = preq.bdev;
-               bio->bi_private = pending_req;
-               bio->bi_end_io  = end_block_io_op;
+                       biolist[nbio++] = bio;
+                       bio->bi_bdev    = preq.bdev;
+                       bio->bi_private = pending_req;
+                       bio->bi_end_io  = end_block_io_op;
+               } else if (operation == REQ_DISCARD) {
+                       xen_blk_discard(blkif, req);
+                       xen_blkif_put(blkif);
+                       free_req(pending_req);
+                       return 0;
+               }
        }
 
-       /*
-        * We set it one so that the last submit_bio does not have to call
-        * atomic_inc.
-        */
        atomic_set(&pending_req->pendcnt, nbio);
-
-       /* Get a reference count for the disk queue and start sending I/O */
        blk_start_plug(&plug);
 
        for (i = 0; i < nbio; i++)
@@ -685,7 +780,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 
        if (operation == READ)
                blkif->st_rd_sect += preq.nr_sects;
-       else if (operation == WRITE || operation == WRITE_FLUSH)
+       else if (operation & WRITE)
                blkif->st_wr_sect += preq.nr_sects;
 
        return 0;
@@ -702,6 +797,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
  fail_put_bio:
        for (i = 0; i < nbio; i++)
                bio_put(biolist[i]);
+       atomic_set(&pending_req->pendcnt, 1);
        __end_block_io_op(pending_req, -EINVAL);
        msleep(1); /* back off a bit */
        return -EIO;
@@ -715,33 +811,34 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 static void make_response(struct xen_blkif *blkif, u64 id,
                          unsigned short op, int st)
 {
-       struct blkif_response  resp;
+       struct blkif_response *resp;
        unsigned long     flags;
        union blkif_back_rings *blk_rings = &blkif->blk_rings;
        int notify;
 
-       resp.id        = id;
-       resp.operation = op;
-       resp.status    = st;
-
        spin_lock_irqsave(&blkif->blk_ring_lock, flags);
        /* Place on the response ring for the relevant domain. */
        switch (blkif->blk_protocol) {
        case BLKIF_PROTOCOL_NATIVE:
-               memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
-                      &resp, sizeof(resp));
+               resp = RING_GET_RESPONSE(&blk_rings->native,
+                                        blk_rings->native.rsp_prod_pvt);
                break;
        case BLKIF_PROTOCOL_X86_32:
-               memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
-                      &resp, sizeof(resp));
+               resp = RING_GET_RESPONSE(&blk_rings->x86_32,
+                                        blk_rings->x86_32.rsp_prod_pvt);
                break;
        case BLKIF_PROTOCOL_X86_64:
-               memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
-                      &resp, sizeof(resp));
+               resp = RING_GET_RESPONSE(&blk_rings->x86_64,
+                                        blk_rings->x86_64.rsp_prod_pvt);
                break;
        default:
                BUG();
        }
+
+       resp->id        = id;
+       resp->operation = op;
+       resp->status    = st;
+
        blk_rings->common.rsp_prod_pvt++;
        RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
        spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
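
This is the titular fix: the response fields are now stored directly
into the shared ring slot, so the structure's padding bytes are never
written from backend stack memory. It relies on all three protocol
flavors of the response structure sharing an identical layout, which
the old memcpy() code already assumed.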
@@ -765,9 +862,9 @@ static int __init xen_blkif_init(void)
 
        mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
 
-       blkbk->pending_reqs          = kmalloc(sizeof(blkbk->pending_reqs[0]) *
+       blkbk->pending_reqs          = kzalloc(sizeof(blkbk->pending_reqs[0]) *
                                        xen_blkif_reqs, GFP_KERNEL);
-       blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) *
+       blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
                                        mmap_pages, GFP_KERNEL);
        blkbk->pending_pages         = kzalloc(sizeof(blkbk->pending_pages[0]) *
                                        mmap_pages, GFP_KERNEL);
@@ -790,8 +887,6 @@ static int __init xen_blkif_init(void)
        if (rc)
                goto failed_init;
 
-       memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs));
-
        INIT_LIST_HEAD(&blkbk->pending_free);
        spin_lock_init(&blkbk->pending_free_lock);
        init_waitqueue_head(&blkbk->pending_free_wq);
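
The final two hunks fix the zeroing of the request pool: the dropped
memset() cleared only sizeof(pointer) bytes rather than the whole array,
so the allocation switches to kzalloc(); pending_grant_handles can go
back to plain kmalloc() because every handle slot is set to
BLKBACK_INVALID_HANDLE explicitly later in this function. A hypothetical
repro of the dropped memset()'s bug (the names mirror the driver's, the
sizes are made up):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
            struct pending_req { int dummy[16]; } *pending_reqs;

            pending_reqs = malloc(64 * sizeof(*pending_reqs));
            /* sizeof(pending_reqs) is the size of the POINTER (8 bytes
             * on 64-bit), so this clears almost none of the array: */
            memset(pending_reqs, 0, sizeof(pending_reqs));
            printf("cleared %zu of %zu bytes\n", sizeof(pending_reqs),
                   64 * sizeof(*pending_reqs));
            free(pending_reqs);
            return 0;
    }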