Merge branch 'for-2.6.36' of git://git.kernel.dk/linux-2.6-block
[pandora-kernel.git] / drivers / block / xen-blkfront.c
index f63ac3d..ac1b682 100644
@@ -41,6 +41,7 @@
 #include <linux/cdrom.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <linux/scatterlist.h>
 
 #include <xen/xen.h>
@@ -79,6 +80,7 @@ static const struct block_device_operations xlvbd_block_fops;
  */
 struct blkfront_info
 {
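+       /*
+        * Protects ->gd and ->xbdev during open/release, closing
+        * and hot-unplug.
+        */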
+       struct mutex mutex;
        struct xenbus_device *xbdev;
        struct gendisk *gd;
        int vdevice;
@@ -95,16 +97,14 @@ struct blkfront_info
        unsigned long shadow_free;
        int feature_barrier;
        int is_ready;
-
-       /**
-        * The number of people holding this device open.  We won't allow a
-        * hot-unplug unless this is 0.
-        */
-       int users;
 };
 
 static DEFINE_SPINLOCK(blkif_io_lock);
 
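+/* Bitmap of in-use minor numbers, shared by all xlvbd gendisks. */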
+static unsigned int nr_minors;
+static unsigned long *minors;
+static DEFINE_SPINLOCK(minor_lock);
+
 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
        (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
 #define GRANT_INVALID_REF      0
@@ -139,6 +139,55 @@ static void add_id_to_freelist(struct blkfront_info *info,
        info->shadow_free = id;
 }
 
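+/*
+ * Reserve the minor numbers [minor, minor + nr) in the global bitmap,
+ * growing the bitmap under minor_lock when the range extends beyond
+ * its current size.  Returns -ENOMEM if the bitmap cannot be grown
+ * and -EBUSY if any minor in the range is already in use.
+ */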
+static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
+{
+       unsigned int end = minor + nr;
+       int rc;
+
+       if (end > nr_minors) {
+               unsigned long *bitmap, *old;
+
+               bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
+                                GFP_KERNEL);
+               if (bitmap == NULL)
+                       return -ENOMEM;
+
+               spin_lock(&minor_lock);
+               if (end > nr_minors) {
+                       old = minors;
+                       memcpy(bitmap, minors,
+                              BITS_TO_LONGS(nr_minors) * sizeof(*bitmap));
+                       minors = bitmap;
+                       nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG;
+               } else
+                       old = bitmap;
+               spin_unlock(&minor_lock);
+               kfree(old);
+       }
+
+       spin_lock(&minor_lock);
+       if (find_next_bit(minors, end, minor) >= end) {
+               for (; minor < end; ++minor)
+                       __set_bit(minor, minors);
+               rc = 0;
+       } else
+               rc = -EBUSY;
+       spin_unlock(&minor_lock);
+
+       return rc;
+}
+
+static void xlbd_release_minors(unsigned int minor, unsigned int nr)
+{
+       unsigned int end = minor + nr;
+
+       BUG_ON(end > nr_minors);
+       spin_lock(&minor_lock);
+       for (; minor < end; ++minor)
+               __clear_bit(minor, minors);
+       spin_unlock(&minor_lock);
+}
+
 static void blkif_restart_queue_callback(void *arg)
 {
        struct blkfront_info *info = (struct blkfront_info *)arg;
@@ -239,7 +288,7 @@ static int blkif_queue_request(struct request *req)
 
        ring_req->operation = rq_data_dir(req) ?
                BLKIF_OP_WRITE : BLKIF_OP_READ;
-       if (blk_barrier_rq(req))
+       if (req->cmd_flags & REQ_HARDBARRIER)
                ring_req->operation = BLKIF_OP_WRITE_BARRIER;
 
        ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
@@ -310,7 +359,7 @@ static void do_blkif_request(struct request_queue *rq)
 
                blk_start_request(req);
 
-               if (!blk_fs_request(req)) {
+               if (req->cmd_type != REQ_TYPE_FS) {
                        __blk_end_request_all(req, -EIO);
                        continue;
                }
@@ -372,17 +421,22 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 static int xlvbd_barrier(struct blkfront_info *info)
 {
        int err;
+       const char *barrier;
 
-       err = blk_queue_ordered(info->rq,
-                               info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE,
-                               NULL);
+       switch (info->feature_barrier) {
+       case QUEUE_ORDERED_DRAIN:       barrier = "enabled (drain)"; break;
+       case QUEUE_ORDERED_TAG:         barrier = "enabled (tag)"; break;
+       case QUEUE_ORDERED_NONE:        barrier = "disabled"; break;
+       default:                        return -EINVAL;
+       }
+
+       err = blk_queue_ordered(info->rq, info->feature_barrier);
 
        if (err)
                return err;
 
        printk(KERN_INFO "blkfront: %s: barriers %s\n",
-              info->gd->disk_name,
-              info->feature_barrier ? "enabled" : "disabled");
+              info->gd->disk_name, barrier);
        return 0;
 }
 
@@ -418,9 +472,14 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
        if ((minor % nr_parts) == 0)
                nr_minors = nr_parts;
 
+       err = xlbd_reserve_minors(minor, nr_minors);
+       if (err)
+               goto out;
+       err = -ENODEV;
+
        gd = alloc_disk(nr_minors);
        if (gd == NULL)
-               goto out;
+               goto release;
 
        offset = minor / nr_parts;
 
@@ -451,14 +510,13 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 
        if (xlvbd_init_blk_queue(gd, sector_size)) {
                del_gendisk(gd);
-               goto out;
+               goto release;
        }
 
        info->rq = gd->queue;
        info->gd = gd;
 
-       if (info->feature_barrier)
-               xlvbd_barrier(info);
+       xlvbd_barrier(info);
 
        if (vdisk_info & VDISK_READONLY)
                set_disk_ro(gd, 1);
@@ -471,10 +529,45 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 
        return 0;
 
+ release:
+       xlbd_release_minors(minor, nr_minors);
  out:
        return err;
 }
 
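+/*
+ * Undo xlvbd_alloc_gendisk(): stop the request queue, cancel and flush
+ * any pending gnttab callback work, delete the gendisk and give its
+ * minors back to the bitmap.
+ */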
+static void xlvbd_release_gendisk(struct blkfront_info *info)
+{
+       unsigned int minor, nr_minors;
+       unsigned long flags;
+
+       if (info->rq == NULL)
+               return;
+
+       spin_lock_irqsave(&blkif_io_lock, flags);
+
+       /* No more blkif_request(). */
+       blk_stop_queue(info->rq);
+
+       /* No more gnttab callback work. */
+       gnttab_cancel_free_callback(&info->callback);
+       spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+       /* Flush gnttab callback work. Must be done with no locks held. */
+       flush_scheduled_work();
+
+       del_gendisk(info->gd);
+
+       minor = info->gd->first_minor;
+       nr_minors = info->gd->minors;
+       xlbd_release_minors(minor, nr_minors);
+
+       blk_cleanup_queue(info->rq);
+       info->rq = NULL;
+
+       put_disk(info->gd);
+       info->gd = NULL;
+}
+
 static void kick_pending_request_queues(struct blkfront_info *info)
 {
        if (!RING_FULL(&info->ring)) {
@@ -569,7 +662,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                                printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
                                       info->gd->disk_name);
                                error = -EOPNOTSUPP;
-                               info->feature_barrier = 0;
+                               info->feature_barrier = QUEUE_ORDERED_NONE;
                                xlvbd_barrier(info);
                        }
                        /* fall through */
@@ -652,7 +745,7 @@ fail:
 
 
 /* Common code used when first setting up, and when resuming. */
-static int talk_to_backend(struct xenbus_device *dev,
+static int talk_to_blkback(struct xenbus_device *dev,
                           struct blkfront_info *info)
 {
        const char *message = NULL;
@@ -712,7 +805,6 @@ again:
        return err;
 }
 
-
 /**
  * Entry point to this code when a new device is created.  Allocate the basic
  * structures and the ring buffer for communication with the backend, and
@@ -773,6 +865,7 @@ static int blkfront_probe(struct xenbus_device *dev,
                return -ENOMEM;
        }
 
+       mutex_init(&info->mutex);
        info->xbdev = dev;
        info->vdevice = vdevice;
        info->connected = BLKIF_STATE_DISCONNECTED;
@@ -786,7 +879,7 @@ static int blkfront_probe(struct xenbus_device *dev,
        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
        dev_set_drvdata(&dev->dev, info);
 
-       err = talk_to_backend(dev, info);
+       err = talk_to_blkback(dev, info);
        if (err) {
                kfree(info);
                dev_set_drvdata(&dev->dev, NULL);
@@ -881,13 +974,50 @@ static int blkfront_resume(struct xenbus_device *dev)
 
        blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 
-       err = talk_to_backend(dev, info);
+       err = talk_to_blkback(dev, info);
        if (info->connected == BLKIF_STATE_SUSPENDED && !err)
                err = blkif_recover(info);
 
        return err;
 }
 
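+/*
+ * Handle the backend switching to Closing.  If the block device is
+ * still open, only move to XenbusStateClosing and let blkif_release()
+ * finish the teardown later; otherwise release the gendisk and
+ * acknowledge with XenbusStateClosed.
+ */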
+static void
+blkfront_closing(struct blkfront_info *info)
+{
+       struct xenbus_device *xbdev = info->xbdev;
+       struct block_device *bdev = NULL;
+
+       mutex_lock(&info->mutex);
+
+       if (xbdev->state == XenbusStateClosing) {
+               mutex_unlock(&info->mutex);
+               return;
+       }
+
+       if (info->gd)
+               bdev = bdget_disk(info->gd, 0);
+
+       mutex_unlock(&info->mutex);
+
+       if (!bdev) {
+               xenbus_frontend_closed(xbdev);
+               return;
+       }
+
+       mutex_lock(&bdev->bd_mutex);
+
+       if (bdev->bd_openers) {
+               xenbus_dev_error(xbdev, -EBUSY,
+                                "Device in use; refusing to close");
+               xenbus_switch_state(xbdev, XenbusStateClosing);
+       } else {
+               xlvbd_release_gendisk(info);
+               xenbus_frontend_closed(xbdev);
+       }
+
+       mutex_unlock(&bdev->bd_mutex);
+       bdput(bdev);
+}
 
 /*
  * Invoked when the backend is finally 'ready' (and has produced
@@ -899,11 +1029,31 @@ static void blkfront_connect(struct blkfront_info *info)
        unsigned long sector_size;
        unsigned int binfo;
        int err;
-
-       if ((info->connected == BLKIF_STATE_CONNECTED) ||
-           (info->connected == BLKIF_STATE_SUSPENDED) )
+       int barrier;
+
+       switch (info->connected) {
+       case BLKIF_STATE_CONNECTED:
+               /*
+                * Potentially, the back-end may be signalling
+                * a capacity change; update the capacity.
+                */
+               err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+                                  "sectors", "%Lu", &sectors);
+               if (XENBUS_EXIST_ERR(err))
+                       return;
+               printk(KERN_INFO "Setting capacity to %Lu\n",
+                      sectors);
+               set_capacity(info->gd, sectors);
+               revalidate_disk(info->gd);
+
+               /* fall through */
+       case BLKIF_STATE_SUSPENDED:
                return;
 
+       default:
+               break;
+       }
+
        dev_dbg(&info->xbdev->dev, "%s:%s.\n",
                __func__, info->xbdev->otherend);
 
@@ -920,10 +1070,26 @@ static void blkfront_connect(struct blkfront_info *info)
        }
 
        err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-                           "feature-barrier", "%lu", &info->feature_barrier,
+                           "feature-barrier", "%lu", &barrier,
                            NULL);
+
+       /*
+        * If there's no "feature-barrier" defined, then it means
+        * we're dealing with a very old backend which writes
+        * synchronously; draining will do what needs to get done.
+        *
+        * If there are barriers, then we can do full queued writes
+        * with tagged barriers.
+        *
+        * If barriers are not supported, then there's not much we can
+        * do, so just set ordering to NONE.
+        */
        if (err)
-               info->feature_barrier = 0;
+               info->feature_barrier = QUEUE_ORDERED_DRAIN;
+       else if (barrier)
+               info->feature_barrier = QUEUE_ORDERED_TAG;
+       else
+               info->feature_barrier = QUEUE_ORDERED_NONE;
 
        err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
        if (err) {
@@ -945,53 +1111,15 @@ static void blkfront_connect(struct blkfront_info *info)
        info->is_ready = 1;
 }
 
-/**
- * Handle the change of state of the backend to Closing.  We must delete our
- * device-layer structures now, to ensure that writes are flushed through to
- * the backend.  Once is this done, we can switch to Closed in
- * acknowledgement.
- */
-static void blkfront_closing(struct xenbus_device *dev)
-{
-       struct blkfront_info *info = dev_get_drvdata(&dev->dev);
-       unsigned long flags;
-
-       dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename);
-
-       if (info->rq == NULL)
-               goto out;
-
-       spin_lock_irqsave(&blkif_io_lock, flags);
-
-       /* No more blkif_request(). */
-       blk_stop_queue(info->rq);
-
-       /* No more gnttab callback work. */
-       gnttab_cancel_free_callback(&info->callback);
-       spin_unlock_irqrestore(&blkif_io_lock, flags);
-
-       /* Flush gnttab callback work. Must be done with no locks held. */
-       flush_scheduled_work();
-
-       blk_cleanup_queue(info->rq);
-       info->rq = NULL;
-
-       del_gendisk(info->gd);
-
- out:
-       xenbus_frontend_closed(dev);
-}
-
 /**
  * Callback received when the backend's state changes.
  */
-static void backend_changed(struct xenbus_device *dev,
+static void blkback_changed(struct xenbus_device *dev,
                            enum xenbus_state backend_state)
 {
        struct blkfront_info *info = dev_get_drvdata(&dev->dev);
-       struct block_device *bd;
 
-       dev_dbg(&dev->dev, "blkfront:backend_changed.\n");
+       dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);
 
        switch (backend_state) {
        case XenbusStateInitialising:
@@ -1006,35 +1134,56 @@ static void backend_changed(struct xenbus_device *dev,
                break;
 
        case XenbusStateClosing:
-               if (info->gd == NULL) {
-                       xenbus_frontend_closed(dev);
-                       break;
-               }
-               bd = bdget_disk(info->gd, 0);
-               if (bd == NULL)
-                       xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
-
-               mutex_lock(&bd->bd_mutex);
-               if (info->users > 0)
-                       xenbus_dev_error(dev, -EBUSY,
-                                        "Device in use; refusing to close");
-               else
-                       blkfront_closing(dev);
-               mutex_unlock(&bd->bd_mutex);
-               bdput(bd);
+               blkfront_closing(info);
                break;
        }
 }
 
-static int blkfront_remove(struct xenbus_device *dev)
+static int blkfront_remove(struct xenbus_device *xbdev)
 {
-       struct blkfront_info *info = dev_get_drvdata(&dev->dev);
+       struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
+       struct block_device *bdev = NULL;
+       struct gendisk *disk;
 
-       dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename);
+       dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
 
        blkif_free(info, 0);
 
-       kfree(info);
+       mutex_lock(&info->mutex);
+
+       disk = info->gd;
+       if (disk)
+               bdev = bdget_disk(disk, 0);
+
+       info->xbdev = NULL;
+       mutex_unlock(&info->mutex);
+
+       if (!bdev) {
+               kfree(info);
+               return 0;
+       }
+
+       /*
+        * The xbdev was removed before we reached the Closed
+        * state. See if it's safe to remove the disk. If the bdev
+        * isn't closed yet, we let release take care of it.
+        */
+
+       mutex_lock(&bdev->bd_mutex);
+       info = disk->private_data;
+
+       dev_warn(disk_to_dev(disk),
+                "%s was hot-unplugged, %d stale handles\n",
+                xbdev->nodename, bdev->bd_openers);
+
+       if (info && !bdev->bd_openers) {
+               xlvbd_release_gendisk(info);
+               disk->private_data = NULL;
+               kfree(info);
+       }
+
+       mutex_unlock(&bdev->bd_mutex);
+       bdput(bdev);
 
        return 0;
 }
@@ -1043,30 +1192,78 @@ static int blkfront_is_ready(struct xenbus_device *dev)
 {
        struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 
-       return info->is_ready;
+       return info->is_ready && info->xbdev;
 }
 
 static int blkif_open(struct block_device *bdev, fmode_t mode)
 {
-       struct blkfront_info *info = bdev->bd_disk->private_data;
-       info->users++;
-       return 0;
+       struct gendisk *disk = bdev->bd_disk;
+       struct blkfront_info *info;
+       int err = 0;
+
+       lock_kernel();
+
+       info = disk->private_data;
+       if (!info) {
+               /* xbdev gone */
+               err = -ERESTARTSYS;
+               goto out;
+       }
+
+       mutex_lock(&info->mutex);
+
+       if (!info->gd)
+               /* xbdev is closed */
+               err = -ERESTARTSYS;
+
+       mutex_unlock(&info->mutex);
+
+out:
+       unlock_kernel();
+       return err;
 }
 
 static int blkif_release(struct gendisk *disk, fmode_t mode)
 {
        struct blkfront_info *info = disk->private_data;
-       info->users--;
-       if (info->users == 0) {
-               /* Check whether we have been instructed to close.  We will
-                  have ignored this request initially, as the device was
-                  still mounted. */
-               struct xenbus_device *dev = info->xbdev;
-               enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
-
-               if (state == XenbusStateClosing && info->is_ready)
-                       blkfront_closing(dev);
+       struct block_device *bdev;
+       struct xenbus_device *xbdev;
+
+       lock_kernel();
+
+       bdev = bdget_disk(disk, 0);
+
+       if (bdev->bd_openers)
+               goto out;
+
+       /*
+        * Check if we have been instructed to close. We will have
+        * deferred this request, because the bdev was still open.
+        */
+
+       mutex_lock(&info->mutex);
+       xbdev = info->xbdev;
+
+       if (xbdev && xbdev->state == XenbusStateClosing) {
+               /* pending switch to state closed */
+               dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
+               xlvbd_release_gendisk(info);
+               xenbus_frontend_closed(info->xbdev);
+       }
+
+       mutex_unlock(&info->mutex);
+
+       if (!xbdev) {
+               /* sudden device removal */
+               dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
+               xlvbd_release_gendisk(info);
+               disk->private_data = NULL;
+               kfree(info);
        }
+
+out:
+       bdput(bdev);
+       unlock_kernel();
        return 0;
 }
 
@@ -1076,7 +1273,7 @@ static const struct block_device_operations xlvbd_block_fops =
        .open = blkif_open,
        .release = blkif_release,
        .getgeo = blkif_getgeo,
-       .locked_ioctl = blkif_ioctl,
+       .ioctl = blkif_ioctl,
 };
 
 
@@ -1092,7 +1289,7 @@ static struct xenbus_driver blkfront = {
        .probe = blkfront_probe,
        .remove = blkfront_remove,
        .resume = blkfront_resume,
-       .otherend_changed = backend_changed,
+       .otherend_changed = blkback_changed,
        .is_ready = blkfront_is_ready,
 };