x86, lockdep: fix "WARNING: at kernel/lockdep.c:2658 check_flags+0x4c/0x128()"
[pandora-kernel.git] / drivers / md / dm.c
index f2d24eb..372369b 100644 (file)
@@ -71,9 +71,22 @@ union map_info *dm_get_mapinfo(struct bio *bio)
 #define DMF_DELETING 4
 #define DMF_NOFLUSH_SUSPENDING 5
 
+/*
+ * One request queued on a mapped device's workqueue (md->wq) for dm_wq_work().
+ */
+struct dm_wq_req {
+       enum {
+               DM_WQ_FLUSH_ALL, /* merge the pushback list, then flush deferred bios */
+               DM_WQ_FLUSH_DEFERRED, /* flush deferred bios only */
+       } type;
+       struct work_struct work; /* embedded; dm_wq_work() uses container_of() on it */
+       struct mapped_device *md; /* device whose bio lists are processed */
+       void *context; /* stored by dm_wq_queue(); dm_wq_work() does not read it */
+};
+
 struct mapped_device {
        struct rw_semaphore io_lock;
-       struct semaphore suspend_lock;
+       struct mutex suspend_lock;
        spinlock_t pushback_lock;
        rwlock_t map_lock;
        atomic_t holders;
@@ -95,6 +108,11 @@ struct mapped_device {
        struct bio_list deferred;
        struct bio_list pushback;
 
+       /*
+        * Processing queue (flush/barriers)
+        */
+       struct workqueue_struct *wq;
+
        /*
         * The current mapping.
         */
@@ -181,19 +199,21 @@ static void local_exit(void)
        DMINFO("cleaned up");
 }
 
-int (*_inits[])(void) __initdata = {
+static int (*_inits[])(void) __initdata = {
        local_init,
        dm_target_init,
        dm_linear_init,
        dm_stripe_init,
+       dm_kcopyd_init,
        dm_interface_init,
 };
 
-void (*_exits[])(void) = {
+static void (*_exits[])(void) = {
        local_exit,
        dm_target_exit,
        dm_linear_exit,
        dm_stripe_exit,
+       dm_kcopyd_exit,
        dm_interface_exit,
 };
 
@@ -904,7 +924,7 @@ static void free_minor(int minor)
 /*
  * See if the device with a specific minor # is free.
  */
-static int specific_minor(struct mapped_device *md, int minor)
+static int specific_minor(int minor)
 {
        int r, m;
 
@@ -937,7 +957,7 @@ out:
        return r;
 }
 
-static int next_free_minor(struct mapped_device *md, int *minor)
+static int next_free_minor(int *minor)
 {
        int r, m;
 
@@ -948,9 +968,8 @@ static int next_free_minor(struct mapped_device *md, int *minor)
        spin_lock(&_minor_lock);
 
        r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m);
-       if (r) {
+       if (r)
                goto out;
-       }
 
        if (m >= (1 << MINORBITS)) {
                idr_remove(&_minor_idr, m);
@@ -973,7 +992,7 @@ static struct block_device_operations dm_blk_dops;
 static struct mapped_device *alloc_dev(int minor)
 {
        int r;
-       struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
+       struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL);
        void *old_md;
 
        if (!md) {
@@ -982,19 +1001,18 @@ static struct mapped_device *alloc_dev(int minor)
        }
 
        if (!try_module_get(THIS_MODULE))
-               goto bad0;
+               goto bad_module_get;
 
        /* get a minor number for the dev */
        if (minor == DM_ANY_MINOR)
-               r = next_free_minor(md, &minor);
+               r = next_free_minor(&minor);
        else
-               r = specific_minor(md, minor);
+               r = specific_minor(minor);
        if (r < 0)
-               goto bad1;
+               goto bad_minor;
 
-       memset(md, 0, sizeof(*md));
        init_rwsem(&md->io_lock);
-       init_MUTEX(&md->suspend_lock);
+       mutex_init(&md->suspend_lock);
        spin_lock_init(&md->pushback_lock);
        rwlock_init(&md->map_lock);
        atomic_set(&md->holders, 1);
@@ -1006,7 +1024,7 @@ static struct mapped_device *alloc_dev(int minor)
 
        md->queue = blk_alloc_queue(GFP_KERNEL);
        if (!md->queue)
-               goto bad1_free_minor;
+               goto bad_queue;
 
        md->queue->queuedata = md;
        md->queue->backing_dev_info.congested_fn = dm_any_congested;
@@ -1017,11 +1035,11 @@ static struct mapped_device *alloc_dev(int minor)
 
        md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
        if (!md->io_pool)
-               goto bad2;
+               goto bad_io_pool;
 
        md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache);
        if (!md->tio_pool)
-               goto bad3;
+               goto bad_tio_pool;
 
        md->bs = bioset_create(16, 16);
        if (!md->bs)
@@ -1029,7 +1047,7 @@ static struct mapped_device *alloc_dev(int minor)
 
        md->disk = alloc_disk(1);
        if (!md->disk)
-               goto bad4;
+               goto bad_disk;
 
        atomic_set(&md->pending, 0);
        init_waitqueue_head(&md->wait);
@@ -1044,6 +1062,10 @@ static struct mapped_device *alloc_dev(int minor)
        add_disk(md->disk);
        format_dev_t(md->name, MKDEV(_major, minor));
 
+       md->wq = create_singlethread_workqueue("kdmflush");
+       if (!md->wq)
+               goto bad_thread;
+
        /* Populate the mapping, nobody knows we exist yet */
        spin_lock(&_minor_lock);
        old_md = idr_replace(&_minor_idr, md, minor);
@@ -1053,19 +1075,21 @@ static struct mapped_device *alloc_dev(int minor)
 
        return md;
 
- bad4:
+bad_thread:
+       put_disk(md->disk);
+bad_disk:
        bioset_free(md->bs);
- bad_no_bioset:
+bad_no_bioset:
        mempool_destroy(md->tio_pool);
- bad3:
+bad_tio_pool:
        mempool_destroy(md->io_pool);
- bad2:
+bad_io_pool:
        blk_cleanup_queue(md->queue);
- bad1_free_minor:
+bad_queue:
        free_minor(minor);
- bad1:
+bad_minor:
        module_put(THIS_MODULE);
- bad0:
+bad_module_get:
        kfree(md);
        return NULL;
 }
@@ -1080,6 +1104,7 @@ static void free_dev(struct mapped_device *md)
                unlock_fs(md);
                bdput(md->suspended_bdev);
        }
+       destroy_workqueue(md->wq);
        mempool_destroy(md->tio_pool);
        mempool_destroy(md->io_pool);
        bioset_free(md->bs);
@@ -1259,20 +1284,91 @@ void dm_put(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_put);
 
+static int dm_wait_for_completion(struct mapped_device *md)
+{
+       int r = 0; /* 0: md->pending reached zero; -EINTR: a signal was pending first */
+
+       while (1) {
+               set_current_state(TASK_INTERRUPTIBLE); /* set before md->pending is read */
+
+               smp_mb(); /* orders the state change above against the read below */
+               if (!atomic_read(&md->pending))
+                       break;
+
+               if (signal_pending(current)) {
+                       r = -EINTR;
+                       break;
+               }
+
+               io_schedule(); /* NOTE(review): presumably woken via md->wait by the caller's waitqueue entry - confirm */
+       }
+       set_current_state(TASK_RUNNING); /* both loop exits leave the task INTERRUPTIBLE */
+
+       return r;
+}
+
 /*
  * Process the deferred bios
  */
-static void __flush_deferred_io(struct mapped_device *md, struct bio *c)
+static void __flush_deferred_io(struct mapped_device *md)
 {
-       struct bio *n;
+       struct bio *c; /* bio currently being resubmitted */
 
-       while (c) {
-               n = c->bi_next;
-               c->bi_next = NULL;
+       while ((c = bio_list_pop(&md->deferred))) { /* pops md->deferred directly; callers no longer pass a list */
                if (__split_bio(md, c))
                        bio_io_error(c);
-               c = n;
        }
+
+       clear_bit(DMF_BLOCK_IO, &md->flags); /* cleared here, after the loop, instead of by each caller */
+}
+
+static void __merge_pushback_list(struct mapped_device *md)
+{
+       unsigned long flags; /* saved IRQ state for pushback_lock */
+
+       spin_lock_irqsave(&md->pushback_lock, flags);
+       clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); /* cleared under pushback_lock */
+       bio_list_merge_head(&md->deferred, &md->pushback); /* pushback bios join md->deferred (at its head, per the helper's name) */
+       bio_list_init(&md->pushback); /* re-initialise pushback after the merge */
+       spin_unlock_irqrestore(&md->pushback_lock, flags);
+}
+
+static void dm_wq_work(struct work_struct *work)
+{
+       struct dm_wq_req *req = container_of(work, struct dm_wq_req, work);
+       struct mapped_device *md = req->md;
+
+       down_write(&md->io_lock); /* every request type runs with io_lock held for write */
+       switch (req->type) {
+       case DM_WQ_FLUSH_ALL:
+               __merge_pushback_list(md);
+               /* fall through: FLUSH_ALL also flushes the deferred list */
+       case DM_WQ_FLUSH_DEFERRED:
+               __flush_deferred_io(md);
+               break;
+       default:
+               DMERR("dm_wq_work: unrecognised work type %d", req->type);
+               BUG(); /* unknown type is treated as a programming error */
+       }
+       up_write(&md->io_lock);
+}
+
+static void dm_wq_queue(struct mapped_device *md, int type, void *context,
+                       struct dm_wq_req *req) /* req: caller-provided storage, filled in below */
+{
+       req->type = type; /* one of the DM_WQ_* values switched on in dm_wq_work() */
+       req->md = md;
+       req->context = context; /* recorded only; dm_wq_work() does not read it */
+       INIT_WORK(&req->work, dm_wq_work);
+       queue_work(md->wq, &req->work); /* return value is not checked */
+}
+
+static void dm_queue_flush(struct mapped_device *md, int type, void *context)
+{
+       struct dm_wq_req req; /* on this stack frame: must stay valid until the work has run */
+
+       dm_wq_queue(md, type, context, &req);
+       flush_workqueue(md->wq); /* NOTE(review): relied on to wait for req to finish before returning - confirm */
 }
 
 /*
@@ -1282,7 +1378,7 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 {
        int r = -EINVAL;
 
-       down(&md->suspend_lock);
+       mutex_lock(&md->suspend_lock);
 
        /* device must be suspended */
        if (!dm_suspended(md))
@@ -1297,7 +1393,7 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
        r = __bind(md, table);
 
 out:
-       up(&md->suspend_lock);
+       mutex_unlock(&md->suspend_lock);
        return r;
 }
 
@@ -1346,17 +1442,17 @@ static void unlock_fs(struct mapped_device *md)
 int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 {
        struct dm_table *map = NULL;
-       unsigned long flags;
        DECLARE_WAITQUEUE(wait, current);
-       struct bio *def;
-       int r = -EINVAL;
+       int r = 0;
        int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
        int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
 
-       down(&md->suspend_lock);
+       mutex_lock(&md->suspend_lock);
 
-       if (dm_suspended(md))
+       if (dm_suspended(md)) {
+               r = -EINVAL;
                goto out_unlock;
+       }
 
        map = dm_get_table(md);
 
@@ -1378,16 +1474,16 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
                        r = -ENOMEM;
                        goto flush_and_out;
                }
-       }
 
-       /*
-        * Flush I/O to the device.
-        * noflush supersedes do_lockfs, because lock_fs() needs to flush I/Os.
-        */
-       if (do_lockfs && !noflush) {
-               r = lock_fs(md);
-               if (r)
-                       goto out;
+               /*
+                * Flush I/O to the device. noflush supersedes do_lockfs,
+                * because lock_fs() needs to flush I/Os.
+                */
+               if (do_lockfs) {
+                       r = lock_fs(md);
+                       if (r)
+                               goto out;
+               }
        }
 
        /*
@@ -1404,66 +1500,36 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
                dm_table_unplug_all(map);
 
        /*
-        * Then we wait for the already mapped ios to
-        * complete.
+        * Wait for the already-mapped ios to complete.
         */
-       while (1) {
-               set_current_state(TASK_INTERRUPTIBLE);
-
-               if (!atomic_read(&md->pending) || signal_pending(current))
-                       break;
-
-               io_schedule();
-       }
-       set_current_state(TASK_RUNNING);
+       r = dm_wait_for_completion(md);
 
        down_write(&md->io_lock);
        remove_wait_queue(&md->wait, &wait);
 
-       if (noflush) {
-               spin_lock_irqsave(&md->pushback_lock, flags);
-               clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
-               bio_list_merge_head(&md->deferred, &md->pushback);
-               bio_list_init(&md->pushback);
-               spin_unlock_irqrestore(&md->pushback_lock, flags);
-       }
+       if (noflush)
+               __merge_pushback_list(md);
+       up_write(&md->io_lock);
 
        /* were we interrupted ? */
-       r = -EINTR;
-       if (atomic_read(&md->pending)) {
-               clear_bit(DMF_BLOCK_IO, &md->flags);
-               def = bio_list_get(&md->deferred);
-               __flush_deferred_io(md, def);
-               up_write(&md->io_lock);
+       if (r < 0) {
+               dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
+
                unlock_fs(md);
                goto out; /* pushback list is already flushed, so skip flush */
        }
-       up_write(&md->io_lock);
 
        dm_table_postsuspend_targets(map);
 
        set_bit(DMF_SUSPENDED, &md->flags);
 
-       r = 0;
-
 flush_and_out:
-       if (r && noflush) {
+       if (r && noflush)
                /*
                 * Because there may be already I/Os in the pushback list,
                 * flush them before return.
                 */
-               down_write(&md->io_lock);
-
-               spin_lock_irqsave(&md->pushback_lock, flags);
-               clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
-               bio_list_merge_head(&md->deferred, &md->pushback);
-               bio_list_init(&md->pushback);
-               spin_unlock_irqrestore(&md->pushback_lock, flags);
-
-               def = bio_list_get(&md->deferred);
-               __flush_deferred_io(md, def);
-               up_write(&md->io_lock);
-       }
+               dm_queue_flush(md, DM_WQ_FLUSH_ALL, NULL);
 
 out:
        if (r && md->suspended_bdev) {
@@ -1474,17 +1540,16 @@ out:
        dm_table_put(map);
 
 out_unlock:
-       up(&md->suspend_lock);
+       mutex_unlock(&md->suspend_lock);
        return r;
 }
 
 int dm_resume(struct mapped_device *md)
 {
        int r = -EINVAL;
-       struct bio *def;
        struct dm_table *map = NULL;
 
-       down(&md->suspend_lock);
+       mutex_lock(&md->suspend_lock);
        if (!dm_suspended(md))
                goto out;
 
@@ -1496,12 +1561,7 @@ int dm_resume(struct mapped_device *md)
        if (r)
                goto out;
 
-       down_write(&md->io_lock);
-       clear_bit(DMF_BLOCK_IO, &md->flags);
-
-       def = bio_list_get(&md->deferred);
-       __flush_deferred_io(md, def);
-       up_write(&md->io_lock);
+       dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
 
        unlock_fs(md);
 
@@ -1520,7 +1580,7 @@ int dm_resume(struct mapped_device *md)
 
 out:
        dm_table_put(map);
-       up(&md->suspend_lock);
+       mutex_unlock(&md->suspend_lock);
 
        return r;
 }