dm: fix various targets to dm_register_target after module __init resources created
author monty_pavel@sina.com <monty_pavel@sina.com>
Fri, 24 Nov 2017 17:43:50 +0000 (01:43 +0800)
committer Ben Hutchings <ben@decadent.org.uk>
Sat, 3 Mar 2018 15:50:43 +0000 (15:50 +0000)
commit 7e6358d244e4706fe612a77b9c36519a33600ac0 upstream.

A NULL pointer dereference is seen if two concurrent "vgchange -ay -K <vg name>"
processes race to load the dm-thin-pool module:

 PID: 25992 TASK: ffff883cd7d23500 CPU: 4 COMMAND: "vgchange"
  #0 [ffff883cd743d600] machine_kexec at ffffffff81038fa9
  #1 [ffff883cd743d660] crash_kexec at ffffffff810c5992
  #2 [ffff883cd743d730] oops_end at ffffffff81515c90
  #3 [ffff883cd743d760] no_context at ffffffff81049f1b
  #4 [ffff883cd743d7b0] __bad_area_nosemaphore at ffffffff8104a1a5
  #5 [ffff883cd743d800] bad_area at ffffffff8104a2ce
  #6 [ffff883cd743d830] __do_page_fault at ffffffff8104aa6f
  #7 [ffff883cd743d950] do_page_fault at ffffffff81517bae
  #8 [ffff883cd743d980] page_fault at ffffffff81514f95
     [exception RIP: kmem_cache_alloc+108]
     RIP: ffffffff8116ef3c RSP: ffff883cd743da38 RFLAGS: 00010046
     RAX: 0000000000000004 RBX: ffffffff81121b90 RCX: ffff881bf1e78cc0
     RDX: 0000000000000000 RSI: 00000000000000d0 RDI: 0000000000000000
     RBP: ffff883cd743da68 R8: ffff881bf1a4eb00 R9: 0000000080042000
     R10: 0000000000002000 R11: 0000000000000000 R12: 00000000000000d0
     R13: 0000000000000000 R14: 00000000000000d0 R15: 0000000000000246
     ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
  #9 [ffff883cd743da70] mempool_alloc_slab at ffffffff81121ba5
 #10 [ffff883cd743da80] mempool_create_node at ffffffff81122083
 #11 [ffff883cd743dad0] mempool_create at ffffffff811220f4
 #12 [ffff883cd743dae0] pool_ctr at ffffffffa08de049 [dm_thin_pool]
 #13 [ffff883cd743dbd0] dm_table_add_target at ffffffffa0005f2f [dm_mod]
 #14 [ffff883cd743dc30] table_load at ffffffffa0008ba9 [dm_mod]
 #15 [ffff883cd743dc90] ctl_ioctl at ffffffffa0009dc4 [dm_mod]

The race results in a NULL pointer dereference because:

Process A (vgchange -ay -K):
  a. send DM_LIST_VERSIONS_CMD ioctl;
  b. pool_target not registered;
  c. modprobe dm_thin_pool and wait until end.

Process B (vgchange -ay -K):
  a. send DM_LIST_VERSIONS_CMD ioctl;
  b. pool_target registered;
  c. table_load->dm_table_add_target->pool_ctr;
  d. _new_mapping_cache is still NULL, so mempool_create() panics.
Note:
  1. process A and process B are two concurrent processes.
  2. pool_target is already visible to process B, but the
  initialization of _new_mapping_cache has not finished yet.

To fix dm-thin-pool, and other targets (cache, multipath, and snapshot)
with the same problem, simply call dm_register_target() only after all
resources created during module init (as labelled with __init) have been
set up.

Signed-off-by: monty <monty_pavel@sina.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
[bwh: Backported to 3.2:
 - Drop changes in dm-cache (non-existent) and dm-thin (doesn't have this bug)
 - In dm-snap, reorder cleanup of tracked_chunk_cache too
 - Adjust context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
drivers/md/dm-mpath.c
drivers/md/dm-snap.c

index 8483407..dd0c882 100644 (file)
@@ -1658,13 +1658,6 @@ static int __init dm_multipath_init(void)
        if (!_mpio_cache)
                return -ENOMEM;
 
-       r = dm_register_target(&multipath_target);
-       if (r < 0) {
-               DMERR("register failed %d", r);
-               r = -EINVAL;
-               goto bad_register_target;
-       }
-
        kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
        if (!kmultipathd) {
                DMERR("failed to create workqueue kmpathd");
@@ -1686,17 +1679,24 @@ static int __init dm_multipath_init(void)
                goto bad_alloc_kmpath_handlerd;
        }
 
+       r = dm_register_target(&multipath_target);
+       if (r < 0) {
+               DMERR("register failed %d", r);
+               r = -EINVAL;
+               goto bad_register_target;
+       }
+
        DMINFO("version %u.%u.%u loaded",
               multipath_target.version[0], multipath_target.version[1],
               multipath_target.version[2]);
 
        return 0;
 
+bad_register_target:
+       destroy_workqueue(kmpath_handlerd);
 bad_alloc_kmpath_handlerd:
        destroy_workqueue(kmultipathd);
 bad_alloc_kmultipathd:
-       dm_unregister_target(&multipath_target);
-bad_register_target:
        kmem_cache_destroy(_mpio_cache);
 
        return r;
index 6674ebf..5d6b533 100644 (file)
@@ -2293,24 +2293,6 @@ static int __init dm_snapshot_init(void)
                return r;
        }
 
-       r = dm_register_target(&snapshot_target);
-       if (r < 0) {
-               DMERR("snapshot target register failed %d", r);
-               goto bad_register_snapshot_target;
-       }
-
-       r = dm_register_target(&origin_target);
-       if (r < 0) {
-               DMERR("Origin target register failed %d", r);
-               goto bad_register_origin_target;
-       }
-
-       r = dm_register_target(&merge_target);
-       if (r < 0) {
-               DMERR("Merge target register failed %d", r);
-               goto bad_register_merge_target;
-       }
-
        r = init_origin_hash();
        if (r) {
                DMERR("init_origin_hash failed.");
@@ -2338,8 +2320,32 @@ static int __init dm_snapshot_init(void)
                goto bad_tracked_chunk_cache;
        }
 
+       r = dm_register_target(&snapshot_target);
+       if (r < 0) {
+               DMERR("snapshot target register failed %d", r);
+               goto bad_register_snapshot_target;
+       }
+
+       r = dm_register_target(&origin_target);
+       if (r < 0) {
+               DMERR("Origin target register failed %d", r);
+               goto bad_register_origin_target;
+       }
+
+       r = dm_register_target(&merge_target);
+       if (r < 0) {
+               DMERR("Merge target register failed %d", r);
+               goto bad_register_merge_target;
+       }
+
        return 0;
 
+bad_register_merge_target:
+       dm_unregister_target(&origin_target);
+bad_register_origin_target:
+       dm_unregister_target(&snapshot_target);
+bad_register_snapshot_target:
+       kmem_cache_destroy(tracked_chunk_cache);
 bad_tracked_chunk_cache:
        kmem_cache_destroy(pending_cache);
 bad_pending_cache:
@@ -2347,12 +2353,6 @@ bad_pending_cache:
 bad_exception_cache:
        exit_origin_hash();
 bad_origin_hash:
-       dm_unregister_target(&merge_target);
-bad_register_merge_target:
-       dm_unregister_target(&origin_target);
-bad_register_origin_target:
-       dm_unregister_target(&snapshot_target);
-bad_register_snapshot_target:
        dm_exception_store_exit();
 
        return r;