ext4: Fix race between read_block_bitmap() and mark_diskspace_used()
authorAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Tue, 6 Jan 2009 02:38:26 +0000 (21:38 -0500)
committerTheodore Ts'o <tytso@mit.edu>
Tue, 6 Jan 2009 02:38:26 +0000 (21:38 -0500)
We need to make sure we update the block bitmap and clear
EXT4_BG_BLOCK_UNINIT flag with sb_bgl_lock held, since
ext4_read_block_bitmap() looks at EXT4_BG_BLOCK_UNINIT to decide
whether to initialize the block bitmap each time it is called
(introduced by commit c806e68f), and this can race with block
allocations in ext4_mb_mark_diskspace_used().

ext4_read_block_bitmap does:

spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh, block_group, desc);

Now on the block allocation side we do

mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data,
ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
....
spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);

ie on allocation we update the bitmap then we take the sb_bgl_lock
and clear the EXT4_BG_BLOCK_UNINIT flag. What can happen is a
parallel ext4_read_block_bitmap can zero out the bitmap in between
the above mb_set_bits and spin_lock(sb_bg_lock..)

The race results in below user visible errors
EXT4-fs error (device sdb1): ext4_mb_release_inode_pa: free 100, pa_free 105
EXT4-fs error (device sdb1): mb_free_blocks: double-free of inode 0's block ..

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@kernel.org
fs/ext4/mballoc.c

index cda6963..d559a03 100644 (file)
@@ -1070,7 +1070,10 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
                        cur += 32;
                        continue;
                }
-               mb_clear_bit_atomic(lock, cur, bm);
+               if (lock)
+                       mb_clear_bit_atomic(lock, cur, bm);
+               else
+                       mb_clear_bit(cur, bm);
                cur++;
        }
 }
@@ -1088,7 +1091,10 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
                        cur += 32;
                        continue;
                }
-               mb_set_bit_atomic(lock, cur, bm);
+               if (lock)
+                       mb_set_bit_atomic(lock, cur, bm);
+               else
+                       mb_set_bit(cur, bm);
                cur++;
        }
 }
@@ -3035,10 +3041,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                }
        }
 #endif
-       mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data,
-                               ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
-
        spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
+       mb_set_bits(NULL, bitmap_bh->b_data,
+                               ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
        if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
                gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
                gdp->bg_free_blocks_count =