Btrfs: superblock duplication
author Yan Zheng <zheng.yan@oracle.com>
Mon, 8 Dec 2008 21:46:26 +0000 (16:46 -0500)
committer Chris Mason <chris.mason@oracle.com>
Mon, 8 Dec 2008 21:46:26 +0000 (16:46 -0500)
This patch implements superblock duplication. Superblocks
are stored at offsets 16K, 64M and 256G on every device.
The space used by superblocks is reserved by the allocator,
which uses a reverse mapping function to find the logical
addresses that correspond to superblocks. Thank you,

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/free-space-cache.c
fs/btrfs/transaction.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h

index 61dc3b2..c72f4f3 100644 (file)
@@ -1595,8 +1595,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                     fs_info, BTRFS_ROOT_TREE_OBJECTID);
 
 
-       bh = __bread(fs_devices->latest_bdev,
-                    BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
+       bh = btrfs_read_dev_super(fs_devices->latest_bdev);
        if (!bh)
                goto fail_iput;
 
@@ -1710,7 +1709,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        }
 
        mutex_lock(&fs_info->chunk_mutex);
-       ret = btrfs_read_sys_array(tree_root);
+       ret = btrfs_read_sys_array(tree_root, btrfs_super_bytenr(disk_super));
        mutex_unlock(&fs_info->chunk_mutex);
        if (ret) {
                printk("btrfs: failed to read the system array on %s\n",
@@ -1905,19 +1904,147 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
        put_bh(bh);
 }
 
-static int write_all_supers(struct btrfs_root *root)
+struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
+{
+       struct buffer_head *bh;
+       struct buffer_head *latest = NULL;
+       struct btrfs_super_block *super;
+       int i;
+       u64 transid = 0;
+       u64 bytenr;
+
+       /* we would like to check all the supers, but that would make
+        * a btrfs mount succeed after a mkfs from a different FS.
+        * So, we need to add a special mount option to scan for
+        * later supers, using BTRFS_SUPER_MIRROR_MAX instead
+        */
+       for (i = 0; i < 1; i++) {
+               bytenr = btrfs_sb_offset(i);
+               if (bytenr + 4096 >= i_size_read(bdev->bd_inode))
+                       break;
+               bh = __bread(bdev, bytenr / 4096, 4096);
+               if (!bh)
+                       continue;
+
+               super = (struct btrfs_super_block *)bh->b_data;
+               if (btrfs_super_bytenr(super) != bytenr ||
+                   strncmp((char *)(&super->magic), BTRFS_MAGIC,
+                           sizeof(super->magic))) {
+                       brelse(bh);
+                       continue;
+               }
+
+               if (!latest || btrfs_super_generation(super) > transid) {
+                       brelse(latest);
+                       latest = bh;
+                       transid = btrfs_super_generation(super);
+               } else {
+                       brelse(bh);
+               }
+       }
+       return latest;
+}
+
+static int write_dev_supers(struct btrfs_device *device,
+                           struct btrfs_super_block *sb,
+                           int do_barriers, int wait, int max_mirrors)
+{
+       struct buffer_head *bh;
+       int i;
+       int ret;
+       int errors = 0;
+       u32 crc;
+       u64 bytenr;
+       int last_barrier = 0;
+
+       if (max_mirrors == 0)
+               max_mirrors = BTRFS_SUPER_MIRROR_MAX;
+
+       /* make sure only the last submit_bh does a barrier */
+       if (do_barriers) {
+               for (i = 0; i < max_mirrors; i++) {
+                       bytenr = btrfs_sb_offset(i);
+                       if (bytenr + BTRFS_SUPER_INFO_SIZE >=
+                           device->total_bytes)
+                               break;
+                       last_barrier = i;
+               }
+       }
+
+       for (i = 0; i < max_mirrors; i++) {
+               bytenr = btrfs_sb_offset(i);
+               if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
+                       break;
+
+               if (wait) {
+                       bh = __find_get_block(device->bdev, bytenr / 4096,
+                                             BTRFS_SUPER_INFO_SIZE);
+                       BUG_ON(!bh);
+                       brelse(bh);
+                       wait_on_buffer(bh);
+                       if (buffer_uptodate(bh)) {
+                               brelse(bh);
+                               continue;
+                       }
+               } else {
+                       btrfs_set_super_bytenr(sb, bytenr);
+
+                       crc = ~(u32)0;
+                       crc = btrfs_csum_data(NULL, (char *)sb +
+                                             BTRFS_CSUM_SIZE, crc,
+                                             BTRFS_SUPER_INFO_SIZE -
+                                             BTRFS_CSUM_SIZE);
+                       btrfs_csum_final(crc, sb->csum);
+
+                       bh = __getblk(device->bdev, bytenr / 4096,
+                                     BTRFS_SUPER_INFO_SIZE);
+                       memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
+
+                       set_buffer_uptodate(bh);
+                       get_bh(bh);
+                       lock_buffer(bh);
+                       bh->b_end_io = btrfs_end_buffer_write_sync;
+               }
+
+               if (i == last_barrier && do_barriers && device->barriers) {
+                       ret = submit_bh(WRITE_BARRIER, bh);
+                       if (ret == -EOPNOTSUPP) {
+                               printk("btrfs: disabling barriers on dev %s\n",
+                                      device->name);
+                               set_buffer_uptodate(bh);
+                               device->barriers = 0;
+                               get_bh(bh);
+                               lock_buffer(bh);
+                               ret = submit_bh(WRITE, bh);
+                       }
+               } else {
+                       ret = submit_bh(WRITE, bh);
+               }
+
+               if (!ret && wait) {
+                       wait_on_buffer(bh);
+                       if (!buffer_uptodate(bh))
+                               errors++;
+               } else if (ret) {
+                       errors++;
+               }
+               if (wait)
+                       brelse(bh);
+       }
+       return errors < i ? 0 : -1;
+}
+
+int write_all_supers(struct btrfs_root *root, int max_mirrors)
 {
        struct list_head *cur;
        struct list_head *head = &root->fs_info->fs_devices->devices;
        struct btrfs_device *dev;
        struct btrfs_super_block *sb;
        struct btrfs_dev_item *dev_item;
-       struct buffer_head *bh;
        int ret;
        int do_barriers;
        int max_errors;
        int total_errors = 0;
-       u32 crc;
        u64 flags;
 
        max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
@@ -1944,40 +2071,11 @@ static int write_all_supers(struct btrfs_root *root)
                btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
                memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
                memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);
+
                flags = btrfs_super_flags(sb);
                btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
 
-
-               crc = ~(u32)0;
-               crc = btrfs_csum_data(root, (char *)sb + BTRFS_CSUM_SIZE, crc,
-                                     BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
-               btrfs_csum_final(crc, sb->csum);
-
-               bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / 4096,
-                             BTRFS_SUPER_INFO_SIZE);
-
-               memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
-               dev->pending_io = bh;
-
-               get_bh(bh);
-               set_buffer_uptodate(bh);
-               lock_buffer(bh);
-               bh->b_end_io = btrfs_end_buffer_write_sync;
-
-               if (do_barriers && dev->barriers) {
-                       ret = submit_bh(WRITE_BARRIER, bh);
-                       if (ret == -EOPNOTSUPP) {
-                               printk("btrfs: disabling barriers on dev %s\n",
-                                      dev->name);
-                               set_buffer_uptodate(bh);
-                               dev->barriers = 0;
-                               get_bh(bh);
-                               lock_buffer(bh);
-                               ret = submit_bh(WRITE, bh);
-                       }
-               } else {
-                       ret = submit_bh(WRITE, bh);
-               }
+               ret = write_dev_supers(dev, sb, do_barriers, 0, max_mirrors);
                if (ret)
                        total_errors++;
        }
@@ -1985,8 +2083,8 @@ static int write_all_supers(struct btrfs_root *root)
                printk("btrfs: %d errors while writing supers\n", total_errors);
                BUG();
        }
-       total_errors = 0;
 
+       total_errors = 0;
        list_for_each(cur, head) {
                dev = list_entry(cur, struct btrfs_device, dev_list);
                if (!dev->bdev)
@@ -1994,29 +2092,9 @@ static int write_all_supers(struct btrfs_root *root)
                if (!dev->in_fs_metadata || !dev->writeable)
                        continue;
 
-               BUG_ON(!dev->pending_io);
-               bh = dev->pending_io;
-               wait_on_buffer(bh);
-               if (!buffer_uptodate(dev->pending_io)) {
-                       if (do_barriers && dev->barriers) {
-                               printk("btrfs: disabling barriers on dev %s\n",
-                                      dev->name);
-                               set_buffer_uptodate(bh);
-                               get_bh(bh);
-                               lock_buffer(bh);
-                               dev->barriers = 0;
-                               ret = submit_bh(WRITE, bh);
-                               BUG_ON(ret);
-                               wait_on_buffer(bh);
-                               if (!buffer_uptodate(bh))
-                                       total_errors++;
-                       } else {
-                               total_errors++;
-                       }
-
-               }
-               dev->pending_io = NULL;
-               brelse(bh);
+               ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors);
+               if (ret)
+                       total_errors++;
        }
        if (total_errors > max_errors) {
                printk("btrfs: %d errors while writing supers\n", total_errors);
@@ -2025,12 +2103,12 @@ static int write_all_supers(struct btrfs_root *root)
        return 0;
 }
 
-int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
-                     *root)
+int write_ctree_super(struct btrfs_trans_handle *trans,
+                     struct btrfs_root *root, int max_mirrors)
 {
        int ret;
 
-       ret = write_all_supers(root);
+       ret = write_all_supers(root, max_mirrors);
        return ret;
 }
 
@@ -2116,7 +2194,7 @@ int btrfs_commit_super(struct btrfs_root *root)
        ret = btrfs_write_and_wait_transaction(NULL, root);
        BUG_ON(ret);
 
-       ret = write_ctree_super(NULL, root);
+       ret = write_ctree_super(NULL, root, 0);
        return ret;
 }
 
index 717e948..c0ff404 100644 (file)
 #ifndef __DISKIO__
 #define __DISKIO__
 
-#define BTRFS_SUPER_INFO_OFFSET (16 * 1024)
+#define BTRFS_SUPER_INFO_OFFSET (64 * 1024)
 #define BTRFS_SUPER_INFO_SIZE 4096
+
+#define BTRFS_SUPER_MIRROR_MAX  3
+#define BTRFS_SUPER_MIRROR_SHIFT 12
+
+static inline u64 btrfs_sb_offset(int mirror)
+{
+       u64 start = 16 * 1024;
+       if (mirror)
+               return start << (BTRFS_SUPER_MIRROR_SHIFT * mirror);
+       return BTRFS_SUPER_INFO_OFFSET;
+}
+
 struct btrfs_device;
 struct btrfs_fs_devices;
 
@@ -37,7 +49,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                              char *options);
 int close_ctree(struct btrfs_root *root);
 int write_ctree_super(struct btrfs_trans_handle *trans,
-                     struct btrfs_root *root);
+                     struct btrfs_root *root, int max_mirrors);
+struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
 int btrfs_commit_super(struct btrfs_root *root);
 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
                                            u64 bytenr, u32 blocksize);
index d156385..803647b 100644 (file)
@@ -189,6 +189,29 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
        return 0;
 }
 
+static int remove_sb_from_cache(struct btrfs_root *root,
+                               struct btrfs_block_group_cache *cache)
+{
+       u64 bytenr;
+       u64 *logical;
+       int stripe_len;
+       int i, nr, ret;
+
+       for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+               bytenr = btrfs_sb_offset(i);
+               ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
+                                      cache->key.objectid, bytenr, 0,
+                                      &logical, &nr, &stripe_len);
+               BUG_ON(ret);
+               while (nr--) {
+                       btrfs_remove_free_space(cache, logical[nr],
+                                               stripe_len);
+               }
+               kfree(logical);
+       }
+       return 0;
+}
+
 static int cache_block_group(struct btrfs_root *root,
                             struct btrfs_block_group_cache *block_group)
 {
@@ -197,9 +220,7 @@ static int cache_block_group(struct btrfs_root *root,
        struct btrfs_key key;
        struct extent_buffer *leaf;
        int slot;
-       u64 last = 0;
-       u64 first_free;
-       int found = 0;
+       u64 last = block_group->key.objectid;
 
        if (!block_group)
                return 0;
@@ -220,23 +241,13 @@ static int cache_block_group(struct btrfs_root *root,
         * skip the locking here
         */
        path->skip_locking = 1;
-       first_free = max_t(u64, block_group->key.objectid,
-                          BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
-       key.objectid = block_group->key.objectid;
+       key.objectid = last;
        key.offset = 0;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto err;
-       ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
-       if (ret < 0)
-               goto err;
-       if (ret == 0) {
-               leaf = path->nodes[0];
-               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-               if (key.objectid + key.offset > first_free)
-                       first_free = key.objectid + key.offset;
-       }
+
        while(1) {
                leaf = path->nodes[0];
                slot = path->slots[0];
@@ -258,11 +269,6 @@ static int cache_block_group(struct btrfs_root *root,
                        break;
 
                if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
-                       if (!found) {
-                               last = first_free;
-                               found = 1;
-                       }
-
                        add_new_free_space(block_group, root->fs_info, last,
                                           key.objectid);
 
@@ -272,13 +278,11 @@ next:
                path->slots[0]++;
        }
 
-       if (!found)
-               last = first_free;
-
        add_new_free_space(block_group, root->fs_info, last,
                           block_group->key.objectid +
                           block_group->key.offset);
 
+       remove_sb_from_cache(root, block_group);
        block_group->cached = 1;
        ret = 0;
 err:
@@ -1974,10 +1978,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                if (alloc) {
                        old_val += num_bytes;
                        cache->space_info->bytes_used += num_bytes;
-                       if (cache->ro) {
+                       if (cache->ro)
                                cache->space_info->bytes_readonly -= num_bytes;
-                               WARN_ON(1);
-                       }
                        btrfs_set_block_group_used(&cache->item, old_val);
                        spin_unlock(&cache->lock);
                        spin_unlock(&cache->space_info->lock);
index 09462ad..2e69b9c 100644 (file)
@@ -290,7 +290,6 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
                        ret = -EINVAL;
                        goto out;
                }
-
                unlink_free_space(block_group, info);
 
                if (info->bytes == bytes) {
Simple merge
Simple merge
Simple merge
Simple merge