ext4: add new online resize interface

author Yongqiang Yang <xiaoqiangnk@gmail.com>

Wed, 4 Jan 2012 22:09:44 +0000 (17:09 -0500)

committer Theodore Ts'o <tytso@mit.edu>

Wed, 4 Jan 2012 22:09:44 +0000 (17:09 -0500)
author Yongqiang Yang <xiaoqiangnk@gmail.com>
Wed, 4 Jan 2012 22:09:44 +0000 (17:09 -0500)
committer Theodore Ts'o <tytso@mit.edu>
Wed, 4 Jan 2012 22:09:44 +0000 (17:09 -0500)
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt

index 4917cf2..10ec463 100644 (file)
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -581,6 +581,13 @@ Table of Ext4 specific ioctls
                               behaviour may change in the future as it is
                               not necessary and has been done this way only
                               for sake of simplicity.
+
+ EXT4_IOC_RESIZE_FS          Resize the filesystem to a new size.  The number
+                             of blocks of resized filesystem is passed in via
+                             64 bit integer argument.  The kernel allocates
+                             bitmaps and inode table, the userspace tool thus
+                             just passes the new number of blocks.
+
  ..............................................................................
  
  References
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h

index 05058e2..4bc0e82 100644 (file)
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -583,6 +583,7 @@ enum {
   /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
  #define EXT4_IOC_ALLOC_DA_BLKS         _IO('f', 12)
  #define EXT4_IOC_MOVE_EXT              _IOWR('f', 15, struct move_extent)
+#define EXT4_IOC_RESIZE_FS             _IOW('f', 16, __u64)
  
  #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
  /*
@@ -1929,6 +1930,7 @@ extern int ext4_group_add(struct super_block *sb,
  extern int ext4_group_extend(struct super_block *sb,
                                 struct ext4_super_block *es,
                                 ext4_fsblk_t n_blocks_count);
+extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
  
  /* super.c */
  extern void *ext4_kvmalloc(size_t size, gfp_t flags);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c

index ff1aab7..c1a9880 100644 (file)
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -18,6 +18,8 @@
  #include "ext4_jbd2.h"
  #include "ext4.h"
  
+#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
+
  long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
  {
         struct inode *inode = filp->f_dentry->d_inode;
@@ -329,6 +331,60 @@ mext_out:
                 return err;
         }
  
+       case EXT4_IOC_RESIZE_FS: {
+               ext4_fsblk_t n_blocks_count;
+               struct super_block *sb = inode->i_sb;
+               int err = 0, err2 = 0;
+
+               if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                              EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
+                       ext4_msg(sb, KERN_ERR,
+                                "Online resizing not (yet) supported with bigalloc");
+                       return -EOPNOTSUPP;
+               }
+
+               if (EXT4_HAS_INCOMPAT_FEATURE(sb,
+                              EXT4_FEATURE_INCOMPAT_META_BG)) {
+                       ext4_msg(sb, KERN_ERR,
+                                "Online resizing not (yet) supported with meta_bg");
+                       return -EOPNOTSUPP;
+               }
+
+               if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
+                                  sizeof(__u64))) {
+                       return -EFAULT;
+               }
+
+               if (n_blocks_count > MAX_32_NUM &&
+                   !EXT4_HAS_INCOMPAT_FEATURE(sb,
+                                              EXT4_FEATURE_INCOMPAT_64BIT)) {
+                       ext4_msg(sb, KERN_ERR,
+                                "File system only supports 32-bit block numbers");
+                       return -EOPNOTSUPP;
+               }
+
+               err = ext4_resize_begin(sb);
+               if (err)
+                       return err;
+
+               err = mnt_want_write(filp->f_path.mnt);
+               if (err)
+                       goto resizefs_out;
+
+               err = ext4_resize_fs(sb, n_blocks_count);
+               if (EXT4_SB(sb)->s_journal) {
+                       jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+                       err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+                       jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+               }
+               if (err == 0)
+                       err = err2;
+               mnt_drop_write(filp->f_path.mnt);
+resizefs_out:
+               ext4_resize_end(sb);
+               return err;
+       }
+
         case FITRIM:
         {
                 struct request_queue *q = bdev_get_queue(sb->s_bdev);
@@ -427,6 +483,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
         }
         case EXT4_IOC_MOVE_EXT:
         case FITRIM:
+       case EXT4_IOC_RESIZE_FS:
                 break;
         default:
                 return -ENOIOCTLCMD;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c

index dac2356..5fe2a01 100644 (file)
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1430,6 +1430,70 @@ exit:
         return err;
  }
  
+static int ext4_setup_next_flex_gd(struct super_block *sb,
+                                   struct ext4_new_flex_group_data *flex_gd,
+                                   ext4_fsblk_t n_blocks_count,
+                                   unsigned long flexbg_size)
+{
+       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+       struct ext4_new_group_data *group_data = flex_gd->groups;
+       ext4_fsblk_t o_blocks_count;
+       ext4_group_t n_group;
+       ext4_group_t group;
+       ext4_group_t last_group;
+       ext4_grpblk_t last;
+       ext4_grpblk_t blocks_per_group;
+       unsigned long i;
+
+       blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb);
+
+       o_blocks_count = ext4_blocks_count(es);
+
+       if (o_blocks_count == n_blocks_count)
+               return 0;
+
+       ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
+       BUG_ON(last);
+       ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last);
+
+       last_group = group | (flexbg_size - 1);
+       if (last_group > n_group)
+               last_group = n_group;
+
+       flex_gd->count = last_group - group + 1;
+
+       for (i = 0; i < flex_gd->count; i++) {
+               int overhead;
+
+               group_data[i].group = group + i;
+               group_data[i].blocks_count = blocks_per_group;
+               overhead = ext4_bg_has_super(sb, group + i) ?
+                          (1 + ext4_bg_num_gdb(sb, group + i) +
+                           le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
+               group_data[i].free_blocks_count = blocks_per_group - overhead;
+               if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                              EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+                       flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
+                                              EXT4_BG_INODE_UNINIT;
+               else
+                       flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED;
+       }
+
+       if (last_group == n_group &&
+           EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                      EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+               /* We need to initialize block bitmap of last group. */
+               flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT;
+
+       if ((last_group == n_group) && (last != blocks_per_group - 1)) {
+               group_data[i - 1].blocks_count = last + 1;
+               group_data[i - 1].free_blocks_count -= blocks_per_group-
+                                       last - 1;
+       }
+
+       return 1;
+}
+
  /* Add group descriptor data to an existing or new group descriptor block.
   * Ensure we handle all possible error conditions _before_ we start modifying
   * the filesystem, because we cannot abort the transaction and not have it
@@ -1827,3 +1891,116 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
  exit_put:
         return err;
  } /* ext4_group_extend */
+
+/*
+ * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count
+ *
+ * @sb: super block of the fs to be resized
+ * @n_blocks_count: the number of blocks resides in the resized fs
+ */
+int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
+{
+       struct ext4_new_flex_group_data *flex_gd = NULL;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_super_block *es = sbi->s_es;
+       struct buffer_head *bh;
+       struct inode *resize_inode;
+       ext4_fsblk_t o_blocks_count;
+       ext4_group_t o_group;
+       ext4_group_t n_group;
+       ext4_grpblk_t offset;
+       unsigned long n_desc_blocks;
+       unsigned long o_desc_blocks;
+       unsigned long desc_blocks;
+       int err = 0, flexbg_size = 1;
+
+       o_blocks_count = ext4_blocks_count(es);
+
+       if (test_opt(sb, DEBUG))
+               printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu "
+                      "upto %llu blocks\n", o_blocks_count, n_blocks_count);
+
+       if (n_blocks_count < o_blocks_count) {
+               /* On-line shrinking not supported */
+               ext4_warning(sb, "can't shrink FS - resize aborted");
+               return -EINVAL;
+       }
+
+       if (n_blocks_count == o_blocks_count)
+               /* Nothing need to do */
+               return 0;
+
+       ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset);
+       ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset);
+
+       n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) /
+                       EXT4_DESC_PER_BLOCK(sb);
+       o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
+                       EXT4_DESC_PER_BLOCK(sb);
+       desc_blocks = n_desc_blocks - o_desc_blocks;
+
+       if (desc_blocks &&
+           (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) ||
+            le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) {
+               ext4_warning(sb, "No reserved GDT blocks, can't resize");
+               return -EPERM;
+       }
+
+       resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
+       if (IS_ERR(resize_inode)) {
+               ext4_warning(sb, "Error opening resize inode");
+               return PTR_ERR(resize_inode);
+       }
+
+       /* See if the device is actually as big as what was requested */
+       bh = sb_bread(sb, n_blocks_count - 1);
+       if (!bh) {
+               ext4_warning(sb, "can't read last block, resize aborted");
+               return -ENOSPC;
+       }
+       brelse(bh);
+
+       if (offset != 0) {
+               /* extend the last group */
+               ext4_grpblk_t add;
+               add = EXT4_BLOCKS_PER_GROUP(sb) - offset;
+               err = ext4_group_extend_no_check(sb, o_blocks_count, add);
+               if (err)
+                       goto out;
+       }
+
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
+           es->s_log_groups_per_flex)
+               flexbg_size = 1 << es->s_log_groups_per_flex;
+
+       o_blocks_count = ext4_blocks_count(es);
+       if (o_blocks_count == n_blocks_count)
+               goto out;
+
+       flex_gd = alloc_flex_gd(flexbg_size);
+       if (flex_gd == NULL) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       /* Add flex groups. Note that a regular group is a
+        * flex group with 1 group.
+        */
+       while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
+                                             flexbg_size)) {
+               ext4_alloc_group_tables(sb, flex_gd, flexbg_size);
+               err = ext4_flex_group_add(sb, resize_inode, flex_gd);
+               if (unlikely(err))
+                       break;
+       }
+
+out:
+       if (flex_gd)
+               free_flex_gd(flex_gd);
+
+       iput(resize_inode);
+       if (test_opt(sb, DEBUG))
+               printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu "
+                      "upto %llu blocks\n", o_blocks_count, n_blocks_count);
+       return err;
+}
author	Yongqiang Yang <xiaoqiangnk@gmail.com>
	Wed, 4 Jan 2012 22:09:44 +0000 (17:09 -0500)
committer	Theodore Ts'o <tytso@mit.edu>
	Wed, 4 Jan 2012 22:09:44 +0000 (17:09 -0500)
Documentation/filesystems/ext4.txt		patch \| blob \| history
fs/ext4/ext4.h		patch \| blob \| history
fs/ext4/ioctl.c		patch \| blob \| history
fs/ext4/resize.c		patch \| blob \| history