ocfs2/xattr: Remove additional bucket allocation in bucket defragment.
[pandora-kernel.git] / fs / ocfs2 / xattr.c
index 71d9e7b..87cf39d 100644 (file)
@@ -61,7 +61,14 @@ struct ocfs2_xattr_def_value_root {
 };
 
 struct ocfs2_xattr_bucket {
+       /* The inode these xattrs are associated with */
+       struct inode *bu_inode;
+
+       /* The actual buffers that make up the bucket */
        struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
+
+       /* How many blocks make up one bucket for this filesystem */
+       int bu_blocks;
 };
 
 #define OCFS2_XATTR_ROOT_SIZE  (sizeof(struct ocfs2_xattr_def_value_root))
@@ -97,7 +104,7 @@ struct ocfs2_xattr_search {
         */
        struct buffer_head *xattr_bh;
        struct ocfs2_xattr_header *header;
-       struct ocfs2_xattr_bucket bucket;
+       struct ocfs2_xattr_bucket *bucket;
        void *base;
        void *end;
        struct ocfs2_xattr_entry *here;
@@ -157,69 +164,91 @@ static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
 
-static void ocfs2_xattr_bucket_relse(struct inode *inode,
-                                    struct ocfs2_xattr_bucket *bucket)
+/* Allocate an empty bucket bound to @inode; returns NULL if out of memory. */
+static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
+{
+       struct ocfs2_xattr_bucket *bucket;
+       int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+
+       BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
+
+       bucket = kzalloc(sizeof(*bucket), GFP_NOFS);
+       if (bucket) {
+               bucket->bu_inode = inode;
+               bucket->bu_blocks = blks;
+       }
+       return bucket;
+}
+
+static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
 {
-       int i, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+       int i;
 
-       for (i = 0; i < blks; i++) {
+       for (i = 0; i < bucket->bu_blocks; i++) {
                brelse(bucket->bu_bhs[i]);
                bucket->bu_bhs[i] = NULL;
        }
 }
 
+static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
+{
+       if (!bucket)    /* NULL is accepted so error paths can free blindly */
+               return;
+       ocfs2_xattr_bucket_relse(bucket);
+       bucket->bu_inode = NULL;
+       kfree(bucket);
+}
+
 /*
  * A bucket that has never been written to disk doesn't need to be
  * read.  We just need the buffer_heads.  Don't call this for
  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
  * them fully.
  */
-static int ocfs2_init_xattr_bucket(struct inode *inode,
-                                  struct ocfs2_xattr_bucket *bucket,
+static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
                                   u64 xb_blkno)
 {
        int i, rc = 0;
-       int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 
-       for (i = 0; i < blks; i++) {
-               bucket->bu_bhs[i] = sb_getblk(inode->i_sb, xb_blkno + i);
+       for (i = 0; i < bucket->bu_blocks; i++) {
+               bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
+                                             xb_blkno + i);
                if (!bucket->bu_bhs[i]) {
                        rc = -EIO;
                        mlog_errno(rc);
                        break;
                }
 
-               ocfs2_set_new_buffer_uptodate(inode, bucket->bu_bhs[i]);
+               ocfs2_set_new_buffer_uptodate(bucket->bu_inode,
+                                             bucket->bu_bhs[i]);
        }
 
        if (rc)
-               ocfs2_xattr_bucket_relse(inode, bucket);
+               ocfs2_xattr_bucket_relse(bucket);
        return rc;
 }
 
 /* Read the xattr bucket at xb_blkno */
-static int ocfs2_read_xattr_bucket(struct inode *inode,
-                                  struct ocfs2_xattr_bucket *bucket,
+static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
                                   u64 xb_blkno)
 {
-       int rc, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+       int rc;
 
-       rc = ocfs2_read_blocks(inode, xb_blkno, blks, bucket->bu_bhs, 0);
+       rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
+                              bucket->bu_blocks, bucket->bu_bhs, 0);
        if (rc)
-               ocfs2_xattr_bucket_relse(inode, bucket);
+               ocfs2_xattr_bucket_relse(bucket);
        return rc;
 }
 
 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
-                                            struct inode *inode,
                                             struct ocfs2_xattr_bucket *bucket,
                                             int type)
 {
        int i, rc = 0;
-       int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 
-       for (i = 0; i < blks; i++) {
-               rc = ocfs2_journal_access(handle, inode,
+       for (i = 0; i < bucket->bu_blocks; i++) {
+               rc = ocfs2_journal_access(handle, bucket->bu_inode,
                                          bucket->bu_bhs[i], type);
                if (rc) {
                        mlog_errno(rc);
@@ -231,24 +260,24 @@ static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
 }
 
 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
-                                            struct inode *inode,
                                             struct ocfs2_xattr_bucket *bucket)
 {
-       int i, blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+       int i;
 
-       for (i = 0; i < blks; i++)
+       for (i = 0; i < bucket->bu_blocks; i++)
                ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
 }
 
-static void ocfs2_xattr_bucket_copy_data(struct inode *inode,
-                                        struct ocfs2_xattr_bucket *dest,
+static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
                                         struct ocfs2_xattr_bucket *src)
 {
        int i;
-       int blocksize = inode->i_sb->s_blocksize;
-       int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+       int blocksize = src->bu_inode->i_sb->s_blocksize;
+
+       BUG_ON(dest->bu_blocks != src->bu_blocks);
+       BUG_ON(dest->bu_inode != src->bu_inode);
 
-       for (i = 0; i < blks; i++) {
+       for (i = 0; i < src->bu_blocks; i++) {
                memcpy(bucket_block(dest, i), bucket_block(src, i),
                       blocksize);
        }
@@ -869,7 +898,12 @@ static int ocfs2_xattr_block_get(struct inode *inode,
        size_t size;
        int ret = -ENODATA, name_offset, name_len, block_off, i;
 
-       memset(&xs->bucket, 0, sizeof(xs->bucket));
+       xs->bucket = ocfs2_xattr_bucket_new(inode);
+       if (!xs->bucket) {
+               ret = -ENOMEM;
+               mlog_errno(ret);
+               goto cleanup;
+       }
 
        ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
        if (ret) {
@@ -895,11 +929,11 @@ static int ocfs2_xattr_block_get(struct inode *inode,
 
                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
                        ret = ocfs2_xattr_bucket_get_name_value(inode,
-                                                               bucket_xh(&xs->bucket),
+                                                               bucket_xh(xs->bucket),
                                                                i,
                                                                &block_off,
                                                                &name_offset);
-                       xs->base = bucket_block(&xs->bucket, block_off);
+                       xs->base = bucket_block(xs->bucket, block_off);
                }
                if (ocfs2_xattr_is_local(xs->here)) {
                        memcpy(buffer, (void *)xs->base +
@@ -917,8 +951,7 @@ static int ocfs2_xattr_block_get(struct inode *inode,
        }
        ret = size;
 cleanup:
-       ocfs2_xattr_bucket_relse(inode, &xs->bucket);
-       memset(&xs->bucket, 0, sizeof(xs->bucket));
+       ocfs2_xattr_bucket_free(xs->bucket);
 
        brelse(xs->xattr_bh);
        xs->xattr_bh = NULL;
@@ -2047,10 +2080,20 @@ int ocfs2_xattr_set(struct inode *inode,
        if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
                return -EOPNOTSUPP;
 
+       /*
+        * Only xbs will be used on indexed trees.  xis doesn't need a
+        * bucket.
+        */
+       xbs.bucket = ocfs2_xattr_bucket_new(inode);
+       if (!xbs.bucket) {
+               mlog_errno(-ENOMEM);
+               return -ENOMEM;
+       }
+
        ret = ocfs2_inode_lock(inode, &di_bh, 1);
        if (ret < 0) {
                mlog_errno(ret);
-               return ret;
+               goto cleanup_nolock;
        }
        xis.inode_bh = xbs.inode_bh = di_bh;
        di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -2127,9 +2170,10 @@ int ocfs2_xattr_set(struct inode *inode,
 cleanup:
        up_write(&OCFS2_I(inode)->ip_xattr_sem);
        ocfs2_inode_unlock(inode, 1);
+cleanup_nolock:
        brelse(di_bh);
        brelse(xbs.xattr_bh);
-       ocfs2_xattr_bucket_relse(inode, &xbs.bucket);
+       ocfs2_xattr_bucket_free(xbs.bucket);
 
        return ret;
 }
@@ -2204,7 +2248,7 @@ typedef int (xattr_bucket_func)(struct inode *inode,
                                void *para);
 
 static int ocfs2_find_xe_in_bucket(struct inode *inode,
-                                  struct buffer_head *header_bh,
+                                  struct ocfs2_xattr_bucket *bucket,
                                   int name_index,
                                   const char *name,
                                   u32 name_hash,
@@ -2212,11 +2256,9 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
                                   int *found)
 {
        int i, ret = 0, cmp = 1, block_off, new_offset;
-       struct ocfs2_xattr_header *xh =
-                       (struct ocfs2_xattr_header *)header_bh->b_data;
+       struct ocfs2_xattr_header *xh = bucket_xh(bucket);
        size_t name_len = strlen(name);
        struct ocfs2_xattr_entry *xe = NULL;
-       struct buffer_head *name_bh = NULL;
        char *xe_name;
 
        /*
@@ -2247,19 +2289,8 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
                        break;
                }
 
-               ret = ocfs2_read_block(inode, header_bh->b_blocknr + block_off,
-                                      &name_bh);
-               if (ret) {
-                       mlog_errno(ret);
-                       break;
-               }
-               xe_name = name_bh->b_data + new_offset;
-
-               cmp = memcmp(name, xe_name, name_len);
-               brelse(name_bh);
-               name_bh = NULL;
-
-               if (cmp == 0) {
+               xe_name = bucket_block(bucket, block_off) + new_offset;
+               if (!memcmp(name, xe_name, name_len)) {
                        *xe_index = i;
                        *found = 1;
                        ret = 0;
@@ -2289,39 +2320,42 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
                                   struct ocfs2_xattr_search *xs)
 {
        int ret, found = 0;
-       struct buffer_head *bh = NULL;
-       struct buffer_head *lower_bh = NULL;
        struct ocfs2_xattr_header *xh = NULL;
        struct ocfs2_xattr_entry *xe = NULL;
        u16 index = 0;
        u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
        int low_bucket = 0, bucket, high_bucket;
+       struct ocfs2_xattr_bucket *search;
        u32 last_hash;
-       u64 blkno;
+       u64 blkno, lower_blkno = 0;
+
+       search = ocfs2_xattr_bucket_new(inode);
+       if (!search) {
+               ret = -ENOMEM;
+               mlog_errno(ret);
+               goto out;
+       }
 
-       ret = ocfs2_read_block(inode, p_blkno, &bh);
+       ret = ocfs2_read_xattr_bucket(search, p_blkno);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }
 
-       xh = (struct ocfs2_xattr_header *)bh->b_data;
+       xh = bucket_xh(search);
        high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
-
        while (low_bucket <= high_bucket) {
-               brelse(bh);
-               bh = NULL;
-               bucket = (low_bucket + high_bucket) / 2;
+               ocfs2_xattr_bucket_relse(search);
 
+               bucket = (low_bucket + high_bucket) / 2;
                blkno = p_blkno + bucket * blk_per_bucket;
-
-               ret = ocfs2_read_block(inode, blkno, &bh);
+               ret = ocfs2_read_xattr_bucket(search, blkno);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }
 
-               xh = (struct ocfs2_xattr_header *)bh->b_data;
+               xh = bucket_xh(search);
                xe = &xh->xh_entries[0];
                if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
                        high_bucket = bucket - 1;
@@ -2338,10 +2372,8 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
 
                last_hash = le32_to_cpu(xe->xe_name_hash);
 
-               /* record lower_bh which may be the insert place. */
-               brelse(lower_bh);
-               lower_bh = bh;
-               bh = NULL;
+               /* record lower_blkno which may be the insert place. */
+               lower_blkno = blkno;
 
                if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
                        low_bucket = bucket + 1;
@@ -2349,7 +2381,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
                }
 
                /* the searched xattr should reside in this bucket if exists. */
-               ret = ocfs2_find_xe_in_bucket(inode, lower_bh,
+               ret = ocfs2_find_xe_in_bucket(inode, search,
                                              name_index, name, name_hash,
                                              &index, &found);
                if (ret) {
@@ -2364,44 +2396,29 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
         * When the xattr's hash value is in the gap of 2 buckets, we will
         * always set it to the previous bucket.
         */
-       if (!lower_bh) {
-               /*
-                * We can't find any bucket whose first name_hash is less
-                * than the find name_hash.
-                */
-               BUG_ON(bh->b_blocknr != p_blkno);
-               lower_bh = bh;
-               bh = NULL;
+       if (!lower_blkno)
+               lower_blkno = p_blkno;
+
+       /* This should be in cache - we just read it during the search */
+       ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
+       if (ret) {
+               mlog_errno(ret);
+               goto out;
        }
-       xs->bucket.bu_bhs[0] = lower_bh;
-       lower_bh = NULL;
 
-       xs->header = bucket_xh(&xs->bucket);
-       xs->base = bucket_block(&xs->bucket, 0);
+       xs->header = bucket_xh(xs->bucket);
+       xs->base = bucket_block(xs->bucket, 0);
        xs->end = xs->base + inode->i_sb->s_blocksize;
 
        if (found) {
-               /*
-                * If we have found the xattr enty, read all the blocks in
-                * this bucket.
-                */
-               ret = ocfs2_read_blocks(inode, bucket_blkno(&xs->bucket) + 1,
-                                       blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
-                                       0);
-               if (ret) {
-                       mlog_errno(ret);
-                       goto out;
-               }
-
                xs->here = &xs->header->xh_entries[index];
                mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
-                    (unsigned long long)bucket_blkno(&xs->bucket), index);
+                    (unsigned long long)bucket_blkno(xs->bucket), index);
        } else
                ret = -ENODATA;
 
 out:
-       brelse(bh);
-       brelse(lower_bh);
+       ocfs2_xattr_bucket_free(search);
        return ret;
 }
 
@@ -2453,22 +2470,24 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
                                       void *para)
 {
        int i, ret = 0;
-       int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
        u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
        u32 num_buckets = clusters * bpc;
-       struct ocfs2_xattr_bucket bucket;
+       struct ocfs2_xattr_bucket *bucket;
 
-       memset(&bucket, 0, sizeof(bucket));
+       bucket = ocfs2_xattr_bucket_new(inode);
+       if (!bucket) {
+               mlog_errno(-ENOMEM);
+               return -ENOMEM;
+       }
 
        mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
             clusters, (unsigned long long)blkno);
 
-       for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
-               ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket,
-                                       bucket.bu_bhs, 0);
+       for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
+               ret = ocfs2_read_xattr_bucket(bucket, blkno);
                if (ret) {
                        mlog_errno(ret);
-                       goto out;
+                       break;
                }
 
                /*
@@ -2476,26 +2495,24 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
                 * in the 1st bucket.
                 */
                if (i == 0)
-                       num_buckets = le16_to_cpu(bucket_xh(&bucket)->xh_num_buckets);
+                       num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
 
                mlog(0, "iterating xattr bucket %llu, first hash %u\n",
                     (unsigned long long)blkno,
-                    le32_to_cpu(bucket_xh(&bucket)->xh_entries[0].xe_name_hash));
+                    le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
                if (func) {
-                       ret = func(inode, &bucket, para);
-                       if (ret) {
+                       ret = func(inode, bucket, para);
+                       if (ret)
                                mlog_errno(ret);
-                               break;
-                       }
+                       /* Fall through to bucket_relse() */
                }
 
-               ocfs2_xattr_bucket_relse(inode, &bucket);
-               memset(&bucket, 0, sizeof(bucket));
+               ocfs2_xattr_bucket_relse(bucket);
+               if (ret)
+                       break;
        }
 
-out:
-       ocfs2_xattr_bucket_relse(inode, &bucket);
-
+       ocfs2_xattr_bucket_free(bucket);
        return ret;
 }
 
@@ -2632,32 +2649,34 @@ static void swap_xe(void *a, void *b, int size)
 /*
  * When the ocfs2_xattr_block is filled up, new bucket will be created
  * and all the xattr entries will be moved to the new bucket.
+ * The header goes at the start of the bucket, and the names+values are
+ * filled from the end.  This is why *target starts as the last buffer.
  * Note: we need to sort the entries since they are not saved in order
  * in the ocfs2_xattr_block.
  */
 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
                                           struct buffer_head *xb_bh,
-                                          struct buffer_head *xh_bh,
-                                          struct buffer_head *data_bh)
+                                          struct ocfs2_xattr_bucket *bucket)
 {
        int i, blocksize = inode->i_sb->s_blocksize;
+       int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
        u16 offset, size, off_change;
        struct ocfs2_xattr_entry *xe;
        struct ocfs2_xattr_block *xb =
                                (struct ocfs2_xattr_block *)xb_bh->b_data;
        struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
-       struct ocfs2_xattr_header *xh =
-                               (struct ocfs2_xattr_header *)xh_bh->b_data;
+       struct ocfs2_xattr_header *xh = bucket_xh(bucket);
        u16 count = le16_to_cpu(xb_xh->xh_count);
-       char *target = xh_bh->b_data, *src = xb_bh->b_data;
+       char *src = xb_bh->b_data;
+       char *target = bucket_block(bucket, blks - 1);
 
        mlog(0, "cp xattr from block %llu to bucket %llu\n",
             (unsigned long long)xb_bh->b_blocknr,
-            (unsigned long long)xh_bh->b_blocknr);
+            (unsigned long long)bucket_blkno(bucket));
+
+       for (i = 0; i < blks; i++)
+               memset(bucket_block(bucket, i), 0, blocksize);
 
-       memset(xh_bh->b_data, 0, blocksize);
-       if (data_bh)
-               memset(data_bh->b_data, 0, blocksize);
        /*
         * Since the xe_name_offset is based on ocfs2_xattr_header,
         * there is a offset change corresponding to the change of
@@ -2669,8 +2688,6 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
        size = blocksize - offset;
 
        /* copy all the names and values. */
-       if (data_bh)
-               target = data_bh->b_data;
        memcpy(target + offset, src + offset, size);
 
        /* Init new header now. */
@@ -2680,7 +2697,7 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
        xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
 
        /* copy all the entries. */
-       target = xh_bh->b_data;
+       target = bucket_block(bucket, 0);
        offset = offsetof(struct ocfs2_xattr_header, xh_entries);
        size = count * sizeof(struct ocfs2_xattr_entry);
        memcpy(target + offset, (char *)xb_xh + offset, size);
@@ -2706,42 +2723,24 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
  * While if the entry is in index b-tree, "bucket" indicates the
  * real place of the xattr.
  */
-static int ocfs2_xattr_update_xattr_search(struct inode *inode,
-                                          struct ocfs2_xattr_search *xs,
-                                          struct buffer_head *old_bh,
-                                          struct buffer_head *new_bh)
+static void ocfs2_xattr_update_xattr_search(struct inode *inode,
+                                           struct ocfs2_xattr_search *xs,
+                                           struct buffer_head *old_bh)
 {
-       int ret = 0;
        char *buf = old_bh->b_data;
        struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
        struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
-       int i, blocksize = inode->i_sb->s_blocksize;
-       u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-
-       xs->bucket.bu_bhs[0] = new_bh;
-       get_bh(new_bh);
-       xs->header = bucket_xh(&xs->bucket);
+       int i;
 
-       xs->base = new_bh->b_data;
+       xs->header = bucket_xh(xs->bucket);
+       xs->base = bucket_block(xs->bucket, 0);
        xs->end = xs->base + inode->i_sb->s_blocksize;
 
-       if (!xs->not_found) {
-               if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
-                       ret = ocfs2_read_blocks(inode,
-                                       bucket_blkno(&xs->bucket) + 1,
-                                       blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
-                                       0);
-                       if (ret) {
-                               mlog_errno(ret);
-                               return ret;
-                       }
-
-               }
-               i = xs->here - old_xh->xh_entries;
-               xs->here = &xs->header->xh_entries[i];
-       }
+       if (xs->not_found)
+               return;
 
-       return ret;
+       i = xs->here - old_xh->xh_entries;
+       xs->here = &xs->header->xh_entries[i];
 }
 
 static int ocfs2_xattr_create_index_block(struct inode *inode,
@@ -2754,18 +2753,17 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
        struct ocfs2_alloc_context *data_ac;
-       struct buffer_head *xh_bh = NULL, *data_bh = NULL;
        struct buffer_head *xb_bh = xs->xattr_bh;
        struct ocfs2_xattr_block *xb =
                        (struct ocfs2_xattr_block *)xb_bh->b_data;
        struct ocfs2_xattr_tree_root *xr;
        u16 xb_flags = le16_to_cpu(xb->xb_flags);
-       u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 
        mlog(0, "create xattr index block for %llu\n",
             (unsigned long long)xb_bh->b_blocknr);
 
        BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
+       BUG_ON(!xs->bucket);
 
        ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
        if (ret) {
@@ -2781,10 +2779,10 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
        down_write(&oi->ip_alloc_sem);
 
        /*
-        * 3 more credits, one for xattr block update, one for the 1st block
-        * of the new xattr bucket and one for the value/data.
+        * We need more credits.  One for the xattr block update and one
+        * for each block of the new xattr bucket.
         */
-       credits += 3;
+       credits += 1 + ocfs2_blocks_per_xattr_bucket(inode->i_sb);
        handle = ocfs2_start_trans(osb, credits);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
@@ -2815,51 +2813,23 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
        mlog(0, "allocate 1 cluster from %llu to xattr block\n",
             (unsigned long long)blkno);
 
-       xh_bh = sb_getblk(inode->i_sb, blkno);
-       if (!xh_bh) {
-               ret = -EIO;
+       ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
+       if (ret) {
                mlog_errno(ret);
                goto out_commit;
        }
 
-       ocfs2_set_new_buffer_uptodate(inode, xh_bh);
-
-       ret = ocfs2_journal_access(handle, inode, xh_bh,
-                                  OCFS2_JOURNAL_ACCESS_CREATE);
+       ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
+                                               OCFS2_JOURNAL_ACCESS_CREATE);
        if (ret) {
                mlog_errno(ret);
                goto out_commit;
        }
 
-       if (bpb > 1) {
-               data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1);
-               if (!data_bh) {
-                       ret = -EIO;
-                       mlog_errno(ret);
-                       goto out_commit;
-               }
-
-               ocfs2_set_new_buffer_uptodate(inode, data_bh);
+       ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
+       ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
 
-               ret = ocfs2_journal_access(handle, inode, data_bh,
-                                          OCFS2_JOURNAL_ACCESS_CREATE);
-               if (ret) {
-                       mlog_errno(ret);
-                       goto out_commit;
-               }
-       }
-
-       ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh);
-
-       ocfs2_journal_dirty(handle, xh_bh);
-       if (data_bh)
-               ocfs2_journal_dirty(handle, data_bh);
-
-       ret = ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);
-       if (ret) {
-               mlog_errno(ret);
-               goto out_commit;
-       }
+       ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
 
        /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
        memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
@@ -2894,9 +2864,6 @@ out:
        if (data_ac)
                ocfs2_free_alloc_context(data_ac);
 
-       brelse(xh_bh);
-       brelse(data_bh);
-
        return ret;
 }
 
@@ -2927,22 +2894,11 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
        struct ocfs2_xattr_header *xh;
        char *entries, *buf, *bucket_buf = NULL;
        u64 blkno = bucket_blkno(bucket);
-       u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
        u16 xh_free_start;
        size_t blocksize = inode->i_sb->s_blocksize;
        handle_t *handle;
-       struct buffer_head **bhs;
        struct ocfs2_xattr_entry *xe;
 
-       bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
-                       GFP_NOFS);
-       if (!bhs)
-               return -ENOMEM;
-
-       ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, 0);
-       if (ret)
-               goto out;
-
        /*
         * In order to make the operation more efficient and generic,
         * we copy all the blocks into a contiguous memory and do the
@@ -2956,10 +2912,10 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
        }
 
        buf = bucket_buf;
-       for (i = 0; i < blk_per_bucket; i++, buf += blocksize)
-               memcpy(buf, bhs[i]->b_data, blocksize);
+       for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
+               memcpy(buf, bucket_block(bucket, i), blocksize);
 
-       handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket);
+       handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), bucket->bu_blocks);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
                handle = NULL;
@@ -2967,13 +2923,11 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
                goto out;
        }
 
-       for (i = 0; i < blk_per_bucket; i++) {
-               ret = ocfs2_journal_access(handle, inode, bhs[i],
-                                          OCFS2_JOURNAL_ACCESS_WRITE);
-               if (ret < 0) {
-                       mlog_errno(ret);
-                       goto commit;
-               }
+       ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
+                                               OCFS2_JOURNAL_ACCESS_WRITE);
+       if (ret < 0) {
+               mlog_errno(ret);
+               goto commit;
        }
 
        xh = (struct ocfs2_xattr_header *)bucket_buf;
@@ -3042,21 +2996,13 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
             cmp_xe, swap_xe);
 
        buf = bucket_buf;
-       for (i = 0; i < blk_per_bucket; i++, buf += blocksize) {
-               memcpy(bhs[i]->b_data, buf, blocksize);
-               ocfs2_journal_dirty(handle, bhs[i]);
-       }
+       for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
+               memcpy(bucket_block(bucket, i), buf, blocksize);
+       ocfs2_xattr_bucket_journal_dirty(handle, bucket);
 
 commit:
        ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 out:
-
-       if (bhs) {
-               for (i = 0; i < blk_per_bucket; i++)
-                       brelse(bhs[i]);
-       }
-       kfree(bhs);
-
        kfree(bucket_buf);
        return ret;
 }
@@ -3244,8 +3190,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
 {
        int ret, i;
        int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
-       u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-       struct ocfs2_xattr_bucket s_bucket, t_bucket;
+       struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
        struct ocfs2_xattr_header *xh;
        struct ocfs2_xattr_entry *xe;
        int blocksize = inode->i_sb->s_blocksize;
@@ -3253,16 +3198,21 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
        mlog(0, "move some of xattrs from bucket %llu to %llu\n",
             (unsigned long long)blk, (unsigned long long)new_blk);
 
-       memset(&s_bucket, 0, sizeof(struct ocfs2_xattr_bucket));
-       memset(&t_bucket, 0, sizeof(struct ocfs2_xattr_bucket));
+       s_bucket = ocfs2_xattr_bucket_new(inode);
+       t_bucket = ocfs2_xattr_bucket_new(inode);
+       if (!s_bucket || !t_bucket) {
+               ret = -ENOMEM;
+               mlog_errno(ret);
+               goto out;
+       }
 
-       ret = ocfs2_read_xattr_bucket(inode, &s_bucket, blk);
+       ret = ocfs2_read_xattr_bucket(s_bucket, blk);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }
 
-       ret = ocfs2_xattr_bucket_journal_access(handle, inode, &s_bucket,
+       ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
                                                OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret) {
                mlog_errno(ret);
@@ -3273,13 +3223,13 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
         * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
         * there's no need to read it.
         */
-       ret = ocfs2_init_xattr_bucket(inode, &t_bucket, new_blk);
+       ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }
 
-       ret = ocfs2_xattr_bucket_journal_access(handle, inode, &t_bucket,
+       ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
                                                new_bucket_head ?
                                                OCFS2_JOURNAL_ACCESS_CREATE :
                                                OCFS2_JOURNAL_ACCESS_WRITE);
@@ -3288,7 +3238,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
                goto out;
        }
 
-       xh = bucket_xh(&s_bucket);
+       xh = bucket_xh(s_bucket);
        count = le16_to_cpu(xh->xh_count);
        start = ocfs2_xattr_find_divide_pos(xh);
 
@@ -3300,10 +3250,10 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
                 * The hash value is set as one larger than
                 * that of the last entry in the previous bucket.
                 */
-               for (i = 0; i < blk_per_bucket; i++)
-                       memset(bucket_block(&t_bucket, i), 0, blocksize);
+               for (i = 0; i < t_bucket->bu_blocks; i++)
+                       memset(bucket_block(t_bucket, i), 0, blocksize);
 
-               xh = bucket_xh(&t_bucket);
+               xh = bucket_xh(t_bucket);
                xh->xh_free_start = cpu_to_le16(blocksize);
                xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
                le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
@@ -3312,10 +3262,10 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
        }
 
        /* copy the whole bucket to the new first. */
-       ocfs2_xattr_bucket_copy_data(inode, &t_bucket, &s_bucket);
+       ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
 
        /* update the new bucket. */
-       xh = bucket_xh(&t_bucket);
+       xh = bucket_xh(t_bucket);
 
        /*
         * Calculate the total name/value len and xh_free_start for
@@ -3379,7 +3329,7 @@ set_num_buckets:
        else
                xh->xh_num_buckets = 0;
 
-       ocfs2_xattr_bucket_journal_dirty(handle, inode, &t_bucket);
+       ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
 
        /* store the first_hash of the new bucket. */
        if (first_hash)
@@ -3393,18 +3343,18 @@ set_num_buckets:
        if (start == count)
                goto out;
 
-       xh = bucket_xh(&s_bucket);
+       xh = bucket_xh(s_bucket);
        memset(&xh->xh_entries[start], 0,
               sizeof(struct ocfs2_xattr_entry) * (count - start));
        xh->xh_count = cpu_to_le16(start);
        xh->xh_free_start = cpu_to_le16(name_offset);
        xh->xh_name_value_len = cpu_to_le16(name_value_len);
 
-       ocfs2_xattr_bucket_journal_dirty(handle, inode, &s_bucket);
+       ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
 
 out:
-       ocfs2_xattr_bucket_relse(inode, &s_bucket);
-       ocfs2_xattr_bucket_relse(inode, &t_bucket);
+       ocfs2_xattr_bucket_free(s_bucket);
+       ocfs2_xattr_bucket_free(t_bucket);
 
        return ret;
 }
@@ -3422,7 +3372,7 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
                                 int t_is_new)
 {
        int ret;
-       struct ocfs2_xattr_bucket s_bucket, t_bucket;
+       struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
 
        BUG_ON(s_blkno == t_blkno);
 
@@ -3430,10 +3380,15 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
             (unsigned long long)s_blkno, (unsigned long long)t_blkno,
             t_is_new);
 
-       memset(&s_bucket, 0, sizeof(struct ocfs2_xattr_bucket));
-       memset(&t_bucket, 0, sizeof(struct ocfs2_xattr_bucket));
-
-       ret = ocfs2_read_xattr_bucket(inode, &s_bucket, s_blkno);
+       s_bucket = ocfs2_xattr_bucket_new(inode);
+       t_bucket = ocfs2_xattr_bucket_new(inode);
+       if (!s_bucket || !t_bucket) {
+               ret = -ENOMEM;
+               mlog_errno(ret);
+               goto out;
+       }
+
+       ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
        if (ret)
                goto out;
 
@@ -3441,23 +3396,23 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
         * Even if !t_is_new, we're overwriting t_bucket.  Thus,
         * there's no need to read it.
         */
-       ret = ocfs2_init_xattr_bucket(inode, &t_bucket, t_blkno);
+       ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
        if (ret)
                goto out;
 
-       ret = ocfs2_xattr_bucket_journal_access(handle, inode, &t_bucket,
+       ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
                                                t_is_new ?
                                                OCFS2_JOURNAL_ACCESS_CREATE :
                                                OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret)
                goto out;
 
-       ocfs2_xattr_bucket_copy_data(inode, &t_bucket, &s_bucket);
-       ocfs2_xattr_bucket_journal_dirty(handle, inode, &t_bucket);
+       ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
+       ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
 
 out:
-       ocfs2_xattr_bucket_relse(inode, &s_bucket);
-       ocfs2_xattr_bucket_relse(inode, &t_bucket);
+       ocfs2_xattr_bucket_free(t_bucket);
+       ocfs2_xattr_bucket_free(s_bucket);
 
        return ret;
 }
@@ -4009,7 +3964,7 @@ static void ocfs2_xattr_set_entry_normal(struct inode *inode,
                                xe->xe_value_size = 0;
 
                        val = ocfs2_xattr_bucket_get_val(inode,
-                                                        &xs->bucket, offs);
+                                                        xs->bucket, offs);
                        memset(val + OCFS2_XATTR_SIZE(name_len), 0,
                               size - OCFS2_XATTR_SIZE(name_len));
                        if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
@@ -4087,8 +4042,7 @@ set_new_name_value:
                xh->xh_free_start = cpu_to_le16(offs);
        }
 
-       val = ocfs2_xattr_bucket_get_val(inode,
-                                        &xs->bucket, offs - size);
+       val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
        xe->xe_name_offset = cpu_to_le16(offs - size);
 
        memset(val, 0, size);
@@ -4117,25 +4071,24 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
 {
        int ret;
        handle_t *handle = NULL;
-       u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+       u64 blkno;
 
        mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
             (unsigned long)xi->value_len, xi->name_index,
-            (unsigned long long)bucket_blkno(&xs->bucket));
+            (unsigned long long)bucket_blkno(xs->bucket));
 
-       if (!xs->bucket.bu_bhs[1]) {
-               ret = ocfs2_read_blocks(inode,
-                                       bucket_blkno(&xs->bucket) + 1,
-                                       blk_per_bucket - 1, &xs->bucket.bu_bhs[1],
-                                       0);
+       if (!xs->bucket->bu_bhs[1]) {
+               blkno = bucket_blkno(xs->bucket);
+               ocfs2_xattr_bucket_relse(xs->bucket);
+               ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }
        }
 
-       handle = ocfs2_start_trans(osb, blk_per_bucket);
+       handle = ocfs2_start_trans(osb, xs->bucket->bu_blocks);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
                handle = NULL;
@@ -4143,7 +4096,7 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
                goto out;
        }
 
-       ret = ocfs2_xattr_bucket_journal_access(handle, inode, &xs->bucket,
+       ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
                                                OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret < 0) {
                mlog_errno(ret);
@@ -4151,7 +4104,7 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
        }
 
        ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
-       ocfs2_xattr_bucket_journal_dirty(handle, inode, &xs->bucket);
+       ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
 
 out:
        ocfs2_commit_trans(osb, handle);
@@ -4264,10 +4217,10 @@ static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
        struct ocfs2_xattr_entry *xe = xs->here;
        struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
 
-       BUG_ON(!xs->bucket.bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
+       BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
 
        offset = xe - xh->xh_entries;
-       ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bu_bhs[0],
+       ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket->bu_bhs[0],
                                                offset, len);
        if (ret)
                mlog_errno(ret);
@@ -4387,7 +4340,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
                                         struct ocfs2_xattr_search *xs)
 {
        handle_t *handle = NULL;
-       struct ocfs2_xattr_header *xh = bucket_xh(&xs->bucket);
+       struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
        struct ocfs2_xattr_entry *last = &xh->xh_entries[
                                                le16_to_cpu(xh->xh_count) - 1];
        int ret = 0;
@@ -4400,7 +4353,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
                return;
        }
 
-       ret = ocfs2_xattr_bucket_journal_access(handle, inode, &xs->bucket,
+       ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
                                                OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret) {
                mlog_errno(ret);
@@ -4413,7 +4366,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
        memset(last, 0, sizeof(struct ocfs2_xattr_entry));
        le16_add_cpu(&xh->xh_count, -1);
 
-       ocfs2_xattr_bucket_journal_dirty(handle, inode, &xs->bucket);
+       ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
 
 out_commit:
        ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
@@ -4565,7 +4518,7 @@ try_again:
 
        mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
                        "of %u which exceed block size\n",
-                       (unsigned long long)bucket_blkno(&xs->bucket),
+                       (unsigned long long)bucket_blkno(xs->bucket),
                        header_size);
 
        if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
@@ -4605,7 +4558,7 @@ try_again:
        mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
             "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
             " %u\n", xs->not_found,
-            (unsigned long long)bucket_blkno(&xs->bucket),
+            (unsigned long long)bucket_blkno(xs->bucket),
             free, need, max_free, le16_to_cpu(xh->xh_free_start),
             le16_to_cpu(xh->xh_name_value_len));
 
@@ -4617,7 +4570,7 @@ try_again:
                         * name/value will be moved, the xe shouldn't be changed
                         * in xs.
                         */
-                       ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket);
+                       ret = ocfs2_defrag_xattr_bucket(inode, xs->bucket);
                        if (ret) {
                                mlog_errno(ret);
                                goto out;
@@ -4649,7 +4602,7 @@ try_again:
                 * add a new bucket for the insert.
                 */
                ret = ocfs2_check_xattr_bucket_collision(inode,
-                                                        &xs->bucket,
+                                                        xs->bucket,
                                                         xi->name);
                if (ret) {
                        mlog_errno(ret);
@@ -4658,14 +4611,13 @@ try_again:
 
                ret = ocfs2_add_new_xattr_bucket(inode,
                                                 xs->xattr_bh,
-                                                xs->bucket.bu_bhs[0]);
+                                                xs->bucket->bu_bhs[0]);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }
 
-               ocfs2_xattr_bucket_relse(inode, &xs->bucket);
-               memset(&xs->bucket, 0, sizeof(xs->bucket));
+               ocfs2_xattr_bucket_relse(xs->bucket);
 
                ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
                                                   xi->name_index,