[GFS2] Reverse block order in build_height
author Steven Whitehouse <swhiteho@redhat.com>
Fri, 12 May 2006 16:09:15 +0000 (12:09 -0400)
committer Steven Whitehouse <swhiteho@redhat.com>
Fri, 12 May 2006 16:09:15 +0000 (12:09 -0400)
The original code ordered the blocks allocated in the build_height
routine backwards, causing excessive disk seeks during a read of the
metadata. This patch reverses the order to try to reduce disk seeks.

Example: A five level metadata tree, I = Inode, P = Pointers, D = Data

You need to read the blocks in the order:

I P5 P4 P3 P2 P1 D

in order to read a single data block. The new code now orders the blocks
in this way. The old code used to order them as:

I P1 P2 P3 P4 P5 D

requiring two extra seeks on average. Note that for files which are
grown by gradual extension rather than by truncate or by llseek/write
at a large offset, this doesn't apply. In the case of writing to a
file linearly, this routine will only be called upon to extend the
height of the tree by one block at a time, so the ordering is
determined by when it's called rather than by the internals of the
routine itself. Optimising that part of the ordering is a much
harder problem.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
fs/gfs2/bmap.c

index 474b9a1..31c3e92 100644 (file)
@@ -164,72 +164,62 @@ static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size)
  * @ip: The GFS2 inode
  * @height: The height to build to
  *
- * This routine makes sure that the metadata tree is tall enough to hold
- * "size" bytes of data.
  *
  * Returns: errno
  */
 
-static int build_height(struct gfs2_inode *ip, int height)
+static int build_height(struct inode *inode, unsigned height)
 {
-       struct gfs2_sbd *sdp = ip->i_sbd;
-       struct buffer_head *bh, *dibh;
-       uint64_t block = 0, *bp;
-       unsigned int x;
-       int new_block;
+       struct gfs2_inode *ip = inode->u.generic_ip;
+       unsigned new_height = height - ip->i_di.di_height;
+       struct buffer_head *dibh;
+       struct buffer_head *blocks[GFS2_MAX_META_HEIGHT];
        int error;
+       u64 *bp;
+       u64 bn;
+       unsigned n;
 
-       while (ip->i_di.di_height < height) {
-               error = gfs2_meta_inode_buffer(ip, &dibh);
-               if (error)
-                       return error;
-
-               new_block = 0;
-               bp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode));
-               for (x = 0; x < sdp->sd_diptrs; x++, bp++)
-                       if (*bp) {
-                               new_block = 1;
-                               break;
-                       }
-
-               if (new_block) {
-                       /* Get a new block, fill it with the old direct
-                          pointers, and write it out */
+       if (height <= ip->i_di.di_height)
+               return 0;
 
-                       block = gfs2_alloc_meta(ip);
+       error = gfs2_meta_inode_buffer(ip, &dibh);
+       if (error)
+               return error;
 
-                       bh = gfs2_meta_new(ip->i_gl, block);
-                       gfs2_trans_add_bh(ip->i_gl, bh, 1);
-                       gfs2_metatype_set(bh,
-                                         GFS2_METATYPE_IN,
+       for(n = 0; n < new_height; n++) {
+               bn = gfs2_alloc_meta(ip);
+               blocks[n] = gfs2_meta_new(ip->i_gl, bn);
+               gfs2_trans_add_bh(ip->i_gl, blocks[n], 1);
+       }
+       
+       n = 0;
+       bn = blocks[0]->b_blocknr;
+       if (new_height > 1) {
+               for(; n < new_height-1; n++) {
+                       gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN,
                                          GFS2_FORMAT_IN);
-                       gfs2_buffer_copy_tail(bh,
-                                             sizeof(struct gfs2_meta_header),
-                                             dibh, sizeof(struct gfs2_dinode));
-
-                       brelse(bh);
-               }
-
-               /*  Set up the new direct pointer and write it out to disk  */
-
-               gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-
-               gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
-
-               if (new_block) {
-                       *(uint64_t *)(dibh->b_data +
-                                     sizeof(struct gfs2_dinode)) =
-                                     cpu_to_be64(block);
-                       ip->i_di.di_blocks++;
+                       gfs2_buffer_clear_tail(blocks[n],
+                                              sizeof(struct gfs2_meta_header));
+                       bp = (u64 *)(blocks[n]->b_data +
+                                    sizeof(struct gfs2_meta_header));
+                       *bp = cpu_to_be64(blocks[n+1]->b_blocknr);
+                       brelse(blocks[n]);
+                       blocks[n] = NULL;
                }
-
-               ip->i_di.di_height++;
-
-               gfs2_dinode_out(&ip->i_di, dibh->b_data);
-               brelse(dibh);
        }
-
-       return 0;
+       gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
+       gfs2_buffer_copy_tail(blocks[n], sizeof(struct gfs2_meta_header),
+                             dibh, sizeof(struct gfs2_dinode));
+       brelse(blocks[n]);
+       gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+       gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
+       bp = (u64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
+       *bp = cpu_to_be64(bn);
+       ip->i_di.di_height += new_height;
+       ip->i_di.di_blocks += new_height;
+       gfs2_dinode_out(&ip->i_di, dibh->b_data);
+       brelse(dibh);
+       return error;
 }
 
 /**
@@ -416,7 +406,7 @@ static struct buffer_head *gfs2_block_pointers(struct inode *inode, u64 lblock,
                if (!create)
                        goto out;
 
-               error = build_height(ip, height);
+               error = build_height(inode, height);
                if (error)
                        goto out;
        }
@@ -806,7 +796,7 @@ static int do_grow(struct gfs2_inode *ip, uint64_t size)
                h = calc_tree_height(ip, size);
                if (ip->i_di.di_height < h) {
                        down_write(&ip->i_rw_mutex);
-                       error = build_height(ip, h);
+                       error = build_height(ip->i_vnode, h);
                        up_write(&ip->i_rw_mutex);
                        if (error)
                                goto out_end_trans;