GFS2: Fix inode deallocation race
[pandora-kernel.git] / fs / gfs2 / rgrp.c
index bef3ab6..33c8407 100644 (file)
@@ -963,17 +963,18 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
  *          The inode, if one has been found, in inode.
  */
 
-static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked,
-                          u64 skip)
+static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
 {
        u32 goal = 0, block;
        u64 no_addr;
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        unsigned int n;
+       struct gfs2_glock *gl;
+       struct gfs2_inode *ip;
+       int error;
+       int found = 0;
 
-       for(;;) {
-               if (goal >= rgd->rd_data)
-                       break;
+       while (goal < rgd->rd_data) {
                down_write(&sdp->sd_log_flush_lock);
                n = 1;
                block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
@@ -990,11 +991,32 @@ static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked,
                if (no_addr == skip)
                        continue;
                *last_unlinked = no_addr;
-               return no_addr;
+
+               error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl);
+               if (error)
+                       continue;
+
+               /* If the inode is already in cache, we can ignore it here
+                * because the existing inode disposal code will deal with
+                * it when all refs have gone away. Accessing gl_object like
+                * this is not safe in general. Here it is ok because we do
+                * not dereference the pointer, and we only need an approx
+                * answer to whether it is NULL or not.
+                */
+               ip = gl->gl_object;
+
+               if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
+                       gfs2_glock_put(gl);
+               else
+                       found++;
+
+               /* Limit reclaim to sensible number of tasks */
+               if (found > 2*NR_CPUS)
+                       return;
        }
 
        rgd->rd_flags &= ~GFS2_RDF_CHECK;
-       return 0;
+       return;
 }
 
 /**
@@ -1075,11 +1097,9 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
  * Try to acquire rgrp in way which avoids contending with others.
  *
  * Returns: errno
- *          unlinked: the block address of an unlinked block to be reclaimed
  */
 
-static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
-                         u64 *last_unlinked)
+static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_rgrpd *rgd, *begin = NULL;
@@ -1089,7 +1109,6 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
        int loops = 0;
        int error, rg_locked;
 
-       *unlinked = 0;
        rgd = gfs2_blk2rgrpd(sdp, ip->i_goal);
 
        while (rgd) {
@@ -1106,17 +1125,10 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
                case 0:
                        if (try_rgrp_fit(rgd, al))
                                goto out;
-                       /* If the rg came in already locked, there's no
-                          way we can recover from a failed try_rgrp_unlink
-                          because that would require an iput which can only
-                          happen after the rgrp is unlocked. */
-                       if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK)
-                               *unlinked = try_rgrp_unlink(rgd, last_unlinked,
-                                                          ip->i_no_addr);
+                       if (rgd->rd_flags & GFS2_RDF_CHECK)
+                               try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
                        if (!rg_locked)
                                gfs2_glock_dq_uninit(&al->al_rgd_gh);
-                       if (*unlinked)
-                               return -EAGAIN;
                        /* fall through */
                case GLR_TRYFAILED:
                        rgd = recent_rgrp_next(rgd);
@@ -1145,13 +1157,10 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
                case 0:
                        if (try_rgrp_fit(rgd, al))
                                goto out;
-                       if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK)
-                               *unlinked = try_rgrp_unlink(rgd, last_unlinked,
-                                                           ip->i_no_addr);
+                       if (rgd->rd_flags & GFS2_RDF_CHECK)
+                               try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
                        if (!rg_locked)
                                gfs2_glock_dq_uninit(&al->al_rgd_gh);
-                       if (*unlinked)
-                               return -EAGAIN;
                        break;
 
                case GLR_TRYFAILED:
@@ -1204,12 +1213,12 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_alloc *al = ip->i_alloc;
        int error = 0;
-       u64 last_unlinked = NO_BLOCK, unlinked;
+       u64 last_unlinked = NO_BLOCK;
+       int tries = 0;
 
        if (gfs2_assert_warn(sdp, al->al_requested))
                return -EINVAL;
 
-try_again:
        if (hold_rindex) {
                /* We need to hold the rindex unless the inode we're using is
                   the rindex itself, in which case it's already held. */
@@ -1218,31 +1227,23 @@ try_again:
                else if (!sdp->sd_rgrps) /* We may not have the rindex read
                                            in, so: */
                        error = gfs2_ri_update_special(ip);
+               if (error)
+                       return error;
        }
 
-       if (error)
-               return error;
+       do {
+               error = get_local_rgrp(ip, &last_unlinked);
+               /* If there is no space, flushing the log may release some */
+               if (error)
+                       gfs2_log_flush(sdp, NULL);
+       } while (error && tries++ < 3);
 
-       /* Find an rgrp suitable for allocation.  If it encounters any unlinked
-          dinodes along the way, error will equal -EAGAIN and unlinked will
-          contains it block address. We then need to look up that inode and
-          try to free it, and try the allocation again. */
-       error = get_local_rgrp(ip, &unlinked, &last_unlinked);
        if (error) {
                if (hold_rindex && ip != GFS2_I(sdp->sd_rindex))
                        gfs2_glock_dq_uninit(&al->al_ri_gh);
-               if (error != -EAGAIN)
-                       return error;
-
-               gfs2_process_unlinked_inode(ip->i_inode.i_sb, unlinked);
-               /* regardless of whether or not gfs2_process_unlinked_inode
-                  was successful, we don't want to repeat it again. */
-               last_unlinked = unlinked;
-               gfs2_log_flush(sdp, NULL);
-               error = 0;
-
-               goto try_again;
+               return error;
        }
+
        /* no error, so we have the rgrp set in the inode's allocation. */
        al->al_file = file;
        al->al_line = line;