ocfs2: abstract out allocation locking
[pandora-kernel.git] / fs / ocfs2 / file.c
index f2cd3bf..3bcf362 100644 (file)
@@ -344,18 +344,6 @@ static int ocfs2_truncate_file(struct inode *inode,
        }
        ocfs2_data_unlock(inode, 1);
 
-       if (le32_to_cpu(fe->i_clusters) ==
-           ocfs2_clusters_for_bytes(osb->sb, new_i_size)) {
-               mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n",
-                    fe->i_clusters);
-               /* No allocation change is required, so lets fast path
-                * this truncate. */
-               status = ocfs2_simple_size_update(inode, di_bh, new_i_size);
-               if (status < 0)
-                       mlog_errno(status);
-               goto bail;
-       }
-
        /* alright, we're going to need to do a full blown alloc size
         * change. Orphan the inode so that recovery can complete the
         * truncate if necessary. This does the task of marking
@@ -397,6 +385,7 @@ bail:
  */
 int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
                               struct inode *inode,
+                              u32 *logical_offset,
                               u32 clusters_to_add,
                               struct buffer_head *fe_bh,
                               handle_t *handle,
@@ -460,18 +449,14 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
        block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
        mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
             num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
-       status = ocfs2_insert_extent(osb, handle, inode, fe_bh, block,
-                                    num_bits, meta_ac);
+       status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
+                                    *logical_offset, block, num_bits,
+                                    meta_ac);
        if (status < 0) {
                mlog_errno(status);
                goto leave;
        }
 
-       le32_add_cpu(&fe->i_clusters, num_bits);
-       spin_lock(&OCFS2_I(inode)->ip_lock);
-       OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
-       spin_unlock(&OCFS2_I(inode)->ip_lock);
-
        status = ocfs2_journal_dirty(handle, fe_bh);
        if (status < 0) {
                mlog_errno(status);
@@ -479,6 +464,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
        }
 
        clusters_to_add -= num_bits;
+       *logical_offset += num_bits;
 
        if (clusters_to_add) {
                mlog(0, "need to alloc once more, clusters = %u, wanted = "
@@ -494,14 +480,87 @@ leave:
        return status;
 }
 
+/*
+ * Determine which allocators an allocation of clusters_to_add clusters
+ * needs and lock them, reserving the appropriate number of bits: data in
+ * *data_ac, plus metadata in *meta_ac if di is short of free extents.
+ * Both start NULL; on error (non-zero return) *meta_ac is freed and reset.
+ * Called from ocfs2_extend_allocation() (file systems without holes) and
+ * from ocfs2_prepare_write() (file systems which understand sparse inodes).
+ */
+static int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
+                                u32 clusters_to_add,
+                                struct ocfs2_alloc_context **data_ac,
+                                struct ocfs2_alloc_context **meta_ac)
+{
+       int ret, num_free_extents;
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+       *meta_ac = NULL;
+       *data_ac = NULL;
+
+       mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
+            "clusters_to_add = %u\n",
+            (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode),
+            le32_to_cpu(di->i_clusters), clusters_to_add);
+
+       num_free_extents = ocfs2_num_free_extents(osb, inode, di);
+       if (num_free_extents < 0) {
+               ret = num_free_extents;
+               mlog_errno(ret);
+               goto out;
+       }
+
+       /*
+        * Metadata is always reserved when no free extent record is left.
+        * Sparse allocation file systems are more conservative and also
+        * reserve whenever there are fewer free extents than clusters
+        * requested: the actual allocation happens while we've got a
+        * journal handle open, so re-taking a cluster lock (because we ran
+        * out of room for another extent) would violate ordering rules.
+        * Most of the time we'll only be seeing this 1 page at a time
+        * anyway.
+        */
+       if (!num_free_extents ||
+           (ocfs2_sparse_alloc(osb) && num_free_extents < clusters_to_add)) {
+               ret = ocfs2_reserve_new_metadata(osb, di, meta_ac);
+               if (ret < 0) {
+                       if (ret != -ENOSPC)
+                               mlog_errno(ret);
+                       goto out;
+               }
+       }
+
+       ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
+       if (ret < 0) {
+               if (ret != -ENOSPC)
+                       mlog_errno(ret);
+               goto out;
+       }
+
+out:
+       if (ret) {
+               if (*meta_ac) {
+                       ocfs2_free_alloc_context(*meta_ac);
+                       *meta_ac = NULL;
+               }
+
+               /* Reserving *data_ac is the last step, so we cannot
+                * have an error and a non-NULL *data_ac; only *meta_ac
+                * needs cleanup. */
+       }
+
+       return ret;
+}
+
 static int ocfs2_extend_allocation(struct inode *inode,
                                   u32 clusters_to_add)
 {
        int status = 0;
        int restart_func = 0;
        int drop_alloc_sem = 0;
-       int credits, num_free_extents;
-       u32 prev_clusters;
+       int credits;
+       u32 prev_clusters, logical_start;
        struct buffer_head *bh = NULL;
        struct ocfs2_dinode *fe = NULL;
        handle_t *handle = NULL;
@@ -512,6 +571,12 @@ static int ocfs2_extend_allocation(struct inode *inode,
 
        mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);
 
+       /*
+        * This function only exists for file systems which don't
+        * support holes.
+        */
+       BUG_ON(ocfs2_sparse_alloc(osb));
+
        status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
                                  OCFS2_BH_CACHED, inode);
        if (status < 0) {
@@ -526,39 +591,18 @@ static int ocfs2_extend_allocation(struct inode *inode,
                goto leave;
        }
 
+       logical_start = OCFS2_I(inode)->ip_clusters;
+
 restart_all:
        BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
 
-       mlog(0, "extend inode %llu, i_size = %lld, fe->i_clusters = %u, "
-            "clusters_to_add = %u\n",
-            (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode),
-            fe->i_clusters, clusters_to_add);
-
-       num_free_extents = ocfs2_num_free_extents(osb,
-                                                 inode,
-                                                 fe);
-       if (num_free_extents < 0) {
-               status = num_free_extents;
+       status = ocfs2_lock_allocators(inode, fe, clusters_to_add, &data_ac,
+                                      &meta_ac);
+       if (status) {
                mlog_errno(status);
                goto leave;
        }
 
-       if (!num_free_extents) {
-               status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac);
-               if (status < 0) {
-                       if (status != -ENOSPC)
-                               mlog_errno(status);
-                       goto leave;
-               }
-       }
-
-       status = ocfs2_reserve_clusters(osb, clusters_to_add, &data_ac);
-       if (status < 0) {
-               if (status != -ENOSPC)
-                       mlog_errno(status);
-               goto leave;
-       }
-
        /* blocks peope in read/write from reading our allocation
         * until we're done changing it. We depend on i_mutex to block
         * other extend/truncate calls while we're here. Ordering wrt
@@ -590,6 +634,7 @@ restarted_transaction:
 
        status = ocfs2_do_extend_allocation(osb,
                                            inode,
+                                           &logical_start,
                                            clusters_to_add,
                                            bh,
                                            handle,
@@ -778,7 +823,7 @@ static int ocfs2_extend_file(struct inode *inode,
                             size_t tail_to_skip)
 {
        int ret = 0;
-       u32 clusters_to_add;
+       u32 clusters_to_add = 0;
 
        BUG_ON(!tail_to_skip && !di_bh);
 
@@ -790,6 +835,11 @@ static int ocfs2_extend_file(struct inode *inode,
                goto out;
        BUG_ON(new_i_size < i_size_read(inode));
 
+       if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
+               BUG_ON(tail_to_skip != 0);
+               goto out_update_size;
+       }
+
        clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size) - 
                OCFS2_I(inode)->ip_clusters;
 
@@ -825,6 +875,7 @@ static int ocfs2_extend_file(struct inode *inode,
                goto out_unlock;
        }
 
+out_update_size:
        if (!tail_to_skip) {
                /* We're being called from ocfs2_setattr() which wants
                 * us to update i_size */
@@ -834,7 +885,8 @@ static int ocfs2_extend_file(struct inode *inode,
        }
 
 out_unlock:
-       ocfs2_data_unlock(inode, 1);
+       if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+               ocfs2_data_unlock(inode, 1);
 
 out:
        return ret;
@@ -972,7 +1024,8 @@ int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
 
        ret = ocfs2_meta_lock(inode, NULL, 0);
        if (ret) {
-               mlog_errno(ret);
+               if (ret != -ENOENT)
+                       mlog_errno(ret);
                goto out;
        }
 
@@ -1089,6 +1142,14 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
                } else {
                        saved_pos = *ppos;
                }
+
+               /*
+                * The rest of this loop is concerned with legacy file
+                * systems which don't support sparse files.
+                */
+               if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+                       break;
+
                newsize = count + saved_pos;
 
                mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",