Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...

[pandora-kernel.git] / fs / btrfs / file.c
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c

index b78bbba..30982bb 100644 (file)
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1811,22 +1811,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
         mutex_unlock(&inode->i_mutex);
  
         /*
-        * we want to make sure fsync finds this change
-        * but we haven't joined a transaction running right now.
-        *
-        * Later on, someone is sure to update the inode and get the
-        * real transid recorded.
-        *
-        * We set last_trans now to the fs_info generation + 1,
-        * this will either be one more than the running transaction
-        * or the generation used for the next transaction if there isn't
-        * one running right now.
-        *
          * We also have to set last_sub_trans to the current log transid,
          * otherwise subsequent syncs to a file that's been synced in this
          * transaction will appear to have already occured.
          */
-       BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
         BTRFS_I(inode)->last_sub_trans = root->log_transid;
         if (num_written > 0) {
                 err = generic_write_sync(file, pos, num_written);
@@ -1959,25 +1947,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         atomic_inc(&root->log_batch);
  
         /*
-        * check the transaction that last modified this inode
-        * and see if its already been committed
-        */
-       if (!BTRFS_I(inode)->last_trans) {
-               mutex_unlock(&inode->i_mutex);
-               goto out;
-       }
-
-       /*
-        * if the last transaction that changed this file was before
-        * the current transaction, we can bail out now without any
-        * syncing
+        * If the last transaction that changed this file was before the current
+        * transaction and we have the full sync flag set in our inode, we can
+        * bail out now without any syncing.
+        *
+        * Note that we can't bail out if the full sync flag isn't set. This is
+        * because when the full sync flag is set we start all ordered extents
+        * and wait for them to fully complete - when they complete they update
+        * the inode's last_trans field through:
+        *
+        *     btrfs_finish_ordered_io() ->
+        *         btrfs_update_inode_fallback() ->
+        *             btrfs_update_inode() ->
+        *                 btrfs_set_inode_last_trans()
+        *
+        * So we are sure that last_trans is up to date and can do this check to
+        * bail out safely. For the fast path, when the full sync flag is not
+        * set in our inode, we can not do it because we start only our ordered
+        * extents and don't wait for them to complete (that is when
+        * btrfs_finish_ordered_io runs), so here at this point their last_trans
+        * value might be less than or equals to fs_info->last_trans_committed,
+        * and setting a speculative last_trans for an inode when a buffered
+        * write is made (such as fs_info->generation + 1 for example) would not
+        * be reliable since after setting the value and before fsync is called
+        * any number of transactions can start and commit (transaction kthread
+        * commits the current transaction periodically), and a transaction
+        * commit does not start nor waits for ordered extents to complete.
          */
         smp_mb();
         if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
-           BTRFS_I(inode)->last_trans <=
-           root->fs_info->last_trans_committed) {
-               BTRFS_I(inode)->last_trans = 0;
-
+           (full_sync && BTRFS_I(inode)->last_trans <=
+            root->fs_info->last_trans_committed)) {
                 /*
                  * We'v had everything committed since the last time we were
                  * modified so clear this flag in case it was set for whatever
@@ -2275,6 +2275,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
         bool same_page;
         bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
         u64 ino_size;
+       bool truncated_page = false;
+       bool updated_inode = false;
  
         ret = btrfs_wait_ordered_range(inode, offset, len);
         if (ret)
@@ -2306,13 +2308,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
          * entire page.
          */
         if (same_page && len < PAGE_CACHE_SIZE) {
-               if (offset < ino_size)
+               if (offset < ino_size) {
+                       truncated_page = true;
                         ret = btrfs_truncate_page(inode, offset, len, 0);
+               } else {
+                       ret = 0;
+               }
                 goto out_only_mutex;
         }
  
         /* zero back part of the first page */
         if (offset < ino_size) {
+               truncated_page = true;
                 ret = btrfs_truncate_page(inode, offset, 0, 0);
                 if (ret) {
                         mutex_unlock(&inode->i_mutex);
@@ -2348,6 +2355,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                 if (!ret) {
                         /* zero the front end of the last page */
                         if (tail_start + tail_len < ino_size) {
+                               truncated_page = true;
                                 ret = btrfs_truncate_page(inode,
                                                 tail_start + tail_len, 0, 1);
                                 if (ret)
@@ -2357,8 +2365,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
         }
  
         if (lockend < lockstart) {
-               mutex_unlock(&inode->i_mutex);
-               return 0;
+               ret = 0;
+               goto out_only_mutex;
         }
  
         while (1) {
@@ -2506,6 +2514,7 @@ out_trans:
  
         trans->block_rsv = &root->fs_info->trans_block_rsv;
         ret = btrfs_update_inode(trans, root, inode);
+       updated_inode = true;
         btrfs_end_transaction(trans, root);
         btrfs_btree_balance_dirty(root);
  out_free:
@@ -2515,6 +2524,22 @@ out:
         unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                              &cached_state, GFP_NOFS);
  out_only_mutex:
+       if (!updated_inode && truncated_page && !ret && !err) {
+               /*
+                * If we only end up zeroing part of a page, we still need to
+                * update the inode item, so that all the time fields are
+                * updated as well as the necessary btrfs inode in memory fields
+                * for detecting, at fsync time, if the inode isn't yet in the
+                * log tree or it's there but not up to date.
+                */
+               trans = btrfs_start_transaction(root, 1);
+               if (IS_ERR(trans)) {
+                       err = PTR_ERR(trans);
+               } else {
+                       err = btrfs_update_inode(trans, root, inode);
+                       ret = btrfs_end_transaction(trans, root);
+               }
+       }
         mutex_unlock(&inode->i_mutex);
         if (ret && !err)
                 err = ret;