Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
[pandora-kernel.git] / fs / btrfs / tree-log.c
index 1a9585d..c5b8ba3 100644 (file)
@@ -453,11 +453,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
 insert:
        btrfs_release_path(path);
        /* try to insert the key into the destination tree */
+       path->skip_release_on_error = 1;
        ret = btrfs_insert_empty_item(trans, root, path,
                                      key, item_size);
+       path->skip_release_on_error = 0;
 
        /* make sure any existing item is the correct size */
-       if (ret == -EEXIST) {
+       if (ret == -EEXIST || ret == -EOVERFLOW) {
                u32 found_size;
                found_size = btrfs_item_size_nr(path->nodes[0],
                                                path->slots[0]);
@@ -488,8 +490,20 @@ insert:
                src_item = (struct btrfs_inode_item *)src_ptr;
                dst_item = (struct btrfs_inode_item *)dst_ptr;
 
-               if (btrfs_inode_generation(eb, src_item) == 0)
+               if (btrfs_inode_generation(eb, src_item) == 0) {
+                       struct extent_buffer *dst_eb = path->nodes[0];
+
+                       if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
+                           S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) {
+                               struct btrfs_map_token token;
+                               u64 ino_size = btrfs_inode_size(eb, src_item);
+
+                               btrfs_init_map_token(&token);
+                               btrfs_set_token_inode_size(dst_eb, dst_item,
+                                                          ino_size, &token);
+                       }
                        goto no_copy;
+               }
 
                if (overwrite_root &&
                    S_ISDIR(btrfs_inode_mode(eb, src_item)) &&
@@ -844,7 +858,7 @@ out:
 static noinline int backref_in_log(struct btrfs_root *log,
                                   struct btrfs_key *key,
                                   u64 ref_objectid,
-                                  char *name, int namelen)
+                                  const char *name, int namelen)
 {
        struct btrfs_path *path;
        struct btrfs_inode_ref *ref;
@@ -998,7 +1012,7 @@ again:
                base = btrfs_item_ptr_offset(leaf, path->slots[0]);
 
                while (cur_offset < item_size) {
-                       extref = (struct btrfs_inode_extref *)base + cur_offset;
+                       extref = (struct btrfs_inode_extref *)(base + cur_offset);
 
                        victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
 
@@ -1254,13 +1268,14 @@ out:
 }
 
 static int insert_orphan_item(struct btrfs_trans_handle *trans,
-                             struct btrfs_root *root, u64 offset)
+                             struct btrfs_root *root, u64 ino)
 {
        int ret;
-       ret = btrfs_find_item(root, NULL, BTRFS_ORPHAN_OBJECTID,
-                       offset, BTRFS_ORPHAN_ITEM_KEY, NULL);
-       if (ret > 0)
-               ret = btrfs_insert_orphan_item(trans, root, offset);
+
+       ret = btrfs_insert_orphan_item(trans, root, ino);
+       if (ret == -EEXIST)
+               ret = 0;        /* orphan item already exists: not an error */
+
        return ret;
 }
 
@@ -1287,6 +1302,7 @@ static int count_inode_extrefs(struct btrfs_root *root,
                leaf = path->nodes[0];
                item_size = btrfs_item_size_nr(leaf, path->slots[0]);
                ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+               cur_offset = 0;
 
                while (cur_offset < item_size) {
                        extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
@@ -1302,7 +1318,7 @@ static int count_inode_extrefs(struct btrfs_root *root,
        }
        btrfs_release_path(path);
 
-       if (ret < 0)
+       if (ret < 0 && ret != -ENOENT)
                return ret;
        return nlink;
 }
@@ -1394,9 +1410,6 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
        nlink = ret;
 
        ret = count_inode_extrefs(root, inode, path);
-       if (ret == -ENOENT)
-               ret = 0;
-
        if (ret < 0)
                goto out;
 
@@ -1556,6 +1569,30 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
        return ret;
 }
 
+/*
+ * Return true if the log tree has a back reference (inode ref or, failing
+ * that, inode extref) for the given name, inode and parent directory inode.
+ */
+static bool name_in_log_ref(struct btrfs_root *log_root,
+                           const char *name, const int name_len,
+                           const u64 dirid, const u64 ino)
+{
+       struct btrfs_key search_key;
+
+       search_key.objectid = ino;
+       search_key.type = BTRFS_INODE_REF_KEY;
+       search_key.offset = dirid;
+       if (backref_in_log(log_root, &search_key, dirid, name, name_len))
+               return true;
+
+       search_key.type = BTRFS_INODE_EXTREF_KEY;
+       search_key.offset = btrfs_extref_hash(dirid, name, name_len);
+       if (backref_in_log(log_root, &search_key, dirid, name, name_len))
+               return true;
+
+       return false;
+}
+
 /*
  * take a single entry in a log directory item and replay it into
  * the subvolume.
@@ -1666,10 +1703,17 @@ out:
        return ret;
 
 insert:
+       if (name_in_log_ref(root->log_root, name, name_len,
+                           key->objectid, log_key.objectid)) {
+               /* The dentry will be added later. */
+               ret = 0;
+               update_size = false;
+               goto out;
+       }
        btrfs_release_path(path);
        ret = insert_one_name(trans, root, path, key->objectid, key->offset,
                              name, name_len, log_type, &log_key);
-       if (ret && ret != -ENOENT)
+       if (ret && ret != -ENOENT && ret != -EEXIST)
                goto out;
        update_size = false;
        ret = 0;
@@ -2164,7 +2208,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
                parent = path->nodes[*level];
                root_owner = btrfs_header_owner(parent);
 
-               next = btrfs_find_create_tree_block(root, bytenr, blocksize);
+               next = btrfs_find_create_tree_block(root, bytenr);
                if (!next)
                        return -ENOMEM;
 
@@ -2416,8 +2460,8 @@ static void wait_for_writer(struct btrfs_trans_handle *trans,
                mutex_unlock(&root->log_mutex);
                if (atomic_read(&root->log_writers))
                        schedule();
-               mutex_lock(&root->log_mutex);
                finish_wait(&root->log_writer_wait, &wait);
+               mutex_lock(&root->log_mutex);
        }
 }
 
@@ -3219,7 +3263,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
 static void fill_inode_item(struct btrfs_trans_handle *trans,
                            struct extent_buffer *leaf,
                            struct btrfs_inode_item *item,
-                           struct inode *inode, int log_inode_only)
+                           struct inode *inode, int log_inode_only,
+                           u64 logged_isize)
 {
        struct btrfs_map_token token;
 
@@ -3232,7 +3277,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
                 * to say 'update this inode with these values'
                 */
                btrfs_set_token_inode_generation(leaf, item, 0, &token);
-               btrfs_set_token_inode_size(leaf, item, 0, &token);
+               btrfs_set_token_inode_size(leaf, item, logged_isize, &token);
        } else {
                btrfs_set_token_inode_generation(leaf, item,
                                                 BTRFS_I(inode)->generation,
@@ -3245,19 +3290,19 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
        btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
        btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
 
-       btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
+       btrfs_set_token_timespec_sec(leaf, &item->atime,
                                     inode->i_atime.tv_sec, &token);
-       btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
+       btrfs_set_token_timespec_nsec(leaf, &item->atime,
                                      inode->i_atime.tv_nsec, &token);
 
-       btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
+       btrfs_set_token_timespec_sec(leaf, &item->mtime,
                                     inode->i_mtime.tv_sec, &token);
-       btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
+       btrfs_set_token_timespec_nsec(leaf, &item->mtime,
                                      inode->i_mtime.tv_nsec, &token);
 
-       btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
+       btrfs_set_token_timespec_sec(leaf, &item->ctime,
                                     inode->i_ctime.tv_sec, &token);
-       btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
+       btrfs_set_token_timespec_nsec(leaf, &item->ctime,
                                      inode->i_ctime.tv_nsec, &token);
 
        btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
@@ -3284,7 +3329,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
                return ret;
        inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
                                    struct btrfs_inode_item);
-       fill_inode_item(trans, path->nodes[0], inode_item, inode, 0);
+       fill_inode_item(trans, path->nodes[0], inode_item, inode, 0, 0);
        btrfs_release_path(path);
        return 0;
 }
@@ -3293,7 +3338,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
                               struct inode *inode,
                               struct btrfs_path *dst_path,
                               struct btrfs_path *src_path, u64 *last_extent,
-                              int start_slot, int nr, int inode_only)
+                              int start_slot, int nr, int inode_only,
+                              u64 logged_isize)
 {
        unsigned long src_offset;
        unsigned long dst_offset;
@@ -3350,7 +3396,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
                                                    dst_path->slots[0],
                                                    struct btrfs_inode_item);
                        fill_inode_item(trans, dst_path->nodes[0], inode_item,
-                                       inode, inode_only == LOG_INODE_EXISTS);
+                                       inode, inode_only == LOG_INODE_EXISTS,
+                                       logged_isize);
                } else {
                        copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
                                           src_offset, ins_sizes[i]);
@@ -3902,6 +3949,33 @@ process:
        return ret;
 }
 
+static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
+                            struct btrfs_path *path, u64 *size_ret)
+{
+       struct btrfs_key key;
+       int ret;
+
+       key.objectid = btrfs_ino(inode);
+       key.type = BTRFS_INODE_ITEM_KEY;
+       key.offset = 0;
+
+       ret = btrfs_search_slot(NULL, log, &key, path, 0, 0);
+       if (ret < 0) {                  /* lookup failed: hand the error back */
+               return ret;
+       } else if (ret > 0) {           /* presumably: inode item not in log */
+               *size_ret = i_size_read(inode);
+       } else {                        /* inode item found in the log */
+               struct btrfs_inode_item *item;
+
+               item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                     struct btrfs_inode_item);
+               *size_ret = btrfs_inode_size(path->nodes[0], item);
+       }
+
+       btrfs_release_path(path);
+       return 0;
+}
+
 /* log a single inode in the tree log.
  * At least one parent directory for this inode must exist in the tree
  * or be logged already.
@@ -3939,6 +4013,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        bool fast_search = false;
        u64 ino = btrfs_ino(inode);
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       u64 logged_isize = 0;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -3966,15 +4041,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                max_key.type = (u8)-1;
        max_key.offset = (u64)-1;
 
-       /* Only run delayed items if we are a dir or a new file */
+       /*
+        * Only run delayed items if we are a dir or a new file.
+        * Otherwise commit the delayed inode only, which is needed in
+        * order for the log replay code to mark inodes for link count
+        * fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
+        */
        if (S_ISDIR(inode->i_mode) ||
-           BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) {
+           BTRFS_I(inode)->generation > root->fs_info->last_trans_committed)
                ret = btrfs_commit_inode_delayed_items(trans, inode);
-               if (ret) {
-                       btrfs_free_path(path);
-                       btrfs_free_path(dst_path);
-                       return ret;
-               }
+       else
+               ret = btrfs_commit_inode_delayed_inode(inode);
+
+       if (ret) {
+               btrfs_free_path(path);
+               btrfs_free_path(dst_path);
+               return ret;
        }
 
        mutex_lock(&BTRFS_I(inode)->log_mutex);
@@ -3988,22 +4070,56 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        if (S_ISDIR(inode->i_mode)) {
                int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
 
-               if (inode_only == LOG_INODE_EXISTS)
-                       max_key_type = BTRFS_XATTR_ITEM_KEY;
+               if (inode_only == LOG_INODE_EXISTS) {
+                       max_key_type = BTRFS_INODE_EXTREF_KEY;
+                       max_key.type = max_key_type;
+               }
                ret = drop_objectid_items(trans, log, path, ino, max_key_type);
        } else {
-               if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-                                      &BTRFS_I(inode)->runtime_flags)) {
-                       clear_bit(BTRFS_INODE_COPY_EVERYTHING,
-                                 &BTRFS_I(inode)->runtime_flags);
-                       ret = btrfs_truncate_inode_items(trans, log,
-                                                        inode, 0, 0);
-               } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
-                                             &BTRFS_I(inode)->runtime_flags) ||
+               if (inode_only == LOG_INODE_EXISTS) {
+                       /*
+                        * Make sure the new inode item we write to the log has
+                        * the same isize as the one already logged (if any).
+                        * This is necessary to prevent data loss after log
+                        * replay, and also to prevent doing a wrong expanding
+                        * truncate - e.g. create file, write 4K into offset 0,
+                        * fsync, write 4K into offset 4096, add hard link,
+                        * fsync some other file (to sync log), power fail - if
+                        * we use the inode's current i_size, after log replay
+                        * we get an 8Kb file whose last 4Kb extent is a hole
+                        * (zeroes), as if an expanding truncate happened,
+                        * instead of getting a file of 4Kb only.
+                        */
+                       err = logged_inode_size(log, inode, path,
+                                               &logged_isize);
+                       if (err)
+                               goto out_unlock;
+               }
+               if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                            &BTRFS_I(inode)->runtime_flags)) {
+                       if (inode_only == LOG_INODE_EXISTS) {
+                               max_key.type = BTRFS_INODE_EXTREF_KEY;
+                               ret = drop_objectid_items(trans, log, path, ino,
+                                                         max_key.type);
+                       } else {
+                               clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                                         &BTRFS_I(inode)->runtime_flags);
+                               clear_bit(BTRFS_INODE_COPY_EVERYTHING,
+                                         &BTRFS_I(inode)->runtime_flags);
+                               ret = btrfs_truncate_inode_items(trans, log,
+                                                                inode, 0, 0);
+                       }
+               } else if (test_bit(BTRFS_INODE_COPY_EVERYTHING,
+                                   &BTRFS_I(inode)->runtime_flags) ||
                           inode_only == LOG_INODE_EXISTS) {
-                       if (inode_only == LOG_INODE_ALL)
+                       if (inode_only == LOG_INODE_ALL) {
+                               clear_bit(BTRFS_INODE_COPY_EVERYTHING,
+                                         &BTRFS_I(inode)->runtime_flags);
                                fast_search = true;
-                       max_key.type = BTRFS_XATTR_ITEM_KEY;
+                               max_key.type = BTRFS_XATTR_ITEM_KEY;
+                       } else {
+                               max_key.type = BTRFS_INODE_EXTREF_KEY;
+                       }
                        ret = drop_objectid_items(trans, log, path, ino,
                                                  max_key.type);
                } else {
@@ -4047,7 +4163,8 @@ again:
                }
 
                ret = copy_items(trans, inode, dst_path, path, &last_extent,
-                                ins_start_slot, ins_nr, inode_only);
+                                ins_start_slot, ins_nr, inode_only,
+                                logged_isize);
                if (ret < 0) {
                        err = ret;
                        goto out_unlock;
@@ -4071,7 +4188,7 @@ next_slot:
                if (ins_nr) {
                        ret = copy_items(trans, inode, dst_path, path,
                                         &last_extent, ins_start_slot,
-                                        ins_nr, inode_only);
+                                        ins_nr, inode_only, logged_isize);
                        if (ret < 0) {
                                err = ret;
                                goto out_unlock;
@@ -4092,7 +4209,8 @@ next_slot:
        }
        if (ins_nr) {
                ret = copy_items(trans, inode, dst_path, path, &last_extent,
-                                ins_start_slot, ins_nr, inode_only);
+                                ins_start_slot, ins_nr, inode_only,
+                                logged_isize);
                if (ret < 0) {
                        err = ret;
                        goto out_unlock;
@@ -4273,6 +4391,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
        struct dentry *old_parent = NULL;
        int ret = 0;
        u64 last_committed = root->fs_info->last_trans_committed;
+       const struct dentry * const first_parent = parent;
+       const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
+                                last_committed);
 
        sb = inode->i_sb;
 
@@ -4328,7 +4449,6 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                goto end_trans;
        }
 
-       inode_only = LOG_INODE_EXISTS;
        while (1) {
                if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
                        break;
@@ -4337,8 +4457,22 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                if (root != BTRFS_I(inode)->root)
                        break;
 
+               /*
+                * On unlink we must make sure our immediate parent directory
+                * inode is fully logged. This is to prevent leaving dangling
+                * directory index entries and a wrong directory inode's i_size.
+                * Not doing so can result in a directory being impossible to
+                * delete after log replay (rmdir will always fail with error
+                * -ENOTEMPTY).
+                */
+               if (did_unlink && parent == first_parent)
+                       inode_only = LOG_INODE_ALL;
+               else
+                       inode_only = LOG_INODE_EXISTS;
+
                if (BTRFS_I(inode)->generation >
-                   root->fs_info->last_trans_committed) {
+                   root->fs_info->last_trans_committed ||
+                   inode_only == LOG_INODE_ALL) {
                        ret = btrfs_log_inode(trans, root, inode, inode_only,
                                              0, LLONG_MAX, ctx);
                        if (ret)