fs/btrfs/transaction.c

   1 /*
   2  * Copyright (C) 2007 Oracle.  All rights reserved.
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public
   6  * License v2 as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope that it will be useful,
   9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11  * General Public License for more details.
  12  *
  13  * You should have received a copy of the GNU General Public
  14  * License along with this program; if not, write to the
  15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16  * Boston, MA 02111-1307, USA.
  17  */
  18
  19 #include <linux/fs.h>
  20 #include <linux/slab.h>
  21 #include <linux/sched.h>
  22 #include <linux/writeback.h>
  23 #include <linux/pagemap.h>
  24 #include <linux/blkdev.h>
  25 #include "ctree.h"
  26 #include "disk-io.h"
  27 #include "transaction.h"
  28 #include "locking.h"
  29 #include "tree-log.h"
  30
  31 #define BTRFS_ROOT_TRANS_TAG 0
  32
  33 static noinline void put_transaction(struct btrfs_transaction *transaction)
  34 {
  35         WARN_ON(atomic_read(&transaction->use_count) == 0);
  36         if (atomic_dec_and_test(&transaction->use_count)) {
  37                 memset(transaction, 0, sizeof(*transaction));
  38                 kmem_cache_free(btrfs_transaction_cachep, transaction);
  39         }
  40 }
  41
  42 static noinline void switch_commit_root(struct btrfs_root *root)
  43 {
  44         free_extent_buffer(root->commit_root);
  45         root->commit_root = btrfs_root_node(root);
  46 }
  47
  48 /*
  49  * either allocate a new transaction or hop into the existing one
  50  */
  51 static noinline int join_transaction(struct btrfs_root *root)
  52 {
  53         struct btrfs_transaction *cur_trans;
  54         cur_trans = root->fs_info->running_transaction;
  55         if (!cur_trans) {
  56                 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
  57                                              GFP_NOFS);
  58                 if (!cur_trans)
  59                         return -ENOMEM;
  60                 root->fs_info->generation++;
  61                 atomic_set(&cur_trans->num_writers, 1);
  62                 cur_trans->num_joined = 0;
  63                 cur_trans->transid = root->fs_info->generation;
  64                 init_waitqueue_head(&cur_trans->writer_wait);
  65                 init_waitqueue_head(&cur_trans->commit_wait);
  66                 cur_trans->in_commit = 0;
  67                 cur_trans->blocked = 0;
  68                 atomic_set(&cur_trans->use_count, 1);
  69                 cur_trans->commit_done = 0;
  70                 cur_trans->start_time = get_seconds();
  71
  72                 cur_trans->delayed_refs.root = RB_ROOT;
  73                 cur_trans->delayed_refs.num_entries = 0;
  74                 cur_trans->delayed_refs.num_heads_ready = 0;
  75                 cur_trans->delayed_refs.num_heads = 0;
  76                 cur_trans->delayed_refs.flushing = 0;
  77                 cur_trans->delayed_refs.run_delayed_start = 0;
  78                 spin_lock_init(&cur_trans->delayed_refs.lock);
  79
  80                 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
  81                 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
  82                 extent_io_tree_init(&cur_trans->dirty_pages,
  83                                      root->fs_info->btree_inode->i_mapping,
  84                                      GFP_NOFS);
  85                 spin_lock(&root->fs_info->new_trans_lock);
  86                 root->fs_info->running_transaction = cur_trans;
  87                 spin_unlock(&root->fs_info->new_trans_lock);
  88         } else {
  89                 atomic_inc(&cur_trans->num_writers);
  90                 cur_trans->num_joined++;
  91         }
  92
  93         return 0;
  94 }
  95
  96 /*
  97  * this does all the record keeping required to make sure that a reference
  98  * counted root is properly recorded in a given transaction.  This is required
  99  * to make sure the old root from before we joined the transaction is deleted
 100  * when the transaction commits
 101  */
 102 static noinline int record_root_in_trans(struct btrfs_trans_handle *trans,
 103                                          struct btrfs_root *root)
 104 {
 105         if (root->ref_cows && root->last_trans < trans->transid) {
 106                 WARN_ON(root == root->fs_info->extent_root);
 107                 WARN_ON(root->commit_root != root->node);
 108
 109                 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
 110                            (unsigned long)root->root_key.objectid,
 111                            BTRFS_ROOT_TRANS_TAG);
 112                 root->last_trans = trans->transid;
 113                 btrfs_init_reloc_root(trans, root);
 114         }
 115         return 0;
 116 }
 117
 118 int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
 119                                struct btrfs_root *root)
 120 {
 121         if (!root->ref_cows)
 122                 return 0;
 123
 124         mutex_lock(&root->fs_info->trans_mutex);
 125         if (root->last_trans == trans->transid) {
 126                 mutex_unlock(&root->fs_info->trans_mutex);
 127                 return 0;
 128         }
 129
 130         record_root_in_trans(trans, root);
 131         mutex_unlock(&root->fs_info->trans_mutex);
 132         return 0;
 133 }
 134
 135 /* wait for commit against the current transaction to become unblocked
 136  * when this is done, it is safe to start a new transaction, but the current
 137  * transaction might not be fully on disk.
 138  */
 139 static void wait_current_trans(struct btrfs_root *root)
 140 {
 141         struct btrfs_transaction *cur_trans;
 142
 143         cur_trans = root->fs_info->running_transaction;
 144         if (cur_trans && cur_trans->blocked) {
 145                 DEFINE_WAIT(wait);
 146                 atomic_inc(&cur_trans->use_count);
 147                 while (1) {
 148                         prepare_to_wait(&root->fs_info->transaction_wait, &wait,
 149                                         TASK_UNINTERRUPTIBLE);
 150                         if (!cur_trans->blocked)
 151                                 break;
 152                         mutex_unlock(&root->fs_info->trans_mutex);
 153                         schedule();
 154                         mutex_lock(&root->fs_info->trans_mutex);
 155                 }
 156                 finish_wait(&root->fs_info->transaction_wait, &wait);
 157                 put_transaction(cur_trans);
 158         }
 159 }
 160
 161 enum btrfs_trans_type {
 162         TRANS_START,
 163         TRANS_JOIN,
 164         TRANS_USERSPACE,
 165         TRANS_JOIN_NOLOCK,
 166 };
 167
 168 static int may_wait_transaction(struct btrfs_root *root, int type)
 169 {
 170         if (!root->fs_info->log_root_recovering &&
 171             ((type == TRANS_START && !root->fs_info->open_ioctl_trans) ||
 172              type == TRANS_USERSPACE))
 173                 return 1;
 174         return 0;
 175 }
 176
 177 static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 178                                                     u64 num_items, int type)
 179 {
 180         struct btrfs_trans_handle *h;
 181         struct btrfs_transaction *cur_trans;
 182         int retries = 0;
 183         int ret;
 184
 185         if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
 186                 return ERR_PTR(-EROFS);
 187 again:
 188         h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
 189         if (!h)
 190                 return ERR_PTR(-ENOMEM);
 191
 192         if (type != TRANS_JOIN_NOLOCK)
 193                 mutex_lock(&root->fs_info->trans_mutex);
 194         if (may_wait_transaction(root, type))
 195                 wait_current_trans(root);
 196
 197         ret = join_transaction(root);
 198         if (ret < 0) {
 199                 kmem_cache_free(btrfs_trans_handle_cachep, h);
 200                 if (type != TRANS_JOIN_NOLOCK)
 201                         mutex_unlock(&root->fs_info->trans_mutex);
 202                 return ERR_PTR(ret);
 203         }
 204
 205         cur_trans = root->fs_info->running_transaction;
 206         atomic_inc(&cur_trans->use_count);
 207         if (type != TRANS_JOIN_NOLOCK)
 208                 mutex_unlock(&root->fs_info->trans_mutex);
 209
 210         h->transid = cur_trans->transid;
 211         h->transaction = cur_trans;
 212         h->blocks_used = 0;
 213         h->block_group = 0;
 214         h->bytes_reserved = 0;
 215         h->delayed_ref_updates = 0;
 216         h->block_rsv = NULL;
 217
 218         smp_mb();
 219         if (cur_trans->blocked && may_wait_transaction(root, type)) {
 220                 btrfs_commit_transaction(h, root);
 221                 goto again;
 222         }
 223
 224         if (num_items > 0) {
 225                 ret = btrfs_trans_reserve_metadata(h, root, num_items);
 226                 if (ret == -EAGAIN && !retries) {
 227                         retries++;
 228                         btrfs_commit_transaction(h, root);
 229                         goto again;
 230                 } else if (ret == -EAGAIN) {
 231                         /*
 232                          * We have already retried and got EAGAIN, so really we
 233                          * don't have space, so set ret to -ENOSPC.
 234                          */
 235                         ret = -ENOSPC;
 236                 }
 237
 238                 if (ret < 0) {
 239                         btrfs_end_transaction(h, root);
 240                         return ERR_PTR(ret);
 241                 }
 242         }
 243
 244         if (type != TRANS_JOIN_NOLOCK)
 245                 mutex_lock(&root->fs_info->trans_mutex);
 246         record_root_in_trans(h, root);
 247         if (type != TRANS_JOIN_NOLOCK)
 248                 mutex_unlock(&root->fs_info->trans_mutex);
 249
 250         if (!current->journal_info && type != TRANS_USERSPACE)
 251                 current->journal_info = h;
 252         return h;
 253 }
 254
 255 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
 256                                                    int num_items)
 257 {
 258         return start_transaction(root, num_items, TRANS_START);
 259 }
 260 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
 261                                                    int num_blocks)
 262 {
 263         return start_transaction(root, 0, TRANS_JOIN);
 264 }
 265
 266 struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
 267                                                           int num_blocks)
 268 {
 269         return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
 270 }
 271
 272 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
 273                                                          int num_blocks)
 274 {
 275         return start_transaction(r, 0, TRANS_USERSPACE);
 276 }
 277
 278 /* wait for a transaction commit to be fully complete */
 279 static noinline int wait_for_commit(struct btrfs_root *root,
 280                                     struct btrfs_transaction *commit)
 281 {
 282         DEFINE_WAIT(wait);
 283         mutex_lock(&root->fs_info->trans_mutex);
 284         while (!commit->commit_done) {
 285                 prepare_to_wait(&commit->commit_wait, &wait,
 286                                 TASK_UNINTERRUPTIBLE);
 287                 if (commit->commit_done)
 288                         break;
 289                 mutex_unlock(&root->fs_info->trans_mutex);
 290                 schedule();
 291                 mutex_lock(&root->fs_info->trans_mutex);
 292         }
 293         mutex_unlock(&root->fs_info->trans_mutex);
 294         finish_wait(&commit->commit_wait, &wait);
 295         return 0;
 296 }
 297
 298 int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
 299 {
 300         struct btrfs_transaction *cur_trans = NULL, *t;
 301         int ret;
 302
 303         mutex_lock(&root->fs_info->trans_mutex);
 304
 305         ret = 0;
 306         if (transid) {
 307                 if (transid <= root->fs_info->last_trans_committed)
 308                         goto out_unlock;
 309
 310                 /* find specified transaction */
 311                 list_for_each_entry(t, &root->fs_info->trans_list, list) {
 312                         if (t->transid == transid) {
 313                                 cur_trans = t;
 314                                 break;
 315                         }
 316                         if (t->transid > transid)
 317                                 break;
 318                 }
 319                 ret = -EINVAL;
 320                 if (!cur_trans)
 321                         goto out_unlock;  /* bad transid */
 322         } else {
 323                 /* find newest transaction that is committing | committed */
 324                 list_for_each_entry_reverse(t, &root->fs_info->trans_list,
 325                                             list) {
 326                         if (t->in_commit) {
 327                                 if (t->commit_done)
 328                                         goto out_unlock;
 329                                 cur_trans = t;
 330                                 break;
 331                         }
 332                 }
 333                 if (!cur_trans)
 334                         goto out_unlock;  /* nothing committing|committed */
 335         }
 336
 337         atomic_inc(&cur_trans->use_count);
 338         mutex_unlock(&root->fs_info->trans_mutex);
 339
 340         wait_for_commit(root, cur_trans);
 341
 342         mutex_lock(&root->fs_info->trans_mutex);
 343         put_transaction(cur_trans);
 344         ret = 0;
 345 out_unlock:
 346         mutex_unlock(&root->fs_info->trans_mutex);
 347         return ret;
 348 }
 349
 350 #if 0
 351 /*
 352  * rate limit against the drop_snapshot code.  This helps to slow down new
 353  * operations if the drop_snapshot code isn't able to keep up.
 354  */
 355 static void throttle_on_drops(struct btrfs_root *root)
 356 {
 357         struct btrfs_fs_info *info = root->fs_info;
 358         int harder_count = 0;
 359
 360 harder:
 361         if (atomic_read(&info->throttles)) {
 362                 DEFINE_WAIT(wait);
 363                 int thr;
 364                 thr = atomic_read(&info->throttle_gen);
 365
 366                 do {
 367                         prepare_to_wait(&info->transaction_throttle,
 368                                         &wait, TASK_UNINTERRUPTIBLE);
 369                         if (!atomic_read(&info->throttles)) {
 370                                 finish_wait(&info->transaction_throttle, &wait);
 371                                 break;
 372                         }
 373                         schedule();
 374                         finish_wait(&info->transaction_throttle, &wait);
 375                 } while (thr == atomic_read(&info->throttle_gen));
 376                 harder_count++;
 377
 378                 if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
 379                     harder_count < 2)
 380                         goto harder;
 381
 382                 if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
 383                     harder_count < 10)
 384                         goto harder;
 385
 386                 if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
 387                     harder_count < 20)
 388                         goto harder;
 389         }
 390 }
 391 #endif
 392
 393 void btrfs_throttle(struct btrfs_root *root)
 394 {
 395         mutex_lock(&root->fs_info->trans_mutex);
 396         if (!root->fs_info->open_ioctl_trans)
 397                 wait_current_trans(root);
 398         mutex_unlock(&root->fs_info->trans_mutex);
 399 }
 400
 401 static int should_end_transaction(struct btrfs_trans_handle *trans,
 402                                   struct btrfs_root *root)
 403 {
 404         int ret;
 405         ret = btrfs_block_rsv_check(trans, root,
 406                                     &root->fs_info->global_block_rsv, 0, 5);
 407         return ret ? 1 : 0;
 408 }
 409
 410 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
 411                                  struct btrfs_root *root)
 412 {
 413         struct btrfs_transaction *cur_trans = trans->transaction;
 414         int updates;
 415
 416         if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
 417                 return 1;
 418
 419         updates = trans->delayed_ref_updates;
 420         trans->delayed_ref_updates = 0;
 421         if (updates)
 422                 btrfs_run_delayed_refs(trans, root, updates);
 423
 424         return should_end_transaction(trans, root);
 425 }
 426
 427 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 428                           struct btrfs_root *root, int throttle, int lock)
 429 {
 430         struct btrfs_transaction *cur_trans = trans->transaction;
 431         struct btrfs_fs_info *info = root->fs_info;
 432         int count = 0;
 433
 434         while (count < 4) {
 435                 unsigned long cur = trans->delayed_ref_updates;
 436                 trans->delayed_ref_updates = 0;
 437                 if (cur &&
 438                     trans->transaction->delayed_refs.num_heads_ready > 64) {
 439                         trans->delayed_ref_updates = 0;
 440
 441                         /*
 442                          * do a full flush if the transaction is trying
 443                          * to close
 444                          */
 445                         if (trans->transaction->delayed_refs.flushing)
 446                                 cur = 0;
 447                         btrfs_run_delayed_refs(trans, root, cur);
 448                 } else {
 449                         break;
 450                 }
 451                 count++;
 452         }
 453
 454         btrfs_trans_release_metadata(trans, root);
 455
 456         if (lock && !root->fs_info->open_ioctl_trans &&
 457             should_end_transaction(trans, root))
 458                 trans->transaction->blocked = 1;
 459
 460         if (lock && cur_trans->blocked && !cur_trans->in_commit) {
 461                 if (throttle)
 462                         return btrfs_commit_transaction(trans, root);
 463                 else
 464                         wake_up_process(info->transaction_kthread);
 465         }
 466
 467         WARN_ON(cur_trans != info->running_transaction);
 468         WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
 469         atomic_dec(&cur_trans->num_writers);
 470
 471         smp_mb();
 472         if (waitqueue_active(&cur_trans->writer_wait))
 473                 wake_up(&cur_trans->writer_wait);
 474         put_transaction(cur_trans);
 475
 476         if (current->journal_info == trans)
 477                 current->journal_info = NULL;
 478         memset(trans, 0, sizeof(*trans));
 479         kmem_cache_free(btrfs_trans_handle_cachep, trans);
 480
 481         if (throttle)
 482                 btrfs_run_delayed_iputs(root);
 483
 484         return 0;
 485 }
 486
 487 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
 488                           struct btrfs_root *root)
 489 {
 490         return __btrfs_end_transaction(trans, root, 0, 1);
 491 }
 492
 493 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
 494                                    struct btrfs_root *root)
 495 {
 496         return __btrfs_end_transaction(trans, root, 1, 1);
 497 }
 498
 499 int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
 500                                  struct btrfs_root *root)
 501 {
 502         return __btrfs_end_transaction(trans, root, 0, 0);
 503 }
 504
 505 /*
 506  * when btree blocks are allocated, they have some corresponding bits set for
 507  * them in one of two extent_io trees.  This is used to make sure all of
 508  * those extents are sent to disk but does not wait on them
 509  */
 510 int btrfs_write_marked_extents(struct btrfs_root *root,
 511                                struct extent_io_tree *dirty_pages, int mark)
 512 {
 513         int ret;
 514         int err = 0;
 515         int werr = 0;
 516         struct page *page;
 517         struct inode *btree_inode = root->fs_info->btree_inode;
 518         u64 start = 0;
 519         u64 end;
 520         unsigned long index;
 521
 522         while (1) {
 523                 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
 524                                             mark);
 525                 if (ret)
 526                         break;
 527                 while (start <= end) {
 528                         cond_resched();
 529
 530                         index = start >> PAGE_CACHE_SHIFT;
 531                         start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
 532                         page = find_get_page(btree_inode->i_mapping, index);
 533                         if (!page)
 534                                 continue;
 535
 536                         btree_lock_page_hook(page);
 537                         if (!page->mapping) {
 538                                 unlock_page(page);
 539                                 page_cache_release(page);
 540                                 continue;
 541                         }
 542
 543                         if (PageWriteback(page)) {
 544                                 if (PageDirty(page))
 545                                         wait_on_page_writeback(page);
 546                                 else {
 547                                         unlock_page(page);
 548                                         page_cache_release(page);
 549                                         continue;
 550                                 }
 551                         }
 552                         err = write_one_page(page, 0);
 553                         if (err)
 554                                 werr = err;
 555                         page_cache_release(page);
 556                 }
 557         }
 558         if (err)
 559                 werr = err;
 560         return werr;
 561 }
 562
 563 /*
 564  * when btree blocks are allocated, they have some corresponding bits set for
 565  * them in one of two extent_io trees.  This is used to make sure all of
 566  * those extents are on disk for transaction or log commit.  We wait
 567  * on all the pages and clear them from the dirty pages state tree
 568  */
 569 int btrfs_wait_marked_extents(struct btrfs_root *root,
 570                               struct extent_io_tree *dirty_pages, int mark)
 571 {
 572         int ret;
 573         int err = 0;
 574         int werr = 0;
 575         struct page *page;
 576         struct inode *btree_inode = root->fs_info->btree_inode;
 577         u64 start = 0;
 578         u64 end;
 579         unsigned long index;
 580
 581         while (1) {
 582                 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
 583                                             mark);
 584                 if (ret)
 585                         break;
 586
 587                 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
 588                 while (start <= end) {
 589                         index = start >> PAGE_CACHE_SHIFT;
 590                         start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
 591                         page = find_get_page(btree_inode->i_mapping, index);
 592                         if (!page)
 593                                 continue;
 594                         if (PageDirty(page)) {
 595                                 btree_lock_page_hook(page);
 596                                 wait_on_page_writeback(page);
 597                                 err = write_one_page(page, 0);
 598                                 if (err)
 599                                         werr = err;
 600                         }
 601                         wait_on_page_writeback(page);
 602                         page_cache_release(page);
 603                         cond_resched();
 604                 }
 605         }
 606         if (err)
 607                 werr = err;
 608         return werr;
 609 }
 610
 611 /*
 612  * when btree blocks are allocated, they have some corresponding bits set for
 613  * them in one of two extent_io trees.  This is used to make sure all of
 614  * those extents are on disk for transaction or log commit
 615  */
 616 int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
 617                                 struct extent_io_tree *dirty_pages, int mark)
 618 {
 619         int ret;
 620         int ret2;
 621
 622         ret = btrfs_write_marked_extents(root, dirty_pages, mark);
 623         ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
 624         return ret || ret2;
 625 }
 626
 627 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
 628                                      struct btrfs_root *root)
 629 {
 630         if (!trans || !trans->transaction) {
 631                 struct inode *btree_inode;
 632                 btree_inode = root->fs_info->btree_inode;
 633                 return filemap_write_and_wait(btree_inode->i_mapping);
 634         }
 635         return btrfs_write_and_wait_marked_extents(root,
 636                                            &trans->transaction->dirty_pages,
 637                                            EXTENT_DIRTY);
 638 }
 639
 640 /*
 641  * this is used to update the root pointer in the tree of tree roots.
 642  *
 643  * But, in the case of the extent allocation tree, updating the root
 644  * pointer may allocate blocks which may change the root of the extent
 645  * allocation tree.
 646  *
 647  * So, this loops and repeats and makes sure the cowonly root didn't
 648  * change while the root pointer was being updated in the metadata.
 649  */
 650 static int update_cowonly_root(struct btrfs_trans_handle *trans,
 651                                struct btrfs_root *root)
 652 {
 653         int ret;
 654         u64 old_root_bytenr;
 655         u64 old_root_used;
 656         struct btrfs_root *tree_root = root->fs_info->tree_root;
 657
 658         old_root_used = btrfs_root_used(&root->root_item);
 659         btrfs_write_dirty_block_groups(trans, root);
 660
 661         while (1) {
 662                 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
 663                 if (old_root_bytenr == root->node->start &&
 664                     old_root_used == btrfs_root_used(&root->root_item))
 665                         break;
 666
 667                 btrfs_set_root_node(&root->root_item, root->node);
 668                 ret = btrfs_update_root(trans, tree_root,
 669                                         &root->root_key,
 670                                         &root->root_item);
 671                 BUG_ON(ret);
 672
 673                 old_root_used = btrfs_root_used(&root->root_item);
 674                 ret = btrfs_write_dirty_block_groups(trans, root);
 675                 BUG_ON(ret);
 676         }
 677
 678         if (root != root->fs_info->extent_root)
 679                 switch_commit_root(root);
 680
 681         return 0;
 682 }
 683
 684 /*
 685  * update all the cowonly tree roots on disk
 686  */
 687 static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
 688                                          struct btrfs_root *root)
 689 {
 690         struct btrfs_fs_info *fs_info = root->fs_info;
 691         struct list_head *next;
 692         struct extent_buffer *eb;
 693         int ret;
 694
 695         ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
 696         BUG_ON(ret);
 697
 698         eb = btrfs_lock_root_node(fs_info->tree_root);
 699         btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
 700         btrfs_tree_unlock(eb);
 701         free_extent_buffer(eb);
 702
 703         ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
 704         BUG_ON(ret);
 705
 706         while (!list_empty(&fs_info->dirty_cowonly_roots)) {
 707                 next = fs_info->dirty_cowonly_roots.next;
 708                 list_del_init(next);
 709                 root = list_entry(next, struct btrfs_root, dirty_list);
 710
 711                 update_cowonly_root(trans, root);
 712         }
 713
 714         down_write(&fs_info->extent_commit_sem);
 715         switch_commit_root(fs_info->extent_root);
 716         up_write(&fs_info->extent_commit_sem);
 717
 718         return 0;
 719 }
 720
 721 /*
 722  * dead roots are old snapshots that need to be deleted.  This allocates
 723  * a dirty root struct and adds it into the list of dead roots that need to
 724  * be deleted
 725  */
 726 int btrfs_add_dead_root(struct btrfs_root *root)
 727 {
 728         mutex_lock(&root->fs_info->trans_mutex);
 729         list_add(&root->root_list, &root->fs_info->dead_roots);
 730         mutex_unlock(&root->fs_info->trans_mutex);
 731         return 0;
 732 }
 733
 734 /*
 735  * update all the cowonly tree roots on disk
 736  */
 737 static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
 738                                     struct btrfs_root *root)
 739 {
 740         struct btrfs_root *gang[8];
 741         struct btrfs_fs_info *fs_info = root->fs_info;
 742         int i;
 743         int ret;
 744         int err = 0;
 745
 746         while (1) {
 747                 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
 748                                                  (void **)gang, 0,
 749                                                  ARRAY_SIZE(gang),
 750                                                  BTRFS_ROOT_TRANS_TAG);
 751                 if (ret == 0)
 752                         break;
 753                 for (i = 0; i < ret; i++) {
 754                         root = gang[i];
 755                         radix_tree_tag_clear(&fs_info->fs_roots_radix,
 756                                         (unsigned long)root->root_key.objectid,
 757                                         BTRFS_ROOT_TRANS_TAG);
 758
 759                         btrfs_free_log(trans, root);
 760                         btrfs_update_reloc_root(trans, root);
 761                         btrfs_orphan_commit_root(trans, root);
 762
 763                         if (root->commit_root != root->node) {
 764                                 switch_commit_root(root);
 765                                 btrfs_set_root_node(&root->root_item,
 766                                                     root->node);
 767                         }
 768
 769                         err = btrfs_update_root(trans, fs_info->tree_root,
 770                                                 &root->root_key,
 771                                                 &root->root_item);
 772                         if (err)
 773                                 break;
 774                 }
 775         }
 776         return err;
 777 }
 778
 779 /*
 780  * defrag a given btree.  If cacheonly == 1, this won't read from the disk,
 781  * otherwise every leaf in the btree is read and defragged.
 782  */
 783 int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
 784 {
 785         struct btrfs_fs_info *info = root->fs_info;
 786         struct btrfs_trans_handle *trans;
 787         int ret;
 788         unsigned long nr;
 789
 790         if (xchg(&root->defrag_running, 1))
 791                 return 0;
 792
 793         while (1) {
 794                 trans = btrfs_start_transaction(root, 0);
 795                 if (IS_ERR(trans))
 796                         return PTR_ERR(trans);
 797
 798                 ret = btrfs_defrag_leaves(trans, root, cacheonly);
 799
 800                 nr = trans->blocks_used;
 801                 btrfs_end_transaction(trans, root);
 802                 btrfs_btree_balance_dirty(info->tree_root, nr);
 803                 cond_resched();
 804
 805                 if (root->fs_info->closing || ret != -EAGAIN)
 806                         break;
 807         }
 808         root->defrag_running = 0;
 809         return ret;
 810 }
 811
 812 #if 0
 813 /*
 814  * when dropping snapshots, we generate a ton of delayed refs, and it makes
 815  * sense not to join the transaction while it is trying to flush the current
 816  * queue of delayed refs out.
 817  *
 818  * This is used by the drop snapshot code only
      *
      * NOTE: this function sits inside an #if 0 region and is not compiled.
      *
      * Sleeps on info->transaction_wait until there is either no running
      * transaction or the running transaction's delayed_refs.flushing flag
      * is clear.  Takes and releases info->trans_mutex itself.
      * Always returns 0.
 819  */
 820 static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
 821 {
 822         DEFINE_WAIT(wait);
 823
 824         mutex_lock(&info->trans_mutex);
 825         while (info->running_transaction &&
 826                info->running_transaction->delayed_refs.flushing) {
 827                 prepare_to_wait(&info->transaction_wait, &wait,
 828                                 TASK_UNINTERRUPTIBLE);
                      /* the mutex is dropped for the duration of the sleep */
 829                 mutex_unlock(&info->trans_mutex);
 830
 831                 schedule();
 832
                      /* re-take the mutex before the loop condition is re-read */
 833                 mutex_lock(&info->trans_mutex);
 834                 finish_wait(&info->transaction_wait, &wait);
 835         }
 836         mutex_unlock(&info->trans_mutex);
 837         return 0;
 838 }
 839
 840 /*
 841  * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
 842  * all of them
      *
      * NOTE: this function sits inside an #if 0 region and is not compiled.
      * As written it handles the single @root passed in, not a list.
      *
      * Loops over short transactions on the tree root, calling
      * btrfs_drop_snapshot() until it stops returning -EAGAIN, then deletes
      * the root item, drops both extent buffers and frees @root.
      * Any failure trips BUG_ON(), so the value returned is 0.
      *
      * NOTE(review): btrfs_drop_snapshot() is called here as (trans, root),
      * while the live caller btrfs_clean_old_snapshots() in this file passes
      * (root, NULL, update_ref) -- this code looks stale; confirm before
      * re-enabling it.
 843  */
 844 int btrfs_drop_dead_root(struct btrfs_root *root)
 845 {
 846         struct btrfs_trans_handle *trans;
 847         struct btrfs_root *tree_root = root->fs_info->tree_root;
 848         unsigned long nr;
 849         int ret;
 850
 851         while (1) {
 852                 /*
 853                  * we don't want to jump in and create a bunch of
 854                  * delayed refs if the transaction is starting to close
 855                  */
 856                 wait_transaction_pre_flush(tree_root->fs_info);
                      /*
                       * NOTE(review): trans is dereferenced below without an
                       * IS_ERR() check, unlike btrfs_defrag_root() -- confirm
                       */
 857                 trans = btrfs_start_transaction(tree_root, 1);
 858
 859                 /*
 860                  * we've joined a transaction, make sure it isn't
 861                  * closing right now
 862                  */
 863                 if (trans->transaction->delayed_refs.flushing) {
 864                         btrfs_end_transaction(trans, tree_root);
 865                         continue;
 866                 }
 867
 868                 ret = btrfs_drop_snapshot(trans, root);
                      /* anything but -EAGAIN ends the loop with trans still open */
 869                 if (ret != -EAGAIN)
 870                         break;
 871
                      /* -EAGAIN: write the root item back before the next pass */
 872                 ret = btrfs_update_root(trans, tree_root,
 873                                         &root->root_key,
 874                                         &root->root_item);
 875                 if (ret)
 876                         break;
 877
 878                 nr = trans->blocks_used;
 879                 ret = btrfs_end_transaction(trans, tree_root);
 880                 BUG_ON(ret);
 881
 882                 btrfs_btree_balance_dirty(tree_root, nr);
 883                 cond_resched();
 884         }
 885         BUG_ON(ret);
 886
 887         ret = btrfs_del_root(trans, tree_root, &root->root_key);
 888         BUG_ON(ret);
 889
 890         nr = trans->blocks_used;
 891         ret = btrfs_end_transaction(trans, tree_root);
 892         BUG_ON(ret);
 893
 894         free_extent_buffer(root->node);
 895         free_extent_buffer(root->commit_root);
 896         kfree(root);
 897
 898         btrfs_btree_balance_dirty(tree_root, nr);
 899         return ret;
 900 }
 901 #endif
 902
 903 /*
 904  * new snapshots need to be created at a very specific time in the
 905  * transaction commit.  This does the actual creation
 906  */
 907 static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 908                                    struct btrfs_fs_info *fs_info,
 909                                    struct btrfs_pending_snapshot *pending)
 910 {
 911         struct btrfs_key key;
 912         struct btrfs_root_item *new_root_item;
 913         struct btrfs_root *tree_root = fs_info->tree_root;
 914         struct btrfs_root *root = pending->root;
 915         struct btrfs_root *parent_root;
 916         struct inode *parent_inode;
 917         struct dentry *parent;
 918         struct dentry *dentry;
 919         struct extent_buffer *tmp;
 920         struct extent_buffer *old;
 921         int ret;
 922         u64 to_reserve = 0;
 923         u64 index = 0;
 924         u64 objectid;
 925         u64 root_flags;
 926
 927         new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
 928         if (!new_root_item) {
 929                 pending->error = -ENOMEM;
 930                 goto fail;
 931         }
 932
 933         ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid);
 934         if (ret) {
 935                 pending->error = ret;
 936                 goto fail;
 937         }
 938
 939         btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
 940         btrfs_orphan_pre_snapshot(trans, pending, &to_reserve);
 941
 942         if (to_reserve > 0) {
 943                 ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
 944                                           to_reserve);
 945                 if (ret) {
 946                         pending->error = ret;
 947                         goto fail;
 948                 }
 949         }
 950
 951         key.objectid = objectid;
 952         key.offset = (u64)-1;
 953         key.type = BTRFS_ROOT_ITEM_KEY;
 954
 955         trans->block_rsv = &pending->block_rsv;
 956
 957         dentry = pending->dentry;
 958         parent = dget_parent(dentry);
 959         parent_inode = parent->d_inode;
 960         parent_root = BTRFS_I(parent_inode)->root;
 961         record_root_in_trans(trans, parent_root);
 962
 963         /*
 964          * insert the directory item
 965          */
 966         ret = btrfs_set_inode_index(parent_inode, &index);
 967         BUG_ON(ret);
 968         ret = btrfs_insert_dir_item(trans, parent_root,
 969                                 dentry->d_name.name, dentry->d_name.len,
 970                                 parent_inode->i_ino, &key,
 971                                 BTRFS_FT_DIR, index);
 972         BUG_ON(ret);
 973
 974         btrfs_i_size_write(parent_inode, parent_inode->i_size +
 975                                          dentry->d_name.len * 2);
 976         ret = btrfs_update_inode(trans, parent_root, parent_inode);
 977         BUG_ON(ret);
 978
 979         record_root_in_trans(trans, root);
 980         btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
 981         memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
 982         btrfs_check_and_init_root_item(new_root_item);
 983
 984         root_flags = btrfs_root_flags(new_root_item);
 985         if (pending->readonly)
 986                 root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
 987         else
 988                 root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
 989         btrfs_set_root_flags(new_root_item, root_flags);
 990
 991         old = btrfs_lock_root_node(root);
 992         btrfs_cow_block(trans, root, old, NULL, 0, &old);
 993         btrfs_set_lock_blocking(old);
 994
 995         btrfs_copy_root(trans, root, old, &tmp, objectid);
 996         btrfs_tree_unlock(old);
 997         free_extent_buffer(old);
 998
 999         btrfs_set_root_node(new_root_item, tmp);
1000         /* record when the snapshot was created in key.offset */
1001         key.offset = trans->transid;
1002         ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
1003         btrfs_tree_unlock(tmp);
1004         free_extent_buffer(tmp);
1005         BUG_ON(ret);
1006
1007         /*
1008          * insert root back/forward references
1009          */
1010         ret = btrfs_add_root_ref(trans, tree_root, objectid,
1011                                  parent_root->root_key.objectid,
1012                                  parent_inode->i_ino, index,
1013                                  dentry->d_name.name, dentry->d_name.len);
1014         BUG_ON(ret);
1015         dput(parent);
1016
1017         key.offset = (u64)-1;
1018         pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
1019         BUG_ON(IS_ERR(pending->snap));
1020
1021         btrfs_reloc_post_snapshot(trans, pending);
1022         btrfs_orphan_post_snapshot(trans, pending);
1023 fail:
1024         kfree(new_root_item);
1025         btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1);
1026         return 0;
1027 }
1028
1029 /*
1030  * create all the snapshots we've scheduled for creation
1031  */
1032 static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
1033                                              struct btrfs_fs_info *fs_info)
1034 {
1035         struct btrfs_pending_snapshot *pending;
1036         struct list_head *head = &trans->transaction->pending_snapshots;
1037         int ret;
1038
1039         list_for_each_entry(pending, head, list) {
1040                 ret = create_pending_snapshot(trans, fs_info, pending);
1041                 BUG_ON(ret);
1042         }
1043         return 0;
1044 }
1045
1046 static void update_super_roots(struct btrfs_root *root)
1047 {
1048         struct btrfs_root_item *root_item;
1049         struct btrfs_super_block *super;
1050
1051         super = &root->fs_info->super_copy;
1052
1053         root_item = &root->fs_info->chunk_root->root_item;
1054         super->chunk_root = root_item->bytenr;
1055         super->chunk_root_generation = root_item->generation;
1056         super->chunk_root_level = root_item->level;
1057
1058         root_item = &root->fs_info->tree_root->root_item;
1059         super->root = root_item->bytenr;
1060         super->generation = root_item->generation;
1061         super->root_level = root_item->level;
1062         if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
1063                 super->cache_generation = root_item->generation;
1064 }
1065
1066 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
1067 {
1068         int ret = 0;
1069         spin_lock(&info->new_trans_lock);
1070         if (info->running_transaction)
1071                 ret = info->running_transaction->in_commit;
1072         spin_unlock(&info->new_trans_lock);
1073         return ret;
1074 }
1075
1076 int btrfs_transaction_blocked(struct btrfs_fs_info *info)
1077 {
1078         int ret = 0;
1079         spin_lock(&info->new_trans_lock);
1080         if (info->running_transaction)
1081                 ret = info->running_transaction->blocked;
1082         spin_unlock(&info->new_trans_lock);
1083         return ret;
1084 }
1085
1086 /*
1087  * wait for the current transaction commit to start and block subsequent
1088  * transaction joins
1089  */
1090 static void wait_current_trans_commit_start(struct btrfs_root *root,
1091                                             struct btrfs_transaction *trans)
1092 {
1093         DEFINE_WAIT(wait);
1094
1095         if (trans->in_commit)
1096                 return;
1097
1098         while (1) {
1099                 prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
1100                                 TASK_UNINTERRUPTIBLE);
1101                 if (trans->in_commit) {
1102                         finish_wait(&root->fs_info->transaction_blocked_wait,
1103                                     &wait);
1104                         break;
1105                 }
1106                 mutex_unlock(&root->fs_info->trans_mutex);
1107                 schedule();
1108                 mutex_lock(&root->fs_info->trans_mutex);
1109                 finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
1110         }
1111 }
1112
1113 /*
1114  * wait for the current transaction to start and then become unblocked.
1115  * caller holds ref.
1116  */
1117 static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
1118                                          struct btrfs_transaction *trans)
1119 {
1120         DEFINE_WAIT(wait);
1121
1122         if (trans->commit_done || (trans->in_commit && !trans->blocked))
1123                 return;
1124
1125         while (1) {
1126                 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
1127                                 TASK_UNINTERRUPTIBLE);
1128                 if (trans->commit_done ||
1129                     (trans->in_commit && !trans->blocked)) {
1130                         finish_wait(&root->fs_info->transaction_wait,
1131                                     &wait);
1132                         break;
1133                 }
1134                 mutex_unlock(&root->fs_info->trans_mutex);
1135                 schedule();
1136                 mutex_lock(&root->fs_info->trans_mutex);
1137                 finish_wait(&root->fs_info->transaction_wait,
1138                             &wait);
1139         }
1140 }
1141
1142 /*
1143  * commit transactions asynchronously. once btrfs_commit_transaction_async
1144  * returns, any subsequent transaction will not be allowed to join.
      *
      * One of these is allocated per request by
      * btrfs_commit_transaction_async() and freed by do_async_commit() once
      * the commit has run.
1145  */
1146 struct btrfs_async_commit {
          /* handle joined for the worker; do_async_commit() commits it */
1147         struct btrfs_trans_handle *newtrans;
          /* root passed to btrfs_commit_transaction() by the worker */
1148         struct btrfs_root *root;
          /* work item that runs do_async_commit() */
1149         struct delayed_work work;
1150 };
1151
1152 static void do_async_commit(struct work_struct *work)
1153 {
1154         struct btrfs_async_commit *ac =
1155                 container_of(work, struct btrfs_async_commit, work.work);
1156
1157         btrfs_commit_transaction(ac->newtrans, ac->root);
1158         kfree(ac);
1159 }
1160
1161 int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1162                                    struct btrfs_root *root,
1163                                    int wait_for_unblock)
1164 {
1165         struct btrfs_async_commit *ac;
1166         struct btrfs_transaction *cur_trans;
1167
1168         ac = kmalloc(sizeof(*ac), GFP_NOFS);
1169         if (!ac)
1170                 return -ENOMEM;
1171
1172         INIT_DELAYED_WORK(&ac->work, do_async_commit);
1173         ac->root = root;
1174         ac->newtrans = btrfs_join_transaction(root, 0);
1175         if (IS_ERR(ac->newtrans)) {
1176                 int err = PTR_ERR(ac->newtrans);
1177                 kfree(ac);
1178                 return err;
1179         }
1180
1181         /* take transaction reference */
1182         mutex_lock(&root->fs_info->trans_mutex);
1183         cur_trans = trans->transaction;
1184         atomic_inc(&cur_trans->use_count);
1185         mutex_unlock(&root->fs_info->trans_mutex);
1186
1187         btrfs_end_transaction(trans, root);
1188         schedule_delayed_work(&ac->work, 0);
1189
1190         /* wait for transaction to start and unblock */
1191         mutex_lock(&root->fs_info->trans_mutex);
1192         if (wait_for_unblock)
1193                 wait_current_trans_commit_start_and_unblock(root, cur_trans);
1194         else
1195                 wait_current_trans_commit_start(root, cur_trans);
1196         put_transaction(cur_trans);
1197         mutex_unlock(&root->fs_info->trans_mutex);
1198
1199         return 0;
1200 }
1201
1202 /*
1203  * btrfs_transaction state sequence:
1204  *    in_commit = 0, blocked = 0  (initial)
1205  *    in_commit = 1, blocked = 1
1206  *    blocked = 0
1207  *    commit_done = 1
      *
      * Commit the transaction that @trans belongs to.
      *
      * @trans: handle inside the transaction to commit; it is either ended
      *         (when somebody else is already committing) or freed at the end
      * @root:  root used to reach fs_info and passed on to the helpers
      *
      * If another task has already set in_commit, this call only waits for
      * that commit to finish and returns 0.  Otherwise it moves the
      * transaction through the states above: it blocks new joiners, waits
      * for the previous transaction and for the other writers, creates the
      * pending snapshots, commits the roots, writes everything out including
      * the super block and finally marks the commit done.
      *
      * Every failing step trips BUG_ON(), so the value returned is 0.
1208  */
1209 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1210                              struct btrfs_root *root)
1211 {
1212         unsigned long joined = 0;
1213         struct btrfs_transaction *cur_trans;
1214         struct btrfs_transaction *prev_trans = NULL;
1215         DEFINE_WAIT(wait);
1216         int ret;
1217         int should_grow = 0;
1218         unsigned long now = get_seconds();
1219         int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);
1220
1221         btrfs_run_ordered_operations(root, 0);
1222
1223         /* make a pass through all the delayed refs we have so far
1224          * any running procs may add more while we are here
1225          */
1226         ret = btrfs_run_delayed_refs(trans, root, 0);
1227         BUG_ON(ret);
1228
1229         btrfs_trans_release_metadata(trans, root);
1230
1231         cur_trans = trans->transaction;
1232         /*
1233          * set the flushing flag so procs in this transaction have to
1234          * start sending their work down.
1235          */
1236         cur_trans->delayed_refs.flushing = 1;
1237
1238         ret = btrfs_run_delayed_refs(trans, root, 0);
1239         BUG_ON(ret);
1240
1241         mutex_lock(&root->fs_info->trans_mutex);
             /*
              * somebody else is already committing this transaction: take a
              * reference, end our handle and just wait for their commit
              */
1242         if (cur_trans->in_commit) {
1243                 atomic_inc(&cur_trans->use_count);
1244                 mutex_unlock(&root->fs_info->trans_mutex);
1245                 btrfs_end_transaction(trans, root);
1246
1247                 ret = wait_for_commit(root, cur_trans);
1248                 BUG_ON(ret);
1249
1250                 mutex_lock(&root->fs_info->trans_mutex);
1251                 put_transaction(cur_trans);
1252                 mutex_unlock(&root->fs_info->trans_mutex);
1253
1254                 return 0;
1255         }
1256
             /* we are the committer: enter the in_commit = 1, blocked = 1 state */
1257         trans->transaction->in_commit = 1;
1258         trans->transaction->blocked = 1;
1259         wake_up(&root->fs_info->transaction_blocked_wait);
1260
             /*
              * if there is an older transaction on trans_list that has not
              * finished committing, wait for it (with trans_mutex dropped)
              */
1261         if (cur_trans->list.prev != &root->fs_info->trans_list) {
1262                 prev_trans = list_entry(cur_trans->list.prev,
1263                                         struct btrfs_transaction, list);
1264                 if (!prev_trans->commit_done) {
1265                         atomic_inc(&prev_trans->use_count);
1266                         mutex_unlock(&root->fs_info->trans_mutex);
1267
1268                         wait_for_commit(root, prev_trans);
1269
1270                         mutex_lock(&root->fs_info->trans_mutex);
1271                         put_transaction(prev_trans);
1272                 }
1273         }
1274
             /*
              * a transaction that started less than a second ago (or whose
              * start time is in the future) is given a chance to grow
              */
1275         if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
1276                 should_grow = 1;
1277
             /*
              * loop until we are the only writer left and, when should_grow
              * is set, until a pass went by without anybody new joining
              */
1278         do {
1279                 int snap_pending = 0;
1280                 joined = cur_trans->num_joined;
1281                 if (!list_empty(&trans->transaction->pending_snapshots))
1282                         snap_pending = 1;
1283
1284                 WARN_ON(cur_trans != trans->transaction);
1285                 mutex_unlock(&root->fs_info->trans_mutex);
1286
1287                 if (flush_on_commit || snap_pending) {
1288                         btrfs_start_delalloc_inodes(root, 1);
1289                         ret = btrfs_wait_ordered_extents(root, 0, 1);
1290                         BUG_ON(ret);
1291                 }
1292
1293                 /*
1294                  * rename doesn't use btrfs_join_transaction, so, once we
1295                  * set the transaction to blocked above, we aren't going
1296                  * to get any new ordered operations.  We can safely run
1297                  * it here and know for sure that nothing new will be added
1298                  * to the list
1299                  */
1300                 btrfs_run_ordered_operations(root, 1);
1301
1302                 prepare_to_wait(&cur_trans->writer_wait, &wait,
1303                                 TASK_UNINTERRUPTIBLE);
1304
1305                 smp_mb();
                     /*
                      * other writers still active: sleep until woken;
                      * otherwise, when growing, sleep for one tick
                      */
1306                 if (atomic_read(&cur_trans->num_writers) > 1)
1307                         schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1308                 else if (should_grow)
1309                         schedule_timeout(1);
1310
1311                 mutex_lock(&root->fs_info->trans_mutex);
1312                 finish_wait(&cur_trans->writer_wait, &wait);
1313         } while (atomic_read(&cur_trans->num_writers) > 1 ||
1314                  (should_grow && cur_trans->num_joined != joined));
1315
1316         ret = create_pending_snapshots(trans, root->fs_info);
1317         BUG_ON(ret);
1318
             /* a count of (unsigned long)-1 is passed for this final pass */
1319         ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1320         BUG_ON(ret);
1321
1322         WARN_ON(cur_trans != trans->transaction);
1323
1324         /* btrfs_commit_tree_roots is responsible for getting the
1325          * various roots consistent with each other.  Every pointer
1326          * in the tree of tree roots has to point to the most up to date
1327          * root for every subvolume and other tree.  So, we have to keep
1328          * the tree logging code from jumping in and changing any
1329          * of the trees.
1330          *
1331          * At this point in the commit, there can't be any tree-log
1332          * writers, but a little lower down we drop the trans mutex
1333          * and let new people in.  By holding the tree_log_mutex
1334          * from now until after the super is written, we avoid races
1335          * with the tree-log code.
1336          */
1337         mutex_lock(&root->fs_info->tree_log_mutex);
1338
1339         ret = commit_fs_roots(trans, root);
1340         BUG_ON(ret);
1341
1342         /* commit_fs_roots gets rid of all the tree log roots, it is now
1343          * safe to free the root of tree log roots
1344          */
1345         btrfs_free_log_root_tree(trans, root->fs_info);
1346
1347         ret = commit_cowonly_roots(trans, root);
1348         BUG_ON(ret);
1349
1350         btrfs_prepare_extent_commit(trans, root);
1351
             /* no transaction is "running" from here on */
1352         cur_trans = root->fs_info->running_transaction;
1353         spin_lock(&root->fs_info->new_trans_lock);
1354         root->fs_info->running_transaction = NULL;
1355         spin_unlock(&root->fs_info->new_trans_lock);
1356
             /* record the current tree root and chunk root nodes, switch commit roots */
1357         btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1358                             root->fs_info->tree_root->node);
1359         switch_commit_root(root->fs_info->tree_root);
1360
1361         btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
1362                             root->fs_info->chunk_root->node);
1363         switch_commit_root(root->fs_info->chunk_root);
1364
1365         update_super_roots(root);
1366
1367         if (!root->fs_info->log_root_recovering) {
1368                 btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
1369                 btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0);
1370         }
1371
             /* snapshot the super block contents for this commit */
1372         memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
1373                sizeof(root->fs_info->super_copy));
1374
             /* blocked = 0 state: wake the tasks sleeping on transaction_wait */
1375         trans->transaction->blocked = 0;
1376
1377         wake_up(&root->fs_info->transaction_wait);
1378
1379         mutex_unlock(&root->fs_info->trans_mutex);
1380         ret = btrfs_write_and_wait_transaction(trans, root);
1381         BUG_ON(ret);
1382         write_ctree_super(trans, root, 0);
1383
1384         /*
1385          * the super is written, we can safely allow the tree-loggers
1386          * to go about their business
1387          */
1388         mutex_unlock(&root->fs_info->tree_log_mutex);
1389
1390         btrfs_finish_extent_commit(trans, root);
1391
1392         mutex_lock(&root->fs_info->trans_mutex);
1393
             /* commit_done = 1 state: wake the tasks sleeping on commit_wait */
1394         cur_trans->commit_done = 1;
1395
1396         root->fs_info->last_trans_committed = cur_trans->transid;
1397
1398         wake_up(&cur_trans->commit_wait);
1399
1400         list_del_init(&cur_trans->list);
             /*
              * two references are dropped here -- presumably the one held
              * for the transaction list and the one held by this handle;
              * TODO confirm against join_transaction()
              */
1401         put_transaction(cur_trans);
1402         put_transaction(cur_trans);
1403
1404         trace_btrfs_transaction_commit(root);
1405
1406         mutex_unlock(&root->fs_info->trans_mutex);
1407
1408         if (current->journal_info == trans)
1409                 current->journal_info = NULL;
1410
             /* the handle is freed here; the caller must not touch it again */
1411         kmem_cache_free(btrfs_trans_handle_cachep, trans);
1412
             /* delayed iputs are not run from the transaction kthread itself */
1413         if (current != root->fs_info->transaction_kthread)
1414                 btrfs_run_delayed_iputs(root);
1415
1416         return ret;
1417 }
1418
1419 /*
1420  * interface function to delete all the snapshots we have scheduled for deletion
1421  */
1422 int btrfs_clean_old_snapshots(struct btrfs_root *root)
1423 {
1424         LIST_HEAD(list);
1425         struct btrfs_fs_info *fs_info = root->fs_info;
1426
1427         mutex_lock(&fs_info->trans_mutex);
1428         list_splice_init(&fs_info->dead_roots, &list);
1429         mutex_unlock(&fs_info->trans_mutex);
1430
1431         while (!list_empty(&list)) {
1432                 root = list_entry(list.next, struct btrfs_root, root_list);
1433                 list_del(&root->root_list);
1434
1435                 if (btrfs_header_backref_rev(root->node) <
1436                     BTRFS_MIXED_BACKREF_REV)
1437                         btrfs_drop_snapshot(root, NULL, 0);
1438                 else
1439                         btrfs_drop_snapshot(root, NULL, 1);
1440         }
1441         return 0;
1442 }