/*
 * Copyright (C) 2011 STRATO.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
/*
 * This is only the first step towards a full-featured scrub. It reads all
 * extents and super blocks and verifies the checksums. In case a bad checksum
 * is found or the extent cannot be read, good data will be written back if
 * any can be found.
 *
 * Future enhancements:
 *  - To enhance the performance, better read-ahead strategies for the
 *    extent-tree can be employed.
 *  - In case an unrepairable extent is encountered, track which files are
 *    affected and report them
 *  - In case of a read error on files with nodatasum, map the file and read
 *    the extent to trigger a writeback of the good copy
 *  - track and record media errors, throw out bad devices
 *  - add a mode to also read unallocated space
 *  - make the prefetch cancellable
 */
struct scrub_bio;
struct scrub_page;
struct scrub_dev;
static void scrub_bio_end_io(struct bio *bio, int err);
static void scrub_checksum(struct btrfs_work *work);
static int scrub_checksum_data(struct scrub_dev *sdev,
			       struct scrub_page *spag, void *buffer);
static int scrub_checksum_tree_block(struct scrub_dev *sdev,
				     struct scrub_page *spag, u64 logical,
				     void *buffer);
static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
static int scrub_fixup_check(struct scrub_bio *sbio, int ix);
static void scrub_fixup_end_io(struct bio *bio, int err);
static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
			  struct page *page);
static void scrub_fixup(struct scrub_bio *sbio, int ix);
#define SCRUB_PAGES_PER_BIO	16	/* 64k per bio */
#define SCRUB_BIOS_PER_DEV	16	/* 1 MB per device in flight */
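/*
 * Illustrative sizing, assuming 4k pages: 16 pages * 4k = 64k per bio,
 * and 16 bios * 64k = 1 MB of read I/O in flight per device, matching
 * the comments above. The scrub_bios form a free list threaded through
 * next_free/first_free and are recycled by the checksum worker.
 */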
struct scrub_page {
	u64			flags;	/* extent flags */
	u64			generation;
	u64			mirror_num;
	int			have_csum;
	u8			csum[BTRFS_CSUM_SIZE];
};

struct scrub_bio {
	int			index;
	struct scrub_dev	*sdev;
	struct bio		*bio;
	int			err;
	u64			logical;
	u64			physical;
	struct scrub_page	spag[SCRUB_PAGES_PER_BIO];
	u64			count;
	int			next_free;
	struct btrfs_work	work;
};

struct scrub_dev {
	struct scrub_bio	*bios[SCRUB_BIOS_PER_DEV];
	struct btrfs_device	*dev;
	int			first_free;
	int			curr;
	atomic_t		in_flight;
	atomic_t		cancel_req;
	int			readonly;
	spinlock_t		list_lock;
	wait_queue_head_t	list_wait;
	u16			csum_size;
	struct list_head	csum_list;
	/*
	 * statistics
	 */
	struct btrfs_scrub_progress stat;
	spinlock_t		stat_lock;
};
static void scrub_free_csums(struct scrub_dev *sdev)
{
	while (!list_empty(&sdev->csum_list)) {
		struct btrfs_ordered_sum *sum;
		sum = list_first_entry(&sdev->csum_list,
				       struct btrfs_ordered_sum, list);
		list_del(&sum->list);
		kfree(sum);
	}
}
static void scrub_free_bio(struct bio *bio)
{
	int i;
	struct page *last_page = NULL;

	if (!bio)
		return;

	for (i = 0; i < bio->bi_vcnt; ++i) {
		if (bio->bi_io_vec[i].bv_page == last_page)
			continue;
		last_page = bio->bi_io_vec[i].bv_page;
		__free_page(last_page);
	}
	bio_put(bio);
}
static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
{
	int i;

	if (!sdev)
		return;

	for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
		struct scrub_bio *sbio = sdev->bios[i];

		if (!sbio)
			break;

		scrub_free_bio(sbio->bio);
		kfree(sbio);
	}

	scrub_free_csums(sdev);
	kfree(sdev);
}
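/*
 * Set up the per-device scrub context: one scrub_dev plus a fixed pool
 * of SCRUB_BIOS_PER_DEV scrub_bios chained into a free list. GFP_NOFS
 * is used throughout since we may be called from filesystem context.
 */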
static noinline_for_stack
struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
{
	struct scrub_dev *sdev;
	int		i;
	struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;

	sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
	if (!sdev)
		goto nomem;
	sdev->dev = dev;
	for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
		struct scrub_bio *sbio;

		sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
		if (!sbio)
			goto nomem;
		sdev->bios[i] = sbio;

		sbio->index = i;
		sbio->sdev = sdev;
		sbio->count = 0;
		sbio->work.func = scrub_checksum;

		if (i != SCRUB_BIOS_PER_DEV-1)
			sdev->bios[i]->next_free = i + 1;
		else
			sdev->bios[i]->next_free = -1;
	}
	sdev->first_free = 0;
	sdev->curr = -1;
	atomic_set(&sdev->in_flight, 0);
	atomic_set(&sdev->cancel_req, 0);
	sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy);
	INIT_LIST_HEAD(&sdev->csum_list);

	spin_lock_init(&sdev->list_lock);
	spin_lock_init(&sdev->stat_lock);
	init_waitqueue_head(&sdev->list_wait);
	return sdev;

nomem:
	scrub_free_dev(sdev);
	return ERR_PTR(-ENOMEM);
}
/*
 * scrub_recheck_error gets called when either verification of the page
 * failed or the bio failed to read, e.g. with EIO. In the latter case,
 * recheck_error gets called for every page in the bio, even though only
 * one may be bad
 */
static void scrub_recheck_error(struct scrub_bio *sbio, int ix)
{
	if (sbio->err) {
		if (scrub_fixup_io(READ, sbio->sdev->dev->bdev,
				   (sbio->physical + ix * PAGE_SIZE) >> 9,
				   sbio->bio->bi_io_vec[ix].bv_page) == 0) {
			if (scrub_fixup_check(sbio, ix) == 0)
				return;
		}
	}

	scrub_fixup(sbio, ix);
}
static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
{
	int ret = 1;
	struct page *page;
	void *buffer;
	u64 flags = sbio->spag[ix].flags;

	page = sbio->bio->bi_io_vec[ix].bv_page;
	buffer = kmap_atomic(page, KM_USER0);
	if (flags & BTRFS_EXTENT_FLAG_DATA) {
		ret = scrub_checksum_data(sbio->sdev,
					  sbio->spag + ix, buffer);
	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		ret = scrub_checksum_tree_block(sbio->sdev,
						sbio->spag + ix,
						sbio->logical + ix * PAGE_SIZE,
						buffer);
	}
	kunmap_atomic(buffer, KM_USER0);

	return ret;
}
static void scrub_fixup_end_io(struct bio *bio, int err)
{
	complete((struct completion *)bio->bi_private);
}
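/*
 * Attempt to repair a bad page: map the logical address to all its
 * mirrors, read the other copies until one passes scrub_fixup_check(),
 * and, unless the scrub runs readonly, write the good copy back over
 * the bad one in place.
 */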
static void scrub_fixup(struct scrub_bio *sbio, int ix)
{
	struct scrub_dev *sdev = sbio->sdev;
	struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
	struct btrfs_multi_bio *multi = NULL;
	u64 logical = sbio->logical + ix * PAGE_SIZE;
	u64 length;
	int i;
	int ret;
	DECLARE_COMPLETION_ONSTACK(complete);

	if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
	    (sbio->spag[ix].have_csum == 0)) {
		/*
		 * nodatasum, don't try to fix anything
		 * FIXME: we can do better, open the inode and trigger a
		 * writeback
		 */
		goto uncorrectable;
	}

	length = PAGE_SIZE;
	ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length,
			      &multi, 0);
	if (ret || !multi || length < PAGE_SIZE) {
		printk(KERN_ERR
		       "scrub_fixup: btrfs_map_block failed us for %llu\n",
		       (unsigned long long)logical);
		WARN_ON(1);
		return;
	}

	if (multi->num_stripes == 1)
		/* there aren't any replicas */
		goto uncorrectable;

	/*
	 * first find a good copy
	 */
	for (i = 0; i < multi->num_stripes; ++i) {
		if (i == sbio->spag[ix].mirror_num)
			continue;

		if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev,
				   multi->stripes[i].physical >> 9,
				   sbio->bio->bi_io_vec[ix].bv_page)) {
			/* I/O-error, this is not a good copy */
			continue;
		}

		if (scrub_fixup_check(sbio, ix) == 0)
			break;
	}
	if (i == multi->num_stripes)
		goto uncorrectable;

	if (!sdev->readonly) {
		/*
		 * bi_io_vec[ix].bv_page now contains good data, write it back
		 */
		if (scrub_fixup_io(WRITE, sdev->dev->bdev,
				   (sbio->physical + ix * PAGE_SIZE) >> 9,
				   sbio->bio->bi_io_vec[ix].bv_page)) {
			/* I/O-error, writeback failed, give up */
			goto uncorrectable;
		}
	}

	kfree(multi);
	spin_lock(&sdev->stat_lock);
	++sdev->stat.corrected_errors;
	spin_unlock(&sdev->stat_lock);

	if (printk_ratelimit())
		printk(KERN_ERR "btrfs: fixed up at %llu\n",
		       (unsigned long long)logical);
	return;

uncorrectable:
	kfree(multi);
	spin_lock(&sdev->stat_lock);
	++sdev->stat.uncorrectable_errors;
	spin_unlock(&sdev->stat_lock);

	if (printk_ratelimit())
		printk(KERN_ERR "btrfs: unable to fixup at %llu\n",
		       (unsigned long long)logical);
}
static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
			  struct page *page)
{
	struct bio *bio = NULL;
	int ret;
	DECLARE_COMPLETION_ONSTACK(complete);

	bio = bio_alloc(GFP_NOFS, 1);
	bio->bi_bdev = bdev;
	bio->bi_sector = sector;
	bio_add_page(bio, page, PAGE_SIZE, 0);
	bio->bi_end_io = scrub_fixup_end_io;
	bio->bi_private = &complete;
	submit_bio(rw, bio);

	/* this will also unplug the queue */
	wait_for_completion(&complete);

	ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
	bio_put(bio);
	return ret;
}
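/*
 * The read completion runs in bio end_io context, so all heavy work
 * (checksumming, rechecking, fixup) is deferred to the scrub_workers
 * thread pool via sbio->work.
 */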
static void scrub_bio_end_io(struct bio *bio, int err)
{
	struct scrub_bio *sbio = bio->bi_private;
	struct scrub_dev *sdev = sbio->sdev;
	struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;

	sbio->err = err;
	sbio->bio = bio;

	btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
}
static void scrub_checksum(struct btrfs_work *work)
{
	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
	struct scrub_dev *sdev = sbio->sdev;
	struct page *page;
	void *buffer;
	int i;
	u64 flags;
	u64 logical;
	int ret;

	if (sbio->err) {
		for (i = 0; i < sbio->count; ++i)
			scrub_recheck_error(sbio, i);

		sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
		sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
		sbio->bio->bi_phys_segments = 0;
		sbio->bio->bi_idx = 0;

		for (i = 0; i < sbio->count; i++) {
			struct bio_vec *bi;
			bi = &sbio->bio->bi_io_vec[i];
			bi->bv_offset = 0;
			bi->bv_len = PAGE_SIZE;
		}

		spin_lock(&sdev->stat_lock);
		++sdev->stat.read_errors;
		spin_unlock(&sdev->stat_lock);
		goto out;
	}
	for (i = 0; i < sbio->count; ++i) {
		page = sbio->bio->bi_io_vec[i].bv_page;
		buffer = kmap_atomic(page, KM_USER0);
		flags = sbio->spag[i].flags;
		logical = sbio->logical + i * PAGE_SIZE;
		ret = 0;
		if (flags & BTRFS_EXTENT_FLAG_DATA) {
			ret = scrub_checksum_data(sdev, sbio->spag + i, buffer);
		} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
			ret = scrub_checksum_tree_block(sdev, sbio->spag + i,
							logical, buffer);
		} else if (flags & BTRFS_EXTENT_FLAG_SUPER) {
			BUG_ON(i);
			(void)scrub_checksum_super(sbio, buffer);
		} else {
			WARN_ON(1);
		}
		kunmap_atomic(buffer, KM_USER0);
		if (ret)
			scrub_recheck_error(sbio, i);
	}

out:
	scrub_free_bio(sbio->bio);
	sbio->bio = NULL;
	spin_lock(&sdev->list_lock);
	sbio->next_free = sdev->first_free;
	sdev->first_free = sbio->index;
	spin_unlock(&sdev->list_lock);
	atomic_dec(&sdev->in_flight);
	wake_up(&sdev->list_wait);
}
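/*
 * Verify one data page: the checksum of the page contents must match
 * the csum-tree checksum carried in spag->csum. Pages without a csum
 * (nodatasum) are accepted as-is.
 */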
static int scrub_checksum_data(struct scrub_dev *sdev,
			       struct scrub_page *spag, void *buffer)
{
	u8 csum[BTRFS_CSUM_SIZE];
	u32 crc = ~(u32)0;
	int fail = 0;
	struct btrfs_root *root = sdev->dev->dev_root;

	if (!spag->have_csum)
		return 0;

	crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE);
	btrfs_csum_final(crc, csum);
	if (memcmp(csum, spag->csum, sdev->csum_size))
		fail = 1;

	spin_lock(&sdev->stat_lock);
	++sdev->stat.data_extents_scrubbed;
	sdev->stat.data_bytes_scrubbed += PAGE_SIZE;
	if (fail)
		++sdev->stat.csum_errors;
	spin_unlock(&sdev->stat_lock);

	return fail;
}
static int scrub_checksum_tree_block(struct scrub_dev *sdev,
				     struct scrub_page *spag, u64 logical,
				     void *buffer)
{
	struct btrfs_header *h;
	struct btrfs_root *root = sdev->dev->dev_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u8 csum[BTRFS_CSUM_SIZE];
	u32 crc = ~(u32)0;
	int fail = 0;
	int crc_fail = 0;

	/*
	 * we don't use the getter functions here, as we
	 * a) don't have an extent buffer and
	 * b) the page is already kmapped
	 */
	h = (struct btrfs_header *)buffer;

	if (logical != le64_to_cpu(h->bytenr))
		++fail;

	if (spag->generation != le64_to_cpu(h->generation))
		++fail;

	if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
		++fail;

	if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
		   BTRFS_UUID_SIZE))
		++fail;

	crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
			      PAGE_SIZE - BTRFS_CSUM_SIZE);
	btrfs_csum_final(crc, csum);
	if (memcmp(csum, h->csum, sdev->csum_size))
		++crc_fail;

	spin_lock(&sdev->stat_lock);
	++sdev->stat.tree_extents_scrubbed;
	sdev->stat.tree_bytes_scrubbed += PAGE_SIZE;
	if (crc_fail)
		++sdev->stat.csum_errors;
	if (fail)
		++sdev->stat.verify_errors;
	spin_unlock(&sdev->stat_lock);

	return fail || crc_fail;
}
static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
{
	struct btrfs_super_block *s;
	u64 logical;
	struct scrub_dev *sdev = sbio->sdev;
	struct btrfs_root *root = sdev->dev->dev_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u8 csum[BTRFS_CSUM_SIZE];
	u32 crc = ~(u32)0;
	int fail = 0;

	s = (struct btrfs_super_block *)buffer;
	logical = sbio->logical;

	if (logical != le64_to_cpu(s->bytenr))
		++fail;

	if (sbio->spag[0].generation != le64_to_cpu(s->generation))
		++fail;

	if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
		++fail;

	crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
			      PAGE_SIZE - BTRFS_CSUM_SIZE);
	btrfs_csum_final(crc, csum);
	if (memcmp(csum, s->csum, sbio->sdev->csum_size))
		++fail;

	if (fail) {
		/*
		 * If we find an error in a super block, we just report it.
		 * The super blocks get rewritten with the next transaction
		 * commit anyway.
		 */
		spin_lock(&sdev->stat_lock);
		++sdev->stat.super_errors;
		spin_unlock(&sdev->stat_lock);
	}

	return fail;
}
static int scrub_submit(struct scrub_dev *sdev)
{
	struct scrub_bio *sbio;
	struct bio *bio;
	int i;

	if (sdev->curr == -1)
		return 0;

	sbio = sdev->bios[sdev->curr];

	bio = bio_alloc(GFP_NOFS, sbio->count);
	if (!bio)
		goto nomem;

	bio->bi_private = sbio;
	bio->bi_end_io = scrub_bio_end_io;
	bio->bi_bdev = sdev->dev->bdev;
	bio->bi_sector = sbio->physical >> 9;

	for (i = 0; i < sbio->count; ++i) {
		struct page *page;
		int ret;

		page = alloc_page(GFP_NOFS);
		if (!page)
			goto nomem;

		ret = bio_add_page(bio, page, PAGE_SIZE, 0);
		if (!ret) {
			__free_page(page);
			goto nomem;
		}
	}

	sbio->err = 0;
	sdev->curr = -1;
	atomic_inc(&sdev->in_flight);

	submit_bio(READ, bio);

	return 0;

nomem:
	scrub_free_bio(bio);

	return -ENOMEM;
}
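/*
 * Queue a single page for scrubbing. Pages accumulate in the current
 * sbio as long as they are physically and logically contiguous; on a
 * discontinuity, a full sbio, or 'force', the batch is submitted.
 */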
static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
		      u64 physical, u64 flags, u64 gen, u64 mirror_num,
		      u8 *csum, int force)
{
	struct scrub_bio *sbio;

again:
	/*
	 * grab a fresh bio or wait for one to become available
	 */
	while (sdev->curr == -1) {
		spin_lock(&sdev->list_lock);
		sdev->curr = sdev->first_free;
		if (sdev->curr != -1) {
			sdev->first_free = sdev->bios[sdev->curr]->next_free;
			sdev->bios[sdev->curr]->next_free = -1;
			sdev->bios[sdev->curr]->count = 0;
			spin_unlock(&sdev->list_lock);
		} else {
			spin_unlock(&sdev->list_lock);
			wait_event(sdev->list_wait, sdev->first_free != -1);
		}
	}
	sbio = sdev->bios[sdev->curr];
	if (sbio->count == 0) {
		sbio->physical = physical;
		sbio->logical = logical;
	} else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
		   sbio->logical + sbio->count * PAGE_SIZE != logical) {
		int ret;

		ret = scrub_submit(sdev);
		if (ret)
			return ret;
		goto again;
	}
	sbio->spag[sbio->count].flags = flags;
	sbio->spag[sbio->count].generation = gen;
	sbio->spag[sbio->count].have_csum = 0;
	sbio->spag[sbio->count].mirror_num = mirror_num;
	if (csum) {
		sbio->spag[sbio->count].have_csum = 1;
		memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
	}
	++sbio->count;
	if (sbio->count == SCRUB_PAGES_PER_BIO || force) {
		int ret;

		ret = scrub_submit(sdev);
		if (ret)
			return ret;
	}

	return 0;
}
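/*
 * Look up the data checksum for 'logical' in the csum_list collected
 * beforehand, dropping entries that lie entirely before it. Returns 1
 * and copies the checksum out if found, 0 otherwise.
 */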
static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
			   u8 *csum)
{
	struct btrfs_ordered_sum *sum = NULL;
	int ret = 0;
	unsigned long i;
	unsigned long num_sectors;
	u32 sectorsize = sdev->dev->dev_root->sectorsize;

	while (!list_empty(&sdev->csum_list)) {
		sum = list_first_entry(&sdev->csum_list,
				       struct btrfs_ordered_sum, list);
		if (sum->bytenr > logical)
			return 0;
		if (sum->bytenr + sum->len > logical)
			break;

		++sdev->stat.csum_discards;
		list_del(&sum->list);
		kfree(sum);
		sum = NULL;
	}
	if (!sum)
		return 0;

	num_sectors = sum->len / sectorsize;
	for (i = 0; i < num_sectors; ++i) {
		if (sum->sums[i].bytenr == logical) {
			memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
			ret = 1;
			break;
		}
	}
	if (ret && i == num_sectors - 1) {
		list_del(&sum->list);
		kfree(sum);
	}
	return ret;
}
/* scrub extent tries to collect up to 64 kB for each bio */
static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
			u64 physical, u64 flags, u64 gen, u64 mirror_num)
{
	int ret;
	u8 csum[BTRFS_CSUM_SIZE];

	while (len) {
		u64 l = min_t(u64, len, PAGE_SIZE);
		int have_csum = 0;

		if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* push csums to sbio */
			have_csum = scrub_find_csum(sdev, logical, l, csum);
			if (have_csum == 0)
				++sdev->stat.no_csum;
		}
		ret = scrub_page(sdev, logical, l, physical, flags, gen,
				 mirror_num, have_csum ? csum : NULL, 0);
		if (ret)
			return ret;
		len -= l;
		logical += l;
		physical += l;
	}
	return 0;
}
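/*
 * scrub_stripe walks the slice of one chunk that lives on our device:
 * 'offset' is where the slice starts inside the chunk and 'increment'
 * is the logical distance between consecutive stripe_len units on this
 * device. Illustrative example (not from the code): for RAID0 over 4
 * devices with a 64k stripe_len, device num=1 gets offset=64k and
 * increment=256k, so it scrubs base+64k, base+320k, base+576k, ...
 */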
static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
	struct map_lookup *map, int num, u64 base, u64 length)
{
	struct btrfs_path *path;
	struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_root *csum_root = fs_info->csum_root;
	struct btrfs_extent_item *extent;
	struct blk_plug plug;
	u64 flags;
	int ret;
	int slot;
	int i;
	u64 nstripes;
	int start_stripe;
	struct extent_buffer *l;
	struct btrfs_key key;
	u64 physical;
	u64 logical;
	u64 generation;
	u64 mirror_num;

	u64 increment = map->stripe_len;
	u64 offset;

	nstripes = length;
	offset = 0;
	do_div(nstripes, map->stripe_len);
	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
		offset = map->stripe_len * num;
		increment = map->stripe_len * map->num_stripes;
		mirror_num = 0;
	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
		int factor = map->num_stripes / map->sub_stripes;
		offset = map->stripe_len * (num / map->sub_stripes);
		increment = map->stripe_len * factor;
		mirror_num = num % map->sub_stripes;
	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
		increment = map->stripe_len;
		mirror_num = num % map->num_stripes;
	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
		increment = map->stripe_len;
		mirror_num = num % map->num_stripes;
	} else {
		increment = map->stripe_len;
		mirror_num = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 2;
	path->search_commit_root = 1;
	path->skip_locking = 1;
	/*
	 * find all extents for each stripe and just read them to get
	 * them into the page cache
	 * FIXME: we can do better. build a more intelligent prefetching
	 */
	logical = base + offset;
	physical = map->stripes[num].physical;
	ret = 0;
	for (i = 0; i < nstripes; ++i) {
		key.objectid = logical;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = (u64)0;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out_noplug;

		/*
		 * we might miss half an extent here, but that doesn't matter,
		 * as it's only the prefetch
		 */
		while (1) {
			l = path->nodes[0];
			slot = path->slots[0];
			if (slot >= btrfs_header_nritems(l)) {
				ret = btrfs_next_leaf(root, path);
				if (ret == 0)
					continue;
				if (ret < 0)
					goto out_noplug;

				break;
			}
			btrfs_item_key_to_cpu(l, &key, slot);

			if (key.objectid >= logical + map->stripe_len)
				break;

			path->slots[0]++;
		}
		btrfs_release_path(path);
		logical += increment;
		physical += map->stripe_len;
	}
	/*
	 * collect all data csums for the stripe to avoid seeking during
	 * the scrub. With the current crc32 checksums this can amount to
	 * about 1MB per stripe.
	 */
	start_stripe = 0;
	blk_start_plug(&plug);
again:
	logical = base + offset + start_stripe * increment;
	for (i = start_stripe; i < nstripes; ++i) {
		ret = btrfs_lookup_csums_range(csum_root, logical,
					       logical + map->stripe_len - 1,
					       &sdev->csum_list, 1);
		if (ret)
			goto out;

		logical += increment;
	}
	/*
	 * now find all extents for each stripe and scrub them
	 */
	logical = base + offset + start_stripe * increment;
	physical = map->stripes[num].physical + start_stripe * map->stripe_len;
	ret = 0;
	for (i = start_stripe; i < nstripes; ++i) {
		/*
		 * canceled?
		 */
		if (atomic_read(&fs_info->scrub_cancel_req) ||
		    atomic_read(&sdev->cancel_req)) {
			ret = -ECANCELED;
			goto out;
		}
		/*
		 * check to see if we have to pause
		 */
		if (atomic_read(&fs_info->scrub_pause_req)) {
			/* push queued extents */
			scrub_submit(sdev);
			wait_event(sdev->list_wait,
				   atomic_read(&sdev->in_flight) == 0);
			atomic_inc(&fs_info->scrubs_paused);
			wake_up(&fs_info->scrub_pause_wait);
			mutex_lock(&fs_info->scrub_lock);
			while (atomic_read(&fs_info->scrub_pause_req)) {
				mutex_unlock(&fs_info->scrub_lock);
				wait_event(fs_info->scrub_pause_wait,
				   atomic_read(&fs_info->scrub_pause_req) == 0);
				mutex_lock(&fs_info->scrub_lock);
			}
			atomic_dec(&fs_info->scrubs_paused);
			mutex_unlock(&fs_info->scrub_lock);
			wake_up(&fs_info->scrub_pause_wait);
			scrub_free_csums(sdev);
			start_stripe = i;
			goto again;
		}
		key.objectid = logical;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = (u64)0;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			ret = btrfs_previous_item(root, path, 0,
						  BTRFS_EXTENT_ITEM_KEY);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				/* there's no smaller item, so stick with the
				 * larger one */
				btrfs_release_path(path);
				ret = btrfs_search_slot(NULL, root, &key,
							path, 0, 0);
				if (ret < 0)
					goto out;
			}
		}

		while (1) {
			l = path->nodes[0];
			slot = path->slots[0];
			if (slot >= btrfs_header_nritems(l)) {
				ret = btrfs_next_leaf(root, path);
				if (ret == 0)
					continue;
				if (ret < 0)
					goto out;

				break;
			}
			btrfs_item_key_to_cpu(l, &key, slot);

			if (key.objectid + key.offset <= logical)
				goto next;

			if (key.objectid >= logical + map->stripe_len)
				break;

			if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
				goto next;

			extent = btrfs_item_ptr(l, slot,
						struct btrfs_extent_item);
			flags = btrfs_extent_flags(l, extent);
			generation = btrfs_extent_generation(l, extent);

			if (key.objectid < logical &&
			    (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
				printk(KERN_ERR
				       "btrfs scrub: tree block %llu spanning "
				       "stripes, ignored. logical=%llu\n",
				       (unsigned long long)key.objectid,
				       (unsigned long long)logical);
				goto next;
			}
			/*
			 * trim extent to this stripe
			 */
			if (key.objectid < logical) {
				key.offset -= logical - key.objectid;
				key.objectid = logical;
			}
			if (key.objectid + key.offset >
			    logical + map->stripe_len) {
				key.offset = logical + map->stripe_len -
					     key.objectid;
			}

			ret = scrub_extent(sdev, key.objectid, key.offset,
					   key.objectid - logical + physical,
					   flags, generation, mirror_num);
			if (ret)
				goto out;

next:
			path->slots[0]++;
		}
		btrfs_release_path(path);
		logical += increment;
		physical += map->stripe_len;
		spin_lock(&sdev->stat_lock);
		sdev->stat.last_physical = physical;
		spin_unlock(&sdev->stat_lock);
	}
	/* push queued extents */
	scrub_submit(sdev);

out:
	blk_finish_plug(&plug);
out_noplug:
	btrfs_free_path(path);
	return ret < 0 ? ret : 0;
}
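/*
 * Resolve the chunk at chunk_offset through the mapping tree and scrub
 * every stripe of it that is stored on our device.
 */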
static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
	u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length)
{
	struct btrfs_mapping_tree *map_tree =
		&sdev->dev->dev_root->fs_info->mapping_tree;
	struct map_lookup *map;
	struct extent_map *em;
	int i;
	int ret = -EINVAL;

	read_lock(&map_tree->map_tree.lock);
	em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
	read_unlock(&map_tree->map_tree.lock);

	if (!em)
		return -EINVAL;

	map = (struct map_lookup *)em->bdev;
	if (em->start != chunk_offset)
		goto out;

	if (em->len < length)
		goto out;

	for (i = 0; i < map->num_stripes; ++i) {
		if (map->stripes[i].dev == sdev->dev) {
			ret = scrub_stripe(sdev, map, i, chunk_offset, length);
			if (ret)
				goto out;
		}
	}
out:
	free_extent_map(em);

	return ret;
}
static noinline_for_stack
int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
{
	struct btrfs_dev_extent *dev_extent = NULL;
	struct btrfs_path *path;
	struct btrfs_root *root = sdev->dev->dev_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 length;
	u64 chunk_tree;
	u64 chunk_objectid;
	u64 chunk_offset;
	int ret;
	int slot;
	struct extent_buffer *l;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_block_group_cache *cache;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 2;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	key.objectid = sdev->dev->devid;
	key.offset = 0ull;
	key.type = BTRFS_DEV_EXTENT_KEY;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			break;
		if (ret > 0) {
			if (path->slots[0] >=
			    btrfs_header_nritems(path->nodes[0])) {
				ret = btrfs_next_leaf(root, path);
				if (ret)
					break;
			}
		}

		l = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.objectid != sdev->dev->devid)
			break;

		if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
			break;

		if (found_key.offset >= end)
			break;

		if (found_key.offset < key.offset)
			break;

		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		length = btrfs_dev_extent_length(l, dev_extent);

		if (found_key.offset + length <= start) {
			key.offset = found_key.offset + length;
			btrfs_release_path(path);
			continue;
		}

		chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
		chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);

		/*
		 * get a reference on the corresponding block group to prevent
		 * the chunk from going away while we scrub it
		 */
		cache = btrfs_lookup_block_group(fs_info, chunk_offset);
		if (!cache) {
			ret = -ENOENT;
			break;
		}
		ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
				  chunk_offset, length);
		btrfs_put_block_group(cache);
		if (ret)
			break;

		key.offset = found_key.offset + length;
		btrfs_release_path(path);
	}

	btrfs_free_path(path);

	/*
	 * ret can still be 1 from search_slot or next_leaf,
	 * that's not an error
	 */
	return ret < 0 ? ret : 0;
}
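/*
 * Scrub all super block copies of the device. The copies live at the
 * fixed offsets returned by btrfs_sb_offset(); copies beyond the end
 * of the device are skipped. The generation is checked against the
 * last committed transaction.
 */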
static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
{
	int	i;
	u64	bytenr;
	u64	gen;
	int	ret;
	struct btrfs_device *device = sdev->dev;
	struct btrfs_root *root = device->dev_root;

	gen = root->fs_info->last_trans_committed;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
			break;

		ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr,
				 BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
		if (ret)
			return ret;
	}
	wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);

	return 0;
}
/*
 * get a reference count on fs_info->scrub_workers. start workers if necessary
 */
static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	mutex_lock(&fs_info->scrub_lock);
	if (fs_info->scrub_workers_refcnt == 0)
		btrfs_start_workers(&fs_info->scrub_workers, 1);
	++fs_info->scrub_workers_refcnt;
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}

static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	mutex_lock(&fs_info->scrub_lock);
	if (--fs_info->scrub_workers_refcnt == 0)
		btrfs_stop_workers(&fs_info->scrub_workers);
	WARN_ON(fs_info->scrub_workers_refcnt < 0);
	mutex_unlock(&fs_info->scrub_lock);
}
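/*
 * Main entry point: scrub the range [start, end) of the given device.
 * The sequence is: take a worker refcount, look up the device under
 * the device_list_mutex, set up the context under the scrub_lock,
 * scrub the supers, enumerate and scrub the chunks, then tear down.
 * Only one scrub may run per device at a time (-EINPROGRESS otherwise).
 */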
int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
		    struct btrfs_scrub_progress *progress, int readonly)
{
	struct scrub_dev *sdev;
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;
	struct btrfs_device *dev;

	if (btrfs_fs_closing(root->fs_info))
		return -EINVAL;

	/*
	 * check some assumptions
	 */
	if (root->sectorsize != PAGE_SIZE ||
	    root->sectorsize != root->leafsize ||
	    root->sectorsize != root->nodesize) {
		printk(KERN_ERR "btrfs_scrub: size assumptions fail\n");
		return -EINVAL;
	}

	ret = scrub_workers_get(root);
	if (ret)
		return ret;

	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
	dev = btrfs_find_device(root, devid, NULL, NULL);
	if (!dev || dev->missing) {
		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
		scrub_workers_put(root);
		return -ENODEV;
	}
	mutex_lock(&fs_info->scrub_lock);

	if (!dev->in_fs_metadata) {
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
		scrub_workers_put(root);
		return -ENODEV;
	}

	if (dev->scrub_device) {
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
		scrub_workers_put(root);
		return -EINPROGRESS;
	}
	sdev = scrub_setup_dev(dev);
	if (IS_ERR(sdev)) {
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
		scrub_workers_put(root);
		return PTR_ERR(sdev);
	}
	sdev->readonly = readonly;
	dev->scrub_device = sdev;

	atomic_inc(&fs_info->scrubs_running);
	mutex_unlock(&fs_info->scrub_lock);
	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);

	down_read(&fs_info->scrub_super_lock);
	ret = scrub_supers(sdev);
	up_read(&fs_info->scrub_super_lock);

	if (!ret)
		ret = scrub_enumerate_chunks(sdev, start, end);

	wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);

	atomic_dec(&fs_info->scrubs_running);
	wake_up(&fs_info->scrub_pause_wait);

	if (progress)
		memcpy(progress, &sdev->stat, sizeof(*progress));

	mutex_lock(&fs_info->scrub_lock);
	dev->scrub_device = NULL;
	mutex_unlock(&fs_info->scrub_lock);

	scrub_free_dev(sdev);
	scrub_workers_put(root);

	return ret;
}
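/*
 * Pause and continue are used by callers that must quiesce the scrub,
 * e.g. around a transaction commit: pause raises scrub_pause_req and
 * waits until every running scrub has parked in scrub_stripe's pause
 * loop; continue drops the request and wakes the scrubs up again.
 */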
int btrfs_scrub_pause(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	mutex_lock(&fs_info->scrub_lock);
	atomic_inc(&fs_info->scrub_pause_req);
	while (atomic_read(&fs_info->scrubs_paused) !=
	       atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrubs_paused) ==
			   atomic_read(&fs_info->scrubs_running));
		mutex_lock(&fs_info->scrub_lock);
	}
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}
int btrfs_scrub_continue(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	atomic_dec(&fs_info->scrub_pause_req);
	wake_up(&fs_info->scrub_pause_wait);
	return 0;
}

int btrfs_scrub_pause_super(struct btrfs_root *root)
{
	down_write(&root->fs_info->scrub_super_lock);
	return 0;
}

int btrfs_scrub_continue_super(struct btrfs_root *root)
{
	up_write(&root->fs_info->scrub_super_lock);
	return 0;
}
int btrfs_scrub_cancel(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	mutex_lock(&fs_info->scrub_lock);
	if (!atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		return -ENOTCONN;
	}

	atomic_inc(&fs_info->scrub_cancel_req);
	while (atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrubs_running) == 0);
		mutex_lock(&fs_info->scrub_lock);
	}
	atomic_dec(&fs_info->scrub_cancel_req);
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}
int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct scrub_dev *sdev;

	mutex_lock(&fs_info->scrub_lock);
	sdev = dev->scrub_device;
	if (!sdev) {
		mutex_unlock(&fs_info->scrub_lock);
		return -ENOTCONN;
	}
	atomic_inc(&sdev->cancel_req);
	while (dev->scrub_device) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   dev->scrub_device == NULL);
		mutex_lock(&fs_info->scrub_lock);
	}
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}
int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_device *dev;
	int ret;

	/*
	 * we have to hold the device_list_mutex here so the device
	 * does not go away in cancel_dev. FIXME: find a better solution
	 */
	mutex_lock(&fs_info->fs_devices->device_list_mutex);
	dev = btrfs_find_device(root, devid, NULL, NULL);
	if (!dev) {
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		return -ENODEV;
	}
	ret = btrfs_scrub_cancel_dev(root, dev);
	mutex_unlock(&fs_info->fs_devices->device_list_mutex);

	return ret;
}
int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
			 struct btrfs_scrub_progress *progress)
{
	struct btrfs_device *dev;
	struct scrub_dev *sdev = NULL;

	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
	dev = btrfs_find_device(root, devid, NULL, NULL);
	if (dev)
		sdev = dev->scrub_device;
	if (sdev)
		memcpy(progress, &sdev->stat, sizeof(*progress));
	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);

	return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV;
}