drbd: Implemented priority inheritance for resync requests

author Philipp Reisner <philipp.reisner@linbit.com>

Sun, 7 Nov 2010 14:56:29 +0000 (15:56 +0100)

committer Philipp Reisner <philipp.reisner@linbit.com>

Thu, 10 Mar 2011 10:34:53 +0000 (11:34 +0100)
author Philipp Reisner <philipp.reisner@linbit.com>
Sun, 7 Nov 2010 14:56:29 +0000 (15:56 +0100)
committer Philipp Reisner <philipp.reisner@linbit.com>
Thu, 10 Mar 2011 10:34:53 +0000 (11:34 +0100)
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c

index 33f6cc5..28f85d9 100644 (file)
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -182,6 +182,7 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
         if (unlikely(tmp != NULL)) {
                 struct bm_extent  *bm_ext = lc_entry(tmp, struct bm_extent, lce);
                 if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
+                       set_bit(BME_PRIORITY, &bm_ext->flags);
                         spin_unlock_irq(&mdev->al_lock);
                         return NULL;
                 }
@@ -1297,8 +1298,7 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
         }
  
         if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
-               clear_bit(BME_LOCKED, &bm_ext->flags);
-               clear_bit(BME_NO_WRITES, &bm_ext->flags);
+               bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
                 mdev->resync_locked--;
                 wake_up(&mdev->al_wait);
         }
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h

index da02cce..366873d 100644 (file)
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1326,6 +1326,7 @@ struct bm_extent {
  
  #define BME_NO_WRITES  0  /* bm_extent.flags: no more requests on this one! */
  #define BME_LOCKED     1  /* bm_extent.flags: syncer active on this one. */
+#define BME_PRIORITY   2  /* finish resync IO on this extent ASAP! App IO waiting! */
  
  /* drbd_bitmap.c */
  /*
@@ -1552,7 +1553,7 @@ extern int w_start_resync(struct drbd_conf *, struct drbd_work *, int);
  extern void resync_timer_fn(unsigned long data);
  
  /* drbd_receiver.c */
-extern int drbd_rs_should_slow_down(struct drbd_conf *mdev);
+extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector);
  extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
                 const unsigned rw, const int fault_type);
  extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c

index ee9238e..0630a2e 100644 (file)
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1862,10 +1862,11 @@ out_interrupted:
   * The current sync rate used here uses only the most recent two step marks,
   * to have a short time average so we can react faster.
   */
-int drbd_rs_should_slow_down(struct drbd_conf *mdev)
+int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
  {
         struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
         unsigned long db, dt, dbdt;
+       struct lc_element *tmp;
         int curr_events;
         int throttle = 0;
  
@@ -1873,9 +1874,22 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev)
         if (mdev->sync_conf.c_min_rate == 0)
                 return 0;
  
+       spin_lock_irq(&mdev->al_lock);
+       tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
+       if (tmp) {
+               struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
+               if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
+                       spin_unlock_irq(&mdev->al_lock);
+                       return 0;
+               }
+               /* BME_PRIORITY set means app IO is already waiting for this extent; the early return above skips throttling in that case */
+       }
+       spin_unlock_irq(&mdev->al_lock);
+
         curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
                       (int)part_stat_read(&disk->part0, sectors[1]) -
                         atomic_read(&mdev->rs_sect_ev);
+
         if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
                 unsigned long rs_left;
                 int i;
@@ -2060,9 +2074,9 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
          * we would also throttle its application reads.
          * In that case, throttling is done on the SyncTarget only.
          */
-       if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev))
-               msleep(100);
-       if (drbd_rs_begin_io(mdev, e->sector))
+       if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
+               schedule_timeout_uninterruptible(HZ/10);
+       if (drbd_rs_begin_io(mdev, sector))
                 goto out_free_e;
  
  submit_for_resync:
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c

index 982d684..4008130 100644 (file)
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -355,7 +355,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
         if (!get_ldev(mdev))
                 return -EIO;
  
-       if (drbd_rs_should_slow_down(mdev))
+       if (drbd_rs_should_slow_down(mdev, sector))
                 goto defer;
  
         /* GFP_TRY, because if there is no memory available right now, this may
@@ -503,16 +503,6 @@ int drbd_rs_number_requests(struct drbd_conf *mdev)
                 number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
         }
  
-       /* Throttle resync on lower level disk activity, which may also be
-        * caused by application IO on Primary/SyncTarget.
-        * Keep this after the call to drbd_rs_controller, as that assumes
-        * to be called as precisely as possible every SLEEP_TIME,
-        * and would be confused otherwise. */
-       if (number && drbd_rs_should_slow_down(mdev)) {
-               mdev->c_sync_rate = 1;
-               number = 0;
-       }
-
         /* ignore the amount of pending requests, the resync controller should
          * throttle down to incoming reply rate soon enough anyways. */
         return number;
@@ -594,7 +584,8 @@ next_sector:
  
                 sector = BM_BIT_TO_SECT(bit);
  
-               if (drbd_try_rs_begin_io(mdev, sector)) {
+               if (drbd_rs_should_slow_down(mdev, sector) ||
+                   drbd_try_rs_begin_io(mdev, sector)) {
                         mdev->bm_resync_fo = bit;
                         goto requeue;
                 }
@@ -719,7 +710,8 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca
  
                 size = BM_BLOCK_SIZE;
  
-               if (drbd_try_rs_begin_io(mdev, sector)) {
+               if (drbd_rs_should_slow_down(mdev, sector) ||
+                   drbd_try_rs_begin_io(mdev, sector)) {
                         mdev->ov_position = sector;
                         goto requeue;
                 }
author	Philipp Reisner <philipp.reisner@linbit.com>
	Sun, 7 Nov 2010 14:56:29 +0000 (15:56 +0100)
committer	Philipp Reisner <philipp.reisner@linbit.com>
	Thu, 10 Mar 2011 10:34:53 +0000 (11:34 +0100)
drivers/block/drbd/drbd_actlog.c		patch \| blob \| history
drivers/block/drbd/drbd_int.h		patch \| blob \| history
drivers/block/drbd/drbd_receiver.c		patch \| blob \| history
drivers/block/drbd/drbd_worker.c		patch \| blob \| history