From: Philipp Reisner Date: Sun, 7 Nov 2010 14:56:29 +0000 (+0100) Subject: drbd: Implemented priority inheritance for resync requests X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=e3555d8545976703938d1b59e2db509426dbe02c;p=linux-beck.git drbd: Implemented priority inheritance for resync requests We only issue resync requests if there is no significant application IO going on. = Application IO has higher priority than resnyc IO. If application IO can not be started because the resync process locked an resync_lru entry, start the IO operations necessary to release the lock ASAP. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 33f6cc537d08..28f85d950781 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -182,6 +182,7 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) if (unlikely(tmp != NULL)) { struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); if (test_bit(BME_NO_WRITES, &bm_ext->flags)) { + set_bit(BME_PRIORITY, &bm_ext->flags); spin_unlock_irq(&mdev->al_lock); return NULL; } @@ -1297,8 +1298,7 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector) } if (lc_put(mdev->resync, &bm_ext->lce) == 0) { - clear_bit(BME_LOCKED, &bm_ext->flags); - clear_bit(BME_NO_WRITES, &bm_ext->flags); + bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */ mdev->resync_locked--; wake_up(&mdev->al_wait); } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index da02cce374c9..366873d661b8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1326,6 +1326,7 @@ struct bm_extent { #define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! */ #define BME_LOCKED 1 /* bm_extent.flags: syncer active on this one. */ +#define BME_PRIORITY 2 /* finish resync IO on this extent ASAP! App IO waiting! */ /* drbd_bitmap.c */ /* @@ -1552,7 +1553,7 @@ extern int w_start_resync(struct drbd_conf *, struct drbd_work *, int); extern void resync_timer_fn(unsigned long data); /* drbd_receiver.c */ -extern int drbd_rs_should_slow_down(struct drbd_conf *mdev); +extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector); extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, const unsigned rw, const int fault_type); extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index ee9238e59327..0630a2e122d3 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1862,10 +1862,11 @@ out_interrupted: * The current sync rate used here uses only the most recent two step marks, * to have a short time average so we can react faster. */ -int drbd_rs_should_slow_down(struct drbd_conf *mdev) +int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) { struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk; unsigned long db, dt, dbdt; + struct lc_element *tmp; int curr_events; int throttle = 0; @@ -1873,9 +1874,22 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev) if (mdev->sync_conf.c_min_rate == 0) return 0; + spin_lock_irq(&mdev->al_lock); + tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector)); + if (tmp) { + struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); + if (test_bit(BME_PRIORITY, &bm_ext->flags)) { + spin_unlock_irq(&mdev->al_lock); + return 0; + } + /* Do not slow down if app IO is already waiting for this extent */ + } + spin_unlock_irq(&mdev->al_lock); + curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + (int)part_stat_read(&disk->part0, sectors[1]) - atomic_read(&mdev->rs_sect_ev); + if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) { unsigned long rs_left; int i; @@ -2060,9 +2074,9 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un * we would also throttle its application reads. * In that case, throttling is done on the SyncTarget only. */ - if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev)) - msleep(100); - if (drbd_rs_begin_io(mdev, e->sector)) + if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector)) + schedule_timeout_uninterruptible(HZ/10); + if (drbd_rs_begin_io(mdev, sector)) goto out_free_e; submit_for_resync: diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 982d68432a0f..4008130f2b2c 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -355,7 +355,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) if (!get_ldev(mdev)) return -EIO; - if (drbd_rs_should_slow_down(mdev)) + if (drbd_rs_should_slow_down(mdev, sector)) goto defer; /* GFP_TRY, because if there is no memory available right now, this may @@ -503,16 +503,6 @@ int drbd_rs_number_requests(struct drbd_conf *mdev) number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); } - /* Throttle resync on lower level disk activity, which may also be - * caused by application IO on Primary/SyncTarget. - * Keep this after the call to drbd_rs_controller, as that assumes - * to be called as precisely as possible every SLEEP_TIME, - * and would be confused otherwise. */ - if (number && drbd_rs_should_slow_down(mdev)) { - mdev->c_sync_rate = 1; - number = 0; - } - /* ignore the amount of pending requests, the resync controller should * throttle down to incoming reply rate soon enough anyways. */ return number; @@ -594,7 +584,8 @@ next_sector: sector = BM_BIT_TO_SECT(bit); - if (drbd_try_rs_begin_io(mdev, sector)) { + if (drbd_rs_should_slow_down(mdev, sector) || + drbd_try_rs_begin_io(mdev, sector)) { mdev->bm_resync_fo = bit; goto requeue; } @@ -719,7 +710,8 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca size = BM_BLOCK_SIZE; - if (drbd_try_rs_begin_io(mdev, sector)) { + if (drbd_rs_should_slow_down(mdev, sector) || + drbd_try_rs_begin_io(mdev, sector)) { mdev->ov_position = sector; goto requeue; }