From c40f341f1e7fd4eddcfc5881d94cfa8669071ee6 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Wed, 19 Aug 2015 08:14:42 +1000 Subject: [PATCH] md-cluster: Use a small window for resync Suspending the entire device for resync could take too long. Resync in small chunks. cluster's resync window (32M) is maintained in r1conf as cluster_sync_low and cluster_sync_high and processed in raid1's sync_request(). If the current resync is outside the cluster resync window: 1. Set the cluster_sync_low to curr_resync_completed. 2. Check if the sync will fit in the new window, if not issue a wait_barrier() and set cluster_sync_low to sector_nr. 3. Set cluster_sync_high to cluster_sync_low + resync_window. 4. Send a message to all nodes so they may add it in their suspension list. bitmap_cond_end_sync is modified to allow to force a sync inorder to get the curr_resync_completed uptodate with the sector passed. Signed-off-by: Goldwyn Rodrigues Signed-off-by: NeilBrown --- drivers/md/bitmap.c | 4 ++-- drivers/md/bitmap.h | 2 +- drivers/md/md-cluster.c | 41 +++++------------------------------------ drivers/md/md-cluster.h | 4 +--- drivers/md/md.c | 8 -------- drivers/md/raid1.c | 26 +++++++++++++++++++++++++- drivers/md/raid1.h | 7 +++++++ drivers/md/raid10.c | 2 +- drivers/md/raid5.c | 2 +- 9 files changed, 43 insertions(+), 53 deletions(-) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index e9d3ee703e6d..4f22e919787a 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1570,7 +1570,7 @@ void bitmap_close_sync(struct bitmap *bitmap) } EXPORT_SYMBOL(bitmap_close_sync); -void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) +void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force) { sector_t s = 0; sector_t blocks; @@ -1581,7 +1581,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) bitmap->last_end_sync = jiffies; return; } - if (time_before(jiffies, (bitmap->last_end_sync + if (!force && time_before(jiffies, (bitmap->last_end_sync + bitmap->mddev->bitmap_info.daemon_sleep))) return; wait_event(bitmap->mddev->recovery_wait, diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index 8731fa06855f..7d5c3a610ca5 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h @@ -257,7 +257,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded); void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted); void bitmap_close_sync(struct bitmap *bitmap); -void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector); +void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force); void bitmap_unplug(struct bitmap *bitmap); void bitmap_daemon_work(struct mddev *mddev); diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 4a965f22be20..b94a2e68ef43 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -802,15 +802,6 @@ static int slot_number(struct mddev *mddev) return cinfo->slot_number - 1; } -static void resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi) -{ - struct md_cluster_info *cinfo = mddev->cluster_info; - - add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi); - /* Re-acquire the lock to refresh LVB */ - dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW); -} - static int metadata_update_start(struct mddev *mddev) { return lock_comm(mddev->cluster_info); @@ -836,45 +827,25 @@ static int metadata_update_cancel(struct mddev *mddev) return dlm_unlock_sync(cinfo->token_lockres); } -static int resync_send(struct mddev *mddev, enum msg_type type, - sector_t lo, sector_t hi) +static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi) { struct md_cluster_info *cinfo = mddev->cluster_info; struct cluster_msg cmsg; int slot = cinfo->slot_number - 1; + add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi); + /* Re-acquire the lock to refresh LVB */ + dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW); pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__, (unsigned long long)lo, (unsigned long long)hi); - resync_info_update(mddev, lo, hi); - cmsg.type = cpu_to_le32(type); + cmsg.type = cpu_to_le32(RESYNCING); cmsg.slot = cpu_to_le32(slot); cmsg.low = cpu_to_le64(lo); cmsg.high = cpu_to_le64(hi); return sendmsg(cinfo, &cmsg); } -static int resync_start(struct mddev *mddev, sector_t lo, sector_t hi) -{ - pr_info("%s:%d\n", __func__, __LINE__); - return resync_send(mddev, RESYNCING, lo, hi); -} - -static void resync_finish(struct mddev *mddev) -{ - struct md_cluster_info *cinfo = mddev->cluster_info; - struct cluster_msg cmsg; - int slot = cinfo->slot_number - 1; - - pr_info("%s:%d\n", __func__, __LINE__); - resync_send(mddev, RESYNCING, 0, 0); - if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { - cmsg.type = cpu_to_le32(BITMAP_NEEDS_SYNC); - cmsg.slot = cpu_to_le32(slot); - sendmsg(cinfo, &cmsg); - } -} - static int area_resyncing(struct mddev *mddev, int direction, sector_t lo, sector_t hi) { @@ -997,8 +968,6 @@ static struct md_cluster_operations cluster_ops = { .leave = leave, .slot_number = slot_number, .resync_info_update = resync_info_update, - .resync_start = resync_start, - .resync_finish = resync_finish, .metadata_update_start = metadata_update_start, .metadata_update_finish = metadata_update_finish, .metadata_update_cancel = metadata_update_cancel, diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h index 00defe2badbc..f5bdc0c86eaa 100644 --- a/drivers/md/md-cluster.h +++ b/drivers/md/md-cluster.h @@ -12,9 +12,7 @@ struct md_cluster_operations { int (*join)(struct mddev *mddev, int nodes); int (*leave)(struct mddev *mddev); int (*slot_number)(struct mddev *mddev); - void (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi); - int (*resync_start)(struct mddev *mddev, sector_t lo, sector_t hi); - void (*resync_finish)(struct mddev *mddev); + int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi); int (*metadata_update_start)(struct mddev *mddev); int (*metadata_update_finish)(struct mddev *mddev); int (*metadata_update_cancel)(struct mddev *mddev); diff --git a/drivers/md/md.c b/drivers/md/md.c index 1e1bdd86f40c..9798a9921a38 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7805,9 +7805,6 @@ void md_do_sync(struct md_thread *thread) md_new_event(mddev); update_time = jiffies; - if (mddev_is_clustered(mddev)) - md_cluster_ops->resync_start(mddev, j, max_sectors); - blk_start_plug(&plug); while (j < max_sectors) { sector_t sectors; @@ -7871,8 +7868,6 @@ void md_do_sync(struct md_thread *thread) j = max_sectors; if (j > 2) mddev->curr_resync = j; - if (mddev_is_clustered(mddev)) - md_cluster_ops->resync_info_update(mddev, j, max_sectors); mddev->curr_mark_cnt = io_sectors; if (last_check == 0) /* this is the earliest that rebuild will be @@ -7979,9 +7974,6 @@ void md_do_sync(struct md_thread *thread) } } skip: - if (mddev_is_clustered(mddev)) - md_cluster_ops->resync_finish(mddev); - set_bit(MD_CHANGE_DEVS, &mddev->flags); spin_lock(&mddev->lock); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 049df6c4a8cc..1dd13bb52940 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -90,6 +90,8 @@ static void r1bio_pool_free(void *r1_bio, void *data) #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) #define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH) #define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9) +#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW) +#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9) #define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS) static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) @@ -2488,6 +2490,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp bitmap_close_sync(mddev->bitmap); close_sync(conf); + + if (mddev_is_clustered(mddev)) { + conf->cluster_sync_low = 0; + conf->cluster_sync_high = 0; + /* Send zeros to mark end of resync */ + md_cluster_ops->resync_info_update(mddev, 0, 0); + } return 0; } @@ -2508,7 +2517,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp return sync_blocks; } - bitmap_cond_end_sync(mddev->bitmap, sector_nr); + /* we are incrementing sector_nr below. To be safe, we check against + * sector_nr + two times RESYNC_SECTORS + */ + + bitmap_cond_end_sync(mddev->bitmap, sector_nr, + mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high)); r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO); raise_barrier(conf, sector_nr); @@ -2699,6 +2713,16 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp bio_full: r1_bio->sectors = nr_sectors; + if (mddev_is_clustered(mddev) && + conf->cluster_sync_high < sector_nr + nr_sectors) { + conf->cluster_sync_low = mddev->curr_resync_completed; + conf->cluster_sync_high = conf->cluster_sync_low + CLUSTER_RESYNC_WINDOW_SECTORS; + /* Send resync message */ + md_cluster_ops->resync_info_update(mddev, + conf->cluster_sync_low, + conf->cluster_sync_high); + } + /* For a user-requested sync, we read all readable devices and do a * compare */ diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index c52d7139c5d7..61c39b390cd8 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -111,6 +111,13 @@ struct r1conf { * the new thread here until we fully activate the array. */ struct md_thread *thread; + + /* Keep track of cluster resync window to send to other + * nodes. + */ + sector_t cluster_sync_low; + sector_t cluster_sync_high; + }; /* diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7c99a4037715..5f30b7526c1f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3137,7 +3137,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, /* resync. Schedule a read for every block at this virt offset */ int count = 0; - bitmap_cond_end_sync(mddev->bitmap, sector_nr); + bitmap_cond_end_sync(mddev->bitmap, sector_nr, 0); if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, mddev->degraded) && diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 49bb8d3ff9be..5b79770c4f08 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5613,7 +5613,7 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */ } - bitmap_cond_end_sync(mddev->bitmap, sector_nr); + bitmap_cond_end_sync(mddev->bitmap, sector_nr, false); sh = get_active_stripe(conf, sector_nr, 0, 1, 0); if (sh == NULL) { -- 2.39.5