From: Lars Ellenberg Date: Tue, 11 Feb 2014 07:57:18 +0000 (+0100) Subject: drbd: close race when detaching from disk X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=ba3c6fb87d2df008eed8faaf01bb198e512fa72f;p=linux-beck.git drbd: close race when detaching from disk BUG: unable to handle kernel NULL pointer dereference at 0000000000000058 IP: bd_release+0x21/0x70 Process drbd_w_t7146 Call Trace: close_bdev_exclusive drbd_free_ldev [drbd] drbd_ldev_destroy [drbd] w_after_state_ch [drbd] Race probably went like this: state.disk = D_FAILED ... first one to hit zero during D_FAILED: put_ldev() /* ----------------> 0 */ i = atomic_dec_return() if (i == 0) if (state.disk == D_FAILED) schedule_work(go_diskless) /* 1 <------ */ get_ldev_if_state() go_diskless() do_some_pre_cleanup() corresponding put_ldev(): force_state(D_DISKLESS) /* 0 <------ */ i = atomic_dec_return() if (i == 0) atomic_inc() /* ---------> 1 */ state.disk = D_DISKLESS schedule_work(after_state_ch) /* execution pre-empted by IRQ ? */ after_state_ch() put_ldev() i = atomic_dec_return() /* 0 */ if (i == 0) if (state.disk == D_DISKLESS) if (state.disk == D_DISKLESS) drbd_ldev_destroy() drbd_ldev_destroy(); Trying to fix this by checking the disk state *before* the atomic_dec_return(), which implies memory barriers, and by inserting extra memory barriers around the state assignment in __drbd_set_state(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a16f9ae3c98a..a0ffc19ccf0e 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1946,6 +1946,11 @@ static inline bool is_sync_state(enum drbd_conns connection_state) static inline void put_ldev(struct drbd_device *device) { + enum drbd_disk_state ds = device->state.disk; + /* We must check the state *before* the atomic_dec becomes visible, + * or we have a theoretical race where someone hitting zero, + * while state still D_FAILED, will then see D_DISKLESS in the + * condition below and calling into destroy, where he must not, yet. */ int i = atomic_dec_return(&device->local_cnt); /* This may be called from some endio handler, @@ -1954,10 +1959,10 @@ static inline void put_ldev(struct drbd_device *device) __release(local); D_ASSERT(device, i >= 0); if (i == 0) { - if (device->state.disk == D_DISKLESS) + if (ds == D_DISKLESS) /* even internal references gone, safe to destroy */ drbd_ldev_destroy(device); - if (device->state.disk == D_FAILED) { + if (ds == D_FAILED) { /* all application IO references gone. */ if (!test_and_set_bit(GO_DISKLESS, &device->flags)) drbd_queue_work(&first_peer_device(device)->connection->sender_work, diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 1bddd6cf8ac7..6629f4668102 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -958,7 +958,6 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, enum drbd_state_rv rv = SS_SUCCESS; enum sanitize_state_warnings ssw; struct after_state_chg_work *ascw; - bool did_remote, should_do_remote; os = drbd_read_state(device); @@ -1010,18 +1009,22 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) atomic_inc(&device->local_cnt); - did_remote = drbd_should_do_remote(device->state); if (!is_sync_state(os.conn) && is_sync_state(ns.conn)) clear_bit(RS_DONE, &device->flags); + /* changes to local_cnt and device flags should be visible before + * changes to state, which again should be visible before anything else + * depending on that change happens. */ + smp_wmb(); device->state.i = ns.i; - should_do_remote = drbd_should_do_remote(device->state); device->resource->susp = ns.susp; device->resource->susp_nod = ns.susp_nod; device->resource->susp_fen = ns.susp_fen; + smp_wmb(); /* put replicated vs not-replicated requests in seperate epochs */ - if (did_remote != should_do_remote) + if (drbd_should_do_remote((union drbd_dev_state)os.i) != + drbd_should_do_remote((union drbd_dev_state)ns.i)) start_new_tl_epoch(connection); if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)