From: Lars Ellenberg
Date: Mon, 20 Aug 2012 09:05:23 +0000 (+0200)
Subject: drbd: don't send out P_BARRIER with stale information
X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=4eb9b3cba00471a01699cceb0f4b1f0cb8111ee2;p=linux-beck.git

drbd: don't send out P_BARRIER with stale information

We must only send P_BARRIER for epochs we actually sent P_DATA in.

If we (re-)establish a connection, we reinitialized the send.current_epoch_nr,
but forgot to reset send.current_epoch_writes.

This could result in a spurious P_BARRIER with stale epoch information,
and a disconnect/reconnect cycle once the then "unexpected" P_BARRIER_ACK
is received:
  BAD! BarrierAck #28823 received, expected #28829!

Introduce re_init_if_first_write() and maybe_send_barrier() helpers,
and call them appropriately for read/write/set-out-of-sync requests.

Signed-off-by: Philipp Reisner
Signed-off-by: Lars Ellenberg
---

diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 1c9c6fd332c3..c674f17773a6 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1265,6 +1265,27 @@ int w_send_write_hint(struct drbd_work *w, int cancel)
 	return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
 }
 
+static void re_init_if_first_write(struct drbd_tconn *tconn, unsigned int epoch)
+{
+	if (!tconn->send.seen_any_write_yet) {
+		tconn->send.seen_any_write_yet = true;
+		tconn->send.current_epoch_nr = epoch;
+		tconn->send.current_epoch_writes = 0;
+	}
+}
+
+static void maybe_send_barrier(struct drbd_tconn *tconn, unsigned int epoch)
+{
+	/* re-init if first write on this connection */
+	if (!tconn->send.seen_any_write_yet)
+		return;
+	if (tconn->send.current_epoch_nr != epoch) {
+		if (tconn->send.current_epoch_writes)
+			drbd_send_barrier(tconn);
+		tconn->send.current_epoch_nr = epoch;
+	}
+}
+
 int w_send_out_of_sync(struct drbd_work *w, int cancel)
 {
 	struct drbd_request *req = container_of(w, struct drbd_request, w);
@@ -1277,19 +1298,11 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel)
 		return 0;
 	}
 
-	if (!tconn->send.seen_any_write_yet) {
-		tconn->send.seen_any_write_yet = true;
-		tconn->send.current_epoch_nr = req->epoch;
-	}
-	if (tconn->send.current_epoch_nr != req->epoch) {
-		if (tconn->send.current_epoch_writes)
-			drbd_send_barrier(tconn);
-		tconn->send.current_epoch_nr = req->epoch;
-	}
 	/* this time, no tconn->send.current_epoch_writes++;
 	 * If it was sent, it was the closing barrier for the last
 	 * replicated epoch, before we went into AHEAD mode.
 	 * No more barriers will be sent, until we leave AHEAD mode again. */
+	maybe_send_barrier(tconn, req->epoch);
 
 	err = drbd_send_out_of_sync(mdev, req);
 	req_mod(req, OOS_HANDED_TO_NETWORK);
@@ -1315,15 +1328,8 @@ int w_send_dblock(struct drbd_work *w, int cancel)
 		return 0;
 	}
 
-	if (!tconn->send.seen_any_write_yet) {
-		tconn->send.seen_any_write_yet = true;
-		tconn->send.current_epoch_nr = req->epoch;
-	}
-	if (tconn->send.current_epoch_nr != req->epoch) {
-		if (tconn->send.current_epoch_writes)
-			drbd_send_barrier(tconn);
-		tconn->send.current_epoch_nr = req->epoch;
-	}
+	re_init_if_first_write(tconn, req->epoch);
+	maybe_send_barrier(tconn, req->epoch);
 	tconn->send.current_epoch_writes++;
 
 	err = drbd_send_dblock(mdev, req);
@@ -1352,12 +1358,7 @@ int w_send_read_req(struct drbd_work *w, int cancel)
 
 	/* Even read requests may close a write epoch,
 	 * if there was any yet. */
-	if (tconn->send.seen_any_write_yet &&
-	    tconn->send.current_epoch_nr != req->epoch) {
-		if (tconn->send.current_epoch_writes)
-			drbd_send_barrier(tconn);
-		tconn->send.current_epoch_nr = req->epoch;
-	}
+	maybe_send_barrier(tconn, req->epoch);
 
 	err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size,
 				 (unsigned long)req);