4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/ptlrpc/recover.c
38 * Author: Mike Shaver <shaver@clusterfs.com>
41 #define DEBUG_SUBSYSTEM S_RPC
42 # include <linux/libcfs/libcfs.h>
44 #include <obd_support.h>
45 #include <lustre_ha.h>
46 #include <lustre_net.h>
47 #include <lustre_import.h>
48 #include <lustre_export.h>
51 #include <obd_class.h>
52 #include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
53 #include <linux/list.h>
55 #include "ptlrpc_internal.h"
58 * Start recovery on disconnected import.
59 * This is done by just attempting a connect
/*
 * Kick off recovery for @imp.
 *
 * @imp: import to recover.  Only imp->imp_obd is read here (for the debug
 *       message); @imp itself is then passed to ptlrpc_connect_import().
 *
 * The function returns void, so whatever ptlrpc_connect_import() reports
 * is not propagated to the caller.
 */
61 void ptlrpc_initiate_recovery(struct obd_import *imp)
/* D_HA trace naming the target (via obd2cli_tgt()) being recovered. */
63 CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
/* The connect attempt is the only action visible in this function. */
64 ptlrpc_connect_import(imp);
70 * Identify what request from replay list needs to be replayed next
71 * (based on what we have already replayed) and send it to server.
/*
 * Pick a request from imp->imp_replay_list and replay it.
 *
 * @imp:      import whose replay list is scanned.
 * @inflight: out-parameter; no assignment to it is visible in this view
 *            of the file -- TODO confirm how and when it is set.
 *
 * Steps that can be read off the visible lines:
 *  1. Under imp_lock: reset imp_last_transno_checked to 0, call
 *     ptlrpc_free_committed(), and snapshot imp_last_replay_transno
 *     into last_transno.
 *  2. Walk imp_replay_list with list_for_each_safe() without holding
 *     imp_lock; tmp is the cursor and pos keeps the following node, so
 *     the current entry may be unlinked during the walk.
 *  3. For a request whose rq_transno is greater than last_transno, add
 *     a flag to its request message when imp_resend_replay is set.
 *  4. Under imp_lock: clear imp_resend_replay.
 *  5. Call ptlrpc_replay_req() on req, keeping its result in rc, and
 *     log rc together with the request's xid via CERROR.
 *
 * NOTE(review): the declarations of rc and last_transno, the member
 * argument of list_entry(), the flag passed to lustre_msg_add_flags(),
 * the loop exit, the conditions guarding steps 5, and the return
 * statement are on lines absent from this view -- verify against the
 * full file before relying on this summary.
 */
73 int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
76 struct list_head *tmp, *pos;
/* Stays NULL unless the list walk below assigns an entry to it. */
77 struct ptlrpc_request *req = NULL;
82 /* It might have committed some after we last spoke, so make sure we
83 * get rid of them now.
85 spin_lock(&imp->imp_lock);
86 imp->imp_last_transno_checked = 0;
87 ptlrpc_free_committed(imp);
88 last_transno = imp->imp_last_replay_transno;
89 spin_unlock(&imp->imp_lock);
91 CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n",
92 imp, obd2cli_tgt(imp->imp_obd),
93 imp->imp_peer_committed_transno, last_transno);
95 /* Do I need to hold a lock across this iteration? We shouldn't be
96 * racing with any additions to the list, because we're in recovery
97 * and are therefore not processing additional requests to add. Calls
98 * to ptlrpc_free_committed might commit requests, but nothing "newer"
99 * than the one we're replaying (it can't be committed until it's
100 * replayed, and we're doing that here). l_f_e_safe protects against
101 * problems with the current request being committed, in the unlikely
102 * event of that race. So, in conclusion, I think that it's safe to
103 * perform this list-walk without the imp_lock held.
105 * But, the {mdc,osc}_replay_open callbacks both iterate
106 * request lists, and have comments saying they assume the
107 * imp_lock is being held by ptlrpc_replay, but it's not. it's
108 * just a little race...
110 list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
111 req = list_entry(tmp, struct ptlrpc_request,
114 /* If need to resend the last sent transno (because a
115 reconnect has occurred), then stop on the matching
116 req and send it again. If, however, the last sent
117 transno has been committed then we continue replay
118 from the next request. */
/* Candidate: this request's transno is newer than last_transno. */
119 if (req->rq_transno > last_transno) {
/* Resend requested: tag the request message (flag argument not visible). */
120 if (imp->imp_resend_replay)
121 lustre_msg_add_flags(req->rq_reqmsg,
/* imp_resend_replay is cleared under imp_lock. */
128 spin_lock(&imp->imp_lock);
129 imp->imp_resend_replay = 0;
130 spin_unlock(&imp->imp_lock);
/* rc receives the outcome of replaying req. */
133 rc = ptlrpc_replay_req(req);
/* NOTE(review): the condition guarding this error report is not visible. */
135 CERROR("recovery replay error %d for req "
136 LPU64"\n", rc, req->rq_xid);
145 * Schedule resending of request on sending_list. This is done after
146 * we completed replaying of requests and locks.
/*
 * Resend the requests queued on imp->imp_sending_list.
 *
 * @imp: import whose sending list is processed.
 *
 * What the visible lines do:
 *  - Take imp_lock and check imp_state; when it is not
 *    LUSTRE_IMP_RECOVER the lock is dropped again (the statement that
 *    follows the unlock, presumably an early return, is not visible
 *    in this view -- TODO confirm).
 *  - Walk imp_sending_list with list_for_each_entry_safe(), so the
 *    current entry may leave the list while it is being handled.
 *  - For every entry, assert that the pointer is above PAGE_CACHE_SIZE
 *    and is not LP_POISON, and that rq_type is not LI_POISON.
 *  - Call ptlrpc_resend_req() on each request for which
 *    ptlrpc_no_resend() returns zero.
 *  - Release imp_lock.
 *
 * The walk runs with imp_lock held, although the first in-body comment
 * argues the lock would not be needed; the second one explains why it
 * is taken anyway (two concurrent "lctl recover" invocations).
 *
 * NOTE(review): the list member passed to list_for_each_entry_safe()
 * and the return statements are on lines absent from this view.
 *
 * The symbol is exported with EXPORT_SYMBOL().
 */
148 int ptlrpc_resend(struct obd_import *imp)
150 struct ptlrpc_request *req, *next;
152 /* As long as we're in recovery, nothing should be added to the sending
153 * list, so we don't need to hold the lock during this iteration and
156 /* Well... what if lctl recover is called twice at the same time?
158 spin_lock(&imp->imp_lock);
159 if (imp->imp_state != LUSTRE_IMP_RECOVER) {
160 spin_unlock(&imp->imp_lock);
164 list_for_each_entry_safe(req, next, &imp->imp_sending_list,
166 LASSERTF((long)req > PAGE_CACHE_SIZE && req != LP_POISON,
167 "req %p bad\n", req);
168 LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
169 if (!ptlrpc_no_resend(req))
170 ptlrpc_resend_req(req);
172 spin_unlock(&imp->imp_lock);
176 EXPORT_SYMBOL(ptlrpc_resend);
179 * Go through all requests in delayed list and wake their threads
/*
 * Wake every request parked on imp->imp_delayed_list.
 *
 * @imp: import whose delayed list is walked.
 *
 * The whole walk runs with imp_lock held.  Nothing is returned, and no
 * visible line removes entries from the list.
 *
 * The symbol is exported with EXPORT_SYMBOL().
 */
182 void ptlrpc_wake_delayed(struct obd_import *imp)
/* tmp is the list cursor; pos holds the next node for the _safe walk. */
184 struct list_head *tmp, *pos;
185 struct ptlrpc_request *req;
187 spin_lock(&imp->imp_lock);
188 list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
/* Requests are linked into the delayed list through rq_list. */
189 req = list_entry(tmp, struct ptlrpc_request, rq_list);
/* Trace the request and the set it belongs to before waking it. */
191 DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
192 ptlrpc_client_wake_req(req);
194 spin_unlock(&imp->imp_lock);
196 EXPORT_SYMBOL(ptlrpc_wake_delayed);
/*
 * React to a request that failed because its import is no longer
 * connected.
 *
 * @failed_req: the failed request; its rq_import is the import acted on,
 *              and the connection count stored in its request message is
 *              passed to ptlrpc_set_import_discon().
 *
 * Visible behaviour:
 *  - Log the disconnect at D_HA level.
 *  - Call ptlrpc_set_import_discon(); when it returns non-zero:
 *      - a non-replayable import is logged and handed to
 *        ptlrpc_deactivate_import();
 *      - ptlrpc_connect_import() is called when imp_deactive is 0.
 *  - Under rq_lock, set rq_resend on @failed_req unless rq_no_resend
 *    is set.
 *
 * NOTE(review): closing braces are not visible in this view, so the
 * exact nesting of the imp_deactive check relative to the two enclosing
 * "if" statements should be confirmed against the full file.
 */
198 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
200 struct obd_import *imp = failed_req->rq_import;
202 CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
203 imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
204 imp->imp_connection->c_remote_uuid.uuid);
/* The request's own connection count is passed along with the import. */
206 if (ptlrpc_set_import_discon(imp,
207 lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
/* Imports that cannot replay are deactivated instead. */
208 if (!imp->imp_replayable) {
209 CDEBUG(D_HA, "import %s@%s for %s not replayable, "
210 "auto-deactivating\n",
211 obd2cli_tgt(imp->imp_obd),
212 imp->imp_connection->c_remote_uuid.uuid,
213 imp->imp_obd->obd_name);
214 ptlrpc_deactivate_import(imp);
216 /* to control recovery via lctl {disable|enable}_recovery */
217 if (imp->imp_deactive == 0)
218 ptlrpc_connect_import(imp);
221 /* Wait for recovery to complete and resend. If evicted, then
222 this request will be errored out later.*/
/* rq_resend is only changed while rq_lock is held. */
223 spin_lock(&failed_req->rq_lock);
224 if (!failed_req->rq_no_resend)
225 failed_req->rq_resend = 1;
226 spin_unlock(&failed_req->rq_lock);
232 * Administratively activate/deactivate a client.
233 * This should only be called by the ioctl interface, currently
234 * - the lctl deactivate and activate commands
235 * - echo 0/1 >> /proc/osc/XXX/active
236 * - client umount -f (ll_umount_begin)
/*
 * Switch an import between the administratively inactive and active
 * states.
 *
 * @imp:    import to change.
 * @active: selects the path taken; the branch that tests it is on a line
 *          not present in this view -- judging by the in-body comments,
 *          presumably 0 deactivates and non-zero activates (TODO
 *          confirm).
 *
 * Deactivating path (visible lines): print a console warning, set
 * imp_deactive to 1 under imp_lock, raise IMP_EVENT_DEACTIVATE through
 * obd_import_event(), then call ptlrpc_invalidate_import().
 *
 * Activating path (visible lines): log at D_HA level, clear imp_deactive
 * under imp_lock, raise IMP_EVENT_ACTIVATE, then store the result of
 * ptlrpc_recover_import(imp, NULL, 0) in rc.
 *
 * NOTE(review): the declaration of rc and the return statement are not
 * visible here.
 *
 * The symbol is exported with EXPORT_SYMBOL().
 */
238 int ptlrpc_set_import_active(struct obd_import *imp, int active)
/* NOTE(review): the visible lines use imp->imp_obd directly, not obd. */
240 struct obd_device *obd = imp->imp_obd;
245 /* When deactivating, mark import invalid, and abort in-flight
248 LCONSOLE_WARN("setting import %s INACTIVE by administrator "
249 "request\n", obd2cli_tgt(imp->imp_obd));
251 /* set before invalidate to avoid messages about imp_inval
252 * set without imp_deactive in ptlrpc_import_delay_req */
253 spin_lock(&imp->imp_lock);
254 imp->imp_deactive = 1;
255 spin_unlock(&imp->imp_lock);
/* Raise the deactivate event before the import is invalidated. */
257 obd_import_event(imp->imp_obd, imp, IMP_EVENT_DEACTIVATE);
259 ptlrpc_invalidate_import(imp);
262 /* When activating, mark import valid, and attempt recovery */
264 CDEBUG(D_HA, "setting import %s VALID\n",
265 obd2cli_tgt(imp->imp_obd));
267 spin_lock(&imp->imp_lock);
268 imp->imp_deactive = 0;
269 spin_unlock(&imp->imp_lock);
270 obd_import_event(imp->imp_obd, imp, IMP_EVENT_ACTIVATE);
/* No replacement uuid (NULL) and async == 0 are passed here. */
272 rc = ptlrpc_recover_import(imp, NULL, 0);
277 EXPORT_SYMBOL(ptlrpc_set_import_active);
279 /* Attempt to reconnect an import.
 *
 * @imp:      import to reconnect.
 * @new_uuid: string converted with obd_str2uuid() and handed to
 *            import_set_conn_priority(); ptlrpc_set_import_active()
 *            passes NULL.  The NULL guard is on a line not present in
 *            this view -- TODO confirm.
 * @async:    not referenced on any visible line; presumably selects
 *            whether the wait at the end is skipped -- TODO confirm.
 *
 * Visible sequence:
 *  1. Under imp_lock, test whether the import is in LUSTRE_IMP_NEW, is
 *     administratively deactivated, or has a non-zero imp_inval_count
 *     (the action taken when the test is true is not visible).
 *  2. Force the import into the disconnected state with
 *     ptlrpc_set_import_discon(imp, 0).
 *  3. Give the connection named by @new_uuid priority.
 *  4. Under imp_lock, set imp_force_verify when the import is not in
 *     LUSTRE_IMP_DISCON.
 *  5. Call ptlrpc_connect_import().
 *  6. Sleep on imp_recovery_waitq, with a timeout derived from
 *     obd_timeout, until ptlrpc_import_in_recovery() reports false.
 *
 * rc is overwritten by steps 3, 5 and 6 in turn; its declaration, the
 * error exits between the steps and the final return are on lines not
 * present in this view.
 *
 * The symbol is exported with EXPORT_SYMBOL().
 */
280 int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
284 spin_lock(&imp->imp_lock);
285 if (imp->imp_state == LUSTRE_IMP_NEW || imp->imp_deactive ||
286 atomic_read(&imp->imp_inval_count))
288 spin_unlock(&imp->imp_lock);
292 /* force import to be disconnected. */
293 ptlrpc_set_import_discon(imp, 0);
296 struct obd_uuid uuid;
298 /* instruct import to use new uuid */
299 obd_str2uuid(&uuid, new_uuid);
300 rc = import_set_conn_priority(imp, &uuid);
305 /* Check if reconnect is already in progress */
306 spin_lock(&imp->imp_lock);
307 if (imp->imp_state != LUSTRE_IMP_DISCON) {
308 imp->imp_force_verify = 1;
311 spin_unlock(&imp->imp_lock);
315 rc = ptlrpc_connect_import(imp);
320 struct l_wait_info lwi;
/*
 * NOTE(review): secs holds the result of cfs_time_seconds(obd_timeout)
 * and is printed below as "seconds" with %u although it is an int --
 * confirm the unit returned by cfs_time_seconds() matches the message.
 */
321 int secs = cfs_time_seconds(obd_timeout);
323 CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
324 obd2cli_tgt(imp->imp_obd), secs);
/* No timeout or interrupt callbacks are supplied (both NULL). */
326 lwi = LWI_TIMEOUT(secs, NULL, NULL);
/* rc receives the wait outcome, replacing the connect result. */
327 rc = l_wait_event(imp->imp_recovery_waitq,
328 !ptlrpc_import_in_recovery(imp), &lwi);
329 CDEBUG(D_HA, "%s: recovery finished\n",
330 obd2cli_tgt(imp->imp_obd));
337 EXPORT_SYMBOL(ptlrpc_recover_import);
/*
 * Report whether @imp is currently in recovery.
 *
 * @imp: import whose imp_state is inspected under imp_lock.
 *
 * The visible test singles out LUSTRE_IMP_FULL, LUSTRE_IMP_CLOSED and
 * LUSTRE_IMP_DISCON.  ptlrpc_recover_import() waits until this function
 * returns zero.
 *
 * NOTE(review): the result variable, the statement executed when the
 * test matches and the return statement are not present in this view;
 * presumably those three states mean "not in recovery" (zero) and every
 * other state yields non-zero -- TODO confirm against the full file.
 */
339 int ptlrpc_import_in_recovery(struct obd_import *imp)
/* imp_state is sampled with imp_lock held. */
342 spin_lock(&imp->imp_lock);
343 if (imp->imp_state == LUSTRE_IMP_FULL ||
344 imp->imp_state == LUSTRE_IMP_CLOSED ||
345 imp->imp_state == LUSTRE_IMP_DISCON)
347 spin_unlock(&imp->imp_lock);