#define OBD_CONNECT_SHORTIO 0x2000000000000ULL/* short io */
#define OBD_CONNECT_PINGLESS 0x4000000000000ULL/* pings not required */
#define OBD_CONNECT_FLOCK_DEAD 0x8000000000000ULL/* flock deadlock detection */
+#define OBD_CONNECT_DISP_STRIPE 0x10000000000000ULL/*create stripe disposition*/
/* XXX README XXX:
* Please DO NOT add flag values here before first ensuring that this same
OBD_CONNECT_LIGHTWEIGHT | OBD_CONNECT_UMASK | \
OBD_CONNECT_LVB_TYPE | OBD_CONNECT_LAYOUTLOCK |\
OBD_CONNECT_PINGLESS | OBD_CONNECT_MAX_EASIZE |\
- OBD_CONNECT_FLOCK_DEAD)
+ OBD_CONNECT_FLOCK_DEAD | \
+ OBD_CONNECT_DISP_STRIPE)
+
#define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \
#define DISP_ENQ_CREATE_REF 0x01000000
#define DISP_OPEN_LOCK 0x02000000
#define DISP_OPEN_LEASE 0x04000000
+#define DISP_OPEN_STRIPE 0x08000000
/* INODE LOCK PARTS */
#define MDS_INODELOCK_LOOKUP 0x000001 /* For namespace, dentry etc, and also
return ocd->ocd_ibits_known;
}
+static inline bool imp_connect_disp_stripe(struct obd_import *imp)
+{
+ struct obd_connect_data *ocd;
+
+ LASSERT(imp != NULL);
+ ocd = &imp->imp_connect_data;
+ return ocd->ocd_connect_flags & OBD_CONNECT_DISP_STRIPE;
+}
+
extern struct obd_export *class_conn2export(struct lustre_handle *conn);
extern struct obd_device *class_conn2obd(struct lustre_handle *conn);
struct list_head imp_delayed_list;
/** @} */
+ /**
+ * List of requests that are retained for committed open replay. Once
+ * open is committed, open replay request will be moved from the
+ * imp_replay_list into the imp_committed_list.
+ * The imp_replay_cursor is for accelerating searching during replay.
+ * @{
+ */
+ struct list_head imp_committed_list;
+ struct list_head *imp_replay_cursor;
+ /** @} */
+
/** obd device for this import */
struct obd_device *imp_obd;
* request queues, request management, etc.
* @{
*/
+void ptlrpc_request_committed(struct ptlrpc_request *req, int force);
+
void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
struct ptlrpc_client *);
void ptlrpc_cleanup_client(struct obd_import *imp);
struct obd_client_handle *mod_och;
struct ptlrpc_request *mod_open_req;
struct ptlrpc_request *mod_close_req;
- atomic_t mod_refcount;
+ atomic_t mod_refcount;
+ bool mod_is_create;
};
struct lookup_intent;
int (*m_set_open_replay_data)(struct obd_export *,
struct obd_client_handle *,
- struct ptlrpc_request *);
+ struct lookup_intent *);
int (*m_clear_open_replay_data)(struct obd_export *,
struct obd_client_handle *);
int (*m_set_lock_data)(struct obd_export *, __u64 *, void *, __u64 *);
static inline int md_set_open_replay_data(struct obd_export *exp,
struct obd_client_handle *och,
- struct ptlrpc_request *open_req)
+ struct lookup_intent *it)
{
EXP_CHECK_MD_OP(exp, set_open_replay_data);
EXP_MD_COUNTER_INCREMENT(exp, set_open_replay_data);
- return MDP(exp->exp_obd, set_open_replay_data)(exp, och, open_req);
+ return MDP(exp->exp_obd, set_open_replay_data)(exp, och, it);
}
static inline int md_clear_open_replay_data(struct obd_export *exp,
och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
och->och_flags = it->it_flags;
- return md_set_open_replay_data(md_exp, och, req);
+ return md_set_open_replay_data(md_exp, och, it);
}
int ll_local_open(struct file *file, struct lookup_intent *it,
OBD_CONNECT_LAYOUTLOCK |
OBD_CONNECT_PINGLESS |
OBD_CONNECT_MAX_EASIZE |
- OBD_CONNECT_FLOCK_DEAD;
+ OBD_CONNECT_FLOCK_DEAD |
+ OBD_CONNECT_DISP_STRIPE;
if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
data->ocd_connect_flags |= OBD_CONNECT_SOM;
int lmv_set_open_replay_data(struct obd_export *exp,
struct obd_client_handle *och,
- struct ptlrpc_request *open_req)
+ struct lookup_intent *it)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
if (IS_ERR(tgt))
return PTR_ERR(tgt);
- return md_set_open_replay_data(tgt->ltd_exp, och, open_req);
+ return md_set_open_replay_data(tgt->ltd_exp, och, it);
}
int lmv_clear_open_replay_data(struct obd_export *exp,
int mdc_set_open_replay_data(struct obd_export *exp,
struct obd_client_handle *och,
- struct ptlrpc_request *open_req);
+ struct lookup_intent *it);
int mdc_clear_open_replay_data(struct obd_export *exp,
struct obd_client_handle *och);
* happens immediately after swabbing below, new reply
* is swabbed by that handler correctly.
*/
- mdc_set_open_replay_data(NULL, NULL, req);
+ mdc_set_open_replay_data(NULL, NULL, it);
}
if ((body->valid & (OBD_MD_FLDIREA | OBD_MD_FLEASIZE)) != 0) {
req->rq_cb_data = *mod;
(*mod)->mod_open_req = req;
req->rq_commit_cb = mdc_commit_open;
+ (*mod)->mod_is_create = true;
/**
* Take an extra reference on \var mod, it protects \var
* mod from being freed on eviction (commit callback is
int mdc_set_open_replay_data(struct obd_export *exp,
struct obd_client_handle *och,
- struct ptlrpc_request *open_req)
+ struct lookup_intent *it)
{
struct md_open_data *mod;
struct mdt_rec_create *rec;
struct mdt_body *body;
+ struct ptlrpc_request *open_req = it->d.lustre.it_data;
struct obd_import *imp = open_req->rq_import;
if (!open_req->rq_replay)
spin_lock(&open_req->rq_lock);
och->och_mod = mod;
mod->mod_och = och;
+ mod->mod_is_create = it_disposition(it, DISP_OPEN_CREATE) ||
+ it_disposition(it, DISP_OPEN_STRIPE);
mod->mod_open_req = open_req;
open_req->rq_cb_data = mod;
open_req->rq_commit_cb = mdc_commit_open;
return 0;
}
+static void mdc_free_open(struct md_open_data *mod)
+{
+ int committed = 0;
+
+ if (mod->mod_is_create == 0 &&
+ imp_connect_disp_stripe(mod->mod_open_req->rq_import))
+ committed = 1;
+
+ LASSERT(mod->mod_open_req->rq_replay == 0);
+
+ DEBUG_REQ(D_RPCTRACE, mod->mod_open_req, "free open request\n");
+
+ ptlrpc_request_committed(mod->mod_open_req, committed);
+ if (mod->mod_close_req)
+ ptlrpc_request_committed(mod->mod_close_req, committed);
+}
+
int mdc_clear_open_replay_data(struct obd_export *exp,
struct obd_client_handle *och)
{
return 0;
LASSERT(mod != LP_POISON);
+ LASSERT(mod->mod_open_req != NULL);
+ mdc_free_open(mod);
mod->mod_och = NULL;
och->och_mod = NULL;
if (mod) {
if (rc != 0)
mod->mod_close_req = NULL;
+ LASSERT(mod->mod_open_req != NULL);
+ mdc_free_open(mod);
+
/* Since now, mod is accessed through setattr req only,
* thus DW req does not keep a reference on mod anymore. */
obd_mod_put(mod);
INIT_LIST_HEAD(&imp->imp_replay_list);
INIT_LIST_HEAD(&imp->imp_sending_list);
INIT_LIST_HEAD(&imp->imp_delayed_list);
+ INIT_LIST_HEAD(&imp->imp_committed_list);
+ imp->imp_replay_cursor = &imp->imp_committed_list;
spin_lock_init(&imp->imp_lock);
imp->imp_last_success_conn = 0;
imp->imp_state = LUSTRE_IMP_NEW;
"short_io",
"pingless",
"flock_deadlock",
+ "disp_stripe",
"unknown",
NULL
};
}
EXPORT_SYMBOL(ptlrpc_unregister_reply);
+static void ptlrpc_free_request(struct ptlrpc_request *req)
+{
+ spin_lock(&req->rq_lock);
+ req->rq_replay = 0;
+ spin_unlock(&req->rq_lock);
+
+ if (req->rq_commit_cb != NULL)
+ req->rq_commit_cb(req);
+ list_del_init(&req->rq_replay_list);
+
+ __ptlrpc_req_finished(req, 1);
+}
+
+/**
+ * the request is committed and dropped from the replay list of its import
+ */
+void ptlrpc_request_committed(struct ptlrpc_request *req, int force)
+{
+ struct obd_import *imp = req->rq_import;
+
+ spin_lock(&imp->imp_lock);
+ if (list_empty(&req->rq_replay_list)) {
+ spin_unlock(&imp->imp_lock);
+ return;
+ }
+
+ if (force || req->rq_transno <= imp->imp_peer_committed_transno)
+ ptlrpc_free_request(req);
+
+ spin_unlock(&imp->imp_lock);
+}
+EXPORT_SYMBOL(ptlrpc_request_committed);
+
/**
* Iterates through replay_list on import and prunes
* all requests have transno smaller than last_committed for the
*/
void ptlrpc_free_committed(struct obd_import *imp)
{
- struct list_head *tmp, *saved;
- struct ptlrpc_request *req;
+ struct ptlrpc_request *req, *saved;
struct ptlrpc_request *last_req = NULL; /* temporary fire escape */
+ bool skip_committed_list = true;
LASSERT(imp != NULL);
CDEBUG(D_RPCTRACE, "%s: committing for last_committed "LPU64" gen %d\n",
imp->imp_obd->obd_name, imp->imp_peer_committed_transno,
imp->imp_generation);
+
+ if (imp->imp_generation != imp->imp_last_generation_checked)
+ skip_committed_list = false;
+
imp->imp_last_transno_checked = imp->imp_peer_committed_transno;
imp->imp_last_generation_checked = imp->imp_generation;
- list_for_each_safe(tmp, saved, &imp->imp_replay_list) {
- req = list_entry(tmp, struct ptlrpc_request,
- rq_replay_list);
-
+ list_for_each_entry_safe(req, saved, &imp->imp_replay_list,
+ rq_replay_list) {
/* XXX ok to remove when 1357 resolved - rread 05/29/03 */
LASSERT(req != last_req);
last_req = req;
GOTO(free_req, 0);
}
- if (req->rq_replay) {
- DEBUG_REQ(D_RPCTRACE, req, "keeping (FL_REPLAY)");
- continue;
- }
-
/* not yet committed */
if (req->rq_transno > imp->imp_peer_committed_transno) {
DEBUG_REQ(D_RPCTRACE, req, "stopping search");
break;
}
+ if (req->rq_replay) {
+ DEBUG_REQ(D_RPCTRACE, req, "keeping (FL_REPLAY)");
+ list_move_tail(&req->rq_replay_list,
+ &imp->imp_committed_list);
+ continue;
+ }
+
DEBUG_REQ(D_INFO, req, "commit (last_committed "LPU64")",
imp->imp_peer_committed_transno);
free_req:
- spin_lock(&req->rq_lock);
- req->rq_replay = 0;
- spin_unlock(&req->rq_lock);
- if (req->rq_commit_cb != NULL)
- req->rq_commit_cb(req);
- list_del_init(&req->rq_replay_list);
- __ptlrpc_req_finished(req, 1);
+ ptlrpc_free_request(req);
+ }
+ if (skip_committed_list)
+ return;
+
+ list_for_each_entry_safe(req, saved, &imp->imp_committed_list,
+ rq_replay_list) {
+ LASSERT(req->rq_transno != 0);
+ if (req->rq_import_generation < imp->imp_generation) {
+ DEBUG_REQ(D_RPCTRACE, req, "free stale open request");
+ ptlrpc_free_request(req);
+ }
}
}
struct ptlrpc_request *req;
struct list_head *tmp;
- if (list_empty(&imp->imp_replay_list))
- return 0;
- tmp = imp->imp_replay_list.next;
- req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
- *transno = req->rq_transno;
- if (req->rq_transno == 0) {
- DEBUG_REQ(D_ERROR, req, "zero transno in replay");
- LBUG();
+ /* The requests in committed_list always have smaller transnos than
+ * the requests in replay_list */
+ if (!list_empty(&imp->imp_committed_list)) {
+ tmp = imp->imp_committed_list.next;
+ req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
+ *transno = req->rq_transno;
+ if (req->rq_transno == 0) {
+ DEBUG_REQ(D_ERROR, req,
+ "zero transno in committed_list");
+ LBUG();
+ }
+ return 1;
}
-
- return 1;
+ if (!list_empty(&imp->imp_replay_list)) {
+ tmp = imp->imp_replay_list.next;
+ req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
+ *transno = req->rq_transno;
+ if (req->rq_transno == 0) {
+ DEBUG_REQ(D_ERROR, req, "zero transno in replay_list");
+ LBUG();
+ }
+ return 1;
+ }
+ return 0;
}
/**
* imp_lock is being held by ptlrpc_replay, but it's not. it's
* just a little race...
*/
- list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
+
+ /* Replay all the committed open requests on committed_list first */
+ if (!list_empty(&imp->imp_committed_list)) {
+ tmp = imp->imp_committed_list.prev;
req = list_entry(tmp, struct ptlrpc_request,
rq_replay_list);
- /* If need to resend the last sent transno (because a
- reconnect has occurred), then stop on the matching
- req and send it again. If, however, the last sent
- transno has been committed then we continue replay
- from the next request. */
+ /* The last request on committed_list hasn't been replayed */
if (req->rq_transno > last_transno) {
- if (imp->imp_resend_replay)
- lustre_msg_add_flags(req->rq_reqmsg,
- MSG_RESENT);
- break;
+ /* Since the imp_committed_list is immutable before
+ * all of it's requests being replayed, it's safe to
+ * use a cursor to accelerate the search */
+ imp->imp_replay_cursor = imp->imp_replay_cursor->next;
+
+ while (imp->imp_replay_cursor !=
+ &imp->imp_committed_list) {
+ req = list_entry(imp->imp_replay_cursor,
+ struct ptlrpc_request,
+ rq_replay_list);
+ if (req->rq_transno > last_transno)
+ break;
+
+ req = NULL;
+ imp->imp_replay_cursor =
+ imp->imp_replay_cursor->next;
+ }
+ } else {
+ /* All requests on committed_list have been replayed */
+ imp->imp_replay_cursor = &imp->imp_committed_list;
+ req = NULL;
+ }
+ }
+
+ /* All the requests in committed list have been replayed, let's replay
+ * the imp_replay_list */
+ if (req == NULL) {
+ list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
+ req = list_entry(tmp, struct ptlrpc_request,
+ rq_replay_list);
+
+ if (req->rq_transno > last_transno)
+ break;
+ req = NULL;
}
- req = NULL;
}
+ /* If need to resend the last sent transno (because a reconnect
+ * has occurred), then stop on the matching req and send it again.
+ * If, however, the last sent transno has been committed then we
+ * continue replay from the next request. */
+ if (req != NULL && imp->imp_resend_replay)
+ lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
+
spin_lock(&imp->imp_lock);
imp->imp_resend_replay = 0;
spin_unlock(&imp->imp_lock);