}
}
-struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd,
+struct ib_mr *kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
int negotiated_nfrags)
{
- __u16 nfrags = (negotiated_nfrags != -1) ?
- negotiated_nfrags : *kiblnd_tunables.kib_map_on_demand;
+ kib_net_t *net = ni->ni_data;
+ kib_hca_dev_t *hdev = net->ibn_dev->ibd_hdev;
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+ __u16 nfrags;
+ int mod;
+
+ tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ mod = tunables->lnd_map_on_demand;
+ nfrags = (negotiated_nfrags != -1) ? negotiated_nfrags : mod;
LASSERT(hdev->ibh_mrs);
- if (*kiblnd_tunables.kib_map_on_demand > 0 &&
- nfrags <= rd->rd_nfrags)
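+ /*
+ * With map-on-demand enabled, a descriptor carrying at least the
+ * negotiated number of fragments is mapped on demand, so return
+ * NULL instead of the global DMA MR.
+ */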
+ if (mod > 0 && nfrags <= rd->rd_nfrags)
return NULL;
return hdev->ibh_mrs;
}
}
-static int kiblnd_fmr_pool_size(int ncpts)
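+/* per-CPT share of the configured FMR pool size, never below IBLND_FMR_POOL */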
+static int
+kiblnd_fmr_pool_size(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
+ int ncpts)
{
- int size = *kiblnd_tunables.kib_fmr_pool_size / ncpts;
+ int size = tunables->lnd_fmr_pool_size / ncpts;
return max(IBLND_FMR_POOL, size);
}
-static int kiblnd_fmr_flush_trigger(int ncpts)
+static int
+kiblnd_fmr_flush_trigger(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
+ int ncpts)
{
- int size = *kiblnd_tunables.kib_fmr_flush_trigger / ncpts;
+ int size = tunables->lnd_fmr_flush_trigger / ncpts;
return max(IBLND_FMR_POOL_FLUSH, size);
}
.dirty_watermark = fps->fps_flush_trigger,
.flush_function = NULL,
.flush_arg = NULL,
- .cache = !!*kiblnd_tunables.kib_fmr_cache};
+ .cache = !!fps->fps_cache };
int rc = 0;
fpo->fmr.fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd,
}
}
-static int kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt,
- kib_net_t *net, int pool_size,
- int flush_trigger)
+static int
+kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, int ncpts,
+ kib_net_t *net,
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables)
{
kib_fmr_pool_t *fpo;
int rc;
fps->fps_net = net;
fps->fps_cpt = cpt;
- fps->fps_pool_size = pool_size;
- fps->fps_flush_trigger = flush_trigger;
+
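+ /* size the pool set from the per-NI tunables */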
+ fps->fps_pool_size = kiblnd_fmr_pool_size(tunables, ncpts);
+ fps->fps_flush_trigger = kiblnd_fmr_flush_trigger(tunables, ncpts);
+ fps->fps_cache = tunables->lnd_fmr_cache;
+
spin_lock_init(&fps->fps_lock);
INIT_LIST_HEAD(&fps->fps_pool_list);
INIT_LIST_HEAD(&fps->fps_failed_pool_list);
}
}
-static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
+static int kiblnd_net_init_pools(kib_net_t *net, lnet_ni_t *ni, __u32 *cpts,
+ int ncpts)
{
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
unsigned long flags;
int cpt;
- int rc = 0;
+ int rc;
int i;
+ tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+
read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- if (!*kiblnd_tunables.kib_map_on_demand) {
+ if (!tunables->lnd_map_on_demand) {
read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
goto create_tx_pool;
}
read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- if (*kiblnd_tunables.kib_fmr_pool_size <
- *kiblnd_tunables.kib_ntx / 4) {
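+ /*
+ * ntx is still a module-wide tunable; the FMR pool must be able
+ * to cover at least a quarter of the tx descriptors.
+ */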
+ if (tunables->lnd_fmr_pool_size < *kiblnd_tunables.kib_ntx / 4) {
CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n",
- *kiblnd_tunables.kib_fmr_pool_size,
+ tunables->lnd_fmr_pool_size,
*kiblnd_tunables.kib_ntx / 4);
rc = -EINVAL;
goto failed;
for (i = 0; i < ncpts; i++) {
cpt = !cpts ? i : cpts[i];
- rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, net,
- kiblnd_fmr_pool_size(ncpts),
- kiblnd_fmr_flush_trigger(ncpts));
+ rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, ncpts,
+ net, tunables);
if (rc) {
CERROR("Can't initialize FMR pool for CPT %d: %d\n",
cpt, rc);
if (rc)
goto failed;
- rc = kiblnd_net_init_pools(net, ni->ni_cpts, ni->ni_ncpts);
+ rc = kiblnd_net_init_pools(net, ni, ni->ni_cpts, ni->ni_ncpts);
if (rc) {
CERROR("Failed to initialize NI pools: %d\n", rc);
goto failed;
int *kib_timeout; /* comms timeout (seconds) */
int *kib_keepalive; /* keepalive timeout (seconds) */
int *kib_ntx; /* # tx descs */
- int *kib_peercredits_hiw; /* # when eagerly to return credits */
char **kib_default_ipif; /* default IPoIB interface */
int *kib_retry_count;
int *kib_rnr_retry_count;
- int *kib_concurrent_sends; /* send work queue sizing */
int *kib_ib_mtu; /* IB MTU */
- int *kib_map_on_demand; /* map-on-demand if RD has more */
- /* fragments than this value, 0 */
- /* disable map-on-demand */
- int *kib_fmr_pool_size; /* # FMRs in pool */
- int *kib_fmr_flush_trigger; /* When to trigger FMR flush */
- int *kib_fmr_cache; /* enable FMR pool cache? */
int *kib_require_priv_port; /* accept only privileged ports */
int *kib_use_priv_port; /* use privileged port for active connect */
int *kib_nscheds; /* # threads on each CPT */
#define IBLND_CREDITS_DEFAULT 8 /* default # of peer credits */
#define IBLND_CREDITS_MAX ((typeof(((kib_msg_t *) 0)->ibm_credits)) - 1) /* Max # of peer credits */
-#define IBLND_CREDITS_HIGHWATER(v) ((v) == IBLND_MSG_VERSION_1 ? \
- IBLND_CREDIT_HIGHWATER_V1 : \
- *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */
+/* when eagerly to return credits */
+#define IBLND_CREDITS_HIGHWATER(t, v) ((v) == IBLND_MSG_VERSION_1 ? \
+ IBLND_CREDIT_HIGHWATER_V1 : \
+ (t)->lnd_peercredits_hiw)
#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(&init_net, \
cb, dev, \
int fps_cpt; /* CPT id */
int fps_pool_size;
int fps_flush_trigger;
+ int fps_cache;
int fps_increasing; /* is allocating new pool */
unsigned long fps_next_retry; /* time stamp for retry if*/
/* failed to allocate */
static inline int
kiblnd_cfg_rdma_frags(struct lnet_ni *ni)
{
- int mod = *kiblnd_tunables.kib_map_on_demand;
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+ int mod;
+
+ tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ mod = tunables->lnd_map_on_demand;
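+ /* map_on_demand of zero means advertise the maximum fragment count */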
return mod ? mod : IBLND_MAX_RDMA_FRAGS;
}
static inline int
kiblnd_concurrent_sends(int version, struct lnet_ni *ni)
{
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
int concurrent_sends;
- concurrent_sends = *kiblnd_tunables.kib_concurrent_sends;
+ tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ concurrent_sends = tunables->lnd_concurrent_sends;
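+ /* version 1 peers are bounded by the V1 message queue size */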
if (version == IBLND_MSG_VERSION_1) {
if (concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2)
static inline int
kiblnd_need_noop(kib_conn_t *conn)
{
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+ lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
+
LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
+ tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
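+ /* credits are returned eagerly once the per-NI highwater mark is reached */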
if (conn->ibc_outstanding_credits <
- IBLND_CREDITS_HIGHWATER(conn->ibc_version) &&
+ IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
!kiblnd_send_keepalive(conn))
return 0; /* No need to send NOOP */
#define KIBLND_CONN_PARAM(e) ((e)->param.conn.private_data)
#define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len)
-struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev,
- kib_rdma_desc_t *rd,
+struct ib_mr *kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
int negotiated_nfrags);
void kiblnd_map_rx_descs(kib_conn_t *conn);
void kiblnd_unmap_rx_descs(kib_conn_t *conn);
static int kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
int nfrags)
{
- kib_hca_dev_t *hdev = tx->tx_pool->tpo_hdev;
kib_net_t *net = ni->ni_data;
+ kib_hca_dev_t *hdev = net->ibn_dev->ibd_hdev;
struct ib_mr *mr = NULL;
__u32 nob;
int i;
nob += rd->rd_frags[i].rf_nob;
}
- mr = kiblnd_find_rd_dma_mr(hdev, rd, tx->tx_conn ?
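+ /* -1 means no connection yet, so no negotiated fragment limit */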
+ mr = kiblnd_find_rd_dma_mr(ni, rd, tx->tx_conn ?
tx->tx_conn->ibc_max_frags : -1);
if (mr) {
/* found pre-mapping MR */
reason = "Unknown";
break;
- case IBLND_REJECT_RDMA_FRAGS:
+ case IBLND_REJECT_RDMA_FRAGS: {
+ struct lnet_ioctl_config_lnd_tunables *tunables;
+
if (!cp) {
reason = "can't negotiate max frags";
goto out;
}
- if (!*kiblnd_tunables.kib_map_on_demand) {
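+ /* fragment count can only be renegotiated with map-on-demand enabled */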
+ tunables = peer->ibp_ni->ni_lnd_tunables;
+ if (!tunables->lt_tun_u.lt_o2ib.lnd_map_on_demand) {
reason = "map_on_demand must be enabled";
goto out;
}
peer->ibp_max_frags = frag_num;
reason = "rdma fragments";
break;
-
+ }
case IBLND_REJECT_MSG_QUEUE_SIZE:
if (!cp) {
reason = "can't negotiate queue depth";
.kib_timeout = &timeout,
.kib_keepalive = &keepalive,
.kib_ntx = &ntx,
- .kib_peercredits_hiw = &peer_credits_hiw,
.kib_default_ipif = &ipif_name,
.kib_retry_count = &retry_count,
.kib_rnr_retry_count = &rnr_retry_count,
- .kib_concurrent_sends = &concurrent_sends,
.kib_ib_mtu = &ib_mtu,
- .kib_map_on_demand = &map_on_demand,
- .kib_fmr_pool_size = &fmr_pool_size,
- .kib_fmr_flush_trigger = &fmr_flush_trigger,
- .kib_fmr_cache = &fmr_cache,
.kib_require_priv_port = &require_privileged_port,
.kib_use_priv_port = &use_privileged_port,
.kib_nscheds = &nscheds
int kiblnd_tunables_setup(struct lnet_ni *ni)
{
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+
+ /*
+ * if no tunables were specified, set up the tunables with
+ * default values
+ */
+ if (!ni->ni_lnd_tunables) {
+ LIBCFS_ALLOC(ni->ni_lnd_tunables,
+ sizeof(*ni->ni_lnd_tunables));
+ if (!ni->ni_lnd_tunables)
+ return -ENOMEM;
+
+ memcpy(&ni->ni_lnd_tunables->lt_tun_u.lt_o2ib,
+ &default_tunables, sizeof(*tunables));
+ }
+ tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+
+ /* Current API version */
+ tunables->lnd_version = 0;
+
if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
*kiblnd_tunables.kib_ib_mtu);
if (ni->ni_peertxcredits > credits)
ni->ni_peertxcredits = credits;
- if (*kiblnd_tunables.kib_peercredits_hiw < ni->ni_peertxcredits / 2)
- *kiblnd_tunables.kib_peercredits_hiw = ni->ni_peertxcredits / 2;
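+ /*
+ * An unset highwater falls back to the module default, then gets
+ * clamped to [peertxcredits / 2, peertxcredits - 1].
+ */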
+ if (!tunables->lnd_peercredits_hiw)
+ tunables->lnd_peercredits_hiw = peer_credits_hiw;
- if (*kiblnd_tunables.kib_peercredits_hiw >= ni->ni_peertxcredits)
- *kiblnd_tunables.kib_peercredits_hiw = ni->ni_peertxcredits - 1;
+ if (tunables->lnd_peercredits_hiw < ni->ni_peertxcredits / 2)
+ tunables->lnd_peercredits_hiw = ni->ni_peertxcredits / 2;
- if (*kiblnd_tunables.kib_map_on_demand < 0 ||
- *kiblnd_tunables.kib_map_on_demand > IBLND_MAX_RDMA_FRAGS)
- *kiblnd_tunables.kib_map_on_demand = 0; /* disable map-on-demand */
+ if (tunables->lnd_peercredits_hiw >= ni->ni_peertxcredits)
+ tunables->lnd_peercredits_hiw = ni->ni_peertxcredits - 1;
- if (*kiblnd_tunables.kib_map_on_demand == 1)
- *kiblnd_tunables.kib_map_on_demand = 2; /* don't make sense to create map if only one fragment */
+ if (tunables->lnd_map_on_demand < 0 ||
+ tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) {
+ /* disable map-on-demand */
+ tunables->lnd_map_on_demand = 0;
+ }
- if (!*kiblnd_tunables.kib_concurrent_sends) {
- if (*kiblnd_tunables.kib_map_on_demand > 0 &&
- *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8)
- *kiblnd_tunables.kib_concurrent_sends = ni->ni_peertxcredits * 2;
- else
- *kiblnd_tunables.kib_concurrent_sends = ni->ni_peertxcredits;
+ if (tunables->lnd_map_on_demand == 1) {
+ /* it makes no sense to create a map for only one fragment */
+ tunables->lnd_map_on_demand = 2;
}
- if (*kiblnd_tunables.kib_concurrent_sends > ni->ni_peertxcredits * 2)
- *kiblnd_tunables.kib_concurrent_sends = ni->ni_peertxcredits * 2;
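+ /* derive a default from the peer credit count when unset */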
+ if (!tunables->lnd_concurrent_sends) {
+ if (tunables->lnd_map_on_demand > 0 &&
+ tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) {
+ tunables->lnd_concurrent_sends =
+ ni->ni_peertxcredits * 2;
+ } else {
+ tunables->lnd_concurrent_sends = ni->ni_peertxcredits;
+ }
+ }
+
+ if (tunables->lnd_concurrent_sends > ni->ni_peertxcredits * 2)
+ tunables->lnd_concurrent_sends = ni->ni_peertxcredits * 2;
- if (*kiblnd_tunables.kib_concurrent_sends < ni->ni_peertxcredits / 2)
- *kiblnd_tunables.kib_concurrent_sends = ni->ni_peertxcredits / 2;
+ if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits / 2)
+ tunables->lnd_concurrent_sends = ni->ni_peertxcredits / 2;
- if (*kiblnd_tunables.kib_concurrent_sends < ni->ni_peertxcredits) {
+ if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits) {
CWARN("Concurrent sends %d is lower than message queue size: %d, performance may drop slightly.\n",
- *kiblnd_tunables.kib_concurrent_sends, ni->ni_peertxcredits);
+ tunables->lnd_concurrent_sends, ni->ni_peertxcredits);
}
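+ /* FMR tunables left at zero fall back to the module parameters */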
+ if (!tunables->lnd_fmr_pool_size)
+ tunables->lnd_fmr_pool_size = fmr_pool_size;
+ if (!tunables->lnd_fmr_flush_trigger)
+ tunables->lnd_fmr_flush_trigger = fmr_flush_trigger;
+ if (!tunables->lnd_fmr_cache)
+ tunables->lnd_fmr_cache = fmr_cache;
+
return 0;
}