2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <linux/lockdep.h>
44 #include <rdma/ib_cache.h>
46 #include <linux/atomic.h>
48 #include <scsi/scsi.h>
49 #include <scsi/scsi_device.h>
50 #include <scsi/scsi_dbg.h>
51 #include <scsi/scsi_tcq.h>
53 #include <scsi/scsi_transport_srp.h>
/* Driver identity constants; PFX prefixes every log message from this driver. */
#define DRV_NAME "ib_srp"
#define PFX DRV_NAME ": "
#define DRV_VERSION "2.0"
#define DRV_RELDATE "July 26, 2015"
/* Standard module metadata reported by modinfo. */
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_INFO(release_date, DRV_RELDATE);
/*
 * When CONFIG_DYNAMIC_DEBUG is disabled the dynamic-debug helper macros do
 * not exist; provide no-op fallbacks so the code below compiles either way.
 *
 * NOTE(review): the chunk as extracted was missing the matching #endif,
 * which would break preprocessing of everything that follows; restored here.
 */
#if !defined(CONFIG_DYNAMIC_DEBUG)
#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
#define DYNAMIC_DEBUG_BRANCH(descriptor) false
#endif
/* Tunable module parameters; all are read-only after module load (0444). */
static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr = true;
static bool register_always = true;
static bool never_register;
static int topspin_workarounds = 1;
module_param(srp_sg_tablesize, uint, 0444);
MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
module_param(cmd_sg_entries, uint, 0444);
MODULE_PARM_DESC(cmd_sg_entries,
"Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
module_param(indirect_sg_entries, uint, 0444);
MODULE_PARM_DESC(indirect_sg_entries,
"Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
module_param(allow_ext_sg, bool, 0444);
MODULE_PARM_DESC(allow_ext_sg,
"Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
"Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
"Whether to use fast registration if both FMR and fast registration are supported");
module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
"Use memory registration even for contiguous memory regions");
module_param(never_register, bool, 0444);
MODULE_PARM_DESC(never_register, "Never register memory");
/* The three timeout parameters below share a validating get/set pair. */
static const struct kernel_param_ops srp_tmo_ops;
static int srp_reconnect_delay = 10;
/* NOTE(review): the trailing permission argument of each module_param_cb()
 * call below was lost in extraction; confirm against the upstream source. */
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
static int srp_fast_io_fail_tmo = 15;
module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
MODULE_PARM_DESC(fast_io_fail_tmo,
"Number of seconds between the observation of a transport"
" layer error and failing all I/O. \"off\" means that this"
" functionality is disabled.");
static int srp_dev_loss_tmo = 600;
module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
MODULE_PARM_DESC(dev_loss_tmo,
"Maximum number of seconds that the SRP transport should"
" insulate transport layer errors. After this time has been"
" exceeded the SCSI host is removed. Should be"
" between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
" if fast_io_fail_tmo has not been set. \"off\" means that"
" this functionality is disabled.");
static unsigned ch_count;
module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
"Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
/* Forward declarations for ib_client callbacks and CM/CQ handlers. */
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device, void *client_data);
static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
/* NOTE(review): continuation line of this prototype was lost in extraction. */
static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
static struct scsi_transport_template *ib_srp_transport_template;
static struct workqueue_struct *srp_remove_wq;
/* NOTE(review): .name and .add members of this initializer appear to have
 * been lost in extraction. */
static struct ib_client srp_client = {
.remove = srp_remove_one
static struct ib_sa_client srp_sa_client;
161 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
163 int tmo = *(int *)kp->arg;
166 return sprintf(buffer, "%d", tmo);
168 return sprintf(buffer, "off");
/*
 * kernel_param_ops .set handler shared by reconnect_delay, fast_io_fail_tmo
 * and dev_loss_tmo: parse the new value and cross-validate it against the
 * other two timeouts before storing it.
 * NOTE(review): local declarations, braces and error paths appear to have
 * been dropped in extraction; compare with the upstream source.
 */
static int srp_tmo_set(const char *val, const struct kernel_param *kp)
	/* Parse "off" or a decimal number of seconds. */
	res = srp_parse_tmo(&tmo, val);
	/* Validate the (reconnect_delay, fast_io_fail_tmo, dev_loss_tmo)
	 * triple with the candidate value substituted in the right slot. */
	if (kp->arg == &srp_reconnect_delay)
		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
	else if (kp->arg == &srp_fast_io_fail_tmo)
		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
	/* Commit the validated value. */
	*(int *)kp->arg = tmo;
195 static const struct kernel_param_ops srp_tmo_ops = {
200 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
202 return (struct srp_target_port *) host->hostdata;
205 static const char *srp_target_info(struct Scsi_Host *host)
207 return host_to_target(host)->target_name;
210 static int srp_target_is_topspin(struct srp_target_port *target)
212 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
213 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
215 return topspin_workarounds &&
216 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
217 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
/*
 * Allocate an SRP information unit: the srp_iu struct, a zeroed buffer of
 * @size bytes, and a DMA mapping of that buffer for @direction.
 * NOTE(review): the gfp_mask parameter line, error-unwind paths and the
 * closing lines were lost in extraction; compare with the upstream source.
 */
static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
				   enum dma_data_direction direction)
	iu = kmalloc(sizeof *iu, gfp_mask);
	iu->buf = kzalloc(size, gfp_mask);
	/* Map the buffer for DMA through the host's IB device. */
	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
	iu->direction = direction;
252 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
257 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
263 static void srp_qp_event(struct ib_event *event, void *context)
265 pr_debug("QP event %s (%d)\n",
266 ib_event_msg(event->event), event->event);
/*
 * Transition a freshly created QP to the INIT state: look up the index of
 * the target's P_Key on the local port, then set access flags and port
 * number via ib_modify_qp().
 * NOTE(review): the qp parameter line, error handling, the attribute mask
 * and the trailing kfree/return were lost in extraction.
 */
static int srp_init_qp(struct srp_target_port *target,
	struct ib_qp_attr *attr;
	attr = kmalloc(sizeof *attr, GFP_KERNEL);
	/* Resolve the P_Key value to a port P_Key table index. */
	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
				  target->srp_host->port,
				  be16_to_cpu(target->pkey),
	attr->qp_state = IB_QPS_INIT;
	/* The SRP target must be able to RDMA-read and -write our buffers. */
	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
				 IB_ACCESS_REMOTE_WRITE);
	attr->port_num = target->srp_host->port;
	ret = ib_modify_qp(qp, attr,
/*
 * Allocate a new IB CM ID for @ch, destroy the old one (if any), and
 * re-initialize the channel's path record fields from the target.
 * NOTE(review): the CM handler argument, the "if (ch->cm_id)" guard, the
 * else keyword before the IB rec_type assignment and the trailing return
 * were lost in extraction.
 */
static int srp_new_cm_id(struct srp_rdma_ch *ch)
	struct srp_target_port *target = ch->target;
	struct ib_cm_id *new_cm_id;
	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
	if (IS_ERR(new_cm_id))
		return PTR_ERR(new_cm_id);
	ib_destroy_cm_id(ch->cm_id);
	ch->cm_id = new_cm_id;
	/* Use OPA path records when the device/port supports OPA AHs. */
	if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
			    target->srp_host->port))
		ch->path.rec_type = SA_PATH_REC_TYPE_OPA;
		ch->path.rec_type = SA_PATH_REC_TYPE_IB;
	ch->path.sgid = target->sgid;
	ch->path.dgid = target->orig_dgid;
	ch->path.pkey = target->pkey;
	ch->path.service_id = target->service_id;
/*
 * Create an FMR pool sized by the target's mr_pool_size, with page size and
 * access flags taken from the SRP device.
 * NOTE(review): at least one fmr_param assignment and the closing brace
 * appear to have been dropped in extraction.
 */
static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_fmr_pool_param fmr_param;
	memset(&fmr_param, 0, sizeof(fmr_param));
	fmr_param.pool_size = target->mr_pool_size;
	/* Flush unmapped FMRs once a quarter of the pool is dirty. */
	fmr_param.dirty_watermark = fmr_param.pool_size / 4;
	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
	fmr_param.page_shift = ilog2(dev->mr_page_size);
	fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
			    IB_ACCESS_REMOTE_WRITE |
			    IB_ACCESS_REMOTE_READ);
	return ib_create_fmr_pool(dev->pd, &fmr_param);
347 * srp_destroy_fr_pool() - free the resources owned by a pool
348 * @pool: Fast registration pool to be destroyed.
/*
 * Free all fast-registration descriptors in @pool and the pool itself.
 * NOTE(review): the NULL-pool guard, the loop body (deregistering each
 * descriptor's MR) and the final kfree were lost in extraction.
 */
static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
	struct srp_fr_desc *d;
	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
366 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
367 * @device: IB device to allocate fast registration descriptors for.
368 * @pd: Protection domain associated with the FR descriptors.
369 * @pool_size: Number of descriptors to allocate.
370 * @max_page_list_len: Maximum fast registration work request page list length.
/*
 * Allocate a pool of @pool_size fast-registration descriptors, each backed
 * by an MR that can map up to @max_page_list_len pages.
 * NOTE(review): allocation-failure checks, the MR-assignment portion of the
 * loop body, the success return and the error-path labels were lost in
 * extraction.
 */
static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
					      struct ib_pd *pd, int pool_size,
					      int max_page_list_len)
	struct srp_fr_pool *pool;
	struct srp_fr_desc *d;
	int i, ret = -EINVAL;
	/* Pool header plus a flexible array of descriptors, zeroed. */
	pool = kzalloc(sizeof(struct srp_fr_pool) +
		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
	pool->size = pool_size;
	pool->max_page_list_len = max_page_list_len;
	spin_lock_init(&pool->lock);
	INIT_LIST_HEAD(&pool->free_list);
	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
		mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
			pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
				dev_name(&device->dev));
		list_add_tail(&d->entry, &pool->free_list);
	/* Error path: tear down whatever was created so far. */
	srp_destroy_fr_pool(pool);
419 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
420 * @pool: Pool to obtain descriptor from.
422 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
424 struct srp_fr_desc *d = NULL;
427 spin_lock_irqsave(&pool->lock, flags);
428 if (!list_empty(&pool->free_list)) {
429 d = list_first_entry(&pool->free_list, typeof(*d), entry);
432 spin_unlock_irqrestore(&pool->lock, flags);
438 * srp_fr_pool_put() - put an FR descriptor back in the free list
439 * @pool: Pool the descriptor was allocated from.
440 * @desc: Pointer to an array of fast registration descriptor pointers.
441 * @n: Number of descriptors to put back.
443 * Note: The caller must already have queued an invalidation request for
444 * desc->mr->rkey before calling this function.
446 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
452 spin_lock_irqsave(&pool->lock, flags);
453 for (i = 0; i < n; i++)
454 list_add(&desc[i]->entry, &pool->free_list);
455 spin_unlock_irqrestore(&pool->lock, flags);
458 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
460 struct srp_device *dev = target->srp_host->srp_dev;
462 return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
463 dev->max_pages_per_mr);
467 * srp_destroy_qp() - destroy an RDMA queue pair
468 * @qp: RDMA queue pair.
470 * Drain the qp before destroying it. This avoids that the receive
471 * completion handler can access the queue pair while it is
474 static void srp_destroy_qp(struct srp_rdma_ch *ch, struct ib_qp *qp)
476 spin_lock_irq(&ch->lock);
477 ib_process_cq_direct(ch->send_cq, -1);
478 spin_unlock_irq(&ch->lock);
/*
 * Create the IB resources for one RDMA channel: receive and send CQs, an RC
 * QP, and a memory-registration pool (FR or FMR depending on the device),
 * then install them in @ch, freeing any previous instances.
 * NOTE(review): allocation-failure gotos, the qp declaration, several error
 * labels and the success/error return paths were lost in extraction.
 */
static int srp_create_ch_ib(struct srp_rdma_ch *ch)
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_qp_init_attr *init_attr;
	struct ib_cq *recv_cq, *send_cq;
	struct ib_fmr_pool *fmr_pool = NULL;
	struct srp_fr_pool *fr_pool = NULL;
	/* Send WRs per command: 1 send + (reg + inv) per MR when using FR. */
	const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
	/* queue_size + 1 for ib_drain_rq() */
	recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
			      ch->comp_vector, IB_POLL_SOFTIRQ);
	if (IS_ERR(recv_cq)) {
		ret = PTR_ERR(recv_cq);
	send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
			      ch->comp_vector, IB_POLL_DIRECT);
	if (IS_ERR(send_cq)) {
		ret = PTR_ERR(send_cq);
	init_attr->event_handler = srp_qp_event;
	init_attr->cap.max_send_wr = m * target->queue_size;
	init_attr->cap.max_recv_wr = target->queue_size + 1;
	init_attr->cap.max_recv_sge = 1;
	init_attr->cap.max_send_sge = 1;
	init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
	init_attr->qp_type = IB_QPT_RC;
	init_attr->send_cq = send_cq;
	init_attr->recv_cq = recv_cq;
	qp = ib_create_qp(dev->pd, init_attr);
	ret = srp_init_qp(target, qp);
	/* Allocate the per-channel registration pool matching the device. */
	if (dev->use_fast_reg) {
		fr_pool = srp_alloc_fr_pool(target);
		if (IS_ERR(fr_pool)) {
			ret = PTR_ERR(fr_pool);
			shost_printk(KERN_WARNING, target->scsi_host, PFX
				     "FR pool allocation failed (%d)\n", ret);
	} else if (dev->use_fmr) {
		fmr_pool = srp_alloc_fmr_pool(target);
		if (IS_ERR(fmr_pool)) {
			ret = PTR_ERR(fmr_pool);
			shost_printk(KERN_WARNING, target->scsi_host, PFX
				     "FMR pool allocation failed (%d)\n", ret);
	/* Release any previous QP/CQs before installing the new ones. */
	srp_destroy_qp(ch, ch->qp);
	ib_free_cq(ch->recv_cq);
	ib_free_cq(ch->send_cq);
	ch->recv_cq = recv_cq;
	ch->send_cq = send_cq;
	if (dev->use_fast_reg) {
		srp_destroy_fr_pool(ch->fr_pool);
		ch->fr_pool = fr_pool;
	} else if (dev->use_fmr) {
		ib_destroy_fmr_pool(ch->fmr_pool);
		ch->fmr_pool = fmr_pool;
592 * Note: this function may be called without srp_alloc_iu_bufs() having been
593 * invoked. Hence the ch->[rt]x_ring checks.
/*
 * Release all IB resources of one channel: CM ID, registration pool, QP,
 * CQs and the rx/tx IU rings.
 * NOTE(review): several guards (cm_id/qp NULL checks, ring NULL checks) and
 * the loop-variable declaration were lost in extraction.
 */
static void srp_free_ch_ib(struct srp_target_port *target,
			   struct srp_rdma_ch *ch)
	struct srp_device *dev = target->srp_host->srp_dev;
	ib_destroy_cm_id(ch->cm_id);
	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
	if (dev->use_fast_reg) {
		srp_destroy_fr_pool(ch->fr_pool);
	} else if (dev->use_fmr) {
		ib_destroy_fmr_pool(ch->fmr_pool);
	srp_destroy_qp(ch, ch->qp);
	ib_free_cq(ch->send_cq);
	ib_free_cq(ch->recv_cq);
	/*
	 * Avoid that the SCSI error handler tries to use this channel after
	 * it has been freed. The SCSI error handler can namely continue
	 * trying to perform recovery actions after scsi_remove_host()
	 * returned.
	 */
	ch->send_cq = ch->recv_cq = NULL;
	for (i = 0; i < target->queue_size; ++i)
		srp_free_iu(target->srp_host, ch->rx_ring[i]);
	for (i = 0; i < target->queue_size; ++i)
		srp_free_iu(target->srp_host, ch->tx_ring[i]);
650 static void srp_path_rec_completion(int status,
651 struct sa_path_rec *pathrec,
654 struct srp_rdma_ch *ch = ch_ptr;
655 struct srp_target_port *target = ch->target;
659 shost_printk(KERN_ERR, target->scsi_host,
660 PFX "Got failed path rec status %d\n", status);
/*
 * Issue an SA path record query for the channel's path and wait
 * (interruptibly) for srp_path_rec_completion() to signal ch->done.
 * NOTE(review): the &ch->path argument, one component mask flag, the GFP
 * argument, the error checks around the wait and the final return were
 * lost in extraction.
 */
static int srp_lookup_path(struct srp_rdma_ch *ch)
	struct srp_target_port *target = ch->target;
	ch->path.numb_path = 1;
	init_completion(&ch->done);
	ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
					       target->srp_host->srp_dev->dev,
					       target->srp_host->port,
					       IB_SA_PATH_REC_SERVICE_ID |
					       IB_SA_PATH_REC_DGID |
					       IB_SA_PATH_REC_SGID |
					       IB_SA_PATH_REC_NUMB_PATH |
					       SRP_PATH_REC_TIMEOUT_MS,
					       srp_path_rec_completion,
					       ch, &ch->path_query);
	if (ch->path_query_id < 0)
		return ch->path_query_id;
	ret = wait_for_completion_interruptible(&ch->done);
	shost_printk(KERN_WARNING, target->scsi_host,
		     PFX "Path record query failed\n");
/*
 * Build and send the IB CM REQ carrying the SRP_LOGIN_REQ for @ch.
 * @multich selects the SRP MULTICHAN mode advertised to the target.
 * NOTE(review): the local struct wrapper around param/priv, allocation
 * checks, the else branch keyword, the kfree and the final return were
 * lost in extraction.
 */
static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
	struct srp_target_port *target = ch->target;
	struct ib_cm_req_param param;
	struct srp_login_req priv;
	req = kzalloc(sizeof *req, GFP_KERNEL);
	req->param.primary_path = &ch->path;
	req->param.alternate_path = NULL;
	req->param.service_id = target->service_id;
	req->param.qp_num = ch->qp->qp_num;
	req->param.qp_type = ch->qp->qp_type;
	req->param.private_data = &req->priv;
	req->param.private_data_len = sizeof req->priv;
	req->param.flow_control = 1;
	/* Random 24-bit starting PSN. */
	get_random_bytes(&req->param.starting_psn, 4);
	req->param.starting_psn &= 0xffffff;
	/*
	 * Pick some arbitrary defaults here; we could make these
	 * module parameters if anyone cared about setting them.
	 */
	req->param.responder_resources = 4;
	req->param.remote_cm_response_timeout = 20;
	req->param.local_cm_response_timeout = 20;
	req->param.retry_count = target->tl_retry_count;
	req->param.rnr_retry_count = 7;
	req->param.max_cm_retries = 15;
	req->priv.opcode = SRP_LOGIN_REQ;
	req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
	req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
					    SRP_BUF_FORMAT_INDIRECT);
	req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI :
			       SRP_MULTICHAN_SINGLE);
	/*
	 * In the published SRP specification (draft rev. 16a), the
	 * port identifier format is 8 bytes of ID extension followed
	 * by 8 bytes of GUID. Older drafts put the two halves in the
	 * opposite order, so that the GUID comes first.
	 *
	 * Targets conforming to these obsolete drafts can be
	 * recognized by the I/O Class they report.
	 */
	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
		memcpy(req->priv.initiator_port_id,
		       &target->sgid.global.interface_id, 8);
		memcpy(req->priv.initiator_port_id + 8,
		       &target->initiator_ext, 8);
		memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
		memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
		memcpy(req->priv.initiator_port_id,
		       &target->initiator_ext, 8);
		memcpy(req->priv.initiator_port_id + 8,
		       &target->sgid.global.interface_id, 8);
		memcpy(req->priv.target_port_id, &target->id_ext, 8);
		memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
	/*
	 * Topspin/Cisco SRP targets will reject our login unless we
	 * zero out the first 8 bytes of our initiator port ID and set
	 * the second 8 bytes to the local node GUID.
	 */
	if (srp_target_is_topspin(target)) {
		shost_printk(KERN_DEBUG, target->scsi_host,
			     PFX "Topspin/Cisco initiator port ID workaround "
			     "activated for target GUID %016llx\n",
			     be64_to_cpu(target->ioc_guid));
		memset(req->priv.initiator_port_id, 0, 8);
		memcpy(req->priv.initiator_port_id + 8,
		       &target->srp_host->srp_dev->dev->node_guid, 8);
	status = ib_send_cm_req(ch->cm_id, &req->param);
792 static bool srp_queue_remove_work(struct srp_target_port *target)
794 bool changed = false;
796 spin_lock_irq(&target->lock);
797 if (target->state != SRP_TARGET_REMOVED) {
798 target->state = SRP_TARGET_REMOVED;
801 spin_unlock_irq(&target->lock);
804 queue_work(srp_remove_wq, &target->remove_work);
809 static void srp_disconnect_target(struct srp_target_port *target)
811 struct srp_rdma_ch *ch;
814 /* XXX should send SRP_I_LOGOUT request */
816 for (i = 0; i < target->ch_count; i++) {
818 ch->connected = false;
819 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
820 shost_printk(KERN_DEBUG, target->scsi_host,
821 PFX "Sending CM DREQ failed\n");
/*
 * Free the per-request resources of one channel: registration lists, map
 * pages, and the DMA-mapped indirect descriptor buffers.
 * NOTE(review): the req_ring NULL guard, the fast-reg branch body, closing
 * braces and the final kfree of the ring itself were lost in extraction.
 */
static void srp_free_req_data(struct srp_target_port *target,
			      struct srp_rdma_ch *ch)
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = dev->dev;
	struct srp_request *req;
	for (i = 0; i < target->req_ring_size; ++i) {
		req = &ch->req_ring[i];
		if (dev->use_fast_reg) {
			kfree(req->fmr_list);
			kfree(req->map_page);
		/* Unmap before freeing the indirect descriptor buffer. */
		if (req->indirect_dma_addr) {
			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
					    target->indirect_size,
		kfree(req->indirect_desc);
/*
 * Allocate the per-request data structures for one channel: the request
 * ring, per-request MR lists (FR or FMR + map pages), and a DMA-mapped
 * indirect descriptor buffer per request.
 * NOTE(review): allocation-failure gotos, several local declarations, the
 * else keyword before the FMR branch and the success/error returns were
 * lost in extraction.
 */
static int srp_alloc_req_data(struct srp_rdma_ch *ch)
	struct srp_target_port *target = ch->target;
	struct srp_device *srp_dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = srp_dev->dev;
	struct srp_request *req;
	int i, ret = -ENOMEM;
	ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
	for (i = 0; i < target->req_ring_size; ++i) {
		req = &ch->req_ring[i];
		mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
		if (srp_dev->use_fast_reg) {
			req->fr_list = mr_list;
			req->fmr_list = mr_list;
			req->map_page = kmalloc(srp_dev->max_pages_per_mr *
						sizeof(void *), GFP_KERNEL);
		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
		if (!req->indirect_desc)
		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
					     target->indirect_size,
		if (ib_dma_mapping_error(ibdev, dma_addr))
		req->indirect_dma_addr = dma_addr;
906 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
907 * @shost: SCSI host whose attributes to remove from sysfs.
909 * Note: Any attributes defined in the host template and that did not exist
910 * before invocation of this function will be ignored.
912 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
914 struct device_attribute **attr;
916 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
917 device_remove_file(&shost->shost_dev, *attr);
/*
 * Tear down a target that has already been marked SRP_TARGET_REMOVED:
 * remove sysfs attributes, unregister the rport and SCSI host, disconnect
 * and free every channel, then drop the host reference.
 * NOTE(review): the loop-variable declaration, the per-iteration channel
 * assignments and the kfree of the channel array were lost in extraction.
 */
static void srp_remove_target(struct srp_target_port *target)
	struct srp_rdma_ch *ch;
	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
	srp_del_scsi_host_attr(target->scsi_host);
	/* Hold the rport across host removal so timers can be stopped. */
	srp_rport_get(target->rport);
	srp_remove_host(target->scsi_host);
	scsi_remove_host(target->scsi_host);
	srp_stop_rport_timers(target->rport);
	srp_disconnect_target(target);
	for (i = 0; i < target->ch_count; i++) {
		srp_free_ch_ib(target, ch);
	cancel_work_sync(&target->tl_err_work);
	srp_rport_put(target->rport);
	for (i = 0; i < target->ch_count; i++) {
		srp_free_req_data(target, ch);
	spin_lock(&target->srp_host->target_lock);
	list_del(&target->list);
	spin_unlock(&target->srp_host->target_lock);
	scsi_host_put(target->scsi_host);
953 static void srp_remove_work(struct work_struct *work)
955 struct srp_target_port *target =
956 container_of(work, struct srp_target_port, remove_work);
958 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
960 srp_remove_target(target);
963 static void srp_rport_delete(struct srp_rport *rport)
965 struct srp_target_port *target = rport->lld_data;
967 srp_queue_remove_work(target);
971 * srp_connected_ch() - number of connected channels
972 * @target: SRP target port.
974 static int srp_connected_ch(struct srp_target_port *target)
978 for (i = 0; i < target->ch_count; i++)
979 c += target->ch[i].connected;
/*
 * Connect one channel: resolve the path, send the SRP login request and
 * handle redirect/stale-connection rejections, retrying as indicated by the
 * CM handler via ch->status.
 * NOTE(review): the retry loop, switch statement, several braces and error
 * paths were lost in extraction.
 */
static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
	struct srp_target_port *target = ch->target;
	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
	ret = srp_lookup_path(ch);
	init_completion(&ch->done);
	ret = srp_send_req(ch, multich);
	ret = wait_for_completion_interruptible(&ch->done);
	/*
	 * The CM event handling code will set status to
	 * SRP_PORT_REDIRECT if we get a port redirect REJ
	 * back, or SRP_DLID_REDIRECT if we get a lid/qp
	 * redirect REJ back.
	 */
	ch->connected = true;
	case SRP_PORT_REDIRECT:
		ret = srp_lookup_path(ch);
	case SRP_DLID_REDIRECT:
	case SRP_STALE_CONN:
		shost_printk(KERN_ERR, target->scsi_host, PFX
			     "giving up on stale connection\n");
	/* Map a positive status into a generic connect failure. */
	return ret <= 0 ? ret : -ENODEV;
/*
 * Completion callback installed on local-invalidate work requests; it only
 * runs on error completions and forwards them to the common QP error path.
 */
static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	srp_handle_qp_err(cq, wc, "INV RKEY");
}
1045 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1048 struct ib_send_wr *bad_wr;
1049 struct ib_send_wr wr = {
1050 .opcode = IB_WR_LOCAL_INV,
1054 .ex.invalidate_rkey = rkey,
1057 wr.wr_cqe = &req->reg_cqe;
1058 req->reg_cqe.done = srp_inv_rkey_err_done;
1059 return ib_post_send(ch->qp, &wr, &bad_wr);
/*
 * Undo the memory registrations and DMA mappings created for a command:
 * queue rkey invalidations and return FR descriptors to the pool, or unmap
 * FMRs, then unmap the scatterlist.
 * NOTE(review): local declarations, a result check around srp_inv_rkey(),
 * the nmdesc argument of srp_fr_pool_put() and several braces were lost in
 * extraction.
 */
static void srp_unmap_data(struct scsi_cmnd *scmnd,
			   struct srp_rdma_ch *ch,
			   struct srp_request *req)
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = dev->dev;
	/* Nothing to do for commands without a mapped data buffer. */
	if (!scsi_sglist(scmnd) ||
	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
	if (dev->use_fast_reg) {
		struct srp_fr_desc **pfr;
		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
			res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
				shost_printk(KERN_ERR, target->scsi_host, PFX
					     "Queueing INV WR for rkey %#x failed (%d)\n",
					     (*pfr)->mr->rkey, res);
				/* Escalate to the transport-error worker. */
				queue_work(system_long_wq,
					   &target->tl_err_work);
		srp_fr_pool_put(ch->fr_pool, req->fr_list,
	} else if (dev->use_fmr) {
		struct ib_pool_fmr **pfmr;
		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
			ib_fmr_pool_unmap(*pfmr);
	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
			scmnd->sc_data_direction);
1104 * srp_claim_req - Take ownership of the scmnd associated with a request.
1105 * @ch: SRP RDMA channel.
1106 * @req: SRP request.
1107 * @sdev: If not NULL, only take ownership for this SCSI device.
1108 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1109 * ownership of @req->scmnd if it equals @scmnd.
1112 * Either NULL or a pointer to the SCSI command the caller became owner of.
1114 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1115 struct srp_request *req,
1116 struct scsi_device *sdev,
1117 struct scsi_cmnd *scmnd)
1119 unsigned long flags;
1121 spin_lock_irqsave(&ch->lock, flags);
1123 (!sdev || req->scmnd->device == sdev) &&
1124 (!scmnd || req->scmnd == scmnd)) {
1130 spin_unlock_irqrestore(&ch->lock, flags);
1136 * srp_free_req() - Unmap data and adjust ch->req_lim.
1137 * @ch: SRP RDMA channel.
1138 * @req: Request to be freed.
1139 * @scmnd: SCSI command associated with @req.
1140 * @req_lim_delta: Amount to be added to @target->req_lim.
1142 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1143 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1145 unsigned long flags;
1147 srp_unmap_data(scmnd, ch, req);
1149 spin_lock_irqsave(&ch->lock, flags);
1150 ch->req_lim += req_lim_delta;
1151 spin_unlock_irqrestore(&ch->lock, flags);
1154 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1155 struct scsi_device *sdev, int result)
1157 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1160 srp_free_req(ch, req, scmnd, 0);
1161 scmnd->result = result;
1162 scmnd->scsi_done(scmnd);
/*
 * srp_transport callback: fail every outstanding request on every channel
 * with DID_TRANSPORT_FAILFAST.
 * NOTE(review): the loop-variable declarations and closing braces were lost
 * in extraction.
 */
static void srp_terminate_io(struct srp_rport *rport)
	struct srp_target_port *target = rport->lld_data;
	struct srp_rdma_ch *ch;
	struct Scsi_Host *shost = target->scsi_host;
	struct scsi_device *sdev;
	/*
	 * Invoking srp_terminate_io() while srp_queuecommand() is running
	 * is not safe. Hence the warning statement below.
	 */
	shost_for_each_device(sdev, shost)
		WARN_ON_ONCE(sdev->request_queue->request_fn_active);
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];
			srp_finish_req(ch, req, NULL,
				       DID_TRANSPORT_FAILFAST << 16);
1194 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1195 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1196 * srp_reset_device() or srp_reset_host() calls will occur while this function
1197 * is in progress. One way to realize that is not to call this function
1198 * directly but to call srp_reconnect_rport() instead since that last function
1199 * serializes calls of this function via rport->mutex and also blocks
1200 * srp_queuecommand() calls before invoking this function.
/*
 * srp_transport reconnect callback (see the serialization note above):
 * disconnect, re-create CM IDs and channel IB resources, fail outstanding
 * requests with DID_RESET, then reconnect every channel.
 * NOTE(review): loop-variable declarations, the SCANNING early return body,
 * several closing braces, the multich update and the final return were lost
 * in extraction.
 */
static int srp_rport_reconnect(struct srp_rport *rport)
	struct srp_target_port *target = rport->lld_data;
	struct srp_rdma_ch *ch;
	bool multich = false;
	srp_disconnect_target(target);
	if (target->state == SRP_TARGET_SCANNING)
	/*
	 * Now get a new local CM ID so that we avoid confusing the target in
	 * case things are really fouled up. Doing so also ensures that all CM
	 * callbacks will have finished before a new QP is allocated.
	 */
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		ret += srp_new_cm_id(ch);
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];
			srp_finish_req(ch, req, NULL, DID_RESET << 16);
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		/*
		 * Whether or not creating a new CM ID succeeded, create a new
		 * QP. This guarantees that all completion callback function
		 * invocations have finished before request resetting starts.
		 */
		ret += srp_create_ch_ib(ch);
		INIT_LIST_HEAD(&ch->free_tx);
		for (j = 0; j < target->queue_size; ++j)
			list_add(&ch->tx_ring[j]->list, &ch->free_tx);
	target->qp_in_error = false;
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		ret = srp_connect_ch(ch, multich);
	shost_printk(KERN_INFO, target->scsi_host,
		     PFX "reconnect succeeded\n");
1262 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1263 unsigned int dma_len, u32 rkey)
1265 struct srp_direct_buf *desc = state->desc;
1267 WARN_ON_ONCE(!dma_len);
1269 desc->va = cpu_to_be64(dma_addr);
1270 desc->key = cpu_to_be32(rkey);
1271 desc->len = cpu_to_be32(dma_len);
1273 state->total_len += dma_len;
/*
 * Flush the pages accumulated in @state through an FMR mapping and emit the
 * resulting descriptor; single-page mappings may use the PD's unsafe global
 * rkey instead of registering.
 * NOTE(review): local declarations (io_addr), early returns, the IS_ERR
 * check on the fmr and the trailing state reset/return were lost in
 * extraction.
 */
static int srp_map_finish_fmr(struct srp_map_state *state,
			      struct srp_rdma_ch *ch)
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_pd *pd = target->pd;
	struct ib_pool_fmr *fmr;
	if (state->fmr.next >= state->fmr.end) {
		shost_printk(KERN_ERR, ch->target->scsi_host,
			     PFX "Out of MRs (mr_per_cmd = %d)\n",
			     ch->target->mr_per_cmd);
	WARN_ON_ONCE(!dev->use_fmr);
	if (state->npages == 0)
	/* Single page with a global rkey: no registration needed. */
	if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
		srp_map_desc(state, state->base_dma_addr, state->dma_len,
			     pd->unsafe_global_rkey);
	fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
				   state->npages, io_addr);
		return PTR_ERR(fmr);
	*state->fmr.next++ = fmr;
	/* Emit a descriptor covering the just-mapped region. */
	srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
		     state->dma_len, fmr->fmr->rkey);
/*
 * Completion callback installed on fast-registration work requests; it only
 * runs on error completions and forwards them to the common QP error path.
 */
static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	srp_handle_qp_err(cq, wc, "FAST REG");
}
1329 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
1330 * where to start in the first element. If sg_offset_p != NULL then
1331 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1332 * byte that has not yet been mapped.
/*
 * Map up to @sg_nents scatterlist entries with a fast-registration MR (see
 * the comment above for *sg_offset_p semantics), post the IB_WR_REG_MR, and
 * emit the resulting descriptor. Single-entry mappings may use the PD's
 * unsafe global rkey instead.
 * NOTE(review): local declarations (n, err, rkey), early returns, the
 * page-size argument of ib_map_mr_sg(), parts of the wr initialization and
 * the error-unwind tail were lost in extraction.
 */
static int srp_map_finish_fr(struct srp_map_state *state,
			     struct srp_request *req,
			     struct srp_rdma_ch *ch, int sg_nents,
			     unsigned int *sg_offset_p)
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_pd *pd = target->pd;
	struct ib_send_wr *bad_wr;
	struct ib_reg_wr wr;
	struct srp_fr_desc *desc;
	if (state->fr.next >= state->fr.end) {
		shost_printk(KERN_ERR, ch->target->scsi_host,
			     PFX "Out of MRs (mr_per_cmd = %d)\n",
			     ch->target->mr_per_cmd);
	WARN_ON_ONCE(!dev->use_fast_reg);
	/* Single SG entry with a global rkey: no registration needed. */
	if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
		unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
		srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
			     sg_dma_len(state->sg) - sg_offset,
			     pd->unsafe_global_rkey);
	desc = srp_fr_pool_get(ch->fr_pool);
	/* Bump the key portion of the rkey to fence stale remote accesses. */
	rkey = ib_inc_rkey(desc->mr->rkey);
	ib_update_fast_reg_key(desc->mr, rkey);
	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
	if (unlikely(n < 0)) {
		srp_fr_pool_put(ch->fr_pool, &desc, 1);
		pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
			 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
			 sg_offset_p ? *sg_offset_p : -1, n);
	WARN_ON_ONCE(desc->mr->length == 0);
	req->reg_cqe.done = srp_reg_mr_err_done;
	wr.wr.opcode = IB_WR_REG_MR;
	wr.wr.wr_cqe = &req->reg_cqe;
	wr.wr.send_flags = 0;
	wr.key = desc->mr->rkey;
	wr.access = (IB_ACCESS_LOCAL_WRITE |
		     IB_ACCESS_REMOTE_READ |
		     IB_ACCESS_REMOTE_WRITE);
	*state->fr.next++ = desc;
	srp_map_desc(state, desc->mr->iova,
		     desc->mr->length, desc->mr->rkey);
	err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
	if (unlikely(err)) {
		WARN_ON_ONCE(err == -ENOMEM);
/*
 * Split one scatterlist entry into MR pages and accumulate them in
 * @state, flushing to srp_map_finish_fmr() whenever the current FMR is
 * full or a page-alignment break forces a new registration.
 * NOTE(review): extracted snippet — the loop header consuming dma_len
 * and several returns are not visible here.
 */
1415 static int srp_map_sg_entry(struct srp_map_state *state,
1416 struct srp_rdma_ch *ch,
1417 struct scatterlist *sg)
1419 struct srp_target_port *target = ch->target;
1420 struct srp_device *dev = target->srp_host->srp_dev;
1421 struct ib_device *ibdev = dev->dev;
1422 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1423 unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1424 unsigned int len = 0;
1427 WARN_ON_ONCE(!dma_len);
/* Offset of the current address within an MR page. */
1430 unsigned offset = dma_addr & ~dev->mr_page_mask;
/*
 * Close out the current FMR when it is full or when a non-zero offset
 * would create a hole inside the registered region.
 */
1432 if (state->npages == dev->max_pages_per_mr ||
1433 (state->npages > 0 && offset != 0)) {
1434 ret = srp_map_finish_fmr(state, ch);
1439 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1442 state->base_dma_addr = dma_addr;
1443 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1444 state->dma_len += len;
/*
 * If the end of the MR is not on a page boundary then we need to
 * close it out and start a new one -- we can only merge at page
 * boundaries.
 */
1455 if ((dma_addr & ~dev->mr_page_mask) != 0)
1456 ret = srp_map_finish_fmr(state, ch);
/*
 * Map a scatterlist using the FMR pool: feed each entry through
 * srp_map_sg_entry() and flush any pages still pending at the end.
 * NOTE(review): extracted snippet — error checks after the calls and the
 * final return are not visible here.
 */
1460 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1461 struct srp_request *req, struct scatterlist *scat,
1464 struct scatterlist *sg;
/* mr_per_cmd bounds how many FMRs one command may consume. */
1467 state->pages = req->map_page;
1468 state->fmr.next = req->fmr_list;
1469 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1471 for_each_sg(scat, sg, count, i) {
1472 ret = srp_map_sg_entry(state, ch, sg);
/* Flush whatever pages are still accumulated. */
1477 ret = srp_map_finish_fmr(state, ch);
/*
 * Map a scatterlist using fast registration: repeatedly call
 * srp_map_finish_fr(), which registers as many entries as fit in one MR,
 * then advance state->sg past the n entries it consumed.
 * NOTE(review): extracted snippet — the enclosing loop header, count
 * bookkeeping and return are not visible here.
 */
1484 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1485 struct srp_request *req, struct scatterlist *scat,
1488 unsigned int sg_offset = 0;
1490 state->fr.next = req->fr_list;
1491 state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1500 n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1501 if (unlikely(n < 0))
/* Skip the sg entries registered in this round. */
1505 for (i = 0; i < n; i++)
1506 state->sg = sg_next(state->sg);
/*
 * Map a scatterlist without memory registration: emit one direct
 * descriptor per sg entry using the PD's unsafe global rkey.
 */
1512 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1513 struct srp_request *req, struct scatterlist *scat,
1516 struct srp_target_port *target = ch->target;
1517 struct srp_device *dev = target->srp_host->srp_dev;
1518 struct scatterlist *sg;
1521 for_each_sg(scat, sg, count, i) {
1522 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1523 ib_sg_dma_len(dev->dev, sg),
1524 target->pd->unsafe_global_rkey);
1531 * Register the indirect data buffer descriptor with the HCA.
1533 * Note: since the indirect data buffer descriptor has been allocated with
1534 * kmalloc() it is guaranteed that this buffer is a physically contiguous
/*
 * Register the indirect descriptor table buffer with the HCA so the
 * target can RDMA-read it, returning its rkey via @idb_rkey.
 * NOTE(review): extracted snippet — idb_pages declaration, #endif, error
 * checks and the return statement are not visible here.
 */
1537 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1538 void **next_mr, void **end_mr, u32 idb_len,
1541 struct srp_target_port *target = ch->target;
1542 struct srp_device *dev = target->srp_host->srp_dev;
1543 struct srp_map_state state;
1544 struct srp_direct_buf idb_desc;
1546 struct scatterlist idb_sg[1];
/* Build a private, single-descriptor mapping state. */
1549 memset(&state, 0, sizeof(state));
1550 memset(&idb_desc, 0, sizeof(idb_desc));
1551 state.gen.next = next_mr;
1552 state.gen.end = end_mr;
1553 state.desc = &idb_desc;
1554 state.base_dma_addr = req->indirect_dma_addr;
1555 state.dma_len = idb_len;
1557 if (dev->use_fast_reg) {
/* The buffer is already DMA-mapped, so patch the sg entry by hand. */
1559 sg_init_one(idb_sg, req->indirect_desc, idb_len);
1560 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1561 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1562 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1564 ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1567 WARN_ON_ONCE(ret < 1);
1568 } else if (dev->use_fmr) {
1569 state.pages = idb_pages;
1570 state.pages[0] = (req->indirect_dma_addr &
1573 ret = srp_map_finish_fmr(&state, ch);
/* The rkey filled in by the mapping path is the caller's result. */
1580 *idb_rkey = idb_desc.key;
/*
 * Debug consistency check: verify that the sum of the descriptor lengths
 * equals the SCSI buffer length and that the registered MR length does
 * not exceed it; log an error on mismatch.
 */
1585 static void srp_check_mapping(struct srp_map_state *state,
1586 struct srp_rdma_ch *ch, struct srp_request *req,
1587 struct scatterlist *scat, int count)
1589 struct srp_device *dev = ch->target->srp_host->srp_dev;
1590 struct srp_fr_desc **pfr;
1591 u64 desc_len = 0, mr_len = 0;
1594 for (i = 0; i < state->ndesc; i++)
1595 desc_len += be32_to_cpu(req->indirect_desc[i].len);
1596 if (dev->use_fast_reg)
1597 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1598 mr_len += (*pfr)->mr->length;
1599 else if (dev->use_fmr)
1600 for (i = 0; i < state->nmdesc; i++)
1601 mr_len += be32_to_cpu(req->indirect_desc[i].len);
1602 if (desc_len != scsi_bufflen(req->scmnd) ||
1603 mr_len > scsi_bufflen(req->scmnd))
1604 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1605 scsi_bufflen(req->scmnd), desc_len, mr_len,
1606 state->ndesc, state->nmdesc);
1610 * srp_map_data() - map SCSI data buffer onto an SRP request
1611 * @scmnd: SCSI command to map
1612 * @ch: SRP RDMA channel
1615 * Returns the length in bytes of the SRP_CMD IU or a negative value if
/*
 * Map the SCSI data buffer of @scmnd onto @req: DMA-map the scatterlist,
 * register it (fast-reg, FMR or plain DMA) and build either a direct or
 * an indirect SRP data descriptor in the command IU.
 * Returns the SRP_CMD IU length on success.
 * NOTE(review): extracted snippet — several declarations (fmt, ibdev
 * assignment, idb_rkey), goto labels and error paths are not visible.
 */
1618 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1619 struct srp_request *req)
1621 struct srp_target_port *target = ch->target;
1622 struct ib_pd *pd = target->pd;
1623 struct scatterlist *scat;
1624 struct srp_cmd *cmd = req->cmd->buf;
1625 int len, nents, count, ret;
1626 struct srp_device *dev;
1627 struct ib_device *ibdev;
1628 struct srp_map_state state;
1629 struct srp_indirect_buf *indirect_hdr;
1630 u32 idb_len, table_len;
/* No data phase: the bare SRP_CMD IU suffices. */
1634 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1635 return sizeof (struct srp_cmd);
1637 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1638 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1639 shost_printk(KERN_WARNING, target->scsi_host,
1640 PFX "Unhandled data direction %d\n",
1641 scmnd->sc_data_direction);
1645 nents = scsi_sg_count(scmnd);
1646 scat = scsi_sglist(scmnd);
1648 dev = target->srp_host->srp_dev;
1651 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1652 if (unlikely(count == 0))
/* Assume the direct-descriptor format until proven otherwise. */
1655 fmt = SRP_DATA_DESC_DIRECT;
1656 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1658 if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
/*
 * The midlayer only generated a single gather/scatter
 * entry, or DMA mapping coalesced everything to a
 * single entry. So a direct descriptor along with
 * the DMA MR suffices.
 */
1665 struct srp_direct_buf *buf = (void *) cmd->add_data;
1667 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1668 buf->key = cpu_to_be32(pd->unsafe_global_rkey);
1669 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
/*
 * We have more than one scatter/gather entry, so build our indirect
 * descriptor table, trying to merge as many entries as we can.
 */
1679 indirect_hdr = (void *) cmd->add_data;
1681 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1682 target->indirect_size, DMA_TO_DEVICE);
1684 memset(&state, 0, sizeof(state));
1685 state.desc = req->indirect_desc;
/* Pick the registration strategy supported by this device. */
1686 if (dev->use_fast_reg)
1687 ret = srp_map_sg_fr(&state, ch, req, scat, count);
1688 else if (dev->use_fmr)
1689 ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1691 ret = srp_map_sg_dma(&state, ch, req, scat, count);
1692 req->nmdesc = state.nmdesc;
/* Optional (dynamic-debug gated) mapping consistency check. */
1697 DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1698 "Memory mapping consistency check");
1699 if (DYNAMIC_DEBUG_BRANCH(ddm))
1700 srp_check_mapping(&state, ch, req, scat, count);
/* We've mapped the request, now pull as much of the indirect
 * descriptor table as we can into the command buffer. If this
 * target is not using an external indirect table, we are
 * guaranteed to fit into the command, as the SCSI layer won't
 * give us more S/G entries than we allow.
 */
1709 if (state.ndesc == 1) {
/*
 * Memory registration collapsed the sg-list into one entry,
 * so use a direct descriptor.
 */
1714 struct srp_direct_buf *buf = (void *) cmd->add_data;
1716 *buf = req->indirect_desc[0];
1720 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1721 !target->allow_ext_sg)) {
1722 shost_printk(KERN_ERR, target->scsi_host,
1723 "Could not fit S/G list into SRP_CMD\n");
/* Inline at most cmd_sg_cnt descriptors in the command IU. */
1728 count = min(state.ndesc, target->cmd_sg_cnt);
1729 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1730 idb_len = sizeof(struct srp_indirect_buf) + table_len;
1732 fmt = SRP_DATA_DESC_INDIRECT;
1733 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1734 len += count * sizeof (struct srp_direct_buf);
1736 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1737 count * sizeof (struct srp_direct_buf));
/* Without a global rkey the table itself must be registered too. */
1739 if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1740 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1741 idb_len, &idb_rkey);
1746 idb_rkey = cpu_to_be32(pd->unsafe_global_rkey);
1749 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1750 indirect_hdr->table_desc.key = idb_rkey;
1751 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1752 indirect_hdr->len = cpu_to_be32(state.total_len);
1754 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1755 cmd->data_out_desc_cnt = count;
1757 cmd->data_in_desc_cnt = count;
1759 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
/* buf_fmt encodes data-out format in the high nibble. */
1763 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1764 cmd->buf_fmt = fmt << 4;
/* Error path: undo the mapping before propagating the failure. */
1771 srp_unmap_data(scmnd, ch, req);
1772 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1778 * Return an IU and possible credit to the free pool
/*
 * Return a TX information unit to the channel's free list; non-response
 * IUs also give back the request-limit credit they consumed.
 */
1780 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1781 enum srp_iu_type iu_type)
1783 unsigned long flags;
1785 spin_lock_irqsave(&ch->lock, flags);
1786 list_add(&iu->list, &ch->free_tx);
/* NOTE(review): the credit-restore statement under this test is not
 * visible in this extract. */
1787 if (iu_type != SRP_IU_RSP)
1789 spin_unlock_irqrestore(&ch->lock, flags);
1793 * Must be called with ch->lock held to protect req_lim and free_tx.
1794 * If IU is not sent, it must be returned using srp_put_tx_iu().
1797 * An upper limit for the number of allocated information units for each
1799 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1800 * more than Scsi_Host.can_queue requests.
1801 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1802 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1803 * one unanswered SRP request to an initiator.
/*
 * Take a TX IU from the free list, reserving SRP_TSK_MGMT_SQ_SIZE
 * credits for task management unless a task management IU is requested.
 * Caller must hold ch->lock (see the comment block above this function).
 * NOTE(review): extracted snippet — the NULL-return statements for the
 * empty-list and no-credit cases are not visible here.
 */
1805 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1806 enum srp_iu_type iu_type)
1808 struct srp_target_port *target = ch->target;
1809 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1812 lockdep_assert_held(&ch->lock);
/* Reap send completions first so finished IUs land back on free_tx. */
1814 ib_process_cq_direct(ch->send_cq, -1);
1816 if (list_empty(&ch->free_tx))
/* Initiator responses to target requests do not consume credits */
1820 if (iu_type != SRP_IU_RSP) {
1821 if (ch->req_lim <= rsv) {
1822 ++target->zero_req_lim;
1829 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1830 list_del(&iu->list);
1835 * Note: if this function is called from inside ib_drain_sq() then it will
1836 * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
1837 * with status IB_WC_SUCCESS then that's a bug.
/*
 * Send completion handler: on success, recycle the IU onto free_tx
 * (lock held per the comment above); on error, report the QP failure.
 */
1839 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1841 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1842 struct srp_rdma_ch *ch = cq->cq_context;
1844 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1845 srp_handle_qp_err(cq, wc, "SEND");
1849 lockdep_assert_held(&ch->lock);
1851 list_add(&iu->list, &ch->free_tx);
/*
 * Post @iu as a signaled send work request of @len bytes on ch->qp.
 * NOTE(review): extracted snippet — the sge declaration, list.length
 * assignment and some wr field setup are not visible here.
 */
1854 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1856 struct srp_target_port *target = ch->target;
1858 struct ib_send_wr wr, *bad_wr;
1860 list.addr = iu->dma;
1862 list.lkey = target->lkey;
1864 iu->cqe.done = srp_send_done;
1867 wr.wr_cqe = &iu->cqe;
1870 wr.opcode = IB_WR_SEND;
1871 wr.send_flags = IB_SEND_SIGNALED;
1873 return ib_post_send(ch->qp, &wr, &bad_wr);
/*
 * Post @iu as a receive work request on ch->qp so the target can send
 * us another IU.
 */
1876 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1878 struct srp_target_port *target = ch->target;
1879 struct ib_recv_wr wr, *bad_wr;
1882 list.addr = iu->dma;
1883 list.length = iu->size;
1884 list.lkey = target->lkey;
1886 iu->cqe.done = srp_recv_done;
1889 wr.wr_cqe = &iu->cqe;
1893 return ib_post_recv(ch->qp, &wr, &bad_wr);
/*
 * Handle an SRP_RSP IU: complete either a task management request (tag
 * has SRP_TAG_TSK_MGMT set) or a normal SCSI command, updating req_lim
 * with the credit delta carried by the response.
 */
1896 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1898 struct srp_target_port *target = ch->target;
1899 struct srp_request *req;
1900 struct scsi_cmnd *scmnd;
1901 unsigned long flags;
/* Task management responses are matched against ch->tsk_mgmt_tag. */
1903 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1904 spin_lock_irqsave(&ch->lock, flags);
1905 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1906 if (rsp->tag == ch->tsk_mgmt_tag) {
1907 ch->tsk_mgmt_status = -1;
/* Byte 3 of the response data carries the TMF status. */
1908 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1909 ch->tsk_mgmt_status = rsp->data[3];
1910 complete(&ch->tsk_mgmt_done);
1912 shost_printk(KERN_ERR, target->scsi_host,
1913 "Received tsk mgmt response too late for tag %#llx\n",
1916 spin_unlock_irqrestore(&ch->lock, flags);
/* Normal command completion: look up the command by block-layer tag. */
1918 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1919 if (scmnd && scmnd->host_scribble) {
1920 req = (void *)scmnd->host_scribble;
1921 scmnd = srp_claim_req(ch, req, NULL, scmnd);
/* No owner found: only account the credit delta. */
1926 shost_printk(KERN_ERR, target->scsi_host,
1927 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1928 rsp->tag, ch - target->ch, ch->qp->qp_num);
1930 spin_lock_irqsave(&ch->lock, flags);
1931 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1932 spin_unlock_irqrestore(&ch->lock, flags);
1936 scmnd->result = rsp->status;
/* Copy sense data, capped at the midlayer's sense buffer size. */
1938 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1939 memcpy(scmnd->sense_buffer, rsp->data +
1940 be32_to_cpu(rsp->resp_data_len),
1941 min_t(int, be32_to_cpu(rsp->sense_data_len),
1942 SCSI_SENSE_BUFFERSIZE));
/* Residual count: underflow is positive, overflow is negative. */
1945 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1946 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1947 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1948 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1949 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1950 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1951 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1952 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1954 srp_free_req(ch, req, scmnd,
1955 be32_to_cpu(rsp->req_lim_delta));
1957 scmnd->host_scribble = NULL;
1958 scmnd->scsi_done(scmnd);
/*
 * Send a response IU (credit or AER reply) of @len bytes back to the
 * target, first crediting @req_delta to the channel's request limit.
 * NOTE(review): extracted snippet — the rsp/len parameters, iu/err
 * declarations and returns are not visible here.
 */
1962 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1965 struct srp_target_port *target = ch->target;
1966 struct ib_device *dev = target->srp_host->srp_dev->dev;
1967 unsigned long flags;
1971 spin_lock_irqsave(&ch->lock, flags);
1972 ch->req_lim += req_delta;
1973 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1974 spin_unlock_irqrestore(&ch->lock, flags);
1977 shost_printk(KERN_ERR, target->scsi_host, PFX
1978 "no IU available to send response\n");
/* Copy the response under CPU ownership, then hand back to the device. */
1982 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1983 memcpy(iu->buf, rsp, len);
1984 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1986 err = srp_post_send(ch, iu, len);
1988 shost_printk(KERN_ERR, target->scsi_host, PFX
1989 "unable to post response: %d\n", err);
/* Posting failed: return the IU so it is not leaked. */
1990 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
/*
 * Handle an SRP_CRED_REQ from the target by acknowledging it with an
 * SRP_CRED_RSP carrying the same tag (tag assignment not visible here).
 */
1996 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1997 struct srp_cred_req *req)
1999 struct srp_cred_rsp rsp = {
2000 .opcode = SRP_CRED_RSP,
2003 s32 delta = be32_to_cpu(req->req_lim_delta);
2005 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2006 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
2007 "problems processing SRP_CRED_REQ\n");
/*
 * Handle an SRP_AER_REQ (asynchronous event report): the event itself is
 * only logged and ignored, but it is still acknowledged with SRP_AER_RSP
 * so the credit accounting stays correct.
 */
2010 static void srp_process_aer_req(struct srp_rdma_ch *ch,
2011 struct srp_aer_req *req)
2013 struct srp_target_port *target = ch->target;
2014 struct srp_aer_rsp rsp = {
2015 .opcode = SRP_AER_RSP,
2018 s32 delta = be32_to_cpu(req->req_lim_delta);
2020 shost_printk(KERN_ERR, target->scsi_host, PFX
2021 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
2023 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2024 shost_printk(KERN_ERR, target->scsi_host, PFX
2025 "problems processing SRP_AER_REQ\n");
/*
 * Receive completion handler: dispatch the received IU by SRP opcode
 * (RSP / CRED_REQ / AER_REQ / T_LOGOUT) and repost the receive buffer.
 * NOTE(review): extracted snippet — the switch statement, case labels
 * and break statements around the dispatch calls are not visible here.
 */
2028 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2030 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2031 struct srp_rdma_ch *ch = cq->cq_context;
2032 struct srp_target_port *target = ch->target;
2033 struct ib_device *dev = target->srp_host->srp_dev->dev;
2037 if (unlikely(wc->status != IB_WC_SUCCESS)) {
2038 srp_handle_qp_err(cq, wc, "RECV");
/* Hand the buffer to the CPU before inspecting it. */
2042 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2045 opcode = *(u8 *) iu->buf;
2048 shost_printk(KERN_ERR, target->scsi_host,
2049 PFX "recv completion, opcode 0x%02x\n", opcode);
2050 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2051 iu->buf, wc->byte_len, true);
2056 srp_process_rsp(ch, iu->buf);
2060 srp_process_cred_req(ch, iu->buf);
2064 srp_process_aer_req(ch, iu->buf);
2068 /* XXX Handle target logout */
2069 shost_printk(KERN_WARNING, target->scsi_host,
2070 PFX "Got target logout request\n");
2074 shost_printk(KERN_WARNING, target->scsi_host,
2075 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2079 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
/* Repost the buffer so the receive queue does not run dry. */
2082 res = srp_post_recv(ch, iu);
2084 shost_printk(KERN_ERR, target->scsi_host,
2085 PFX "Recv failed with error code %d\n", res);
2089 * srp_tl_err_work() - handle a transport layer error
2090 * @work: Work structure embedded in an SRP target port.
2092 * Note: This function may get invoked before the rport has been created,
2093 * hence the target->rport test.
/*
 * Work item that kicks off the SRP transport-layer failure timers for
 * the rport (see kernel-doc comment above).
 */
2095 static void srp_tl_err_work(struct work_struct *work)
2097 struct srp_target_port *target;
2099 target = container_of(work, struct srp_target_port, tl_err_work);
/* NOTE(review): the target->rport NULL test mentioned in the kernel-doc
 * above is not visible in this extract. */
2101 srp_start_tl_fail_timers(target->rport);
/*
 * Common handler for failed work completions: log the first error on a
 * connected channel, schedule transport-error handling and latch
 * qp_in_error so follow-on errors are not reported repeatedly.
 */
2104 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2107 struct srp_rdma_ch *ch = cq->cq_context;
2108 struct srp_target_port *target = ch->target;
/* Only the first error of a connected channel is worth logging. */
2110 if (ch->connected && !target->qp_in_error) {
2111 shost_printk(KERN_ERR, target->scsi_host,
2112 PFX "failed %s status %s (%d) for CQE %p\n",
2113 opname, ib_wc_status_msg(wc->status), wc->status,
2115 queue_work(system_long_wq, &target->tl_err_work);
2117 target->qp_in_error = true;
/*
 * SCSI host queuecommand handler: pick the RDMA channel from the block
 * layer tag, grab a TX IU, build the SRP_CMD IU, map the data buffer and
 * post the send. Errors unwind through the unmap/put-IU paths below.
 * NOTE(review): extracted snippet — several declarations (iu, tag, idx,
 * len, ret), goto labels and the in_scsi_eh unlock pairing are not
 * visible here.
 */
2120 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2122 struct srp_target_port *target = host_to_target(shost);
2123 struct srp_rport *rport = target->rport;
2124 struct srp_rdma_ch *ch;
2125 struct srp_request *req;
2127 struct srp_cmd *cmd;
2128 struct ib_device *dev;
2129 unsigned long flags;
2133 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
/*
 * The SCSI EH thread is the only context from which srp_queuecommand()
 * can get invoked for blocked devices (SDEV_BLOCK /
 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
 * locking the rport mutex if invoked from inside the SCSI EH.
 */
2142 mutex_lock(&rport->mutex);
2144 scmnd->result = srp_chkready(target->rport);
2145 if (unlikely(scmnd->result))
/* The unique tag encodes both the hw queue (channel) and the index. */
2148 WARN_ON_ONCE(scmnd->request->tag < 0);
2149 tag = blk_mq_unique_tag(scmnd->request);
2150 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2151 idx = blk_mq_unique_tag_to_tag(tag);
2152 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2153 dev_name(&shost->shost_gendev), tag, idx,
2154 target->req_ring_size);
2156 spin_lock_irqsave(&ch->lock, flags);
2157 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2158 spin_unlock_irqrestore(&ch->lock, flags);
2163 req = &ch->req_ring[idx];
2164 dev = target->srp_host->srp_dev->dev;
2165 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2168 scmnd->host_scribble = (void *) req;
/* Build the SRP_CMD IU in the IU buffer. */
2171 memset(cmd, 0, sizeof *cmd);
2173 cmd->opcode = SRP_CMD;
2174 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2176 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2181 len = srp_map_data(scmnd, ch, req);
2183 shost_printk(KERN_ERR, target->scsi_host,
2184 PFX "Failed to map data (%d)\n", len);
/*
 * If we ran out of memory descriptors (-ENOMEM) because an
 * application is queuing many requests with more than
 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
 * to reduce queue depth temporarily.
 */
2191 scmnd->result = len == -ENOMEM ?
2192 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2196 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2199 if (srp_post_send(ch, iu, len)) {
2200 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2208 mutex_unlock(&rport->mutex);
/* Error unwind: unmap data, return the IU, complete or requeue. */
2213 srp_unmap_data(scmnd, ch, req);
2216 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
/*
 * Avoid that the loops that iterate over the request ring can
 * encounter a dangling SCSI command pointer.
 */
2225 if (scmnd->result) {
2226 scmnd->scsi_done(scmnd);
2229 ret = SCSI_MLQUEUE_HOST_BUSY;
2236 * Note: the resources allocated in this function are freed in
/*
 * Allocate the RX and TX IU rings for one RDMA channel and chain the TX
 * IUs onto the free list; on allocation failure, free what was built.
 * NOTE(review): extracted snippet — NULL checks after kcalloc, size
 * arguments to srp_alloc_iu, goto labels and returns are not visible.
 */
2239 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2241 struct srp_target_port *target = ch->target;
2244 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2248 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2253 for (i = 0; i < target->queue_size; ++i) {
2254 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2256 GFP_KERNEL, DMA_FROM_DEVICE);
2257 if (!ch->rx_ring[i])
2261 for (i = 0; i < target->queue_size; ++i) {
2262 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2264 GFP_KERNEL, DMA_TO_DEVICE);
2265 if (!ch->tx_ring[i])
/* Newly allocated TX IUs start out on the free list. */
2268 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
/* Error path: release everything allocated so far. */
2274 for (i = 0; i < target->queue_size; ++i) {
2275 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2276 srp_free_iu(target->srp_host, ch->tx_ring[i]);
/*
 * Derive the request-queue timeout (in jiffies) from the QP's local ACK
 * timeout and retry count: the longest possible completion time plus one
 * second of slack.
 */
2289 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2291 uint64_t T_tr_ns, max_compl_time_ms;
2292 uint32_t rq_tmo_jiffies;
/*
 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
 * table 91), both the QP timeout and the retry count have to be set
 * for RC QP's during the RTR to RTS transition.
 */
2299 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2300 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
/*
 * Set target->rq_tmo_jiffies to one second more than the largest time
 * it can take before an error completion is generated. See also
 * C9-140..142 in the IBTA spec for more information about how to
 * convert the QP Local ACK Timeout value to nanoseconds.
 */
2308 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2309 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2310 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2311 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2313 return rq_tmo_jiffies;
/*
 * Handle a CM REP: record login parameters, allocate IU rings, take the
 * QP through RTR and RTS, post the initial receives and send the RTU.
 * NOTE(review): extracted snippet — declarations (attr_mask, i, ret
 * init), goto error labels, kfree(qp_attr) and ch->status/done handling
 * are not visible here.
 */
2316 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2317 const struct srp_login_rsp *lrsp,
2318 struct srp_rdma_ch *ch)
2320 struct srp_target_port *target = ch->target;
2321 struct ib_qp_attr *qp_attr = NULL;
2326 if (lrsp->opcode == SRP_LOGIN_RSP) {
2327 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2328 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
/*
 * Reserve credits for task management so we don't
 * bounce requests back to the SCSI mid-layer.
 */
2334 target->scsi_host->can_queue
2335 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2336 target->scsi_host->can_queue);
2337 target->scsi_host->cmd_per_lun
2338 = min_t(int, target->scsi_host->can_queue,
2339 target->scsi_host->cmd_per_lun);
2341 shost_printk(KERN_WARNING, target->scsi_host,
2342 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2348 ret = srp_alloc_iu_bufs(ch);
2354 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
/* Transition the QP to RTR using CM-provided attributes. */
2358 qp_attr->qp_state = IB_QPS_RTR;
2359 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2363 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
/* Pre-post all receive buffers before going to RTS. */
2367 for (i = 0; i < target->queue_size; i++) {
2368 struct srp_iu *iu = ch->rx_ring[i];
2370 ret = srp_post_recv(ch, iu);
2375 qp_attr->qp_state = IB_QPS_RTS;
2376 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2380 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2382 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2386 ret = ib_send_cm_rtu(cm_id, NULL, 0);
/*
 * Handle a CM REJ: translate the rejection reason into a channel status
 * (redirect, stale connection, or -ECONNRESET), following any CM or port
 * redirection information carried in the ARI.
 * NOTE(review): extracted snippet — the switch/case/break skeleton lines
 * around these reason handlers are not visible here.
 */
2395 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2396 struct ib_cm_event *event,
2397 struct srp_rdma_ch *ch)
2399 struct srp_target_port *target = ch->target;
2400 struct Scsi_Host *shost = target->scsi_host;
2401 struct ib_class_port_info *cpi;
2404 switch (event->param.rej_rcvd.reason) {
2405 case IB_CM_REJ_PORT_CM_REDIRECT:
/* Follow the CM redirect info (LID/P_Key/QPN/GID) from the ARI. */
2406 cpi = event->param.rej_rcvd.ari;
2407 sa_path_set_dlid(&ch->path, htonl(ntohs(cpi->redirect_lid)));
2408 ch->path.pkey = cpi->redirect_pkey;
2409 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2410 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2412 ch->status = sa_path_get_dlid(&ch->path) ?
2413 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2416 case IB_CM_REJ_PORT_REDIRECT:
2417 if (srp_target_is_topspin(target)) {
/*
 * Topspin/Cisco SRP gateways incorrectly send
 * reject reason code 25 when they mean 24
 * (port redirect).
 */
2423 memcpy(ch->path.dgid.raw,
2424 event->param.rej_rcvd.ari, 16);
2426 shost_printk(KERN_DEBUG, shost,
2427 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2428 be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2429 be64_to_cpu(ch->path.dgid.global.interface_id));
2431 ch->status = SRP_PORT_REDIRECT;
2433 shost_printk(KERN_WARNING, shost,
2434 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2435 ch->status = -ECONNRESET;
2439 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2440 shost_printk(KERN_WARNING, shost,
2441 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2442 ch->status = -ECONNRESET;
2445 case IB_CM_REJ_CONSUMER_DEFINED:
/* A consumer-defined REJ may carry an SRP login reject IU. */
2446 opcode = *(u8 *) event->private_data;
2447 if (opcode == SRP_LOGIN_REJ) {
2448 struct srp_login_rej *rej = event->private_data;
2449 u32 reason = be32_to_cpu(rej->reason);
2451 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2452 shost_printk(KERN_WARNING, shost,
2453 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2455 shost_printk(KERN_WARNING, shost, PFX
2456 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2458 target->orig_dgid.raw, reason);
2460 shost_printk(KERN_WARNING, shost,
2461 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2462 " opcode 0x%02x\n", opcode);
2463 ch->status = -ECONNRESET;
2466 case IB_CM_REJ_STALE_CONN:
2467 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2468 ch->status = SRP_STALE_CONN;
2472 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2473 event->param.rej_rcvd.reason);
2474 ch->status = -ECONNRESET;
/*
 * IB CM event callback for an SRP channel: dispatch on event type,
 * record ch->status and (for events awaited by the connect path)
 * complete ch->done.
 * NOTE(review): extracted snippet — the comp flag, break statements and
 * the final return are not visible here.
 */
2478 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2480 struct srp_rdma_ch *ch = cm_id->context;
2481 struct srp_target_port *target = ch->target;
2484 switch (event->event) {
2485 case IB_CM_REQ_ERROR:
2486 shost_printk(KERN_DEBUG, target->scsi_host,
2487 PFX "Sending CM REQ failed\n");
2489 ch->status = -ECONNRESET;
2492 case IB_CM_REP_RECEIVED:
2494 srp_cm_rep_handler(cm_id, event->private_data, ch);
2497 case IB_CM_REJ_RECEIVED:
2498 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2501 srp_cm_rej_handler(cm_id, event, ch);
2504 case IB_CM_DREQ_RECEIVED:
2505 shost_printk(KERN_WARNING, target->scsi_host,
2506 PFX "DREQ received - connection closed\n");
2507 ch->connected = false;
2508 if (ib_send_cm_drep(cm_id, NULL, 0))
2509 shost_printk(KERN_ERR, target->scsi_host,
2510 PFX "Sending CM DREP failed\n");
/* Let the transport layer start its failure timers. */
2511 queue_work(system_long_wq, &target->tl_err_work);
2514 case IB_CM_TIMEWAIT_EXIT:
2515 shost_printk(KERN_ERR, target->scsi_host,
2516 PFX "connection closed\n");
/* Informational events that need no action. */
2522 case IB_CM_MRA_RECEIVED:
2523 case IB_CM_DREQ_ERROR:
2524 case IB_CM_DREP_RECEIVED:
2528 shost_printk(KERN_WARNING, target->scsi_host,
2529 PFX "Unhandled CM event %d\n", event->event);
2534 complete(&ch->done);
2540 * srp_change_queue_depth - setting device queue depth
2541 * @sdev: scsi device struct
2542 * @qdepth: requested queue depth
2544 * Returns queue depth.
/* Adjust the SCSI device queue depth; see kernel-doc comment above.
 * NOTE(review): the untagged-device fallback value returned when
 * tagged_supported is false is not visible in this extract. */
2547 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2549 if (!sdev->tagged_supported)
2551 return scsi_change_queue_depth(sdev, qdepth);
/*
 * Send an SRP task management function (@func) for @req_tag/@lun on @ch
 * and wait up to SRP_ABORT_TIMEOUT_MS for the response; on success the
 * TMF status byte is stored in *@status. Returns 0 on success, -1
 * otherwise.
 * NOTE(review): extracted snippet — iu/res declarations, the tsk_mgmt
 * buffer assignment and some early returns are not visible here.
 */
2554 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2555 u8 func, u8 *status)
2557 struct srp_target_port *target = ch->target;
2558 struct srp_rport *rport = target->rport;
2559 struct ib_device *dev = target->srp_host->srp_dev->dev;
2561 struct srp_tsk_mgmt *tsk_mgmt;
2564 if (!ch->connected || target->qp_in_error)
/*
 * Lock the rport mutex to avoid that srp_create_ch_ib() is
 * invoked while a task management function is being sent.
 */
2571 mutex_lock(&rport->mutex);
2572 spin_lock_irq(&ch->lock);
2573 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2574 spin_unlock_irq(&ch->lock);
2577 mutex_unlock(&rport->mutex);
2582 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
/* Build the SRP_TSK_MGMT IU. */
2585 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2587 tsk_mgmt->opcode = SRP_TSK_MGMT;
2588 int_to_scsilun(lun, &tsk_mgmt->lun);
2589 tsk_mgmt->tsk_mgmt_func = func;
2590 tsk_mgmt->task_tag = req_tag;
/* Allocate a fresh tag with the task-management bit set. */
2592 spin_lock_irq(&ch->lock);
2593 ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
2594 tsk_mgmt->tag = ch->tsk_mgmt_tag;
2595 spin_unlock_irq(&ch->lock);
2597 init_completion(&ch->tsk_mgmt_done);
2599 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2601 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2602 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2603 mutex_unlock(&rport->mutex);
2607 res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
2608 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
2609 if (res > 0 && status)
2610 *status = ch->tsk_mgmt_status;
2611 mutex_unlock(&rport->mutex);
2613 WARN_ON_ONCE(res < 0);
2615 return res > 0 ? 0 : -1;
/*
 * SCSI error-handler abort callback: claim the request, send an
 * SRP ABORT TASK TMF, and complete the command with DID_ABORT.
 * NOTE(review): extracted snippet — declarations (tag, ch_idx, ret) and
 * the SUCCESS/FAST_IO_FAIL/FAILED return assignments are not visible.
 */
2618 static int srp_abort(struct scsi_cmnd *scmnd)
2620 struct srp_target_port *target = host_to_target(scmnd->device->host);
2621 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2624 struct srp_rdma_ch *ch;
2627 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
/* Locate the channel that owns this command from its unique tag. */
2631 tag = blk_mq_unique_tag(scmnd->request);
2632 ch_idx = blk_mq_unique_tag_to_hwq(tag);
2633 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2635 ch = &target->ch[ch_idx];
/* Another context may already have claimed (completed) the request. */
2636 if (!srp_claim_req(ch, req, NULL, scmnd))
2638 shost_printk(KERN_ERR, target->scsi_host,
2639 "Sending SRP abort for tag %#x\n", tag);
2640 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2641 SRP_TSK_ABORT_TASK, NULL) == 0)
2643 else if (target->rport->state == SRP_RPORT_LOST)
2647 srp_free_req(ch, req, scmnd, 0);
2648 scmnd->result = DID_ABORT << 16;
2649 scmnd->scsi_done(scmnd);
/*
 * SCSI error-handler device-reset callback: send a LUN RESET TMF on the
 * first channel, then finish every outstanding request on all channels
 * with DID_RESET.
 * NOTE(review): the nested loops below both use the loop variable `i`;
 * the inner loop over the request ring clobbers the outer channel index,
 * so channels past the first are likely never processed — this looks
 * like a real bug (upstream later switched the inner loop to a separate
 * variable). Confirm against the full source before relying on it.
 */
2654 static int srp_reset_device(struct scsi_cmnd *scmnd)
2656 struct srp_target_port *target = host_to_target(scmnd->device->host);
2657 struct srp_rdma_ch *ch;
2661 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2663 ch = &target->ch[0];
2664 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2665 SRP_TSK_LUN_RESET, &status))
2670 for (i = 0; i < target->ch_count; i++) {
2671 ch = &target->ch[i];
2672 for (i = 0; i < target->req_ring_size; ++i) {
2673 struct srp_request *req = &ch->req_ring[i];
2675 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2682 static int srp_reset_host(struct scsi_cmnd *scmnd)
2684 struct srp_target_port *target = host_to_target(scmnd->device->host);
2686 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2688 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2691 static int srp_slave_alloc(struct scsi_device *sdev)
2693 struct Scsi_Host *shost = sdev->host;
2694 struct srp_target_port *target = host_to_target(shost);
2695 struct srp_device *srp_dev = target->srp_host->srp_dev;
2698 blk_queue_virt_boundary(sdev->request_queue,
2699 ~srp_dev->mr_page_mask);
2704 static int srp_slave_configure(struct scsi_device *sdev)
2706 struct Scsi_Host *shost = sdev->host;
2707 struct srp_target_port *target = host_to_target(shost);
2708 struct request_queue *q = sdev->request_queue;
2709 unsigned long timeout;
2711 if (sdev->type == TYPE_DISK) {
2712 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2713 blk_queue_rq_timeout(q, timeout);
2719 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2722 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2724 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2727 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2730 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2732 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2735 static ssize_t show_service_id(struct device *dev,
2736 struct device_attribute *attr, char *buf)
2738 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2740 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2743 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2746 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2748 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2751 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2754 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2756 return sprintf(buf, "%pI6\n", target->sgid.raw);
2759 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2762 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2763 struct srp_rdma_ch *ch = &target->ch[0];
2765 return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2768 static ssize_t show_orig_dgid(struct device *dev,
2769 struct device_attribute *attr, char *buf)
2771 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2773 return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2776 static ssize_t show_req_lim(struct device *dev,
2777 struct device_attribute *attr, char *buf)
2779 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2780 struct srp_rdma_ch *ch;
2781 int i, req_lim = INT_MAX;
2783 for (i = 0; i < target->ch_count; i++) {
2784 ch = &target->ch[i];
2785 req_lim = min(req_lim, ch->req_lim);
2787 return sprintf(buf, "%d\n", req_lim);
2790 static ssize_t show_zero_req_lim(struct device *dev,
2791 struct device_attribute *attr, char *buf)
2793 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2795 return sprintf(buf, "%d\n", target->zero_req_lim);
2798 static ssize_t show_local_ib_port(struct device *dev,
2799 struct device_attribute *attr, char *buf)
2801 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2803 return sprintf(buf, "%d\n", target->srp_host->port);
2806 static ssize_t show_local_ib_device(struct device *dev,
2807 struct device_attribute *attr, char *buf)
2809 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2811 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2814 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2817 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2819 return sprintf(buf, "%d\n", target->ch_count);
2822 static ssize_t show_comp_vector(struct device *dev,
2823 struct device_attribute *attr, char *buf)
2825 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2827 return sprintf(buf, "%d\n", target->comp_vector);
2830 static ssize_t show_tl_retry_count(struct device *dev,
2831 struct device_attribute *attr, char *buf)
2833 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2835 return sprintf(buf, "%d\n", target->tl_retry_count);
2838 static ssize_t show_cmd_sg_entries(struct device *dev,
2839 struct device_attribute *attr, char *buf)
2841 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2843 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2846 static ssize_t show_allow_ext_sg(struct device *dev,
2847 struct device_attribute *attr, char *buf)
2849 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2851 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
/* Read-only sysfs attributes exposed on the SCSI host device; each maps to
 * the show_* reader of the same name above. */
2854 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
2855 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
2856 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
2857 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
2858 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
2859 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
2860 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
2861 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
2862 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
2863 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
2864 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2865 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
2866 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
2867 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
2868 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
2869 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
2871 static struct device_attribute *srp_host_attrs[] = {
2874 &dev_attr_service_id,
2878 &dev_attr_orig_dgid,
2880 &dev_attr_zero_req_lim,
2881 &dev_attr_local_ib_port,
2882 &dev_attr_local_ib_device,
2884 &dev_attr_comp_vector,
2885 &dev_attr_tl_retry_count,
2886 &dev_attr_cmd_sg_entries,
2887 &dev_attr_allow_ext_sg,
2891 static struct scsi_host_template srp_template = {
2892 .module = THIS_MODULE,
2893 .name = "InfiniBand SRP initiator",
2894 .proc_name = DRV_NAME,
2895 .slave_alloc = srp_slave_alloc,
2896 .slave_configure = srp_slave_configure,
2897 .info = srp_target_info,
2898 .queuecommand = srp_queuecommand,
2899 .change_queue_depth = srp_change_queue_depth,
2900 .eh_timed_out = srp_timed_out,
2901 .eh_abort_handler = srp_abort,
2902 .eh_device_reset_handler = srp_reset_device,
2903 .eh_host_reset_handler = srp_reset_host,
2904 .skip_settle_delay = true,
2905 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
2906 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
2908 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
2909 .use_clustering = ENABLE_CLUSTERING,
2910 .shost_attrs = srp_host_attrs,
2911 .track_queue_depth = 1,
2914 static int srp_sdev_count(struct Scsi_Host *host)
2916 struct scsi_device *sdev;
2919 shost_for_each_device(sdev, host)
2927 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2928 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2929 * removal has been scheduled.
2930 * 0 and target->state != SRP_TARGET_REMOVED upon success.
/*
 * srp_add_target() - register a newly created target port with the SCSI
 * midlayer and the SRP transport class, then scan it for LUNs.
 *
 * NOTE(review): this capture is missing several original lines (error
 * returns, closing braces, the final return); the surviving statements are
 * kept byte-for-byte below, with review comments added only.
 */
2932 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2934 struct srp_rport_identifiers ids;
2935 struct srp_rport *rport;
/* SCANNING blocks the LIVE transition until the initial scan finishes. */
2937 target->state = SRP_TARGET_SCANNING;
2938 sprintf(target->target_name, "SRP.T10:%016llX",
2939 be64_to_cpu(target->id_ext));
2941 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
/* rport identity = 8-byte id_ext followed by 8-byte ioc_guid. */
2944 memcpy(ids.port_id, &target->id_ext, 8);
2945 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2946 ids.roles = SRP_RPORT_ROLE_TARGET;
2947 rport = srp_rport_add(target->scsi_host, &ids);
2948 if (IS_ERR(rport)) {
/* Undo scsi_add_host() on rport registration failure. */
2949 scsi_remove_host(target->scsi_host);
2950 return PTR_ERR(rport);
2953 rport->lld_data = target;
2954 target->rport = rport;
/* Publish the target on the host's list before scanning. */
2956 spin_lock(&host->target_lock);
2957 list_add_tail(&target->list, &host->target_list);
2958 spin_unlock(&host->target_lock);
2960 scsi_scan_target(&target->scsi_host->shost_gendev,
2961 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
/* If any channel dropped during the scan, tear the target down again. */
2963 if (srp_connected_ch(target) < target->ch_count ||
2964 target->qp_in_error) {
2965 shost_printk(KERN_INFO, target->scsi_host,
2966 PFX "SCSI scan failed - removing SCSI host\n");
2967 srp_queue_remove_work(target);
2971 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2972 dev_name(&target->scsi_host->shost_gendev),
2973 srp_sdev_count(target->scsi_host));
/* Only move to LIVE if no concurrent removal changed the state. */
2975 spin_lock_irq(&target->lock);
2976 if (target->state == SRP_TARGET_SCANNING)
2977 target->state = SRP_TARGET_LIVE;
2978 spin_unlock_irq(&target->lock);
2984 static void srp_release_dev(struct device *dev)
2986 struct srp_host *host =
2987 container_of(dev, struct srp_host, dev);
2989 complete(&host->released);
2992 static struct class srp_class = {
2993 .name = "infiniband_srp",
2994 .dev_release = srp_release_dev
2998 * srp_conn_unique() - check whether the connection to a target is unique
3000 * @target: SRP target port.
3002 static bool srp_conn_unique(struct srp_host *host,
3003 struct srp_target_port *target)
3005 struct srp_target_port *t;
3008 if (target->state == SRP_TARGET_REMOVED)
3013 spin_lock(&host->target_lock);
3014 list_for_each_entry(t, &host->target_list, list) {
3016 target->id_ext == t->id_ext &&
3017 target->ioc_guid == t->ioc_guid &&
3018 target->initiator_ext == t->initiator_ext) {
3023 spin_unlock(&host->target_lock);
3030 * Target ports are added by writing
3032 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3033 * pkey=<P_Key>,service_id=<service ID>
3035 * to the add_target sysfs attribute.
/*
 * Bit flags for the add_target option parser; SRP_OPT_ALL lists the options
 * that every add_target request must supply.
 *
 * NOTE(review): SRP_OPT_ERR and the middle of the SRP_OPT_ALL expression
 * were missing from this capture and have been restored to the conventional
 * values — confirm against the full source.
 */
enum {
        SRP_OPT_ERR             = 0,
        SRP_OPT_ID_EXT          = 1 << 0,
        SRP_OPT_IOC_GUID        = 1 << 1,
        SRP_OPT_DGID            = 1 << 2,
        SRP_OPT_PKEY            = 1 << 3,
        SRP_OPT_SERVICE_ID      = 1 << 4,
        SRP_OPT_MAX_SECT        = 1 << 5,
        SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
        SRP_OPT_IO_CLASS        = 1 << 7,
        SRP_OPT_INITIATOR_EXT   = 1 << 8,
        SRP_OPT_CMD_SG_ENTRIES  = 1 << 9,
        SRP_OPT_ALLOW_EXT_SG    = 1 << 10,
        SRP_OPT_SG_TABLESIZE    = 1 << 11,
        SRP_OPT_COMP_VECTOR     = 1 << 12,
        SRP_OPT_TL_RETRY_COUNT  = 1 << 13,
        SRP_OPT_QUEUE_SIZE      = 1 << 14,
        SRP_OPT_ALL             = (SRP_OPT_ID_EXT       |
                                   SRP_OPT_IOC_GUID     |
                                   SRP_OPT_DGID         |
                                   SRP_OPT_PKEY         |
                                   SRP_OPT_SERVICE_ID),
};
3061 static const match_table_t srp_opt_tokens = {
3062 { SRP_OPT_ID_EXT, "id_ext=%s" },
3063 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
3064 { SRP_OPT_DGID, "dgid=%s" },
3065 { SRP_OPT_PKEY, "pkey=%x" },
3066 { SRP_OPT_SERVICE_ID, "service_id=%s" },
3067 { SRP_OPT_MAX_SECT, "max_sect=%d" },
3068 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
3069 { SRP_OPT_IO_CLASS, "io_class=%x" },
3070 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
3071 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
3072 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
3073 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
3074 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
3075 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
3076 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
3077 { SRP_OPT_ERR, NULL }
/*
 * srp_parse_options() - parse the comma-separated add_target option string
 * in @buf and fill in the matching fields of @target.
 *
 * NOTE(review): this capture is missing many original lines (error exits,
 * kfree(p)/kfree(options) cleanup, break statements, closing braces); the
 * surviving statements are kept byte-for-byte, with review comments only.
 */
3080 static int srp_parse_options(const char *buf, struct srp_target_port *target)
3082 char *options, *sep_opt;
3085 substring_t args[MAX_OPT_ARGS];
/* Work on a private copy since strsep() modifies the string. */
3091 options = kstrdup(buf, GFP_KERNEL);
3096 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3100 token = match_token(p, srp_opt_tokens, args);
/* 64-bit hex identifiers are parsed from a match_strdup() copy. */
3104 case SRP_OPT_ID_EXT:
3105 p = match_strdup(args);
3110 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3114 case SRP_OPT_IOC_GUID:
3115 p = match_strdup(args);
3120 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
/* dgid= expects exactly 32 hex digits, parsed two at a time. */
3125 p = match_strdup(args);
3130 if (strlen(p) != 32) {
3131 pr_warn("bad dest GID parameter '%s'\n", p);
3136 for (i = 0; i < 16; ++i) {
3137 strlcpy(dgid, p + i * 2, sizeof(dgid));
3138 if (sscanf(dgid, "%hhx",
3139 &target->orig_dgid.raw[i]) < 1) {
3149 if (match_hex(args, &token)) {
3150 pr_warn("bad P_Key parameter '%s'\n", p);
3153 target->pkey = cpu_to_be16(token);
3156 case SRP_OPT_SERVICE_ID:
3157 p = match_strdup(args);
3162 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3166 case SRP_OPT_MAX_SECT:
3167 if (match_int(args, &token)) {
3168 pr_warn("bad max sect parameter '%s'\n", p);
3171 target->scsi_host->max_sectors = token;
/* queue_size also sizes the send queue; extra room for RSP/TMF IUs. */
3174 case SRP_OPT_QUEUE_SIZE:
3175 if (match_int(args, &token) || token < 1) {
3176 pr_warn("bad queue_size parameter '%s'\n", p);
3179 target->scsi_host->can_queue = token;
3180 target->queue_size = token + SRP_RSP_SQ_SIZE +
3181 SRP_TSK_MGMT_SQ_SIZE;
/* Only mirror into cmd_per_lun if the user did not set it explicitly. */
3182 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3183 target->scsi_host->cmd_per_lun = token;
3186 case SRP_OPT_MAX_CMD_PER_LUN:
3187 if (match_int(args, &token) || token < 1) {
3188 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3192 target->scsi_host->cmd_per_lun = token;
3195 case SRP_OPT_IO_CLASS:
3196 if (match_hex(args, &token)) {
3197 pr_warn("bad IO class parameter '%s'\n", p);
3200 if (token != SRP_REV10_IB_IO_CLASS &&
3201 token != SRP_REV16A_IB_IO_CLASS) {
3202 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3203 token, SRP_REV10_IB_IO_CLASS,
3204 SRP_REV16A_IB_IO_CLASS);
3207 target->io_class = token;
3210 case SRP_OPT_INITIATOR_EXT:
3211 p = match_strdup(args);
3216 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3220 case SRP_OPT_CMD_SG_ENTRIES:
3221 if (match_int(args, &token) || token < 1 || token > 255) {
3222 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3226 target->cmd_sg_cnt = token;
3229 case SRP_OPT_ALLOW_EXT_SG:
3230 if (match_int(args, &token)) {
3231 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3234 target->allow_ext_sg = !!token;
3237 case SRP_OPT_SG_TABLESIZE:
3238 if (match_int(args, &token) || token < 1 ||
3239 token > SG_MAX_SEGMENTS) {
3240 pr_warn("bad max sg_tablesize parameter '%s'\n",
3244 target->sg_tablesize = token;
3247 case SRP_OPT_COMP_VECTOR:
3248 if (match_int(args, &token) || token < 0) {
3249 pr_warn("bad comp_vector parameter '%s'\n", p);
3252 target->comp_vector = token;
3255 case SRP_OPT_TL_RETRY_COUNT:
3256 if (match_int(args, &token) || token < 2 || token > 7) {
3257 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3261 target->tl_retry_count = token;
3265 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
/* After parsing, verify all mandatory options were supplied. */
3271 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3274 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3275 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3276 !(srp_opt_tokens[i].token & opt_mask))
3277 pr_warn("target creation request is missing parameter '%s'\n",
3278 srp_opt_tokens[i].pattern);
/* Warn about an explicit cmd_per_lun that exceeds the queue depth. */
3280 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3281 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3282 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3283 target->scsi_host->cmd_per_lun,
3284 target->scsi_host->can_queue);
/*
 * srp_create_target() - sysfs "add_target" store handler. Allocates a SCSI
 * host plus target port, parses the option string, creates one RDMA channel
 * per selected CPU/NUMA-node slot, connects them, and registers the target.
 *
 * NOTE(review): this capture is missing many original lines (allocation
 * failure checks, labels such as out/put/free_ch/err_disconnect, unlocks and
 * returns); the surviving statements are kept byte-for-byte, with review
 * comments added only.
 */
3291 static ssize_t srp_create_target(struct device *dev,
3292 struct device_attribute *attr,
3293 const char *buf, size_t count)
3295 struct srp_host *host =
3296 container_of(dev, struct srp_host, dev);
3297 struct Scsi_Host *target_host;
3298 struct srp_target_port *target;
3299 struct srp_rdma_ch *ch;
3300 struct srp_device *srp_dev = host->srp_dev;
3301 struct ib_device *ibdev = srp_dev->dev;
3302 int ret, node_idx, node, cpu, i;
3303 unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3304 bool multich = false;
/* The srp_target_port lives in the SCSI host's private data area. */
3306 target_host = scsi_host_alloc(&srp_template,
3307 sizeof (struct srp_target_port));
3311 target_host->transportt = ib_srp_transport_template;
3312 target_host->max_channel = 0;
3313 target_host->max_id = 1;
3314 target_host->max_lun = -1LL;
3315 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3317 target = host_to_target(target_host);
/* Defaults; may be overridden by srp_parse_options() below. */
3319 target->io_class = SRP_REV16A_IB_IO_CLASS;
3320 target->scsi_host = target_host;
3321 target->srp_host = host;
3322 target->pd = host->srp_dev->pd;
3323 target->lkey = host->srp_dev->pd->local_dma_lkey;
3324 target->cmd_sg_cnt = cmd_sg_entries;
3325 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3326 target->allow_ext_sg = allow_ext_sg;
3327 target->tl_retry_count = 7;
3328 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3331 * Avoid that the SCSI host can be removed by srp_remove_target()
3332 * before this function returns.
3334 scsi_host_get(target->scsi_host);
3336 ret = mutex_lock_interruptible(&host->add_target_mutex);
3340 ret = srp_parse_options(buf, target);
3344 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
/* Refuse duplicate (id_ext, ioc_guid, initiator_ext) connections. */
3346 if (!srp_conn_unique(target->srp_host, target)) {
3347 shost_printk(KERN_INFO, target->scsi_host,
3348 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3349 be64_to_cpu(target->id_ext),
3350 be64_to_cpu(target->ioc_guid),
3351 be64_to_cpu(target->initiator_ext));
3356 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3357 target->cmd_sg_cnt < target->sg_tablesize) {
3358 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3359 target->sg_tablesize = target->cmd_sg_cnt;
3362 if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3364 * FR and FMR can only map one HCA page per entry. If the
3365 * start address is not aligned on a HCA page boundary two
3366 * entries will be used for the head and the tail although
3367 * these two entries combined contain at most one HCA page of
3368 * data. Hence the "+ 1" in the calculation below.
3370 * The indirect data buffer descriptor is contiguous so the
3371 * memory for that buffer will only be registered if
3372 * register_always is true. Hence add one to mr_per_cmd if
3373 * register_always has been set.
3375 max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3376 (ilog2(srp_dev->mr_page_size) - 9);
3377 mr_per_cmd = register_always +
3378 (target->scsi_host->max_sectors + 1 +
3379 max_sectors_per_mr - 1) / max_sectors_per_mr;
3380 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3381 target->scsi_host->max_sectors,
3382 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3383 max_sectors_per_mr, mr_per_cmd);
3386 target_host->sg_tablesize = target->sg_tablesize;
3387 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3388 target->mr_per_cmd = mr_per_cmd;
3389 target->indirect_size = target->sg_tablesize *
3390 sizeof (struct srp_direct_buf);
3391 target->max_iu_len = sizeof (struct srp_cmd) +
3392 sizeof (struct srp_indirect_buf) +
3393 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3395 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3396 INIT_WORK(&target->remove_work, srp_remove_work);
3397 spin_lock_init(&target->lock);
3398 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
/* One channel per NUMA-node/CPU slot, capped by completion vectors. */
3403 target->ch_count = max_t(unsigned, num_online_nodes(),
3405 min(4 * num_online_nodes(),
3406 ibdev->num_comp_vectors),
3407 num_online_cpus()));
3408 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3414 for_each_online_node(node) {
3415 const int ch_start = (node_idx * target->ch_count /
3416 num_online_nodes());
3417 const int ch_end = ((node_idx + 1) * target->ch_count /
3418 num_online_nodes());
3419 const int cv_start = (node_idx * ibdev->num_comp_vectors /
3420 num_online_nodes() + target->comp_vector)
3421 % ibdev->num_comp_vectors;
3422 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3423 num_online_nodes() + target->comp_vector)
3424 % ibdev->num_comp_vectors;
3427 for_each_online_cpu(cpu) {
3428 if (cpu_to_node(cpu) != node)
3430 if (ch_start + cpu_idx >= ch_end)
3432 ch = &target->ch[ch_start + cpu_idx];
3433 ch->target = target;
3434 ch->comp_vector = cv_start == cv_end ? cv_start :
3435 cv_start + cpu_idx % (cv_end - cv_start);
3436 spin_lock_init(&ch->lock);
3437 INIT_LIST_HEAD(&ch->free_tx);
3438 ret = srp_new_cm_id(ch);
3440 goto err_disconnect;
3442 ret = srp_create_ch_ib(ch);
3444 goto err_disconnect;
3446 ret = srp_alloc_req_data(ch);
3448 goto err_disconnect;
3450 ret = srp_connect_ch(ch, multich);
3452 shost_printk(KERN_ERR, target->scsi_host,
3453 PFX "Connection %d/%d to %pI6 failed\n",
3456 ch->target->orig_dgid.raw);
/* Failure on the very first channel is fatal; later ones shrink ch_count. */
3457 if (node_idx == 0 && cpu_idx == 0) {
3460 srp_free_ch_ib(target, ch);
3461 srp_free_req_data(target, ch);
3462 target->ch_count = ch - target->ch;
3474 target->scsi_host->nr_hw_queues = target->ch_count;
3476 ret = srp_add_target(host, target);
3478 goto err_disconnect;
3480 if (target->state != SRP_TARGET_REMOVED) {
3481 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3482 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3483 be64_to_cpu(target->id_ext),
3484 be64_to_cpu(target->ioc_guid),
3485 be16_to_cpu(target->pkey),
3486 be64_to_cpu(target->service_id),
3487 target->sgid.raw, target->orig_dgid.raw);
3493 mutex_unlock(&host->add_target_mutex);
3496 scsi_host_put(target->scsi_host);
3498 scsi_host_put(target->scsi_host);
/* Error path: disconnect and free all channels that were set up. */
3503 srp_disconnect_target(target);
3506 for (i = 0; i < target->ch_count; i++) {
3507 ch = &target->ch[i];
3508 srp_free_ch_ib(target, ch);
3509 srp_free_req_data(target, ch);
/* Write-only sysfs attribute: writing an option string creates a target. */
3516 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3518 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3521 struct srp_host *host = container_of(dev, struct srp_host, dev);
3523 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3526 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3528 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3531 struct srp_host *host = container_of(dev, struct srp_host, dev);
3533 return sprintf(buf, "%d\n", host->port);
3536 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3538 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3540 struct srp_host *host;
3542 host = kzalloc(sizeof *host, GFP_KERNEL);
3546 INIT_LIST_HEAD(&host->target_list);
3547 spin_lock_init(&host->target_lock);
3548 init_completion(&host->released);
3549 mutex_init(&host->add_target_mutex);
3550 host->srp_dev = device;
3553 host->dev.class = &srp_class;
3554 host->dev.parent = device->dev->dev.parent;
3555 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3557 if (device_register(&host->dev))
3559 if (device_create_file(&host->dev, &dev_attr_add_target))
3561 if (device_create_file(&host->dev, &dev_attr_ibdev))
3563 if (device_create_file(&host->dev, &dev_attr_port))
3569 device_unregister(&host->dev);
/*
 * srp_add_one() - IB client "add" callback: probe @device's memory
 * registration capabilities (FMR/FR), allocate a PD, and create one srp_host
 * per RDMA port.
 *
 * NOTE(review): this capture is missing several original lines (allocation
 * failure checks, closing braces, free/err labels); the surviving statements
 * are kept byte-for-byte, with review comments added only.
 */
3577 static void srp_add_one(struct ib_device *device)
3579 struct srp_device *srp_dev;
3580 struct ib_device_attr *attr = &device->attrs;
3581 struct srp_host *host;
3582 int mr_page_shift, p;
3583 u64 max_pages_per_mr;
3584 unsigned int flags = 0;
3586 srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
3591 * Use the smallest page size supported by the HCA, down to a
3592 * minimum of 4096 bytes. We're unlikely to build large sglists
3593 * out of smaller entries.
3595 mr_page_shift = max(12, ffs(attr->page_size_cap) - 1);
3596 srp_dev->mr_page_size = 1 << mr_page_shift;
3597 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
/* How many MR pages fit in the device's largest MR. */
3598 max_pages_per_mr = attr->max_mr_size;
3599 do_div(max_pages_per_mr, srp_dev->mr_page_size);
3600 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
3601 attr->max_mr_size, srp_dev->mr_page_size,
3602 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
3603 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
/* FMR requires all four verbs; FR is advertised via a capability flag. */
3606 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3607 device->map_phys_fmr && device->unmap_fmr);
3608 srp_dev->has_fr = (attr->device_cap_flags &
3609 IB_DEVICE_MEM_MGT_EXTENSIONS);
3610 if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
3611 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3612 } else if (!never_register &&
3613 attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
3614 srp_dev->use_fast_reg = (srp_dev->has_fr &&
3615 (!srp_dev->has_fmr || prefer_fr));
3616 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
/* Without registration support (or by request), use the global rkey. */
3619 if (never_register || !register_always ||
3620 (!srp_dev->has_fmr && !srp_dev->has_fr))
3621 flags |= IB_PD_UNSAFE_GLOBAL_RKEY;
3623 if (srp_dev->use_fast_reg) {
3624 srp_dev->max_pages_per_mr =
3625 min_t(u32, srp_dev->max_pages_per_mr,
3626 attr->max_fast_reg_page_list_len);
3628 srp_dev->mr_max_size = srp_dev->mr_page_size *
3629 srp_dev->max_pages_per_mr;
3630 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3631 device->name, mr_page_shift, attr->max_mr_size,
3632 attr->max_fast_reg_page_list_len,
3633 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3635 INIT_LIST_HEAD(&srp_dev->dev_list);
3637 srp_dev->dev = device;
3638 srp_dev->pd = ib_alloc_pd(device, flags);
3639 if (IS_ERR(srp_dev->pd))
/* One srp_host (and sysfs entry) per physical RDMA port. */
3643 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3644 host = srp_add_port(srp_dev, p);
3646 list_add_tail(&host->list, &srp_dev->dev_list);
3649 ib_set_client_data(device, &srp_client, srp_dev);
/*
 * srp_remove_one() - IB client "remove" callback: tear down every srp_host
 * and target port created for @device, then release the PD.
 *
 * NOTE(review): this capture is missing several original lines (closing
 * braces, kfree of host/srp_dev); the surviving statements are kept
 * byte-for-byte, with review comments added only.
 */
3656 static void srp_remove_one(struct ib_device *device, void *client_data)
3658 struct srp_device *srp_dev;
3659 struct srp_host *host, *tmp_host;
3660 struct srp_target_port *target;
3662 srp_dev = client_data;
3666 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3667 device_unregister(&host->dev);
3669 * Wait for the sysfs entry to go away, so that no new
3670 * target ports can be created.
3672 wait_for_completion(&host->released);
3675 * Remove all target ports.
3677 spin_lock(&host->target_lock);
3678 list_for_each_entry(target, &host->target_list, list)
3679 srp_queue_remove_work(target);
3680 spin_unlock(&host->target_lock);
3683 * Wait for tl_err and target port removal tasks.
3685 flush_workqueue(system_long_wq);
3686 flush_workqueue(srp_remove_wq);
3691 ib_dealloc_pd(srp_dev->pd);
3696 static struct srp_function_template ib_srp_transport_functions = {
3697 .has_rport_state = true,
3698 .reset_timer_if_blocked = true,
3699 .reconnect_delay = &srp_reconnect_delay,
3700 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
3701 .dev_loss_tmo = &srp_dev_loss_tmo,
3702 .reconnect = srp_rport_reconnect,
3703 .rport_delete = srp_rport_delete,
3704 .terminate_rport_io = srp_terminate_io,
/*
 * srp_init_module() - module entry point: sanitize the module parameters,
 * create the removal workqueue, and register the transport template, class,
 * SA client and IB client.
 *
 * NOTE(review): this capture is missing several original lines (the ret
 * declaration, success/error returns, unwind labels); the surviving
 * statements are kept byte-for-byte, with review comments added only.
 */
3707 static int __init srp_init_module(void)
/* Legacy parameter: map srp_sg_tablesize onto cmd_sg_entries. */
3711 if (srp_sg_tablesize) {
3712 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3713 if (!cmd_sg_entries)
3714 cmd_sg_entries = srp_sg_tablesize;
3717 if (!cmd_sg_entries)
3718 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3720 if (cmd_sg_entries > 255) {
3721 pr_warn("Clamping cmd_sg_entries to 255\n");
3722 cmd_sg_entries = 255;
/* indirect_sg_entries must lie in [cmd_sg_entries, SG_MAX_SEGMENTS]. */
3725 if (!indirect_sg_entries)
3726 indirect_sg_entries = cmd_sg_entries;
3727 else if (indirect_sg_entries < cmd_sg_entries) {
3728 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3730 indirect_sg_entries = cmd_sg_entries;
3733 if (indirect_sg_entries > SG_MAX_SEGMENTS) {
3734 pr_warn("Clamping indirect_sg_entries to %u\n",
3736 indirect_sg_entries = SG_MAX_SEGMENTS;
3739 srp_remove_wq = create_workqueue("srp_remove");
3740 if (!srp_remove_wq) {
3746 ib_srp_transport_template =
3747 srp_attach_transport(&ib_srp_transport_functions);
3748 if (!ib_srp_transport_template)
3751 ret = class_register(&srp_class);
3753 pr_err("couldn't register class infiniband_srp\n");
3757 ib_sa_register_client(&srp_sa_client);
3759 ret = ib_register_client(&srp_client);
3761 pr_err("couldn't register IB client\n");
/* Error unwind: undo registrations in reverse order. */
3769 ib_sa_unregister_client(&srp_sa_client);
3770 class_unregister(&srp_class);
3773 srp_release_transport(ib_srp_transport_template);
3776 destroy_workqueue(srp_remove_wq);
3780 static void __exit srp_cleanup_module(void)
3782 ib_unregister_client(&srp_client);
3783 ib_sa_unregister_client(&srp_sa_client);
3784 class_unregister(&srp_class);
3785 srp_release_transport(ib_srp_transport_template);
3786 destroy_workqueue(srp_remove_wq);
/* Module entry/exit registration. */
3789 module_init(srp_init_module);
3790 module_exit(srp_cleanup_module);