/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/spinlock.h>

#include "hfi.h"
#include "mad.h"
#include "qp.h"
#include "verbs_txreq.h"
#include "trace.h"

/*
 * Validate a RWQE and fill in the SGE state.
 * Return 1 if OK.
 */
static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
{
        int i, j, ret;
        struct ib_wc wc;
        struct rvt_lkey_table *rkt;
        struct rvt_pd *pd;
        struct rvt_sge_state *ss;

        rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
        pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
        ss = &qp->r_sge;
        ss->sg_list = qp->r_sg_list;
        qp->r_len = 0;
        for (i = j = 0; i < wqe->num_sge; i++) {
                if (wqe->sg_list[i].length == 0)
                        continue;
                /* Check LKEY */
                if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
                                 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
                        goto bad_lkey;
                qp->r_len += wqe->sg_list[i].length;
                j++;
        }
        ss->num_sge = j;
        ss->total_len = qp->r_len;
        ret = 1;
        goto bail;

bad_lkey:
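        /* Unwind: drop the MR references taken for the SGEs validated so far. */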
        while (j) {
                struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;

                rvt_put_mr(sge->mr);
        }
        ss->num_sge = 0;
        memset(&wc, 0, sizeof(wc));
        wc.wr_id = wqe->wr_id;
        wc.status = IB_WC_LOC_PROT_ERR;
        wc.opcode = IB_WC_RECV;
        wc.qp = &qp->ibqp;
        /* Signal solicited completion event. */
        rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
        ret = 0;
bail:
        return ret;
}

/**
 * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE
 * @qp: the QP
 * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
 *
 * Return -1 if there is a local error, 0 if no RWQE is available,
 * otherwise return 1.
 *
 * Can be called from interrupt level.
 */
int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only)
{
        unsigned long flags;
        struct rvt_rq *rq;
        struct rvt_rwq *wq;
        struct rvt_srq *srq;
        struct rvt_rwqe *wqe;
        void (*handler)(struct ib_event *, void *);
        u32 tail;
        int ret;

        if (qp->ibqp.srq) {
                srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
                handler = srq->ibsrq.event_handler;
                rq = &srq->rq;
        } else {
                srq = NULL;
                handler = NULL;
                rq = &qp->r_rq;
        }

        spin_lock_irqsave(&rq->lock, flags);
        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
                ret = 0;
                goto unlock;
        }

        wq = rq->wq;
        tail = wq->tail;
        /* Validate tail before using it since it is user writable. */
        if (tail >= rq->size)
                tail = 0;
        if (unlikely(tail == wq->head)) {
                ret = 0;
                goto unlock;
        }
        /* Make sure entry is read after head index is read. */
        smp_rmb();
        wqe = rvt_get_rwqe_ptr(rq, tail);
        /*
         * Even though we update the tail index in memory, the verbs
         * consumer is not supposed to post more entries until a
         * completion is generated.
         */
        if (++tail >= rq->size)
                tail = 0;
        wq->tail = tail;
        if (!wr_id_only && !init_sge(qp, wqe)) {
                ret = -1;
                goto unlock;
        }
        qp->r_wr_id = wqe->wr_id;

        ret = 1;
        set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
        if (handler) {
                u32 n;

                /*
                 * Validate head pointer value and compute
                 * the number of remaining WQEs.
                 */
                n = wq->head;
                if (n >= rq->size)
                        n = 0;
                if (n < tail)
                        n += rq->size - tail;
                else
                        n -= tail;
                if (n < srq->limit) {
                        struct ib_event ev;

                        srq->limit = 0;
                        spin_unlock_irqrestore(&rq->lock, flags);
                        ev.device = qp->ibqp.device;
                        ev.element.srq = qp->ibqp.srq;
                        ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
                        handler(&ev, srq->ibsrq.srq_context);
                        goto bail;
                }
        }
unlock:
        spin_unlock_irqrestore(&rq->lock, flags);
bail:
        return ret;
}

static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
{
        return (gid->global.interface_id == id &&
                (gid->global.subnet_prefix == gid_prefix ||
                 gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX));
}

/*
 * hfi1_ruc_check_hdr - validate an incoming packet header against the QP's
 * address and partition information, and handle path migration.
 *
 * Return 0 if the header is acceptable, 1 otherwise.
 *
 * This should be called with the QP r_lock held.
 *
 * The s_lock will be acquired around the hfi1_migrate_qp() call.
 */
int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
                       int has_grh, struct rvt_qp *qp, u32 bth0)
{
        __be64 guid;
        unsigned long flags;
        u8 sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];

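        /* Migration request on an armed QP: validate the alternate path. */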
        if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
                if (!has_grh) {
                        if (qp->alt_ah_attr.ah_flags & IB_AH_GRH)
                                goto err;
                } else {
                        if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
                                goto err;
                        guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
                        if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
                                    guid))
                                goto err;
                        if (!gid_ok(
                                &hdr->u.l.grh.sgid,
                                qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
                                qp->alt_ah_attr.grh.dgid.global.interface_id))
                                goto err;
                }
                if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
                                            sc5, be16_to_cpu(hdr->lrh[3])))) {
                        hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
                                       (u16)bth0,
                                       (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
                                       0, qp->ibqp.qp_num,
                                       be16_to_cpu(hdr->lrh[3]),
                                       be16_to_cpu(hdr->lrh[1]));
                        goto err;
                }
                /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
                if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
                    ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
                        goto err;
                spin_lock_irqsave(&qp->s_lock, flags);
                hfi1_migrate_qp(qp);
                spin_unlock_irqrestore(&qp->s_lock, flags);
        } else {
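                /* Otherwise validate against the current (primary) path. */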
                if (!has_grh) {
                        if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
                                goto err;
                } else {
                        if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH))
                                goto err;
                        guid = get_sguid(ibp,
                                         qp->remote_ah_attr.grh.sgid_index);
                        if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
                                    guid))
                                goto err;
                        if (!gid_ok(
                             &hdr->u.l.grh.sgid,
                             qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
                             qp->remote_ah_attr.grh.dgid.global.interface_id))
                                goto err;
                }
                if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
                                            sc5, be16_to_cpu(hdr->lrh[3])))) {
                        hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
                                       (u16)bth0,
                                       (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
                                       0, qp->ibqp.qp_num,
                                       be16_to_cpu(hdr->lrh[3]),
                                       be16_to_cpu(hdr->lrh[1]));
                        goto err;
                }
                /* Validate the SLID. See Ch. 9.6.1.5 */
                if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid ||
                    ppd_from_ibp(ibp)->port != qp->port_num)
                        goto err;
                if (qp->s_mig_state == IB_MIG_REARM &&
                    !(bth0 & IB_BTH_MIG_REQ))
                        qp->s_mig_state = IB_MIG_ARMED;
        }

        return 0;

err:
        return 1;
}

/**
 * ruc_loopback - handle UC and RC loopback requests
 * @sqp: the sending QP
 *
 * This is called from hfi1_do_send() to
 * forward a WQE addressed to the same HFI.
 * Note that although we are single threaded due to the send engine, we still
 * have to protect against post_send().  We don't have to worry about
 * receive interrupts since this is a connected protocol and all packets
 * will pass through here.
 */
static void ruc_loopback(struct rvt_qp *sqp)
{
        struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
        struct rvt_qp *qp;
        struct rvt_swqe *wqe;
        struct rvt_sge *sge;
        unsigned long flags;
        struct ib_wc wc;
        u64 sdata;
        atomic64_t *maddr;
        enum ib_wc_status send_status;
        bool release;
        int ret;
        bool copy_last = false;
        int local_ops = 0;

        rcu_read_lock();

        /*
         * Note that we check the responder QP state after
         * checking the requester's state.
         */
        qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
                            sqp->remote_qpn);

        spin_lock_irqsave(&sqp->s_lock, flags);

        /* Return if we are already busy processing a work request. */
        if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
            !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
                goto unlock;

        sqp->s_flags |= RVT_S_BUSY;

again:
        smp_read_barrier_depends(); /* see post_one_send() */
        if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
                goto clr_busy;
        wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);

        /* Return if it is not OK to start a new work request. */
        if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
                if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
                        goto clr_busy;
                /* We are in the error state, flush the work request. */
                send_status = IB_WC_WR_FLUSH_ERR;
                goto flush_send;
        }

        /*
         * We can rely on the entry not changing without the s_lock
         * being held until we update s_last.
         * We increment s_cur to indicate s_last is in progress.
         */
        if (sqp->s_last == sqp->s_cur) {
                if (++sqp->s_cur >= sqp->s_size)
                        sqp->s_cur = 0;
        }
        spin_unlock_irqrestore(&sqp->s_lock, flags);

        if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
            qp->ibqp.qp_type != sqp->ibqp.qp_type) {
                ibp->rvp.n_pkt_drops++;
                /*
                 * For RC, the requester would timeout and retry so
                 * shortcut the timeouts and just signal too many retries.
                 */
                if (sqp->ibqp.qp_type == IB_QPT_RC)
                        send_status = IB_WC_RETRY_EXC_ERR;
                else
                        send_status = IB_WC_SUCCESS;
                goto serr;
        }

        memset(&wc, 0, sizeof(wc));
        send_status = IB_WC_SUCCESS;

        release = true;
        sqp->s_sge.sge = wqe->sg_list[0];
        sqp->s_sge.sg_list = wqe->sg_list + 1;
        sqp->s_sge.num_sge = wqe->wr.num_sge;
        sqp->s_len = wqe->length;
        switch (wqe->wr.opcode) {
        case IB_WR_REG_MR:
                goto send_comp;

        case IB_WR_LOCAL_INV:
                if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
                        if (rvt_invalidate_rkey(sqp,
                                                wqe->wr.ex.invalidate_rkey))
                                send_status = IB_WC_LOC_PROT_ERR;
                        local_ops = 1;
                }
                goto send_comp;

        case IB_WR_SEND_WITH_INV:
                if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
                        wc.wc_flags = IB_WC_WITH_INVALIDATE;
                        wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
                }
                goto send;

        case IB_WR_SEND_WITH_IMM:
                wc.wc_flags = IB_WC_WITH_IMM;
                wc.ex.imm_data = wqe->wr.ex.imm_data;
                /* FALLTHROUGH */
        case IB_WR_SEND:
send:
                ret = hfi1_rvt_get_rwqe(qp, 0);
                if (ret < 0)
                        goto op_err;
                if (!ret)
                        goto rnr_nak;
                break;

        case IB_WR_RDMA_WRITE_WITH_IMM:
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
                        goto inv_err;
                wc.wc_flags = IB_WC_WITH_IMM;
                wc.ex.imm_data = wqe->wr.ex.imm_data;
                ret = hfi1_rvt_get_rwqe(qp, 1);
                if (ret < 0)
                        goto op_err;
                if (!ret)
                        goto rnr_nak;
                /* skip copy_last set and qp_access_flags recheck */
                goto do_write;
        case IB_WR_RDMA_WRITE:
                copy_last = rvt_is_user_qp(qp);
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
                        goto inv_err;
do_write:
                if (wqe->length == 0)
                        break;
                if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
                                          wqe->rdma_wr.remote_addr,
                                          wqe->rdma_wr.rkey,
                                          IB_ACCESS_REMOTE_WRITE)))
                        goto acc_err;
                qp->r_sge.sg_list = NULL;
                qp->r_sge.num_sge = 1;
                qp->r_sge.total_len = wqe->length;
                break;

        case IB_WR_RDMA_READ:
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
                        goto inv_err;
                if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
                                          wqe->rdma_wr.remote_addr,
                                          wqe->rdma_wr.rkey,
                                          IB_ACCESS_REMOTE_READ)))
                        goto acc_err;
                release = false;
                sqp->s_sge.sg_list = NULL;
                sqp->s_sge.num_sge = 1;
                qp->r_sge.sge = wqe->sg_list[0];
                qp->r_sge.sg_list = wqe->sg_list + 1;
                qp->r_sge.num_sge = wqe->wr.num_sge;
                qp->r_sge.total_len = wqe->length;
                break;

        case IB_WR_ATOMIC_CMP_AND_SWP:
        case IB_WR_ATOMIC_FETCH_AND_ADD:
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
                        goto inv_err;
                if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
                                          wqe->atomic_wr.remote_addr,
                                          wqe->atomic_wr.rkey,
                                          IB_ACCESS_REMOTE_ATOMIC)))
                        goto acc_err;
                /* Perform atomic OP and save result. */
                maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
                sdata = wqe->atomic_wr.compare_add;
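                /*
                 * The requester's buffer receives the value that was at the
                 * target before the update: atomic64_add_return() yields the
                 * new value, so subtract sdata; cmpxchg() already returns
                 * the original value.
                 */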
                *(u64 *)sqp->s_sge.sge.vaddr =
                        (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
                        (u64)atomic64_add_return(sdata, maddr) - sdata :
                        (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
                                      sdata, wqe->atomic_wr.swap);
                rvt_put_mr(qp->r_sge.sge.mr);
                qp->r_sge.num_sge = 0;
                goto send_comp;

        default:
                send_status = IB_WC_LOC_QP_OP_ERR;
                goto serr;
        }

        sge = &sqp->s_sge.sge;
        while (sqp->s_len) {
                u32 len = sqp->s_len;

                if (len > sge->length)
                        len = sge->length;
                if (len > sge->sge_length)
                        len = sge->sge_length;
                WARN_ON_ONCE(len == 0);
                hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
                sge->vaddr += len;
                sge->length -= len;
                sge->sge_length -= len;
                if (sge->sge_length == 0) {
                        if (!release)
                                rvt_put_mr(sge->mr);
                        if (--sqp->s_sge.num_sge)
                                *sge = *sqp->s_sge.sg_list++;
                } else if (sge->length == 0 && sge->mr->lkey) {
                        if (++sge->n >= RVT_SEGSZ) {
                                if (++sge->m >= sge->mr->mapsz)
                                        break;
                                sge->n = 0;
                        }
                        sge->vaddr =
                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
                        sge->length =
                                sge->mr->map[sge->m]->segs[sge->n].length;
                }
                sqp->s_len -= len;
        }
        if (release)
                rvt_put_ss(&qp->r_sge);

        if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
                goto send_comp;

        if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
                wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
        else
                wc.opcode = IB_WC_RECV;
        wc.wr_id = qp->r_wr_id;
        wc.status = IB_WC_SUCCESS;
        wc.byte_len = wqe->length;
        wc.qp = &qp->ibqp;
        wc.src_qp = qp->remote_qpn;
        wc.slid = qp->remote_ah_attr.dlid;
        wc.sl = qp->remote_ah_attr.sl;
        wc.port_num = 1;
        /* Signal completion event if the solicited bit is set. */
        rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
                     wqe->wr.send_flags & IB_SEND_SOLICITED);

send_comp:
        spin_lock_irqsave(&sqp->s_lock, flags);
        ibp->rvp.n_loop_pkts++;
flush_send:
        sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
        hfi1_send_complete(sqp, wqe, send_status);
        if (local_ops) {
                atomic_dec(&sqp->local_ops_pending);
                local_ops = 0;
        }
        goto again;

rnr_nak:
        /* Handle RNR NAK */
        if (qp->ibqp.qp_type == IB_QPT_UC)
                goto send_comp;
        ibp->rvp.n_rnr_naks++;
        /*
         * Note: we don't need the s_lock held since the BUSY flag
         * makes this single threaded.
         */
        if (sqp->s_rnr_retry == 0) {
                send_status = IB_WC_RNR_RETRY_EXC_ERR;
                goto serr;
        }
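        /* An RNR retry count of 7 means retry indefinitely. */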
        if (sqp->s_rnr_retry_cnt < 7)
                sqp->s_rnr_retry--;
        spin_lock_irqsave(&sqp->s_lock, flags);
        if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
                goto clr_busy;
        rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
                                IB_AETH_CREDIT_SHIFT);
        goto clr_busy;

op_err:
        send_status = IB_WC_REM_OP_ERR;
        wc.status = IB_WC_LOC_QP_OP_ERR;
        goto err;

inv_err:
        send_status = IB_WC_REM_INV_REQ_ERR;
        wc.status = IB_WC_LOC_QP_OP_ERR;
        goto err;

acc_err:
        send_status = IB_WC_REM_ACCESS_ERR;
        wc.status = IB_WC_LOC_PROT_ERR;
err:
        /* responder goes to error state */
        rvt_rc_error(qp, wc.status);

serr:
        spin_lock_irqsave(&sqp->s_lock, flags);
        hfi1_send_complete(sqp, wqe, send_status);
        if (sqp->ibqp.qp_type == IB_QPT_RC) {
                int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);

                sqp->s_flags &= ~RVT_S_BUSY;
                spin_unlock_irqrestore(&sqp->s_lock, flags);
                if (lastwqe) {
                        struct ib_event ev;

                        ev.device = sqp->ibqp.device;
                        ev.element.qp = &sqp->ibqp;
                        ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
                        sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
                }
                goto done;
        }
clr_busy:
        sqp->s_flags &= ~RVT_S_BUSY;
unlock:
        spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
        rcu_read_unlock();
}

/**
 * hfi1_make_grh - construct a GRH header
 * @ibp: a pointer to the IB port
 * @hdr: a pointer to the GRH header being constructed
 * @grh: the global route address to send to
 * @hwords: the number of 32 bit words of header being sent
 * @nwords: the number of 32 bit words of data being sent
 *
 * Return the size of the header in 32 bit words.
 */
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
                  struct ib_global_route *grh, u32 hwords, u32 nwords)
{
        hdr->version_tclass_flow =
                cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
                            (grh->traffic_class << IB_GRH_TCLASS_SHIFT) |
                            (grh->flow_label << IB_GRH_FLOW_SHIFT));
        hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
        /* next_hdr is defined by C8-7 in ch. 8.4.1 */
        hdr->next_hdr = IB_GRH_NEXT_HDR;
        hdr->hop_limit = grh->hop_limit;
        /* The SGID is 32-bit aligned. */
        hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
        hdr->sgid.global.interface_id =
                grh->sgid_index < HFI1_GUIDS_PER_PORT ?
                get_sguid(ibp, grh->sgid_index) :
                get_sguid(ibp, HFI1_PORT_GUID_INDEX);
        hdr->dgid = grh->dgid;

        /* GRH header size in 32-bit words. */
        return sizeof(struct ib_grh) / sizeof(u32);
}

#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, hdr.u.oth.bth[2]) / 4)

/**
 * build_ahg - create ahg in s_ahg
 * @qp: a pointer to QP
 * @npsn: the next PSN for the request/response
 *
 * This routine handles AHG by allocating an AHG entry and arranging for the
 * header of the first middle packet to be copied.
 *
 * Subsequent middle packets reuse the copied entry, updating only the
 * PSN with one or two edits.
 */
static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
{
        struct hfi1_qp_priv *priv = qp->priv;

        if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR))
                clear_ahg(qp);
        if (!(qp->s_flags & RVT_S_AHG_VALID)) {
                /* first middle that needs copy */
                if (qp->s_ahgidx < 0)
                        qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
                if (qp->s_ahgidx >= 0) {
                        qp->s_ahgpsn = npsn;
                        priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
                        /* save to protect a change in another thread */
                        priv->s_ahg->ahgidx = qp->s_ahgidx;
                        qp->s_flags |= RVT_S_AHG_VALID;
                }
        } else {
                /* subsequent middle after valid */
                if (qp->s_ahgidx >= 0) {
                        priv->s_ahg->tx_flags |= SDMA_TXREQ_F_USE_AHG;
                        priv->s_ahg->ahgidx = qp->s_ahgidx;
                        priv->s_ahg->ahgcount++;
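                        /* First edit: patch the low 16 bits of the PSN into BTH2. */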
                        priv->s_ahg->ahgdesc[0] =
                                sdma_build_ahg_descriptor(
                                        (__force u16)cpu_to_be16((u16)npsn),
                                        BTH2_OFFSET,
                                        16,
                                        16);
                        if ((npsn & 0xffff0000) !=
                                        (qp->s_ahgpsn & 0xffff0000)) {
                                priv->s_ahg->ahgcount++;
                                priv->s_ahg->ahgdesc[1] =
                                        sdma_build_ahg_descriptor(
                                                (__force u16)cpu_to_be16(
                                                        (u16)(npsn >> 16)),
                                                BTH2_OFFSET,
                                                0,
                                                16);
                        }
                }
        }
}

void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
                          u32 bth0, u32 bth2, int middle,
                          struct hfi1_pkt_state *ps)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct hfi1_ibport *ibp = ps->ibp;
        u16 lrh0;
        u32 nwords;
        u32 extra_bytes;
        u32 bth1;

        /* Construct the header. */
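        /*
         * Pad the payload to a 4-byte boundary; nwords is the padded
         * payload length in 32-bit words.
         */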
        extra_bytes = -ps->s_txreq->s_cur_size & 3;
        nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2;
        lrh0 = HFI1_LRH_BTH;
        if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
                qp->s_hdrwords += hfi1_make_grh(ibp,
                                                &ps->s_txreq->phdr.hdr.u.l.grh,
                                                &qp->remote_ah_attr.grh,
                                                qp->s_hdrwords, nwords);
                lrh0 = HFI1_LRH_GRH;
                middle = 0;
        }
        lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
        /*
         * Reset the s_ahg/AHG fields.
         *
         * This ensures that the ahgentry/ahgcount
         * are at a non-AHG default to protect
         * build_verbs_tx_desc() from using
         * a stale ahgidx.
         *
         * build_ahg() will modify as appropriate
         * to use the AHG feature.
         */
        priv->s_ahg->tx_flags = 0;
        priv->s_ahg->ahgcount = 0;
        priv->s_ahg->ahgidx = 0;
        if (qp->s_mig_state == IB_MIG_MIGRATED)
                bth0 |= IB_BTH_MIG_REQ;
        else
                middle = 0;
        if (middle)
                build_ahg(qp, bth2);
        else
                qp->s_flags &= ~RVT_S_AHG_VALID;
        ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
        ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
        ps->s_txreq->phdr.hdr.lrh[2] =
                cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
        ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
                                       qp->remote_ah_attr.src_path_bits);
        bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
        bth0 |= extra_bytes << 20;
        ohdr->bth[0] = cpu_to_be32(bth0);
        bth1 = qp->remote_qpn;
        if (qp->s_flags & RVT_S_ECN) {
                qp->s_flags &= ~RVT_S_ECN;
                /* we recently received a FECN, so return a BECN */
                bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT);
        }
        ohdr->bth[1] = cpu_to_be32(bth1);
        ohdr->bth[2] = cpu_to_be32(bth2);
}

/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ)  /* 5s in jiffies */

void _hfi1_do_send(struct work_struct *work)
{
        struct iowait *wait = container_of(work, struct iowait, iowork);
        struct rvt_qp *qp = iowait_to_qp(wait);

        hfi1_do_send(qp);
}

/**
 * hfi1_do_send - perform a send on a QP
 * @qp: a pointer to the QP
 *
 * Process entries in the send work queue until credit or queue is
 * exhausted.  Only allow one CPU to send a packet per QP.
 * Otherwise, two threads could send packets out of order.
 */
void hfi1_do_send(struct rvt_qp *qp)
{
        struct hfi1_pkt_state ps;
        struct hfi1_qp_priv *priv = qp->priv;
        int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
        unsigned long timeout;
        unsigned long timeout_int;
        int cpu;

        ps.dev = to_idev(qp->ibqp.device);
        ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
        ps.ppd = ppd_from_ibp(ps.ibp);

        switch (qp->ibqp.qp_type) {
        case IB_QPT_RC:
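                /*
                 * If the DLID, with the low LMC bits masked off, is this
                 * port's own LID, handle the request entirely in software
                 * via ruc_loopback() (same check for UC below).
                 */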
                if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
                                                                ) - 1)) ==
                                 ps.ppd->lid)) {
                        ruc_loopback(qp);
                        return;
                }
                make_req = hfi1_make_rc_req;
                timeout_int = (qp->timeout_jiffies);
                break;
        case IB_QPT_UC:
                if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
                                                                ) - 1)) ==
                                 ps.ppd->lid)) {
                        ruc_loopback(qp);
                        return;
                }
                make_req = hfi1_make_uc_req;
                timeout_int = SEND_RESCHED_TIMEOUT;
                break;
        default:
                make_req = hfi1_make_ud_req;
                timeout_int = SEND_RESCHED_TIMEOUT;
        }

        spin_lock_irqsave(&qp->s_lock, ps.flags);

        /* Return if we are already busy processing a work request. */
        if (!hfi1_send_ok(qp)) {
                spin_unlock_irqrestore(&qp->s_lock, ps.flags);
                return;
        }

        qp->s_flags |= RVT_S_BUSY;

        timeout = jiffies + (timeout_int) / 8;
        cpu = priv->s_sde ? priv->s_sde->cpu :
                        cpumask_first(cpumask_of_node(ps.ppd->dd->node));
        /* ensure a pre-built packet is handled */
        ps.s_txreq = get_waiting_verbs_txreq(qp);
        do {
                /* Check for a constructed packet to be sent. */
                if (qp->s_hdrwords != 0) {
                        spin_unlock_irqrestore(&qp->s_lock, ps.flags);
                        /*
                         * If the packet cannot be sent now, return and
                         * the send engine will be woken up later.
                         */
                        if (hfi1_verbs_send(qp, &ps))
                                return;
                        /* Record that s_ahg is empty. */
                        qp->s_hdrwords = 0;
                        /* allow other tasks to run */
                        if (unlikely(time_after(jiffies, timeout))) {
                                if (workqueue_congested(cpu,
                                                        ps.ppd->hfi1_wq)) {
                                        spin_lock_irqsave(
                                                &qp->s_lock,
                                                ps.flags);
                                        qp->s_flags &= ~RVT_S_BUSY;
                                        hfi1_schedule_send(qp);
                                        spin_unlock_irqrestore(
                                                &qp->s_lock,
                                                ps.flags);
                                        this_cpu_inc(
                                                *ps.ppd->dd->send_schedule);
                                        return;
                                }
                                if (!irqs_disabled()) {
                                        cond_resched();
                                        this_cpu_inc(
                                           *ps.ppd->dd->send_schedule);
                                }
                                timeout = jiffies + (timeout_int) / 8;
                        }
                        spin_lock_irqsave(&qp->s_lock, ps.flags);
                }
        } while (make_req(qp, &ps));

        spin_unlock_irqrestore(&qp->s_lock, ps.flags);
}

/*
 * This should be called with s_lock held.
 */
void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
                        enum ib_wc_status status)
{
        u32 old_last, last;

        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
                return;

        last = qp->s_last;
        old_last = last;
        if (++last >= qp->s_size)
                last = 0;
        qp->s_last = last;
        /* See post_send() */
        barrier();
        rvt_put_swqe(wqe);
        if (qp->ibqp.qp_type == IB_QPT_UD ||
            qp->ibqp.qp_type == IB_QPT_SMI ||
            qp->ibqp.qp_type == IB_QPT_GSI)
                atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);

        rvt_qp_swqe_complete(qp, wqe, status);

        if (qp->s_acked == old_last)
                qp->s_acked = last;
        if (qp->s_cur == old_last)
                qp->s_cur = last;
        if (qp->s_tail == old_last)
                qp->s_tail = last;
        if (qp->state == IB_QPS_SQD && last == qp->s_cur)
                qp->s_draining = 0;
}