]> git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - drivers/infiniband/hw/qedr/verbs.c
RDMA/qedr: Add 64KB PAGE_SIZE support to user-space queues
[karo-tx-linux.git] / drivers / infiniband / hw / qedr / verbs.c
index 2091902848e6c47bb1f3dfdc4cc08cec99386dc0..d6723c365c7fba36168bc11c227de8bc34eef862 100644 (file)
@@ -653,14 +653,15 @@ static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
 
 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
                               struct qedr_pbl *pbl,
-                              struct qedr_pbl_info *pbl_info)
+                              struct qedr_pbl_info *pbl_info, u32 pg_shift)
 {
        int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
+       u32 fw_pg_cnt, fw_pg_per_umem_pg;
        struct qedr_pbl *pbl_tbl;
        struct scatterlist *sg;
        struct regpair *pbe;
+       u64 pg_addr;
        int entry;
-       u32 addr;
 
        if (!pbl_info->num_pbes)
                return;
@@ -681,31 +682,37 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
 
        pbe_cnt = 0;
 
-       shift = ilog2(umem->page_size);
+       shift = umem->page_shift;
+
+       fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);
 
        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
                pages = sg_dma_len(sg) >> shift;
+               pg_addr = sg_dma_address(sg);
                for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
-                       /* store the page address in pbe */
-                       pbe->lo = cpu_to_le32(sg_dma_address(sg) +
-                                             umem->page_size * pg_cnt);
-                       addr = upper_32_bits(sg_dma_address(sg) +
-                                            umem->page_size * pg_cnt);
-                       pbe->hi = cpu_to_le32(addr);
-                       pbe_cnt++;
-                       total_num_pbes++;
-                       pbe++;
-
-                       if (total_num_pbes == pbl_info->num_pbes)
-                               return;
-
-                       /* If the given pbl is full storing the pbes,
-                        * move to next pbl.
-                        */
-                       if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
-                               pbl_tbl++;
-                               pbe = (struct regpair *)pbl_tbl->va;
-                               pbe_cnt = 0;
+                       for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
+                               pbe->lo = cpu_to_le32(pg_addr);
+                               pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
+
+                               pg_addr += BIT(pg_shift);
+                               pbe_cnt++;
+                               total_num_pbes++;
+                               pbe++;
+
+                               if (total_num_pbes == pbl_info->num_pbes)
+                                       return;
+
+                               /* If the given pbl is full storing the pbes,
+                                * move to next pbl.
+                                */
+                               if (pbe_cnt ==
+                                   (pbl_info->pbl_size / sizeof(u64))) {
+                                       pbl_tbl++;
+                                       pbe = (struct regpair *)pbl_tbl->va;
+                                       pbe_cnt = 0;
+                               }
+
+                               fw_pg_cnt++;
                        }
                }
        }
@@ -754,7 +761,7 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
                                       u64 buf_addr, size_t buf_len,
                                       int access, int dmasync)
 {
-       int page_cnt;
+       u32 fw_pages;
        int rc;
 
        q->buf_addr = buf_addr;
@@ -766,8 +773,10 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
                return PTR_ERR(q->umem);
        }
 
-       page_cnt = ib_umem_page_count(q->umem);
-       rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, 0);
+       fw_pages = ib_umem_page_count(q->umem) <<
+           (q->umem->page_shift - FW_PAGE_SHIFT);
+
+       rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
        if (rc)
                goto err0;
 
@@ -777,7 +786,8 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
                goto err0;
        }
 
-       qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info);
+               qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
+                                  FW_PAGE_SHIFT);
 
        return 0;
 
@@ -822,6 +832,17 @@ int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 {
        struct qedr_cq *cq = get_qedr_cq(ibcq);
        unsigned long sflags;
+       struct qedr_dev *dev;
+
+       dev = get_qedr_dev(ibcq->device);
+
+       if (cq->destroyed) {
+               DP_ERR(dev,
+                      "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
+                      cq, cq->icid);
+               return -EINVAL;
+       }
+
 
        if (cq->cq_type == QEDR_CQ_TYPE_GSI)
                return 0;
@@ -987,35 +1008,82 @@ int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
        return 0;
 }
 
+#define QEDR_DESTROY_CQ_MAX_ITERATIONS         (10)
+#define QEDR_DESTROY_CQ_ITER_DURATION          (10)
+
 int qedr_destroy_cq(struct ib_cq *ibcq)
 {
        struct qedr_dev *dev = get_qedr_dev(ibcq->device);
        struct qed_rdma_destroy_cq_out_params oparams;
        struct qed_rdma_destroy_cq_in_params iparams;
        struct qedr_cq *cq = get_qedr_cq(ibcq);
+       int iter;
+       int rc;
+
+       DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
 
-       DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq: cq_id %d", cq->icid);
+       cq->destroyed = 1;
 
        /* GSIs CQs are handled by driver, so they don't exist in the FW */
-       if (cq->cq_type != QEDR_CQ_TYPE_GSI) {
-               int rc;
+       if (cq->cq_type == QEDR_CQ_TYPE_GSI)
+               goto done;
 
-               iparams.icid = cq->icid;
-               rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams,
-                                              &oparams);
-               if (rc)
-                       return rc;
-               dev->ops->common->chain_free(dev->cdev, &cq->pbl);
-       }
+       iparams.icid = cq->icid;
+       rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
+       if (rc)
+               return rc;
+
+       dev->ops->common->chain_free(dev->cdev, &cq->pbl);
 
        if (ibcq->uobject && ibcq->uobject->context) {
                qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
                ib_umem_release(cq->q.umem);
        }
 
+       /* We don't want the IRQ handler to handle a non-existing CQ so we
+        * wait until all CNQ interrupts, if any, are received. This will always
+        * happen and will always happen very fast. If not, then a serious error
+        * has occurred. That is why we can use a long delay.
+        * We spin for a short time so we don't lose time on context switching
+        * in case all the completions are handled in that span. Otherwise
+        * we sleep for a while and check again. Since the CNQ may be
+        * associated with (only) the current CPU we use msleep to allow the
+        * current CPU to be freed.
+        * The CNQ notification is increased in qedr_irq_handler().
+        */
+       iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
+       while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
+               udelay(QEDR_DESTROY_CQ_ITER_DURATION);
+               iter--;
+       }
+
+       iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
+       while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
+               msleep(QEDR_DESTROY_CQ_ITER_DURATION);
+               iter--;
+       }
+
+       if (oparams.num_cq_notif != cq->cnq_notif)
+               goto err;
+
+       /* Note that we don't need to have explicit code to wait for the
+        * completion of the event handler because it is invoked from the EQ.
+        * Since the destroy CQ ramrod has also been received on the EQ we can
+        * be certain that there's no event handler in process.
+        */
+done:
+       cq->sig = ~cq->sig;
+
        kfree(cq);
 
        return 0;
+
+err:
+       DP_ERR(dev,
+              "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
+              cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
+
+       return -EINVAL;
 }
 
 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
@@ -1026,13 +1094,15 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
 {
        enum rdma_network_type nw_type;
        struct ib_gid_attr gid_attr;
+       const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
        union ib_gid gid;
        u32 ipv4_addr;
        int rc = 0;
        int i;
 
-       rc = ib_get_cached_gid(ibqp->device, attr->ah_attr.port_num,
-                              attr->ah_attr.grh.sgid_index, &gid, &gid_attr);
+       rc = ib_get_cached_gid(ibqp->device,
+                              rdma_ah_get_port_num(&attr->ah_attr),
+                              grh->sgid_index, &gid, &gid_attr);
        if (rc)
                return rc;
 
@@ -1049,7 +1119,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
                        memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
                               sizeof(qp_params->sgid));
                        memcpy(&qp_params->dgid.bytes[0],
-                              &attr->ah_attr.grh.dgid,
+                              &grh->dgid,
                               sizeof(qp_params->dgid));
                        qp_params->roce_mode = ROCE_V2_IPV6;
                        SET_FIELD(qp_params->modify_flags,
@@ -1059,7 +1129,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
                        memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
                               sizeof(qp_params->sgid));
                        memcpy(&qp_params->dgid.bytes[0],
-                              &attr->ah_attr.grh.dgid,
+                              &grh->dgid,
                               sizeof(qp_params->dgid));
                        qp_params->roce_mode = ROCE_V1;
                        break;
@@ -1069,7 +1139,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
                        ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
                        qp_params->sgid.ipv4_addr = ipv4_addr;
                        ipv4_addr =
-                           qedr_get_ipv4_from_gid(attr->ah_attr.grh.dgid.raw);
+                           qedr_get_ipv4_from_gid(grh->dgid.raw);
                        qp_params->dgid.ipv4_addr = ipv4_addr;
                        SET_FIELD(qp_params->modify_flags,
                                  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
@@ -1691,6 +1761,7 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        struct qedr_qp *qp = get_qedr_qp(ibqp);
        struct qed_rdma_modify_qp_in_params qp_params = { 0 };
        struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
+       const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
        enum ib_qp_state old_qp_state, new_qp_state;
        int rc = 0;
 
@@ -1773,17 +1844,17 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                SET_FIELD(qp_params.modify_flags,
                          QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
 
-               qp_params.traffic_class_tos = attr->ah_attr.grh.traffic_class;
-               qp_params.flow_label = attr->ah_attr.grh.flow_label;
-               qp_params.hop_limit_ttl = attr->ah_attr.grh.hop_limit;
+               qp_params.traffic_class_tos = grh->traffic_class;
+               qp_params.flow_label = grh->flow_label;
+               qp_params.hop_limit_ttl = grh->hop_limit;
 
-               qp->sgid_idx = attr->ah_attr.grh.sgid_index;
+               qp->sgid_idx = grh->sgid_index;
 
                rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
                if (rc) {
                        DP_ERR(dev,
                               "modify qp: problems with GID index %d (rc=%d)\n",
-                              attr->ah_attr.grh.sgid_index, rc);
+                              grh->sgid_index, rc);
                        return rc;
                }
 
@@ -1968,25 +2039,21 @@ int qedr_query_qp(struct ib_qp *ibqp,
        qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
        qp_init_attr->cap = qp_attr->cap;
 
-       memcpy(&qp_attr->ah_attr.grh.dgid.raw[0], &params.dgid.bytes[0],
-              sizeof(qp_attr->ah_attr.grh.dgid.raw));
-
-       qp_attr->ah_attr.grh.flow_label = params.flow_label;
-       qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
-       qp_attr->ah_attr.grh.hop_limit = params.hop_limit_ttl;
-       qp_attr->ah_attr.grh.traffic_class = params.traffic_class_tos;
-
-       qp_attr->ah_attr.ah_flags = IB_AH_GRH;
-       qp_attr->ah_attr.port_num = 1;
-       qp_attr->ah_attr.sl = 0;
+       qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+       rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
+                       params.flow_label, qp->sgid_idx,
+                       params.hop_limit_ttl, params.traffic_class_tos);
+       rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
+       rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
+       rdma_ah_set_sl(&qp_attr->ah_attr, 0);
        qp_attr->timeout = params.timeout;
        qp_attr->rnr_retry = params.rnr_retry;
        qp_attr->retry_cnt = params.retry_cnt;
        qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
        qp_attr->pkey_index = params.pkey_index;
        qp_attr->port_num = 1;
-       qp_attr->ah_attr.src_path_bits = 0;
-       qp_attr->ah_attr.static_rate = 0;
+       rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
+       rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
        qp_attr->alt_pkey_index = 0;
        qp_attr->alt_port_num = 0;
        qp_attr->alt_timeout = 0;
@@ -2054,7 +2121,7 @@ int qedr_destroy_qp(struct ib_qp *ibqp)
        return rc;
 }
 
-struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
                             struct ib_udata *udata)
 {
        struct qedr_ah *ah;
@@ -2169,7 +2236,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
                goto err1;
 
        qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
-                          &mr->info.pbl_info);
+                          &mr->info.pbl_info, mr->umem->page_shift);
 
        rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
        if (rc) {
@@ -2190,7 +2257,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
        mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
        mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
        mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
-       mr->hw_mr.page_size_log = ilog2(mr->umem->page_size);
+       mr->hw_mr.page_size_log = mr->umem->page_shift;
        mr->hw_mr.fbo = ib_umem_offset(mr->umem);
        mr->hw_mr.length = len;
        mr->hw_mr.vaddr = usr_addr;
@@ -2625,6 +2692,8 @@ static int qedr_prepare_reg(struct qedr_qp *qp,
        fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
        fwqe1->l_key = wr->key;
 
+       fwqe2->access_ctrl = 0;
+
        SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
                   !!(wr->access & IB_ACCESS_REMOTE_READ));
        SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
@@ -3150,6 +3219,10 @@ static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
                case IB_WC_REG_MR:
                        qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
                        break;
+               case IB_WC_RDMA_READ:
+               case IB_WC_SEND:
+                       wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
+                       break;
                default:
                        break;
                }
@@ -3271,57 +3344,81 @@ static int qedr_poll_cq_req(struct qedr_dev *dev,
        return cnt;
 }
 
-static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
-                              struct qedr_cq *cq, struct ib_wc *wc,
-                              struct rdma_cqe_responder *resp, u64 wr_id)
+static inline int qedr_cqe_resp_status_to_ib(u8 status)
 {
-       enum ib_wc_status wc_status = IB_WC_SUCCESS;
-       u8 flags;
-
-       wc->opcode = IB_WC_RECV;
-       wc->wc_flags = 0;
-
-       switch (resp->status) {
+       switch (status) {
        case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
-               wc_status = IB_WC_LOC_ACCESS_ERR;
-               break;
+               return IB_WC_LOC_ACCESS_ERR;
        case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
-               wc_status = IB_WC_LOC_LEN_ERR;
-               break;
+               return IB_WC_LOC_LEN_ERR;
        case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
-               wc_status = IB_WC_LOC_QP_OP_ERR;
-               break;
+               return IB_WC_LOC_QP_OP_ERR;
        case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
-               wc_status = IB_WC_LOC_PROT_ERR;
-               break;
+               return IB_WC_LOC_PROT_ERR;
        case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
-               wc_status = IB_WC_MW_BIND_ERR;
-               break;
+               return IB_WC_MW_BIND_ERR;
        case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
-               wc_status = IB_WC_REM_INV_RD_REQ_ERR;
-               break;
+               return IB_WC_REM_INV_RD_REQ_ERR;
        case RDMA_CQE_RESP_STS_OK:
-               wc_status = IB_WC_SUCCESS;
-               wc->byte_len = le32_to_cpu(resp->length);
+               return IB_WC_SUCCESS;
+       default:
+               return IB_WC_GENERAL_ERR;
+       }
+}
+
+static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
+                                         struct ib_wc *wc)
+{
+       wc->status = IB_WC_SUCCESS;
+       wc->byte_len = le32_to_cpu(resp->length);
 
-               flags = resp->flags & QEDR_RESP_RDMA_IMM;
+       if (resp->flags & QEDR_RESP_IMM) {
+               wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
+               wc->wc_flags |= IB_WC_WITH_IMM;
 
-               if (flags == QEDR_RESP_RDMA_IMM)
+               if (resp->flags & QEDR_RESP_RDMA)
                        wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
 
-               if (flags == QEDR_RESP_RDMA_IMM || flags == QEDR_RESP_IMM) {
-                       wc->ex.imm_data =
-                               le32_to_cpu(resp->imm_data_or_inv_r_Key);
-                       wc->wc_flags |= IB_WC_WITH_IMM;
-               }
-               break;
-       default:
-               wc->status = IB_WC_GENERAL_ERR;
-               DP_ERR(dev, "Invalid CQE status detected\n");
+               if (resp->flags & QEDR_RESP_INV)
+                       return -EINVAL;
+
+       } else if (resp->flags & QEDR_RESP_INV) {
+               wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
+               wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+
+               if (resp->flags & QEDR_RESP_RDMA)
+                       return -EINVAL;
+
+       } else if (resp->flags & QEDR_RESP_RDMA) {
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
+                              struct qedr_cq *cq, struct ib_wc *wc,
+                              struct rdma_cqe_responder *resp, u64 wr_id)
+{
+       /* Must fill fields before qedr_set_ok_cqe_resp_wc() */
+       wc->opcode = IB_WC_RECV;
+       wc->wc_flags = 0;
+
+       if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
+               if (qedr_set_ok_cqe_resp_wc(resp, wc))
+                       DP_ERR(dev,
+                              "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
+                              cq, cq->icid, resp->flags);
+
+       } else {
+               wc->status = qedr_cqe_resp_status_to_ib(resp->status);
+               if (wc->status == IB_WC_GENERAL_ERR)
+                       DP_ERR(dev,
+                              "CQ %p (icid=%d) contains an invalid CQE status %d\n",
+                              cq, cq->icid, resp->status);
        }
 
-       /* fill WC */
-       wc->status = wc_status;
+       /* Fill the rest of the WC */
        wc->vendor_err = 0;
        wc->src_qp = qp->id;
        wc->qp = &qp->ibqp;
@@ -3416,6 +3513,13 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
        int update = 0;
        int done = 0;
 
+       if (cq->destroyed) {
+               DP_ERR(dev,
+                      "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
+                      cq, cq->icid);
+               return 0;
+       }
+
        if (cq->cq_type == QEDR_CQ_TYPE_GSI)
                return qedr_gsi_poll_cq(ibcq, num_entries, wc);