git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
IB/hfi1: Get rid of divide when setting the tx request header
author Sebastian Sanchez <sebastian.sanchez@intel.com>
Thu, 4 May 2017 12:14:16 +0000 (05:14 -0700)
committer Doug Ledford <dledford@redhat.com>
Thu, 4 May 2017 23:31:46 +0000 (19:31 -0400)
Divide instructions show up as costly in profiles
when the tx request header is set. Using a right
shift instead of a divide operation reduces the
cycles spent in the function that sets the tx
request header, as shown in the profiles below.
Use a right shift operation instead of the divide.

Profile before change:
43.24%  009
|
|--23.41%-- user_sdma_send_pkts
|          |
|          |--99.90%-- hfi1_user_sdma_process_request

Profile after change:
45.75%  009
|
|--14.81%-- user_sdma_send_pkts
|          |
|          |--99.95%-- hfi1_user_sdma_process_request

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/user_sdma.c

index 0749689d76439f7ac95efdfceb04c5299bd80f26..8adb6dfeb2eb321ea0ba397b95b22def547e23bd 100644 (file)
@@ -143,7 +143,9 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
 
 /* KDETH OM multipliers and switch over point */
 #define KDETH_OM_SMALL     4
+#define KDETH_OM_SMALL_SHIFT     2
 #define KDETH_OM_LARGE     64
+#define KDETH_OM_LARGE_SHIFT     6
 #define KDETH_OM_MAX_SIZE  (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
 
 /* Tx request flag bits */
@@ -228,12 +230,6 @@ struct user_sdma_request {
         * size of the TID entry.
         */
        u32 tidoffset;
-       /*
-        * KDETH.OM
-        * Remember this because the header template always sets it
-        * to 0.
-        */
-       u8 omfactor;
        /*
         * We copy the iovs for this request (based on
         * info.iovcnt). These are only the data vectors
@@ -1323,6 +1319,7 @@ static int set_txreq_header(struct user_sdma_request *req,
 {
        struct hfi1_user_sdma_pkt_q *pq = req->pq;
        struct hfi1_pkt_header *hdr = &tx->hdr;
+       u8 omfactor; /* KDETH.OM */
        u16 pbclen;
        int ret;
        u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
@@ -1400,8 +1397,9 @@ static int set_txreq_header(struct user_sdma_request *req,
                        }
                        tidval = req->tids[req->tididx];
                }
-               req->omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
-                       KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE : KDETH_OM_SMALL;
+               omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
+                       KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT :
+                       KDETH_OM_SMALL_SHIFT;
                /* Set KDETH.TIDCtrl based on value for this TID. */
                KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
                          EXP_TID_GET(tidval, CTRL));
@@ -1416,12 +1414,12 @@ static int set_txreq_header(struct user_sdma_request *req,
                 * transfer.
                 */
                SDMA_DBG(req, "TID offset %ubytes %uunits om%u",
-                        req->tidoffset, req->tidoffset / req->omfactor,
-                        req->omfactor != KDETH_OM_SMALL);
+                        req->tidoffset, req->tidoffset >> omfactor,
+                        omfactor != KDETH_OM_SMALL_SHIFT);
                KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
-                         req->tidoffset / req->omfactor);
+                         req->tidoffset >> omfactor);
                KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
-                         req->omfactor != KDETH_OM_SMALL);
+                         omfactor != KDETH_OM_SMALL_SHIFT);
        }
 done:
        trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
@@ -1433,6 +1431,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
                                struct user_sdma_txreq *tx, u32 len)
 {
        int diff = 0;
+       u8 omfactor; /* KDETH.OM */
        struct hfi1_user_sdma_pkt_q *pq = req->pq;
        struct hfi1_pkt_header *hdr = &req->hdr;
        u16 pbclen = le16_to_cpu(hdr->pbc[0]);
@@ -1484,14 +1483,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
                        }
                        tidval = req->tids[req->tididx];
                }
-               req->omfactor = ((EXP_TID_GET(tidval, LEN) *
+               omfactor = ((EXP_TID_GET(tidval, LEN) *
                                  PAGE_SIZE) >=
-                                KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE :
-                       KDETH_OM_SMALL;
+                                KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
+                                KDETH_OM_SMALL_SHIFT;
                /* KDETH.OM and KDETH.OFFSET (TID) */
                AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
-                              ((!!(req->omfactor - KDETH_OM_SMALL)) << 15 |
-                               ((req->tidoffset / req->omfactor) & 0x7fff)));
+                              ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
+                               ((req->tidoffset >> omfactor)
+                                & 0x7fff)));
                /* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
                val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
                                   (EXP_TID_GET(tidval, IDX) & 0x3ff));