From: James Smart Date: Fri, 3 Aug 2012 16:35:54 +0000 (-0400) Subject: [SCSI] lpfc 8.3.33: Parallelize SLI-4 Q distribution X-Git-Tag: next-20120917~76^2~2^2~41 X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=2a76a28314993d27668fc7f9d5b6427facce4318;p=karo-tx-linux.git [SCSI] lpfc 8.3.33: Parallelize SLI-4 Q distribution Commonize SLI-3/4 Ring/Queue framework, to keep SLI-3 compatibility Parallelize SLI-4 Q distribution - to use multiple posting/completion queues Signed-off-by: James Smart Signed-off-by: James Bottomley --- diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index a65c05a8d488..a870af1b5478 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -732,7 +732,7 @@ struct lpfc_hba { uint32_t hbq_count; /* Count of configured HBQs */ struct hbq_s hbqs[LPFC_MAX_HBQS]; /* local copy of hbq indicies */ - uint32_t fcp_qidx; /* next work queue to post work to */ + atomic_t fcp_qidx; /* next work queue to post work to */ unsigned long pci_bar0_map; /* Physical address for PCI BAR0 */ unsigned long pci_bar1_map; /* Physical address for PCI BAR1 */ diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 4ab0e3542c5d..d74470481ca6 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -1188,8 +1188,8 @@ typedef struct { */ /* Number of rings currently used and available. */ -#define MAX_CONFIGURED_RINGS 3 -#define MAX_RINGS 4 +#define MAX_SLI3_CONFIGURED_RINGS 3 +#define MAX_SLI3_RINGS 4 /* IOCB / Mailbox is owned by FireFly */ #define OWN_CHIP 1 @@ -2993,7 +2993,7 @@ typedef struct _PCB { uint32_t pgpAddrLow; uint32_t pgpAddrHigh; - SLI2_RDSC rdsc[MAX_RINGS]; + SLI2_RDSC rdsc[MAX_SLI3_RINGS]; } PCB_t; /* NEW_FEATURE */ @@ -3103,18 +3103,18 @@ struct lpfc_pgp { struct sli2_desc { uint32_t unused1[16]; - struct lpfc_hgp host[MAX_RINGS]; - struct lpfc_pgp port[MAX_RINGS]; + struct lpfc_hgp host[MAX_SLI3_RINGS]; + struct lpfc_pgp port[MAX_SLI3_RINGS]; }; struct sli3_desc { - struct lpfc_hgp host[MAX_RINGS]; + struct lpfc_hgp host[MAX_SLI3_RINGS]; uint32_t reserved[8]; uint32_t hbq_put[16]; }; struct sli3_pgp { - struct lpfc_pgp port[MAX_RINGS]; + struct lpfc_pgp port[MAX_SLI3_RINGS]; uint32_t hbq_get[16]; }; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 9efe5f82a6a1..176302f0e02c 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -4551,6 +4551,13 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) phba->cfg_sg_seg_cnt = LPFC_DEFAULT_MENLO_SG_SEG_CNT; } + if (!phba->sli.ring) + phba->sli.ring = (struct lpfc_sli_ring *) + kzalloc(LPFC_SLI3_MAX_RING * + sizeof(struct lpfc_sli_ring), GFP_KERNEL); + if (!phba->sli.ring) + return -ENOMEM; + /* * Since the sg_tablesize is module parameter, the sg_dma_buf_size * used to create the sg_dma_buf_pool must be dynamically calculated. @@ -4709,6 +4716,16 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) if (phba->cfg_enable_bg) sges_per_segment = 2; + /* + * For SLI4, instead of using ring 0 (LPFC_FCP_RING) for FCP commands + * we will associate a new ring, for each FCP fastpath EQ/CQ/WQ tuple. + */ + if (!phba->sli.ring) + phba->sli.ring = kzalloc( + (LPFC_SLI3_MAX_RING + phba->cfg_fcp_eq_count) * + sizeof(struct lpfc_sli_ring), GFP_KERNEL); + if (!phba->sli.ring) + return -ENOMEM; /* * Since the sg_tablesize is module parameter, the sg_dma_buf_size * used to create the sg_dma_buf_pool must be dynamically calculated. @@ -5555,6 +5572,10 @@ lpfc_hba_free(struct lpfc_hba *phba) /* Release the driver assigned board number */ idr_remove(&lpfc_hba_index, phba->brd_no); + /* Free memory allocated with sli rings */ + kfree(phba->sli.ring); + phba->sli.ring = NULL; + kfree(phba); return; } @@ -6924,6 +6945,8 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) int lpfc_sli4_queue_setup(struct lpfc_hba *phba) { + struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring; int rc = -ENOMEM; int fcp_eqidx, fcp_cqidx, fcp_wqidx; int fcp_cq_index = 0; @@ -7107,6 +7130,12 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) "rc = 0x%x\n", rc); goto out_destroy_mbx_wq; } + + /* Bind this WQ to the ELS ring */ + pring = &psli->ring[LPFC_ELS_RING]; + pring->sli.sli4.wqp = (void *)phba->sli4_hba.els_wq; + phba->sli4_hba.els_cq->pring = pring; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "2590 ELS WQ setup: wq-id=%d, parent cq-id=%d\n", phba->sli4_hba.els_wq->queue_id, @@ -7137,6 +7166,12 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) "WQ (%d), rc = 0x%x\n", fcp_wqidx, rc); goto out_destroy_fcp_wq; } + + /* Bind this WQ to the next FCP ring */ + pring = &psli->ring[MAX_SLI3_CONFIGURED_RINGS + fcp_wqidx]; + pring->sli.sli4.wqp = (void *)phba->sli4_hba.fcp_wq[fcp_wqidx]; + phba->sli4_hba.fcp_cq[fcp_cq_index]->pring = pring; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "2591 FCP WQ setup: wq[%d]-id=%d, " "parent cq[%d]-id=%d\n", diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index d7afd0fb4579..982bd40353ec 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -7796,14 +7796,14 @@ lpfc_sli4_bpl2sgl(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq, * * Return: index into SLI4 fast-path FCP queue index. **/ -static uint32_t +static inline uint32_t lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba) { - ++phba->fcp_qidx; - if (phba->fcp_qidx >= phba->cfg_fcp_wq_count) - phba->fcp_qidx = 0; + int i; - return phba->fcp_qidx; + i = atomic_add_return(1, &phba->fcp_qidx); + i = (i % phba->cfg_fcp_wq_count); + return i; } /** @@ -8323,16 +8323,6 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, if ((piocb->iocb_flag & LPFC_IO_FCP) || (piocb->iocb_flag & LPFC_USE_FCPWQIDX)) { - /* - * For FCP command IOCB, get a new WQ index to distribute - * WQE across the WQsr. On the other hand, for abort IOCB, - * it carries the same WQ index to the original command - * IOCB. - */ - if (piocb->iocb_flag & LPFC_IO_FCP) - piocb->fcp_wqidx = lpfc_sli4_scmd_to_wqidx_distr(phba); - if (unlikely(!phba->sli4_hba.fcp_wq)) - return IOCB_ERROR; if (lpfc_sli4_wq_put(phba->sli4_hba.fcp_wq[piocb->fcp_wqidx], &wqe)) return IOCB_ERROR; @@ -8413,11 +8403,18 @@ int lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, struct lpfc_iocbq *piocb, uint32_t flag) { - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; unsigned long iflags; - int rc; + int rc, idx; if (phba->sli_rev == LPFC_SLI_REV4) { + if (piocb->iocb_flag & LPFC_IO_FCP) { + if (unlikely(!phba->sli4_hba.fcp_wq)) + return IOCB_ERROR; + idx = lpfc_sli4_scmd_to_wqidx_distr(phba); + piocb->fcp_wqidx = idx; + ring_number = MAX_SLI3_CONFIGURED_RINGS + idx; + } pring = &phba->sli.ring[ring_number]; spin_lock_irqsave(&pring->ring_lock, iflags); rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag); @@ -8712,7 +8709,9 @@ lpfc_sli_setup(struct lpfc_hba *phba) struct lpfc_sli *psli = &phba->sli; struct lpfc_sli_ring *pring; - psli->num_rings = MAX_CONFIGURED_RINGS; + psli->num_rings = MAX_SLI3_CONFIGURED_RINGS; + if (phba->sli_rev == LPFC_SLI_REV4) + psli->num_rings += phba->cfg_fcp_eq_count; psli->sli_flag = 0; psli->fcp_ring = LPFC_FCP_RING; psli->next_ring = LPFC_FCP_NEXT_RING; @@ -11191,6 +11190,7 @@ lpfc_sli4_sp_handle_mcqe(struct lpfc_hba *phba, struct lpfc_cqe *cqe) /** * lpfc_sli4_sp_handle_els_wcqe - Handle els work-queue completion event * @phba: Pointer to HBA context object. + * @cq: Pointer to associated CQ * @wcqe: Pointer to work-queue completion queue entry. * * This routine handles an ELS work-queue completion event. @@ -11198,12 +11198,12 @@ lpfc_sli4_sp_handle_mcqe(struct lpfc_hba *phba, struct lpfc_cqe *cqe) * Return: true if work posted to worker thread, otherwise false. **/ static bool -lpfc_sli4_sp_handle_els_wcqe(struct lpfc_hba *phba, +lpfc_sli4_sp_handle_els_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, struct lpfc_wcqe_complete *wcqe) { struct lpfc_iocbq *irspiocbq; unsigned long iflags; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring = cq->pring; /* Get an irspiocbq for later ELS response processing use */ irspiocbq = lpfc_sli_get_iocbq(phba); @@ -11408,7 +11408,7 @@ lpfc_sli4_sp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq, case CQE_CODE_COMPL_WQE: /* Process the WQ/RQ complete event */ phba->last_completion_time = jiffies; - workposted = lpfc_sli4_sp_handle_els_wcqe(phba, + workposted = lpfc_sli4_sp_handle_els_wcqe(phba, cq, (struct lpfc_wcqe_complete *)&cqevt); break; case CQE_CODE_RELEASE_WQE: @@ -11540,16 +11540,18 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe) /** * lpfc_sli4_fp_handle_fcp_wcqe - Process fast-path work queue completion entry - * @eqe: Pointer to fast-path completion queue entry. + * @phba: Pointer to HBA context object. + * @cq: Pointer to associated CQ + * @wcqe: Pointer to work-queue completion queue entry. * * This routine process a fast-path work queue completion entry from fast-path * event queue for FCP command response completion. **/ static void -lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba, +lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, struct lpfc_wcqe_complete *wcqe) { - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_FCP_RING]; + struct lpfc_sli_ring *pring = cq->pring; struct lpfc_iocbq *cmdiocbq; struct lpfc_iocbq irspiocbq; unsigned long iflags; @@ -11667,7 +11669,7 @@ lpfc_sli4_fp_handle_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, cq->CQ_wq++; /* Process the WQ complete event */ phba->last_completion_time = jiffies; - lpfc_sli4_fp_handle_fcp_wcqe(phba, + lpfc_sli4_fp_handle_fcp_wcqe(phba, cq, (struct lpfc_wcqe_complete *)&wcqe); break; case CQE_CODE_RELEASE_WQE: diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index 2d64a2bab36a..4b9b44e8e064 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -131,7 +131,9 @@ typedef struct lpfcMboxq { #define LPFC_MAX_RING_MASK 5 /* max num of rctl/type masks allowed per ring */ -#define LPFC_MAX_RING 4 /* max num of SLI rings used by driver */ +#define LPFC_SLI3_MAX_RING 4 /* Max num of SLI3 rings used by driver. + For SLI4, an additional ring for each + FCP WQ will be allocated. */ struct lpfc_sli_ring; @@ -172,7 +174,7 @@ struct lpfc_sli3_ring { }; struct lpfc_sli4_ring { - void *wqp; /* Pointer to associated WQ */ + struct lpfc_queue *wqp; /* Pointer to associated WQ */ }; @@ -284,7 +286,7 @@ struct lpfc_sli { #define LPFC_MENLO_MAINT 0x1000 /* need for menl fw download */ #define LPFC_SLI_ASYNC_MBX_BLK 0x2000 /* Async mailbox is blocked */ - struct lpfc_sli_ring ring[LPFC_MAX_RING]; + struct lpfc_sli_ring *ring; int fcp_ring; /* ring used for FCP initiator commands */ int next_ring; diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index f4b57654787b..e7d84134beb1 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -142,6 +142,8 @@ struct lpfc_queue { uint32_t host_index; /* The host's index for putting or getting */ uint32_t hba_index; /* The last known hba index for get or put */ + struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */ + /* For q stats */ uint32_t q_cnt_1; uint32_t q_cnt_2;