diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 9edacff962a39ff95b4e6b15b96ac5992a16623e..95d581c45413fb38dd778e2b4695e7f7159c9c0b 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
 #define DRIVER_NAME "HP HPSA Driver (v " HPSA_DRIVER_VERSION ")"
 #define HPSA "hpsa"
 
-/* How long to wait (in milliseconds) for board to go into simple mode */
-#define MAX_CONFIG_WAIT 30000
+/* How long to wait for CISS doorbell communication */
+#define CLEAR_EVENT_WAIT_INTERVAL 20   /* ms for each msleep() call */
+#define MODE_CHANGE_WAIT_INTERVAL 10   /* ms for each msleep() call */
+#define MAX_CLEAR_EVENT_WAIT 30000     /* times 20 ms = 600 s */
+#define MAX_MODE_CHANGE_WAIT 2000      /* times 10 ms = 20 s */
 #define MAX_IOCTL_CONFIG_WAIT 1000
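
A minimal sketch (helper name hypothetical) of how these interval/count
pairs bound the driver's doorbell polling, assuming the SA5_DOORBELL
register offset used elsewhere in hpsa; the real wait routines may differ
in detail:

	static int wait_for_doorbell_clear(struct ctlr_info *h, u32 bit)
	{
		int i;

		/* Poll until the controller clears 'bit', or give up. */
		for (i = 0; i < MAX_MODE_CHANGE_WAIT; i++) {
			if (!(readl(h->vaddr + SA5_DOORBELL) & bit))
				return 0;	/* acknowledged */
			msleep(MODE_CHANGE_WAIT_INTERVAL);
		}
		return -ENODEV;	/* ~20 s elapsed with no ack */
	}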
 
 /*define how many times we will try a command because of bus resets */
@@ -165,24 +168,24 @@ static struct board_type products[] = {
        {0x1926103C, "Smart Array P731m", &SA5_access},
        {0x1928103C, "Smart Array P230i", &SA5_access},
        {0x1929103C, "Smart Array P530", &SA5_access},
-       {0x21BD103C, "Smart Array", &SA5_access},
-       {0x21BE103C, "Smart Array", &SA5_access},
-       {0x21BF103C, "Smart Array", &SA5_access},
-       {0x21C0103C, "Smart Array", &SA5_access},
-       {0x21C1103C, "Smart Array", &SA5_access},
-       {0x21C2103C, "Smart Array", &SA5_access},
-       {0x21C3103C, "Smart Array", &SA5_access},
+       {0x21BD103C, "Smart Array P244br", &SA5_access},
+       {0x21BE103C, "Smart Array P741m", &SA5_access},
+       {0x21BF103C, "Smart HBA H240ar", &SA5_access},
+       {0x21C0103C, "Smart Array P440ar", &SA5_access},
+       {0x21C1103C, "Smart Array P840ar", &SA5_access},
+       {0x21C2103C, "Smart Array P440", &SA5_access},
+       {0x21C3103C, "Smart Array P441", &SA5_access},
        {0x21C4103C, "Smart Array", &SA5_access},
-       {0x21C5103C, "Smart Array", &SA5_access},
-       {0x21C6103C, "Smart Array", &SA5_access},
-       {0x21C7103C, "Smart Array", &SA5_access},
-       {0x21C8103C, "Smart Array", &SA5_access},
+       {0x21C5103C, "Smart Array P841", &SA5_access},
+       {0x21C6103C, "Smart HBA H244br", &SA5_access},
+       {0x21C7103C, "Smart HBA H240", &SA5_access},
+       {0x21C8103C, "Smart HBA H241", &SA5_access},
        {0x21C9103C, "Smart Array", &SA5_access},
-       {0x21CA103C, "Smart Array", &SA5_access},
-       {0x21CB103C, "Smart Array", &SA5_access},
+       {0x21CA103C, "Smart Array P246br", &SA5_access},
+       {0x21CB103C, "Smart Array P840", &SA5_access},
        {0x21CC103C, "Smart Array", &SA5_access},
        {0x21CD103C, "Smart Array", &SA5_access},
-       {0x21CE103C, "Smart Array", &SA5_access},
+       {0x21CE103C, "Smart HBA", &SA5_access},
        {0x00761590, "HP Storage P1224 Array Controller", &SA5_access},
        {0x00871590, "HP Storage P1224e Array Controller", &SA5_access},
        {0x007D1590, "HP Storage P1228 Array Controller", &SA5_access},
@@ -196,8 +199,6 @@ static int number_of_controllers;
 static irqreturn_t do_hpsa_intr_intx(int irq, void *dev_id);
 static irqreturn_t do_hpsa_intr_msi(int irq, void *dev_id);
 static int hpsa_ioctl(struct scsi_device *dev, int cmd, void __user *arg);
-static void lock_and_start_io(struct ctlr_info *h);
-static void start_io(struct ctlr_info *h, unsigned long *flags);
 
 #ifdef CONFIG_COMPAT
 static int hpsa_compat_ioctl(struct scsi_device *dev, int cmd,
@@ -205,18 +206,18 @@ static int hpsa_compat_ioctl(struct scsi_device *dev, int cmd,
 #endif
 
 static void cmd_free(struct ctlr_info *h, struct CommandList *c);
-static void cmd_special_free(struct ctlr_info *h, struct CommandList *c);
 static struct CommandList *cmd_alloc(struct ctlr_info *h);
-static struct CommandList *cmd_special_alloc(struct ctlr_info *h);
 static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
        void *buff, size_t size, u16 page_code, unsigned char *scsi3addr,
        int cmd_type);
+static void hpsa_free_cmd_pool(struct ctlr_info *h);
 #define VPD_PAGE (1 << 8)
 
 static int hpsa_scsi_queue_command(struct Scsi_Host *h, struct scsi_cmnd *cmd);
 static void hpsa_scan_start(struct Scsi_Host *);
 static int hpsa_scan_finished(struct Scsi_Host *sh,
        unsigned long elapsed_time);
+static int hpsa_change_queue_depth(struct scsi_device *sdev, int qdepth);
 
 static int hpsa_eh_device_reset_handler(struct scsi_cmnd *scsicmd);
 static int hpsa_eh_abort_handler(struct scsi_cmnd *scsicmd);
@@ -242,14 +243,15 @@ static int hpsa_lookup_board_id(struct pci_dev *pdev, u32 *board_id);
 static int hpsa_wait_for_board_state(struct pci_dev *pdev, void __iomem *vaddr,
                                     int wait_for_ready);
 static inline void finish_cmd(struct CommandList *c);
-static void hpsa_wait_for_mode_change_ack(struct ctlr_info *h);
+static int hpsa_wait_for_mode_change_ack(struct ctlr_info *h);
 #define BOARD_NOT_READY 0
 #define BOARD_READY 1
 static void hpsa_drain_accel_commands(struct ctlr_info *h);
 static void hpsa_flush_cache(struct ctlr_info *h);
 static int hpsa_scsi_ioaccel_queue_command(struct ctlr_info *h,
        struct CommandList *c, u32 ioaccel_handle, u8 *cdb, int cdb_len,
-       u8 *scsi3addr);
+       u8 *scsi3addr, struct hpsa_scsi_dev_t *phys_disk);
+static void hpsa_command_resubmit_worker(struct work_struct *work);
 
 static inline struct ctlr_info *sdev_to_hba(struct scsi_device *sdev)
 {
@@ -506,8 +508,8 @@ static inline int is_logical_dev_addr_mode(unsigned char scsi3addr[])
        return (scsi3addr[3] & 0xC0) == 0x40;
 }
 
-static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
-       "1(ADM)", "UNKNOWN"
+static const char * const raid_label[] = { "0", "4", "1(+0)", "5", "5+1", "6",
+       "1(+0)ADM", "UNKNOWN"
 };
 #define HPSA_RAID_0    0
 #define HPSA_RAID_4    1
@@ -672,7 +674,7 @@ static struct scsi_host_template hpsa_driver_template = {
        .queuecommand           = hpsa_scsi_queue_command,
        .scan_start             = hpsa_scan_start,
        .scan_finished          = hpsa_scan_finished,
-       .change_queue_depth     = scsi_change_queue_depth,
+       .change_queue_depth     = hpsa_change_queue_depth,
        .this_id                = -1,
        .use_clustering         = ENABLE_CLUSTERING,
        .eh_abort_handler       = hpsa_eh_abort_handler,
@@ -689,13 +691,6 @@ static struct scsi_host_template hpsa_driver_template = {
        .no_write_same = 1,
 };
 
-
-/* Enqueuing and dequeuing functions for cmdlists. */
-static inline void addQ(struct list_head *list, struct CommandList *c)
-{
-       list_add_tail(&c->list, list);
-}
-
 static inline u32 next_command(struct ctlr_info *h, u8 q)
 {
        u32 a;
@@ -829,31 +824,21 @@ static void dial_up_lockup_detection_on_fw_flash_complete(struct ctlr_info *h,
 static void enqueue_cmd_and_start_io(struct ctlr_info *h,
        struct CommandList *c)
 {
-       unsigned long flags;
-
+       dial_down_lockup_detection_during_fw_flash(h, c);
+       atomic_inc(&h->commands_outstanding);
        switch (c->cmd_type) {
        case CMD_IOACCEL1:
                set_ioaccel1_performant_mode(h, c);
+               writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
                break;
        case CMD_IOACCEL2:
                set_ioaccel2_performant_mode(h, c);
+               writel(c->busaddr, h->vaddr + IOACCEL2_INBOUND_POSTQ_32);
                break;
        default:
                set_performant_mode(h, c);
+               h->access.submit_command(h, c);
        }
-       dial_down_lockup_detection_during_fw_flash(h, c);
-       spin_lock_irqsave(&h->lock, flags);
-       addQ(&h->reqQ, c);
-       h->Qdepth++;
-       start_io(h, &flags);
-       spin_unlock_irqrestore(&h->lock, flags);
-}
-
-static inline void removeQ(struct CommandList *c)
-{
-       if (WARN_ON(list_empty(&c->list)))
-               return;
-       list_del_init(&c->list);
 }
 
 static inline int is_hba_lunid(unsigned char scsi3addr[])
@@ -985,12 +970,24 @@ static void hpsa_scsi_update_entry(struct ctlr_info *h, int hostno,
        /* Raid level changed. */
        h->dev[entry]->raid_level = new_entry->raid_level;
 
-       /* Raid offload parameters changed. */
+       /* Raid offload parameters changed.  Careful about the ordering. */
+       if (new_entry->offload_config && new_entry->offload_enabled) {
+               /*
+                * If the drive is newly offload_enabled, we want to copy the
+                * raid map data first.  If previously offload_enabled and
+                * offload_config were set, raid map data had better be
+                * the same as it was before.  If raid map data is changed
+                * then it had better be the case that
+                * h->dev[entry]->offload_enabled is currently 0.
+                */
+               h->dev[entry]->raid_map = new_entry->raid_map;
+               h->dev[entry]->ioaccel_handle = new_entry->ioaccel_handle;
+               wmb(); /* ensure raid map updated prior to ->offload_enabled */
+       }
        h->dev[entry]->offload_config = new_entry->offload_config;
-       h->dev[entry]->offload_enabled = new_entry->offload_enabled;
-       h->dev[entry]->ioaccel_handle = new_entry->ioaccel_handle;
        h->dev[entry]->offload_to_mirror = new_entry->offload_to_mirror;
-       h->dev[entry]->raid_map = new_entry->raid_map;
+       h->dev[entry]->offload_enabled = new_entry->offload_enabled;
+       h->dev[entry]->queue_depth = new_entry->queue_depth;
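
A hypothetical reader-side pairing for the wmb() above (a sketch only;
consume_raid_map() is illustrative, not from this patch): a consumer that
tests ->offload_enabled before using the map needs a matching read barrier
so it cannot observe the flag before the map data:

	if (dev->offload_enabled) {
		rmb();	/* pairs with wmb() in hpsa_scsi_update_entry() */
		consume_raid_map(&dev->raid_map);
	}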
 
        dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d updated.\n",
                scsi_device_type(new_entry->devtype), hostno, new_entry->bus,
@@ -1116,6 +1113,8 @@ static inline int device_updated(struct hpsa_scsi_dev_t *dev1,
                return 1;
        if (dev1->offload_enabled != dev2->offload_enabled)
                return 1;
+       if (dev1->queue_depth != dev2->queue_depth)
+               return 1;
        return 0;
 }
 
@@ -1261,6 +1260,85 @@ static void hpsa_show_volume_status(struct ctlr_info *h,
        }
 }
 
+/*
+ * Figure the list of physical drive pointers for a logical drive with
+ * raid offload configured.
+ */
+static void hpsa_figure_phys_disk_ptrs(struct ctlr_info *h,
+                               struct hpsa_scsi_dev_t *dev[], int ndevices,
+                               struct hpsa_scsi_dev_t *logical_drive)
+{
+       struct raid_map_data *map = &logical_drive->raid_map;
+       struct raid_map_disk_data *dd = &map->data[0];
+       int i, j;
+       int total_disks_per_row = le16_to_cpu(map->data_disks_per_row) +
+                               le16_to_cpu(map->metadata_disks_per_row);
+       int nraid_map_entries = le16_to_cpu(map->row_cnt) *
+                               le16_to_cpu(map->layout_map_count) *
+                               total_disks_per_row;
+       int nphys_disk = le16_to_cpu(map->layout_map_count) *
+                               total_disks_per_row;
+       int qdepth;
+
+       if (nraid_map_entries > RAID_MAP_MAX_ENTRIES)
+               nraid_map_entries = RAID_MAP_MAX_ENTRIES;
+
+       qdepth = 0;
+       for (i = 0; i < nraid_map_entries; i++) {
+               logical_drive->phys_disk[i] = NULL;
+               if (!logical_drive->offload_config)
+                       continue;
+               for (j = 0; j < ndevices; j++) {
+                       if (dev[j]->devtype != TYPE_DISK)
+                               continue;
+                       if (is_logical_dev_addr_mode(dev[j]->scsi3addr))
+                               continue;
+                       if (dev[j]->ioaccel_handle != dd[i].ioaccel_handle)
+                               continue;
+
+                       logical_drive->phys_disk[i] = dev[j];
+                       if (i < nphys_disk)
+                               qdepth = min(h->nr_cmds, qdepth +
+                                   logical_drive->phys_disk[i]->queue_depth);
+                       break;
+               }
+
+               /*
+                * This can happen if a physical drive is removed and
+                * the logical drive is degraded.  In that case, the RAID
+                * map data will refer to a physical disk which isn't actually
+                * present, and offload_enabled should already be 0; we turn
+                * it off here just in case.
+                */
+               if (!logical_drive->phys_disk[i]) {
+                       logical_drive->offload_enabled = 0;
+                       logical_drive->queue_depth = h->nr_cmds;
+               }
+       }
+       if (nraid_map_entries)
+               /*
+                * This is correct for reads, too high for full stripe writes,
+                * way too high for partial stripe writes
+                */
+               logical_drive->queue_depth = qdepth;
+       else
+               logical_drive->queue_depth = h->nr_cmds;
+}
+
+static void hpsa_update_log_drive_phys_drive_ptrs(struct ctlr_info *h,
+                               struct hpsa_scsi_dev_t *dev[], int ndevices)
+{
+       int i;
+
+       for (i = 0; i < ndevices; i++) {
+               if (dev[i]->devtype != TYPE_DISK)
+                       continue;
+               if (!is_logical_dev_addr_mode(dev[i]->scsi3addr))
+                       continue;
+               hpsa_figure_phys_disk_ptrs(h, dev, ndevices, dev[i]);
+       }
+}
+
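A rough worked example of the queue-depth math above, with assumed numbers:
a volume whose raid map spans 4 physical disks, each reporting queue_depth
28, accumulates qdepth = min(h->nr_cmds, 28 + 28 + 28 + 28) = 112 (when
h->nr_cmds is larger); a degraded volume whose map names a missing disk
falls back to h->nr_cmds with offload disabled.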
 static void adjust_hpsa_scsi_table(struct ctlr_info *h, int hostno,
        struct hpsa_scsi_dev_t *sd[], int nsds)
 {
@@ -1445,8 +1523,12 @@ static int hpsa_slave_alloc(struct scsi_device *sdev)
        spin_lock_irqsave(&h->devlock, flags);
        sd = lookup_hpsa_scsi_dev(h, sdev_channel(sdev),
                sdev_id(sdev), sdev->lun);
-       if (sd != NULL)
+       if (sd != NULL) {
                sdev->hostdata = sd;
+               if (sd->queue_depth)
+                       scsi_change_queue_depth(sdev, sd->queue_depth);
+               atomic_set(&sd->ioaccel_cmds_out, 0);
+       }
        spin_unlock_irqrestore(&h->devlock, flags);
        return 0;
 }
@@ -1479,13 +1561,17 @@ static int hpsa_allocate_sg_chain_blocks(struct ctlr_info *h)
 
        h->cmd_sg_list = kzalloc(sizeof(*h->cmd_sg_list) * h->nr_cmds,
                                GFP_KERNEL);
-       if (!h->cmd_sg_list)
+       if (!h->cmd_sg_list) {
+               dev_err(&h->pdev->dev, "Failed to allocate SG list\n");
                return -ENOMEM;
+       }
        for (i = 0; i < h->nr_cmds; i++) {
                h->cmd_sg_list[i] = kmalloc(sizeof(*h->cmd_sg_list[i]) *
                                                h->chainsize, GFP_KERNEL);
-               if (!h->cmd_sg_list[i])
+               if (!h->cmd_sg_list[i]) {
+                       dev_err(&h->pdev->dev, "Failed to allocate cmd SG\n");
                        goto clean;
+               }
        }
        return 0;
 
@@ -1636,7 +1722,6 @@ static void process_ioaccel2_completion(struct ctlr_info *h,
                struct hpsa_scsi_dev_t *dev)
 {
        struct io_accel2_cmd *c2 = &h->ioaccel2_cmd_pool[c->cmdindex];
-       int raid_retry = 0;
 
        /* check for good status */
        if (likely(c2->error_data.serv_response == 0 &&
@@ -1653,26 +1738,22 @@ static void process_ioaccel2_completion(struct ctlr_info *h,
        if (is_logical_dev_addr_mode(dev->scsi3addr) &&
                c2->error_data.serv_response ==
                        IOACCEL2_SERV_RESPONSE_FAILURE) {
-               dev->offload_enabled = 0;
-               h->drv_req_rescan = 1;  /* schedule controller for a rescan */
-               cmd->result = DID_SOFT_ERROR << 16;
-               cmd_free(h, c);
-               cmd->scsi_done(cmd);
-               return;
-       }
-       raid_retry = handle_ioaccel_mode2_error(h, c, cmd, c2);
-       /* If error found, disable Smart Path, schedule a rescan,
-        * and force a retry on the standard path.
-        */
-       if (raid_retry) {
-               dev_warn(&h->pdev->dev, "%s: Retrying on standard path.\n",
-                       "HP SSD Smart Path");
-               dev->offload_enabled = 0; /* Disable Smart Path */
-               h->drv_req_rescan = 1;    /* schedule controller rescan */
-               cmd->result = DID_SOFT_ERROR << 16;
+               if (c2->error_data.status ==
+                       IOACCEL2_STATUS_SR_IOACCEL_DISABLED)
+                       dev->offload_enabled = 0;
+               goto retry_cmd;
        }
+
+       if (handle_ioaccel_mode2_error(h, c, cmd, c2))
+               goto retry_cmd;
+
        cmd_free(h, c);
        cmd->scsi_done(cmd);
+       return;
+
+retry_cmd:
+       INIT_WORK(&c->work, hpsa_command_resubmit_worker);
+       queue_work_on(raw_smp_processor_id(), h->resubmit_wq, &c->work);
 }
 
 static void complete_scsi_command(struct CommandList *cp)
@@ -1688,7 +1769,7 @@ static void complete_scsi_command(struct CommandList *cp)
        unsigned long sense_data_size;
 
        ei = cp->err_info;
-       cmd = (struct scsi_cmnd *) cp->scsi_cmd;
+       cmd = cp->scsi_cmd;
        h = cp->h;
        dev = cmd->device->hostdata;
 
@@ -1700,6 +1781,9 @@ static void complete_scsi_command(struct CommandList *cp)
        cmd->result = (DID_OK << 16);           /* host byte */
        cmd->result |= (COMMAND_COMPLETE << 8); /* msg byte */
 
+       if (cp->cmd_type == CMD_IOACCEL2 || cp->cmd_type == CMD_IOACCEL1)
+               atomic_dec(&cp->phys_disk->ioaccel_cmds_out);
+
        if (cp->cmd_type == CMD_IOACCEL2)
                return process_ioaccel2_completion(h, cp, cmd, dev);
 
@@ -1707,6 +1791,8 @@ static void complete_scsi_command(struct CommandList *cp)
 
        scsi_set_resid(cmd, ei->ResidualCnt);
        if (ei->CommandStatus == 0) {
+               if (cp->cmd_type == CMD_IOACCEL1)
+                       atomic_dec(&cp->phys_disk->ioaccel_cmds_out);
                cmd_free(h, cp);
                cmd->scsi_done(cmd);
                return;
@@ -1742,9 +1828,9 @@ static void complete_scsi_command(struct CommandList *cp)
                if (is_logical_dev_addr_mode(dev->scsi3addr)) {
                        if (ei->CommandStatus == CMD_IOACCEL_DISABLED)
                                dev->offload_enabled = 0;
-                       cmd->result = DID_SOFT_ERROR << 16;
-                       cmd_free(h, cp);
-                       cmd->scsi_done(cmd);
+                       INIT_WORK(&cp->work, hpsa_command_resubmit_worker);
+                       queue_work_on(raw_smp_processor_id(),
+                                       h->resubmit_wq, &cp->work);
                        return;
                }
        }
@@ -1801,9 +1887,8 @@ static void complete_scsi_command(struct CommandList *cp)
        case CMD_DATA_UNDERRUN: /* let mid layer handle it. */
                break;
        case CMD_DATA_OVERRUN:
-               dev_warn(&h->pdev->dev, "cp %p has"
-                       " completed with data overrun "
-                       "reported\n", cp);
+               dev_warn(&h->pdev->dev,
+                       "CDB %16phN data overrun\n", cp->Request.CDB);
                break;
        case CMD_INVALID: {
                /* print_bytes(cp, sizeof(*cp), 1, 0);
@@ -1819,34 +1904,38 @@ static void complete_scsi_command(struct CommandList *cp)
                break;
        case CMD_PROTOCOL_ERR:
                cmd->result = DID_ERROR << 16;
-               dev_warn(&h->pdev->dev, "cp %p has "
-                       "protocol error\n", cp);
+               dev_warn(&h->pdev->dev, "CDB %16phN : protocol error\n",
+                               cp->Request.CDB);
                break;
        case CMD_HARDWARE_ERR:
                cmd->result = DID_ERROR << 16;
-               dev_warn(&h->pdev->dev, "cp %p had  hardware error\n", cp);
+               dev_warn(&h->pdev->dev, "CDB %16phN : hardware error\n",
+                       cp->Request.CDB);
                break;
        case CMD_CONNECTION_LOST:
                cmd->result = DID_ERROR << 16;
-               dev_warn(&h->pdev->dev, "cp %p had connection lost\n", cp);
+               dev_warn(&h->pdev->dev, "CDB %16phN : connection lost\n",
+                       cp->Request.CDB);
                break;
        case CMD_ABORTED:
                cmd->result = DID_ABORT << 16;
-               dev_warn(&h->pdev->dev, "cp %p was aborted with status 0x%x\n",
-                               cp, ei->ScsiStatus);
+               dev_warn(&h->pdev->dev, "CDB %16phN was aborted with status 0x%x\n",
+                               cp->Request.CDB, ei->ScsiStatus);
                break;
        case CMD_ABORT_FAILED:
                cmd->result = DID_ERROR << 16;
-               dev_warn(&h->pdev->dev, "cp %p reports abort failed\n", cp);
+               dev_warn(&h->pdev->dev, "CDB %16phN : abort failed\n",
+                       cp->Request.CDB);
                break;
        case CMD_UNSOLICITED_ABORT:
                cmd->result = DID_SOFT_ERROR << 16; /* retry the command */
-               dev_warn(&h->pdev->dev, "cp %p aborted due to an unsolicited "
-                       "abort\n", cp);
+               dev_warn(&h->pdev->dev, "CDB %16phN : unsolicited abort\n",
+                       cp->Request.CDB);
                break;
        case CMD_TIMEOUT:
                cmd->result = DID_TIME_OUT << 16;
-               dev_warn(&h->pdev->dev, "cp %p timedout\n", cp);
+               dev_warn(&h->pdev->dev, "CDB %16phN timed out\n",
+                       cp->Request.CDB);
                break;
        case CMD_UNABORTABLE:
                cmd->result = DID_ERROR << 16;
@@ -2051,10 +2140,10 @@ static int hpsa_scsi_do_inquiry(struct ctlr_info *h, unsigned char *scsi3addr,
        struct CommandList *c;
        struct ErrorInfo *ei;
 
-       c = cmd_special_alloc(h);
+       c = cmd_alloc(h);
 
-       if (c == NULL) {                        /* trouble... */
-               dev_warn(&h->pdev->dev, "cmd_special_alloc returned NULL!\n");
+       if (c == NULL) {
+               dev_warn(&h->pdev->dev, "cmd_alloc returned NULL!\n");
                return -ENOMEM;
        }
 
@@ -2070,7 +2159,7 @@ static int hpsa_scsi_do_inquiry(struct ctlr_info *h, unsigned char *scsi3addr,
                rc = -1;
        }
 out:
-       cmd_special_free(h, c);
+       cmd_free(h, c);
        return rc;
 }
 
@@ -2082,10 +2171,9 @@ static int hpsa_bmic_ctrl_mode_sense(struct ctlr_info *h,
        struct CommandList *c;
        struct ErrorInfo *ei;
 
-       c = cmd_special_alloc(h);
-
+       c = cmd_alloc(h);
        if (c == NULL) {                        /* trouble... */
-               dev_warn(&h->pdev->dev, "cmd_special_alloc returned NULL!\n");
+               dev_warn(&h->pdev->dev, "cmd_alloc returned NULL!\n");
                return -ENOMEM;
        }
 
@@ -2101,7 +2189,7 @@ static int hpsa_bmic_ctrl_mode_sense(struct ctlr_info *h,
                rc = -1;
        }
 out:
-       cmd_special_free(h, c);
+       cmd_free(h, c);
        return rc;
        }
 
@@ -2112,10 +2200,10 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr,
        struct CommandList *c;
        struct ErrorInfo *ei;
 
-       c = cmd_special_alloc(h);
+       c = cmd_alloc(h);
 
        if (c == NULL) {                        /* trouble... */
-               dev_warn(&h->pdev->dev, "cmd_special_alloc returned NULL!\n");
+               dev_warn(&h->pdev->dev, "cmd_alloc returned NULL!\n");
                return -ENOMEM;
        }
 
@@ -2131,7 +2219,7 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr,
                hpsa_scsi_interpret_error(h, c);
                rc = -1;
        }
-       cmd_special_free(h, c);
+       cmd_free(h, c);
        return rc;
 }
 
@@ -2239,26 +2327,26 @@ static int hpsa_get_raid_map(struct ctlr_info *h,
        struct CommandList *c;
        struct ErrorInfo *ei;
 
-       c = cmd_special_alloc(h);
+       c = cmd_alloc(h);
        if (c == NULL) {
-               dev_warn(&h->pdev->dev, "cmd_special_alloc returned NULL!\n");
+               dev_warn(&h->pdev->dev, "cmd_alloc returned NULL!\n");
                return -ENOMEM;
        }
        if (fill_cmd(c, HPSA_GET_RAID_MAP, h, &this_device->raid_map,
                        sizeof(this_device->raid_map), 0,
                        scsi3addr, TYPE_CMD)) {
                dev_warn(&h->pdev->dev, "Out of memory in hpsa_get_raid_map()\n");
-               cmd_special_free(h, c);
+               cmd_free(h, c);
                return -ENOMEM;
        }
        hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
        ei = c->err_info;
        if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) {
                hpsa_scsi_interpret_error(h, c);
-               cmd_special_free(h, c);
+               cmd_free(h, c);
                return -1;
        }
-       cmd_special_free(h, c);
+       cmd_free(h, c);
 
        /* @todo in the future, dynamically allocate RAID map memory */
        if (le32_to_cpu(this_device->raid_map.structure_size) >
@@ -2270,6 +2358,34 @@ static int hpsa_get_raid_map(struct ctlr_info *h,
        return rc;
 }
 
+static int hpsa_bmic_id_physical_device(struct ctlr_info *h,
+               unsigned char scsi3addr[], u16 bmic_device_index,
+               struct bmic_identify_physical_device *buf, size_t bufsize)
+{
+       int rc = IO_OK;
+       struct CommandList *c;
+       struct ErrorInfo *ei;
+
+       c = cmd_alloc(h);
+       rc = fill_cmd(c, BMIC_IDENTIFY_PHYSICAL_DEVICE, h, buf, bufsize,
+               0, RAID_CTLR_LUNID, TYPE_CMD);
+       if (rc)
+               goto out;
+
+       c->Request.CDB[2] = bmic_device_index & 0xff;
+       c->Request.CDB[9] = (bmic_device_index >> 8) & 0xff;
+
+       hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
+       ei = c->err_info;
+       if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) {
+               hpsa_scsi_interpret_error(h, c);
+               rc = -1;
+       }
+out:
+       cmd_free(h, c);
+       return rc;
+}
+
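The 16-bit bmic_device_index above is split across two CDB bytes, low byte
in CDB[2] and high byte in CDB[9]; for example, index 0x0123 is encoded as
CDB[2] = 0x23, CDB[9] = 0x01.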
 static int hpsa_vpd_page_supported(struct ctlr_info *h,
        unsigned char scsi3addr[], u8 page)
 {
@@ -2370,7 +2486,7 @@ static int hpsa_get_device_id(struct ctlr_info *h, unsigned char *scsi3addr,
 }
 
 static int hpsa_scsi_do_report_luns(struct ctlr_info *h, int logical,
-               struct ReportLUNdata *buf, int bufsize,
+               void *buf, int bufsize,
                int extended_response)
 {
        int rc = IO_OK;
@@ -2378,9 +2494,9 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info *h, int logical,
        unsigned char scsi3addr[8];
        struct ErrorInfo *ei;
 
-       c = cmd_special_alloc(h);
+       c = cmd_alloc(h);
        if (c == NULL) {                        /* trouble... */
-               dev_err(&h->pdev->dev, "cmd_special_alloc returned NULL!\n");
+               dev_err(&h->pdev->dev, "cmd_alloc returned NULL!\n");
                return -1;
        }
        /* address the controller */
@@ -2399,24 +2515,26 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info *h, int logical,
                hpsa_scsi_interpret_error(h, c);
                rc = -1;
        } else {
-               if (buf->extended_response_flag != extended_response) {
+               struct ReportLUNdata *rld = buf;
+
+               if (rld->extended_response_flag != extended_response) {
                        dev_err(&h->pdev->dev,
                                "report luns requested format %u, got %u\n",
                                extended_response,
-                               buf->extended_response_flag);
+                               rld->extended_response_flag);
                        rc = -1;
                }
        }
 out:
-       cmd_special_free(h, c);
+       cmd_free(h, c);
        return rc;
 }
 
 static inline int hpsa_scsi_do_report_phys_luns(struct ctlr_info *h,
-               struct ReportLUNdata *buf,
-               int bufsize, int extended_response)
+               struct ReportExtendedLUNdata *buf, int bufsize)
 {
-       return hpsa_scsi_do_report_luns(h, 0, buf, bufsize, extended_response);
+       return hpsa_scsi_do_report_luns(h, 0, buf, bufsize,
+                                               HPSA_REPORT_PHYS_EXTENDED);
 }
 
 static inline int hpsa_scsi_do_report_log_luns(struct ctlr_info *h,
@@ -2591,6 +2709,7 @@ static int hpsa_update_device_info(struct ctlr_info *h,
                this_device->offload_config = 0;
                this_device->offload_enabled = 0;
                this_device->volume_offline = 0;
+               this_device->queue_depth = h->nr_cmds;
        }
 
        if (is_OBDR_device) {
@@ -2733,7 +2852,6 @@ static int hpsa_get_pdisk_of_ioaccel2(struct ctlr_info *h,
 {
        struct ReportExtendedLUNdata *physicals = NULL;
        int responsesize = 24;  /* size of physical extended response */
-       int extended = 2;       /* flag forces reporting 'other dev info'. */
        int reportsize = sizeof(*physicals) + HPSA_MAX_PHYS_LUN * responsesize;
        u32 nphysicals = 0;     /* number of reported physical devs */
        int found = 0;          /* found match (1) or not (0) */
@@ -2780,8 +2898,7 @@ static int hpsa_get_pdisk_of_ioaccel2(struct ctlr_info *h,
        physicals = kzalloc(reportsize, GFP_KERNEL);
        if (physicals == NULL)
                return 0;
-       if (hpsa_scsi_do_report_phys_luns(h, (struct ReportLUNdata *) physicals,
-               reportsize, extended)) {
+       if (hpsa_scsi_do_report_phys_luns(h, physicals, reportsize)) {
                dev_err(&h->pdev->dev,
                        "Can't lookup %s device handle: report physical LUNs failed.\n",
                        "HP SSD Smart Path");
@@ -2822,34 +2939,20 @@ static int hpsa_get_pdisk_of_ioaccel2(struct ctlr_info *h,
  * Returns 0 on success, -1 otherwise.
  */
 static int hpsa_gather_lun_info(struct ctlr_info *h,
-       int reportphyslunsize, int reportloglunsize,
-       struct ReportLUNdata *physdev, u32 *nphysicals, int *physical_mode,
+       struct ReportExtendedLUNdata *physdev, u32 *nphysicals,
        struct ReportLUNdata *logdev, u32 *nlogicals)
 {
-       int physical_entry_size = 8;
-
-       *physical_mode = 0;
-
-       /* For I/O accelerator mode we need to read physical device handles */
-       if (h->transMethod & CFGTBL_Trans_io_accel1 ||
-               h->transMethod & CFGTBL_Trans_io_accel2) {
-               *physical_mode = HPSA_REPORT_PHYS_EXTENDED;
-               physical_entry_size = 24;
-       }
-       if (hpsa_scsi_do_report_phys_luns(h, physdev, reportphyslunsize,
-                                                       *physical_mode)) {
+       if (hpsa_scsi_do_report_phys_luns(h, physdev, sizeof(*physdev))) {
                dev_err(&h->pdev->dev, "report physical LUNs failed.\n");
                return -1;
        }
-       *nphysicals = be32_to_cpu(*((__be32 *)physdev->LUNListLength)) /
-                                                       physical_entry_size;
+       *nphysicals = be32_to_cpu(*((__be32 *)physdev->LUNListLength)) / 24;
        if (*nphysicals > HPSA_MAX_PHYS_LUN) {
-               dev_warn(&h->pdev->dev, "maximum physical LUNs (%d) exceeded."
-                       "  %d LUNs ignored.\n", HPSA_MAX_PHYS_LUN,
-                       *nphysicals - HPSA_MAX_PHYS_LUN);
+               dev_warn(&h->pdev->dev, "maximum physical LUNs (%d) exceeded. %d LUNs ignored.\n",
+                       HPSA_MAX_PHYS_LUN, *nphysicals - HPSA_MAX_PHYS_LUN);
                *nphysicals = HPSA_MAX_PHYS_LUN;
        }
-       if (hpsa_scsi_do_report_log_luns(h, logdev, reportloglunsize)) {
+       if (hpsa_scsi_do_report_log_luns(h, logdev, sizeof(*logdev))) {
                dev_err(&h->pdev->dev, "report logical LUNs failed.\n");
                return -1;
        }
@@ -2922,6 +3025,33 @@ static int hpsa_hba_mode_enabled(struct ctlr_info *h)
        return hba_mode_enabled;
 }
 
+/* get physical drive ioaccel handle and queue depth */
+static void hpsa_get_ioaccel_drive_info(struct ctlr_info *h,
+               struct hpsa_scsi_dev_t *dev,
+               u8 *lunaddrbytes,
+               struct bmic_identify_physical_device *id_phys)
+{
+       int rc;
+       struct ext_report_lun_entry *rle =
+               (struct ext_report_lun_entry *) lunaddrbytes;
+
+       dev->ioaccel_handle = rle->ioaccel_handle;
+       memset(id_phys, 0, sizeof(*id_phys));
+       rc = hpsa_bmic_id_physical_device(h, lunaddrbytes,
+                       GET_BMIC_DRIVE_NUMBER(lunaddrbytes), id_phys,
+                       sizeof(*id_phys));
+       if (!rc)
+               /* Reserve space for FW operations */
+#define DRIVE_CMDS_RESERVED_FOR_FW 2
+#define DRIVE_QUEUE_DEPTH 7
+               dev->queue_depth =
+                       le16_to_cpu(id_phys->current_queue_depth_limit) -
+                               DRIVE_CMDS_RESERVED_FOR_FW;
+       else
+               dev->queue_depth = DRIVE_QUEUE_DEPTH; /* conservative */
+       atomic_set(&dev->ioaccel_cmds_out, 0);
+}
+
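Worked example with an assumed firmware value: a drive reporting
current_queue_depth_limit = 31 is exposed with queue_depth 31 - 2 = 29,
keeping DRIVE_CMDS_RESERVED_FOR_FW = 2 in reserve; if the BMIC inquiry
fails, the conservative fallback DRIVE_QUEUE_DEPTH = 7 is used instead.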
 static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
 {
        /* the idea here is we could get notified
@@ -2936,9 +3066,9 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
         */
        struct ReportExtendedLUNdata *physdev_list = NULL;
        struct ReportLUNdata *logdev_list = NULL;
+       struct bmic_identify_physical_device *id_phys = NULL;
        u32 nphysicals = 0;
        u32 nlogicals = 0;
-       int physical_mode = 0;
        u32 ndev_allocated = 0;
        struct hpsa_scsi_dev_t **currentsd, *this_device, *tmpdevice;
        int ncurrent = 0;
@@ -2951,8 +3081,10 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
        physdev_list = kzalloc(sizeof(*physdev_list), GFP_KERNEL);
        logdev_list = kzalloc(sizeof(*logdev_list), GFP_KERNEL);
        tmpdevice = kzalloc(sizeof(*tmpdevice), GFP_KERNEL);
+       id_phys = kzalloc(sizeof(*id_phys), GFP_KERNEL);
 
-       if (!currentsd || !physdev_list || !logdev_list || !tmpdevice) {
+       if (!currentsd || !physdev_list || !logdev_list ||
+               !tmpdevice || !id_phys) {
                dev_err(&h->pdev->dev, "out of memory\n");
                goto out;
        }
@@ -2969,10 +3101,8 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
 
        h->hba_mode_enabled = rescan_hba_mode;
 
-       if (hpsa_gather_lun_info(h,
-                       sizeof(*physdev_list), sizeof(*logdev_list),
-                       (struct ReportLUNdata *) physdev_list, &nphysicals,
-                       &physical_mode, logdev_list, &nlogicals))
+       if (hpsa_gather_lun_info(h, physdev_list, &nphysicals,
+                       logdev_list, &nlogicals))
                goto out;
 
        /* We might see up to the maximum number of logical and physical disks
@@ -3069,10 +3199,11 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
                                ncurrent++;
                                break;
                        }
-                       if (physical_mode == HPSA_REPORT_PHYS_EXTENDED) {
-                               memcpy(&this_device->ioaccel_handle,
-                                       &lunaddrbytes[20],
-                                       sizeof(this_device->ioaccel_handle));
+                       if (h->transMethod & CFGTBL_Trans_io_accel1 ||
+                               h->transMethod & CFGTBL_Trans_io_accel2) {
+                               hpsa_get_ioaccel_drive_info(h, this_device,
+                                                       lunaddrbytes, id_phys);
+                               atomic_set(&this_device->ioaccel_cmds_out, 0);
                                ncurrent++;
                        }
                        break;
@@ -3096,6 +3227,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
                if (ncurrent >= HPSA_MAX_DEVICES)
                        break;
        }
+       hpsa_update_log_drive_phys_drive_ptrs(h, currentsd, ncurrent);
        adjust_hpsa_scsi_table(h, hostno, currentsd, ncurrent);
 out:
        kfree(tmpdevice);
@@ -3104,9 +3236,22 @@ out:
        kfree(currentsd);
        kfree(physdev_list);
        kfree(logdev_list);
+       kfree(id_phys);
 }
 
-/* hpsa_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci
+static void hpsa_set_sg_descriptor(struct SGDescriptor *desc,
+                                  struct scatterlist *sg)
+{
+       u64 addr64 = (u64) sg_dma_address(sg);
+       unsigned int len = sg_dma_len(sg);
+
+       desc->Addr = cpu_to_le64(addr64);
+       desc->Len = cpu_to_le32(len);
+       desc->Ext = 0;
+}
+
+/*
+ * hpsa_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci
  * dma mapping  and fills in the scatter gather entries of the
  * hpsa command, cp.
  */
@@ -3114,9 +3259,7 @@ static int hpsa_scatter_gather(struct ctlr_info *h,
                struct CommandList *cp,
                struct scsi_cmnd *cmd)
 {
-       unsigned int len;
        struct scatterlist *sg;
-       u64 addr64;
        int use_sg, i, sg_index, chained;
        struct SGDescriptor *curr_sg;
 
@@ -3139,13 +3282,11 @@ static int hpsa_scatter_gather(struct ctlr_info *h,
                        curr_sg = h->cmd_sg_list[cp->cmdindex];
                        sg_index = 0;
                }
-               addr64 = (u64) sg_dma_address(sg);
-               len  = sg_dma_len(sg);
-               curr_sg->Addr = cpu_to_le64(addr64);
-               curr_sg->Len = cpu_to_le32(len);
-               curr_sg->Ext = cpu_to_le32(0);
+               hpsa_set_sg_descriptor(curr_sg, sg);
                curr_sg++;
        }
+
+       /* Back the pointer up to the last entry and mark it as "last". */
        (--curr_sg)->Ext = cpu_to_le32(HPSA_SG_LAST);
 
        if (use_sg + chained > h->maxSG)
@@ -3164,7 +3305,7 @@ static int hpsa_scatter_gather(struct ctlr_info *h,
 sglist_finished:
 
        cp->Header.SGList = (u8) use_sg;   /* no. SGs contig in this cmd */
-       cp->Header.SGTotal = cpu_to_le16(use_sg); /* total sgs in this cmd list */
+       cp->Header.SGTotal = cpu_to_le16(use_sg); /* total sgs in cmd list */
        return 0;
 }
 
@@ -3218,7 +3359,7 @@ static int fixup_ioaccel_cdb(u8 *cdb, int *cdb_len)
 
 static int hpsa_scsi_ioaccel1_queue_command(struct ctlr_info *h,
        struct CommandList *c, u32 ioaccel_handle, u8 *cdb, int cdb_len,
-       u8 *scsi3addr)
+       u8 *scsi3addr, struct hpsa_scsi_dev_t *phys_disk)
 {
        struct scsi_cmnd *cmd = c->scsi_cmd;
        struct io_accel1_cmd *cp = &h->ioaccel_cmd_pool[c->cmdindex];
@@ -3231,13 +3372,17 @@ static int hpsa_scsi_ioaccel1_queue_command(struct ctlr_info *h,
        u32 control = IOACCEL1_CONTROL_SIMPLEQUEUE;
 
        /* TODO: implement chaining support */
-       if (scsi_sg_count(cmd) > h->ioaccel_maxsg)
+       if (scsi_sg_count(cmd) > h->ioaccel_maxsg) {
+               atomic_dec(&phys_disk->ioaccel_cmds_out);
                return IO_ACCEL_INELIGIBLE;
+       }
 
        BUG_ON(cmd->cmd_len > IOACCEL1_IOFLAGS_CDBLEN_MAX);
 
-       if (fixup_ioaccel_cdb(cdb, &cdb_len))
+       if (fixup_ioaccel_cdb(cdb, &cdb_len)) {
+               atomic_dec(&phys_disk->ioaccel_cmds_out);
                return IO_ACCEL_INELIGIBLE;
+       }
 
        c->cmd_type = CMD_IOACCEL1;
 
@@ -3247,8 +3392,10 @@ static int hpsa_scsi_ioaccel1_queue_command(struct ctlr_info *h,
        BUG_ON(c->busaddr & 0x0000007F);
 
        use_sg = scsi_dma_map(cmd);
-       if (use_sg < 0)
+       if (use_sg < 0) {
+               atomic_dec(&phys_disk->ioaccel_cmds_out);
                return use_sg;
+       }
 
        if (use_sg) {
                curr_sg = cp->SG;
@@ -3307,8 +3454,10 @@ static int hpsa_scsi_ioaccel_direct_map(struct ctlr_info *h,
        struct scsi_cmnd *cmd = c->scsi_cmd;
        struct hpsa_scsi_dev_t *dev = cmd->device->hostdata;
 
+       c->phys_disk = dev;
+
        return hpsa_scsi_ioaccel_queue_command(h, c, dev->ioaccel_handle,
-               cmd->cmnd, cmd->cmd_len, dev->scsi3addr);
+               cmd->cmnd, cmd->cmd_len, dev->scsi3addr, dev);
 }
 
 /*
@@ -3322,8 +3471,6 @@ static void set_encrypt_ioaccel2(struct ctlr_info *h,
        struct raid_map_data *map = &dev->raid_map;
        u64 first_block;
 
-       BUG_ON(!(dev->offload_config && dev->offload_enabled));
-
        /* Are we doing encryption on this device */
        if (!(le16_to_cpu(map->flags) & RAID_MAP_FLAG_ENCRYPT_ON))
                return;
@@ -3372,7 +3519,7 @@ static void set_encrypt_ioaccel2(struct ctlr_info *h,
 
 static int hpsa_scsi_ioaccel2_queue_command(struct ctlr_info *h,
        struct CommandList *c, u32 ioaccel_handle, u8 *cdb, int cdb_len,
-       u8 *scsi3addr)
+       u8 *scsi3addr, struct hpsa_scsi_dev_t *phys_disk)
 {
        struct scsi_cmnd *cmd = c->scsi_cmd;
        struct io_accel2_cmd *cp = &h->ioaccel2_cmd_pool[c->cmdindex];
@@ -3383,11 +3530,16 @@ static int hpsa_scsi_ioaccel2_queue_command(struct ctlr_info *h,
        u32 len;
        u32 total_len = 0;
 
-       if (scsi_sg_count(cmd) > h->ioaccel_maxsg)
+       if (scsi_sg_count(cmd) > h->ioaccel_maxsg) {
+               atomic_dec(&phys_disk->ioaccel_cmds_out);
                return IO_ACCEL_INELIGIBLE;
+       }
 
-       if (fixup_ioaccel_cdb(cdb, &cdb_len))
+       if (fixup_ioaccel_cdb(cdb, &cdb_len)) {
+               atomic_dec(&phys_disk->ioaccel_cmds_out);
                return IO_ACCEL_INELIGIBLE;
+       }
+
        c->cmd_type = CMD_IOACCEL2;
        /* Adjust the DMA address to point to the accelerated command buffer */
        c->busaddr = (u32) h->ioaccel2_cmd_pool_dhandle +
@@ -3398,8 +3550,10 @@ static int hpsa_scsi_ioaccel2_queue_command(struct ctlr_info *h,
        cp->IU_type = IOACCEL2_IU_TYPE;
 
        use_sg = scsi_dma_map(cmd);
-       if (use_sg < 0)
+       if (use_sg < 0) {
+               atomic_dec(&phys_disk->ioaccel_cmds_out);
                return use_sg;
+       }
 
        if (use_sg) {
                BUG_ON(use_sg > IOACCEL2_MAXSGENTRIES);
@@ -3445,8 +3599,7 @@ static int hpsa_scsi_ioaccel2_queue_command(struct ctlr_info *h,
        set_encrypt_ioaccel2(h, c, cp);
 
        cp->scsi_nexus = cpu_to_le32(ioaccel_handle);
-       cp->Tag = cpu_to_le32(c->cmdindex << DIRECT_LOOKUP_SHIFT |
-                               DIRECT_LOOKUP_BIT);
+       cp->Tag = cpu_to_le32(c->cmdindex << DIRECT_LOOKUP_SHIFT);
        memcpy(cp->cdb, cdb, sizeof(cp->cdb));
 
        /* fill in sg elements */
@@ -3466,14 +3619,22 @@ static int hpsa_scsi_ioaccel2_queue_command(struct ctlr_info *h,
  */
 static int hpsa_scsi_ioaccel_queue_command(struct ctlr_info *h,
        struct CommandList *c, u32 ioaccel_handle, u8 *cdb, int cdb_len,
-       u8 *scsi3addr)
+       u8 *scsi3addr, struct hpsa_scsi_dev_t *phys_disk)
 {
+       /* Try to honor the device's queue depth */
+       if (atomic_inc_return(&phys_disk->ioaccel_cmds_out) >
+                                       phys_disk->queue_depth) {
+               atomic_dec(&phys_disk->ioaccel_cmds_out);
+               return IO_ACCEL_INELIGIBLE;
+       }
        if (h->transMethod & CFGTBL_Trans_io_accel1)
                return hpsa_scsi_ioaccel1_queue_command(h, c, ioaccel_handle,
-                                               cdb, cdb_len, scsi3addr);
+                                               cdb, cdb_len, scsi3addr,
+                                               phys_disk);
        else
                return hpsa_scsi_ioaccel2_queue_command(h, c, ioaccel_handle,
-                                               cdb, cdb_len, scsi3addr);
+                                               cdb, cdb_len, scsi3addr,
+                                               phys_disk);
 }
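
The inc-then-test pattern above is a lock-free admission check; a generic
sketch of the same technique (helper name illustrative):

	static bool try_acquire_slot(atomic_t *out, int limit)
	{
		if (atomic_inc_return(out) > limit) {
			atomic_dec(out);	/* over budget: undo, fall back */
			return false;
		}
		return true;	/* caller must atomic_dec() on completion */
	}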
 
 static void raid_map_helper(struct raid_map_data *map,
@@ -3540,8 +3701,6 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h,
 #endif
        int offload_to_mirror;
 
-       BUG_ON(!(dev->offload_config && dev->offload_enabled));
-
        /* check for valid opcode, get LBA and block count */
        switch (cmd->cmnd[0]) {
        case WRITE_6:
@@ -3777,6 +3936,11 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h,
                return IO_ACCEL_INELIGIBLE;
        }
 
+       if (unlikely(map_index >= RAID_MAP_MAX_ENTRIES))
+               return IO_ACCEL_INELIGIBLE;
+
+       c->phys_disk = dev->phys_disk[map_index];
+
        disk_handle = dd[map_index].ioaccel_handle;
        disk_block = le64_to_cpu(map->disk_starting_blk) +
                        first_row * le16_to_cpu(map->strip_size) +
@@ -3824,78 +3988,21 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h,
                cdb_len = 10;
        }
        return hpsa_scsi_ioaccel_queue_command(h, c, disk_handle, cdb, cdb_len,
-                                               dev->scsi3addr);
+                                               dev->scsi3addr,
+                                               dev->phys_disk[map_index]);
 }
 
-/*
- * Running in struct Scsi_Host->host_lock less mode using LLD internal
- * struct ctlr_info *h->lock w/ spin_lock_irqsave() protection.
- */
-static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
+/* Submit commands down the "normal" RAID stack path */
+static int hpsa_ciss_submit(struct ctlr_info *h,
+       struct CommandList *c, struct scsi_cmnd *cmd,
+       unsigned char scsi3addr[])
 {
-       struct ctlr_info *h;
-       struct hpsa_scsi_dev_t *dev;
-       unsigned char scsi3addr[8];
-       struct CommandList *c;
-       int rc = 0;
-
-       /* Get the ptr to our adapter structure out of cmd->host. */
-       h = sdev_to_hba(cmd->device);
-       dev = cmd->device->hostdata;
-       if (!dev) {
-               cmd->result = DID_NO_CONNECT << 16;
-               cmd->scsi_done(cmd);
-               return 0;
-       }
-       memcpy(scsi3addr, dev->scsi3addr, sizeof(scsi3addr));
-
-       if (unlikely(lockup_detected(h))) {
-               cmd->result = DID_ERROR << 16;
-               cmd->scsi_done(cmd);
-               return 0;
-       }
-       c = cmd_alloc(h);
-       if (c == NULL) {                        /* trouble... */
-               dev_err(&h->pdev->dev, "cmd_alloc returned NULL!\n");
-               return SCSI_MLQUEUE_HOST_BUSY;
-       }
-
-       /* Fill in the command list header */
-       /* save c in case we have to abort it  */
        cmd->host_scribble = (unsigned char *) c;
-
        c->cmd_type = CMD_SCSI;
        c->scsi_cmd = cmd;
-
-       /* Call alternate submit routine for I/O accelerated commands.
-        * Retries always go down the normal I/O path.
-        */
-       if (likely(cmd->retries == 0 &&
-               cmd->request->cmd_type == REQ_TYPE_FS &&
-               h->acciopath_status)) {
-               if (dev->offload_enabled) {
-                       rc = hpsa_scsi_ioaccel_raid_map(h, c);
-                       if (rc == 0)
-                               return 0; /* Sent on ioaccel path */
-                       if (rc < 0) {   /* scsi_dma_map failed. */
-                               cmd_free(h, c);
-                               return SCSI_MLQUEUE_HOST_BUSY;
-                       }
-               } else if (dev->ioaccel_handle) {
-                       rc = hpsa_scsi_ioaccel_direct_map(h, c);
-                       if (rc == 0)
-                               return 0; /* Sent on direct map path */
-                       if (rc < 0) {   /* scsi_dma_map failed. */
-                               cmd_free(h, c);
-                               return SCSI_MLQUEUE_HOST_BUSY;
-                       }
-               }
-       }
-
        c->Header.ReplyQueue = 0;  /* unused in simple mode */
        memcpy(&c->Header.LUN.LunAddrBytes[0], &scsi3addr[0], 8);
-       c->Header.tag = cpu_to_le64((c->cmdindex << DIRECT_LOOKUP_SHIFT) |
-                                       DIRECT_LOOKUP_BIT);
+       c->Header.tag = cpu_to_le64((c->cmdindex << DIRECT_LOOKUP_SHIFT));
 
        /* Fill in the request block... */
 
@@ -3951,74 +4058,175 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
        return 0;
 }
 
-static int do_not_scan_if_controller_locked_up(struct ctlr_info *h)
-{
-       unsigned long flags;
-
-       /*
-        * Don't let rescans be initiated on a controller known
-        * to be locked up.  If the controller locks up *during*
-        * a rescan, that thread is probably hosed, but at least
-        * we can prevent new rescan threads from piling up on a
-        * locked up controller.
-        */
-       if (unlikely(lockup_detected(h))) {
-               spin_lock_irqsave(&h->scan_lock, flags);
-               h->scan_finished = 1;
-               wake_up_all(&h->scan_wait_queue);
-               spin_unlock_irqrestore(&h->scan_lock, flags);
-               return 1;
-       }
-       return 0;
-}
-
-static void hpsa_scan_start(struct Scsi_Host *sh)
+static void hpsa_command_resubmit_worker(struct work_struct *work)
 {
-       struct ctlr_info *h = shost_to_hba(sh);
-       unsigned long flags;
+       struct scsi_cmnd *cmd;
+       struct hpsa_scsi_dev_t *dev;
+       struct CommandList *c =
+                       container_of(work, struct CommandList, work);
 
-       if (do_not_scan_if_controller_locked_up(h))
+       cmd = c->scsi_cmd;
+       dev = cmd->device->hostdata;
+       if (!dev) {
+               cmd->result = DID_NO_CONNECT << 16;
+               cmd->scsi_done(cmd);
                return;
-
-       /* wait until any scan already in progress is finished. */
-       while (1) {
-               spin_lock_irqsave(&h->scan_lock, flags);
-               if (h->scan_finished)
-                       break;
-               spin_unlock_irqrestore(&h->scan_lock, flags);
-               wait_event(h->scan_wait_queue, h->scan_finished);
-               /* Note: We don't need to worry about a race between this
-                * thread and driver unload because the midlayer will
-                * have incremented the reference count, so unload won't
-                * happen if we're in here.
+       }
+       if (hpsa_ciss_submit(c->h, c, cmd, dev->scsi3addr)) {
+               /*
+                * If we get here, it means dma mapping failed. Try
+                * again via scsi mid layer, which will then get
+                * SCSI_MLQUEUE_HOST_BUSY.
                 */
+               cmd->result = DID_IMM_RETRY << 16;
+               cmd->scsi_done(cmd);
        }
-       h->scan_finished = 0; /* mark scan as in progress */
-       spin_unlock_irqrestore(&h->scan_lock, flags);
-
-       if (do_not_scan_if_controller_locked_up(h))
-               return;
-
-       hpsa_update_scsi_devices(h, h->scsi_host->host_no);
-
-       spin_lock_irqsave(&h->scan_lock, flags);
-       h->scan_finished = 1; /* mark scan as finished. */
-       wake_up_all(&h->scan_wait_queue);
-       spin_unlock_irqrestore(&h->scan_lock, flags);
 }
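
DID_IMM_RETRY asks the SCSI midlayer to resend the command without
consuming one of its retries, so a command whose resubmission fails here
simply re-enters hpsa_scsi_queue_command() via the normal path.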
 
-static int hpsa_scan_finished(struct Scsi_Host *sh,
-       unsigned long elapsed_time)
+/* Running in struct Scsi_Host->host_lock less mode */
+static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
 {
-       struct ctlr_info *h = shost_to_hba(sh);
-       unsigned long flags;
-       int finished;
+       struct ctlr_info *h;
+       struct hpsa_scsi_dev_t *dev;
+       unsigned char scsi3addr[8];
+       struct CommandList *c;
+       int rc = 0;
 
-       spin_lock_irqsave(&h->scan_lock, flags);
-       finished = h->scan_finished;
-       spin_unlock_irqrestore(&h->scan_lock, flags);
-       return finished;
-}
+       /* Get the ptr to our adapter structure out of cmd->host. */
+       h = sdev_to_hba(cmd->device);
+       dev = cmd->device->hostdata;
+       if (!dev) {
+               cmd->result = DID_NO_CONNECT << 16;
+               cmd->scsi_done(cmd);
+               return 0;
+       }
+       memcpy(scsi3addr, dev->scsi3addr, sizeof(scsi3addr));
+
+       if (unlikely(lockup_detected(h))) {
+               cmd->result = DID_ERROR << 16;
+               cmd->scsi_done(cmd);
+               return 0;
+       }
+       c = cmd_alloc(h);
+       if (c == NULL) {                        /* trouble... */
+               dev_err(&h->pdev->dev, "cmd_alloc returned NULL!\n");
+               return SCSI_MLQUEUE_HOST_BUSY;
+       }
+       if (unlikely(lockup_detected(h))) {
+               cmd->result = DID_ERROR << 16;
+               cmd_free(h, c);
+               cmd->scsi_done(cmd);
+               return 0;
+       }
+
+       /*
+        * Call alternate submit routine for I/O accelerated commands.
+        * Retries always go down the normal I/O path.
+        */
+       if (likely(cmd->retries == 0 &&
+               cmd->request->cmd_type == REQ_TYPE_FS &&
+               h->acciopath_status)) {
+
+               cmd->host_scribble = (unsigned char *) c;
+               c->cmd_type = CMD_SCSI;
+               c->scsi_cmd = cmd;
+
+               if (dev->offload_enabled) {
+                       rc = hpsa_scsi_ioaccel_raid_map(h, c);
+                       if (rc == 0)
+                               return 0; /* Sent on ioaccel path */
+                       if (rc < 0) {   /* scsi_dma_map failed. */
+                               cmd_free(h, c);
+                               return SCSI_MLQUEUE_HOST_BUSY;
+                       }
+               } else if (dev->ioaccel_handle) {
+                       rc = hpsa_scsi_ioaccel_direct_map(h, c);
+                       if (rc == 0)
+                               return 0; /* Sent on direct map path */
+                       if (rc < 0) {   /* scsi_dma_map failed. */
+                               cmd_free(h, c);
+                               return SCSI_MLQUEUE_HOST_BUSY;
+                       }
+               }
+       }
+       return hpsa_ciss_submit(h, c, cmd, scsi3addr);
+}
+
+static void hpsa_scan_complete(struct ctlr_info *h)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&h->scan_lock, flags);
+       h->scan_finished = 1;
+       wake_up_all(&h->scan_wait_queue);
+       spin_unlock_irqrestore(&h->scan_lock, flags);
+}
+
+static void hpsa_scan_start(struct Scsi_Host *sh)
+{
+       struct ctlr_info *h = shost_to_hba(sh);
+       unsigned long flags;
+
+       /*
+        * Don't let rescans be initiated on a controller known to be locked
+        * up.  If the controller locks up *during* a rescan, that thread is
+        * probably hosed, but at least we can prevent new rescan threads from
+        * piling up on a locked up controller.
+        */
+       if (unlikely(lockup_detected(h)))
+               return hpsa_scan_complete(h);
+
+       /* wait until any scan already in progress is finished. */
+       while (1) {
+               spin_lock_irqsave(&h->scan_lock, flags);
+               if (h->scan_finished)
+                       break;
+               spin_unlock_irqrestore(&h->scan_lock, flags);
+               wait_event(h->scan_wait_queue, h->scan_finished);
+               /* Note: We don't need to worry about a race between this
+                * thread and driver unload because the midlayer will
+                * have incremented the reference count, so unload won't
+                * happen if we're in here.
+                */
+       }
+       h->scan_finished = 0; /* mark scan as in progress */
+       spin_unlock_irqrestore(&h->scan_lock, flags);
+
+       if (unlikely(lockup_detected(h)))
+               return hpsa_scan_complete(h);
+
+       hpsa_update_scsi_devices(h, h->scsi_host->host_no);
+
+       hpsa_scan_complete(h);
+}
+
+static int hpsa_change_queue_depth(struct scsi_device *sdev, int qdepth)
+{
+       struct hpsa_scsi_dev_t *logical_drive = sdev->hostdata;
+
+       if (!logical_drive)
+               return -ENODEV;
+
+       if (qdepth < 1)
+               qdepth = 1;
+       else if (qdepth > logical_drive->queue_depth)
+               qdepth = logical_drive->queue_depth;
+
+       return scsi_change_queue_depth(sdev, qdepth);
+}
+
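Example of the clamp with assumed numbers: for a drive whose derived
queue_depth is 29, writing 64 to /sys/block/sdX/device/queue_depth is
capped at 29, and writing 0 is raised to 1 before reaching
scsi_change_queue_depth().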
+static int hpsa_scan_finished(struct Scsi_Host *sh,
+       unsigned long elapsed_time)
+{
+       struct ctlr_info *h = shost_to_hba(sh);
+       unsigned long flags;
+       int finished;
+
+       spin_lock_irqsave(&h->scan_lock, flags);
+       finished = h->scan_finished;
+       spin_unlock_irqrestore(&h->scan_lock, flags);
+       return finished;
+}
 
 static void hpsa_unregister_scsi(struct ctlr_info *h)
 {
@@ -4044,11 +4252,11 @@ static int hpsa_register_scsi(struct ctlr_info *h)
        sh->max_cmd_len = MAX_COMMAND_SIZE;
        sh->max_lun = HPSA_MAX_LUN;
        sh->max_id = HPSA_MAX_LUN;
-       sh->can_queue = h->nr_cmds;
-       if (h->hba_mode_enabled)
-               sh->cmd_per_lun = 7;
-       else
-               sh->cmd_per_lun = h->nr_cmds;
+       sh->can_queue = h->nr_cmds -
+                       HPSA_CMDS_RESERVED_FOR_ABORTS -
+                       HPSA_CMDS_RESERVED_FOR_DRIVER -
+                       HPSA_MAX_CONCURRENT_PASSTHRUS;
+       sh->cmd_per_lun = sh->can_queue;
        sh->sg_tablesize = h->maxsgentries;
        h->scsi_host = sh;
        sh->hostdata[0] = (unsigned long) h;
@@ -4079,7 +4287,7 @@ static int wait_for_device_to_become_ready(struct ctlr_info *h,
        int waittime = 1; /* seconds */
        struct CommandList *c;
 
-       c = cmd_special_alloc(h);
+       c = cmd_alloc(h);
        if (!c) {
                dev_warn(&h->pdev->dev, "out of memory in "
                        "wait_for_device_to_become_ready.\n");
@@ -4125,7 +4333,7 @@ static int wait_for_device_to_become_ready(struct ctlr_info *h,
        else
                dev_warn(&h->pdev->dev, "device is ready.\n");
 
-       cmd_special_free(h, c);
+       cmd_free(h, c);
        return rc;
 }
 
@@ -4142,6 +4350,10 @@ static int hpsa_eh_device_reset_handler(struct scsi_cmnd *scsicmd)
        h = sdev_to_hba(scsicmd->device);
        if (h == NULL) /* paranoia */
                return FAILED;
+
+       if (lockup_detected(h))
+               return FAILED;
+
        dev = scsicmd->device->hostdata;
        if (!dev) {
                dev_err(&h->pdev->dev, "hpsa_eh_device_reset_handler: "
@@ -4207,9 +4419,9 @@ static int hpsa_send_abort(struct ctlr_info *h, unsigned char *scsi3addr,
        struct ErrorInfo *ei;
        __le32 tagupper, taglower;
 
-       c = cmd_special_alloc(h);
+       c = cmd_alloc(h);
        if (c == NULL) {        /* trouble... */
-               dev_warn(&h->pdev->dev, "cmd_special_alloc returned NULL!\n");
+               dev_warn(&h->pdev->dev, "cmd_alloc returned NULL!\n");
                return -ENOMEM;
        }
 
@@ -4238,62 +4450,12 @@ static int hpsa_send_abort(struct ctlr_info *h, unsigned char *scsi3addr,
                rc = -1;
                break;
        }
-       cmd_special_free(h, c);
+       cmd_free(h, c);
        dev_dbg(&h->pdev->dev, "%s: Tag:0x%08x:%08x: Finished.\n",
                __func__, tagupper, taglower);
        return rc;
 }
 
-/*
- * hpsa_find_cmd_in_queue
- *
- * Used to determine whether a command (find) is still present
- * in queue_head.   Optionally excludes the last element of queue_head.
- *
- * This is used to avoid unnecessary aborts.  Commands in h->reqQ have
- * not yet been submitted, and so can be aborted by the driver without
- * sending an abort to the hardware.
- *
- * Returns pointer to command if found in queue, NULL otherwise.
- */
-static struct CommandList *hpsa_find_cmd_in_queue(struct ctlr_info *h,
-                       struct scsi_cmnd *find, struct list_head *queue_head)
-{
-       unsigned long flags;
-       struct CommandList *c = NULL;   /* ptr into cmpQ */
-
-       if (!find)
-               return NULL;
-       spin_lock_irqsave(&h->lock, flags);
-       list_for_each_entry(c, queue_head, list) {
-               if (c->scsi_cmd == NULL) /* e.g.: passthru ioctl */
-                       continue;
-               if (c->scsi_cmd == find) {
-                       spin_unlock_irqrestore(&h->lock, flags);
-                       return c;
-               }
-       }
-       spin_unlock_irqrestore(&h->lock, flags);
-       return NULL;
-}
-
-static struct CommandList *hpsa_find_cmd_in_queue_by_tag(struct ctlr_info *h,
-                                       u8 *tag, struct list_head *queue_head)
-{
-       unsigned long flags;
-       struct CommandList *c;
-
-       spin_lock_irqsave(&h->lock, flags);
-       list_for_each_entry(c, queue_head, list) {
-               if (memcmp(&c->Header.tag, tag, 8) != 0)
-                       continue;
-               spin_unlock_irqrestore(&h->lock, flags);
-               return c;
-       }
-       spin_unlock_irqrestore(&h->lock, flags);
-       return NULL;
-}
-
 /* ioaccel2 path firmware cannot handle abort task requests.
  * Change abort requests to physical target reset, and send to the
  * address of the physical disk used for the ioaccel 2 command.
@@ -4311,7 +4473,7 @@ static int hpsa_send_reset_as_abort_ioaccel2(struct ctlr_info *h,
        unsigned char *psa = &phys_scsi3addr[0];
 
        /* Get a pointer to the hpsa logical device. */
-       scmd = (struct scsi_cmnd *) abort->scsi_cmd;
+       scmd = abort->scsi_cmd;
        dev = (struct hpsa_scsi_dev_t *)(scmd->device->hostdata);
        if (dev == NULL) {
                dev_warn(&h->pdev->dev,
@@ -4380,10 +4542,6 @@ static int hpsa_send_reset_as_abort_ioaccel2(struct ctlr_info *h,
 static int hpsa_send_abort_both_ways(struct ctlr_info *h,
        unsigned char *scsi3addr, struct CommandList *abort)
 {
-       u8 swizzled_tag[8];
-       struct CommandList *c;
-       int rc = 0, rc2 = 0;
-
        /* ioaccelerator mode 2 commands should be aborted via the
         * accelerated path, since RAID path is unaware of these commands,
         * but underlying firmware can't handle abort TMF.
@@ -4392,27 +4550,8 @@ static int hpsa_send_abort_both_ways(struct ctlr_info *h,
        if (abort->cmd_type == CMD_IOACCEL2)
                return hpsa_send_reset_as_abort_ioaccel2(h, scsi3addr, abort);
 
-       /* we do not expect to find the swizzled tag in our queue, but
-        * check anyway just to be sure the assumptions which make this
-        * the case haven't become wrong.
-        */
-       memcpy(swizzled_tag, &abort->Request.CDB[4], 8);
-       swizzle_abort_tag(swizzled_tag);
-       c = hpsa_find_cmd_in_queue_by_tag(h, swizzled_tag, &h->cmpQ);
-       if (c != NULL) {
-               dev_warn(&h->pdev->dev, "Unexpectedly found byte-swapped tag in completion queue.\n");
-               return hpsa_send_abort(h, scsi3addr, abort, 0);
-       }
-       rc = hpsa_send_abort(h, scsi3addr, abort, 0);
-
-       /* if the command is still in our queue, we can't conclude that it was
-        * aborted (it might have just completed normally) but in any case
-        * we don't need to try to abort it another way.
-        */
-       c = hpsa_find_cmd_in_queue(h, abort->scsi_cmd, &h->cmpQ);
-       if (c)
-               rc2 = hpsa_send_abort(h, scsi3addr, abort, 1);
-       return rc && rc2;
+       return hpsa_send_abort(h, scsi3addr, abort, 0) &&
+                       hpsa_send_abort(h, scsi3addr, abort, 1);
 }
 
 /* Send an abort for the specified command.
@@ -4426,11 +4565,11 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
        struct ctlr_info *h;
        struct hpsa_scsi_dev_t *dev;
        struct CommandList *abort; /* pointer to command to be aborted */
-       struct CommandList *found;
        struct scsi_cmnd *as;   /* ptr to scsi cmd inside aborted command. */
        char msg[256];          /* For debug messaging. */
        int ml = 0;
        __le32 tagupper, taglower;
+       int refcount;
 
        /* Find the controller of the command to be aborted */
        h = sdev_to_hba(sc->device);
@@ -4438,6 +4577,9 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
                        "ABORT REQUEST FAILED, Controller lookup failed.\n"))
                return FAILED;
 
+       if (lockup_detected(h))
+               return FAILED;
+
        /* Check that controller supports some kind of task abort */
        if (!(HPSATMF_PHYS_TASK_ABORT & h->TMFSupportFlags) &&
                !(HPSATMF_LOG_TASK_ABORT & h->TMFSupportFlags))
@@ -4459,41 +4601,23 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
        /* Get SCSI command to be aborted */
        abort = (struct CommandList *) sc->host_scribble;
        if (abort == NULL) {
-               dev_err(&h->pdev->dev, "%s FAILED, Command to abort is NULL.\n",
-                               msg);
-               return FAILED;
+               /* This can happen if the command already completed. */
+               return SUCCESS;
+       }
+       refcount = atomic_inc_return(&abort->refcount);
+       if (refcount == 1) { /* Command is done already. */
+               cmd_free(h, abort);
+               return SUCCESS;
        }
        hpsa_get_tag(h, abort, &taglower, &tagupper);
        ml += sprintf(msg+ml, "Tag:0x%08x:%08x ", tagupper, taglower);
-       as  = (struct scsi_cmnd *) abort->scsi_cmd;
+       as  = abort->scsi_cmd;
        if (as != NULL)
                ml += sprintf(msg+ml, "Command:0x%x SN:0x%lx ",
                        as->cmnd[0], as->serial_number);
        dev_dbg(&h->pdev->dev, "%s\n", msg);
        dev_warn(&h->pdev->dev, "Abort request on C%d:B%d:T%d:L%d\n",
                h->scsi_host->host_no, dev->bus, dev->target, dev->lun);
-
-       /* Search reqQ to See if command is queued but not submitted,
-        * if so, complete the command with aborted status and remove
-        * it from the reqQ.
-        */
-       found = hpsa_find_cmd_in_queue(h, sc, &h->reqQ);
-       if (found) {
-               found->err_info->CommandStatus = CMD_ABORTED;
-               finish_cmd(found);
-               dev_info(&h->pdev->dev, "%s Request SUCCEEDED (driver queue).\n",
-                               msg);
-               return SUCCESS;
-       }
-
-       /* not in reqQ, if also not in cmpQ, must have already completed */
-       found = hpsa_find_cmd_in_queue(h, sc, &h->cmpQ);
-       if (!found)  {
-               dev_dbg(&h->pdev->dev, "%s Request SUCCEEDED (not known to driver).\n",
-                               msg);
-               return SUCCESS;
-       }
-
        /*
         * Command is in flight, or possibly already completed
         * by the firmware (but not to the scsi mid layer) but we can't
@@ -4505,6 +4629,7 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
                dev_warn(&h->pdev->dev, "FAILED abort on device C%d:B%d:T%d:L%d\n",
                        h->scsi_host->host_no,
                        dev->bus, dev->target, dev->lun);
+               cmd_free(h, abort);
                return FAILED;
        }
        dev_info(&h->pdev->dev, "%s REQUEST SUCCEEDED.\n", msg);
@@ -4516,32 +4641,38 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
         */
 #define ABORT_COMPLETE_WAIT_SECS 30
        for (i = 0; i < ABORT_COMPLETE_WAIT_SECS * 10; i++) {
-               found = hpsa_find_cmd_in_queue(h, sc, &h->cmpQ);
-               if (!found)
+               refcount = atomic_read(&abort->refcount);
+               if (refcount < 2) {
+                       cmd_free(h, abort);
                        return SUCCESS;
-               msleep(100);
+               } else {
+                       msleep(100);
+               }
        }
        dev_warn(&h->pdev->dev, "%s FAILED. Aborted command has not completed after %d seconds.\n",
                msg, ABORT_COMPLETE_WAIT_SECS);
+       cmd_free(h, abort);
        return FAILED;
 }
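
The refcount probe above is the abort path's answer to the old queue searches: atomic_inc_return() yielding 1 means the command had already been freed (its count had dropped to 0), so the handler releases the reference it just took and reports SUCCESS. A self-contained C11 sketch of that try-to-pin idiom (cmd_try_pin is a hypothetical name):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct cmd { atomic_int refcount; };

/* Returns true if we pinned a live command, false if it was already free. */
static bool cmd_try_pin(struct cmd *c)
{
        /* atomic_fetch_add() returns the old value, so +1 mimics
         * the kernel's atomic_inc_return(). */
        if (atomic_fetch_add(&c->refcount, 1) + 1 == 1) {
                /* Command completed and was freed before we got here;
                 * drop the reference we just took. */
                atomic_fetch_sub(&c->refcount, 1);
                return false;
        }
        return true;
}

int main(void)
{
        struct cmd done = { .refcount = 0 };   /* already completed */
        struct cmd live = { .refcount = 1 };   /* still outstanding */

        printf("done: %s\n", cmd_try_pin(&done) ? "pinned" : "already free");
        printf("live: %s\n", cmd_try_pin(&live) ? "pinned" : "already free");
        return 0;
}
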
 
-
 /*
  * For operations that cannot sleep, a command block is allocated at init,
  * and managed by cmd_alloc() and cmd_free() using a simple bitmap plus a
  * per-command refcount to track which ones are free or in use.
  * cmd_free() is the complement.
  */
+
 static struct CommandList *cmd_alloc(struct ctlr_info *h)
 {
        struct CommandList *c;
        int i;
        union u64bit temp64;
        dma_addr_t cmd_dma_handle, err_dma_handle;
-       int loopcount;
+       int refcount;
+       unsigned long offset;
 
-       /* There is some *extremely* small but non-zero chance that that
+       /*
+        * There is some *extremely* small but non-zero chance that
         * multiple threads could get in here, and one thread could
         * be scanning through the list of bits looking for a free
         * one, but the free ones are always behind him, and other
@@ -4552,24 +4683,30 @@ static struct CommandList *cmd_alloc(struct ctlr_info *h)
         * infrequently as to be indistinguishable from never.
         */
 
-       loopcount = 0;
-       do {
-               i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds);
-               if (i == h->nr_cmds)
-                       i = 0;
-               loopcount++;
-       } while (test_and_set_bit(i & (BITS_PER_LONG - 1),
-                 h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0 &&
-               loopcount < 10);
-
-       /* Thread got starved?  We do not expect this to ever happen. */
-       if (loopcount >= 10)
-               return NULL;
-
-       c = h->cmd_pool + i;
-       memset(c, 0, sizeof(*c));
-       cmd_dma_handle = h->cmd_pool_dhandle
-           + i * sizeof(*c);
+       offset = h->last_allocation; /* benignly racy */
+       for (;;) {
+               i = find_next_zero_bit(h->cmd_pool_bits, h->nr_cmds, offset);
+               if (unlikely(i == h->nr_cmds)) {
+                       offset = 0;
+                       continue;
+               }
+               c = h->cmd_pool + i;
+               refcount = atomic_inc_return(&c->refcount);
+               if (unlikely(refcount > 1)) {
+                       cmd_free(h, c); /* already in use */
+                       offset = (i + 1) % h->nr_cmds;
+                       continue;
+               }
+               set_bit(i & (BITS_PER_LONG - 1),
+                       h->cmd_pool_bits + (i / BITS_PER_LONG));
+               break; /* it's ours now. */
+       }
+       h->last_allocation = i; /* benignly racy */
+
+       /* Zero out all of commandlist except the last field, refcount */
+       memset(c, 0, offsetof(struct CommandList, refcount));
+       c->Header.tag = cpu_to_le64((u64) (i << DIRECT_LOOKUP_SHIFT));
+       cmd_dma_handle = h->cmd_pool_dhandle + i * sizeof(*c);
        c->err_info = h->errinfo_pool + i;
        memset(c->err_info, 0, sizeof(*c->err_info));
        err_dma_handle = h->errinfo_pool_dhandle
@@ -4577,45 +4714,10 @@ static struct CommandList *cmd_alloc(struct ctlr_info *h)
 
        c->cmdindex = i;
 
-       INIT_LIST_HEAD(&c->list);
        c->busaddr = (u32) cmd_dma_handle;
        temp64.val = (u64) err_dma_handle;
-       c->ErrDesc.Addr = cpu_to_le64(err_dma_handle);
-       c->ErrDesc.Len = cpu_to_le32(sizeof(*c->err_info));
-
-       c->h = h;
-       return c;
-}
-
-/* For operations that can wait for kmalloc to possibly sleep,
- * this routine can be called. Lock need not be held to call
- * cmd_special_alloc. cmd_special_free() is the complement.
- */
-static struct CommandList *cmd_special_alloc(struct ctlr_info *h)
-{
-       struct CommandList *c;
-       dma_addr_t cmd_dma_handle, err_dma_handle;
-
-       c = pci_zalloc_consistent(h->pdev, sizeof(*c), &cmd_dma_handle);
-       if (c == NULL)
-               return NULL;
-
-       c->cmd_type = CMD_SCSI;
-       c->cmdindex = -1;
-
-       c->err_info = pci_zalloc_consistent(h->pdev, sizeof(*c->err_info),
-                                           &err_dma_handle);
-
-       if (c->err_info == NULL) {
-               pci_free_consistent(h->pdev,
-                       sizeof(*c), c, cmd_dma_handle);
-               return NULL;
-       }
-
-       INIT_LIST_HEAD(&c->list);
-       c->busaddr = (u32) cmd_dma_handle;
-       c->ErrDesc.Addr = cpu_to_le64(err_dma_handle);
-       c->ErrDesc.Len = cpu_to_le32(sizeof(*c->err_info));
+       c->ErrDesc.Addr = cpu_to_le64((u64) err_dma_handle);
+       c->ErrDesc.Len = cpu_to_le32((u32) sizeof(*c->err_info));
 
        c->h = h;
        return c;
@@ -4623,20 +4725,13 @@ static struct CommandList *cmd_special_alloc(struct ctlr_info *h)
 
 static void cmd_free(struct ctlr_info *h, struct CommandList *c)
 {
-       int i;
-
-       i = c - h->cmd_pool;
-       clear_bit(i & (BITS_PER_LONG - 1),
-                 h->cmd_pool_bits + (i / BITS_PER_LONG));
-}
+       if (atomic_dec_and_test(&c->refcount)) {
+               int i;
 
-static void cmd_special_free(struct ctlr_info *h, struct CommandList *c)
-{
-       pci_free_consistent(h->pdev, sizeof(*c->err_info),
-                           c->err_info,
-                           (dma_addr_t) le64_to_cpu(c->ErrDesc.Addr));
-       pci_free_consistent(h->pdev, sizeof(*c),
-                           c, (dma_addr_t) (c->busaddr & DIRECT_LOOKUP_MASK));
+               i = c - h->cmd_pool;
+               clear_bit(i & (BITS_PER_LONG - 1),
+                         h->cmd_pool_bits + (i / BITS_PER_LONG));
+       }
 }
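
cmd_alloc() and cmd_free() above replace the lock-protected bitmap walk (and the separate cmd_special_* DMA allocations) with a lockless scheme: start scanning at the benignly racy last_allocation hint, claim a slot by being the thread that raises its refcount from 0 to 1, and make the slot reusable when the last reference drops. A compact user-space C11 sketch under those assumptions; the driver's parallel bitmap and DMA handles are elided, so here the refcount alone carries the free/in-use state:

#include <stdatomic.h>
#include <stdio.h>

#define NR_CMDS 8

struct cmd { atomic_int refcount; int index; };

static struct cmd pool[NR_CMDS];
static atomic_int last_allocation;       /* benignly racy hint, as in the driver */

static struct cmd *cmd_alloc(void)
{
        int start = atomic_load(&last_allocation);

        for (int n = 0; n < NR_CMDS; n++) {
                int i = (start + n) % NR_CMDS;
                struct cmd *c = &pool[i];

                /* atomic_inc_return() analogue: the first claimer sees 1 */
                if (atomic_fetch_add(&c->refcount, 1) + 1 == 1) {
                        atomic_store(&last_allocation, i);
                        c->index = i;
                        return c;        /* it's ours now */
                }
                atomic_fetch_sub(&c->refcount, 1);   /* already in use */
        }
        return NULL;                     /* pool exhausted */
}

static void cmd_free(struct cmd *c)
{
        atomic_fetch_sub(&c->refcount, 1);   /* slot reusable when it hits 0 */
}

int main(void)
{
        struct cmd *a = cmd_alloc(), *b = cmd_alloc();

        printf("allocated slots %d and %d\n", a->index, b->index);
        cmd_free(a);
        cmd_free(b);
        return 0;
}
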
 
 #ifdef CONFIG_COMPAT
@@ -4817,7 +4912,7 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
                        memset(buff, 0, iocommand.buf_size);
                }
        }
-       c = cmd_special_alloc(h);
+       c = cmd_alloc(h);
        if (c == NULL) {
                rc = -ENOMEM;
                goto out_kfree;
@@ -4834,8 +4929,6 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
                c->Header.SGTotal = cpu_to_le16(0);
        }
        memcpy(&c->Header.LUN, &iocommand.LUN_info, sizeof(c->Header.LUN));
-       /* use the kernel address the cmd block for tag */
-       c->Header.tag = cpu_to_le64(c->busaddr);
 
        /* Fill in Request block */
        memcpy(&c->Request, &iocommand.Request,
@@ -4876,7 +4969,7 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
                }
        }
 out:
-       cmd_special_free(h, c);
+       cmd_free(h, c);
 out_kfree:
        kfree(buff);
        return rc;
@@ -4954,7 +5047,7 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp)
                data_ptr += sz;
                sg_used++;
        }
-       c = cmd_special_alloc(h);
+       c = cmd_alloc(h);
        if (c == NULL) {
                status = -ENOMEM;
                goto cleanup1;
@@ -4964,7 +5057,6 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp)
        c->Header.SGList = (u8) sg_used;
        c->Header.SGTotal = cpu_to_le16(sg_used);
        memcpy(&c->Header.LUN, &ioc->LUN_info, sizeof(c->Header.LUN));
-       c->Header.tag = cpu_to_le64(c->busaddr);
        memcpy(&c->Request, &ioc->Request, sizeof(c->Request));
        if (ioc->buf_size > 0) {
                int i;
@@ -5011,7 +5103,7 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp)
        }
        status = 0;
 cleanup0:
-       cmd_special_free(h, c);
+       cmd_free(h, c);
 cleanup1:
        if (buff) {
                int i;
@@ -5033,35 +5125,6 @@ static void check_ioctl_unit_attention(struct ctlr_info *h,
                (void) check_for_unit_attention(h, c);
 }
 
-static int increment_passthru_count(struct ctlr_info *h)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&h->passthru_count_lock, flags);
-       if (h->passthru_count >= HPSA_MAX_CONCURRENT_PASSTHRUS) {
-               spin_unlock_irqrestore(&h->passthru_count_lock, flags);
-               return -1;
-       }
-       h->passthru_count++;
-       spin_unlock_irqrestore(&h->passthru_count_lock, flags);
-       return 0;
-}
-
-static void decrement_passthru_count(struct ctlr_info *h)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&h->passthru_count_lock, flags);
-       if (h->passthru_count <= 0) {
-               spin_unlock_irqrestore(&h->passthru_count_lock, flags);
-               /* not expecting to get here. */
-               dev_warn(&h->pdev->dev, "Bug detected, passthru_count seems to be incorrect.\n");
-               return;
-       }
-       h->passthru_count--;
-       spin_unlock_irqrestore(&h->passthru_count_lock, flags);
-}
-
 /*
  * ioctl
  */
@@ -5084,16 +5147,16 @@ static int hpsa_ioctl(struct scsi_device *dev, int cmd, void __user *arg)
        case CCISS_GETDRIVVER:
                return hpsa_getdrivver_ioctl(h, argp);
        case CCISS_PASSTHRU:
-               if (increment_passthru_count(h))
+               if (atomic_dec_if_positive(&h->passthru_cmds_avail) < 0)
                        return -EAGAIN;
                rc = hpsa_passthru_ioctl(h, argp);
-               decrement_passthru_count(h);
+               atomic_inc(&h->passthru_cmds_avail);
                return rc;
        case CCISS_BIG_PASSTHRU:
-               if (increment_passthru_count(h))
+               if (atomic_dec_if_positive(&h->passthru_cmds_avail) < 0)
                        return -EAGAIN;
                rc = hpsa_big_passthru_ioctl(h, argp);
-               decrement_passthru_count(h);
+               atomic_inc(&h->passthru_cmds_avail);
                return rc;
        default:
                return -ENOTTY;
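
The passthru throttle now uses atomic_dec_if_positive() on a budget of available slots instead of a spinlock-protected counter. C11 has no direct equivalent, so this sketch emulates it with a compare-and-swap loop (the budget of 4 is an arbitrary stand-in for HPSA_MAX_CONCURRENT_PASSTHRUS):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int passthru_avail = 4;    /* available-slot budget */

/* atomic_dec_if_positive() analogue: take a slot only if one is free. */
static bool passthru_get(void)
{
        int old = atomic_load(&passthru_avail);

        while (old > 0) {
                if (atomic_compare_exchange_weak(&passthru_avail, &old, old - 1))
                        return true;     /* slot acquired */
                /* CAS failure reloaded 'old'; retry while positive */
        }
        return false;                    /* budget exhausted: caller returns -EAGAIN */
}

static void passthru_put(void)
{
        atomic_fetch_add(&passthru_avail, 1);
}

int main(void)
{
        if (passthru_get()) {
                printf("ioctl admitted\n");
                passthru_put();
        }
        return 0;
}
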
@@ -5137,7 +5200,6 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
                c->Header.SGList = 0;
                c->Header.SGTotal = cpu_to_le16(0);
        }
-       c->Header.tag = cpu_to_le64(c->busaddr);
        memcpy(c->Header.LUN.LunAddrBytes, scsi3addr, 8);
 
        if (cmd_type == TYPE_CMD) {
@@ -5209,6 +5271,16 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
                        c->Request.CDB[7] = (size >> 16) & 0xFF;
                        c->Request.CDB[8] = (size >> 8) & 0xFF;
                        break;
+               case BMIC_IDENTIFY_PHYSICAL_DEVICE:
+                       c->Request.CDBLen = 10;
+                       c->Request.type_attr_dir =
+                               TYPE_ATTR_DIR(cmd_type, ATTR_SIMPLE, XFER_READ);
+                       c->Request.Timeout = 0;
+                       c->Request.CDB[0] = BMIC_READ;
+                       c->Request.CDB[6] = BMIC_IDENTIFY_PHYSICAL_DEVICE;
+                       c->Request.CDB[7] = (size >> 16) & 0xFF;
+                       c->Request.CDB[8] = (size >> 8) & 0xFF;
+                       break;
                default:
                        dev_warn(&h->pdev->dev, "unknown command 0x%c\n", cmd);
                        BUG();
@@ -5295,47 +5367,6 @@ static void __iomem *remap_pci_mem(ulong base, ulong size)
        return page_remapped ? (page_remapped + page_offs) : NULL;
 }
 
-/* Takes cmds off the submission queue and sends them to the hardware,
- * then puts them on the queue of cmds waiting for completion.
- * Assumes h->lock is held
- */
-static void start_io(struct ctlr_info *h, unsigned long *flags)
-{
-       struct CommandList *c;
-
-       while (!list_empty(&h->reqQ)) {
-               c = list_entry(h->reqQ.next, struct CommandList, list);
-               /* can't do anything if fifo is full */
-               if ((h->access.fifo_full(h))) {
-                       h->fifo_recently_full = 1;
-                       dev_warn(&h->pdev->dev, "fifo full\n");
-                       break;
-               }
-               h->fifo_recently_full = 0;
-
-               /* Get the first entry from the Request Q */
-               removeQ(c);
-               h->Qdepth--;
-
-               /* Put job onto the completed Q */
-               addQ(&h->cmpQ, c);
-               atomic_inc(&h->commands_outstanding);
-               spin_unlock_irqrestore(&h->lock, *flags);
-               /* Tell the controller execute command */
-               h->access.submit_command(h, c);
-               spin_lock_irqsave(&h->lock, *flags);
-       }
-}
-
-static void lock_and_start_io(struct ctlr_info *h)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&h->lock, flags);
-       start_io(h, &flags);
-       spin_unlock_irqrestore(&h->lock, flags);
-}
-
 static inline unsigned long get_next_completion(struct ctlr_info *h, u8 q)
 {
        return h->access.command_completed(h, q);
@@ -5364,53 +5395,12 @@ static inline int bad_tag(struct ctlr_info *h, u32 tag_index,
 
 static inline void finish_cmd(struct CommandList *c)
 {
-       unsigned long flags;
-       int io_may_be_stalled = 0;
-       struct ctlr_info *h = c->h;
-       int count;
-
-       spin_lock_irqsave(&h->lock, flags);
-       removeQ(c);
-
-       /*
-        * Check for possibly stalled i/o.
-        *
-        * If a fifo_full condition is encountered, requests will back up
-        * in h->reqQ.  This queue is only emptied out by start_io which is
-        * only called when a new i/o request comes in.  If no i/o's are
-        * forthcoming, the i/o's in h->reqQ can get stuck.  So we call
-        * start_io from here if we detect such a danger.
-        *
-        * Normally, we shouldn't hit this case, but pounding on the
-        * CCISS_PASSTHRU ioctl can provoke it.  Only call start_io if
-        * commands_outstanding is low.  We want to avoid calling
-        * start_io from in here as much as possible, and esp. don't
-        * want to get in a cycle where we call start_io every time
-        * through here.
-        */
-       count = atomic_read(&h->commands_outstanding);
-       spin_unlock_irqrestore(&h->lock, flags);
-       if (unlikely(h->fifo_recently_full) && count < 5)
-               io_may_be_stalled = 1;
-
        dial_up_lockup_detection_on_fw_flash_complete(c->h, c);
        if (likely(c->cmd_type == CMD_IOACCEL1 || c->cmd_type == CMD_SCSI
                        || c->cmd_type == CMD_IOACCEL2))
                complete_scsi_command(c);
        else if (c->cmd_type == CMD_IOCTL_PEND)
                complete(c->waiting);
-       if (unlikely(io_may_be_stalled))
-               lock_and_start_io(h);
-}
-
-static inline u32 hpsa_tag_contains_index(u32 tag)
-{
-       return tag & DIRECT_LOOKUP_BIT;
-}
-
-static inline u32 hpsa_tag_to_index(u32 tag)
-{
-       return tag >> DIRECT_LOOKUP_SHIFT;
 }
 
 
@@ -5430,34 +5420,13 @@ static inline void process_indexed_cmd(struct ctlr_info *h,
        u32 tag_index;
        struct CommandList *c;
 
-       tag_index = hpsa_tag_to_index(raw_tag);
+       tag_index = raw_tag >> DIRECT_LOOKUP_SHIFT;
        if (!bad_tag(h, tag_index, raw_tag)) {
                c = h->cmd_pool + tag_index;
                finish_cmd(c);
        }
 }
 
-/* process completion of a non-indexed command */
-static inline void process_nonindexed_cmd(struct ctlr_info *h,
-       u32 raw_tag)
-{
-       u32 tag;
-       struct CommandList *c = NULL;
-       unsigned long flags;
-
-       tag = hpsa_tag_discard_error_bits(h, raw_tag);
-       spin_lock_irqsave(&h->lock, flags);
-       list_for_each_entry(c, &h->cmpQ, list) {
-               if ((c->busaddr & 0xFFFFFFE0) == (tag & 0xFFFFFFE0)) {
-                       spin_unlock_irqrestore(&h->lock, flags);
-                       finish_cmd(c);
-                       return;
-               }
-       }
-       spin_unlock_irqrestore(&h->lock, flags);
-       bad_tag(h, h->nr_cmds + 1, raw_tag);
-}
-
 /* Some controllers, like p400, will give us one interrupt
  * after a soft reset, even if we turned interrupts off.
  * Only need to check for this in the hpsa_xxx_discard_completions
@@ -5535,10 +5504,7 @@ static irqreturn_t do_hpsa_intr_intx(int irq, void *queue)
        while (interrupt_pending(h)) {
                raw_tag = get_next_completion(h, q);
                while (raw_tag != FIFO_EMPTY) {
-                       if (likely(hpsa_tag_contains_index(raw_tag)))
-                               process_indexed_cmd(h, raw_tag);
-                       else
-                               process_nonindexed_cmd(h, raw_tag);
+                       process_indexed_cmd(h, raw_tag);
                        raw_tag = next_command(h, q);
                }
        }
@@ -5554,10 +5520,7 @@ static irqreturn_t do_hpsa_intr_msi(int irq, void *queue)
        h->last_intr_timestamp = get_jiffies_64();
        raw_tag = get_next_completion(h, q);
        while (raw_tag != FIFO_EMPTY) {
-               if (likely(hpsa_tag_contains_index(raw_tag)))
-                       process_indexed_cmd(h, raw_tag);
-               else
-                       process_nonindexed_cmd(h, raw_tag);
+               process_indexed_cmd(h, raw_tag);
                raw_tag = next_command(h, q);
        }
        return IRQ_HANDLED;
@@ -5595,7 +5558,7 @@ static int hpsa_message(struct pci_dev *pdev, unsigned char opcode,
        err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
        if (err) {
                iounmap(vaddr);
-               return -ENOMEM;
+               return err;
        }
 
        cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
@@ -5689,30 +5652,22 @@ static int hpsa_controller_hard_reset(struct pci_dev *pdev,
                 * the controller, place the interface device in D3 then to D0,
                 * this causes a secondary PCI reset which will reset the
                 * controller." */
-               int pos;
-               u16 pmcsr;
-
-               pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
-               if (pos == 0) {
-                       dev_err(&pdev->dev,
-                               "hpsa_reset_controller: "
-                               "PCI PM not supported\n");
-                       return -ENODEV;
-               }
+
+               int rc = 0;
+
                dev_info(&pdev->dev, "using PCI PM to reset controller\n");
+
                /* enter the D3hot power management state */
-               pci_read_config_word(pdev, pos + PCI_PM_CTRL,
-                                       (__force u16 *)&pmcsr);
-               pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
-               pmcsr |= (__force u16) PCI_D3hot;
-               pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+               rc = pci_set_power_state(pdev, PCI_D3hot);
+               if (rc)
+                       return rc;
 
                msleep(500);
 
                /* enter the D0 power management state */
-               pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
-               pmcsr |= (__force u16) PCI_D0;
-               pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+               rc = pci_set_power_state(pdev, PCI_D0);
+               if (rc)
+                       return rc;
 
                /*
                 * The P600 requires a small delay when changing states.
@@ -5806,8 +5761,12 @@ static int hpsa_kdump_hard_reset_controller(struct pci_dev *pdev)
         */
 
        rc = hpsa_lookup_board_id(pdev, &board_id);
-       if (rc < 0 || !ctlr_is_resettable(board_id)) {
-               dev_warn(&pdev->dev, "Not resetting device.\n");
+       if (rc < 0) {
+               dev_warn(&pdev->dev, "Board ID not found\n");
+               return rc;
+       }
+       if (!ctlr_is_resettable(board_id)) {
+               dev_warn(&pdev->dev, "Controller not resettable\n");
                return -ENODEV;
        }
 
@@ -5852,8 +5811,8 @@ static int hpsa_kdump_hard_reset_controller(struct pci_dev *pdev)
        } else {
                use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET;
                if (use_doorbell) {
-                       dev_warn(&pdev->dev, "Soft reset not supported. "
-                               "Firmware update is required.\n");
+                       dev_warn(&pdev->dev,
+                               "Soft reset not supported. Firmware update is required.\n");
                        rc = -ENOTSUPP; /* try soft reset */
                        goto unmap_cfgtable;
                }
@@ -5873,8 +5832,7 @@ static int hpsa_kdump_hard_reset_controller(struct pci_dev *pdev)
        rc = hpsa_wait_for_board_state(pdev, vaddr, BOARD_READY);
        if (rc) {
                dev_warn(&pdev->dev,
-                       "failed waiting for board to become ready "
-                       "after hard reset\n");
+                       "Failed waiting for board to become ready after hard reset\n");
                goto unmap_cfgtable;
        }
 
@@ -5925,7 +5883,7 @@ static void print_cfg_table(struct device *dev, struct CfgTable __iomem *tb)
               readl(&(tb->HostWrite.CoalIntDelay)));
        dev_info(dev, "   Coalesce Interrupt Count = 0x%x\n",
               readl(&(tb->HostWrite.CoalIntCount)));
-       dev_info(dev, "   Max outstanding commands = 0x%d\n",
+       dev_info(dev, "   Max outstanding commands = %d\n",
               readl(&(tb->CmdsOutMax)));
        dev_info(dev, "   Bus Types = 0x%x\n", readl(&(tb->BusTypes)));
        for (i = 0; i < 16; i++)
@@ -5973,7 +5931,7 @@ static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
 }
 
 /* If MSI/MSI-X is supported by the kernel we will try to enable it on
- * controllers that are capable. If not, we use IO-APIC mode.
+ * controllers that are capable. If not, we use legacy INTx mode.
  */
 
 static void hpsa_interrupt_mode(struct ctlr_info *h)
@@ -5992,7 +5950,7 @@ static void hpsa_interrupt_mode(struct ctlr_info *h)
            (h->board_id == 0x40820E11) || (h->board_id == 0x40830E11))
                goto default_int_mode;
        if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) {
-               dev_info(&h->pdev->dev, "MSIX\n");
+               dev_info(&h->pdev->dev, "MSI-X capable controller\n");
                h->msix_vector = MAX_REPLY_QUEUES;
                if (h->msix_vector > num_online_cpus())
                        h->msix_vector = num_online_cpus();
@@ -6013,7 +5971,7 @@ static void hpsa_interrupt_mode(struct ctlr_info *h)
        }
 single_msi_mode:
        if (pci_find_capability(h->pdev, PCI_CAP_ID_MSI)) {
-               dev_info(&h->pdev->dev, "MSI\n");
+               dev_info(&h->pdev->dev, "MSI capable controller\n");
                if (!pci_enable_msi(h->pdev))
                        h->msi_vector = 1;
                else
@@ -6120,8 +6078,10 @@ static int hpsa_find_cfgtables(struct ctlr_info *h)
                return rc;
        h->cfgtable = remap_pci_mem(pci_resource_start(h->pdev,
                       cfg_base_addr_index) + cfg_offset, sizeof(*h->cfgtable));
-       if (!h->cfgtable)
+       if (!h->cfgtable) {
+               dev_err(&h->pdev->dev, "Failed mapping cfgtable\n");
                return -ENOMEM;
+       }
        rc = write_driver_ver_to_cfgtable(h->cfgtable);
        if (rc)
                return rc;
@@ -6152,6 +6112,15 @@ static void hpsa_get_max_perf_mode_cmds(struct ctlr_info *h)
        }
 }
 
+/* If the controller reports that the total max sg entries is greater than 512,
+ * then we know that chained SG blocks work.  (Original smart arrays did not
+ * support chained SG blocks and would return zero for max sg entries.)
+ */
+static int hpsa_supports_chained_sg_blocks(struct ctlr_info *h)
+{
+       return h->maxsgentries > 512;
+}
+
 /* Interrogate the hardware for some limits:
  * max commands, max SG elements without chaining, and with chaining,
  * SG chain block size, etc.
@@ -6159,21 +6128,23 @@ static void hpsa_get_max_perf_mode_cmds(struct ctlr_info *h)
 static void hpsa_find_board_params(struct ctlr_info *h)
 {
        hpsa_get_max_perf_mode_cmds(h);
-       h->nr_cmds = h->max_commands - 4; /* Allow room for some ioctls */
+       h->nr_cmds = h->max_commands;
        h->maxsgentries = readl(&(h->cfgtable->MaxScatterGatherElements));
        h->fw_support = readl(&(h->cfgtable->misc_fw_support));
-       /*
-        * Limit in-command s/g elements to 32 save dma'able memory.
-        * Howvever spec says if 0, use 31
-        */
-       h->max_cmd_sg_entries = 31;
-       if (h->maxsgentries > 512) {
+       if (hpsa_supports_chained_sg_blocks(h)) {
+               /* Limit in-command s/g elements to 32 to save DMA'able memory. */
                h->max_cmd_sg_entries = 32;
                h->chainsize = h->maxsgentries - h->max_cmd_sg_entries;
                h->maxsgentries--; /* save one for chain pointer */
        } else {
-               h->chainsize = 0;
+               /*
+                * Original smart arrays supported at most 31 s/g entries
+                * embedded inline in the command (trying to use more
+                * would lock up the controller)
+                */
+               h->max_cmd_sg_entries = 31;
                h->maxsgentries = 31; /* default to traditional values */
+               h->chainsize = 0;
        }
 
        /* Find out what task management functions are supported and cache */
@@ -6187,7 +6158,7 @@ static void hpsa_find_board_params(struct ctlr_info *h)
 static inline bool hpsa_CISS_signature_present(struct ctlr_info *h)
 {
        if (!check_signature(h->cfgtable->Signature, "CISS", 4)) {
-               dev_warn(&h->pdev->dev, "not a valid CISS config table\n");
+               dev_err(&h->pdev->dev, "not a valid CISS config table\n");
                return false;
        }
        return true;
@@ -6220,24 +6191,27 @@ static inline void hpsa_p600_dma_prefetch_quirk(struct ctlr_info *h)
        writel(dma_prefetch, h->vaddr + I2O_DMA1_CFG);
 }
 
-static void hpsa_wait_for_clear_event_notify_ack(struct ctlr_info *h)
+static int hpsa_wait_for_clear_event_notify_ack(struct ctlr_info *h)
 {
        int i;
        u32 doorbell_value;
        unsigned long flags;
        /* wait until the clear_event_notify bit 6 is cleared by controller. */
-       for (i = 0; i < MAX_CONFIG_WAIT; i++) {
+       for (i = 0; i < MAX_CLEAR_EVENT_WAIT; i++) {
                spin_lock_irqsave(&h->lock, flags);
                doorbell_value = readl(h->vaddr + SA5_DOORBELL);
                spin_unlock_irqrestore(&h->lock, flags);
                if (!(doorbell_value & DOORBELL_CLEAR_EVENTS))
-                       break;
+                       goto done;
                /* delay and try again */
-               msleep(20);
+               msleep(CLEAR_EVENT_WAIT_INTERVAL);
        }
+       return -ENODEV;
+done:
+       return 0;
 }
 
-static void hpsa_wait_for_mode_change_ack(struct ctlr_info *h)
+static int hpsa_wait_for_mode_change_ack(struct ctlr_info *h)
 {
        int i;
        u32 doorbell_value;
@@ -6247,17 +6221,21 @@ static void hpsa_wait_for_mode_change_ack(struct ctlr_info *h)
         * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right
         * as we enter this code.)
         */
-       for (i = 0; i < MAX_CONFIG_WAIT; i++) {
+       for (i = 0; i < MAX_MODE_CHANGE_WAIT; i++) {
                spin_lock_irqsave(&h->lock, flags);
                doorbell_value = readl(h->vaddr + SA5_DOORBELL);
                spin_unlock_irqrestore(&h->lock, flags);
                if (!(doorbell_value & CFGTBL_ChangeReq))
-                       break;
+                       goto done;
                /* delay and try again */
-               usleep_range(10000, 20000);
+               msleep(MODE_CHANGE_WAIT_INTERVAL);
        }
+       return -ENODEV;
+done:
+       return 0;
 }
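
Both doorbell-ack helpers now poll with a fixed sleep per iteration and a bounded iteration count (20 ms x 30000 for event clearing, 10 ms x 2000 for mode changes) and return -ENODEV on timeout instead of silently falling through. The same shape as a self-contained user-space sketch, with a stubbed register read standing in for readl():

#include <errno.h>
#include <stdio.h>
#include <time.h>

#define WAIT_INTERVAL_MS 10
#define MAX_WAIT_ITERS   2000            /* 10 ms * 2000 = 20 s budget */

static unsigned int read_doorbell(void) { return 0; }   /* readl() stand-in */

static int wait_for_ack(unsigned int bit)
{
        for (int i = 0; i < MAX_WAIT_ITERS; i++) {
                if (!(read_doorbell() & bit))
                        return 0;        /* controller acknowledged */
                struct timespec ts = { 0, WAIT_INTERVAL_MS * 1000000L };
                nanosleep(&ts, NULL);    /* msleep() analogue */
        }
        return -ENODEV;                  /* caller treats timeout as fatal */
}

int main(void)
{
        printf("ack: %d\n", wait_for_ack(1u << 1));
        return 0;
}
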
 
+/* return -ENODEV on error (or another negative errno), 0 on success */
 static int hpsa_enter_simple_mode(struct ctlr_info *h)
 {
        u32 trans_support;
@@ -6272,14 +6250,15 @@ static int hpsa_enter_simple_mode(struct ctlr_info *h)
        writel(CFGTBL_Trans_Simple, &(h->cfgtable->HostWrite.TransportRequest));
        writel(0, &h->cfgtable->HostWrite.command_pool_addr_hi);
        writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
-       hpsa_wait_for_mode_change_ack(h);
+       if (hpsa_wait_for_mode_change_ack(h))
+               goto error;
        print_cfg_table(&h->pdev->dev, h->cfgtable);
        if (!(readl(&(h->cfgtable->TransportActive)) & CFGTBL_Trans_Simple))
                goto error;
        h->transMethod = CFGTBL_Trans_Simple;
        return 0;
 error:
-       dev_warn(&h->pdev->dev, "unable to get board into simple mode\n");
+       dev_err(&h->pdev->dev, "failed to enter simple mode\n");
        return -ENODEV;
 }
 
@@ -6289,7 +6268,7 @@ static int hpsa_pci_init(struct ctlr_info *h)
 
        prod_index = hpsa_lookup_board_id(h->pdev, &h->board_id);
        if (prod_index < 0)
-               return -ENODEV;
+               return prod_index;
        h->product_name = products[prod_index].product_name;
        h->access = *(products[prod_index].access);
 
@@ -6410,14 +6389,11 @@ static int hpsa_init_reset_devices(struct pci_dev *pdev)
         * "performant mode".  Or, it might be 640x, which can't reset
         * due to concerns about shared bbwc between 6402/6404 pair.
         */
-       if (rc) {
-               if (rc != -ENOTSUPP) /* just try to do the kdump anyhow. */
-                       rc = -ENODEV;
+       if (rc)
                goto out_disable;
-       }
 
        /* Now try to get the controller to respond to a no-op */
-       dev_warn(&pdev->dev, "Waiting for controller to respond to no-op\n");
+       dev_info(&pdev->dev, "Waiting for controller to respond to no-op\n");
        for (i = 0; i < HPSA_POST_RESET_NOOP_RETRIES; i++) {
                if (hpsa_noop(pdev) == 0)
                        break;
@@ -6447,9 +6423,12 @@ static int hpsa_allocate_cmd_pool(struct ctlr_info *h)
            || (h->cmd_pool == NULL)
            || (h->errinfo_pool == NULL)) {
                dev_err(&h->pdev->dev, "out of memory in %s", __func__);
-               return -ENOMEM;
+               goto clean_up;
        }
        return 0;
+clean_up:
+       hpsa_free_cmd_pool(h);
+       return -ENOMEM;
 }
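
hpsa_allocate_cmd_pool() now unwinds through a single clean_up label, which works because hpsa_free_cmd_pool() is safe to call on a partially allocated pool. A generic user-space sketch of that goto-unwind idiom (pools_alloc()/pools_free() are hypothetical names):

#include <stdlib.h>

struct pools { void *cmds; void *errinfo; };

/* hpsa_free_cmd_pool()-style teardown: safe on partially built state. */
static void pools_free(struct pools *p)
{
        free(p->cmds);           /* free(NULL) is a no-op */
        free(p->errinfo);
}

static int pools_alloc(struct pools *p, size_t n)
{
        p->cmds = calloc(n, 64);
        p->errinfo = calloc(n, 32);
        if (!p->cmds || !p->errinfo)
                goto clean_up;   /* unwind everything acquired so far */
        return 0;
clean_up:
        pools_free(p);
        return -1;               /* -ENOMEM in the driver */
}

int main(void)
{
        struct pools p = { 0 };

        if (pools_alloc(&p, 16) == 0)
                pools_free(&p);
        return 0;
}
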
 
 static void hpsa_free_cmd_pool(struct ctlr_info *h)
@@ -6476,16 +6455,38 @@ static void hpsa_free_cmd_pool(struct ctlr_info *h)
 
 static void hpsa_irq_affinity_hints(struct ctlr_info *h)
 {
-       int i, cpu, rc;
+       int i, cpu;
 
        cpu = cpumask_first(cpu_online_mask);
        for (i = 0; i < h->msix_vector; i++) {
-               rc = irq_set_affinity_hint(h->intr[i], get_cpu_mask(cpu));
+               irq_set_affinity_hint(h->intr[i], get_cpu_mask(cpu));
                cpu = cpumask_next(cpu, cpu_online_mask);
        }
 }
 
-static int hpsa_request_irq(struct ctlr_info *h,
+/* clear affinity hints and free MSI-X, MSI, or legacy INTx vectors */
+static void hpsa_free_irqs(struct ctlr_info *h)
+{
+       int i;
+
+       if (!h->msix_vector || h->intr_mode != PERF_MODE_INT) {
+               /* Single reply queue, only one irq to free */
+               i = h->intr_mode;
+               irq_set_affinity_hint(h->intr[i], NULL);
+               free_irq(h->intr[i], &h->q[i]);
+               return;
+       }
+
+       for (i = 0; i < h->msix_vector; i++) {
+               irq_set_affinity_hint(h->intr[i], NULL);
+               free_irq(h->intr[i], &h->q[i]);
+       }
+       for (; i < MAX_REPLY_QUEUES; i++)
+               h->q[i] = 0;
+}
+
+/* returns 0 on success; cleans up and returns -Enn on error */
+static int hpsa_request_irqs(struct ctlr_info *h,
        irqreturn_t (*msixhandler)(int, void *),
        irqreturn_t (*intxhandler)(int, void *))
 {
@@ -6500,10 +6501,25 @@ static int hpsa_request_irq(struct ctlr_info *h,
 
        if (h->intr_mode == PERF_MODE_INT && h->msix_vector > 0) {
                /* If performant mode and MSI-X, use multiple reply queues */
-               for (i = 0; i < h->msix_vector; i++)
+               for (i = 0; i < h->msix_vector; i++) {
                        rc = request_irq(h->intr[i], msixhandler,
                                        0, h->devname,
                                        &h->q[i]);
+                       if (rc) {
+                               int j;
+
+                               dev_err(&h->pdev->dev,
+                                       "failed to get irq %d for %s\n",
+                                       h->intr[i], h->devname);
+                               for (j = 0; j < i; j++) {
+                                       free_irq(h->intr[j], &h->q[j]);
+                                       h->q[j] = 0;
+                               }
+                               for (; j < MAX_REPLY_QUEUES; j++)
+                                       h->q[j] = 0;
+                               return rc;
+                       }
+               }
                hpsa_irq_affinity_hints(h);
        } else {
                /* Use single reply pool */
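
If request_irq() fails partway through the MSI-X vectors, the new code frees exactly the vectors registered so far and zeroes the remaining queue cookies. The general acquire-all-or-unwind loop, sketched with stub acquire/release functions:

#include <stdio.h>

#define NQUEUES 4

static int request_one(int i) { return i == 2 ? -1 : 0; }  /* fail on queue 2 */
static void free_one(int i)   { printf("freed queue %d\n", i); }

static int request_all(void)
{
        for (int i = 0; i < NQUEUES; i++) {
                if (request_one(i) != 0) {
                        for (int j = 0; j < i; j++)   /* unwind partial set */
                                free_one(j);
                        return -1;
                }
        }
        return 0;
}

int main(void) { return request_all() ? 1 : 0; }
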
@@ -6549,27 +6565,9 @@ static int hpsa_kdump_soft_reset(struct ctlr_info *h)
        return 0;
 }
 
-static void free_irqs(struct ctlr_info *h)
-{
-       int i;
-
-       if (!h->msix_vector || h->intr_mode != PERF_MODE_INT) {
-               /* Single reply queue, only one irq to free */
-               i = h->intr_mode;
-               irq_set_affinity_hint(h->intr[i], NULL);
-               free_irq(h->intr[i], &h->q[i]);
-               return;
-       }
-
-       for (i = 0; i < h->msix_vector; i++) {
-               irq_set_affinity_hint(h->intr[i], NULL);
-               free_irq(h->intr[i], &h->q[i]);
-       }
-}
-
 static void hpsa_free_irqs_and_disable_msix(struct ctlr_info *h)
 {
-       free_irqs(h);
+       hpsa_free_irqs(h);
 #ifdef CONFIG_PCI_MSI
        if (h->msix_vector) {
                if (h->pdev->msix_enabled)
@@ -6615,16 +6613,20 @@ static void hpsa_undo_allocations_after_kdump_soft_reset(struct ctlr_info *h)
 }
 
 /* Called when controller lockup detected. */
-static void fail_all_cmds_on_list(struct ctlr_info *h, struct list_head *list)
+static void fail_all_outstanding_cmds(struct ctlr_info *h)
 {
-       struct CommandList *c = NULL;
+       int i, refcount;
+       struct CommandList *c;
 
-       assert_spin_locked(&h->lock);
-       /* Mark all outstanding commands as failed and complete them. */
-       while (!list_empty(list)) {
-               c = list_entry(list->next, struct CommandList, list);
-               c->err_info->CommandStatus = CMD_HARDWARE_ERR;
-               finish_cmd(c);
+       flush_workqueue(h->resubmit_wq); /* ensure all cmds are fully built */
+       for (i = 0; i < h->nr_cmds; i++) {
+               c = h->cmd_pool + i;
+               refcount = atomic_inc_return(&c->refcount);
+               if (refcount > 1) {
+                       c->err_info->CommandStatus = CMD_HARDWARE_ERR;
+                       finish_cmd(c);
+               }
+               cmd_free(h, c);
        }
 }
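
With reqQ and cmpQ gone, lockup handling here (and hpsa_drain_accel_commands() further down) finds in-flight commands by probing every slot of the command pool: take a reference, and a post-increment count greater than 1 means someone else holds the command. A standalone sketch of that probe:

#include <stdatomic.h>
#include <stdio.h>

#define NR_CMDS 4

struct cmd { atomic_int refcount; };
static struct cmd pool[NR_CMDS];

static int count_outstanding(void)
{
        int busy = 0;

        for (int i = 0; i < NR_CMDS; i++) {
                struct cmd *c = &pool[i];
                /* probe: a post-increment value > 1 means the slot is live */
                if (atomic_fetch_add(&c->refcount, 1) + 1 > 1)
                        busy++;                       /* fail or drain it here */
                atomic_fetch_sub(&c->refcount, 1);    /* drop the probe reference */
        }
        return busy;
}

int main(void)
{
        atomic_store(&pool[2].refcount, 1);           /* pretend one command is live */
        printf("outstanding: %d\n", count_outstanding());
        return 0;
}
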
 
@@ -6661,10 +6663,7 @@ static void controller_lockup_detected(struct ctlr_info *h)
        dev_warn(&h->pdev->dev, "Controller lockup detected: 0x%08x\n",
                        lockup_detected);
        pci_disable_device(h->pdev);
-       spin_lock_irqsave(&h->lock, flags);
-       fail_all_cmds_on_list(h, &h->cmpQ);
-       fail_all_cmds_on_list(h, &h->reqQ);
-       spin_unlock_irqrestore(&h->lock, flags);
+       fail_all_outstanding_cmds(h);
 }
 
 static void detect_controller_lockup(struct ctlr_info *h)
@@ -6707,8 +6706,8 @@ static void hpsa_ack_ctlr_events(struct ctlr_info *h)
        int i;
        char *event_type;
 
-       /* Clear the driver-requested rescan flag */
-       h->drv_req_rescan = 0;
+       if (!(h->fw_support & MISC_FW_EVENT_NOTIFY))
+               return;
 
        /* Ask the controller to clear the events we're handling. */
        if ((h->transMethod & (CFGTBL_Trans_io_accel1
@@ -6755,9 +6754,6 @@ static void hpsa_ack_ctlr_events(struct ctlr_info *h)
  */
 static int hpsa_ctlr_needs_rescan(struct ctlr_info *h)
 {
-       if (h->drv_req_rescan)
-               return 1;
-
        if (!(h->fw_support & MISC_FW_EVENT_NOTIFY))
                return 0;
 
@@ -6791,34 +6787,60 @@ static int hpsa_offline_devices_ready(struct ctlr_info *h)
        return 0;
 }
 
-
-static void hpsa_monitor_ctlr_worker(struct work_struct *work)
+static void hpsa_rescan_ctlr_worker(struct work_struct *work)
 {
        unsigned long flags;
        struct ctlr_info *h = container_of(to_delayed_work(work),
-                                       struct ctlr_info, monitor_ctlr_work);
-       detect_controller_lockup(h);
-       if (lockup_detected(h))
+                                       struct ctlr_info, rescan_ctlr_work);
+
+       if (h->remove_in_progress)
                return;
 
        if (hpsa_ctlr_needs_rescan(h) || hpsa_offline_devices_ready(h)) {
                scsi_host_get(h->scsi_host);
-               h->drv_req_rescan = 0;
                hpsa_ack_ctlr_events(h);
                hpsa_scan_start(h->scsi_host);
                scsi_host_put(h->scsi_host);
        }
-
        spin_lock_irqsave(&h->lock, flags);
-       if (h->remove_in_progress) {
-               spin_unlock_irqrestore(&h->lock, flags);
+       if (!h->remove_in_progress)
+               queue_delayed_work(h->rescan_ctlr_wq, &h->rescan_ctlr_work,
+                               h->heartbeat_sample_interval);
+       spin_unlock_irqrestore(&h->lock, flags);
+}
+
+static void hpsa_monitor_ctlr_worker(struct work_struct *work)
+{
+       unsigned long flags;
+       struct ctlr_info *h = container_of(to_delayed_work(work),
+                                       struct ctlr_info, monitor_ctlr_work);
+
+       detect_controller_lockup(h);
+       if (lockup_detected(h))
                return;
-       }
-       schedule_delayed_work(&h->monitor_ctlr_work,
+
+       spin_lock_irqsave(&h->lock, flags);
+       if (!h->remove_in_progress)
+               schedule_delayed_work(&h->monitor_ctlr_work,
                                h->heartbeat_sample_interval);
        spin_unlock_irqrestore(&h->lock, flags);
 }
 
+static struct workqueue_struct *hpsa_create_controller_wq(struct ctlr_info *h,
+                                               char *name)
+{
+       struct workqueue_struct *wq = NULL;
+       char wq_name[20];
+
+       snprintf(wq_name, sizeof(wq_name), "%s_%d_hpsa", name, h->ctlr);
+       wq = alloc_ordered_workqueue(wq_name, 0);
+       if (!wq)
+               dev_err(&h->pdev->dev, "failed to create %s workqueue\n", name);
+
+       return wq;
+}
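
Each controller now gets its own ordered workqueue (named "<kind>_<ctlr>_hpsa") and the rescan worker re-queues itself until remove_in_progress is set; teardown sets the flag, cancels synchronously, then destroys the queue. A minimal module-style sketch of that self-rearming pattern (all names illustrative; note the driver additionally takes h->lock around the flag check and re-arm to close the race this sketch tolerates):

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct workqueue_struct *demo_wq;
static struct delayed_work demo_work;
static bool remove_in_progress;

static void demo_worker(struct work_struct *work)
{
        struct delayed_work *dw = to_delayed_work(work);

        pr_info("periodic rescan tick\n");
        if (!remove_in_progress)             /* re-arm, as the rescan worker does */
                queue_delayed_work(demo_wq, dw, msecs_to_jiffies(30000));
}

static int __init demo_init(void)
{
        demo_wq = alloc_ordered_workqueue("demo_%d_hpsa", 0, 0);
        if (!demo_wq)
                return -ENOMEM;
        INIT_DELAYED_WORK(&demo_work, demo_worker);
        queue_delayed_work(demo_wq, &demo_work, msecs_to_jiffies(30000));
        return 0;
}

static void __exit demo_exit(void)
{
        remove_in_progress = true;
        cancel_delayed_work_sync(&demo_work);  /* waits out a running instance */
        destroy_workqueue(demo_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
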
+
 static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        int dac, rc;
@@ -6855,13 +6877,23 @@ reinit_after_soft_reset:
 
        h->pdev = pdev;
        h->intr_mode = hpsa_simple_mode ? SIMPLE_MODE_INT : PERF_MODE_INT;
-       INIT_LIST_HEAD(&h->cmpQ);
-       INIT_LIST_HEAD(&h->reqQ);
        INIT_LIST_HEAD(&h->offline_device_list);
        spin_lock_init(&h->lock);
        spin_lock_init(&h->offline_device_lock);
        spin_lock_init(&h->scan_lock);
-       spin_lock_init(&h->passthru_count_lock);
+       atomic_set(&h->passthru_cmds_avail, HPSA_MAX_CONCURRENT_PASSTHRUS);
+
+       h->rescan_ctlr_wq = hpsa_create_controller_wq(h, "rescan");
+       if (!h->rescan_ctlr_wq) {
+               rc = -ENOMEM;
+               goto clean1;
+       }
+
+       h->resubmit_wq = hpsa_create_controller_wq(h, "resubmit");
+       if (!h->resubmit_wq) {
+               rc = -ENOMEM;
+               goto clean1;
+       }
 
        /* Allocate and clear per-cpu variable lockup_detected */
        h->lockup_detected = alloc_percpu(u32);
@@ -6896,13 +6928,14 @@ reinit_after_soft_reset:
        /* make sure the board interrupts are off */
        h->access.set_intr_mask(h, HPSA_INTR_OFF);
 
-       if (hpsa_request_irq(h, do_hpsa_intr_msi, do_hpsa_intr_intx))
+       if (hpsa_request_irqs(h, do_hpsa_intr_msi, do_hpsa_intr_intx))
                goto clean2;
        dev_info(&pdev->dev, "%s: <0x%x> at IRQ %d%s using DAC\n",
               h->devname, pdev->device,
               h->intr[h->intr_mode], dac ? "" : " not");
-       if (hpsa_allocate_cmd_pool(h))
-               goto clean4;
+       rc = hpsa_allocate_cmd_pool(h);
+       if (rc)
+               goto clean2_and_free_irqs;
        if (hpsa_allocate_sg_chain_blocks(h))
                goto clean4;
        init_waitqueue_head(&h->scan_wait_queue);
@@ -6931,12 +6964,12 @@ reinit_after_soft_reset:
                spin_lock_irqsave(&h->lock, flags);
                h->access.set_intr_mask(h, HPSA_INTR_OFF);
                spin_unlock_irqrestore(&h->lock, flags);
-               free_irqs(h);
-               rc = hpsa_request_irq(h, hpsa_msix_discard_completions,
+               hpsa_free_irqs(h);
+               rc = hpsa_request_irqs(h, hpsa_msix_discard_completions,
                                        hpsa_intx_discard_completions);
                if (rc) {
-                       dev_warn(&h->pdev->dev, "Failed to request_irq after "
-                               "soft reset.\n");
+                       dev_warn(&h->pdev->dev,
+                               "Failed to request_irq after soft reset.\n");
                        goto clean4;
                }
 
@@ -6973,7 +7006,6 @@ reinit_after_soft_reset:
                /* Enable Accelerated IO path at driver layer */
                h->acciopath_status = 1;
 
-       h->drv_req_rescan = 0;
 
        /* Turn the interrupts on so we can service requests */
        h->access.set_intr_mask(h, HPSA_INTR_ON);
@@ -6986,14 +7018,22 @@ reinit_after_soft_reset:
        INIT_DELAYED_WORK(&h->monitor_ctlr_work, hpsa_monitor_ctlr_worker);
        schedule_delayed_work(&h->monitor_ctlr_work,
                                h->heartbeat_sample_interval);
+       INIT_DELAYED_WORK(&h->rescan_ctlr_work, hpsa_rescan_ctlr_worker);
+       queue_delayed_work(h->rescan_ctlr_wq, &h->rescan_ctlr_work,
+                               h->heartbeat_sample_interval);
        return 0;
 
 clean4:
        hpsa_free_sg_chain_blocks(h);
        hpsa_free_cmd_pool(h);
-       free_irqs(h);
+clean2_and_free_irqs:
+       hpsa_free_irqs(h);
 clean2:
 clean1:
+       if (h->resubmit_wq)
+               destroy_workqueue(h->resubmit_wq);
+       if (h->rescan_ctlr_wq)
+               destroy_workqueue(h->rescan_ctlr_wq);
        if (h->lockup_detected)
                free_percpu(h->lockup_detected);
        kfree(h);
@@ -7012,9 +7052,9 @@ static void hpsa_flush_cache(struct ctlr_info *h)
        if (!flush_buf)
                return;
 
-       c = cmd_special_alloc(h);
+       c = cmd_alloc(h);
        if (!c) {
-               dev_warn(&h->pdev->dev, "cmd_special_alloc returned NULL!\n");
+               dev_warn(&h->pdev->dev, "cmd_alloc returned NULL!\n");
                goto out_of_memory;
        }
        if (fill_cmd(c, HPSA_CACHE_FLUSH, h, flush_buf, 4, 0,
@@ -7026,7 +7066,7 @@ static void hpsa_flush_cache(struct ctlr_info *h)
 out:
                dev_warn(&h->pdev->dev,
                        "error flushing cache on controller\n");
-       cmd_special_free(h, c);
+       cmd_free(h, c);
 out_of_memory:
        kfree(flush_buf);
 }
@@ -7067,9 +7107,11 @@ static void hpsa_remove_one(struct pci_dev *pdev)
        /* Get rid of any controller monitoring work items */
        spin_lock_irqsave(&h->lock, flags);
        h->remove_in_progress = 1;
-       cancel_delayed_work(&h->monitor_ctlr_work);
        spin_unlock_irqrestore(&h->lock, flags);
-
+       cancel_delayed_work_sync(&h->monitor_ctlr_work);
+       cancel_delayed_work_sync(&h->rescan_ctlr_work);
+       destroy_workqueue(h->rescan_ctlr_wq);
+       destroy_workqueue(h->resubmit_wq);
        hpsa_unregister_scsi(h);        /* unhook from SCSI subsystem */
        hpsa_shutdown(pdev);
        iounmap(h->vaddr);
@@ -7150,7 +7192,8 @@ static void  calc_bucket_map(int bucket[], int num_buckets,
        }
 }
 
-static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support)
+/* return -ENODEV on error (or another negative errno), 0 on success */
+static int hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support)
 {
        int i;
        unsigned long register_value;
@@ -7242,12 +7285,16 @@ static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support)
                }
        }
        writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
-       hpsa_wait_for_mode_change_ack(h);
+       if (hpsa_wait_for_mode_change_ack(h)) {
+               dev_err(&h->pdev->dev,
+                       "performant mode problem - doorbell timeout\n");
+               return -ENODEV;
+       }
        register_value = readl(&(h->cfgtable->TransportActive));
        if (!(register_value & CFGTBL_Trans_Performant)) {
-               dev_warn(&h->pdev->dev, "unable to get board into"
-                                       " performant mode\n");
-               return;
+               dev_err(&h->pdev->dev,
+                       "performant mode problem - transport not active\n");
+               return -ENODEV;
        }
        /* Change the access methods to the performant access methods */
        h->access = access;
@@ -7255,7 +7302,7 @@ static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support)
 
        if (!((trans_support & CFGTBL_Trans_io_accel1) ||
                (trans_support & CFGTBL_Trans_io_accel2)))
-               return;
+               return 0;
 
        if (trans_support & CFGTBL_Trans_io_accel1) {
                /* Set up I/O accelerator mode */
@@ -7290,8 +7337,7 @@ static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support)
                        cp->timeout_sec = 0;
                        cp->ReplyQueue = 0;
                        cp->tag =
-                               cpu_to_le64((i << DIRECT_LOOKUP_SHIFT) |
-                                               DIRECT_LOOKUP_BIT);
+                               cpu_to_le64((i << DIRECT_LOOKUP_SHIFT));
                        cp->host_addr =
                                cpu_to_le64(h->ioaccel_cmd_pool_dhandle +
                                        (i * sizeof(struct io_accel1_cmd)));
@@ -7320,7 +7366,12 @@ static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support)
                        writel(bft2[i], &h->ioaccel2_bft2_regs[i]);
        }
        writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
-       hpsa_wait_for_mode_change_ack(h);
+       if (hpsa_wait_for_mode_change_ack(h)) {
+               dev_err(&h->pdev->dev,
+                       "performant mode problem - enabling ioaccel mode\n");
+               return -ENODEV;
+       }
+       return 0;
 }
 
 static int hpsa_alloc_ioaccel_cmd_and_bft(struct ctlr_info *h)
@@ -7466,17 +7517,18 @@ static int is_accelerated_cmd(struct CommandList *c)
 static void hpsa_drain_accel_commands(struct ctlr_info *h)
 {
        struct CommandList *c = NULL;
-       unsigned long flags;
-       int accel_cmds_out;
+       int i, accel_cmds_out;
+       int refcount;
 
-       do { /* wait for all outstanding commands to drain out */
+       do { /* wait for all outstanding ioaccel commands to drain out */
                accel_cmds_out = 0;
-               spin_lock_irqsave(&h->lock, flags);
-               list_for_each_entry(c, &h->cmpQ, list)
-                       accel_cmds_out += is_accelerated_cmd(c);
-               list_for_each_entry(c, &h->reqQ, list)
-                       accel_cmds_out += is_accelerated_cmd(c);
-               spin_unlock_irqrestore(&h->lock, flags);
+               for (i = 0; i < h->nr_cmds; i++) {
+                       c = h->cmd_pool + i;
+                       refcount = atomic_inc_return(&c->refcount);
+                       if (refcount > 1) /* Command is allocated */
+                               accel_cmds_out += is_accelerated_cmd(c);
+                       cmd_free(h, c);
+               }
                if (accel_cmds_out <= 0)
                        break;
                msleep(100);