#include "nvme.h"
-#define NVME_Q_DEPTH 1024
#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
MODULE_PARM_DESC(max_host_mem_size_mb,
"Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops io_queue_depth_ops = {
+ .set = io_queue_depth_set,
+ .get = param_get_int,
+};
+
+static int io_queue_depth = 1024;
+module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
+MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
+
struct nvme_dev;
struct nvme_queue;
struct device *dev;
struct dma_pool *prp_page_pool;
struct dma_pool *prp_small_pool;
- unsigned queue_count;
unsigned online_queues;
unsigned max_qid;
int q_depth;
/* host memory buffer support: */
u64 host_mem_size;
u32 nr_host_mem_descs;
+ dma_addr_t host_mem_descs_dma;
struct nvme_host_mem_buf_desc *host_mem_descs;
void **host_mem_desc_bufs;
};
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
+{
+ int n = 0, ret;
+
+ ret = kstrtoint(val, 10, &n);
+ if (ret != 0 || n < 2)
+ return -EINVAL;
+
+ return param_set_int(val, kp);
+}
+
static inline unsigned int sq_idx(unsigned int qid, u32 stride)
{
return qid * 2 * stride;
}
#endif
-static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
+static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct dma_pool *pool;
length -= (page_size - offset);
if (length <= 0)
- return true;
+ return BLK_STS_OK;
dma_len -= (page_size - offset);
if (dma_len) {
if (length <= page_size) {
iod->first_dma = dma_addr;
- return true;
+ return BLK_STS_OK;
}
nprps = DIV_ROUND_UP(length, page_size);
if (!prp_list) {
iod->first_dma = dma_addr;
iod->npages = -1;
- return false;
+ return BLK_STS_RESOURCE;
}
list[0] = prp_list;
iod->first_dma = prp_dma;
__le64 *old_prp_list = prp_list;
prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
if (!prp_list)
- return false;
+ return BLK_STS_RESOURCE;
list[iod->npages++] = prp_list;
prp_list[0] = old_prp_list[i - 1];
old_prp_list[i - 1] = cpu_to_le64(prp_dma);
break;
if (dma_len > 0)
continue;
- BUG_ON(dma_len < 0);
+ if (unlikely(dma_len < 0))
+ goto bad_sgl;
sg = sg_next(sg);
dma_addr = sg_dma_address(sg);
dma_len = sg_dma_len(sg);
}
- return true;
+ return BLK_STS_OK;
+
+ bad_sgl:
+ if (WARN_ONCE(1, "Invalid SGL for payload:%d nents:%d\n",
+ blk_rq_payload_bytes(req), iod->nents)) {
+ for_each_sg(iod->sg, sg, iod->nents, i) {
+ dma_addr_t phys = sg_phys(sg);
+ pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
+ "dma_address:%pad dma_length:%d\n", i, &phys,
+ sg->offset, sg->length,
+ &sg_dma_address(sg),
+ sg_dma_len(sg));
+ }
+ }
+ return BLK_STS_IOERR;
+
}
static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
DMA_ATTR_NO_WARN))
goto out;
- if (!nvme_setup_prps(dev, req))
+ ret = nvme_setup_prps(dev, req);
+ if (ret != BLK_STS_OK)
goto out_unmap;
ret = BLK_STS_IOERR;
return;
}
+ nvmeq->cqe_seen = 1;
req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
nvme_end_request(req, cqe->status, cqe->result);
}
consumed++;
}
- if (consumed) {
+ if (consumed)
nvme_ring_cq_doorbell(nvmeq);
- nvmeq->cqe_seen = 1;
- }
}
static irqreturn_t nvme_irq(int irq, void *data)
{
int i;
- for (i = dev->queue_count - 1; i >= lowest; i--) {
+ for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) {
struct nvme_queue *nvmeq = dev->queues[i];
- dev->queue_count--;
+ dev->ctrl.queue_count--;
dev->queues[i] = NULL;
nvme_free_queue(nvmeq);
}
spin_unlock_irq(&nvmeq->q_lock);
if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
- blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q);
+ blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq);
if (shutdown)
nvme_shutdown_ctrl(&dev->ctrl);
else
- nvme_disable_ctrl(&dev->ctrl, lo_hi_readq(
- dev->bar + NVME_REG_CAP));
+ nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
spin_lock_irq(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
nvmeq->qid = qid;
nvmeq->cq_vector = -1;
dev->queues[qid] = nvmeq;
- dev->queue_count++;
+ dev->ctrl.queue_count++;
return nvmeq;
* user requests may be waiting on a stopped queue. Start the
* queue to flush these to completion.
*/
- blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+ blk_mq_unquiesce_queue(dev->ctrl.admin_q);
blk_cleanup_queue(dev->ctrl.admin_q);
blk_mq_free_tag_set(&dev->admin_tagset);
}
return -ENODEV;
}
} else
- blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+ blk_mq_unquiesce_queue(dev->ctrl.admin_q);
return 0;
}
return 0;
}
-static int nvme_configure_admin_queue(struct nvme_dev *dev)
+static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
{
int result;
u32 aqa;
- u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
struct nvme_queue *nvmeq;
result = nvme_remap_bar(dev, db_bar_size(dev, 0));
return result;
dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
- NVME_CAP_NSSRC(cap) : 0;
+ NVME_CAP_NSSRC(dev->ctrl.cap) : 0;
if (dev->subsystem &&
(readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
- result = nvme_disable_ctrl(&dev->ctrl, cap);
+ result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
if (result < 0)
return result;
lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
- result = nvme_enable_ctrl(&dev->ctrl, cap);
+ result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap);
if (result)
return result;
unsigned i, max;
int ret = 0;
- for (i = dev->queue_count; i <= dev->max_qid; i++) {
+ for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
/* vector == qid - 1, match nvme_create_queue */
if (!nvme_alloc_queue(dev, i, dev->q_depth,
pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
}
}
- max = min(dev->max_qid, dev->queue_count - 1);
+ max = min(dev->max_qid, dev->ctrl.queue_count - 1);
for (i = dev->online_queues; i <= max; i++) {
ret = nvme_create_queue(dev->queues[i], i);
if (ret)
if (dev->cmb) {
iounmap(dev->cmb);
dev->cmb = NULL;
- if (dev->cmbsz) {
- sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
- &dev_attr_cmb.attr, NULL);
- dev->cmbsz = 0;
- }
+ sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
+ &dev_attr_cmb.attr, NULL);
+ dev->cmbsz = 0;
}
}
static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
{
- size_t len = dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs);
+ u64 dma_addr = dev->host_mem_descs_dma;
struct nvme_command c;
- u64 dma_addr;
int ret;
- dma_addr = dma_map_single(dev->dev, dev->host_mem_descs, len,
- DMA_TO_DEVICE);
- if (dma_mapping_error(dev->dev, dma_addr))
- return -ENOMEM;
-
memset(&c, 0, sizeof(c));
c.features.opcode = nvme_admin_set_features;
c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
"failed to set host mem (err %d, flags %#x).\n",
ret, bits);
}
- dma_unmap_single(dev->dev, dma_addr, len, DMA_TO_DEVICE);
return ret;
}
kfree(dev->host_mem_desc_bufs);
dev->host_mem_desc_bufs = NULL;
- kfree(dev->host_mem_descs);
+ dma_free_coherent(dev->dev,
+ dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs),
+ dev->host_mem_descs, dev->host_mem_descs_dma);
dev->host_mem_descs = NULL;
}
static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
{
struct nvme_host_mem_buf_desc *descs;
- u32 chunk_size, max_entries, i = 0;
+ u32 chunk_size, max_entries, len;
+ dma_addr_t descs_dma;
+ int i = 0;
void **bufs;
- u64 size, tmp;
+ u64 size = 0, tmp;
/* start big and work our way down */
chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
tmp = (preferred + chunk_size - 1);
do_div(tmp, chunk_size);
max_entries = tmp;
- descs = kcalloc(max_entries, sizeof(*descs), GFP_KERNEL);
+ descs = dma_zalloc_coherent(dev->dev, max_entries * sizeof(*descs),
+ &descs_dma, GFP_KERNEL);
if (!descs)
goto out;
if (!bufs)
goto out_free_descs;
- for (size = 0; size < preferred; size += chunk_size) {
- u32 len = min_t(u64, chunk_size, preferred - size);
+ for (size = 0; size < preferred; size += len) {
dma_addr_t dma_addr;
+ len = min_t(u64, chunk_size, preferred - size);
bufs[i] = dma_alloc_attrs(dev->dev, len, &dma_addr, GFP_KERNEL,
DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
if (!bufs[i])
dev->nr_host_mem_descs = i;
dev->host_mem_size = size;
dev->host_mem_descs = descs;
+ dev->host_mem_descs_dma = descs_dma;
dev->host_mem_desc_bufs = bufs;
return 0;
kfree(bufs);
out_free_descs:
- kfree(descs);
+ dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs,
+ descs_dma);
out:
/* try a smaller chunk size if we failed early */
if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
static int nvme_pci_enable(struct nvme_dev *dev)
{
- u64 cap;
int result = -ENOMEM;
struct pci_dev *pdev = to_pci_dev(dev->dev);
if (result < 0)
return result;
- cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+ dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
- dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
- dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
+ dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
+ io_queue_depth);
+ dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
dev->dbs = dev->bar + 4096;
/*
dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
"set queue depth=%u to work around controller resets\n",
dev->q_depth);
+ } else if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG &&
+ (pdev->device == 0xa821 || pdev->device == 0xa822) &&
+ NVME_CAP_MQES(dev->ctrl.cap) == 0) {
+ dev->q_depth = 64;
+ dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, "
+ "set queue depth=%u\n", dev->q_depth);
}
/*
* CMBs can currently only exist on >=1.2 PCIe devices. We only
- * populate sysfs if a CMB is implemented. Note that we add the
- * CMB attribute to the nvme_ctrl kobj which removes the need to remove
- * it on exit. Since nvme_dev_attrs_group has no name we can pass
- * NULL as final argument to sysfs_add_file_to_group.
+ * populate sysfs if a CMB is implemented. Since nvme_dev_attrs_group
+ * has no name we can pass NULL as final argument to
+ * sysfs_add_file_to_group.
*/
if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2, 0)) {
dev->cmb = nvme_map_cmb(dev);
-
- if (dev->cmbsz) {
+ if (dev->cmb) {
if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
&dev_attr_cmb.attr, NULL))
dev_warn(dev->ctrl.device,
nvme_stop_queues(&dev->ctrl);
queues = dev->online_queues - 1;
- for (i = dev->queue_count - 1; i > 0; i--)
+ for (i = dev->ctrl.queue_count - 1; i > 0; i--)
nvme_suspend_queue(dev->queues[i]);
if (dead) {
* probe, before the admin queue is configured. Thus,
* queue_count can be 0 here.
*/
- if (dev->queue_count)
+ if (dev->ctrl.queue_count)
nvme_suspend_queue(dev->queues[0]);
} else {
nvme_disable_io_queues(dev, queues);
if (result)
goto out;
- result = nvme_configure_admin_queue(dev);
+ result = nvme_pci_configure_admin_queue(dev);
if (result)
goto out;
if (result)
goto out;
- /*
- * A controller that can not execute IO typically requires user
- * intervention to correct. For such degraded controllers, the driver
- * should not submit commands the user did not request, so skip
- * registering for asynchronous event notification on this condition.
- */
- if (dev->online_queues > 1)
- nvme_queue_async_events(&dev->ctrl);
-
/*
* Keep the controller around but remove all namespaces if we don't have
* any working I/O queue.
goto out;
}
- if (dev->online_queues > 1)
- nvme_queue_scan(&dev->ctrl);
+ nvme_start_ctrl(&dev->ctrl);
return;
out:
result = nvme_dev_map(dev);
if (result)
- goto free;
+ goto put_pci;
INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
result = nvme_setup_prp_pools(dev);
if (result)
- goto put_pci;
+ goto unmap;
quirks |= check_dell_samsung_bug(pdev);
release_pools:
nvme_release_prp_pools(dev);
+ unmap:
+ nvme_dev_unmap(dev);
put_pci:
put_device(dev->dev);
- nvme_dev_unmap(dev);
free:
kfree(dev->queues);
kfree(dev);
}
flush_work(&dev->ctrl.reset_work);
- nvme_uninit_ctrl(&dev->ctrl);
+ nvme_stop_ctrl(&dev->ctrl);
+ nvme_remove_namespaces(&dev->ctrl);
nvme_dev_disable(dev, true);
nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
+ nvme_uninit_ctrl(&dev->ctrl);
nvme_release_prp_pools(dev);
nvme_dev_unmap(dev);
nvme_put_ctrl(&dev->ctrl);
{ PCI_VDEVICE(INTEL, 0x0a54),
.driver_data = NVME_QUIRK_STRIPE_SIZE |
NVME_QUIRK_DEALLOCATE_ZEROES, },
+ { PCI_VDEVICE(INTEL, 0x0a55),
+ .driver_data = NVME_QUIRK_STRIPE_SIZE |
+ NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS },
{ PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+ { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */
+ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+ { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */
+ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },