* more details.
*/
-#include <linux/nvme.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <scsi/sg.h>
#include <asm-generic/io-64-nonatomic-lo-hi.h>
+#include <uapi/linux/nvme_ioctl.h>
+#include "nvme.h"
+
#define NVME_MINORS (1U << MINORBITS)
#define NVME_Q_DEPTH 1024
#define NVME_AQ_DEPTH 256
static struct class *nvme_class;
-static void nvme_reset_failed_dev(struct work_struct *ws);
+static int __nvme_reset(struct nvme_dev *dev);
static int nvme_reset(struct nvme_dev *dev);
static int nvme_process_cq(struct nvme_queue *nvmeq);
+static void nvme_dead_ctrl(struct nvme_dev *dev);
struct async_cmd_info {
struct kthread_work work;
spin_unlock_irqrestore(req->q->queue_lock, flags);
return;
}
+
if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
if (cmd_rq->ctx == CMD_CTX_CANCELLED)
- req->errors = -EINTR;
- else
- req->errors = status;
+ status = -EINTR;
} else {
- req->errors = nvme_error_status(status);
+ status = nvme_error_status(status);
}
- } else
- req->errors = 0;
+ }
+
if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
u32 result = le32_to_cpup(&cqe->result);
req->special = (void *)(uintptr_t)result;
}
nvme_free_iod(nvmeq->dev, iod);
- blk_mq_complete_request(req);
+ blk_mq_complete_request(req, status);
}
/* length is in bytes. gfp flags indicate whether we may sleep. */
if (ns && ns->ms && !blk_integrity_rq(req)) {
if (!(ns->pi_type && ns->ms == 8) &&
req->cmd_type != REQ_TYPE_DRV_PRIV) {
- req->errors = -EFAULT;
- blk_mq_complete_request(req);
+ blk_mq_complete_request(req, -EFAULT);
return BLK_MQ_RQ_QUEUE_OK;
}
}
struct nvme_command cmd;
if (!nvmeq->qid || cmd_rq->aborted) {
- unsigned long flags;
-
- spin_lock_irqsave(&dev_list_lock, flags);
- if (work_busy(&dev->reset_work))
- goto out;
- list_del_init(&dev->node);
- dev_warn(dev->dev, "I/O %d QID %d timeout, reset controller\n",
- req->tag, nvmeq->qid);
- dev->reset_workfn = nvme_reset_failed_dev;
- queue_work(nvme_workq, &dev->reset_work);
- out:
- spin_unlock_irqrestore(&dev_list_lock, flags);
+ spin_lock(&dev_list_lock);
+ if (!__nvme_reset(dev)) {
+ dev_warn(dev->dev,
+ "I/O %d QID %d timeout, reset controller\n",
+ req->tag, nvmeq->qid);
+ }
+ spin_unlock(&dev_list_lock);
return;
}
#define nvme_compat_ioctl NULL
#endif
+static void nvme_free_dev(struct kref *kref);
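+
+/*
+ * Final release of a namespace: clear the gendisk's private_data under
+ * dev_list_lock so a racing nvme_open() sees a dead namespace, then drop
+ * the controller reference taken in nvme_alloc_ns().
+ */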
+static void nvme_free_ns(struct kref *kref)
+{
+ struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
+
+ spin_lock(&dev_list_lock);
+ ns->disk->private_data = NULL;
+ spin_unlock(&dev_list_lock);
+
+ kref_put(&ns->dev->kref, nvme_free_dev);
+ put_disk(ns->disk);
+ kfree(ns);
+}
+
static int nvme_open(struct block_device *bdev, fmode_t mode)
{
int ret = 0;
ns = bdev->bd_disk->private_data;
if (!ns)
ret = -ENXIO;
- else if (!kref_get_unless_zero(&ns->dev->kref))
+ else if (!kref_get_unless_zero(&ns->kref))
ret = -ENXIO;
spin_unlock(&dev_list_lock);
return ret;
}
-static void nvme_free_dev(struct kref *kref);
-
static void nvme_release(struct gendisk *disk, fmode_t mode)
{
struct nvme_ns *ns = disk->private_data;
- struct nvme_dev *dev = ns->dev;
-
- kref_put(&dev->kref, nvme_free_dev);
+ kref_put(&ns->kref, nvme_free_ns);
}
static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo)
if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
csts & NVME_CSTS_CFS) {
- if (work_busy(&dev->reset_work))
- continue;
- list_del_init(&dev->node);
- dev_warn(dev->dev,
- "Failed status: %x, reset controller\n",
- readl(&dev->bar->csts));
- dev->reset_workfn = nvme_reset_failed_dev;
- queue_work(nvme_workq, &dev->reset_work);
+ if (!__nvme_reset(dev)) {
+ dev_warn(dev->dev,
+ "Failed status: %x, reset controller\n",
+ readl(&dev->bar->csts));
+ }
continue;
}
for (i = 0; i < dev->queue_count; i++) {
if (!disk)
goto out_free_queue;
+ kref_init(&ns->kref);
ns->ns_id = nsid;
ns->disk = disk;
	ns->lba_shift = 9; /* default to 512-byte blocks until the disk is validated */
if (nvme_revalidate_disk(ns->disk))
goto out_free_disk;
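+	/*
+	 * The namespace holds a reference on the controller, dropped in
+	 * nvme_free_ns() once the last user of the namespace is gone.
+	 */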
+ kref_get(&dev->kref);
add_disk(ns->disk);
if (ns->ms) {
struct block_device *bd = bdget_disk(ns->disk, 0);
kfree(ns);
}
+/*
+ * Create I/O queues.  Failing to create an I/O queue is not fatal: we
+ * can continue with fewer than the desired number of queues, and even
+ * a controller without I/O queues can still be used to issue admin
+ * commands.  This might be useful, for example, to upgrade buggy
+ * firmware.
+ */
static void nvme_create_io_queues(struct nvme_dev *dev)
{
unsigned i;
break;
for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
- if (nvme_create_queue(dev->queues[i], i))
+ if (nvme_create_queue(dev->queues[i], i)) {
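+			/*
+			 * Creation failed: free this queue and any higher
+			 * ones that were allocated but never created, so
+			 * queue_count only covers usable queues.
+			 */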
+ nvme_free_queues(dev, i);
break;
+ }
}
static int set_queue_count(struct nvme_dev *dev, int count)
return result;
}
-static void nvme_free_namespace(struct nvme_ns *ns)
-{
- list_del(&ns->list);
-
- spin_lock(&dev_list_lock);
- ns->disk->private_data = NULL;
- spin_unlock(&dev_list_lock);
-
- put_disk(ns->disk);
- kfree(ns);
-}
-
static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
{
struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
if (kill || !blk_queue_dying(ns->queue)) {
blk_mq_abort_requeue_list(ns->queue);
blk_cleanup_queue(ns->queue);
- }
+ }
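+	/*
+	 * Take the namespace off the controller's list and drop the
+	 * initial reference; the actual free waits for any remaining
+	 * openers to go away.
+	 */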
+ list_del_init(&ns->list);
+ kref_put(&ns->kref, nvme_free_ns);
}
static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
for (i = 1; i <= nn; i++) {
ns = nvme_find_ns(dev, i);
if (ns) {
- if (revalidate_disk(ns->disk)) {
+ if (revalidate_disk(ns->disk))
nvme_ns_remove(ns);
- nvme_free_namespace(ns);
- }
} else
nvme_alloc_ns(dev, i);
}
list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
- if (ns->ns_id > nn) {
+ if (ns->ns_id > nn)
nvme_ns_remove(ns);
- nvme_free_namespace(ns);
- }
}
list_sort(NULL, &dev->namespaces, ns_cmp);
}
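+
+/*
+ * Set IRQ affinity hints from each queue's blk-mq CPU mask; queues
+ * whose tags have not been set up yet are skipped.
+ */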
+static void nvme_set_irq_hints(struct nvme_dev *dev)
+{
+ struct nvme_queue *nvmeq;
+ int i;
+
+ for (i = 0; i < dev->online_queues; i++) {
+ nvmeq = dev->queues[i];
+
+ if (!nvmeq->tags || !(*nvmeq->tags))
+ continue;
+
+ irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
+ blk_mq_tags_cpumask(*nvmeq->tags));
+ }
+}
+
static void nvme_dev_scan(struct work_struct *work)
{
struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
return;
nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn));
kfree(ctrl);
+ nvme_set_irq_hints(dev);
}
/*
static void nvme_dev_remove(struct nvme_dev *dev)
{
- struct nvme_ns *ns;
+ struct nvme_ns *ns, *next;
- list_for_each_entry(ns, &dev->namespaces, list)
+ list_for_each_entry_safe(ns, next, &dev->namespaces, list)
nvme_ns_remove(ns);
}
spin_unlock(&dev_list_lock);
}
-static void nvme_free_namespaces(struct nvme_dev *dev)
-{
- struct nvme_ns *ns, *next;
-
- list_for_each_entry_safe(ns, next, &dev->namespaces, list)
- nvme_free_namespace(ns);
-}
-
static void nvme_free_dev(struct kref *kref)
{
struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
put_device(dev->dev);
put_device(dev->device);
- nvme_free_namespaces(dev);
nvme_release_instance(dev);
if (dev->tagset.tags)
blk_mq_free_tag_set(&dev->tagset);
.compat_ioctl = nvme_dev_ioctl,
};
-static void nvme_set_irq_hints(struct nvme_dev *dev)
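+/*
+ * Bring-up worker shared by initial probe, controller reset and resume,
+ * all of which schedule probe_work.  If bring-up fails and no reset is
+ * in flight, the controller is declared dead.
+ */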
+static void nvme_probe_work(struct work_struct *work)
{
- struct nvme_queue *nvmeq;
- int i;
-
- for (i = 0; i < dev->online_queues; i++) {
- nvmeq = dev->queues[i];
-
- if (!nvmeq->tags || !(*nvmeq->tags))
- continue;
-
- irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
- blk_mq_tags_cpumask(*nvmeq->tags));
- }
-}
-
-static int nvme_dev_start(struct nvme_dev *dev)
-{
- int result;
+ struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
bool start_thread = false;
+ int result;
result = nvme_dev_map(dev);
if (result)
- return result;
+ goto out;
result = nvme_configure_admin_queue(dev);
if (result)
if (result)
goto free_tags;
- nvme_set_irq_hints(dev);
-
dev->event_limit = 1;
- return result;
+
+ /*
+ * Keep the controller around but remove all namespaces if we don't have
+ * any working I/O queue.
+ */
+ if (dev->online_queues < 2) {
+ dev_warn(dev->dev, "IO queues not created\n");
+ nvme_dev_remove(dev);
+ } else {
+ nvme_unfreeze_queues(dev);
+ nvme_dev_add(dev);
+ }
+
+ return;
free_tags:
nvme_dev_remove_admin(dev);
nvme_dev_list_remove(dev);
unmap:
nvme_dev_unmap(dev);
- return result;
+ out:
+ if (!work_busy(&dev->reset_work))
+ nvme_dead_ctrl(dev);
}
static int nvme_remove_dead_ctrl(void *arg)
return 0;
}
-static void nvme_remove_disks(struct work_struct *ws)
-{
- struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
-
- nvme_free_queues(dev, 1);
- nvme_dev_remove(dev);
-}
-
-static int nvme_dev_resume(struct nvme_dev *dev)
-{
- int ret;
-
- ret = nvme_dev_start(dev);
- if (ret)
- return ret;
- if (dev->online_queues < 2) {
- spin_lock(&dev_list_lock);
- dev->reset_workfn = nvme_remove_disks;
- queue_work(nvme_workq, &dev->reset_work);
- spin_unlock(&dev_list_lock);
- } else {
- nvme_unfreeze_queues(dev);
- nvme_dev_add(dev);
- nvme_set_irq_hints(dev);
- }
- return 0;
-}
-
static void nvme_dead_ctrl(struct nvme_dev *dev)
{
dev_warn(dev->dev, "Device failed to resume\n");
}
}
-static void nvme_dev_reset(struct nvme_dev *dev)
+static void nvme_reset_work(struct work_struct *ws)
{
+ struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
bool in_probe = work_busy(&dev->probe_work);
nvme_dev_shutdown(dev);
schedule_work(&dev->probe_work);
}
-static void nvme_reset_failed_dev(struct work_struct *ws)
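+/*
+ * Queue a controller reset; returns -EBUSY if one is already pending.
+ * Callers must hold dev_list_lock: the device is taken off dev_list
+ * before the reset work is queued.
+ */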
+static int __nvme_reset(struct nvme_dev *dev)
{
- struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
- nvme_dev_reset(dev);
-}
-
-static void nvme_reset_workfn(struct work_struct *work)
-{
- struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
- dev->reset_workfn(work);
+ if (work_pending(&dev->reset_work))
+ return -EBUSY;
+ list_del_init(&dev->node);
+ queue_work(nvme_workq, &dev->reset_work);
+ return 0;
}
static int nvme_reset(struct nvme_dev *dev)
{
- int ret = -EBUSY;
+ int ret;
if (!dev->admin_q || blk_queue_dying(dev->admin_q))
return -ENODEV;
spin_lock(&dev_list_lock);
- if (!work_pending(&dev->reset_work)) {
- dev->reset_workfn = nvme_reset_failed_dev;
- queue_work(nvme_workq, &dev->reset_work);
- ret = 0;
- }
+ ret = __nvme_reset(dev);
spin_unlock(&dev_list_lock);
if (!ret) {
}
static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
-static void nvme_async_probe(struct work_struct *work);
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
int node, result = -ENOMEM;
goto free;
INIT_LIST_HEAD(&dev->namespaces);
- dev->reset_workfn = nvme_reset_failed_dev;
- INIT_WORK(&dev->reset_work, nvme_reset_workfn);
+ INIT_WORK(&dev->reset_work, nvme_reset_work);
dev->dev = get_device(&pdev->dev);
pci_set_drvdata(pdev, dev);
result = nvme_set_instance(dev);
INIT_LIST_HEAD(&dev->node);
INIT_WORK(&dev->scan_work, nvme_dev_scan);
- INIT_WORK(&dev->probe_work, nvme_async_probe);
+ INIT_WORK(&dev->probe_work, nvme_probe_work);
schedule_work(&dev->probe_work);
return 0;
return result;
}
-static void nvme_async_probe(struct work_struct *work)
-{
- struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
-
- if (nvme_dev_resume(dev) && !work_busy(&dev->reset_work))
- nvme_dead_ctrl(dev);
-}
-
static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
if (prepare)
nvme_dev_shutdown(dev);
else
- nvme_dev_resume(dev);
+ schedule_work(&dev->probe_work);
}
static void nvme_shutdown(struct pci_dev *pdev)
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) {
- ndev->reset_workfn = nvme_reset_failed_dev;
- queue_work(nvme_workq, &ndev->reset_work);
- }
+ schedule_work(&ndev->probe_work);
return 0;
}
#endif