git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - drivers/block/nvme-core.c
nvme: move hardware structures out of the uapi version of nvme.h
[karo-tx-linux.git] / drivers / block / nvme-core.c
index 6f04771f1019798cc2feabf73eff2ddbadc84b81..a526696d684db841d0ada59eaf6ceede5a9bd4d6 100644 (file)
@@ -12,7 +12,6 @@
  * more details.
  */
 
-#include <linux/nvme.h>
 #include <linux/bitops.h>
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
@@ -43,6 +42,9 @@
 #include <scsi/sg.h>
 #include <asm-generic/io-64-nonatomic-lo-hi.h>
 
+#include <uapi/linux/nvme_ioctl.h>
+#include "nvme.h"
+
 #define NVME_MINORS            (1U << MINORBITS)
 #define NVME_Q_DEPTH           1024
 #define NVME_AQ_DEPTH          256
@@ -84,9 +86,10 @@ static wait_queue_head_t nvme_kthread_wait;
 
 static struct class *nvme_class;
 
-static void nvme_reset_failed_dev(struct work_struct *ws);
+static int __nvme_reset(struct nvme_dev *dev);
 static int nvme_reset(struct nvme_dev *dev);
 static int nvme_process_cq(struct nvme_queue *nvmeq);
+static void nvme_dead_ctrl(struct nvme_dev *dev);
 
 struct async_cmd_info {
        struct kthread_work work;
@@ -1277,18 +1280,13 @@ static void nvme_abort_req(struct request *req)
        struct nvme_command cmd;
 
        if (!nvmeq->qid || cmd_rq->aborted) {
-               unsigned long flags;
-
-               spin_lock_irqsave(&dev_list_lock, flags);
-               if (work_busy(&dev->reset_work))
-                       goto out;
-               list_del_init(&dev->node);
-               dev_warn(dev->dev, "I/O %d QID %d timeout, reset controller\n",
-                                                       req->tag, nvmeq->qid);
-               dev->reset_workfn = nvme_reset_failed_dev;
-               queue_work(nvme_workq, &dev->reset_work);
- out:
-               spin_unlock_irqrestore(&dev_list_lock, flags);
+               spin_lock(&dev_list_lock);
+               if (!__nvme_reset(dev)) {
+                       dev_warn(dev->dev,
+                                "I/O %d QID %d timeout, reset controller\n",
+                                req->tag, nvmeq->qid);
+               }
+               spin_unlock(&dev_list_lock);
                return;
        }
 
@@ -1943,6 +1941,20 @@ static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
 #define nvme_compat_ioctl      NULL
 #endif
 
+static void nvme_free_dev(struct kref *kref);
+static void nvme_free_ns(struct kref *kref)
+{
+       struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
+
+       spin_lock(&dev_list_lock);
+       ns->disk->private_data = NULL;
+       spin_unlock(&dev_list_lock);
+
+       kref_put(&ns->dev->kref, nvme_free_dev);
+       put_disk(ns->disk);
+       kfree(ns);
+}
+
 static int nvme_open(struct block_device *bdev, fmode_t mode)
 {
        int ret = 0;
@@ -1952,21 +1964,17 @@ static int nvme_open(struct block_device *bdev, fmode_t mode)
        ns = bdev->bd_disk->private_data;
        if (!ns)
                ret = -ENXIO;
-       else if (!kref_get_unless_zero(&ns->dev->kref))
+       else if (!kref_get_unless_zero(&ns->kref))
                ret = -ENXIO;
        spin_unlock(&dev_list_lock);
 
        return ret;
 }
 
-static void nvme_free_dev(struct kref *kref);
-
 static void nvme_release(struct gendisk *disk, fmode_t mode)
 {
        struct nvme_ns *ns = disk->private_data;
-       struct nvme_dev *dev = ns->dev;
-
-       kref_put(&dev->kref, nvme_free_dev);
+       kref_put(&ns->kref, nvme_free_ns);
 }
 
 static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo)
@@ -2073,14 +2081,11 @@ static int nvme_kthread(void *data)
 
                        if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
                                                        csts & NVME_CSTS_CFS) {
-                               if (work_busy(&dev->reset_work))
-                                       continue;
-                               list_del_init(&dev->node);
-                               dev_warn(dev->dev,
-                                       "Failed status: %x, reset controller\n",
-                                       readl(&dev->bar->csts));
-                               dev->reset_workfn = nvme_reset_failed_dev;
-                               queue_work(nvme_workq, &dev->reset_work);
+                               if (!__nvme_reset(dev)) {
+                                       dev_warn(dev->dev,
+                                               "Failed status: %x, reset controller\n",
+                                               readl(&dev->bar->csts));
+                               }
                                continue;
                        }
                        for (i = 0; i < dev->queue_count; i++) {
@@ -2126,6 +2131,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
        if (!disk)
                goto out_free_queue;
 
+       kref_init(&ns->kref);
        ns->ns_id = nsid;
        ns->disk = disk;
        ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
@@ -2162,6 +2168,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
        if (nvme_revalidate_disk(ns->disk))
                goto out_free_disk;
 
+       kref_get(&dev->kref);
        add_disk(ns->disk);
        if (ns->ms) {
                struct block_device *bd = bdget_disk(ns->disk, 0);
@@ -2184,6 +2191,13 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
        kfree(ns);
 }
 
+/*
+ * Create I/O queues.  Failing to create an I/O queue is not an issue,
+ * we can continue with less than the desired amount of queues, and
+ * even a controller without I/O queues can still be used to issue
+ * admin commands.  This might be useful to upgrade a buggy firmware
+ * for example.
+ */
 static void nvme_create_io_queues(struct nvme_dev *dev)
 {
        unsigned i;
@@ -2193,8 +2207,10 @@ static void nvme_create_io_queues(struct nvme_dev *dev)
                        break;
 
        for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
-               if (nvme_create_queue(dev->queues[i], i))
+               if (nvme_create_queue(dev->queues[i], i)) {
+                       nvme_free_queues(dev, i);
                        break;
+               }
 }
 
 static int set_queue_count(struct nvme_dev *dev, int count)
@@ -2357,18 +2373,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
        return result;
 }
 
-static void nvme_free_namespace(struct nvme_ns *ns)
-{
-       list_del(&ns->list);
-
-       spin_lock(&dev_list_lock);
-       ns->disk->private_data = NULL;
-       spin_unlock(&dev_list_lock);
-
-       put_disk(ns->disk);
-       kfree(ns);
-}
-
 static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
 {
        struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
@@ -2410,7 +2414,9 @@ static void nvme_ns_remove(struct nvme_ns *ns)
        if (kill || !blk_queue_dying(ns->queue)) {
                blk_mq_abort_requeue_list(ns->queue);
                blk_cleanup_queue(ns->queue);
-        }
+       }
+       list_del_init(&ns->list);
+       kref_put(&ns->kref, nvme_free_ns);
 }
 
 static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
@@ -2421,18 +2427,14 @@ static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
        for (i = 1; i <= nn; i++) {
                ns = nvme_find_ns(dev, i);
                if (ns) {
-                       if (revalidate_disk(ns->disk)) {
+                       if (revalidate_disk(ns->disk))
                                nvme_ns_remove(ns);
-                               nvme_free_namespace(ns);
-                       }
                } else
                        nvme_alloc_ns(dev, i);
        }
        list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
-               if (ns->ns_id > nn) {
+               if (ns->ns_id > nn)
                        nvme_ns_remove(ns);
-                       nvme_free_namespace(ns);
-               }
        }
        list_sort(NULL, &dev->namespaces, ns_cmp);
 }
@@ -2822,9 +2824,9 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
 
 static void nvme_dev_remove(struct nvme_dev *dev)
 {
-       struct nvme_ns *ns;
+       struct nvme_ns *ns, *next;
 
-       list_for_each_entry(ns, &dev->namespaces, list)
+       list_for_each_entry_safe(ns, next, &dev->namespaces, list)
                nvme_ns_remove(ns);
 }
 
@@ -2880,21 +2882,12 @@ static void nvme_release_instance(struct nvme_dev *dev)
        spin_unlock(&dev_list_lock);
 }
 
-static void nvme_free_namespaces(struct nvme_dev *dev)
-{
-       struct nvme_ns *ns, *next;
-
-       list_for_each_entry_safe(ns, next, &dev->namespaces, list)
-               nvme_free_namespace(ns);
-}
-
 static void nvme_free_dev(struct kref *kref)
 {
        struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
 
        put_device(dev->dev);
        put_device(dev->device);
-       nvme_free_namespaces(dev);
        nvme_release_instance(dev);
        if (dev->tagset.tags)
                blk_mq_free_tag_set(&dev->tagset);
@@ -2968,14 +2961,15 @@ static const struct file_operations nvme_dev_fops = {
        .compat_ioctl   = nvme_dev_ioctl,
 };
 
-static int nvme_dev_start(struct nvme_dev *dev)
+static void nvme_probe_work(struct work_struct *work)
 {
-       int result;
+       struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
        bool start_thread = false;
+       int result;
 
        result = nvme_dev_map(dev);
        if (result)
-               return result;
+               goto out;
 
        result = nvme_configure_admin_queue(dev);
        if (result)
@@ -3010,7 +3004,20 @@ static int nvme_dev_start(struct nvme_dev *dev)
                goto free_tags;
 
        dev->event_limit = 1;
-       return result;
+
+       /*
+        * Keep the controller around but remove all namespaces if we don't have
+        * any working I/O queue.
+        */
+       if (dev->online_queues < 2) {
+               dev_warn(dev->dev, "IO queues not created\n");
+               nvme_dev_remove(dev);
+       } else {
+               nvme_unfreeze_queues(dev);
+               nvme_dev_add(dev);
+       }
+
+       return;
 
  free_tags:
        nvme_dev_remove_admin(dev);
@@ -3022,7 +3029,9 @@ static int nvme_dev_start(struct nvme_dev *dev)
        nvme_dev_list_remove(dev);
  unmap:
        nvme_dev_unmap(dev);
-       return result;
+ out:
+       if (!work_busy(&dev->reset_work))
+               nvme_dead_ctrl(dev);
 }
 
 static int nvme_remove_dead_ctrl(void *arg)
@@ -3036,33 +3045,6 @@ static int nvme_remove_dead_ctrl(void *arg)
        return 0;
 }
 
-static void nvme_remove_disks(struct work_struct *ws)
-{
-       struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
-
-       nvme_free_queues(dev, 1);
-       nvme_dev_remove(dev);
-}
-
-static int nvme_dev_resume(struct nvme_dev *dev)
-{
-       int ret;
-
-       ret = nvme_dev_start(dev);
-       if (ret)
-               return ret;
-       if (dev->online_queues < 2) {
-               spin_lock(&dev_list_lock);
-               dev->reset_workfn = nvme_remove_disks;
-               queue_work(nvme_workq, &dev->reset_work);
-               spin_unlock(&dev_list_lock);
-       } else {
-               nvme_unfreeze_queues(dev);
-               nvme_dev_add(dev);
-       }
-       return 0;
-}
-
 static void nvme_dead_ctrl(struct nvme_dev *dev)
 {
        dev_warn(dev->dev, "Device failed to resume\n");
@@ -3075,8 +3057,9 @@ static void nvme_dead_ctrl(struct nvme_dev *dev)
        }
 }
 
-static void nvme_dev_reset(struct nvme_dev *dev)
+static void nvme_reset_work(struct work_struct *ws)
 {
+       struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
        bool in_probe = work_busy(&dev->probe_work);
 
        nvme_dev_shutdown(dev);
@@ -3096,31 +3079,24 @@ static void nvme_dev_reset(struct nvme_dev *dev)
        schedule_work(&dev->probe_work);
 }
 
-static void nvme_reset_failed_dev(struct work_struct *ws)
-{
-       struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
-       nvme_dev_reset(dev);
-}
-
-static void nvme_reset_workfn(struct work_struct *work)
+static int __nvme_reset(struct nvme_dev *dev)
 {
-       struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
-       dev->reset_workfn(work);
+       if (work_pending(&dev->reset_work))
+               return -EBUSY;
+       list_del_init(&dev->node);
+       queue_work(nvme_workq, &dev->reset_work);
+       return 0;
 }
 
 static int nvme_reset(struct nvme_dev *dev)
 {
-       int ret = -EBUSY;
+       int ret;
 
        if (!dev->admin_q || blk_queue_dying(dev->admin_q))
                return -ENODEV;
 
        spin_lock(&dev_list_lock);
-       if (!work_pending(&dev->reset_work)) {
-               dev->reset_workfn = nvme_reset_failed_dev;
-               queue_work(nvme_workq, &dev->reset_work);
-               ret = 0;
-       }
+       ret = __nvme_reset(dev);
        spin_unlock(&dev_list_lock);
 
        if (!ret) {
@@ -3147,7 +3123,6 @@ static ssize_t nvme_sysfs_reset(struct device *dev,
 }
 static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
 
-static void nvme_async_probe(struct work_struct *work);
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
        int node, result = -ENOMEM;
@@ -3170,8 +3145,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                goto free;
 
        INIT_LIST_HEAD(&dev->namespaces);
-       dev->reset_workfn = nvme_reset_failed_dev;
-       INIT_WORK(&dev->reset_work, nvme_reset_workfn);
+       INIT_WORK(&dev->reset_work, nvme_reset_work);
        dev->dev = get_device(&pdev->dev);
        pci_set_drvdata(pdev, dev);
        result = nvme_set_instance(dev);
@@ -3199,7 +3173,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
        INIT_LIST_HEAD(&dev->node);
        INIT_WORK(&dev->scan_work, nvme_dev_scan);
-       INIT_WORK(&dev->probe_work, nvme_async_probe);
+       INIT_WORK(&dev->probe_work, nvme_probe_work);
        schedule_work(&dev->probe_work);
        return 0;
 
@@ -3219,14 +3193,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        return result;
 }
 
-static void nvme_async_probe(struct work_struct *work)
-{
-       struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
-
-       if (nvme_dev_resume(dev) && !work_busy(&dev->reset_work))
-               nvme_dead_ctrl(dev);
-}
-
 static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
 {
        struct nvme_dev *dev = pci_get_drvdata(pdev);
@@ -3234,7 +3200,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
        if (prepare)
                nvme_dev_shutdown(dev);
        else
-               nvme_dev_resume(dev);
+               schedule_work(&dev->probe_work);
 }
 
 static void nvme_shutdown(struct pci_dev *pdev)
@@ -3288,10 +3254,7 @@ static int nvme_resume(struct device *dev)
        struct pci_dev *pdev = to_pci_dev(dev);
        struct nvme_dev *ndev = pci_get_drvdata(pdev);
 
-       if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) {
-               ndev->reset_workfn = nvme_reset_failed_dev;
-               queue_work(nvme_workq, &ndev->reset_work);
-       }
+       schedule_work(&ndev->probe_work);
        return 0;
 }
 #endif