drivers/nvme/target/core.c

   1 /*
   2  * Common code for the NVMe target.
   3  * Copyright (c) 2015-2016 HGST, a Western Digital Company.
   4  *
   5  * This program is free software; you can redistribute it and/or modify it
   6  * under the terms and conditions of the GNU General Public License,
   7  * version 2, as published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  12  * more details.
  13  */
  14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  15 #include <linux/module.h>
  16 #include <linux/random.h>
  17 #include <linux/rculist.h>
  18
  19 #include "nvmet.h"
  20
  21 static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
  22 static DEFINE_IDA(cntlid_ida);
     /*
      * nvmet_transports: transport ops registered through
      * nvmet_register_transport(), indexed by ops->type; a NULL slot means
      * no transport of that type is currently registered.
      * cntlid_ida: allocator for the controller IDs assigned in
      * nvmet_alloc_ctrl() and released in nvmet_ctrl_free().
      */
  23
  24 /*
  25  * This read/write semaphore is used to synchronize access to configuration
  26  * information on a target system that will result in discovery log page
  27  * information change for at least one host.
  28  * The full list of resources protected by this semaphore is:
  29  *
  30  *  - subsystems list
  31  *  - per-subsystem allowed hosts list
  32  *  - allow_any_host subsystem attribute
  33  *  - nvmet_genctr
  34  *  - the nvmet_transports array
  35  *
  36  * When updating any of those lists/structures the write lock should be
  37  * obtained, while when reading (populating the discovery log page or checking
  38  * a host-subsystem link) the read lock is obtained to allow concurrent reads.
  39  */
  40 DECLARE_RWSEM(nvmet_config_sem);
  41
     /*
      * Forward declaration: the definition appears further down in this file,
      * but nvmet_ctrl_find_get() and nvmet_alloc_ctrl() call it earlier.
      */
  42 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
  43                 const char *subsysnqn);
  44
  45 u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
  46                 size_t len)
  47 {
  48         if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
  49                 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
  50         return 0;
  51 }
  52
  53 u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
  54 {
  55         if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
  56                 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
  57         return 0;
  58 }
  59
  60 static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
  61 {
  62         return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
  63 }
  64
  65 static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
  66 {
  67         struct nvmet_req *req;
  68
  69         while (1) {
  70                 mutex_lock(&ctrl->lock);
  71                 if (!ctrl->nr_async_event_cmds) {
  72                         mutex_unlock(&ctrl->lock);
  73                         return;
  74                 }
  75
  76                 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
  77                 mutex_unlock(&ctrl->lock);
  78                 nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
  79         }
  80 }
  81
  82 static void nvmet_async_event_work(struct work_struct *work)
  83 {
  84         struct nvmet_ctrl *ctrl =
  85                 container_of(work, struct nvmet_ctrl, async_event_work);
  86         struct nvmet_async_event *aen;
  87         struct nvmet_req *req;
  88
  89         while (1) {
  90                 mutex_lock(&ctrl->lock);
  91                 aen = list_first_entry_or_null(&ctrl->async_events,
  92                                 struct nvmet_async_event, entry);
  93                 if (!aen || !ctrl->nr_async_event_cmds) {
  94                         mutex_unlock(&ctrl->lock);
  95                         return;
  96                 }
  97
  98                 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
  99                 nvmet_set_result(req, nvmet_async_event_result(aen));
 100
 101                 list_del(&aen->entry);
 102                 kfree(aen);
 103
 104                 mutex_unlock(&ctrl->lock);
 105                 nvmet_req_complete(req, 0);
 106         }
 107 }
 108
 109 static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
 110                 u8 event_info, u8 log_page)
 111 {
 112         struct nvmet_async_event *aen;
 113
 114         aen = kmalloc(sizeof(*aen), GFP_KERNEL);
 115         if (!aen)
 116                 return;
 117
 118         aen->event_type = event_type;
 119         aen->event_info = event_info;
 120         aen->log_page = log_page;
 121
 122         mutex_lock(&ctrl->lock);
 123         list_add_tail(&aen->entry, &ctrl->async_events);
 124         mutex_unlock(&ctrl->lock);
 125
 126         schedule_work(&ctrl->async_event_work);
 127 }
 128
 129 int nvmet_register_transport(struct nvmet_fabrics_ops *ops)
 130 {
 131         int ret = 0;
 132
 133         down_write(&nvmet_config_sem);
 134         if (nvmet_transports[ops->type])
 135                 ret = -EINVAL;
 136         else
 137                 nvmet_transports[ops->type] = ops;
 138         up_write(&nvmet_config_sem);
 139
 140         return ret;
 141 }
 142 EXPORT_SYMBOL_GPL(nvmet_register_transport);
 143
 144 void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops)
 145 {
 146         down_write(&nvmet_config_sem);
 147         nvmet_transports[ops->type] = NULL;
 148         up_write(&nvmet_config_sem);
 149 }
 150 EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
 151
 152 int nvmet_enable_port(struct nvmet_port *port)
 153 {
 154         struct nvmet_fabrics_ops *ops;
 155         int ret;
 156
 157         lockdep_assert_held(&nvmet_config_sem);
 158
 159         ops = nvmet_transports[port->disc_addr.trtype];
 160         if (!ops) {
 161                 up_write(&nvmet_config_sem);
 162                 request_module("nvmet-transport-%d", port->disc_addr.trtype);
 163                 down_write(&nvmet_config_sem);
 164                 ops = nvmet_transports[port->disc_addr.trtype];
 165                 if (!ops) {
 166                         pr_err("transport type %d not supported\n",
 167                                 port->disc_addr.trtype);
 168                         return -EINVAL;
 169                 }
 170         }
 171
 172         if (!try_module_get(ops->owner))
 173                 return -EINVAL;
 174
 175         ret = ops->add_port(port);
 176         if (ret) {
 177                 module_put(ops->owner);
 178                 return ret;
 179         }
 180
 181         port->enabled = true;
 182         return 0;
 183 }
 184
 185 void nvmet_disable_port(struct nvmet_port *port)
 186 {
 187         struct nvmet_fabrics_ops *ops;
 188
 189         lockdep_assert_held(&nvmet_config_sem);
 190
 191         port->enabled = false;
 192
 193         ops = nvmet_transports[port->disc_addr.trtype];
 194         ops->remove_port(port);
 195         module_put(ops->owner);
 196 }
 197
 198 static void nvmet_keep_alive_timer(struct work_struct *work)
 199 {
 200         struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
 201                         struct nvmet_ctrl, ka_work);
 202
 203         pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
 204                 ctrl->cntlid, ctrl->kato);
 205
 206         nvmet_ctrl_fatal_error(ctrl);
 207 }
 208
 209 static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
 210 {
 211         pr_debug("ctrl %d start keep-alive timer for %d secs\n",
 212                 ctrl->cntlid, ctrl->kato);
 213
 214         INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
 215         schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
 216 }
 217
 218 static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
 219 {
 220         pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);
 221
 222         cancel_delayed_work_sync(&ctrl->ka_work);
 223 }
 224
 225 static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
 226                 __le32 nsid)
 227 {
 228         struct nvmet_ns *ns;
 229
 230         list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
 231                 if (ns->nsid == le32_to_cpu(nsid))
 232                         return ns;
 233         }
 234
 235         return NULL;
 236 }
 237
 238 struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
 239 {
 240         struct nvmet_ns *ns;
 241
 242         rcu_read_lock();
 243         ns = __nvmet_find_namespace(ctrl, nsid);
 244         if (ns)
 245                 percpu_ref_get(&ns->ref);
 246         rcu_read_unlock();
 247
 248         return ns;
 249 }
 250
 251 static void nvmet_destroy_namespace(struct percpu_ref *ref)
 252 {
 253         struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);
 254
 255         complete(&ns->disable_done);
 256 }
 257
 258 void nvmet_put_namespace(struct nvmet_ns *ns)
 259 {
 260         percpu_ref_put(&ns->ref);
 261 }
 262
 263 int nvmet_ns_enable(struct nvmet_ns *ns)
 264 {
 265         struct nvmet_subsys *subsys = ns->subsys;
 266         struct nvmet_ctrl *ctrl;
 267         int ret = 0;
 268
 269         mutex_lock(&subsys->lock);
 270         if (ns->enabled)
 271                 goto out_unlock;
 272
 273         ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
 274                         NULL);
 275         if (IS_ERR(ns->bdev)) {
 276                 pr_err("failed to open block device %s: (%ld)\n",
 277                        ns->device_path, PTR_ERR(ns->bdev));
 278                 ret = PTR_ERR(ns->bdev);
 279                 ns->bdev = NULL;
 280                 goto out_unlock;
 281         }
 282
 283         ns->size = i_size_read(ns->bdev->bd_inode);
 284         ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
 285
 286         ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
 287                                 0, GFP_KERNEL);
 288         if (ret)
 289                 goto out_blkdev_put;
 290
 291         if (ns->nsid > subsys->max_nsid)
 292                 subsys->max_nsid = ns->nsid;
 293
 294         /*
 295          * The namespaces list needs to be sorted to simplify the implementation
 296          * of the Identify Namepace List subcommand.
 297          */
 298         if (list_empty(&subsys->namespaces)) {
 299                 list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
 300         } else {
 301                 struct nvmet_ns *old;
 302
 303                 list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
 304                         BUG_ON(ns->nsid == old->nsid);
 305                         if (ns->nsid < old->nsid)
 306                                 break;
 307                 }
 308
 309                 list_add_tail_rcu(&ns->dev_link, &old->dev_link);
 310         }
 311
 312         list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
 313                 nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
 314
 315         ns->enabled = true;
 316         ret = 0;
 317 out_unlock:
 318         mutex_unlock(&subsys->lock);
 319         return ret;
 320 out_blkdev_put:
 321         blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
 322         ns->bdev = NULL;
 323         goto out_unlock;
 324 }
 325
 326 void nvmet_ns_disable(struct nvmet_ns *ns)
 327 {
 328         struct nvmet_subsys *subsys = ns->subsys;
 329         struct nvmet_ctrl *ctrl;
 330
 331         mutex_lock(&subsys->lock);
 332         if (!ns->enabled)
 333                 goto out_unlock;
 334
 335         ns->enabled = false;
 336         list_del_rcu(&ns->dev_link);
 337         mutex_unlock(&subsys->lock);
 338
 339         /*
 340          * Now that we removed the namespaces from the lookup list, we
 341          * can kill the per_cpu ref and wait for any remaining references
 342          * to be dropped, as well as a RCU grace period for anyone only
 343          * using the namepace under rcu_read_lock().  Note that we can't
 344          * use call_rcu here as we need to ensure the namespaces have
 345          * been fully destroyed before unloading the module.
 346          */
 347         percpu_ref_kill(&ns->ref);
 348         synchronize_rcu();
 349         wait_for_completion(&ns->disable_done);
 350         percpu_ref_exit(&ns->ref);
 351
 352         mutex_lock(&subsys->lock);
 353         list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
 354                 nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
 355
 356         if (ns->bdev)
 357                 blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
 358 out_unlock:
 359         mutex_unlock(&subsys->lock);
 360 }
 361
 362 void nvmet_ns_free(struct nvmet_ns *ns)
 363 {
 364         nvmet_ns_disable(ns);
 365
 366         kfree(ns->device_path);
 367         kfree(ns);
 368 }
 369
 370 struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
 371 {
 372         struct nvmet_ns *ns;
 373
 374         ns = kzalloc(sizeof(*ns), GFP_KERNEL);
 375         if (!ns)
 376                 return NULL;
 377
 378         INIT_LIST_HEAD(&ns->dev_link);
 379         init_completion(&ns->disable_done);
 380
 381         ns->nsid = nsid;
 382         ns->subsys = subsys;
 383
 384         return ns;
 385 }
 386
 387 static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
 388 {
 389         if (status)
 390                 nvmet_set_status(req, status);
 391
 392         /* XXX: need to fill in something useful for sq_head */
 393         req->rsp->sq_head = 0;
 394         if (likely(req->sq)) /* may happen during early failure */
 395                 req->rsp->sq_id = cpu_to_le16(req->sq->qid);
 396         req->rsp->command_id = req->cmd->common.command_id;
 397
 398         if (req->ns)
 399                 nvmet_put_namespace(req->ns);
 400         req->ops->queue_response(req);
 401 }
 402
 403 void nvmet_req_complete(struct nvmet_req *req, u16 status)
 404 {
 405         __nvmet_req_complete(req, status);
 406         percpu_ref_put(&req->sq->ref);
 407 }
 408 EXPORT_SYMBOL_GPL(nvmet_req_complete);
 409
 410 void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
 411                 u16 qid, u16 size)
 412 {
 413         cq->qid = qid;
 414         cq->size = size;
 415
 416         ctrl->cqs[qid] = cq;
 417 }
 418
 419 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
 420                 u16 qid, u16 size)
 421 {
 422         sq->qid = qid;
 423         sq->size = size;
 424
 425         ctrl->sqs[qid] = sq;
 426 }
 427
 428 static void nvmet_confirm_sq(struct percpu_ref *ref)
 429 {
 430         struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
 431
 432         complete(&sq->confirm_done);
 433 }
 434
 435 void nvmet_sq_destroy(struct nvmet_sq *sq)
 436 {
 437         /*
 438          * If this is the admin queue, complete all AERs so that our
 439          * queue doesn't have outstanding requests on it.
 440          */
 441         if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
 442                 nvmet_async_events_free(sq->ctrl);
 443         percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
 444         wait_for_completion(&sq->confirm_done);
 445         wait_for_completion(&sq->free_done);
 446         percpu_ref_exit(&sq->ref);
 447
 448         if (sq->ctrl) {
 449                 nvmet_ctrl_put(sq->ctrl);
 450                 sq->ctrl = NULL; /* allows reusing the queue later */
 451         }
 452 }
 453 EXPORT_SYMBOL_GPL(nvmet_sq_destroy);
 454
 455 static void nvmet_sq_free(struct percpu_ref *ref)
 456 {
 457         struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
 458
 459         complete(&sq->free_done);
 460 }
 461
 462 int nvmet_sq_init(struct nvmet_sq *sq)
 463 {
 464         int ret;
 465
 466         ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
 467         if (ret) {
 468                 pr_err("percpu_ref init failed!\n");
 469                 return ret;
 470         }
 471         init_completion(&sq->free_done);
 472         init_completion(&sq->confirm_done);
 473
 474         return 0;
 475 }
 476 EXPORT_SYMBOL_GPL(nvmet_sq_init);
 477
 478 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 479                 struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops)
 480 {
 481         u8 flags = req->cmd->common.flags;
 482         u16 status;
 483
 484         req->cq = cq;
 485         req->sq = sq;
 486         req->ops = ops;
 487         req->sg = NULL;
 488         req->sg_cnt = 0;
 489         req->rsp->status = 0;
 490
 491         /* no support for fused commands yet */
 492         if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
 493                 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 494                 goto fail;
 495         }
 496
 497         /* either variant of SGLs is fine, as we don't support metadata */
 498         if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF &&
 499                      (flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METASEG)) {
 500                 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 501                 goto fail;
 502         }
 503
 504         if (unlikely(!req->sq->ctrl))
 505                 /* will return an error for any Non-connect command: */
 506                 status = nvmet_parse_connect_cmd(req);
 507         else if (likely(req->sq->qid != 0))
 508                 status = nvmet_parse_io_cmd(req);
 509         else if (req->cmd->common.opcode == nvme_fabrics_command)
 510                 status = nvmet_parse_fabrics_cmd(req);
 511         else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
 512                 status = nvmet_parse_discovery_cmd(req);
 513         else
 514                 status = nvmet_parse_admin_cmd(req);
 515
 516         if (status)
 517                 goto fail;
 518
 519         if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
 520                 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 521                 goto fail;
 522         }
 523
 524         return true;
 525
 526 fail:
 527         __nvmet_req_complete(req, status);
 528         return false;
 529 }
 530 EXPORT_SYMBOL_GPL(nvmet_req_init);
 531
 532 static inline bool nvmet_cc_en(u32 cc)
 533 {
 534         return cc & 0x1;
 535 }
 536
 537 static inline u8 nvmet_cc_css(u32 cc)
 538 {
 539         return (cc >> 4) & 0x7;
 540 }
 541
 542 static inline u8 nvmet_cc_mps(u32 cc)
 543 {
 544         return (cc >> 7) & 0xf;
 545 }
 546
 547 static inline u8 nvmet_cc_ams(u32 cc)
 548 {
 549         return (cc >> 11) & 0x7;
 550 }
 551
 552 static inline u8 nvmet_cc_shn(u32 cc)
 553 {
 554         return (cc >> 14) & 0x3;
 555 }
 556
 557 static inline u8 nvmet_cc_iosqes(u32 cc)
 558 {
 559         return (cc >> 16) & 0xf;
 560 }
 561
 562 static inline u8 nvmet_cc_iocqes(u32 cc)
 563 {
 564         return (cc >> 20) & 0xf;
 565 }
 566
 567 static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
 568 {
 569         lockdep_assert_held(&ctrl->lock);
 570
 571         if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
 572             nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
 573             nvmet_cc_mps(ctrl->cc) != 0 ||
 574             nvmet_cc_ams(ctrl->cc) != 0 ||
 575             nvmet_cc_css(ctrl->cc) != 0) {
 576                 ctrl->csts = NVME_CSTS_CFS;
 577                 return;
 578         }
 579
 580         ctrl->csts = NVME_CSTS_RDY;
 581 }
 582
 583 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
 584 {
 585         lockdep_assert_held(&ctrl->lock);
 586
 587         /* XXX: tear down queues? */
 588         ctrl->csts &= ~NVME_CSTS_RDY;
 589         ctrl->cc = 0;
 590 }
 591
 592 void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
 593 {
 594         u32 old;
 595
 596         mutex_lock(&ctrl->lock);
 597         old = ctrl->cc;
 598         ctrl->cc = new;
 599
 600         if (nvmet_cc_en(new) && !nvmet_cc_en(old))
 601                 nvmet_start_ctrl(ctrl);
 602         if (!nvmet_cc_en(new) && nvmet_cc_en(old))
 603                 nvmet_clear_ctrl(ctrl);
 604         if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
 605                 nvmet_clear_ctrl(ctrl);
 606                 ctrl->csts |= NVME_CSTS_SHST_CMPLT;
 607         }
 608         if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
 609                 ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
 610         mutex_unlock(&ctrl->lock);
 611 }
 612
 613 static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
 614 {
 615         /* command sets supported: NVMe command set: */
 616         ctrl->cap = (1ULL << 37);
 617         /* CC.EN timeout in 500msec units: */
 618         ctrl->cap |= (15ULL << 24);
 619         /* maximum queue entries supported: */
 620         ctrl->cap |= NVMET_QUEUE_SIZE - 1;
 621 }
 622
 623 u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
 624                 struct nvmet_req *req, struct nvmet_ctrl **ret)
 625 {
 626         struct nvmet_subsys *subsys;
 627         struct nvmet_ctrl *ctrl;
 628         u16 status = 0;
 629
 630         subsys = nvmet_find_get_subsys(req->port, subsysnqn);
 631         if (!subsys) {
 632                 pr_warn("connect request for invalid subsystem %s!\n",
 633                         subsysnqn);
 634                 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
 635                 return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
 636         }
 637
 638         mutex_lock(&subsys->lock);
 639         list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
 640                 if (ctrl->cntlid == cntlid) {
 641                         if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
 642                                 pr_warn("hostnqn mismatch.\n");
 643                                 continue;
 644                         }
 645                         if (!kref_get_unless_zero(&ctrl->ref))
 646                                 continue;
 647
 648                         *ret = ctrl;
 649                         goto out;
 650                 }
 651         }
 652
 653         pr_warn("could not find controller %d for subsys %s / host %s\n",
 654                 cntlid, subsysnqn, hostnqn);
 655         req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
 656         status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
 657
 658 out:
 659         mutex_unlock(&subsys->lock);
 660         nvmet_subsys_put(subsys);
 661         return status;
 662 }
 663
 664 u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
 665 {
 666         if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
 667                 pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n",
 668                        cmd->common.opcode, req->sq->qid);
 669                 return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
 670         }
 671
 672         if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
 673                 pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n",
 674                        cmd->common.opcode, req->sq->qid);
 675                 req->ns = NULL;
 676                 return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
 677         }
 678         return 0;
 679 }
 680
 681 static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
 682                 const char *hostnqn)
 683 {
 684         struct nvmet_host_link *p;
 685
 686         if (subsys->allow_any_host)
 687                 return true;
 688
 689         list_for_each_entry(p, &subsys->hosts, entry) {
 690                 if (!strcmp(nvmet_host_name(p->host), hostnqn))
 691                         return true;
 692         }
 693
 694         return false;
 695 }
 696
 697 static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
 698                 const char *hostnqn)
 699 {
 700         struct nvmet_subsys_link *s;
 701
 702         list_for_each_entry(s, &req->port->subsystems, entry) {
 703                 if (__nvmet_host_allowed(s->subsys, hostnqn))
 704                         return true;
 705         }
 706
 707         return false;
 708 }
 709
 710 bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
 711                 const char *hostnqn)
 712 {
 713         lockdep_assert_held(&nvmet_config_sem);
 714
 715         if (subsys->type == NVME_NQN_DISC)
 716                 return nvmet_host_discovery_allowed(req, hostnqn);
 717         else
 718                 return __nvmet_host_allowed(subsys, hostnqn);
 719 }
 720
 721 u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 722                 struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
 723 {
 724         struct nvmet_subsys *subsys;
 725         struct nvmet_ctrl *ctrl;
 726         int ret;
 727         u16 status;
 728
 729         status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
 730         subsys = nvmet_find_get_subsys(req->port, subsysnqn);
 731         if (!subsys) {
 732                 pr_warn("connect request for invalid subsystem %s!\n",
 733                         subsysnqn);
 734                 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
 735                 goto out;
 736         }
 737
 738         status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
 739         down_read(&nvmet_config_sem);
 740         if (!nvmet_host_allowed(req, subsys, hostnqn)) {
 741                 pr_info("connect by host %s for subsystem %s not allowed\n",
 742                         hostnqn, subsysnqn);
 743                 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
 744                 up_read(&nvmet_config_sem);
 745                 goto out_put_subsystem;
 746         }
 747         up_read(&nvmet_config_sem);
 748
 749         status = NVME_SC_INTERNAL;
 750         ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
 751         if (!ctrl)
 752                 goto out_put_subsystem;
 753         mutex_init(&ctrl->lock);
 754
 755         nvmet_init_cap(ctrl);
 756
 757         INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
 758         INIT_LIST_HEAD(&ctrl->async_events);
 759
 760         memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
 761         memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
 762
 763         /* generate a random serial number as our controllers are ephemeral: */
 764         get_random_bytes(&ctrl->serial, sizeof(ctrl->serial));
 765
 766         kref_init(&ctrl->ref);
 767         ctrl->subsys = subsys;
 768
 769         ctrl->cqs = kcalloc(subsys->max_qid + 1,
 770                         sizeof(struct nvmet_cq *),
 771                         GFP_KERNEL);
 772         if (!ctrl->cqs)
 773                 goto out_free_ctrl;
 774
 775         ctrl->sqs = kcalloc(subsys->max_qid + 1,
 776                         sizeof(struct nvmet_sq *),
 777                         GFP_KERNEL);
 778         if (!ctrl->sqs)
 779                 goto out_free_cqs;
 780
 781         ret = ida_simple_get(&cntlid_ida,
 782                              NVME_CNTLID_MIN, NVME_CNTLID_MAX,
 783                              GFP_KERNEL);
 784         if (ret < 0) {
 785                 status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
 786                 goto out_free_sqs;
 787         }
 788         ctrl->cntlid = ret;
 789
 790         ctrl->ops = req->ops;
 791         if (ctrl->subsys->type == NVME_NQN_DISC) {
 792                 /* Don't accept keep-alive timeout for discovery controllers */
 793                 if (kato) {
 794                         status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 795                         goto out_free_sqs;
 796                 }
 797
 798                 /*
 799                  * Discovery controllers use some arbitrary high value in order
 800                  * to cleanup stale discovery sessions
 801                  *
 802                  * From the latest base diff RC:
 803                  * "The Keep Alive command is not supported by
 804                  * Discovery controllers. A transport may specify a
 805                  * fixed Discovery controller activity timeout value
 806                  * (e.g., 2 minutes).  If no commands are received
 807                  * by a Discovery controller within that time
 808                  * period, the controller may perform the
 809                  * actions for Keep Alive Timer expiration".
 810                  */
 811                 ctrl->kato = NVMET_DISC_KATO;
 812         } else {
 813                 /* keep-alive timeout in seconds */
 814                 ctrl->kato = DIV_ROUND_UP(kato, 1000);
 815         }
 816         nvmet_start_keep_alive_timer(ctrl);
 817
 818         mutex_lock(&subsys->lock);
 819         list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
 820         mutex_unlock(&subsys->lock);
 821
 822         *ctrlp = ctrl;
 823         return 0;
 824
 825 out_free_sqs:
 826         kfree(ctrl->sqs);
 827 out_free_cqs:
 828         kfree(ctrl->cqs);
 829 out_free_ctrl:
 830         kfree(ctrl);
 831 out_put_subsystem:
 832         nvmet_subsys_put(subsys);
 833 out:
 834         return status;
 835 }
 836
 837 static void nvmet_ctrl_free(struct kref *ref)
 838 {
 839         struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
 840         struct nvmet_subsys *subsys = ctrl->subsys;
 841
 842         nvmet_stop_keep_alive_timer(ctrl);
 843
 844         mutex_lock(&subsys->lock);
 845         list_del(&ctrl->subsys_entry);
 846         mutex_unlock(&subsys->lock);
 847
 848         flush_work(&ctrl->async_event_work);
 849         cancel_work_sync(&ctrl->fatal_err_work);
 850
 851         ida_simple_remove(&cntlid_ida, ctrl->cntlid);
 852         nvmet_subsys_put(subsys);
 853
 854         kfree(ctrl->sqs);
 855         kfree(ctrl->cqs);
 856         kfree(ctrl);
 857 }
 858
 859 void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
 860 {
 861         kref_put(&ctrl->ref, nvmet_ctrl_free);
 862 }
 863
 864 static void nvmet_fatal_error_handler(struct work_struct *work)
 865 {
 866         struct nvmet_ctrl *ctrl =
 867                         container_of(work, struct nvmet_ctrl, fatal_err_work);
 868
 869         pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
 870         ctrl->ops->delete_ctrl(ctrl);
 871 }
 872
 873 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
 874 {
 875         mutex_lock(&ctrl->lock);
 876         if (!(ctrl->csts & NVME_CSTS_CFS)) {
 877                 ctrl->csts |= NVME_CSTS_CFS;
 878                 INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
 879                 schedule_work(&ctrl->fatal_err_work);
 880         }
 881         mutex_unlock(&ctrl->lock);
 882 }
 883 EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);
 884
 885 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
 886                 const char *subsysnqn)
 887 {
 888         struct nvmet_subsys_link *p;
 889
 890         if (!port)
 891                 return NULL;
 892
 893         if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn,
 894                         NVMF_NQN_SIZE)) {
 895                 if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
 896                         return NULL;
 897                 return nvmet_disc_subsys;
 898         }
 899
 900         down_read(&nvmet_config_sem);
 901         list_for_each_entry(p, &port->subsystems, entry) {
 902                 if (!strncmp(p->subsys->subsysnqn, subsysnqn,
 903                                 NVMF_NQN_SIZE)) {
 904                         if (!kref_get_unless_zero(&p->subsys->ref))
 905                                 break;
 906                         up_read(&nvmet_config_sem);
 907                         return p->subsys;
 908                 }
 909         }
 910         up_read(&nvmet_config_sem);
 911         return NULL;
 912 }
 913
 914 struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 915                 enum nvme_subsys_type type)
 916 {
 917         struct nvmet_subsys *subsys;
 918
 919         subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
 920         if (!subsys)
 921                 return NULL;
 922
 923         subsys->ver = NVME_VS(1, 2, 1); /* NVMe 1.2.1 */
 924
 925         switch (type) {
 926         case NVME_NQN_NVME:
 927                 subsys->max_qid = NVMET_NR_QUEUES;
 928                 break;
 929         case NVME_NQN_DISC:
 930                 subsys->max_qid = 0;
 931                 break;
 932         default:
 933                 pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
 934                 kfree(subsys);
 935                 return NULL;
 936         }
 937         subsys->type = type;
 938         subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
 939                         GFP_KERNEL);
 940         if (!subsys->subsysnqn) {
 941                 kfree(subsys);
 942                 return NULL;
 943         }
 944
 945         kref_init(&subsys->ref);
 946
 947         mutex_init(&subsys->lock);
 948         INIT_LIST_HEAD(&subsys->namespaces);
 949         INIT_LIST_HEAD(&subsys->ctrls);
 950         INIT_LIST_HEAD(&subsys->hosts);
 951
 952         return subsys;
 953 }
 954
 955 static void nvmet_subsys_free(struct kref *ref)
 956 {
 957         struct nvmet_subsys *subsys =
 958                 container_of(ref, struct nvmet_subsys, ref);
 959
 960         WARN_ON_ONCE(!list_empty(&subsys->namespaces));
 961
 962         kfree(subsys->subsysnqn);
 963         kfree(subsys);
 964 }
 965
 966 void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
 967 {
 968         struct nvmet_ctrl *ctrl;
 969
 970         mutex_lock(&subsys->lock);
 971         list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
 972                 ctrl->ops->delete_ctrl(ctrl);
 973         mutex_unlock(&subsys->lock);
 974 }
 975
 976 void nvmet_subsys_put(struct nvmet_subsys *subsys)
 977 {
 978         kref_put(&subsys->ref, nvmet_subsys_free);
 979 }
 980
 981 static int __init nvmet_init(void)
 982 {
 983         int error;
 984
 985         error = nvmet_init_discovery();
 986         if (error)
 987                 goto out;
 988
 989         error = nvmet_init_configfs();
 990         if (error)
 991                 goto out_exit_discovery;
 992         return 0;
 993
 994 out_exit_discovery:
 995         nvmet_exit_discovery();
 996 out:
 997         return error;
 998 }
 999
1000 static void __exit nvmet_exit(void)
1001 {
1002         nvmet_exit_configfs();
1003         nvmet_exit_discovery();
1004         ida_destroy(&cntlid_ida);
1005
1006         BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
1007         BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
1008 }
1009
/* Module entry and exit points, and the module's license tag. */
1010 module_init(nvmet_init);
1011 module_exit(nvmet_exit);
1012
1013 MODULE_LICENSE("GPL v2");