2 * Intel MIC Platform Software Stack (MPSS)
4 * Copyright(c) 2013 Intel Corporation.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
18 * Intel MIC Host driver.
21 #include <linux/pci.h>
22 #include <linux/sched.h>
23 #include <linux/uaccess.h>
25 #include <linux/mic_common.h>
26 #include "../common/mic_dev.h"
27 #include "mic_device.h"
29 #include "mic_virtio.h"
32 * Initiates the copies across the PCIe bus from card memory to
33 * a user space buffer.
/*
 * mic_virtio_copy_to_user() - copy @len bytes from the card aperture into
 * the user space buffer @ubuf.
 *
 * @addr is applied as an offset from mvdev->mdev->aper.va. A failing
 * copy_to_user() is reported through dev_err(). The visible code adds @len
 * to mvdev->in_bytes after the copy.
 *
 * NOTE(review): the declaration/assignment of err and the return path are
 * not visible in this excerpt - confirm against the full file.
 */
35 static int mic_virtio_copy_to_user(struct mic_vdev *mvdev,
36 void __user *ubuf, size_t len, u64 addr)
/* Source of the copy: aperture base plus the caller-supplied offset. */
39 void __iomem *dbuf = mvdev->mdev->aper.va + addr;
41 * We are copying from IO below an should ideally use something
42 * like copy_to_user_fromio(..) if it existed.
/* The __force cast strips the __iomem qualifier so dbuf can be passed on. */
44 if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
46 dev_err(mic_dev(mvdev), "%s %d err %d\n",
47 __func__, __LINE__, err);
/* Running total of bytes copied by this helper. */
50 mvdev->in_bytes += len;
57 * Initiates copies across the PCIe bus from a user space
58 * buffer to card memory.
/*
 * mic_virtio_copy_from_user() - copy @len bytes from the user space buffer
 * @ubuf into the card aperture.
 *
 * @addr is applied as an offset from mvdev->mdev->aper.va. A failing
 * copy_from_user() is reported through dev_err(). The visible code adds
 * @len to mvdev->out_bytes after the copy.
 *
 * NOTE(review): the declaration/assignment of err and the return path are
 * not visible in this excerpt - confirm against the full file.
 */
60 static int mic_virtio_copy_from_user(struct mic_vdev *mvdev,
61 void __user *ubuf, size_t len, u64 addr)
/* Destination of the copy: aperture base plus the caller-supplied offset. */
64 void __iomem *dbuf = mvdev->mdev->aper.va + addr;
66 * We are copying to IO below and should ideally use something
67 * like copy_from_user_toio(..) if it existed.
/* The __force cast strips the __iomem qualifier so dbuf can be passed on. */
69 if (copy_from_user((void __force *)dbuf, ubuf, len)) {
71 dev_err(mic_dev(mvdev), "%s %d err %d\n",
72 __func__, __LINE__, err);
/* Running total of bytes copied by this helper. */
75 mvdev->out_bytes += len;
/*
 * Value passed as the 'read' argument of mic_vringh_copy() when the read
 * descriptors are processed; the write descriptors pass !MIC_VRINGH_READ.
 */
81 #define MIC_VRINGH_READ true
83 /* The function to call to notify the card about added buffers */
/*
 * mic_notify() - vringh notify callback.
 *
 * Recovers the enclosing mic_vringh from @vrh, reads the doorbell number
 * from the device control block (mvdev->dc->h2c_vdev_db) and raises it via
 * the device's send_intr() operation.
 *
 * NOTE(review): any check on db between the read and send_intr() is not
 * visible in this excerpt - confirm against the full file.
 */
84 static void mic_notify(struct vringh *vrh)
/* vrh is embedded in struct mic_vringh; step back to the container. */
86 struct mic_vringh *mvrh = container_of(vrh, struct mic_vringh, vrh);
87 struct mic_vdev *mvdev = mvrh->mvdev;
88 s8 db = mvdev->dc->h2c_vdev_db;
91 mvdev->mdev->ops->send_intr(mvdev->mdev, db);
94 /* Determine the total number of bytes consumed in a VRINGH KIOV */
/*
 * mic_vringh_iov_consumed() - sum the bytes accounted for in @iov.
 *
 * Starts from iov->consumed (progress inside the current element) and adds
 * the iov_len of every element with an index below iov->i.
 *
 * NOTE(review): the declaration of i and the return statement are not
 * visible in this excerpt; presumably total is returned - confirm.
 */
95 static inline u32 mic_vringh_iov_consumed(struct vringh_kiov *iov)
98 u32 total = iov->consumed;
/* Elements before index iov->i contribute their full length. */
100 for (i = 0; i < iov->i; i++)
101 total += iov->iov[i].iov_len;
106 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
107 * This API is heavily based on the vringh_iov_xfer(..) implementation
108 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
109 * and vringh_iov_push_kern(..) directly is because there is no
110 * way to override the VRINGH xfer(..) routines as of v3.10.
/*
 * mic_vringh_copy() - walk @iov and copy up to @len bytes between the card
 * and the user buffer @ubuf.
 *
 * Each iteration handles at most one KIOV element: the chunk size is the
 * smaller of the element's remaining length and the remaining @len, and the
 * element's iov_base is passed to the copy helper as the card address.
 * Both mic_virtio_copy_to_user() and mic_virtio_copy_from_user() are called
 * from here.
 *
 * NOTE(review): the test on @read that selects between the two helpers, the
 * updates of len/ubuf/tot_len, the increment of iov->i, the store to
 * @out_len and the return statement are not visible in this excerpt -
 * confirm against the full file.
 */
112 static int mic_vringh_copy(struct mic_vdev *mvdev, struct vringh_kiov *iov,
113 void __user *ubuf, size_t len, bool read, size_t *out_len)
116 size_t partlen, tot_len = 0;
/* Continue while bytes remain and the KIOV still has unprocessed elements. */
118 while (len && iov->i < iov->used) {
119 partlen = min(iov->iov[iov->i].iov_len, len);
121 ret = mic_virtio_copy_to_user(mvdev,
123 (u64)iov->iov[iov->i].iov_base);
125 ret = mic_virtio_copy_from_user(mvdev,
127 (u64)iov->iov[iov->i].iov_base);
129 dev_err(mic_dev(mvdev), "%s %d err %d\n",
130 __func__, __LINE__, ret);
/* Advance within the current element by the chunk just handled. */
136 iov->consumed += partlen;
137 iov->iov[iov->i].iov_len -= partlen;
138 iov->iov[iov->i].iov_base += partlen;
/*
 * Element exhausted: put back the length and base that the partial
 * updates above modified, using the running iov->consumed count.
 */
139 if (!iov->iov[iov->i].iov_len) {
140 /* Fix up old iov element then increment. */
141 iov->iov[iov->i].iov_len = iov->consumed;
142 iov->iov[iov->i].iov_base -= iov->consumed;
153 * Use the standard VRINGH infrastructure in the kernel to fetch new
154 * descriptors, initiate the copies and update the used ring.
/*
 * _mic_virtio_copy() - service one copy request described by @copy on the
 * vring selected by copy->vr_idx.
 *
 * Visible steps:
 *  - if both the read and write KIOVs are fully processed, fetch a new
 *    descriptor chain with vringh_getdesc_kern();
 *  - copy one struct iovec at a time from the user array copy->iov;
 *  - run mic_vringh_copy() on the read KIOV first, then on the write KIOV,
 *    adding each out_len to copy->out_len;
 *  - when a descriptor is held, data was moved and copy->update_used is set,
 *    complete the descriptor in the used ring with the total consumed bytes,
 *    clean up both KIOVs, and publish vrh->last_avail_idx in
 *    vr->info->avail_idx.
 *
 * NOTE(review): the loop header, the declarations of ret/iov/total, several
 * call arguments, the len/ubuf bookkeeping and the return statement are not
 * visible in this excerpt - confirm against the full file.
 */
156 static int _mic_virtio_copy(struct mic_vdev *mvdev,
157 struct mic_copy_desc *copy)
160 u32 iovcnt = copy->iovcnt;
162 struct iovec __user *u_iov = copy->iov;
163 void __user *ubuf = NULL;
/* Per-vring state for the ring the caller selected. */
164 struct mic_vringh *mvr = &mvdev->mvr[copy->vr_idx];
165 struct vringh_kiov *riov = &mvr->riov;
166 struct vringh_kiov *wiov = &mvr->wiov;
167 struct vringh *vrh = &mvr->vrh;
168 u16 *head = &mvr->head;
169 struct mic_vring *vr = &mvr->vring;
170 size_t len = 0, out_len;
173 /* Fetch a new IOVEC if all previous elements have been processed */
174 if (riov->i == riov->used && wiov->i == wiov->used) {
175 ret = vringh_getdesc_kern(vrh, riov, wiov,
177 /* Check if there are available descriptors */
183 /* Copy over a new iovec from user space. */
184 ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
187 dev_err(mic_dev(mvdev), "%s %d err %d\n",
188 __func__, __LINE__, ret);
194 /* Issue all the read descriptors first */
195 ret = mic_vringh_copy(mvdev, riov, ubuf, len,
196 MIC_VRINGH_READ, &out_len);
198 dev_err(mic_dev(mvdev), "%s %d err %d\n",
199 __func__, __LINE__, ret);
204 copy->out_len += out_len;
205 /* Issue the write descriptors next */
206 ret = mic_vringh_copy(mvdev, wiov, ubuf, len,
207 !MIC_VRINGH_READ, &out_len);
209 dev_err(mic_dev(mvdev), "%s %d err %d\n",
210 __func__, __LINE__, ret);
215 copy->out_len += out_len;
217 /* One user space iovec is now completed */
221 /* Exit loop if all elements in KIOVs have been processed. */
222 if (riov->i == riov->used && wiov->i == wiov->used)
226 * Update the used ring if a descriptor was available and some data was
227 * copied in/out and the user asked for a used ring update.
/* mic_virtio_add_device() initialises mvr->head to USHRT_MAX. */
229 if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
232 /* Determine the total data consumed */
233 total += mic_vringh_iov_consumed(riov);
234 total += mic_vringh_iov_consumed(wiov);
235 vringh_complete_kern(vrh, *head, total);
/* NOTE(review): the action taken when a notify is needed is not visible. */
237 if (vringh_need_notify_kern(vrh) > 0)
239 vringh_kiov_cleanup(riov);
240 vringh_kiov_cleanup(wiov);
241 /* Update avail idx for user space */
242 vr->info->avail_idx = vrh->last_avail_idx;
/*
 * mic_verify_copy_args() - sanity check a copy request.
 *
 * Flags copy->vr_idx as invalid when it is not below the number of
 * virtqueues in the device descriptor (mvdev->dd->num_vq) and logs -EINVAL.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably -EINVAL on failure and 0 otherwise - confirm.
 */
247 static inline int mic_verify_copy_args(struct mic_vdev *mvdev,
248 struct mic_copy_desc *copy)
250 if (copy->vr_idx >= mvdev->dd->num_vq) {
251 dev_err(mic_dev(mvdev), "%s %d err %d\n",
252 __func__, __LINE__, -EINVAL);
258 /* Copy a specified number of virtio descriptors in a chain */
/*
 * mic_virtio_copy_desc() - entry point for a descriptor copy request.
 *
 * Calls mic_verify_copy_args() on @copy, then, holding the selected vring's
 * vr_mutex, checks mic_vdevup() and calls _mic_virtio_copy(). Failures are
 * logged through dev_err().
 *
 * NOTE(review): the declaration of err, the checks on err after each call,
 * the error code used when the device is not up and the return statement are
 * not visible in this excerpt - confirm against the full file.
 */
259 int mic_virtio_copy_desc(struct mic_vdev *mvdev,
260 struct mic_copy_desc *copy)
/*
 * Only the address is formed here, before vr_idx is verified; the first
 * visible use of mvr is the mutex_lock() below.
 */
263 struct mic_vringh *mvr = &mvdev->mvr[copy->vr_idx];
265 err = mic_verify_copy_args(mvdev, copy);
269 mutex_lock(&mvr->vr_mutex);
270 if (!mic_vdevup(mvdev)) {
272 dev_err(mic_dev(mvdev), "%s %d err %d\n",
273 __func__, __LINE__, err);
276 err = _mic_virtio_copy(mvdev, copy);
278 dev_err(mic_dev(mvdev), "%s %d err %d\n",
279 __func__, __LINE__, err);
282 mutex_unlock(&mvr->vr_mutex);
/*
 * mic_virtio_init_post() - pick up the used ring addresses from the vq
 * config entries.
 *
 * For every virtqueue, a zero used_address triggers a warning; the vringh
 * used ring pointer is set to the aperture base plus the (little endian)
 * used_address from the vq config. Afterwards the used_address_updated flag
 * in the device control block is cleared and a LINKUP debug message is
 * emitted.
 *
 * NOTE(review): what follows the warning for a zero used_address (e.g.
 * skipping the entry) is not visible in this excerpt - confirm.
 */
286 static void mic_virtio_init_post(struct mic_vdev *mvdev)
288 struct mic_vqconfig *vqconfig = mic_vq_config(mvdev->dd);
291 for (i = 0; i < mvdev->dd->num_vq; i++) {
292 if (!le64_to_cpu(vqconfig[i].used_address)) {
293 dev_warn(mic_dev(mvdev), "used_address zero??\n");
/* used_address is applied as an offset from the aperture mapping. */
296 mvdev->mvr[i].vrh.vring.used =
297 (void __force *)mvdev->mdev->aper.va +
298 le64_to_cpu(vqconfig[i].used_address);
/* Clear the request flag now that the addresses were consumed. */
301 mvdev->dc->used_address_updated = 0;
303 dev_dbg(mic_dev(mvdev), "%s: device type %d LINKUP\n",
304 __func__, mvdev->virtio_id);
/*
 * mic_virtio_device_reset() - reset the host side state of one virtio
 * device.
 *
 * With every vring mutex of the device held, the descriptor status is set to
 * 0, vdev_reset is cleared and host_ack is set in the device control block,
 * and the per-ring indices (info->avail_idx, last_avail_idx, last_used_idx)
 * are zeroed. The mutexes are released afterwards.
 *
 * NOTE(review): the declaration of i and any statements on the missing
 * lines are not visible in this excerpt - confirm against the full file.
 */
307 static inline void mic_virtio_device_reset(struct mic_vdev *mvdev)
311 dev_dbg(mic_dev(mvdev), "%s: status %d device type %d RESET\n",
312 __func__, mvdev->dd->status, mvdev->virtio_id);
314 for (i = 0; i < mvdev->dd->num_vq; i++)
316 * Avoid lockdep false positive. The + 1 is for the mic
317 * mutex which is held in the reset devices code path.
319 mutex_lock_nested(&mvdev->mvr[i].vr_mutex, i + 1);
321 /* 0 status means "reset" */
322 mvdev->dd->status = 0;
323 mvdev->dc->vdev_reset = 0;
324 mvdev->dc->host_ack = 1;
/* Restart ring tracking from index 0 on every virtqueue. */
326 for (i = 0; i < mvdev->dd->num_vq; i++) {
327 struct vringh *vrh = &mvdev->mvr[i].vrh;
328 mvdev->mvr[i].vring.info->avail_idx = 0;
330 vrh->last_avail_idx = 0;
331 vrh->last_used_idx = 0;
334 for (i = 0; i < mvdev->dd->num_vq; i++)
335 mutex_unlock(&mvdev->mvr[i].vr_mutex);
/*
 * mic_virtio_reset_devices() - reset every virtio device on @mdev.
 *
 * Walks mdev->vdev_list; each entry is reset with
 * mic_virtio_device_reset(), then its poll_wake flag is set and waiters on
 * its waitq are woken.
 */
338 void mic_virtio_reset_devices(struct mic_device *mdev)
340 struct list_head *pos, *tmp;
341 struct mic_vdev *mvdev;
343 dev_dbg(mdev->sdev->parent, "%s\n", __func__);
345 list_for_each_safe(pos, tmp, &mdev->vdev_list) {
346 mvdev = list_entry(pos, struct mic_vdev, list);
347 mic_virtio_device_reset(mvdev);
/* Flag the event before waking sleepers on the device wait queue. */
348 mvdev->poll_wake = 1;
349 wake_up(&mvdev->waitq);
/*
 * mic_bh_handler() - work item scheduled by mic_virtio_intr_handler().
 *
 * Checks the flags in the device control block: used_address_updated leads
 * to mic_virtio_init_post(), vdev_reset leads to mic_virtio_device_reset().
 * Finally poll_wake is set and waiters on the device waitq are woken.
 */
353 void mic_bh_handler(struct work_struct *work)
/* The work item is embedded in struct mic_vdev. */
355 struct mic_vdev *mvdev = container_of(work, struct mic_vdev,
358 if (mvdev->dc->used_address_updated)
359 mic_virtio_init_post(mvdev);
361 if (mvdev->dc->vdev_reset)
362 mic_virtio_device_reset(mvdev);
364 mvdev->poll_wake = 1;
365 wake_up(&mvdev->waitq);
/*
 * mic_virtio_intr_handler() - interrupt handler registered for a virtio
 * device in mic_virtio_add_device().
 *
 * @data is the mic_vdev. Runs the device's intr_workarounds() operation and
 * defers the remaining work to virtio_bh_work (mic_bh_handler()).
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
368 static irqreturn_t mic_virtio_intr_handler(int irq, void *data)
370 struct mic_vdev *mvdev = data;
371 struct mic_device *mdev = mvdev->mdev;
373 mdev->ops->intr_workarounds(mdev);
374 schedule_work(&mvdev->virtio_bh_work);
/*
 * mic_virtio_config_change() - push a new config space from user space to
 * the card.
 *
 * Holding mdev->mic_mutex and every vring mutex of the device, the function
 * copies dd->config_len bytes from the user pointer argp into the
 * descriptor's config space, sets config_change to
 * MIC_VIRTIO_PARAM_CONFIG_CHANGED, raises the h2c_config_db doorbell and
 * then waits for guest_ack in up to 100 rounds of 100 ms each. Both
 * config_change and guest_ack are cleared before the locks are dropped.
 *
 * NOTE(review): the second parameter line, the handling of the
 * db == -1 / type == -1 case, the loop exit condition and the return
 * statement are not visible in this excerpt - confirm against the full file.
 */
378 int mic_virtio_config_change(struct mic_vdev *mvdev,
/*
 * No wake_up() on this on-stack queue is visible in this excerpt, so each
 * wait appears to end by timeout or by the condition check - TODO confirm.
 */
381 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
382 int ret = 0, retry, i;
383 struct mic_bootparam *bootparam = mvdev->mdev->dp;
384 s8 db = bootparam->h2c_config_db;
386 mutex_lock(&mvdev->mdev->mic_mutex);
/* Nested subclass i + 1, same scheme as mic_virtio_device_reset(). */
387 for (i = 0; i < mvdev->dd->num_vq; i++)
388 mutex_lock_nested(&mvdev->mvr[i].vr_mutex, i + 1);
390 if (db == -1 || mvdev->dd->type == -1) {
395 if (copy_from_user(mic_vq_configspace(mvdev->dd),
396 argp, mvdev->dd->config_len)) {
397 dev_err(mic_dev(mvdev), "%s %d err %d\n",
398 __func__, __LINE__, -EFAULT);
/* Publish the change flag, then interrupt the card. */
402 mvdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
403 mvdev->mdev->ops->send_intr(mvdev->mdev, db);
405 for (retry = 100; retry--;) {
406 ret = wait_event_timeout(wake,
407 mvdev->dc->guest_ack, msecs_to_jiffies(100));
412 dev_dbg(mic_dev(mvdev),
413 "%s %d retry: %d\n", __func__, __LINE__, retry);
414 mvdev->dc->config_change = 0;
415 mvdev->dc->guest_ack = 0;
417 for (i = 0; i < mvdev->dd->num_vq; i++)
418 mutex_unlock(&mvdev->mvr[i].vr_mutex);
419 mutex_unlock(&mvdev->mdev->mic_mutex);
/*
 * mic_copy_dp_entry() - validate a device descriptor supplied by user space
 * and copy it into a free device page slot.
 *
 * Visible steps:
 *  - copy the fixed descriptor header from argp and reject it when its
 *    aligned size exceeds MIC_MAX_DESC_BLK_SIZE or num_vq exceeds
 *    MIC_MAX_VRINGS;
 *  - allocate a kernel buffer of mic_desc_size() bytes and copy the full
 *    descriptor from argp into it;
 *  - reject any virtqueue whose entry count exceeds MIC_MAX_VRING_ENTRIES;
 *  - scan the device page, starting after struct mic_bootparam, for an entry
 *    whose type is 0 or -1;
 *  - store the descriptor type through *type and memcpy the descriptor into
 *    the slot.
 *
 * NOTE(review): part of the parameter list, the assignment of devp inside
 * the scan, the release of dd_config, the store to *devpage and all return
 * statements are not visible in this excerpt - confirm against the full
 * file.
 */
423 static int mic_copy_dp_entry(struct mic_vdev *mvdev,
426 struct mic_device_desc **devpage)
428 struct mic_device *mdev = mvdev->mdev;
429 struct mic_device_desc dd, *dd_config, *devp;
430 struct mic_vqconfig *vqconfig;
432 bool slot_found = false;
/* First pass: header only, enough to learn the descriptor's size. */
434 if (copy_from_user(&dd, argp, sizeof(dd))) {
435 dev_err(mic_dev(mvdev), "%s %d err %d\n",
436 __func__, __LINE__, -EFAULT);
440 if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
441 dd.num_vq > MIC_MAX_VRINGS) {
442 dev_err(mic_dev(mvdev), "%s %d err %d\n",
443 __func__, __LINE__, -EINVAL);
447 dd_config = kmalloc(mic_desc_size(&dd), GFP_KERNEL);
448 if (dd_config == NULL) {
449 dev_err(mic_dev(mvdev), "%s %d err %d\n",
450 __func__, __LINE__, -ENOMEM);
/*
 * Second pass: full descriptor. The size comes from the header copied
 * first (dd), while the loop bound below also uses dd.num_vq.
 */
453 if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) {
455 dev_err(mic_dev(mvdev), "%s %d err %d\n",
456 __func__, __LINE__, ret);
460 vqconfig = mic_vq_config(dd_config);
461 for (i = 0; i < dd.num_vq; i++) {
462 if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
464 dev_err(mic_dev(mvdev), "%s %d err %d\n",
465 __func__, __LINE__, ret);
470 /* Find the first free device page entry */
471 for (i = sizeof(struct mic_bootparam);
472 i < MIC_DP_SIZE - mic_total_desc_size(dd_config);
473 i += mic_total_desc_size(devp)) {
475 if (devp->type == 0 || devp->type == -1) {
482 dev_err(mic_dev(mvdev), "%s %d err %d\n",
483 __func__, __LINE__, ret);
487 * Save off the type before doing the memcpy. Type will be set in the
488 * end after completing all initialization for the new device.
490 *type = dd_config->type;
492 memcpy(devp, dd_config, mic_desc_size(dd_config));
/*
 * mic_init_device_ctrl() - initialise the device control block that follows
 * a device descriptor in the device page.
 *
 * The control block is located at @devpage plus the aligned descriptor
 * size. The visible stores clear config_change and used_address_updated and
 * set both doorbell fields (c2h_vdev_db, h2c_vdev_db) to -1.
 *
 * NOTE(review): stores on the missing lines, and how mvdev is updated, are
 * not visible in this excerpt - confirm against the full file.
 */
500 static void mic_init_device_ctrl(struct mic_vdev *mvdev,
501 struct mic_device_desc *devpage)
503 struct mic_device_ctrl *dc;
505 dc = (void *)devpage + mic_aligned_desc_size(devpage);
507 dc->config_change = 0;
511 dc->used_address_updated = 0;
/* -1 is the "no doorbell" value tested elsewhere in this file. */
512 dc->c2h_vdev_db = -1;
513 dc->h2c_vdev_db = -1;
/*
 * mic_virtio_add_device() - create the host side of a new virtio device
 * from a user supplied descriptor.
 *
 * Visible steps, all under mdev->mic_mutex:
 *  - mic_copy_dp_entry() places the descriptor in the device page and
 *    mic_init_device_ctrl() initialises its control block;
 *  - for each virtqueue: allocate zeroed pages for the vring plus the
 *    _mic_vring_info trailer, map them with mic_map_single(), store the
 *    mapped address in the vq config, and initialise the vring, vringh and
 *    KIOVs;
 *  - request a doorbell interrupt handled by mic_virtio_intr_handler() and
 *    publish its number in dc->c2h_vdev_db;
 *  - add the device to mdev->vdev_list and raise the h2c_config_db
 *    doorbell.
 * The trailing loop unmaps and frees the rings with an index below i before
 * the mutex is released; it looks like the error path.
 *
 * NOTE(review): part of the parameter list, several declarations, the error
 * checks and gotos, the final store of the descriptor type, the labels and
 * the return statements are not visible in this excerpt - confirm against
 * the full file.
 */
517 int mic_virtio_add_device(struct mic_vdev *mvdev,
520 struct mic_device *mdev = mvdev->mdev;
521 struct mic_device_desc *dd = NULL;
522 struct mic_vqconfig *vqconfig;
523 int vr_size, i, j, ret;
527 struct mic_bootparam *bootparam = mdev->dp;
531 mutex_lock(&mdev->mic_mutex);
533 ret = mic_copy_dp_entry(mvdev, argp, &type, &dd);
535 mutex_unlock(&mdev->mic_mutex);
539 mic_init_device_ctrl(mvdev, dd);
542 mvdev->virtio_id = type;
543 vqconfig = mic_vq_config(dd);
544 INIT_WORK(&mvdev->virtio_bh_work, mic_bh_handler);
546 for (i = 0; i < dd->num_vq; i++) {
547 struct mic_vringh *mvr = &mvdev->mvr[i];
548 struct mic_vring *vr = &mvdev->mvr[i].vring;
549 num = le16_to_cpu(vqconfig[i].num);
550 mutex_init(&mvr->vr_mutex);
/* Ring memory plus the info trailer, rounded up to whole pages. */
551 vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
552 sizeof(struct _mic_vring_info));
554 __get_free_pages(GFP_KERNEL | __GFP_ZERO,
558 dev_err(mic_dev(mvdev), "%s %d err %d\n",
559 __func__, __LINE__, ret);
/* The info trailer sits immediately after the vring itself. */
563 vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
564 vr->info->magic = cpu_to_le32(MIC_MAGIC + mvdev->virtio_id + i);
565 vr_addr = mic_map_single(mdev, vr->va, vr_size);
566 if (mic_map_error(vr_addr)) {
/* Mapping failed: this ring's pages are released right here. */
567 free_pages((unsigned long)vr->va, get_order(vr_size));
569 dev_err(mic_dev(mvdev), "%s %d err %d\n",
570 __func__, __LINE__, ret);
573 vqconfig[i].address = cpu_to_le64(vr_addr);
575 vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
576 ret = vringh_init_kern(&mvr->vrh,
577 *(u32 *)mic_vq_features(mvdev->dd), num, false,
578 vr->vr.desc, vr->vr.avail, vr->vr.used);
580 dev_err(mic_dev(mvdev), "%s %d err %d\n",
581 __func__, __LINE__, ret);
584 vringh_kiov_init(&mvr->riov, NULL, 0);
585 vringh_kiov_init(&mvr->wiov, NULL, 0);
/* USHRT_MAX is the value _mic_virtio_copy() tests *head against. */
586 mvr->head = USHRT_MAX;
588 mvr->vrh.notify = mic_notify;
589 dev_dbg(mdev->sdev->parent,
590 "%s %d index %d va %p info %p vr_size 0x%x\n",
591 __func__, __LINE__, i, vr->va, vr->info, vr_size);
594 snprintf(irqname, sizeof(irqname), "mic%dvirtio%d", mdev->id,
596 mvdev->virtio_db = mic_next_db(mdev);
597 mvdev->virtio_cookie = mic_request_threaded_irq(mdev,
598 mic_virtio_intr_handler,
599 NULL, irqname, mvdev,
600 mvdev->virtio_db, MIC_INTR_DB);
601 if (IS_ERR(mvdev->virtio_cookie)) {
602 ret = PTR_ERR(mvdev->virtio_cookie);
603 dev_dbg(mdev->sdev->parent, "request irq failed\n");
607 mvdev->dc->c2h_vdev_db = mvdev->virtio_db;
609 list_add_tail(&mvdev->list, &mdev->vdev_list);
611 * Order the type update with previous stores. This write barrier
612 * is paired with the corresponding read barrier before the uncached
613 * system memory read of the type, on the card while scanning the
619 dev_dbg(mdev->sdev->parent, "Added virtio device id %d\n", dd->type);
621 db = bootparam->h2c_config_db;
623 mdev->ops->send_intr(mdev, db);
624 mutex_unlock(&mdev->mic_mutex);
/* Unwind: release only the rings with an index below i. */
627 vqconfig = mic_vq_config(dd);
628 for (j = 0; j < i; j++) {
629 struct mic_vringh *mvr = &mvdev->mvr[j];
630 mic_unmap_single(mdev, le64_to_cpu(vqconfig[j].address),
632 free_pages((unsigned long)mvr->vring.va,
633 get_order(mvr->vring.len));
635 mutex_unlock(&mdev->mic_mutex);
/*
 * mic_virtio_del_device() - tear down the host side of a virtio device.
 *
 * Visible steps, all under mdev->mic_mutex:
 *  - request a hot remove from the card by setting config_change to
 *    MIC_VIRTIO_PARAM_DEV_REMOVE and raising the h2c_config_db doorbell,
 *    then wait for guest_ack in up to 100 rounds of 100 ms each; a jump to
 *    skip_hot_remove bypasses this part;
 *  - clear config_change and guest_ack;
 *  - free the interrupt and flush the bottom half work;
 *  - for each virtqueue, clean up the KIOVs, unmap the ring and free its
 *    pages;
 *  - look the device up in mdev->vdev_list;
 *  - mark the descriptor slot with type -1.
 *
 * NOTE(review): the condition guarding the goto, the skip_hot_remove label,
 * the loop exit condition, the list removal and the memory barrier named in
 * the comment are not visible in this excerpt - confirm against the full
 * file.
 */
639 void mic_virtio_del_device(struct mic_vdev *mvdev)
641 struct list_head *pos, *tmp;
642 struct mic_vdev *tmp_mvdev;
643 struct mic_device *mdev = mvdev->mdev;
/*
 * No wake_up() on this on-stack queue is visible in this excerpt, so each
 * wait appears to end by timeout or by the condition check - TODO confirm.
 */
644 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
646 struct mic_vqconfig *vqconfig;
647 struct mic_bootparam *bootparam = mdev->dp;
650 mutex_lock(&mdev->mic_mutex);
651 db = bootparam->h2c_config_db;
653 goto skip_hot_remove;
654 dev_dbg(mdev->sdev->parent,
655 "Requesting hot remove id %d\n", mvdev->virtio_id);
/* Publish the removal request, then interrupt the card. */
656 mvdev->dc->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
657 mdev->ops->send_intr(mdev, db);
658 for (retry = 100; retry--;) {
659 ret = wait_event_timeout(wake,
660 mvdev->dc->guest_ack, msecs_to_jiffies(100));
664 dev_dbg(mdev->sdev->parent,
665 "Device id %d config_change %d guest_ack %d retry %d\n",
666 mvdev->virtio_id, mvdev->dc->config_change,
667 mvdev->dc->guest_ack, retry);
668 mvdev->dc->config_change = 0;
669 mvdev->dc->guest_ack = 0;
/* Free the interrupt first, then wait for already queued work to finish. */
671 mic_free_irq(mdev, mvdev->virtio_cookie, mvdev);
672 flush_work(&mvdev->virtio_bh_work);
673 vqconfig = mic_vq_config(mvdev->dd);
674 for (i = 0; i < mvdev->dd->num_vq; i++) {
675 struct mic_vringh *mvr = &mvdev->mvr[i];
676 vringh_kiov_cleanup(&mvr->riov);
677 vringh_kiov_cleanup(&mvr->wiov);
678 mic_unmap_single(mdev, le64_to_cpu(vqconfig[i].address),
680 free_pages((unsigned long)mvr->vring.va,
681 get_order(mvr->vring.len));
684 list_for_each_safe(pos, tmp, &mdev->vdev_list) {
685 tmp_mvdev = list_entry(pos, struct mic_vdev, list);
686 if (tmp_mvdev == mvdev) {
688 dev_dbg(mdev->sdev->parent,
689 "Removing virtio device id %d\n",
695 * Order the type update with previous stores. This write barrier
696 * is paired with the corresponding read barrier before the uncached
697 * system memory read of the type, on the card while scanning the
/* -1 is one of the two type values mic_copy_dp_entry() treats as free. */
701 mvdev->dd->type = -1;
702 mutex_unlock(&mdev->mic_mutex);