vfio: powerpc/spapr: Check that IOMMU page is fully contained by system page

drivers/vfio/vfio_iommu_spapr_tce.c
/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp.  All rights reserved.
 *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <asm/iommu.h>
#include <asm/tce.h>

#define DRIVER_VERSION  "0.1"
#define DRIVER_AUTHOR   "aik@ozlabs.ru"
#define DRIVER_DESC     "VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
                struct iommu_group *iommu_group);

/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */

/*
 * The container descriptor supports only a single group per container.
 * This is required by the API, as the container is not supplied with an
 * IOMMU group at the moment of initialization.
 */
struct tce_container {
        struct mutex lock;
        struct iommu_table *tbl;
        bool enabled;
};

static bool tce_page_is_contained(struct page *page, unsigned page_shift)
{
        /*
         * Check that the TCE table granularity is not bigger than the size of
         * a page we just found. Otherwise the hardware can get access to
         * a bigger memory chunk than it should.
         */
        return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
}
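
/*
 * Worked example of the check above: with 64K system pages
 * (PAGE_SHIFT == 16) and a normal, non-compound page
 * (compound_order() == 0), a 4K IOMMU page passes (16 + 0 >= 12) but a
 * 16M IOMMU page fails (16 + 0 < 24) unless the pinned page is itself a
 * 16M huge page (compound_order() == 8, so 16 + 8 >= 24).
 */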

static int tce_iommu_enable(struct tce_container *container)
{
        int ret = 0;
        unsigned long locked, lock_limit, npages;
        struct iommu_table *tbl = container->tbl;

        if (!container->tbl)
                return -ENXIO;

        if (!current->mm)
                return -ESRCH; /* process exited */

        if (container->enabled)
                return -EBUSY;

        /*
         * When userspace pages are mapped into the IOMMU, they are effectively
         * locked memory, so, theoretically, we need to update the accounting
         * of locked pages on each map and unmap.  For powerpc, the map/unmap
         * paths can be very hot, though, and the accounting would kill
         * performance, especially since it would be difficult or impossible
         * to handle the accounting in real mode only.
         *
         * To address that, rather than precisely accounting every page, we
         * instead account for a worst case on locked memory when the iommu is
         * enabled and disabled.  The worst case upper bound on locked memory
         * is the size of the whole iommu window, which is usually relatively
         * small (compared to total memory sizes) on POWER hardware.
         *
         * Also, we don't have a nice way to fail on H_PUT_TCE due to ulimits;
         * that would effectively kill the guest at random points, so it is
         * much better to enforce the limit based on the maximum that the
         * guest can map.
         */
        down_write(&current->mm->mmap_sem);
        npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
        locked = current->mm->locked_vm + npages;
        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
                pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
                                rlimit(RLIMIT_MEMLOCK));
                ret = -ENOMEM;
        } else {
                current->mm->locked_vm += npages;
                container->enabled = true;
        }
        up_write(&current->mm->mmap_sem);

        return ret;
}
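
/*
 * Worked example of the accounting above: for a 1GB DMA window backed by
 * 4K TCEs, it_size == 262144 entries, so with 64K system pages
 * npages == (262144 << 12) >> 16 == 16384 system pages are charged
 * against RLIMIT_MEMLOCK up front, whether or not the guest ever maps
 * that much.
 */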

static void tce_iommu_disable(struct tce_container *container)
{
        if (!container->enabled)
                return;

        container->enabled = false;

        if (!container->tbl || !current->mm)
                return;

        down_write(&current->mm->mmap_sem);
        current->mm->locked_vm -= (container->tbl->it_size <<
                        IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
        up_write(&current->mm->mmap_sem);
}

static void *tce_iommu_open(unsigned long arg)
{
        struct tce_container *container;

        if (arg != VFIO_SPAPR_TCE_IOMMU) {
                pr_err("tce_vfio: Wrong IOMMU type\n");
                return ERR_PTR(-EINVAL);
        }

        container = kzalloc(sizeof(*container), GFP_KERNEL);
        if (!container)
                return ERR_PTR(-ENOMEM);

        mutex_init(&container->lock);

        return container;
}

static void tce_iommu_release(void *iommu_data)
{
        struct tce_container *container = iommu_data;

        WARN_ON(container->tbl && !container->tbl->it_group);
        tce_iommu_disable(container);

        if (container->tbl && container->tbl->it_group)
                tce_iommu_detach_group(iommu_data, container->tbl->it_group);

        mutex_destroy(&container->lock);

        kfree(container);
}

static int tce_iommu_clear(struct tce_container *container,
                struct iommu_table *tbl,
                unsigned long entry, unsigned long pages)
{
        unsigned long oldtce;
        struct page *page;

        for ( ; pages; --pages, ++entry) {
                oldtce = iommu_clear_tce(tbl, entry);
                if (!oldtce)
                        continue;

                page = pfn_to_page(oldtce >> PAGE_SHIFT);
                WARN_ON(!page);
                if (page) {
                        if (oldtce & TCE_PCI_WRITE)
                                SetPageDirty(page);
                        put_page(page);
                }
        }

        return 0;
}
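
/*
 * Note on the dirty marking above: a TCE with TCE_PCI_WRITE set allowed
 * the device to write to the page, so the page is marked dirty before
 * being unpinned to make sure the mm writes it back if it is file-backed.
 */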

static long tce_iommu_build(struct tce_container *container,
                struct iommu_table *tbl,
                unsigned long entry, unsigned long tce, unsigned long pages)
{
        long i, ret = 0;
        struct page *page = NULL;
        unsigned long hva;
        enum dma_data_direction direction = iommu_tce_direction(tce);

        for (i = 0; i < pages; ++i) {
                unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

                ret = get_user_pages_fast(tce & PAGE_MASK, 1,
                                direction != DMA_TO_DEVICE, &page);
                if (unlikely(ret != 1)) {
                        ret = -EFAULT;
                        break;
                }

                if (!tce_page_is_contained(page, tbl->it_page_shift)) {
                        /* Drop the reference taken by get_user_pages_fast() */
                        put_page(page);
                        ret = -EPERM;
                        break;
                }

                hva = (unsigned long) page_address(page) + offset;

                ret = iommu_tce_build(tbl, entry + i, hva, direction);
                if (ret) {
                        put_page(page);
                        pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
                                        __func__, entry << tbl->it_page_shift,
                                        tce, ret);
                        break;
                }
                tce += IOMMU_PAGE_SIZE_4K;
        }

        if (ret)
                tce_iommu_clear(container, tbl, entry, i);

        return ret;
}
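
/*
 * The offset computation in tce_iommu_build() deserves a worked example:
 * tce & IOMMU_PAGE_MASK(tbl) rounds the user address down to an IOMMU
 * page boundary, and the & ~PAGE_MASK then keeps only the offset within
 * the system page.  With 64K system pages and 4K IOMMU pages,
 * tce == 0xabc3420 gives 0xabc3000 & 0xffff == 0x3000, so hva points
 * 0x3000 bytes into the pinned 64K page.
 */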

static long tce_iommu_ioctl(void *iommu_data,
                                 unsigned int cmd, unsigned long arg)
{
        struct tce_container *container = iommu_data;
        unsigned long minsz;
        long ret;

        switch (cmd) {
        case VFIO_CHECK_EXTENSION:
                switch (arg) {
                case VFIO_SPAPR_TCE_IOMMU:
                        ret = 1;
                        break;
                default:
                        ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
                        break;
                }

                return (ret < 0) ? 0 : ret;

        case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
                struct vfio_iommu_spapr_tce_info info;
                struct iommu_table *tbl = container->tbl;

                if (WARN_ON(!tbl))
                        return -ENXIO;

                minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
                                dma32_window_size);

                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;

                if (info.argsz < minsz)
                        return -EINVAL;

                info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT_4K;
                info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT_4K;
                info.flags = 0;

                if (copy_to_user((void __user *)arg, &info, minsz))
                        return -EFAULT;

                return 0;
        }
        case VFIO_IOMMU_MAP_DMA: {
                struct vfio_iommu_type1_dma_map param;
                struct iommu_table *tbl = container->tbl;
                unsigned long tce;

                if (!tbl)
                        return -ENXIO;

                BUG_ON(!tbl->it_group);

                minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

                if (copy_from_user(&param, (void __user *)arg, minsz))
                        return -EFAULT;

                if (param.argsz < minsz)
                        return -EINVAL;

                if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
                                VFIO_DMA_MAP_FLAG_WRITE))
                        return -EINVAL;

                if ((param.size & ~IOMMU_PAGE_MASK_4K) ||
                                (param.vaddr & ~IOMMU_PAGE_MASK_4K))
                        return -EINVAL;

                /* iova is checked by the IOMMU API */
                tce = param.vaddr;
                if (param.flags & VFIO_DMA_MAP_FLAG_READ)
                        tce |= TCE_PCI_READ;
                if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
                        tce |= TCE_PCI_WRITE;

                ret = iommu_tce_put_param_check(tbl, param.iova, tce);
                if (ret)
                        return ret;

                ret = tce_iommu_build(container, tbl,
                                param.iova >> IOMMU_PAGE_SHIFT_4K,
                                tce, param.size >> IOMMU_PAGE_SHIFT_4K);

                iommu_flush_tce(tbl);

                return ret;
        }
        case VFIO_IOMMU_UNMAP_DMA: {
                struct vfio_iommu_type1_dma_unmap param;
                struct iommu_table *tbl = container->tbl;

                if (WARN_ON(!tbl))
                        return -ENXIO;

                minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
                                size);

                if (copy_from_user(&param, (void __user *)arg, minsz))
                        return -EFAULT;

                if (param.argsz < minsz)
                        return -EINVAL;

                /* No flags are supported yet */
                if (param.flags)
                        return -EINVAL;

                if (param.size & ~IOMMU_PAGE_MASK_4K)
                        return -EINVAL;

                ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
                                param.size >> IOMMU_PAGE_SHIFT_4K);
                if (ret)
                        return ret;

                ret = tce_iommu_clear(container, tbl,
                                param.iova >> IOMMU_PAGE_SHIFT_4K,
                                param.size >> IOMMU_PAGE_SHIFT_4K);
                iommu_flush_tce(tbl);

                return ret;
        }
        case VFIO_IOMMU_ENABLE:
                mutex_lock(&container->lock);
                ret = tce_iommu_enable(container);
                mutex_unlock(&container->lock);
                return ret;

        case VFIO_IOMMU_DISABLE:
                mutex_lock(&container->lock);
                tce_iommu_disable(container);
                mutex_unlock(&container->lock);
                return 0;

        case VFIO_EEH_PE_OP:
                if (!container->tbl || !container->tbl->it_group)
                        return -ENODEV;

                return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group,
                                                  cmd, arg);
        }

        return -ENOTTY;
}
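
/*
 * A minimal userspace sketch of driving this ioctl interface (error
 * handling omitted; the group path and buffer setup below are assumptions
 * for illustration, not part of this driver):
 *
 *      int container = open("/dev/vfio/vfio", O_RDWR);
 *      int group = open("/dev/vfio/26", O_RDWR);   // hypothetical group
 *      ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *      ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
 *      ioctl(container, VFIO_IOMMU_ENABLE);        // charges locked memory
 *
 *      struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
 *      ioctl(container, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
 *
 *      struct vfio_iommu_type1_dma_map map = {
 *              .argsz = sizeof(map),
 *              .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
 *              .vaddr = (__u64)(unsigned long)buf, // 4K-aligned buffer
 *              .iova  = info.dma32_window_start,
 *              .size  = 4096,
 *      };
 *      ioctl(container, VFIO_IOMMU_MAP_DMA, &map);
 */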

static int tce_iommu_attach_group(void *iommu_data,
                struct iommu_group *iommu_group)
{
        int ret;
        struct tce_container *container = iommu_data;
        struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);

        BUG_ON(!tbl);
        mutex_lock(&container->lock);

        /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
                        iommu_group_id(iommu_group), iommu_group); */
        if (container->tbl) {
                pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n",
                                iommu_group_id(container->tbl->it_group),
                                iommu_group_id(iommu_group));
                ret = -EBUSY;
        } else if (container->enabled) {
                pr_err("tce_vfio: attaching group #%u to enabled container\n",
                                iommu_group_id(iommu_group));
                ret = -EBUSY;
        } else {
                ret = iommu_take_ownership(tbl);
                if (!ret)
                        container->tbl = tbl;
        }

        mutex_unlock(&container->lock);

        return ret;
}

static void tce_iommu_detach_group(void *iommu_data,
                struct iommu_group *iommu_group)
{
        struct tce_container *container = iommu_data;
        struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);

        BUG_ON(!tbl);
        mutex_lock(&container->lock);
        if (tbl != container->tbl) {
                pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n",
                                iommu_group_id(iommu_group),
                                iommu_group_id(tbl->it_group));
        } else {
                if (container->enabled) {
                        pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n",
                                        iommu_group_id(tbl->it_group));
                        tce_iommu_disable(container);
                }

                /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
                                iommu_group_id(iommu_group), iommu_group); */
                container->tbl = NULL;
                tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
                iommu_release_ownership(tbl);
        }
        mutex_unlock(&container->lock);
}

static const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
        .name           = "iommu-vfio-powerpc",
        .owner          = THIS_MODULE,
        .open           = tce_iommu_open,
        .release        = tce_iommu_release,
        .ioctl          = tce_iommu_ioctl,
        .attach_group   = tce_iommu_attach_group,
        .detach_group   = tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
        return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
        vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);