iommu/vt-d: Clean up and fix page table clear/free behaviour
drivers/iommu/intel-iommu.c

1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  */
19
20 #include <linux/init.h>
21 #include <linux/bitmap.h>
22 #include <linux/debugfs.h>
23 #include <linux/export.h>
24 #include <linux/slab.h>
25 #include <linux/irq.h>
26 #include <linux/interrupt.h>
27 #include <linux/spinlock.h>
28 #include <linux/pci.h>
29 #include <linux/dmar.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/mempool.h>
32 #include <linux/memory.h>
33 #include <linux/timer.h>
34 #include <linux/iova.h>
35 #include <linux/iommu.h>
36 #include <linux/intel-iommu.h>
37 #include <linux/syscore_ops.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/pci-ats.h>
41 #include <linux/memblock.h>
42 #include <asm/irq_remapping.h>
43 #include <asm/cacheflush.h>
44 #include <asm/iommu.h>
45
46 #include "irq_remapping.h"
47 #include "pci.h"
48
49 #define ROOT_SIZE               VTD_PAGE_SIZE
50 #define CONTEXT_SIZE            VTD_PAGE_SIZE
51
52 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
54 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
55
56 #define IOAPIC_RANGE_START      (0xfee00000)
57 #define IOAPIC_RANGE_END        (0xfeefffff)
58 #define IOVA_START_ADDR         (0x1000)
59
60 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
62 #define MAX_AGAW_WIDTH 64
63 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
64
65 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
66 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
67
68 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
69    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
70 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
71                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
72 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
73
74 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
75 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
76 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
77
78 /* page table handling */
79 #define LEVEL_STRIDE            (9)
80 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
81
82 /*
83  * This bitmap is used to advertise the page sizes our hardware supports
84  * to the IOMMU core, which will then use this information to split
85  * physically contiguous memory regions it is mapping into page sizes
86  * that we support.
87  *
88  * Traditionally the IOMMU core just handed us the mappings directly,
89  * after making sure the size is an order of a 4KiB page and that the
90  * mapping has natural alignment.
91  *
92  * To retain this behavior, we currently advertise that we support
93  * all page sizes that are an order of 4KiB.
94  *
95  * If at some point we'd like to utilize the IOMMU core's new behavior,
96  * we could change this to advertise the real page sizes we support.
97  */
98 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
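/*
 * Note: ~0xFFFUL has every bit from bit 12 upwards set, so every
 * power-of-two size from 4KiB up is advertised as a supported page size.
 */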
99
100 static inline int agaw_to_level(int agaw)
101 {
102         return agaw + 2;
103 }
104
105 static inline int agaw_to_width(int agaw)
106 {
107         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
108 }
109
110 static inline int width_to_agaw(int width)
111 {
112         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
113 }
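/*
 * Worked example: DEFAULT_DOMAIN_ADDRESS_WIDTH of 48 bits gives
 * width_to_agaw(48) == 2 and agaw_to_level(2) == 4, i.e. a four-level
 * page table; an agaw of 1 would give a three-level table covering 39 bits.
 */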
114
115 static inline unsigned int level_to_offset_bits(int level)
116 {
117         return (level - 1) * LEVEL_STRIDE;
118 }
119
120 static inline int pfn_level_offset(unsigned long pfn, int level)
121 {
122         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
123 }
124
125 static inline unsigned long level_mask(int level)
126 {
127         return -1UL << level_to_offset_bits(level);
128 }
129
130 static inline unsigned long level_size(int level)
131 {
132         return 1UL << level_to_offset_bits(level);
133 }
134
135 static inline unsigned long align_to_level(unsigned long pfn, int level)
136 {
137         return (pfn + level_size(level) - 1) & level_mask(level);
138 }
139
140 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
141 {
142         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
143 }
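/*
 * With LEVEL_STRIDE == 9, each level covers nine more address bits: a
 * level-1 PTE maps a single 4KiB page, a level-2 PTE covers 512 pages
 * (2MiB) and a level-3 PTE covers 1GiB.
 */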
144
145 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
146    are never going to work. */
147 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
148 {
149         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
150 }
151
152 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
153 {
154         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
155 }
156 static inline unsigned long page_to_dma_pfn(struct page *pg)
157 {
158         return mm_to_dma_pfn(page_to_pfn(pg));
159 }
160 static inline unsigned long virt_to_dma_pfn(void *p)
161 {
162         return page_to_dma_pfn(virt_to_page(p));
163 }
164
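/*
 * With 4KiB MM pages (the usual x86 configuration) PAGE_SHIFT equals
 * VTD_PAGE_SHIFT and the conversions above are identity operations; they
 * only matter when MM pages are larger than VT-d's 4KiB pages.
 */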
165 /* global iommu list, set NULL for ignored DMAR units */
166 static struct intel_iommu **g_iommus;
167
168 static void __init check_tylersburg_isoch(void);
169 static int rwbf_quirk;
170
171 /*
172  * set to 1 to panic the kernel if VT-d cannot be enabled successfully
173  * (used when the kernel is launched w/ TXT)
174  */
175 static int force_on = 0;
176
177 /*
178  * 0: Present
179  * 1-11: Reserved
180  * 12-63: Context Ptr (12 - (haw-1))
181  * 64-127: Reserved
182  */
183 struct root_entry {
184         u64     val;
185         u64     rsvd1;
186 };
187 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
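/* A 4KiB root table holds 256 16-byte root entries, one per PCI bus number. */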
188 static inline bool root_present(struct root_entry *root)
189 {
190         return (root->val & 1);
191 }
192 static inline void set_root_present(struct root_entry *root)
193 {
194         root->val |= 1;
195 }
196 static inline void set_root_value(struct root_entry *root, unsigned long value)
197 {
198         root->val |= value & VTD_PAGE_MASK;
199 }
200
201 static inline struct context_entry *
202 get_context_addr_from_root(struct root_entry *root)
203 {
204         return (struct context_entry *)
205                 (root_present(root)?phys_to_virt(
206                 root->val & VTD_PAGE_MASK) :
207                 NULL);
208 }
209
210 /*
211  * low 64 bits:
212  * 0: present
213  * 1: fault processing disable
214  * 2-3: translation type
215  * 12-63: address space root
216  * high 64 bits:
217  * 0-2: address width
218  * 3-6: aval
219  * 8-23: domain id
220  */
221 struct context_entry {
222         u64 lo;
223         u64 hi;
224 };
225
226 static inline bool context_present(struct context_entry *context)
227 {
228         return (context->lo & 1);
229 }
230 static inline void context_set_present(struct context_entry *context)
231 {
232         context->lo |= 1;
233 }
234
235 static inline void context_set_fault_enable(struct context_entry *context)
236 {
237         context->lo &= (((u64)-1) << 2) | 1;
238 }
239
240 static inline void context_set_translation_type(struct context_entry *context,
241                                                 unsigned long value)
242 {
243         context->lo &= (((u64)-1) << 4) | 3;
244         context->lo |= (value & 3) << 2;
245 }
246
247 static inline void context_set_address_root(struct context_entry *context,
248                                             unsigned long value)
249 {
250         context->lo |= value & VTD_PAGE_MASK;
251 }
252
253 static inline void context_set_address_width(struct context_entry *context,
254                                              unsigned long value)
255 {
256         context->hi |= value & 7;
257 }
258
259 static inline void context_set_domain_id(struct context_entry *context,
260                                          unsigned long value)
261 {
262         context->hi |= (value & ((1 << 16) - 1)) << 8;
263 }
264
265 static inline void context_clear_entry(struct context_entry *context)
266 {
267         context->lo = 0;
268         context->hi = 0;
269 }
270
271 /*
272  * 0: readable
273  * 1: writable
274  * 2-6: reserved
275  * 7: super page
276  * 8-10: available
277  * 11: snoop behavior
278  * 12-63: Host physical address
279  */
280 struct dma_pte {
281         u64 val;
282 };
283
284 static inline void dma_clear_pte(struct dma_pte *pte)
285 {
286         pte->val = 0;
287 }
288
289 static inline u64 dma_pte_addr(struct dma_pte *pte)
290 {
291 #ifdef CONFIG_64BIT
292         return pte->val & VTD_PAGE_MASK;
293 #else
294         /* Must have a full atomic 64-bit read */
295         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
296 #endif
297 }
298
299 static inline bool dma_pte_present(struct dma_pte *pte)
300 {
301         return (pte->val & 3) != 0;
302 }
303
304 static inline bool dma_pte_superpage(struct dma_pte *pte)
305 {
306         return (pte->val & (1 << 7));
307 }
308
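/*
 * True when @pte is the first entry of its 4KiB page-table page, i.e. the
 * pointer is VTD_PAGE_SIZE aligned.  The PTE walk loops below use this to
 * notice when they have stepped off the end of one page-table page and must
 * look up the next one.
 */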
309 static inline int first_pte_in_page(struct dma_pte *pte)
310 {
311         return !((unsigned long)pte & ~VTD_PAGE_MASK);
312 }
313
314 /*
315  * This domain is a static identity mapping domain.
316  *      1. This domain creates a static 1:1 mapping to all usable memory.
317  *      2. It maps to each iommu if successful.
318  *      3. Each iommu maps to this domain if successful.
319  */
320 static struct dmar_domain *si_domain;
321 static int hw_pass_through = 1;
322
323 /* devices under the same p2p bridge are owned in one domain */
324 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
325
326 /* domain represents a virtual machine; more than one device
327  * across iommus may be owned in one domain, e.g. a kvm guest.
328  */
329 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 1)
330
331 /* si_domain contains multiple devices */
332 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 2)
333
334 /* define the limit of IOMMUs supported in each domain */
335 #ifdef  CONFIG_X86
336 # define        IOMMU_UNITS_SUPPORTED   MAX_IO_APICS
337 #else
338 # define        IOMMU_UNITS_SUPPORTED   64
339 #endif
340
341 struct dmar_domain {
342         int     id;                     /* domain id */
343         int     nid;                    /* node id */
344         DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
345                                         /* bitmap of iommus this domain uses*/
346
347         struct list_head devices;       /* all devices' list */
348         struct iova_domain iovad;       /* iova's that belong to this domain */
349
350         struct dma_pte  *pgd;           /* virtual address */
351         int             gaw;            /* max guest address width */
352
353         /* adjusted guest address width, 0 is level 2 30-bit */
354         int             agaw;
355
356         int             flags;          /* flags to find out type of domain */
357
358         int             iommu_coherency;/* indicate coherency of iommu access */
359         int             iommu_snooping; /* indicate snooping control feature*/
360         int             iommu_count;    /* reference count of iommu */
361         int             iommu_superpage;/* Level of superpages supported:
362                                            0 == 4KiB (no superpages), 1 == 2MiB,
363                                            2 == 1GiB, 3 == 512GiB, 4 == 256TiB */
364         spinlock_t      iommu_lock;     /* protect iommu set in domain */
365         u64             max_addr;       /* maximum mapped address */
366 };
367
368 /* PCI domain-device relationship */
369 struct device_domain_info {
370         struct list_head link;  /* link to domain siblings */
371         struct list_head global; /* link to global list */
372         int segment;            /* PCI domain */
373         u8 bus;                 /* PCI bus number */
374         u8 devfn;               /* PCI devfn number */
375         struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
376         struct intel_iommu *iommu; /* IOMMU used by this device */
377         struct dmar_domain *domain; /* pointer to domain */
378 };
379
380 struct dmar_rmrr_unit {
381         struct list_head list;          /* list of rmrr units   */
382         struct acpi_dmar_header *hdr;   /* ACPI header          */
383         u64     base_address;           /* reserved base address*/
384         u64     end_address;            /* reserved end address */
385         struct pci_dev __rcu **devices; /* target devices */
386         int     devices_cnt;            /* target device count */
387 };
388
389 struct dmar_atsr_unit {
390         struct list_head list;          /* list of ATSR units */
391         struct acpi_dmar_header *hdr;   /* ACPI header */
392         struct pci_dev __rcu **devices; /* target devices */
393         int devices_cnt;                /* target device count */
394         u8 include_all:1;               /* include all ports */
395 };
396
397 static LIST_HEAD(dmar_atsr_units);
398 static LIST_HEAD(dmar_rmrr_units);
399
400 #define for_each_rmrr_units(rmrr) \
401         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
402
403 static void flush_unmaps_timeout(unsigned long data);
404
405 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
406
407 #define HIGH_WATER_MARK 250
408 struct deferred_flush_tables {
409         int next;
410         struct iova *iova[HIGH_WATER_MARK];
411         struct dmar_domain *domain[HIGH_WATER_MARK];
412         struct page *freelist[HIGH_WATER_MARK];
413 };
414
415 static struct deferred_flush_tables *deferred_flush;
416
417 /* bitmap for indexing intel_iommus */
418 static int g_num_of_iommus;
419
420 static DEFINE_SPINLOCK(async_umap_flush_lock);
421 static LIST_HEAD(unmaps_to_do);
422
423 static int timer_on;
424 static long list_size;
425
426 static void domain_exit(struct dmar_domain *domain);
427 static void domain_remove_dev_info(struct dmar_domain *domain);
428 static void domain_remove_one_dev_info(struct dmar_domain *domain,
429                                        struct pci_dev *pdev);
430 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
431                                            struct pci_dev *pdev);
432
433 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
434 int dmar_disabled = 0;
435 #else
436 int dmar_disabled = 1;
437 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
438
439 int intel_iommu_enabled = 0;
440 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
441
442 static int dmar_map_gfx = 1;
443 static int dmar_forcedac;
444 static int intel_iommu_strict;
445 static int intel_iommu_superpage = 1;
446
447 int intel_iommu_gfx_mapped;
448 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
449
450 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
451 static DEFINE_SPINLOCK(device_domain_lock);
452 static LIST_HEAD(device_domain_list);
453
454 static struct iommu_ops intel_iommu_ops;
455
456 static int __init intel_iommu_setup(char *str)
457 {
458         if (!str)
459                 return -EINVAL;
460         while (*str) {
461                 if (!strncmp(str, "on", 2)) {
462                         dmar_disabled = 0;
463                         printk(KERN_INFO "Intel-IOMMU: enabled\n");
464                 } else if (!strncmp(str, "off", 3)) {
465                         dmar_disabled = 1;
466                         printk(KERN_INFO "Intel-IOMMU: disabled\n");
467                 } else if (!strncmp(str, "igfx_off", 8)) {
468                         dmar_map_gfx = 0;
469                         printk(KERN_INFO
470                                 "Intel-IOMMU: disable GFX device mapping\n");
471                 } else if (!strncmp(str, "forcedac", 8)) {
472                         printk(KERN_INFO
473                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
474                         dmar_forcedac = 1;
475                 } else if (!strncmp(str, "strict", 6)) {
476                         printk(KERN_INFO
477                                 "Intel-IOMMU: disable batched IOTLB flush\n");
478                         intel_iommu_strict = 1;
479                 } else if (!strncmp(str, "sp_off", 6)) {
480                         printk(KERN_INFO
481                                 "Intel-IOMMU: disable supported super page\n");
482                         intel_iommu_superpage = 0;
483                 }
484
485                 str += strcspn(str, ",");
486                 while (*str == ',')
487                         str++;
488         }
489         return 0;
490 }
491 __setup("intel_iommu=", intel_iommu_setup);
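/*
 * Example: booting with "intel_iommu=on,strict" enables the IOMMU and
 * disables the batched (deferred) IOTLB flush; the options are parsed as
 * a comma-separated list by intel_iommu_setup() above.
 */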
492
493 static struct kmem_cache *iommu_domain_cache;
494 static struct kmem_cache *iommu_devinfo_cache;
495 static struct kmem_cache *iommu_iova_cache;
496
497 static inline void *alloc_pgtable_page(int node)
498 {
499         struct page *page;
500         void *vaddr = NULL;
501
502         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
503         if (page)
504                 vaddr = page_address(page);
505         return vaddr;
506 }
507
508 static inline void free_pgtable_page(void *vaddr)
509 {
510         free_page((unsigned long)vaddr);
511 }
512
513 static inline void *alloc_domain_mem(void)
514 {
515         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
516 }
517
518 static void free_domain_mem(void *vaddr)
519 {
520         kmem_cache_free(iommu_domain_cache, vaddr);
521 }
522
523 static inline void * alloc_devinfo_mem(void)
524 {
525         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
526 }
527
528 static inline void free_devinfo_mem(void *vaddr)
529 {
530         kmem_cache_free(iommu_devinfo_cache, vaddr);
531 }
532
533 struct iova *alloc_iova_mem(void)
534 {
535         return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
536 }
537
538 void free_iova_mem(struct iova *iova)
539 {
540         kmem_cache_free(iommu_iova_cache, iova);
541 }
542
543
544 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
545 {
546         unsigned long sagaw;
547         int agaw = -1;
548
549         sagaw = cap_sagaw(iommu->cap);
550         for (agaw = width_to_agaw(max_gaw);
551              agaw >= 0; agaw--) {
552                 if (test_bit(agaw, &sagaw))
553                         break;
554         }
555
556         return agaw;
557 }
558
559 /*
560  * Calculate max SAGAW for each iommu.
561  */
562 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
563 {
564         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
565 }
566
567 /*
568  * calculate agaw for each iommu.
569  * "SAGAW" may be different across iommus; use a default agaw, and
570  * fall back to a smaller supported agaw for iommus that don't support the default.
571  */
572 int iommu_calculate_agaw(struct intel_iommu *iommu)
573 {
574         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
575 }
576
577 /* This function only returns a single iommu in a domain */
578 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
579 {
580         int iommu_id;
581
582         /* si_domain and vm domain should not get here. */
583         BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
584         BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
585
586         iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
587         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
588                 return NULL;
589
590         return g_iommus[iommu_id];
591 }
592
593 static void domain_update_iommu_coherency(struct dmar_domain *domain)
594 {
595         int i;
596
597         i = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
598
599         domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0;
600
601         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
602                 if (!ecap_coherent(g_iommus[i]->ecap)) {
603                         domain->iommu_coherency = 0;
604                         break;
605                 }
606         }
607 }
608
609 static void domain_update_iommu_snooping(struct dmar_domain *domain)
610 {
611         int i;
612
613         domain->iommu_snooping = 1;
614
615         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
616                 if (!ecap_sc_support(g_iommus[i]->ecap)) {
617                         domain->iommu_snooping = 0;
618                         break;
619                 }
620         }
621 }
622
623 static void domain_update_iommu_superpage(struct dmar_domain *domain)
624 {
625         struct dmar_drhd_unit *drhd;
626         struct intel_iommu *iommu = NULL;
627         int mask = 0xf;
628
629         if (!intel_iommu_superpage) {
630                 domain->iommu_superpage = 0;
631                 return;
632         }
633
634         /* set iommu_superpage to the smallest common denominator */
635         rcu_read_lock();
636         for_each_active_iommu(iommu, drhd) {
637                 mask &= cap_super_page_val(iommu->cap);
638                 if (!mask) {
639                         break;
640                 }
641         }
642         rcu_read_unlock();
643
644         domain->iommu_superpage = fls(mask);
645 }
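/*
 * Example: if every active IOMMU reports 2MiB superpage support (bit 0 of
 * the capability field) but at least one lacks 1GiB support, the common
 * mask ends up as 0x1 and fls() yields 1, i.e. 2MiB superpages only.
 */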
646
647 /* Some capabilities may be different across iommus */
648 static void domain_update_iommu_cap(struct dmar_domain *domain)
649 {
650         domain_update_iommu_coherency(domain);
651         domain_update_iommu_snooping(domain);
652         domain_update_iommu_superpage(domain);
653 }
654
655 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
656 {
657         struct dmar_drhd_unit *drhd = NULL;
658         struct intel_iommu *iommu;
659         struct pci_dev *dev;
660         int i;
661
662         rcu_read_lock();
663         for_each_active_iommu(iommu, drhd) {
664                 if (segment != drhd->segment)
665                         continue;
666
667                 for_each_active_dev_scope(drhd->devices,
668                                           drhd->devices_cnt, i, dev) {
669                         if (dev->bus->number == bus && dev->devfn == devfn)
670                                 goto out;
671                         if (dev->subordinate &&
672                             dev->subordinate->number <= bus &&
673                             dev->subordinate->busn_res.end >= bus)
674                                 goto out;
675                 }
676
677                 if (drhd->include_all)
678                         goto out;
679         }
680         iommu = NULL;
681 out:
682         rcu_read_unlock();
683
684         return iommu;
685 }
686
687 static void domain_flush_cache(struct dmar_domain *domain,
688                                void *addr, int size)
689 {
690         if (!domain->iommu_coherency)
691                 clflush_cache_range(addr, size);
692 }
693
694 /* Gets context entry for a given bus and devfn */
695 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
696                 u8 bus, u8 devfn)
697 {
698         struct root_entry *root;
699         struct context_entry *context;
700         unsigned long phy_addr;
701         unsigned long flags;
702
703         spin_lock_irqsave(&iommu->lock, flags);
704         root = &iommu->root_entry[bus];
705         context = get_context_addr_from_root(root);
706         if (!context) {
707                 context = (struct context_entry *)
708                                 alloc_pgtable_page(iommu->node);
709                 if (!context) {
710                         spin_unlock_irqrestore(&iommu->lock, flags);
711                         return NULL;
712                 }
713                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
714                 phy_addr = virt_to_phys((void *)context);
715                 set_root_value(root, phy_addr);
716                 set_root_present(root);
717                 __iommu_flush_cache(iommu, root, sizeof(*root));
718         }
719         spin_unlock_irqrestore(&iommu->lock, flags);
720         return &context[devfn];
721 }
722
723 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
724 {
725         struct root_entry *root;
726         struct context_entry *context;
727         int ret;
728         unsigned long flags;
729
730         spin_lock_irqsave(&iommu->lock, flags);
731         root = &iommu->root_entry[bus];
732         context = get_context_addr_from_root(root);
733         if (!context) {
734                 ret = 0;
735                 goto out;
736         }
737         ret = context_present(&context[devfn]);
738 out:
739         spin_unlock_irqrestore(&iommu->lock, flags);
740         return ret;
741 }
742
743 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
744 {
745         struct root_entry *root;
746         struct context_entry *context;
747         unsigned long flags;
748
749         spin_lock_irqsave(&iommu->lock, flags);
750         root = &iommu->root_entry[bus];
751         context = get_context_addr_from_root(root);
752         if (context) {
753                 context_clear_entry(&context[devfn]);
754                 __iommu_flush_cache(iommu, &context[devfn], \
755                         sizeof(*context));
756         }
757         spin_unlock_irqrestore(&iommu->lock, flags);
758 }
759
760 static void free_context_table(struct intel_iommu *iommu)
761 {
762         struct root_entry *root;
763         int i;
764         unsigned long flags;
765         struct context_entry *context;
766
767         spin_lock_irqsave(&iommu->lock, flags);
768         if (!iommu->root_entry) {
769                 goto out;
770         }
771         for (i = 0; i < ROOT_ENTRY_NR; i++) {
772                 root = &iommu->root_entry[i];
773                 context = get_context_addr_from_root(root);
774                 if (context)
775                         free_pgtable_page(context);
776         }
777         free_pgtable_page(iommu->root_entry);
778         iommu->root_entry = NULL;
779 out:
780         spin_unlock_irqrestore(&iommu->lock, flags);
781 }
782
783 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
784                                       unsigned long pfn, int *target_level)
785 {
786         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
787         struct dma_pte *parent, *pte = NULL;
788         int level = agaw_to_level(domain->agaw);
789         int offset;
790
791         BUG_ON(!domain->pgd);
792
793         if (addr_width < BITS_PER_LONG && pfn >> addr_width)
794                 /* Address beyond IOMMU's addressing capabilities. */
795                 return NULL;
796
797         parent = domain->pgd;
798
799         while (1) {
800                 void *tmp_page;
801
802                 offset = pfn_level_offset(pfn, level);
803                 pte = &parent[offset];
804                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
805                         break;
806                 if (level == *target_level)
807                         break;
808
809                 if (!dma_pte_present(pte)) {
810                         uint64_t pteval;
811
812                         tmp_page = alloc_pgtable_page(domain->nid);
813
814                         if (!tmp_page)
815                                 return NULL;
816
817                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
818                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
819                         if (cmpxchg64(&pte->val, 0ULL, pteval)) {
820                                 /* Someone else set it while we were thinking; use theirs. */
821                                 free_pgtable_page(tmp_page);
822                         } else {
823                                 dma_pte_addr(pte);
824                                 domain_flush_cache(domain, pte, sizeof(*pte));
825                         }
826                 }
827                 if (level == 1)
828                         break;
829
830                 parent = phys_to_virt(dma_pte_addr(pte));
831                 level--;
832         }
833
834         if (!*target_level)
835                 *target_level = level;
836
837         return pte;
838 }
839
840
841 /* return address's pte at specific level */
842 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
843                                          unsigned long pfn,
844                                          int level, int *large_page)
845 {
846         struct dma_pte *parent, *pte = NULL;
847         int total = agaw_to_level(domain->agaw);
848         int offset;
849
850         parent = domain->pgd;
851         while (level <= total) {
852                 offset = pfn_level_offset(pfn, total);
853                 pte = &parent[offset];
854                 if (level == total)
855                         return pte;
856
857                 if (!dma_pte_present(pte)) {
858                         *large_page = total;
859                         break;
860                 }
861
862                 if (pte->val & DMA_PTE_LARGE_PAGE) {
863                         *large_page = total;
864                         return pte;
865                 }
866
867                 parent = phys_to_virt(dma_pte_addr(pte));
868                 total--;
869         }
870         return NULL;
871 }
872
873 /* clear last level pte, a tlb flush should follow */
874 static void dma_pte_clear_range(struct dmar_domain *domain,
875                                 unsigned long start_pfn,
876                                 unsigned long last_pfn)
877 {
878         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
879         unsigned int large_page = 1;
880         struct dma_pte *first_pte, *pte;
881
882         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
883         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
884         BUG_ON(start_pfn > last_pfn);
885
886         /* we don't need lock here; nobody else touches the iova range */
887         do {
888                 large_page = 1;
889                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
890                 if (!pte) {
891                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
892                         continue;
893                 }
894                 do {
895                         dma_clear_pte(pte);
896                         start_pfn += lvl_to_nr_pages(large_page);
897                         pte++;
898                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
899
900                 domain_flush_cache(domain, first_pte,
901                                    (void *)pte - (void *)first_pte);
902
903         } while (start_pfn && start_pfn <= last_pfn);
904 }
905
906 static void dma_pte_free_level(struct dmar_domain *domain, int level,
907                                struct dma_pte *pte, unsigned long pfn,
908                                unsigned long start_pfn, unsigned long last_pfn)
909 {
910         pfn = max(start_pfn, pfn);
911         pte = &pte[pfn_level_offset(pfn, level)];
912
913         do {
914                 unsigned long level_pfn;
915                 struct dma_pte *level_pte;
916
917                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
918                         goto next;
919
920                 level_pfn = pfn & level_mask(level - 1);
921                 level_pte = phys_to_virt(dma_pte_addr(pte));
922
923                 if (level > 2)
924                         dma_pte_free_level(domain, level - 1, level_pte,
925                                            level_pfn, start_pfn, last_pfn);
926
927                 /* If range covers entire pagetable, free it */
928                 if (!(start_pfn > level_pfn ||
929                       last_pfn < level_pfn + level_size(level) - 1)) {
930                         dma_clear_pte(pte);
931                         domain_flush_cache(domain, pte, sizeof(*pte));
932                         free_pgtable_page(level_pte);
933                 }
934 next:
935                 pfn += level_size(level);
936         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
937 }
938
939 /* free page table pages. last level pte should already be cleared */
940 static void dma_pte_free_pagetable(struct dmar_domain *domain,
941                                    unsigned long start_pfn,
942                                    unsigned long last_pfn)
943 {
944         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
945
946         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
947         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
948         BUG_ON(start_pfn > last_pfn);
949
950         /* We don't need lock here; nobody else touches the iova range */
951         dma_pte_free_level(domain, agaw_to_level(domain->agaw),
952                            domain->pgd, 0, start_pfn, last_pfn);
953
954         /* free pgd */
955         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
956                 free_pgtable_page(domain->pgd);
957                 domain->pgd = NULL;
958         }
959 }
960
961 /* When a page at a given level is being unlinked from its parent, we don't
962    need to *modify* it at all. All we need to do is make a list of all the
963    pages which can be freed just as soon as we've flushed the IOTLB and we
964    know the hardware page-walk will no longer touch them.
965    The 'pte' argument is the *parent* PTE, pointing to the page that is to
966    be freed. */
967 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
968                                             int level, struct dma_pte *pte,
969                                             struct page *freelist)
970 {
971         struct page *pg;
972
973         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
974         pg->freelist = freelist;
975         freelist = pg;
976
977         if (level == 1)
978                 return freelist;
979
980         for (pte = page_address(pg); !first_pte_in_page(pte); pte++) {
981                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
982                         freelist = dma_pte_list_pagetables(domain, level - 1,
983                                                            pte, freelist);
984         }
985
986         return freelist;
987 }
988
989 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
990                                         struct dma_pte *pte, unsigned long pfn,
991                                         unsigned long start_pfn,
992                                         unsigned long last_pfn,
993                                         struct page *freelist)
994 {
995         struct dma_pte *first_pte = NULL, *last_pte = NULL;
996
997         pfn = max(start_pfn, pfn);
998         pte = &pte[pfn_level_offset(pfn, level)];
999
1000         do {
1001                 unsigned long level_pfn;
1002
1003                 if (!dma_pte_present(pte))
1004                         goto next;
1005
1006                 level_pfn = pfn & level_mask(level);
1007
1008                 /* If range covers entire pagetable, free it */
1009                 if (start_pfn <= level_pfn &&
1010                     last_pfn >= level_pfn + level_size(level) - 1) {
1011                         /* These subordinate page tables are going away entirely. Don't
1012                            bother to clear them; we're just going to *free* them. */
1013                         if (level > 1 && !dma_pte_superpage(pte))
1014                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1015
1016                         dma_clear_pte(pte);
1017                         if (!first_pte)
1018                                 first_pte = pte;
1019                         last_pte = pte;
1020                 } else if (level > 1) {
1021                         /* Recurse down into a level that isn't *entirely* obsolete */
1022                         freelist = dma_pte_clear_level(domain, level - 1,
1023                                                        phys_to_virt(dma_pte_addr(pte)),
1024                                                        level_pfn, start_pfn, last_pfn,
1025                                                        freelist);
1026                 }
1027 next:
1028                 pfn += level_size(level);
1029         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1030
1031         if (first_pte)
1032                 domain_flush_cache(domain, first_pte,
1033                                    (void *)++last_pte - (void *)first_pte);
1034
1035         return freelist;
1036 }
1037
1038 /* We can't just free the pages because the IOMMU may still be walking
1039    the page tables, and may have cached the intermediate levels. The
1040    pages can only be freed after the IOTLB flush has been done. */
1041 struct page *domain_unmap(struct dmar_domain *domain,
1042                           unsigned long start_pfn,
1043                           unsigned long last_pfn)
1044 {
1045         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1046         struct page *freelist = NULL;
1047
1048         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
1049         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
1050         BUG_ON(start_pfn > last_pfn);
1051
1052         /* we don't need lock here; nobody else touches the iova range */
1053         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1054                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1055
1056         /* free pgd */
1057         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1058                 struct page *pgd_page = virt_to_page(domain->pgd);
1059                 pgd_page->freelist = freelist;
1060                 freelist = pgd_page;
1061
1062                 domain->pgd = NULL;
1063         }
1064
1065         return freelist;
1066 }
1067
1068 void dma_free_pagelist(struct page *freelist)
1069 {
1070         struct page *pg;
1071
1072         while ((pg = freelist)) {
1073                 freelist = pg->freelist;
1074                 free_pgtable_page(page_address(pg));
1075         }
1076 }
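/*
 * Sketch of the intended call sequence (the exact IOTLB flush depends on
 * the caller -- page-selective or domain-selective):
 *
 *	freelist = domain_unmap(domain, start_pfn, last_pfn);
 *	... flush the IOTLB for the affected range/domain ...
 *	dma_free_pagelist(freelist);
 */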
1077
1078 /* iommu handling */
1079 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1080 {
1081         struct root_entry *root;
1082         unsigned long flags;
1083
1084         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1085         if (!root)
1086                 return -ENOMEM;
1087
1088         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1089
1090         spin_lock_irqsave(&iommu->lock, flags);
1091         iommu->root_entry = root;
1092         spin_unlock_irqrestore(&iommu->lock, flags);
1093
1094         return 0;
1095 }
1096
1097 static void iommu_set_root_entry(struct intel_iommu *iommu)
1098 {
1099         void *addr;
1100         u32 sts;
1101         unsigned long flag;
1102
1103         addr = iommu->root_entry;
1104
1105         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1106         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1107
1108         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1109
1110         /* Make sure hardware complete it */
1111         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1112                       readl, (sts & DMA_GSTS_RTPS), sts);
1113
1114         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1115 }
1116
1117 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1118 {
1119         u32 val;
1120         unsigned long flag;
1121
1122         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1123                 return;
1124
1125         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1126         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1127
1128         /* Make sure hardware complete it */
1129         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1130                       readl, (!(val & DMA_GSTS_WBFS)), val);
1131
1132         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1133 }
1134
1135 /* return value determines if we need a write buffer flush */
1136 static void __iommu_flush_context(struct intel_iommu *iommu,
1137                                   u16 did, u16 source_id, u8 function_mask,
1138                                   u64 type)
1139 {
1140         u64 val = 0;
1141         unsigned long flag;
1142
1143         switch (type) {
1144         case DMA_CCMD_GLOBAL_INVL:
1145                 val = DMA_CCMD_GLOBAL_INVL;
1146                 break;
1147         case DMA_CCMD_DOMAIN_INVL:
1148                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1149                 break;
1150         case DMA_CCMD_DEVICE_INVL:
1151                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1152                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1153                 break;
1154         default:
1155                 BUG();
1156         }
1157         val |= DMA_CCMD_ICC;
1158
1159         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1160         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1161
1162         /* Make sure hardware complete it */
1163         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1164                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1165
1166         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1167 }
1168
1169 /* return value determines if we need a write buffer flush */
1170 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1171                                 u64 addr, unsigned int size_order, u64 type)
1172 {
1173         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1174         u64 val = 0, val_iva = 0;
1175         unsigned long flag;
1176
1177         switch (type) {
1178         case DMA_TLB_GLOBAL_FLUSH:
1179                 /* global flush doesn't need to set IVA_REG */
1180                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1181                 break;
1182         case DMA_TLB_DSI_FLUSH:
1183                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1184                 break;
1185         case DMA_TLB_PSI_FLUSH:
1186                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1187                 /* IH bit is passed in as part of address */
1188                 val_iva = size_order | addr;
1189                 break;
1190         default:
1191                 BUG();
1192         }
1193         /* Note: set drain read/write */
1194 #if 0
1195         /*
1196          * This is probably to be super secure.. Looks like we can
1197          * ignore it without any impact.
1198          */
1199         if (cap_read_drain(iommu->cap))
1200                 val |= DMA_TLB_READ_DRAIN;
1201 #endif
1202         if (cap_write_drain(iommu->cap))
1203                 val |= DMA_TLB_WRITE_DRAIN;
1204
1205         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1206         /* Note: Only uses first TLB reg currently */
1207         if (val_iva)
1208                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1209         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1210
1211         /* Make sure hardware complete it */
1212         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1213                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1214
1215         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1216
1217         /* check IOTLB invalidation granularity */
1218         if (DMA_TLB_IAIG(val) == 0)
1219                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1220         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1221                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1222                         (unsigned long long)DMA_TLB_IIRG(type),
1223                         (unsigned long long)DMA_TLB_IAIG(val));
1224 }
1225
1226 static struct device_domain_info *iommu_support_dev_iotlb(
1227         struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1228 {
1229         int found = 0;
1230         unsigned long flags;
1231         struct device_domain_info *info;
1232         struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1233
1234         if (!ecap_dev_iotlb_support(iommu->ecap))
1235                 return NULL;
1236
1237         if (!iommu->qi)
1238                 return NULL;
1239
1240         spin_lock_irqsave(&device_domain_lock, flags);
1241         list_for_each_entry(info, &domain->devices, link)
1242                 if (info->bus == bus && info->devfn == devfn) {
1243                         found = 1;
1244                         break;
1245                 }
1246         spin_unlock_irqrestore(&device_domain_lock, flags);
1247
1248         if (!found || !info->dev)
1249                 return NULL;
1250
1251         if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1252                 return NULL;
1253
1254         if (!dmar_find_matched_atsr_unit(info->dev))
1255                 return NULL;
1256
1257         info->iommu = iommu;
1258
1259         return info;
1260 }
1261
1262 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1263 {
1264         if (!info)
1265                 return;
1266
1267         pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1268 }
1269
1270 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1271 {
1272         if (!info->dev || !pci_ats_enabled(info->dev))
1273                 return;
1274
1275         pci_disable_ats(info->dev);
1276 }
1277
1278 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1279                                   u64 addr, unsigned mask)
1280 {
1281         u16 sid, qdep;
1282         unsigned long flags;
1283         struct device_domain_info *info;
1284
1285         spin_lock_irqsave(&device_domain_lock, flags);
1286         list_for_each_entry(info, &domain->devices, link) {
1287                 if (!info->dev || !pci_ats_enabled(info->dev))
1288                         continue;
1289
1290                 sid = info->bus << 8 | info->devfn;
1291                 qdep = pci_ats_queue_depth(info->dev);
1292                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1293         }
1294         spin_unlock_irqrestore(&device_domain_lock, flags);
1295 }
1296
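/*
 * Page-selective invalidation takes an address and an order: e.g. for
 * pages == 3 the mask below becomes ilog2(roundup_pow_of_two(3)) == 2,
 * so 2^2 = 4 naturally aligned pages are invalidated.
 */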
1297 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1298                                   unsigned long pfn, unsigned int pages, int ih, int map)
1299 {
1300         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1301         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1302
1303         BUG_ON(pages == 0);
1304
1305         if (ih)
1306                 ih = 1 << 6;
1307         /*
1308          * Fall back to a domain-selective flush if there is no PSI support or
1309          * the size is too big.
1310          * PSI requires the size to be 2^x pages, and the base address to be
1311          * naturally aligned to that size.
1312          */
1313         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1314                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1315                                                 DMA_TLB_DSI_FLUSH);
1316         else
1317                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1318                                                 DMA_TLB_PSI_FLUSH);
1319
1320         /*
1321          * In caching mode, changes of pages from non-present to present require
1322          * flush. However, device IOTLB doesn't need to be flushed in this case.
1323          */
1324         if (!cap_caching_mode(iommu->cap) || !map)
1325                 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1326 }
1327
1328 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1329 {
1330         u32 pmen;
1331         unsigned long flags;
1332
1333         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1334         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1335         pmen &= ~DMA_PMEN_EPM;
1336         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1337
1338         /* wait for the protected region status bit to clear */
1339         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1340                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1341
1342         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1343 }
1344
1345 static int iommu_enable_translation(struct intel_iommu *iommu)
1346 {
1347         u32 sts;
1348         unsigned long flags;
1349
1350         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1351         iommu->gcmd |= DMA_GCMD_TE;
1352         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1353
1354         /* Make sure hardware complete it */
1355         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1356                       readl, (sts & DMA_GSTS_TES), sts);
1357
1358         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1359         return 0;
1360 }
1361
1362 static int iommu_disable_translation(struct intel_iommu *iommu)
1363 {
1364         u32 sts;
1365         unsigned long flag;
1366
1367         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1368         iommu->gcmd &= ~DMA_GCMD_TE;
1369         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1370
1371         /* Make sure hardware complete it */
1372         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1373                       readl, (!(sts & DMA_GSTS_TES)), sts);
1374
1375         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1376         return 0;
1377 }
1378
1379
1380 static int iommu_init_domains(struct intel_iommu *iommu)
1381 {
1382         unsigned long ndomains;
1383         unsigned long nlongs;
1384
1385         ndomains = cap_ndoms(iommu->cap);
1386         pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1387                  iommu->seq_id, ndomains);
1388         nlongs = BITS_TO_LONGS(ndomains);
1389
1390         spin_lock_init(&iommu->lock);
1391
1392         /* TBD: there might be 64K domains,
1393          * consider other allocation for future chip
1394          */
1395         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1396         if (!iommu->domain_ids) {
1397                 pr_err("IOMMU%d: allocating domain id array failed\n",
1398                        iommu->seq_id);
1399                 return -ENOMEM;
1400         }
1401         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1402                         GFP_KERNEL);
1403         if (!iommu->domains) {
1404                 pr_err("IOMMU%d: allocating domain array failed\n",
1405                        iommu->seq_id);
1406                 kfree(iommu->domain_ids);
1407                 iommu->domain_ids = NULL;
1408                 return -ENOMEM;
1409         }
1410
1411         /*
1412          * If caching mode is set, then invalid translations are tagged
1413          * with domain id 0. Hence we need to pre-allocate it.
1414          */
1415         if (cap_caching_mode(iommu->cap))
1416                 set_bit(0, iommu->domain_ids);
1417         return 0;
1418 }
1419
1420 static void free_dmar_iommu(struct intel_iommu *iommu)
1421 {
1422         struct dmar_domain *domain;
1423         int i, count;
1424         unsigned long flags;
1425
1426         if ((iommu->domains) && (iommu->domain_ids)) {
1427                 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1428                         /*
1429                          * Domain id 0 is reserved for invalid translation
1430                          * if hardware supports caching mode.
1431                          */
1432                         if (cap_caching_mode(iommu->cap) && i == 0)
1433                                 continue;
1434
1435                         domain = iommu->domains[i];
1436                         clear_bit(i, iommu->domain_ids);
1437
1438                         spin_lock_irqsave(&domain->iommu_lock, flags);
1439                         count = --domain->iommu_count;
1440                         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1441                         if (count == 0)
1442                                 domain_exit(domain);
1443                 }
1444         }
1445
1446         if (iommu->gcmd & DMA_GCMD_TE)
1447                 iommu_disable_translation(iommu);
1448
1449         kfree(iommu->domains);
1450         kfree(iommu->domain_ids);
1451         iommu->domains = NULL;
1452         iommu->domain_ids = NULL;
1453
1454         g_iommus[iommu->seq_id] = NULL;
1455
1456         /* free context mapping */
1457         free_context_table(iommu);
1458 }
1459
1460 static struct dmar_domain *alloc_domain(bool vm)
1461 {
1462         /* domain id for virtual machine, it won't be set in context */
1463         static atomic_t vm_domid = ATOMIC_INIT(0);
1464         struct dmar_domain *domain;
1465
1466         domain = alloc_domain_mem();
1467         if (!domain)
1468                 return NULL;
1469
1470         domain->nid = -1;
1471         domain->iommu_count = 0;
1472         memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
1473         domain->flags = 0;
1474         spin_lock_init(&domain->iommu_lock);
1475         INIT_LIST_HEAD(&domain->devices);
1476         if (vm) {
1477                 domain->id = atomic_inc_return(&vm_domid);
1478                 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
1479         }
1480
1481         return domain;
1482 }
1483
1484 static int iommu_attach_domain(struct dmar_domain *domain,
1485                                struct intel_iommu *iommu)
1486 {
1487         int num;
1488         unsigned long ndomains;
1489         unsigned long flags;
1490
1491         ndomains = cap_ndoms(iommu->cap);
1492
1493         spin_lock_irqsave(&iommu->lock, flags);
1494
1495         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1496         if (num >= ndomains) {
1497                 spin_unlock_irqrestore(&iommu->lock, flags);
1498                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1499                 return -ENOMEM;
1500         }
1501
1502         domain->id = num;
1503         domain->iommu_count++;
1504         set_bit(num, iommu->domain_ids);
1505         set_bit(iommu->seq_id, domain->iommu_bmp);
1506         iommu->domains[num] = domain;
1507         spin_unlock_irqrestore(&iommu->lock, flags);
1508
1509         return 0;
1510 }
1511
1512 static void iommu_detach_domain(struct dmar_domain *domain,
1513                                 struct intel_iommu *iommu)
1514 {
1515         unsigned long flags;
1516         int num, ndomains;
1517
1518         spin_lock_irqsave(&iommu->lock, flags);
1519         ndomains = cap_ndoms(iommu->cap);
1520         for_each_set_bit(num, iommu->domain_ids, ndomains) {
1521                 if (iommu->domains[num] == domain) {
1522                         clear_bit(num, iommu->domain_ids);
1523                         iommu->domains[num] = NULL;
1524                         break;
1525                 }
1526         }
1527         spin_unlock_irqrestore(&iommu->lock, flags);
1528 }
1529
1530 static struct iova_domain reserved_iova_list;
1531 static struct lock_class_key reserved_rbtree_key;
1532
1533 static int dmar_init_reserved_ranges(void)
1534 {
1535         struct pci_dev *pdev = NULL;
1536         struct iova *iova;
1537         int i;
1538
1539         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1540
1541         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1542                 &reserved_rbtree_key);
1543
1544         /* IOAPIC ranges shouldn't be accessed by DMA */
1545         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1546                 IOVA_PFN(IOAPIC_RANGE_END));
1547         if (!iova) {
1548                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1549                 return -ENODEV;
1550         }
1551
1552         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1553         for_each_pci_dev(pdev) {
1554                 struct resource *r;
1555
1556                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1557                         r = &pdev->resource[i];
1558                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1559                                 continue;
1560                         iova = reserve_iova(&reserved_iova_list,
1561                                             IOVA_PFN(r->start),
1562                                             IOVA_PFN(r->end));
1563                         if (!iova) {
1564                                 printk(KERN_ERR "Reserve iova failed\n");
1565                                 return -ENODEV;
1566                         }
1567                 }
1568         }
1569         return 0;
1570 }
1571
1572 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1573 {
1574         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1575 }
1576
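/*
 * Round a guest address width up to the next width the page-table layout can
 * express (12 bits of page offset plus a multiple of 9-bit levels), capped at
 * 64.  For example, gaw = 48 gives (48 - 12) % 9 == 0 so agaw stays 48, while
 * gaw = 40 gives a remainder of 1 and is rounded up to 40 + 9 - 1 = 48.
 */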
1577 static inline int guestwidth_to_adjustwidth(int gaw)
1578 {
1579         int agaw;
1580         int r = (gaw - 12) % 9;
1581
1582         if (r == 0)
1583                 agaw = gaw;
1584         else
1585                 agaw = gaw + 9 - r;
1586         if (agaw > 64)
1587                 agaw = 64;
1588         return agaw;
1589 }
1590
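/*
 * First-time set-up of a newly attached domain: reserve the special IOVA
 * ranges, clamp the guest width to what the hardware reports (mgaw), pick a
 * supported adjusted width from cap_sagaw(), cache the coherency/snooping/
 * superpage capabilities and allocate the top-level page directory.
 */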
1591 static int domain_init(struct dmar_domain *domain, int guest_width)
1592 {
1593         struct intel_iommu *iommu;
1594         int adjust_width, agaw;
1595         unsigned long sagaw;
1596
1597         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1598         domain_reserve_special_ranges(domain);
1599
1600         /* calculate AGAW */
1601         iommu = domain_get_iommu(domain);
1602         if (guest_width > cap_mgaw(iommu->cap))
1603                 guest_width = cap_mgaw(iommu->cap);
1604         domain->gaw = guest_width;
1605         adjust_width = guestwidth_to_adjustwidth(guest_width);
1606         agaw = width_to_agaw(adjust_width);
1607         sagaw = cap_sagaw(iommu->cap);
1608         if (!test_bit(agaw, &sagaw)) {
1609                 /* hardware doesn't support it, choose a bigger one */
1610                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1611                 agaw = find_next_bit(&sagaw, 5, agaw);
1612                 if (agaw >= 5)
1613                         return -ENODEV;
1614         }
1615         domain->agaw = agaw;
1616
1617         if (ecap_coherent(iommu->ecap))
1618                 domain->iommu_coherency = 1;
1619         else
1620                 domain->iommu_coherency = 0;
1621
1622         if (ecap_sc_support(iommu->ecap))
1623                 domain->iommu_snooping = 1;
1624         else
1625                 domain->iommu_snooping = 0;
1626
1627         domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1628         domain->nid = iommu->node;
1629
1630         /* always allocate the top pgd */
1631         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1632         if (!domain->pgd)
1633                 return -ENOMEM;
1634         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1635         return 0;
1636 }
1637
1638 static void domain_exit(struct dmar_domain *domain)
1639 {
1640         struct dmar_drhd_unit *drhd;
1641         struct intel_iommu *iommu;
1642         struct page *freelist = NULL;
1643
1644         /* Domain 0 is reserved, so don't process it */
1645         if (!domain)
1646                 return;
1647
1648         /* Flush any lazy unmaps that may reference this domain */
1649         if (!intel_iommu_strict)
1650                 flush_unmaps_timeout(0);
1651
1652         /* remove associated devices */
1653         domain_remove_dev_info(domain);
1654
1655         /* destroy iovas */
1656         put_iova_domain(&domain->iovad);
1657
1658         freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1659
1660         /* clear attached or cached domains */
1661         rcu_read_lock();
1662         for_each_active_iommu(iommu, drhd)
1663                 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1664                     test_bit(iommu->seq_id, domain->iommu_bmp))
1665                         iommu_detach_domain(domain, iommu);
1666         rcu_read_unlock();
1667
1668         dma_free_pagelist(freelist);
1669
1670         free_domain_mem(domain);
1671 }
1672
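/*
 * Program the context entry for (segment, bus, devfn) so that DMA from that
 * device is translated through @domain, or passed through untranslated for
 * CONTEXT_TT_PASS_THROUGH.  An already-present context entry is left alone.
 * VM and static-identity domains may need a per-iommu domain id here, and may
 * have to skip top page-table levels when this iommu's agaw is smaller than
 * the domain's.
 */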
1673 static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1674                                  u8 bus, u8 devfn, int translation)
1675 {
1676         struct context_entry *context;
1677         unsigned long flags;
1678         struct intel_iommu *iommu;
1679         struct dma_pte *pgd;
1680         unsigned long num;
1681         unsigned long ndomains;
1682         int id;
1683         int agaw;
1684         struct device_domain_info *info = NULL;
1685
1686         pr_debug("Set context mapping for %02x:%02x.%d\n",
1687                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1688
1689         BUG_ON(!domain->pgd);
1690         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1691                translation != CONTEXT_TT_MULTI_LEVEL);
1692
1693         iommu = device_to_iommu(segment, bus, devfn);
1694         if (!iommu)
1695                 return -ENODEV;
1696
1697         context = device_to_context_entry(iommu, bus, devfn);
1698         if (!context)
1699                 return -ENOMEM;
1700         spin_lock_irqsave(&iommu->lock, flags);
1701         if (context_present(context)) {
1702                 spin_unlock_irqrestore(&iommu->lock, flags);
1703                 return 0;
1704         }
1705
1706         id = domain->id;
1707         pgd = domain->pgd;
1708
1709         if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1710             domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1711                 int found = 0;
1712
1713                 /* find an available domain id for this device in iommu */
1714                 ndomains = cap_ndoms(iommu->cap);
1715                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1716                         if (iommu->domains[num] == domain) {
1717                                 id = num;
1718                                 found = 1;
1719                                 break;
1720                         }
1721                 }
1722
1723                 if (found == 0) {
1724                         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1725                         if (num >= ndomains) {
1726                                 spin_unlock_irqrestore(&iommu->lock, flags);
1727                                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1728                                 return -EFAULT;
1729                         }
1730
1731                         set_bit(num, iommu->domain_ids);
1732                         iommu->domains[num] = domain;
1733                         id = num;
1734                 }
1735
1736                 /* Skip top levels of page tables for an iommu
1737                  * which has a smaller agaw than the default.
1738                  * Unnecessary for PT mode.
1739                  */
1740                 if (translation != CONTEXT_TT_PASS_THROUGH) {
1741                         for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1742                                 pgd = phys_to_virt(dma_pte_addr(pgd));
1743                                 if (!dma_pte_present(pgd)) {
1744                                         spin_unlock_irqrestore(&iommu->lock, flags);
1745                                         return -ENOMEM;
1746                                 }
1747                         }
1748                 }
1749         }
1750
1751         context_set_domain_id(context, id);
1752
1753         if (translation != CONTEXT_TT_PASS_THROUGH) {
1754                 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1755                 translation = info ? CONTEXT_TT_DEV_IOTLB :
1756                                      CONTEXT_TT_MULTI_LEVEL;
1757         }
1758         /*
1759          * In pass through mode, AW must be programmed to indicate the largest
1760          * AGAW value supported by hardware. And ASR is ignored by hardware.
1761          */
1762         if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1763                 context_set_address_width(context, iommu->msagaw);
1764         else {
1765                 context_set_address_root(context, virt_to_phys(pgd));
1766                 context_set_address_width(context, iommu->agaw);
1767         }
1768
1769         context_set_translation_type(context, translation);
1770         context_set_fault_enable(context);
1771         context_set_present(context);
1772         domain_flush_cache(domain, context, sizeof(*context));
1773
1774         /*
1775          * It's a non-present to present mapping. If hardware doesn't cache
1776          * non-present entries we only need to flush the write-buffer. If it
1777          * _does_ cache non-present entries, then it does so in the special
1778          * domain #0, which we have to flush:
1779          */
1780         if (cap_caching_mode(iommu->cap)) {
1781                 iommu->flush.flush_context(iommu, 0,
1782                                            (((u16)bus) << 8) | devfn,
1783                                            DMA_CCMD_MASK_NOBIT,
1784                                            DMA_CCMD_DEVICE_INVL);
1785                 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1786         } else {
1787                 iommu_flush_write_buffer(iommu);
1788         }
1789         iommu_enable_dev_iotlb(info);
1790         spin_unlock_irqrestore(&iommu->lock, flags);
1791
1792         spin_lock_irqsave(&domain->iommu_lock, flags);
1793         if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1794                 domain->iommu_count++;
1795                 if (domain->iommu_count == 1)
1796                         domain->nid = iommu->node;
1797                 domain_update_iommu_cap(domain);
1798         }
1799         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1800         return 0;
1801 }
1802
1803 static int
1804 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1805                         int translation)
1806 {
1807         int ret;
1808         struct pci_dev *tmp, *parent;
1809
1810         ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1811                                          pdev->bus->number, pdev->devfn,
1812                                          translation);
1813         if (ret)
1814                 return ret;
1815
1816         /* dependent device mapping */
1817         tmp = pci_find_upstream_pcie_bridge(pdev);
1818         if (!tmp)
1819                 return 0;
1820         /* Secondary interface's bus number and devfn 0 */
1821         parent = pdev->bus->self;
1822         while (parent != tmp) {
1823                 ret = domain_context_mapping_one(domain,
1824                                                  pci_domain_nr(parent->bus),
1825                                                  parent->bus->number,
1826                                                  parent->devfn, translation);
1827                 if (ret)
1828                         return ret;
1829                 parent = parent->bus->self;
1830         }
1831         if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
1832                 return domain_context_mapping_one(domain,
1833                                         pci_domain_nr(tmp->subordinate),
1834                                         tmp->subordinate->number, 0,
1835                                         translation);
1836         else /* this is a legacy PCI bridge */
1837                 return domain_context_mapping_one(domain,
1838                                                   pci_domain_nr(tmp->bus),
1839                                                   tmp->bus->number,
1840                                                   tmp->devfn,
1841                                                   translation);
1842 }
1843
1844 static int domain_context_mapped(struct pci_dev *pdev)
1845 {
1846         int ret;
1847         struct pci_dev *tmp, *parent;
1848         struct intel_iommu *iommu;
1849
1850         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1851                                 pdev->devfn);
1852         if (!iommu)
1853                 return -ENODEV;
1854
1855         ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1856         if (!ret)
1857                 return ret;
1858         /* dependent device mapping */
1859         tmp = pci_find_upstream_pcie_bridge(pdev);
1860         if (!tmp)
1861                 return ret;
1862         /* Secondary interface's bus number and devfn 0 */
1863         parent = pdev->bus->self;
1864         while (parent != tmp) {
1865                 ret = device_context_mapped(iommu, parent->bus->number,
1866                                             parent->devfn);
1867                 if (!ret)
1868                         return ret;
1869                 parent = parent->bus->self;
1870         }
1871         if (pci_is_pcie(tmp))
1872                 return device_context_mapped(iommu, tmp->subordinate->number,
1873                                              0);
1874         else
1875                 return device_context_mapped(iommu, tmp->bus->number,
1876                                              tmp->devfn);
1877 }
1878
1879 /* Returns the number of VTD pages, but aligned to MM page size */
1880 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1881                                             size_t size)
1882 {
1883         host_addr &= ~PAGE_MASK;
1884         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1885 }
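/*
 * Example (assuming 4KiB MM pages and a 4KiB VT-d page size): host_addr =
 * 0x1800 and size = 0x2000 leave an in-page offset of 0x800, and
 * PAGE_ALIGN(0x800 + 0x2000) >> VTD_PAGE_SHIFT yields 3 VT-d pages.
 */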
1886
1887 /* Return largest possible superpage level for a given mapping */
1888 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1889                                           unsigned long iov_pfn,
1890                                           unsigned long phy_pfn,
1891                                           unsigned long pages)
1892 {
1893         int support, level = 1;
1894         unsigned long pfnmerge;
1895
1896         support = domain->iommu_superpage;
1897
1898         /* To use a large page, the virtual *and* physical addresses
1899            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1900            of them will mean we have to use smaller pages. So just
1901            merge them and check both at once. */
1902         pfnmerge = iov_pfn | phy_pfn;
1903
1904         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1905                 pages >>= VTD_STRIDE_SHIFT;
1906                 if (!pages)
1907                         break;
1908                 pfnmerge >>= VTD_STRIDE_SHIFT;
1909                 level++;
1910                 support--;
1911         }
1912         return level;
1913 }
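/*
 * Example: if the iommu only advertises 2MiB superpages (iommu_superpage ==
 * 1) and both iov_pfn and phy_pfn are 512-page (2MiB) aligned with at least
 * 512 pages left to map, the loop returns level 2 and a 2MiB PTE can be used.
 */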
1914
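/*
 * Core mapping routine: fill in PTEs for @nr_pages starting at @iov_pfn,
 * taking physical addresses either from @sg (scatterlist mode) or from a
 * contiguous range starting at @phys_pfn.  Superpage PTEs are used whenever
 * hardware_largepage_caps() allows, and the CPU cache is flushed whenever a
 * page worth of PTEs has been written and at the end of the mapping.
 */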
1915 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1916                             struct scatterlist *sg, unsigned long phys_pfn,
1917                             unsigned long nr_pages, int prot)
1918 {
1919         struct dma_pte *first_pte = NULL, *pte = NULL;
1920         phys_addr_t uninitialized_var(pteval);
1921         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1922         unsigned long sg_res;
1923         unsigned int largepage_lvl = 0;
1924         unsigned long lvl_pages = 0;
1925
1926         BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1927
1928         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1929                 return -EINVAL;
1930
1931         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1932
1933         if (sg)
1934                 sg_res = 0;
1935         else {
1936                 sg_res = nr_pages + 1;
1937                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1938         }
1939
1940         while (nr_pages > 0) {
1941                 uint64_t tmp;
1942
1943                 if (!sg_res) {
1944                         sg_res = aligned_nrpages(sg->offset, sg->length);
1945                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1946                         sg->dma_length = sg->length;
1947                         pteval = page_to_phys(sg_page(sg)) | prot;
1948                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
1949                 }
1950
1951                 if (!pte) {
1952                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1953
1954                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
1955                         if (!pte)
1956                                 return -ENOMEM;
1957                         /* It is a large page */
1958                         if (largepage_lvl > 1) {
1959                                 pteval |= DMA_PTE_LARGE_PAGE;
1960                                 /* Ensure that old small page tables are removed to make room
1961                                    for superpage, if they exist. */
1962                                 dma_pte_clear_range(domain, iov_pfn,
1963                                                     iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1964                                 dma_pte_free_pagetable(domain, iov_pfn,
1965                                                        iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1966                         } else {
1967                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1968                         }
1969
1970                 }
1971                 /* We don't need a lock here; nobody else
1972                  * touches this iova range
1973                  */
1974                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1975                 if (tmp) {
1976                         static int dumps = 5;
1977                         printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1978                                iov_pfn, tmp, (unsigned long long)pteval);
1979                         if (dumps) {
1980                                 dumps--;
1981                                 debug_dma_dump_mappings(NULL);
1982                         }
1983                         WARN_ON(1);
1984                 }
1985
1986                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1987
1988                 BUG_ON(nr_pages < lvl_pages);
1989                 BUG_ON(sg_res < lvl_pages);
1990
1991                 nr_pages -= lvl_pages;
1992                 iov_pfn += lvl_pages;
1993                 phys_pfn += lvl_pages;
1994                 pteval += lvl_pages * VTD_PAGE_SIZE;
1995                 sg_res -= lvl_pages;
1996
1997                 /* If the next PTE would be the first in a new page, then we
1998                    need to flush the cache on the entries we've just written.
1999                    And then we'll need to recalculate 'pte', so clear it and
2000                    let it get set again in the if (!pte) block above.
2001
2002                    If we're done (!nr_pages) we need to flush the cache too.
2003
2004                    Also if we've been setting superpages, we may need to
2005                    recalculate 'pte' and switch back to smaller pages for the
2006                    end of the mapping, if the trailing size is not enough to
2007                    use another superpage (i.e. sg_res < lvl_pages). */
2008                 pte++;
2009                 if (!nr_pages || first_pte_in_page(pte) ||
2010                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2011                         domain_flush_cache(domain, first_pte,
2012                                            (void *)pte - (void *)first_pte);
2013                         pte = NULL;
2014                 }
2015
2016                 if (!sg_res && nr_pages)
2017                         sg = sg_next(sg);
2018         }
2019         return 0;
2020 }
2021
2022 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2023                                     struct scatterlist *sg, unsigned long nr_pages,
2024                                     int prot)
2025 {
2026         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2027 }
2028
2029 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2030                                      unsigned long phys_pfn, unsigned long nr_pages,
2031                                      int prot)
2032 {
2033         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2034 }
2035
2036 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
2037 {
2038         if (!iommu)
2039                 return;
2040
2041         clear_context_table(iommu, bus, devfn);
2042         iommu->flush.flush_context(iommu, 0, 0, 0,
2043                                            DMA_CCMD_GLOBAL_INVL);
2044         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2045 }
2046
2047 static inline void unlink_domain_info(struct device_domain_info *info)
2048 {
2049         assert_spin_locked(&device_domain_lock);
2050         list_del(&info->link);
2051         list_del(&info->global);
2052         if (info->dev)
2053                 info->dev->dev.archdata.iommu = NULL;
2054 }
2055
2056 static void domain_remove_dev_info(struct dmar_domain *domain)
2057 {
2058         struct device_domain_info *info;
2059         unsigned long flags, flags2;
2060         struct intel_iommu *iommu;
2061
2062         spin_lock_irqsave(&device_domain_lock, flags);
2063         while (!list_empty(&domain->devices)) {
2064                 info = list_entry(domain->devices.next,
2065                         struct device_domain_info, link);
2066                 unlink_domain_info(info);
2067                 spin_unlock_irqrestore(&device_domain_lock, flags);
2068
2069                 iommu_disable_dev_iotlb(info);
2070                 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
2071                 iommu_detach_dev(iommu, info->bus, info->devfn);
2072
2073                 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
2074                         iommu_detach_dependent_devices(iommu, info->dev);
2075                         /* clear this iommu in iommu_bmp, update iommu count
2076                          * and capabilities
2077                          */
2078                         spin_lock_irqsave(&domain->iommu_lock, flags2);
2079                         if (test_and_clear_bit(iommu->seq_id,
2080                                                domain->iommu_bmp)) {
2081                                 domain->iommu_count--;
2082                                 domain_update_iommu_cap(domain);
2083                         }
2084                         spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2085                 }
2086
2087                 free_devinfo_mem(info);
2088                 spin_lock_irqsave(&device_domain_lock, flags);
2089         }
2090         spin_unlock_irqrestore(&device_domain_lock, flags);
2091 }
2092
2093 /*
2094  * find_domain
2095  * Note: we use struct pci_dev->dev.archdata.iommu to store the info
2096  */
2097 static struct dmar_domain *
2098 find_domain(struct pci_dev *pdev)
2099 {
2100         struct device_domain_info *info;
2101
2102         /* No lock here, assumes no domain exit in normal case */
2103         info = pdev->dev.archdata.iommu;
2104         if (info)
2105                 return info->domain;
2106         return NULL;
2107 }
2108
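/*
 * Look up a previously registered domain by (segment, bus, devfn).  Callers
 * in this file hold device_domain_lock while walking device_domain_list.
 */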
2109 static inline struct dmar_domain *
2110 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2111 {
2112         struct device_domain_info *info;
2113
2114         list_for_each_entry(info, &device_domain_list, global)
2115                 if (info->segment == segment && info->bus == bus &&
2116                     info->devfn == devfn)
2117                         return info->domain;
2118
2119         return NULL;
2120 }
2121
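/*
 * Register a device (or, for a bridge, the bare segment/bus/devfn) with the
 * domain in *domp.  If another thread already installed a domain for the same
 * device, the freshly allocated info is dropped and, when the domains differ,
 * *domp is torn down and replaced by the domain that was found.
 */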
2122 static int dmar_insert_dev_info(int segment, int bus, int devfn,
2123                                 struct pci_dev *dev, struct dmar_domain **domp)
2124 {
2125         struct dmar_domain *found, *domain = *domp;
2126         struct device_domain_info *info;
2127         unsigned long flags;
2128
2129         info = alloc_devinfo_mem();
2130         if (!info)
2131                 return -ENOMEM;
2132
2133         info->segment = segment;
2134         info->bus = bus;
2135         info->devfn = devfn;
2136         info->dev = dev;
2137         info->domain = domain;
2138         if (!dev)
2139                 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2140
2141         spin_lock_irqsave(&device_domain_lock, flags);
2142         if (dev)
2143                 found = find_domain(dev);
2144         else
2145                 found = dmar_search_domain_by_dev_info(segment, bus, devfn);
2146         if (found) {
2147                 spin_unlock_irqrestore(&device_domain_lock, flags);
2148                 free_devinfo_mem(info);
2149                 if (found != domain) {
2150                         domain_exit(domain);
2151                         *domp = found;
2152                 }
2153         } else {
2154                 list_add(&info->link, &domain->devices);
2155                 list_add(&info->global, &device_domain_list);
2156                 if (dev)
2157                         dev->dev.archdata.iommu = info;
2158                 spin_unlock_irqrestore(&device_domain_lock, flags);
2159         }
2160
2161         return 0;
2162 }
2163
2164 /* Find an existing domain for the device, or allocate and initialize a new one */
2165 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
2166 {
2167         struct dmar_domain *domain, *free = NULL;
2168         struct intel_iommu *iommu;
2169         struct dmar_drhd_unit *drhd;
2170         struct pci_dev *dev_tmp;
2171         unsigned long flags;
2172         int bus = 0, devfn = 0;
2173         int segment;
2174
2175         domain = find_domain(pdev);
2176         if (domain)
2177                 return domain;
2178
2179         segment = pci_domain_nr(pdev->bus);
2180
2181         dev_tmp = pci_find_upstream_pcie_bridge(pdev);
2182         if (dev_tmp) {
2183                 if (pci_is_pcie(dev_tmp)) {
2184                         bus = dev_tmp->subordinate->number;
2185                         devfn = 0;
2186                 } else {
2187                         bus = dev_tmp->bus->number;
2188                         devfn = dev_tmp->devfn;
2189                 }
2190                 spin_lock_irqsave(&device_domain_lock, flags);
2191                 domain = dmar_search_domain_by_dev_info(segment, bus, devfn);
2192                 spin_unlock_irqrestore(&device_domain_lock, flags);
2193                 /* pcie-pci bridge already has a domain, use it */
2194                 if (domain)
2195                         goto found_domain;
2196         }
2197
2198         drhd = dmar_find_matched_drhd_unit(pdev);
2199         if (!drhd) {
2200                 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
2201                         pci_name(pdev));
2202                 return NULL;
2203         }
2204         iommu = drhd->iommu;
2205
2206         /* Allocate and initialize a new domain for the device */
2207         domain = alloc_domain(false);
2208         if (!domain)
2209                 goto error;
2210         if (iommu_attach_domain(domain, iommu)) {
2211                 free_domain_mem(domain);
2212                 goto error;
2213         }
2214         free = domain;
2215         if (domain_init(domain, gaw))
2216                 goto error;
2217
2218         /* register pcie-to-pci device */
2219         if (dev_tmp) {
2220                 if (dmar_insert_dev_info(segment, bus, devfn, NULL, &domain))
2221                         goto error;
2222                 else
2223                         free = NULL;
2224         }
2225
2226 found_domain:
2227         if (dmar_insert_dev_info(segment, pdev->bus->number, pdev->devfn,
2228                                  pdev, &domain) == 0)
2229                 return domain;
2230 error:
2231         if (free)
2232                 domain_exit(free);
2233         /* recheck it here; another caller may have set it already */
2234         return find_domain(pdev);
2235 }
2236
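/*
 * iommu_identity_mapping is a bitmask selecting which classes of device are
 * given a static 1:1 (identity) mapping: every device, graphics devices, or
 * the Azalia quirk device.
 */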
2237 static int iommu_identity_mapping;
2238 #define IDENTMAP_ALL            1
2239 #define IDENTMAP_GFX            2
2240 #define IDENTMAP_AZALIA         4
2241
2242 static int iommu_domain_identity_map(struct dmar_domain *domain,
2243                                      unsigned long long start,
2244                                      unsigned long long end)
2245 {
2246         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2247         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2248
2249         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2250                           dma_to_mm_pfn(last_vpfn))) {
2251                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2252                 return -ENOMEM;
2253         }
2254
2255         pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2256                  start, end, domain->id);
2257         /*
2258          * RMRR range might have overlap with physical memory range,
2259          * clear it first
2260          */
2261         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2262
2263         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2264                                   last_vpfn - first_vpfn + 1,
2265                                   DMA_PTE_READ|DMA_PTE_WRITE);
2266 }
2267
2268 static int iommu_prepare_identity_map(struct pci_dev *pdev,
2269                                       unsigned long long start,
2270                                       unsigned long long end)
2271 {
2272         struct dmar_domain *domain;
2273         int ret;
2274
2275         domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2276         if (!domain)
2277                 return -ENOMEM;
2278
2279         /* For _hardware_ passthrough, don't bother. But for software
2280            passthrough, we do it anyway -- it may indicate a memory
2281            range which is reserved in E820 and so didn't get set
2282            up to start with in si_domain */
2283         if (domain == si_domain && hw_pass_through) {
2284                 printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2285                        pci_name(pdev), start, end);
2286                 return 0;
2287         }
2288
2289         printk(KERN_INFO
2290                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2291                pci_name(pdev), start, end);
2292
2293         if (end < start) {
2294                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2295                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2296                         dmi_get_system_info(DMI_BIOS_VENDOR),
2297                         dmi_get_system_info(DMI_BIOS_VERSION),
2298                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2299                 ret = -EIO;
2300                 goto error;
2301         }
2302
2303         if (end >> agaw_to_width(domain->agaw)) {
2304                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2305                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2306                      agaw_to_width(domain->agaw),
2307                      dmi_get_system_info(DMI_BIOS_VENDOR),
2308                      dmi_get_system_info(DMI_BIOS_VERSION),
2309                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2310                 ret = -EIO;
2311                 goto error;
2312         }
2313
2314         ret = iommu_domain_identity_map(domain, start, end);
2315         if (ret)
2316                 goto error;
2317
2318         /* context entry init */
2319         ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2320         if (ret)
2321                 goto error;
2322
2323         return 0;
2324
2325  error:
2326         domain_exit(domain);
2327         return ret;
2328 }
2329
2330 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2331         struct pci_dev *pdev)
2332 {
2333         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2334                 return 0;
2335         return iommu_prepare_identity_map(pdev, rmrr->base_address,
2336                 rmrr->end_address);
2337 }
2338
2339 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2340 static inline void iommu_prepare_isa(void)
2341 {
2342         struct pci_dev *pdev;
2343         int ret;
2344
2345         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2346         if (!pdev)
2347                 return;
2348
2349         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2350         ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
2351
2352         if (ret)
2353                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2354                        "floppy might not work\n");
2355
2356 }
2357 #else
2358 static inline void iommu_prepare_isa(void)
2359 {
2360         return;
2361 }
2362 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
2363
2364 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2365
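/*
 * Build the static identity (si) domain: attach it to every active iommu,
 * initialize it with the default address width and, unless hardware
 * pass-through is in use, install 1:1 mappings for every usable memory range
 * of every online node.
 */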
2366 static int __init si_domain_init(int hw)
2367 {
2368         struct dmar_drhd_unit *drhd;
2369         struct intel_iommu *iommu;
2370         int nid, ret = 0;
2371
2372         si_domain = alloc_domain(false);
2373         if (!si_domain)
2374                 return -EFAULT;
2375
2376         si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2377
2378         for_each_active_iommu(iommu, drhd) {
2379                 ret = iommu_attach_domain(si_domain, iommu);
2380                 if (ret) {
2381                         domain_exit(si_domain);
2382                         return -EFAULT;
2383                 }
2384         }
2385
2386         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2387                 domain_exit(si_domain);
2388                 return -EFAULT;
2389         }
2390
2391         pr_debug("IOMMU: identity mapping domain is domain %d\n",
2392                  si_domain->id);
2393
2394         if (hw)
2395                 return 0;
2396
2397         for_each_online_node(nid) {
2398                 unsigned long start_pfn, end_pfn;
2399                 int i;
2400
2401                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2402                         ret = iommu_domain_identity_map(si_domain,
2403                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2404                         if (ret)
2405                                 return ret;
2406                 }
2407         }
2408
2409         return 0;
2410 }
2411
2412 static int identity_mapping(struct pci_dev *pdev)
2413 {
2414         struct device_domain_info *info;
2415
2416         if (likely(!iommu_identity_mapping))
2417                 return 0;
2418
2419         info = pdev->dev.archdata.iommu;
2420         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2421                 return (info->domain == si_domain);
2422
2423         return 0;
2424 }
2425
2426 static int domain_add_dev_info(struct dmar_domain *domain,
2427                                struct pci_dev *pdev,
2428                                int translation)
2429 {
2430         struct device_domain_info *info;
2431         unsigned long flags;
2432         int ret;
2433
2434         info = alloc_devinfo_mem();
2435         if (!info)
2436                 return -ENOMEM;
2437
2438         info->segment = pci_domain_nr(pdev->bus);
2439         info->bus = pdev->bus->number;
2440         info->devfn = pdev->devfn;
2441         info->dev = pdev;
2442         info->domain = domain;
2443
2444         spin_lock_irqsave(&device_domain_lock, flags);
2445         list_add(&info->link, &domain->devices);
2446         list_add(&info->global, &device_domain_list);
2447         pdev->dev.archdata.iommu = info;
2448         spin_unlock_irqrestore(&device_domain_lock, flags);
2449
2450         ret = domain_context_mapping(domain, pdev, translation);
2451         if (ret) {
2452                 spin_lock_irqsave(&device_domain_lock, flags);
2453                 unlink_domain_info(info);
2454                 spin_unlock_irqrestore(&device_domain_lock, flags);
2455                 free_devinfo_mem(info);
2456                 return ret;
2457         }
2458
2459         return 0;
2460 }
2461
2462 static bool device_has_rmrr(struct pci_dev *dev)
2463 {
2464         struct dmar_rmrr_unit *rmrr;
2465         struct pci_dev *tmp;
2466         int i;
2467
2468         rcu_read_lock();
2469         for_each_rmrr_units(rmrr) {
2470                 /*
2471                  * Return TRUE if this RMRR contains the device that
2472                  * is passed in.
2473                  */
2474                 for_each_active_dev_scope(rmrr->devices,
2475                                           rmrr->devices_cnt, i, tmp)
2476                         if (tmp == dev) {
2477                                 rcu_read_unlock();
2478                                 return true;
2479                         }
2480         }
2481         rcu_read_unlock();
2482         return false;
2483 }
2484
2485 static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2486 {
2487
2488         /*
2489          * We want to prevent any device associated with an RMRR from
2490          * getting placed into the SI Domain. This is done because
2491          * problems exist when devices are moved in and out of domains
2492          * and their respective RMRR info is lost. We exempt USB devices
2493          * from this process due to their usage of RMRRs that are known
2494          * to not be needed after BIOS hand-off to OS.
2495          */
2496         if (device_has_rmrr(pdev) &&
2497             (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2498                 return 0;
2499
2500         if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2501                 return 1;
2502
2503         if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2504                 return 1;
2505
2506         if (!(iommu_identity_mapping & IDENTMAP_ALL))
2507                 return 0;
2508
2509         /*
2510          * We want to start off with all devices in the 1:1 domain, and
2511          * take them out later if we find they can't access all of memory.
2512          *
2513          * However, we can't do this for PCI devices behind bridges,
2514          * because all PCI devices behind the same bridge will end up
2515          * with the same source-id on their transactions.
2516          *
2517          * Practically speaking, we can't change things around for these
2518          * devices at run-time, because we can't be sure there'll be no
2519          * DMA transactions in flight for any of their siblings.
2520          * 
2521          * So PCI devices (unless they're on the root bus) as well as
2522          * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2523          * the 1:1 domain, just in _case_ one of their siblings turns out
2524          * not to be able to map all of memory.
2525          */
2526         if (!pci_is_pcie(pdev)) {
2527                 if (!pci_is_root_bus(pdev->bus))
2528                         return 0;
2529                 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2530                         return 0;
2531         } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2532                 return 0;
2533
2534         /* 
2535          * At boot time, we don't yet know if devices will be 64-bit capable.
2536          * Assume that they will -- if they turn out not to be, then we can 
2537          * take them out of the 1:1 domain later.
2538          */
2539         if (!startup) {
2540                 /*
2541                  * If the device's dma_mask is less than the system's memory
2542                  * size then this is not a candidate for identity mapping.
2543                  */
2544                 u64 dma_mask = pdev->dma_mask;
2545
2546                 if (pdev->dev.coherent_dma_mask &&
2547                     pdev->dev.coherent_dma_mask < dma_mask)
2548                         dma_mask = pdev->dev.coherent_dma_mask;
2549
2550                 return dma_mask >= dma_get_required_mask(&pdev->dev);
2551         }
2552
2553         return 1;
2554 }
2555
2556 static int __init iommu_prepare_static_identity_mapping(int hw)
2557 {
2558         struct pci_dev *pdev = NULL;
2559         int ret;
2560
2561         ret = si_domain_init(hw);
2562         if (ret)
2563                 return -EFAULT;
2564
2565         for_each_pci_dev(pdev) {
2566                 if (iommu_should_identity_map(pdev, 1)) {
2567                         ret = domain_add_dev_info(si_domain, pdev,
2568                                              hw ? CONTEXT_TT_PASS_THROUGH :
2569                                                   CONTEXT_TT_MULTI_LEVEL);
2570                         if (ret) {
2571                                 /* device not associated with an iommu */
2572                                 if (ret == -ENODEV)
2573                                         continue;
2574                                 return ret;
2575                         }
2576                         pr_info("IOMMU: %s identity mapping for device %s\n",
2577                                 hw ? "hardware" : "software", pci_name(pdev));
2578                 }
2579         }
2580
2581         return 0;
2582 }
2583
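/*
 * One-time boot-time DMAR setup: count the usable IOMMUs, allocate the global
 * iommu and deferred-flush arrays, create per-iommu domain bookkeeping and
 * root entries, pick queued or register-based invalidation, build the
 * identity, RMRR and ISA mappings, and finally enable translation on each
 * unit.
 */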
2584 static int __init init_dmars(void)
2585 {
2586         struct dmar_drhd_unit *drhd;
2587         struct dmar_rmrr_unit *rmrr;
2588         struct pci_dev *pdev;
2589         struct intel_iommu *iommu;
2590         int i, ret;
2591
2592         /*
2593          * for each drhd
2594          *    allocate root
2595          *    initialize and program root entry to not present
2596          * endfor
2597          */
2598         for_each_drhd_unit(drhd) {
2599                 /*
2600                  * lock not needed as this is only incremented in the
2601                  * single-threaded kernel __init code path; all other
2602                  * accesses are read only
2603                  */
2604                 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2605                         g_num_of_iommus++;
2606                         continue;
2607                 }
2608                 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2609                           IOMMU_UNITS_SUPPORTED);
2610         }
2611
2612         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2613                         GFP_KERNEL);
2614         if (!g_iommus) {
2615                 printk(KERN_ERR "Allocating global iommu array failed\n");
2616                 ret = -ENOMEM;
2617                 goto error;
2618         }
2619
2620         deferred_flush = kzalloc(g_num_of_iommus *
2621                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2622         if (!deferred_flush) {
2623                 ret = -ENOMEM;
2624                 goto free_g_iommus;
2625         }
2626
2627         for_each_active_iommu(iommu, drhd) {
2628                 g_iommus[iommu->seq_id] = iommu;
2629
2630                 ret = iommu_init_domains(iommu);
2631                 if (ret)
2632                         goto free_iommu;
2633
2634                 /*
2635                  * TBD:
2636                  * we could share the same root & context tables
2637                  * among all IOMMUs. Need to split it later.
2638                  */
2639                 ret = iommu_alloc_root_entry(iommu);
2640                 if (ret) {
2641                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2642                         goto free_iommu;
2643                 }
2644                 if (!ecap_pass_through(iommu->ecap))
2645                         hw_pass_through = 0;
2646         }
2647
2648         /*
2649          * Start from a sane iommu hardware state.
2650          */
2651         for_each_active_iommu(iommu, drhd) {
2652                 /*
2653                  * If queued invalidation was already initialized by us
2654                  * (for example, while enabling interrupt-remapping), then
2655                  * things are already rolling from a sane state.
2656                  */
2657                 if (iommu->qi)
2658                         continue;
2659
2660                 /*
2661                  * Clear any previous faults.
2662                  */
2663                 dmar_fault(-1, iommu);
2664                 /*
2665                  * Disable queued invalidation if supported and already enabled
2666                  * before OS handover.
2667                  */
2668                 dmar_disable_qi(iommu);
2669         }
2670
2671         for_each_active_iommu(iommu, drhd) {
2672                 if (dmar_enable_qi(iommu)) {
2673                         /*
2674                          * Queued Invalidation not enabled, use Register
2675                          * Based Invalidation
2676                          */
2677                         iommu->flush.flush_context = __iommu_flush_context;
2678                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2679                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2680                                "invalidation\n",
2681                                 iommu->seq_id,
2682                                (unsigned long long)drhd->reg_base_addr);
2683                 } else {
2684                         iommu->flush.flush_context = qi_flush_context;
2685                         iommu->flush.flush_iotlb = qi_flush_iotlb;
2686                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2687                                "invalidation\n",
2688                                 iommu->seq_id,
2689                                (unsigned long long)drhd->reg_base_addr);
2690                 }
2691         }
2692
2693         if (iommu_pass_through)
2694                 iommu_identity_mapping |= IDENTMAP_ALL;
2695
2696 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2697         iommu_identity_mapping |= IDENTMAP_GFX;
2698 #endif
2699
2700         check_tylersburg_isoch();
2701
2702         /*
2703          * If pass through is not set or not enabled, set up context entries
2704          * for identity mappings for rmrr, gfx, and isa, and fall back to
2705          * static identity mapping if iommu_identity_mapping is set.
2706          */
2707         if (iommu_identity_mapping) {
2708                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2709                 if (ret) {
2710                         printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2711                         goto free_iommu;
2712                 }
2713         }
2714         /*
2715          * For each rmrr
2716          *   for each dev attached to rmrr
2717          *   do
2718          *     locate drhd for dev, alloc domain for dev
2719          *     allocate free domain
2720          *     allocate page table entries for rmrr
2721          *     if context not allocated for bus
2722          *           allocate and init context
2723          *           set present in root table for this bus
2724          *     init context with domain, translation etc
2725          *    endfor
2726          * endfor
2727          */
2728         printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2729         for_each_rmrr_units(rmrr) {
2730                 /* some BIOSes list non-existent devices in the DMAR table. */
2731                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2732                                           i, pdev) {
2733                         ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2734                         if (ret)
2735                                 printk(KERN_ERR
2736                                        "IOMMU: mapping reserved region failed\n");
2737                 }
2738         }
2739
2740         iommu_prepare_isa();
2741
2742         /*
2743          * for each drhd
2744          *   enable fault log
2745          *   global invalidate context cache
2746          *   global invalidate iotlb
2747          *   enable translation
2748          */
2749         for_each_iommu(iommu, drhd) {
2750                 if (drhd->ignored) {
2751                         /*
2752                          * we always have to disable PMRs or DMA may fail on
2753                          * this device
2754                          */
2755                         if (force_on)
2756                                 iommu_disable_protect_mem_regions(iommu);
2757                         continue;
2758                 }
2759
2760                 iommu_flush_write_buffer(iommu);
2761
2762                 ret = dmar_set_interrupt(iommu);
2763                 if (ret)
2764                         goto free_iommu;
2765
2766                 iommu_set_root_entry(iommu);
2767
2768                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2769                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2770
2771                 ret = iommu_enable_translation(iommu);
2772                 if (ret)
2773                         goto free_iommu;
2774
2775                 iommu_disable_protect_mem_regions(iommu);
2776         }
2777
2778         return 0;
2779
2780 free_iommu:
2781         for_each_active_iommu(iommu, drhd)
2782                 free_dmar_iommu(iommu);
2783         kfree(deferred_flush);
2784 free_g_iommus:
2785         kfree(g_iommus);
2786 error:
2787         return ret;
2788 }
2789
2790 /* This takes a number of _MM_ pages, not VTD pages */
2791 static struct iova *intel_alloc_iova(struct device *dev,
2792                                      struct dmar_domain *domain,
2793                                      unsigned long nrpages, uint64_t dma_mask)
2794 {
2795         struct pci_dev *pdev = to_pci_dev(dev);
2796         struct iova *iova = NULL;
2797
2798         /* Restrict dma_mask to the width that the iommu can handle */
2799         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2800
2801         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2802                 /*
2803                  * First try to allocate an io virtual address in
2804                  * DMA_BIT_MASK(32) and if that fails then try allocating
2805                  * from higher range
2806                  */
2807                 iova = alloc_iova(&domain->iovad, nrpages,
2808                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
2809                 if (iova)
2810                         return iova;
2811         }
2812         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2813         if (unlikely(!iova)) {
2814                 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2815                        nrpages, pci_name(pdev));
2816                 return NULL;
2817         }
2818
2819         return iova;
2820 }
2821
2822 static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2823 {
2824         struct dmar_domain *domain;
2825         int ret;
2826
2827         domain = get_domain_for_dev(pdev,
2828                         DEFAULT_DOMAIN_ADDRESS_WIDTH);
2829         if (!domain) {
2830                 printk(KERN_ERR
2831                         "Allocating domain for %s failed\n", pci_name(pdev));
2832                 return NULL;
2833         }
2834
2835         /* make sure context mapping is ok */
2836         if (unlikely(!domain_context_mapped(pdev))) {
2837                 ret = domain_context_mapping(domain, pdev,
2838                                              CONTEXT_TT_MULTI_LEVEL);
2839                 if (ret) {
2840                         printk(KERN_ERR
2841                                 "Domain context map for %s failed\n",
2842                                 pci_name(pdev));
2843                         return NULL;
2844                 }
2845         }
2846
2847         return domain;
2848 }
2849
2850 static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2851 {
2852         struct device_domain_info *info;
2853
2854         /* No lock here, assumes no domain exit in normal case */
2855         info = dev->dev.archdata.iommu;
2856         if (likely(info))
2857                 return info->domain;
2858
2859         return __get_valid_domain_for_dev(dev);
2860 }
2861
2862 static int iommu_dummy(struct pci_dev *pdev)
2863 {
2864         return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2865 }
2866
2867 /* Check if the pdev needs to go through the non-identity map and unmap process. */
2868 static int iommu_no_mapping(struct device *dev)
2869 {
2870         struct pci_dev *pdev;
2871         int found;
2872
2873         if (unlikely(!dev_is_pci(dev)))
2874                 return 1;
2875
2876         pdev = to_pci_dev(dev);
2877         if (iommu_dummy(pdev))
2878                 return 1;
2879
2880         if (!iommu_identity_mapping)
2881                 return 0;
2882
2883         found = identity_mapping(pdev);
2884         if (found) {
2885                 if (iommu_should_identity_map(pdev, 0))
2886                         return 1;
2887                 else {
2888                         /*
2889                          * A 32 bit DMA device is removed from si_domain and
2890                          * falls back to non-identity mapping.
2891                          */
2892                         domain_remove_one_dev_info(si_domain, pdev);
2893                         printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2894                                pci_name(pdev));
2895                         return 0;
2896                 }
2897         } else {
2898                 /*
2899                  * In case a 64 bit DMA device is detached from a vm, the
2900                  * device is put into si_domain for identity mapping.
2901                  */
2902                 if (iommu_should_identity_map(pdev, 0)) {
2903                         int ret;
2904                         ret = domain_add_dev_info(si_domain, pdev,
2905                                                   hw_pass_through ?
2906                                                   CONTEXT_TT_PASS_THROUGH :
2907                                                   CONTEXT_TT_MULTI_LEVEL);
2908                         if (!ret) {
2909                                 printk(KERN_INFO "64bit %s uses identity mapping\n",
2910                                        pci_name(pdev));
2911                                 return 1;
2912                         }
2913                 }
2914         }
2915
2916         return 0;
2917 }
2918
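/*
 * Map @size bytes at @paddr for DMA by @hwdev: identity-mapped devices just
 * get the physical address back; otherwise an IOVA range is allocated, the
 * covered pages are mapped with read/write permission as appropriate for
 * @dir, and the IOTLB (or write buffer) is flushed.
 */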
2919 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2920                                      size_t size, int dir, u64 dma_mask)
2921 {
2922         struct pci_dev *pdev = to_pci_dev(hwdev);
2923         struct dmar_domain *domain;
2924         phys_addr_t start_paddr;
2925         struct iova *iova;
2926         int prot = 0;
2927         int ret;
2928         struct intel_iommu *iommu;
2929         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2930
2931         BUG_ON(dir == DMA_NONE);
2932
2933         if (iommu_no_mapping(hwdev))
2934                 return paddr;
2935
2936         domain = get_valid_domain_for_dev(pdev);
2937         if (!domain)
2938                 return 0;
2939
2940         iommu = domain_get_iommu(domain);
2941         size = aligned_nrpages(paddr, size);
2942
2943         iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2944         if (!iova)
2945                 goto error;
2946
2947         /*
2948          * Check if DMAR supports zero-length reads on write only
2949          * mappings.
2950          */
2951         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2952                         !cap_zlr(iommu->cap))
2953                 prot |= DMA_PTE_READ;
2954         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2955                 prot |= DMA_PTE_WRITE;
2956         /*
2957          * paddr .. paddr + size may cover partial pages, so we map whole
2958          * pages.  Note: if two parts of one page are mapped separately, two
2959          * guest addresses may map to the same host paddr, but this is not a
2960          * big problem.
2961          */
2962         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2963                                  mm_to_dma_pfn(paddr_pfn), size, prot);
2964         if (ret)
2965                 goto error;
2966
2967         /* it's a non-present to present mapping. Only flush if caching mode */
2968         if (cap_caching_mode(iommu->cap))
2969                 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
2970         else
2971                 iommu_flush_write_buffer(iommu);
2972
2973         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2974         start_paddr += paddr & ~PAGE_MASK;
2975         return start_paddr;
2976
2977 error:
2978         if (iova)
2979                 __free_iova(&domain->iovad, iova);
2980         printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
2981                 pci_name(pdev), size, (unsigned long long)paddr, dir);
2982         return 0;
2983 }
2984
2985 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2986                                  unsigned long offset, size_t size,
2987                                  enum dma_data_direction dir,
2988                                  struct dma_attrs *attrs)
2989 {
2990         return __intel_map_single(dev, page_to_phys(page) + offset, size,
2991                                   dir, to_pci_dev(dev)->dma_mask);
2992 }
2993
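/*
 * Drain the per-IOMMU deferred-unmap queues: issue the postponed IOTLB
 * invalidations (one global flush per IOMMU on real hardware, or
 * page-selective flushes in caching mode), then release the queued
 * IOVAs and page-table freelists.  Called with async_umap_flush_lock
 * held.
 */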
2994 static void flush_unmaps(void)
2995 {
2996         int i, j;
2997
2998         timer_on = 0;
2999
3000         /* just flush them all */
3001         for (i = 0; i < g_num_of_iommus; i++) {
3002                 struct intel_iommu *iommu = g_iommus[i];
3003                 if (!iommu)
3004                         continue;
3005
3006                 if (!deferred_flush[i].next)
3007                         continue;
3008
3009                 /* In caching mode, global flushes make emulation expensive */
3010                 if (!cap_caching_mode(iommu->cap))
3011                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3012                                          DMA_TLB_GLOBAL_FLUSH);
3013                 for (j = 0; j < deferred_flush[i].next; j++) {
3014                         unsigned long mask;
3015                         struct iova *iova = deferred_flush[i].iova[j];
3016                         struct dmar_domain *domain = deferred_flush[i].domain[j];
3017
3018                         /* On real hardware multiple invalidations are expensive */
3019                         if (cap_caching_mode(iommu->cap))
3020                                 iommu_flush_iotlb_psi(iommu, domain->id,
3021                                         iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1,
3022                                         !deferred_flush[i].freelist[j], 0);
3023                         else {
3024                                 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
3025                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3026                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3027                         }
3028                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3029                         if (deferred_flush[i].freelist[j])
3030                                 dma_free_pagelist(deferred_flush[i].freelist[j]);
3031                 }
3032                 deferred_flush[i].next = 0;
3033         }
3034
3035         list_size = 0;
3036 }
3037
3038 static void flush_unmaps_timeout(unsigned long data)
3039 {
3040         unsigned long flags;
3041
3042         spin_lock_irqsave(&async_umap_flush_lock, flags);
3043         flush_unmaps();
3044         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3045 }
3046
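/*
 * Queue an IOVA (and the page-table freelist that backed it) for
 * deferred invalidation on its IOMMU.  When the queue reaches
 * HIGH_WATER_MARK the flush happens immediately; otherwise a 10ms
 * timer batches it with later unmaps.
 */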
3047 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3048 {
3049         unsigned long flags;
3050         int next, iommu_id;
3051         struct intel_iommu *iommu;
3052
3053         spin_lock_irqsave(&async_umap_flush_lock, flags);
3054         if (list_size == HIGH_WATER_MARK)
3055                 flush_unmaps();
3056
3057         iommu = domain_get_iommu(dom);
3058         iommu_id = iommu->seq_id;
3059
3060         next = deferred_flush[iommu_id].next;
3061         deferred_flush[iommu_id].domain[next] = dom;
3062         deferred_flush[iommu_id].iova[next] = iova;
3063         deferred_flush[iommu_id].freelist[next] = freelist;
3064         deferred_flush[iommu_id].next++;
3065
3066         if (!timer_on) {
3067                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3068                 timer_on = 1;
3069         }
3070         list_size++;
3071         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3072 }
3073
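/*
 * dma_map_ops unmap_page callback: look up the IOVA that covers
 * @dev_addr, tear down its page tables, and either flush the IOTLB
 * synchronously (intel_iommu_strict) or defer the flush via
 * add_unmap().
 */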
3074 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3075                              size_t size, enum dma_data_direction dir,
3076                              struct dma_attrs *attrs)
3077 {
3078         struct pci_dev *pdev = to_pci_dev(dev);
3079         struct dmar_domain *domain;
3080         unsigned long start_pfn, last_pfn;
3081         struct iova *iova;
3082         struct intel_iommu *iommu;
3083         struct page *freelist;
3084
3085         if (iommu_no_mapping(dev))
3086                 return;
3087
3088         domain = find_domain(pdev);
3089         BUG_ON(!domain);
3090
3091         iommu = domain_get_iommu(domain);
3092
3093         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3094         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3095                       (unsigned long long)dev_addr))
3096                 return;
3097
3098         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3099         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3100
3101         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3102                  pci_name(pdev), start_pfn, last_pfn);
3103
3104         freelist = domain_unmap(domain, start_pfn, last_pfn);
3105
3106         if (intel_iommu_strict) {
3107                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3108                                       last_pfn - start_pfn + 1, !freelist, 0);
3109                 /* free iova */
3110                 __free_iova(&domain->iovad, iova);
3111                 dma_free_pagelist(freelist);
3112         } else {
3113                 add_unmap(domain, iova, freelist);
3114                 /*
3115                  * queue up the release of the unmap to save the 1/6th of a
3116                  * cpu otherwise eaten by the iotlb flush operation...
3117                  */
3118         }
3119 }
3120
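/*
 * dma_map_ops alloc callback: allocate zeroed pages and map them
 * bidirectionally.  GFP_DMA/GFP_DMA32 are only applied when the device
 * bypasses translation and its coherent mask requires a low allocation.
 */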
3121 static void *intel_alloc_coherent(struct device *hwdev, size_t size,
3122                                   dma_addr_t *dma_handle, gfp_t flags,
3123                                   struct dma_attrs *attrs)
3124 {
3125         void *vaddr;
3126         int order;
3127
3128         size = PAGE_ALIGN(size);
3129         order = get_order(size);
3130
3131         if (!iommu_no_mapping(hwdev))
3132                 flags &= ~(GFP_DMA | GFP_DMA32);
3133         else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
3134                 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
3135                         flags |= GFP_DMA;
3136                 else
3137                         flags |= GFP_DMA32;
3138         }
3139
3140         vaddr = (void *)__get_free_pages(flags, order);
3141         if (!vaddr)
3142                 return NULL;
3143         memset(vaddr, 0, size);
3144
3145         *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
3146                                          DMA_BIDIRECTIONAL,
3147                                          hwdev->coherent_dma_mask);
3148         if (*dma_handle)
3149                 return vaddr;
3150         free_pages((unsigned long)vaddr, order);
3151         return NULL;
3152 }
3153
3154 static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
3155                                 dma_addr_t dma_handle, struct dma_attrs *attrs)
3156 {
3157         int order;
3158
3159         size = PAGE_ALIGN(size);
3160         order = get_order(size);
3161
3162         intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
3163         free_pages((unsigned long)vaddr, order);
3164 }
3165
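/*
 * dma_map_ops unmap_sg callback.  The whole scatterlist was mapped
 * into a single IOVA allocation, so sglist[0].dma_address is enough to
 * find it; the teardown then mirrors intel_unmap_page().
 */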
3166 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
3167                            int nelems, enum dma_data_direction dir,
3168                            struct dma_attrs *attrs)
3169 {
3170         struct pci_dev *pdev = to_pci_dev(hwdev);
3171         struct dmar_domain *domain;
3172         unsigned long start_pfn, last_pfn;
3173         struct iova *iova;
3174         struct intel_iommu *iommu;
3175         struct page *freelist;
3176
3177         if (iommu_no_mapping(hwdev))
3178                 return;
3179
3180         domain = find_domain(pdev);
3181         BUG_ON(!domain);
3182
3183         iommu = domain_get_iommu(domain);
3184
3185         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
3186         if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3187                       (unsigned long long)sglist[0].dma_address))
3188                 return;
3189
3190         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3191         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3192
3193         freelist = domain_unmap(domain, start_pfn, last_pfn);
3194
3195         if (intel_iommu_strict) {
3196                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3197                                       last_pfn - start_pfn + 1, !freelist, 0);
3198                 /* free iova */
3199                 __free_iova(&domain->iovad, iova);
3200                 dma_free_pagelist(freelist);
3201         } else {
3202                 add_unmap(domain, iova, freelist);
3203                 /*
3204                  * queue up the release of the unmap to save the 1/6th of a
3205                  * cpu otherwise eaten by the iotlb flush operation...
3206                  */
3207         }
3208 }
3209
3210 static int intel_nontranslate_map_sg(struct device *hwdev,
3211         struct scatterlist *sglist, int nelems, int dir)
3212 {
3213         int i;
3214         struct scatterlist *sg;
3215
3216         for_each_sg(sglist, sg, nelems, i) {
3217                 BUG_ON(!sg_page(sg));
3218                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3219                 sg->dma_length = sg->length;
3220         }
3221         return nelems;
3222 }
3223
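/*
 * dma_map_ops map_sg callback: total up the page count of every
 * scatterlist entry, allocate one IOVA range for the lot, map the
 * entries into it and flush as in __intel_map_single().  Returns the
 * number of entries mapped, or 0 on failure.
 */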
3224 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3225                         enum dma_data_direction dir, struct dma_attrs *attrs)
3226 {
3227         int i;
3228         struct pci_dev *pdev = to_pci_dev(hwdev);
3229         struct dmar_domain *domain;
3230         size_t size = 0;
3231         int prot = 0;
3232         struct iova *iova = NULL;
3233         int ret;
3234         struct scatterlist *sg;
3235         unsigned long start_vpfn;
3236         struct intel_iommu *iommu;
3237
3238         BUG_ON(dir == DMA_NONE);
3239         if (iommu_no_mapping(hwdev))
3240                 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3241
3242         domain = get_valid_domain_for_dev(pdev);
3243         if (!domain)
3244                 return 0;
3245
3246         iommu = domain_get_iommu(domain);
3247
3248         for_each_sg(sglist, sg, nelems, i)
3249                 size += aligned_nrpages(sg->offset, sg->length);
3250
3251         iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3252                                 pdev->dma_mask);
3253         if (!iova) {
3254                 sglist->dma_length = 0;
3255                 return 0;
3256         }
3257
3258         /*
3259          * Check if DMAR supports zero-length reads on write-only
3260          * mappings.
3261          */
3262         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3263                         !cap_zlr(iommu->cap))
3264                 prot |= DMA_PTE_READ;
3265         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3266                 prot |= DMA_PTE_WRITE;
3267
3268         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3269
3270         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3271         if (unlikely(ret)) {
3272                 /*  clear the page */
3273                 dma_pte_clear_range(domain, start_vpfn,
3274                                     start_vpfn + size - 1);
3275                 /* free page tables */
3276                 dma_pte_free_pagetable(domain, start_vpfn,
3277                                        start_vpfn + size - 1);
3278                 /* free iova */
3279                 __free_iova(&domain->iovad, iova);
3280                 return 0;
3281         }
3282
3283         /* it's a non-present to present mapping. Only flush if caching mode */
3284         if (cap_caching_mode(iommu->cap))
3285                 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
3286         else
3287                 iommu_flush_write_buffer(iommu);
3288
3289         return nelems;
3290 }
3291
3292 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3293 {
3294         return !dma_addr;
3295 }
3296
3297 struct dma_map_ops intel_dma_ops = {
3298         .alloc = intel_alloc_coherent,
3299         .free = intel_free_coherent,
3300         .map_sg = intel_map_sg,
3301         .unmap_sg = intel_unmap_sg,
3302         .map_page = intel_map_page,
3303         .unmap_page = intel_unmap_page,
3304         .mapping_error = intel_mapping_error,
3305 };
3306
3307 static inline int iommu_domain_cache_init(void)
3308 {
3309         int ret = 0;
3310
3311         iommu_domain_cache = kmem_cache_create("iommu_domain",
3312                                          sizeof(struct dmar_domain),
3313                                          0,
3314                                          SLAB_HWCACHE_ALIGN,
3316                                          NULL);
3317         if (!iommu_domain_cache) {
3318                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3319                 ret = -ENOMEM;
3320         }
3321
3322         return ret;
3323 }
3324
3325 static inline int iommu_devinfo_cache_init(void)
3326 {
3327         int ret = 0;
3328
3329         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3330                                          sizeof(struct device_domain_info),
3331                                          0,
3332                                          SLAB_HWCACHE_ALIGN,
3333                                          NULL);
3334         if (!iommu_devinfo_cache) {
3335                 printk(KERN_ERR "Couldn't create devinfo cache\n");
3336                 ret = -ENOMEM;
3337         }
3338
3339         return ret;
3340 }
3341
3342 static inline int iommu_iova_cache_init(void)
3343 {
3344         int ret = 0;
3345
3346         iommu_iova_cache = kmem_cache_create("iommu_iova",
3347                                          sizeof(struct iova),
3348                                          0,
3349                                          SLAB_HWCACHE_ALIGN,
3350                                          NULL);
3351         if (!iommu_iova_cache) {
3352                 printk(KERN_ERR "Couldn't create iova cache\n");
3353                 ret = -ENOMEM;
3354         }
3355
3356         return ret;
3357 }
3358
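/* Create the iova, domain and devinfo kmem caches, unwinding on failure. */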
3359 static int __init iommu_init_mempool(void)
3360 {
3361         int ret;
3362         ret = iommu_iova_cache_init();
3363         if (ret)
3364                 return ret;
3365
3366         ret = iommu_domain_cache_init();
3367         if (ret)
3368                 goto domain_error;
3369
3370         ret = iommu_devinfo_cache_init();
3371         if (!ret)
3372                 return ret;
3373
3374         kmem_cache_destroy(iommu_domain_cache);
3375 domain_error:
3376         kmem_cache_destroy(iommu_iova_cache);
3377
3378         return -ENOMEM;
3379 }
3380
3381 static void __init iommu_exit_mempool(void)
3382 {
3383         kmem_cache_destroy(iommu_devinfo_cache);
3384         kmem_cache_destroy(iommu_domain_cache);
3385         kmem_cache_destroy(iommu_iova_cache);
3386
3387 }
3388
3389 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3390 {
3391         struct dmar_drhd_unit *drhd;
3392         u32 vtbar;
3393         int rc;
3394
3395         /* We know that this device on this chipset has its own IOMMU.
3396          * If we find it under a different IOMMU, then the BIOS is lying
3397          * to us. Hope that the IOMMU for this device is actually
3398          * disabled, and it needs no translation...
3399          */
3400         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3401         if (rc) {
3402                 /* "can't" happen */
3403                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3404                 return;
3405         }
3406         vtbar &= 0xffff0000;
3407
3408         /* we know that this iommu should be at offset 0xa000 from vtbar */
3409         drhd = dmar_find_matched_drhd_unit(pdev);
3410         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3411                             TAINT_FIRMWARE_WORKAROUND,
3412                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3413                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3414 }
3415 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3416
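/*
 * Mark DRHD units that cover no PCI devices as ignored.  Units that
 * cover only graphics devices are either recorded via
 * intel_iommu_gfx_mapped or, when dmar_map_gfx is off, ignored with
 * their devices given dummy domain info so they bypass translation.
 */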
3417 static void __init init_no_remapping_devices(void)
3418 {
3419         struct dmar_drhd_unit *drhd;
3420         struct pci_dev *dev;
3421         int i;
3422
3423         for_each_drhd_unit(drhd) {
3424                 if (!drhd->include_all) {
3425                         for_each_active_dev_scope(drhd->devices,
3426                                                   drhd->devices_cnt, i, dev)
3427                                 break;
3428                         /* ignore DMAR unit if no pci devices exist */
3429                         if (i == drhd->devices_cnt)
3430                                 drhd->ignored = 1;
3431                 }
3432         }
3433
3434         for_each_active_drhd_unit(drhd) {
3435                 if (drhd->include_all)
3436                         continue;
3437
3438                 for_each_active_dev_scope(drhd->devices,
3439                                           drhd->devices_cnt, i, dev)
3440                         if (!IS_GFX_DEVICE(dev))
3441                                 break;
3442                 if (i < drhd->devices_cnt)
3443                         continue;
3444
3445                 /* This IOMMU has *only* gfx devices. Either bypass it or
3446                    set the gfx_mapped flag, as appropriate */
3447                 if (dmar_map_gfx) {
3448                         intel_iommu_gfx_mapped = 1;
3449                 } else {
3450                         drhd->ignored = 1;
3451                         for_each_active_dev_scope(drhd->devices,
3452                                                   drhd->devices_cnt, i, dev)
3453                                 dev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3454                 }
3455         }
3456 }
3457
3458 #ifdef CONFIG_SUSPEND
3459 static int init_iommu_hw(void)
3460 {
3461         struct dmar_drhd_unit *drhd;
3462         struct intel_iommu *iommu = NULL;
3463
3464         for_each_active_iommu(iommu, drhd)
3465                 if (iommu->qi)
3466                         dmar_reenable_qi(iommu);
3467
3468         for_each_iommu(iommu, drhd) {
3469                 if (drhd->ignored) {
3470                         /*
3471                          * we always have to disable PMRs or DMA may fail on
3472                          * this device
3473                          */
3474                         if (force_on)
3475                                 iommu_disable_protect_mem_regions(iommu);
3476                         continue;
3477                 }
3478
3479                 iommu_flush_write_buffer(iommu);
3480
3481                 iommu_set_root_entry(iommu);
3482
3483                 iommu->flush.flush_context(iommu, 0, 0, 0,
3484                                            DMA_CCMD_GLOBAL_INVL);
3485                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3486                                          DMA_TLB_GLOBAL_FLUSH);
3487                 if (iommu_enable_translation(iommu))
3488                         return 1;
3489                 iommu_disable_protect_mem_regions(iommu);
3490         }
3491
3492         return 0;
3493 }
3494
3495 static void iommu_flush_all(void)
3496 {
3497         struct dmar_drhd_unit *drhd;
3498         struct intel_iommu *iommu;
3499
3500         for_each_active_iommu(iommu, drhd) {
3501                 iommu->flush.flush_context(iommu, 0, 0, 0,
3502                                            DMA_CCMD_GLOBAL_INVL);
3503                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3504                                          DMA_TLB_GLOBAL_FLUSH);
3505         }
3506 }
3507
3508 static int iommu_suspend(void)
3509 {
3510         struct dmar_drhd_unit *drhd;
3511         struct intel_iommu *iommu = NULL;
3512         unsigned long flag;
3513
3514         for_each_active_iommu(iommu, drhd) {
3515                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3516                                                  GFP_ATOMIC);
3517                 if (!iommu->iommu_state)
3518                         goto nomem;
3519         }
3520
3521         iommu_flush_all();
3522
3523         for_each_active_iommu(iommu, drhd) {
3524                 iommu_disable_translation(iommu);
3525
3526                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3527
3528                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3529                         readl(iommu->reg + DMAR_FECTL_REG);
3530                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3531                         readl(iommu->reg + DMAR_FEDATA_REG);
3532                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3533                         readl(iommu->reg + DMAR_FEADDR_REG);
3534                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3535                         readl(iommu->reg + DMAR_FEUADDR_REG);
3536
3537                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3538         }
3539         return 0;
3540
3541 nomem:
3542         for_each_active_iommu(iommu, drhd)
3543                 kfree(iommu->iommu_state);
3544
3545         return -ENOMEM;
3546 }
3547
3548 static void iommu_resume(void)
3549 {
3550         struct dmar_drhd_unit *drhd;
3551         struct intel_iommu *iommu = NULL;
3552         unsigned long flag;
3553
3554         if (init_iommu_hw()) {
3555                 if (force_on)
3556                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3557                 else
3558                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3559                 return;
3560         }
3561
3562         for_each_active_iommu(iommu, drhd) {
3563
3564                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3565
3566                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3567                         iommu->reg + DMAR_FECTL_REG);
3568                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3569                         iommu->reg + DMAR_FEDATA_REG);
3570                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3571                         iommu->reg + DMAR_FEADDR_REG);
3572                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3573                         iommu->reg + DMAR_FEUADDR_REG);
3574
3575                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3576         }
3577
3578         for_each_active_iommu(iommu, drhd)
3579                 kfree(iommu->iommu_state);
3580 }
3581
3582 static struct syscore_ops iommu_syscore_ops = {
3583         .resume         = iommu_resume,
3584         .suspend        = iommu_suspend,
3585 };
3586
3587 static void __init init_iommu_pm_ops(void)
3588 {
3589         register_syscore_ops(&iommu_syscore_ops);
3590 }
3591
3592 #else
3593 static inline void init_iommu_pm_ops(void) {}
3594 #endif  /* CONFIG_SUSPEND */
3595
3596
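/*
 * Parse one ACPI RMRR (Reserved Memory Region Reporting) entry into a
 * dmar_rmrr_unit, including its device scope, and add it to
 * dmar_rmrr_units.
 */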
3597 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3598 {
3599         struct acpi_dmar_reserved_memory *rmrr;
3600         struct dmar_rmrr_unit *rmrru;
3601
3602         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3603         if (!rmrru)
3604                 return -ENOMEM;
3605
3606         rmrru->hdr = header;
3607         rmrr = (struct acpi_dmar_reserved_memory *)header;
3608         rmrru->base_address = rmrr->base_address;
3609         rmrru->end_address = rmrr->end_address;
3610         rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3611                                 ((void *)rmrr) + rmrr->header.length,
3612                                 &rmrru->devices_cnt);
3613         if (rmrru->devices_cnt && rmrru->devices == NULL) {
3614                 kfree(rmrru);
3615                 return -ENOMEM;
3616         }
3617
3618         list_add(&rmrru->list, &dmar_rmrr_units);
3619
3620         return 0;
3621 }
3622
3623 int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3624 {
3625         struct acpi_dmar_atsr *atsr;
3626         struct dmar_atsr_unit *atsru;
3627
3628         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3629         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3630         if (!atsru)
3631                 return -ENOMEM;
3632
3633         atsru->hdr = hdr;
3634         atsru->include_all = atsr->flags & 0x1;
3635         if (!atsru->include_all) {
3636                 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3637                                 (void *)atsr + atsr->header.length,
3638                                 &atsru->devices_cnt);
3639                 if (atsru->devices_cnt && atsru->devices == NULL) {
3640                         kfree(atsru);
3641                         return -ENOMEM;
3642                 }
3643         }
3644
3645         list_add_rcu(&atsru->list, &dmar_atsr_units);
3646
3647         return 0;
3648 }
3649
3650 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3651 {
3652         dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3653         kfree(atsru);
3654 }
3655
3656 static void intel_iommu_free_dmars(void)
3657 {
3658         struct dmar_rmrr_unit *rmrru, *rmrr_n;
3659         struct dmar_atsr_unit *atsru, *atsr_n;
3660
3661         list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3662                 list_del(&rmrru->list);
3663                 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3664                 kfree(rmrru);
3665         }
3666
3667         list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3668                 list_del(&atsru->list);
3669                 intel_iommu_free_atsr(atsru);
3670         }
3671 }
3672
3673 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3674 {
3675         int i, ret = 1;
3676         struct pci_bus *bus;
3677         struct pci_dev *bridge = NULL, *tmp;
3678         struct acpi_dmar_atsr *atsr;
3679         struct dmar_atsr_unit *atsru;
3680
3681         dev = pci_physfn(dev);
3682         for (bus = dev->bus; bus; bus = bus->parent) {
3683                 bridge = bus->self;
3684                 if (!bridge || !pci_is_pcie(bridge) ||
3685                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3686                         return 0;
3687                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3688                         break;
3689         }
3690         if (!bridge)
3691                 return 0;
3692
3693         rcu_read_lock();
3694         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3695                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3696                 if (atsr->segment != pci_domain_nr(dev->bus))
3697                         continue;
3698
3699                 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3700                         if (tmp == bridge)
3701                                 goto out;
3702
3703                 if (atsru->include_all)
3704                         goto out;
3705         }
3706         ret = 0;
3707 out:
3708         rcu_read_unlock();
3709
3710         return ret;
3711 }
3712
3713 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3714 {
3715         int ret = 0;
3716         struct dmar_rmrr_unit *rmrru;
3717         struct dmar_atsr_unit *atsru;
3718         struct acpi_dmar_atsr *atsr;
3719         struct acpi_dmar_reserved_memory *rmrr;
3720
3721         if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3722                 return 0;
3723
3724         list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3725                 rmrr = container_of(rmrru->hdr,
3726                                     struct acpi_dmar_reserved_memory, header);
3727                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3728                         ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3729                                 ((void *)rmrr) + rmrr->header.length,
3730                                 rmrr->segment, rmrru->devices,
3731                                 rmrru->devices_cnt);
3732                         if (ret > 0)
3733                                 break;
3734                         else if (ret < 0)
3735                                 return ret;
3736                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3737                         if (dmar_remove_dev_scope(info, rmrr->segment,
3738                                 rmrru->devices, rmrru->devices_cnt))
3739                                 break;
3740                 }
3741         }
3742
3743         list_for_each_entry(atsru, &dmar_atsr_units, list) {
3744                 if (atsru->include_all)
3745                         continue;
3746
3747                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3748                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3749                         ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3750                                         (void *)atsr + atsr->header.length,
3751                                         atsr->segment, atsru->devices,
3752                                         atsru->devices_cnt);
3753                         if (ret > 0)
3754                                 break;
3755                         else if (ret < 0)
3756                                 return ret;
3757                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3758                         if (dmar_remove_dev_scope(info, atsr->segment,
3759                                         atsru->devices, atsru->devices_cnt))
3760                                 break;
3761                 }
3762         }
3763
3764         return 0;
3765 }
3766
3767 /*
3768  * Here we only respond to a device being unbound from its driver.
3769  *
3770  * A newly added device is not attached to its DMAR domain here yet. That
3771  * happens when the device is first mapped to an iova.
3772  */
3773 static int device_notifier(struct notifier_block *nb,
3774                                   unsigned long action, void *data)
3775 {
3776         struct device *dev = data;
3777         struct pci_dev *pdev = to_pci_dev(dev);
3778         struct dmar_domain *domain;
3779
3780         if (iommu_dummy(pdev))
3781                 return 0;
3782
3783         if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
3784             action != BUS_NOTIFY_DEL_DEVICE)
3785                 return 0;
3786
3787         domain = find_domain(pdev);
3788         if (!domain)
3789                 return 0;
3790
3791         down_read(&dmar_global_lock);
3792         domain_remove_one_dev_info(domain, pdev);
3793         if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3794             !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3795             list_empty(&domain->devices))
3796                 domain_exit(domain);
3797         up_read(&dmar_global_lock);
3798
3799         return 0;
3800 }
3801
3802 static struct notifier_block device_nb = {
3803         .notifier_call = device_notifier,
3804 };
3805
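/*
 * Keep the static identity map in sync with memory hotplug: extend
 * si_domain when memory goes online, and unmap and flush the range
 * again when it goes offline or onlining is cancelled.
 */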
3806 static int intel_iommu_memory_notifier(struct notifier_block *nb,
3807                                        unsigned long val, void *v)
3808 {
3809         struct memory_notify *mhp = v;
3810         unsigned long long start, end;
3811         unsigned long start_vpfn, last_vpfn;
3812
3813         switch (val) {
3814         case MEM_GOING_ONLINE:
3815                 start = mhp->start_pfn << PAGE_SHIFT;
3816                 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
3817                 if (iommu_domain_identity_map(si_domain, start, end)) {
3818                         pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
3819                                 start, end);
3820                         return NOTIFY_BAD;
3821                 }
3822                 break;
3823
3824         case MEM_OFFLINE:
3825         case MEM_CANCEL_ONLINE:
3826                 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3827                 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
3828                 while (start_vpfn <= last_vpfn) {
3829                         struct iova *iova;
3830                         struct dmar_drhd_unit *drhd;
3831                         struct intel_iommu *iommu;
3832                         struct page *freelist;
3833
3834                         iova = find_iova(&si_domain->iovad, start_vpfn);
3835                         if (iova == NULL) {
3836                                 pr_debug("dmar: failed get IOVA for PFN %lx\n",
3837                                          start_vpfn);
3838                                 break;
3839                         }
3840
3841                         iova = split_and_remove_iova(&si_domain->iovad, iova,
3842                                                      start_vpfn, last_vpfn);
3843                         if (iova == NULL) {
3844                                 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
3845                                         start_vpfn, last_vpfn);
3846                                 return NOTIFY_BAD;
3847                         }
3848
3849                         freelist = domain_unmap(si_domain, iova->pfn_lo,
3850                                                iova->pfn_hi);
3851
3852                         rcu_read_lock();
3853                         for_each_active_iommu(iommu, drhd)
3854                                 iommu_flush_iotlb_psi(iommu, si_domain->id,
3855                                         iova->pfn_lo,
3856                                         iova->pfn_hi - iova->pfn_lo + 1,
3857                                         !freelist, 0);
3858                         rcu_read_unlock();
3859                         dma_free_pagelist(freelist);
3860
3861                         start_vpfn = iova->pfn_hi + 1;
3862                         free_iova_mem(iova);
3863                 }
3864                 break;
3865         }
3866
3867         return NOTIFY_OK;
3868 }
3869
3870 static struct notifier_block intel_iommu_memory_nb = {
3871         .notifier_call = intel_iommu_memory_notifier,
3872         .priority = 0
3873 };
3874
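/*
 * Main entry point: parse the DMAR tables, honour no_iommu/dmar_disabled,
 * reserve special IOVA ranges, initialize the DMAR units via init_dmars(),
 * then install intel_dma_ops and register the bus and memory notifiers.
 */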
3875 int __init intel_iommu_init(void)
3876 {
3877         int ret = -ENODEV;
3878         struct dmar_drhd_unit *drhd;
3879         struct intel_iommu *iommu;
3880
3881         /* VT-d is required for a TXT/tboot launch, so enforce that */
3882         force_on = tboot_force_iommu();
3883
3884         if (iommu_init_mempool()) {
3885                 if (force_on)
3886                         panic("tboot: Failed to initialize iommu memory\n");
3887                 return -ENOMEM;
3888         }
3889
3890         down_write(&dmar_global_lock);
3891         if (dmar_table_init()) {
3892                 if (force_on)
3893                         panic("tboot: Failed to initialize DMAR table\n");
3894                 goto out_free_dmar;
3895         }
3896
3897         /*
3898          * Disable translation if already enabled prior to OS handover.
3899          */
3900         for_each_active_iommu(iommu, drhd)
3901                 if (iommu->gcmd & DMA_GCMD_TE)
3902                         iommu_disable_translation(iommu);
3903
3904         if (dmar_dev_scope_init() < 0) {
3905                 if (force_on)
3906                         panic("tboot: Failed to initialize DMAR device scope\n");
3907                 goto out_free_dmar;
3908         }
3909
3910         if (no_iommu || dmar_disabled)
3911                 goto out_free_dmar;
3912
3913         if (list_empty(&dmar_rmrr_units))
3914                 printk(KERN_INFO "DMAR: No RMRR found\n");
3915
3916         if (list_empty(&dmar_atsr_units))
3917                 printk(KERN_INFO "DMAR: No ATSR found\n");
3918
3919         if (dmar_init_reserved_ranges()) {
3920                 if (force_on)
3921                         panic("tboot: Failed to reserve iommu ranges\n");
3922                 goto out_free_reserved_range;
3923         }
3924
3925         init_no_remapping_devices();
3926
3927         ret = init_dmars();
3928         if (ret) {
3929                 if (force_on)
3930                         panic("tboot: Failed to initialize DMARs\n");
3931                 printk(KERN_ERR "IOMMU: dmar init failed\n");
3932                 goto out_free_reserved_range;
3933         }
3934         up_write(&dmar_global_lock);
3935         printk(KERN_INFO
3936         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3937
3938         init_timer(&unmap_timer);
3939 #ifdef CONFIG_SWIOTLB
3940         swiotlb = 0;
3941 #endif
3942         dma_ops = &intel_dma_ops;
3943
3944         init_iommu_pm_ops();
3945
3946         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
3947         bus_register_notifier(&pci_bus_type, &device_nb);
3948         if (si_domain && !hw_pass_through)
3949                 register_memory_notifier(&intel_iommu_memory_nb);
3950
3951         intel_iommu_enabled = 1;
3952
3953         return 0;
3954
3955 out_free_reserved_range:
3956         put_iova_domain(&reserved_iova_list);
3957 out_free_dmar:
3958         intel_iommu_free_dmars();
3959         up_write(&dmar_global_lock);
3960         iommu_exit_mempool();
3961         return ret;
3962 }
3963
3964 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3965                                            struct pci_dev *pdev)
3966 {
3967         struct pci_dev *tmp, *parent;
3968
3969         if (!iommu || !pdev)
3970                 return;
3971
3972         /* dependent device detach */
3973         tmp = pci_find_upstream_pcie_bridge(pdev);
3974         /* Secondary interface's bus number and devfn 0 */
3975         if (tmp) {
3976                 parent = pdev->bus->self;
3977                 while (parent != tmp) {
3978                         iommu_detach_dev(iommu, parent->bus->number,
3979                                          parent->devfn);
3980                         parent = parent->bus->self;
3981                 }
3982                 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3983                         iommu_detach_dev(iommu,
3984                                 tmp->subordinate->number, 0);
3985                 else /* this is a legacy PCI bridge */
3986                         iommu_detach_dev(iommu, tmp->bus->number,
3987                                          tmp->devfn);
3988         }
3989 }
3990
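/*
 * Detach @pdev (and any PCIe-to-PCI bridges above it) from @domain.
 * If it was the last device of the domain on that IOMMU, clear the
 * domain's bit there and, for plain DMA-API domains, release the
 * domain id.
 */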
3991 static void domain_remove_one_dev_info(struct dmar_domain *domain,
3992                                           struct pci_dev *pdev)
3993 {
3994         struct device_domain_info *info, *tmp;
3995         struct intel_iommu *iommu;
3996         unsigned long flags;
3997         int found = 0;
3998
3999         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
4000                                 pdev->devfn);
4001         if (!iommu)
4002                 return;
4003
4004         spin_lock_irqsave(&device_domain_lock, flags);
4005         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
4006                 if (info->segment == pci_domain_nr(pdev->bus) &&
4007                     info->bus == pdev->bus->number &&
4008                     info->devfn == pdev->devfn) {
4009                         unlink_domain_info(info);
4010                         spin_unlock_irqrestore(&device_domain_lock, flags);
4011
4012                         iommu_disable_dev_iotlb(info);
4013                         iommu_detach_dev(iommu, info->bus, info->devfn);
4014                         iommu_detach_dependent_devices(iommu, pdev);
4015                         free_devinfo_mem(info);
4016
4017                         spin_lock_irqsave(&device_domain_lock, flags);
4018
4019                         if (found)
4020                                 break;
4021                         else
4022                                 continue;
4023                 }
4024
4025                 /* if there are no other devices under the same iommu
4026                  * owned by this domain, clear this iommu in iommu_bmp and
4027                  * update the iommu count and coherency
4028                  */
4029                 if (iommu == device_to_iommu(info->segment, info->bus,
4030                                             info->devfn))
4031                         found = 1;
4032         }
4033
4034         spin_unlock_irqrestore(&device_domain_lock, flags);
4035
4036         if (found == 0) {
4037                 unsigned long tmp_flags;
4038                 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
4039                 clear_bit(iommu->seq_id, domain->iommu_bmp);
4040                 domain->iommu_count--;
4041                 domain_update_iommu_cap(domain);
4042                 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
4043
4044                 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
4045                     !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
4046                         spin_lock_irqsave(&iommu->lock, tmp_flags);
4047                         clear_bit(domain->id, iommu->domain_ids);
4048                         iommu->domains[domain->id] = NULL;
4049                         spin_unlock_irqrestore(&iommu->lock, tmp_flags);
4050                 }
4051         }
4052 }
4053
4054 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4055 {
4056         int adjust_width;
4057
4058         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
4059         domain_reserve_special_ranges(domain);
4060
4061         /* calculate AGAW */
4062         domain->gaw = guest_width;
4063         adjust_width = guestwidth_to_adjustwidth(guest_width);
4064         domain->agaw = width_to_agaw(adjust_width);
4065
4066         domain->iommu_coherency = 0;
4067         domain->iommu_snooping = 0;
4068         domain->iommu_superpage = 0;
4069         domain->max_addr = 0;
4070         domain->nid = -1;
4071
4072         /* always allocate the top pgd */
4073         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4074         if (!domain->pgd)
4075                 return -ENOMEM;
4076         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4077         return 0;
4078 }
4079
4080 static int intel_iommu_domain_init(struct iommu_domain *domain)
4081 {
4082         struct dmar_domain *dmar_domain;
4083
4084         dmar_domain = alloc_domain(true);
4085         if (!dmar_domain) {
4086                 printk(KERN_ERR
4087                         "intel_iommu_domain_init: dmar_domain == NULL\n");
4088                 return -ENOMEM;
4089         }
4090         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4091                 printk(KERN_ERR
4092                         "intel_iommu_domain_init() failed\n");
4093                 domain_exit(dmar_domain);
4094                 return -ENOMEM;
4095         }
4096         domain_update_iommu_cap(dmar_domain);
4097         domain->priv = dmar_domain;
4098
4099         domain->geometry.aperture_start = 0;
4100         domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4101         domain->geometry.force_aperture = true;
4102
4103         return 0;
4104 }
4105
4106 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
4107 {
4108         struct dmar_domain *dmar_domain = domain->priv;
4109
4110         domain->priv = NULL;
4111         domain_exit(dmar_domain);
4112 }
4113
4114 static int intel_iommu_attach_device(struct iommu_domain *domain,
4115                                      struct device *dev)
4116 {
4117         struct dmar_domain *dmar_domain = domain->priv;
4118         struct pci_dev *pdev = to_pci_dev(dev);
4119         struct intel_iommu *iommu;
4120         int addr_width;
4121
4122         /* normally pdev is not mapped */
4123         if (unlikely(domain_context_mapped(pdev))) {
4124                 struct dmar_domain *old_domain;
4125
4126                 old_domain = find_domain(pdev);
4127                 if (old_domain) {
4128                         if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
4129                             dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
4130                                 domain_remove_one_dev_info(old_domain, pdev);
4131                         else
4132                                 domain_remove_dev_info(old_domain);
4133                 }
4134         }
4135
4136         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
4137                                 pdev->devfn);
4138         if (!iommu)
4139                 return -ENODEV;
4140
4141         /* check if this iommu agaw is sufficient for max mapped address */
4142         addr_width = agaw_to_width(iommu->agaw);
4143         if (addr_width > cap_mgaw(iommu->cap))
4144                 addr_width = cap_mgaw(iommu->cap);
4145
4146         if (dmar_domain->max_addr > (1LL << addr_width)) {
4147                 printk(KERN_ERR "%s: iommu width (%d) is not "
4148                        "sufficient for the mapped address (%llx)\n",
4149                        __func__, addr_width, dmar_domain->max_addr);
4150                 return -EFAULT;
4151         }
4152         dmar_domain->gaw = addr_width;
4153
4154         /*
4155          * Knock out extra levels of page tables if necessary
4156          */
4157         while (iommu->agaw < dmar_domain->agaw) {
4158                 struct dma_pte *pte;
4159
4160                 pte = dmar_domain->pgd;
4161                 if (dma_pte_present(pte)) {
4162                         dmar_domain->pgd = (struct dma_pte *)
4163                                 phys_to_virt(dma_pte_addr(pte));
4164                         free_pgtable_page(pte);
4165                 }
4166                 dmar_domain->agaw--;
4167         }
4168
4169         return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
4170 }
4171
4172 static void intel_iommu_detach_device(struct iommu_domain *domain,
4173                                       struct device *dev)
4174 {
4175         struct dmar_domain *dmar_domain = domain->priv;
4176         struct pci_dev *pdev = to_pci_dev(dev);
4177
4178         domain_remove_one_dev_info(dmar_domain, pdev);
4179 }
4180
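/*
 * iommu_ops map callback: translate IOMMU_READ/WRITE/CACHE into VT-d
 * PTE bits, grow the domain's max_addr (checking that it still fits
 * the guest address width), and install the mapping page by page.
 */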
4181 static int intel_iommu_map(struct iommu_domain *domain,
4182                            unsigned long iova, phys_addr_t hpa,
4183                            size_t size, int iommu_prot)
4184 {
4185         struct dmar_domain *dmar_domain = domain->priv;
4186         u64 max_addr;
4187         int prot = 0;
4188         int ret;
4189
4190         if (iommu_prot & IOMMU_READ)
4191                 prot |= DMA_PTE_READ;
4192         if (iommu_prot & IOMMU_WRITE)
4193                 prot |= DMA_PTE_WRITE;
4194         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4195                 prot |= DMA_PTE_SNP;
4196
4197         max_addr = iova + size;
4198         if (dmar_domain->max_addr < max_addr) {
4199                 u64 end;
4200
4201                 /* check if minimum agaw is sufficient for mapped address */
4202                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4203                 if (end < max_addr) {
4204                         printk(KERN_ERR "%s: iommu width (%d) is not "
4205                                "sufficient for the mapped address (%llx)\n",
4206                                __func__, dmar_domain->gaw, max_addr);
4207                         return -EFAULT;
4208                 }
4209                 dmar_domain->max_addr = max_addr;
4210         }
4211         /* Round up size to next multiple of PAGE_SIZE, if it and
4212            the low bits of hpa would take us onto the next page */
4213         size = aligned_nrpages(hpa, size);
4214         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4215                                  hpa >> VTD_PAGE_SHIFT, size, prot);
4216         return ret;
4217 }
4218
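/*
 * iommu_ops unmap callback: tear down the covered page-table range
 * (rounding up to the large-page size actually mapped there), flush
 * the IOTLB on every IOMMU the domain is attached to, then free the
 * collected page-table pages.
 */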
4219 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4220                                 unsigned long iova, size_t size)
4221 {
4222         struct dmar_domain *dmar_domain = domain->priv;
4223         struct page *freelist = NULL;
4224         struct intel_iommu *iommu;
4225         unsigned long start_pfn, last_pfn;
4226         unsigned int npages;
4227         int iommu_id, num, ndomains, level = 0;
4228
4229         /* Cope with horrid API which requires us to unmap more than the
4230            size argument if it happens to be a large-page mapping. */
4231         if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4232                 BUG();
4233
4234         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4235                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4236
4237         start_pfn = iova >> VTD_PAGE_SHIFT;
4238         last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4239
4240         freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4241
4242         npages = last_pfn - start_pfn + 1;
4243
4244         for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4245                 iommu = g_iommus[iommu_id];
4246
4247                 /*
4248                  * find bit position of dmar_domain
4249                  */
4250                 ndomains = cap_ndoms(iommu->cap);
4251                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
4252                         if (iommu->domains[num] == dmar_domain)
4253                                 iommu_flush_iotlb_psi(iommu, num, start_pfn,
4254                                                       npages, !freelist, 0);
4255                 }
4256
4257         }
4258
4259         dma_free_pagelist(freelist);
4260
4261         if (dmar_domain->max_addr == iova + size)
4262                 dmar_domain->max_addr = iova;
4263
4264         return size;
4265 }
4266
4267 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4268                                             dma_addr_t iova)
4269 {
4270         struct dmar_domain *dmar_domain = domain->priv;
4271         struct dma_pte *pte;
4272         int level = 0;
4273         u64 phys = 0;
4274
4275         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4276         if (pte)
4277                 phys = dma_pte_addr(pte);
4278
4279         return phys;
4280 }
4281
4282 static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4283                                       unsigned long cap)
4284 {
4285         struct dmar_domain *dmar_domain = domain->priv;
4286
4287         if (cap == IOMMU_CAP_CACHE_COHERENCY)
4288                 return dmar_domain->iommu_snooping;
4289         if (cap == IOMMU_CAP_INTR_REMAP)
4290                 return irq_remapping_enabled;
4291
4292         return 0;
4293 }
4294
4295 #define REQ_ACS_FLAGS   (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
4296
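/*
 * iommu_ops add_device callback: walk up from @dev to find the device
 * that actually issues DMA on its behalf (upstream bridges, DMA-source
 * quirks, non-ACS multifunction slots), and place @dev in that device's
 * IOMMU group, allocating a new group if needed.
 */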
4297 static int intel_iommu_add_device(struct device *dev)
4298 {
4299         struct pci_dev *pdev = to_pci_dev(dev);
4300         struct pci_dev *bridge, *dma_pdev = NULL;
4301         struct iommu_group *group;
4302         int ret;
4303
4304         if (!device_to_iommu(pci_domain_nr(pdev->bus),
4305                              pdev->bus->number, pdev->devfn))
4306                 return -ENODEV;
4307
4308         bridge = pci_find_upstream_pcie_bridge(pdev);
4309         if (bridge) {
4310                 if (pci_is_pcie(bridge))
4311                         dma_pdev = pci_get_domain_bus_and_slot(
4312                                                 pci_domain_nr(pdev->bus),
4313                                                 bridge->subordinate->number, 0);
4314                 if (!dma_pdev)
4315                         dma_pdev = pci_dev_get(bridge);
4316         } else
4317                 dma_pdev = pci_dev_get(pdev);
4318
4319         /* Account for quirked devices */
4320         swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
4321
4322         /*
4323          * If it's a multifunction device that does not support our
4324          * required ACS flags, add it to the same group as the lowest numbered
4325          * function that also does not support the required ACS flags.
4326          */
4327         if (dma_pdev->multifunction &&
4328             !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
4329                 u8 i, slot = PCI_SLOT(dma_pdev->devfn);
4330
4331                 for (i = 0; i < 8; i++) {
4332                         struct pci_dev *tmp;
4333
4334                         tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
4335                         if (!tmp)
4336                                 continue;
4337
4338                         if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
4339                                 swap_pci_ref(&dma_pdev, tmp);
4340                                 break;
4341                         }
4342                         pci_dev_put(tmp);
4343                 }
4344         }
4345
4346         /*
4347          * Devices on the root bus go through the iommu.  If that's not us,
4348          * find the next upstream device and test ACS up to the root bus.
4349          * Finding the next device may require skipping virtual buses.
4350          */
4351         while (!pci_is_root_bus(dma_pdev->bus)) {
4352                 struct pci_bus *bus = dma_pdev->bus;
4353
4354                 while (!bus->self) {
4355                         if (!pci_is_root_bus(bus))
4356                                 bus = bus->parent;
4357                         else
4358                                 goto root_bus;
4359                 }
4360
4361                 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
4362                         break;
4363
4364                 swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
4365         }
4366
4367 root_bus:
4368         group = iommu_group_get(&dma_pdev->dev);
4369         pci_dev_put(dma_pdev);
4370         if (!group) {
4371                 group = iommu_group_alloc();
4372                 if (IS_ERR(group))
4373                         return PTR_ERR(group);
4374         }
4375
4376         ret = iommu_group_add_device(group, dev);
4377
4378         iommu_group_put(group);
4379         return ret;
4380 }
4381
4382 static void intel_iommu_remove_device(struct device *dev)
4383 {
4384         iommu_group_remove_device(dev);
4385 }
4386
4387 static struct iommu_ops intel_iommu_ops = {
4388         .domain_init    = intel_iommu_domain_init,
4389         .domain_destroy = intel_iommu_domain_destroy,
4390         .attach_dev     = intel_iommu_attach_device,
4391         .detach_dev     = intel_iommu_detach_device,
4392         .map            = intel_iommu_map,
4393         .unmap          = intel_iommu_unmap,
4394         .iova_to_phys   = intel_iommu_iova_to_phys,
4395         .domain_has_cap = intel_iommu_domain_has_cap,
4396         .add_device     = intel_iommu_add_device,
4397         .remove_device  = intel_iommu_remove_device,
4398         .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
4399 };
4400
4401 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4402 {
4403         /* G4x/GM45 integrated gfx dmar support is totally busted. */
4404         printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4405         dmar_map_gfx = 0;
4406 }
4407
4408 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4409 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4410 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4411 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4412 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4413 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4414 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4415
4416 static void quirk_iommu_rwbf(struct pci_dev *dev)
4417 {
4418         /*
4419          * Mobile 4 Series Chipset neglects to set RWBF capability,
4420          * but needs it. Same seems to hold for the desktop versions.
4421          */
4422         printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4423         rwbf_quirk = 1;
4424 }
4425
4426 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4427 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4428 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4429 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4430 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4431 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4432 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4433
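     /*
      * GGC is the graphics control register in the host bridge's PCI
      * config space.  As the field names below suggest, bits 11:8 encode
      * how much memory the BIOS reserved for the graphics GTT and whether
      * a VT-enabled (shadow GTT) allocation was made.
      */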
4434 #define GGC 0x52
4435 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
4436 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
4437 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
4438 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
4439 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
4440 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
4441 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
4442 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
4443
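     /*
      * Ironlake ("Calpella") host bridges: if the BIOS did not allocate a
      * VT-enabled (shadow) GTT, integrated graphics cannot safely be put
      * behind the IOMMU, so disable translation for graphics; otherwise
      * force strict (unbatched) IOTLB flushing.
      */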
4444 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4445 {
4446         unsigned short ggc;
4447
4448         if (pci_read_config_word(dev, GGC, &ggc))
4449                 return;
4450
4451         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4452                 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4453                 dmar_map_gfx = 0;
4454         } else if (dmar_map_gfx) {
4455                 /* we have to ensure the gfx device is idle before we flush */
4456                 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4457                 intel_iommu_strict = 1;
4458         }
4459 }
4460 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4461 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4462 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4463 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4464
4465 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4466    ISOCH DMAR unit for the Azalia sound device, but not give it any
4467    TLB entries, which causes it to deadlock. Check for that.  We do
4468    this in a function called from init_dmars(), instead of in a PCI
4469    quirk, because we don't want to print the obnoxious "BIOS broken"
4470    message if VT-d is actually disabled.
4471 */
4472 static void __init check_tylersburg_isoch(void)
4473 {
4474         struct pci_dev *pdev;
4475         uint32_t vtisochctrl;
4476
4477         /* If there's no Azalia in the system anyway, forget it. */
4478         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4479         if (!pdev)
4480                 return;
4481         pci_dev_put(pdev);
4482
4483         /* System Management Registers. Might be hidden, in which case
4484            we can't do the sanity check. But that's OK, because the
4485            known-broken BIOSes _don't_ actually hide it, so far. */
4486         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4487         if (!pdev)
4488                 return;
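             /*
              * Offset 0x188 holds the VT isochronous DMA control setting
              * (hence "vtisochctrl" below); if the read fails, skip the
              * check.
              */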
4489
4490         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4491                 pci_dev_put(pdev);
4492                 return;
4493         }
4494
4495         pci_dev_put(pdev);
4496
4497         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4498         if (vtisochctrl & 1)
4499                 return;
4500
4501         /* Drop all bits other than the number of TLB entries */
4502         vtisochctrl &= 0x1c;
4503
4504         /* If we have the recommended number of TLB entries (16), fine. */
4505         if (vtisochctrl == 0x10)
4506                 return;
4507
4508         /* Zero TLB entries? The unit will deadlock, so warn and work around it. */
4509         if (!vtisochctrl) {
4510                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4511                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4512                      dmi_get_system_info(DMI_BIOS_VENDOR),
4513                      dmi_get_system_info(DMI_BIOS_VERSION),
4514                      dmi_get_system_info(DMI_PRODUCT_VERSION));
4515                 iommu_identity_mapping |= IDENTMAP_AZALIA;
4516                 return;
4517         }
4518
4519         printk(KERN_WARNING "DMAR: Recommended number of TLB entries for the ISOCH unit is 16; your BIOS set %d\n",
4520                vtisochctrl);
4521 }