[mv-sheeva.git] drivers/pci/intel-iommu.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  * Author: Fenghua Yu <fenghua.yu@intel.com>
22  */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/iommu.h>
38 #include <linux/intel-iommu.h>
39 #include <linux/syscore_ops.h>
40 #include <linux/tboot.h>
41 #include <linux/dmi.h>
42 #include <asm/cacheflush.h>
43 #include <asm/iommu.h>
44 #include "pci.h"
45
46 #define ROOT_SIZE               VTD_PAGE_SIZE
47 #define CONTEXT_SIZE            VTD_PAGE_SIZE
48
49 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
50 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
51 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
52
53 #define IOAPIC_RANGE_START      (0xfee00000)
54 #define IOAPIC_RANGE_END        (0xfeefffff)
55 #define IOVA_START_ADDR         (0x1000)
56
57 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
58
59 #define MAX_AGAW_WIDTH 64
60
61 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
62 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
63
64 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
65    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
66 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
67                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
68 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
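
/*
 * For illustration, with the default 48-bit guest address width:
 * __DOMAIN_MAX_PFN(48) = 2^36 - 1 and DOMAIN_MAX_ADDR(48) = 0xfffffffff000.
 * On a 32-bit kernel DOMAIN_MAX_PFN() clamps the result to ULONG_MAX so it
 * still fits in an unsigned long.
 */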
69
70 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
71 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
72 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
73
74 /* page table handling */
75 #define LEVEL_STRIDE            (9)
76 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
77
78 static inline int agaw_to_level(int agaw)
79 {
80         return agaw + 2;
81 }
82
83 static inline int agaw_to_width(int agaw)
84 {
85         return 30 + agaw * LEVEL_STRIDE;
86 }
87
88 static inline int width_to_agaw(int width)
89 {
90         return (width - 30) / LEVEL_STRIDE;
91 }
92
93 static inline unsigned int level_to_offset_bits(int level)
94 {
95         return (level - 1) * LEVEL_STRIDE;
96 }
97
98 static inline int pfn_level_offset(unsigned long pfn, int level)
99 {
100         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
101 }
102
103 static inline unsigned long level_mask(int level)
104 {
105         return -1UL << level_to_offset_bits(level);
106 }
107
108 static inline unsigned long level_size(int level)
109 {
110         return 1UL << level_to_offset_bits(level);
111 }
112
113 static inline unsigned long align_to_level(unsigned long pfn, int level)
114 {
115         return (pfn + level_size(level) - 1) & level_mask(level);
116 }
117
118 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
119 {
120         return  1 << ((lvl - 1) * LEVEL_STRIDE);
121 }
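
/*
 * Worked example of the level arithmetic above (4KiB VT-d pages):
 * agaw 1 is a 39-bit, 3-level table and agaw 2 is a 48-bit, 4-level table
 * (agaw_to_level() == agaw + 2).  Each level indexes 9 bits of the DMA pfn,
 * so pfn_level_offset() uses pfn bits 0-8 at level 1 and bits 27-35 at
 * level 4, and level_size(2) == lvl_to_nr_pages(2) == 512 pages (2MiB).
 */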
122
123 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
124    are never going to work. */
125 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
126 {
127         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
128 }
129
130 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
131 {
132         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
133 }
134 static inline unsigned long page_to_dma_pfn(struct page *pg)
135 {
136         return mm_to_dma_pfn(page_to_pfn(pg));
137 }
138 static inline unsigned long virt_to_dma_pfn(void *p)
139 {
140         return page_to_dma_pfn(virt_to_page(p));
141 }
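
/*
 * With 4KiB kernel pages PAGE_SHIFT == VTD_PAGE_SHIFT and these conversions
 * are the identity; with a larger PAGE_SIZE (e.g. 64KiB) one mm pfn covers
 * several DMA pfns.
 */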
142
143 /* global iommu list, set NULL for ignored DMAR units */
144 static struct intel_iommu **g_iommus;
145
146 static void __init check_tylersburg_isoch(void);
147 static int rwbf_quirk;
148
149 /*
150  * set to 1 to panic the kernel if VT-d cannot be enabled successfully
151  * (used when the kernel is launched with TXT)
152  */
153 static int force_on = 0;
154
155 /*
156  * 0: Present
157  * 1-11: Reserved
158  * 12-63: Context Ptr (12 - (haw-1))
159  * 64-127: Reserved
160  */
161 struct root_entry {
162         u64     val;
163         u64     rsvd1;
164 };
165 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
166 static inline bool root_present(struct root_entry *root)
167 {
168         return (root->val & 1);
169 }
170 static inline void set_root_present(struct root_entry *root)
171 {
172         root->val |= 1;
173 }
174 static inline void set_root_value(struct root_entry *root, unsigned long value)
175 {
176         root->val |= value & VTD_PAGE_MASK;
177 }
178
179 static inline struct context_entry *
180 get_context_addr_from_root(struct root_entry *root)
181 {
182         return (struct context_entry *)
183                 (root_present(root)?phys_to_virt(
184                 root->val & VTD_PAGE_MASK) :
185                 NULL);
186 }
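
/*
 * For example, a root entry whose context table lives at physical address
 * 0x12345000 ends up as val == 0x12345001 once set_root_value() and
 * set_root_present() have both run: the low bit is the present flag and
 * bits 12-63 hold the page-aligned context-table pointer.
 */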
187
188 /*
189  * low 64 bits:
190  * 0: present
191  * 1: fault processing disable
192  * 2-3: translation type
193  * 12-63: address space root
194  * high 64 bits:
195  * 0-2: address width
196  * 3-6: avail
197  * 8-23: domain id
198  */
199 struct context_entry {
200         u64 lo;
201         u64 hi;
202 };
203
204 static inline bool context_present(struct context_entry *context)
205 {
206         return (context->lo & 1);
207 }
208 static inline void context_set_present(struct context_entry *context)
209 {
210         context->lo |= 1;
211 }
212
213 static inline void context_set_fault_enable(struct context_entry *context)
214 {
215         context->lo &= (((u64)-1) << 2) | 1;
216 }
217
218 static inline void context_set_translation_type(struct context_entry *context,
219                                                 unsigned long value)
220 {
221         context->lo &= (((u64)-1) << 4) | 3;
222         context->lo |= (value & 3) << 2;
223 }
224
225 static inline void context_set_address_root(struct context_entry *context,
226                                             unsigned long value)
227 {
228         context->lo |= value & VTD_PAGE_MASK;
229 }
230
231 static inline void context_set_address_width(struct context_entry *context,
232                                              unsigned long value)
233 {
234         context->hi |= value & 7;
235 }
236
237 static inline void context_set_domain_id(struct context_entry *context,
238                                          unsigned long value)
239 {
240         context->hi |= (value & ((1 << 16) - 1)) << 8;
241 }
242
243 static inline void context_clear_entry(struct context_entry *context)
244 {
245         context->lo = 0;
246         context->hi = 0;
247 }
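
/*
 * For illustration: a context entry for domain id 5 with a 4-level (agaw 2)
 * page table at physical address 0x23456000 ends up with lo == 0x23456001
 * (present, fault processing enabled, multi-level translation, address
 * space root) and hi == 0x502 (address width 2, domain id 5 in bits 8-23).
 */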
248
249 /*
250  * 0: readable
251  * 1: writable
252  * 2-6: reserved
253  * 7: super page
254  * 8-10: available
255  * 11: snoop behavior
256  * 12-63: Host physical address
257  */
258 struct dma_pte {
259         u64 val;
260 };
261
262 static inline void dma_clear_pte(struct dma_pte *pte)
263 {
264         pte->val = 0;
265 }
266
267 static inline void dma_set_pte_readable(struct dma_pte *pte)
268 {
269         pte->val |= DMA_PTE_READ;
270 }
271
272 static inline void dma_set_pte_writable(struct dma_pte *pte)
273 {
274         pte->val |= DMA_PTE_WRITE;
275 }
276
277 static inline void dma_set_pte_snp(struct dma_pte *pte)
278 {
279         pte->val |= DMA_PTE_SNP;
280 }
281
282 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
283 {
284         pte->val = (pte->val & ~3) | (prot & 3);
285 }
286
287 static inline u64 dma_pte_addr(struct dma_pte *pte)
288 {
289 #ifdef CONFIG_64BIT
290         return pte->val & VTD_PAGE_MASK;
291 #else
292         /* Must have a full atomic 64-bit read */
293         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
294 #endif
295 }
296
297 static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
298 {
299         pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
300 }
301
302 static inline bool dma_pte_present(struct dma_pte *pte)
303 {
304         return (pte->val & 3) != 0;
305 }
306
307 static inline int first_pte_in_page(struct dma_pte *pte)
308 {
309         return !((unsigned long)pte & ~VTD_PAGE_MASK);
310 }
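
/*
 * Example: a read/write leaf PTE mapping host page 0x1000 (pfn 1) is simply
 * val == 0x1003: bit 0 readable, bit 1 writable, bits 12-63 the host pfn.
 * dma_pte_present() treats a PTE as present if either permission bit is set.
 */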
311
312 /*
313  * This domain is a static identity mapping domain.
314  *      1. This domain creates a static 1:1 mapping of all usable memory.
315  *      2. It maps to each iommu if successful.
316  *      3. Each iommu maps to this domain if successful.
317  */
318 static struct dmar_domain *si_domain;
319 static int hw_pass_through = 1;
320
321 /* devices under the same p2p bridge are owned in one domain */
322 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
323
324 /* domain represents a virtual machine; more than one device
325  * across iommus may be owned by one domain, e.g. a kvm guest.
326  */
327 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 1)
328
329 /* si_domain contains multiple devices */
330 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 2)
331
332 struct dmar_domain {
333         int     id;                     /* domain id */
334         int     nid;                    /* node id */
335         unsigned long iommu_bmp;        /* bitmap of iommus this domain uses*/
336
337         struct list_head devices;       /* all devices' list */
338         struct iova_domain iovad;       /* iova's that belong to this domain */
339
340         struct dma_pte  *pgd;           /* virtual address */
341         int             gaw;            /* max guest address width */
342
343         /* adjusted guest address width, 0 is level 2 30-bit */
344         int             agaw;
345
346         int             flags;          /* flags to find out type of domain */
347
348         int             iommu_coherency;/* indicate coherency of iommu access */
349         int             iommu_snooping; /* indicate snooping control feature*/
350         int             iommu_count;    /* reference count of iommu */
351         int             iommu_superpage;/* Level of superpages supported:
352                                            0 == 4KiB (no superpages), 1 == 2MiB,
353                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
354         spinlock_t      iommu_lock;     /* protect iommu set in domain */
355         u64             max_addr;       /* maximum mapped address */
356 };
357
358 /* PCI domain-device relationship */
359 struct device_domain_info {
360         struct list_head link;  /* link to domain siblings */
361         struct list_head global; /* link to global list */
362         int segment;            /* PCI domain */
363         u8 bus;                 /* PCI bus number */
364         u8 devfn;               /* PCI devfn number */
365         struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
366         struct intel_iommu *iommu; /* IOMMU used by this device */
367         struct dmar_domain *domain; /* pointer to domain */
368 };
369
370 static void flush_unmaps_timeout(unsigned long data);
371
372 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
373
374 #define HIGH_WATER_MARK 250
375 struct deferred_flush_tables {
376         int next;
377         struct iova *iova[HIGH_WATER_MARK];
378         struct dmar_domain *domain[HIGH_WATER_MARK];
379 };
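
/*
 * Lazy unmapping support: freed IOVAs are queued in these tables rather
 * than flushed from the IOTLB one at a time, and are then released in
 * batches, driven by unmap_timer or by the queue reaching HIGH_WATER_MARK.
 */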
380
381 static struct deferred_flush_tables *deferred_flush;
382
383 /* bitmap for indexing intel_iommus */
384 static int g_num_of_iommus;
385
386 static DEFINE_SPINLOCK(async_umap_flush_lock);
387 static LIST_HEAD(unmaps_to_do);
388
389 static int timer_on;
390 static long list_size;
391
392 static void domain_remove_dev_info(struct dmar_domain *domain);
393
394 #ifdef CONFIG_DMAR_DEFAULT_ON
395 int dmar_disabled = 0;
396 #else
397 int dmar_disabled = 1;
398 #endif /*CONFIG_DMAR_DEFAULT_ON*/
399
400 static int dmar_map_gfx = 1;
401 static int dmar_forcedac;
402 static int intel_iommu_strict;
403 static int intel_iommu_superpage = 1;
404
405 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
406 static DEFINE_SPINLOCK(device_domain_lock);
407 static LIST_HEAD(device_domain_list);
408
409 static struct iommu_ops intel_iommu_ops;
410
411 static int __init intel_iommu_setup(char *str)
412 {
413         if (!str)
414                 return -EINVAL;
415         while (*str) {
416                 if (!strncmp(str, "on", 2)) {
417                         dmar_disabled = 0;
418                         printk(KERN_INFO "Intel-IOMMU: enabled\n");
419                 } else if (!strncmp(str, "off", 3)) {
420                         dmar_disabled = 1;
421                         printk(KERN_INFO "Intel-IOMMU: disabled\n");
422                 } else if (!strncmp(str, "igfx_off", 8)) {
423                         dmar_map_gfx = 0;
424                         printk(KERN_INFO
425                                 "Intel-IOMMU: disable GFX device mapping\n");
426                 } else if (!strncmp(str, "forcedac", 8)) {
427                         printk(KERN_INFO
428                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
429                         dmar_forcedac = 1;
430                 } else if (!strncmp(str, "strict", 6)) {
431                         printk(KERN_INFO
432                                 "Intel-IOMMU: disable batched IOTLB flush\n");
433                         intel_iommu_strict = 1;
434                 } else if (!strncmp(str, "sp_off", 6)) {
435                         printk(KERN_INFO
436                                 "Intel-IOMMU: disable supported super page\n");
437                         intel_iommu_superpage = 0;
438                 }
439
440                 str += strcspn(str, ",");
441                 while (*str == ',')
442                         str++;
443         }
444         return 0;
445 }
446 __setup("intel_iommu=", intel_iommu_setup);
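
/*
 * Example: booting with "intel_iommu=on,strict,igfx_off" enables the IOMMU,
 * disables batched IOTLB flushing and disables GFX device mapping; options
 * are comma-separated and parsed in order by intel_iommu_setup().
 */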
447
448 static struct kmem_cache *iommu_domain_cache;
449 static struct kmem_cache *iommu_devinfo_cache;
450 static struct kmem_cache *iommu_iova_cache;
451
452 static inline void *alloc_pgtable_page(int node)
453 {
454         struct page *page;
455         void *vaddr = NULL;
456
457         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
458         if (page)
459                 vaddr = page_address(page);
460         return vaddr;
461 }
462
463 static inline void free_pgtable_page(void *vaddr)
464 {
465         free_page((unsigned long)vaddr);
466 }
467
468 static inline void *alloc_domain_mem(void)
469 {
470         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
471 }
472
473 static void free_domain_mem(void *vaddr)
474 {
475         kmem_cache_free(iommu_domain_cache, vaddr);
476 }
477
478 static inline void * alloc_devinfo_mem(void)
479 {
480         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
481 }
482
483 static inline void free_devinfo_mem(void *vaddr)
484 {
485         kmem_cache_free(iommu_devinfo_cache, vaddr);
486 }
487
488 struct iova *alloc_iova_mem(void)
489 {
490         return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
491 }
492
493 void free_iova_mem(struct iova *iova)
494 {
495         kmem_cache_free(iommu_iova_cache, iova);
496 }
497
498
499 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
500 {
501         unsigned long sagaw;
502         int agaw = -1;
503
504         sagaw = cap_sagaw(iommu->cap);
505         for (agaw = width_to_agaw(max_gaw);
506              agaw >= 0; agaw--) {
507                 if (test_bit(agaw, &sagaw))
508                         break;
509         }
510
511         return agaw;
512 }
513
514 /*
515  * Calculate max SAGAW for each iommu.
516  */
517 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
518 {
519         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
520 }
521
522 /*
523  * Calculate agaw for each iommu.
524  * "SAGAW" may be different across iommus; use a default agaw and fall
525  * back to a smaller supported agaw for iommus lacking the default agaw.
526  */
527 int iommu_calculate_agaw(struct intel_iommu *iommu)
528 {
529         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
530 }
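
/*
 * Example: with DEFAULT_DOMAIN_ADDRESS_WIDTH of 48 bits the preferred agaw
 * is width_to_agaw(48) == 2 (a 4-level table).  If SAGAW does not advertise
 * bit 2, __iommu_calculate_agaw() falls back to the next smaller supported
 * value, e.g. agaw 1 for a 39-bit, 3-level table.
 */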
531
532 /* This function only returns a single iommu in a domain */
533 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
534 {
535         int iommu_id;
536
537         /* si_domain and vm domain should not get here. */
538         BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
539         BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
540
541         iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
542         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
543                 return NULL;
544
545         return g_iommus[iommu_id];
546 }
547
548 static void domain_update_iommu_coherency(struct dmar_domain *domain)
549 {
550         int i;
551
552         domain->iommu_coherency = 1;
553
554         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
555                 if (!ecap_coherent(g_iommus[i]->ecap)) {
556                         domain->iommu_coherency = 0;
557                         break;
558                 }
559         }
560 }
561
562 static void domain_update_iommu_snooping(struct dmar_domain *domain)
563 {
564         int i;
565
566         domain->iommu_snooping = 1;
567
568         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
569                 if (!ecap_sc_support(g_iommus[i]->ecap)) {
570                         domain->iommu_snooping = 0;
571                         break;
572                 }
573         }
574 }
575
576 static void domain_update_iommu_superpage(struct dmar_domain *domain)
577 {
578         int i, mask = 0xf;
579
580         if (!intel_iommu_superpage) {
581                 domain->iommu_superpage = 0;
582                 return;
583         }
584
585         domain->iommu_superpage = 4; /* 1TiB */
586
587         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
588                 mask &= cap_super_page_val(g_iommus[i]->cap);
589                 if (!mask) {
590                         break;
591                 }
592         }
593         domain->iommu_superpage = fls(mask);
594 }
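
/*
 * cap_super_page_val() is a bitmask of supported superpage sizes (bit 0 ==
 * 2MiB, bit 1 == 1GiB, ...), so after ANDing the masks of every iommu in the
 * domain, fls(mask) is the largest superpage level they all support; e.g. a
 * domain whose iommus all report 0x1 ends up with iommu_superpage == 1 (2MiB).
 */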
595
596 /* Some capabilities may be different across iommus */
597 static void domain_update_iommu_cap(struct dmar_domain *domain)
598 {
599         domain_update_iommu_coherency(domain);
600         domain_update_iommu_snooping(domain);
601         domain_update_iommu_superpage(domain);
602 }
603
604 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
605 {
606         struct dmar_drhd_unit *drhd = NULL;
607         int i;
608
609         for_each_drhd_unit(drhd) {
610                 if (drhd->ignored)
611                         continue;
612                 if (segment != drhd->segment)
613                         continue;
614
615                 for (i = 0; i < drhd->devices_cnt; i++) {
616                         if (drhd->devices[i] &&
617                             drhd->devices[i]->bus->number == bus &&
618                             drhd->devices[i]->devfn == devfn)
619                                 return drhd->iommu;
620                         if (drhd->devices[i] &&
621                             drhd->devices[i]->subordinate &&
622                             drhd->devices[i]->subordinate->number <= bus &&
623                             drhd->devices[i]->subordinate->subordinate >= bus)
624                                 return drhd->iommu;
625                 }
626
627                 if (drhd->include_all)
628                         return drhd->iommu;
629         }
630
631         return NULL;
632 }
633
634 static void domain_flush_cache(struct dmar_domain *domain,
635                                void *addr, int size)
636 {
637         if (!domain->iommu_coherency)
638                 clflush_cache_range(addr, size);
639 }
640
641 /* Gets context entry for a given bus and devfn */
642 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
643                 u8 bus, u8 devfn)
644 {
645         struct root_entry *root;
646         struct context_entry *context;
647         unsigned long phy_addr;
648         unsigned long flags;
649
650         spin_lock_irqsave(&iommu->lock, flags);
651         root = &iommu->root_entry[bus];
652         context = get_context_addr_from_root(root);
653         if (!context) {
654                 context = (struct context_entry *)
655                                 alloc_pgtable_page(iommu->node);
656                 if (!context) {
657                         spin_unlock_irqrestore(&iommu->lock, flags);
658                         return NULL;
659                 }
660                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
661                 phy_addr = virt_to_phys((void *)context);
662                 set_root_value(root, phy_addr);
663                 set_root_present(root);
664                 __iommu_flush_cache(iommu, root, sizeof(*root));
665         }
666         spin_unlock_irqrestore(&iommu->lock, flags);
667         return &context[devfn];
668 }
669
670 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
671 {
672         struct root_entry *root;
673         struct context_entry *context;
674         int ret;
675         unsigned long flags;
676
677         spin_lock_irqsave(&iommu->lock, flags);
678         root = &iommu->root_entry[bus];
679         context = get_context_addr_from_root(root);
680         if (!context) {
681                 ret = 0;
682                 goto out;
683         }
684         ret = context_present(&context[devfn]);
685 out:
686         spin_unlock_irqrestore(&iommu->lock, flags);
687         return ret;
688 }
689
690 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
691 {
692         struct root_entry *root;
693         struct context_entry *context;
694         unsigned long flags;
695
696         spin_lock_irqsave(&iommu->lock, flags);
697         root = &iommu->root_entry[bus];
698         context = get_context_addr_from_root(root);
699         if (context) {
700                 context_clear_entry(&context[devfn]);
701                 __iommu_flush_cache(iommu, &context[devfn], \
702                         sizeof(*context));
703         }
704         spin_unlock_irqrestore(&iommu->lock, flags);
705 }
706
707 static void free_context_table(struct intel_iommu *iommu)
708 {
709         struct root_entry *root;
710         int i;
711         unsigned long flags;
712         struct context_entry *context;
713
714         spin_lock_irqsave(&iommu->lock, flags);
715         if (!iommu->root_entry) {
716                 goto out;
717         }
718         for (i = 0; i < ROOT_ENTRY_NR; i++) {
719                 root = &iommu->root_entry[i];
720                 context = get_context_addr_from_root(root);
721                 if (context)
722                         free_pgtable_page(context);
723         }
724         free_pgtable_page(iommu->root_entry);
725         iommu->root_entry = NULL;
726 out:
727         spin_unlock_irqrestore(&iommu->lock, flags);
728 }
729
730 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
731                                       unsigned long pfn, int large_level)
732 {
733         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
734         struct dma_pte *parent, *pte = NULL;
735         int level = agaw_to_level(domain->agaw);
736         int offset, target_level;
737
738         BUG_ON(!domain->pgd);
739         BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
740         parent = domain->pgd;
741
742         /* Search pte */
743         if (!large_level)
744                 target_level = 1;
745         else
746                 target_level = large_level;
747
748         while (level > 0) {
749                 void *tmp_page;
750
751                 offset = pfn_level_offset(pfn, level);
752                 pte = &parent[offset];
753                 if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
754                         break;
755                 if (level == target_level)
756                         break;
757
758                 if (!dma_pte_present(pte)) {
759                         uint64_t pteval;
760
761                         tmp_page = alloc_pgtable_page(domain->nid);
762
763                         if (!tmp_page)
764                                 return NULL;
765
766                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
767                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
768                         if (cmpxchg64(&pte->val, 0ULL, pteval)) {
769                                 /* Someone else set it while we were thinking; use theirs. */
770                                 free_pgtable_page(tmp_page);
771                         } else {
772                                 dma_pte_addr(pte);
773                                 domain_flush_cache(domain, pte, sizeof(*pte));
774                         }
775                 }
776                 parent = phys_to_virt(dma_pte_addr(pte));
777                 level--;
778         }
779
780         return pte;
781 }
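
/*
 * pfn_to_dma_pte() walks from the top of the page table down to
 * target_level (level 1 for ordinary 4KiB mappings, higher for superpages),
 * allocating missing intermediate levels as it goes.  The cmpxchg64() keeps
 * the walk lock-free: if another CPU installed the same level first, the
 * freshly allocated page is simply thrown away and theirs is used.
 */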
782
783
784 /* return address's pte at specific level */
785 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
786                                          unsigned long pfn,
787                                          int level, int *large_page)
788 {
789         struct dma_pte *parent, *pte = NULL;
790         int total = agaw_to_level(domain->agaw);
791         int offset;
792
793         parent = domain->pgd;
794         while (level <= total) {
795                 offset = pfn_level_offset(pfn, total);
796                 pte = &parent[offset];
797                 if (level == total)
798                         return pte;
799
800                 if (!dma_pte_present(pte)) {
801                         *large_page = total;
802                         break;
803                 }
804
805                 if (pte->val & DMA_PTE_LARGE_PAGE) {
806                         *large_page = total;
807                         return pte;
808                 }
809
810                 parent = phys_to_virt(dma_pte_addr(pte));
811                 total--;
812         }
813         return NULL;
814 }
815
816 /* clear last level pte, a tlb flush should be followed */
817 static void dma_pte_clear_range(struct dmar_domain *domain,
818                                 unsigned long start_pfn,
819                                 unsigned long last_pfn)
820 {
821         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
822         unsigned int large_page = 1;
823         struct dma_pte *first_pte, *pte;
824
825         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
826         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
827         BUG_ON(start_pfn > last_pfn);
828
829         /* we don't need lock here; nobody else touches the iova range */
830         do {
831                 large_page = 1;
832                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
833                 if (!pte) {
834                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
835                         continue;
836                 }
837                 do {
838                         dma_clear_pte(pte);
839                         start_pfn += lvl_to_nr_pages(large_page);
840                         pte++;
841                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
842
843                 domain_flush_cache(domain, first_pte,
844                                    (void *)pte - (void *)first_pte);
845
846         } while (start_pfn && start_pfn <= last_pfn);
847 }
848
849 /* free page table pages. last level pte should already be cleared */
850 static void dma_pte_free_pagetable(struct dmar_domain *domain,
851                                    unsigned long start_pfn,
852                                    unsigned long last_pfn)
853 {
854         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
855         struct dma_pte *first_pte, *pte;
856         int total = agaw_to_level(domain->agaw);
857         int level;
858         unsigned long tmp;
859         int large_page = 2;
860
861         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
862         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
863         BUG_ON(start_pfn > last_pfn);
864
865         /* We don't need lock here; nobody else touches the iova range */
866         level = 2;
867         while (level <= total) {
868                 tmp = align_to_level(start_pfn, level);
869
870                 /* If we can't even clear one PTE at this level, we're done */
871                 if (tmp + level_size(level) - 1 > last_pfn)
872                         return;
873
874                 do {
875                         large_page = level;
876                         first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
877                         if (large_page > level)
878                                 level = large_page + 1;
879                         if (!pte) {
880                                 tmp = align_to_level(tmp + 1, level + 1);
881                                 continue;
882                         }
883                         do {
884                                 if (dma_pte_present(pte)) {
885                                         free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
886                                         dma_clear_pte(pte);
887                                 }
888                                 pte++;
889                                 tmp += level_size(level);
890                         } while (!first_pte_in_page(pte) &&
891                                  tmp + level_size(level) - 1 <= last_pfn);
892
893                         domain_flush_cache(domain, first_pte,
894                                            (void *)pte - (void *)first_pte);
895                         
896                 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
897                 level++;
898         }
899         /* free pgd */
900         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
901                 free_pgtable_page(domain->pgd);
902                 domain->pgd = NULL;
903         }
904 }
905
906 /* iommu handling */
907 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
908 {
909         struct root_entry *root;
910         unsigned long flags;
911
912         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
913         if (!root)
914                 return -ENOMEM;
915
916         __iommu_flush_cache(iommu, root, ROOT_SIZE);
917
918         spin_lock_irqsave(&iommu->lock, flags);
919         iommu->root_entry = root;
920         spin_unlock_irqrestore(&iommu->lock, flags);
921
922         return 0;
923 }
924
925 static void iommu_set_root_entry(struct intel_iommu *iommu)
926 {
927         void *addr;
928         u32 sts;
929         unsigned long flag;
930
931         addr = iommu->root_entry;
932
933         spin_lock_irqsave(&iommu->register_lock, flag);
934         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
935
936         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
937
938         /* Make sure hardware completes it */
939         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
940                       readl, (sts & DMA_GSTS_RTPS), sts);
941
942         spin_unlock_irqrestore(&iommu->register_lock, flag);
943 }
944
945 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
946 {
947         u32 val;
948         unsigned long flag;
949
950         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
951                 return;
952
953         spin_lock_irqsave(&iommu->register_lock, flag);
954         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
955
956         /* Make sure hardware completes it */
957         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
958                       readl, (!(val & DMA_GSTS_WBFS)), val);
959
960         spin_unlock_irqrestore(&iommu->register_lock, flag);
961 }
962
963 /* invalidate context-cache entries */
964 static void __iommu_flush_context(struct intel_iommu *iommu,
965                                   u16 did, u16 source_id, u8 function_mask,
966                                   u64 type)
967 {
968         u64 val = 0;
969         unsigned long flag;
970
971         switch (type) {
972         case DMA_CCMD_GLOBAL_INVL:
973                 val = DMA_CCMD_GLOBAL_INVL;
974                 break;
975         case DMA_CCMD_DOMAIN_INVL:
976                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
977                 break;
978         case DMA_CCMD_DEVICE_INVL:
979                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
980                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
981                 break;
982         default:
983                 BUG();
984         }
985         val |= DMA_CCMD_ICC;
986
987         spin_lock_irqsave(&iommu->register_lock, flag);
988         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
989
990         /* Make sure hardware completes it */
991         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
992                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
993
994         spin_unlock_irqrestore(&iommu->register_lock, flag);
995 }
996
997 /* invalidate IOTLB entries */
998 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
999                                 u64 addr, unsigned int size_order, u64 type)
1000 {
1001         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1002         u64 val = 0, val_iva = 0;
1003         unsigned long flag;
1004
1005         switch (type) {
1006         case DMA_TLB_GLOBAL_FLUSH:
1007                 /* a global flush doesn't need to set IVA_REG */
1008                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1009                 break;
1010         case DMA_TLB_DSI_FLUSH:
1011                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1012                 break;
1013         case DMA_TLB_PSI_FLUSH:
1014                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1015                 /* Note: always flush non-leaf currently */
1016                 val_iva = size_order | addr;
1017                 break;
1018         default:
1019                 BUG();
1020         }
1021         /* Note: set drain read/write */
1022 #if 0
1023         /*
1024          * This is probably only needed to be extra safe; it looks like
1025          * we can ignore it without any impact.
1026          */
1027         if (cap_read_drain(iommu->cap))
1028                 val |= DMA_TLB_READ_DRAIN;
1029 #endif
1030         if (cap_write_drain(iommu->cap))
1031                 val |= DMA_TLB_WRITE_DRAIN;
1032
1033         spin_lock_irqsave(&iommu->register_lock, flag);
1034         /* Note: Only uses first TLB reg currently */
1035         if (val_iva)
1036                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1037         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1038
1039         /* Make sure hardware completes it */
1040         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1041                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1042
1043         spin_unlock_irqrestore(&iommu->register_lock, flag);
1044
1045         /* check IOTLB invalidation granularity */
1046         if (DMA_TLB_IAIG(val) == 0)
1047                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1048         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1049                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1050                         (unsigned long long)DMA_TLB_IIRG(type),
1051                         (unsigned long long)DMA_TLB_IAIG(val));
1052 }
1053
1054 static struct device_domain_info *iommu_support_dev_iotlb(
1055         struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1056 {
1057         int found = 0;
1058         unsigned long flags;
1059         struct device_domain_info *info;
1060         struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1061
1062         if (!ecap_dev_iotlb_support(iommu->ecap))
1063                 return NULL;
1064
1065         if (!iommu->qi)
1066                 return NULL;
1067
1068         spin_lock_irqsave(&device_domain_lock, flags);
1069         list_for_each_entry(info, &domain->devices, link)
1070                 if (info->bus == bus && info->devfn == devfn) {
1071                         found = 1;
1072                         break;
1073                 }
1074         spin_unlock_irqrestore(&device_domain_lock, flags);
1075
1076         if (!found || !info->dev)
1077                 return NULL;
1078
1079         if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1080                 return NULL;
1081
1082         if (!dmar_find_matched_atsr_unit(info->dev))
1083                 return NULL;
1084
1085         info->iommu = iommu;
1086
1087         return info;
1088 }
1089
1090 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1091 {
1092         if (!info)
1093                 return;
1094
1095         pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1096 }
1097
1098 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1099 {
1100         if (!info->dev || !pci_ats_enabled(info->dev))
1101                 return;
1102
1103         pci_disable_ats(info->dev);
1104 }
1105
1106 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1107                                   u64 addr, unsigned mask)
1108 {
1109         u16 sid, qdep;
1110         unsigned long flags;
1111         struct device_domain_info *info;
1112
1113         spin_lock_irqsave(&device_domain_lock, flags);
1114         list_for_each_entry(info, &domain->devices, link) {
1115                 if (!info->dev || !pci_ats_enabled(info->dev))
1116                         continue;
1117
1118                 sid = info->bus << 8 | info->devfn;
1119                 qdep = pci_ats_queue_depth(info->dev);
1120                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1121         }
1122         spin_unlock_irqrestore(&device_domain_lock, flags);
1123 }
1124
1125 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1126                                   unsigned long pfn, unsigned int pages, int map)
1127 {
1128         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1129         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1130
1131         BUG_ON(pages == 0);
1132
1133         /*
1134          * Fall back to a domain-selective flush if there is no PSI support
1135          * or the size is too big.
1136          * PSI requires the page count to be a power of two, and the base
1137          * address to be naturally aligned to the size.
1138          */
1139         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1140                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1141                                                 DMA_TLB_DSI_FLUSH);
1142         else
1143                 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1144                                                 DMA_TLB_PSI_FLUSH);
1145
1146         /*
1147          * In caching mode, changes of pages from non-present to present require
1148          * a flush. However, the device IOTLB doesn't need to be flushed in this case.
1149          */
1150         if (!cap_caching_mode(iommu->cap) || !map)
1151                 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1152 }
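
/*
 * Example: flushing 5 pages rounds up to 8, so mask == 3 and the hardware
 * invalidates a naturally aligned 8-page (32KiB) region around addr; if
 * mask exceeds cap_max_amask_val() the code falls back to a full
 * domain-selective flush instead.
 */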
1153
1154 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1155 {
1156         u32 pmen;
1157         unsigned long flags;
1158
1159         spin_lock_irqsave(&iommu->register_lock, flags);
1160         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1161         pmen &= ~DMA_PMEN_EPM;
1162         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1163
1164         /* wait for the protected region status bit to clear */
1165         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1166                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1167
1168         spin_unlock_irqrestore(&iommu->register_lock, flags);
1169 }
1170
1171 static int iommu_enable_translation(struct intel_iommu *iommu)
1172 {
1173         u32 sts;
1174         unsigned long flags;
1175
1176         spin_lock_irqsave(&iommu->register_lock, flags);
1177         iommu->gcmd |= DMA_GCMD_TE;
1178         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1179
1180         /* Make sure hardware completes it */
1181         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1182                       readl, (sts & DMA_GSTS_TES), sts);
1183
1184         spin_unlock_irqrestore(&iommu->register_lock, flags);
1185         return 0;
1186 }
1187
1188 static int iommu_disable_translation(struct intel_iommu *iommu)
1189 {
1190         u32 sts;
1191         unsigned long flag;
1192
1193         spin_lock_irqsave(&iommu->register_lock, flag);
1194         iommu->gcmd &= ~DMA_GCMD_TE;
1195         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1196
1197         /* Make sure hardware completes it */
1198         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1199                       readl, (!(sts & DMA_GSTS_TES)), sts);
1200
1201         spin_unlock_irqrestore(&iommu->register_lock, flag);
1202         return 0;
1203 }
1204
1205
1206 static int iommu_init_domains(struct intel_iommu *iommu)
1207 {
1208         unsigned long ndomains;
1209         unsigned long nlongs;
1210
1211         ndomains = cap_ndoms(iommu->cap);
1212         pr_debug("IOMMU %d: Number of Domains supportd <%ld>\n", iommu->seq_id,
1213                         ndomains);
1214         nlongs = BITS_TO_LONGS(ndomains);
1215
1216         spin_lock_init(&iommu->lock);
1217
1218         /* TBD: there might be 64K domains;
1219          * consider a different allocation scheme for future chips
1220          */
1221         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1222         if (!iommu->domain_ids) {
1223                 printk(KERN_ERR "Allocating domain id array failed\n");
1224                 return -ENOMEM;
1225         }
1226         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1227                         GFP_KERNEL);
1228         if (!iommu->domains) {
1229                 printk(KERN_ERR "Allocating domain array failed\n");
1230                 return -ENOMEM;
1231         }
1232
1233         /*
1234          * If caching mode is set, then invalid translations are tagged
1235          * with domain id 0. Hence we need to pre-allocate it.
1236          */
1237         if (cap_caching_mode(iommu->cap))
1238                 set_bit(0, iommu->domain_ids);
1239         return 0;
1240 }
1241
1242
1243 static void domain_exit(struct dmar_domain *domain);
1244 static void vm_domain_exit(struct dmar_domain *domain);
1245
1246 void free_dmar_iommu(struct intel_iommu *iommu)
1247 {
1248         struct dmar_domain *domain;
1249         int i;
1250         unsigned long flags;
1251
1252         if ((iommu->domains) && (iommu->domain_ids)) {
1253                 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1254                         domain = iommu->domains[i];
1255                         clear_bit(i, iommu->domain_ids);
1256
1257                         spin_lock_irqsave(&domain->iommu_lock, flags);
1258                         if (--domain->iommu_count == 0) {
1259                                 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1260                                         vm_domain_exit(domain);
1261                                 else
1262                                         domain_exit(domain);
1263                         }
1264                         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1265                 }
1266         }
1267
1268         if (iommu->gcmd & DMA_GCMD_TE)
1269                 iommu_disable_translation(iommu);
1270
1271         if (iommu->irq) {
1272                 irq_set_handler_data(iommu->irq, NULL);
1273                 /* This will mask the irq */
1274                 free_irq(iommu->irq, iommu);
1275                 destroy_irq(iommu->irq);
1276         }
1277
1278         kfree(iommu->domains);
1279         kfree(iommu->domain_ids);
1280
1281         g_iommus[iommu->seq_id] = NULL;
1282
1283         /* if all iommus are freed, free g_iommus */
1284         for (i = 0; i < g_num_of_iommus; i++) {
1285                 if (g_iommus[i])
1286                         break;
1287         }
1288
1289         if (i == g_num_of_iommus)
1290                 kfree(g_iommus);
1291
1292         /* free context mapping */
1293         free_context_table(iommu);
1294 }
1295
1296 static struct dmar_domain *alloc_domain(void)
1297 {
1298         struct dmar_domain *domain;
1299
1300         domain = alloc_domain_mem();
1301         if (!domain)
1302                 return NULL;
1303
1304         domain->nid = -1;
1305         memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1306         domain->flags = 0;
1307
1308         return domain;
1309 }
1310
1311 static int iommu_attach_domain(struct dmar_domain *domain,
1312                                struct intel_iommu *iommu)
1313 {
1314         int num;
1315         unsigned long ndomains;
1316         unsigned long flags;
1317
1318         ndomains = cap_ndoms(iommu->cap);
1319
1320         spin_lock_irqsave(&iommu->lock, flags);
1321
1322         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1323         if (num >= ndomains) {
1324                 spin_unlock_irqrestore(&iommu->lock, flags);
1325                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1326                 return -ENOMEM;
1327         }
1328
1329         domain->id = num;
1330         set_bit(num, iommu->domain_ids);
1331         set_bit(iommu->seq_id, &domain->iommu_bmp);
1332         iommu->domains[num] = domain;
1333         spin_unlock_irqrestore(&iommu->lock, flags);
1334
1335         return 0;
1336 }
1337
1338 static void iommu_detach_domain(struct dmar_domain *domain,
1339                                 struct intel_iommu *iommu)
1340 {
1341         unsigned long flags;
1342         int num, ndomains;
1343         int found = 0;
1344
1345         spin_lock_irqsave(&iommu->lock, flags);
1346         ndomains = cap_ndoms(iommu->cap);
1347         for_each_set_bit(num, iommu->domain_ids, ndomains) {
1348                 if (iommu->domains[num] == domain) {
1349                         found = 1;
1350                         break;
1351                 }
1352         }
1353
1354         if (found) {
1355                 clear_bit(num, iommu->domain_ids);
1356                 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1357                 iommu->domains[num] = NULL;
1358         }
1359         spin_unlock_irqrestore(&iommu->lock, flags);
1360 }
1361
1362 static struct iova_domain reserved_iova_list;
1363 static struct lock_class_key reserved_rbtree_key;
1364
1365 static int dmar_init_reserved_ranges(void)
1366 {
1367         struct pci_dev *pdev = NULL;
1368         struct iova *iova;
1369         int i;
1370
1371         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1372
1373         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1374                 &reserved_rbtree_key);
1375
1376         /* IOAPIC ranges shouldn't be accessed by DMA */
1377         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1378                 IOVA_PFN(IOAPIC_RANGE_END));
1379         if (!iova) {
1380                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1381                 return -ENODEV;
1382         }
1383
1384         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1385         for_each_pci_dev(pdev) {
1386                 struct resource *r;
1387
1388                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1389                         r = &pdev->resource[i];
1390                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1391                                 continue;
1392                         iova = reserve_iova(&reserved_iova_list,
1393                                             IOVA_PFN(r->start),
1394                                             IOVA_PFN(r->end));
1395                         if (!iova) {
1396                                 printk(KERN_ERR "Reserve iova failed\n");
1397                                 return -ENODEV;
1398                         }
1399                 }
1400         }
1401         return 0;
1402 }
1403
1404 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1405 {
1406         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1407 }
1408
1409 static inline int guestwidth_to_adjustwidth(int gaw)
1410 {
1411         int agaw;
1412         int r = (gaw - 12) % 9;
1413
1414         if (r == 0)
1415                 agaw = gaw;
1416         else
1417                 agaw = gaw + 9 - r;
1418         if (agaw > 64)
1419                 agaw = 64;
1420         return agaw;
1421 }
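
/*
 * Example: guestwidth_to_adjustwidth() rounds the guest address width up to
 * the next value the page-table layout can represent (12 + a multiple of 9):
 * gaw 48 stays 48, gaw 36 becomes 39, and anything above 64 is capped at 64.
 */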
1422
1423 static int domain_init(struct dmar_domain *domain, int guest_width)
1424 {
1425         struct intel_iommu *iommu;
1426         int adjust_width, agaw;
1427         unsigned long sagaw;
1428
1429         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1430         spin_lock_init(&domain->iommu_lock);
1431
1432         domain_reserve_special_ranges(domain);
1433
1434         /* calculate AGAW */
1435         iommu = domain_get_iommu(domain);
1436         if (guest_width > cap_mgaw(iommu->cap))
1437                 guest_width = cap_mgaw(iommu->cap);
1438         domain->gaw = guest_width;
1439         adjust_width = guestwidth_to_adjustwidth(guest_width);
1440         agaw = width_to_agaw(adjust_width);
1441         sagaw = cap_sagaw(iommu->cap);
1442         if (!test_bit(agaw, &sagaw)) {
1443                 /* hardware doesn't support it, choose a bigger one */
1444                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1445                 agaw = find_next_bit(&sagaw, 5, agaw);
1446                 if (agaw >= 5)
1447                         return -ENODEV;
1448         }
1449         domain->agaw = agaw;
1450         INIT_LIST_HEAD(&domain->devices);
1451
1452         if (ecap_coherent(iommu->ecap))
1453                 domain->iommu_coherency = 1;
1454         else
1455                 domain->iommu_coherency = 0;
1456
1457         if (ecap_sc_support(iommu->ecap))
1458                 domain->iommu_snooping = 1;
1459         else
1460                 domain->iommu_snooping = 0;
1461
1462         domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1463         domain->iommu_count = 1;
1464         domain->nid = iommu->node;
1465
1466         /* always allocate the top pgd */
1467         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1468         if (!domain->pgd)
1469                 return -ENOMEM;
1470         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1471         return 0;
1472 }
1473
1474 static void domain_exit(struct dmar_domain *domain)
1475 {
1476         struct dmar_drhd_unit *drhd;
1477         struct intel_iommu *iommu;
1478
1479         /* Domain 0 is reserved, so don't process it */
1480         if (!domain)
1481                 return;
1482
1483         /* Flush any lazy unmaps that may reference this domain */
1484         if (!intel_iommu_strict)
1485                 flush_unmaps_timeout(0);
1486
1487         domain_remove_dev_info(domain);
1488         /* destroy iovas */
1489         put_iova_domain(&domain->iovad);
1490
1491         /* clear ptes */
1492         dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1493
1494         /* free page tables */
1495         dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1496
1497         for_each_active_iommu(iommu, drhd)
1498                 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1499                         iommu_detach_domain(domain, iommu);
1500
1501         free_domain_mem(domain);
1502 }
1503
1504 static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1505                                  u8 bus, u8 devfn, int translation)
1506 {
1507         struct context_entry *context;
1508         unsigned long flags;
1509         struct intel_iommu *iommu;
1510         struct dma_pte *pgd;
1511         unsigned long num;
1512         unsigned long ndomains;
1513         int id;
1514         int agaw;
1515         struct device_domain_info *info = NULL;
1516
1517         pr_debug("Set context mapping for %02x:%02x.%d\n",
1518                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1519
1520         BUG_ON(!domain->pgd);
1521         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1522                translation != CONTEXT_TT_MULTI_LEVEL);
1523
1524         iommu = device_to_iommu(segment, bus, devfn);
1525         if (!iommu)
1526                 return -ENODEV;
1527
1528         context = device_to_context_entry(iommu, bus, devfn);
1529         if (!context)
1530                 return -ENOMEM;
1531         spin_lock_irqsave(&iommu->lock, flags);
1532         if (context_present(context)) {
1533                 spin_unlock_irqrestore(&iommu->lock, flags);
1534                 return 0;
1535         }
1536
1537         id = domain->id;
1538         pgd = domain->pgd;
1539
1540         if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1541             domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1542                 int found = 0;
1543
1544                 /* find an available domain id for this device in iommu */
1545                 ndomains = cap_ndoms(iommu->cap);
1546                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1547                         if (iommu->domains[num] == domain) {
1548                                 id = num;
1549                                 found = 1;
1550                                 break;
1551                         }
1552                 }
1553
1554                 if (found == 0) {
1555                         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1556                         if (num >= ndomains) {
1557                                 spin_unlock_irqrestore(&iommu->lock, flags);
1558                                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1559                                 return -EFAULT;
1560                         }
1561
1562                         set_bit(num, iommu->domain_ids);
1563                         iommu->domains[num] = domain;
1564                         id = num;
1565                 }
1566
1567                 /* Skip top levels of the page tables for an
1568                  * iommu which has a smaller agaw than the default.
1569                  * Unnecessary for PT mode.
1570                  */
1571                 if (translation != CONTEXT_TT_PASS_THROUGH) {
1572                         for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1573                                 pgd = phys_to_virt(dma_pte_addr(pgd));
1574                                 if (!dma_pte_present(pgd)) {
1575                                         spin_unlock_irqrestore(&iommu->lock, flags);
1576                                         return -ENOMEM;
1577                                 }
1578                         }
1579                 }
1580         }
1581
1582         context_set_domain_id(context, id);
1583
1584         if (translation != CONTEXT_TT_PASS_THROUGH) {
1585                 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1586                 translation = info ? CONTEXT_TT_DEV_IOTLB :
1587                                      CONTEXT_TT_MULTI_LEVEL;
1588         }
1589         /*
1590          * In pass-through mode, AW must be programmed to indicate the largest
1591          * AGAW value supported by hardware; ASR is ignored by hardware.
1592          */
1593         if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1594                 context_set_address_width(context, iommu->msagaw);
1595         else {
1596                 context_set_address_root(context, virt_to_phys(pgd));
1597                 context_set_address_width(context, iommu->agaw);
1598         }
1599
1600         context_set_translation_type(context, translation);
1601         context_set_fault_enable(context);
1602         context_set_present(context);
1603         domain_flush_cache(domain, context, sizeof(*context));
1604
1605         /*
1606          * It's a non-present to present mapping. If hardware doesn't cache
1607          * non-present entries we only need to flush the write-buffer. If it
1608          * _does_ cache non-present entries, then it does so in the special
1609          * domain #0, which we have to flush:
1610          */
1611         if (cap_caching_mode(iommu->cap)) {
1612                 iommu->flush.flush_context(iommu, 0,
1613                                            (((u16)bus) << 8) | devfn,
1614                                            DMA_CCMD_MASK_NOBIT,
1615                                            DMA_CCMD_DEVICE_INVL);
1616                 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1617         } else {
1618                 iommu_flush_write_buffer(iommu);
1619         }
1620         iommu_enable_dev_iotlb(info);
1621         spin_unlock_irqrestore(&iommu->lock, flags);
1622
1623         spin_lock_irqsave(&domain->iommu_lock, flags);
1624         if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1625                 domain->iommu_count++;
1626                 if (domain->iommu_count == 1)
1627                         domain->nid = iommu->node;
1628                 domain_update_iommu_cap(domain);
1629         }
1630         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1631         return 0;
1632 }
1633
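/*
 * Set up context mappings for @pdev and, if it sits behind a PCIe-to-PCI
 * bridge, for each bridge on the path up to and including that bridge, since
 * DMA from conventional PCI devices may be tagged with the bridge's source-id.
 */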
1634 static int
1635 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1636                         int translation)
1637 {
1638         int ret;
1639         struct pci_dev *tmp, *parent;
1640
1641         ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1642                                          pdev->bus->number, pdev->devfn,
1643                                          translation);
1644         if (ret)
1645                 return ret;
1646
1647         /* dependent device mapping */
1648         tmp = pci_find_upstream_pcie_bridge(pdev);
1649         if (!tmp)
1650                 return 0;
1651         /* Secondary interface's bus number and devfn 0 */
1652         parent = pdev->bus->self;
1653         while (parent != tmp) {
1654                 ret = domain_context_mapping_one(domain,
1655                                                  pci_domain_nr(parent->bus),
1656                                                  parent->bus->number,
1657                                                  parent->devfn, translation);
1658                 if (ret)
1659                         return ret;
1660                 parent = parent->bus->self;
1661         }
1662         if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
1663                 return domain_context_mapping_one(domain,
1664                                         pci_domain_nr(tmp->subordinate),
1665                                         tmp->subordinate->number, 0,
1666                                         translation);
1667         else /* this is a legacy PCI bridge */
1668                 return domain_context_mapping_one(domain,
1669                                                   pci_domain_nr(tmp->bus),
1670                                                   tmp->bus->number,
1671                                                   tmp->devfn,
1672                                                   translation);
1673 }
1674
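/*
 * Check whether context mappings already exist for @pdev and for every bridge
 * on the path up to its upstream PCIe-to-PCI bridge (if any).
 */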
1675 static int domain_context_mapped(struct pci_dev *pdev)
1676 {
1677         int ret;
1678         struct pci_dev *tmp, *parent;
1679         struct intel_iommu *iommu;
1680
1681         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1682                                 pdev->devfn);
1683         if (!iommu)
1684                 return -ENODEV;
1685
1686         ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1687         if (!ret)
1688                 return ret;
1689         /* dependent device mapping */
1690         tmp = pci_find_upstream_pcie_bridge(pdev);
1691         if (!tmp)
1692                 return ret;
1693         /* Secondary interface's bus number and devfn 0 */
1694         parent = pdev->bus->self;
1695         while (parent != tmp) {
1696                 ret = device_context_mapped(iommu, parent->bus->number,
1697                                             parent->devfn);
1698                 if (!ret)
1699                         return ret;
1700                 parent = parent->bus->self;
1701         }
1702         if (pci_is_pcie(tmp))
1703                 return device_context_mapped(iommu, tmp->subordinate->number,
1704                                              0);
1705         else
1706                 return device_context_mapped(iommu, tmp->bus->number,
1707                                              tmp->devfn);
1708 }
1709
1710 /* Returns a number of VTD pages, but aligned to MM page size */
1711 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1712                                             size_t size)
1713 {
1714         host_addr &= ~PAGE_MASK;
1715         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1716 }
1717
1718 /* Return largest possible superpage level for a given mapping */
1719 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1720                                           unsigned long iov_pfn,
1721                                           unsigned long phy_pfn,
1722                                           unsigned long pages)
1723 {
1724         int support, level = 1;
1725         unsigned long pfnmerge;
1726
1727         support = domain->iommu_superpage;
1728
1729         /* To use a large page, the virtual *and* physical addresses
1730            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1731            of them will mean we have to use smaller pages. So just
1732            merge them and check both at once. */
1733         pfnmerge = iov_pfn | phy_pfn;
1734
1735         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1736                 pages >>= VTD_STRIDE_SHIFT;
1737                 if (!pages)
1738                         break;
1739                 pfnmerge >>= VTD_STRIDE_SHIFT;
1740                 level++;
1741                 support--;
1742         }
1743         return level;
1744 }
1745
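/*
 * Core mapping routine: populate PTEs for @nr_pages VT-d pages starting at
 * @iov_pfn, taking the backing pages either from the scatterlist @sg or from
 * the contiguous range beginning at @phys_pfn.  Superpages are used where
 * alignment and remaining length permit, and the CPU cache is flushed for
 * each page of PTEs that gets written.
 */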
1746 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1747                             struct scatterlist *sg, unsigned long phys_pfn,
1748                             unsigned long nr_pages, int prot)
1749 {
1750         struct dma_pte *first_pte = NULL, *pte = NULL;
1751         phys_addr_t uninitialized_var(pteval);
1752         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1753         unsigned long sg_res;
1754         unsigned int largepage_lvl = 0;
1755         unsigned long lvl_pages = 0;
1756
1757         BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1758
1759         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1760                 return -EINVAL;
1761
1762         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1763
1764         if (sg)
1765                 sg_res = 0;
1766         else {
1767                 sg_res = nr_pages + 1;
1768                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1769         }
1770
1771         while (nr_pages > 0) {
1772                 uint64_t tmp;
1773
1774                 if (!sg_res) {
1775                         sg_res = aligned_nrpages(sg->offset, sg->length);
1776                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1777                         sg->dma_length = sg->length;
1778                         pteval = page_to_phys(sg_page(sg)) | prot;
1779                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
1780                 }
1781
1782                 if (!pte) {
1783                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1784
1785                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1786                         if (!pte)
1787                                 return -ENOMEM;
1788                         /* It is a large page */
1789                         if (largepage_lvl > 1)
1790                                 pteval |= DMA_PTE_LARGE_PAGE;
1791                         else
1792                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1793
1794                 }
1795                 /* We don't need a lock here; nobody else
1796                  * touches this iova range.
1797                  */
1798                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1799                 if (tmp) {
1800                         static int dumps = 5;
1801                         printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1802                                iov_pfn, tmp, (unsigned long long)pteval);
1803                         if (dumps) {
1804                                 dumps--;
1805                                 debug_dma_dump_mappings(NULL);
1806                         }
1807                         WARN_ON(1);
1808                 }
1809
1810                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1811
1812                 BUG_ON(nr_pages < lvl_pages);
1813                 BUG_ON(sg_res < lvl_pages);
1814
1815                 nr_pages -= lvl_pages;
1816                 iov_pfn += lvl_pages;
1817                 phys_pfn += lvl_pages;
1818                 pteval += lvl_pages * VTD_PAGE_SIZE;
1819                 sg_res -= lvl_pages;
1820
1821                 /* If the next PTE would be the first in a new page, then we
1822                    need to flush the cache on the entries we've just written.
1823                    And then we'll need to recalculate 'pte', so clear it and
1824                    let it get set again in the if (!pte) block above.
1825
1826                    If we're done (!nr_pages) we need to flush the cache too.
1827
1828                    Also if we've been setting superpages, we may need to
1829                    recalculate 'pte' and switch back to smaller pages for the
1830                    end of the mapping, if the trailing size is not enough to
1831                    use another superpage (i.e. sg_res < lvl_pages). */
1832                 pte++;
1833                 if (!nr_pages || first_pte_in_page(pte) ||
1834                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
1835                         domain_flush_cache(domain, first_pte,
1836                                            (void *)pte - (void *)first_pte);
1837                         pte = NULL;
1838                 }
1839
1840                 if (!sg_res && nr_pages)
1841                         sg = sg_next(sg);
1842         }
1843         return 0;
1844 }
1845
1846 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1847                                     struct scatterlist *sg, unsigned long nr_pages,
1848                                     int prot)
1849 {
1850         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1851 }
1852
1853 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1854                                      unsigned long phys_pfn, unsigned long nr_pages,
1855                                      int prot)
1856 {
1857         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1858 }
1859
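/*
 * Clear the context entry for (bus, devfn) on @iommu and flush the context
 * cache and IOTLB globally.
 */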
1860 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1861 {
1862         if (!iommu)
1863                 return;
1864
1865         clear_context_table(iommu, bus, devfn);
1866         iommu->flush.flush_context(iommu, 0, 0, 0,
1867                                            DMA_CCMD_GLOBAL_INVL);
1868         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1869 }
1870
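/*
 * Unlink and free every device_domain_info attached to @domain, tearing down
 * the corresponding context entries and device IOTLBs as we go.
 */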
1871 static void domain_remove_dev_info(struct dmar_domain *domain)
1872 {
1873         struct device_domain_info *info;
1874         unsigned long flags;
1875         struct intel_iommu *iommu;
1876
1877         spin_lock_irqsave(&device_domain_lock, flags);
1878         while (!list_empty(&domain->devices)) {
1879                 info = list_entry(domain->devices.next,
1880                         struct device_domain_info, link);
1881                 list_del(&info->link);
1882                 list_del(&info->global);
1883                 if (info->dev)
1884                         info->dev->dev.archdata.iommu = NULL;
1885                 spin_unlock_irqrestore(&device_domain_lock, flags);
1886
1887                 iommu_disable_dev_iotlb(info);
1888                 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1889                 iommu_detach_dev(iommu, info->bus, info->devfn);
1890                 free_devinfo_mem(info);
1891
1892                 spin_lock_irqsave(&device_domain_lock, flags);
1893         }
1894         spin_unlock_irqrestore(&device_domain_lock, flags);
1895 }
1896
1897 /*
1898  * find_domain
1899  * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1900  */
1901 static struct dmar_domain *
1902 find_domain(struct pci_dev *pdev)
1903 {
1904         struct device_domain_info *info;
1905
1906         /* No lock here, assumes no domain exit in normal case */
1907         info = pdev->dev.archdata.iommu;
1908         if (info)
1909                 return info->domain;
1910         return NULL;
1911 }
1912
1913 /* domain is initialized */
1914 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1915 {
1916         struct dmar_domain *domain, *found = NULL;
1917         struct intel_iommu *iommu;
1918         struct dmar_drhd_unit *drhd;
1919         struct device_domain_info *info, *tmp;
1920         struct pci_dev *dev_tmp;
1921         unsigned long flags;
1922         int bus = 0, devfn = 0;
1923         int segment;
1924         int ret;
1925
1926         domain = find_domain(pdev);
1927         if (domain)
1928                 return domain;
1929
1930         segment = pci_domain_nr(pdev->bus);
1931
1932         dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1933         if (dev_tmp) {
1934                 if (pci_is_pcie(dev_tmp)) {
1935                         bus = dev_tmp->subordinate->number;
1936                         devfn = 0;
1937                 } else {
1938                         bus = dev_tmp->bus->number;
1939                         devfn = dev_tmp->devfn;
1940                 }
1941                 spin_lock_irqsave(&device_domain_lock, flags);
1942                 list_for_each_entry(info, &device_domain_list, global) {
1943                         if (info->segment == segment &&
1944                             info->bus == bus && info->devfn == devfn) {
1945                                 found = info->domain;
1946                                 break;
1947                         }
1948                 }
1949                 spin_unlock_irqrestore(&device_domain_lock, flags);
1950                 /* the pcie-to-pci bridge already has a domain, use it */
1951                 if (found) {
1952                         domain = found;
1953                         goto found_domain;
1954                 }
1955         }
1956
1957         domain = alloc_domain();
1958         if (!domain)
1959                 goto error;
1960
1961         /* Allocate new domain for the device */
1962         drhd = dmar_find_matched_drhd_unit(pdev);
1963         if (!drhd) {
1964                 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1965                         pci_name(pdev));
1966                 return NULL;
1967         }
1968         iommu = drhd->iommu;
1969
1970         ret = iommu_attach_domain(domain, iommu);
1971         if (ret) {
1972                 free_domain_mem(domain);
1973                 goto error;
1974         }
1975
1976         if (domain_init(domain, gaw)) {
1977                 domain_exit(domain);
1978                 goto error;
1979         }
1980
1981         /* register pcie-to-pci device */
1982         if (dev_tmp) {
1983                 info = alloc_devinfo_mem();
1984                 if (!info) {
1985                         domain_exit(domain);
1986                         goto error;
1987                 }
1988                 info->segment = segment;
1989                 info->bus = bus;
1990                 info->devfn = devfn;
1991                 info->dev = NULL;
1992                 info->domain = domain;
1993                 /* This domain is shared by devices under the p2p bridge */
1994                 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1995
1996                 /* the pcie-to-pci bridge already has a domain, use it */
1997                 found = NULL;
1998                 spin_lock_irqsave(&device_domain_lock, flags);
1999                 list_for_each_entry(tmp, &device_domain_list, global) {
2000                         if (tmp->segment == segment &&
2001                             tmp->bus == bus && tmp->devfn == devfn) {
2002                                 found = tmp->domain;
2003                                 break;
2004                         }
2005                 }
2006                 if (found) {
2007                         spin_unlock_irqrestore(&device_domain_lock, flags);
2008                         free_devinfo_mem(info);
2009                         domain_exit(domain);
2010                         domain = found;
2011                 } else {
2012                         list_add(&info->link, &domain->devices);
2013                         list_add(&info->global, &device_domain_list);
2014                         spin_unlock_irqrestore(&device_domain_lock, flags);
2015                 }
2016         }
2017
2018 found_domain:
2019         info = alloc_devinfo_mem();
2020         if (!info)
2021                 goto error;
2022         info->segment = segment;
2023         info->bus = pdev->bus->number;
2024         info->devfn = pdev->devfn;
2025         info->dev = pdev;
2026         info->domain = domain;
2027         spin_lock_irqsave(&device_domain_lock, flags);
2028         /* somebody else got there first */
2029         found = find_domain(pdev);
2030         if (found != NULL) {
2031                 spin_unlock_irqrestore(&device_domain_lock, flags);
2032                 if (found != domain) {
2033                         domain_exit(domain);
2034                         domain = found;
2035                 }
2036                 free_devinfo_mem(info);
2037                 return domain;
2038         }
2039         list_add(&info->link, &domain->devices);
2040         list_add(&info->global, &device_domain_list);
2041         pdev->dev.archdata.iommu = info;
2042         spin_unlock_irqrestore(&device_domain_lock, flags);
2043         return domain;
2044 error:
2045         /* recheck it here, maybe someone else set it */
2046         return find_domain(pdev);
2047 }
2048
2049 static int iommu_identity_mapping;
2050 #define IDENTMAP_ALL            1
2051 #define IDENTMAP_GFX            2
2052 #define IDENTMAP_AZALIA         4
2053
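/*
 * Reserve the IOVA range [start, end] in @domain and install a 1:1 mapping
 * for it, clearing any PTEs that already cover the range first.
 */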
2054 static int iommu_domain_identity_map(struct dmar_domain *domain,
2055                                      unsigned long long start,
2056                                      unsigned long long end)
2057 {
2058         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2059         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2060
2061         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2062                           dma_to_mm_pfn(last_vpfn))) {
2063                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2064                 return -ENOMEM;
2065         }
2066
2067         pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2068                  start, end, domain->id);
2069         /*
2070          * RMRR range might have overlap with physical memory range,
2071          * clear it first
2072          */
2073         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2074
2075         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2076                                   last_vpfn - first_vpfn + 1,
2077                                   DMA_PTE_READ|DMA_PTE_WRITE);
2078 }
2079
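/*
 * Set up a 1:1 mapping of [start, end] (typically an RMRR region) for @pdev,
 * allocating a domain and context entry for the device if it does not have
 * one yet.  Bogus BIOS-supplied ranges are rejected with a warning.
 */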
2080 static int iommu_prepare_identity_map(struct pci_dev *pdev,
2081                                       unsigned long long start,
2082                                       unsigned long long end)
2083 {
2084         struct dmar_domain *domain;
2085         int ret;
2086
2087         domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2088         if (!domain)
2089                 return -ENOMEM;
2090
2091         /* For _hardware_ passthrough, don't bother. But for software
2092            passthrough, we do it anyway -- it may indicate a memory
2093            range which is reserved in E820 and so didn't get mapped
2094            into si_domain to start with */
2095         if (domain == si_domain && hw_pass_through) {
2096                 printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2097                        pci_name(pdev), start, end);
2098                 return 0;
2099         }
2100
2101         printk(KERN_INFO
2102                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2103                pci_name(pdev), start, end);
2104
2105         if (end < start) {
2106                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2107                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2108                         dmi_get_system_info(DMI_BIOS_VENDOR),
2109                         dmi_get_system_info(DMI_BIOS_VERSION),
2110                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2111                 ret = -EIO;
2112                 goto error;
2113         }
2114
2115         if (end >> agaw_to_width(domain->agaw)) {
2116                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2117                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2118                      agaw_to_width(domain->agaw),
2119                      dmi_get_system_info(DMI_BIOS_VENDOR),
2120                      dmi_get_system_info(DMI_BIOS_VERSION),
2121                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2122                 ret = -EIO;
2123                 goto error;
2124         }
2125
2126         ret = iommu_domain_identity_map(domain, start, end);
2127         if (ret)
2128                 goto error;
2129
2130         /* context entry init */
2131         ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2132         if (ret)
2133                 goto error;
2134
2135         return 0;
2136
2137  error:
2138         domain_exit(domain);
2139         return ret;
2140 }
2141
2142 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2143         struct pci_dev *pdev)
2144 {
2145         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2146                 return 0;
2147         return iommu_prepare_identity_map(pdev, rmrr->base_address,
2148                 rmrr->end_address + 1);
2149 }
2150
2151 #ifdef CONFIG_DMAR_FLOPPY_WA
2152 static inline void iommu_prepare_isa(void)
2153 {
2154         struct pci_dev *pdev;
2155         int ret;
2156
2157         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2158         if (!pdev)
2159                 return;
2160
2161         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2162         ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
2163
2164         if (ret)
2165                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2166                        "floppy might not work\n");
2167
2168 }
2169 #else
2170 static inline void iommu_prepare_isa(void)
2171 {
2172         return;
2173 }
2174 #endif /* !CONFIG_DMAR_FLOPPY_WA */
2175
2176 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2177
2178 static int __init si_domain_work_fn(unsigned long start_pfn,
2179                                     unsigned long end_pfn, void *datax)
2180 {
2181         int *ret = datax;
2182
2183         *ret = iommu_domain_identity_map(si_domain,
2184                                          (uint64_t)start_pfn << PAGE_SHIFT,
2185                                          (uint64_t)end_pfn << PAGE_SHIFT);
2186         return *ret;
2187
2188 }
2189
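/*
 * Allocate and initialize the static identity (si) domain, attach it to every
 * active IOMMU and, unless hardware pass-through is in use, 1:1-map all
 * usable system memory into it.
 */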
2190 static int __init si_domain_init(int hw)
2191 {
2192         struct dmar_drhd_unit *drhd;
2193         struct intel_iommu *iommu;
2194         int nid, ret = 0;
2195
2196         si_domain = alloc_domain();
2197         if (!si_domain)
2198                 return -EFAULT;
2199
2200         pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2201
2202         for_each_active_iommu(iommu, drhd) {
2203                 ret = iommu_attach_domain(si_domain, iommu);
2204                 if (ret) {
2205                         domain_exit(si_domain);
2206                         return -EFAULT;
2207                 }
2208         }
2209
2210         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2211                 domain_exit(si_domain);
2212                 return -EFAULT;
2213         }
2214
2215         si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2216
2217         if (hw)
2218                 return 0;
2219
2220         for_each_online_node(nid) {
2221                 work_with_active_regions(nid, si_domain_work_fn, &ret);
2222                 if (ret)
2223                         return ret;
2224         }
2225
2226         return 0;
2227 }
2228
2229 static void domain_remove_one_dev_info(struct dmar_domain *domain,
2230                                           struct pci_dev *pdev);
2231 static int identity_mapping(struct pci_dev *pdev)
2232 {
2233         struct device_domain_info *info;
2234
2235         if (likely(!iommu_identity_mapping))
2236                 return 0;
2237
2238
2239         list_for_each_entry(info, &si_domain->devices, link)
2240                 if (info->dev == pdev)
2241                         return 1;
2242         return 0;
2243 }
2244
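/*
 * Create the context mapping for @pdev in @domain and record the association
 * in a new device_domain_info linked onto the domain and global device lists.
 */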
2245 static int domain_add_dev_info(struct dmar_domain *domain,
2246                                struct pci_dev *pdev,
2247                                int translation)
2248 {
2249         struct device_domain_info *info;
2250         unsigned long flags;
2251         int ret;
2252
2253         info = alloc_devinfo_mem();
2254         if (!info)
2255                 return -ENOMEM;
2256
2257         ret = domain_context_mapping(domain, pdev, translation);
2258         if (ret) {
2259                 free_devinfo_mem(info);
2260                 return ret;
2261         }
2262
2263         info->segment = pci_domain_nr(pdev->bus);
2264         info->bus = pdev->bus->number;
2265         info->devfn = pdev->devfn;
2266         info->dev = pdev;
2267         info->domain = domain;
2268
2269         spin_lock_irqsave(&device_domain_lock, flags);
2270         list_add(&info->link, &domain->devices);
2271         list_add(&info->global, &device_domain_list);
2272         pdev->dev.archdata.iommu = info;
2273         spin_unlock_irqrestore(&device_domain_lock, flags);
2274
2275         return 0;
2276 }
2277
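/*
 * Decide whether @pdev should be placed in the static 1:1 domain, based on
 * the identity-mapping policy (IDENTMAP_*), the device type, and -- after
 * boot -- on whether its DMA mask covers more than 32 bits.
 */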
2278 static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2279 {
2280         if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2281                 return 1;
2282
2283         if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2284                 return 1;
2285
2286         if (!(iommu_identity_mapping & IDENTMAP_ALL))
2287                 return 0;
2288
2289         /*
2290          * We want to start off with all devices in the 1:1 domain, and
2291          * take them out later if we find they can't access all of memory.
2292          *
2293          * However, we can't do this for PCI devices behind bridges,
2294          * because all PCI devices behind the same bridge will end up
2295          * with the same source-id on their transactions.
2296          *
2297          * Practically speaking, we can't change things around for these
2298          * devices at run-time, because we can't be sure there'll be no
2299          * DMA transactions in flight for any of their siblings.
2300          * 
2301          * So PCI devices (unless they're on the root bus) as well as
2302          * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2303          * the 1:1 domain, just in _case_ one of their siblings turns out
2304          * not to be able to map all of memory.
2305          */
2306         if (!pci_is_pcie(pdev)) {
2307                 if (!pci_is_root_bus(pdev->bus))
2308                         return 0;
2309                 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2310                         return 0;
2311         } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2312                 return 0;
2313
2314         /* 
2315          * At boot time, we don't yet know if devices will be 64-bit capable.
2316          * Assume that they will -- if they turn out not to be, then we can 
2317          * take them out of the 1:1 domain later.
2318          */
2319         if (!startup)
2320                 return pdev->dma_mask > DMA_BIT_MASK(32);
2321
2322         return 1;
2323 }
2324
2325 static int __init iommu_prepare_static_identity_mapping(int hw)
2326 {
2327         struct pci_dev *pdev = NULL;
2328         int ret;
2329
2330         ret = si_domain_init(hw);
2331         if (ret)
2332                 return -EFAULT;
2333
2334         for_each_pci_dev(pdev) {
2335                 if (iommu_should_identity_map(pdev, 1)) {
2336                         printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2337                                hw ? "hardware" : "software", pci_name(pdev));
2338
2339                         ret = domain_add_dev_info(si_domain, pdev,
2340                                                      hw ? CONTEXT_TT_PASS_THROUGH :
2341                                                      CONTEXT_TT_MULTI_LEVEL);
2342                         if (ret)
2343                                 return ret;
2344                 }
2345         }
2346
2347         return 0;
2348 }
2349
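/*
 * One-time setup of DMA remapping: allocate per-IOMMU data structures, root
 * and context tables and the invalidation machinery, establish static
 * identity, RMRR and ISA mappings as configured, then enable fault reporting
 * and translation on every DRHD unit.
 */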
2350 static int __init init_dmars(void)
2351 {
2352         struct dmar_drhd_unit *drhd;
2353         struct dmar_rmrr_unit *rmrr;
2354         struct pci_dev *pdev;
2355         struct intel_iommu *iommu;
2356         int i, ret;
2357
2358         /*
2359          * for each drhd
2360          *    allocate root
2361          *    initialize and program root entry to not present
2362          * endfor
2363          */
2364         for_each_drhd_unit(drhd) {
2365                 g_num_of_iommus++;
2366                 /*
2367                  * lock not needed as this is only incremented in the
2368                  * single-threaded kernel __init code path; all other
2369                  * accesses are read only
2370                  */
2371         }
2372
2373         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2374                         GFP_KERNEL);
2375         if (!g_iommus) {
2376                 printk(KERN_ERR "Allocating global iommu array failed\n");
2377                 ret = -ENOMEM;
2378                 goto error;
2379         }
2380
2381         deferred_flush = kzalloc(g_num_of_iommus *
2382                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2383         if (!deferred_flush) {
2384                 ret = -ENOMEM;
2385                 goto error;
2386         }
2387
2388         for_each_drhd_unit(drhd) {
2389                 if (drhd->ignored)
2390                         continue;
2391
2392                 iommu = drhd->iommu;
2393                 g_iommus[iommu->seq_id] = iommu;
2394
2395                 ret = iommu_init_domains(iommu);
2396                 if (ret)
2397                         goto error;
2398
2399                 /*
2400                  * TBD:
2401                  * we could share the same root & context tables
2402                  * among all IOMMUs. Need to split it later.
2403                  */
2404                 ret = iommu_alloc_root_entry(iommu);
2405                 if (ret) {
2406                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2407                         goto error;
2408                 }
2409                 if (!ecap_pass_through(iommu->ecap))
2410                         hw_pass_through = 0;
2411         }
2412
2413         /*
2414          * Start from a sane iommu hardware state.
2415          */
2416         for_each_drhd_unit(drhd) {
2417                 if (drhd->ignored)
2418                         continue;
2419
2420                 iommu = drhd->iommu;
2421
2422                 /*
2423                  * If queued invalidation was already initialized by us
2424                  * (for example, while enabling interrupt-remapping) then
2425                  * things are already rolling from a sane state.
2426                  */
2427                 if (iommu->qi)
2428                         continue;
2429
2430                 /*
2431                  * Clear any previous faults.
2432                  */
2433                 dmar_fault(-1, iommu);
2434                 /*
2435                  * Disable queued invalidation if supported and already enabled
2436                  * before OS handover.
2437                  */
2438                 dmar_disable_qi(iommu);
2439         }
2440
2441         for_each_drhd_unit(drhd) {
2442                 if (drhd->ignored)
2443                         continue;
2444
2445                 iommu = drhd->iommu;
2446
2447                 if (dmar_enable_qi(iommu)) {
2448                         /*
2449                          * Queued Invalidate not enabled, use Register Based
2450                          * Invalidate
2451                          */
2452                         iommu->flush.flush_context = __iommu_flush_context;
2453                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2454                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2455                                "invalidation\n",
2456                                 iommu->seq_id,
2457                                (unsigned long long)drhd->reg_base_addr);
2458                 } else {
2459                         iommu->flush.flush_context = qi_flush_context;
2460                         iommu->flush.flush_iotlb = qi_flush_iotlb;
2461                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2462                                "invalidation\n",
2463                                 iommu->seq_id,
2464                                (unsigned long long)drhd->reg_base_addr);
2465                 }
2466         }
2467
2468         if (iommu_pass_through)
2469                 iommu_identity_mapping |= IDENTMAP_ALL;
2470
2471 #ifdef CONFIG_DMAR_BROKEN_GFX_WA
2472         iommu_identity_mapping |= IDENTMAP_GFX;
2473 #endif
2474
2475         check_tylersburg_isoch();
2476
2477         /*
2478          * If identity mapping is enabled, set up the static identity domain
2479          * and context entries for the devices that should use it.  The rmrr
2480          * and isa mappings are set up further below.
2481          */
2482         if (iommu_identity_mapping) {
2483                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2484                 if (ret) {
2485                         printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2486                         goto error;
2487                 }
2488         }
2489         /*
2490          * For each rmrr
2491          *   for each dev attached to rmrr
2492          *   do
2493          *     locate drhd for dev, alloc domain for dev
2494          *     allocate free domain
2495          *     allocate page table entries for rmrr
2496          *     if context not allocated for bus
2497          *           allocate and init context
2498          *           set present in root table for this bus
2499          *     init context with domain, translation etc
2500          *    endfor
2501          * endfor
2502          */
2503         printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2504         for_each_rmrr_units(rmrr) {
2505                 for (i = 0; i < rmrr->devices_cnt; i++) {
2506                         pdev = rmrr->devices[i];
2507                         /*
2508                          * some BIOSes list non-existent devices in the
2509                          * DMAR table.
2510                          */
2511                         if (!pdev)
2512                                 continue;
2513                         ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2514                         if (ret)
2515                                 printk(KERN_ERR
2516                                        "IOMMU: mapping reserved region failed\n");
2517                 }
2518         }
2519
2520         iommu_prepare_isa();
2521
2522         /*
2523          * for each drhd
2524          *   enable fault log
2525          *   global invalidate context cache
2526          *   global invalidate iotlb
2527          *   enable translation
2528          */
2529         for_each_drhd_unit(drhd) {
2530                 if (drhd->ignored) {
2531                         /*
2532                          * we always have to disable PMRs or DMA may fail on
2533                          * this device
2534                          */
2535                         if (force_on)
2536                                 iommu_disable_protect_mem_regions(drhd->iommu);
2537                         continue;
2538                 }
2539                 iommu = drhd->iommu;
2540
2541                 iommu_flush_write_buffer(iommu);
2542
2543                 ret = dmar_set_interrupt(iommu);
2544                 if (ret)
2545                         goto error;
2546
2547                 iommu_set_root_entry(iommu);
2548
2549                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2550                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2551
2552                 ret = iommu_enable_translation(iommu);
2553                 if (ret)
2554                         goto error;
2555
2556                 iommu_disable_protect_mem_regions(iommu);
2557         }
2558
2559         return 0;
2560 error:
2561         for_each_drhd_unit(drhd) {
2562                 if (drhd->ignored)
2563                         continue;
2564                 iommu = drhd->iommu;
2565                 free_iommu(iommu);
2566         }
2567         kfree(g_iommus);
2568         return ret;
2569 }
2570
2571 /* This takes a number of _MM_ pages, not VTD pages */
2572 static struct iova *intel_alloc_iova(struct device *dev,
2573                                      struct dmar_domain *domain,
2574                                      unsigned long nrpages, uint64_t dma_mask)
2575 {
2576         struct pci_dev *pdev = to_pci_dev(dev);
2577         struct iova *iova = NULL;
2578
2579         /* Restrict dma_mask to the width that the iommu can handle */
2580         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2581
2582         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2583                 /*
2584                  * First try to allocate an io virtual address in
2585                  * DMA_BIT_MASK(32) and if that fails then try allocating
2586                  * from the higher range
2587                  */
2588                 iova = alloc_iova(&domain->iovad, nrpages,
2589                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
2590                 if (iova)
2591                         return iova;
2592         }
2593         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2594         if (unlikely(!iova)) {
2595                 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2596                        nrpages, pci_name(pdev));
2597                 return NULL;
2598         }
2599
2600         return iova;
2601 }
2602
2603 static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2604 {
2605         struct dmar_domain *domain;
2606         int ret;
2607
2608         domain = get_domain_for_dev(pdev,
2609                         DEFAULT_DOMAIN_ADDRESS_WIDTH);
2610         if (!domain) {
2611                 printk(KERN_ERR
2612                         "Allocating domain for %s failed\n", pci_name(pdev));
2613                 return NULL;
2614         }
2615
2616         /* make sure context mapping is ok */
2617         if (unlikely(!domain_context_mapped(pdev))) {
2618                 ret = domain_context_mapping(domain, pdev,
2619                                              CONTEXT_TT_MULTI_LEVEL);
2620                 if (ret) {
2621                         printk(KERN_ERR
2622                                 "Domain context map for %s failed\n",
2623                                 pci_name(pdev));
2624                         return NULL;
2625                 }
2626         }
2627
2628         return domain;
2629 }
2630
2631 static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2632 {
2633         struct device_domain_info *info;
2634
2635         /* No lock here, assumes no domain exit in normal case */
2636         info = dev->dev.archdata.iommu;
2637         if (likely(info))
2638                 return info->domain;
2639
2640         return __get_valid_domain_for_dev(dev);
2641 }
2642
2643 static int iommu_dummy(struct pci_dev *pdev)
2644 {
2645         return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2646 }
2647
2648 /* Check if the pdev needs to go through the non-identity map and unmap process. */
2649 static int iommu_no_mapping(struct device *dev)
2650 {
2651         struct pci_dev *pdev;
2652         int found;
2653
2654         if (unlikely(dev->bus != &pci_bus_type))
2655                 return 1;
2656
2657         pdev = to_pci_dev(dev);
2658         if (iommu_dummy(pdev))
2659                 return 1;
2660
2661         if (!iommu_identity_mapping)
2662                 return 0;
2663
2664         found = identity_mapping(pdev);
2665         if (found) {
2666                 if (iommu_should_identity_map(pdev, 0))
2667                         return 1;
2668                 else {
2669                         /*
2670                          * A 32-bit DMA device is removed from si_domain and
2671                          * falls back to non-identity mapping.
2672                          */
2673                         domain_remove_one_dev_info(si_domain, pdev);
2674                         printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2675                                pci_name(pdev));
2676                         return 0;
2677                 }
2678         } else {
2679                 /*
2680                  * If a 64-bit DMA device has been detached from a VM, the
2681                  * device is put into si_domain for identity mapping.
2682                  */
2683                 if (iommu_should_identity_map(pdev, 0)) {
2684                         int ret;
2685                         ret = domain_add_dev_info(si_domain, pdev,
2686                                                   hw_pass_through ?
2687                                                   CONTEXT_TT_PASS_THROUGH :
2688                                                   CONTEXT_TT_MULTI_LEVEL);
2689                         if (!ret) {
2690                                 printk(KERN_INFO "64bit %s uses identity mapping\n",
2691                                        pci_name(pdev));
2692                                 return 1;
2693                         }
2694                 }
2695         }
2696
2697         return 0;
2698 }
2699
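/*
 * Map @size bytes at physical address @paddr for DMA: allocate an IOVA range
 * below @dma_mask, install the page-table entries and flush the IOTLB (in
 * caching mode) or the write buffer, then return the bus address to hand to
 * the device.  Returns 0 on failure.
 */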
2700 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2701                                      size_t size, int dir, u64 dma_mask)
2702 {
2703         struct pci_dev *pdev = to_pci_dev(hwdev);
2704         struct dmar_domain *domain;
2705         phys_addr_t start_paddr;
2706         struct iova *iova;
2707         int prot = 0;
2708         int ret;
2709         struct intel_iommu *iommu;
2710         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2711
2712         BUG_ON(dir == DMA_NONE);
2713
2714         if (iommu_no_mapping(hwdev))
2715                 return paddr;
2716
2717         domain = get_valid_domain_for_dev(pdev);
2718         if (!domain)
2719                 return 0;
2720
2721         iommu = domain_get_iommu(domain);
2722         size = aligned_nrpages(paddr, size);
2723
2724         iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2725                                 pdev->dma_mask);
2726         if (!iova)
2727                 goto error;
2728
2729         /*
2730          * Check if DMAR supports zero-length reads on write-only
2731          * mappings.
2732          */
2733         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2734                         !cap_zlr(iommu->cap))
2735                 prot |= DMA_PTE_READ;
2736         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2737                 prot |= DMA_PTE_WRITE;
2738         /*
2739          * paddr .. (paddr + size) might span a partial page, so we map the
2740          * whole page.  Note: if two parts of one page are separately mapped,
2741          * we might have two guest addresses mapping to the same host paddr,
2742          * but this is not a big problem.
2743          */
2744         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2745                                  mm_to_dma_pfn(paddr_pfn), size, prot);
2746         if (ret)
2747                 goto error;
2748
2749         /* it's a non-present to present mapping. Only flush if caching mode */
2750         if (cap_caching_mode(iommu->cap))
2751                 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2752         else
2753                 iommu_flush_write_buffer(iommu);
2754
2755         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2756         start_paddr += paddr & ~PAGE_MASK;
2757         return start_paddr;
2758
2759 error:
2760         if (iova)
2761                 __free_iova(&domain->iovad, iova);
2762         printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
2763                 pci_name(pdev), size, (unsigned long long)paddr, dir);
2764         return 0;
2765 }
2766
2767 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2768                                  unsigned long offset, size_t size,
2769                                  enum dma_data_direction dir,
2770                                  struct dma_attrs *attrs)
2771 {
2772         return __intel_map_single(dev, page_to_phys(page) + offset, size,
2773                                   dir, to_pci_dev(dev)->dma_mask);
2774 }
2775
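/*
 * Flush the IOTLB for all deferred unmaps on every IOMMU and release their
 * IOVA ranges.  Called with async_umap_flush_lock held.
 */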
2776 static void flush_unmaps(void)
2777 {
2778         int i, j;
2779
2780         timer_on = 0;
2781
2782         /* just flush them all */
2783         for (i = 0; i < g_num_of_iommus; i++) {
2784                 struct intel_iommu *iommu = g_iommus[i];
2785                 if (!iommu)
2786                         continue;
2787
2788                 if (!deferred_flush[i].next)
2789                         continue;
2790
2791                 /* In caching mode, global flushes make emulation expensive */
2792                 if (!cap_caching_mode(iommu->cap))
2793                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2794                                          DMA_TLB_GLOBAL_FLUSH);
2795                 for (j = 0; j < deferred_flush[i].next; j++) {
2796                         unsigned long mask;
2797                         struct iova *iova = deferred_flush[i].iova[j];
2798                         struct dmar_domain *domain = deferred_flush[i].domain[j];
2799
2800                         /* On real hardware multiple invalidations are expensive */
2801                         if (cap_caching_mode(iommu->cap))
2802                                 iommu_flush_iotlb_psi(iommu, domain->id,
2803                                 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2804                         else {
2805                                 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2806                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2807                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2808                         }
2809                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2810                 }
2811                 deferred_flush[i].next = 0;
2812         }
2813
2814         list_size = 0;
2815 }
2816
2817 static void flush_unmaps_timeout(unsigned long data)
2818 {
2819         unsigned long flags;
2820
2821         spin_lock_irqsave(&async_umap_flush_lock, flags);
2822         flush_unmaps();
2823         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2824 }
2825
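/*
 * Queue an IOVA range for deferred (batched) IOTLB flushing and freeing.
 * Arms the unmap timer, and flushes immediately once HIGH_WATER_MARK entries
 * are pending.
 */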
2826 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2827 {
2828         unsigned long flags;
2829         int next, iommu_id;
2830         struct intel_iommu *iommu;
2831
2832         spin_lock_irqsave(&async_umap_flush_lock, flags);
2833         if (list_size == HIGH_WATER_MARK)
2834                 flush_unmaps();
2835
2836         iommu = domain_get_iommu(dom);
2837         iommu_id = iommu->seq_id;
2838
2839         next = deferred_flush[iommu_id].next;
2840         deferred_flush[iommu_id].domain[next] = dom;
2841         deferred_flush[iommu_id].iova[next] = iova;
2842         deferred_flush[iommu_id].next++;
2843
2844         if (!timer_on) {
2845                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2846                 timer_on = 1;
2847         }
2848         list_size++;
2849         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2850 }
2851
2852 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2853                              size_t size, enum dma_data_direction dir,
2854                              struct dma_attrs *attrs)
2855 {
2856         struct pci_dev *pdev = to_pci_dev(dev);
2857         struct dmar_domain *domain;
2858         unsigned long start_pfn, last_pfn;
2859         struct iova *iova;
2860         struct intel_iommu *iommu;
2861
2862         if (iommu_no_mapping(dev))
2863                 return;
2864
2865         domain = find_domain(pdev);
2866         BUG_ON(!domain);
2867
2868         iommu = domain_get_iommu(domain);
2869
2870         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2871         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2872                       (unsigned long long)dev_addr))
2873                 return;
2874
2875         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2876         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2877
2878         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2879                  pci_name(pdev), start_pfn, last_pfn);
2880
2881         /*  clear the whole page */
2882         dma_pte_clear_range(domain, start_pfn, last_pfn);
2883
2884         /* free page tables */
2885         dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2886
2887         if (intel_iommu_strict) {
2888                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2889                                       last_pfn - start_pfn + 1, 0);
2890                 /* free iova */
2891                 __free_iova(&domain->iovad, iova);
2892         } else {
2893                 add_unmap(domain, iova);
2894                 /*
2895                  * queue up the release of the unmap to save the 1/6th of
2896                  * the cpu time used up by the iotlb flush operation...
2897                  */
2898         }
2899 }
2900
2901 static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2902                                   dma_addr_t *dma_handle, gfp_t flags)
2903 {
2904         void *vaddr;
2905         int order;
2906
2907         size = PAGE_ALIGN(size);
2908         order = get_order(size);
2909
2910         if (!iommu_no_mapping(hwdev))
2911                 flags &= ~(GFP_DMA | GFP_DMA32);
2912         else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2913                 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2914                         flags |= GFP_DMA;
2915                 else
2916                         flags |= GFP_DMA32;
2917         }
2918
2919         vaddr = (void *)__get_free_pages(flags, order);
2920         if (!vaddr)
2921                 return NULL;
2922         memset(vaddr, 0, size);
2923
2924         *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2925                                          DMA_BIDIRECTIONAL,
2926                                          hwdev->coherent_dma_mask);
2927         if (*dma_handle)
2928                 return vaddr;
2929         free_pages((unsigned long)vaddr, order);
2930         return NULL;
2931 }
2932
2933 static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2934                                 dma_addr_t dma_handle)
2935 {
2936         int order;
2937
2938         size = PAGE_ALIGN(size);
2939         order = get_order(size);
2940
2941         intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
2942         free_pages((unsigned long)vaddr, order);
2943 }
2944
2945 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2946                            int nelems, enum dma_data_direction dir,
2947                            struct dma_attrs *attrs)
2948 {
2949         struct pci_dev *pdev = to_pci_dev(hwdev);
2950         struct dmar_domain *domain;
2951         unsigned long start_pfn, last_pfn;
2952         struct iova *iova;
2953         struct intel_iommu *iommu;
2954
2955         if (iommu_no_mapping(hwdev))
2956                 return;
2957
2958         domain = find_domain(pdev);
2959         BUG_ON(!domain);
2960
2961         iommu = domain_get_iommu(domain);
2962
2963         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2964         if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2965                       (unsigned long long)sglist[0].dma_address))
2966                 return;
2967
2968         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2969         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2970
2971         /*  clear the whole page */
2972         dma_pte_clear_range(domain, start_pfn, last_pfn);
2973
2974         /* free page tables */
2975         dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2976
2977         if (intel_iommu_strict) {
2978                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2979                                       last_pfn - start_pfn + 1, 0);
2980                 /* free iova */
2981                 __free_iova(&domain->iovad, iova);
2982         } else {
2983                 add_unmap(domain, iova);
2984                 /*
2985                  * queue up the release of the unmap to save the 1/6th of
2986                  * the cpu time used up by the iotlb flush operation...
2987                  */
2988         }
2989 }
2990
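/*
 * Identity-mapped path for devices that bypass translation: each
 * segment's DMA address is simply its physical address.
 */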
2991 static int intel_nontranslate_map_sg(struct device *hwdev,
2992         struct scatterlist *sglist, int nelems, int dir)
2993 {
2994         int i;
2995         struct scatterlist *sg;
2996
2997         for_each_sg(sglist, sg, nelems, i) {
2998                 BUG_ON(!sg_page(sg));
2999                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3000                 sg->dma_length = sg->length;
3001         }
3002         return nelems;
3003 }
3004
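/*
 * dma_map_ops->map_sg: reserve a single IOVA range large enough for every
 * segment, populate the PTEs via domain_sg_mapping(), and flush the IOTLB
 * only on caching-mode hardware (see the comment near the end).
 */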
3005 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3006                         enum dma_data_direction dir, struct dma_attrs *attrs)
3007 {
3008         int i;
3009         struct pci_dev *pdev = to_pci_dev(hwdev);
3010         struct dmar_domain *domain;
3011         size_t size = 0;
3012         int prot = 0;
3013         struct iova *iova = NULL;
3014         int ret;
3015         struct scatterlist *sg;
3016         unsigned long start_vpfn;
3017         struct intel_iommu *iommu;
3018
3019         BUG_ON(dir == DMA_NONE);
3020         if (iommu_no_mapping(hwdev))
3021                 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3022
3023         domain = get_valid_domain_for_dev(pdev);
3024         if (!domain)
3025                 return 0;
3026
3027         iommu = domain_get_iommu(domain);
3028
3029         for_each_sg(sglist, sg, nelems, i)
3030                 size += aligned_nrpages(sg->offset, sg->length);
3031
3032         iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3033                                 pdev->dma_mask);
3034         if (!iova) {
3035                 sglist->dma_length = 0;
3036                 return 0;
3037         }
3038
3039         /*
3040          * Check if DMAR supports zero-length reads on write only
3041          * mappings..
3042          */
3043         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3044                         !cap_zlr(iommu->cap))
3045                 prot |= DMA_PTE_READ;
3046         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3047                 prot |= DMA_PTE_WRITE;
3048
3049         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3050
3051         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3052         if (unlikely(ret)) {
3053                 /* clear the PTEs for the mapped range */
3054                 dma_pte_clear_range(domain, start_vpfn,
3055                                     start_vpfn + size - 1);
3056                 /* free page tables */
3057                 dma_pte_free_pagetable(domain, start_vpfn,
3058                                        start_vpfn + size - 1);
3059                 /* free iova */
3060                 __free_iova(&domain->iovad, iova);
3061                 return 0;
3062         }
3063
3064         /* it's a non-present to present mapping. Only flush if caching mode */
3065         if (cap_caching_mode(iommu->cap))
3066                 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3067         else
3068                 iommu_flush_write_buffer(iommu);
3069
3070         return nelems;
3071 }
3072
3073 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3074 {
3075         return !dma_addr;
3076 }
3077
3078 struct dma_map_ops intel_dma_ops = {
3079         .alloc_coherent = intel_alloc_coherent,
3080         .free_coherent = intel_free_coherent,
3081         .map_sg = intel_map_sg,
3082         .unmap_sg = intel_unmap_sg,
3083         .map_page = intel_map_page,
3084         .unmap_page = intel_unmap_page,
3085         .mapping_error = intel_mapping_error,
3086 };
3087
3088 static inline int iommu_domain_cache_init(void)
3089 {
3090         int ret = 0;
3091
3092         iommu_domain_cache = kmem_cache_create("iommu_domain",
3093                                          sizeof(struct dmar_domain),
3094                                          0,
3095                                          SLAB_HWCACHE_ALIGN,
3097                                          NULL);
3098         if (!iommu_domain_cache) {
3099                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3100                 ret = -ENOMEM;
3101         }
3102
3103         return ret;
3104 }
3105
3106 static inline int iommu_devinfo_cache_init(void)
3107 {
3108         int ret = 0;
3109
3110         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3111                                          sizeof(struct device_domain_info),
3112                                          0,
3113                                          SLAB_HWCACHE_ALIGN,
3114                                          NULL);
3115         if (!iommu_devinfo_cache) {
3116                 printk(KERN_ERR "Couldn't create devinfo cache\n");
3117                 ret = -ENOMEM;
3118         }
3119
3120         return ret;
3121 }
3122
3123 static inline int iommu_iova_cache_init(void)
3124 {
3125         int ret = 0;
3126
3127         iommu_iova_cache = kmem_cache_create("iommu_iova",
3128                                          sizeof(struct iova),
3129                                          0,
3130                                          SLAB_HWCACHE_ALIGN,
3131                                          NULL);
3132         if (!iommu_iova_cache) {
3133                 printk(KERN_ERR "Couldn't create iova cache\n");
3134                 ret = -ENOMEM;
3135         }
3136
3137         return ret;
3138 }
3139
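/*
 * Create the slab caches used by the driver (iova, dmar_domain and
 * device_domain_info), tearing them down in reverse order on failure.
 */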
3140 static int __init iommu_init_mempool(void)
3141 {
3142         int ret;
3143         ret = iommu_iova_cache_init();
3144         if (ret)
3145                 return ret;
3146
3147         ret = iommu_domain_cache_init();
3148         if (ret)
3149                 goto domain_error;
3150
3151         ret = iommu_devinfo_cache_init();
3152         if (!ret)
3153                 return ret;
3154
3155         kmem_cache_destroy(iommu_domain_cache);
3156 domain_error:
3157         kmem_cache_destroy(iommu_iova_cache);
3158
3159         return -ENOMEM;
3160 }
3161
3162 static void __init iommu_exit_mempool(void)
3163 {
3164         kmem_cache_destroy(iommu_devinfo_cache);
3165         kmem_cache_destroy(iommu_domain_cache);
3166         kmem_cache_destroy(iommu_iova_cache);
3167
3168 }
3169
3170 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3171 {
3172         struct dmar_drhd_unit *drhd;
3173         u32 vtbar;
3174         int rc;
3175
3176         /* We know that this device on this chipset has its own IOMMU.
3177          * If we find it under a different IOMMU, then the BIOS is lying
3178          * to us. Hope that the IOMMU for this device is actually
3179          * disabled, and it needs no translation...
3180          */
3181         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3182         if (rc) {
3183                 /* "can't" happen */
3184                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3185                 return;
3186         }
3187         vtbar &= 0xffff0000;
3188
3189         /* we know that this iommu should be at offset 0xa000 from vtbar */
3190         drhd = dmar_find_matched_drhd_unit(pdev);
3191         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3192                             TAINT_FIRMWARE_WORKAROUND,
3193                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3194                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3195 }
3196 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3197
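/*
 * Mark DRHD units we can skip: units with no PCI devices behind them,
 * and (unless dmar_map_gfx is set) units that serve only graphics
 * devices, whose devices are then marked with DUMMY_DEVICE_DOMAIN_INFO
 * so the IOMMU ignores them.
 */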
3198 static void __init init_no_remapping_devices(void)
3199 {
3200         struct dmar_drhd_unit *drhd;
3201
3202         for_each_drhd_unit(drhd) {
3203                 if (!drhd->include_all) {
3204                         int i;
3205                         for (i = 0; i < drhd->devices_cnt; i++)
3206                                 if (drhd->devices[i] != NULL)
3207                                         break;
3208                         /* ignore DMAR unit if no pci devices exist */
3209                         if (i == drhd->devices_cnt)
3210                                 drhd->ignored = 1;
3211                 }
3212         }
3213
3214         if (dmar_map_gfx)
3215                 return;
3216
3217         for_each_drhd_unit(drhd) {
3218                 int i;
3219                 if (drhd->ignored || drhd->include_all)
3220                         continue;
3221
3222                 for (i = 0; i < drhd->devices_cnt; i++)
3223                         if (drhd->devices[i] &&
3224                                 !IS_GFX_DEVICE(drhd->devices[i]))
3225                                 break;
3226
3227                 if (i < drhd->devices_cnt)
3228                         continue;
3229
3230                 /* bypass IOMMU if it is just for gfx devices */
3231                 drhd->ignored = 1;
3232                 for (i = 0; i < drhd->devices_cnt; i++) {
3233                         if (!drhd->devices[i])
3234                                 continue;
3235                         drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3236                 }
3237         }
3238 }
3239
3240 #ifdef CONFIG_SUSPEND
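/*
 * Restore translation after suspend: restart queued invalidation where
 * present, reprogram the root entry and flush the context and IOTLB
 * caches on every usable IOMMU.
 */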
3241 static int init_iommu_hw(void)
3242 {
3243         struct dmar_drhd_unit *drhd;
3244         struct intel_iommu *iommu = NULL;
3245
3246         for_each_active_iommu(iommu, drhd)
3247                 if (iommu->qi)
3248                         dmar_reenable_qi(iommu);
3249
3250         for_each_iommu(iommu, drhd) {
3251                 if (drhd->ignored) {
3252                         /*
3253                          * we always have to disable PMRs or DMA may fail on
3254                          * this device
3255                          */
3256                         if (force_on)
3257                                 iommu_disable_protect_mem_regions(iommu);
3258                         continue;
3259                 }
3260
3261                 iommu_flush_write_buffer(iommu);
3262
3263                 iommu_set_root_entry(iommu);
3264
3265                 iommu->flush.flush_context(iommu, 0, 0, 0,
3266                                            DMA_CCMD_GLOBAL_INVL);
3267                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3268                                          DMA_TLB_GLOBAL_FLUSH);
3269                 if (iommu_enable_translation(iommu))
3270                         return 1;
3271                 iommu_disable_protect_mem_regions(iommu);
3272         }
3273
3274         return 0;
3275 }
3276
3277 static void iommu_flush_all(void)
3278 {
3279         struct dmar_drhd_unit *drhd;
3280         struct intel_iommu *iommu;
3281
3282         for_each_active_iommu(iommu, drhd) {
3283                 iommu->flush.flush_context(iommu, 0, 0, 0,
3284                                            DMA_CCMD_GLOBAL_INVL);
3285                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3286                                          DMA_TLB_GLOBAL_FLUSH);
3287         }
3288 }
3289
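/*
 * Syscore suspend hook: flush everything, disable translation and save
 * the fault-event registers of each active IOMMU for iommu_resume().
 */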
3290 static int iommu_suspend(void)
3291 {
3292         struct dmar_drhd_unit *drhd;
3293         struct intel_iommu *iommu = NULL;
3294         unsigned long flag;
3295
3296         for_each_active_iommu(iommu, drhd) {
3297                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3298                                                  GFP_ATOMIC);
3299                 if (!iommu->iommu_state)
3300                         goto nomem;
3301         }
3302
3303         iommu_flush_all();
3304
3305         for_each_active_iommu(iommu, drhd) {
3306                 iommu_disable_translation(iommu);
3307
3308                 spin_lock_irqsave(&iommu->register_lock, flag);
3309
3310                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3311                         readl(iommu->reg + DMAR_FECTL_REG);
3312                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3313                         readl(iommu->reg + DMAR_FEDATA_REG);
3314                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3315                         readl(iommu->reg + DMAR_FEADDR_REG);
3316                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3317                         readl(iommu->reg + DMAR_FEUADDR_REG);
3318
3319                 spin_unlock_irqrestore(&iommu->register_lock, flag);
3320         }
3321         return 0;
3322
3323 nomem:
3324         for_each_active_iommu(iommu, drhd)
3325                 kfree(iommu->iommu_state);
3326
3327         return -ENOMEM;
3328 }
3329
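/*
 * Syscore resume hook: reinitialize the hardware via init_iommu_hw() and
 * restore the fault-event registers saved at suspend time.
 */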
3330 static void iommu_resume(void)
3331 {
3332         struct dmar_drhd_unit *drhd;
3333         struct intel_iommu *iommu = NULL;
3334         unsigned long flag;
3335
3336         if (init_iommu_hw()) {
3337                 if (force_on)
3338                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3339                 else
3340                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3341                 return;
3342         }
3343
3344         for_each_active_iommu(iommu, drhd) {
3345
3346                 spin_lock_irqsave(&iommu->register_lock, flag);
3347
3348                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3349                         iommu->reg + DMAR_FECTL_REG);
3350                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3351                         iommu->reg + DMAR_FEDATA_REG);
3352                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3353                         iommu->reg + DMAR_FEADDR_REG);
3354                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3355                         iommu->reg + DMAR_FEUADDR_REG);
3356
3357                 spin_unlock_irqrestore(&iommu->register_lock, flag);
3358         }
3359
3360         for_each_active_iommu(iommu, drhd)
3361                 kfree(iommu->iommu_state);
3362 }
3363
3364 static struct syscore_ops iommu_syscore_ops = {
3365         .resume         = iommu_resume,
3366         .suspend        = iommu_suspend,
3367 };
3368
3369 static void __init init_iommu_pm_ops(void)
3370 {
3371         register_syscore_ops(&iommu_syscore_ops);
3372 }
3373
3374 #else
3375 static inline void init_iommu_pm_ops(void) {}
3376 #endif  /* CONFIG_SUSPEND */
3377
3378 /*
3379  * Here we only respond to action of unbound device from driver.
3380  *
3381  * Added device is not attached to its DMAR domain here yet. That will happen
3382  * when mapping the device to iova.
3383  */
3384 static int device_notifier(struct notifier_block *nb,
3385                                   unsigned long action, void *data)
3386 {
3387         struct device *dev = data;
3388         struct pci_dev *pdev = to_pci_dev(dev);
3389         struct dmar_domain *domain;
3390
3391         if (iommu_no_mapping(dev))
3392                 return 0;
3393
3394         domain = find_domain(pdev);
3395         if (!domain)
3396                 return 0;
3397
3398         if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3399                 domain_remove_one_dev_info(domain, pdev);
3400
3401                 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3402                     !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3403                     list_empty(&domain->devices))
3404                         domain_exit(domain);
3405         }
3406
3407         return 0;
3408 }
3409
3410 static struct notifier_block device_nb = {
3411         .notifier_call = device_notifier,
3412 };
3413
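/*
 * Main entry point: parse the DMAR table and device scopes, initialize
 * the DMARs, install intel_dma_ops and register the IOMMU-API ops and
 * the PCI bus notifier.
 */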
3414 int __init intel_iommu_init(void)
3415 {
3416         int ret = 0;
3417
3418         /* VT-d is required for a TXT/tboot launch, so enforce that */
3419         force_on = tboot_force_iommu();
3420
3421         if (dmar_table_init()) {
3422                 if (force_on)
3423                         panic("tboot: Failed to initialize DMAR table\n");
3424                 return  -ENODEV;
3425         }
3426
3427         if (dmar_dev_scope_init()) {
3428                 if (force_on)
3429                         panic("tboot: Failed to initialize DMAR device scope\n");
3430                 return  -ENODEV;
3431         }
3432
3433         /*
3434          * Check the need for DMA-remapping initialization now.
3435          * Above initialization will also be used by Interrupt-remapping.
3436          */
3437         if (no_iommu || dmar_disabled)
3438                 return -ENODEV;
3439
3440         if (iommu_init_mempool()) {
3441                 if (force_on)
3442                         panic("tboot: Failed to initialize iommu memory\n");
3443                 return  -ENODEV;
3444         }
3445
3446         if (dmar_init_reserved_ranges()) {
3447                 if (force_on)
3448                         panic("tboot: Failed to reserve iommu ranges\n");
3449                 return  -ENODEV;
3450         }
3451
3452         init_no_remapping_devices();
3453
3454         ret = init_dmars();
3455         if (ret) {
3456                 if (force_on)
3457                         panic("tboot: Failed to initialize DMARs\n");
3458                 printk(KERN_ERR "IOMMU: dmar init failed\n");
3459                 put_iova_domain(&reserved_iova_list);
3460                 iommu_exit_mempool();
3461                 return ret;
3462         }
3463         printk(KERN_INFO
3464                "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3465
3466         init_timer(&unmap_timer);
3467 #ifdef CONFIG_SWIOTLB
3468         swiotlb = 0;
3469 #endif
3470         dma_ops = &intel_dma_ops;
3471
3472         init_iommu_pm_ops();
3473
3474         register_iommu(&intel_iommu_ops);
3475
3476         bus_register_notifier(&pci_bus_type, &device_nb);
3477
3478         return 0;
3479 }
3480
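/*
 * A device behind a PCIe-to-PCI bridge had context entries programmed
 * for the bridge chain as well; detach those upstream entries too.
 */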
3481 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3482                                            struct pci_dev *pdev)
3483 {
3484         struct pci_dev *tmp, *parent;
3485
3486         if (!iommu || !pdev)
3487                 return;
3488
3489         /* dependent device detach */
3490         tmp = pci_find_upstream_pcie_bridge(pdev);
3491         /* Secondary interface's bus number and devfn 0 */
3492         if (tmp) {
3493                 parent = pdev->bus->self;
3494                 while (parent != tmp) {
3495                         iommu_detach_dev(iommu, parent->bus->number,
3496                                          parent->devfn);
3497                         parent = parent->bus->self;
3498                 }
3499                 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3500                         iommu_detach_dev(iommu,
3501                                 tmp->subordinate->number, 0);
3502                 else /* this is a legacy PCI bridge */
3503                         iommu_detach_dev(iommu, tmp->bus->number,
3504                                          tmp->devfn);
3505         }
3506 }
3507
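/*
 * Detach one device from its domain. If no other device on the same
 * IOMMU still belongs to the domain, drop that IOMMU from the domain's
 * bitmap and release the domain id on that IOMMU.
 */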
3508 static void domain_remove_one_dev_info(struct dmar_domain *domain,
3509                                           struct pci_dev *pdev)
3510 {
3511         struct device_domain_info *info;
3512         struct intel_iommu *iommu;
3513         unsigned long flags;
3514         int found = 0;
3515         struct list_head *entry, *tmp;
3516
3517         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3518                                 pdev->devfn);
3519         if (!iommu)
3520                 return;
3521
3522         spin_lock_irqsave(&device_domain_lock, flags);
3523         list_for_each_safe(entry, tmp, &domain->devices) {
3524                 info = list_entry(entry, struct device_domain_info, link);
3525                 /* No need to compare PCI domain; it has to be the same */
3526                 if (info->bus == pdev->bus->number &&
3527                     info->devfn == pdev->devfn) {
3528                         list_del(&info->link);
3529                         list_del(&info->global);
3530                         if (info->dev)
3531                                 info->dev->dev.archdata.iommu = NULL;
3532                         spin_unlock_irqrestore(&device_domain_lock, flags);
3533
3534                         iommu_disable_dev_iotlb(info);
3535                         iommu_detach_dev(iommu, info->bus, info->devfn);
3536                         iommu_detach_dependent_devices(iommu, pdev);
3537                         free_devinfo_mem(info);
3538
3539                         spin_lock_irqsave(&device_domain_lock, flags);
3540
3541                         if (found)
3542                                 break;
3543                         else
3544                                 continue;
3545                 }
3546
3547                 /* if there are no other devices under the same iommu
3548                  * owned by this domain, clear this iommu from iommu_bmp
3549                  * and update the iommu count and coherency
3550                  */
3551                 if (iommu == device_to_iommu(info->segment, info->bus,
3552                                             info->devfn))
3553                         found = 1;
3554         }
3555
3556         if (found == 0) {
3557                 unsigned long tmp_flags;
3558                 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3559                 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3560                 domain->iommu_count--;
3561                 domain_update_iommu_cap(domain);
3562                 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3563
3564                 spin_lock_irqsave(&iommu->lock, tmp_flags);
3565                 clear_bit(domain->id, iommu->domain_ids);
3566                 iommu->domains[domain->id] = NULL;
3567                 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3568         }
3569
3570         spin_unlock_irqrestore(&device_domain_lock, flags);
3571 }
3572
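/*
 * Detach every device from a virtual-machine domain, updating the
 * per-IOMMU bookkeeping (iommu_bmp, count, capabilities) as we go.
 */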
3573 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3574 {
3575         struct device_domain_info *info;
3576         struct intel_iommu *iommu;
3577         unsigned long flags1, flags2;
3578
3579         spin_lock_irqsave(&device_domain_lock, flags1);
3580         while (!list_empty(&domain->devices)) {
3581                 info = list_entry(domain->devices.next,
3582                         struct device_domain_info, link);
3583                 list_del(&info->link);
3584                 list_del(&info->global);
3585                 if (info->dev)
3586                         info->dev->dev.archdata.iommu = NULL;
3587
3588                 spin_unlock_irqrestore(&device_domain_lock, flags1);
3589
3590                 iommu_disable_dev_iotlb(info);
3591                 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3592                 iommu_detach_dev(iommu, info->bus, info->devfn);
3593                 iommu_detach_dependent_devices(iommu, info->dev);
3594
3595                 /* clear this iommu in iommu_bmp, update iommu count
3596                  * and capabilities
3597                  */
3598                 spin_lock_irqsave(&domain->iommu_lock, flags2);
3599                 if (test_and_clear_bit(iommu->seq_id,
3600                                        &domain->iommu_bmp)) {
3601                         domain->iommu_count--;
3602                         domain_update_iommu_cap(domain);
3603                 }
3604                 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3605
3606                 free_devinfo_mem(info);
3607                 spin_lock_irqsave(&device_domain_lock, flags1);
3608         }
3609         spin_unlock_irqrestore(&device_domain_lock, flags1);
3610 }
3611
3612 /* domain id for virtual machine domains; it won't be set in the context entries */
3613 static unsigned long vm_domid;
3614
3615 static struct dmar_domain *iommu_alloc_vm_domain(void)
3616 {
3617         struct dmar_domain *domain;
3618
3619         domain = alloc_domain_mem();
3620         if (!domain)
3621                 return NULL;
3622
3623         domain->id = vm_domid++;
3624         domain->nid = -1;
3625         memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3626         domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3627
3628         return domain;
3629 }
3630
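/*
 * Initialize an IOMMU-API (VM) domain: reserve the special IOVA ranges,
 * derive the AGAW from the requested guest width and allocate the
 * top-level page directory.
 */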
3631 static int md_domain_init(struct dmar_domain *domain, int guest_width)
3632 {
3633         int adjust_width;
3634
3635         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3636         spin_lock_init(&domain->iommu_lock);
3637
3638         domain_reserve_special_ranges(domain);
3639
3640         /* calculate AGAW */
3641         domain->gaw = guest_width;
3642         adjust_width = guestwidth_to_adjustwidth(guest_width);
3643         domain->agaw = width_to_agaw(adjust_width);
3644
3645         INIT_LIST_HEAD(&domain->devices);
3646
3647         domain->iommu_count = 0;
3648         domain->iommu_coherency = 0;
3649         domain->iommu_snooping = 0;
3650         domain->iommu_superpage = 0;
3651         domain->max_addr = 0;
3652         domain->nid = -1;
3653
3654         /* always allocate the top pgd */
3655         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3656         if (!domain->pgd)
3657                 return -ENOMEM;
3658         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3659         return 0;
3660 }
3661
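/*
 * Release every per-IOMMU domain id that was handed out to this VM
 * domain across all DRHD units.
 */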
3662 static void iommu_free_vm_domain(struct dmar_domain *domain)
3663 {
3664         unsigned long flags;
3665         struct dmar_drhd_unit *drhd;
3666         struct intel_iommu *iommu;
3667         unsigned long i;
3668         unsigned long ndomains;
3669
3670         for_each_drhd_unit(drhd) {
3671                 if (drhd->ignored)
3672                         continue;
3673                 iommu = drhd->iommu;
3674
3675                 ndomains = cap_ndoms(iommu->cap);
3676                 for_each_set_bit(i, iommu->domain_ids, ndomains) {
3677                         if (iommu->domains[i] == domain) {
3678                                 spin_lock_irqsave(&iommu->lock, flags);
3679                                 clear_bit(i, iommu->domain_ids);
3680                                 iommu->domains[i] = NULL;
3681                                 spin_unlock_irqrestore(&iommu->lock, flags);
3682                                 break;
3683                         }
3684                 }
3685         }
3686 }
3687
3688 static void vm_domain_exit(struct dmar_domain *domain)
3689 {
3690         /* Domain 0 is reserved, so don't process it */
3691         if (!domain)
3692                 return;
3693
3694         vm_domain_remove_all_dev_info(domain);
3695         /* destroy iovas */
3696         put_iova_domain(&domain->iovad);
3697
3698         /* clear ptes */
3699         dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3700
3701         /* free page tables */
3702         dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3703
3704         iommu_free_vm_domain(domain);
3705         free_domain_mem(domain);
3706 }
3707
3708 static int intel_iommu_domain_init(struct iommu_domain *domain)
3709 {
3710         struct dmar_domain *dmar_domain;
3711
3712         dmar_domain = iommu_alloc_vm_domain();
3713         if (!dmar_domain) {
3714                 printk(KERN_ERR
3715                         "intel_iommu_domain_init: dmar_domain == NULL\n");
3716                 return -ENOMEM;
3717         }
3718         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3719                 printk(KERN_ERR
3720                         "intel_iommu_domain_init() failed\n");
3721                 vm_domain_exit(dmar_domain);
3722                 return -ENOMEM;
3723         }
3724         domain->priv = dmar_domain;
3725
3726         return 0;
3727 }
3728
3729 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3730 {
3731         struct dmar_domain *dmar_domain = domain->priv;
3732
3733         domain->priv = NULL;
3734         vm_domain_exit(dmar_domain);
3735 }
3736
3737 static int intel_iommu_attach_device(struct iommu_domain *domain,
3738                                      struct device *dev)
3739 {
3740         struct dmar_domain *dmar_domain = domain->priv;
3741         struct pci_dev *pdev = to_pci_dev(dev);
3742         struct intel_iommu *iommu;
3743         int addr_width;
3744
3745         /* normally pdev is not mapped */
3746         if (unlikely(domain_context_mapped(pdev))) {
3747                 struct dmar_domain *old_domain;
3748
3749                 old_domain = find_domain(pdev);
3750                 if (old_domain) {
3751                         if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3752                             dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3753                                 domain_remove_one_dev_info(old_domain, pdev);
3754                         else
3755                                 domain_remove_dev_info(old_domain);
3756                 }
3757         }
3758
3759         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3760                                 pdev->devfn);
3761         if (!iommu)
3762                 return -ENODEV;
3763
3764         /* check if this iommu agaw is sufficient for max mapped address */
3765         addr_width = agaw_to_width(iommu->agaw);
3766         if (addr_width > cap_mgaw(iommu->cap))
3767                 addr_width = cap_mgaw(iommu->cap);
3768
3769         if (dmar_domain->max_addr > (1LL << addr_width)) {
3770                 printk(KERN_ERR "%s: iommu width (%d) is not "
3771                        "sufficient for the mapped address (%llx)\n",
3772                        __func__, addr_width, dmar_domain->max_addr);
3773                 return -EFAULT;
3774         }
3775         dmar_domain->gaw = addr_width;
3776
3777         /*
3778          * Knock out extra levels of page tables if necessary
3779          */
3780         while (iommu->agaw < dmar_domain->agaw) {
3781                 struct dma_pte *pte;
3782
3783                 pte = dmar_domain->pgd;
3784                 if (dma_pte_present(pte)) {
3785                         dmar_domain->pgd = (struct dma_pte *)
3786                                 phys_to_virt(dma_pte_addr(pte));
3787                         free_pgtable_page(pte);
3788                 }
3789                 dmar_domain->agaw--;
3790         }
3791
3792         return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
3793 }
3794
3795 static void intel_iommu_detach_device(struct iommu_domain *domain,
3796                                       struct device *dev)
3797 {
3798         struct dmar_domain *dmar_domain = domain->priv;
3799         struct pci_dev *pdev = to_pci_dev(dev);
3800
3801         domain_remove_one_dev_info(dmar_domain, pdev);
3802 }
3803
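/*
 * IOMMU-API map: translate IOMMU_READ/WRITE/CACHE into DMA PTE bits,
 * grow max_addr within the domain's address width, and install the
 * mapping at VTD page granularity.
 */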
3804 static int intel_iommu_map(struct iommu_domain *domain,
3805                            unsigned long iova, phys_addr_t hpa,
3806                            int gfp_order, int iommu_prot)
3807 {
3808         struct dmar_domain *dmar_domain = domain->priv;
3809         u64 max_addr;
3810         int prot = 0;
3811         size_t size;
3812         int ret;
3813
3814         if (iommu_prot & IOMMU_READ)
3815                 prot |= DMA_PTE_READ;
3816         if (iommu_prot & IOMMU_WRITE)
3817                 prot |= DMA_PTE_WRITE;
3818         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3819                 prot |= DMA_PTE_SNP;
3820
3821         size     = PAGE_SIZE << gfp_order;
3822         max_addr = iova + size;
3823         if (dmar_domain->max_addr < max_addr) {
3824                 u64 end;
3825
3826                 /* check if minimum agaw is sufficient for mapped address */
3827                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
3828                 if (end < max_addr) {
3829                         printk(KERN_ERR "%s: iommu width (%d) is not "
3830                                "sufficient for the mapped address (%llx)\n",
3831                                __func__, dmar_domain->gaw, max_addr);
3832                         return -EFAULT;
3833                 }
3834                 dmar_domain->max_addr = max_addr;
3835         }
3836         /* Round up size to next multiple of PAGE_SIZE, if it and
3837            the low bits of hpa would take us onto the next page */
3838         size = aligned_nrpages(hpa, size);
3839         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3840                                  hpa >> VTD_PAGE_SHIFT, size, prot);
3841         return ret;
3842 }
3843
3844 static int intel_iommu_unmap(struct iommu_domain *domain,
3845                              unsigned long iova, int gfp_order)
3846 {
3847         struct dmar_domain *dmar_domain = domain->priv;
3848         size_t size = PAGE_SIZE << gfp_order;
3849
3850         dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3851                             (iova + size - 1) >> VTD_PAGE_SHIFT);
3852
3853         if (dmar_domain->max_addr == iova + size)
3854                 dmar_domain->max_addr = iova;
3855
3856         return gfp_order;
3857 }
3858
3859 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3860                                             unsigned long iova)
3861 {
3862         struct dmar_domain *dmar_domain = domain->priv;
3863         struct dma_pte *pte;
3864         u64 phys = 0;
3865
3866         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
3867         if (pte)
3868                 phys = dma_pte_addr(pte);
3869
3870         return phys;
3871 }
3872
3873 static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
3874                                       unsigned long cap)
3875 {
3876         struct dmar_domain *dmar_domain = domain->priv;
3877
3878         if (cap == IOMMU_CAP_CACHE_COHERENCY)
3879                 return dmar_domain->iommu_snooping;
3880         if (cap == IOMMU_CAP_INTR_REMAP)
3881                 return intr_remapping_enabled;
3882
3883         return 0;
3884 }
3885
3886 static struct iommu_ops intel_iommu_ops = {
3887         .domain_init    = intel_iommu_domain_init,
3888         .domain_destroy = intel_iommu_domain_destroy,
3889         .attach_dev     = intel_iommu_attach_device,
3890         .detach_dev     = intel_iommu_detach_device,
3891         .map            = intel_iommu_map,
3892         .unmap          = intel_iommu_unmap,
3893         .iova_to_phys   = intel_iommu_iova_to_phys,
3894         .domain_has_cap = intel_iommu_domain_has_cap,
3895 };
3896
3897 static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
3898 {
3899         /*
3900          * Mobile 4 Series Chipset neglects to set RWBF capability,
3901          * but needs it:
3902          */
3903         printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
3904         rwbf_quirk = 1;
3905
3906         /* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
3907         if (dev->revision == 0x07) {
3908                 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
3909                 dmar_map_gfx = 0;
3910         }
3911 }
3912
3913 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
3914
3915 #define GGC 0x52
3916 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
3917 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
3918 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
3919 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
3920 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
3921 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
3922 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
3923 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
3924
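/*
 * The GGC register encodes the graphics stolen memory the BIOS set aside
 * for VT (see the GGC_MEMORY_* bits above). If none was allocated there
 * is no shadow GTT for the IOMMU to use, so disable translation for
 * graphics on these chipsets.
 */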
3925 static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
3926 {
3927         unsigned short ggc;
3928
3929         if (pci_read_config_word(dev, GGC, &ggc))
3930                 return;
3931
3932         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
3933                 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
3934                 dmar_map_gfx = 0;
3935         }
3936 }
3937 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
3938 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
3939 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
3940 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
3941
3942 /* On Tylersburg chipsets, some BIOSes have been known to enable the
3943    ISOCH DMAR unit for the Azalia sound device, but not give it any
3944    TLB entries, which causes it to deadlock. Check for that.  We do
3945    this in a function called from init_dmars(), instead of in a PCI
3946    quirk, because we don't want to print the obnoxious "BIOS broken"
3947    message if VT-d is actually disabled.
3948 */
3949 static void __init check_tylersburg_isoch(void)
3950 {
3951         struct pci_dev *pdev;
3952         uint32_t vtisochctrl;
3953
3954         /* If there's no Azalia in the system anyway, forget it. */
3955         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
3956         if (!pdev)
3957                 return;
3958         pci_dev_put(pdev);
3959
3960         /* System Management Registers. Might be hidden, in which case
3961            we can't do the sanity check. But that's OK, because the
3962            known-broken BIOSes _don't_ actually hide it, so far. */
3963         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
3964         if (!pdev)
3965                 return;
3966
3967         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
3968                 pci_dev_put(pdev);
3969                 return;
3970         }
3971
3972         pci_dev_put(pdev);
3973
3974         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
3975         if (vtisochctrl & 1)
3976                 return;
3977
3978         /* Drop all bits other than the number of TLB entries */
3979         vtisochctrl &= 0x1c;
3980
3981         /* If we have the recommended number of TLB entries (16), fine. */
3982         if (vtisochctrl == 0x10)
3983                 return;
3984
3985         /* Zero TLB entries? You get to ride the short bus to school. */
3986         if (!vtisochctrl) {
3987                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
3988                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3989                      dmi_get_system_info(DMI_BIOS_VENDOR),
3990                      dmi_get_system_info(DMI_BIOS_VERSION),
3991                      dmi_get_system_info(DMI_PRODUCT_VERSION));
3992                 iommu_identity_mapping |= IDENTMAP_AZALIA;
3993                 return;
3994         }
3995
3996         printk(KERN_WARNING "DMAR: Recommended number of TLB entries for the ISOCH unit is 16; your BIOS set %d\n",
3997                vtisochctrl);
3998 }