]> git.karo-electronics.de Git - mv-sheeva.git/blob - drivers/pci/intel-iommu.c
5cbab7f19ae0b48e5933900ce970ad6b91574b20
[mv-sheeva.git] / drivers / pci / intel-iommu.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  * Author: Fenghua Yu <fenghua.yu@intel.com>
22  */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/iommu.h>
38 #include <linux/intel-iommu.h>
39 #include <linux/syscore_ops.h>
40 #include <linux/tboot.h>
41 #include <linux/dmi.h>
42 #include <asm/cacheflush.h>
43 #include <asm/iommu.h>
44 #include "pci.h"
45
46 #define ROOT_SIZE               VTD_PAGE_SIZE
47 #define CONTEXT_SIZE            VTD_PAGE_SIZE
48
49 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
50 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
51 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
52
53 #define IOAPIC_RANGE_START      (0xfee00000)
54 #define IOAPIC_RANGE_END        (0xfeefffff)
55 #define IOVA_START_ADDR         (0x1000)
56
57 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
58
59 #define MAX_AGAW_WIDTH 64
60
61 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
62 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
63
64 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
65    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
66 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
67                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
68 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
69
70 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
71 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
72 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
73
74 /* page table handling */
75 #define LEVEL_STRIDE            (9)
76 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
77
78 static inline int agaw_to_level(int agaw)
79 {
80         return agaw + 2;
81 }
82
83 static inline int agaw_to_width(int agaw)
84 {
85         return 30 + agaw * LEVEL_STRIDE;
86 }
87
88 static inline int width_to_agaw(int width)
89 {
90         return (width - 30) / LEVEL_STRIDE;
91 }
92
93 static inline unsigned int level_to_offset_bits(int level)
94 {
95         return (level - 1) * LEVEL_STRIDE;
96 }
97
98 static inline int pfn_level_offset(unsigned long pfn, int level)
99 {
100         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
101 }
102
103 static inline unsigned long level_mask(int level)
104 {
105         return -1UL << level_to_offset_bits(level);
106 }
107
108 static inline unsigned long level_size(int level)
109 {
110         return 1UL << level_to_offset_bits(level);
111 }
112
113 static inline unsigned long align_to_level(unsigned long pfn, int level)
114 {
115         return (pfn + level_size(level) - 1) & level_mask(level);
116 }
117
118 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
119 {
120         return  1 << ((lvl - 1) * LEVEL_STRIDE);
121 }
122
123 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
124    are never going to work. */
125 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
126 {
127         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
128 }
129
130 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
131 {
132         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
133 }
134 static inline unsigned long page_to_dma_pfn(struct page *pg)
135 {
136         return mm_to_dma_pfn(page_to_pfn(pg));
137 }
138 static inline unsigned long virt_to_dma_pfn(void *p)
139 {
140         return page_to_dma_pfn(virt_to_page(p));
141 }
142
143 /* global iommu list, set NULL for ignored DMAR units */
144 static struct intel_iommu **g_iommus;
145
146 static void __init check_tylersburg_isoch(void);
147 static int rwbf_quirk;
148
149 /*
150  * set to 1 to panic kernel if can't successfully enable VT-d
151  * (used when kernel is launched w/ TXT)
152  */
153 static int force_on = 0;
154
155 /*
156  * 0: Present
157  * 1-11: Reserved
158  * 12-63: Context Ptr (12 - (haw-1))
159  * 64-127: Reserved
160  */
161 struct root_entry {
162         u64     val;
163         u64     rsvd1;
164 };
165 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
166 static inline bool root_present(struct root_entry *root)
167 {
168         return (root->val & 1);
169 }
170 static inline void set_root_present(struct root_entry *root)
171 {
172         root->val |= 1;
173 }
174 static inline void set_root_value(struct root_entry *root, unsigned long value)
175 {
176         root->val |= value & VTD_PAGE_MASK;
177 }
178
179 static inline struct context_entry *
180 get_context_addr_from_root(struct root_entry *root)
181 {
182         return (struct context_entry *)
183                 (root_present(root)?phys_to_virt(
184                 root->val & VTD_PAGE_MASK) :
185                 NULL);
186 }
187
188 /*
189  * low 64 bits:
190  * 0: present
191  * 1: fault processing disable
192  * 2-3: translation type
193  * 12-63: address space root
194  * high 64 bits:
195  * 0-2: address width
196  * 3-6: aval
197  * 8-23: domain id
198  */
199 struct context_entry {
200         u64 lo;
201         u64 hi;
202 };
203
204 static inline bool context_present(struct context_entry *context)
205 {
206         return (context->lo & 1);
207 }
208 static inline void context_set_present(struct context_entry *context)
209 {
210         context->lo |= 1;
211 }
212
213 static inline void context_set_fault_enable(struct context_entry *context)
214 {
215         context->lo &= (((u64)-1) << 2) | 1;
216 }
217
218 static inline void context_set_translation_type(struct context_entry *context,
219                                                 unsigned long value)
220 {
221         context->lo &= (((u64)-1) << 4) | 3;
222         context->lo |= (value & 3) << 2;
223 }
224
225 static inline void context_set_address_root(struct context_entry *context,
226                                             unsigned long value)
227 {
228         context->lo |= value & VTD_PAGE_MASK;
229 }
230
231 static inline void context_set_address_width(struct context_entry *context,
232                                              unsigned long value)
233 {
234         context->hi |= value & 7;
235 }
236
237 static inline void context_set_domain_id(struct context_entry *context,
238                                          unsigned long value)
239 {
240         context->hi |= (value & ((1 << 16) - 1)) << 8;
241 }
242
243 static inline void context_clear_entry(struct context_entry *context)
244 {
245         context->lo = 0;
246         context->hi = 0;
247 }
248
249 /*
250  * 0: readable
251  * 1: writable
252  * 2-6: reserved
253  * 7: super page
254  * 8-10: available
255  * 11: snoop behavior
256  * 12-63: Host physcial address
257  */
258 struct dma_pte {
259         u64 val;
260 };
261
262 static inline void dma_clear_pte(struct dma_pte *pte)
263 {
264         pte->val = 0;
265 }
266
267 static inline void dma_set_pte_readable(struct dma_pte *pte)
268 {
269         pte->val |= DMA_PTE_READ;
270 }
271
272 static inline void dma_set_pte_writable(struct dma_pte *pte)
273 {
274         pte->val |= DMA_PTE_WRITE;
275 }
276
277 static inline void dma_set_pte_snp(struct dma_pte *pte)
278 {
279         pte->val |= DMA_PTE_SNP;
280 }
281
282 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
283 {
284         pte->val = (pte->val & ~3) | (prot & 3);
285 }
286
287 static inline u64 dma_pte_addr(struct dma_pte *pte)
288 {
289 #ifdef CONFIG_64BIT
290         return pte->val & VTD_PAGE_MASK;
291 #else
292         /* Must have a full atomic 64-bit read */
293         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
294 #endif
295 }
296
297 static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
298 {
299         pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
300 }
301
302 static inline bool dma_pte_present(struct dma_pte *pte)
303 {
304         return (pte->val & 3) != 0;
305 }
306
307 static inline int first_pte_in_page(struct dma_pte *pte)
308 {
309         return !((unsigned long)pte & ~VTD_PAGE_MASK);
310 }
311
312 /*
313  * This domain is a statically identity mapping domain.
314  *      1. This domain creats a static 1:1 mapping to all usable memory.
315  *      2. It maps to each iommu if successful.
316  *      3. Each iommu mapps to this domain if successful.
317  */
318 static struct dmar_domain *si_domain;
319 static int hw_pass_through = 1;
320
321 /* devices under the same p2p bridge are owned in one domain */
322 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
323
324 /* domain represents a virtual machine, more than one devices
325  * across iommus may be owned in one domain, e.g. kvm guest.
326  */
327 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 1)
328
329 /* si_domain contains mulitple devices */
330 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 2)
331
332 struct dmar_domain {
333         int     id;                     /* domain id */
334         int     nid;                    /* node id */
335         unsigned long iommu_bmp;        /* bitmap of iommus this domain uses*/
336
337         struct list_head devices;       /* all devices' list */
338         struct iova_domain iovad;       /* iova's that belong to this domain */
339
340         struct dma_pte  *pgd;           /* virtual address */
341         int             gaw;            /* max guest address width */
342
343         /* adjusted guest address width, 0 is level 2 30-bit */
344         int             agaw;
345
346         int             flags;          /* flags to find out type of domain */
347
348         int             iommu_coherency;/* indicate coherency of iommu access */
349         int             iommu_snooping; /* indicate snooping control feature*/
350         int             iommu_count;    /* reference count of iommu */
351         int             iommu_superpage;/* Level of superpages supported:
352                                            0 == 4KiB (no superpages), 1 == 2MiB,
353                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
354         spinlock_t      iommu_lock;     /* protect iommu set in domain */
355         u64             max_addr;       /* maximum mapped address */
356 };
357
358 /* PCI domain-device relationship */
359 struct device_domain_info {
360         struct list_head link;  /* link to domain siblings */
361         struct list_head global; /* link to global list */
362         int segment;            /* PCI domain */
363         u8 bus;                 /* PCI bus number */
364         u8 devfn;               /* PCI devfn number */
365         struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
366         struct intel_iommu *iommu; /* IOMMU used by this device */
367         struct dmar_domain *domain; /* pointer to domain */
368 };
369
370 static void flush_unmaps_timeout(unsigned long data);
371
372 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
373
374 #define HIGH_WATER_MARK 250
375 struct deferred_flush_tables {
376         int next;
377         struct iova *iova[HIGH_WATER_MARK];
378         struct dmar_domain *domain[HIGH_WATER_MARK];
379 };
380
381 static struct deferred_flush_tables *deferred_flush;
382
383 /* bitmap for indexing intel_iommus */
384 static int g_num_of_iommus;
385
386 static DEFINE_SPINLOCK(async_umap_flush_lock);
387 static LIST_HEAD(unmaps_to_do);
388
389 static int timer_on;
390 static long list_size;
391
392 static void domain_remove_dev_info(struct dmar_domain *domain);
393
394 #ifdef CONFIG_DMAR_DEFAULT_ON
395 int dmar_disabled = 0;
396 #else
397 int dmar_disabled = 1;
398 #endif /*CONFIG_DMAR_DEFAULT_ON*/
399
400 static int dmar_map_gfx = 1;
401 static int dmar_forcedac;
402 static int intel_iommu_strict;
403 static int intel_iommu_superpage = 1;
404
405 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
406 static DEFINE_SPINLOCK(device_domain_lock);
407 static LIST_HEAD(device_domain_list);
408
409 static struct iommu_ops intel_iommu_ops;
410
411 static int __init intel_iommu_setup(char *str)
412 {
413         if (!str)
414                 return -EINVAL;
415         while (*str) {
416                 if (!strncmp(str, "on", 2)) {
417                         dmar_disabled = 0;
418                         printk(KERN_INFO "Intel-IOMMU: enabled\n");
419                 } else if (!strncmp(str, "off", 3)) {
420                         dmar_disabled = 1;
421                         printk(KERN_INFO "Intel-IOMMU: disabled\n");
422                 } else if (!strncmp(str, "igfx_off", 8)) {
423                         dmar_map_gfx = 0;
424                         printk(KERN_INFO
425                                 "Intel-IOMMU: disable GFX device mapping\n");
426                 } else if (!strncmp(str, "forcedac", 8)) {
427                         printk(KERN_INFO
428                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
429                         dmar_forcedac = 1;
430                 } else if (!strncmp(str, "strict", 6)) {
431                         printk(KERN_INFO
432                                 "Intel-IOMMU: disable batched IOTLB flush\n");
433                         intel_iommu_strict = 1;
434                 } else if (!strncmp(str, "sp_off", 6)) {
435                         printk(KERN_INFO
436                                 "Intel-IOMMU: disable supported super page\n");
437                         intel_iommu_superpage = 0;
438                 }
439
440                 str += strcspn(str, ",");
441                 while (*str == ',')
442                         str++;
443         }
444         return 0;
445 }
446 __setup("intel_iommu=", intel_iommu_setup);
447
448 static struct kmem_cache *iommu_domain_cache;
449 static struct kmem_cache *iommu_devinfo_cache;
450 static struct kmem_cache *iommu_iova_cache;
451
452 static inline void *alloc_pgtable_page(int node)
453 {
454         struct page *page;
455         void *vaddr = NULL;
456
457         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
458         if (page)
459                 vaddr = page_address(page);
460         return vaddr;
461 }
462
463 static inline void free_pgtable_page(void *vaddr)
464 {
465         free_page((unsigned long)vaddr);
466 }
467
468 static inline void *alloc_domain_mem(void)
469 {
470         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
471 }
472
473 static void free_domain_mem(void *vaddr)
474 {
475         kmem_cache_free(iommu_domain_cache, vaddr);
476 }
477
478 static inline void * alloc_devinfo_mem(void)
479 {
480         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
481 }
482
483 static inline void free_devinfo_mem(void *vaddr)
484 {
485         kmem_cache_free(iommu_devinfo_cache, vaddr);
486 }
487
488 struct iova *alloc_iova_mem(void)
489 {
490         return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
491 }
492
493 void free_iova_mem(struct iova *iova)
494 {
495         kmem_cache_free(iommu_iova_cache, iova);
496 }
497
498
499 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
500 {
501         unsigned long sagaw;
502         int agaw = -1;
503
504         sagaw = cap_sagaw(iommu->cap);
505         for (agaw = width_to_agaw(max_gaw);
506              agaw >= 0; agaw--) {
507                 if (test_bit(agaw, &sagaw))
508                         break;
509         }
510
511         return agaw;
512 }
513
514 /*
515  * Calculate max SAGAW for each iommu.
516  */
517 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
518 {
519         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
520 }
521
522 /*
523  * calculate agaw for each iommu.
524  * "SAGAW" may be different across iommus, use a default agaw, and
525  * get a supported less agaw for iommus that don't support the default agaw.
526  */
527 int iommu_calculate_agaw(struct intel_iommu *iommu)
528 {
529         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
530 }
531
532 /* This functionin only returns single iommu in a domain */
533 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
534 {
535         int iommu_id;
536
537         /* si_domain and vm domain should not get here. */
538         BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
539         BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
540
541         iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
542         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
543                 return NULL;
544
545         return g_iommus[iommu_id];
546 }
547
548 static void domain_update_iommu_coherency(struct dmar_domain *domain)
549 {
550         int i;
551
552         domain->iommu_coherency = 1;
553
554         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
555                 if (!ecap_coherent(g_iommus[i]->ecap)) {
556                         domain->iommu_coherency = 0;
557                         break;
558                 }
559         }
560 }
561
562 static void domain_update_iommu_snooping(struct dmar_domain *domain)
563 {
564         int i;
565
566         domain->iommu_snooping = 1;
567
568         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
569                 if (!ecap_sc_support(g_iommus[i]->ecap)) {
570                         domain->iommu_snooping = 0;
571                         break;
572                 }
573         }
574 }
575
576 static void domain_update_iommu_superpage(struct dmar_domain *domain)
577 {
578         int i, mask = 0xf;
579
580         if (!intel_iommu_superpage) {
581                 domain->iommu_superpage = 0;
582                 return;
583         }
584
585         domain->iommu_superpage = 4; /* 1TiB */
586
587         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
588                 mask |= cap_super_page_val(g_iommus[i]->cap);
589                 if (!mask) {
590                         break;
591                 }
592         }
593         domain->iommu_superpage = fls(mask);
594 }
595
596 /* Some capabilities may be different across iommus */
597 static void domain_update_iommu_cap(struct dmar_domain *domain)
598 {
599         domain_update_iommu_coherency(domain);
600         domain_update_iommu_snooping(domain);
601         domain_update_iommu_superpage(domain);
602 }
603
604 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
605 {
606         struct dmar_drhd_unit *drhd = NULL;
607         int i;
608
609         for_each_drhd_unit(drhd) {
610                 if (drhd->ignored)
611                         continue;
612                 if (segment != drhd->segment)
613                         continue;
614
615                 for (i = 0; i < drhd->devices_cnt; i++) {
616                         if (drhd->devices[i] &&
617                             drhd->devices[i]->bus->number == bus &&
618                             drhd->devices[i]->devfn == devfn)
619                                 return drhd->iommu;
620                         if (drhd->devices[i] &&
621                             drhd->devices[i]->subordinate &&
622                             drhd->devices[i]->subordinate->number <= bus &&
623                             drhd->devices[i]->subordinate->subordinate >= bus)
624                                 return drhd->iommu;
625                 }
626
627                 if (drhd->include_all)
628                         return drhd->iommu;
629         }
630
631         return NULL;
632 }
633
634 static void domain_flush_cache(struct dmar_domain *domain,
635                                void *addr, int size)
636 {
637         if (!domain->iommu_coherency)
638                 clflush_cache_range(addr, size);
639 }
640
641 /* Gets context entry for a given bus and devfn */
642 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
643                 u8 bus, u8 devfn)
644 {
645         struct root_entry *root;
646         struct context_entry *context;
647         unsigned long phy_addr;
648         unsigned long flags;
649
650         spin_lock_irqsave(&iommu->lock, flags);
651         root = &iommu->root_entry[bus];
652         context = get_context_addr_from_root(root);
653         if (!context) {
654                 context = (struct context_entry *)
655                                 alloc_pgtable_page(iommu->node);
656                 if (!context) {
657                         spin_unlock_irqrestore(&iommu->lock, flags);
658                         return NULL;
659                 }
660                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
661                 phy_addr = virt_to_phys((void *)context);
662                 set_root_value(root, phy_addr);
663                 set_root_present(root);
664                 __iommu_flush_cache(iommu, root, sizeof(*root));
665         }
666         spin_unlock_irqrestore(&iommu->lock, flags);
667         return &context[devfn];
668 }
669
670 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
671 {
672         struct root_entry *root;
673         struct context_entry *context;
674         int ret;
675         unsigned long flags;
676
677         spin_lock_irqsave(&iommu->lock, flags);
678         root = &iommu->root_entry[bus];
679         context = get_context_addr_from_root(root);
680         if (!context) {
681                 ret = 0;
682                 goto out;
683         }
684         ret = context_present(&context[devfn]);
685 out:
686         spin_unlock_irqrestore(&iommu->lock, flags);
687         return ret;
688 }
689
690 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
691 {
692         struct root_entry *root;
693         struct context_entry *context;
694         unsigned long flags;
695
696         spin_lock_irqsave(&iommu->lock, flags);
697         root = &iommu->root_entry[bus];
698         context = get_context_addr_from_root(root);
699         if (context) {
700                 context_clear_entry(&context[devfn]);
701                 __iommu_flush_cache(iommu, &context[devfn], \
702                         sizeof(*context));
703         }
704         spin_unlock_irqrestore(&iommu->lock, flags);
705 }
706
707 static void free_context_table(struct intel_iommu *iommu)
708 {
709         struct root_entry *root;
710         int i;
711         unsigned long flags;
712         struct context_entry *context;
713
714         spin_lock_irqsave(&iommu->lock, flags);
715         if (!iommu->root_entry) {
716                 goto out;
717         }
718         for (i = 0; i < ROOT_ENTRY_NR; i++) {
719                 root = &iommu->root_entry[i];
720                 context = get_context_addr_from_root(root);
721                 if (context)
722                         free_pgtable_page(context);
723         }
724         free_pgtable_page(iommu->root_entry);
725         iommu->root_entry = NULL;
726 out:
727         spin_unlock_irqrestore(&iommu->lock, flags);
728 }
729
730 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
731                                       unsigned long pfn, int large_level)
732 {
733         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
734         struct dma_pte *parent, *pte = NULL;
735         int level = agaw_to_level(domain->agaw);
736         int offset, target_level;
737
738         BUG_ON(!domain->pgd);
739         BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
740         parent = domain->pgd;
741
742         /* Search pte */
743         if (!large_level)
744                 target_level = 1;
745         else
746                 target_level = large_level;
747
748         while (level > 0) {
749                 void *tmp_page;
750
751                 offset = pfn_level_offset(pfn, level);
752                 pte = &parent[offset];
753                 if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
754                         break;
755                 if (level == target_level)
756                         break;
757
758                 if (!dma_pte_present(pte)) {
759                         uint64_t pteval;
760
761                         tmp_page = alloc_pgtable_page(domain->nid);
762
763                         if (!tmp_page)
764                                 return NULL;
765
766                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
767                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
768                         if (cmpxchg64(&pte->val, 0ULL, pteval)) {
769                                 /* Someone else set it while we were thinking; use theirs. */
770                                 free_pgtable_page(tmp_page);
771                         } else {
772                                 dma_pte_addr(pte);
773                                 domain_flush_cache(domain, pte, sizeof(*pte));
774                         }
775                 }
776                 parent = phys_to_virt(dma_pte_addr(pte));
777                 level--;
778         }
779
780         return pte;
781 }
782
783
784 /* return address's pte at specific level */
785 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
786                                          unsigned long pfn,
787                                          int level, int *large_page)
788 {
789         struct dma_pte *parent, *pte = NULL;
790         int total = agaw_to_level(domain->agaw);
791         int offset;
792
793         parent = domain->pgd;
794         while (level <= total) {
795                 offset = pfn_level_offset(pfn, total);
796                 pte = &parent[offset];
797                 if (level == total)
798                         return pte;
799
800                 if (!dma_pte_present(pte)) {
801                         *large_page = total;
802                         break;
803                 }
804
805                 if (pte->val & DMA_PTE_LARGE_PAGE) {
806                         *large_page = total;
807                         return pte;
808                 }
809
810                 parent = phys_to_virt(dma_pte_addr(pte));
811                 total--;
812         }
813         return NULL;
814 }
815
816 /* clear last level pte, a tlb flush should be followed */
817 static void dma_pte_clear_range(struct dmar_domain *domain,
818                                 unsigned long start_pfn,
819                                 unsigned long last_pfn)
820 {
821         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
822         unsigned int large_page = 1;
823         struct dma_pte *first_pte, *pte;
824
825         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
826         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
827         BUG_ON(start_pfn > last_pfn);
828
829         /* we don't need lock here; nobody else touches the iova range */
830         do {
831                 large_page = 1;
832                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
833                 if (!pte) {
834                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
835                         continue;
836                 }
837                 do {
838                         dma_clear_pte(pte);
839                         start_pfn += lvl_to_nr_pages(large_page);
840                         pte++;
841                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
842
843                 domain_flush_cache(domain, first_pte,
844                                    (void *)pte - (void *)first_pte);
845
846         } while (start_pfn && start_pfn <= last_pfn);
847 }
848
849 /* free page table pages. last level pte should already be cleared */
850 static void dma_pte_free_pagetable(struct dmar_domain *domain,
851                                    unsigned long start_pfn,
852                                    unsigned long last_pfn)
853 {
854         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
855         struct dma_pte *first_pte, *pte;
856         int total = agaw_to_level(domain->agaw);
857         int level;
858         unsigned long tmp;
859         int large_page = 2;
860
861         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
862         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
863         BUG_ON(start_pfn > last_pfn);
864
865         /* We don't need lock here; nobody else touches the iova range */
866         level = 2;
867         while (level <= total) {
868                 tmp = align_to_level(start_pfn, level);
869
870                 /* If we can't even clear one PTE at this level, we're done */
871                 if (tmp + level_size(level) - 1 > last_pfn)
872                         return;
873
874                 do {
875                         large_page = level;
876                         first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
877                         if (large_page > level)
878                                 level = large_page + 1;
879                         if (!pte) {
880                                 tmp = align_to_level(tmp + 1, level + 1);
881                                 continue;
882                         }
883                         do {
884                                 if (dma_pte_present(pte)) {
885                                         free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
886                                         dma_clear_pte(pte);
887                                 }
888                                 pte++;
889                                 tmp += level_size(level);
890                         } while (!first_pte_in_page(pte) &&
891                                  tmp + level_size(level) - 1 <= last_pfn);
892
893                         domain_flush_cache(domain, first_pte,
894                                            (void *)pte - (void *)first_pte);
895                         
896                 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
897                 level++;
898         }
899         /* free pgd */
900         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
901                 free_pgtable_page(domain->pgd);
902                 domain->pgd = NULL;
903         }
904 }
905
906 /* iommu handling */
907 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
908 {
909         struct root_entry *root;
910         unsigned long flags;
911
912         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
913         if (!root)
914                 return -ENOMEM;
915
916         __iommu_flush_cache(iommu, root, ROOT_SIZE);
917
918         spin_lock_irqsave(&iommu->lock, flags);
919         iommu->root_entry = root;
920         spin_unlock_irqrestore(&iommu->lock, flags);
921
922         return 0;
923 }
924
925 static void iommu_set_root_entry(struct intel_iommu *iommu)
926 {
927         void *addr;
928         u32 sts;
929         unsigned long flag;
930
931         addr = iommu->root_entry;
932
933         spin_lock_irqsave(&iommu->register_lock, flag);
934         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
935
936         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
937
938         /* Make sure hardware complete it */
939         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
940                       readl, (sts & DMA_GSTS_RTPS), sts);
941
942         spin_unlock_irqrestore(&iommu->register_lock, flag);
943 }
944
945 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
946 {
947         u32 val;
948         unsigned long flag;
949
950         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
951                 return;
952
953         spin_lock_irqsave(&iommu->register_lock, flag);
954         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
955
956         /* Make sure hardware complete it */
957         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
958                       readl, (!(val & DMA_GSTS_WBFS)), val);
959
960         spin_unlock_irqrestore(&iommu->register_lock, flag);
961 }
962
963 /* return value determine if we need a write buffer flush */
964 static void __iommu_flush_context(struct intel_iommu *iommu,
965                                   u16 did, u16 source_id, u8 function_mask,
966                                   u64 type)
967 {
968         u64 val = 0;
969         unsigned long flag;
970
971         switch (type) {
972         case DMA_CCMD_GLOBAL_INVL:
973                 val = DMA_CCMD_GLOBAL_INVL;
974                 break;
975         case DMA_CCMD_DOMAIN_INVL:
976                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
977                 break;
978         case DMA_CCMD_DEVICE_INVL:
979                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
980                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
981                 break;
982         default:
983                 BUG();
984         }
985         val |= DMA_CCMD_ICC;
986
987         spin_lock_irqsave(&iommu->register_lock, flag);
988         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
989
990         /* Make sure hardware complete it */
991         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
992                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
993
994         spin_unlock_irqrestore(&iommu->register_lock, flag);
995 }
996
997 /* return value determine if we need a write buffer flush */
998 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
999                                 u64 addr, unsigned int size_order, u64 type)
1000 {
1001         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1002         u64 val = 0, val_iva = 0;
1003         unsigned long flag;
1004
1005         switch (type) {
1006         case DMA_TLB_GLOBAL_FLUSH:
1007                 /* global flush doesn't need set IVA_REG */
1008                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1009                 break;
1010         case DMA_TLB_DSI_FLUSH:
1011                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1012                 break;
1013         case DMA_TLB_PSI_FLUSH:
1014                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1015                 /* Note: always flush non-leaf currently */
1016                 val_iva = size_order | addr;
1017                 break;
1018         default:
1019                 BUG();
1020         }
1021         /* Note: set drain read/write */
1022 #if 0
1023         /*
1024          * This is probably to be super secure.. Looks like we can
1025          * ignore it without any impact.
1026          */
1027         if (cap_read_drain(iommu->cap))
1028                 val |= DMA_TLB_READ_DRAIN;
1029 #endif
1030         if (cap_write_drain(iommu->cap))
1031                 val |= DMA_TLB_WRITE_DRAIN;
1032
1033         spin_lock_irqsave(&iommu->register_lock, flag);
1034         /* Note: Only uses first TLB reg currently */
1035         if (val_iva)
1036                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1037         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1038
1039         /* Make sure hardware complete it */
1040         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1041                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1042
1043         spin_unlock_irqrestore(&iommu->register_lock, flag);
1044
1045         /* check IOTLB invalidation granularity */
1046         if (DMA_TLB_IAIG(val) == 0)
1047                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1048         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1049                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1050                         (unsigned long long)DMA_TLB_IIRG(type),
1051                         (unsigned long long)DMA_TLB_IAIG(val));
1052 }
1053
1054 static struct device_domain_info *iommu_support_dev_iotlb(
1055         struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1056 {
1057         int found = 0;
1058         unsigned long flags;
1059         struct device_domain_info *info;
1060         struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1061
1062         if (!ecap_dev_iotlb_support(iommu->ecap))
1063                 return NULL;
1064
1065         if (!iommu->qi)
1066                 return NULL;
1067
1068         spin_lock_irqsave(&device_domain_lock, flags);
1069         list_for_each_entry(info, &domain->devices, link)
1070                 if (info->bus == bus && info->devfn == devfn) {
1071                         found = 1;
1072                         break;
1073                 }
1074         spin_unlock_irqrestore(&device_domain_lock, flags);
1075
1076         if (!found || !info->dev)
1077                 return NULL;
1078
1079         if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1080                 return NULL;
1081
1082         if (!dmar_find_matched_atsr_unit(info->dev))
1083                 return NULL;
1084
1085         info->iommu = iommu;
1086
1087         return info;
1088 }
1089
1090 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1091 {
1092         if (!info)
1093                 return;
1094
1095         pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1096 }
1097
1098 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1099 {
1100         if (!info->dev || !pci_ats_enabled(info->dev))
1101                 return;
1102
1103         pci_disable_ats(info->dev);
1104 }
1105
1106 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1107                                   u64 addr, unsigned mask)
1108 {
1109         u16 sid, qdep;
1110         unsigned long flags;
1111         struct device_domain_info *info;
1112
1113         spin_lock_irqsave(&device_domain_lock, flags);
1114         list_for_each_entry(info, &domain->devices, link) {
1115                 if (!info->dev || !pci_ats_enabled(info->dev))
1116                         continue;
1117
1118                 sid = info->bus << 8 | info->devfn;
1119                 qdep = pci_ats_queue_depth(info->dev);
1120                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1121         }
1122         spin_unlock_irqrestore(&device_domain_lock, flags);
1123 }
1124
1125 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1126                                   unsigned long pfn, unsigned int pages, int map)
1127 {
1128         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1129         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1130
1131         BUG_ON(pages == 0);
1132
1133         /*
1134          * Fallback to domain selective flush if no PSI support or the size is
1135          * too big.
1136          * PSI requires page size to be 2 ^ x, and the base address is naturally
1137          * aligned to the size
1138          */
1139         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1140                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1141                                                 DMA_TLB_DSI_FLUSH);
1142         else
1143                 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1144                                                 DMA_TLB_PSI_FLUSH);
1145
1146         /*
1147          * In caching mode, changes of pages from non-present to present require
1148          * flush. However, device IOTLB doesn't need to be flushed in this case.
1149          */
1150         if (!cap_caching_mode(iommu->cap) || !map)
1151                 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1152 }
1153
1154 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1155 {
1156         u32 pmen;
1157         unsigned long flags;
1158
1159         spin_lock_irqsave(&iommu->register_lock, flags);
1160         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1161         pmen &= ~DMA_PMEN_EPM;
1162         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1163
1164         /* wait for the protected region status bit to clear */
1165         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1166                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1167
1168         spin_unlock_irqrestore(&iommu->register_lock, flags);
1169 }
1170
1171 static int iommu_enable_translation(struct intel_iommu *iommu)
1172 {
1173         u32 sts;
1174         unsigned long flags;
1175
1176         spin_lock_irqsave(&iommu->register_lock, flags);
1177         iommu->gcmd |= DMA_GCMD_TE;
1178         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1179
1180         /* Make sure hardware complete it */
1181         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1182                       readl, (sts & DMA_GSTS_TES), sts);
1183
1184         spin_unlock_irqrestore(&iommu->register_lock, flags);
1185         return 0;
1186 }
1187
1188 static int iommu_disable_translation(struct intel_iommu *iommu)
1189 {
1190         u32 sts;
1191         unsigned long flag;
1192
1193         spin_lock_irqsave(&iommu->register_lock, flag);
1194         iommu->gcmd &= ~DMA_GCMD_TE;
1195         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1196
1197         /* Make sure hardware complete it */
1198         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1199                       readl, (!(sts & DMA_GSTS_TES)), sts);
1200
1201         spin_unlock_irqrestore(&iommu->register_lock, flag);
1202         return 0;
1203 }
1204
1205
1206 static int iommu_init_domains(struct intel_iommu *iommu)
1207 {
1208         unsigned long ndomains;
1209         unsigned long nlongs;
1210
1211         ndomains = cap_ndoms(iommu->cap);
1212         pr_debug("IOMMU %d: Number of Domains supportd <%ld>\n", iommu->seq_id,
1213                         ndomains);
1214         nlongs = BITS_TO_LONGS(ndomains);
1215
1216         spin_lock_init(&iommu->lock);
1217
1218         /* TBD: there might be 64K domains,
1219          * consider other allocation for future chip
1220          */
1221         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1222         if (!iommu->domain_ids) {
1223                 printk(KERN_ERR "Allocating domain id array failed\n");
1224                 return -ENOMEM;
1225         }
1226         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1227                         GFP_KERNEL);
1228         if (!iommu->domains) {
1229                 printk(KERN_ERR "Allocating domain array failed\n");
1230                 return -ENOMEM;
1231         }
1232
1233         /*
1234          * if Caching mode is set, then invalid translations are tagged
1235          * with domainid 0. Hence we need to pre-allocate it.
1236          */
1237         if (cap_caching_mode(iommu->cap))
1238                 set_bit(0, iommu->domain_ids);
1239         return 0;
1240 }
1241
1242
1243 static void domain_exit(struct dmar_domain *domain);
1244 static void vm_domain_exit(struct dmar_domain *domain);
1245
1246 void free_dmar_iommu(struct intel_iommu *iommu)
1247 {
1248         struct dmar_domain *domain;
1249         int i;
1250         unsigned long flags;
1251
1252         if ((iommu->domains) && (iommu->domain_ids)) {
1253                 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1254                         domain = iommu->domains[i];
1255                         clear_bit(i, iommu->domain_ids);
1256
1257                         spin_lock_irqsave(&domain->iommu_lock, flags);
1258                         if (--domain->iommu_count == 0) {
1259                                 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1260                                         vm_domain_exit(domain);
1261                                 else
1262                                         domain_exit(domain);
1263                         }
1264                         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1265                 }
1266         }
1267
1268         if (iommu->gcmd & DMA_GCMD_TE)
1269                 iommu_disable_translation(iommu);
1270
1271         if (iommu->irq) {
1272                 irq_set_handler_data(iommu->irq, NULL);
1273                 /* This will mask the irq */
1274                 free_irq(iommu->irq, iommu);
1275                 destroy_irq(iommu->irq);
1276         }
1277
1278         kfree(iommu->domains);
1279         kfree(iommu->domain_ids);
1280
1281         g_iommus[iommu->seq_id] = NULL;
1282
1283         /* if all iommus are freed, free g_iommus */
1284         for (i = 0; i < g_num_of_iommus; i++) {
1285                 if (g_iommus[i])
1286                         break;
1287         }
1288
1289         if (i == g_num_of_iommus)
1290                 kfree(g_iommus);
1291
1292         /* free context mapping */
1293         free_context_table(iommu);
1294 }
1295
1296 static struct dmar_domain *alloc_domain(void)
1297 {
1298         struct dmar_domain *domain;
1299
1300         domain = alloc_domain_mem();
1301         if (!domain)
1302                 return NULL;
1303
1304         domain->nid = -1;
1305         memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1306         domain->flags = 0;
1307
1308         return domain;
1309 }
1310
1311 static int iommu_attach_domain(struct dmar_domain *domain,
1312                                struct intel_iommu *iommu)
1313 {
1314         int num;
1315         unsigned long ndomains;
1316         unsigned long flags;
1317
1318         ndomains = cap_ndoms(iommu->cap);
1319
1320         spin_lock_irqsave(&iommu->lock, flags);
1321
1322         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1323         if (num >= ndomains) {
1324                 spin_unlock_irqrestore(&iommu->lock, flags);
1325                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1326                 return -ENOMEM;
1327         }
1328
1329         domain->id = num;
1330         set_bit(num, iommu->domain_ids);
1331         set_bit(iommu->seq_id, &domain->iommu_bmp);
1332         iommu->domains[num] = domain;
1333         spin_unlock_irqrestore(&iommu->lock, flags);
1334
1335         return 0;
1336 }
1337
1338 static void iommu_detach_domain(struct dmar_domain *domain,
1339                                 struct intel_iommu *iommu)
1340 {
1341         unsigned long flags;
1342         int num, ndomains;
1343         int found = 0;
1344
1345         spin_lock_irqsave(&iommu->lock, flags);
1346         ndomains = cap_ndoms(iommu->cap);
1347         for_each_set_bit(num, iommu->domain_ids, ndomains) {
1348                 if (iommu->domains[num] == domain) {
1349                         found = 1;
1350                         break;
1351                 }
1352         }
1353
1354         if (found) {
1355                 clear_bit(num, iommu->domain_ids);
1356                 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1357                 iommu->domains[num] = NULL;
1358         }
1359         spin_unlock_irqrestore(&iommu->lock, flags);
1360 }
1361
1362 static struct iova_domain reserved_iova_list;
1363 static struct lock_class_key reserved_rbtree_key;
1364
1365 static int dmar_init_reserved_ranges(void)
1366 {
1367         struct pci_dev *pdev = NULL;
1368         struct iova *iova;
1369         int i;
1370
1371         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1372
1373         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1374                 &reserved_rbtree_key);
1375
1376         /* IOAPIC ranges shouldn't be accessed by DMA */
1377         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1378                 IOVA_PFN(IOAPIC_RANGE_END));
1379         if (!iova) {
1380                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1381                 return -ENODEV;
1382         }
1383
1384         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1385         for_each_pci_dev(pdev) {
1386                 struct resource *r;
1387
1388                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1389                         r = &pdev->resource[i];
1390                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1391                                 continue;
1392                         iova = reserve_iova(&reserved_iova_list,
1393                                             IOVA_PFN(r->start),
1394                                             IOVA_PFN(r->end));
1395                         if (!iova) {
1396                                 printk(KERN_ERR "Reserve iova failed\n");
1397                                 return -ENODEV;
1398                         }
1399                 }
1400         }
1401         return 0;
1402 }
1403
1404 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1405 {
1406         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1407 }
1408
1409 static inline int guestwidth_to_adjustwidth(int gaw)
1410 {
1411         int agaw;
1412         int r = (gaw - 12) % 9;
1413
1414         if (r == 0)
1415                 agaw = gaw;
1416         else
1417                 agaw = gaw + 9 - r;
1418         if (agaw > 64)
1419                 agaw = 64;
1420         return agaw;
1421 }
1422
1423 static int domain_init(struct dmar_domain *domain, int guest_width)
1424 {
1425         struct intel_iommu *iommu;
1426         int adjust_width, agaw;
1427         unsigned long sagaw;
1428
1429         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1430         spin_lock_init(&domain->iommu_lock);
1431
1432         domain_reserve_special_ranges(domain);
1433
1434         /* calculate AGAW */
1435         iommu = domain_get_iommu(domain);
1436         if (guest_width > cap_mgaw(iommu->cap))
1437                 guest_width = cap_mgaw(iommu->cap);
1438         domain->gaw = guest_width;
1439         adjust_width = guestwidth_to_adjustwidth(guest_width);
1440         agaw = width_to_agaw(adjust_width);
1441         sagaw = cap_sagaw(iommu->cap);
1442         if (!test_bit(agaw, &sagaw)) {
1443                 /* hardware doesn't support it, choose a bigger one */
1444                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1445                 agaw = find_next_bit(&sagaw, 5, agaw);
1446                 if (agaw >= 5)
1447                         return -ENODEV;
1448         }
1449         domain->agaw = agaw;
1450         INIT_LIST_HEAD(&domain->devices);
1451
1452         if (ecap_coherent(iommu->ecap))
1453                 domain->iommu_coherency = 1;
1454         else
1455                 domain->iommu_coherency = 0;
1456
1457         if (ecap_sc_support(iommu->ecap))
1458                 domain->iommu_snooping = 1;
1459         else
1460                 domain->iommu_snooping = 0;
1461
1462         domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1463         domain->iommu_count = 1;
1464         domain->nid = iommu->node;
1465
1466         /* always allocate the top pgd */
1467         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1468         if (!domain->pgd)
1469                 return -ENOMEM;
1470         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1471         return 0;
1472 }
1473
1474 static void domain_exit(struct dmar_domain *domain)
1475 {
1476         struct dmar_drhd_unit *drhd;
1477         struct intel_iommu *iommu;
1478
1479         /* Domain 0 is reserved, so dont process it */
1480         if (!domain)
1481                 return;
1482
1483         /* Flush any lazy unmaps that may reference this domain */
1484         if (!intel_iommu_strict)
1485                 flush_unmaps_timeout(0);
1486
1487         domain_remove_dev_info(domain);
1488         /* destroy iovas */
1489         put_iova_domain(&domain->iovad);
1490
1491         /* clear ptes */
1492         dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1493
1494         /* free page tables */
1495         dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1496
1497         for_each_active_iommu(iommu, drhd)
1498                 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1499                         iommu_detach_domain(domain, iommu);
1500
1501         free_domain_mem(domain);
1502 }
1503
1504 static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1505                                  u8 bus, u8 devfn, int translation)
1506 {
1507         struct context_entry *context;
1508         unsigned long flags;
1509         struct intel_iommu *iommu;
1510         struct dma_pte *pgd;
1511         unsigned long num;
1512         unsigned long ndomains;
1513         int id;
1514         int agaw;
1515         struct device_domain_info *info = NULL;
1516
1517         pr_debug("Set context mapping for %02x:%02x.%d\n",
1518                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1519
1520         BUG_ON(!domain->pgd);
1521         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1522                translation != CONTEXT_TT_MULTI_LEVEL);
1523
1524         iommu = device_to_iommu(segment, bus, devfn);
1525         if (!iommu)
1526                 return -ENODEV;
1527
1528         context = device_to_context_entry(iommu, bus, devfn);
1529         if (!context)
1530                 return -ENOMEM;
1531         spin_lock_irqsave(&iommu->lock, flags);
1532         if (context_present(context)) {
1533                 spin_unlock_irqrestore(&iommu->lock, flags);
1534                 return 0;
1535         }
1536
1537         id = domain->id;
1538         pgd = domain->pgd;
1539
1540         if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1541             domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1542                 int found = 0;
1543
1544                 /* find an available domain id for this device in iommu */
1545                 ndomains = cap_ndoms(iommu->cap);
1546                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1547                         if (iommu->domains[num] == domain) {
1548                                 id = num;
1549                                 found = 1;
1550                                 break;
1551                         }
1552                 }
1553
1554                 if (found == 0) {
1555                         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1556                         if (num >= ndomains) {
1557                                 spin_unlock_irqrestore(&iommu->lock, flags);
1558                                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1559                                 return -EFAULT;
1560                         }
1561
1562                         set_bit(num, iommu->domain_ids);
1563                         iommu->domains[num] = domain;
1564                         id = num;
1565                 }
1566
1567                 /* Skip top levels of page tables for
1568                  * iommu which has less agaw than default.
1569                  * Unnecessary for PT mode.
1570                  */
1571                 if (translation != CONTEXT_TT_PASS_THROUGH) {
1572                         for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1573                                 pgd = phys_to_virt(dma_pte_addr(pgd));
1574                                 if (!dma_pte_present(pgd)) {
1575                                         spin_unlock_irqrestore(&iommu->lock, flags);
1576                                         return -ENOMEM;
1577                                 }
1578                         }
1579                 }
1580         }
1581
1582         context_set_domain_id(context, id);
1583
1584         if (translation != CONTEXT_TT_PASS_THROUGH) {
1585                 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1586                 translation = info ? CONTEXT_TT_DEV_IOTLB :
1587                                      CONTEXT_TT_MULTI_LEVEL;
1588         }
1589         /*
1590          * In pass through mode, AW must be programmed to indicate the largest
1591          * AGAW value supported by hardware. And ASR is ignored by hardware.
1592          */
1593         if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1594                 context_set_address_width(context, iommu->msagaw);
1595         else {
1596                 context_set_address_root(context, virt_to_phys(pgd));
1597                 context_set_address_width(context, iommu->agaw);
1598         }
1599
1600         context_set_translation_type(context, translation);
1601         context_set_fault_enable(context);
1602         context_set_present(context);
1603         domain_flush_cache(domain, context, sizeof(*context));
1604
1605         /*
1606          * It's a non-present to present mapping. If hardware doesn't cache
1607          * non-present entry we only need to flush the write-buffer. If the
1608          * _does_ cache non-present entries, then it does so in the special
1609          * domain #0, which we have to flush:
1610          */
1611         if (cap_caching_mode(iommu->cap)) {
1612                 iommu->flush.flush_context(iommu, 0,
1613                                            (((u16)bus) << 8) | devfn,
1614                                            DMA_CCMD_MASK_NOBIT,
1615                                            DMA_CCMD_DEVICE_INVL);
1616                 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1617         } else {
1618                 iommu_flush_write_buffer(iommu);
1619         }
1620         iommu_enable_dev_iotlb(info);
1621         spin_unlock_irqrestore(&iommu->lock, flags);
1622
1623         spin_lock_irqsave(&domain->iommu_lock, flags);
1624         if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1625                 domain->iommu_count++;
1626                 if (domain->iommu_count == 1)
1627                         domain->nid = iommu->node;
1628                 domain_update_iommu_cap(domain);
1629         }
1630         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1631         return 0;
1632 }
1633
1634 static int
1635 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1636                         int translation)
1637 {
1638         int ret;
1639         struct pci_dev *tmp, *parent;
1640
1641         ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1642                                          pdev->bus->number, pdev->devfn,
1643                                          translation);
1644         if (ret)
1645                 return ret;
1646
1647         /* dependent device mapping */
1648         tmp = pci_find_upstream_pcie_bridge(pdev);
1649         if (!tmp)
1650                 return 0;
1651         /* Secondary interface's bus number and devfn 0 */
1652         parent = pdev->bus->self;
1653         while (parent != tmp) {
1654                 ret = domain_context_mapping_one(domain,
1655                                                  pci_domain_nr(parent->bus),
1656                                                  parent->bus->number,
1657                                                  parent->devfn, translation);
1658                 if (ret)
1659                         return ret;
1660                 parent = parent->bus->self;
1661         }
1662         if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
1663                 return domain_context_mapping_one(domain,
1664                                         pci_domain_nr(tmp->subordinate),
1665                                         tmp->subordinate->number, 0,
1666                                         translation);
1667         else /* this is a legacy PCI bridge */
1668                 return domain_context_mapping_one(domain,
1669                                                   pci_domain_nr(tmp->bus),
1670                                                   tmp->bus->number,
1671                                                   tmp->devfn,
1672                                                   translation);
1673 }
1674
1675 static int domain_context_mapped(struct pci_dev *pdev)
1676 {
1677         int ret;
1678         struct pci_dev *tmp, *parent;
1679         struct intel_iommu *iommu;
1680
1681         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1682                                 pdev->devfn);
1683         if (!iommu)
1684                 return -ENODEV;
1685
1686         ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1687         if (!ret)
1688                 return ret;
1689         /* dependent device mapping */
1690         tmp = pci_find_upstream_pcie_bridge(pdev);
1691         if (!tmp)
1692                 return ret;
1693         /* Secondary interface's bus number and devfn 0 */
1694         parent = pdev->bus->self;
1695         while (parent != tmp) {
1696                 ret = device_context_mapped(iommu, parent->bus->number,
1697                                             parent->devfn);
1698                 if (!ret)
1699                         return ret;
1700                 parent = parent->bus->self;
1701         }
1702         if (pci_is_pcie(tmp))
1703                 return device_context_mapped(iommu, tmp->subordinate->number,
1704                                              0);
1705         else
1706                 return device_context_mapped(iommu, tmp->bus->number,
1707                                              tmp->devfn);
1708 }
1709
1710 /* Returns a number of VTD pages, but aligned to MM page size */
1711 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1712                                             size_t size)
1713 {
1714         host_addr &= ~PAGE_MASK;
1715         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1716 }
1717
1718 /* Return largest possible superpage level for a given mapping */
1719 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1720                                           unsigned long iov_pfn,
1721                                           unsigned long phy_pfn,
1722                                           unsigned long pages)
1723 {
1724         int support, level = 1;
1725         unsigned long pfnmerge;
1726
1727         support = domain->iommu_superpage;
1728
1729         /* To use a large page, the virtual *and* physical addresses
1730            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1731            of them will mean we have to use smaller pages. So just
1732            merge them and check both at once. */
1733         pfnmerge = iov_pfn | phy_pfn;
1734
1735         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1736                 pages >>= VTD_STRIDE_SHIFT;
1737                 if (!pages)
1738                         break;
1739                 pfnmerge >>= VTD_STRIDE_SHIFT;
1740                 level++;
1741                 support--;
1742         }
1743         return level;
1744 }
1745
1746 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1747                             struct scatterlist *sg, unsigned long phys_pfn,
1748                             unsigned long nr_pages, int prot)
1749 {
1750         struct dma_pte *first_pte = NULL, *pte = NULL;
1751         phys_addr_t uninitialized_var(pteval);
1752         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1753         unsigned long sg_res;
1754         unsigned int largepage_lvl = 0;
1755         unsigned long lvl_pages = 0;
1756
1757         BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1758
1759         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1760                 return -EINVAL;
1761
1762         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1763
1764         if (sg)
1765                 sg_res = 0;
1766         else {
1767                 sg_res = nr_pages + 1;
1768                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1769         }
1770
1771         while (nr_pages > 0) {
1772                 uint64_t tmp;
1773
1774                 if (!sg_res) {
1775                         sg_res = aligned_nrpages(sg->offset, sg->length);
1776                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1777                         sg->dma_length = sg->length;
1778                         pteval = page_to_phys(sg_page(sg)) | prot;
1779                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
1780                 }
1781
1782                 if (!pte) {
1783                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1784
1785                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1786                         if (!pte)
1787                                 return -ENOMEM;
1788                         /* It is large page*/
1789                         if (largepage_lvl > 1)
1790                                 pteval |= DMA_PTE_LARGE_PAGE;
1791                         else
1792                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1793
1794                 }
1795                 /* We don't need lock here, nobody else
1796                  * touches the iova range
1797                  */
1798                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1799                 if (tmp) {
1800                         static int dumps = 5;
1801                         printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1802                                iov_pfn, tmp, (unsigned long long)pteval);
1803                         if (dumps) {
1804                                 dumps--;
1805                                 debug_dma_dump_mappings(NULL);
1806                         }
1807                         WARN_ON(1);
1808                 }
1809
1810                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1811
1812                 BUG_ON(nr_pages < lvl_pages);
1813                 BUG_ON(sg_res < lvl_pages);
1814
1815                 nr_pages -= lvl_pages;
1816                 iov_pfn += lvl_pages;
1817                 phys_pfn += lvl_pages;
1818                 pteval += lvl_pages * VTD_PAGE_SIZE;
1819                 sg_res -= lvl_pages;
1820
1821                 /* If the next PTE would be the first in a new page, then we
1822                    need to flush the cache on the entries we've just written.
1823                    And then we'll need to recalculate 'pte', so clear it and
1824                    let it get set again in the if (!pte) block above.
1825
1826                    If we're done (!nr_pages) we need to flush the cache too.
1827
1828                    Also if we've been setting superpages, we may need to
1829                    recalculate 'pte' and switch back to smaller pages for the
1830                    end of the mapping, if the trailing size is not enough to
1831                    use another superpage (i.e. sg_res < lvl_pages). */
1832                 pte++;
1833                 if (!nr_pages || first_pte_in_page(pte) ||
1834                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
1835                         domain_flush_cache(domain, first_pte,
1836                                            (void *)pte - (void *)first_pte);
1837                         pte = NULL;
1838                 }
1839
1840                 if (!sg_res && nr_pages)
1841                         sg = sg_next(sg);
1842         }
1843         return 0;
1844 }
1845
1846 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1847                                     struct scatterlist *sg, unsigned long nr_pages,
1848                                     int prot)
1849 {
1850         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1851 }
1852
1853 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1854                                      unsigned long phys_pfn, unsigned long nr_pages,
1855                                      int prot)
1856 {
1857         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1858 }
1859
1860 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1861 {
1862         if (!iommu)
1863                 return;
1864
1865         clear_context_table(iommu, bus, devfn);
1866         iommu->flush.flush_context(iommu, 0, 0, 0,
1867                                            DMA_CCMD_GLOBAL_INVL);
1868         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1869 }
1870
1871 static void domain_remove_dev_info(struct dmar_domain *domain)
1872 {
1873         struct device_domain_info *info;
1874         unsigned long flags;
1875         struct intel_iommu *iommu;
1876
1877         spin_lock_irqsave(&device_domain_lock, flags);
1878         while (!list_empty(&domain->devices)) {
1879                 info = list_entry(domain->devices.next,
1880                         struct device_domain_info, link);
1881                 list_del(&info->link);
1882                 list_del(&info->global);
1883                 if (info->dev)
1884                         info->dev->dev.archdata.iommu = NULL;
1885                 spin_unlock_irqrestore(&device_domain_lock, flags);
1886
1887                 iommu_disable_dev_iotlb(info);
1888                 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1889                 iommu_detach_dev(iommu, info->bus, info->devfn);
1890                 free_devinfo_mem(info);
1891
1892                 spin_lock_irqsave(&device_domain_lock, flags);
1893         }
1894         spin_unlock_irqrestore(&device_domain_lock, flags);
1895 }
1896
1897 /*
1898  * find_domain
1899  * Note: we use struct pci_dev->dev.archdata.iommu stores the info
1900  */
1901 static struct dmar_domain *
1902 find_domain(struct pci_dev *pdev)
1903 {
1904         struct device_domain_info *info;
1905
1906         /* No lock here, assumes no domain exit in normal case */
1907         info = pdev->dev.archdata.iommu;
1908         if (info)
1909                 return info->domain;
1910         return NULL;
1911 }
1912
1913 /* domain is initialized */
1914 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1915 {
1916         struct dmar_domain *domain, *found = NULL;
1917         struct intel_iommu *iommu;
1918         struct dmar_drhd_unit *drhd;
1919         struct device_domain_info *info, *tmp;
1920         struct pci_dev *dev_tmp;
1921         unsigned long flags;
1922         int bus = 0, devfn = 0;
1923         int segment;
1924         int ret;
1925
1926         domain = find_domain(pdev);
1927         if (domain)
1928                 return domain;
1929
1930         segment = pci_domain_nr(pdev->bus);
1931
1932         dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1933         if (dev_tmp) {
1934                 if (pci_is_pcie(dev_tmp)) {
1935                         bus = dev_tmp->subordinate->number;
1936                         devfn = 0;
1937                 } else {
1938                         bus = dev_tmp->bus->number;
1939                         devfn = dev_tmp->devfn;
1940                 }
1941                 spin_lock_irqsave(&device_domain_lock, flags);
1942                 list_for_each_entry(info, &device_domain_list, global) {
1943                         if (info->segment == segment &&
1944                             info->bus == bus && info->devfn == devfn) {
1945                                 found = info->domain;
1946                                 break;
1947                         }
1948                 }
1949                 spin_unlock_irqrestore(&device_domain_lock, flags);
1950                 /* pcie-pci bridge already has a domain, uses it */
1951                 if (found) {
1952                         domain = found;
1953                         goto found_domain;
1954                 }
1955         }
1956
1957         domain = alloc_domain();
1958         if (!domain)
1959                 goto error;
1960
1961         /* Allocate new domain for the device */
1962         drhd = dmar_find_matched_drhd_unit(pdev);
1963         if (!drhd) {
1964                 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1965                         pci_name(pdev));
1966                 return NULL;
1967         }
1968         iommu = drhd->iommu;
1969
1970         ret = iommu_attach_domain(domain, iommu);
1971         if (ret) {
1972                 free_domain_mem(domain);
1973                 goto error;
1974         }
1975
1976         if (domain_init(domain, gaw)) {
1977                 domain_exit(domain);
1978                 goto error;
1979         }
1980
1981         /* register pcie-to-pci device */
1982         if (dev_tmp) {
1983                 info = alloc_devinfo_mem();
1984                 if (!info) {
1985                         domain_exit(domain);
1986                         goto error;
1987                 }
1988                 info->segment = segment;
1989                 info->bus = bus;
1990                 info->devfn = devfn;
1991                 info->dev = NULL;
1992                 info->domain = domain;
1993                 /* This domain is shared by devices under p2p bridge */
1994                 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1995
1996                 /* pcie-to-pci bridge already has a domain, uses it */
1997                 found = NULL;
1998                 spin_lock_irqsave(&device_domain_lock, flags);
1999                 list_for_each_entry(tmp, &device_domain_list, global) {
2000                         if (tmp->segment == segment &&
2001                             tmp->bus == bus && tmp->devfn == devfn) {
2002                                 found = tmp->domain;
2003                                 break;
2004                         }
2005                 }
2006                 if (found) {
2007                         spin_unlock_irqrestore(&device_domain_lock, flags);
2008                         free_devinfo_mem(info);
2009                         domain_exit(domain);
2010                         domain = found;
2011                 } else {
2012                         list_add(&info->link, &domain->devices);
2013                         list_add(&info->global, &device_domain_list);
2014                         spin_unlock_irqrestore(&device_domain_lock, flags);
2015                 }
2016         }
2017
2018 found_domain:
2019         info = alloc_devinfo_mem();
2020         if (!info)
2021                 goto error;
2022         info->segment = segment;
2023         info->bus = pdev->bus->number;
2024         info->devfn = pdev->devfn;
2025         info->dev = pdev;
2026         info->domain = domain;
2027         spin_lock_irqsave(&device_domain_lock, flags);
2028         /* somebody is fast */
2029         found = find_domain(pdev);
2030         if (found != NULL) {
2031                 spin_unlock_irqrestore(&device_domain_lock, flags);
2032                 if (found != domain) {
2033                         domain_exit(domain);
2034                         domain = found;
2035                 }
2036                 free_devinfo_mem(info);
2037                 return domain;
2038         }
2039         list_add(&info->link, &domain->devices);
2040         list_add(&info->global, &device_domain_list);
2041         pdev->dev.archdata.iommu = info;
2042         spin_unlock_irqrestore(&device_domain_lock, flags);
2043         return domain;
2044 error:
2045         /* recheck it here, maybe others set it */
2046         return find_domain(pdev);
2047 }
2048
2049 static int iommu_identity_mapping;
2050 #define IDENTMAP_ALL            1
2051 #define IDENTMAP_GFX            2
2052 #define IDENTMAP_AZALIA         4
2053
2054 static int iommu_domain_identity_map(struct dmar_domain *domain,
2055                                      unsigned long long start,
2056                                      unsigned long long end)
2057 {
2058         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2059         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2060
2061         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2062                           dma_to_mm_pfn(last_vpfn))) {
2063                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2064                 return -ENOMEM;
2065         }
2066
2067         pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2068                  start, end, domain->id);
2069         /*
2070          * RMRR range might have overlap with physical memory range,
2071          * clear it first
2072          */
2073         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2074
2075         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2076                                   last_vpfn - first_vpfn + 1,
2077                                   DMA_PTE_READ|DMA_PTE_WRITE);
2078 }
2079
2080 static int iommu_prepare_identity_map(struct pci_dev *pdev,
2081                                       unsigned long long start,
2082                                       unsigned long long end)
2083 {
2084         struct dmar_domain *domain;
2085         int ret;
2086
2087         domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2088         if (!domain)
2089                 return -ENOMEM;
2090
2091         /* For _hardware_ passthrough, don't bother. But for software
2092            passthrough, we do it anyway -- it may indicate a memory
2093            range which is reserved in E820, so which didn't get set
2094            up to start with in si_domain */
2095         if (domain == si_domain && hw_pass_through) {
2096                 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2097                        pci_name(pdev), start, end);
2098                 return 0;
2099         }
2100
2101         printk(KERN_INFO
2102                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2103                pci_name(pdev), start, end);
2104         
2105         if (end < start) {
2106                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2107                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2108                         dmi_get_system_info(DMI_BIOS_VENDOR),
2109                         dmi_get_system_info(DMI_BIOS_VERSION),
2110                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2111                 ret = -EIO;
2112                 goto error;
2113         }
2114
2115         if (end >> agaw_to_width(domain->agaw)) {
2116                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2117                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2118                      agaw_to_width(domain->agaw),
2119                      dmi_get_system_info(DMI_BIOS_VENDOR),
2120                      dmi_get_system_info(DMI_BIOS_VERSION),
2121                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2122                 ret = -EIO;
2123                 goto error;
2124         }
2125
2126         ret = iommu_domain_identity_map(domain, start, end);
2127         if (ret)
2128                 goto error;
2129
2130         /* context entry init */
2131         ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2132         if (ret)
2133                 goto error;
2134
2135         return 0;
2136
2137  error:
2138         domain_exit(domain);
2139         return ret;
2140 }
2141
2142 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2143         struct pci_dev *pdev)
2144 {
2145         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2146                 return 0;
2147         return iommu_prepare_identity_map(pdev, rmrr->base_address,
2148                 rmrr->end_address + 1);
2149 }
2150
2151 #ifdef CONFIG_DMAR_FLOPPY_WA
2152 static inline void iommu_prepare_isa(void)
2153 {
2154         struct pci_dev *pdev;
2155         int ret;
2156
2157         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2158         if (!pdev)
2159                 return;
2160
2161         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2162         ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
2163
2164         if (ret)
2165                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2166                        "floppy might not work\n");
2167
2168 }
2169 #else
2170 static inline void iommu_prepare_isa(void)
2171 {
2172         return;
2173 }
2174 #endif /* !CONFIG_DMAR_FLPY_WA */
2175
2176 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2177
2178 static int __init si_domain_work_fn(unsigned long start_pfn,
2179                                     unsigned long end_pfn, void *datax)
2180 {
2181         int *ret = datax;
2182
2183         *ret = iommu_domain_identity_map(si_domain,
2184                                          (uint64_t)start_pfn << PAGE_SHIFT,
2185                                          (uint64_t)end_pfn << PAGE_SHIFT);
2186         return *ret;
2187
2188 }
2189
2190 static int __init si_domain_init(int hw)
2191 {
2192         struct dmar_drhd_unit *drhd;
2193         struct intel_iommu *iommu;
2194         int nid, ret = 0;
2195
2196         si_domain = alloc_domain();
2197         if (!si_domain)
2198                 return -EFAULT;
2199
2200         pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2201
2202         for_each_active_iommu(iommu, drhd) {
2203                 ret = iommu_attach_domain(si_domain, iommu);
2204                 if (ret) {
2205                         domain_exit(si_domain);
2206                         return -EFAULT;
2207                 }
2208         }
2209
2210         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2211                 domain_exit(si_domain);
2212                 return -EFAULT;
2213         }
2214
2215         si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2216
2217         if (hw)
2218                 return 0;
2219
2220         for_each_online_node(nid) {
2221                 work_with_active_regions(nid, si_domain_work_fn, &ret);
2222                 if (ret)
2223                         return ret;
2224         }
2225
2226         return 0;
2227 }
2228
2229 static void domain_remove_one_dev_info(struct dmar_domain *domain,
2230                                           struct pci_dev *pdev);
2231 static int identity_mapping(struct pci_dev *pdev)
2232 {
2233         struct device_domain_info *info;
2234
2235         if (likely(!iommu_identity_mapping))
2236                 return 0;
2237
2238         info = pdev->dev.archdata.iommu;
2239         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2240                 return (info->domain == si_domain);
2241
2242         return 0;
2243 }
2244
2245 static int domain_add_dev_info(struct dmar_domain *domain,
2246                                struct pci_dev *pdev,
2247                                int translation)
2248 {
2249         struct device_domain_info *info;
2250         unsigned long flags;
2251         int ret;
2252
2253         info = alloc_devinfo_mem();
2254         if (!info)
2255                 return -ENOMEM;
2256
2257         ret = domain_context_mapping(domain, pdev, translation);
2258         if (ret) {
2259                 free_devinfo_mem(info);
2260                 return ret;
2261         }
2262
2263         info->segment = pci_domain_nr(pdev->bus);
2264         info->bus = pdev->bus->number;
2265         info->devfn = pdev->devfn;
2266         info->dev = pdev;
2267         info->domain = domain;
2268
2269         spin_lock_irqsave(&device_domain_lock, flags);
2270         list_add(&info->link, &domain->devices);
2271         list_add(&info->global, &device_domain_list);
2272         pdev->dev.archdata.iommu = info;
2273         spin_unlock_irqrestore(&device_domain_lock, flags);
2274
2275         return 0;
2276 }
2277
2278 static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2279 {
2280         if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2281                 return 1;
2282
2283         if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2284                 return 1;
2285
2286         if (!(iommu_identity_mapping & IDENTMAP_ALL))
2287                 return 0;
2288
2289         /*
2290          * We want to start off with all devices in the 1:1 domain, and
2291          * take them out later if we find they can't access all of memory.
2292          *
2293          * However, we can't do this for PCI devices behind bridges,
2294          * because all PCI devices behind the same bridge will end up
2295          * with the same source-id on their transactions.
2296          *
2297          * Practically speaking, we can't change things around for these
2298          * devices at run-time, because we can't be sure there'll be no
2299          * DMA transactions in flight for any of their siblings.
2300          * 
2301          * So PCI devices (unless they're on the root bus) as well as
2302          * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2303          * the 1:1 domain, just in _case_ one of their siblings turns out
2304          * not to be able to map all of memory.
2305          */
2306         if (!pci_is_pcie(pdev)) {
2307                 if (!pci_is_root_bus(pdev->bus))
2308                         return 0;
2309                 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2310                         return 0;
2311         } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2312                 return 0;
2313
2314         /* 
2315          * At boot time, we don't yet know if devices will be 64-bit capable.
2316          * Assume that they will -- if they turn out not to be, then we can 
2317          * take them out of the 1:1 domain later.
2318          */
2319         if (!startup) {
2320                 /*
2321                  * If the device's dma_mask is less than the system's memory
2322                  * size then this is not a candidate for identity mapping.
2323                  */
2324                 u64 dma_mask = pdev->dma_mask;
2325
2326                 if (pdev->dev.coherent_dma_mask &&
2327                     pdev->dev.coherent_dma_mask < dma_mask)
2328                         dma_mask = pdev->dev.coherent_dma_mask;
2329
2330                 return dma_mask >= dma_get_required_mask(&pdev->dev);
2331         }
2332
2333         return 1;
2334 }
2335
2336 static int __init iommu_prepare_static_identity_mapping(int hw)
2337 {
2338         struct pci_dev *pdev = NULL;
2339         int ret;
2340
2341         ret = si_domain_init(hw);
2342         if (ret)
2343                 return -EFAULT;
2344
2345         for_each_pci_dev(pdev) {
2346                 if (iommu_should_identity_map(pdev, 1)) {
2347                         printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2348                                hw ? "hardware" : "software", pci_name(pdev));
2349
2350                         ret = domain_add_dev_info(si_domain, pdev,
2351                                                      hw ? CONTEXT_TT_PASS_THROUGH :
2352                                                      CONTEXT_TT_MULTI_LEVEL);
2353                         if (ret)
2354                                 return ret;
2355                 }
2356         }
2357
2358         return 0;
2359 }
2360
2361 static int __init init_dmars(void)
2362 {
2363         struct dmar_drhd_unit *drhd;
2364         struct dmar_rmrr_unit *rmrr;
2365         struct pci_dev *pdev;
2366         struct intel_iommu *iommu;
2367         int i, ret;
2368
2369         /*
2370          * for each drhd
2371          *    allocate root
2372          *    initialize and program root entry to not present
2373          * endfor
2374          */
2375         for_each_drhd_unit(drhd) {
2376                 g_num_of_iommus++;
2377                 /*
2378                  * lock not needed as this is only incremented in the single
2379                  * threaded kernel __init code path all other access are read
2380                  * only
2381                  */
2382         }
2383
2384         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2385                         GFP_KERNEL);
2386         if (!g_iommus) {
2387                 printk(KERN_ERR "Allocating global iommu array failed\n");
2388                 ret = -ENOMEM;
2389                 goto error;
2390         }
2391
2392         deferred_flush = kzalloc(g_num_of_iommus *
2393                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2394         if (!deferred_flush) {
2395                 ret = -ENOMEM;
2396                 goto error;
2397         }
2398
2399         for_each_drhd_unit(drhd) {
2400                 if (drhd->ignored)
2401                         continue;
2402
2403                 iommu = drhd->iommu;
2404                 g_iommus[iommu->seq_id] = iommu;
2405
2406                 ret = iommu_init_domains(iommu);
2407                 if (ret)
2408                         goto error;
2409
2410                 /*
2411                  * TBD:
2412                  * we could share the same root & context tables
2413                  * among all IOMMU's. Need to Split it later.
2414                  */
2415                 ret = iommu_alloc_root_entry(iommu);
2416                 if (ret) {
2417                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2418                         goto error;
2419                 }
2420                 if (!ecap_pass_through(iommu->ecap))
2421                         hw_pass_through = 0;
2422         }
2423
2424         /*
2425          * Start from the sane iommu hardware state.
2426          */
2427         for_each_drhd_unit(drhd) {
2428                 if (drhd->ignored)
2429                         continue;
2430
2431                 iommu = drhd->iommu;
2432
2433                 /*
2434                  * If the queued invalidation is already initialized by us
2435                  * (for example, while enabling interrupt-remapping) then
2436                  * we got the things already rolling from a sane state.
2437                  */
2438                 if (iommu->qi)
2439                         continue;
2440
2441                 /*
2442                  * Clear any previous faults.
2443                  */
2444                 dmar_fault(-1, iommu);
2445                 /*
2446                  * Disable queued invalidation if supported and already enabled
2447                  * before OS handover.
2448                  */
2449                 dmar_disable_qi(iommu);
2450         }
2451
2452         for_each_drhd_unit(drhd) {
2453                 if (drhd->ignored)
2454                         continue;
2455
2456                 iommu = drhd->iommu;
2457
2458                 if (dmar_enable_qi(iommu)) {
2459                         /*
2460                          * Queued Invalidate not enabled, use Register Based
2461                          * Invalidate
2462                          */
2463                         iommu->flush.flush_context = __iommu_flush_context;
2464                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2465                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2466                                "invalidation\n",
2467                                 iommu->seq_id,
2468                                (unsigned long long)drhd->reg_base_addr);
2469                 } else {
2470                         iommu->flush.flush_context = qi_flush_context;
2471                         iommu->flush.flush_iotlb = qi_flush_iotlb;
2472                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2473                                "invalidation\n",
2474                                 iommu->seq_id,
2475                                (unsigned long long)drhd->reg_base_addr);
2476                 }
2477         }
2478
2479         if (iommu_pass_through)
2480                 iommu_identity_mapping |= IDENTMAP_ALL;
2481
2482 #ifdef CONFIG_DMAR_BROKEN_GFX_WA
2483         iommu_identity_mapping |= IDENTMAP_GFX;
2484 #endif
2485
2486         check_tylersburg_isoch();
2487
2488         /*
2489          * If pass through is not set or not enabled, setup context entries for
2490          * identity mappings for rmrr, gfx, and isa and may fall back to static
2491          * identity mapping if iommu_identity_mapping is set.
2492          */
2493         if (iommu_identity_mapping) {
2494                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2495                 if (ret) {
2496                         printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2497                         goto error;
2498                 }
2499         }
2500         /*
2501          * For each rmrr
2502          *   for each dev attached to rmrr
2503          *   do
2504          *     locate drhd for dev, alloc domain for dev
2505          *     allocate free domain
2506          *     allocate page table entries for rmrr
2507          *     if context not allocated for bus
2508          *           allocate and init context
2509          *           set present in root table for this bus
2510          *     init context with domain, translation etc
2511          *    endfor
2512          * endfor
2513          */
2514         printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2515         for_each_rmrr_units(rmrr) {
2516                 for (i = 0; i < rmrr->devices_cnt; i++) {
2517                         pdev = rmrr->devices[i];
2518                         /*
2519                          * some BIOS lists non-exist devices in DMAR
2520                          * table.
2521                          */
2522                         if (!pdev)
2523                                 continue;
2524                         ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2525                         if (ret)
2526                                 printk(KERN_ERR
2527                                        "IOMMU: mapping reserved region failed\n");
2528                 }
2529         }
2530
2531         iommu_prepare_isa();
2532
2533         /*
2534          * for each drhd
2535          *   enable fault log
2536          *   global invalidate context cache
2537          *   global invalidate iotlb
2538          *   enable translation
2539          */
2540         for_each_drhd_unit(drhd) {
2541                 if (drhd->ignored) {
2542                         /*
2543                          * we always have to disable PMRs or DMA may fail on
2544                          * this device
2545                          */
2546                         if (force_on)
2547                                 iommu_disable_protect_mem_regions(drhd->iommu);
2548                         continue;
2549                 }
2550                 iommu = drhd->iommu;
2551
2552                 iommu_flush_write_buffer(iommu);
2553
2554                 ret = dmar_set_interrupt(iommu);
2555                 if (ret)
2556                         goto error;
2557
2558                 iommu_set_root_entry(iommu);
2559
2560                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2561                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2562
2563                 ret = iommu_enable_translation(iommu);
2564                 if (ret)
2565                         goto error;
2566
2567                 iommu_disable_protect_mem_regions(iommu);
2568         }
2569
2570         return 0;
2571 error:
2572         for_each_drhd_unit(drhd) {
2573                 if (drhd->ignored)
2574                         continue;
2575                 iommu = drhd->iommu;
2576                 free_iommu(iommu);
2577         }
2578         kfree(g_iommus);
2579         return ret;
2580 }
2581
2582 /* This takes a number of _MM_ pages, not VTD pages */
2583 static struct iova *intel_alloc_iova(struct device *dev,
2584                                      struct dmar_domain *domain,
2585                                      unsigned long nrpages, uint64_t dma_mask)
2586 {
2587         struct pci_dev *pdev = to_pci_dev(dev);
2588         struct iova *iova = NULL;
2589
2590         /* Restrict dma_mask to the width that the iommu can handle */
2591         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2592
2593         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2594                 /*
2595                  * First try to allocate an io virtual address in
2596                  * DMA_BIT_MASK(32) and if that fails then try allocating
2597                  * from higher range
2598                  */
2599                 iova = alloc_iova(&domain->iovad, nrpages,
2600                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
2601                 if (iova)
2602                         return iova;
2603         }
2604         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2605         if (unlikely(!iova)) {
2606                 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2607                        nrpages, pci_name(pdev));
2608                 return NULL;
2609         }
2610
2611         return iova;
2612 }
2613
2614 static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2615 {
2616         struct dmar_domain *domain;
2617         int ret;
2618
2619         domain = get_domain_for_dev(pdev,
2620                         DEFAULT_DOMAIN_ADDRESS_WIDTH);
2621         if (!domain) {
2622                 printk(KERN_ERR
2623                         "Allocating domain for %s failed", pci_name(pdev));
2624                 return NULL;
2625         }
2626
2627         /* make sure context mapping is ok */
2628         if (unlikely(!domain_context_mapped(pdev))) {
2629                 ret = domain_context_mapping(domain, pdev,
2630                                              CONTEXT_TT_MULTI_LEVEL);
2631                 if (ret) {
2632                         printk(KERN_ERR
2633                                 "Domain context map for %s failed",
2634                                 pci_name(pdev));
2635                         return NULL;
2636                 }
2637         }
2638
2639         return domain;
2640 }
2641
2642 static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2643 {
2644         struct device_domain_info *info;
2645
2646         /* No lock here, assumes no domain exit in normal case */
2647         info = dev->dev.archdata.iommu;
2648         if (likely(info))
2649                 return info->domain;
2650
2651         return __get_valid_domain_for_dev(dev);
2652 }
2653
2654 static int iommu_dummy(struct pci_dev *pdev)
2655 {
2656         return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2657 }
2658
2659 /* Check if the pdev needs to go through non-identity map and unmap process.*/
2660 static int iommu_no_mapping(struct device *dev)
2661 {
2662         struct pci_dev *pdev;
2663         int found;
2664
2665         if (unlikely(dev->bus != &pci_bus_type))
2666                 return 1;
2667
2668         pdev = to_pci_dev(dev);
2669         if (iommu_dummy(pdev))
2670                 return 1;
2671
2672         if (!iommu_identity_mapping)
2673                 return 0;
2674
2675         found = identity_mapping(pdev);
2676         if (found) {
2677                 if (iommu_should_identity_map(pdev, 0))
2678                         return 1;
2679                 else {
2680                         /*
2681                          * 32 bit DMA is removed from si_domain and fall back
2682                          * to non-identity mapping.
2683                          */
2684                         domain_remove_one_dev_info(si_domain, pdev);
2685                         printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2686                                pci_name(pdev));
2687                         return 0;
2688                 }
2689         } else {
2690                 /*
2691                  * In case of a detached 64 bit DMA device from vm, the device
2692                  * is put into si_domain for identity mapping.
2693                  */
2694                 if (iommu_should_identity_map(pdev, 0)) {
2695                         int ret;
2696                         ret = domain_add_dev_info(si_domain, pdev,
2697                                                   hw_pass_through ?
2698                                                   CONTEXT_TT_PASS_THROUGH :
2699                                                   CONTEXT_TT_MULTI_LEVEL);
2700                         if (!ret) {
2701                                 printk(KERN_INFO "64bit %s uses identity mapping\n",
2702                                        pci_name(pdev));
2703                                 return 1;
2704                         }
2705                 }
2706         }
2707
2708         return 0;
2709 }
2710
2711 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2712                                      size_t size, int dir, u64 dma_mask)
2713 {
2714         struct pci_dev *pdev = to_pci_dev(hwdev);
2715         struct dmar_domain *domain;
2716         phys_addr_t start_paddr;
2717         struct iova *iova;
2718         int prot = 0;
2719         int ret;
2720         struct intel_iommu *iommu;
2721         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2722
2723         BUG_ON(dir == DMA_NONE);
2724
2725         if (iommu_no_mapping(hwdev))
2726                 return paddr;
2727
2728         domain = get_valid_domain_for_dev(pdev);
2729         if (!domain)
2730                 return 0;
2731
2732         iommu = domain_get_iommu(domain);
2733         size = aligned_nrpages(paddr, size);
2734
2735         iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2736         if (!iova)
2737                 goto error;
2738
2739         /*
2740          * Check if DMAR supports zero-length reads on write only
2741          * mappings..
2742          */
2743         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2744                         !cap_zlr(iommu->cap))
2745                 prot |= DMA_PTE_READ;
2746         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2747                 prot |= DMA_PTE_WRITE;
2748         /*
2749          * paddr - (paddr + size) might be partial page, we should map the whole
2750          * page.  Note: if two part of one page are separately mapped, we
2751          * might have two guest_addr mapping to the same host paddr, but this
2752          * is not a big problem
2753          */
2754         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2755                                  mm_to_dma_pfn(paddr_pfn), size, prot);
2756         if (ret)
2757                 goto error;
2758
2759         /* it's a non-present to present mapping. Only flush if caching mode */
2760         if (cap_caching_mode(iommu->cap))
2761                 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2762         else
2763                 iommu_flush_write_buffer(iommu);
2764
2765         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2766         start_paddr += paddr & ~PAGE_MASK;
2767         return start_paddr;
2768
2769 error:
2770         if (iova)
2771                 __free_iova(&domain->iovad, iova);
2772         printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
2773                 pci_name(pdev), size, (unsigned long long)paddr, dir);
2774         return 0;
2775 }
2776
2777 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2778                                  unsigned long offset, size_t size,
2779                                  enum dma_data_direction dir,
2780                                  struct dma_attrs *attrs)
2781 {
2782         return __intel_map_single(dev, page_to_phys(page) + offset, size,
2783                                   dir, to_pci_dev(dev)->dma_mask);
2784 }
2785
2786 static void flush_unmaps(void)
2787 {
2788         int i, j;
2789
2790         timer_on = 0;
2791
2792         /* just flush them all */
2793         for (i = 0; i < g_num_of_iommus; i++) {
2794                 struct intel_iommu *iommu = g_iommus[i];
2795                 if (!iommu)
2796                         continue;
2797
2798                 if (!deferred_flush[i].next)
2799                         continue;
2800
2801                 /* In caching mode, global flushes turn emulation expensive */
2802                 if (!cap_caching_mode(iommu->cap))
2803                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2804                                          DMA_TLB_GLOBAL_FLUSH);
2805                 for (j = 0; j < deferred_flush[i].next; j++) {
2806                         unsigned long mask;
2807                         struct iova *iova = deferred_flush[i].iova[j];
2808                         struct dmar_domain *domain = deferred_flush[i].domain[j];
2809
2810                         /* On real hardware multiple invalidations are expensive */
2811                         if (cap_caching_mode(iommu->cap))
2812                                 iommu_flush_iotlb_psi(iommu, domain->id,
2813                                 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2814                         else {
2815                                 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2816                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2817                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2818                         }
2819                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2820                 }
2821                 deferred_flush[i].next = 0;
2822         }
2823
2824         list_size = 0;
2825 }
2826
2827 static void flush_unmaps_timeout(unsigned long data)
2828 {
2829         unsigned long flags;
2830
2831         spin_lock_irqsave(&async_umap_flush_lock, flags);
2832         flush_unmaps();
2833         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2834 }
2835
2836 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2837 {
2838         unsigned long flags;
2839         int next, iommu_id;
2840         struct intel_iommu *iommu;
2841
2842         spin_lock_irqsave(&async_umap_flush_lock, flags);
2843         if (list_size == HIGH_WATER_MARK)
2844                 flush_unmaps();
2845
2846         iommu = domain_get_iommu(dom);
2847         iommu_id = iommu->seq_id;
2848
2849         next = deferred_flush[iommu_id].next;
2850         deferred_flush[iommu_id].domain[next] = dom;
2851         deferred_flush[iommu_id].iova[next] = iova;
2852         deferred_flush[iommu_id].next++;
2853
2854         if (!timer_on) {
2855                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2856                 timer_on = 1;
2857         }
2858         list_size++;
2859         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2860 }
2861
2862 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2863                              size_t size, enum dma_data_direction dir,
2864                              struct dma_attrs *attrs)
2865 {
2866         struct pci_dev *pdev = to_pci_dev(dev);
2867         struct dmar_domain *domain;
2868         unsigned long start_pfn, last_pfn;
2869         struct iova *iova;
2870         struct intel_iommu *iommu;
2871
2872         if (iommu_no_mapping(dev))
2873                 return;
2874
2875         domain = find_domain(pdev);
2876         BUG_ON(!domain);
2877
2878         iommu = domain_get_iommu(domain);
2879
2880         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2881         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2882                       (unsigned long long)dev_addr))
2883                 return;
2884
2885         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2886         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2887
2888         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2889                  pci_name(pdev), start_pfn, last_pfn);
2890
2891         /*  clear the whole page */
2892         dma_pte_clear_range(domain, start_pfn, last_pfn);
2893
2894         /* free page tables */
2895         dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2896
2897         if (intel_iommu_strict) {
2898                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2899                                       last_pfn - start_pfn + 1, 0);
2900                 /* free iova */
2901                 __free_iova(&domain->iovad, iova);
2902         } else {
2903                 add_unmap(domain, iova);
2904                 /*
2905                  * queue up the release of the unmap to save the 1/6th of the
2906                  * cpu used up by the iotlb flush operation...
2907                  */
2908         }
2909 }
2910
2911 static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2912                                   dma_addr_t *dma_handle, gfp_t flags)
2913 {
2914         void *vaddr;
2915         int order;
2916
2917         size = PAGE_ALIGN(size);
2918         order = get_order(size);
2919
2920         if (!iommu_no_mapping(hwdev))
2921                 flags &= ~(GFP_DMA | GFP_DMA32);
2922         else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2923                 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2924                         flags |= GFP_DMA;
2925                 else
2926                         flags |= GFP_DMA32;
2927         }
2928
2929         vaddr = (void *)__get_free_pages(flags, order);
2930         if (!vaddr)
2931                 return NULL;
2932         memset(vaddr, 0, size);
2933
2934         *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2935                                          DMA_BIDIRECTIONAL,
2936                                          hwdev->coherent_dma_mask);
2937         if (*dma_handle)
2938                 return vaddr;
2939         free_pages((unsigned long)vaddr, order);
2940         return NULL;
2941 }
2942
2943 static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2944                                 dma_addr_t dma_handle)
2945 {
2946         int order;
2947
2948         size = PAGE_ALIGN(size);
2949         order = get_order(size);
2950
2951         intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
2952         free_pages((unsigned long)vaddr, order);
2953 }
2954
2955 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2956                            int nelems, enum dma_data_direction dir,
2957                            struct dma_attrs *attrs)
2958 {
2959         struct pci_dev *pdev = to_pci_dev(hwdev);
2960         struct dmar_domain *domain;
2961         unsigned long start_pfn, last_pfn;
2962         struct iova *iova;
2963         struct intel_iommu *iommu;
2964
2965         if (iommu_no_mapping(hwdev))
2966                 return;
2967
2968         domain = find_domain(pdev);
2969         BUG_ON(!domain);
2970
2971         iommu = domain_get_iommu(domain);
2972
2973         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2974         if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2975                       (unsigned long long)sglist[0].dma_address))
2976                 return;
2977
2978         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2979         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2980
2981         /*  clear the whole page */
2982         dma_pte_clear_range(domain, start_pfn, last_pfn);
2983
2984         /* free page tables */
2985         dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2986
2987         if (intel_iommu_strict) {
2988                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2989                                       last_pfn - start_pfn + 1, 0);
2990                 /* free iova */
2991                 __free_iova(&domain->iovad, iova);
2992         } else {
2993                 add_unmap(domain, iova);
2994                 /*
2995                  * queue up the release of the unmap to save the 1/6th of the
2996                  * cpu used up by the iotlb flush operation...
2997                  */
2998         }
2999 }
3000
3001 static int intel_nontranslate_map_sg(struct device *hddev,
3002         struct scatterlist *sglist, int nelems, int dir)
3003 {
3004         int i;
3005         struct scatterlist *sg;
3006
3007         for_each_sg(sglist, sg, nelems, i) {
3008                 BUG_ON(!sg_page(sg));
3009                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3010                 sg->dma_length = sg->length;
3011         }
3012         return nelems;
3013 }
3014
3015 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3016                         enum dma_data_direction dir, struct dma_attrs *attrs)
3017 {
3018         int i;
3019         struct pci_dev *pdev = to_pci_dev(hwdev);
3020         struct dmar_domain *domain;
3021         size_t size = 0;
3022         int prot = 0;
3023         struct iova *iova = NULL;
3024         int ret;
3025         struct scatterlist *sg;
3026         unsigned long start_vpfn;
3027         struct intel_iommu *iommu;
3028
3029         BUG_ON(dir == DMA_NONE);
3030         if (iommu_no_mapping(hwdev))
3031                 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3032
3033         domain = get_valid_domain_for_dev(pdev);
3034         if (!domain)
3035                 return 0;
3036
3037         iommu = domain_get_iommu(domain);
3038
3039         for_each_sg(sglist, sg, nelems, i)
3040                 size += aligned_nrpages(sg->offset, sg->length);
3041
3042         iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3043                                 pdev->dma_mask);
3044         if (!iova) {
3045                 sglist->dma_length = 0;
3046                 return 0;
3047         }
3048
3049         /*
3050          * Check if DMAR supports zero-length reads on write only
3051          * mappings..
3052          */
3053         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3054                         !cap_zlr(iommu->cap))
3055                 prot |= DMA_PTE_READ;
3056         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3057                 prot |= DMA_PTE_WRITE;
3058
3059         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3060
3061         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3062         if (unlikely(ret)) {
3063                 /*  clear the page */
3064                 dma_pte_clear_range(domain, start_vpfn,
3065                                     start_vpfn + size - 1);
3066                 /* free page tables */
3067                 dma_pte_free_pagetable(domain, start_vpfn,
3068                                        start_vpfn + size - 1);
3069                 /* free iova */
3070                 __free_iova(&domain->iovad, iova);
3071                 return 0;
3072         }
3073
3074         /* it's a non-present to present mapping. Only flush if caching mode */
3075         if (cap_caching_mode(iommu->cap))
3076                 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3077         else
3078                 iommu_flush_write_buffer(iommu);
3079
3080         return nelems;
3081 }
3082
3083 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3084 {
3085         return !dma_addr;
3086 }
3087
3088 struct dma_map_ops intel_dma_ops = {
3089         .alloc_coherent = intel_alloc_coherent,
3090         .free_coherent = intel_free_coherent,
3091         .map_sg = intel_map_sg,
3092         .unmap_sg = intel_unmap_sg,
3093         .map_page = intel_map_page,
3094         .unmap_page = intel_unmap_page,
3095         .mapping_error = intel_mapping_error,
3096 };
3097
3098 static inline int iommu_domain_cache_init(void)
3099 {
3100         int ret = 0;
3101
3102         iommu_domain_cache = kmem_cache_create("iommu_domain",
3103                                          sizeof(struct dmar_domain),
3104                                          0,
3105                                          SLAB_HWCACHE_ALIGN,
3106
3107                                          NULL);
3108         if (!iommu_domain_cache) {
3109                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3110                 ret = -ENOMEM;
3111         }
3112
3113         return ret;
3114 }
3115
3116 static inline int iommu_devinfo_cache_init(void)
3117 {
3118         int ret = 0;
3119
3120         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3121                                          sizeof(struct device_domain_info),
3122                                          0,
3123                                          SLAB_HWCACHE_ALIGN,
3124                                          NULL);
3125         if (!iommu_devinfo_cache) {
3126                 printk(KERN_ERR "Couldn't create devinfo cache\n");
3127                 ret = -ENOMEM;
3128         }
3129
3130         return ret;
3131 }
3132
3133 static inline int iommu_iova_cache_init(void)
3134 {
3135         int ret = 0;
3136
3137         iommu_iova_cache = kmem_cache_create("iommu_iova",
3138                                          sizeof(struct iova),
3139                                          0,
3140                                          SLAB_HWCACHE_ALIGN,
3141                                          NULL);
3142         if (!iommu_iova_cache) {
3143                 printk(KERN_ERR "Couldn't create iova cache\n");
3144                 ret = -ENOMEM;
3145         }
3146
3147         return ret;
3148 }
3149
3150 static int __init iommu_init_mempool(void)
3151 {
3152         int ret;
3153         ret = iommu_iova_cache_init();
3154         if (ret)
3155                 return ret;
3156
3157         ret = iommu_domain_cache_init();
3158         if (ret)
3159                 goto domain_error;
3160
3161         ret = iommu_devinfo_cache_init();
3162         if (!ret)
3163                 return ret;
3164
3165         kmem_cache_destroy(iommu_domain_cache);
3166 domain_error:
3167         kmem_cache_destroy(iommu_iova_cache);
3168
3169         return -ENOMEM;
3170 }
3171
3172 static void __init iommu_exit_mempool(void)
3173 {
3174         kmem_cache_destroy(iommu_devinfo_cache);
3175         kmem_cache_destroy(iommu_domain_cache);
3176         kmem_cache_destroy(iommu_iova_cache);
3177
3178 }
3179
3180 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3181 {
3182         struct dmar_drhd_unit *drhd;
3183         u32 vtbar;
3184         int rc;
3185
3186         /* We know that this device on this chipset has its own IOMMU.
3187          * If we find it under a different IOMMU, then the BIOS is lying
3188          * to us. Hope that the IOMMU for this device is actually
3189          * disabled, and it needs no translation...
3190          */
3191         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3192         if (rc) {
3193                 /* "can't" happen */
3194                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3195                 return;
3196         }
3197         vtbar &= 0xffff0000;
3198
3199         /* we know that the this iommu should be at offset 0xa000 from vtbar */
3200         drhd = dmar_find_matched_drhd_unit(pdev);
3201         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3202                             TAINT_FIRMWARE_WORKAROUND,
3203                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3204                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3205 }
3206 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3207
3208 static void __init init_no_remapping_devices(void)
3209 {
3210         struct dmar_drhd_unit *drhd;
3211
3212         for_each_drhd_unit(drhd) {
3213                 if (!drhd->include_all) {
3214                         int i;
3215                         for (i = 0; i < drhd->devices_cnt; i++)
3216                                 if (drhd->devices[i] != NULL)
3217                                         break;
3218                         /* ignore DMAR unit if no pci devices exist */
3219                         if (i == drhd->devices_cnt)
3220                                 drhd->ignored = 1;
3221                 }
3222         }
3223
3224         if (dmar_map_gfx)
3225                 return;
3226
3227         for_each_drhd_unit(drhd) {
3228                 int i;
3229                 if (drhd->ignored || drhd->include_all)
3230                         continue;
3231
3232                 for (i = 0; i < drhd->devices_cnt; i++)
3233                         if (drhd->devices[i] &&
3234                                 !IS_GFX_DEVICE(drhd->devices[i]))
3235                                 break;
3236
3237                 if (i < drhd->devices_cnt)
3238                         continue;
3239
3240                 /* bypass IOMMU if it is just for gfx devices */
3241                 drhd->ignored = 1;
3242                 for (i = 0; i < drhd->devices_cnt; i++) {
3243                         if (!drhd->devices[i])
3244                                 continue;
3245                         drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3246                 }
3247         }
3248 }
3249
3250 #ifdef CONFIG_SUSPEND
3251 static int init_iommu_hw(void)
3252 {
3253         struct dmar_drhd_unit *drhd;
3254         struct intel_iommu *iommu = NULL;
3255
3256         for_each_active_iommu(iommu, drhd)
3257                 if (iommu->qi)
3258                         dmar_reenable_qi(iommu);
3259
3260         for_each_iommu(iommu, drhd) {
3261                 if (drhd->ignored) {
3262                         /*
3263                          * we always have to disable PMRs or DMA may fail on
3264                          * this device
3265                          */
3266                         if (force_on)
3267                                 iommu_disable_protect_mem_regions(iommu);
3268                         continue;
3269                 }
3270         
3271                 iommu_flush_write_buffer(iommu);
3272
3273                 iommu_set_root_entry(iommu);
3274
3275                 iommu->flush.flush_context(iommu, 0, 0, 0,
3276                                            DMA_CCMD_GLOBAL_INVL);
3277                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3278                                          DMA_TLB_GLOBAL_FLUSH);
3279                 if (iommu_enable_translation(iommu))
3280                         return 1;
3281                 iommu_disable_protect_mem_regions(iommu);
3282         }
3283
3284         return 0;
3285 }
3286
3287 static void iommu_flush_all(void)
3288 {
3289         struct dmar_drhd_unit *drhd;
3290         struct intel_iommu *iommu;
3291
3292         for_each_active_iommu(iommu, drhd) {
3293                 iommu->flush.flush_context(iommu, 0, 0, 0,
3294                                            DMA_CCMD_GLOBAL_INVL);
3295                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3296                                          DMA_TLB_GLOBAL_FLUSH);
3297         }
3298 }
3299
3300 static int iommu_suspend(void)
3301 {
3302         struct dmar_drhd_unit *drhd;
3303         struct intel_iommu *iommu = NULL;
3304         unsigned long flag;
3305
3306         for_each_active_iommu(iommu, drhd) {
3307                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3308                                                  GFP_ATOMIC);
3309                 if (!iommu->iommu_state)
3310                         goto nomem;
3311         }
3312
3313         iommu_flush_all();
3314
3315         for_each_active_iommu(iommu, drhd) {
3316                 iommu_disable_translation(iommu);
3317
3318                 spin_lock_irqsave(&iommu->register_lock, flag);
3319
3320                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3321                         readl(iommu->reg + DMAR_FECTL_REG);
3322                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3323                         readl(iommu->reg + DMAR_FEDATA_REG);
3324                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3325                         readl(iommu->reg + DMAR_FEADDR_REG);
3326                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3327                         readl(iommu->reg + DMAR_FEUADDR_REG);
3328
3329                 spin_unlock_irqrestore(&iommu->register_lock, flag);
3330         }
3331         return 0;
3332
3333 nomem:
3334         for_each_active_iommu(iommu, drhd)
3335                 kfree(iommu->iommu_state);
3336
3337         return -ENOMEM;
3338 }
3339
3340 static void iommu_resume(void)
3341 {
3342         struct dmar_drhd_unit *drhd;
3343         struct intel_iommu *iommu = NULL;
3344         unsigned long flag;
3345
3346         if (init_iommu_hw()) {
3347                 if (force_on)
3348                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3349                 else
3350                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3351                 return;
3352         }
3353
3354         for_each_active_iommu(iommu, drhd) {
3355
3356                 spin_lock_irqsave(&iommu->register_lock, flag);
3357
3358                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3359                         iommu->reg + DMAR_FECTL_REG);
3360                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3361                         iommu->reg + DMAR_FEDATA_REG);
3362                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3363                         iommu->reg + DMAR_FEADDR_REG);
3364                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3365                         iommu->reg + DMAR_FEUADDR_REG);
3366
3367                 spin_unlock_irqrestore(&iommu->register_lock, flag);
3368         }
3369
3370         for_each_active_iommu(iommu, drhd)
3371                 kfree(iommu->iommu_state);
3372 }
3373
3374 static struct syscore_ops iommu_syscore_ops = {
3375         .resume         = iommu_resume,
3376         .suspend        = iommu_suspend,
3377 };
3378
3379 static void __init init_iommu_pm_ops(void)
3380 {
3381         register_syscore_ops(&iommu_syscore_ops);
3382 }
3383
3384 #else
3385 static inline int init_iommu_pm_ops(void) { }
3386 #endif  /* CONFIG_PM */
3387
3388 /*
3389  * Here we only respond to action of unbound device from driver.
3390  *
3391  * Added device is not attached to its DMAR domain here yet. That will happen
3392  * when mapping the device to iova.
3393  */
3394 static int device_notifier(struct notifier_block *nb,
3395                                   unsigned long action, void *data)
3396 {
3397         struct device *dev = data;
3398         struct pci_dev *pdev = to_pci_dev(dev);
3399         struct dmar_domain *domain;
3400
3401         if (iommu_no_mapping(dev))
3402                 return 0;
3403
3404         domain = find_domain(pdev);
3405         if (!domain)
3406                 return 0;
3407
3408         if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3409                 domain_remove_one_dev_info(domain, pdev);
3410
3411                 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3412                     !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3413                     list_empty(&domain->devices))
3414                         domain_exit(domain);
3415         }
3416
3417         return 0;
3418 }
3419
3420 static struct notifier_block device_nb = {
3421         .notifier_call = device_notifier,
3422 };
3423
3424 int __init intel_iommu_init(void)
3425 {
3426         int ret = 0;
3427
3428         /* VT-d is required for a TXT/tboot launch, so enforce that */
3429         force_on = tboot_force_iommu();
3430
3431         if (dmar_table_init()) {
3432                 if (force_on)
3433                         panic("tboot: Failed to initialize DMAR table\n");
3434                 return  -ENODEV;
3435         }
3436
3437         if (dmar_dev_scope_init()) {
3438                 if (force_on)
3439                         panic("tboot: Failed to initialize DMAR device scope\n");
3440                 return  -ENODEV;
3441         }
3442
3443         /*
3444          * Check the need for DMA-remapping initialization now.
3445          * Above initialization will also be used by Interrupt-remapping.
3446          */
3447         if (no_iommu || dmar_disabled)
3448                 return -ENODEV;
3449
3450         if (iommu_init_mempool()) {
3451                 if (force_on)
3452                         panic("tboot: Failed to initialize iommu memory\n");
3453                 return  -ENODEV;
3454         }
3455
3456         if (dmar_init_reserved_ranges()) {
3457                 if (force_on)
3458                         panic("tboot: Failed to reserve iommu ranges\n");
3459                 return  -ENODEV;
3460         }
3461
3462         init_no_remapping_devices();
3463
3464         ret = init_dmars();
3465         if (ret) {
3466                 if (force_on)
3467                         panic("tboot: Failed to initialize DMARs\n");
3468                 printk(KERN_ERR "IOMMU: dmar init failed\n");
3469                 put_iova_domain(&reserved_iova_list);
3470                 iommu_exit_mempool();
3471                 return ret;
3472         }
3473         printk(KERN_INFO
3474         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3475
3476         init_timer(&unmap_timer);
3477 #ifdef CONFIG_SWIOTLB
3478         swiotlb = 0;
3479 #endif
3480         dma_ops = &intel_dma_ops;
3481
3482         init_iommu_pm_ops();
3483
3484         register_iommu(&intel_iommu_ops);
3485
3486         bus_register_notifier(&pci_bus_type, &device_nb);
3487
3488         return 0;
3489 }
3490
3491 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3492                                            struct pci_dev *pdev)
3493 {
3494         struct pci_dev *tmp, *parent;
3495
3496         if (!iommu || !pdev)
3497                 return;
3498
3499         /* dependent device detach */
3500         tmp = pci_find_upstream_pcie_bridge(pdev);
3501         /* Secondary interface's bus number and devfn 0 */
3502         if (tmp) {
3503                 parent = pdev->bus->self;
3504                 while (parent != tmp) {
3505                         iommu_detach_dev(iommu, parent->bus->number,
3506                                          parent->devfn);
3507                         parent = parent->bus->self;
3508                 }
3509                 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3510                         iommu_detach_dev(iommu,
3511                                 tmp->subordinate->number, 0);
3512                 else /* this is a legacy PCI bridge */
3513                         iommu_detach_dev(iommu, tmp->bus->number,
3514                                          tmp->devfn);
3515         }
3516 }
3517
3518 static void domain_remove_one_dev_info(struct dmar_domain *domain,
3519                                           struct pci_dev *pdev)
3520 {
3521         struct device_domain_info *info;
3522         struct intel_iommu *iommu;
3523         unsigned long flags;
3524         int found = 0;
3525         struct list_head *entry, *tmp;
3526
3527         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3528                                 pdev->devfn);
3529         if (!iommu)
3530                 return;
3531
3532         spin_lock_irqsave(&device_domain_lock, flags);
3533         list_for_each_safe(entry, tmp, &domain->devices) {
3534                 info = list_entry(entry, struct device_domain_info, link);
3535                 /* No need to compare PCI domain; it has to be the same */
3536                 if (info->bus == pdev->bus->number &&
3537                     info->devfn == pdev->devfn) {
3538                         list_del(&info->link);
3539                         list_del(&info->global);
3540                         if (info->dev)
3541                                 info->dev->dev.archdata.iommu = NULL;
3542                         spin_unlock_irqrestore(&device_domain_lock, flags);
3543
3544                         iommu_disable_dev_iotlb(info);
3545                         iommu_detach_dev(iommu, info->bus, info->devfn);
3546                         iommu_detach_dependent_devices(iommu, pdev);
3547                         free_devinfo_mem(info);
3548
3549                         spin_lock_irqsave(&device_domain_lock, flags);
3550
3551                         if (found)
3552                                 break;
3553                         else
3554                                 continue;
3555                 }
3556
3557                 /* if there is no other devices under the same iommu
3558                  * owned by this domain, clear this iommu in iommu_bmp
3559                  * update iommu count and coherency
3560                  */
3561                 if (iommu == device_to_iommu(info->segment, info->bus,
3562                                             info->devfn))
3563                         found = 1;
3564         }
3565
3566         if (found == 0) {
3567                 unsigned long tmp_flags;
3568                 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3569                 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3570                 domain->iommu_count--;
3571                 domain_update_iommu_cap(domain);
3572                 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3573
3574                 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3575                     !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3576                         spin_lock_irqsave(&iommu->lock, tmp_flags);
3577                         clear_bit(domain->id, iommu->domain_ids);
3578                         iommu->domains[domain->id] = NULL;
3579                         spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3580                 }
3581         }
3582
3583         spin_unlock_irqrestore(&device_domain_lock, flags);
3584 }
3585
3586 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3587 {
3588         struct device_domain_info *info;
3589         struct intel_iommu *iommu;
3590         unsigned long flags1, flags2;
3591
3592         spin_lock_irqsave(&device_domain_lock, flags1);
3593         while (!list_empty(&domain->devices)) {
3594                 info = list_entry(domain->devices.next,
3595                         struct device_domain_info, link);
3596                 list_del(&info->link);
3597                 list_del(&info->global);
3598                 if (info->dev)
3599                         info->dev->dev.archdata.iommu = NULL;
3600
3601                 spin_unlock_irqrestore(&device_domain_lock, flags1);
3602
3603                 iommu_disable_dev_iotlb(info);
3604                 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3605                 iommu_detach_dev(iommu, info->bus, info->devfn);
3606                 iommu_detach_dependent_devices(iommu, info->dev);
3607
3608                 /* clear this iommu in iommu_bmp, update iommu count
3609                  * and capabilities
3610                  */
3611                 spin_lock_irqsave(&domain->iommu_lock, flags2);
3612                 if (test_and_clear_bit(iommu->seq_id,
3613                                        &domain->iommu_bmp)) {
3614                         domain->iommu_count--;
3615                         domain_update_iommu_cap(domain);
3616                 }
3617                 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3618
3619                 free_devinfo_mem(info);
3620                 spin_lock_irqsave(&device_domain_lock, flags1);
3621         }
3622         spin_unlock_irqrestore(&device_domain_lock, flags1);
3623 }
3624
3625 /* domain id for virtual machine, it won't be set in context */
3626 static unsigned long vm_domid;
3627
3628 static struct dmar_domain *iommu_alloc_vm_domain(void)
3629 {
3630         struct dmar_domain *domain;
3631
3632         domain = alloc_domain_mem();
3633         if (!domain)
3634                 return NULL;
3635
3636         domain->id = vm_domid++;
3637         domain->nid = -1;
3638         memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3639         domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3640
3641         return domain;
3642 }
3643
3644 static int md_domain_init(struct dmar_domain *domain, int guest_width)
3645 {
3646         int adjust_width;
3647
3648         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3649         spin_lock_init(&domain->iommu_lock);
3650
3651         domain_reserve_special_ranges(domain);
3652
3653         /* calculate AGAW */
3654         domain->gaw = guest_width;
3655         adjust_width = guestwidth_to_adjustwidth(guest_width);
3656         domain->agaw = width_to_agaw(adjust_width);
3657
3658         INIT_LIST_HEAD(&domain->devices);
3659
3660         domain->iommu_count = 0;
3661         domain->iommu_coherency = 0;
3662         domain->iommu_snooping = 0;
3663         domain->iommu_superpage = 0;
3664         domain->max_addr = 0;
3665         domain->nid = -1;
3666
3667         /* always allocate the top pgd */
3668         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3669         if (!domain->pgd)
3670                 return -ENOMEM;
3671         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3672         return 0;
3673 }
3674
3675 static void iommu_free_vm_domain(struct dmar_domain *domain)
3676 {
3677         unsigned long flags;
3678         struct dmar_drhd_unit *drhd;
3679         struct intel_iommu *iommu;
3680         unsigned long i;
3681         unsigned long ndomains;
3682
3683         for_each_drhd_unit(drhd) {
3684                 if (drhd->ignored)
3685                         continue;
3686                 iommu = drhd->iommu;
3687
3688                 ndomains = cap_ndoms(iommu->cap);
3689                 for_each_set_bit(i, iommu->domain_ids, ndomains) {
3690                         if (iommu->domains[i] == domain) {
3691                                 spin_lock_irqsave(&iommu->lock, flags);
3692                                 clear_bit(i, iommu->domain_ids);
3693                                 iommu->domains[i] = NULL;
3694                                 spin_unlock_irqrestore(&iommu->lock, flags);
3695                                 break;
3696                         }
3697                 }
3698         }
3699 }
3700
3701 static void vm_domain_exit(struct dmar_domain *domain)
3702 {
3703         /* Domain 0 is reserved, so dont process it */
3704         if (!domain)
3705                 return;
3706
3707         vm_domain_remove_all_dev_info(domain);
3708         /* destroy iovas */
3709         put_iova_domain(&domain->iovad);
3710
3711         /* clear ptes */
3712         dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3713
3714         /* free page tables */
3715         dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3716
3717         iommu_free_vm_domain(domain);
3718         free_domain_mem(domain);
3719 }
3720
3721 static int intel_iommu_domain_init(struct iommu_domain *domain)
3722 {
3723         struct dmar_domain *dmar_domain;
3724
3725         dmar_domain = iommu_alloc_vm_domain();
3726         if (!dmar_domain) {
3727                 printk(KERN_ERR
3728                         "intel_iommu_domain_init: dmar_domain == NULL\n");
3729                 return -ENOMEM;
3730         }
3731         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3732                 printk(KERN_ERR
3733                         "intel_iommu_domain_init() failed\n");
3734                 vm_domain_exit(dmar_domain);
3735                 return -ENOMEM;
3736         }
3737         domain->priv = dmar_domain;
3738
3739         return 0;
3740 }
3741
3742 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3743 {
3744         struct dmar_domain *dmar_domain = domain->priv;
3745
3746         domain->priv = NULL;
3747         vm_domain_exit(dmar_domain);
3748 }
3749
3750 static int intel_iommu_attach_device(struct iommu_domain *domain,
3751                                      struct device *dev)
3752 {
3753         struct dmar_domain *dmar_domain = domain->priv;
3754         struct pci_dev *pdev = to_pci_dev(dev);
3755         struct intel_iommu *iommu;
3756         int addr_width;
3757
3758         /* normally pdev is not mapped */
3759         if (unlikely(domain_context_mapped(pdev))) {
3760                 struct dmar_domain *old_domain;
3761
3762                 old_domain = find_domain(pdev);
3763                 if (old_domain) {
3764                         if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3765                             dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3766                                 domain_remove_one_dev_info(old_domain, pdev);
3767                         else
3768                                 domain_remove_dev_info(old_domain);
3769                 }
3770         }
3771
3772         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3773                                 pdev->devfn);
3774         if (!iommu)
3775                 return -ENODEV;
3776
3777         /* check if this iommu agaw is sufficient for max mapped address */
3778         addr_width = agaw_to_width(iommu->agaw);
3779         if (addr_width > cap_mgaw(iommu->cap))
3780                 addr_width = cap_mgaw(iommu->cap);
3781
3782         if (dmar_domain->max_addr > (1LL << addr_width)) {
3783                 printk(KERN_ERR "%s: iommu width (%d) is not "
3784                        "sufficient for the mapped address (%llx)\n",
3785                        __func__, addr_width, dmar_domain->max_addr);
3786                 return -EFAULT;
3787         }
3788         dmar_domain->gaw = addr_width;
3789
3790         /*
3791          * Knock out extra levels of page tables if necessary
3792          */
3793         while (iommu->agaw < dmar_domain->agaw) {
3794                 struct dma_pte *pte;
3795
3796                 pte = dmar_domain->pgd;
3797                 if (dma_pte_present(pte)) {
3798                         dmar_domain->pgd = (struct dma_pte *)
3799                                 phys_to_virt(dma_pte_addr(pte));
3800                         free_pgtable_page(pte);
3801                 }
3802                 dmar_domain->agaw--;
3803         }
3804
3805         return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
3806 }
3807
3808 static void intel_iommu_detach_device(struct iommu_domain *domain,
3809                                       struct device *dev)
3810 {
3811         struct dmar_domain *dmar_domain = domain->priv;
3812         struct pci_dev *pdev = to_pci_dev(dev);
3813
3814         domain_remove_one_dev_info(dmar_domain, pdev);
3815 }
3816
3817 static int intel_iommu_map(struct iommu_domain *domain,
3818                            unsigned long iova, phys_addr_t hpa,
3819                            int gfp_order, int iommu_prot)
3820 {
3821         struct dmar_domain *dmar_domain = domain->priv;
3822         u64 max_addr;
3823         int prot = 0;
3824         size_t size;
3825         int ret;
3826
3827         if (iommu_prot & IOMMU_READ)
3828                 prot |= DMA_PTE_READ;
3829         if (iommu_prot & IOMMU_WRITE)
3830                 prot |= DMA_PTE_WRITE;
3831         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3832                 prot |= DMA_PTE_SNP;
3833
3834         size     = PAGE_SIZE << gfp_order;
3835         max_addr = iova + size;
3836         if (dmar_domain->max_addr < max_addr) {
3837                 u64 end;
3838
3839                 /* check if minimum agaw is sufficient for mapped address */
3840                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
3841                 if (end < max_addr) {
3842                         printk(KERN_ERR "%s: iommu width (%d) is not "
3843                                "sufficient for the mapped address (%llx)\n",
3844                                __func__, dmar_domain->gaw, max_addr);
3845                         return -EFAULT;
3846                 }
3847                 dmar_domain->max_addr = max_addr;
3848         }
3849         /* Round up size to next multiple of PAGE_SIZE, if it and
3850            the low bits of hpa would take us onto the next page */
3851         size = aligned_nrpages(hpa, size);
3852         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3853                                  hpa >> VTD_PAGE_SHIFT, size, prot);
3854         return ret;
3855 }
3856
3857 static int intel_iommu_unmap(struct iommu_domain *domain,
3858                              unsigned long iova, int gfp_order)
3859 {
3860         struct dmar_domain *dmar_domain = domain->priv;
3861         size_t size = PAGE_SIZE << gfp_order;
3862
3863         dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3864                             (iova + size - 1) >> VTD_PAGE_SHIFT);
3865
3866         if (dmar_domain->max_addr == iova + size)
3867                 dmar_domain->max_addr = iova;
3868
3869         return gfp_order;
3870 }
3871
3872 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3873                                             unsigned long iova)
3874 {
3875         struct dmar_domain *dmar_domain = domain->priv;
3876         struct dma_pte *pte;
3877         u64 phys = 0;
3878
3879         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
3880         if (pte)
3881                 phys = dma_pte_addr(pte);
3882
3883         return phys;
3884 }
3885
3886 static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
3887                                       unsigned long cap)
3888 {
3889         struct dmar_domain *dmar_domain = domain->priv;
3890
3891         if (cap == IOMMU_CAP_CACHE_COHERENCY)
3892                 return dmar_domain->iommu_snooping;
3893         if (cap == IOMMU_CAP_INTR_REMAP)
3894                 return intr_remapping_enabled;
3895
3896         return 0;
3897 }
3898
3899 static struct iommu_ops intel_iommu_ops = {
3900         .domain_init    = intel_iommu_domain_init,
3901         .domain_destroy = intel_iommu_domain_destroy,
3902         .attach_dev     = intel_iommu_attach_device,
3903         .detach_dev     = intel_iommu_detach_device,
3904         .map            = intel_iommu_map,
3905         .unmap          = intel_iommu_unmap,
3906         .iova_to_phys   = intel_iommu_iova_to_phys,
3907         .domain_has_cap = intel_iommu_domain_has_cap,
3908 };
3909
3910 static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
3911 {
3912         /*
3913          * Mobile 4 Series Chipset neglects to set RWBF capability,
3914          * but needs it:
3915          */
3916         printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
3917         rwbf_quirk = 1;
3918
3919         /* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
3920         if (dev->revision == 0x07) {
3921                 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
3922                 dmar_map_gfx = 0;
3923         }
3924 }
3925
3926 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
3927
3928 #define GGC 0x52
3929 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
3930 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
3931 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
3932 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
3933 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
3934 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
3935 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
3936 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
3937
3938 static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
3939 {
3940         unsigned short ggc;
3941
3942         if (pci_read_config_word(dev, GGC, &ggc))
3943                 return;
3944
3945         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
3946                 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
3947                 dmar_map_gfx = 0;
3948         }
3949 }
3950 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
3951 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
3952 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
3953 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
3954
3955 /* On Tylersburg chipsets, some BIOSes have been known to enable the
3956    ISOCH DMAR unit for the Azalia sound device, but not give it any
3957    TLB entries, which causes it to deadlock. Check for that.  We do
3958    this in a function called from init_dmars(), instead of in a PCI
3959    quirk, because we don't want to print the obnoxious "BIOS broken"
3960    message if VT-d is actually disabled.
3961 */
3962 static void __init check_tylersburg_isoch(void)
3963 {
3964         struct pci_dev *pdev;
3965         uint32_t vtisochctrl;
3966
3967         /* If there's no Azalia in the system anyway, forget it. */
3968         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
3969         if (!pdev)
3970                 return;
3971         pci_dev_put(pdev);
3972
3973         /* System Management Registers. Might be hidden, in which case
3974            we can't do the sanity check. But that's OK, because the
3975            known-broken BIOSes _don't_ actually hide it, so far. */
3976         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
3977         if (!pdev)
3978                 return;
3979
3980         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
3981                 pci_dev_put(pdev);
3982                 return;
3983         }
3984
3985         pci_dev_put(pdev);
3986
3987         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
3988         if (vtisochctrl & 1)
3989                 return;
3990
3991         /* Drop all bits other than the number of TLB entries */
3992         vtisochctrl &= 0x1c;
3993
3994         /* If we have the recommended number of TLB entries (16), fine. */
3995         if (vtisochctrl == 0x10)
3996                 return;
3997
3998         /* Zero TLB entries? You get to ride the short bus to school. */
3999         if (!vtisochctrl) {
4000                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4001                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4002                      dmi_get_system_info(DMI_BIOS_VENDOR),
4003                      dmi_get_system_info(DMI_BIOS_VERSION),
4004                      dmi_get_system_info(DMI_PRODUCT_VERSION));
4005                 iommu_identity_mapping |= IDENTMAP_AZALIA;
4006                 return;
4007         }
4008         
4009         printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4010                vtisochctrl);
4011 }