1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  * Author: Fenghua Yu <fenghua.yu@intel.com>
22  */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/iommu.h>
38 #include <linux/intel-iommu.h>
39 #include <linux/syscore_ops.h>
40 #include <linux/tboot.h>
41 #include <linux/dmi.h>
42 #include <asm/cacheflush.h>
43 #include <asm/iommu.h>
44 #include "pci.h"
45
46 #define ROOT_SIZE               VTD_PAGE_SIZE
47 #define CONTEXT_SIZE            VTD_PAGE_SIZE
48
49 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
50 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
51 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
52
53 #define IOAPIC_RANGE_START      (0xfee00000)
54 #define IOAPIC_RANGE_END        (0xfeefffff)
55 #define IOVA_START_ADDR         (0x1000)
56
57 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
58
59 #define MAX_AGAW_WIDTH 64
60
61 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
62 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
63
64 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
65    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
66 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
67                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
68 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
69
70 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
71 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
72 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
73
74 /* page table handling */
75 #define LEVEL_STRIDE            (9)
76 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
77
78 static inline int agaw_to_level(int agaw)
79 {
80         return agaw + 2;
81 }
82
83 static inline int agaw_to_width(int agaw)
84 {
85         return 30 + agaw * LEVEL_STRIDE;
86 }
87
88 static inline int width_to_agaw(int width)
89 {
90         return (width - 30) / LEVEL_STRIDE;
91 }
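/*
 * Editorial worked example (not part of the original source): with
 * LEVEL_STRIDE == 9, agaw 1 corresponds to 30 + 9 == 39 address bits and
 * agaw + 2 == 3 page-table levels, while agaw 2 corresponds to 48 bits and
 * 4 levels, matching DEFAULT_DOMAIN_ADDRESS_WIDTH; width_to_agaw(48) gives
 * (48 - 30) / 9 == 2 back again.
 */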
92
93 static inline unsigned int level_to_offset_bits(int level)
94 {
95         return (level - 1) * LEVEL_STRIDE;
96 }
97
98 static inline int pfn_level_offset(unsigned long pfn, int level)
99 {
100         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
101 }
102
103 static inline unsigned long level_mask(int level)
104 {
105         return -1UL << level_to_offset_bits(level);
106 }
107
108 static inline unsigned long level_size(int level)
109 {
110         return 1UL << level_to_offset_bits(level);
111 }
112
113 static inline unsigned long align_to_level(unsigned long pfn, int level)
114 {
115         return (pfn + level_size(level) - 1) & level_mask(level);
116 }
117
118 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
119 {
120         return  1 << ((lvl - 1) * LEVEL_STRIDE);
121 }
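/*
 * Editorial example, assuming 4KiB VT-d pages (VTD_PAGE_SHIFT == 12):
 * lvl_to_nr_pages(1) == 1 (a single 4KiB page), lvl_to_nr_pages(2) ==
 * 1 << 9 == 512 pages (2MiB), and lvl_to_nr_pages(3) == 1 << 18 ==
 * 262144 pages (1GiB).
 */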
122
123 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
124    are never going to work. */
125 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
126 {
127         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
128 }
129
130 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
131 {
132         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
133 }
134 static inline unsigned long page_to_dma_pfn(struct page *pg)
135 {
136         return mm_to_dma_pfn(page_to_pfn(pg));
137 }
138 static inline unsigned long virt_to_dma_pfn(void *p)
139 {
140         return page_to_dma_pfn(virt_to_page(p));
141 }
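/*
 * Editorial note: on x86 with 4KiB kernel pages, PAGE_SHIFT ==
 * VTD_PAGE_SHIFT == 12 and the two conversions above are identity
 * mappings; the shifts only matter when the kernel page size is larger
 * than 4KiB, which is why VT-d pages may never be larger than MM pages.
 */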
142
143 /* global iommu list, set NULL for ignored DMAR units */
144 static struct intel_iommu **g_iommus;
145
146 static void __init check_tylersburg_isoch(void);
147 static int rwbf_quirk;
148
149 /*
150  * set to 1 to panic the kernel if VT-d can't be successfully enabled
151  * (used when kernel is launched w/ TXT)
152  */
153 static int force_on = 0;
154
155 /*
156  * 0: Present
157  * 1-11: Reserved
158  * 12-63: Context Ptr (12 - (haw-1))
159  * 64-127: Reserved
160  */
161 struct root_entry {
162         u64     val;
163         u64     rsvd1;
164 };
165 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
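/*
 * Editorial note: with 4KiB pages and 16-byte root entries, ROOT_ENTRY_NR
 * is 256 -- one root entry per possible PCI bus number.
 */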
166 static inline bool root_present(struct root_entry *root)
167 {
168         return (root->val & 1);
169 }
170 static inline void set_root_present(struct root_entry *root)
171 {
172         root->val |= 1;
173 }
174 static inline void set_root_value(struct root_entry *root, unsigned long value)
175 {
176         root->val |= value & VTD_PAGE_MASK;
177 }
178
179 static inline struct context_entry *
180 get_context_addr_from_root(struct root_entry *root)
181 {
182         return (struct context_entry *)
183                 (root_present(root)?phys_to_virt(
184                 root->val & VTD_PAGE_MASK) :
185                 NULL);
186 }
187
188 /*
189  * low 64 bits:
190  * 0: present
191  * 1: fault processing disable
192  * 2-3: translation type
193  * 12-63: address space root
194  * high 64 bits:
195  * 0-2: address width
196  * 3-6: avail
197  * 8-23: domain id
198  */
199 struct context_entry {
200         u64 lo;
201         u64 hi;
202 };
203
204 static inline bool context_present(struct context_entry *context)
205 {
206         return (context->lo & 1);
207 }
208 static inline void context_set_present(struct context_entry *context)
209 {
210         context->lo |= 1;
211 }
212
213 static inline void context_set_fault_enable(struct context_entry *context)
214 {
215         context->lo &= (((u64)-1) << 2) | 1;
216 }
217
218 static inline void context_set_translation_type(struct context_entry *context,
219                                                 unsigned long value)
220 {
221         context->lo &= (((u64)-1) << 4) | 3;
222         context->lo |= (value & 3) << 2;
223 }
224
225 static inline void context_set_address_root(struct context_entry *context,
226                                             unsigned long value)
227 {
228         context->lo |= value & VTD_PAGE_MASK;
229 }
230
231 static inline void context_set_address_width(struct context_entry *context,
232                                              unsigned long value)
233 {
234         context->hi |= value & 7;
235 }
236
237 static inline void context_set_domain_id(struct context_entry *context,
238                                          unsigned long value)
239 {
240         context->hi |= (value & ((1 << 16) - 1)) << 8;
241 }
242
243 static inline void context_clear_entry(struct context_entry *context)
244 {
245         context->lo = 0;
246         context->hi = 0;
247 }
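/*
 * Editorial sketch of how the helpers above compose a context entry (the
 * concrete values are illustrative only): for domain id 5 with a 4-level
 * page table rooted at physical address P and CONTEXT_TT_MULTI_LEVEL
 * translation, context_set_domain_id() places 5 in hi bits 8-23,
 * context_set_address_width() places the agaw (2 for 4 levels) in hi bits
 * 0-2, context_set_address_root() ORs (P & VTD_PAGE_MASK) into lo,
 * context_set_translation_type() writes the type into lo bits 2-3, and
 * context_set_present() finally sets lo bit 0.
 */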
248
249 /*
250  * 0: readable
251  * 1: writable
252  * 2-6: reserved
253  * 7: super page
254  * 8-10: available
255  * 11: snoop behavior
256  * 12-63: Host physical address
257  */
258 struct dma_pte {
259         u64 val;
260 };
261
262 static inline void dma_clear_pte(struct dma_pte *pte)
263 {
264         pte->val = 0;
265 }
266
267 static inline void dma_set_pte_readable(struct dma_pte *pte)
268 {
269         pte->val |= DMA_PTE_READ;
270 }
271
272 static inline void dma_set_pte_writable(struct dma_pte *pte)
273 {
274         pte->val |= DMA_PTE_WRITE;
275 }
276
277 static inline void dma_set_pte_snp(struct dma_pte *pte)
278 {
279         pte->val |= DMA_PTE_SNP;
280 }
281
282 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
283 {
284         pte->val = (pte->val & ~3) | (prot & 3);
285 }
286
287 static inline u64 dma_pte_addr(struct dma_pte *pte)
288 {
289 #ifdef CONFIG_64BIT
290         return pte->val & VTD_PAGE_MASK;
291 #else
292         /* Must have a full atomic 64-bit read */
293         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
294 #endif
295 }
296
297 static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
298 {
299         pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
300 }
301
302 static inline bool dma_pte_present(struct dma_pte *pte)
303 {
304         return (pte->val & 3) != 0;
305 }
306
307 static inline int first_pte_in_page(struct dma_pte *pte)
308 {
309         return !((unsigned long)pte & ~VTD_PAGE_MASK);
310 }
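/*
 * Editorial note: a 4KiB page-table page holds 512 dma_pte entries, so
 * first_pte_in_page() is true exactly when a PTE pointer sits at the start
 * of its page; the clear/free loops further down use it to batch cache
 * flushes one page-table page at a time.
 */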
311
312 /*
313  * This domain is a static identity mapping domain.
314  *      1. This domain creates a static 1:1 mapping of all usable memory.
315  *      2. It maps to each iommu if successful.
316  *      3. Each iommu maps to this domain if successful.
317  */
318 static struct dmar_domain *si_domain;
319 static int hw_pass_through = 1;
320
321 /* devices under the same p2p bridge are owned in one domain */
322 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
323
324 /* domain represents a virtual machine; more than one device
325  * across iommus may be owned by one domain, e.g. a kvm guest.
326  */
327 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 1)
328
329 /* si_domain contains multiple devices */
330 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 2)
331
332 struct dmar_domain {
333         int     id;                     /* domain id */
334         int     nid;                    /* node id */
335         unsigned long iommu_bmp;        /* bitmap of iommus this domain uses*/
336
337         struct list_head devices;       /* all devices' list */
338         struct iova_domain iovad;       /* iova's that belong to this domain */
339
340         struct dma_pte  *pgd;           /* virtual address */
341         int             gaw;            /* max guest address width */
342
343         /* adjusted guest address width, 0 is level 2 30-bit */
344         int             agaw;
345
346         int             flags;          /* flags to find out type of domain */
347
348         int             iommu_coherency;/* indicate coherency of iommu access */
349         int             iommu_snooping; /* indicate snooping control feature*/
350         int             iommu_count;    /* reference count of iommu */
351         int             iommu_superpage;/* Level of superpages supported:
352                                            0 == 4KiB (no superpages), 1 == 2MiB,
353                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
354         spinlock_t      iommu_lock;     /* protect iommu set in domain */
355         u64             max_addr;       /* maximum mapped address */
356 };
357
358 /* PCI domain-device relationship */
359 struct device_domain_info {
360         struct list_head link;  /* link to domain siblings */
361         struct list_head global; /* link to global list */
362         int segment;            /* PCI domain */
363         u8 bus;                 /* PCI bus number */
364         u8 devfn;               /* PCI devfn number */
365         struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
366         struct intel_iommu *iommu; /* IOMMU used by this device */
367         struct dmar_domain *domain; /* pointer to domain */
368 };
369
370 static void flush_unmaps_timeout(unsigned long data);
371
372 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
373
374 #define HIGH_WATER_MARK 250
375 struct deferred_flush_tables {
376         int next;
377         struct iova *iova[HIGH_WATER_MARK];
378         struct dmar_domain *domain[HIGH_WATER_MARK];
379 };
380
381 static struct deferred_flush_tables *deferred_flush;
382
383 /* bitmap for indexing intel_iommus */
384 static int g_num_of_iommus;
385
386 static DEFINE_SPINLOCK(async_umap_flush_lock);
387 static LIST_HEAD(unmaps_to_do);
388
389 static int timer_on;
390 static long list_size;
391
392 static void domain_remove_dev_info(struct dmar_domain *domain);
393
394 #ifdef CONFIG_DMAR_DEFAULT_ON
395 int dmar_disabled = 0;
396 #else
397 int dmar_disabled = 1;
398 #endif /*CONFIG_DMAR_DEFAULT_ON*/
399
400 static int dmar_map_gfx = 1;
401 static int dmar_forcedac;
402 static int intel_iommu_strict;
403 static int intel_iommu_superpage = 1;
404
405 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
406 static DEFINE_SPINLOCK(device_domain_lock);
407 static LIST_HEAD(device_domain_list);
408
409 static struct iommu_ops intel_iommu_ops;
410
411 static int __init intel_iommu_setup(char *str)
412 {
413         if (!str)
414                 return -EINVAL;
415         while (*str) {
416                 if (!strncmp(str, "on", 2)) {
417                         dmar_disabled = 0;
418                         printk(KERN_INFO "Intel-IOMMU: enabled\n");
419                 } else if (!strncmp(str, "off", 3)) {
420                         dmar_disabled = 1;
421                         printk(KERN_INFO "Intel-IOMMU: disabled\n");
422                 } else if (!strncmp(str, "igfx_off", 8)) {
423                         dmar_map_gfx = 0;
424                         printk(KERN_INFO
425                                 "Intel-IOMMU: disable GFX device mapping\n");
426                 } else if (!strncmp(str, "forcedac", 8)) {
427                         printk(KERN_INFO
428                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
429                         dmar_forcedac = 1;
430                 } else if (!strncmp(str, "strict", 6)) {
431                         printk(KERN_INFO
432                                 "Intel-IOMMU: disable batched IOTLB flush\n");
433                         intel_iommu_strict = 1;
434                 } else if (!strncmp(str, "sp_off", 6)) {
435                         printk(KERN_INFO
436                                 "Intel-IOMMU: disable supported super page\n");
437                         intel_iommu_superpage = 0;
438                 }
439
440                 str += strcspn(str, ",");
441                 while (*str == ',')
442                         str++;
443         }
444         return 0;
445 }
446 __setup("intel_iommu=", intel_iommu_setup);
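/*
 * Editorial usage example for the parser above: booting with
 * "intel_iommu=on,strict,igfx_off" enables the IOMMU, disables batched
 * IOTLB flushing and leaves the integrated graphics device unmapped;
 * unrecognised tokens are simply skipped up to the next comma.
 */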
447
448 static struct kmem_cache *iommu_domain_cache;
449 static struct kmem_cache *iommu_devinfo_cache;
450 static struct kmem_cache *iommu_iova_cache;
451
452 static inline void *alloc_pgtable_page(int node)
453 {
454         struct page *page;
455         void *vaddr = NULL;
456
457         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
458         if (page)
459                 vaddr = page_address(page);
460         return vaddr;
461 }
462
463 static inline void free_pgtable_page(void *vaddr)
464 {
465         free_page((unsigned long)vaddr);
466 }
467
468 static inline void *alloc_domain_mem(void)
469 {
470         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
471 }
472
473 static void free_domain_mem(void *vaddr)
474 {
475         kmem_cache_free(iommu_domain_cache, vaddr);
476 }
477
478 static inline void * alloc_devinfo_mem(void)
479 {
480         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
481 }
482
483 static inline void free_devinfo_mem(void *vaddr)
484 {
485         kmem_cache_free(iommu_devinfo_cache, vaddr);
486 }
487
488 struct iova *alloc_iova_mem(void)
489 {
490         return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
491 }
492
493 void free_iova_mem(struct iova *iova)
494 {
495         kmem_cache_free(iommu_iova_cache, iova);
496 }
497
498
499 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
500 {
501         unsigned long sagaw;
502         int agaw = -1;
503
504         sagaw = cap_sagaw(iommu->cap);
505         for (agaw = width_to_agaw(max_gaw);
506              agaw >= 0; agaw--) {
507                 if (test_bit(agaw, &sagaw))
508                         break;
509         }
510
511         return agaw;
512 }
513
514 /*
515  * Calculate max SAGAW for each iommu.
516  */
517 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
518 {
519         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
520 }
521
522 /*
523  * calculate agaw for each iommu.
524  * "SAGAW" may be different across iommus; use a default agaw, and
525  * get a smaller supported agaw for iommus that don't support the default agaw.
526  */
527 int iommu_calculate_agaw(struct intel_iommu *iommu)
528 {
529         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
530 }
531
532 /* This function only returns a single iommu in a domain */
533 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
534 {
535         int iommu_id;
536
537         /* si_domain and vm domain should not get here. */
538         BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
539         BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
540
541         iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
542         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
543                 return NULL;
544
545         return g_iommus[iommu_id];
546 }
547
548 static void domain_update_iommu_coherency(struct dmar_domain *domain)
549 {
550         int i;
551
552         domain->iommu_coherency = 1;
553
554         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
555                 if (!ecap_coherent(g_iommus[i]->ecap)) {
556                         domain->iommu_coherency = 0;
557                         break;
558                 }
559         }
560 }
561
562 static void domain_update_iommu_snooping(struct dmar_domain *domain)
563 {
564         int i;
565
566         domain->iommu_snooping = 1;
567
568         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
569                 if (!ecap_sc_support(g_iommus[i]->ecap)) {
570                         domain->iommu_snooping = 0;
571                         break;
572                 }
573         }
574 }
575
576 static void domain_update_iommu_superpage(struct dmar_domain *domain)
577 {
578         int i, mask = 0xf;
579
580         if (!intel_iommu_superpage) {
581                 domain->iommu_superpage = 0;
582                 return;
583         }
584
585         domain->iommu_superpage = 4; /* 1TiB */
586
587         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
588                 mask &= cap_super_page_val(g_iommus[i]->cap);
589                 if (!mask) {
590                         break;
591                 }
592         }
593         domain->iommu_superpage = fls(mask);
594 }
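/*
 * Editorial example: mask starts at 0xf and is narrowed by each iommu's
 * superpage capability bits, so if one attached iommu reports only 2MiB
 * support (bit 0) the result is mask == 0x1 and fls(0x1) == 1, i.e.
 * iommu_superpage == 1 (2MiB superpages only).
 */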
595
596 /* Some capabilities may be different across iommus */
597 static void domain_update_iommu_cap(struct dmar_domain *domain)
598 {
599         domain_update_iommu_coherency(domain);
600         domain_update_iommu_snooping(domain);
601         domain_update_iommu_superpage(domain);
602 }
603
604 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
605 {
606         struct dmar_drhd_unit *drhd = NULL;
607         int i;
608
609         for_each_drhd_unit(drhd) {
610                 if (drhd->ignored)
611                         continue;
612                 if (segment != drhd->segment)
613                         continue;
614
615                 for (i = 0; i < drhd->devices_cnt; i++) {
616                         if (drhd->devices[i] &&
617                             drhd->devices[i]->bus->number == bus &&
618                             drhd->devices[i]->devfn == devfn)
619                                 return drhd->iommu;
620                         if (drhd->devices[i] &&
621                             drhd->devices[i]->subordinate &&
622                             drhd->devices[i]->subordinate->number <= bus &&
623                             drhd->devices[i]->subordinate->subordinate >= bus)
624                                 return drhd->iommu;
625                 }
626
627                 if (drhd->include_all)
628                         return drhd->iommu;
629         }
630
631         return NULL;
632 }
633
634 static void domain_flush_cache(struct dmar_domain *domain,
635                                void *addr, int size)
636 {
637         if (!domain->iommu_coherency)
638                 clflush_cache_range(addr, size);
639 }
640
641 /* Gets context entry for a given bus and devfn */
642 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
643                 u8 bus, u8 devfn)
644 {
645         struct root_entry *root;
646         struct context_entry *context;
647         unsigned long phy_addr;
648         unsigned long flags;
649
650         spin_lock_irqsave(&iommu->lock, flags);
651         root = &iommu->root_entry[bus];
652         context = get_context_addr_from_root(root);
653         if (!context) {
654                 context = (struct context_entry *)
655                                 alloc_pgtable_page(iommu->node);
656                 if (!context) {
657                         spin_unlock_irqrestore(&iommu->lock, flags);
658                         return NULL;
659                 }
660                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
661                 phy_addr = virt_to_phys((void *)context);
662                 set_root_value(root, phy_addr);
663                 set_root_present(root);
664                 __iommu_flush_cache(iommu, root, sizeof(*root));
665         }
666         spin_unlock_irqrestore(&iommu->lock, flags);
667         return &context[devfn];
668 }
669
670 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
671 {
672         struct root_entry *root;
673         struct context_entry *context;
674         int ret;
675         unsigned long flags;
676
677         spin_lock_irqsave(&iommu->lock, flags);
678         root = &iommu->root_entry[bus];
679         context = get_context_addr_from_root(root);
680         if (!context) {
681                 ret = 0;
682                 goto out;
683         }
684         ret = context_present(&context[devfn]);
685 out:
686         spin_unlock_irqrestore(&iommu->lock, flags);
687         return ret;
688 }
689
690 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
691 {
692         struct root_entry *root;
693         struct context_entry *context;
694         unsigned long flags;
695
696         spin_lock_irqsave(&iommu->lock, flags);
697         root = &iommu->root_entry[bus];
698         context = get_context_addr_from_root(root);
699         if (context) {
700                 context_clear_entry(&context[devfn]);
701                 __iommu_flush_cache(iommu, &context[devfn], \
702                         sizeof(*context));
703         }
704         spin_unlock_irqrestore(&iommu->lock, flags);
705 }
706
707 static void free_context_table(struct intel_iommu *iommu)
708 {
709         struct root_entry *root;
710         int i;
711         unsigned long flags;
712         struct context_entry *context;
713
714         spin_lock_irqsave(&iommu->lock, flags);
715         if (!iommu->root_entry) {
716                 goto out;
717         }
718         for (i = 0; i < ROOT_ENTRY_NR; i++) {
719                 root = &iommu->root_entry[i];
720                 context = get_context_addr_from_root(root);
721                 if (context)
722                         free_pgtable_page(context);
723         }
724         free_pgtable_page(iommu->root_entry);
725         iommu->root_entry = NULL;
726 out:
727         spin_unlock_irqrestore(&iommu->lock, flags);
728 }
729
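/*
 * Editorial summary of pfn_to_dma_pte() below: it walks the page table from
 * the top level down towards @large_level (level 1 when @large_level is 0),
 * allocating missing intermediate page-table pages on demand and using
 * cmpxchg64() so that a walker which loses a race on the same slot simply
 * frees its duplicate page, then returns the PTE for @pfn at the target
 * level (or an existing large-page PTE that already covers it).
 */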
730 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
731                                       unsigned long pfn, int large_level)
732 {
733         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
734         struct dma_pte *parent, *pte = NULL;
735         int level = agaw_to_level(domain->agaw);
736         int offset, target_level;
737
738         BUG_ON(!domain->pgd);
739         BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
740         parent = domain->pgd;
741
742         /* Search pte */
743         if (!large_level)
744                 target_level = 1;
745         else
746                 target_level = large_level;
747
748         while (level > 0) {
749                 void *tmp_page;
750
751                 offset = pfn_level_offset(pfn, level);
752                 pte = &parent[offset];
753                 if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
754                         break;
755                 if (level == target_level)
756                         break;
757
758                 if (!dma_pte_present(pte)) {
759                         uint64_t pteval;
760
761                         tmp_page = alloc_pgtable_page(domain->nid);
762
763                         if (!tmp_page)
764                                 return NULL;
765
766                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
767                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
768                         if (cmpxchg64(&pte->val, 0ULL, pteval)) {
769                                 /* Someone else set it while we were thinking; use theirs. */
770                                 free_pgtable_page(tmp_page);
771                         } else {
772                                 dma_pte_addr(pte);
773                                 domain_flush_cache(domain, pte, sizeof(*pte));
774                         }
775                 }
776                 parent = phys_to_virt(dma_pte_addr(pte));
777                 level--;
778         }
779
780         return pte;
781 }
782
783
784 /* return address's pte at specific level */
785 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
786                                          unsigned long pfn,
787                                          int level, int *large_page)
788 {
789         struct dma_pte *parent, *pte = NULL;
790         int total = agaw_to_level(domain->agaw);
791         int offset;
792
793         parent = domain->pgd;
794         while (level <= total) {
795                 offset = pfn_level_offset(pfn, total);
796                 pte = &parent[offset];
797                 if (level == total)
798                         return pte;
799
800                 if (!dma_pte_present(pte)) {
801                         *large_page = total;
802                         break;
803                 }
804
805                 if (pte->val & DMA_PTE_LARGE_PAGE) {
806                         *large_page = total;
807                         return pte;
808                 }
809
810                 parent = phys_to_virt(dma_pte_addr(pte));
811                 total--;
812         }
813         return NULL;
814 }
815
816 /* clear last level pte; a tlb flush should follow */
817 static void dma_pte_clear_range(struct dmar_domain *domain,
818                                 unsigned long start_pfn,
819                                 unsigned long last_pfn)
820 {
821         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
822         unsigned int large_page = 1;
823         struct dma_pte *first_pte, *pte;
824
825         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
826         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
827         BUG_ON(start_pfn > last_pfn);
828
829         /* we don't need lock here; nobody else touches the iova range */
830         do {
831                 large_page = 1;
832                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
833                 if (!pte) {
834                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
835                         continue;
836                 }
837                 do {
838                         dma_clear_pte(pte);
839                         start_pfn += lvl_to_nr_pages(large_page);
840                         pte++;
841                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
842
843                 domain_flush_cache(domain, first_pte,
844                                    (void *)pte - (void *)first_pte);
845
846         } while (start_pfn && start_pfn <= last_pfn);
847 }
848
849 /* free page table pages. last level pte should already be cleared */
850 static void dma_pte_free_pagetable(struct dmar_domain *domain,
851                                    unsigned long start_pfn,
852                                    unsigned long last_pfn)
853 {
854         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
855         struct dma_pte *first_pte, *pte;
856         int total = agaw_to_level(domain->agaw);
857         int level;
858         unsigned long tmp;
859         int large_page = 2;
860
861         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
862         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
863         BUG_ON(start_pfn > last_pfn);
864
865         /* We don't need lock here; nobody else touches the iova range */
866         level = 2;
867         while (level <= total) {
868                 tmp = align_to_level(start_pfn, level);
869
870                 /* If we can't even clear one PTE at this level, we're done */
871                 if (tmp + level_size(level) - 1 > last_pfn)
872                         return;
873
874                 do {
875                         large_page = level;
876                         first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
877                         if (large_page > level)
878                                 level = large_page + 1;
879                         if (!pte) {
880                                 tmp = align_to_level(tmp + 1, level + 1);
881                                 continue;
882                         }
883                         do {
884                                 if (dma_pte_present(pte)) {
885                                         free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
886                                         dma_clear_pte(pte);
887                                 }
888                                 pte++;
889                                 tmp += level_size(level);
890                         } while (!first_pte_in_page(pte) &&
891                                  tmp + level_size(level) - 1 <= last_pfn);
892
893                         domain_flush_cache(domain, first_pte,
894                                            (void *)pte - (void *)first_pte);
895                         
896                 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
897                 level++;
898         }
899         /* free pgd */
900         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
901                 free_pgtable_page(domain->pgd);
902                 domain->pgd = NULL;
903         }
904 }
905
906 /* iommu handling */
907 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
908 {
909         struct root_entry *root;
910         unsigned long flags;
911
912         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
913         if (!root)
914                 return -ENOMEM;
915
916         __iommu_flush_cache(iommu, root, ROOT_SIZE);
917
918         spin_lock_irqsave(&iommu->lock, flags);
919         iommu->root_entry = root;
920         spin_unlock_irqrestore(&iommu->lock, flags);
921
922         return 0;
923 }
924
925 static void iommu_set_root_entry(struct intel_iommu *iommu)
926 {
927         void *addr;
928         u32 sts;
929         unsigned long flag;
930
931         addr = iommu->root_entry;
932
933         spin_lock_irqsave(&iommu->register_lock, flag);
934         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
935
936         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
937
938                 /* Make sure hardware completes it */
939         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
940                       readl, (sts & DMA_GSTS_RTPS), sts);
941
942         spin_unlock_irqrestore(&iommu->register_lock, flag);
943 }
944
945 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
946 {
947         u32 val;
948         unsigned long flag;
949
950         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
951                 return;
952
953         spin_lock_irqsave(&iommu->register_lock, flag);
954         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
955
956                 /* Make sure hardware completes it */
957         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
958                       readl, (!(val & DMA_GSTS_WBFS)), val);
959
960         spin_unlock_irqrestore(&iommu->register_lock, flag);
961 }
962
963 /* return value determines whether we need a write buffer flush */
964 static void __iommu_flush_context(struct intel_iommu *iommu,
965                                   u16 did, u16 source_id, u8 function_mask,
966                                   u64 type)
967 {
968         u64 val = 0;
969         unsigned long flag;
970
971         switch (type) {
972         case DMA_CCMD_GLOBAL_INVL:
973                 val = DMA_CCMD_GLOBAL_INVL;
974                 break;
975         case DMA_CCMD_DOMAIN_INVL:
976                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
977                 break;
978         case DMA_CCMD_DEVICE_INVL:
979                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
980                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
981                 break;
982         default:
983                 BUG();
984         }
985         val |= DMA_CCMD_ICC;
986
987         spin_lock_irqsave(&iommu->register_lock, flag);
988         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
989
990         /* Make sure hardware completes it */
991         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
992                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
993
994         spin_unlock_irqrestore(&iommu->register_lock, flag);
995 }
996
997 /* return value determines whether we need a write buffer flush */
998 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
999                                 u64 addr, unsigned int size_order, u64 type)
1000 {
1001         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1002         u64 val = 0, val_iva = 0;
1003         unsigned long flag;
1004
1005         switch (type) {
1006         case DMA_TLB_GLOBAL_FLUSH:
1007                 /* global flush doesn't need to set IVA_REG */
1008                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1009                 break;
1010         case DMA_TLB_DSI_FLUSH:
1011                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1012                 break;
1013         case DMA_TLB_PSI_FLUSH:
1014                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1015                 /* Note: always flush non-leaf currently */
1016                 val_iva = size_order | addr;
1017                 break;
1018         default:
1019                 BUG();
1020         }
1021         /* Note: set drain read/write */
1022 #if 0
1023         /*
1024          * This is probably just to be extra safe; it looks like we can
1025          * ignore it without any impact.
1026          */
1027         if (cap_read_drain(iommu->cap))
1028                 val |= DMA_TLB_READ_DRAIN;
1029 #endif
1030         if (cap_write_drain(iommu->cap))
1031                 val |= DMA_TLB_WRITE_DRAIN;
1032
1033         spin_lock_irqsave(&iommu->register_lock, flag);
1034         /* Note: Only uses first TLB reg currently */
1035         if (val_iva)
1036                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1037         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1038
1039         /* Make sure hardware completes it */
1040         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1041                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1042
1043         spin_unlock_irqrestore(&iommu->register_lock, flag);
1044
1045         /* check IOTLB invalidation granularity */
1046         if (DMA_TLB_IAIG(val) == 0)
1047                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1048         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1049                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1050                         (unsigned long long)DMA_TLB_IIRG(type),
1051                         (unsigned long long)DMA_TLB_IAIG(val));
1052 }
1053
1054 static struct device_domain_info *iommu_support_dev_iotlb(
1055         struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1056 {
1057         int found = 0;
1058         unsigned long flags;
1059         struct device_domain_info *info;
1060         struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1061
1062         if (!ecap_dev_iotlb_support(iommu->ecap))
1063                 return NULL;
1064
1065         if (!iommu->qi)
1066                 return NULL;
1067
1068         spin_lock_irqsave(&device_domain_lock, flags);
1069         list_for_each_entry(info, &domain->devices, link)
1070                 if (info->bus == bus && info->devfn == devfn) {
1071                         found = 1;
1072                         break;
1073                 }
1074         spin_unlock_irqrestore(&device_domain_lock, flags);
1075
1076         if (!found || !info->dev)
1077                 return NULL;
1078
1079         if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1080                 return NULL;
1081
1082         if (!dmar_find_matched_atsr_unit(info->dev))
1083                 return NULL;
1084
1085         info->iommu = iommu;
1086
1087         return info;
1088 }
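/*
 * Editorial summary: iommu_support_dev_iotlb() above only returns a
 * device_domain_info when every precondition for ATS is met -- the iommu
 * advertises device-IOTLB support, queued invalidation is available, the
 * device is already attached to the domain, it exposes the PCIe ATS
 * capability, and it is covered by a matching ATSR unit.
 */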
1089
1090 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1091 {
1092         if (!info)
1093                 return;
1094
1095         pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1096 }
1097
1098 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1099 {
1100         if (!info->dev || !pci_ats_enabled(info->dev))
1101                 return;
1102
1103         pci_disable_ats(info->dev);
1104 }
1105
1106 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1107                                   u64 addr, unsigned mask)
1108 {
1109         u16 sid, qdep;
1110         unsigned long flags;
1111         struct device_domain_info *info;
1112
1113         spin_lock_irqsave(&device_domain_lock, flags);
1114         list_for_each_entry(info, &domain->devices, link) {
1115                 if (!info->dev || !pci_ats_enabled(info->dev))
1116                         continue;
1117
1118                 sid = info->bus << 8 | info->devfn;
1119                 qdep = pci_ats_queue_depth(info->dev);
1120                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1121         }
1122         spin_unlock_irqrestore(&device_domain_lock, flags);
1123 }
1124
1125 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1126                                   unsigned long pfn, unsigned int pages, int map)
1127 {
1128         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1129         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1130
1131         BUG_ON(pages == 0);
1132
1133         /*
1134          * Fallback to domain selective flush if no PSI support or the size is
1135          * too big.
1136          * PSI requires page size to be 2 ^ x, and the base address is naturally
1137          * aligned to the size
1138          */
1139         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1140                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1141                                                 DMA_TLB_DSI_FLUSH);
1142         else
1143                 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1144                                                 DMA_TLB_PSI_FLUSH);
1145
1146         /*
1147          * In caching mode, changes of pages from non-present to present require
1148          * a flush. However, device IOTLB doesn't need to be flushed in this case.
1149          */
1150         if (!cap_caching_mode(iommu->cap) || !map)
1151                 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1152 }
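/*
 * Editorial example for the mask computation above: flushing 9 pages gives
 * mask = ilog2(__roundup_pow_of_two(9)) = ilog2(16) = 4, i.e. a 16-page
 * (64KiB) invalidation granule; if mask exceeded cap_max_amask_val() the
 * code would fall back to a domain-selective flush instead.
 */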
1153
1154 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1155 {
1156         u32 pmen;
1157         unsigned long flags;
1158
1159         spin_lock_irqsave(&iommu->register_lock, flags);
1160         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1161         pmen &= ~DMA_PMEN_EPM;
1162         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1163
1164         /* wait for the protected region status bit to clear */
1165         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1166                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1167
1168         spin_unlock_irqrestore(&iommu->register_lock, flags);
1169 }
1170
1171 static int iommu_enable_translation(struct intel_iommu *iommu)
1172 {
1173         u32 sts;
1174         unsigned long flags;
1175
1176         spin_lock_irqsave(&iommu->register_lock, flags);
1177         iommu->gcmd |= DMA_GCMD_TE;
1178         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1179
1180         /* Make sure hardware completes it */
1181         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1182                       readl, (sts & DMA_GSTS_TES), sts);
1183
1184         spin_unlock_irqrestore(&iommu->register_lock, flags);
1185         return 0;
1186 }
1187
1188 static int iommu_disable_translation(struct intel_iommu *iommu)
1189 {
1190         u32 sts;
1191         unsigned long flag;
1192
1193         spin_lock_irqsave(&iommu->register_lock, flag);
1194         iommu->gcmd &= ~DMA_GCMD_TE;
1195         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1196
1197         /* Make sure hardware completes it */
1198         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1199                       readl, (!(sts & DMA_GSTS_TES)), sts);
1200
1201         spin_unlock_irqrestore(&iommu->register_lock, flag);
1202         return 0;
1203 }
1204
1205
1206 static int iommu_init_domains(struct intel_iommu *iommu)
1207 {
1208         unsigned long ndomains;
1209         unsigned long nlongs;
1210
1211         ndomains = cap_ndoms(iommu->cap);
1212         pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
1213                         ndomains);
1214         nlongs = BITS_TO_LONGS(ndomains);
1215
1216         spin_lock_init(&iommu->lock);
1217
1218         /* TBD: there might be 64K domains,
1219          * consider other allocation for future chip
1220          */
1221         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1222         if (!iommu->domain_ids) {
1223                 printk(KERN_ERR "Allocating domain id array failed\n");
1224                 return -ENOMEM;
1225         }
1226         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1227                         GFP_KERNEL);
1228         if (!iommu->domains) {
1229                 printk(KERN_ERR "Allocating domain array failed\n");
1230                 return -ENOMEM;
1231         }
1232
1233         /*
1234          * if Caching mode is set, then invalid translations are tagged
1235          * with domainid 0. Hence we need to pre-allocate it.
1236          */
1237         if (cap_caching_mode(iommu->cap))
1238                 set_bit(0, iommu->domain_ids);
1239         return 0;
1240 }
1241
1242
1243 static void domain_exit(struct dmar_domain *domain);
1244 static void vm_domain_exit(struct dmar_domain *domain);
1245
1246 void free_dmar_iommu(struct intel_iommu *iommu)
1247 {
1248         struct dmar_domain *domain;
1249         int i;
1250         unsigned long flags;
1251
1252         if ((iommu->domains) && (iommu->domain_ids)) {
1253                 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1254                         domain = iommu->domains[i];
1255                         clear_bit(i, iommu->domain_ids);
1256
1257                         spin_lock_irqsave(&domain->iommu_lock, flags);
1258                         if (--domain->iommu_count == 0) {
1259                                 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1260                                         vm_domain_exit(domain);
1261                                 else
1262                                         domain_exit(domain);
1263                         }
1264                         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1265                 }
1266         }
1267
1268         if (iommu->gcmd & DMA_GCMD_TE)
1269                 iommu_disable_translation(iommu);
1270
1271         if (iommu->irq) {
1272                 irq_set_handler_data(iommu->irq, NULL);
1273                 /* This will mask the irq */
1274                 free_irq(iommu->irq, iommu);
1275                 destroy_irq(iommu->irq);
1276         }
1277
1278         kfree(iommu->domains);
1279         kfree(iommu->domain_ids);
1280
1281         g_iommus[iommu->seq_id] = NULL;
1282
1283         /* if all iommus are freed, free g_iommus */
1284         for (i = 0; i < g_num_of_iommus; i++) {
1285                 if (g_iommus[i])
1286                         break;
1287         }
1288
1289         if (i == g_num_of_iommus)
1290                 kfree(g_iommus);
1291
1292         /* free context mapping */
1293         free_context_table(iommu);
1294 }
1295
1296 static struct dmar_domain *alloc_domain(void)
1297 {
1298         struct dmar_domain *domain;
1299
1300         domain = alloc_domain_mem();
1301         if (!domain)
1302                 return NULL;
1303
1304         domain->nid = -1;
1305         memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1306         domain->flags = 0;
1307
1308         return domain;
1309 }
1310
1311 static int iommu_attach_domain(struct dmar_domain *domain,
1312                                struct intel_iommu *iommu)
1313 {
1314         int num;
1315         unsigned long ndomains;
1316         unsigned long flags;
1317
1318         ndomains = cap_ndoms(iommu->cap);
1319
1320         spin_lock_irqsave(&iommu->lock, flags);
1321
1322         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1323         if (num >= ndomains) {
1324                 spin_unlock_irqrestore(&iommu->lock, flags);
1325                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1326                 return -ENOMEM;
1327         }
1328
1329         domain->id = num;
1330         set_bit(num, iommu->domain_ids);
1331         set_bit(iommu->seq_id, &domain->iommu_bmp);
1332         iommu->domains[num] = domain;
1333         spin_unlock_irqrestore(&iommu->lock, flags);
1334
1335         return 0;
1336 }
1337
1338 static void iommu_detach_domain(struct dmar_domain *domain,
1339                                 struct intel_iommu *iommu)
1340 {
1341         unsigned long flags;
1342         int num, ndomains;
1343         int found = 0;
1344
1345         spin_lock_irqsave(&iommu->lock, flags);
1346         ndomains = cap_ndoms(iommu->cap);
1347         for_each_set_bit(num, iommu->domain_ids, ndomains) {
1348                 if (iommu->domains[num] == domain) {
1349                         found = 1;
1350                         break;
1351                 }
1352         }
1353
1354         if (found) {
1355                 clear_bit(num, iommu->domain_ids);
1356                 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1357                 iommu->domains[num] = NULL;
1358         }
1359         spin_unlock_irqrestore(&iommu->lock, flags);
1360 }
1361
1362 static struct iova_domain reserved_iova_list;
1363 static struct lock_class_key reserved_rbtree_key;
1364
1365 static int dmar_init_reserved_ranges(void)
1366 {
1367         struct pci_dev *pdev = NULL;
1368         struct iova *iova;
1369         int i;
1370
1371         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1372
1373         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1374                 &reserved_rbtree_key);
1375
1376         /* IOAPIC ranges shouldn't be accessed by DMA */
1377         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1378                 IOVA_PFN(IOAPIC_RANGE_END));
1379         if (!iova) {
1380                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1381                 return -ENODEV;
1382         }
1383
1384         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1385         for_each_pci_dev(pdev) {
1386                 struct resource *r;
1387
1388                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1389                         r = &pdev->resource[i];
1390                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1391                                 continue;
1392                         iova = reserve_iova(&reserved_iova_list,
1393                                             IOVA_PFN(r->start),
1394                                             IOVA_PFN(r->end));
1395                         if (!iova) {
1396                                 printk(KERN_ERR "Reserve iova failed\n");
1397                                 return -ENODEV;
1398                         }
1399                 }
1400         }
1401         return 0;
1402 }
1403
1404 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1405 {
1406         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1407 }
1408
1409 static inline int guestwidth_to_adjustwidth(int gaw)
1410 {
1411         int agaw;
1412         int r = (gaw - 12) % 9;
1413
1414         if (r == 0)
1415                 agaw = gaw;
1416         else
1417                 agaw = gaw + 9 - r;
1418         if (agaw > 64)
1419                 agaw = 64;
1420         return agaw;
1421 }
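/*
 * Editorial examples for guestwidth_to_adjustwidth(): gaw values 39 and 48
 * are already of the form 12 + 9*n and are returned unchanged, while gaw 40
 * gives r = (40 - 12) % 9 = 1 and is rounded up to 40 + 9 - 1 = 48; results
 * are capped at 64.
 */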
1422
1423 static int domain_init(struct dmar_domain *domain, int guest_width)
1424 {
1425         struct intel_iommu *iommu;
1426         int adjust_width, agaw;
1427         unsigned long sagaw;
1428
1429         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1430         spin_lock_init(&domain->iommu_lock);
1431
1432         domain_reserve_special_ranges(domain);
1433
1434         /* calculate AGAW */
1435         iommu = domain_get_iommu(domain);
1436         if (guest_width > cap_mgaw(iommu->cap))
1437                 guest_width = cap_mgaw(iommu->cap);
1438         domain->gaw = guest_width;
1439         adjust_width = guestwidth_to_adjustwidth(guest_width);
1440         agaw = width_to_agaw(adjust_width);
1441         sagaw = cap_sagaw(iommu->cap);
1442         if (!test_bit(agaw, &sagaw)) {
1443                 /* hardware doesn't support it, choose a bigger one */
1444                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1445                 agaw = find_next_bit(&sagaw, 5, agaw);
1446                 if (agaw >= 5)
1447                         return -ENODEV;
1448         }
1449         domain->agaw = agaw;
1450         INIT_LIST_HEAD(&domain->devices);
1451
1452         if (ecap_coherent(iommu->ecap))
1453                 domain->iommu_coherency = 1;
1454         else
1455                 domain->iommu_coherency = 0;
1456
1457         if (ecap_sc_support(iommu->ecap))
1458                 domain->iommu_snooping = 1;
1459         else
1460                 domain->iommu_snooping = 0;
1461
1462         domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1463         domain->iommu_count = 1;
1464         domain->nid = iommu->node;
1465
1466         /* always allocate the top pgd */
1467         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1468         if (!domain->pgd)
1469                 return -ENOMEM;
1470         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1471         return 0;
1472 }
1473
1474 static void domain_exit(struct dmar_domain *domain)
1475 {
1476         struct dmar_drhd_unit *drhd;
1477         struct intel_iommu *iommu;
1478
1479         /* Domain 0 is reserved, so don't process it */
1480         if (!domain)
1481                 return;
1482
1483         /* Flush any lazy unmaps that may reference this domain */
1484         if (!intel_iommu_strict)
1485                 flush_unmaps_timeout(0);
1486
1487         domain_remove_dev_info(domain);
1488         /* destroy iovas */
1489         put_iova_domain(&domain->iovad);
1490
1491         /* clear ptes */
1492         dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1493
1494         /* free page tables */
1495         dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1496
1497         for_each_active_iommu(iommu, drhd)
1498                 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1499                         iommu_detach_domain(domain, iommu);
1500
1501         free_domain_mem(domain);
1502 }
1503
1504 static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1505                                  u8 bus, u8 devfn, int translation)
1506 {
1507         struct context_entry *context;
1508         unsigned long flags;
1509         struct intel_iommu *iommu;
1510         struct dma_pte *pgd;
1511         unsigned long num;
1512         unsigned long ndomains;
1513         int id;
1514         int agaw;
1515         struct device_domain_info *info = NULL;
1516
1517         pr_debug("Set context mapping for %02x:%02x.%d\n",
1518                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1519
1520         BUG_ON(!domain->pgd);
1521         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1522                translation != CONTEXT_TT_MULTI_LEVEL);
1523
1524         iommu = device_to_iommu(segment, bus, devfn);
1525         if (!iommu)
1526                 return -ENODEV;
1527
1528         context = device_to_context_entry(iommu, bus, devfn);
1529         if (!context)
1530                 return -ENOMEM;
1531         spin_lock_irqsave(&iommu->lock, flags);
1532         if (context_present(context)) {
1533                 spin_unlock_irqrestore(&iommu->lock, flags);
1534                 return 0;
1535         }
1536
1537         id = domain->id;
1538         pgd = domain->pgd;
1539
1540         if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1541             domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1542                 int found = 0;
1543
1544                 /* find an available domain id for this device in iommu */
1545                 ndomains = cap_ndoms(iommu->cap);
1546                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1547                         if (iommu->domains[num] == domain) {
1548                                 id = num;
1549                                 found = 1;
1550                                 break;
1551                         }
1552                 }
1553
1554                 if (found == 0) {
1555                         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1556                         if (num >= ndomains) {
1557                                 spin_unlock_irqrestore(&iommu->lock, flags);
1558                                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1559                                 return -EFAULT;
1560                         }
1561
1562                         set_bit(num, iommu->domain_ids);
1563                         iommu->domains[num] = domain;
1564                         id = num;
1565                 }
1566
1567                 /* Skip top levels of page tables for
1568                  * iommus which have a smaller agaw than the default.
1569                  * Unnecessary for PT mode.
1570                  */
1571                 if (translation != CONTEXT_TT_PASS_THROUGH) {
1572                         for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1573                                 pgd = phys_to_virt(dma_pte_addr(pgd));
1574                                 if (!dma_pte_present(pgd)) {
1575                                         spin_unlock_irqrestore(&iommu->lock, flags);
1576                                         return -ENOMEM;
1577                                 }
1578                         }
1579                 }
1580         }
1581
1582         context_set_domain_id(context, id);
1583
1584         if (translation != CONTEXT_TT_PASS_THROUGH) {
1585                 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1586                 translation = info ? CONTEXT_TT_DEV_IOTLB :
1587                                      CONTEXT_TT_MULTI_LEVEL;
1588         }
1589         /*
1590          * In pass through mode, AW must be programmed to indicate the largest
1591          * AGAW value supported by hardware. And ASR is ignored by hardware.
1592          */
1593         if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1594                 context_set_address_width(context, iommu->msagaw);
1595         else {
1596                 context_set_address_root(context, virt_to_phys(pgd));
1597                 context_set_address_width(context, iommu->agaw);
1598         }
1599
1600         context_set_translation_type(context, translation);
1601         context_set_fault_enable(context);
1602         context_set_present(context);
1603         domain_flush_cache(domain, context, sizeof(*context));
1604
1605         /*
1606          * It's a non-present to present mapping. If hardware doesn't cache
1607          * non-present entries we only need to flush the write-buffer. If it
1608          * _does_ cache non-present entries, then it does so in the special
1609          * domain #0, which we have to flush:
1610          */
1611         if (cap_caching_mode(iommu->cap)) {
1612                 iommu->flush.flush_context(iommu, 0,
1613                                            (((u16)bus) << 8) | devfn,
1614                                            DMA_CCMD_MASK_NOBIT,
1615                                            DMA_CCMD_DEVICE_INVL);
1616                 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1617         } else {
1618                 iommu_flush_write_buffer(iommu);
1619         }
1620         iommu_enable_dev_iotlb(info);
1621         spin_unlock_irqrestore(&iommu->lock, flags);
1622
1623         spin_lock_irqsave(&domain->iommu_lock, flags);
1624         if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1625                 domain->iommu_count++;
1626                 if (domain->iommu_count == 1)
1627                         domain->nid = iommu->node;
1628                 domain_update_iommu_cap(domain);
1629         }
1630         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1631         return 0;
1632 }
1633
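/*
 * Set up context entries for the device itself and, when it sits behind a
 * PCIe-to-PCI(-X) bridge, for every bridge on the path as well: DMA from
 * devices behind such a bridge reaches the IOMMU tagged with the bridge's
 * (or its secondary bus's) source-id rather than the device's own, so the
 * bridges need context entries too.
 */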
1634 static int
1635 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1636                         int translation)
1637 {
1638         int ret;
1639         struct pci_dev *tmp, *parent;
1640
1641         ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1642                                          pdev->bus->number, pdev->devfn,
1643                                          translation);
1644         if (ret)
1645                 return ret;
1646
1647         /* dependent device mapping */
1648         tmp = pci_find_upstream_pcie_bridge(pdev);
1649         if (!tmp)
1650                 return 0;
1651         /* Secondary interface's bus number and devfn 0 */
1652         parent = pdev->bus->self;
1653         while (parent != tmp) {
1654                 ret = domain_context_mapping_one(domain,
1655                                                  pci_domain_nr(parent->bus),
1656                                                  parent->bus->number,
1657                                                  parent->devfn, translation);
1658                 if (ret)
1659                         return ret;
1660                 parent = parent->bus->self;
1661         }
1662         if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
1663                 return domain_context_mapping_one(domain,
1664                                         pci_domain_nr(tmp->subordinate),
1665                                         tmp->subordinate->number, 0,
1666                                         translation);
1667         else /* this is a legacy PCI bridge */
1668                 return domain_context_mapping_one(domain,
1669                                                   pci_domain_nr(tmp->bus),
1670                                                   tmp->bus->number,
1671                                                   tmp->devfn,
1672                                                   translation);
1673 }
1674
1675 static int domain_context_mapped(struct pci_dev *pdev)
1676 {
1677         int ret;
1678         struct pci_dev *tmp, *parent;
1679         struct intel_iommu *iommu;
1680
1681         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1682                                 pdev->devfn);
1683         if (!iommu)
1684                 return -ENODEV;
1685
1686         ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1687         if (!ret)
1688                 return ret;
1689         /* dependent device mapping */
1690         tmp = pci_find_upstream_pcie_bridge(pdev);
1691         if (!tmp)
1692                 return ret;
1693         /* Secondary interface's bus number and devfn 0 */
1694         parent = pdev->bus->self;
1695         while (parent != tmp) {
1696                 ret = device_context_mapped(iommu, parent->bus->number,
1697                                             parent->devfn);
1698                 if (!ret)
1699                         return ret;
1700                 parent = parent->bus->self;
1701         }
1702         if (pci_is_pcie(tmp))
1703                 return device_context_mapped(iommu, tmp->subordinate->number,
1704                                              0);
1705         else
1706                 return device_context_mapped(iommu, tmp->bus->number,
1707                                              tmp->devfn);
1708 }
1709
1710 /* Returns the number of VTD pages, but aligned to the MM page size */
1711 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1712                                             size_t size)
1713 {
1714         host_addr &= ~PAGE_MASK;
1715         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1716 }
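/*
 * Illustration, assuming 4KiB pages on both sides (PAGE_SHIFT ==
 * VTD_PAGE_SHIFT == 12): for host_addr = 0x1234 and size = 0x2000 the
 * sub-page offset is 0x234, PAGE_ALIGN(0x234 + 0x2000) = 0x3000, so the
 * mapping needs 3 VTD pages even though the size alone spans only two.
 */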
1717
1718 /* Return largest possible superpage level for a given mapping */
1719 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1720                                           unsigned long iov_pfn,
1721                                           unsigned long phy_pfn,
1722                                           unsigned long pages)
1723 {
1724         int support, level = 1;
1725         unsigned long pfnmerge;
1726
1727         support = domain->iommu_superpage;
1728
1729         /* To use a large page, the virtual *and* physical addresses
1730            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1731            of them will mean we have to use smaller pages. So just
1732            merge them and check both at once. */
1733         pfnmerge = iov_pfn | phy_pfn;
1734
1735         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1736                 pages >>= VTD_STRIDE_SHIFT;
1737                 if (!pages)
1738                         break;
1739                 pfnmerge >>= VTD_STRIDE_SHIFT;
1740                 level++;
1741                 support--;
1742         }
1743         return level;
1744 }
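/*
 * Example with the 9-bit page-table stride (512 4KiB entries per level):
 * iov_pfn = 0x200, phy_pfn = 0x400, pages = 0x400, and one level of
 * superpage support.  pfnmerge = 0x600 has its low nine bits clear, so one
 * iteration runs (pages becomes 2, support drops to 0) and level 2 is
 * returned, i.e. 2MiB superpages can be used for this mapping.
 */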
1745
1746 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1747                             struct scatterlist *sg, unsigned long phys_pfn,
1748                             unsigned long nr_pages, int prot)
1749 {
1750         struct dma_pte *first_pte = NULL, *pte = NULL;
1751         phys_addr_t uninitialized_var(pteval);
1752         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1753         unsigned long sg_res;
1754         unsigned int largepage_lvl = 0;
1755         unsigned long lvl_pages = 0;
1756
1757         BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1758
1759         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1760                 return -EINVAL;
1761
1762         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1763
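        /*
         * sg_res tracks how many VTD pages of the current scatterlist entry
         * are still unmapped.  In the plain-pfn case (sg == NULL) it is
         * primed to nr_pages + 1 so it never reaches zero and the sg refill
         * path inside the loop below is never taken.
         */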
1764         if (sg)
1765                 sg_res = 0;
1766         else {
1767                 sg_res = nr_pages + 1;
1768                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1769         }
1770
1771         while (nr_pages > 0) {
1772                 uint64_t tmp;
1773
1774                 if (!sg_res) {
1775                         sg_res = aligned_nrpages(sg->offset, sg->length);
1776                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1777                         sg->dma_length = sg->length;
1778                         pteval = page_to_phys(sg_page(sg)) | prot;
1779                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
1780                 }
1781
1782                 if (!pte) {
1783                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1784
1785                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1786                         if (!pte)
1787                                 return -ENOMEM;
1788                         /* It is a large page */
1789                         if (largepage_lvl > 1)
1790                                 pteval |= DMA_PTE_LARGE_PAGE;
1791                         else
1792                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1793
1794                 }
1795                 /* We don't need a lock here; nobody else
1796                  * touches this iova range.
1797                  */
1798                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1799                 if (tmp) {
1800                         static int dumps = 5;
1801                         printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1802                                iov_pfn, tmp, (unsigned long long)pteval);
1803                         if (dumps) {
1804                                 dumps--;
1805                                 debug_dma_dump_mappings(NULL);
1806                         }
1807                         WARN_ON(1);
1808                 }
1809
1810                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1811
1812                 BUG_ON(nr_pages < lvl_pages);
1813                 BUG_ON(sg_res < lvl_pages);
1814
1815                 nr_pages -= lvl_pages;
1816                 iov_pfn += lvl_pages;
1817                 phys_pfn += lvl_pages;
1818                 pteval += lvl_pages * VTD_PAGE_SIZE;
1819                 sg_res -= lvl_pages;
1820
1821                 /* If the next PTE would be the first in a new page, then we
1822                    need to flush the cache on the entries we've just written.
1823                    And then we'll need to recalculate 'pte', so clear it and
1824                    let it get set again in the if (!pte) block above.
1825
1826                    If we're done (!nr_pages) we need to flush the cache too.
1827
1828                    Also if we've been setting superpages, we may need to
1829                    recalculate 'pte' and switch back to smaller pages for the
1830                    end of the mapping, if the trailing size is not enough to
1831                    use another superpage (i.e. sg_res < lvl_pages). */
1832                 pte++;
1833                 if (!nr_pages || first_pte_in_page(pte) ||
1834                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
1835                         domain_flush_cache(domain, first_pte,
1836                                            (void *)pte - (void *)first_pte);
1837                         pte = NULL;
1838                 }
1839
1840                 if (!sg_res && nr_pages)
1841                         sg = sg_next(sg);
1842         }
1843         return 0;
1844 }
1845
1846 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1847                                     struct scatterlist *sg, unsigned long nr_pages,
1848                                     int prot)
1849 {
1850         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1851 }
1852
1853 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1854                                      unsigned long phys_pfn, unsigned long nr_pages,
1855                                      int prot)
1856 {
1857         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1858 }
1859
1860 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1861 {
1862         if (!iommu)
1863                 return;
1864
1865         clear_context_table(iommu, bus, devfn);
1866         iommu->flush.flush_context(iommu, 0, 0, 0,
1867                                            DMA_CCMD_GLOBAL_INVL);
1868         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1869 }
1870
1871 static void domain_remove_dev_info(struct dmar_domain *domain)
1872 {
1873         struct device_domain_info *info;
1874         unsigned long flags;
1875         struct intel_iommu *iommu;
1876
1877         spin_lock_irqsave(&device_domain_lock, flags);
1878         while (!list_empty(&domain->devices)) {
1879                 info = list_entry(domain->devices.next,
1880                         struct device_domain_info, link);
1881                 list_del(&info->link);
1882                 list_del(&info->global);
1883                 if (info->dev)
1884                         info->dev->dev.archdata.iommu = NULL;
1885                 spin_unlock_irqrestore(&device_domain_lock, flags);
1886
1887                 iommu_disable_dev_iotlb(info);
1888                 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1889                 iommu_detach_dev(iommu, info->bus, info->devfn);
1890                 free_devinfo_mem(info);
1891
1892                 spin_lock_irqsave(&device_domain_lock, flags);
1893         }
1894         spin_unlock_irqrestore(&device_domain_lock, flags);
1895 }
1896
1897 /*
1898  * find_domain
1899  * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1900  */
1901 static struct dmar_domain *
1902 find_domain(struct pci_dev *pdev)
1903 {
1904         struct device_domain_info *info;
1905
1906         /* No lock here, assumes no domain exit in normal case */
1907         info = pdev->dev.archdata.iommu;
1908         if (info)
1909                 return info->domain;
1910         return NULL;
1911 }
1912
1913 /* domain is initialized */
1914 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1915 {
1916         struct dmar_domain *domain, *found = NULL;
1917         struct intel_iommu *iommu;
1918         struct dmar_drhd_unit *drhd;
1919         struct device_domain_info *info, *tmp;
1920         struct pci_dev *dev_tmp;
1921         unsigned long flags;
1922         int bus = 0, devfn = 0;
1923         int segment;
1924         int ret;
1925
1926         domain = find_domain(pdev);
1927         if (domain)
1928                 return domain;
1929
1930         segment = pci_domain_nr(pdev->bus);
1931
1932         dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1933         if (dev_tmp) {
1934                 if (pci_is_pcie(dev_tmp)) {
1935                         bus = dev_tmp->subordinate->number;
1936                         devfn = 0;
1937                 } else {
1938                         bus = dev_tmp->bus->number;
1939                         devfn = dev_tmp->devfn;
1940                 }
1941                 spin_lock_irqsave(&device_domain_lock, flags);
1942                 list_for_each_entry(info, &device_domain_list, global) {
1943                         if (info->segment == segment &&
1944                             info->bus == bus && info->devfn == devfn) {
1945                                 found = info->domain;
1946                                 break;
1947                         }
1948                 }
1949                 spin_unlock_irqrestore(&device_domain_lock, flags);
1950                 /* pcie-pci bridge already has a domain, use it */
1951                 if (found) {
1952                         domain = found;
1953                         goto found_domain;
1954                 }
1955         }
1956
1957         domain = alloc_domain();
1958         if (!domain)
1959                 goto error;
1960
1961         /* Allocate new domain for the device */
1962         drhd = dmar_find_matched_drhd_unit(pdev);
1963         if (!drhd) {
1964                 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1965                         pci_name(pdev));
1966                 return NULL;
1967         }
1968         iommu = drhd->iommu;
1969
1970         ret = iommu_attach_domain(domain, iommu);
1971         if (ret) {
1972                 free_domain_mem(domain);
1973                 goto error;
1974         }
1975
1976         if (domain_init(domain, gaw)) {
1977                 domain_exit(domain);
1978                 goto error;
1979         }
1980
1981         /* register pcie-to-pci device */
1982         if (dev_tmp) {
1983                 info = alloc_devinfo_mem();
1984                 if (!info) {
1985                         domain_exit(domain);
1986                         goto error;
1987                 }
1988                 info->segment = segment;
1989                 info->bus = bus;
1990                 info->devfn = devfn;
1991                 info->dev = NULL;
1992                 info->domain = domain;
1993                 /* This domain is shared by devices under p2p bridge */
1994                 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1995
1996                 /* pcie-to-pci bridge already has a domain, use it */
1997                 found = NULL;
1998                 spin_lock_irqsave(&device_domain_lock, flags);
1999                 list_for_each_entry(tmp, &device_domain_list, global) {
2000                         if (tmp->segment == segment &&
2001                             tmp->bus == bus && tmp->devfn == devfn) {
2002                                 found = tmp->domain;
2003                                 break;
2004                         }
2005                 }
2006                 if (found) {
2007                         spin_unlock_irqrestore(&device_domain_lock, flags);
2008                         free_devinfo_mem(info);
2009                         domain_exit(domain);
2010                         domain = found;
2011                 } else {
2012                         list_add(&info->link, &domain->devices);
2013                         list_add(&info->global, &device_domain_list);
2014                         spin_unlock_irqrestore(&device_domain_lock, flags);
2015                 }
2016         }
2017
2018 found_domain:
2019         info = alloc_devinfo_mem();
2020         if (!info)
2021                 goto error;
2022         info->segment = segment;
2023         info->bus = pdev->bus->number;
2024         info->devfn = pdev->devfn;
2025         info->dev = pdev;
2026         info->domain = domain;
2027         spin_lock_irqsave(&device_domain_lock, flags);
2028         /* somebody is fast */
2029         found = find_domain(pdev);
2030         if (found != NULL) {
2031                 spin_unlock_irqrestore(&device_domain_lock, flags);
2032                 if (found != domain) {
2033                         domain_exit(domain);
2034                         domain = found;
2035                 }
2036                 free_devinfo_mem(info);
2037                 return domain;
2038         }
2039         list_add(&info->link, &domain->devices);
2040         list_add(&info->global, &device_domain_list);
2041         pdev->dev.archdata.iommu = info;
2042         spin_unlock_irqrestore(&device_domain_lock, flags);
2043         return domain;
2044 error:
2045         /* recheck it here, maybe others set it */
2046         return find_domain(pdev);
2047 }
2048
2049 static int iommu_identity_mapping;
2050 #define IDENTMAP_ALL            1
2051 #define IDENTMAP_GFX            2
2052 #define IDENTMAP_AZALIA         4
2053
2054 static int iommu_domain_identity_map(struct dmar_domain *domain,
2055                                      unsigned long long start,
2056                                      unsigned long long end)
2057 {
2058         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2059         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2060
2061         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2062                           dma_to_mm_pfn(last_vpfn))) {
2063                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2064                 return -ENOMEM;
2065         }
2066
2067         pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2068                  start, end, domain->id);
2069         /*
2070          * RMRR range might have overlap with physical memory range,
2071          * clear it first
2072          */
2073         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2074
2075         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2076                                   last_vpfn - first_vpfn + 1,
2077                                   DMA_PTE_READ|DMA_PTE_WRITE);
2078 }
2079
2080 static int iommu_prepare_identity_map(struct pci_dev *pdev,
2081                                       unsigned long long start,
2082                                       unsigned long long end)
2083 {
2084         struct dmar_domain *domain;
2085         int ret;
2086
2087         domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2088         if (!domain)
2089                 return -ENOMEM;
2090
2091         /* For _hardware_ passthrough, don't bother. But for software
2092            passthrough, we do it anyway -- it may indicate a memory
2093            range which is reserved in E820, and so didn't get set
2094            up in si_domain to start with */
2095         if (domain == si_domain && hw_pass_through) {
2096                 printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2097                        pci_name(pdev), start, end);
2098                 return 0;
2099         }
2100
2101         printk(KERN_INFO
2102                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2103                pci_name(pdev), start, end);
2104
2105         if (end < start) {
2106                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2107                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2108                         dmi_get_system_info(DMI_BIOS_VENDOR),
2109                         dmi_get_system_info(DMI_BIOS_VERSION),
2110                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2111                 ret = -EIO;
2112                 goto error;
2113         }
2114
2115         if (end >> agaw_to_width(domain->agaw)) {
2116                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2117                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2118                      agaw_to_width(domain->agaw),
2119                      dmi_get_system_info(DMI_BIOS_VENDOR),
2120                      dmi_get_system_info(DMI_BIOS_VERSION),
2121                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2122                 ret = -EIO;
2123                 goto error;
2124         }
2125
2126         ret = iommu_domain_identity_map(domain, start, end);
2127         if (ret)
2128                 goto error;
2129
2130         /* context entry init */
2131         ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2132         if (ret)
2133                 goto error;
2134
2135         return 0;
2136
2137  error:
2138         domain_exit(domain);
2139         return ret;
2140 }
2141
2142 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2143         struct pci_dev *pdev)
2144 {
2145         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2146                 return 0;
2147         return iommu_prepare_identity_map(pdev, rmrr->base_address,
2148                 rmrr->end_address + 1);
2149 }
2150
2151 #ifdef CONFIG_DMAR_FLOPPY_WA
2152 static inline void iommu_prepare_isa(void)
2153 {
2154         struct pci_dev *pdev;
2155         int ret;
2156
2157         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2158         if (!pdev)
2159                 return;
2160
2161         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2162         ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
2163
2164         if (ret)
2165                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2166                        "floppy might not work\n");
2167
2168 }
2169 #else
2170 static inline void iommu_prepare_isa(void)
2171 {
2172         return;
2173 }
2174 #endif /* !CONFIG_DMAR_FLOPPY_WA */
2175
2176 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2177
2178 static int __init si_domain_work_fn(unsigned long start_pfn,
2179                                     unsigned long end_pfn, void *datax)
2180 {
2181         int *ret = datax;
2182
2183         *ret = iommu_domain_identity_map(si_domain,
2184                                          (uint64_t)start_pfn << PAGE_SHIFT,
2185                                          (uint64_t)end_pfn << PAGE_SHIFT);
2186         return *ret;
2187
2188 }
2189
2190 static int __init si_domain_init(int hw)
2191 {
2192         struct dmar_drhd_unit *drhd;
2193         struct intel_iommu *iommu;
2194         int nid, ret = 0;
2195
2196         si_domain = alloc_domain();
2197         if (!si_domain)
2198                 return -EFAULT;
2199
2200         pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2201
2202         for_each_active_iommu(iommu, drhd) {
2203                 ret = iommu_attach_domain(si_domain, iommu);
2204                 if (ret) {
2205                         domain_exit(si_domain);
2206                         return -EFAULT;
2207                 }
2208         }
2209
2210         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2211                 domain_exit(si_domain);
2212                 return -EFAULT;
2213         }
2214
2215         si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2216
2217         if (hw)
2218                 return 0;
2219
2220         for_each_online_node(nid) {
2221                 work_with_active_regions(nid, si_domain_work_fn, &ret);
2222                 if (ret)
2223                         return ret;
2224         }
2225
2226         return 0;
2227 }
2228
2229 static void domain_remove_one_dev_info(struct dmar_domain *domain,
2230                                           struct pci_dev *pdev);
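/*
 * Check whether a device currently has an identity (1:1) mapping.  The
 * per-device archdata pointer is consulted directly rather than searching
 * the si_domain device list, so both the common !iommu_identity_mapping
 * case and the lookup itself stay cheap on the DMA map/unmap fast paths.
 */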
2231 static int identity_mapping(struct pci_dev *pdev)
2232 {
2233         struct device_domain_info *info;
2234
2235         if (likely(!iommu_identity_mapping))
2236                 return 0;
2237
2238         info = pdev->dev.archdata.iommu;
2239         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2240                 return (info->domain == si_domain);
2241
2242         return 0;
2243 }
2244
2245 static int domain_add_dev_info(struct dmar_domain *domain,
2246                                struct pci_dev *pdev,
2247                                int translation)
2248 {
2249         struct device_domain_info *info;
2250         unsigned long flags;
2251         int ret;
2252
2253         info = alloc_devinfo_mem();
2254         if (!info)
2255                 return -ENOMEM;
2256
2257         ret = domain_context_mapping(domain, pdev, translation);
2258         if (ret) {
2259                 free_devinfo_mem(info);
2260                 return ret;
2261         }
2262
2263         info->segment = pci_domain_nr(pdev->bus);
2264         info->bus = pdev->bus->number;
2265         info->devfn = pdev->devfn;
2266         info->dev = pdev;
2267         info->domain = domain;
2268
2269         spin_lock_irqsave(&device_domain_lock, flags);
2270         list_add(&info->link, &domain->devices);
2271         list_add(&info->global, &device_domain_list);
2272         pdev->dev.archdata.iommu = info;
2273         spin_unlock_irqrestore(&device_domain_lock, flags);
2274
2275         return 0;
2276 }
2277
2278 static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2279 {
2280         if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2281                 return 1;
2282
2283         if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2284                 return 1;
2285
2286         if (!(iommu_identity_mapping & IDENTMAP_ALL))
2287                 return 0;
2288
2289         /*
2290          * We want to start off with all devices in the 1:1 domain, and
2291          * take them out later if we find they can't access all of memory.
2292          *
2293          * However, we can't do this for PCI devices behind bridges,
2294          * because all PCI devices behind the same bridge will end up
2295          * with the same source-id on their transactions.
2296          *
2297          * Practically speaking, we can't change things around for these
2298          * devices at run-time, because we can't be sure there'll be no
2299          * DMA transactions in flight for any of their siblings.
2300          * 
2301          * So PCI devices (unless they're on the root bus) as well as
2302          * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2303          * the 1:1 domain, just in _case_ one of their siblings turns out
2304          * not to be able to map all of memory.
2305          */
2306         if (!pci_is_pcie(pdev)) {
2307                 if (!pci_is_root_bus(pdev->bus))
2308                         return 0;
2309                 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2310                         return 0;
2311         } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2312                 return 0;
2313
2314         /* 
2315          * At boot time, we don't yet know if devices will be 64-bit capable.
2316          * Assume that they will -- if they turn out not to be, then we can 
2317          * take them out of the 1:1 domain later.
2318          */
2319         if (!startup) {
2320                 /*
2321                  * If the device's dma_mask is less than the system's memory
2322                  * size then this is not a candidate for identity mapping.
2323                  */
2324                 u64 dma_mask = pdev->dma_mask;
2325
2326                 if (pdev->dev.coherent_dma_mask &&
2327                     pdev->dev.coherent_dma_mask < dma_mask)
2328                         dma_mask = pdev->dev.coherent_dma_mask;
2329
2330                 return dma_mask >= dma_get_required_mask(&pdev->dev);
2331         }
2332
2333         return 1;
2334 }
2335
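/*
 * Attach every device that qualifies for identity mapping to si_domain.
 * With hardware pass-through ("hw") the context entries use
 * CONTEXT_TT_PASS_THROUGH and no page tables are walked; otherwise the
 * devices go through si_domain's real 1:1 page tables
 * (CONTEXT_TT_MULTI_LEVEL).
 */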
2336 static int __init iommu_prepare_static_identity_mapping(int hw)
2337 {
2338         struct pci_dev *pdev = NULL;
2339         int ret;
2340
2341         ret = si_domain_init(hw);
2342         if (ret)
2343                 return -EFAULT;
2344
2345         for_each_pci_dev(pdev) {
2346                 if (iommu_should_identity_map(pdev, 1)) {
2347                         printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2348                                hw ? "hardware" : "software", pci_name(pdev));
2349
2350                         ret = domain_add_dev_info(si_domain, pdev,
2351                                                      hw ? CONTEXT_TT_PASS_THROUGH :
2352                                                      CONTEXT_TT_MULTI_LEVEL);
2353                         if (ret)
2354                                 return ret;
2355                 }
2356         }
2357
2358         return 0;
2359 }
2360
2361 static int __init init_dmars(void)
2362 {
2363         struct dmar_drhd_unit *drhd;
2364         struct dmar_rmrr_unit *rmrr;
2365         struct pci_dev *pdev;
2366         struct intel_iommu *iommu;
2367         int i, ret;
2368
2369         /*
2370          * for each drhd
2371          *    allocate root
2372          *    initialize and program root entry to not present
2373          * endfor
2374          */
2375         for_each_drhd_unit(drhd) {
2376                 g_num_of_iommus++;
2377                 /*
2378                  * lock not needed as this is only incremented in the
2379                  * single-threaded kernel __init code path; all other
2380                  * accesses are read-only
2381                  */
2382         }
2383
2384         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2385                         GFP_KERNEL);
2386         if (!g_iommus) {
2387                 printk(KERN_ERR "Allocating global iommu array failed\n");
2388                 ret = -ENOMEM;
2389                 goto error;
2390         }
2391
2392         deferred_flush = kzalloc(g_num_of_iommus *
2393                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2394         if (!deferred_flush) {
2395                 ret = -ENOMEM;
2396                 goto error;
2397         }
2398
2399         for_each_drhd_unit(drhd) {
2400                 if (drhd->ignored)
2401                         continue;
2402
2403                 iommu = drhd->iommu;
2404                 g_iommus[iommu->seq_id] = iommu;
2405
2406                 ret = iommu_init_domains(iommu);
2407                 if (ret)
2408                         goto error;
2409
2410                 /*
2411                  * TBD:
2412                  * we could share the same root & context tables
2413                  * among all IOMMUs. Need to split this later.
2414                  */
2415                 ret = iommu_alloc_root_entry(iommu);
2416                 if (ret) {
2417                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2418                         goto error;
2419                 }
2420                 if (!ecap_pass_through(iommu->ecap))
2421                         hw_pass_through = 0;
2422         }
2423
2424         /*
2425          * Start from a sane iommu hardware state.
2426          */
2427         for_each_drhd_unit(drhd) {
2428                 if (drhd->ignored)
2429                         continue;
2430
2431                 iommu = drhd->iommu;
2432
2433                 /*
2434                  * If queued invalidation was already initialized by us
2435                  * (for example, while enabling interrupt-remapping) then
2436                  * we already have things rolling from a sane state.
2437                  */
2438                 if (iommu->qi)
2439                         continue;
2440
2441                 /*
2442                  * Clear any previous faults.
2443                  */
2444                 dmar_fault(-1, iommu);
2445                 /*
2446                  * Disable queued invalidation if supported and already enabled
2447                  * before OS handover.
2448                  */
2449                 dmar_disable_qi(iommu);
2450         }
2451
2452         for_each_drhd_unit(drhd) {
2453                 if (drhd->ignored)
2454                         continue;
2455
2456                 iommu = drhd->iommu;
2457
2458                 if (dmar_enable_qi(iommu)) {
2459                         /*
2460                          * Queued Invalidation not enabled, use Register Based
2461                          * Invalidation
2462                          */
2463                         iommu->flush.flush_context = __iommu_flush_context;
2464                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2465                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2466                                "invalidation\n",
2467                                 iommu->seq_id,
2468                                (unsigned long long)drhd->reg_base_addr);
2469                 } else {
2470                         iommu->flush.flush_context = qi_flush_context;
2471                         iommu->flush.flush_iotlb = qi_flush_iotlb;
2472                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2473                                "invalidation\n",
2474                                 iommu->seq_id,
2475                                (unsigned long long)drhd->reg_base_addr);
2476                 }
2477         }
2478
2479         if (iommu_pass_through)
2480                 iommu_identity_mapping |= IDENTMAP_ALL;
2481
2482 #ifdef CONFIG_DMAR_BROKEN_GFX_WA
2483         iommu_identity_mapping |= IDENTMAP_GFX;
2484 #endif
2485
2486         check_tylersburg_isoch();
2487
2488         /*
2489          * If pass-through is not set or not enabled, set up context entries
2490          * for identity mappings for rmrr, gfx, and isa, and possibly fall
2491          * back to static identity mapping if iommu_identity_mapping is set.
2492          */
2493         if (iommu_identity_mapping) {
2494                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2495                 if (ret) {
2496                         printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2497                         goto error;
2498                 }
2499         }
2500         /*
2501          * For each rmrr
2502          *   for each dev attached to rmrr
2503          *   do
2504          *     locate drhd for dev, alloc domain for dev
2505          *     allocate free domain
2506          *     allocate page table entries for rmrr
2507          *     if context not allocated for bus
2508          *           allocate and init context
2509          *           set present in root table for this bus
2510          *     init context with domain, translation etc
2511          *   endfor
2512          * endfor
2513          */
2514         printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2515         for_each_rmrr_units(rmrr) {
2516                 for (i = 0; i < rmrr->devices_cnt; i++) {
2517                         pdev = rmrr->devices[i];
2518                         /*
2519                          * some BIOSes list non-existent devices in the
2520                          * DMAR table.
2521                          */
2522                         if (!pdev)
2523                                 continue;
2524                         ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2525                         if (ret)
2526                                 printk(KERN_ERR
2527                                        "IOMMU: mapping reserved region failed\n");
2528                 }
2529         }
2530
2531         iommu_prepare_isa();
2532
2533         /*
2534          * for each drhd
2535          *   enable fault log
2536          *   global invalidate context cache
2537          *   global invalidate iotlb
2538          *   enable translation
2539          */
2540         for_each_drhd_unit(drhd) {
2541                 if (drhd->ignored) {
2542                         /*
2543                          * we always have to disable PMRs or DMA may fail on
2544                          * this device
2545                          */
2546                         if (force_on)
2547                                 iommu_disable_protect_mem_regions(drhd->iommu);
2548                         continue;
2549                 }
2550                 iommu = drhd->iommu;
2551
2552                 iommu_flush_write_buffer(iommu);
2553
2554                 ret = dmar_set_interrupt(iommu);
2555                 if (ret)
2556                         goto error;
2557
2558                 iommu_set_root_entry(iommu);
2559
2560                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2561                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2562
2563                 ret = iommu_enable_translation(iommu);
2564                 if (ret)
2565                         goto error;
2566
2567                 iommu_disable_protect_mem_regions(iommu);
2568         }
2569
2570         return 0;
2571 error:
2572         for_each_drhd_unit(drhd) {
2573                 if (drhd->ignored)
2574                         continue;
2575                 iommu = drhd->iommu;
2576                 free_iommu(iommu);
2577         }
2578         kfree(g_iommus);
2579         return ret;
2580 }
2581
2582 /* This takes a number of _MM_ pages, not VTD pages */
2583 static struct iova *intel_alloc_iova(struct device *dev,
2584                                      struct dmar_domain *domain,
2585                                      unsigned long nrpages, uint64_t dma_mask)
2586 {
2587         struct pci_dev *pdev = to_pci_dev(dev);
2588         struct iova *iova = NULL;
2589
2590         /* Restrict dma_mask to the width that the iommu can handle */
2591         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2592
2593         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2594                 /*
2595                  * First try to allocate an io virtual address in
2596                  * DMA_BIT_MASK(32) and if that fails then try allocating
2597                  * from higher range
2598                  */
2599                 iova = alloc_iova(&domain->iovad, nrpages,
2600                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
2601                 if (iova)
2602                         return iova;
2603         }
2604         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2605         if (unlikely(!iova)) {
2606                 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2607                        nrpages, pci_name(pdev));
2608                 return NULL;
2609         }
2610
2611         return iova;
2612 }
2613
2614 static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2615 {
2616         struct dmar_domain *domain;
2617         int ret;
2618
2619         domain = get_domain_for_dev(pdev,
2620                         DEFAULT_DOMAIN_ADDRESS_WIDTH);
2621         if (!domain) {
2622                 printk(KERN_ERR
2623                         "Allocating domain for %s failed\n", pci_name(pdev));
2624                 return NULL;
2625         }
2626
2627         /* make sure context mapping is ok */
2628         if (unlikely(!domain_context_mapped(pdev))) {
2629                 ret = domain_context_mapping(domain, pdev,
2630                                              CONTEXT_TT_MULTI_LEVEL);
2631                 if (ret) {
2632                         printk(KERN_ERR
2633                                 "Domain context map for %s failed\n",
2634                                 pci_name(pdev));
2635                         return NULL;
2636                 }
2637         }
2638
2639         return domain;
2640 }
2641
2642 static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2643 {
2644         struct device_domain_info *info;
2645
2646         /* No lock here, assumes no domain exit in normal case */
2647         info = dev->dev.archdata.iommu;
2648         if (likely(info))
2649                 return info->domain;
2650
2651         return __get_valid_domain_for_dev(dev);
2652 }
2653
2654 static int iommu_dummy(struct pci_dev *pdev)
2655 {
2656         return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2657 }
2658
2659 /* Check if the pdev needs to go through the non-identity map/unmap process. */
2660 static int iommu_no_mapping(struct device *dev)
2661 {
2662         struct pci_dev *pdev;
2663         int found;
2664
2665         if (unlikely(dev->bus != &pci_bus_type))
2666                 return 1;
2667
2668         pdev = to_pci_dev(dev);
2669         if (iommu_dummy(pdev))
2670                 return 1;
2671
2672         if (!iommu_identity_mapping)
2673                 return 0;
2674
2675         found = identity_mapping(pdev);
2676         if (found) {
2677                 if (iommu_should_identity_map(pdev, 0))
2678                         return 1;
2679                 else {
2680                         /*
2681                          * The 32-bit DMA device is removed from si_domain
2682                          * and falls back to non-identity mapping.
2683                          */
2684                         domain_remove_one_dev_info(si_domain, pdev);
2685                         printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2686                                pci_name(pdev));
2687                         return 0;
2688                 }
2689         } else {
2690                 /*
2691                  * When a 64-bit DMA device is detached from a VM, the
2692                  * device is put into si_domain for identity mapping.
2693                  */
2694                 if (iommu_should_identity_map(pdev, 0)) {
2695                         int ret;
2696                         ret = domain_add_dev_info(si_domain, pdev,
2697                                                   hw_pass_through ?
2698                                                   CONTEXT_TT_PASS_THROUGH :
2699                                                   CONTEXT_TT_MULTI_LEVEL);
2700                         if (!ret) {
2701                                 printk(KERN_INFO "64bit %s uses identity mapping\n",
2702                                        pci_name(pdev));
2703                                 return 1;
2704                         }
2705                 }
2706         }
2707
2708         return 0;
2709 }
2710
2711 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2712                                      size_t size, int dir, u64 dma_mask)
2713 {
2714         struct pci_dev *pdev = to_pci_dev(hwdev);
2715         struct dmar_domain *domain;
2716         phys_addr_t start_paddr;
2717         struct iova *iova;
2718         int prot = 0;
2719         int ret;
2720         struct intel_iommu *iommu;
2721         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2722
2723         BUG_ON(dir == DMA_NONE);
2724
2725         if (iommu_no_mapping(hwdev))
2726                 return paddr;
2727
2728         domain = get_valid_domain_for_dev(pdev);
2729         if (!domain)
2730                 return 0;
2731
2732         iommu = domain_get_iommu(domain);
2733         size = aligned_nrpages(paddr, size);
2734
2735         iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2736                                 pdev->dma_mask);
2737         if (!iova)
2738                 goto error;
2739
2740         /*
2741          * Check if DMAR supports zero-length reads on write-only
2742          * mappings.
2743          */
2744         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2745                         !cap_zlr(iommu->cap))
2746                 prot |= DMA_PTE_READ;
2747         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2748                 prot |= DMA_PTE_WRITE;
2749         /*
2750          * paddr - (paddr + size) might span a partial page, so map the whole
2751          * page.  Note: if two parts of one page are separately mapped, we
2752          * might have two guest addresses mapping to the same host paddr, but
2753          * this is not a big problem.
2754          */
2755         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2756                                  mm_to_dma_pfn(paddr_pfn), size, prot);
2757         if (ret)
2758                 goto error;
2759
2760         /* it's a non-present to present mapping. Only flush if caching mode */
2761         if (cap_caching_mode(iommu->cap))
2762                 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2763         else
2764                 iommu_flush_write_buffer(iommu);
2765
2766         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2767         start_paddr += paddr & ~PAGE_MASK;
2768         return start_paddr;
2769
2770 error:
2771         if (iova)
2772                 __free_iova(&domain->iovad, iova);
2773         printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
2774                 pci_name(pdev), size, (unsigned long long)paddr, dir);
2775         return 0;
2776 }
2777
2778 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2779                                  unsigned long offset, size_t size,
2780                                  enum dma_data_direction dir,
2781                                  struct dma_attrs *attrs)
2782 {
2783         return __intel_map_single(dev, page_to_phys(page) + offset, size,
2784                                   dir, to_pci_dev(dev)->dma_mask);
2785 }
2786
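/*
 * Deferred-unmap batching: in non-strict mode the unmap paths do not flush
 * the IOTLB immediately.  add_unmap() queues the iova on a per-iommu list,
 * and flush_unmaps() -- run from a timer or once the list reaches
 * HIGH_WATER_MARK -- invalidates the IOTLB (globally per iommu, or per iova
 * in caching mode) and then frees the queued iovas.
 */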
2787 static void flush_unmaps(void)
2788 {
2789         int i, j;
2790
2791         timer_on = 0;
2792
2793         /* just flush them all */
2794         for (i = 0; i < g_num_of_iommus; i++) {
2795                 struct intel_iommu *iommu = g_iommus[i];
2796                 if (!iommu)
2797                         continue;
2798
2799                 if (!deferred_flush[i].next)
2800                         continue;
2801
2802                 /* In caching mode, global flushes make emulation expensive */
2803                 if (!cap_caching_mode(iommu->cap))
2804                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2805                                          DMA_TLB_GLOBAL_FLUSH);
2806                 for (j = 0; j < deferred_flush[i].next; j++) {
2807                         unsigned long mask;
2808                         struct iova *iova = deferred_flush[i].iova[j];
2809                         struct dmar_domain *domain = deferred_flush[i].domain[j];
2810
2811                         /* On real hardware multiple invalidations are expensive */
2812                         if (cap_caching_mode(iommu->cap))
2813                                 iommu_flush_iotlb_psi(iommu, domain->id,
2814                                 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2815                         else {
2816                                 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2817                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2818                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2819                         }
2820                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2821                 }
2822                 deferred_flush[i].next = 0;
2823         }
2824
2825         list_size = 0;
2826 }
2827
2828 static void flush_unmaps_timeout(unsigned long data)
2829 {
2830         unsigned long flags;
2831
2832         spin_lock_irqsave(&async_umap_flush_lock, flags);
2833         flush_unmaps();
2834         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2835 }
2836
2837 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2838 {
2839         unsigned long flags;
2840         int next, iommu_id;
2841         struct intel_iommu *iommu;
2842
2843         spin_lock_irqsave(&async_umap_flush_lock, flags);
2844         if (list_size == HIGH_WATER_MARK)
2845                 flush_unmaps();
2846
2847         iommu = domain_get_iommu(dom);
2848         iommu_id = iommu->seq_id;
2849
2850         next = deferred_flush[iommu_id].next;
2851         deferred_flush[iommu_id].domain[next] = dom;
2852         deferred_flush[iommu_id].iova[next] = iova;
2853         deferred_flush[iommu_id].next++;
2854
2855         if (!timer_on) {
2856                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2857                 timer_on = 1;
2858         }
2859         list_size++;
2860         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2861 }
2862
2863 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2864                              size_t size, enum dma_data_direction dir,
2865                              struct dma_attrs *attrs)
2866 {
2867         struct pci_dev *pdev = to_pci_dev(dev);
2868         struct dmar_domain *domain;
2869         unsigned long start_pfn, last_pfn;
2870         struct iova *iova;
2871         struct intel_iommu *iommu;
2872
2873         if (iommu_no_mapping(dev))
2874                 return;
2875
2876         domain = find_domain(pdev);
2877         BUG_ON(!domain);
2878
2879         iommu = domain_get_iommu(domain);
2880
2881         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2882         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2883                       (unsigned long long)dev_addr))
2884                 return;
2885
2886         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2887         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2888
2889         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2890                  pci_name(pdev), start_pfn, last_pfn);
2891
2892         /*  clear the whole page */
2893         dma_pte_clear_range(domain, start_pfn, last_pfn);
2894
2895         /* free page tables */
2896         dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2897
2898         if (intel_iommu_strict) {
2899                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2900                                       last_pfn - start_pfn + 1, 0);
2901                 /* free iova */
2902                 __free_iova(&domain->iovad, iova);
2903         } else {
2904                 add_unmap(domain, iova);
2905                 /*
2906                  * queue up the release of the unmap to save the roughly 1/6th
2907                  * of the CPU time used up by the iotlb flush operation...
2908                  */
2909         }
2910 }
2911
2912 static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2913                                   dma_addr_t *dma_handle, gfp_t flags)
2914 {
2915         void *vaddr;
2916         int order;
2917
2918         size = PAGE_ALIGN(size);
2919         order = get_order(size);
2920
2921         if (!iommu_no_mapping(hwdev))
2922                 flags &= ~(GFP_DMA | GFP_DMA32);
2923         else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2924                 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2925                         flags |= GFP_DMA;
2926                 else
2927                         flags |= GFP_DMA32;
2928         }
2929
2930         vaddr = (void *)__get_free_pages(flags, order);
2931         if (!vaddr)
2932                 return NULL;
2933         memset(vaddr, 0, size);
2934
2935         *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2936                                          DMA_BIDIRECTIONAL,
2937                                          hwdev->coherent_dma_mask);
2938         if (*dma_handle)
2939                 return vaddr;
2940         free_pages((unsigned long)vaddr, order);
2941         return NULL;
2942 }
2943
2944 static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2945                                 dma_addr_t dma_handle)
2946 {
2947         int order;
2948
2949         size = PAGE_ALIGN(size);
2950         order = get_order(size);
2951
2952         intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
2953         free_pages((unsigned long)vaddr, order);
2954 }
2955
2956 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2957                            int nelems, enum dma_data_direction dir,
2958                            struct dma_attrs *attrs)
2959 {
2960         struct pci_dev *pdev = to_pci_dev(hwdev);
2961         struct dmar_domain *domain;
2962         unsigned long start_pfn, last_pfn;
2963         struct iova *iova;
2964         struct intel_iommu *iommu;
2965
2966         if (iommu_no_mapping(hwdev))
2967                 return;
2968
2969         domain = find_domain(pdev);
2970         BUG_ON(!domain);
2971
2972         iommu = domain_get_iommu(domain);
2973
2974         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2975         if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2976                       (unsigned long long)sglist[0].dma_address))
2977                 return;
2978
2979         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2980         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2981
2982         /*  clear the whole page */
2983         dma_pte_clear_range(domain, start_pfn, last_pfn);
2984
2985         /* free page tables */
2986         dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2987
2988         if (intel_iommu_strict) {
2989                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2990                                       last_pfn - start_pfn + 1, 0);
2991                 /* free iova */
2992                 __free_iova(&domain->iovad, iova);
2993         } else {
2994                 add_unmap(domain, iova);
2995                 /*
2996                  * Defer the IOTLB flush and iova release to avoid the
2997                  * flush cost (about 1/6th of the CPU time on this path).
2998                  */
2999         }
3000 }
3001
3002 static int intel_nontranslate_map_sg(struct device *hwdev,
3003         struct scatterlist *sglist, int nelems, int dir)
3004 {
3005         int i;
3006         struct scatterlist *sg;
3007
3008         for_each_sg(sglist, sg, nelems, i) {
3009                 BUG_ON(!sg_page(sg));
3010                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3011                 sg->dma_length = sg->length;
3012         }
3013         return nelems;
3014 }
3015
3016 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3017                         enum dma_data_direction dir, struct dma_attrs *attrs)
3018 {
3019         int i;
3020         struct pci_dev *pdev = to_pci_dev(hwdev);
3021         struct dmar_domain *domain;
3022         size_t size = 0;
3023         int prot = 0;
3024         struct iova *iova = NULL;
3025         int ret;
3026         struct scatterlist *sg;
3027         unsigned long start_vpfn;
3028         struct intel_iommu *iommu;
3029
3030         BUG_ON(dir == DMA_NONE);
3031         if (iommu_no_mapping(hwdev))
3032                 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3033
3034         domain = get_valid_domain_for_dev(pdev);
3035         if (!domain)
3036                 return 0;
3037
3038         iommu = domain_get_iommu(domain);
3039
3040         for_each_sg(sglist, sg, nelems, i)
3041                 size += aligned_nrpages(sg->offset, sg->length);
3042
3043         iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3044                                 pdev->dma_mask);
3045         if (!iova) {
3046                 sglist->dma_length = 0;
3047                 return 0;
3048         }
3049
3050         /*
3051          * Check if DMAR supports zero-length reads on write-only
3052          * mappings.
3053          */
3054         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3055                         !cap_zlr(iommu->cap))
3056                 prot |= DMA_PTE_READ;
3057         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3058                 prot |= DMA_PTE_WRITE;
3059
3060         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3061
3062         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3063         if (unlikely(ret)) {
3064                 /*  clear the page */
3065                 dma_pte_clear_range(domain, start_vpfn,
3066                                     start_vpfn + size - 1);
3067                 /* free page tables */
3068                 dma_pte_free_pagetable(domain, start_vpfn,
3069                                        start_vpfn + size - 1);
3070                 /* free iova */
3071                 __free_iova(&domain->iovad, iova);
3072                 return 0;
3073         }
3074
3075         /* it's a non-present to present mapping. Only flush if caching mode */
3076         if (cap_caching_mode(iommu->cap))
3077                 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3078         else
3079                 iommu_flush_write_buffer(iommu);
3080
3081         return nelems;
3082 }
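
/*
 * Editorial example (not part of the original source): the scatter-gather
 * path above is reached through the standard DMA API. A hedged sketch, in
 * which program_hw_descriptor() stands in for device-specific code:
 *
 *         int i, count;
 *         struct scatterlist *sg;
 *
 *         count = dma_map_sg(&pdev->dev, sglist, nelems, DMA_TO_DEVICE);
 *         if (!count)
 *                 return -EIO;
 *         for_each_sg(sglist, sg, count, i)
 *                 program_hw_descriptor(sg_dma_address(sg), sg_dma_len(sg));
 *         ...
 *         dma_unmap_sg(&pdev->dev, sglist, nelems, DMA_TO_DEVICE);
 *
 * Note that the loop walks the count returned by dma_map_sg(), while the
 * unmap call is passed the original nelems.
 */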
3083
3084 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3085 {
3086         return !dma_addr;
3087 }
3088
3089 struct dma_map_ops intel_dma_ops = {
3090         .alloc_coherent = intel_alloc_coherent,
3091         .free_coherent = intel_free_coherent,
3092         .map_sg = intel_map_sg,
3093         .unmap_sg = intel_unmap_sg,
3094         .map_page = intel_map_page,
3095         .unmap_page = intel_unmap_page,
3096         .mapping_error = intel_mapping_error,
3097 };
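
/*
 * Editorial note (not part of the original source): once intel_iommu_init()
 * sets dma_ops = &intel_dma_ops, the ordinary streaming-DMA helpers resolve
 * to the functions in this table. A hedged sketch of the single-buffer path,
 * with buf, len and pdev as placeholders:
 *
 *         dma_addr_t dma;
 *
 *         dma = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
 *         if (dma_mapping_error(&pdev->dev, dma))
 *                 return -EIO;
 *         ...
 *         dma_unmap_single(&pdev->dev, dma, len, DMA_TO_DEVICE);
 *
 * dma_map_single() is implemented in terms of the .map_page operation, so
 * the call above ends up in intel_map_page(), and the error check ends up
 * in intel_mapping_error().
 */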
3098
3099 static inline int iommu_domain_cache_init(void)
3100 {
3101         int ret = 0;
3102
3103         iommu_domain_cache = kmem_cache_create("iommu_domain",
3104                                          sizeof(struct dmar_domain),
3105                                          0,
3106                                          SLAB_HWCACHE_ALIGN,
3107                                          NULL);
3109         if (!iommu_domain_cache) {
3110                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3111                 ret = -ENOMEM;
3112         }
3113
3114         return ret;
3115 }
3116
3117 static inline int iommu_devinfo_cache_init(void)
3118 {
3119         int ret = 0;
3120
3121         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3122                                          sizeof(struct device_domain_info),
3123                                          0,
3124                                          SLAB_HWCACHE_ALIGN,
3125                                          NULL);
3126         if (!iommu_devinfo_cache) {
3127                 printk(KERN_ERR "Couldn't create devinfo cache\n");
3128                 ret = -ENOMEM;
3129         }
3130
3131         return ret;
3132 }
3133
3134 static inline int iommu_iova_cache_init(void)
3135 {
3136         int ret = 0;
3137
3138         iommu_iova_cache = kmem_cache_create("iommu_iova",
3139                                          sizeof(struct iova),
3140                                          0,
3141                                          SLAB_HWCACHE_ALIGN,
3142                                          NULL);
3143         if (!iommu_iova_cache) {
3144                 printk(KERN_ERR "Couldn't create iova cache\n");
3145                 ret = -ENOMEM;
3146         }
3147
3148         return ret;
3149 }
3150
3151 static int __init iommu_init_mempool(void)
3152 {
3153         int ret;
3154         ret = iommu_iova_cache_init();
3155         if (ret)
3156                 return ret;
3157
3158         ret = iommu_domain_cache_init();
3159         if (ret)
3160                 goto domain_error;
3161
3162         ret = iommu_devinfo_cache_init();
3163         if (!ret)
3164                 return ret;
3165
3166         kmem_cache_destroy(iommu_domain_cache);
3167 domain_error:
3168         kmem_cache_destroy(iommu_iova_cache);
3169
3170         return -ENOMEM;
3171 }
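
/*
 * Editorial note (not part of the original source): the three slab caches
 * created above back the domain, device_domain_info and iova allocations
 * made elsewhere in this file. The consumption pattern is the usual slab
 * one, e.g. for the iova cache:
 *
 *         struct iova *p = kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
 *         if (!p)
 *                 return NULL;
 *         ...
 *         kmem_cache_free(iommu_iova_cache, p);
 *
 * Note the unwind order in iommu_init_mempool(): a later failure destroys
 * only the caches created before it, mirroring the creation order.
 */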
3172
3173 static void __init iommu_exit_mempool(void)
3174 {
3175         kmem_cache_destroy(iommu_devinfo_cache);
3176         kmem_cache_destroy(iommu_domain_cache);
3177         kmem_cache_destroy(iommu_iova_cache);
3178
3179 }
3180
3181 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3182 {
3183         struct dmar_drhd_unit *drhd;
3184         u32 vtbar;
3185         int rc;
3186
3187         /* We know that this device on this chipset has its own IOMMU.
3188          * If we find it under a different IOMMU, then the BIOS is lying
3189          * to us. Hope that the IOMMU for this device is actually
3190          * disabled, and it needs no translation...
3191          */
3192         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3193         if (rc) {
3194                 /* "can't" happen */
3195                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3196                 return;
3197         }
3198         vtbar &= 0xffff0000;
3199
3200         /* we know that this iommu should be at offset 0xa000 from vtbar */
3201         drhd = dmar_find_matched_drhd_unit(pdev);
3202         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3203                             TAINT_FIRMWARE_WORKAROUND,
3204                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3205                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3206 }
3207 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
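
/*
 * Editorial note (not part of the original source): DECLARE_PCI_FIXUP_ENABLE
 * attaches the quirk above to the pci_fixup_enable stage, so it runs when the
 * device is enabled rather than while its config header is first parsed. The
 * same pattern, sketched with a made-up device ID:
 *
 *         static void __devinit my_enable_quirk(struct pci_dev *pdev)
 *         {
 *                 dev_info(&pdev->dev, "example quirk ran at enable time\n");
 *         }
 *         DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, 0x1234, my_enable_quirk);
 */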
3208
3209 static void __init init_no_remapping_devices(void)
3210 {
3211         struct dmar_drhd_unit *drhd;
3212
3213         for_each_drhd_unit(drhd) {
3214                 if (!drhd->include_all) {
3215                         int i;
3216                         for (i = 0; i < drhd->devices_cnt; i++)
3217                                 if (drhd->devices[i] != NULL)
3218                                         break;
3219                         /* ignore DMAR unit if no pci devices exist */
3220                         if (i == drhd->devices_cnt)
3221                                 drhd->ignored = 1;
3222                 }
3223         }
3224
3225         if (dmar_map_gfx)
3226                 return;
3227
3228         for_each_drhd_unit(drhd) {
3229                 int i;
3230                 if (drhd->ignored || drhd->include_all)
3231                         continue;
3232
3233                 for (i = 0; i < drhd->devices_cnt; i++)
3234                         if (drhd->devices[i] &&
3235                                 !IS_GFX_DEVICE(drhd->devices[i]))
3236                                 break;
3237
3238                 if (i < drhd->devices_cnt)
3239                         continue;
3240
3241                 /* bypass IOMMU if it is just for gfx devices */
3242                 drhd->ignored = 1;
3243                 for (i = 0; i < drhd->devices_cnt; i++) {
3244                         if (!drhd->devices[i])
3245                                 continue;
3246                         drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3247                 }
3248         }
3249 }
3250
3251 #ifdef CONFIG_SUSPEND
3252 static int init_iommu_hw(void)
3253 {
3254         struct dmar_drhd_unit *drhd;
3255         struct intel_iommu *iommu = NULL;
3256
3257         for_each_active_iommu(iommu, drhd)
3258                 if (iommu->qi)
3259                         dmar_reenable_qi(iommu);
3260
3261         for_each_iommu(iommu, drhd) {
3262                 if (drhd->ignored) {
3263                         /*
3264                          * we always have to disable PMRs or DMA may fail on
3265                          * this device
3266                          */
3267                         if (force_on)
3268                                 iommu_disable_protect_mem_regions(iommu);
3269                         continue;
3270                 }
3271
3272                 iommu_flush_write_buffer(iommu);
3273
3274                 iommu_set_root_entry(iommu);
3275
3276                 iommu->flush.flush_context(iommu, 0, 0, 0,
3277                                            DMA_CCMD_GLOBAL_INVL);
3278                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3279                                          DMA_TLB_GLOBAL_FLUSH);
3280                 if (iommu_enable_translation(iommu))
3281                         return 1;
3282                 iommu_disable_protect_mem_regions(iommu);
3283         }
3284
3285         return 0;
3286 }
3287
3288 static void iommu_flush_all(void)
3289 {
3290         struct dmar_drhd_unit *drhd;
3291         struct intel_iommu *iommu;
3292
3293         for_each_active_iommu(iommu, drhd) {
3294                 iommu->flush.flush_context(iommu, 0, 0, 0,
3295                                            DMA_CCMD_GLOBAL_INVL);
3296                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3297                                          DMA_TLB_GLOBAL_FLUSH);
3298         }
3299 }
3300
3301 static int iommu_suspend(void)
3302 {
3303         struct dmar_drhd_unit *drhd;
3304         struct intel_iommu *iommu = NULL;
3305         unsigned long flag;
3306
3307         for_each_active_iommu(iommu, drhd) {
3308                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3309                                                  GFP_ATOMIC);
3310                 if (!iommu->iommu_state)
3311                         goto nomem;
3312         }
3313
3314         iommu_flush_all();
3315
3316         for_each_active_iommu(iommu, drhd) {
3317                 iommu_disable_translation(iommu);
3318
3319                 spin_lock_irqsave(&iommu->register_lock, flag);
3320
3321                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3322                         readl(iommu->reg + DMAR_FECTL_REG);
3323                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3324                         readl(iommu->reg + DMAR_FEDATA_REG);
3325                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3326                         readl(iommu->reg + DMAR_FEADDR_REG);
3327                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3328                         readl(iommu->reg + DMAR_FEUADDR_REG);
3329
3330                 spin_unlock_irqrestore(&iommu->register_lock, flag);
3331         }
3332         return 0;
3333
3334 nomem:
3335         for_each_active_iommu(iommu, drhd)
3336                 kfree(iommu->iommu_state);
3337
3338         return -ENOMEM;
3339 }
3340
3341 static void iommu_resume(void)
3342 {
3343         struct dmar_drhd_unit *drhd;
3344         struct intel_iommu *iommu = NULL;
3345         unsigned long flag;
3346
3347         if (init_iommu_hw()) {
3348                 if (force_on)
3349                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3350                 else
3351                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3352                 return;
3353         }
3354
3355         for_each_active_iommu(iommu, drhd) {
3356
3357                 spin_lock_irqsave(&iommu->register_lock, flag);
3358
3359                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3360                         iommu->reg + DMAR_FECTL_REG);
3361                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3362                         iommu->reg + DMAR_FEDATA_REG);
3363                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3364                         iommu->reg + DMAR_FEADDR_REG);
3365                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3366                         iommu->reg + DMAR_FEUADDR_REG);
3367
3368                 spin_unlock_irqrestore(&iommu->register_lock, flag);
3369         }
3370
3371         for_each_active_iommu(iommu, drhd)
3372                 kfree(iommu->iommu_state);
3373 }
3374
3375 static struct syscore_ops iommu_syscore_ops = {
3376         .resume         = iommu_resume,
3377         .suspend        = iommu_suspend,
3378 };
3379
3380 static void __init init_iommu_pm_ops(void)
3381 {
3382         register_syscore_ops(&iommu_syscore_ops);
3383 }
3384
3385 #else
3386 static inline void init_iommu_pm_ops(void) { }
3387 #endif  /* CONFIG_SUSPEND */
3388
3389 /*
3390  * Here we only respond to a device being unbound from its driver.
3391  *
3392  * A newly added device is not attached to its DMAR domain here yet; that
3393  * happens when the device is first mapped to an iova.
3394  */
3395 static int device_notifier(struct notifier_block *nb,
3396                                   unsigned long action, void *data)
3397 {
3398         struct device *dev = data;
3399         struct pci_dev *pdev = to_pci_dev(dev);
3400         struct dmar_domain *domain;
3401
3402         if (iommu_no_mapping(dev))
3403                 return 0;
3404
3405         domain = find_domain(pdev);
3406         if (!domain)
3407                 return 0;
3408
3409         if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3410                 domain_remove_one_dev_info(domain, pdev);
3411
3412                 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3413                     !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3414                     list_empty(&domain->devices))
3415                         domain_exit(domain);
3416         }
3417
3418         return 0;
3419 }
3420
3421 static struct notifier_block device_nb = {
3422         .notifier_call = device_notifier,
3423 };
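
/*
 * Editorial example (not part of the original source): device_nb is hooked up
 * with bus_register_notifier(&pci_bus_type, &device_nb) in intel_iommu_init()
 * below. Any code can watch the same bus events with the same pattern; a
 * sketch with hypothetical names:
 *
 *         static int my_notifier(struct notifier_block *nb,
 *                                unsigned long action, void *data)
 *         {
 *                 struct device *dev = data;
 *
 *                 if (action == BUS_NOTIFY_UNBOUND_DRIVER)
 *                         dev_info(dev, "driver unbound\n");
 *                 return NOTIFY_DONE;
 *         }
 *         static struct notifier_block my_nb = { .notifier_call = my_notifier };
 *
 *         bus_register_notifier(&pci_bus_type, &my_nb);
 */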
3424
3425 int __init intel_iommu_init(void)
3426 {
3427         int ret = 0;
3428
3429         /* VT-d is required for a TXT/tboot launch, so enforce that */
3430         force_on = tboot_force_iommu();
3431
3432         if (dmar_table_init()) {
3433                 if (force_on)
3434                         panic("tboot: Failed to initialize DMAR table\n");
3435                 return  -ENODEV;
3436         }
3437
3438         if (dmar_dev_scope_init()) {
3439                 if (force_on)
3440                         panic("tboot: Failed to initialize DMAR device scope\n");
3441                 return  -ENODEV;
3442         }
3443
3444         /*
3445          * Check the need for DMA-remapping initialization now.
3446          * The initialization above is also used by interrupt remapping.
3447          */
3448         if (no_iommu || dmar_disabled)
3449                 return -ENODEV;
3450
3451         if (iommu_init_mempool()) {
3452                 if (force_on)
3453                         panic("tboot: Failed to initialize iommu memory\n");
3454                 return  -ENODEV;
3455         }
3456
3457         if (dmar_init_reserved_ranges()) {
3458                 if (force_on)
3459                         panic("tboot: Failed to reserve iommu ranges\n");
3460                 return  -ENODEV;
3461         }
3462
3463         init_no_remapping_devices();
3464
3465         ret = init_dmars();
3466         if (ret) {
3467                 if (force_on)
3468                         panic("tboot: Failed to initialize DMARs\n");
3469                 printk(KERN_ERR "IOMMU: dmar init failed\n");
3470                 put_iova_domain(&reserved_iova_list);
3471                 iommu_exit_mempool();
3472                 return ret;
3473         }
3474         printk(KERN_INFO
3475         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3476
3477         init_timer(&unmap_timer);
3478 #ifdef CONFIG_SWIOTLB
3479         swiotlb = 0;
3480 #endif
3481         dma_ops = &intel_dma_ops;
3482
3483         init_iommu_pm_ops();
3484
3485         register_iommu(&intel_iommu_ops);
3486
3487         bus_register_notifier(&pci_bus_type, &device_nb);
3488
3489         return 0;
3490 }
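
/*
 * Editorial note (not part of the original source): whether this init path
 * runs at all is governed by the usual command-line switches, e.g.:
 *
 *         intel_iommu=on          force DMA remapping on
 *         intel_iommu=off         sets dmar_disabled, taking the -ENODEV exit above
 *         intel_iommu=strict      synchronous IOTLB flushing on every unmap
 *
 * A successful initialization shows up in the kernel log as the
 * "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O" line
 * printed above.
 */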
3491
3492 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3493                                            struct pci_dev *pdev)
3494 {
3495         struct pci_dev *tmp, *parent;
3496
3497         if (!iommu || !pdev)
3498                 return;
3499
3500         /* dependent device detach */
3501         tmp = pci_find_upstream_pcie_bridge(pdev);
3502         /* Secondary interface's bus number and devfn 0 */
3503         if (tmp) {
3504                 parent = pdev->bus->self;
3505                 while (parent != tmp) {
3506                         iommu_detach_dev(iommu, parent->bus->number,
3507                                          parent->devfn);
3508                         parent = parent->bus->self;
3509                 }
3510                 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3511                         iommu_detach_dev(iommu,
3512                                 tmp->subordinate->number, 0);
3513                 else /* this is a legacy PCI bridge */
3514                         iommu_detach_dev(iommu, tmp->bus->number,
3515                                          tmp->devfn);
3516         }
3517 }
3518
3519 static void domain_remove_one_dev_info(struct dmar_domain *domain,
3520                                           struct pci_dev *pdev)
3521 {
3522         struct device_domain_info *info;
3523         struct intel_iommu *iommu;
3524         unsigned long flags;
3525         int found = 0;
3526         struct list_head *entry, *tmp;
3527
3528         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3529                                 pdev->devfn);
3530         if (!iommu)
3531                 return;
3532
3533         spin_lock_irqsave(&device_domain_lock, flags);
3534         list_for_each_safe(entry, tmp, &domain->devices) {
3535                 info = list_entry(entry, struct device_domain_info, link);
3536                 /* No need to compare PCI domain; it has to be the same */
3537                 if (info->bus == pdev->bus->number &&
3538                     info->devfn == pdev->devfn) {
3539                         list_del(&info->link);
3540                         list_del(&info->global);
3541                         if (info->dev)
3542                                 info->dev->dev.archdata.iommu = NULL;
3543                         spin_unlock_irqrestore(&device_domain_lock, flags);
3544
3545                         iommu_disable_dev_iotlb(info);
3546                         iommu_detach_dev(iommu, info->bus, info->devfn);
3547                         iommu_detach_dependent_devices(iommu, pdev);
3548                         free_devinfo_mem(info);
3549
3550                         spin_lock_irqsave(&device_domain_lock, flags);
3551
3552                         if (found)
3553                                 break;
3554                         else
3555                                 continue;
3556                 }
3557
3558                 /* if there are no other devices under the same iommu
3559                  * owned by this domain, clear this iommu in iommu_bmp and
3560                  * update the iommu count and coherency
3561                  */
3562                 if (iommu == device_to_iommu(info->segment, info->bus,
3563                                             info->devfn))
3564                         found = 1;
3565         }
3566
3567         if (found == 0) {
3568                 unsigned long tmp_flags;
3569                 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3570                 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3571                 domain->iommu_count--;
3572                 domain_update_iommu_cap(domain);
3573                 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3574
3575                 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3576                     !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3577                         spin_lock_irqsave(&iommu->lock, tmp_flags);
3578                         clear_bit(domain->id, iommu->domain_ids);
3579                         iommu->domains[domain->id] = NULL;
3580                         spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3581                 }
3582         }
3583
3584         spin_unlock_irqrestore(&device_domain_lock, flags);
3585 }
3586
3587 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3588 {
3589         struct device_domain_info *info;
3590         struct intel_iommu *iommu;
3591         unsigned long flags1, flags2;
3592
3593         spin_lock_irqsave(&device_domain_lock, flags1);
3594         while (!list_empty(&domain->devices)) {
3595                 info = list_entry(domain->devices.next,
3596                         struct device_domain_info, link);
3597                 list_del(&info->link);
3598                 list_del(&info->global);
3599                 if (info->dev)
3600                         info->dev->dev.archdata.iommu = NULL;
3601
3602                 spin_unlock_irqrestore(&device_domain_lock, flags1);
3603
3604                 iommu_disable_dev_iotlb(info);
3605                 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3606                 iommu_detach_dev(iommu, info->bus, info->devfn);
3607                 iommu_detach_dependent_devices(iommu, info->dev);
3608
3609                 /* clear this iommu in iommu_bmp, update iommu count
3610                  * and capabilities
3611                  */
3612                 spin_lock_irqsave(&domain->iommu_lock, flags2);
3613                 if (test_and_clear_bit(iommu->seq_id,
3614                                        &domain->iommu_bmp)) {
3615                         domain->iommu_count--;
3616                         domain_update_iommu_cap(domain);
3617                 }
3618                 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3619
3620                 free_devinfo_mem(info);
3621                 spin_lock_irqsave(&device_domain_lock, flags1);
3622         }
3623         spin_unlock_irqrestore(&device_domain_lock, flags1);
3624 }
3625
3626 /* domain id for virtual machine; it is never written into a context entry */
3627 static unsigned long vm_domid;
3628
3629 static struct dmar_domain *iommu_alloc_vm_domain(void)
3630 {
3631         struct dmar_domain *domain;
3632
3633         domain = alloc_domain_mem();
3634         if (!domain)
3635                 return NULL;
3636
3637         domain->id = vm_domid++;
3638         domain->nid = -1;
3639         memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3640         domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3641
3642         return domain;
3643 }
3644
3645 static int md_domain_init(struct dmar_domain *domain, int guest_width)
3646 {
3647         int adjust_width;
3648
3649         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3650         spin_lock_init(&domain->iommu_lock);
3651
3652         domain_reserve_special_ranges(domain);
3653
3654         /* calculate AGAW */
3655         domain->gaw = guest_width;
3656         adjust_width = guestwidth_to_adjustwidth(guest_width);
3657         domain->agaw = width_to_agaw(adjust_width);
3658
3659         INIT_LIST_HEAD(&domain->devices);
3660
3661         domain->iommu_count = 0;
3662         domain->iommu_coherency = 0;
3663         domain->iommu_snooping = 0;
3664         domain->iommu_superpage = 0;
3665         domain->max_addr = 0;
3666         domain->nid = -1;
3667
3668         /* always allocate the top pgd */
3669         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3670         if (!domain->pgd)
3671                 return -ENOMEM;
3672         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3673         return 0;
3674 }
3675
3676 static void iommu_free_vm_domain(struct dmar_domain *domain)
3677 {
3678         unsigned long flags;
3679         struct dmar_drhd_unit *drhd;
3680         struct intel_iommu *iommu;
3681         unsigned long i;
3682         unsigned long ndomains;
3683
3684         for_each_drhd_unit(drhd) {
3685                 if (drhd->ignored)
3686                         continue;
3687                 iommu = drhd->iommu;
3688
3689                 ndomains = cap_ndoms(iommu->cap);
3690                 for_each_set_bit(i, iommu->domain_ids, ndomains) {
3691                         if (iommu->domains[i] == domain) {
3692                                 spin_lock_irqsave(&iommu->lock, flags);
3693                                 clear_bit(i, iommu->domain_ids);
3694                                 iommu->domains[i] = NULL;
3695                                 spin_unlock_irqrestore(&iommu->lock, flags);
3696                                 break;
3697                         }
3698                 }
3699         }
3700 }
3701
3702 static void vm_domain_exit(struct dmar_domain *domain)
3703 {
3704         /* Domain 0 is reserved, so don't process it */
3705         if (!domain)
3706                 return;
3707
3708         vm_domain_remove_all_dev_info(domain);
3709         /* destroy iovas */
3710         put_iova_domain(&domain->iovad);
3711
3712         /* clear ptes */
3713         dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3714
3715         /* free page tables */
3716         dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3717
3718         iommu_free_vm_domain(domain);
3719         free_domain_mem(domain);
3720 }
3721
3722 static int intel_iommu_domain_init(struct iommu_domain *domain)
3723 {
3724         struct dmar_domain *dmar_domain;
3725
3726         dmar_domain = iommu_alloc_vm_domain();
3727         if (!dmar_domain) {
3728                 printk(KERN_ERR
3729                         "intel_iommu_domain_init: dmar_domain == NULL\n");
3730                 return -ENOMEM;
3731         }
3732         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3733                 printk(KERN_ERR
3734                         "intel_iommu_domain_init() failed\n");
3735                 vm_domain_exit(dmar_domain);
3736                 return -ENOMEM;
3737         }
3738         domain->priv = dmar_domain;
3739
3740         return 0;
3741 }
3742
3743 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3744 {
3745         struct dmar_domain *dmar_domain = domain->priv;
3746
3747         domain->priv = NULL;
3748         vm_domain_exit(dmar_domain);
3749 }
3750
3751 static int intel_iommu_attach_device(struct iommu_domain *domain,
3752                                      struct device *dev)
3753 {
3754         struct dmar_domain *dmar_domain = domain->priv;
3755         struct pci_dev *pdev = to_pci_dev(dev);
3756         struct intel_iommu *iommu;
3757         int addr_width;
3758
3759         /* normally pdev is not mapped */
3760         if (unlikely(domain_context_mapped(pdev))) {
3761                 struct dmar_domain *old_domain;
3762
3763                 old_domain = find_domain(pdev);
3764                 if (old_domain) {
3765                         if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3766                             dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3767                                 domain_remove_one_dev_info(old_domain, pdev);
3768                         else
3769                                 domain_remove_dev_info(old_domain);
3770                 }
3771         }
3772
3773         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3774                                 pdev->devfn);
3775         if (!iommu)
3776                 return -ENODEV;
3777
3778         /* check if this iommu agaw is sufficient for max mapped address */
3779         addr_width = agaw_to_width(iommu->agaw);
3780         if (addr_width > cap_mgaw(iommu->cap))
3781                 addr_width = cap_mgaw(iommu->cap);
3782
3783         if (dmar_domain->max_addr > (1LL << addr_width)) {
3784                 printk(KERN_ERR "%s: iommu width (%d) is not "
3785                        "sufficient for the mapped address (%llx)\n",
3786                        __func__, addr_width, dmar_domain->max_addr);
3787                 return -EFAULT;
3788         }
3789         dmar_domain->gaw = addr_width;
3790
3791         /*
3792          * Knock out extra levels of page tables if necessary
3793          */
3794         while (iommu->agaw < dmar_domain->agaw) {
3795                 struct dma_pte *pte;
3796
3797                 pte = dmar_domain->pgd;
3798                 if (dma_pte_present(pte)) {
3799                         dmar_domain->pgd = (struct dma_pte *)
3800                                 phys_to_virt(dma_pte_addr(pte));
3801                         free_pgtable_page(pte);
3802                 }
3803                 dmar_domain->agaw--;
3804         }
3805
3806         return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
3807 }
3808
3809 static void intel_iommu_detach_device(struct iommu_domain *domain,
3810                                       struct device *dev)
3811 {
3812         struct dmar_domain *dmar_domain = domain->priv;
3813         struct pci_dev *pdev = to_pci_dev(dev);
3814
3815         domain_remove_one_dev_info(dmar_domain, pdev);
3816 }
3817
3818 static int intel_iommu_map(struct iommu_domain *domain,
3819                            unsigned long iova, phys_addr_t hpa,
3820                            int gfp_order, int iommu_prot)
3821 {
3822         struct dmar_domain *dmar_domain = domain->priv;
3823         u64 max_addr;
3824         int prot = 0;
3825         size_t size;
3826         int ret;
3827
3828         if (iommu_prot & IOMMU_READ)
3829                 prot |= DMA_PTE_READ;
3830         if (iommu_prot & IOMMU_WRITE)
3831                 prot |= DMA_PTE_WRITE;
3832         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3833                 prot |= DMA_PTE_SNP;
3834
3835         size     = PAGE_SIZE << gfp_order;
3836         max_addr = iova + size;
3837         if (dmar_domain->max_addr < max_addr) {
3838                 u64 end;
3839
3840                 /* check if minimum agaw is sufficient for mapped address */
3841                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
3842                 if (end < max_addr) {
3843                         printk(KERN_ERR "%s: iommu width (%d) is not "
3844                                "sufficient for the mapped address (%llx)\n",
3845                                __func__, dmar_domain->gaw, max_addr);
3846                         return -EFAULT;
3847                 }
3848                 dmar_domain->max_addr = max_addr;
3849         }
3850         /* Round up size to next multiple of PAGE_SIZE, if it and
3851            the low bits of hpa would take us onto the next page */
3852         size = aligned_nrpages(hpa, size);
3853         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3854                                  hpa >> VTD_PAGE_SHIFT, size, prot);
3855         return ret;
3856 }
3857
3858 static int intel_iommu_unmap(struct iommu_domain *domain,
3859                              unsigned long iova, int gfp_order)
3860 {
3861         struct dmar_domain *dmar_domain = domain->priv;
3862         size_t size = PAGE_SIZE << gfp_order;
3863
3864         dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3865                             (iova + size - 1) >> VTD_PAGE_SHIFT);
3866
3867         if (dmar_domain->max_addr == iova + size)
3868                 dmar_domain->max_addr = iova;
3869
3870         return gfp_order;
3871 }
3872
3873 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3874                                             unsigned long iova)
3875 {
3876         struct dmar_domain *dmar_domain = domain->priv;
3877         struct dma_pte *pte;
3878         u64 phys = 0;
3879
3880         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
3881         if (pte)
3882                 phys = dma_pte_addr(pte);
3883
3884         return phys;
3885 }
3886
3887 static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
3888                                       unsigned long cap)
3889 {
3890         struct dmar_domain *dmar_domain = domain->priv;
3891
3892         if (cap == IOMMU_CAP_CACHE_COHERENCY)
3893                 return dmar_domain->iommu_snooping;
3894         if (cap == IOMMU_CAP_INTR_REMAP)
3895                 return intr_remapping_enabled;
3896
3897         return 0;
3898 }
3899
3900 static struct iommu_ops intel_iommu_ops = {
3901         .domain_init    = intel_iommu_domain_init,
3902         .domain_destroy = intel_iommu_domain_destroy,
3903         .attach_dev     = intel_iommu_attach_device,
3904         .detach_dev     = intel_iommu_detach_device,
3905         .map            = intel_iommu_map,
3906         .unmap          = intel_iommu_unmap,
3907         .iova_to_phys   = intel_iommu_iova_to_phys,
3908         .domain_has_cap = intel_iommu_domain_has_cap,
3909 };
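
/*
 * Editorial example (not part of the original source): consumers such as KVM
 * device assignment reach the callbacks above through the generic IOMMU API
 * registered via register_iommu() in intel_iommu_init(). A hedged sketch
 * against the API of this kernel generation (exact prototypes live in
 * include/linux/iommu.h); pdev, iova and phys are caller-supplied:
 *
 *         struct iommu_domain *dom = iommu_domain_alloc();
 *         if (!dom)
 *                 return -ENOMEM;
 *         if (iommu_attach_device(dom, &pdev->dev))
 *                 goto err;
 *         iommu_map(dom, iova, phys, 0, IOMMU_READ | IOMMU_WRITE);
 *         ...
 *         iommu_unmap(dom, iova, 0);
 *         iommu_detach_device(dom, &pdev->dev);
 *         iommu_domain_free(dom);
 *
 * A gfp_order of 0 maps a single 4KiB page; intel_iommu_map() rejects
 * mappings that the domain's address width cannot cover.
 */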
3910
3911 static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
3912 {
3913         /*
3914          * Mobile 4 Series Chipset neglects to set RWBF capability,
3915          * but needs it:
3916          */
3917         printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
3918         rwbf_quirk = 1;
3919
3920         /* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
3921         if (dev->revision == 0x07) {
3922                 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
3923                 dmar_map_gfx = 0;
3924         }
3925 }
3926
3927 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
3928
3929 #define GGC 0x52
3930 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
3931 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
3932 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
3933 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
3934 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
3935 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
3936 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
3937 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
3938
3939 static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
3940 {
3941         unsigned short ggc;
3942
3943         if (pci_read_config_word(dev, GGC, &ggc))
3944                 return;
3945
3946         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
3947                 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
3948                 dmar_map_gfx = 0;
3949         }
3950 }
3951 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
3952 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
3953 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
3954 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
3955
3956 /* On Tylersburg chipsets, some BIOSes have been known to enable the
3957    ISOCH DMAR unit for the Azalia sound device, but not give it any
3958    TLB entries, which causes it to deadlock. Check for that.  We do
3959    this in a function called from init_dmars(), instead of in a PCI
3960    quirk, because we don't want to print the obnoxious "BIOS broken"
3961    message if VT-d is actually disabled.
3962 */
3963 static void __init check_tylersburg_isoch(void)
3964 {
3965         struct pci_dev *pdev;
3966         uint32_t vtisochctrl;
3967
3968         /* If there's no Azalia in the system anyway, forget it. */
3969         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
3970         if (!pdev)
3971                 return;
3972         pci_dev_put(pdev);
3973
3974         /* System Management Registers. Might be hidden, in which case
3975            we can't do the sanity check. But that's OK, because the
3976            known-broken BIOSes _don't_ actually hide it, so far. */
3977         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
3978         if (!pdev)
3979                 return;
3980
3981         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
3982                 pci_dev_put(pdev);
3983                 return;
3984         }
3985
3986         pci_dev_put(pdev);
3987
3988         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
3989         if (vtisochctrl & 1)
3990                 return;
3991
3992         /* Drop all bits other than the number of TLB entries */
3993         vtisochctrl &= 0x1c;
3994
3995         /* If we have the recommended number of TLB entries (16), fine. */
3996         if (vtisochctrl == 0x10)
3997                 return;
3998
3999         /* Zero TLB entries? You get to ride the short bus to school. */
4000         if (!vtisochctrl) {
4001                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4002                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4003                      dmi_get_system_info(DMI_BIOS_VENDOR),
4004                      dmi_get_system_info(DMI_BIOS_VERSION),
4005                      dmi_get_system_info(DMI_PRODUCT_VERSION));
4006                 iommu_identity_mapping |= IDENTMAP_AZALIA;
4007                 return;
4008         }
4009
4010         printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4011                vtisochctrl);
4012 }