drivers/iommu/intel-iommu.c (pandora-kernel.git)
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  * Author: Fenghua Yu <fenghua.yu@intel.com>
22  */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/iommu.h>
38 #include <linux/intel-iommu.h>
39 #include <linux/syscore_ops.h>
40 #include <linux/tboot.h>
41 #include <linux/dmi.h>
42 #include <linux/pci-ats.h>
43 #include <asm/cacheflush.h>
44 #include <asm/iommu.h>
45
46 #define ROOT_SIZE               VTD_PAGE_SIZE
47 #define CONTEXT_SIZE            VTD_PAGE_SIZE
48
49 #define IS_BRIDGE_HOST_DEVICE(pdev) \
50                             ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
51 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
52 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
53 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
54
55 #define IOAPIC_RANGE_START      (0xfee00000)
56 #define IOAPIC_RANGE_END        (0xfeefffff)
57 #define IOVA_START_ADDR         (0x1000)
58
59 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
60
61 #define MAX_AGAW_WIDTH 64
62
63 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
64 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
65
66 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
67    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
68 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
69                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
70 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
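/*
 * For example, with gaw == 48 and VTD_PAGE_SHIFT == 12:
 * __DOMAIN_MAX_PFN(48) == 2^36 - 1 (the last 4KiB page frame of a 48-bit
 * address space) and DOMAIN_MAX_ADDR(48) == (2^36 - 1) << 12.
 */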
71
72 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
73 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
74 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
75
76 /* page table handling */
77 #define LEVEL_STRIDE            (9)
78 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
79
80 static inline int agaw_to_level(int agaw)
81 {
82         return agaw + 2;
83 }
84
85 static inline int agaw_to_width(int agaw)
86 {
87         return 30 + agaw * LEVEL_STRIDE;
88 }
89
90 static inline int width_to_agaw(int width)
91 {
92         return (width - 30) / LEVEL_STRIDE;
93 }
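/*
 * For example, a 48-bit address width gives width_to_agaw(48) == 2,
 * agaw_to_level(2) == 4 (a four-level page table) and agaw_to_width(2) == 48;
 * a 39-bit width gives agaw 1 and a three-level table.
 */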
94
95 static inline unsigned int level_to_offset_bits(int level)
96 {
97         return (level - 1) * LEVEL_STRIDE;
98 }
99
100 static inline int pfn_level_offset(unsigned long pfn, int level)
101 {
102         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
103 }
104
105 static inline unsigned long level_mask(int level)
106 {
107         return -1UL << level_to_offset_bits(level);
108 }
109
110 static inline unsigned long level_size(int level)
111 {
112         return 1UL << level_to_offset_bits(level);
113 }
114
115 static inline unsigned long align_to_level(unsigned long pfn, int level)
116 {
117         return (pfn + level_size(level) - 1) & level_mask(level);
118 }
119
120 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
121 {
122         return  1 << ((lvl - 1) * LEVEL_STRIDE);
123 }
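/*
 * For example, level_to_offset_bits(1) == 0, so pfn_level_offset(pfn, 1)
 * is just the low 9 bits of the pfn, and level_size(2) == lvl_to_nr_pages(2)
 * == 512, i.e. one level-2 (2MiB) entry covers 512 4KiB VT-d pages.
 */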
124
125 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
126    are never going to work. */
127 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
128 {
129         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
130 }
131
132 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
133 {
134         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
135 }
136 static inline unsigned long page_to_dma_pfn(struct page *pg)
137 {
138         return mm_to_dma_pfn(page_to_pfn(pg));
139 }
140 static inline unsigned long virt_to_dma_pfn(void *p)
141 {
142         return page_to_dma_pfn(virt_to_page(p));
143 }
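/*
 * On x86 both PAGE_SHIFT and VTD_PAGE_SHIFT are 12, so these conversions
 * are currently 1:1 (a shift by zero); a larger MM page size would make
 * each mm pfn cover several 4KiB dma pfns.
 */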
144
145 /* global iommu list, set NULL for ignored DMAR units */
146 static struct intel_iommu **g_iommus;
147
148 static void __init check_tylersburg_isoch(void);
149 static int rwbf_quirk;
150
151 /*
152  * set to 1 to panic the kernel if VT-d cannot be enabled successfully
153  * (used when the kernel is launched with TXT)
154  */
155 static int force_on = 0;
156
157 /*
158  * 0: Present
159  * 1-11: Reserved
160  * 12-63: Context Ptr (12 - (haw-1))
161  * 64-127: Reserved
162  */
163 struct root_entry {
164         u64     val;
165         u64     rsvd1;
166 };
167 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
168 static inline bool root_present(struct root_entry *root)
169 {
170         return (root->val & 1);
171 }
172 static inline void set_root_present(struct root_entry *root)
173 {
174         root->val |= 1;
175 }
176 static inline void set_root_value(struct root_entry *root, unsigned long value)
177 {
178         root->val |= value & VTD_PAGE_MASK;
179 }
180
181 static inline struct context_entry *
182 get_context_addr_from_root(struct root_entry *root)
183 {
184         return (struct context_entry *)
185                 (root_present(root)?phys_to_virt(
186                 root->val & VTD_PAGE_MASK) :
187                 NULL);
188 }
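/*
 * The root table has ROOT_ENTRY_NR (4096 / 16 == 256) entries, one per PCI
 * bus; a present entry points to a 4KiB context table which is indexed by
 * devfn (see device_to_context_entry() below).
 */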
189
190 /*
191  * low 64 bits:
192  * 0: present
193  * 1: fault processing disable
194  * 2-3: translation type
195  * 12-63: address space root
196  * high 64 bits:
197  * 0-2: address width
198  * 3-6: avail
199  * 8-23: domain id
200  */
201 struct context_entry {
202         u64 lo;
203         u64 hi;
204 };
205
206 static inline bool context_present(struct context_entry *context)
207 {
208         return (context->lo & 1);
209 }
210 static inline void context_set_present(struct context_entry *context)
211 {
212         context->lo |= 1;
213 }
214
215 static inline void context_set_fault_enable(struct context_entry *context)
216 {
217         context->lo &= (((u64)-1) << 2) | 1;
218 }
219
220 static inline void context_set_translation_type(struct context_entry *context,
221                                                 unsigned long value)
222 {
223         context->lo &= (((u64)-1) << 4) | 3;
224         context->lo |= (value & 3) << 2;
225 }
226
227 static inline void context_set_address_root(struct context_entry *context,
228                                             unsigned long value)
229 {
230         context->lo |= value & VTD_PAGE_MASK;
231 }
232
233 static inline void context_set_address_width(struct context_entry *context,
234                                              unsigned long value)
235 {
236         context->hi |= value & 7;
237 }
238
239 static inline void context_set_domain_id(struct context_entry *context,
240                                          unsigned long value)
241 {
242         context->hi |= (value & ((1 << 16) - 1)) << 8;
243 }
244
245 static inline void context_clear_entry(struct context_entry *context)
246 {
247         context->lo = 0;
248         context->hi = 0;
249 }
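/*
 * For example, context_set_domain_id(c, 42) places the domain id in bits
 * 8-23 of the high qword, and context_set_translation_type() writes bits
 * 2-3 of the low qword, matching the layout described above.
 */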
250
251 /*
252  * 0: readable
253  * 1: writable
254  * 2-6: reserved
255  * 7: super page
256  * 8-10: available
257  * 11: snoop behavior
258  * 12-63: Host physical address
259  */
260 struct dma_pte {
261         u64 val;
262 };
263
264 static inline void dma_clear_pte(struct dma_pte *pte)
265 {
266         pte->val = 0;
267 }
268
269 static inline void dma_set_pte_readable(struct dma_pte *pte)
270 {
271         pte->val |= DMA_PTE_READ;
272 }
273
274 static inline void dma_set_pte_writable(struct dma_pte *pte)
275 {
276         pte->val |= DMA_PTE_WRITE;
277 }
278
279 static inline void dma_set_pte_snp(struct dma_pte *pte)
280 {
281         pte->val |= DMA_PTE_SNP;
282 }
283
284 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
285 {
286         pte->val = (pte->val & ~3) | (prot & 3);
287 }
288
289 static inline u64 dma_pte_addr(struct dma_pte *pte)
290 {
291 #ifdef CONFIG_64BIT
292         return pte->val & VTD_PAGE_MASK;
293 #else
294         /* Must have a full atomic 64-bit read */
295         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
296 #endif
297 }
298
299 static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
300 {
301         pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
302 }
303
304 static inline bool dma_pte_present(struct dma_pte *pte)
305 {
306         return (pte->val & 3) != 0;
307 }
308
309 static inline int first_pte_in_page(struct dma_pte *pte)
310 {
311         return !((unsigned long)pte & ~VTD_PAGE_MASK);
312 }
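/*
 * first_pte_in_page() is true when the pte pointer is 4KiB-aligned, i.e.
 * it is entry 0 of a page-table page (512 eight-byte entries); the
 * range-walking loops below use it to detect a page-table page boundary.
 */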
313
314 /*
315  * This domain is a static identity mapping domain.
316  *      1. This domain creates a static 1:1 mapping to all usable memory.
317  *      2. It maps to each iommu if successful.
318  *      3. Each iommu maps to this domain if successful.
319  */
320 static struct dmar_domain *si_domain;
321 static int hw_pass_through = 1;
322
323 /* devices under the same p2p bridge are owned in one domain */
324 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
325
326 /* domain represents a virtual machine; more than one device
327  * across iommus may be owned by one domain, e.g. a kvm guest.
328  */
329 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 1)
330
331 /* si_domain contains multiple devices */
332 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 2)
333
334 struct dmar_domain {
335         int     id;                     /* domain id */
336         int     nid;                    /* node id */
337         unsigned long iommu_bmp;        /* bitmap of iommus this domain uses */
338
339         struct list_head devices;       /* all devices' list */
340         struct iova_domain iovad;       /* iova's that belong to this domain */
341
342         struct dma_pte  *pgd;           /* virtual address */
343         int             gaw;            /* max guest address width */
344
345         /* adjusted guest address width, 0 is level 2 30-bit */
346         int             agaw;
347
348         int             flags;          /* flags to find out type of domain */
349
350         int             iommu_coherency;/* indicate coherency of iommu access */
351         int             iommu_snooping; /* indicate snooping control feature*/
352         int             iommu_count;    /* reference count of iommu */
353         int             iommu_superpage;/* Level of superpages supported:
354                                            0 == 4KiB (no superpages), 1 == 2MiB,
355                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
356         spinlock_t      iommu_lock;     /* protect iommu set in domain */
357         u64             max_addr;       /* maximum mapped address */
358 };
359
360 /* PCI domain-device relationship */
361 struct device_domain_info {
362         struct list_head link;  /* link to domain siblings */
363         struct list_head global; /* link to global list */
364         int segment;            /* PCI domain */
365         u8 bus;                 /* PCI bus number */
366         u8 devfn;               /* PCI devfn number */
367         struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
368         struct intel_iommu *iommu; /* IOMMU used by this device */
369         struct dmar_domain *domain; /* pointer to domain */
370 };
371
372 static void flush_unmaps_timeout(unsigned long data);
373
374 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
375
376 #define HIGH_WATER_MARK 250
377 struct deferred_flush_tables {
378         int next;
379         struct iova *iova[HIGH_WATER_MARK];
380         struct dmar_domain *domain[HIGH_WATER_MARK];
381 };
382
383 static struct deferred_flush_tables *deferred_flush;
384
385 /* number of registered iommus; bounds the per-domain iommu bitmaps and g_iommus[] */
386 static int g_num_of_iommus;
387
388 static DEFINE_SPINLOCK(async_umap_flush_lock);
389 static LIST_HEAD(unmaps_to_do);
390
391 static int timer_on;
392 static long list_size;
393
394 static void domain_remove_dev_info(struct dmar_domain *domain);
395
396 #ifdef CONFIG_DMAR_DEFAULT_ON
397 int dmar_disabled = 0;
398 #else
399 int dmar_disabled = 1;
400 #endif /*CONFIG_DMAR_DEFAULT_ON*/
401
402 static int dmar_map_gfx = 1;
403 static int dmar_forcedac;
404 static int intel_iommu_strict;
405 static int intel_iommu_superpage = 1;
406
407 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
408 static DEFINE_SPINLOCK(device_domain_lock);
409 static LIST_HEAD(device_domain_list);
410
411 static struct iommu_ops intel_iommu_ops;
412
413 static int __init intel_iommu_setup(char *str)
414 {
415         if (!str)
416                 return -EINVAL;
417         while (*str) {
418                 if (!strncmp(str, "on", 2)) {
419                         dmar_disabled = 0;
420                         printk(KERN_INFO "Intel-IOMMU: enabled\n");
421                 } else if (!strncmp(str, "off", 3)) {
422                         dmar_disabled = 1;
423                         printk(KERN_INFO "Intel-IOMMU: disabled\n");
424                 } else if (!strncmp(str, "igfx_off", 8)) {
425                         dmar_map_gfx = 0;
426                         printk(KERN_INFO
427                                 "Intel-IOMMU: disable GFX device mapping\n");
428                 } else if (!strncmp(str, "forcedac", 8)) {
429                         printk(KERN_INFO
430                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
431                         dmar_forcedac = 1;
432                 } else if (!strncmp(str, "strict", 6)) {
433                         printk(KERN_INFO
434                                 "Intel-IOMMU: disable batched IOTLB flush\n");
435                         intel_iommu_strict = 1;
436                 } else if (!strncmp(str, "sp_off", 6)) {
437                         printk(KERN_INFO
438                                 "Intel-IOMMU: disable supported super page\n");
439                         intel_iommu_superpage = 0;
440                 }
441
442                 str += strcspn(str, ",");
443                 while (*str == ',')
444                         str++;
445         }
446         return 0;
447 }
448 __setup("intel_iommu=", intel_iommu_setup);
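/*
 * The options above are comma-separated on the kernel command line, e.g.
 * "intel_iommu=on,strict" enables the IOMMU and disables the batched
 * (lazy) IOTLB flush.
 */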
449
450 static struct kmem_cache *iommu_domain_cache;
451 static struct kmem_cache *iommu_devinfo_cache;
452 static struct kmem_cache *iommu_iova_cache;
453
454 static inline void *alloc_pgtable_page(int node)
455 {
456         struct page *page;
457         void *vaddr = NULL;
458
459         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
460         if (page)
461                 vaddr = page_address(page);
462         return vaddr;
463 }
464
465 static inline void free_pgtable_page(void *vaddr)
466 {
467         free_page((unsigned long)vaddr);
468 }
469
470 static inline void *alloc_domain_mem(void)
471 {
472         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
473 }
474
475 static void free_domain_mem(void *vaddr)
476 {
477         kmem_cache_free(iommu_domain_cache, vaddr);
478 }
479
480 static inline void * alloc_devinfo_mem(void)
481 {
482         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
483 }
484
485 static inline void free_devinfo_mem(void *vaddr)
486 {
487         kmem_cache_free(iommu_devinfo_cache, vaddr);
488 }
489
490 struct iova *alloc_iova_mem(void)
491 {
492         return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
493 }
494
495 void free_iova_mem(struct iova *iova)
496 {
497         kmem_cache_free(iommu_iova_cache, iova);
498 }
499
500
501 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
502 {
503         unsigned long sagaw;
504         int agaw = -1;
505
506         sagaw = cap_sagaw(iommu->cap);
507         for (agaw = width_to_agaw(max_gaw);
508              agaw >= 0; agaw--) {
509                 if (test_bit(agaw, &sagaw))
510                         break;
511         }
512
513         return agaw;
514 }
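/*
 * For example, if the unit's SAGAW field has bits 1 and 2 set (three- and
 * four-level tables supported), __iommu_calculate_agaw(iommu, 48) starts at
 * agaw 2 and returns 2; a unit with only bit 1 set would return 1.
 */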
515
516 /*
517  * Calculate max SAGAW for each iommu.
518  */
519 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
520 {
521         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
522 }
523
524 /*
525  * Calculate agaw for each iommu.
526  * "SAGAW" may be different across iommus; use a default agaw, and fall
527  * back to a smaller supported agaw for iommus that don't support the default.
528  */
529 int iommu_calculate_agaw(struct intel_iommu *iommu)
530 {
531         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
532 }
533
534 /* This function only returns a single iommu in a domain */
535 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
536 {
537         int iommu_id;
538
539         /* si_domain and vm domain should not get here. */
540         BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
541         BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
542
543         iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
544         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
545                 return NULL;
546
547         return g_iommus[iommu_id];
548 }
549
550 static void domain_update_iommu_coherency(struct dmar_domain *domain)
551 {
552         int i;
553
554         domain->iommu_coherency = 1;
555
556         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
557                 if (!ecap_coherent(g_iommus[i]->ecap)) {
558                         domain->iommu_coherency = 0;
559                         break;
560                 }
561         }
562 }
563
564 static void domain_update_iommu_snooping(struct dmar_domain *domain)
565 {
566         int i;
567
568         domain->iommu_snooping = 1;
569
570         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
571                 if (!ecap_sc_support(g_iommus[i]->ecap)) {
572                         domain->iommu_snooping = 0;
573                         break;
574                 }
575         }
576 }
577
578 static void domain_update_iommu_superpage(struct dmar_domain *domain)
579 {
580         int i, mask = 0xf;
581
582         if (!intel_iommu_superpage) {
583                 domain->iommu_superpage = 0;
584                 return;
585         }
586
587         domain->iommu_superpage = 4; /* 1TiB */
588
589         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
590                 mask &= cap_super_page_val(g_iommus[i]->cap); /* lowest common denominator */
591                 if (!mask) {
592                         break;
593                 }
594         }
595         domain->iommu_superpage = fls(mask);
596 }
597
598 /* Some capabilities may be different across iommus */
599 static void domain_update_iommu_cap(struct dmar_domain *domain)
600 {
601         domain_update_iommu_coherency(domain);
602         domain_update_iommu_snooping(domain);
603         domain_update_iommu_superpage(domain);
604 }
605
606 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
607 {
608         struct dmar_drhd_unit *drhd = NULL;
609         int i;
610
611         for_each_drhd_unit(drhd) {
612                 if (drhd->ignored)
613                         continue;
614                 if (segment != drhd->segment)
615                         continue;
616
617                 for (i = 0; i < drhd->devices_cnt; i++) {
618                         if (drhd->devices[i] &&
619                             drhd->devices[i]->bus->number == bus &&
620                             drhd->devices[i]->devfn == devfn)
621                                 return drhd->iommu;
622                         if (drhd->devices[i] &&
623                             drhd->devices[i]->subordinate &&
624                             drhd->devices[i]->subordinate->number <= bus &&
625                             drhd->devices[i]->subordinate->subordinate >= bus)
626                                 return drhd->iommu;
627                 }
628
629                 if (drhd->include_all)
630                         return drhd->iommu;
631         }
632
633         return NULL;
634 }
635
636 static void domain_flush_cache(struct dmar_domain *domain,
637                                void *addr, int size)
638 {
639         if (!domain->iommu_coherency)
640                 clflush_cache_range(addr, size);
641 }
642
643 /* Gets context entry for a given bus and devfn */
644 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
645                 u8 bus, u8 devfn)
646 {
647         struct root_entry *root;
648         struct context_entry *context;
649         unsigned long phy_addr;
650         unsigned long flags;
651
652         spin_lock_irqsave(&iommu->lock, flags);
653         root = &iommu->root_entry[bus];
654         context = get_context_addr_from_root(root);
655         if (!context) {
656                 context = (struct context_entry *)
657                                 alloc_pgtable_page(iommu->node);
658                 if (!context) {
659                         spin_unlock_irqrestore(&iommu->lock, flags);
660                         return NULL;
661                 }
662                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
663                 phy_addr = virt_to_phys((void *)context);
664                 set_root_value(root, phy_addr);
665                 set_root_present(root);
666                 __iommu_flush_cache(iommu, root, sizeof(*root));
667         }
668         spin_unlock_irqrestore(&iommu->lock, flags);
669         return &context[devfn];
670 }
671
672 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
673 {
674         struct root_entry *root;
675         struct context_entry *context;
676         int ret;
677         unsigned long flags;
678
679         spin_lock_irqsave(&iommu->lock, flags);
680         root = &iommu->root_entry[bus];
681         context = get_context_addr_from_root(root);
682         if (!context) {
683                 ret = 0;
684                 goto out;
685         }
686         ret = context_present(&context[devfn]);
687 out:
688         spin_unlock_irqrestore(&iommu->lock, flags);
689         return ret;
690 }
691
692 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
693 {
694         struct root_entry *root;
695         struct context_entry *context;
696         unsigned long flags;
697
698         spin_lock_irqsave(&iommu->lock, flags);
699         root = &iommu->root_entry[bus];
700         context = get_context_addr_from_root(root);
701         if (context) {
702                 context_clear_entry(&context[devfn]);
703                 __iommu_flush_cache(iommu, &context[devfn], \
704                         sizeof(*context));
705         }
706         spin_unlock_irqrestore(&iommu->lock, flags);
707 }
708
709 static void free_context_table(struct intel_iommu *iommu)
710 {
711         struct root_entry *root;
712         int i;
713         unsigned long flags;
714         struct context_entry *context;
715
716         spin_lock_irqsave(&iommu->lock, flags);
717         if (!iommu->root_entry) {
718                 goto out;
719         }
720         for (i = 0; i < ROOT_ENTRY_NR; i++) {
721                 root = &iommu->root_entry[i];
722                 context = get_context_addr_from_root(root);
723                 if (context)
724                         free_pgtable_page(context);
725         }
726         free_pgtable_page(iommu->root_entry);
727         iommu->root_entry = NULL;
728 out:
729         spin_unlock_irqrestore(&iommu->lock, flags);
730 }
731
732 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
733                                       unsigned long pfn, int large_level)
734 {
735         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
736         struct dma_pte *parent, *pte = NULL;
737         int level = agaw_to_level(domain->agaw);
738         int offset, target_level;
739
740         BUG_ON(!domain->pgd);
741         BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
742         parent = domain->pgd;
743
744         /* Search pte */
745         if (!large_level)
746                 target_level = 1;
747         else
748                 target_level = large_level;
749
750         while (level > 0) {
751                 void *tmp_page;
752
753                 offset = pfn_level_offset(pfn, level);
754                 pte = &parent[offset];
755                 if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
756                         break;
757                 if (level == target_level)
758                         break;
759
760                 if (!dma_pte_present(pte)) {
761                         uint64_t pteval;
762
763                         tmp_page = alloc_pgtable_page(domain->nid);
764
765                         if (!tmp_page)
766                                 return NULL;
767
768                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
769                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
770                         if (cmpxchg64(&pte->val, 0ULL, pteval)) {
771                                 /* Someone else set it while we were thinking; use theirs. */
772                                 free_pgtable_page(tmp_page);
773                         } else {
774                                 dma_pte_addr(pte);
775                                 domain_flush_cache(domain, pte, sizeof(*pte));
776                         }
777                 }
778                 parent = phys_to_virt(dma_pte_addr(pte));
779                 level--;
780         }
781
782         return pte;
783 }
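/*
 * Example walk: in a four-level (48-bit) domain, a lookup with
 * large_level == 0 descends from level 4 down to level 1, allocating any
 * missing table pages on the way; the cmpxchg64() above makes a lost
 * allocation race harmless (the loser frees its page and uses the
 * winner's). A non-zero large_level stops the walk at that level so a
 * superpage pte can be installed there.
 */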
784
785
786 /* return the pte for an address at a specific level */
787 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
788                                          unsigned long pfn,
789                                          int level, int *large_page)
790 {
791         struct dma_pte *parent, *pte = NULL;
792         int total = agaw_to_level(domain->agaw);
793         int offset;
794
795         parent = domain->pgd;
796         while (level <= total) {
797                 offset = pfn_level_offset(pfn, total);
798                 pte = &parent[offset];
799                 if (level == total)
800                         return pte;
801
802                 if (!dma_pte_present(pte)) {
803                         *large_page = total;
804                         break;
805                 }
806
807                 if (pte->val & DMA_PTE_LARGE_PAGE) {
808                         *large_page = total;
809                         return pte;
810                 }
811
812                 parent = phys_to_virt(dma_pte_addr(pte));
813                 total--;
814         }
815         return NULL;
816 }
817
818 /* clear last level pte; a tlb flush should follow */
819 static void dma_pte_clear_range(struct dmar_domain *domain,
820                                 unsigned long start_pfn,
821                                 unsigned long last_pfn)
822 {
823         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
824         unsigned int large_page = 1;
825         struct dma_pte *first_pte, *pte;
826
827         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
828         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
829         BUG_ON(start_pfn > last_pfn);
830
831         /* we don't need lock here; nobody else touches the iova range */
832         do {
833                 large_page = 1;
834                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
835                 if (!pte) {
836                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
837                         continue;
838                 }
839                 do {
840                         dma_clear_pte(pte);
841                         start_pfn += lvl_to_nr_pages(large_page);
842                         pte++;
843                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
844
845                 domain_flush_cache(domain, first_pte,
846                                    (void *)pte - (void *)first_pte);
847
848         } while (start_pfn && start_pfn <= last_pfn);
849 }
850
851 /* free page table pages. last level pte should already be cleared */
852 static void dma_pte_free_pagetable(struct dmar_domain *domain,
853                                    unsigned long start_pfn,
854                                    unsigned long last_pfn)
855 {
856         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
857         struct dma_pte *first_pte, *pte;
858         int total = agaw_to_level(domain->agaw);
859         int level;
860         unsigned long tmp;
861         int large_page = 2;
862
863         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
864         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
865         BUG_ON(start_pfn > last_pfn);
866
867         /* We don't need lock here; nobody else touches the iova range */
868         level = 2;
869         while (level <= total) {
870                 tmp = align_to_level(start_pfn, level);
871
872                 /* If we can't even clear one PTE at this level, we're done */
873                 if (tmp + level_size(level) - 1 > last_pfn)
874                         return;
875
876                 do {
877                         large_page = level;
878                         first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
879                         if (large_page > level)
880                                 level = large_page + 1;
881                         if (!pte) {
882                                 tmp = align_to_level(tmp + 1, level + 1);
883                                 continue;
884                         }
885                         do {
886                                 if (dma_pte_present(pte)) {
887                                         free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
888                                         dma_clear_pte(pte);
889                                 }
890                                 pte++;
891                                 tmp += level_size(level);
892                         } while (!first_pte_in_page(pte) &&
893                                  tmp + level_size(level) - 1 <= last_pfn);
894
895                         domain_flush_cache(domain, first_pte,
896                                            (void *)pte - (void *)first_pte);
897
898                 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
899                 level++;
900         }
901         /* free pgd */
902         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
903                 free_pgtable_page(domain->pgd);
904                 domain->pgd = NULL;
905         }
906 }
907
908 /* iommu handling */
909 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
910 {
911         struct root_entry *root;
912         unsigned long flags;
913
914         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
915         if (!root)
916                 return -ENOMEM;
917
918         __iommu_flush_cache(iommu, root, ROOT_SIZE);
919
920         spin_lock_irqsave(&iommu->lock, flags);
921         iommu->root_entry = root;
922         spin_unlock_irqrestore(&iommu->lock, flags);
923
924         return 0;
925 }
926
927 static void iommu_set_root_entry(struct intel_iommu *iommu)
928 {
929         void *addr;
930         u32 sts;
931         unsigned long flag;
932
933         addr = iommu->root_entry;
934
935         spin_lock_irqsave(&iommu->register_lock, flag);
936         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
937
938         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
939
940         /* Make sure hardware completes it */
941         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
942                       readl, (sts & DMA_GSTS_RTPS), sts);
943
944         spin_unlock_irqrestore(&iommu->register_lock, flag);
945 }
946
947 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
948 {
949         u32 val;
950         unsigned long flag;
951
952         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
953                 return;
954
955         spin_lock_irqsave(&iommu->register_lock, flag);
956         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
957
958         /* Make sure hardware completes it */
959         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
960                       readl, (!(val & DMA_GSTS_WBFS)), val);
961
962         spin_unlock_irqrestore(&iommu->register_lock, flag);
963 }
964
965 /* return value determines if we need a write buffer flush */
966 static void __iommu_flush_context(struct intel_iommu *iommu,
967                                   u16 did, u16 source_id, u8 function_mask,
968                                   u64 type)
969 {
970         u64 val = 0;
971         unsigned long flag;
972
973         switch (type) {
974         case DMA_CCMD_GLOBAL_INVL:
975                 val = DMA_CCMD_GLOBAL_INVL;
976                 break;
977         case DMA_CCMD_DOMAIN_INVL:
978                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
979                 break;
980         case DMA_CCMD_DEVICE_INVL:
981                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
982                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
983                 break;
984         default:
985                 BUG();
986         }
987         val |= DMA_CCMD_ICC;
988
989         spin_lock_irqsave(&iommu->register_lock, flag);
990         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
991
992         /* Make sure hardware completes it */
993         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
994                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
995
996         spin_unlock_irqrestore(&iommu->register_lock, flag);
997 }
998
999 /* return value determines if we need a write buffer flush */
1000 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1001                                 u64 addr, unsigned int size_order, u64 type)
1002 {
1003         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1004         u64 val = 0, val_iva = 0;
1005         unsigned long flag;
1006
1007         switch (type) {
1008         case DMA_TLB_GLOBAL_FLUSH:
1009                 /* global flush doesn't need to set IVA_REG */
1010                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1011                 break;
1012         case DMA_TLB_DSI_FLUSH:
1013                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1014                 break;
1015         case DMA_TLB_PSI_FLUSH:
1016                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1017                 /* Note: always flush non-leaf currently */
1018                 val_iva = size_order | addr;
1019                 break;
1020         default:
1021                 BUG();
1022         }
1023         /* Note: set drain read/write */
1024 #if 0
1025         /*
1026          * This is probably just to be super secure. It looks like we can
1027          * ignore it without any impact.
1028          */
1029         if (cap_read_drain(iommu->cap))
1030                 val |= DMA_TLB_READ_DRAIN;
1031 #endif
1032         if (cap_write_drain(iommu->cap))
1033                 val |= DMA_TLB_WRITE_DRAIN;
1034
1035         spin_lock_irqsave(&iommu->register_lock, flag);
1036         /* Note: Only uses first TLB reg currently */
1037         if (val_iva)
1038                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1039         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1040
1041         /* Make sure hardware completes it */
1042         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1043                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1044
1045         spin_unlock_irqrestore(&iommu->register_lock, flag);
1046
1047         /* check IOTLB invalidation granularity */
1048         if (DMA_TLB_IAIG(val) == 0)
1049                 printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
1050         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1051                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1052                         (unsigned long long)DMA_TLB_IIRG(type),
1053                         (unsigned long long)DMA_TLB_IAIG(val));
1054 }
1055
1056 static struct device_domain_info *iommu_support_dev_iotlb(
1057         struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1058 {
1059         int found = 0;
1060         unsigned long flags;
1061         struct device_domain_info *info;
1062         struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1063
1064         if (!ecap_dev_iotlb_support(iommu->ecap))
1065                 return NULL;
1066
1067         if (!iommu->qi)
1068                 return NULL;
1069
1070         spin_lock_irqsave(&device_domain_lock, flags);
1071         list_for_each_entry(info, &domain->devices, link)
1072                 if (info->bus == bus && info->devfn == devfn) {
1073                         found = 1;
1074                         break;
1075                 }
1076         spin_unlock_irqrestore(&device_domain_lock, flags);
1077
1078         if (!found || !info->dev)
1079                 return NULL;
1080
1081         if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1082                 return NULL;
1083
1084         if (!dmar_find_matched_atsr_unit(info->dev))
1085                 return NULL;
1086
1087         info->iommu = iommu;
1088
1089         return info;
1090 }
1091
1092 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1093 {
1094         if (!info)
1095                 return;
1096
1097         pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1098 }
1099
1100 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1101 {
1102         if (!info->dev || !pci_ats_enabled(info->dev))
1103                 return;
1104
1105         pci_disable_ats(info->dev);
1106 }
1107
1108 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1109                                   u64 addr, unsigned mask)
1110 {
1111         u16 sid, qdep;
1112         unsigned long flags;
1113         struct device_domain_info *info;
1114
1115         spin_lock_irqsave(&device_domain_lock, flags);
1116         list_for_each_entry(info, &domain->devices, link) {
1117                 if (!info->dev || !pci_ats_enabled(info->dev))
1118                         continue;
1119
1120                 sid = info->bus << 8 | info->devfn;
1121                 qdep = pci_ats_queue_depth(info->dev);
1122                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1123         }
1124         spin_unlock_irqrestore(&device_domain_lock, flags);
1125 }
1126
1127 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1128                                   unsigned long pfn, unsigned int pages, int map)
1129 {
1130         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1131         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1132
1133         BUG_ON(pages == 0);
1134
1135         /*
1136          * Fall back to domain-selective flush if there is no PSI support or
1137          * the size is too big.
1138          * PSI requires the page count to be a power of two, and the base
1139          * address to be naturally aligned to that size.
1140          */
1141         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1142                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1143                                                 DMA_TLB_DSI_FLUSH);
1144         else
1145                 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1146                                                 DMA_TLB_PSI_FLUSH);
1147
1148         /*
1149          * In caching mode, changes of pages from non-present to present require
1150          * flush. However, device IOTLB doesn't need to be flushed in this case.
1151          */
1152         if (!cap_caching_mode(iommu->cap) || !map)
1153                 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1154 }
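/*
 * For example, a request to flush 9 pages is rounded up to mask == 4
 * (2^4 == 16 pages), since PSI can only invalidate naturally aligned
 * power-of-two ranges; anything wider than the hardware's maximum mask
 * falls back to the domain-selective flush above.
 */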
1155
1156 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1157 {
1158         u32 pmen;
1159         unsigned long flags;
1160
1161         spin_lock_irqsave(&iommu->register_lock, flags);
1162         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1163         pmen &= ~DMA_PMEN_EPM;
1164         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1165
1166         /* wait for the protected region status bit to clear */
1167         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1168                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1169
1170         spin_unlock_irqrestore(&iommu->register_lock, flags);
1171 }
1172
1173 static int iommu_enable_translation(struct intel_iommu *iommu)
1174 {
1175         u32 sts;
1176         unsigned long flags;
1177
1178         spin_lock_irqsave(&iommu->register_lock, flags);
1179         iommu->gcmd |= DMA_GCMD_TE;
1180         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1181
1182         /* Make sure hardware completes it */
1183         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1184                       readl, (sts & DMA_GSTS_TES), sts);
1185
1186         spin_unlock_irqrestore(&iommu->register_lock, flags);
1187         return 0;
1188 }
1189
1190 static int iommu_disable_translation(struct intel_iommu *iommu)
1191 {
1192         u32 sts;
1193         unsigned long flag;
1194
1195         spin_lock_irqsave(&iommu->register_lock, flag);
1196         iommu->gcmd &= ~DMA_GCMD_TE;
1197         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1198
1199         /* Make sure hardware completes it */
1200         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1201                       readl, (!(sts & DMA_GSTS_TES)), sts);
1202
1203         spin_unlock_irqrestore(&iommu->register_lock, flag);
1204         return 0;
1205 }
1206
1207
1208 static int iommu_init_domains(struct intel_iommu *iommu)
1209 {
1210         unsigned long ndomains;
1211         unsigned long nlongs;
1212
1213         ndomains = cap_ndoms(iommu->cap);
1214         pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
1215                         ndomains);
1216         nlongs = BITS_TO_LONGS(ndomains);
1217
1218         spin_lock_init(&iommu->lock);
1219
1220         /* TBD: there might be 64K domains,
1221          * consider other allocation for future chip
1222          */
1223         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1224         if (!iommu->domain_ids) {
1225                 printk(KERN_ERR "Allocating domain id array failed\n");
1226                 return -ENOMEM;
1227         }
1228         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1229                         GFP_KERNEL);
1230         if (!iommu->domains) {
1231                 printk(KERN_ERR "Allocating domain array failed\n");
1232                 return -ENOMEM;
1233         }
1234
1235         /*
1236          * if Caching mode is set, then invalid translations are tagged
1237          * with domainid 0. Hence we need to pre-allocate it.
1238          */
1239         if (cap_caching_mode(iommu->cap))
1240                 set_bit(0, iommu->domain_ids);
1241         return 0;
1242 }
1243
1244
1245 static void domain_exit(struct dmar_domain *domain);
1246 static void vm_domain_exit(struct dmar_domain *domain);
1247
1248 void free_dmar_iommu(struct intel_iommu *iommu)
1249 {
1250         struct dmar_domain *domain;
1251         int i;
1252         unsigned long flags;
1253
1254         if ((iommu->domains) && (iommu->domain_ids)) {
1255                 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1256                         domain = iommu->domains[i];
1257                         clear_bit(i, iommu->domain_ids);
1258
1259                         spin_lock_irqsave(&domain->iommu_lock, flags);
1260                         if (--domain->iommu_count == 0) {
1261                                 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1262                                         vm_domain_exit(domain);
1263                                 else
1264                                         domain_exit(domain);
1265                         }
1266                         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1267                 }
1268         }
1269
1270         if (iommu->gcmd & DMA_GCMD_TE)
1271                 iommu_disable_translation(iommu);
1272
1273         if (iommu->irq) {
1274                 irq_set_handler_data(iommu->irq, NULL);
1275                 /* This will mask the irq */
1276                 free_irq(iommu->irq, iommu);
1277                 destroy_irq(iommu->irq);
1278         }
1279
1280         kfree(iommu->domains);
1281         kfree(iommu->domain_ids);
1282
1283         g_iommus[iommu->seq_id] = NULL;
1284
1285         /* if all iommus are freed, free g_iommus */
1286         for (i = 0; i < g_num_of_iommus; i++) {
1287                 if (g_iommus[i])
1288                         break;
1289         }
1290
1291         if (i == g_num_of_iommus)
1292                 kfree(g_iommus);
1293
1294         /* free context mapping */
1295         free_context_table(iommu);
1296 }
1297
1298 static struct dmar_domain *alloc_domain(void)
1299 {
1300         struct dmar_domain *domain;
1301
1302         domain = alloc_domain_mem();
1303         if (!domain)
1304                 return NULL;
1305
1306         domain->nid = -1;
1307         memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1308         domain->flags = 0;
1309
1310         return domain;
1311 }
1312
1313 static int iommu_attach_domain(struct dmar_domain *domain,
1314                                struct intel_iommu *iommu)
1315 {
1316         int num;
1317         unsigned long ndomains;
1318         unsigned long flags;
1319
1320         ndomains = cap_ndoms(iommu->cap);
1321
1322         spin_lock_irqsave(&iommu->lock, flags);
1323
1324         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1325         if (num >= ndomains) {
1326                 spin_unlock_irqrestore(&iommu->lock, flags);
1327                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1328                 return -ENOMEM;
1329         }
1330
1331         domain->id = num;
1332         set_bit(num, iommu->domain_ids);
1333         set_bit(iommu->seq_id, &domain->iommu_bmp);
1334         iommu->domains[num] = domain;
1335         spin_unlock_irqrestore(&iommu->lock, flags);
1336
1337         return 0;
1338 }
1339
1340 static void iommu_detach_domain(struct dmar_domain *domain,
1341                                 struct intel_iommu *iommu)
1342 {
1343         unsigned long flags;
1344         int num, ndomains;
1345         int found = 0;
1346
1347         spin_lock_irqsave(&iommu->lock, flags);
1348         ndomains = cap_ndoms(iommu->cap);
1349         for_each_set_bit(num, iommu->domain_ids, ndomains) {
1350                 if (iommu->domains[num] == domain) {
1351                         found = 1;
1352                         break;
1353                 }
1354         }
1355
1356         if (found) {
1357                 clear_bit(num, iommu->domain_ids);
1358                 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1359                 iommu->domains[num] = NULL;
1360         }
1361         spin_unlock_irqrestore(&iommu->lock, flags);
1362 }
1363
1364 static struct iova_domain reserved_iova_list;
1365 static struct lock_class_key reserved_rbtree_key;
1366
1367 static int dmar_init_reserved_ranges(void)
1368 {
1369         struct pci_dev *pdev = NULL;
1370         struct iova *iova;
1371         int i;
1372
1373         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1374
1375         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1376                 &reserved_rbtree_key);
1377
1378         /* IOAPIC ranges shouldn't be accessed by DMA */
1379         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1380                 IOVA_PFN(IOAPIC_RANGE_END));
1381         if (!iova) {
1382                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1383                 return -ENODEV;
1384         }
1385
1386         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1387         for_each_pci_dev(pdev) {
1388                 struct resource *r;
1389
1390                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1391                         r = &pdev->resource[i];
1392                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1393                                 continue;
1394                         iova = reserve_iova(&reserved_iova_list,
1395                                             IOVA_PFN(r->start),
1396                                             IOVA_PFN(r->end));
1397                         if (!iova) {
1398                                 printk(KERN_ERR "Reserve iova failed\n");
1399                                 return -ENODEV;
1400                         }
1401                 }
1402         }
1403         return 0;
1404 }
1405
1406 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1407 {
1408         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1409 }
1410
1411 static inline int guestwidth_to_adjustwidth(int gaw)
1412 {
1413         int agaw;
1414         int r = (gaw - 12) % 9;
1415
1416         if (r == 0)
1417                 agaw = gaw;
1418         else
1419                 agaw = gaw + 9 - r;
1420         if (agaw > 64)
1421                 agaw = 64;
1422         return agaw;
1423 }
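/*
 * For example, guestwidth_to_adjustwidth(32) == 39: the width is rounded
 * up to a whole number of 9-bit levels above the 12-bit page offset, while
 * 39- and 48-bit guest widths are already aligned and are returned as-is.
 */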
1424
1425 static int domain_init(struct dmar_domain *domain, int guest_width)
1426 {
1427         struct intel_iommu *iommu;
1428         int adjust_width, agaw;
1429         unsigned long sagaw;
1430
1431         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1432         spin_lock_init(&domain->iommu_lock);
1433
1434         domain_reserve_special_ranges(domain);
1435
1436         /* calculate AGAW */
1437         iommu = domain_get_iommu(domain);
1438         if (guest_width > cap_mgaw(iommu->cap))
1439                 guest_width = cap_mgaw(iommu->cap);
1440         domain->gaw = guest_width;
1441         adjust_width = guestwidth_to_adjustwidth(guest_width);
1442         agaw = width_to_agaw(adjust_width);
1443         sagaw = cap_sagaw(iommu->cap);
1444         if (!test_bit(agaw, &sagaw)) {
1445                 /* hardware doesn't support it, choose a bigger one */
1446                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1447                 agaw = find_next_bit(&sagaw, 5, agaw);
1448                 if (agaw >= 5)
1449                         return -ENODEV;
1450         }
1451         domain->agaw = agaw;
1452         INIT_LIST_HEAD(&domain->devices);
1453
1454         if (ecap_coherent(iommu->ecap))
1455                 domain->iommu_coherency = 1;
1456         else
1457                 domain->iommu_coherency = 0;
1458
1459         if (ecap_sc_support(iommu->ecap))
1460                 domain->iommu_snooping = 1;
1461         else
1462                 domain->iommu_snooping = 0;
1463
1464         domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1465         domain->iommu_count = 1;
1466         domain->nid = iommu->node;
1467
1468         /* always allocate the top pgd */
1469         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1470         if (!domain->pgd)
1471                 return -ENOMEM;
1472         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1473         return 0;
1474 }
1475
1476 static void domain_exit(struct dmar_domain *domain)
1477 {
1478         struct dmar_drhd_unit *drhd;
1479         struct intel_iommu *iommu;
1480
1481         /* Domain 0 is reserved, so don't process it */
1482         if (!domain)
1483                 return;
1484
1485         /* Flush any lazy unmaps that may reference this domain */
1486         if (!intel_iommu_strict)
1487                 flush_unmaps_timeout(0);
1488
1489         domain_remove_dev_info(domain);
1490         /* destroy iovas */
1491         put_iova_domain(&domain->iovad);
1492
1493         /* clear ptes */
1494         dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1495
1496         /* free page tables */
1497         dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1498
1499         for_each_active_iommu(iommu, drhd)
1500                 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1501                         iommu_detach_domain(domain, iommu);
1502
1503         free_domain_mem(domain);
1504 }
1505
1506 static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1507                                  u8 bus, u8 devfn, int translation)
1508 {
1509         struct context_entry *context;
1510         unsigned long flags;
1511         struct intel_iommu *iommu;
1512         struct dma_pte *pgd;
1513         unsigned long num;
1514         unsigned long ndomains;
1515         int id;
1516         int agaw;
1517         struct device_domain_info *info = NULL;
1518
1519         pr_debug("Set context mapping for %02x:%02x.%d\n",
1520                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1521
1522         BUG_ON(!domain->pgd);
1523         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1524                translation != CONTEXT_TT_MULTI_LEVEL);
1525
1526         iommu = device_to_iommu(segment, bus, devfn);
1527         if (!iommu)
1528                 return -ENODEV;
1529
1530         context = device_to_context_entry(iommu, bus, devfn);
1531         if (!context)
1532                 return -ENOMEM;
1533         spin_lock_irqsave(&iommu->lock, flags);
1534         if (context_present(context)) {
1535                 spin_unlock_irqrestore(&iommu->lock, flags);
1536                 return 0;
1537         }
1538
1539         id = domain->id;
1540         pgd = domain->pgd;
1541
1542         if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1543             domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1544                 int found = 0;
1545
1546                 /* find an available domain id for this device in iommu */
1547                 ndomains = cap_ndoms(iommu->cap);
1548                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1549                         if (iommu->domains[num] == domain) {
1550                                 id = num;
1551                                 found = 1;
1552                                 break;
1553                         }
1554                 }
1555
1556                 if (found == 0) {
1557                         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1558                         if (num >= ndomains) {
1559                                 spin_unlock_irqrestore(&iommu->lock, flags);
1560                                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1561                                 return -EFAULT;
1562                         }
1563
1564                         set_bit(num, iommu->domain_ids);
1565                         iommu->domains[num] = domain;
1566                         id = num;
1567                 }
1568
1569                 /* Skip top levels of page tables for
1570                  * iommus which have a smaller agaw than the default.
1571                  * Unnecessary for PT mode.
1572                  */
1573                 if (translation != CONTEXT_TT_PASS_THROUGH) {
1574                         for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1575                                 pgd = phys_to_virt(dma_pte_addr(pgd));
1576                                 if (!dma_pte_present(pgd)) {
1577                                         spin_unlock_irqrestore(&iommu->lock, flags);
1578                                         return -ENOMEM;
1579                                 }
1580                         }
1581                 }
1582         }
1583
1584         context_set_domain_id(context, id);
1585
1586         if (translation != CONTEXT_TT_PASS_THROUGH) {
1587                 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1588                 translation = info ? CONTEXT_TT_DEV_IOTLB :
1589                                      CONTEXT_TT_MULTI_LEVEL;
1590         }
1591         /*
1592          * In pass-through mode, AW must be programmed to indicate the largest
1593          * AGAW value supported by hardware, and ASR is ignored by hardware.
1594          */
1595         if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1596                 context_set_address_width(context, iommu->msagaw);
1597         else {
1598                 context_set_address_root(context, virt_to_phys(pgd));
1599                 context_set_address_width(context, iommu->agaw);
1600         }
1601
1602         context_set_translation_type(context, translation);
1603         context_set_fault_enable(context);
1604         context_set_present(context);
1605         domain_flush_cache(domain, context, sizeof(*context));
1606
1607         /*
1608          * It's a non-present to present mapping. If hardware doesn't cache
1609          * non-present entries we only need to flush the write-buffer. If it
1610          * _does_ cache non-present entries, then it does so in the special
1611          * domain #0, which we have to flush:
1612          */
1613         if (cap_caching_mode(iommu->cap)) {
1614                 iommu->flush.flush_context(iommu, 0,
1615                                            (((u16)bus) << 8) | devfn,
1616                                            DMA_CCMD_MASK_NOBIT,
1617                                            DMA_CCMD_DEVICE_INVL);
1618                 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1619         } else {
1620                 iommu_flush_write_buffer(iommu);
1621         }
1622         iommu_enable_dev_iotlb(info);
1623         spin_unlock_irqrestore(&iommu->lock, flags);
1624
1625         spin_lock_irqsave(&domain->iommu_lock, flags);
1626         if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1627                 domain->iommu_count++;
1628                 if (domain->iommu_count == 1)
1629                         domain->nid = iommu->node;
1630                 domain_update_iommu_cap(domain);
1631         }
1632         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1633         return 0;
1634 }
1635
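/*
 * Set up context mappings for the device itself and for every bridge on the
 * path up to its upstream PCIe-to-PCI bridge, since DMA from devices behind
 * such a bridge is tagged with the bridge's source-id.
 */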
1636 static int
1637 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1638                         int translation)
1639 {
1640         int ret;
1641         struct pci_dev *tmp, *parent;
1642
1643         ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1644                                          pdev->bus->number, pdev->devfn,
1645                                          translation);
1646         if (ret)
1647                 return ret;
1648
1649         /* dependent device mapping */
1650         tmp = pci_find_upstream_pcie_bridge(pdev);
1651         if (!tmp)
1652                 return 0;
1653         /* Secondary interface's bus number and devfn 0 */
1654         parent = pdev->bus->self;
1655         while (parent != tmp) {
1656                 ret = domain_context_mapping_one(domain,
1657                                                  pci_domain_nr(parent->bus),
1658                                                  parent->bus->number,
1659                                                  parent->devfn, translation);
1660                 if (ret)
1661                         return ret;
1662                 parent = parent->bus->self;
1663         }
1664         if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
1665                 return domain_context_mapping_one(domain,
1666                                         pci_domain_nr(tmp->subordinate),
1667                                         tmp->subordinate->number, 0,
1668                                         translation);
1669         else /* this is a legacy PCI bridge */
1670                 return domain_context_mapping_one(domain,
1671                                                   pci_domain_nr(tmp->bus),
1672                                                   tmp->bus->number,
1673                                                   tmp->devfn,
1674                                                   translation);
1675 }
1676
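/*
 * Check whether context entries already exist for the device and for every
 * bridge on the path to its upstream PCIe-to-PCI bridge.  Returns 0 as soon
 * as one entry is found missing, non-zero if all of them are present.
 */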
1677 static int domain_context_mapped(struct pci_dev *pdev)
1678 {
1679         int ret;
1680         struct pci_dev *tmp, *parent;
1681         struct intel_iommu *iommu;
1682
1683         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1684                                 pdev->devfn);
1685         if (!iommu)
1686                 return -ENODEV;
1687
1688         ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1689         if (!ret)
1690                 return ret;
1691         /* dependent device mapping */
1692         tmp = pci_find_upstream_pcie_bridge(pdev);
1693         if (!tmp)
1694                 return ret;
1695         /* Secondary interface's bus number and devfn 0 */
1696         parent = pdev->bus->self;
1697         while (parent != tmp) {
1698                 ret = device_context_mapped(iommu, parent->bus->number,
1699                                             parent->devfn);
1700                 if (!ret)
1701                         return ret;
1702                 parent = parent->bus->self;
1703         }
1704         if (pci_is_pcie(tmp))
1705                 return device_context_mapped(iommu, tmp->subordinate->number,
1706                                              0);
1707         else
1708                 return device_context_mapped(iommu, tmp->bus->number,
1709                                              tmp->devfn);
1710 }
1711
1712 /* Returns a number of VTD pages, but aligned to MM page size */
1713 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1714                                             size_t size)
1715 {
1716         host_addr &= ~PAGE_MASK;
1717         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1718 }
1719
1720 /* Return largest possible superpage level for a given mapping */
1721 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1722                                           unsigned long iov_pfn,
1723                                           unsigned long phy_pfn,
1724                                           unsigned long pages)
1725 {
1726         int support, level = 1;
1727         unsigned long pfnmerge;
1728
1729         support = domain->iommu_superpage;
1730
1731         /* To use a large page, the virtual *and* physical addresses
1732            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1733            of them will mean we have to use smaller pages. So just
1734            merge them and check both at once. */
1735         pfnmerge = iov_pfn | phy_pfn;
1736
1737         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1738                 pages >>= VTD_STRIDE_SHIFT;
1739                 if (!pages)
1740                         break;
1741                 pfnmerge >>= VTD_STRIDE_SHIFT;
1742                 level++;
1743                 support--;
1744         }
1745         return level;
1746 }
1747
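/*
 * Fill in the domain's page tables for @nr_pages VT-d pages starting at
 * @iov_pfn.  The physical pages come either from the scatterlist @sg or from
 * the contiguous range starting at @phys_pfn.  Superpages are used whenever
 * both addresses and the remaining length are suitably aligned.  The caller
 * guarantees that nobody else touches this IOVA range concurrently.
 */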
1748 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1749                             struct scatterlist *sg, unsigned long phys_pfn,
1750                             unsigned long nr_pages, int prot)
1751 {
1752         struct dma_pte *first_pte = NULL, *pte = NULL;
1753         phys_addr_t uninitialized_var(pteval);
1754         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1755         unsigned long sg_res;
1756         unsigned int largepage_lvl = 0;
1757         unsigned long lvl_pages = 0;
1758
1759         BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1760
1761         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1762                 return -EINVAL;
1763
1764         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1765
1766         if (sg)
1767                 sg_res = 0;
1768         else {
1769                 sg_res = nr_pages + 1;
1770                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1771         }
1772
1773         while (nr_pages > 0) {
1774                 uint64_t tmp;
1775
1776                 if (!sg_res) {
1777                         sg_res = aligned_nrpages(sg->offset, sg->length);
1778                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1779                         sg->dma_length = sg->length;
1780                         pteval = page_to_phys(sg_page(sg)) | prot;
1781                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
1782                 }
1783
1784                 if (!pte) {
1785                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1786
1787                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1788                         if (!pte)
1789                                 return -ENOMEM;
1790                         /* It is a large page */
1791                         if (largepage_lvl > 1)
1792                                 pteval |= DMA_PTE_LARGE_PAGE;
1793                         else
1794                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1795
1796                 }
1797                 /* We don't need a lock here; nobody else
1798                  * touches the iova range.
1799                  */
1800                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1801                 if (tmp) {
1802                         static int dumps = 5;
1803                         printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1804                                iov_pfn, tmp, (unsigned long long)pteval);
1805                         if (dumps) {
1806                                 dumps--;
1807                                 debug_dma_dump_mappings(NULL);
1808                         }
1809                         WARN_ON(1);
1810                 }
1811
1812                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1813
1814                 BUG_ON(nr_pages < lvl_pages);
1815                 BUG_ON(sg_res < lvl_pages);
1816
1817                 nr_pages -= lvl_pages;
1818                 iov_pfn += lvl_pages;
1819                 phys_pfn += lvl_pages;
1820                 pteval += lvl_pages * VTD_PAGE_SIZE;
1821                 sg_res -= lvl_pages;
1822
1823                 /* If the next PTE would be the first in a new page, then we
1824                    need to flush the cache on the entries we've just written.
1825                    And then we'll need to recalculate 'pte', so clear it and
1826                    let it get set again in the if (!pte) block above.
1827
1828                    If we're done (!nr_pages) we need to flush the cache too.
1829
1830                    Also if we've been setting superpages, we may need to
1831                    recalculate 'pte' and switch back to smaller pages for the
1832                    end of the mapping, if the trailing size is not enough to
1833                    use another superpage (i.e. sg_res < lvl_pages). */
1834                 pte++;
1835                 if (!nr_pages || first_pte_in_page(pte) ||
1836                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
1837                         domain_flush_cache(domain, first_pte,
1838                                            (void *)pte - (void *)first_pte);
1839                         pte = NULL;
1840                 }
1841
1842                 if (!sg_res && nr_pages)
1843                         sg = sg_next(sg);
1844         }
1845         return 0;
1846 }
1847
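/*
 * Convenience wrappers around __domain_mapping(): domain_sg_mapping() maps a
 * scatterlist, domain_pfn_mapping() maps a contiguous pfn range.
 */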
1848 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1849                                     struct scatterlist *sg, unsigned long nr_pages,
1850                                     int prot)
1851 {
1852         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1853 }
1854
1855 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1856                                      unsigned long phys_pfn, unsigned long nr_pages,
1857                                      int prot)
1858 {
1859         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1860 }
1861
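/*
 * Clear the context entry for (bus, devfn) and do a global context-cache and
 * IOTLB invalidation, so the device is no longer translated by this IOMMU.
 */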
1862 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1863 {
1864         if (!iommu)
1865                 return;
1866
1867         clear_context_table(iommu, bus, devfn);
1868         iommu->flush.flush_context(iommu, 0, 0, 0,
1869                                            DMA_CCMD_GLOBAL_INVL);
1870         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1871 }
1872
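/*
 * Detach every device from the domain: unlink its device_domain_info, clear
 * dev.archdata.iommu, tear down its context entry and free the info struct.
 */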
1873 static void domain_remove_dev_info(struct dmar_domain *domain)
1874 {
1875         struct device_domain_info *info;
1876         unsigned long flags;
1877         struct intel_iommu *iommu;
1878
1879         spin_lock_irqsave(&device_domain_lock, flags);
1880         while (!list_empty(&domain->devices)) {
1881                 info = list_entry(domain->devices.next,
1882                         struct device_domain_info, link);
1883                 list_del(&info->link);
1884                 list_del(&info->global);
1885                 if (info->dev)
1886                         info->dev->dev.archdata.iommu = NULL;
1887                 spin_unlock_irqrestore(&device_domain_lock, flags);
1888
1889                 iommu_disable_dev_iotlb(info);
1890                 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1891                 iommu_detach_dev(iommu, info->bus, info->devfn);
1892                 free_devinfo_mem(info);
1893
1894                 spin_lock_irqsave(&device_domain_lock, flags);
1895         }
1896         spin_unlock_irqrestore(&device_domain_lock, flags);
1897 }
1898
1899 /*
1900  * find_domain
1901  * Note: we use struct pci_dev->dev.archdata.iommu to store the domain info
1902  */
1903 static struct dmar_domain *
1904 find_domain(struct pci_dev *pdev)
1905 {
1906         struct device_domain_info *info;
1907
1908         /* No lock here, assumes no domain exit in normal case */
1909         info = pdev->dev.archdata.iommu;
1910         if (info)
1911                 return info->domain;
1912         return NULL;
1913 }
1914
1915 /* Find or allocate the domain for a device; the returned domain is initialized */
1916 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1917 {
1918         struct dmar_domain *domain, *found = NULL;
1919         struct intel_iommu *iommu;
1920         struct dmar_drhd_unit *drhd;
1921         struct device_domain_info *info, *tmp;
1922         struct pci_dev *dev_tmp;
1923         unsigned long flags;
1924         int bus = 0, devfn = 0;
1925         int segment;
1926         int ret;
1927
1928         domain = find_domain(pdev);
1929         if (domain)
1930                 return domain;
1931
1932         segment = pci_domain_nr(pdev->bus);
1933
1934         dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1935         if (dev_tmp) {
1936                 if (pci_is_pcie(dev_tmp)) {
1937                         bus = dev_tmp->subordinate->number;
1938                         devfn = 0;
1939                 } else {
1940                         bus = dev_tmp->bus->number;
1941                         devfn = dev_tmp->devfn;
1942                 }
1943                 spin_lock_irqsave(&device_domain_lock, flags);
1944                 list_for_each_entry(info, &device_domain_list, global) {
1945                         if (info->segment == segment &&
1946                             info->bus == bus && info->devfn == devfn) {
1947                                 found = info->domain;
1948                                 break;
1949                         }
1950                 }
1951                 spin_unlock_irqrestore(&device_domain_lock, flags);
1952                 /* pcie-pci bridge already has a domain, use it */
1953                 if (found) {
1954                         domain = found;
1955                         goto found_domain;
1956                 }
1957         }
1958
1959         domain = alloc_domain();
1960         if (!domain)
1961                 goto error;
1962
1963         /* Allocate new domain for the device */
1964         drhd = dmar_find_matched_drhd_unit(pdev);
1965         if (!drhd) {
1966                 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1967                         pci_name(pdev));
1968                 return NULL;
1969         }
1970         iommu = drhd->iommu;
1971
1972         ret = iommu_attach_domain(domain, iommu);
1973         if (ret) {
1974                 free_domain_mem(domain);
1975                 goto error;
1976         }
1977
1978         if (domain_init(domain, gaw)) {
1979                 domain_exit(domain);
1980                 goto error;
1981         }
1982
1983         /* register pcie-to-pci device */
1984         if (dev_tmp) {
1985                 info = alloc_devinfo_mem();
1986                 if (!info) {
1987                         domain_exit(domain);
1988                         goto error;
1989                 }
1990                 info->segment = segment;
1991                 info->bus = bus;
1992                 info->devfn = devfn;
1993                 info->dev = NULL;
1994                 info->domain = domain;
1995                 /* This domain is shared by devices under p2p bridge */
1996                 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1997
1998                 /* pcie-to-pci bridge already has a domain, use it */
1999                 found = NULL;
2000                 spin_lock_irqsave(&device_domain_lock, flags);
2001                 list_for_each_entry(tmp, &device_domain_list, global) {
2002                         if (tmp->segment == segment &&
2003                             tmp->bus == bus && tmp->devfn == devfn) {
2004                                 found = tmp->domain;
2005                                 break;
2006                         }
2007                 }
2008                 if (found) {
2009                         spin_unlock_irqrestore(&device_domain_lock, flags);
2010                         free_devinfo_mem(info);
2011                         domain_exit(domain);
2012                         domain = found;
2013                 } else {
2014                         list_add(&info->link, &domain->devices);
2015                         list_add(&info->global, &device_domain_list);
2016                         spin_unlock_irqrestore(&device_domain_lock, flags);
2017                 }
2018         }
2019
2020 found_domain:
2021         info = alloc_devinfo_mem();
2022         if (!info)
2023                 goto error;
2024         info->segment = segment;
2025         info->bus = pdev->bus->number;
2026         info->devfn = pdev->devfn;
2027         info->dev = pdev;
2028         info->domain = domain;
2029         spin_lock_irqsave(&device_domain_lock, flags);
2030         /* somebody else raced us and set up the domain first */
2031         found = find_domain(pdev);
2032         if (found != NULL) {
2033                 spin_unlock_irqrestore(&device_domain_lock, flags);
2034                 if (found != domain) {
2035                         domain_exit(domain);
2036                         domain = found;
2037                 }
2038                 free_devinfo_mem(info);
2039                 return domain;
2040         }
2041         list_add(&info->link, &domain->devices);
2042         list_add(&info->global, &device_domain_list);
2043         pdev->dev.archdata.iommu = info;
2044         spin_unlock_irqrestore(&device_domain_lock, flags);
2045         return domain;
2046 error:
2047         /* recheck it here, maybe others set it */
2048         return find_domain(pdev);
2049 }
2050
2051 static int iommu_identity_mapping;
2052 #define IDENTMAP_ALL            1
2053 #define IDENTMAP_GFX            2
2054 #define IDENTMAP_AZALIA         4
2055
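/*
 * Identity-map the physical range [start, end] into the domain: reserve the
 * matching IOVA range, clear any stale PTEs there and install a 1:1
 * read/write mapping.
 */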
2056 static int iommu_domain_identity_map(struct dmar_domain *domain,
2057                                      unsigned long long start,
2058                                      unsigned long long end)
2059 {
2060         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2061         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2062
2063         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2064                           dma_to_mm_pfn(last_vpfn))) {
2065                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2066                 return -ENOMEM;
2067         }
2068
2069         pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2070                  start, end, domain->id);
2071         /*
2072          * RMRR range might have overlap with physical memory range,
2073          * clear it first
2074          */
2075         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2076
2077         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2078                                   last_vpfn - first_vpfn + 1,
2079                                   DMA_PTE_READ|DMA_PTE_WRITE);
2080 }
2081
2082 static int iommu_prepare_identity_map(struct pci_dev *pdev,
2083                                       unsigned long long start,
2084                                       unsigned long long end)
2085 {
2086         struct dmar_domain *domain;
2087         int ret;
2088
2089         domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2090         if (!domain)
2091                 return -ENOMEM;
2092
2093         /* For _hardware_ passthrough, don't bother. But for software
2094            passthrough, we do it anyway -- it may indicate a memory
2095            range which is reserved in E820 and so didn't get set
2096            up to start with in si_domain */
2097         if (domain == si_domain && hw_pass_through) {
2098                 printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2099                        pci_name(pdev), start, end);
2100                 return 0;
2101         }
2102
2103         printk(KERN_INFO
2104                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2105                pci_name(pdev), start, end);
2106
2107         if (end < start) {
2108                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2109                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2110                         dmi_get_system_info(DMI_BIOS_VENDOR),
2111                         dmi_get_system_info(DMI_BIOS_VERSION),
2112                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2113                 ret = -EIO;
2114                 goto error;
2115         }
2116
2117         if (end >> agaw_to_width(domain->agaw)) {
2118                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2119                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2120                      agaw_to_width(domain->agaw),
2121                      dmi_get_system_info(DMI_BIOS_VENDOR),
2122                      dmi_get_system_info(DMI_BIOS_VERSION),
2123                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2124                 ret = -EIO;
2125                 goto error;
2126         }
2127
2128         ret = iommu_domain_identity_map(domain, start, end);
2129         if (ret)
2130                 goto error;
2131
2132         /* context entry init */
2133         ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2134         if (ret)
2135                 goto error;
2136
2137         return 0;
2138
2139  error:
2140         domain_exit(domain);
2141         return ret;
2142 }
2143
2144 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2145         struct pci_dev *pdev)
2146 {
2147         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2148                 return 0;
2149         return iommu_prepare_identity_map(pdev, rmrr->base_address,
2150                 rmrr->end_address);
2151 }
2152
2153 #ifdef CONFIG_DMAR_FLOPPY_WA
2154 static inline void iommu_prepare_isa(void)
2155 {
2156         struct pci_dev *pdev;
2157         int ret;
2158
2159         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2160         if (!pdev)
2161                 return;
2162
2163         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2164         ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
2165
2166         if (ret)
2167                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2168                        "floppy might not work\n");
2169
2170 }
2171 #else
2172 static inline void iommu_prepare_isa(void)
2173 {
2174         return;
2175 }
2176 #endif /* !CONFIG_DMAR_FLOPPY_WA */
2177
2178 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2179
2180 static int __init si_domain_work_fn(unsigned long start_pfn,
2181                                     unsigned long end_pfn, void *datax)
2182 {
2183         int *ret = datax;
2184
2185         *ret = iommu_domain_identity_map(si_domain,
2186                                          (uint64_t)start_pfn << PAGE_SHIFT,
2187                                          (uint64_t)end_pfn << PAGE_SHIFT);
2188         return *ret;
2189
2190 }
2191
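/*
 * Set up the static identity (si) domain: attach it to every active IOMMU
 * and, unless hardware pass-through is used, identity-map the usable memory
 * of every online node into it.
 */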
2192 static int __init si_domain_init(int hw)
2193 {
2194         struct dmar_drhd_unit *drhd;
2195         struct intel_iommu *iommu;
2196         int nid, ret = 0;
2197
2198         si_domain = alloc_domain();
2199         if (!si_domain)
2200                 return -EFAULT;
2201
2202         pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2203
2204         for_each_active_iommu(iommu, drhd) {
2205                 ret = iommu_attach_domain(si_domain, iommu);
2206                 if (ret) {
2207                         domain_exit(si_domain);
2208                         return -EFAULT;
2209                 }
2210         }
2211
2212         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2213                 domain_exit(si_domain);
2214                 return -EFAULT;
2215         }
2216
2217         si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2218
2219         if (hw)
2220                 return 0;
2221
2222         for_each_online_node(nid) {
2223                 work_with_active_regions(nid, si_domain_work_fn, &ret);
2224                 if (ret)
2225                         return ret;
2226         }
2227
2228         return 0;
2229 }
2230
2231 static void domain_remove_one_dev_info(struct dmar_domain *domain,
2232                                           struct pci_dev *pdev);
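
/* Return non-zero if @pdev is currently attached to the static identity domain */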
2233 static int identity_mapping(struct pci_dev *pdev)
2234 {
2235         struct device_domain_info *info;
2236
2237         if (likely(!iommu_identity_mapping))
2238                 return 0;
2239
2240         info = pdev->dev.archdata.iommu;
2241         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2242                 return (info->domain == si_domain);
2243
2244         return 0;
2245 }
2246
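/*
 * Attach @pdev to @domain: set up its context mapping and link a new
 * device_domain_info into the domain's device list and the global list.
 */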
2247 static int domain_add_dev_info(struct dmar_domain *domain,
2248                                struct pci_dev *pdev,
2249                                int translation)
2250 {
2251         struct device_domain_info *info;
2252         unsigned long flags;
2253         int ret;
2254
2255         info = alloc_devinfo_mem();
2256         if (!info)
2257                 return -ENOMEM;
2258
2259         ret = domain_context_mapping(domain, pdev, translation);
2260         if (ret) {
2261                 free_devinfo_mem(info);
2262                 return ret;
2263         }
2264
2265         info->segment = pci_domain_nr(pdev->bus);
2266         info->bus = pdev->bus->number;
2267         info->devfn = pdev->devfn;
2268         info->dev = pdev;
2269         info->domain = domain;
2270
2271         spin_lock_irqsave(&device_domain_lock, flags);
2272         list_add(&info->link, &domain->devices);
2273         list_add(&info->global, &device_domain_list);
2274         pdev->dev.archdata.iommu = info;
2275         spin_unlock_irqrestore(&device_domain_lock, flags);
2276
2277         return 0;
2278 }
2279
2280 static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2281 {
2282         if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2283                 return 1;
2284
2285         if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2286                 return 1;
2287
2288         if (!(iommu_identity_mapping & IDENTMAP_ALL))
2289                 return 0;
2290
2291         /*
2292          * We want to start off with all devices in the 1:1 domain, and
2293          * take them out later if we find they can't access all of memory.
2294          *
2295          * However, we can't do this for PCI devices behind bridges,
2296          * because all PCI devices behind the same bridge will end up
2297          * with the same source-id on their transactions.
2298          *
2299          * Practically speaking, we can't change things around for these
2300          * devices at run-time, because we can't be sure there'll be no
2301          * DMA transactions in flight for any of their siblings.
2302          * 
2303          * So PCI devices (unless they're on the root bus) as well as
2304          * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2305          * the 1:1 domain, just in _case_ one of their siblings turns out
2306          * not to be able to map all of memory.
2307          */
2308         if (!pci_is_pcie(pdev)) {
2309                 if (!pci_is_root_bus(pdev->bus))
2310                         return 0;
2311                 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2312                         return 0;
2313         } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2314                 return 0;
2315
2316         /* 
2317          * At boot time, we don't yet know if devices will be 64-bit capable.
2318          * Assume that they will -- if they turn out not to be, then we can 
2319          * take them out of the 1:1 domain later.
2320          */
2321         if (!startup) {
2322                 /*
2323                  * If the device's dma_mask is less than the system's memory
2324                  * size then this is not a candidate for identity mapping.
2325                  */
2326                 u64 dma_mask = pdev->dma_mask;
2327
2328                 if (pdev->dev.coherent_dma_mask &&
2329                     pdev->dev.coherent_dma_mask < dma_mask)
2330                         dma_mask = pdev->dev.coherent_dma_mask;
2331
2332                 return dma_mask >= dma_get_required_mask(&pdev->dev);
2333         }
2334
2335         return 1;
2336 }
2337
2338 static int __init iommu_prepare_static_identity_mapping(int hw)
2339 {
2340         struct pci_dev *pdev = NULL;
2341         int ret;
2342
2343         ret = si_domain_init(hw);
2344         if (ret)
2345                 return -EFAULT;
2346
2347         for_each_pci_dev(pdev) {
2348                 /* Skip Host/PCI Bridge devices */
2349                 if (IS_BRIDGE_HOST_DEVICE(pdev))
2350                         continue;
2351                 if (iommu_should_identity_map(pdev, 1)) {
2352                         printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2353                                hw ? "hardware" : "software", pci_name(pdev));
2354
2355                         ret = domain_add_dev_info(si_domain, pdev,
2356                                                      hw ? CONTEXT_TT_PASS_THROUGH :
2357                                                      CONTEXT_TT_MULTI_LEVEL);
2358                         if (ret)
2359                                 return ret;
2360                 }
2361         }
2362
2363         return 0;
2364 }
2365
2366 static int __init init_dmars(void)
2367 {
2368         struct dmar_drhd_unit *drhd;
2369         struct dmar_rmrr_unit *rmrr;
2370         struct pci_dev *pdev;
2371         struct intel_iommu *iommu;
2372         int i, ret;
2373
2374         /*
2375          * for each drhd
2376          *    allocate root
2377          *    initialize and program root entry to not present
2378          * endfor
2379          */
2380         for_each_drhd_unit(drhd) {
2381                 g_num_of_iommus++;
2382                 /*
2383                  * lock not needed, as this is only incremented in the
2384                  * single-threaded kernel __init code path; all other
2385                  * accesses are read-only
2386                  */
2387         }
2388
2389         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2390                         GFP_KERNEL);
2391         if (!g_iommus) {
2392                 printk(KERN_ERR "Allocating global iommu array failed\n");
2393                 ret = -ENOMEM;
2394                 goto error;
2395         }
2396
2397         deferred_flush = kzalloc(g_num_of_iommus *
2398                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2399         if (!deferred_flush) {
2400                 ret = -ENOMEM;
2401                 goto error;
2402         }
2403
2404         for_each_drhd_unit(drhd) {
2405                 if (drhd->ignored)
2406                         continue;
2407
2408                 iommu = drhd->iommu;
2409                 g_iommus[iommu->seq_id] = iommu;
2410
2411                 ret = iommu_init_domains(iommu);
2412                 if (ret)
2413                         goto error;
2414
2415                 /*
2416                  * TBD:
2417                  * we could share the same root & context tables
2418                  * among all IOMMUs. Need to split them later.
2419                  */
2420                 ret = iommu_alloc_root_entry(iommu);
2421                 if (ret) {
2422                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2423                         goto error;
2424                 }
2425                 if (!ecap_pass_through(iommu->ecap))
2426                         hw_pass_through = 0;
2427         }
2428
2429         /*
2430          * Start from a sane IOMMU hardware state.
2431          */
2432         for_each_drhd_unit(drhd) {
2433                 if (drhd->ignored)
2434                         continue;
2435
2436                 iommu = drhd->iommu;
2437
2438                 /*
2439                  * If the queued invalidation is already initialized by us
2440                  * (for example, while enabling interrupt-remapping) then
2441          * things are already rolling from a sane state.
2442                  */
2443                 if (iommu->qi)
2444                         continue;
2445
2446                 /*
2447                  * Clear any previous faults.
2448                  */
2449                 dmar_fault(-1, iommu);
2450                 /*
2451                  * Disable queued invalidation if supported and already enabled
2452                  * before OS handover.
2453                  */
2454                 dmar_disable_qi(iommu);
2455         }
2456
2457         for_each_drhd_unit(drhd) {
2458                 if (drhd->ignored)
2459                         continue;
2460
2461                 iommu = drhd->iommu;
2462
2463                 if (dmar_enable_qi(iommu)) {
2464                         /*
2465                          * Queued Invalidation is not enabled; use Register
2466                          * Based Invalidation instead.
2467                          */
2468                         iommu->flush.flush_context = __iommu_flush_context;
2469                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2470                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2471                                "invalidation\n",
2472                                 iommu->seq_id,
2473                                (unsigned long long)drhd->reg_base_addr);
2474                 } else {
2475                         iommu->flush.flush_context = qi_flush_context;
2476                         iommu->flush.flush_iotlb = qi_flush_iotlb;
2477                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2478                                "invalidation\n",
2479                                 iommu->seq_id,
2480                                (unsigned long long)drhd->reg_base_addr);
2481                 }
2482         }
2483
2484         if (iommu_pass_through)
2485                 iommu_identity_mapping |= IDENTMAP_ALL;
2486
2487 #ifdef CONFIG_DMAR_BROKEN_GFX_WA
2488         iommu_identity_mapping |= IDENTMAP_GFX;
2489 #endif
2490
2491         check_tylersburg_isoch();
2492
2493         /*
2494          * If pass through is not set or not enabled, set up context entries
2495          * for identity mappings for rmrr, gfx, and isa, and may fall back to
2496          * static identity mapping if iommu_identity_mapping is set.
2497          */
2498         if (iommu_identity_mapping) {
2499                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2500                 if (ret) {
2501                         printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2502                         goto error;
2503                 }
2504         }
2505         /*
2506          * For each rmrr
2507          *   for each dev attached to rmrr
2508          *   do
2509          *     locate drhd for dev, alloc domain for dev
2510          *     allocate free domain
2511          *     allocate page table entries for rmrr
2512          *     if context not allocated for bus
2513          *           allocate and init context
2514          *           set present in root table for this bus
2515          *     init context with domain, translation etc
2516          *    endfor
2517          * endfor
2518          */
2519         printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2520         for_each_rmrr_units(rmrr) {
2521                 for (i = 0; i < rmrr->devices_cnt; i++) {
2522                         pdev = rmrr->devices[i];
2523                         /*
2524                          * some BIOSes list non-existent devices in the
2525                          * DMAR table.
2526                          */
2527                         if (!pdev)
2528                                 continue;
2529                         ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2530                         if (ret)
2531                                 printk(KERN_ERR
2532                                        "IOMMU: mapping reserved region failed\n");
2533                 }
2534         }
2535
2536         iommu_prepare_isa();
2537
2538         /*
2539          * for each drhd
2540          *   enable fault log
2541          *   global invalidate context cache
2542          *   global invalidate iotlb
2543          *   enable translation
2544          */
2545         for_each_drhd_unit(drhd) {
2546                 if (drhd->ignored) {
2547                         /*
2548                          * we always have to disable PMRs or DMA may fail on
2549                          * this device
2550                          */
2551                         if (force_on)
2552                                 iommu_disable_protect_mem_regions(drhd->iommu);
2553                         continue;
2554                 }
2555                 iommu = drhd->iommu;
2556
2557                 iommu_flush_write_buffer(iommu);
2558
2559                 ret = dmar_set_interrupt(iommu);
2560                 if (ret)
2561                         goto error;
2562
2563                 iommu_set_root_entry(iommu);
2564
2565                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2566                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2567
2568                 ret = iommu_enable_translation(iommu);
2569                 if (ret)
2570                         goto error;
2571
2572                 iommu_disable_protect_mem_regions(iommu);
2573         }
2574
2575         return 0;
2576 error:
2577         for_each_drhd_unit(drhd) {
2578                 if (drhd->ignored)
2579                         continue;
2580                 iommu = drhd->iommu;
2581                 free_iommu(iommu);
2582         }
2583         kfree(g_iommus);
2584         return ret;
2585 }
2586
2587 /* This takes a number of _MM_ pages, not VTD pages */
2588 static struct iova *intel_alloc_iova(struct device *dev,
2589                                      struct dmar_domain *domain,
2590                                      unsigned long nrpages, uint64_t dma_mask)
2591 {
2592         struct pci_dev *pdev = to_pci_dev(dev);
2593         struct iova *iova = NULL;
2594
2595         /* Restrict dma_mask to the width that the iommu can handle */
2596         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2597
2598         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2599                 /*
2600                  * First try to allocate an io virtual address in
2601                  * DMA_BIT_MASK(32) and if that fails then try allocating
2602                  * from the higher range
2603                  */
2604                 iova = alloc_iova(&domain->iovad, nrpages,
2605                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
2606                 if (iova)
2607                         return iova;
2608         }
2609         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2610         if (unlikely(!iova)) {
2611                 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2612                        nrpages, pci_name(pdev));
2613                 return NULL;
2614         }
2615
2616         return iova;
2617 }
2618
2619 static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2620 {
2621         struct dmar_domain *domain;
2622         int ret;
2623
2624         domain = get_domain_for_dev(pdev,
2625                         DEFAULT_DOMAIN_ADDRESS_WIDTH);
2626         if (!domain) {
2627                 printk(KERN_ERR
2628                         "Allocating domain for %s failed\n", pci_name(pdev));
2629                 return NULL;
2630         }
2631
2632         /* make sure context mapping is ok */
2633         if (unlikely(!domain_context_mapped(pdev))) {
2634                 ret = domain_context_mapping(domain, pdev,
2635                                              CONTEXT_TT_MULTI_LEVEL);
2636                 if (ret) {
2637                         printk(KERN_ERR
2638                                 "Domain context map for %s failed\n",
2639                                 pci_name(pdev));
2640                         return NULL;
2641                 }
2642         }
2643
2644         return domain;
2645 }
2646
2647 static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2648 {
2649         struct device_domain_info *info;
2650
2651         /* No lock here, assumes no domain exit in normal case */
2652         info = dev->dev.archdata.iommu;
2653         if (likely(info))
2654                 return info->domain;
2655
2656         return __get_valid_domain_for_dev(dev);
2657 }
2658
2659 static int iommu_dummy(struct pci_dev *pdev)
2660 {
2661         return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2662 }
2663
2664 /* Check if the pdev needs to go through the non-identity map and unmap process. */
2665 static int iommu_no_mapping(struct device *dev)
2666 {
2667         struct pci_dev *pdev;
2668         int found;
2669
2670         if (unlikely(dev->bus != &pci_bus_type))
2671                 return 1;
2672
2673         pdev = to_pci_dev(dev);
2674         if (iommu_dummy(pdev))
2675                 return 1;
2676
2677         if (!iommu_identity_mapping)
2678                 return 0;
2679
2680         found = identity_mapping(pdev);
2681         if (found) {
2682                 if (iommu_should_identity_map(pdev, 0))
2683                         return 1;
2684                 else {
2685                         /*
2686                          * The 32 bit DMA device is removed from si_domain
2687                          * and falls back to non-identity mapping.
2688                          */
2689                         domain_remove_one_dev_info(si_domain, pdev);
2690                         printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2691                                pci_name(pdev));
2692                         return 0;
2693                 }
2694         } else {
2695                 /*
2696                  * In case a 64 bit DMA device is detached from a VM, the device
2697                  * is put into si_domain for identity mapping.
2698                  */
2699                 if (iommu_should_identity_map(pdev, 0)) {
2700                         int ret;
2701                         ret = domain_add_dev_info(si_domain, pdev,
2702                                                   hw_pass_through ?
2703                                                   CONTEXT_TT_PASS_THROUGH :
2704                                                   CONTEXT_TT_MULTI_LEVEL);
2705                         if (!ret) {
2706                                 printk(KERN_INFO "64bit %s uses identity mapping\n",
2707                                        pci_name(pdev));
2708                                 return 1;
2709                         }
2710                 }
2711         }
2712
2713         return 0;
2714 }
2715
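/*
 * Map [paddr, paddr + size) for DMA: find the device's domain, allocate an
 * IOVA that fits under @dma_mask, install the PTEs and return the resulting
 * bus address (0 on failure).  Devices that need no translation simply get
 * @paddr back.
 */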
2716 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2717                                      size_t size, int dir, u64 dma_mask)
2718 {
2719         struct pci_dev *pdev = to_pci_dev(hwdev);
2720         struct dmar_domain *domain;
2721         phys_addr_t start_paddr;
2722         struct iova *iova;
2723         int prot = 0;
2724         int ret;
2725         struct intel_iommu *iommu;
2726         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2727
2728         BUG_ON(dir == DMA_NONE);
2729
2730         if (iommu_no_mapping(hwdev))
2731                 return paddr;
2732
2733         domain = get_valid_domain_for_dev(pdev);
2734         if (!domain)
2735                 return 0;
2736
2737         iommu = domain_get_iommu(domain);
2738         size = aligned_nrpages(paddr, size);
2739
2740         iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2741         if (!iova)
2742                 goto error;
2743
2744         /*
2745          * Check if DMAR supports zero-length reads on write only
2746          * mappings.
2747          */
2748         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2749                         !cap_zlr(iommu->cap))
2750                 prot |= DMA_PTE_READ;
2751         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2752                 prot |= DMA_PTE_WRITE;
2753         /*
2754          * paddr ~ (paddr + size) might be a partial page; we should map the
2755          * whole page.  Note: if two parts of one page are separately mapped,
2756          * we might have two guest_addr mappings to the same host paddr, but
2757          * this is not a big problem.
2758          */
2759         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2760                                  mm_to_dma_pfn(paddr_pfn), size, prot);
2761         if (ret)
2762                 goto error;
2763
2764         /* it's a non-present to present mapping. Only flush if in caching mode */
2765         if (cap_caching_mode(iommu->cap))
2766                 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2767         else
2768                 iommu_flush_write_buffer(iommu);
2769
2770         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2771         start_paddr += paddr & ~PAGE_MASK;
2772         return start_paddr;
2773
2774 error:
2775         if (iova)
2776                 __free_iova(&domain->iovad, iova);
2777         printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
2778                 pci_name(pdev), size, (unsigned long long)paddr, dir);
2779         return 0;
2780 }
2781
2782 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2783                                  unsigned long offset, size_t size,
2784                                  enum dma_data_direction dir,
2785                                  struct dma_attrs *attrs)
2786 {
2787         return __intel_map_single(dev, page_to_phys(page) + offset, size,
2788                                   dir, to_pci_dev(dev)->dma_mask);
2789 }
2790
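/*
 * Flush all deferred unmaps: for each IOMMU invalidate the IOTLB (globally,
 * or per-range in caching mode) and free the queued IOVAs.  Called with
 * async_umap_flush_lock held.
 */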
2791 static void flush_unmaps(void)
2792 {
2793         int i, j;
2794
2795         timer_on = 0;
2796
2797         /* just flush them all */
2798         for (i = 0; i < g_num_of_iommus; i++) {
2799                 struct intel_iommu *iommu = g_iommus[i];
2800                 if (!iommu)
2801                         continue;
2802
2803                 if (!deferred_flush[i].next)
2804                         continue;
2805
2806                 /* In caching mode, global flushes make emulation expensive */
2807                 if (!cap_caching_mode(iommu->cap))
2808                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2809                                          DMA_TLB_GLOBAL_FLUSH);
2810                 for (j = 0; j < deferred_flush[i].next; j++) {
2811                         unsigned long mask;
2812                         struct iova *iova = deferred_flush[i].iova[j];
2813                         struct dmar_domain *domain = deferred_flush[i].domain[j];
2814
2815                         /* On real hardware multiple invalidations are expensive */
2816                         if (cap_caching_mode(iommu->cap))
2817                                 iommu_flush_iotlb_psi(iommu, domain->id,
2818                                 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2819                         else {
2820                                 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2821                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2822                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2823                         }
2824                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2825                 }
2826                 deferred_flush[i].next = 0;
2827         }
2828
2829         list_size = 0;
2830 }
2831
2832 static void flush_unmaps_timeout(unsigned long data)
2833 {
2834         unsigned long flags;
2835
2836         spin_lock_irqsave(&async_umap_flush_lock, flags);
2837         flush_unmaps();
2838         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2839 }
2840
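/*
 * Queue an IOVA on its IOMMU's deferred-flush list and arm the flush timer
 * if it is not already running; flush synchronously once the global queue
 * reaches HIGH_WATER_MARK.
 */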
2841 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2842 {
2843         unsigned long flags;
2844         int next, iommu_id;
2845         struct intel_iommu *iommu;
2846
2847         spin_lock_irqsave(&async_umap_flush_lock, flags);
2848         if (list_size == HIGH_WATER_MARK)
2849                 flush_unmaps();
2850
2851         iommu = domain_get_iommu(dom);
2852         iommu_id = iommu->seq_id;
2853
2854         next = deferred_flush[iommu_id].next;
2855         deferred_flush[iommu_id].domain[next] = dom;
2856         deferred_flush[iommu_id].iova[next] = iova;
2857         deferred_flush[iommu_id].next++;
2858
2859         if (!timer_on) {
2860                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2861                 timer_on = 1;
2862         }
2863         list_size++;
2864         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2865 }
2866
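/*
 * Undo a mapping created by intel_map_page()/__intel_map_single(): clear the
 * PTEs and page tables for the range, then either flush the IOTLB and free
 * the IOVA immediately (strict mode) or queue it for batched flushing.
 */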
2867 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2868                              size_t size, enum dma_data_direction dir,
2869                              struct dma_attrs *attrs)
2870 {
2871         struct pci_dev *pdev = to_pci_dev(dev);
2872         struct dmar_domain *domain;
2873         unsigned long start_pfn, last_pfn;
2874         struct iova *iova;
2875         struct intel_iommu *iommu;
2876
2877         if (iommu_no_mapping(dev))
2878                 return;
2879
2880         domain = find_domain(pdev);
2881         BUG_ON(!domain);
2882
2883         iommu = domain_get_iommu(domain);
2884
2885         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2886         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2887                       (unsigned long long)dev_addr))
2888                 return;
2889
2890         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2891         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2892
2893         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2894                  pci_name(pdev), start_pfn, last_pfn);
2895
2896         /*  clear the whole page */
2897         dma_pte_clear_range(domain, start_pfn, last_pfn);
2898
2899         /* free page tables */
2900         dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2901
2902         if (intel_iommu_strict) {
2903                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2904                                       last_pfn - start_pfn + 1, 0);
2905                 /* free iova */
2906                 __free_iova(&domain->iovad, iova);
2907         } else {
2908                 add_unmap(domain, iova);
2909                 /*
2910                  * queue up the release of the unmap to save the roughly 1/6th
2911                  * of the cpu time used up by the iotlb flush operation...
2912                  */
2913         }
2914 }
2915
2916 static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2917                                   dma_addr_t *dma_handle, gfp_t flags)
2918 {
2919         void *vaddr;
2920         int order;
2921
2922         size = PAGE_ALIGN(size);
2923         order = get_order(size);
2924
2925         if (!iommu_no_mapping(hwdev))
2926                 flags &= ~(GFP_DMA | GFP_DMA32);
2927         else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2928                 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2929                         flags |= GFP_DMA;
2930                 else
2931                         flags |= GFP_DMA32;
2932         }
2933
2934         vaddr = (void *)__get_free_pages(flags, order);
2935         if (!vaddr)
2936                 return NULL;
2937         memset(vaddr, 0, size);
2938
2939         *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2940                                          DMA_BIDIRECTIONAL,
2941                                          hwdev->coherent_dma_mask);
2942         if (*dma_handle)
2943                 return vaddr;
2944         free_pages((unsigned long)vaddr, order);
2945         return NULL;
2946 }
2947
2948 static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2949                                 dma_addr_t dma_handle)
2950 {
2951         int order;
2952
2953         size = PAGE_ALIGN(size);
2954         order = get_order(size);
2955
2956         intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
2957         free_pages((unsigned long)vaddr, order);
2958 }
2959
2960 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2961                            int nelems, enum dma_data_direction dir,
2962                            struct dma_attrs *attrs)
2963 {
2964         struct pci_dev *pdev = to_pci_dev(hwdev);
2965         struct dmar_domain *domain;
2966         unsigned long start_pfn, last_pfn;
2967         struct iova *iova;
2968         struct intel_iommu *iommu;
2969
2970         if (iommu_no_mapping(hwdev))
2971                 return;
2972
2973         domain = find_domain(pdev);
2974         BUG_ON(!domain);
2975
2976         iommu = domain_get_iommu(domain);
2977
2978         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2979         if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2980                       (unsigned long long)sglist[0].dma_address))
2981                 return;
2982
2983         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2984         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2985
2986         /*  clear the whole page */
2987         dma_pte_clear_range(domain, start_pfn, last_pfn);
2988
2989         /* free page tables */
2990         dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2991
2992         if (intel_iommu_strict) {
2993                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2994                                       last_pfn - start_pfn + 1, 0);
2995                 /* free iova */
2996                 __free_iova(&domain->iovad, iova);
2997         } else {
2998                 add_unmap(domain, iova);
2999                 /*
3000                  * queue up the release of the unmap, saving the ~1/6th of
3001                  * CPU time otherwise spent on the iotlb flush operation...
3002                  */
3003         }
3004 }
3005
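/*
 * For devices that bypass translation, a "mapping" is simply the physical
 * address of each scatterlist segment.
 */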
3006 static int intel_nontranslate_map_sg(struct device *hwdev,
3007         struct scatterlist *sglist, int nelems, int dir)
3008 {
3009         int i;
3010         struct scatterlist *sg;
3011
3012         for_each_sg(sglist, sg, nelems, i) {
3013                 BUG_ON(!sg_page(sg));
3014                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3015                 sg->dma_length = sg->length;
3016         }
3017         return nelems;
3018 }
3019
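/*
 * Map a scatterlist: allocate one IOVA range large enough for all segments,
 * install the PTEs, and flush the IOTLB (caching mode) or the write buffer
 * as required.
 */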
3020 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3021                         enum dma_data_direction dir, struct dma_attrs *attrs)
3022 {
3023         int i;
3024         struct pci_dev *pdev = to_pci_dev(hwdev);
3025         struct dmar_domain *domain;
3026         size_t size = 0;
3027         int prot = 0;
3028         struct iova *iova = NULL;
3029         int ret;
3030         struct scatterlist *sg;
3031         unsigned long start_vpfn;
3032         struct intel_iommu *iommu;
3033
3034         BUG_ON(dir == DMA_NONE);
3035         if (iommu_no_mapping(hwdev))
3036                 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3037
3038         domain = get_valid_domain_for_dev(pdev);
3039         if (!domain)
3040                 return 0;
3041
3042         iommu = domain_get_iommu(domain);
3043
3044         for_each_sg(sglist, sg, nelems, i)
3045                 size += aligned_nrpages(sg->offset, sg->length);
3046
3047         iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3048                                 pdev->dma_mask);
3049         if (!iova) {
3050                 sglist->dma_length = 0;
3051                 return 0;
3052         }
3053
3054         /*
3055          * Check if DMAR supports zero-length reads on write-only
3056          * mappings.
3057          */
3058         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3059                         !cap_zlr(iommu->cap))
3060                 prot |= DMA_PTE_READ;
3061         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3062                 prot |= DMA_PTE_WRITE;
3063
3064         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3065
3066         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3067         if (unlikely(ret)) {
3068                 /*  clear the page */
3069                 dma_pte_clear_range(domain, start_vpfn,
3070                                     start_vpfn + size - 1);
3071                 /* free page tables */
3072                 dma_pte_free_pagetable(domain, start_vpfn,
3073                                        start_vpfn + size - 1);
3074                 /* free iova */
3075                 __free_iova(&domain->iovad, iova);
3076                 return 0;
3077         }
3078
3079         /* it's a non-present to present mapping. Only flush if caching mode */
3080         if (cap_caching_mode(iommu->cap))
3081                 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3082         else
3083                 iommu_flush_write_buffer(iommu);
3084
3085         return nelems;
3086 }
3087
3088 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3089 {
3090         return !dma_addr;
3091 }
3092
3093 struct dma_map_ops intel_dma_ops = {
3094         .alloc_coherent = intel_alloc_coherent,
3095         .free_coherent = intel_free_coherent,
3096         .map_sg = intel_map_sg,
3097         .unmap_sg = intel_unmap_sg,
3098         .map_page = intel_map_page,
3099         .unmap_page = intel_unmap_page,
3100         .mapping_error = intel_mapping_error,
3101 };
3102
3103 static inline int iommu_domain_cache_init(void)
3104 {
3105         int ret = 0;
3106
3107         iommu_domain_cache = kmem_cache_create("iommu_domain",
3108                                          sizeof(struct dmar_domain),
3109                                          0,
3110                                          SLAB_HWCACHE_ALIGN,
3111                                          NULL);
3113         if (!iommu_domain_cache) {
3114                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3115                 ret = -ENOMEM;
3116         }
3117
3118         return ret;
3119 }
3120
3121 static inline int iommu_devinfo_cache_init(void)
3122 {
3123         int ret = 0;
3124
3125         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3126                                          sizeof(struct device_domain_info),
3127                                          0,
3128                                          SLAB_HWCACHE_ALIGN,
3129                                          NULL);
3130         if (!iommu_devinfo_cache) {
3131                 printk(KERN_ERR "Couldn't create devinfo cache\n");
3132                 ret = -ENOMEM;
3133         }
3134
3135         return ret;
3136 }
3137
3138 static inline int iommu_iova_cache_init(void)
3139 {
3140         int ret = 0;
3141
3142         iommu_iova_cache = kmem_cache_create("iommu_iova",
3143                                          sizeof(struct iova),
3144                                          0,
3145                                          SLAB_HWCACHE_ALIGN,
3146                                          NULL);
3147         if (!iommu_iova_cache) {
3148                 printk(KERN_ERR "Couldn't create iova cache\n");
3149                 ret = -ENOMEM;
3150         }
3151
3152         return ret;
3153 }
3154
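/*
 * Create the iova, domain and devinfo slab caches, tearing down whatever
 * was already created if any allocation fails.
 */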
3155 static int __init iommu_init_mempool(void)
3156 {
3157         int ret;
3158         ret = iommu_iova_cache_init();
3159         if (ret)
3160                 return ret;
3161
3162         ret = iommu_domain_cache_init();
3163         if (ret)
3164                 goto domain_error;
3165
3166         ret = iommu_devinfo_cache_init();
3167         if (!ret)
3168                 return ret;
3169
3170         kmem_cache_destroy(iommu_domain_cache);
3171 domain_error:
3172         kmem_cache_destroy(iommu_iova_cache);
3173
3174         return -ENOMEM;
3175 }
3176
3177 static void __init iommu_exit_mempool(void)
3178 {
3179         kmem_cache_destroy(iommu_devinfo_cache);
3180         kmem_cache_destroy(iommu_domain_cache);
3181         kmem_cache_destroy(iommu_iova_cache);
3182
3183 }
3184
3185 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3186 {
3187         struct dmar_drhd_unit *drhd;
3188         u32 vtbar;
3189         int rc;
3190
3191         /* We know that this device on this chipset has its own IOMMU.
3192          * If we find it under a different IOMMU, then the BIOS is lying
3193          * to us. Hope that the IOMMU for this device is actually
3194          * disabled, and it needs no translation...
3195          */
3196         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3197         if (rc) {
3198                 /* "can't" happen */
3199                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3200                 return;
3201         }
3202         vtbar &= 0xffff0000;
3203
3204         /* we know that this iommu should be at offset 0xa000 from vtbar */
3205         drhd = dmar_find_matched_drhd_unit(pdev);
3206         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3207                             TAINT_FIRMWARE_WORKAROUND,
3208                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3209                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3210 }
3211 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3212
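/*
 * Mark DRHD units that cover no PCI devices as ignored.  When graphics
 * mapping is disabled, also ignore units that serve only graphics devices
 * and give those devices dummy (pass-through) domain info.
 */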
3213 static void __init init_no_remapping_devices(void)
3214 {
3215         struct dmar_drhd_unit *drhd;
3216
3217         for_each_drhd_unit(drhd) {
3218                 if (!drhd->include_all) {
3219                         int i;
3220                         for (i = 0; i < drhd->devices_cnt; i++)
3221                                 if (drhd->devices[i] != NULL)
3222                                         break;
3223                         /* ignore DMAR unit if no pci devices exist */
3224                         if (i == drhd->devices_cnt)
3225                                 drhd->ignored = 1;
3226                 }
3227         }
3228
3229         if (dmar_map_gfx)
3230                 return;
3231
3232         for_each_drhd_unit(drhd) {
3233                 int i;
3234                 if (drhd->ignored || drhd->include_all)
3235                         continue;
3236
3237                 for (i = 0; i < drhd->devices_cnt; i++)
3238                         if (drhd->devices[i] &&
3239                                 !IS_GFX_DEVICE(drhd->devices[i]))
3240                                 break;
3241
3242                 if (i < drhd->devices_cnt)
3243                         continue;
3244
3245                 /* bypass IOMMU if it is just for gfx devices */
3246                 drhd->ignored = 1;
3247                 for (i = 0; i < drhd->devices_cnt; i++) {
3248                         if (!drhd->devices[i])
3249                                 continue;
3250                         drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3251                 }
3252         }
3253 }
3254
3255 #ifdef CONFIG_SUSPEND
3256 static int init_iommu_hw(void)
3257 {
3258         struct dmar_drhd_unit *drhd;
3259         struct intel_iommu *iommu = NULL;
3260
3261         for_each_active_iommu(iommu, drhd)
3262                 if (iommu->qi)
3263                         dmar_reenable_qi(iommu);
3264
3265         for_each_iommu(iommu, drhd) {
3266                 if (drhd->ignored) {
3267                         /*
3268                          * we always have to disable PMRs or DMA may fail on
3269                          * this device
3270                          */
3271                         if (force_on)
3272                                 iommu_disable_protect_mem_regions(iommu);
3273                         continue;
3274                 }
3275
3276                 iommu_flush_write_buffer(iommu);
3277
3278                 iommu_set_root_entry(iommu);
3279
3280                 iommu->flush.flush_context(iommu, 0, 0, 0,
3281                                            DMA_CCMD_GLOBAL_INVL);
3282                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3283                                          DMA_TLB_GLOBAL_FLUSH);
3284                 if (iommu_enable_translation(iommu))
3285                         return 1;
3286                 iommu_disable_protect_mem_regions(iommu);
3287         }
3288
3289         return 0;
3290 }
3291
3292 static void iommu_flush_all(void)
3293 {
3294         struct dmar_drhd_unit *drhd;
3295         struct intel_iommu *iommu;
3296
3297         for_each_active_iommu(iommu, drhd) {
3298                 iommu->flush.flush_context(iommu, 0, 0, 0,
3299                                            DMA_CCMD_GLOBAL_INVL);
3300                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3301                                          DMA_TLB_GLOBAL_FLUSH);
3302         }
3303 }
3304
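/*
 * Save the fault-event registers of every active IOMMU and disable
 * translation before the system enters suspend.
 */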
3305 static int iommu_suspend(void)
3306 {
3307         struct dmar_drhd_unit *drhd;
3308         struct intel_iommu *iommu = NULL;
3309         unsigned long flag;
3310
3311         for_each_active_iommu(iommu, drhd) {
3312                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3313                                                  GFP_ATOMIC);
3314                 if (!iommu->iommu_state)
3315                         goto nomem;
3316         }
3317
3318         iommu_flush_all();
3319
3320         for_each_active_iommu(iommu, drhd) {
3321                 iommu_disable_translation(iommu);
3322
3323                 spin_lock_irqsave(&iommu->register_lock, flag);
3324
3325                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3326                         readl(iommu->reg + DMAR_FECTL_REG);
3327                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3328                         readl(iommu->reg + DMAR_FEDATA_REG);
3329                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3330                         readl(iommu->reg + DMAR_FEADDR_REG);
3331                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3332                         readl(iommu->reg + DMAR_FEUADDR_REG);
3333
3334                 spin_unlock_irqrestore(&iommu->register_lock, flag);
3335         }
3336         return 0;
3337
3338 nomem:
3339         for_each_active_iommu(iommu, drhd)
3340                 kfree(iommu->iommu_state);
3341
3342         return -ENOMEM;
3343 }
3344
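/*
 * Re-enable the IOMMU hardware on resume and restore the fault-event
 * registers saved by iommu_suspend().
 */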
3345 static void iommu_resume(void)
3346 {
3347         struct dmar_drhd_unit *drhd;
3348         struct intel_iommu *iommu = NULL;
3349         unsigned long flag;
3350
3351         if (init_iommu_hw()) {
3352                 if (force_on)
3353                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3354                 else
3355                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3356                 return;
3357         }
3358
3359         for_each_active_iommu(iommu, drhd) {
3360
3361                 spin_lock_irqsave(&iommu->register_lock, flag);
3362
3363                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3364                         iommu->reg + DMAR_FECTL_REG);
3365                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3366                         iommu->reg + DMAR_FEDATA_REG);
3367                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3368                         iommu->reg + DMAR_FEADDR_REG);
3369                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3370                         iommu->reg + DMAR_FEUADDR_REG);
3371
3372                 spin_unlock_irqrestore(&iommu->register_lock, flag);
3373         }
3374
3375         for_each_active_iommu(iommu, drhd)
3376                 kfree(iommu->iommu_state);
3377 }
3378
3379 static struct syscore_ops iommu_syscore_ops = {
3380         .resume         = iommu_resume,
3381         .suspend        = iommu_suspend,
3382 };
3383
3384 static void __init init_iommu_pm_ops(void)
3385 {
3386         register_syscore_ops(&iommu_syscore_ops);
3387 }
3388
3389 #else
3390 static inline void init_iommu_pm_ops(void) {}
3391 #endif  /* CONFIG_SUSPEND */
3392
3393 /*
3394  * Here we only respond to a device being unbound from its driver.
3395  *
3396  * A newly added device is not attached to its DMAR domain here yet; that
3397  * happens when the device is mapped to an iova.
3398  */
3399 static int device_notifier(struct notifier_block *nb,
3400                                   unsigned long action, void *data)
3401 {
3402         struct device *dev = data;
3403         struct pci_dev *pdev = to_pci_dev(dev);
3404         struct dmar_domain *domain;
3405
3406         if (iommu_no_mapping(dev))
3407                 return 0;
3408
3409         domain = find_domain(pdev);
3410         if (!domain)
3411                 return 0;
3412
3413         if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3414                 domain_remove_one_dev_info(domain, pdev);
3415
3416                 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3417                     !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3418                     list_empty(&domain->devices))
3419                         domain_exit(domain);
3420         }
3421
3422         return 0;
3423 }
3424
3425 static struct notifier_block device_nb = {
3426         .notifier_call = device_notifier,
3427 };
3428
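/*
 * Main VT-d DMA-remapping initialization, called at boot: parse the DMAR
 * table, set up the iommus, install intel_dma_ops as the DMA API backend,
 * register the iommu-api ops and a PCI bus notifier.
 */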
3429 int __init intel_iommu_init(void)
3430 {
3431         int ret = 0;
3432
3433         /* VT-d is required for a TXT/tboot launch, so enforce that */
3434         force_on = tboot_force_iommu();
3435
3436         if (dmar_table_init()) {
3437                 if (force_on)
3438                         panic("tboot: Failed to initialize DMAR table\n");
3439                 return  -ENODEV;
3440         }
3441
3442         if (dmar_dev_scope_init()) {
3443                 if (force_on)
3444                         panic("tboot: Failed to initialize DMAR device scope\n");
3445                 return  -ENODEV;
3446         }
3447
3448         /*
3449          * Check the need for DMA-remapping initialization now.
3450          * Above initialization will also be used by Interrupt-remapping.
3451          */
3452         if (no_iommu || dmar_disabled)
3453                 return -ENODEV;
3454
3455         if (iommu_init_mempool()) {
3456                 if (force_on)
3457                         panic("tboot: Failed to initialize iommu memory\n");
3458                 return  -ENODEV;
3459         }
3460
3461         if (dmar_init_reserved_ranges()) {
3462                 if (force_on)
3463                         panic("tboot: Failed to reserve iommu ranges\n");
3464                 return  -ENODEV;
3465         }
3466
3467         init_no_remapping_devices();
3468
3469         ret = init_dmars();
3470         if (ret) {
3471                 if (force_on)
3472                         panic("tboot: Failed to initialize DMARs\n");
3473                 printk(KERN_ERR "IOMMU: dmar init failed\n");
3474                 put_iova_domain(&reserved_iova_list);
3475                 iommu_exit_mempool();
3476                 return ret;
3477         }
3478         printk(KERN_INFO
3479         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3480
3481         init_timer(&unmap_timer);
3482 #ifdef CONFIG_SWIOTLB
3483         swiotlb = 0;
3484 #endif
3485         dma_ops = &intel_dma_ops;
3486
3487         init_iommu_pm_ops();
3488
3489         register_iommu(&intel_iommu_ops);
3490
3491         bus_register_notifier(&pci_bus_type, &device_nb);
3492
3493         return 0;
3494 }
3495
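/*
 * When a device sits behind a PCIe-to-PCI bridge, context entries were also
 * set up for the upstream bridges; detach those as well.
 */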
3496 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3497                                            struct pci_dev *pdev)
3498 {
3499         struct pci_dev *tmp, *parent;
3500
3501         if (!iommu || !pdev)
3502                 return;
3503
3504         /* dependent device detach */
3505         tmp = pci_find_upstream_pcie_bridge(pdev);
3506         /* Secondary interface's bus number and devfn 0 */
3507         if (tmp) {
3508                 parent = pdev->bus->self;
3509                 while (parent != tmp) {
3510                         iommu_detach_dev(iommu, parent->bus->number,
3511                                          parent->devfn);
3512                         parent = parent->bus->self;
3513                 }
3514                 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3515                         iommu_detach_dev(iommu,
3516                                 tmp->subordinate->number, 0);
3517                 else /* this is a legacy PCI bridge */
3518                         iommu_detach_dev(iommu, tmp->bus->number,
3519                                          tmp->devfn);
3520         }
3521 }
3522
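/*
 * Remove a single device from a domain.  If it was the last device handled
 * by its IOMMU, clear that IOMMU from the domain's bitmap and, for non-VM,
 * non-identity domains, release the domain id on that IOMMU.
 */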
3523 static void domain_remove_one_dev_info(struct dmar_domain *domain,
3524                                           struct pci_dev *pdev)
3525 {
3526         struct device_domain_info *info;
3527         struct intel_iommu *iommu;
3528         unsigned long flags;
3529         int found = 0;
3530         struct list_head *entry, *tmp;
3531
3532         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3533                                 pdev->devfn);
3534         if (!iommu)
3535                 return;
3536
3537         spin_lock_irqsave(&device_domain_lock, flags);
3538         list_for_each_safe(entry, tmp, &domain->devices) {
3539                 info = list_entry(entry, struct device_domain_info, link);
3540                 if (info->segment == pci_domain_nr(pdev->bus) &&
3541                     info->bus == pdev->bus->number &&
3542                     info->devfn == pdev->devfn) {
3543                         list_del(&info->link);
3544                         list_del(&info->global);
3545                         if (info->dev)
3546                                 info->dev->dev.archdata.iommu = NULL;
3547                         spin_unlock_irqrestore(&device_domain_lock, flags);
3548
3549                         iommu_disable_dev_iotlb(info);
3550                         iommu_detach_dev(iommu, info->bus, info->devfn);
3551                         iommu_detach_dependent_devices(iommu, pdev);
3552                         free_devinfo_mem(info);
3553
3554                         spin_lock_irqsave(&device_domain_lock, flags);
3555
3556                         if (found)
3557                                 break;
3558                         else
3559                                 continue;
3560                 }
3561
3562                 /* if there are no other devices under the same iommu
3563                  * owned by this domain, clear this iommu in iommu_bmp and
3564                  * update the iommu count and coherency
3565                  */
3566                 if (iommu == device_to_iommu(info->segment, info->bus,
3567                                             info->devfn))
3568                         found = 1;
3569         }
3570
3571         if (found == 0) {
3572                 unsigned long tmp_flags;
3573                 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3574                 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3575                 domain->iommu_count--;
3576                 domain_update_iommu_cap(domain);
3577                 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3578
3579                 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3580                     !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3581                         spin_lock_irqsave(&iommu->lock, tmp_flags);
3582                         clear_bit(domain->id, iommu->domain_ids);
3583                         iommu->domains[domain->id] = NULL;
3584                         spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3585                 }
3586         }
3587
3588         spin_unlock_irqrestore(&device_domain_lock, flags);
3589 }
3590
3591 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3592 {
3593         struct device_domain_info *info;
3594         struct intel_iommu *iommu;
3595         unsigned long flags1, flags2;
3596
3597         spin_lock_irqsave(&device_domain_lock, flags1);
3598         while (!list_empty(&domain->devices)) {
3599                 info = list_entry(domain->devices.next,
3600                         struct device_domain_info, link);
3601                 list_del(&info->link);
3602                 list_del(&info->global);
3603                 if (info->dev)
3604                         info->dev->dev.archdata.iommu = NULL;
3605
3606                 spin_unlock_irqrestore(&device_domain_lock, flags1);
3607
3608                 iommu_disable_dev_iotlb(info);
3609                 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3610                 iommu_detach_dev(iommu, info->bus, info->devfn);
3611                 iommu_detach_dependent_devices(iommu, info->dev);
3612
3613                 /* clear this iommu in iommu_bmp, update iommu count
3614                  * and capabilities
3615                  */
3616                 spin_lock_irqsave(&domain->iommu_lock, flags2);
3617                 if (test_and_clear_bit(iommu->seq_id,
3618                                        &domain->iommu_bmp)) {
3619                         domain->iommu_count--;
3620                         domain_update_iommu_cap(domain);
3621                 }
3622                 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3623
3624                 free_devinfo_mem(info);
3625                 spin_lock_irqsave(&device_domain_lock, flags1);
3626         }
3627         spin_unlock_irqrestore(&device_domain_lock, flags1);
3628 }
3629
3630 /* domain id for virtual machine, it won't be set in context */
3631 static unsigned long vm_domid;
3632
3633 static struct dmar_domain *iommu_alloc_vm_domain(void)
3634 {
3635         struct dmar_domain *domain;
3636
3637         domain = alloc_domain_mem();
3638         if (!domain)
3639                 return NULL;
3640
3641         domain->id = vm_domid++;
3642         domain->nid = -1;
3643         memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3644         domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3645
3646         return domain;
3647 }
3648
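/*
 * Initialize an iommu-api (virtual machine) domain: set up its IOVA
 * allocator, compute the adjusted guest address width and allocate the
 * top-level page table.
 */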
3649 static int md_domain_init(struct dmar_domain *domain, int guest_width)
3650 {
3651         int adjust_width;
3652
3653         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3654         spin_lock_init(&domain->iommu_lock);
3655
3656         domain_reserve_special_ranges(domain);
3657
3658         /* calculate AGAW */
3659         domain->gaw = guest_width;
3660         adjust_width = guestwidth_to_adjustwidth(guest_width);
3661         domain->agaw = width_to_agaw(adjust_width);
3662
3663         INIT_LIST_HEAD(&domain->devices);
3664
3665         domain->iommu_count = 0;
3666         domain->iommu_coherency = 0;
3667         domain->iommu_snooping = 0;
3668         domain->iommu_superpage = 0;
3669         domain->max_addr = 0;
3670         domain->nid = -1;
3671
3672         /* always allocate the top pgd */
3673         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3674         if (!domain->pgd)
3675                 return -ENOMEM;
3676         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3677         return 0;
3678 }
3679
3680 static void iommu_free_vm_domain(struct dmar_domain *domain)
3681 {
3682         unsigned long flags;
3683         struct dmar_drhd_unit *drhd;
3684         struct intel_iommu *iommu;
3685         unsigned long i;
3686         unsigned long ndomains;
3687
3688         for_each_drhd_unit(drhd) {
3689                 if (drhd->ignored)
3690                         continue;
3691                 iommu = drhd->iommu;
3692
3693                 ndomains = cap_ndoms(iommu->cap);
3694                 for_each_set_bit(i, iommu->domain_ids, ndomains) {
3695                         if (iommu->domains[i] == domain) {
3696                                 spin_lock_irqsave(&iommu->lock, flags);
3697                                 clear_bit(i, iommu->domain_ids);
3698                                 iommu->domains[i] = NULL;
3699                                 spin_unlock_irqrestore(&iommu->lock, flags);
3700                                 break;
3701                         }
3702                 }
3703         }
3704 }
3705
3706 static void vm_domain_exit(struct dmar_domain *domain)
3707 {
3708         /* Domain 0 is reserved, so don't process it */
3709         if (!domain)
3710                 return;
3711
3712         vm_domain_remove_all_dev_info(domain);
3713         /* destroy iovas */
3714         put_iova_domain(&domain->iovad);
3715
3716         /* clear ptes */
3717         dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3718
3719         /* free page tables */
3720         dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3721
3722         iommu_free_vm_domain(domain);
3723         free_domain_mem(domain);
3724 }
3725
3726 static int intel_iommu_domain_init(struct iommu_domain *domain)
3727 {
3728         struct dmar_domain *dmar_domain;
3729
3730         dmar_domain = iommu_alloc_vm_domain();
3731         if (!dmar_domain) {
3732                 printk(KERN_ERR
3733                         "intel_iommu_domain_init: dmar_domain == NULL\n");
3734                 return -ENOMEM;
3735         }
3736         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3737                 printk(KERN_ERR
3738                         "intel_iommu_domain_init() failed\n");
3739                 vm_domain_exit(dmar_domain);
3740                 return -ENOMEM;
3741         }
3742         domain->priv = dmar_domain;
3743
3744         return 0;
3745 }
3746
3747 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3748 {
3749         struct dmar_domain *dmar_domain = domain->priv;
3750
3751         domain->priv = NULL;
3752         vm_domain_exit(dmar_domain);
3753 }
3754
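/*
 * Attach a device to an iommu-api domain: drop any previous mapping, check
 * that the IOMMU's address width covers the domain's highest mapped
 * address, trim excess page-table levels and register the device.
 */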
3755 static int intel_iommu_attach_device(struct iommu_domain *domain,
3756                                      struct device *dev)
3757 {
3758         struct dmar_domain *dmar_domain = domain->priv;
3759         struct pci_dev *pdev = to_pci_dev(dev);
3760         struct intel_iommu *iommu;
3761         int addr_width;
3762
3763         /* normally pdev is not mapped */
3764         if (unlikely(domain_context_mapped(pdev))) {
3765                 struct dmar_domain *old_domain;
3766
3767                 old_domain = find_domain(pdev);
3768                 if (old_domain) {
3769                         if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3770                             dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3771                                 domain_remove_one_dev_info(old_domain, pdev);
3772                         else
3773                                 domain_remove_dev_info(old_domain);
3774                 }
3775         }
3776
3777         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3778                                 pdev->devfn);
3779         if (!iommu)
3780                 return -ENODEV;
3781
3782         /* check if this iommu agaw is sufficient for max mapped address */
3783         addr_width = agaw_to_width(iommu->agaw);
3784         if (addr_width > cap_mgaw(iommu->cap))
3785                 addr_width = cap_mgaw(iommu->cap);
3786
3787         if (dmar_domain->max_addr > (1LL << addr_width)) {
3788                 printk(KERN_ERR "%s: iommu width (%d) is not "
3789                        "sufficient for the mapped address (%llx)\n",
3790                        __func__, addr_width, dmar_domain->max_addr);
3791                 return -EFAULT;
3792         }
3793         dmar_domain->gaw = addr_width;
3794
3795         /*
3796          * Knock out extra levels of page tables if necessary
3797          */
3798         while (iommu->agaw < dmar_domain->agaw) {
3799                 struct dma_pte *pte;
3800
3801                 pte = dmar_domain->pgd;
3802                 if (dma_pte_present(pte)) {
3803                         dmar_domain->pgd = (struct dma_pte *)
3804                                 phys_to_virt(dma_pte_addr(pte));
3805                         free_pgtable_page(pte);
3806                 }
3807                 dmar_domain->agaw--;
3808         }
3809
3810         return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
3811 }
3812
3813 static void intel_iommu_detach_device(struct iommu_domain *domain,
3814                                       struct device *dev)
3815 {
3816         struct dmar_domain *dmar_domain = domain->priv;
3817         struct pci_dev *pdev = to_pci_dev(dev);
3818
3819         domain_remove_one_dev_info(dmar_domain, pdev);
3820 }
3821
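/*
 * Map an (iova, hpa) range for the iommu API: translate IOMMU_* permissions
 * into DMA PTE bits, grow the domain's max_addr if needed and install the
 * page-table entries.
 */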
3822 static int intel_iommu_map(struct iommu_domain *domain,
3823                            unsigned long iova, phys_addr_t hpa,
3824                            int gfp_order, int iommu_prot)
3825 {
3826         struct dmar_domain *dmar_domain = domain->priv;
3827         u64 max_addr;
3828         int prot = 0;
3829         size_t size;
3830         int ret;
3831
3832         if (iommu_prot & IOMMU_READ)
3833                 prot |= DMA_PTE_READ;
3834         if (iommu_prot & IOMMU_WRITE)
3835                 prot |= DMA_PTE_WRITE;
3836         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3837                 prot |= DMA_PTE_SNP;
3838
3839         size     = PAGE_SIZE << gfp_order;
3840         max_addr = iova + size;
3841         if (dmar_domain->max_addr < max_addr) {
3842                 u64 end;
3843
3844                 /* check if minimum agaw is sufficient for mapped address */
3845                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
3846                 if (end < max_addr) {
3847                         printk(KERN_ERR "%s: iommu width (%d) is not "
3848                                "sufficient for the mapped address (%llx)\n",
3849                                __func__, dmar_domain->gaw, max_addr);
3850                         return -EFAULT;
3851                 }
3852                 dmar_domain->max_addr = max_addr;
3853         }
3854         /* Round up size to next multiple of PAGE_SIZE, if it and
3855            the low bits of hpa would take us onto the next page */
3856         size = aligned_nrpages(hpa, size);
3857         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3858                                  hpa >> VTD_PAGE_SHIFT, size, prot);
3859         return ret;
3860 }
3861
3862 static int intel_iommu_unmap(struct iommu_domain *domain,
3863                              unsigned long iova, int gfp_order)
3864 {
3865         struct dmar_domain *dmar_domain = domain->priv;
3866         size_t size = PAGE_SIZE << gfp_order;
3867
3868         dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3869                             (iova + size - 1) >> VTD_PAGE_SHIFT);
3870
3871         if (dmar_domain->max_addr == iova + size)
3872                 dmar_domain->max_addr = iova;
3873
3874         return gfp_order;
3875 }
3876
3877 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3878                                             unsigned long iova)
3879 {
3880         struct dmar_domain *dmar_domain = domain->priv;
3881         struct dma_pte *pte;
3882         u64 phys = 0;
3883
3884         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
3885         if (pte)
3886                 phys = dma_pte_addr(pte);
3887
3888         return phys;
3889 }
3890
3891 static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
3892                                       unsigned long cap)
3893 {
3894         struct dmar_domain *dmar_domain = domain->priv;
3895
3896         if (cap == IOMMU_CAP_CACHE_COHERENCY)
3897                 return dmar_domain->iommu_snooping;
3898         if (cap == IOMMU_CAP_INTR_REMAP)
3899                 return intr_remapping_enabled;
3900
3901         return 0;
3902 }
3903
3904 static struct iommu_ops intel_iommu_ops = {
3905         .domain_init    = intel_iommu_domain_init,
3906         .domain_destroy = intel_iommu_domain_destroy,
3907         .attach_dev     = intel_iommu_attach_device,
3908         .detach_dev     = intel_iommu_detach_device,
3909         .map            = intel_iommu_map,
3910         .unmap          = intel_iommu_unmap,
3911         .iova_to_phys   = intel_iommu_iova_to_phys,
3912         .domain_has_cap = intel_iommu_domain_has_cap,
3913 };
3914
3915 static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
3916 {
3917         /*
3918          * Mobile 4 Series Chipset neglects to set RWBF capability,
3919          * but needs it:
3920          */
3921         printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
3922         rwbf_quirk = 1;
3923
3924         /* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
3925         if (dev->revision == 0x07) {
3926                 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
3927                 dmar_map_gfx = 0;
3928         }
3929 }
3930
3931 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
3932
3933 #define GGC 0x52
3934 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
3935 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
3936 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
3937 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
3938 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
3939 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
3940 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
3941 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
3942
3943 static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
3944 {
3945         unsigned short ggc;
3946
3947         if (pci_read_config_word(dev, GGC, &ggc))
3948                 return;
3949
3950         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
3951                 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
3952                 dmar_map_gfx = 0;
3953         }
3954 }
3955 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
3956 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
3957 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
3958 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
3959
3960 /* On Tylersburg chipsets, some BIOSes have been known to enable the
3961    ISOCH DMAR unit for the Azalia sound device, but not give it any
3962    TLB entries, which causes it to deadlock. Check for that.  We do
3963    this in a function called from init_dmars(), instead of in a PCI
3964    quirk, because we don't want to print the obnoxious "BIOS broken"
3965    message if VT-d is actually disabled.
3966 */
3967 static void __init check_tylersburg_isoch(void)
3968 {
3969         struct pci_dev *pdev;
3970         uint32_t vtisochctrl;
3971
3972         /* If there's no Azalia in the system anyway, forget it. */
3973         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
3974         if (!pdev)
3975                 return;
3976         pci_dev_put(pdev);
3977
3978         /* System Management Registers. Might be hidden, in which case
3979            we can't do the sanity check. But that's OK, because the
3980            known-broken BIOSes _don't_ actually hide it, so far. */
3981         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
3982         if (!pdev)
3983                 return;
3984
3985         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
3986                 pci_dev_put(pdev);
3987                 return;
3988         }
3989
3990         pci_dev_put(pdev);
3991
3992         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
3993         if (vtisochctrl & 1)
3994                 return;
3995
3996         /* Drop all bits other than the number of TLB entries */
3997         vtisochctrl &= 0x1c;
3998
3999         /* If we have the recommended number of TLB entries (16), fine. */
4000         if (vtisochctrl == 0x10)
4001                 return;
4002
4003         /* Zero TLB entries? You get to ride the short bus to school. */
4004         if (!vtisochctrl) {
4005                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4006                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4007                      dmi_get_system_info(DMI_BIOS_VENDOR),
4008                      dmi_get_system_info(DMI_BIOS_VERSION),
4009                      dmi_get_system_info(DMI_PRODUCT_VERSION));
4010                 iommu_identity_mapping |= IDENTMAP_AZALIA;
4011                 return;
4012         }
4013
4014         printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4015                vtisochctrl);
4016 }