[pandora-kernel.git] / drivers / pci / intel-iommu.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  * Author: Fenghua Yu <fenghua.yu@intel.com>
22  */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/iommu.h>
38 #include <linux/intel-iommu.h>
39 #include <linux/syscore_ops.h>
40 #include <linux/tboot.h>
41 #include <linux/dmi.h>
42 #include <linux/pci-ats.h>
43 #include <asm/cacheflush.h>
44 #include <asm/iommu.h>
45 #include "pci.h"
46
47 #define ROOT_SIZE               VTD_PAGE_SIZE
48 #define CONTEXT_SIZE            VTD_PAGE_SIZE
49
50 #define IS_BRIDGE_HOST_DEVICE(pdev) \
51                             ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
52 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
54 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
55
56 #define IOAPIC_RANGE_START      (0xfee00000)
57 #define IOAPIC_RANGE_END        (0xfeefffff)
58 #define IOVA_START_ADDR         (0x1000)
59
60 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
62 #define MAX_AGAW_WIDTH 64
63
64 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
65 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
66
67 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
68    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
69 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
70                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
71 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
72
73 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
74 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
75 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
76
77 /* page table handling */
78 #define LEVEL_STRIDE            (9)
79 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
80
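/*
 * AGAW/level helpers: an adjusted guest address width (agaw) of 0 means a
 * 2-level, 30-bit page table; each increment adds one level and LEVEL_STRIDE
 * (9) bits of width, e.g. agaw 2 gives 4 levels and a 48-bit address width.
 */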
81 static inline int agaw_to_level(int agaw)
82 {
83         return agaw + 2;
84 }
85
86 static inline int agaw_to_width(int agaw)
87 {
88         return 30 + agaw * LEVEL_STRIDE;
89 }
90
91 static inline int width_to_agaw(int width)
92 {
93         return (width - 30) / LEVEL_STRIDE;
94 }
95
96 static inline unsigned int level_to_offset_bits(int level)
97 {
98         return (level - 1) * LEVEL_STRIDE;
99 }
100
101 static inline int pfn_level_offset(unsigned long pfn, int level)
102 {
103         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
104 }
105
106 static inline unsigned long level_mask(int level)
107 {
108         return -1UL << level_to_offset_bits(level);
109 }
110
111 static inline unsigned long level_size(int level)
112 {
113         return 1UL << level_to_offset_bits(level);
114 }
115
116 static inline unsigned long align_to_level(unsigned long pfn, int level)
117 {
118         return (pfn + level_size(level) - 1) & level_mask(level);
119 }
120
121 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
122 {
123         return  1 << ((lvl - 1) * LEVEL_STRIDE);
124 }
125
126 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
127    are never going to work. */
128 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
129 {
130         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
131 }
132
133 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
134 {
135         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
136 }
137 static inline unsigned long page_to_dma_pfn(struct page *pg)
138 {
139         return mm_to_dma_pfn(page_to_pfn(pg));
140 }
141 static inline unsigned long virt_to_dma_pfn(void *p)
142 {
143         return page_to_dma_pfn(virt_to_page(p));
144 }
145
146 /* global iommu list, set NULL for ignored DMAR units */
147 static struct intel_iommu **g_iommus;
148
149 static void __init check_tylersburg_isoch(void);
150 static int rwbf_quirk;
151
152 /*
153  * set to 1 to panic the kernel if VT-d cannot be successfully enabled
154  * (used when the kernel is launched with TXT)
155  */
156 static int force_on = 0;
157
158 /*
159  * 0: Present
160  * 1-11: Reserved
161  * 12-63: Context Ptr (12 - (haw-1))
162  * 64-127: Reserved
163  */
164 struct root_entry {
165         u64     val;
166         u64     rsvd1;
167 };
168 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
169 static inline bool root_present(struct root_entry *root)
170 {
171         return (root->val & 1);
172 }
173 static inline void set_root_present(struct root_entry *root)
174 {
175         root->val |= 1;
176 }
177 static inline void set_root_value(struct root_entry *root, unsigned long value)
178 {
179         root->val |= value & VTD_PAGE_MASK;
180 }
181
182 static inline struct context_entry *
183 get_context_addr_from_root(struct root_entry *root)
184 {
185         return (struct context_entry *)
186                 (root_present(root)?phys_to_virt(
187                 root->val & VTD_PAGE_MASK) :
188                 NULL);
189 }
190
191 /*
192  * low 64 bits:
193  * 0: present
194  * 1: fault processing disable
195  * 2-3: translation type
196  * 12-63: address space root
197  * high 64 bits:
198  * 0-2: address width
199  * 3-6: aval
200  * 8-23: domain id
201  */
202 struct context_entry {
203         u64 lo;
204         u64 hi;
205 };
206
207 static inline bool context_present(struct context_entry *context)
208 {
209         return (context->lo & 1);
210 }
211 static inline void context_set_present(struct context_entry *context)
212 {
213         context->lo |= 1;
214 }
215
216 static inline void context_set_fault_enable(struct context_entry *context)
217 {
218         context->lo &= (((u64)-1) << 2) | 1;
219 }
220
221 static inline void context_set_translation_type(struct context_entry *context,
222                                                 unsigned long value)
223 {
224         context->lo &= (((u64)-1) << 4) | 3;
225         context->lo |= (value & 3) << 2;
226 }
227
228 static inline void context_set_address_root(struct context_entry *context,
229                                             unsigned long value)
230 {
231         context->lo |= value & VTD_PAGE_MASK;
232 }
233
234 static inline void context_set_address_width(struct context_entry *context,
235                                              unsigned long value)
236 {
237         context->hi |= value & 7;
238 }
239
240 static inline void context_set_domain_id(struct context_entry *context,
241                                          unsigned long value)
242 {
243         context->hi |= (value & ((1 << 16) - 1)) << 8;
244 }
245
246 static inline void context_clear_entry(struct context_entry *context)
247 {
248         context->lo = 0;
249         context->hi = 0;
250 }
251
252 /*
253  * 0: readable
254  * 1: writable
255  * 2-6: reserved
256  * 7: super page
257  * 8-10: available
258  * 11: snoop behavior
259  * 12-63: Host physical address
260  */
261 struct dma_pte {
262         u64 val;
263 };
264
265 static inline void dma_clear_pte(struct dma_pte *pte)
266 {
267         pte->val = 0;
268 }
269
270 static inline void dma_set_pte_readable(struct dma_pte *pte)
271 {
272         pte->val |= DMA_PTE_READ;
273 }
274
275 static inline void dma_set_pte_writable(struct dma_pte *pte)
276 {
277         pte->val |= DMA_PTE_WRITE;
278 }
279
280 static inline void dma_set_pte_snp(struct dma_pte *pte)
281 {
282         pte->val |= DMA_PTE_SNP;
283 }
284
285 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
286 {
287         pte->val = (pte->val & ~3) | (prot & 3);
288 }
289
290 static inline u64 dma_pte_addr(struct dma_pte *pte)
291 {
292 #ifdef CONFIG_64BIT
293         return pte->val & VTD_PAGE_MASK;
294 #else
295         /* Must have a full atomic 64-bit read */
296         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
297 #endif
298 }
299
300 static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
301 {
302         pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
303 }
304
305 static inline bool dma_pte_present(struct dma_pte *pte)
306 {
307         return (pte->val & 3) != 0;
308 }
309
310 static inline int first_pte_in_page(struct dma_pte *pte)
311 {
312         return !((unsigned long)pte & ~VTD_PAGE_MASK);
313 }
314
315 /*
316  * This domain is a static identity mapping domain.
317  *      1. This domain creates a static 1:1 mapping to all usable memory.
318  *      2. It maps to each iommu if successful.
319  *      3. Each iommu maps to this domain if successful.
320  */
321 static struct dmar_domain *si_domain;
322 static int hw_pass_through = 1;
323
324 /* devices under the same p2p bridge are owned in one domain */
325 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
326
327 /* domain represents a virtual machine: more than one device
328  * across iommus may be owned by one domain, e.g. a kvm guest.
329  */
330 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 1)
331
332 /* si_domain contains multiple devices */
333 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 2)
334
335 struct dmar_domain {
336         int     id;                     /* domain id */
337         int     nid;                    /* node id */
338         unsigned long iommu_bmp;        /* bitmap of iommus this domain uses*/
339
340         struct list_head devices;       /* all devices' list */
341         struct iova_domain iovad;       /* iova's that belong to this domain */
342
343         struct dma_pte  *pgd;           /* virtual address */
344         int             gaw;            /* max guest address width */
345
346         /* adjusted guest address width, 0 is level 2 30-bit */
347         int             agaw;
348
349         int             flags;          /* flags to find out type of domain */
350
351         int             iommu_coherency;/* indicate coherency of iommu access */
352         int             iommu_snooping; /* indicate snooping control feature*/
353         int             iommu_count;    /* reference count of iommu */
354         int             iommu_superpage;/* Level of superpages supported:
355                                            0 == 4KiB (no superpages), 1 == 2MiB,
356                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
357         spinlock_t      iommu_lock;     /* protect iommu set in domain */
358         u64             max_addr;       /* maximum mapped address */
359 };
360
361 /* PCI domain-device relationship */
362 struct device_domain_info {
363         struct list_head link;  /* link to domain siblings */
364         struct list_head global; /* link to global list */
365         int segment;            /* PCI domain */
366         u8 bus;                 /* PCI bus number */
367         u8 devfn;               /* PCI devfn number */
368         struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
369         struct intel_iommu *iommu; /* IOMMU used by this device */
370         struct dmar_domain *domain; /* pointer to domain */
371 };
372
373 static void flush_unmaps_timeout(unsigned long data);
374
375 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
376
377 #define HIGH_WATER_MARK 250
378 struct deferred_flush_tables {
379         int next;
380         struct iova *iova[HIGH_WATER_MARK];
381         struct dmar_domain *domain[HIGH_WATER_MARK];
382 };
383
384 static struct deferred_flush_tables *deferred_flush;
385
386 /* bitmap for indexing intel_iommus */
387 static int g_num_of_iommus;
388
389 static DEFINE_SPINLOCK(async_umap_flush_lock);
390 static LIST_HEAD(unmaps_to_do);
391
392 static int timer_on;
393 static long list_size;
394
395 static void domain_remove_dev_info(struct dmar_domain *domain);
396
397 #ifdef CONFIG_DMAR_DEFAULT_ON
398 int dmar_disabled = 0;
399 #else
400 int dmar_disabled = 1;
401 #endif /*CONFIG_DMAR_DEFAULT_ON*/
402
403 static int dmar_map_gfx = 1;
404 static int dmar_forcedac;
405 static int intel_iommu_strict;
406 static int intel_iommu_superpage = 1;
407
408 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
409 static DEFINE_SPINLOCK(device_domain_lock);
410 static LIST_HEAD(device_domain_list);
411
412 static struct iommu_ops intel_iommu_ops;
413
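/*
 * Parse the "intel_iommu=" boot parameter, e.g. "intel_iommu=on,strict".
 * Recognized comma-separated tokens: on, off, igfx_off, forcedac, strict
 * and sp_off.
 */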
414 static int __init intel_iommu_setup(char *str)
415 {
416         if (!str)
417                 return -EINVAL;
418         while (*str) {
419                 if (!strncmp(str, "on", 2)) {
420                         dmar_disabled = 0;
421                         printk(KERN_INFO "Intel-IOMMU: enabled\n");
422                 } else if (!strncmp(str, "off", 3)) {
423                         dmar_disabled = 1;
424                         printk(KERN_INFO "Intel-IOMMU: disabled\n");
425                 } else if (!strncmp(str, "igfx_off", 8)) {
426                         dmar_map_gfx = 0;
427                         printk(KERN_INFO
428                                 "Intel-IOMMU: disable GFX device mapping\n");
429                 } else if (!strncmp(str, "forcedac", 8)) {
430                         printk(KERN_INFO
431                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
432                         dmar_forcedac = 1;
433                 } else if (!strncmp(str, "strict", 6)) {
434                         printk(KERN_INFO
435                                 "Intel-IOMMU: disable batched IOTLB flush\n");
436                         intel_iommu_strict = 1;
437                 } else if (!strncmp(str, "sp_off", 6)) {
438                         printk(KERN_INFO
439                                 "Intel-IOMMU: disable supported super page\n");
440                         intel_iommu_superpage = 0;
441                 }
442
443                 str += strcspn(str, ",");
444                 while (*str == ',')
445                         str++;
446         }
447         return 0;
448 }
449 __setup("intel_iommu=", intel_iommu_setup);
450
451 static struct kmem_cache *iommu_domain_cache;
452 static struct kmem_cache *iommu_devinfo_cache;
453 static struct kmem_cache *iommu_iova_cache;
454
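/* Allocate a zeroed page for use as an IOMMU page table, preferring @node. */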
455 static inline void *alloc_pgtable_page(int node)
456 {
457         struct page *page;
458         void *vaddr = NULL;
459
460         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
461         if (page)
462                 vaddr = page_address(page);
463         return vaddr;
464 }
465
466 static inline void free_pgtable_page(void *vaddr)
467 {
468         free_page((unsigned long)vaddr);
469 }
470
471 static inline void *alloc_domain_mem(void)
472 {
473         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
474 }
475
476 static void free_domain_mem(void *vaddr)
477 {
478         kmem_cache_free(iommu_domain_cache, vaddr);
479 }
480
481 static inline void * alloc_devinfo_mem(void)
482 {
483         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
484 }
485
486 static inline void free_devinfo_mem(void *vaddr)
487 {
488         kmem_cache_free(iommu_devinfo_cache, vaddr);
489 }
490
491 struct iova *alloc_iova_mem(void)
492 {
493         return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
494 }
495
496 void free_iova_mem(struct iova *iova)
497 {
498         kmem_cache_free(iommu_iova_cache, iova);
499 }
500
501
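/*
 * Pick the highest adjusted guest address width, no wider than @max_gaw,
 * that this IOMMU advertises in its SAGAW capability bits; returns -1 if
 * none is supported.
 */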
502 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
503 {
504         unsigned long sagaw;
505         int agaw = -1;
506
507         sagaw = cap_sagaw(iommu->cap);
508         for (agaw = width_to_agaw(max_gaw);
509              agaw >= 0; agaw--) {
510                 if (test_bit(agaw, &sagaw))
511                         break;
512         }
513
514         return agaw;
515 }
516
517 /*
518  * Calculate max SAGAW for each iommu.
519  */
520 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
521 {
522         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
523 }
524
525 /*
526  * calculate agaw for each iommu.
527  * "SAGAW" may be different across iommus; use a default agaw, and
528  * fall back to a smaller supported agaw for iommus that don't support the default.
529  */
530 int iommu_calculate_agaw(struct intel_iommu *iommu)
531 {
532         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
533 }
534
535 /* This function only returns a single iommu in a domain */
536 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
537 {
538         int iommu_id;
539
540         /* si_domain and vm domain should not get here. */
541         BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
542         BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
543
544         iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
545         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
546                 return NULL;
547
548         return g_iommus[iommu_id];
549 }
550
551 static void domain_update_iommu_coherency(struct dmar_domain *domain)
552 {
553         int i;
554
555         domain->iommu_coherency = 1;
556
557         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
558                 if (!ecap_coherent(g_iommus[i]->ecap)) {
559                         domain->iommu_coherency = 0;
560                         break;
561                 }
562         }
563 }
564
565 static void domain_update_iommu_snooping(struct dmar_domain *domain)
566 {
567         int i;
568
569         domain->iommu_snooping = 1;
570
571         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
572                 if (!ecap_sc_support(g_iommus[i]->ecap)) {
573                         domain->iommu_snooping = 0;
574                         break;
575                 }
576         }
577 }
578
579 static void domain_update_iommu_superpage(struct dmar_domain *domain)
580 {
581         int i, mask = 0xf;
582
583         if (!intel_iommu_superpage) {
584                 domain->iommu_superpage = 0;
585                 return;
586         }
587
588         domain->iommu_superpage = 4; /* 1TiB */
589
590         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
591                 mask |= cap_super_page_val(g_iommus[i]->cap);
592                 if (!mask) {
593                         break;
594                 }
595         }
596         domain->iommu_superpage = fls(mask);
597 }
598
599 /* Some capabilities may be different across iommus */
600 static void domain_update_iommu_cap(struct dmar_domain *domain)
601 {
602         domain_update_iommu_coherency(domain);
603         domain_update_iommu_snooping(domain);
604         domain_update_iommu_superpage(domain);
605 }
606
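/*
 * Find the DMAR unit (IOMMU) responsible for the device at @segment/@bus/
 * @devfn: either an explicit device-scope match, a bridge whose secondary
 * bus range covers @bus, or an INCLUDE_ALL unit on the same segment.
 */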
607 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
608 {
609         struct dmar_drhd_unit *drhd = NULL;
610         int i;
611
612         for_each_drhd_unit(drhd) {
613                 if (drhd->ignored)
614                         continue;
615                 if (segment != drhd->segment)
616                         continue;
617
618                 for (i = 0; i < drhd->devices_cnt; i++) {
619                         if (drhd->devices[i] &&
620                             drhd->devices[i]->bus->number == bus &&
621                             drhd->devices[i]->devfn == devfn)
622                                 return drhd->iommu;
623                         if (drhd->devices[i] &&
624                             drhd->devices[i]->subordinate &&
625                             drhd->devices[i]->subordinate->number <= bus &&
626                             drhd->devices[i]->subordinate->subordinate >= bus)
627                                 return drhd->iommu;
628                 }
629
630                 if (drhd->include_all)
631                         return drhd->iommu;
632         }
633
634         return NULL;
635 }
636
637 static void domain_flush_cache(struct dmar_domain *domain,
638                                void *addr, int size)
639 {
640         if (!domain->iommu_coherency)
641                 clflush_cache_range(addr, size);
642 }
643
644 /* Gets context entry for a given bus and devfn */
645 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
646                 u8 bus, u8 devfn)
647 {
648         struct root_entry *root;
649         struct context_entry *context;
650         unsigned long phy_addr;
651         unsigned long flags;
652
653         spin_lock_irqsave(&iommu->lock, flags);
654         root = &iommu->root_entry[bus];
655         context = get_context_addr_from_root(root);
656         if (!context) {
657                 context = (struct context_entry *)
658                                 alloc_pgtable_page(iommu->node);
659                 if (!context) {
660                         spin_unlock_irqrestore(&iommu->lock, flags);
661                         return NULL;
662                 }
663                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
664                 phy_addr = virt_to_phys((void *)context);
665                 set_root_value(root, phy_addr);
666                 set_root_present(root);
667                 __iommu_flush_cache(iommu, root, sizeof(*root));
668         }
669         spin_unlock_irqrestore(&iommu->lock, flags);
670         return &context[devfn];
671 }
672
673 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
674 {
675         struct root_entry *root;
676         struct context_entry *context;
677         int ret;
678         unsigned long flags;
679
680         spin_lock_irqsave(&iommu->lock, flags);
681         root = &iommu->root_entry[bus];
682         context = get_context_addr_from_root(root);
683         if (!context) {
684                 ret = 0;
685                 goto out;
686         }
687         ret = context_present(&context[devfn]);
688 out:
689         spin_unlock_irqrestore(&iommu->lock, flags);
690         return ret;
691 }
692
693 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
694 {
695         struct root_entry *root;
696         struct context_entry *context;
697         unsigned long flags;
698
699         spin_lock_irqsave(&iommu->lock, flags);
700         root = &iommu->root_entry[bus];
701         context = get_context_addr_from_root(root);
702         if (context) {
703                 context_clear_entry(&context[devfn]);
704                 __iommu_flush_cache(iommu, &context[devfn], \
705                         sizeof(*context));
706         }
707         spin_unlock_irqrestore(&iommu->lock, flags);
708 }
709
710 static void free_context_table(struct intel_iommu *iommu)
711 {
712         struct root_entry *root;
713         int i;
714         unsigned long flags;
715         struct context_entry *context;
716
717         spin_lock_irqsave(&iommu->lock, flags);
718         if (!iommu->root_entry) {
719                 goto out;
720         }
721         for (i = 0; i < ROOT_ENTRY_NR; i++) {
722                 root = &iommu->root_entry[i];
723                 context = get_context_addr_from_root(root);
724                 if (context)
725                         free_pgtable_page(context);
726         }
727         free_pgtable_page(iommu->root_entry);
728         iommu->root_entry = NULL;
729 out:
730         spin_unlock_irqrestore(&iommu->lock, flags);
731 }
732
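/*
 * Walk the domain's page tables down to the requested level for @pfn,
 * allocating missing intermediate tables on the way, and return the pte.
 * Stops early if an existing large-page entry is found.
 */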
733 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
734                                       unsigned long pfn, int large_level)
735 {
736         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
737         struct dma_pte *parent, *pte = NULL;
738         int level = agaw_to_level(domain->agaw);
739         int offset, target_level;
740
741         BUG_ON(!domain->pgd);
742         BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
743         parent = domain->pgd;
744
745         /* Search pte */
746         if (!large_level)
747                 target_level = 1;
748         else
749                 target_level = large_level;
750
751         while (level > 0) {
752                 void *tmp_page;
753
754                 offset = pfn_level_offset(pfn, level);
755                 pte = &parent[offset];
756                 if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
757                         break;
758                 if (level == target_level)
759                         break;
760
761                 if (!dma_pte_present(pte)) {
762                         uint64_t pteval;
763
764                         tmp_page = alloc_pgtable_page(domain->nid);
765
766                         if (!tmp_page)
767                                 return NULL;
768
769                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
770                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
771                         if (cmpxchg64(&pte->val, 0ULL, pteval)) {
772                                 /* Someone else set it while we were thinking; use theirs. */
773                                 free_pgtable_page(tmp_page);
774                         } else {
775                                 dma_pte_addr(pte);
776                                 domain_flush_cache(domain, pte, sizeof(*pte));
777                         }
778                 }
779                 parent = phys_to_virt(dma_pte_addr(pte));
780                 level--;
781         }
782
783         return pte;
784 }
785
786
787 /* return address's pte at specific level */
788 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
789                                          unsigned long pfn,
790                                          int level, int *large_page)
791 {
792         struct dma_pte *parent, *pte = NULL;
793         int total = agaw_to_level(domain->agaw);
794         int offset;
795
796         parent = domain->pgd;
797         while (level <= total) {
798                 offset = pfn_level_offset(pfn, total);
799                 pte = &parent[offset];
800                 if (level == total)
801                         return pte;
802
803                 if (!dma_pte_present(pte)) {
804                         *large_page = total;
805                         break;
806                 }
807
808                 if (pte->val & DMA_PTE_LARGE_PAGE) {
809                         *large_page = total;
810                         return pte;
811                 }
812
813                 parent = phys_to_virt(dma_pte_addr(pte));
814                 total--;
815         }
816         return NULL;
817 }
818
819 /* clear last level pte; a tlb flush should follow */
820 static void dma_pte_clear_range(struct dmar_domain *domain,
821                                 unsigned long start_pfn,
822                                 unsigned long last_pfn)
823 {
824         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
825         unsigned int large_page = 1;
826         struct dma_pte *first_pte, *pte;
827
828         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
829         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
830         BUG_ON(start_pfn > last_pfn);
831
832         /* we don't need lock here; nobody else touches the iova range */
833         do {
834                 large_page = 1;
835                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
836                 if (!pte) {
837                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
838                         continue;
839                 }
840                 do {
841                         dma_clear_pte(pte);
842                         start_pfn += lvl_to_nr_pages(large_page);
843                         pte++;
844                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
845
846                 domain_flush_cache(domain, first_pte,
847                                    (void *)pte - (void *)first_pte);
848
849         } while (start_pfn && start_pfn <= last_pfn);
850 }
851
852 /* free page table pages. last level pte should already be cleared */
853 static void dma_pte_free_pagetable(struct dmar_domain *domain,
854                                    unsigned long start_pfn,
855                                    unsigned long last_pfn)
856 {
857         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
858         struct dma_pte *first_pte, *pte;
859         int total = agaw_to_level(domain->agaw);
860         int level;
861         unsigned long tmp;
862         int large_page = 2;
863
864         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
865         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
866         BUG_ON(start_pfn > last_pfn);
867
868         /* We don't need lock here; nobody else touches the iova range */
869         level = 2;
870         while (level <= total) {
871                 tmp = align_to_level(start_pfn, level);
872
873                 /* If we can't even clear one PTE at this level, we're done */
874                 if (tmp + level_size(level) - 1 > last_pfn)
875                         return;
876
877                 do {
878                         large_page = level;
879                         first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
880                         if (large_page > level)
881                                 level = large_page + 1;
882                         if (!pte) {
883                                 tmp = align_to_level(tmp + 1, level + 1);
884                                 continue;
885                         }
886                         do {
887                                 if (dma_pte_present(pte)) {
888                                         free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
889                                         dma_clear_pte(pte);
890                                 }
891                                 pte++;
892                                 tmp += level_size(level);
893                         } while (!first_pte_in_page(pte) &&
894                                  tmp + level_size(level) - 1 <= last_pfn);
895
896                         domain_flush_cache(domain, first_pte,
897                                            (void *)pte - (void *)first_pte);
898                         
899                 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
900                 level++;
901         }
902         /* free pgd */
903         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
904                 free_pgtable_page(domain->pgd);
905                 domain->pgd = NULL;
906         }
907 }
908
909 /* iommu handling */
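/*
 * Allocate a zeroed root-entry table and record it in iommu->root_entry;
 * the hardware root pointer itself is programmed later by
 * iommu_set_root_entry().
 */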
910 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
911 {
912         struct root_entry *root;
913         unsigned long flags;
914
915         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
916         if (!root)
917                 return -ENOMEM;
918
919         __iommu_flush_cache(iommu, root, ROOT_SIZE);
920
921         spin_lock_irqsave(&iommu->lock, flags);
922         iommu->root_entry = root;
923         spin_unlock_irqrestore(&iommu->lock, flags);
924
925         return 0;
926 }
927
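/*
 * Program the physical address of the root-entry table into DMAR_RTADDR_REG
 * and issue a "set root table pointer" command, waiting for the hardware to
 * acknowledge it.
 */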
928 static void iommu_set_root_entry(struct intel_iommu *iommu)
929 {
930         void *addr;
931         u32 sts;
932         unsigned long flag;
933
934         addr = iommu->root_entry;
935
936         spin_lock_irqsave(&iommu->register_lock, flag);
937         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
938
939         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
940
941         /* Make sure hardware completes it */
942         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
943                       readl, (sts & DMA_GSTS_RTPS), sts);
944
945         spin_unlock_irqrestore(&iommu->register_lock, flag);
946 }
947
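/*
 * Flush the IOMMU's internal write buffer (only needed when the hardware,
 * or a known quirk, requires it) and wait for the flush to complete.
 */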
948 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
949 {
950         u32 val;
951         unsigned long flag;
952
953         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
954                 return;
955
956         spin_lock_irqsave(&iommu->register_lock, flag);
957         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
958
959         /* Make sure hardware completes it */
960         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
961                       readl, (!(val & DMA_GSTS_WBFS)), val);
962
963         spin_unlock_irqrestore(&iommu->register_lock, flag);
964 }
965
966 /* return value determines if we need a write buffer flush */
967 static void __iommu_flush_context(struct intel_iommu *iommu,
968                                   u16 did, u16 source_id, u8 function_mask,
969                                   u64 type)
970 {
971         u64 val = 0;
972         unsigned long flag;
973
974         switch (type) {
975         case DMA_CCMD_GLOBAL_INVL:
976                 val = DMA_CCMD_GLOBAL_INVL;
977                 break;
978         case DMA_CCMD_DOMAIN_INVL:
979                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
980                 break;
981         case DMA_CCMD_DEVICE_INVL:
982                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
983                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
984                 break;
985         default:
986                 BUG();
987         }
988         val |= DMA_CCMD_ICC;
989
990         spin_lock_irqsave(&iommu->register_lock, flag);
991         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
992
993         /* Make sure hardware completes it */
994         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
995                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
996
997         spin_unlock_irqrestore(&iommu->register_lock, flag);
998 }
999
1000 /* return value determines if we need a write buffer flush */
1001 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1002                                 u64 addr, unsigned int size_order, u64 type)
1003 {
1004         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1005         u64 val = 0, val_iva = 0;
1006         unsigned long flag;
1007
1008         switch (type) {
1009         case DMA_TLB_GLOBAL_FLUSH:
1010                 /* global flush doesn't need to set IVA_REG */
1011                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1012                 break;
1013         case DMA_TLB_DSI_FLUSH:
1014                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1015                 break;
1016         case DMA_TLB_PSI_FLUSH:
1017                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1018                 /* Note: always flush non-leaf currently */
1019                 val_iva = size_order | addr;
1020                 break;
1021         default:
1022                 BUG();
1023         }
1024         /* Note: set drain read/write */
1025 #if 0
1026         /*
1027          * This is probably only needed to be extra safe; it looks like
1028          * we can ignore it without any impact.
1029          */
1030         if (cap_read_drain(iommu->cap))
1031                 val |= DMA_TLB_READ_DRAIN;
1032 #endif
1033         if (cap_write_drain(iommu->cap))
1034                 val |= DMA_TLB_WRITE_DRAIN;
1035
1036         spin_lock_irqsave(&iommu->register_lock, flag);
1037         /* Note: Only uses first TLB reg currently */
1038         if (val_iva)
1039                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1040         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1041
1042         /* Make sure hardware completes it */
1043         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1044                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1045
1046         spin_unlock_irqrestore(&iommu->register_lock, flag);
1047
1048         /* check IOTLB invalidation granularity */
1049         if (DMA_TLB_IAIG(val) == 0)
1050                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1051         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1052                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1053                         (unsigned long long)DMA_TLB_IIRG(type),
1054                         (unsigned long long)DMA_TLB_IAIG(val));
1055 }
1056
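/*
 * Check whether the device at @segment/@bus/@devfn can use a device IOTLB:
 * the IOMMU must support device-IOTLBs and queued invalidation, and the
 * device must expose an ATS capability matched by an ATSR unit.  Returns
 * the device_domain_info on success, NULL otherwise.
 */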
1057 static struct device_domain_info *iommu_support_dev_iotlb(
1058         struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1059 {
1060         int found = 0;
1061         unsigned long flags;
1062         struct device_domain_info *info;
1063         struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1064
1065         if (!ecap_dev_iotlb_support(iommu->ecap))
1066                 return NULL;
1067
1068         if (!iommu->qi)
1069                 return NULL;
1070
1071         spin_lock_irqsave(&device_domain_lock, flags);
1072         list_for_each_entry(info, &domain->devices, link)
1073                 if (info->bus == bus && info->devfn == devfn) {
1074                         found = 1;
1075                         break;
1076                 }
1077         spin_unlock_irqrestore(&device_domain_lock, flags);
1078
1079         if (!found || !info->dev)
1080                 return NULL;
1081
1082         if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1083                 return NULL;
1084
1085         if (!dmar_find_matched_atsr_unit(info->dev))
1086                 return NULL;
1087
1088         info->iommu = iommu;
1089
1090         return info;
1091 }
1092
1093 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1094 {
1095         if (!info)
1096                 return;
1097
1098         pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1099 }
1100
1101 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1102 {
1103         if (!info->dev || !pci_ats_enabled(info->dev))
1104                 return;
1105
1106         pci_disable_ats(info->dev);
1107 }
1108
1109 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1110                                   u64 addr, unsigned mask)
1111 {
1112         u16 sid, qdep;
1113         unsigned long flags;
1114         struct device_domain_info *info;
1115
1116         spin_lock_irqsave(&device_domain_lock, flags);
1117         list_for_each_entry(info, &domain->devices, link) {
1118                 if (!info->dev || !pci_ats_enabled(info->dev))
1119                         continue;
1120
1121                 sid = info->bus << 8 | info->devfn;
1122                 qdep = pci_ats_queue_depth(info->dev);
1123                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1124         }
1125         spin_unlock_irqrestore(&device_domain_lock, flags);
1126 }
1127
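/*
 * Invalidate the IOTLB for @pages pages starting at @pfn in domain @did,
 * flushing device IOTLBs as well when needed.
 */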
1128 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1129                                   unsigned long pfn, unsigned int pages, int map)
1130 {
1131         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1132         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1133
1134         BUG_ON(pages == 0);
1135
1136         /*
1137          * Fall back to domain-selective flush if there is no PSI support or the size is
1138          * too big.
1139          * PSI requires page size to be 2 ^ x, and the base address is naturally
1140          * aligned to the size
1141          */
1142         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1143                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1144                                                 DMA_TLB_DSI_FLUSH);
1145         else
1146                 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1147                                                 DMA_TLB_PSI_FLUSH);
1148
1149         /*
1150          * In caching mode, changes of pages from non-present to present require
1151          * flush. However, device IOTLB doesn't need to be flushed in this case.
1152          */
1153         if (!cap_caching_mode(iommu->cap) || !map)
1154                 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1155 }
1156
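/*
 * Clear the "enable protected memory" bit so the protected memory regions
 * no longer block DMA, and wait for the status bit to clear.
 */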
1157 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1158 {
1159         u32 pmen;
1160         unsigned long flags;
1161
1162         spin_lock_irqsave(&iommu->register_lock, flags);
1163         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1164         pmen &= ~DMA_PMEN_EPM;
1165         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1166
1167         /* wait for the protected region status bit to clear */
1168         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1169                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1170
1171         spin_unlock_irqrestore(&iommu->register_lock, flags);
1172 }
1173
1174 static int iommu_enable_translation(struct intel_iommu *iommu)
1175 {
1176         u32 sts;
1177         unsigned long flags;
1178
1179         spin_lock_irqsave(&iommu->register_lock, flags);
1180         iommu->gcmd |= DMA_GCMD_TE;
1181         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1182
1183         /* Make sure hardware completes it */
1184         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1185                       readl, (sts & DMA_GSTS_TES), sts);
1186
1187         spin_unlock_irqrestore(&iommu->register_lock, flags);
1188         return 0;
1189 }
1190
1191 static int iommu_disable_translation(struct intel_iommu *iommu)
1192 {
1193         u32 sts;
1194         unsigned long flag;
1195
1196         spin_lock_irqsave(&iommu->register_lock, flag);
1197         iommu->gcmd &= ~DMA_GCMD_TE;
1198         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1199
1200         /* Make sure hardware completes it */
1201         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1202                       readl, (!(sts & DMA_GSTS_TES)), sts);
1203
1204         spin_unlock_irqrestore(&iommu->register_lock, flag);
1205         return 0;
1206 }
1207
1208
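/*
 * Allocate this IOMMU's domain-id bitmap and domain pointer array, sized
 * from its capability register; domain id 0 is reserved when caching mode
 * is enabled.
 */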
1209 static int iommu_init_domains(struct intel_iommu *iommu)
1210 {
1211         unsigned long ndomains;
1212         unsigned long nlongs;
1213
1214         ndomains = cap_ndoms(iommu->cap);
1215         pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
1216                         ndomains);
1217         nlongs = BITS_TO_LONGS(ndomains);
1218
1219         spin_lock_init(&iommu->lock);
1220
1221         /* TBD: there might be 64K domains,
1222          * consider other allocation for future chip
1223          */
1224         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1225         if (!iommu->domain_ids) {
1226                 printk(KERN_ERR "Allocating domain id array failed\n");
1227                 return -ENOMEM;
1228         }
1229         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1230                         GFP_KERNEL);
1231         if (!iommu->domains) {
1232                 printk(KERN_ERR "Allocating domain array failed\n");
1233                 return -ENOMEM;
1234         }
1235
1236         /*
1237          * if Caching mode is set, then invalid translations are tagged
1238          * with domainid 0. Hence we need to pre-allocate it.
1239          */
1240         if (cap_caching_mode(iommu->cap))
1241                 set_bit(0, iommu->domain_ids);
1242         return 0;
1243 }
1244
1245
1246 static void domain_exit(struct dmar_domain *domain);
1247 static void vm_domain_exit(struct dmar_domain *domain);
1248
1249 void free_dmar_iommu(struct intel_iommu *iommu)
1250 {
1251         struct dmar_domain *domain;
1252         int i;
1253         unsigned long flags;
1254
1255         if ((iommu->domains) && (iommu->domain_ids)) {
1256                 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1257                         domain = iommu->domains[i];
1258                         clear_bit(i, iommu->domain_ids);
1259
1260                         spin_lock_irqsave(&domain->iommu_lock, flags);
1261                         if (--domain->iommu_count == 0) {
1262                                 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1263                                         vm_domain_exit(domain);
1264                                 else
1265                                         domain_exit(domain);
1266                         }
1267                         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1268                 }
1269         }
1270
1271         if (iommu->gcmd & DMA_GCMD_TE)
1272                 iommu_disable_translation(iommu);
1273
1274         if (iommu->irq) {
1275                 irq_set_handler_data(iommu->irq, NULL);
1276                 /* This will mask the irq */
1277                 free_irq(iommu->irq, iommu);
1278                 destroy_irq(iommu->irq);
1279         }
1280
1281         kfree(iommu->domains);
1282         kfree(iommu->domain_ids);
1283
1284         g_iommus[iommu->seq_id] = NULL;
1285
1286         /* if all iommus are freed, free g_iommus */
1287         for (i = 0; i < g_num_of_iommus; i++) {
1288                 if (g_iommus[i])
1289                         break;
1290         }
1291
1292         if (i == g_num_of_iommus)
1293                 kfree(g_iommus);
1294
1295         /* free context mapping */
1296         free_context_table(iommu);
1297 }
1298
1299 static struct dmar_domain *alloc_domain(void)
1300 {
1301         struct dmar_domain *domain;
1302
1303         domain = alloc_domain_mem();
1304         if (!domain)
1305                 return NULL;
1306
1307         domain->nid = -1;
1308         memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1309         domain->flags = 0;
1310
1311         return domain;
1312 }
1313
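/*
 * Allocate a free domain id on @iommu for @domain and record the
 * association in both the iommu and the domain's iommu bitmap.
 */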
1314 static int iommu_attach_domain(struct dmar_domain *domain,
1315                                struct intel_iommu *iommu)
1316 {
1317         int num;
1318         unsigned long ndomains;
1319         unsigned long flags;
1320
1321         ndomains = cap_ndoms(iommu->cap);
1322
1323         spin_lock_irqsave(&iommu->lock, flags);
1324
1325         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1326         if (num >= ndomains) {
1327                 spin_unlock_irqrestore(&iommu->lock, flags);
1328                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1329                 return -ENOMEM;
1330         }
1331
1332         domain->id = num;
1333         set_bit(num, iommu->domain_ids);
1334         set_bit(iommu->seq_id, &domain->iommu_bmp);
1335         iommu->domains[num] = domain;
1336         spin_unlock_irqrestore(&iommu->lock, flags);
1337
1338         return 0;
1339 }
1340
1341 static void iommu_detach_domain(struct dmar_domain *domain,
1342                                 struct intel_iommu *iommu)
1343 {
1344         unsigned long flags;
1345         int num, ndomains;
1346         int found = 0;
1347
1348         spin_lock_irqsave(&iommu->lock, flags);
1349         ndomains = cap_ndoms(iommu->cap);
1350         for_each_set_bit(num, iommu->domain_ids, ndomains) {
1351                 if (iommu->domains[num] == domain) {
1352                         found = 1;
1353                         break;
1354                 }
1355         }
1356
1357         if (found) {
1358                 clear_bit(num, iommu->domain_ids);
1359                 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1360                 iommu->domains[num] = NULL;
1361         }
1362         spin_unlock_irqrestore(&iommu->lock, flags);
1363 }
1364
1365 static struct iova_domain reserved_iova_list;
1366 static struct lock_class_key reserved_rbtree_key;
1367
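/*
 * Build the global list of IOVA ranges that must never be allocated for
 * DMA: the IOAPIC MMIO window and every PCI device's MMIO resources (to
 * prevent peer-to-peer accesses to them).
 */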
1368 static int dmar_init_reserved_ranges(void)
1369 {
1370         struct pci_dev *pdev = NULL;
1371         struct iova *iova;
1372         int i;
1373
1374         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1375
1376         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1377                 &reserved_rbtree_key);
1378
1379         /* IOAPIC ranges shouldn't be accessed by DMA */
1380         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1381                 IOVA_PFN(IOAPIC_RANGE_END));
1382         if (!iova) {
1383                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1384                 return -ENODEV;
1385         }
1386
1387         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1388         for_each_pci_dev(pdev) {
1389                 struct resource *r;
1390
1391                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1392                         r = &pdev->resource[i];
1393                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1394                                 continue;
1395                         iova = reserve_iova(&reserved_iova_list,
1396                                             IOVA_PFN(r->start),
1397                                             IOVA_PFN(r->end));
1398                         if (!iova) {
1399                                 printk(KERN_ERR "Reserve iova failed\n");
1400                                 return -ENODEV;
1401                         }
1402                 }
1403         }
1404         return 0;
1405 }
1406
1407 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1408 {
1409         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1410 }
1411
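/*
 * Round a guest address width up to the nearest width the page-table
 * format can express (12 bits plus a multiple of 9), capped at 64 bits;
 * e.g. a 40-bit guest width becomes 48.
 */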
1412 static inline int guestwidth_to_adjustwidth(int gaw)
1413 {
1414         int agaw;
1415         int r = (gaw - 12) % 9;
1416
1417         if (r == 0)
1418                 agaw = gaw;
1419         else
1420                 agaw = gaw + 9 - r;
1421         if (agaw > 64)
1422                 agaw = 64;
1423         return agaw;
1424 }
1425
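/*
 * Initialize a freshly attached domain: set up its IOVA allocator and
 * reserved ranges, choose a supported agaw for the requested guest width,
 * record the IOMMU's coherency/snooping/superpage capabilities and
 * allocate the top-level page directory.
 */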
1426 static int domain_init(struct dmar_domain *domain, int guest_width)
1427 {
1428         struct intel_iommu *iommu;
1429         int adjust_width, agaw;
1430         unsigned long sagaw;
1431
1432         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1433         spin_lock_init(&domain->iommu_lock);
1434
1435         domain_reserve_special_ranges(domain);
1436
1437         /* calculate AGAW */
1438         iommu = domain_get_iommu(domain);
1439         if (guest_width > cap_mgaw(iommu->cap))
1440                 guest_width = cap_mgaw(iommu->cap);
1441         domain->gaw = guest_width;
1442         adjust_width = guestwidth_to_adjustwidth(guest_width);
1443         agaw = width_to_agaw(adjust_width);
1444         sagaw = cap_sagaw(iommu->cap);
1445         if (!test_bit(agaw, &sagaw)) {
1446                 /* hardware doesn't support it, choose a bigger one */
1447                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1448                 agaw = find_next_bit(&sagaw, 5, agaw);
1449                 if (agaw >= 5)
1450                         return -ENODEV;
1451         }
1452         domain->agaw = agaw;
1453         INIT_LIST_HEAD(&domain->devices);
1454
1455         if (ecap_coherent(iommu->ecap))
1456                 domain->iommu_coherency = 1;
1457         else
1458                 domain->iommu_coherency = 0;
1459
1460         if (ecap_sc_support(iommu->ecap))
1461                 domain->iommu_snooping = 1;
1462         else
1463                 domain->iommu_snooping = 0;
1464
1465         domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1466         domain->iommu_count = 1;
1467         domain->nid = iommu->node;
1468
1469         /* always allocate the top pgd */
1470         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1471         if (!domain->pgd)
1472                 return -ENOMEM;
1473         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1474         return 0;
1475 }
1476
1477 static void domain_exit(struct dmar_domain *domain)
1478 {
1479         struct dmar_drhd_unit *drhd;
1480         struct intel_iommu *iommu;
1481
1482         /* Domain 0 is reserved, so don't process it */
1483         if (!domain)
1484                 return;
1485
1486         /* Flush any lazy unmaps that may reference this domain */
1487         if (!intel_iommu_strict)
1488                 flush_unmaps_timeout(0);
1489
1490         domain_remove_dev_info(domain);
1491         /* destroy iovas */
1492         put_iova_domain(&domain->iovad);
1493
1494         /* clear ptes */
1495         dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1496
1497         /* free page tables */
1498         dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1499
1500         for_each_active_iommu(iommu, drhd)
1501                 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1502                         iommu_detach_domain(domain, iommu);
1503
1504         free_domain_mem(domain);
1505 }
1506
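/*
 * Install the context entry for one (@segment, @bus, @devfn): pick a domain
 * id on the target IOMMU, point the entry at the domain's page tables (or
 * program pass-through), flush caches as caching mode requires, and account
 * the IOMMU in the domain.
 */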
1507 static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1508                                  u8 bus, u8 devfn, int translation)
1509 {
1510         struct context_entry *context;
1511         unsigned long flags;
1512         struct intel_iommu *iommu;
1513         struct dma_pte *pgd;
1514         unsigned long num;
1515         unsigned long ndomains;
1516         int id;
1517         int agaw;
1518         struct device_domain_info *info = NULL;
1519
1520         pr_debug("Set context mapping for %02x:%02x.%d\n",
1521                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1522
1523         BUG_ON(!domain->pgd);
1524         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1525                translation != CONTEXT_TT_MULTI_LEVEL);
1526
1527         iommu = device_to_iommu(segment, bus, devfn);
1528         if (!iommu)
1529                 return -ENODEV;
1530
1531         context = device_to_context_entry(iommu, bus, devfn);
1532         if (!context)
1533                 return -ENOMEM;
1534         spin_lock_irqsave(&iommu->lock, flags);
1535         if (context_present(context)) {
1536                 spin_unlock_irqrestore(&iommu->lock, flags);
1537                 return 0;
1538         }
1539
1540         id = domain->id;
1541         pgd = domain->pgd;
1542
1543         if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1544             domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1545                 int found = 0;
1546
1547                 /* find an available domain id for this device in iommu */
1548                 ndomains = cap_ndoms(iommu->cap);
1549                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1550                         if (iommu->domains[num] == domain) {
1551                                 id = num;
1552                                 found = 1;
1553                                 break;
1554                         }
1555                 }
1556
1557                 if (found == 0) {
1558                         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1559                         if (num >= ndomains) {
1560                                 spin_unlock_irqrestore(&iommu->lock, flags);
1561                                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1562                                 return -EFAULT;
1563                         }
1564
1565                         set_bit(num, iommu->domain_ids);
1566                         iommu->domains[num] = domain;
1567                         id = num;
1568                 }
1569
1570                 /* Skip top levels of page tables for an
1571                  * iommu which has a smaller agaw than the default.
1572                  * Unnecessary for PT mode.
1573                  */
1574                 if (translation != CONTEXT_TT_PASS_THROUGH) {
1575                         for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1576                                 pgd = phys_to_virt(dma_pte_addr(pgd));
1577                                 if (!dma_pte_present(pgd)) {
1578                                         spin_unlock_irqrestore(&iommu->lock, flags);
1579                                         return -ENOMEM;
1580                                 }
1581                         }
1582                 }
1583         }
1584
1585         context_set_domain_id(context, id);
1586
1587         if (translation != CONTEXT_TT_PASS_THROUGH) {
1588                 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1589                 translation = info ? CONTEXT_TT_DEV_IOTLB :
1590                                      CONTEXT_TT_MULTI_LEVEL;
1591         }
1592         /*
1593          * In pass through mode, AW must be programmed to indicate the largest
1594          * AGAW value supported by hardware. And ASR is ignored by hardware.
1595          */
1596         if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1597                 context_set_address_width(context, iommu->msagaw);
1598         else {
1599                 context_set_address_root(context, virt_to_phys(pgd));
1600                 context_set_address_width(context, iommu->agaw);
1601         }
1602
1603         context_set_translation_type(context, translation);
1604         context_set_fault_enable(context);
1605         context_set_present(context);
1606         domain_flush_cache(domain, context, sizeof(*context));
1607
1608         /*
1609          * It's a non-present to present mapping. If hardware doesn't cache
1610          * non-present entries we only need to flush the write-buffer. If it
1611          * _does_ cache non-present entries, then it does so in the special
1612          * domain #0, which we have to flush:
1613          */
1614         if (cap_caching_mode(iommu->cap)) {
1615                 iommu->flush.flush_context(iommu, 0,
1616                                            (((u16)bus) << 8) | devfn,
1617                                            DMA_CCMD_MASK_NOBIT,
1618                                            DMA_CCMD_DEVICE_INVL);
1619                 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1620         } else {
1621                 iommu_flush_write_buffer(iommu);
1622         }
1623         iommu_enable_dev_iotlb(info);
1624         spin_unlock_irqrestore(&iommu->lock, flags);
1625
1626         spin_lock_irqsave(&domain->iommu_lock, flags);
1627         if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1628                 domain->iommu_count++;
1629                 if (domain->iommu_count == 1)
1630                         domain->nid = iommu->node;
1631                 domain_update_iommu_cap(domain);
1632         }
1633         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1634         return 0;
1635 }
1636
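/*
 * Map the context for @pdev itself and, if it sits behind a PCIe-to-PCI
 * bridge, for every bridge on the path up to and including that bridge,
 * since all devices behind it share the bridge's source-id.
 */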
1637 static int
1638 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1639                         int translation)
1640 {
1641         int ret;
1642         struct pci_dev *tmp, *parent;
1643
1644         ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1645                                          pdev->bus->number, pdev->devfn,
1646                                          translation);
1647         if (ret)
1648                 return ret;
1649
1650         /* dependent device mapping */
1651         tmp = pci_find_upstream_pcie_bridge(pdev);
1652         if (!tmp)
1653                 return 0;
1654         /* Secondary interface's bus number and devfn 0 */
1655         parent = pdev->bus->self;
1656         while (parent != tmp) {
1657                 ret = domain_context_mapping_one(domain,
1658                                                  pci_domain_nr(parent->bus),
1659                                                  parent->bus->number,
1660                                                  parent->devfn, translation);
1661                 if (ret)
1662                         return ret;
1663                 parent = parent->bus->self;
1664         }
1665         if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
1666                 return domain_context_mapping_one(domain,
1667                                         pci_domain_nr(tmp->subordinate),
1668                                         tmp->subordinate->number, 0,
1669                                         translation);
1670         else /* this is a legacy PCI bridge */
1671                 return domain_context_mapping_one(domain,
1672                                                   pci_domain_nr(tmp->bus),
1673                                                   tmp->bus->number,
1674                                                   tmp->devfn,
1675                                                   translation);
1676 }
1677
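/*
 * Check whether context entries are already present for @pdev and for each
 * bridge between it and its upstream PCIe-to-PCI bridge (if any).
 */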
1678 static int domain_context_mapped(struct pci_dev *pdev)
1679 {
1680         int ret;
1681         struct pci_dev *tmp, *parent;
1682         struct intel_iommu *iommu;
1683
1684         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1685                                 pdev->devfn);
1686         if (!iommu)
1687                 return -ENODEV;
1688
1689         ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1690         if (!ret)
1691                 return ret;
1692         /* dependent device mapping */
1693         tmp = pci_find_upstream_pcie_bridge(pdev);
1694         if (!tmp)
1695                 return ret;
1696         /* Secondary interface's bus number and devfn 0 */
1697         parent = pdev->bus->self;
1698         while (parent != tmp) {
1699                 ret = device_context_mapped(iommu, parent->bus->number,
1700                                             parent->devfn);
1701                 if (!ret)
1702                         return ret;
1703                 parent = parent->bus->self;
1704         }
1705         if (pci_is_pcie(tmp))
1706                 return device_context_mapped(iommu, tmp->subordinate->number,
1707                                              0);
1708         else
1709                 return device_context_mapped(iommu, tmp->bus->number,
1710                                              tmp->devfn);
1711 }
1712
1713 /* Returns a number of VT-d pages, rounded up to a whole number of MM pages */
1714 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1715                                             size_t size)
1716 {
1717         host_addr &= ~PAGE_MASK;
1718         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1719 }
1720
1721 /* Return largest possible superpage level for a given mapping */
1722 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1723                                           unsigned long iov_pfn,
1724                                           unsigned long phy_pfn,
1725                                           unsigned long pages)
1726 {
1727         int support, level = 1;
1728         unsigned long pfnmerge;
1729
1730         support = domain->iommu_superpage;
1731
1732         /* To use a large page, the virtual *and* physical addresses
1733            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1734            of them will mean we have to use smaller pages. So just
1735            merge them and check both at once. */
1736         pfnmerge = iov_pfn | phy_pfn;
1737
1738         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1739                 pages >>= VTD_STRIDE_SHIFT;
1740                 if (!pages)
1741                         break;
1742                 pfnmerge >>= VTD_STRIDE_SHIFT;
1743                 level++;
1744                 support--;
1745         }
1746         return level;
1747 }
1748
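/*
 * Install PTEs for @nr_pages starting at @iov_pfn, taking the physical pages
 * either from the scatterlist @sg or from the contiguous range starting at
 * @phys_pfn. Superpages are used where hardware support and alignment allow,
 * and the CPU cache is flushed for each page of PTEs written.
 */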
1749 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1750                             struct scatterlist *sg, unsigned long phys_pfn,
1751                             unsigned long nr_pages, int prot)
1752 {
1753         struct dma_pte *first_pte = NULL, *pte = NULL;
1754         phys_addr_t uninitialized_var(pteval);
1755         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1756         unsigned long sg_res;
1757         unsigned int largepage_lvl = 0;
1758         unsigned long lvl_pages = 0;
1759
1760         BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1761
1762         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1763                 return -EINVAL;
1764
1765         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1766
1767         if (sg)
1768                 sg_res = 0;
1769         else {
1770                 sg_res = nr_pages + 1;
1771                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1772         }
1773
1774         while (nr_pages > 0) {
1775                 uint64_t tmp;
1776
1777                 if (!sg_res) {
1778                         sg_res = aligned_nrpages(sg->offset, sg->length);
1779                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1780                         sg->dma_length = sg->length;
1781                         pteval = page_to_phys(sg_page(sg)) | prot;
1782                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
1783                 }
1784
1785                 if (!pte) {
1786                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1787
1788                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1789                         if (!pte)
1790                                 return -ENOMEM;
1791                         /* It is a large page */
1792                         if (largepage_lvl > 1)
1793                                 pteval |= DMA_PTE_LARGE_PAGE;
1794                         else
1795                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1796
1797                 }
1798                 /* We don't need a lock here; nobody else
1799                  * touches this iova range.
1800                  */
1801                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1802                 if (tmp) {
1803                         static int dumps = 5;
1804                         printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1805                                iov_pfn, tmp, (unsigned long long)pteval);
1806                         if (dumps) {
1807                                 dumps--;
1808                                 debug_dma_dump_mappings(NULL);
1809                         }
1810                         WARN_ON(1);
1811                 }
1812
1813                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1814
1815                 BUG_ON(nr_pages < lvl_pages);
1816                 BUG_ON(sg_res < lvl_pages);
1817
1818                 nr_pages -= lvl_pages;
1819                 iov_pfn += lvl_pages;
1820                 phys_pfn += lvl_pages;
1821                 pteval += lvl_pages * VTD_PAGE_SIZE;
1822                 sg_res -= lvl_pages;
1823
1824                 /* If the next PTE would be the first in a new page, then we
1825                    need to flush the cache on the entries we've just written.
1826                    And then we'll need to recalculate 'pte', so clear it and
1827                    let it get set again in the if (!pte) block above.
1828
1829                    If we're done (!nr_pages) we need to flush the cache too.
1830
1831                    Also if we've been setting superpages, we may need to
1832                    recalculate 'pte' and switch back to smaller pages for the
1833                    end of the mapping, if the trailing size is not enough to
1834                    use another superpage (i.e. sg_res < lvl_pages). */
1835                 pte++;
1836                 if (!nr_pages || first_pte_in_page(pte) ||
1837                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
1838                         domain_flush_cache(domain, first_pte,
1839                                            (void *)pte - (void *)first_pte);
1840                         pte = NULL;
1841                 }
1842
1843                 if (!sg_res && nr_pages)
1844                         sg = sg_next(sg);
1845         }
1846         return 0;
1847 }
1848
1849 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1850                                     struct scatterlist *sg, unsigned long nr_pages,
1851                                     int prot)
1852 {
1853         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1854 }
1855
1856 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1857                                      unsigned long phys_pfn, unsigned long nr_pages,
1858                                      int prot)
1859 {
1860         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1861 }
1862
1863 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1864 {
1865         if (!iommu)
1866                 return;
1867
1868         clear_context_table(iommu, bus, devfn);
1869         iommu->flush.flush_context(iommu, 0, 0, 0,
1870                                            DMA_CCMD_GLOBAL_INVL);
1871         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1872 }
1873
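/*
 * Unlink every device from @domain: disable its device-IOTLB, clear its
 * context entry on the owning IOMMU and free the per-device info.
 */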
1874 static void domain_remove_dev_info(struct dmar_domain *domain)
1875 {
1876         struct device_domain_info *info;
1877         unsigned long flags;
1878         struct intel_iommu *iommu;
1879
1880         spin_lock_irqsave(&device_domain_lock, flags);
1881         while (!list_empty(&domain->devices)) {
1882                 info = list_entry(domain->devices.next,
1883                         struct device_domain_info, link);
1884                 list_del(&info->link);
1885                 list_del(&info->global);
1886                 if (info->dev)
1887                         info->dev->dev.archdata.iommu = NULL;
1888                 spin_unlock_irqrestore(&device_domain_lock, flags);
1889
1890                 iommu_disable_dev_iotlb(info);
1891                 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1892                 iommu_detach_dev(iommu, info->bus, info->devfn);
1893                 free_devinfo_mem(info);
1894
1895                 spin_lock_irqsave(&device_domain_lock, flags);
1896         }
1897         spin_unlock_irqrestore(&device_domain_lock, flags);
1898 }
1899
1900 /*
1901  * find_domain
1902  * Note: struct pci_dev->dev.archdata.iommu stores the device_domain_info
1903  */
1904 static struct dmar_domain *
1905 find_domain(struct pci_dev *pdev)
1906 {
1907         struct device_domain_info *info;
1908
1909         /* No lock here, assumes no domain exit in normal case */
1910         info = pdev->dev.archdata.iommu;
1911         if (info)
1912                 return info->domain;
1913         return NULL;
1914 }
1915
1916 /* Find an existing domain for the device, or allocate and initialize a new one */
1917 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1918 {
1919         struct dmar_domain *domain, *found = NULL;
1920         struct intel_iommu *iommu;
1921         struct dmar_drhd_unit *drhd;
1922         struct device_domain_info *info, *tmp;
1923         struct pci_dev *dev_tmp;
1924         unsigned long flags;
1925         int bus = 0, devfn = 0;
1926         int segment;
1927         int ret;
1928
1929         domain = find_domain(pdev);
1930         if (domain)
1931                 return domain;
1932
1933         segment = pci_domain_nr(pdev->bus);
1934
1935         dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1936         if (dev_tmp) {
1937                 if (pci_is_pcie(dev_tmp)) {
1938                         bus = dev_tmp->subordinate->number;
1939                         devfn = 0;
1940                 } else {
1941                         bus = dev_tmp->bus->number;
1942                         devfn = dev_tmp->devfn;
1943                 }
1944                 spin_lock_irqsave(&device_domain_lock, flags);
1945                 list_for_each_entry(info, &device_domain_list, global) {
1946                         if (info->segment == segment &&
1947                             info->bus == bus && info->devfn == devfn) {
1948                                 found = info->domain;
1949                                 break;
1950                         }
1951                 }
1952                 spin_unlock_irqrestore(&device_domain_lock, flags);
1953                 /* the pcie-pci bridge already has a domain, use it */
1954                 if (found) {
1955                         domain = found;
1956                         goto found_domain;
1957                 }
1958         }
1959
1960         domain = alloc_domain();
1961         if (!domain)
1962                 goto error;
1963
1964         /* Allocate new domain for the device */
1965         drhd = dmar_find_matched_drhd_unit(pdev);
1966         if (!drhd) {
1967                 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1968                         pci_name(pdev));
1969                 return NULL;
1970         }
1971         iommu = drhd->iommu;
1972
1973         ret = iommu_attach_domain(domain, iommu);
1974         if (ret) {
1975                 free_domain_mem(domain);
1976                 goto error;
1977         }
1978
1979         if (domain_init(domain, gaw)) {
1980                 domain_exit(domain);
1981                 goto error;
1982         }
1983
1984         /* register pcie-to-pci device */
1985         if (dev_tmp) {
1986                 info = alloc_devinfo_mem();
1987                 if (!info) {
1988                         domain_exit(domain);
1989                         goto error;
1990                 }
1991                 info->segment = segment;
1992                 info->bus = bus;
1993                 info->devfn = devfn;
1994                 info->dev = NULL;
1995                 info->domain = domain;
1996                 /* This domain is shared by devices under p2p bridge */
1997                 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1998
1999                 /* the pcie-to-pci bridge already has a domain, use it */
2000                 found = NULL;
2001                 spin_lock_irqsave(&device_domain_lock, flags);
2002                 list_for_each_entry(tmp, &device_domain_list, global) {
2003                         if (tmp->segment == segment &&
2004                             tmp->bus == bus && tmp->devfn == devfn) {
2005                                 found = tmp->domain;
2006                                 break;
2007                         }
2008                 }
2009                 if (found) {
2010                         spin_unlock_irqrestore(&device_domain_lock, flags);
2011                         free_devinfo_mem(info);
2012                         domain_exit(domain);
2013                         domain = found;
2014                 } else {
2015                         list_add(&info->link, &domain->devices);
2016                         list_add(&info->global, &device_domain_list);
2017                         spin_unlock_irqrestore(&device_domain_lock, flags);
2018                 }
2019         }
2020
2021 found_domain:
2022         info = alloc_devinfo_mem();
2023         if (!info)
2024                 goto error;
2025         info->segment = segment;
2026         info->bus = pdev->bus->number;
2027         info->devfn = pdev->devfn;
2028         info->dev = pdev;
2029         info->domain = domain;
2030         spin_lock_irqsave(&device_domain_lock, flags);
2031         /* somebody else was faster and already set it up */
2032         found = find_domain(pdev);
2033         if (found != NULL) {
2034                 spin_unlock_irqrestore(&device_domain_lock, flags);
2035                 if (found != domain) {
2036                         domain_exit(domain);
2037                         domain = found;
2038                 }
2039                 free_devinfo_mem(info);
2040                 return domain;
2041         }
2042         list_add(&info->link, &domain->devices);
2043         list_add(&info->global, &device_domain_list);
2044         pdev->dev.archdata.iommu = info;
2045         spin_unlock_irqrestore(&device_domain_lock, flags);
2046         return domain;
2047 error:
2048         /* recheck it here, maybe others set it */
2049         return find_domain(pdev);
2050 }
2051
2052 static int iommu_identity_mapping;
2053 #define IDENTMAP_ALL            1
2054 #define IDENTMAP_GFX            2
2055 #define IDENTMAP_AZALIA         4
2056
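/*
 * Reserve the IOVA range [start, end] in @domain and install a 1:1 mapping
 * for it, clearing any PTEs that already cover the range.
 */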
2057 static int iommu_domain_identity_map(struct dmar_domain *domain,
2058                                      unsigned long long start,
2059                                      unsigned long long end)
2060 {
2061         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2062         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2063
2064         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2065                           dma_to_mm_pfn(last_vpfn))) {
2066                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2067                 return -ENOMEM;
2068         }
2069
2070         pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2071                  start, end, domain->id);
2072         /*
2073          * RMRR range might have overlap with physical memory range,
2074          * clear it first
2075          */
2076         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2077
2078         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2079                                   last_vpfn - first_vpfn + 1,
2080                                   DMA_PTE_READ|DMA_PTE_WRITE);
2081 }
2082
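/*
 * Find or create a domain for @pdev and identity-map [start, end] into it,
 * after sanity-checking the range reported by the BIOS.
 */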
2083 static int iommu_prepare_identity_map(struct pci_dev *pdev,
2084                                       unsigned long long start,
2085                                       unsigned long long end)
2086 {
2087         struct dmar_domain *domain;
2088         int ret;
2089
2090         domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2091         if (!domain)
2092                 return -ENOMEM;
2093
2094         /* For _hardware_ passthrough, don't bother. But for software
2095            passthrough, we do it anyway -- it may indicate a memory
2096            range which is reserved in E820 and thus didn't get set
2097            up to start with in si_domain */
2098         if (domain == si_domain && hw_pass_through) {
2099                 printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2100                        pci_name(pdev), start, end);
2101                 return 0;
2102         }
2103
2104         printk(KERN_INFO
2105                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2106                pci_name(pdev), start, end);
2107
2108         if (end < start) {
2109                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2110                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2111                         dmi_get_system_info(DMI_BIOS_VENDOR),
2112                         dmi_get_system_info(DMI_BIOS_VERSION),
2113                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2114                 ret = -EIO;
2115                 goto error;
2116         }
2117
2118         if (end >> agaw_to_width(domain->agaw)) {
2119                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2120                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2121                      agaw_to_width(domain->agaw),
2122                      dmi_get_system_info(DMI_BIOS_VENDOR),
2123                      dmi_get_system_info(DMI_BIOS_VERSION),
2124                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2125                 ret = -EIO;
2126                 goto error;
2127         }
2128
2129         ret = iommu_domain_identity_map(domain, start, end);
2130         if (ret)
2131                 goto error;
2132
2133         /* context entry init */
2134         ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2135         if (ret)
2136                 goto error;
2137
2138         return 0;
2139
2140  error:
2141         domain_exit(domain);
2142         return ret;
2143 }
2144
2145 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2146         struct pci_dev *pdev)
2147 {
2148         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2149                 return 0;
2150         return iommu_prepare_identity_map(pdev, rmrr->base_address,
2151                 rmrr->end_address);
2152 }
2153
2154 #ifdef CONFIG_DMAR_FLOPPY_WA
2155 static inline void iommu_prepare_isa(void)
2156 {
2157         struct pci_dev *pdev;
2158         int ret;
2159
2160         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2161         if (!pdev)
2162                 return;
2163
2164         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2165         ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
2166
2167         if (ret)
2168                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2169                        "floppy might not work\n");
2170
2171 }
2172 #else
2173 static inline void iommu_prepare_isa(void)
2174 {
2175         return;
2176 }
2177 #endif /* !CONFIG_DMAR_FLOPPY_WA */
2178
2179 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2180
2181 static int __init si_domain_work_fn(unsigned long start_pfn,
2182                                     unsigned long end_pfn, void *datax)
2183 {
2184         int *ret = datax;
2185
2186         *ret = iommu_domain_identity_map(si_domain,
2187                                          (uint64_t)start_pfn << PAGE_SHIFT,
2188                                          (uint64_t)end_pfn << PAGE_SHIFT);
2189         return *ret;
2190
2191 }
2192
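/*
 * Allocate and initialize the static identity (si) domain, attach it to every
 * active IOMMU and, for software pass-through (!hw), identity-map all usable
 * physical memory regions into it.
 */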
2193 static int __init si_domain_init(int hw)
2194 {
2195         struct dmar_drhd_unit *drhd;
2196         struct intel_iommu *iommu;
2197         int nid, ret = 0;
2198
2199         si_domain = alloc_domain();
2200         if (!si_domain)
2201                 return -EFAULT;
2202
2203         pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2204
2205         for_each_active_iommu(iommu, drhd) {
2206                 ret = iommu_attach_domain(si_domain, iommu);
2207                 if (ret) {
2208                         domain_exit(si_domain);
2209                         return -EFAULT;
2210                 }
2211         }
2212
2213         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2214                 domain_exit(si_domain);
2215                 return -EFAULT;
2216         }
2217
2218         si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2219
2220         if (hw)
2221                 return 0;
2222
2223         for_each_online_node(nid) {
2224                 work_with_active_regions(nid, si_domain_work_fn, &ret);
2225                 if (ret)
2226                         return ret;
2227         }
2228
2229         return 0;
2230 }
2231
2232 static void domain_remove_one_dev_info(struct dmar_domain *domain,
2233                                           struct pci_dev *pdev);
2234 static int identity_mapping(struct pci_dev *pdev)
2235 {
2236         struct device_domain_info *info;
2237
2238         if (likely(!iommu_identity_mapping))
2239                 return 0;
2240
2241         info = pdev->dev.archdata.iommu;
2242         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2243                 return (info->domain == si_domain);
2244
2245         return 0;
2246 }
2247
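/*
 * Set up context mapping for @pdev in @domain and record the device on the
 * domain's device list and the global device_domain_list.
 */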
2248 static int domain_add_dev_info(struct dmar_domain *domain,
2249                                struct pci_dev *pdev,
2250                                int translation)
2251 {
2252         struct device_domain_info *info;
2253         unsigned long flags;
2254         int ret;
2255
2256         info = alloc_devinfo_mem();
2257         if (!info)
2258                 return -ENOMEM;
2259
2260         ret = domain_context_mapping(domain, pdev, translation);
2261         if (ret) {
2262                 free_devinfo_mem(info);
2263                 return ret;
2264         }
2265
2266         info->segment = pci_domain_nr(pdev->bus);
2267         info->bus = pdev->bus->number;
2268         info->devfn = pdev->devfn;
2269         info->dev = pdev;
2270         info->domain = domain;
2271
2272         spin_lock_irqsave(&device_domain_lock, flags);
2273         list_add(&info->link, &domain->devices);
2274         list_add(&info->global, &device_domain_list);
2275         pdev->dev.archdata.iommu = info;
2276         spin_unlock_irqrestore(&device_domain_lock, flags);
2277
2278         return 0;
2279 }
2280
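/*
 * Decide whether @pdev should be placed in the static 1:1 domain. Devices
 * behind conventional PCI bridges are excluded because they share a
 * source-id; after startup the device's DMA mask must also be large enough
 * to cover all of memory.
 */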
2281 static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2282 {
2283         if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2284                 return 1;
2285
2286         if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2287                 return 1;
2288
2289         if (!(iommu_identity_mapping & IDENTMAP_ALL))
2290                 return 0;
2291
2292         /*
2293          * We want to start off with all devices in the 1:1 domain, and
2294          * take them out later if we find they can't access all of memory.
2295          *
2296          * However, we can't do this for PCI devices behind bridges,
2297          * because all PCI devices behind the same bridge will end up
2298          * with the same source-id on their transactions.
2299          *
2300          * Practically speaking, we can't change things around for these
2301          * devices at run-time, because we can't be sure there'll be no
2302          * DMA transactions in flight for any of their siblings.
2303          * 
2304          * So PCI devices (unless they're on the root bus) as well as
2305          * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2306          * the 1:1 domain, just in _case_ one of their siblings turns out
2307          * not to be able to map all of memory.
2308          */
2309         if (!pci_is_pcie(pdev)) {
2310                 if (!pci_is_root_bus(pdev->bus))
2311                         return 0;
2312                 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2313                         return 0;
2314         } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2315                 return 0;
2316
2317         /* 
2318          * At boot time, we don't yet know if devices will be 64-bit capable.
2319          * Assume that they will -- if they turn out not to be, then we can 
2320          * take them out of the 1:1 domain later.
2321          */
2322         if (!startup) {
2323                 /*
2324                  * If the device's dma_mask is less than the system's memory
2325                  * size then this is not a candidate for identity mapping.
2326                  */
2327                 u64 dma_mask = pdev->dma_mask;
2328
2329                 if (pdev->dev.coherent_dma_mask &&
2330                     pdev->dev.coherent_dma_mask < dma_mask)
2331                         dma_mask = pdev->dev.coherent_dma_mask;
2332
2333                 return dma_mask >= dma_get_required_mask(&pdev->dev);
2334         }
2335
2336         return 1;
2337 }
2338
2339 static int __init iommu_prepare_static_identity_mapping(int hw)
2340 {
2341         struct pci_dev *pdev = NULL;
2342         int ret;
2343
2344         ret = si_domain_init(hw);
2345         if (ret)
2346                 return -EFAULT;
2347
2348         for_each_pci_dev(pdev) {
2349                 /* Skip Host/PCI Bridge devices */
2350                 if (IS_BRIDGE_HOST_DEVICE(pdev))
2351                         continue;
2352                 if (iommu_should_identity_map(pdev, 1)) {
2353                         printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2354                                hw ? "hardware" : "software", pci_name(pdev));
2355
2356                         ret = domain_add_dev_info(si_domain, pdev,
2357                                                      hw ? CONTEXT_TT_PASS_THROUGH :
2358                                                      CONTEXT_TT_MULTI_LEVEL);
2359                         if (ret)
2360                                 return ret;
2361                 }
2362         }
2363
2364         return 0;
2365 }
2366
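/*
 * Boot-time initialization: allocate per-IOMMU state and root entries, pick
 * register-based or queued invalidation, set up identity, RMRR and ISA
 * mappings, then enable fault reporting and translation on every unit.
 */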
2367 static int __init init_dmars(void)
2368 {
2369         struct dmar_drhd_unit *drhd;
2370         struct dmar_rmrr_unit *rmrr;
2371         struct pci_dev *pdev;
2372         struct intel_iommu *iommu;
2373         int i, ret;
2374
2375         /*
2376          * for each drhd
2377          *    allocate root
2378          *    initialize and program root entry to not present
2379          * endfor
2380          */
2381         for_each_drhd_unit(drhd) {
2382                 g_num_of_iommus++;
2383                 /*
2384                  * lock not needed as this is only incremented in the single-
2385                  * threaded kernel __init code path; all other accesses are
2386                  * read only
2387                  */
2388         }
2389
2390         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2391                         GFP_KERNEL);
2392         if (!g_iommus) {
2393                 printk(KERN_ERR "Allocating global iommu array failed\n");
2394                 ret = -ENOMEM;
2395                 goto error;
2396         }
2397
2398         deferred_flush = kzalloc(g_num_of_iommus *
2399                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2400         if (!deferred_flush) {
2401                 ret = -ENOMEM;
2402                 goto error;
2403         }
2404
2405         for_each_drhd_unit(drhd) {
2406                 if (drhd->ignored)
2407                         continue;
2408
2409                 iommu = drhd->iommu;
2410                 g_iommus[iommu->seq_id] = iommu;
2411
2412                 ret = iommu_init_domains(iommu);
2413                 if (ret)
2414                         goto error;
2415
2416                 /*
2417                  * TBD:
2418                  * we could share the same root & context tables
2419                  * among all IOMMUs. Need to split it later.
2420                  */
2421                 ret = iommu_alloc_root_entry(iommu);
2422                 if (ret) {
2423                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2424                         goto error;
2425                 }
2426                 if (!ecap_pass_through(iommu->ecap))
2427                         hw_pass_through = 0;
2428         }
2429
2430         /*
2431          * Start from a sane iommu hardware state.
2432          */
2433         for_each_drhd_unit(drhd) {
2434                 if (drhd->ignored)
2435                         continue;
2436
2437                 iommu = drhd->iommu;
2438
2439                 /*
2440                  * If queued invalidation was already initialized by us
2441                  * (for example, while enabling interrupt-remapping) then
2442                  * things are already rolling from a sane state.
2443                  */
2444                 if (iommu->qi)
2445                         continue;
2446
2447                 /*
2448                  * Clear any previous faults.
2449                  */
2450                 dmar_fault(-1, iommu);
2451                 /*
2452                  * Disable queued invalidation if supported and already enabled
2453                  * before OS handover.
2454                  */
2455                 dmar_disable_qi(iommu);
2456         }
2457
2458         for_each_drhd_unit(drhd) {
2459                 if (drhd->ignored)
2460                         continue;
2461
2462                 iommu = drhd->iommu;
2463
2464                 if (dmar_enable_qi(iommu)) {
2465                         /*
2466                          * Queued Invalidate not enabled, use Register Based
2467                          * Invalidate
2468                          */
2469                         iommu->flush.flush_context = __iommu_flush_context;
2470                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2471                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2472                                "invalidation\n",
2473                                 iommu->seq_id,
2474                                (unsigned long long)drhd->reg_base_addr);
2475                 } else {
2476                         iommu->flush.flush_context = qi_flush_context;
2477                         iommu->flush.flush_iotlb = qi_flush_iotlb;
2478                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2479                                "invalidation\n",
2480                                 iommu->seq_id,
2481                                (unsigned long long)drhd->reg_base_addr);
2482                 }
2483         }
2484
2485         if (iommu_pass_through)
2486                 iommu_identity_mapping |= IDENTMAP_ALL;
2487
2488 #ifdef CONFIG_DMAR_BROKEN_GFX_WA
2489         iommu_identity_mapping |= IDENTMAP_GFX;
2490 #endif
2491
2492         check_tylersburg_isoch();
2493
2494         /*
2495          * If any form of identity mapping was requested (pass-through or the
2496          * static 1:1 domain), set up the si_domain and the context entries
2497          * for the devices that should use it.
2498          */
2499         if (iommu_identity_mapping) {
2500                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2501                 if (ret) {
2502                         printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2503                         goto error;
2504                 }
2505         }
2506         /*
2507          * For each rmrr
2508          *   for each dev attached to rmrr
2509          *   do
2510          *     locate drhd for dev, alloc domain for dev
2511          *     allocate free domain
2512          *     allocate page table entries for rmrr
2513          *     if context not allocated for bus
2514          *           allocate and init context
2515          *           set present in root table for this bus
2516          *     init context with domain, translation etc
2517          *    endfor
2518          * endfor
2519          */
2520         printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2521         for_each_rmrr_units(rmrr) {
2522                 for (i = 0; i < rmrr->devices_cnt; i++) {
2523                         pdev = rmrr->devices[i];
2524                         /*
2525                          * Some BIOSes list non-existent devices in the
2526                          * DMAR table.
2527                          */
2528                         if (!pdev)
2529                                 continue;
2530                         ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2531                         if (ret)
2532                                 printk(KERN_ERR
2533                                        "IOMMU: mapping reserved region failed\n");
2534                 }
2535         }
2536
2537         iommu_prepare_isa();
2538
2539         /*
2540          * for each drhd
2541          *   enable fault log
2542          *   global invalidate context cache
2543          *   global invalidate iotlb
2544          *   enable translation
2545          */
2546         for_each_drhd_unit(drhd) {
2547                 if (drhd->ignored) {
2548                         /*
2549                          * we always have to disable PMRs or DMA may fail on
2550                          * this device
2551                          */
2552                         if (force_on)
2553                                 iommu_disable_protect_mem_regions(drhd->iommu);
2554                         continue;
2555                 }
2556                 iommu = drhd->iommu;
2557
2558                 iommu_flush_write_buffer(iommu);
2559
2560                 ret = dmar_set_interrupt(iommu);
2561                 if (ret)
2562                         goto error;
2563
2564                 iommu_set_root_entry(iommu);
2565
2566                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2567                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2568
2569                 ret = iommu_enable_translation(iommu);
2570                 if (ret)
2571                         goto error;
2572
2573                 iommu_disable_protect_mem_regions(iommu);
2574         }
2575
2576         return 0;
2577 error:
2578         for_each_drhd_unit(drhd) {
2579                 if (drhd->ignored)
2580                         continue;
2581                 iommu = drhd->iommu;
2582                 free_iommu(iommu);
2583         }
2584         kfree(g_iommus);
2585         return ret;
2586 }
2587
2588 /* This takes a number of _MM_ pages, not VTD pages */
2589 static struct iova *intel_alloc_iova(struct device *dev,
2590                                      struct dmar_domain *domain,
2591                                      unsigned long nrpages, uint64_t dma_mask)
2592 {
2593         struct pci_dev *pdev = to_pci_dev(dev);
2594         struct iova *iova = NULL;
2595
2596         /* Restrict dma_mask to the width that the iommu can handle */
2597         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2598
2599         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2600                 /*
2601                  * First try to allocate an io virtual address in
2602                  * DMA_BIT_MASK(32) and if that fails then try allocating
2603                  * from higher range
2604                  */
2605                 iova = alloc_iova(&domain->iovad, nrpages,
2606                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
2607                 if (iova)
2608                         return iova;
2609         }
2610         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2611         if (unlikely(!iova)) {
2612                 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2613                        nrpages, pci_name(pdev));
2614                 return NULL;
2615         }
2616
2617         return iova;
2618 }
2619
2620 static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2621 {
2622         struct dmar_domain *domain;
2623         int ret;
2624
2625         domain = get_domain_for_dev(pdev,
2626                         DEFAULT_DOMAIN_ADDRESS_WIDTH);
2627         if (!domain) {
2628                 printk(KERN_ERR
2629                         "Allocating domain for %s failed\n", pci_name(pdev));
2630                 return NULL;
2631         }
2632
2633         /* make sure context mapping is ok */
2634         if (unlikely(!domain_context_mapped(pdev))) {
2635                 ret = domain_context_mapping(domain, pdev,
2636                                              CONTEXT_TT_MULTI_LEVEL);
2637                 if (ret) {
2638                         printk(KERN_ERR
2639                                 "Domain context map for %s failed\n",
2640                                 pci_name(pdev));
2641                         return NULL;
2642                 }
2643         }
2644
2645         return domain;
2646 }
2647
2648 static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2649 {
2650         struct device_domain_info *info;
2651
2652         /* No lock here, assumes no domain exit in normal case */
2653         info = dev->dev.archdata.iommu;
2654         if (likely(info))
2655                 return info->domain;
2656
2657         return __get_valid_domain_for_dev(dev);
2658 }
2659
2660 static int iommu_dummy(struct pci_dev *pdev)
2661 {
2662         return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2663 }
2664
2665 /* Check if the pdev needs to go through non-identity map and unmap process.*/
2666 static int iommu_no_mapping(struct device *dev)
2667 {
2668         struct pci_dev *pdev;
2669         int found;
2670
2671         if (unlikely(dev->bus != &pci_bus_type))
2672                 return 1;
2673
2674         pdev = to_pci_dev(dev);
2675         if (iommu_dummy(pdev))
2676                 return 1;
2677
2678         if (!iommu_identity_mapping)
2679                 return 0;
2680
2681         found = identity_mapping(pdev);
2682         if (found) {
2683                 if (iommu_should_identity_map(pdev, 0))
2684                         return 1;
2685                 else {
2686                         /*
2687                          * A 32 bit DMA device is removed from si_domain and
2688                          * falls back to non-identity mapping.
2689                          */
2690                         domain_remove_one_dev_info(si_domain, pdev);
2691                         printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2692                                pci_name(pdev));
2693                         return 0;
2694                 }
2695         } else {
2696                 /*
2697                  * A 64 bit DMA device detached from a VM is put back into
2698                  * si_domain for identity mapping.
2699                  */
2700                 if (iommu_should_identity_map(pdev, 0)) {
2701                         int ret;
2702                         ret = domain_add_dev_info(si_domain, pdev,
2703                                                   hw_pass_through ?
2704                                                   CONTEXT_TT_PASS_THROUGH :
2705                                                   CONTEXT_TT_MULTI_LEVEL);
2706                         if (!ret) {
2707                                 printk(KERN_INFO "64bit %s uses identity mapping\n",
2708                                        pci_name(pdev));
2709                                 return 1;
2710                         }
2711                 }
2712         }
2713
2714         return 0;
2715 }
2716
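/*
 * Map @size bytes at @paddr for DMA: allocate an IOVA range below @dma_mask,
 * install PTEs with the appropriate read/write protections and flush the
 * IOTLB (or write buffer) as dictated by the caching mode. Returns the DMA
 * address, or 0 on failure.
 */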
2717 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2718                                      size_t size, int dir, u64 dma_mask)
2719 {
2720         struct pci_dev *pdev = to_pci_dev(hwdev);
2721         struct dmar_domain *domain;
2722         phys_addr_t start_paddr;
2723         struct iova *iova;
2724         int prot = 0;
2725         int ret;
2726         struct intel_iommu *iommu;
2727         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2728
2729         BUG_ON(dir == DMA_NONE);
2730
2731         if (iommu_no_mapping(hwdev))
2732                 return paddr;
2733
2734         domain = get_valid_domain_for_dev(pdev);
2735         if (!domain)
2736                 return 0;
2737
2738         iommu = domain_get_iommu(domain);
2739         size = aligned_nrpages(paddr, size);
2740
2741         iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2742         if (!iova)
2743                 goto error;
2744
2745         /*
2746          * Check if DMAR supports zero-length reads on write only
2747          * mappings.
2748          */
2749         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2750                         !cap_zlr(iommu->cap))
2751                 prot |= DMA_PTE_READ;
2752         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2753                 prot |= DMA_PTE_WRITE;
2754         /*
2755          * The range [paddr, paddr + size) might cover only part of a page, so
2756          * map the whole page.  Note: if two parts of one page are mapped
2757          * separately, we might end up with two guest addresses mapping to the
2758          * same host paddr, but this is not a big problem.
2759          */
2760         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2761                                  mm_to_dma_pfn(paddr_pfn), size, prot);
2762         if (ret)
2763                 goto error;
2764
2765         /* it's a non-present to present mapping. Only flush if caching mode */
2766         if (cap_caching_mode(iommu->cap))
2767                 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2768         else
2769                 iommu_flush_write_buffer(iommu);
2770
2771         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2772         start_paddr += paddr & ~PAGE_MASK;
2773         return start_paddr;
2774
2775 error:
2776         if (iova)
2777                 __free_iova(&domain->iovad, iova);
2778         printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
2779                 pci_name(pdev), size, (unsigned long long)paddr, dir);
2780         return 0;
2781 }
2782
2783 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2784                                  unsigned long offset, size_t size,
2785                                  enum dma_data_direction dir,
2786                                  struct dma_attrs *attrs)
2787 {
2788         return __intel_map_single(dev, page_to_phys(page) + offset, size,
2789                                   dir, to_pci_dev(dev)->dma_mask);
2790 }
2791
2792 static void flush_unmaps(void)
2793 {
2794         int i, j;
2795
2796         timer_on = 0;
2797
2798         /* just flush them all */
2799         for (i = 0; i < g_num_of_iommus; i++) {
2800                 struct intel_iommu *iommu = g_iommus[i];
2801                 if (!iommu)
2802                         continue;
2803
2804                 if (!deferred_flush[i].next)
2805                         continue;
2806
2807                 /* In caching mode, global flushes make emulation expensive */
2808                 if (!cap_caching_mode(iommu->cap))
2809                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2810                                          DMA_TLB_GLOBAL_FLUSH);
2811                 for (j = 0; j < deferred_flush[i].next; j++) {
2812                         unsigned long mask;
2813                         struct iova *iova = deferred_flush[i].iova[j];
2814                         struct dmar_domain *domain = deferred_flush[i].domain[j];
2815
2816                         /* On real hardware multiple invalidations are expensive */
2817                         if (cap_caching_mode(iommu->cap))
2818                                 iommu_flush_iotlb_psi(iommu, domain->id,
2819                                 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2820                         else {
2821                                 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2822                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2823                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2824                         }
2825                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2826                 }
2827                 deferred_flush[i].next = 0;
2828         }
2829
2830         list_size = 0;
2831 }
2832
2833 static void flush_unmaps_timeout(unsigned long data)
2834 {
2835         unsigned long flags;
2836
2837         spin_lock_irqsave(&async_umap_flush_lock, flags);
2838         flush_unmaps();
2839         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2840 }
2841
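/*
 * Queue an IOVA on the owning IOMMU's deferred_flush list and arm the unmap
 * timer; if the list has reached HIGH_WATER_MARK, flush the pending entries
 * immediately first.
 */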
2842 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2843 {
2844         unsigned long flags;
2845         int next, iommu_id;
2846         struct intel_iommu *iommu;
2847
2848         spin_lock_irqsave(&async_umap_flush_lock, flags);
2849         if (list_size == HIGH_WATER_MARK)
2850                 flush_unmaps();
2851
2852         iommu = domain_get_iommu(dom);
2853         iommu_id = iommu->seq_id;
2854
2855         next = deferred_flush[iommu_id].next;
2856         deferred_flush[iommu_id].domain[next] = dom;
2857         deferred_flush[iommu_id].iova[next] = iova;
2858         deferred_flush[iommu_id].next++;
2859
2860         if (!timer_on) {
2861                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2862                 timer_on = 1;
2863         }
2864         list_size++;
2865         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2866 }
2867
2868 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2869                              size_t size, enum dma_data_direction dir,
2870                              struct dma_attrs *attrs)
2871 {
2872         struct pci_dev *pdev = to_pci_dev(dev);
2873         struct dmar_domain *domain;
2874         unsigned long start_pfn, last_pfn;
2875         struct iova *iova;
2876         struct intel_iommu *iommu;
2877
2878         if (iommu_no_mapping(dev))
2879                 return;
2880
2881         domain = find_domain(pdev);
2882         BUG_ON(!domain);
2883
2884         iommu = domain_get_iommu(domain);
2885
2886         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2887         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2888                       (unsigned long long)dev_addr))
2889                 return;
2890
2891         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2892         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2893
2894         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2895                  pci_name(pdev), start_pfn, last_pfn);
2896
2897         /* clear the whole mapped range */
2898         dma_pte_clear_range(domain, start_pfn, last_pfn);
2899
2900         /* free page tables */
2901         dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2902
2903         if (intel_iommu_strict) {
2904                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2905                                       last_pfn - start_pfn + 1, 0);
2906                 /* free iova */
2907                 __free_iova(&domain->iovad, iova);
2908         } else {
2909                 add_unmap(domain, iova);
2910                 /*
2911                  * queue up the release of the unmap to save roughly 1/6th of
2912                  * the cpu time used up by the iotlb flush operation...
2913                  */
2914         }
2915 }
2916
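/*
 * Allocate a zeroed, page-aligned buffer and map it for bidirectional DMA
 * under the device's coherent DMA mask.
 */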
2917 static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2918                                   dma_addr_t *dma_handle, gfp_t flags)
2919 {
2920         void *vaddr;
2921         int order;
2922
2923         size = PAGE_ALIGN(size);
2924         order = get_order(size);
2925
2926         if (!iommu_no_mapping(hwdev))
2927                 flags &= ~(GFP_DMA | GFP_DMA32);
2928         else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2929                 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2930                         flags |= GFP_DMA;
2931                 else
2932                         flags |= GFP_DMA32;
2933         }
2934
2935         vaddr = (void *)__get_free_pages(flags, order);
2936         if (!vaddr)
2937                 return NULL;
2938         memset(vaddr, 0, size);
2939
2940         *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2941                                          DMA_BIDIRECTIONAL,
2942                                          hwdev->coherent_dma_mask);
2943         if (*dma_handle)
2944                 return vaddr;
2945         free_pages((unsigned long)vaddr, order);
2946         return NULL;
2947 }
2948
2949 static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2950                                 dma_addr_t dma_handle)
2951 {
2952         int order;
2953
2954         size = PAGE_ALIGN(size);
2955         order = get_order(size);
2956
2957         intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
2958         free_pages((unsigned long)vaddr, order);
2959 }
2960
2961 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2962                            int nelems, enum dma_data_direction dir,
2963                            struct dma_attrs *attrs)
2964 {
2965         struct pci_dev *pdev = to_pci_dev(hwdev);
2966         struct dmar_domain *domain;
2967         unsigned long start_pfn, last_pfn;
2968         struct iova *iova;
2969         struct intel_iommu *iommu;
2970
2971         if (iommu_no_mapping(hwdev))
2972                 return;
2973
2974         domain = find_domain(pdev);
2975         BUG_ON(!domain);
2976
2977         iommu = domain_get_iommu(domain);
2978
2979         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2980         if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2981                       (unsigned long long)sglist[0].dma_address))
2982                 return;
2983
2984         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2985         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2986
2987         /* clear the whole mapped range */
2988         dma_pte_clear_range(domain, start_pfn, last_pfn);
2989
2990         /* free page tables */
2991         dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2992
2993         if (intel_iommu_strict) {
2994                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2995                                       last_pfn - start_pfn + 1, 0);
2996                 /* free iova */
2997                 __free_iova(&domain->iovad, iova);
2998         } else {
2999                 add_unmap(domain, iova);
3000                 /*
3001                  * queue up the release of the unmap; this saves the ~1/6th of
3002                  * the CPU time that the iotlb flush operation would otherwise use.
3003                  */
3004         }
3005 }
3006
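/*
 * Used when the device bypasses translation (identity mapped or IOMMU
 * disabled for it): simply hand back the physical addresses.
 */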
3007 static int intel_nontranslate_map_sg(struct device *hwdev,
3008         struct scatterlist *sglist, int nelems, int dir)
3009 {
3010         int i;
3011         struct scatterlist *sg;
3012
3013         for_each_sg(sglist, sg, nelems, i) {
3014                 BUG_ON(!sg_page(sg));
3015                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3016                 sg->dma_length = sg->length;
3017         }
3018         return nelems;
3019 }
3020
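/*
 * DMA API ->map_sg handler: allocate one IOVA range large enough for the
 * whole scatterlist and map it in a single pass with domain_sg_mapping().
 */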
3021 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3022                         enum dma_data_direction dir, struct dma_attrs *attrs)
3023 {
3024         int i;
3025         struct pci_dev *pdev = to_pci_dev(hwdev);
3026         struct dmar_domain *domain;
3027         size_t size = 0;
3028         int prot = 0;
3029         struct iova *iova = NULL;
3030         int ret;
3031         struct scatterlist *sg;
3032         unsigned long start_vpfn;
3033         struct intel_iommu *iommu;
3034
3035         BUG_ON(dir == DMA_NONE);
3036         if (iommu_no_mapping(hwdev))
3037                 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3038
3039         domain = get_valid_domain_for_dev(pdev);
3040         if (!domain)
3041                 return 0;
3042
3043         iommu = domain_get_iommu(domain);
3044
3045         for_each_sg(sglist, sg, nelems, i)
3046                 size += aligned_nrpages(sg->offset, sg->length);
3047
3048         iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3049                                 pdev->dma_mask);
3050         if (!iova) {
3051                 sglist->dma_length = 0;
3052                 return 0;
3053         }
3054
3055         /*
3056          * Check if DMAR supports zero-length reads on write only
3057          * mappings..
3058          */
3059         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3060                         !cap_zlr(iommu->cap))
3061                 prot |= DMA_PTE_READ;
3062         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3063                 prot |= DMA_PTE_WRITE;
3064
3065         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3066
3067         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3068         if (unlikely(ret)) {
3069                 /*  clear the page */
3070                 dma_pte_clear_range(domain, start_vpfn,
3071                                     start_vpfn + size - 1);
3072                 /* free page tables */
3073                 dma_pte_free_pagetable(domain, start_vpfn,
3074                                        start_vpfn + size - 1);
3075                 /* free iova */
3076                 __free_iova(&domain->iovad, iova);
3077                 return 0;
3078         }
3079
3080         /* it's a non-present to present mapping. Only flush if caching mode */
3081         if (cap_caching_mode(iommu->cap))
3082                 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3083         else
3084                 iommu_flush_write_buffer(iommu);
3085
3086         return nelems;
3087 }
3088
3089 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3090 {
3091         return !dma_addr;
3092 }
3093
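/*
 * Drivers never call these handlers directly; they go through the generic
 * DMA API, which dispatches here once dma_ops points at intel_dma_ops.
 * Illustrative snippet:
 *
 *	buf = dma_alloc_coherent(&pdev->dev, size, &dma_handle, GFP_KERNEL);
 *	...
 *	dma_free_coherent(&pdev->dev, size, buf, dma_handle);
 */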
3094 struct dma_map_ops intel_dma_ops = {
3095         .alloc_coherent = intel_alloc_coherent,
3096         .free_coherent = intel_free_coherent,
3097         .map_sg = intel_map_sg,
3098         .unmap_sg = intel_unmap_sg,
3099         .map_page = intel_map_page,
3100         .unmap_page = intel_unmap_page,
3101         .mapping_error = intel_mapping_error,
3102 };
3103
3104 static inline int iommu_domain_cache_init(void)
3105 {
3106         int ret = 0;
3107
3108         iommu_domain_cache = kmem_cache_create("iommu_domain",
3109                                          sizeof(struct dmar_domain),
3110                                          0,
3111                                          SLAB_HWCACHE_ALIGN,
3112                                          NULL);
3113
3114         if (!iommu_domain_cache) {
3115                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3116                 ret = -ENOMEM;
3117         }
3118
3119         return ret;
3120 }
3121
3122 static inline int iommu_devinfo_cache_init(void)
3123 {
3124         int ret = 0;
3125
3126         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3127                                          sizeof(struct device_domain_info),
3128                                          0,
3129                                          SLAB_HWCACHE_ALIGN,
3130                                          NULL);
3131         if (!iommu_devinfo_cache) {
3132                 printk(KERN_ERR "Couldn't create devinfo cache\n");
3133                 ret = -ENOMEM;
3134         }
3135
3136         return ret;
3137 }
3138
3139 static inline int iommu_iova_cache_init(void)
3140 {
3141         int ret = 0;
3142
3143         iommu_iova_cache = kmem_cache_create("iommu_iova",
3144                                          sizeof(struct iova),
3145                                          0,
3146                                          SLAB_HWCACHE_ALIGN,
3147                                          NULL);
3148         if (!iommu_iova_cache) {
3149                 printk(KERN_ERR "Couldn't create iova cache\n");
3150                 ret = -ENOMEM;
3151         }
3152
3153         return ret;
3154 }
3155
3156 static int __init iommu_init_mempool(void)
3157 {
3158         int ret;
3159         ret = iommu_iova_cache_init();
3160         if (ret)
3161                 return ret;
3162
3163         ret = iommu_domain_cache_init();
3164         if (ret)
3165                 goto domain_error;
3166
3167         ret = iommu_devinfo_cache_init();
3168         if (!ret)
3169                 return ret;
3170
3171         kmem_cache_destroy(iommu_domain_cache);
3172 domain_error:
3173         kmem_cache_destroy(iommu_iova_cache);
3174
3175         return -ENOMEM;
3176 }
3177
3178 static void __init iommu_exit_mempool(void)
3179 {
3180         kmem_cache_destroy(iommu_devinfo_cache);
3181         kmem_cache_destroy(iommu_domain_cache);
3182         kmem_cache_destroy(iommu_iova_cache);
3183
3184 }
3185
3186 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3187 {
3188         struct dmar_drhd_unit *drhd;
3189         u32 vtbar;
3190         int rc;
3191
3192         /* We know that this device on this chipset has its own IOMMU.
3193          * If we find it under a different IOMMU, then the BIOS is lying
3194          * to us. Hope that the IOMMU for this device is actually
3195          * disabled, and it needs no translation...
3196          */
3197         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3198         if (rc) {
3199                 /* "can't" happen */
3200                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3201                 return;
3202         }
3203         vtbar &= 0xffff0000;
3204
3205         /* we know that this iommu should be at offset 0xa000 from vtbar */
3206         drhd = dmar_find_matched_drhd_unit(pdev);
3207         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3208                             TAINT_FIRMWARE_WORKAROUND,
3209                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3210                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3211 }
3212 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3213
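/*
 * Mark DMAR units that cover no PCI devices as ignored, and, when graphics
 * mapping is disabled, also bypass units that serve only graphics devices.
 */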
3214 static void __init init_no_remapping_devices(void)
3215 {
3216         struct dmar_drhd_unit *drhd;
3217
3218         for_each_drhd_unit(drhd) {
3219                 if (!drhd->include_all) {
3220                         int i;
3221                         for (i = 0; i < drhd->devices_cnt; i++)
3222                                 if (drhd->devices[i] != NULL)
3223                                         break;
3224                         /* ignore DMAR unit if no pci devices exist */
3225                         if (i == drhd->devices_cnt)
3226                                 drhd->ignored = 1;
3227                 }
3228         }
3229
3230         if (dmar_map_gfx)
3231                 return;
3232
3233         for_each_drhd_unit(drhd) {
3234                 int i;
3235                 if (drhd->ignored || drhd->include_all)
3236                         continue;
3237
3238                 for (i = 0; i < drhd->devices_cnt; i++)
3239                         if (drhd->devices[i] &&
3240                                 !IS_GFX_DEVICE(drhd->devices[i]))
3241                                 break;
3242
3243                 if (i < drhd->devices_cnt)
3244                         continue;
3245
3246                 /* bypass IOMMU if it is just for gfx devices */
3247                 drhd->ignored = 1;
3248                 for (i = 0; i < drhd->devices_cnt; i++) {
3249                         if (!drhd->devices[i])
3250                                 continue;
3251                         drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3252                 }
3253         }
3254 }
3255
3256 #ifdef CONFIG_SUSPEND
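/*
 * Reprogram and re-enable each IOMMU after the hardware has lost its state
 * (resume from suspend): restore the root entry, flush the context and
 * IOTLB caches globally, then turn translation back on.
 */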
3257 static int init_iommu_hw(void)
3258 {
3259         struct dmar_drhd_unit *drhd;
3260         struct intel_iommu *iommu = NULL;
3261
3262         for_each_active_iommu(iommu, drhd)
3263                 if (iommu->qi)
3264                         dmar_reenable_qi(iommu);
3265
3266         for_each_iommu(iommu, drhd) {
3267                 if (drhd->ignored) {
3268                         /*
3269                          * we always have to disable PMRs or DMA may fail on
3270                          * this device
3271                          */
3272                         if (force_on)
3273                                 iommu_disable_protect_mem_regions(iommu);
3274                         continue;
3275                 }
3276
3277                 iommu_flush_write_buffer(iommu);
3278
3279                 iommu_set_root_entry(iommu);
3280
3281                 iommu->flush.flush_context(iommu, 0, 0, 0,
3282                                            DMA_CCMD_GLOBAL_INVL);
3283                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3284                                          DMA_TLB_GLOBAL_FLUSH);
3285                 if (iommu_enable_translation(iommu))
3286                         return 1;
3287                 iommu_disable_protect_mem_regions(iommu);
3288         }
3289
3290         return 0;
3291 }
3292
3293 static void iommu_flush_all(void)
3294 {
3295         struct dmar_drhd_unit *drhd;
3296         struct intel_iommu *iommu;
3297
3298         for_each_active_iommu(iommu, drhd) {
3299                 iommu->flush.flush_context(iommu, 0, 0, 0,
3300                                            DMA_CCMD_GLOBAL_INVL);
3301                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3302                                          DMA_TLB_GLOBAL_FLUSH);
3303         }
3304 }
3305
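/*
 * Syscore suspend hook: flush all caches, disable translation and save the
 * fault-event registers of every active IOMMU so they can be restored on
 * resume.
 */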
3306 static int iommu_suspend(void)
3307 {
3308         struct dmar_drhd_unit *drhd;
3309         struct intel_iommu *iommu = NULL;
3310         unsigned long flag;
3311
3312         for_each_active_iommu(iommu, drhd) {
3313                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3314                                                  GFP_ATOMIC);
3315                 if (!iommu->iommu_state)
3316                         goto nomem;
3317         }
3318
3319         iommu_flush_all();
3320
3321         for_each_active_iommu(iommu, drhd) {
3322                 iommu_disable_translation(iommu);
3323
3324                 spin_lock_irqsave(&iommu->register_lock, flag);
3325
3326                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3327                         readl(iommu->reg + DMAR_FECTL_REG);
3328                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3329                         readl(iommu->reg + DMAR_FEDATA_REG);
3330                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3331                         readl(iommu->reg + DMAR_FEADDR_REG);
3332                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3333                         readl(iommu->reg + DMAR_FEUADDR_REG);
3334
3335                 spin_unlock_irqrestore(&iommu->register_lock, flag);
3336         }
3337         return 0;
3338
3339 nomem:
3340         for_each_active_iommu(iommu, drhd)
3341                 kfree(iommu->iommu_state);
3342
3343         return -ENOMEM;
3344 }
3345
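/*
 * Syscore resume hook: re-initialise the hardware via init_iommu_hw() and
 * restore the fault-event registers saved by iommu_suspend().
 */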
3346 static void iommu_resume(void)
3347 {
3348         struct dmar_drhd_unit *drhd;
3349         struct intel_iommu *iommu = NULL;
3350         unsigned long flag;
3351
3352         if (init_iommu_hw()) {
3353                 if (force_on)
3354                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3355                 else
3356                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3357                 return;
3358         }
3359
3360         for_each_active_iommu(iommu, drhd) {
3361
3362                 spin_lock_irqsave(&iommu->register_lock, flag);
3363
3364                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3365                         iommu->reg + DMAR_FECTL_REG);
3366                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3367                         iommu->reg + DMAR_FEDATA_REG);
3368                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3369                         iommu->reg + DMAR_FEADDR_REG);
3370                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3371                         iommu->reg + DMAR_FEUADDR_REG);
3372
3373                 spin_unlock_irqrestore(&iommu->register_lock, flag);
3374         }
3375
3376         for_each_active_iommu(iommu, drhd)
3377                 kfree(iommu->iommu_state);
3378 }
3379
3380 static struct syscore_ops iommu_syscore_ops = {
3381         .resume         = iommu_resume,
3382         .suspend        = iommu_suspend,
3383 };
3384
3385 static void __init init_iommu_pm_ops(void)
3386 {
3387         register_syscore_ops(&iommu_syscore_ops);
3388 }
3389
3390 #else
3391 static inline void init_iommu_pm_ops(void) {}
3392 #endif  /* CONFIG_SUSPEND */
3393
3394 /*
3395  * Here we only respond to action of unbound device from driver.
3396  *
3397  * Added device is not attached to its DMAR domain here yet. That will happen
3398  * when mapping the device to iova.
3399  */
3400 static int device_notifier(struct notifier_block *nb,
3401                                   unsigned long action, void *data)
3402 {
3403         struct device *dev = data;
3404         struct pci_dev *pdev = to_pci_dev(dev);
3405         struct dmar_domain *domain;
3406
3407         if (iommu_no_mapping(dev))
3408                 return 0;
3409
3410         domain = find_domain(pdev);
3411         if (!domain)
3412                 return 0;
3413
3414         if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3415                 domain_remove_one_dev_info(domain, pdev);
3416
3417                 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3418                     !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3419                     list_empty(&domain->devices))
3420                         domain_exit(domain);
3421         }
3422
3423         return 0;
3424 }
3425
3426 static struct notifier_block device_nb = {
3427         .notifier_call = device_notifier,
3428 };
3429
3430 int __init intel_iommu_init(void)
3431 {
3432         int ret = 0;
3433
3434         /* VT-d is required for a TXT/tboot launch, so enforce that */
3435         force_on = tboot_force_iommu();
3436
3437         if (dmar_table_init()) {
3438                 if (force_on)
3439                         panic("tboot: Failed to initialize DMAR table\n");
3440                 return  -ENODEV;
3441         }
3442
3443         if (dmar_dev_scope_init()) {
3444                 if (force_on)
3445                         panic("tboot: Failed to initialize DMAR device scope\n");
3446                 return  -ENODEV;
3447         }
3448
3449         /*
3450          * Check the need for DMA-remapping initialization now.
3451          * Above initialization will also be used by Interrupt-remapping.
3452          */
3453         if (no_iommu || dmar_disabled)
3454                 return -ENODEV;
3455
3456         if (iommu_init_mempool()) {
3457                 if (force_on)
3458                         panic("tboot: Failed to initialize iommu memory\n");
3459                 return  -ENODEV;
3460         }
3461
3462         if (dmar_init_reserved_ranges()) {
3463                 if (force_on)
3464                         panic("tboot: Failed to reserve iommu ranges\n");
3465                 return  -ENODEV;
3466         }
3467
3468         init_no_remapping_devices();
3469
3470         ret = init_dmars();
3471         if (ret) {
3472                 if (force_on)
3473                         panic("tboot: Failed to initialize DMARs\n");
3474                 printk(KERN_ERR "IOMMU: dmar init failed\n");
3475                 put_iova_domain(&reserved_iova_list);
3476                 iommu_exit_mempool();
3477                 return ret;
3478         }
3479         printk(KERN_INFO
3480         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3481
3482         init_timer(&unmap_timer);
3483 #ifdef CONFIG_SWIOTLB
3484         swiotlb = 0;
3485 #endif
3486         dma_ops = &intel_dma_ops;
3487
3488         init_iommu_pm_ops();
3489
3490         register_iommu(&intel_iommu_ops);
3491
3492         bus_register_notifier(&pci_bus_type, &device_nb);
3493
3494         return 0;
3495 }
3496
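/*
 * Tear down the context entries set up for the bridges between @pdev and
 * its IOMMU; devices behind a PCIe-to-PCI bridge issue requests tagged with
 * the bridge's source-id, so those entries must go away as well.
 */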
3497 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3498                                            struct pci_dev *pdev)
3499 {
3500         struct pci_dev *tmp, *parent;
3501
3502         if (!iommu || !pdev)
3503                 return;
3504
3505         /* dependent device detach */
3506         tmp = pci_find_upstream_pcie_bridge(pdev);
3507         /* Secondary interface's bus number and devfn 0 */
3508         if (tmp) {
3509                 parent = pdev->bus->self;
3510                 while (parent != tmp) {
3511                         iommu_detach_dev(iommu, parent->bus->number,
3512                                          parent->devfn);
3513                         parent = parent->bus->self;
3514                 }
3515                 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3516                         iommu_detach_dev(iommu,
3517                                 tmp->subordinate->number, 0);
3518                 else /* this is a legacy PCI bridge */
3519                         iommu_detach_dev(iommu, tmp->bus->number,
3520                                          tmp->devfn);
3521         }
3522 }
3523
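/*
 * Detach a single device from @domain: remove its device_domain_info and
 * clear its context entry.  If it was the last device of the domain on that
 * IOMMU, drop the IOMMU from the domain; for ordinary (non-VM, non-SI)
 * domains also release the domain id on that IOMMU.
 */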
3524 static void domain_remove_one_dev_info(struct dmar_domain *domain,
3525                                           struct pci_dev *pdev)
3526 {
3527         struct device_domain_info *info;
3528         struct intel_iommu *iommu;
3529         unsigned long flags;
3530         int found = 0;
3531         struct list_head *entry, *tmp;
3532
3533         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3534                                 pdev->devfn);
3535         if (!iommu)
3536                 return;
3537
3538         spin_lock_irqsave(&device_domain_lock, flags);
3539         list_for_each_safe(entry, tmp, &domain->devices) {
3540                 info = list_entry(entry, struct device_domain_info, link);
3541                 if (info->segment == pci_domain_nr(pdev->bus) &&
3542                     info->bus == pdev->bus->number &&
3543                     info->devfn == pdev->devfn) {
3544                         list_del(&info->link);
3545                         list_del(&info->global);
3546                         if (info->dev)
3547                                 info->dev->dev.archdata.iommu = NULL;
3548                         spin_unlock_irqrestore(&device_domain_lock, flags);
3549
3550                         iommu_disable_dev_iotlb(info);
3551                         iommu_detach_dev(iommu, info->bus, info->devfn);
3552                         iommu_detach_dependent_devices(iommu, pdev);
3553                         free_devinfo_mem(info);
3554
3555                         spin_lock_irqsave(&device_domain_lock, flags);
3556
3557                         if (found)
3558                                 break;
3559                         else
3560                                 continue;
3561                 }
3562
3563                 /* if there are no other devices under the same iommu
3564                  * owned by this domain, clear this iommu in iommu_bmp,
3565                  * and update the iommu count and coherency
3566                  */
3567                 if (iommu == device_to_iommu(info->segment, info->bus,
3568                                             info->devfn))
3569                         found = 1;
3570         }
3571
3572         if (found == 0) {
3573                 unsigned long tmp_flags;
3574                 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3575                 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3576                 domain->iommu_count--;
3577                 domain_update_iommu_cap(domain);
3578                 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3579
3580                 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3581                     !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3582                         spin_lock_irqsave(&iommu->lock, tmp_flags);
3583                         clear_bit(domain->id, iommu->domain_ids);
3584                         iommu->domains[domain->id] = NULL;
3585                         spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3586                 }
3587         }
3588
3589         spin_unlock_irqrestore(&device_domain_lock, flags);
3590 }
3591
3592 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3593 {
3594         struct device_domain_info *info;
3595         struct intel_iommu *iommu;
3596         unsigned long flags1, flags2;
3597
3598         spin_lock_irqsave(&device_domain_lock, flags1);
3599         while (!list_empty(&domain->devices)) {
3600                 info = list_entry(domain->devices.next,
3601                         struct device_domain_info, link);
3602                 list_del(&info->link);
3603                 list_del(&info->global);
3604                 if (info->dev)
3605                         info->dev->dev.archdata.iommu = NULL;
3606
3607                 spin_unlock_irqrestore(&device_domain_lock, flags1);
3608
3609                 iommu_disable_dev_iotlb(info);
3610                 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3611                 iommu_detach_dev(iommu, info->bus, info->devfn);
3612                 iommu_detach_dependent_devices(iommu, info->dev);
3613
3614                 /* clear this iommu in iommu_bmp, update iommu count
3615                  * and capabilities
3616                  */
3617                 spin_lock_irqsave(&domain->iommu_lock, flags2);
3618                 if (test_and_clear_bit(iommu->seq_id,
3619                                        &domain->iommu_bmp)) {
3620                         domain->iommu_count--;
3621                         domain_update_iommu_cap(domain);
3622                 }
3623                 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3624
3625                 free_devinfo_mem(info);
3626                 spin_lock_irqsave(&device_domain_lock, flags1);
3627         }
3628         spin_unlock_irqrestore(&device_domain_lock, flags1);
3629 }
3630
3631 /* domain id for virtual machine; it won't be set in context entries */
3632 static unsigned long vm_domid;
3633
3634 static struct dmar_domain *iommu_alloc_vm_domain(void)
3635 {
3636         struct dmar_domain *domain;
3637
3638         domain = alloc_domain_mem();
3639         if (!domain)
3640                 return NULL;
3641
3642         domain->id = vm_domid++;
3643         domain->nid = -1;
3644         memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3645         domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3646
3647         return domain;
3648 }
3649
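/*
 * Minimal setup for domains created through the IOMMU API: reserve the
 * special IOVA ranges, derive the AGAW from @guest_width and allocate the
 * top-level page directory.
 */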
3650 static int md_domain_init(struct dmar_domain *domain, int guest_width)
3651 {
3652         int adjust_width;
3653
3654         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3655         spin_lock_init(&domain->iommu_lock);
3656
3657         domain_reserve_special_ranges(domain);
3658
3659         /* calculate AGAW */
3660         domain->gaw = guest_width;
3661         adjust_width = guestwidth_to_adjustwidth(guest_width);
3662         domain->agaw = width_to_agaw(adjust_width);
3663
3664         INIT_LIST_HEAD(&domain->devices);
3665
3666         domain->iommu_count = 0;
3667         domain->iommu_coherency = 0;
3668         domain->iommu_snooping = 0;
3669         domain->iommu_superpage = 0;
3670         domain->max_addr = 0;
3671         domain->nid = -1;
3672
3673         /* always allocate the top pgd */
3674         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3675         if (!domain->pgd)
3676                 return -ENOMEM;
3677         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3678         return 0;
3679 }
3680
3681 static void iommu_free_vm_domain(struct dmar_domain *domain)
3682 {
3683         unsigned long flags;
3684         struct dmar_drhd_unit *drhd;
3685         struct intel_iommu *iommu;
3686         unsigned long i;
3687         unsigned long ndomains;
3688
3689         for_each_drhd_unit(drhd) {
3690                 if (drhd->ignored)
3691                         continue;
3692                 iommu = drhd->iommu;
3693
3694                 ndomains = cap_ndoms(iommu->cap);
3695                 for_each_set_bit(i, iommu->domain_ids, ndomains) {
3696                         if (iommu->domains[i] == domain) {
3697                                 spin_lock_irqsave(&iommu->lock, flags);
3698                                 clear_bit(i, iommu->domain_ids);
3699                                 iommu->domains[i] = NULL;
3700                                 spin_unlock_irqrestore(&iommu->lock, flags);
3701                                 break;
3702                         }
3703                 }
3704         }
3705 }
3706
3707 static void vm_domain_exit(struct dmar_domain *domain)
3708 {
3709         /* Domain 0 is reserved, so don't process it */
3710         if (!domain)
3711                 return;
3712
3713         vm_domain_remove_all_dev_info(domain);
3714         /* destroy iovas */
3715         put_iova_domain(&domain->iovad);
3716
3717         /* clear ptes */
3718         dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3719
3720         /* free page tables */
3721         dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3722
3723         iommu_free_vm_domain(domain);
3724         free_domain_mem(domain);
3725 }
3726
3727 static int intel_iommu_domain_init(struct iommu_domain *domain)
3728 {
3729         struct dmar_domain *dmar_domain;
3730
3731         dmar_domain = iommu_alloc_vm_domain();
3732         if (!dmar_domain) {
3733                 printk(KERN_ERR
3734                         "intel_iommu_domain_init: dmar_domain == NULL\n");
3735                 return -ENOMEM;
3736         }
3737         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3738                 printk(KERN_ERR
3739                         "intel_iommu_domain_init() failed\n");
3740                 vm_domain_exit(dmar_domain);
3741                 return -ENOMEM;
3742         }
3743         domain->priv = dmar_domain;
3744
3745         return 0;
3746 }
3747
3748 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3749 {
3750         struct dmar_domain *dmar_domain = domain->priv;
3751
3752         domain->priv = NULL;
3753         vm_domain_exit(dmar_domain);
3754 }
3755
3756 static int intel_iommu_attach_device(struct iommu_domain *domain,
3757                                      struct device *dev)
3758 {
3759         struct dmar_domain *dmar_domain = domain->priv;
3760         struct pci_dev *pdev = to_pci_dev(dev);
3761         struct intel_iommu *iommu;
3762         int addr_width;
3763
3764         /* normally pdev is not mapped */
3765         if (unlikely(domain_context_mapped(pdev))) {
3766                 struct dmar_domain *old_domain;
3767
3768                 old_domain = find_domain(pdev);
3769                 if (old_domain) {
3770                         if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3771                             dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3772                                 domain_remove_one_dev_info(old_domain, pdev);
3773                         else
3774                                 domain_remove_dev_info(old_domain);
3775                 }
3776         }
3777
3778         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3779                                 pdev->devfn);
3780         if (!iommu)
3781                 return -ENODEV;
3782
3783         /* check if this iommu agaw is sufficient for max mapped address */
3784         addr_width = agaw_to_width(iommu->agaw);
3785         if (addr_width > cap_mgaw(iommu->cap))
3786                 addr_width = cap_mgaw(iommu->cap);
3787
3788         if (dmar_domain->max_addr > (1LL << addr_width)) {
3789                 printk(KERN_ERR "%s: iommu width (%d) is not "
3790                        "sufficient for the mapped address (%llx)\n",
3791                        __func__, addr_width, dmar_domain->max_addr);
3792                 return -EFAULT;
3793         }
3794         dmar_domain->gaw = addr_width;
3795
3796         /*
3797          * Knock out extra levels of page tables if necessary
3798          */
3799         while (iommu->agaw < dmar_domain->agaw) {
3800                 struct dma_pte *pte;
3801
3802                 pte = dmar_domain->pgd;
3803                 if (dma_pte_present(pte)) {
3804                         dmar_domain->pgd = (struct dma_pte *)
3805                                 phys_to_virt(dma_pte_addr(pte));
3806                         free_pgtable_page(pte);
3807                 }
3808                 dmar_domain->agaw--;
3809         }
3810
3811         return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
3812 }
3813
3814 static void intel_iommu_detach_device(struct iommu_domain *domain,
3815                                       struct device *dev)
3816 {
3817         struct dmar_domain *dmar_domain = domain->priv;
3818         struct pci_dev *pdev = to_pci_dev(dev);
3819
3820         domain_remove_one_dev_info(dmar_domain, pdev);
3821 }
3822
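/*
 * IOMMU API ->map handler: translate IOMMU_READ/WRITE/CACHE flags into PTE
 * bits, update the domain's max_addr bookkeeping (checking the range still
 * fits the domain's address width) and install the mapping.
 */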
3823 static int intel_iommu_map(struct iommu_domain *domain,
3824                            unsigned long iova, phys_addr_t hpa,
3825                            int gfp_order, int iommu_prot)
3826 {
3827         struct dmar_domain *dmar_domain = domain->priv;
3828         u64 max_addr;
3829         int prot = 0;
3830         size_t size;
3831         int ret;
3832
3833         if (iommu_prot & IOMMU_READ)
3834                 prot |= DMA_PTE_READ;
3835         if (iommu_prot & IOMMU_WRITE)
3836                 prot |= DMA_PTE_WRITE;
3837         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3838                 prot |= DMA_PTE_SNP;
3839
3840         size     = PAGE_SIZE << gfp_order;
3841         max_addr = iova + size;
3842         if (dmar_domain->max_addr < max_addr) {
3843                 u64 end;
3844
3845                 /* check if minimum agaw is sufficient for mapped address */
3846                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
3847                 if (end < max_addr) {
3848                         printk(KERN_ERR "%s: iommu width (%d) is not "
3849                                "sufficient for the mapped address (%llx)\n",
3850                                __func__, dmar_domain->gaw, max_addr);
3851                         return -EFAULT;
3852                 }
3853                 dmar_domain->max_addr = max_addr;
3854         }
3855         /* Round up size to next multiple of PAGE_SIZE, if it and
3856            the low bits of hpa would take us onto the next page */
3857         size = aligned_nrpages(hpa, size);
3858         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3859                                  hpa >> VTD_PAGE_SHIFT, size, prot);
3860         return ret;
3861 }
3862
3863 static int intel_iommu_unmap(struct iommu_domain *domain,
3864                              unsigned long iova, int gfp_order)
3865 {
3866         struct dmar_domain *dmar_domain = domain->priv;
3867         size_t size = PAGE_SIZE << gfp_order;
3868
3869         dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3870                             (iova + size - 1) >> VTD_PAGE_SHIFT);
3871
3872         if (dmar_domain->max_addr == iova + size)
3873                 dmar_domain->max_addr = iova;
3874
3875         return gfp_order;
3876 }
3877
3878 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3879                                             unsigned long iova)
3880 {
3881         struct dmar_domain *dmar_domain = domain->priv;
3882         struct dma_pte *pte;
3883         u64 phys = 0;
3884
3885         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
3886         if (pte)
3887                 phys = dma_pte_addr(pte);
3888
3889         return phys;
3890 }
3891
3892 static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
3893                                       unsigned long cap)
3894 {
3895         struct dmar_domain *dmar_domain = domain->priv;
3896
3897         if (cap == IOMMU_CAP_CACHE_COHERENCY)
3898                 return dmar_domain->iommu_snooping;
3899         if (cap == IOMMU_CAP_INTR_REMAP)
3900                 return intr_remapping_enabled;
3901
3902         return 0;
3903 }
3904
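/*
 * Hooks for the generic IOMMU API (include/linux/iommu.h); users such as
 * KVM device assignment reach them through iommu_domain_alloc(),
 * iommu_attach_device(), iommu_map() and friends.
 */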
3905 static struct iommu_ops intel_iommu_ops = {
3906         .domain_init    = intel_iommu_domain_init,
3907         .domain_destroy = intel_iommu_domain_destroy,
3908         .attach_dev     = intel_iommu_attach_device,
3909         .detach_dev     = intel_iommu_detach_device,
3910         .map            = intel_iommu_map,
3911         .unmap          = intel_iommu_unmap,
3912         .iova_to_phys   = intel_iommu_iova_to_phys,
3913         .domain_has_cap = intel_iommu_domain_has_cap,
3914 };
3915
3916 static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
3917 {
3918         /*
3919          * Mobile 4 Series Chipset neglects to set RWBF capability,
3920          * but needs it:
3921          */
3922         printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
3923         rwbf_quirk = 1;
3924
3925         /* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
3926         if (dev->revision == 0x07) {
3927                 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
3928                 dmar_map_gfx = 0;
3929         }
3930 }
3931
3932 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
3933
3934 #define GGC 0x52
3935 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
3936 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
3937 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
3938 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
3939 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
3940 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
3941 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
3942 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
3943
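/*
 * If the BIOS has not allocated a shadow GTT (VT enable bit clear in the
 * GGC register), leave the integrated graphics device untranslated.
 */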
3944 static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
3945 {
3946         unsigned short ggc;
3947
3948         if (pci_read_config_word(dev, GGC, &ggc))
3949                 return;
3950
3951         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
3952                 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
3953                 dmar_map_gfx = 0;
3954         }
3955 }
3956 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
3957 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
3958 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
3959 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
3960
3961 /* On Tylersburg chipsets, some BIOSes have been known to enable the
3962    ISOCH DMAR unit for the Azalia sound device, but not give it any
3963    TLB entries, which causes it to deadlock. Check for that.  We do
3964    this in a function called from init_dmars(), instead of in a PCI
3965    quirk, because we don't want to print the obnoxious "BIOS broken"
3966    message if VT-d is actually disabled.
3967 */
3968 static void __init check_tylersburg_isoch(void)
3969 {
3970         struct pci_dev *pdev;
3971         uint32_t vtisochctrl;
3972
3973         /* If there's no Azalia in the system anyway, forget it. */
3974         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
3975         if (!pdev)
3976                 return;
3977         pci_dev_put(pdev);
3978
3979         /* System Management Registers. Might be hidden, in which case
3980            we can't do the sanity check. But that's OK, because the
3981            known-broken BIOSes _don't_ actually hide it, so far. */
3982         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
3983         if (!pdev)
3984                 return;
3985
3986         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
3987                 pci_dev_put(pdev);
3988                 return;
3989         }
3990
3991         pci_dev_put(pdev);
3992
3993         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
3994         if (vtisochctrl & 1)
3995                 return;
3996
3997         /* Drop all bits other than the number of TLB entries */
3998         vtisochctrl &= 0x1c;
3999
4000         /* If we have the recommended number of TLB entries (16), fine. */
4001         if (vtisochctrl == 0x10)
4002                 return;
4003
4004         /* Zero TLB entries? You get to ride the short bus to school. */
4005         if (!vtisochctrl) {
4006                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4007                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4008                      dmi_get_system_info(DMI_BIOS_VENDOR),
4009                      dmi_get_system_info(DMI_BIOS_VERSION),
4010                      dmi_get_system_info(DMI_PRODUCT_VERSION));
4011                 iommu_identity_mapping |= IDENTMAP_AZALIA;
4012                 return;
4013         }
4014
4015         printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4016                vtisochctrl);
4017 }