iommu/vt-d: Disable translation if already enabled
pandora-kernel.git: drivers/iommu/dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to the OS by the BIOS via the DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36 #include <linux/tboot.h>
37 #include <linux/dmi.h>
38 #include <linux/slab.h>
39 #include <asm/iommu_table.h>
40
41 #define PREFIX "DMAR: "
42
43 /* No locks are needed as DMA remapping hardware unit
44  * list is constructed at boot time and hotplug of
45  * these units is not supported by the architecture.
46  */
47 LIST_HEAD(dmar_drhd_units);
48
49 struct acpi_table_header * __initdata dmar_tbl;
50 static acpi_size dmar_tbl_size;
51
52 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
53 {
54         /*
55          * Add INCLUDE_ALL at the tail, so a scan of the list will find it at
56          * the very end.
57          */
58         if (drhd->include_all)
59                 list_add_tail(&drhd->list, &dmar_drhd_units);
60         else
61                 list_add(&drhd->list, &dmar_drhd_units);
62 }
63
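/*
 * Resolve one ACPI device scope entry to a pci_dev: locate the starting
 * bus in the given segment, then follow each PCI path element (device,
 * function) through subordinate buses until the final device is reached.
 */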
64 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
65                                            struct pci_dev **dev, u16 segment)
66 {
67         struct pci_bus *bus;
68         struct pci_dev *pdev = NULL;
69         struct acpi_dmar_pci_path *path;
70         int count;
71
72         bus = pci_find_bus(segment, scope->bus);
73         path = (struct acpi_dmar_pci_path *)(scope + 1);
74         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
75                 / sizeof(struct acpi_dmar_pci_path);
76
77         while (count) {
78                 if (pdev)
79                         pci_dev_put(pdev);
80                 /*
81                  * Some BIOSes list non-existent devices in the DMAR table;
82                  * just ignore them.
83                  */
84                 if (!bus) {
85                         printk(KERN_WARNING
86                         PREFIX "Device scope bus [%d] not found\n",
87                         scope->bus);
88                         break;
89                 }
90                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
91                 if (!pdev) {
92                         printk(KERN_WARNING PREFIX
93                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
94                                 segment, bus->number, path->dev, path->fn);
95                         break;
96                 }
97                 path++;
98                 count--;
99                 bus = pdev->subordinate;
100         }
101         if (!pdev) {
102                 printk(KERN_WARNING PREFIX
103                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
104                 segment, scope->bus, path->dev, path->fn);
105                 *dev = NULL;
106                 return 0;
107         }
108         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \
109                         pdev->subordinate) || (scope->entry_type == \
110                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
111                 pci_dev_put(pdev);
112                 printk(KERN_WARNING PREFIX
113                         "Device scope type does not match for %s\n",
114                          pci_name(pdev));
115                 return -EINVAL;
116         }
117         *dev = pdev;
118         return 0;
119 }
120
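/*
 * Parse all device scope entries between start and end: first count the
 * endpoint and bridge entries, then allocate the devices array and resolve
 * each entry to a pci_dev via dmar_parse_one_dev_scope().
 */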
121 int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
122                                 struct pci_dev ***devices, u16 segment)
123 {
124         struct acpi_dmar_device_scope *scope;
125         void * tmp = start;
126         int index;
127         int ret;
128
129         *cnt = 0;
130         while (start < end) {
131                 scope = start;
132                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
133                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
134                         (*cnt)++;
135                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
136                         printk(KERN_WARNING PREFIX
137                                "Unsupported device scope\n");
138                 }
139                 start += scope->length;
140         }
141         if (*cnt == 0)
142                 return 0;
143
144         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
145         if (!*devices)
146                 return -ENOMEM;
147
148         start = tmp;
149         index = 0;
150         while (start < end) {
151                 scope = start;
152                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
153                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
154                         ret = dmar_parse_one_dev_scope(scope,
155                                 &(*devices)[index], segment);
156                         if (ret) {
157                                 kfree(*devices);
158                                 return ret;
159                         }
160                         index++;
161                 }
162                 start += scope->length;
163         }
164
165         return 0;
166 }
167
168 /**
169  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
170  * structure which uniquely represents one DMA remapping hardware unit
171  * present in the platform
172  */
173 static int __init
174 dmar_parse_one_drhd(struct acpi_dmar_header *header)
175 {
176         struct acpi_dmar_hardware_unit *drhd;
177         struct dmar_drhd_unit *dmaru;
178         int ret = 0;
179
180         drhd = (struct acpi_dmar_hardware_unit *)header;
181         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
182         if (!dmaru)
183                 return -ENOMEM;
184
185         dmaru->hdr = header;
186         dmaru->reg_base_addr = drhd->address;
187         dmaru->segment = drhd->segment;
188         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
189
190         ret = alloc_iommu(dmaru);
191         if (ret) {
192                 kfree(dmaru);
193                 return ret;
194         }
195         dmar_register_drhd_unit(dmaru);
196         return 0;
197 }
198
199 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
200 {
201         struct acpi_dmar_hardware_unit *drhd;
202         int ret = 0;
203
204         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
205
206         if (dmaru->include_all)
207                 return 0;
208
209         ret = dmar_parse_dev_scope((void *)(drhd + 1),
210                                 ((void *)drhd) + drhd->header.length,
211                                 &dmaru->devices_cnt, &dmaru->devices,
212                                 drhd->segment);
213         if (ret) {
214                 list_del(&dmaru->list);
215                 kfree(dmaru);
216         }
217         return ret;
218 }
219
220 #ifdef CONFIG_ACPI_NUMA
221 static int __init
222 dmar_parse_one_rhsa(struct acpi_dmar_header *header)
223 {
224         struct acpi_dmar_rhsa *rhsa;
225         struct dmar_drhd_unit *drhd;
226
227         rhsa = (struct acpi_dmar_rhsa *)header;
228         for_each_drhd_unit(drhd) {
229                 if (drhd->reg_base_addr == rhsa->base_address) {
230                         int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
231
232                         if (!node_online(node))
233                                 node = -1;
234                         drhd->iommu->node = node;
235                         return 0;
236                 }
237         }
238         WARN_TAINT(
239                 1, TAINT_FIRMWARE_WORKAROUND,
240                 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
241                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
242                 drhd->reg_base_addr,
243                 dmi_get_system_info(DMI_BIOS_VENDOR),
244                 dmi_get_system_info(DMI_BIOS_VERSION),
245                 dmi_get_system_info(DMI_PRODUCT_VERSION));
246
247         return 0;
248 }
249 #endif
250
251 static void __init
252 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
253 {
254         struct acpi_dmar_hardware_unit *drhd;
255         struct acpi_dmar_reserved_memory *rmrr;
256         struct acpi_dmar_atsr *atsr;
257         struct acpi_dmar_rhsa *rhsa;
258
259         switch (header->type) {
260         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
261                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
262                                     header);
263                 printk(KERN_INFO PREFIX
264                         "DRHD base: %#016Lx flags: %#x\n",
265                         (unsigned long long)drhd->address, drhd->flags);
266                 break;
267         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
268                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
269                                     header);
270                 printk(KERN_INFO PREFIX
271                         "RMRR base: %#016Lx end: %#016Lx\n",
272                         (unsigned long long)rmrr->base_address,
273                         (unsigned long long)rmrr->end_address);
274                 break;
275         case ACPI_DMAR_TYPE_ATSR:
276                 atsr = container_of(header, struct acpi_dmar_atsr, header);
277                 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
278                 break;
279         case ACPI_DMAR_HARDWARE_AFFINITY:
280                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
281                 printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
282                        (unsigned long long)rhsa->base_address,
283                        rhsa->proximity_domain);
284                 break;
285         }
286 }
287
288 /**
289  * dmar_table_detect - checks to see if the platform supports DMAR devices
290  */
291 static int __init dmar_table_detect(void)
292 {
293         acpi_status status = AE_OK;
294
295         /* If we can find the DMAR table, then there are DMAR devices. */
296         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
297                                 (struct acpi_table_header **)&dmar_tbl,
298                                 &dmar_tbl_size);
299
300         if (ACPI_SUCCESS(status) && !dmar_tbl) {
301                 printk(KERN_WARNING PREFIX "Unable to map DMAR\n");
302                 status = AE_NOT_FOUND;
303         }
304
305         return (ACPI_SUCCESS(status) ? 1 : 0);
306 }
307
308 /**
309  * parse_dmar_table - parses the DMA remapping reporting (DMAR) table
310  */
311 static int __init
312 parse_dmar_table(void)
313 {
314         struct acpi_table_dmar *dmar;
315         struct acpi_dmar_header *entry_header;
316         int ret = 0;
317
318         /*
319          * Do the detection again; the earlier dmar_tbl mapping may have
320          * been set up with the early fixed map and must be re-established.
321          */
322         dmar_table_detect();
323
324         /*
325          * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
326          * that SINIT saved in SinitMleData in the TXT heap (which is DMA protected)
327          */
328         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
329
330         dmar = (struct acpi_table_dmar *)dmar_tbl;
331         if (!dmar)
332                 return -ENODEV;
333
334         if (dmar->width < PAGE_SHIFT - 1) {
335                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
336                 return -EINVAL;
337         }
338
339         printk(KERN_INFO PREFIX "Host address width %d\n",
340                 dmar->width + 1);
341
342         entry_header = (struct acpi_dmar_header *)(dmar + 1);
343         while (((unsigned long)entry_header) <
344                         (((unsigned long)dmar) + dmar_tbl->length)) {
345                 /* Avoid looping forever on bad ACPI tables */
346                 if (entry_header->length == 0) {
347                         printk(KERN_WARNING PREFIX
348                                 "Invalid 0-length structure\n");
349                         ret = -EINVAL;
350                         break;
351                 }
352
353                 dmar_table_print_dmar_entry(entry_header);
354
355                 switch (entry_header->type) {
356                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
357                         ret = dmar_parse_one_drhd(entry_header);
358                         break;
359                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
360                         ret = dmar_parse_one_rmrr(entry_header);
361                         break;
362                 case ACPI_DMAR_TYPE_ATSR:
363                         ret = dmar_parse_one_atsr(entry_header);
364                         break;
365                 case ACPI_DMAR_HARDWARE_AFFINITY:
366 #ifdef CONFIG_ACPI_NUMA
367                         ret = dmar_parse_one_rhsa(entry_header);
368 #endif
369                         break;
370                 default:
371                         printk(KERN_WARNING PREFIX
372                                 "Unknown DMAR structure type %d\n",
373                                 entry_header->type);
374                         ret = 0; /* for forward compatibility */
375                         break;
376                 }
377                 if (ret)
378                         break;
379
380                 entry_header = ((void *)entry_header + entry_header->length);
381         }
382         return ret;
383 }
384
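/*
 * Return 1 if @dev, or any PCI bridge above it, appears in the given
 * device array; 0 otherwise.
 */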
385 static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
386                           struct pci_dev *dev)
387 {
388         int index;
389
390         while (dev) {
391                 for (index = 0; index < cnt; index++)
392                         if (dev == devices[index])
393                                 return 1;
394
395                 /* Check our parent */
396                 dev = dev->bus->self;
397         }
398
399         return 0;
400 }
401
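/*
 * Find the DRHD unit whose device scope covers @dev, falling back to the
 * INCLUDE_ALL unit for the device's PCI segment. For SR-IOV virtual
 * functions the lookup is done on the physical function.
 */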
402 struct dmar_drhd_unit *
403 dmar_find_matched_drhd_unit(struct pci_dev *dev)
404 {
405         struct dmar_drhd_unit *dmaru = NULL;
406         struct acpi_dmar_hardware_unit *drhd;
407
408         dev = pci_physfn(dev);
409
410         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
411                 drhd = container_of(dmaru->hdr,
412                                     struct acpi_dmar_hardware_unit,
413                                     header);
414
415                 if (dmaru->include_all &&
416                     drhd->segment == pci_domain_nr(dev->bus))
417                         return dmaru;
418
419                 if (dmar_pci_device_match(dmaru->devices,
420                                           dmaru->devices_cnt, dev))
421                         return dmaru;
422         }
423
424         return NULL;
425 }
426
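/*
 * Parse the device scopes of all DRHD, RMRR and ATSR entries. Runs only
 * once; subsequent calls return the cached result.
 */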
427 int __init dmar_dev_scope_init(void)
428 {
429         static int dmar_dev_scope_initialized;
430         struct dmar_drhd_unit *drhd, *drhd_n;
431         int ret = -ENODEV;
432
433         if (dmar_dev_scope_initialized)
434                 return dmar_dev_scope_initialized;
435
436         if (list_empty(&dmar_drhd_units))
437                 goto fail;
438
439         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
440                 ret = dmar_parse_dev(drhd);
441                 if (ret)
442                         goto fail;
443         }
444
445         ret = dmar_parse_rmrr_atsr_dev();
446         if (ret)
447                 goto fail;
448
449         dmar_dev_scope_initialized = 1;
450         return 0;
451
452 fail:
453         dmar_dev_scope_initialized = ret;
454         return ret;
455 }
456
457
458 int __init dmar_table_init(void)
459 {
460         static int dmar_table_initialized;
461         int ret;
462
463         if (dmar_table_initialized)
464                 return 0;
465
466         dmar_table_initialized = 1;
467
468         ret = parse_dmar_table();
469         if (ret) {
470                 if (ret != -ENODEV)
471                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
472                 return ret;
473         }
474
475         if (list_empty(&dmar_drhd_units)) {
476                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
477                 return -ENODEV;
478         }
479
480         return 0;
481 }
482
483 static void warn_invalid_dmar(u64 addr, const char *message)
484 {
485         WARN_TAINT_ONCE(
486                 1, TAINT_FIRMWARE_WORKAROUND,
487                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
488                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
489                 addr, message,
490                 dmi_get_system_info(DMI_BIOS_VENDOR),
491                 dmi_get_system_info(DMI_BIOS_VERSION),
492                 dmi_get_system_info(DMI_PRODUCT_VERSION));
493 }
494
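/*
 * Sanity-check every DRHD entry in the DMAR table: the register base must
 * be non-zero and the capability registers must not read back as all ones.
 * Returns 1 if all entries look valid, 0 otherwise.
 */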
495 int __init check_zero_address(void)
496 {
497         struct acpi_table_dmar *dmar;
498         struct acpi_dmar_header *entry_header;
499         struct acpi_dmar_hardware_unit *drhd;
500
501         dmar = (struct acpi_table_dmar *)dmar_tbl;
502         entry_header = (struct acpi_dmar_header *)(dmar + 1);
503
504         while (((unsigned long)entry_header) <
505                         (((unsigned long)dmar) + dmar_tbl->length)) {
506                 /* Avoid looping forever on bad ACPI tables */
507                 if (entry_header->length == 0) {
508                         printk(KERN_WARNING PREFIX
509                                 "Invalid 0-length structure\n");
510                         return 0;
511                 }
512
513                 if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
514                         void __iomem *addr;
515                         u64 cap, ecap;
516
517                         drhd = (void *)entry_header;
518                         if (!drhd->address) {
519                                 warn_invalid_dmar(0, "");
520                                 goto failed;
521                         }
522
523                         addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
524                         if (!addr) {
525                                 printk("IOMMU: can't validate: %llx\n", drhd->address);
526                                 goto failed;
527                         }
528                         cap = dmar_readq(addr + DMAR_CAP_REG);
529                         ecap = dmar_readq(addr + DMAR_ECAP_REG);
530                         early_iounmap(addr, VTD_PAGE_SIZE);
531                         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
532                                 warn_invalid_dmar(drhd->address,
533                                                   " returns all ones");
534                                 goto failed;
535                         }
536                 }
537
538                 entry_header = ((void *)entry_header + entry_header->length);
539         }
540         return 1;
541
542 failed:
543         return 0;
544 }
545
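/*
 * Early detection of the DMAR ACPI table. If a sane table is found, mark
 * the IOMMU as detected (unless disabled), request PCI ACS and register
 * intel_iommu_init() as the x86 IOMMU init routine.
 */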
546 int __init detect_intel_iommu(void)
547 {
548         int ret;
549
550         ret = dmar_table_detect();
551         if (ret)
552                 ret = check_zero_address();
553         {
554                 struct acpi_table_dmar *dmar;
555
556                 dmar = (struct acpi_table_dmar *) dmar_tbl;
557
558                 if (ret && intr_remapping_enabled && cpu_has_x2apic &&
559                     dmar->flags & 0x1)
560                         printk(KERN_INFO
561                                "Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
562
563                 if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
564                         iommu_detected = 1;
565                         /* Make sure ACS will be enabled */
566                         pci_request_acs();
567                 }
568
569 #ifdef CONFIG_X86
570                 if (ret)
571                         x86_init.iommu.iommu_init = intel_iommu_init;
572 #endif
573         }
574         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
575         dmar_tbl = NULL;
576
577         return ret ? 1 : -ENODEV;
578 }
579
580
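/*
 * Allocate and initialize a struct intel_iommu for one DRHD unit: map its
 * register set, read the capability and extended capability registers,
 * compute the supported AGAW values and record any enables already set in
 * the global status register.
 */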
581 int alloc_iommu(struct dmar_drhd_unit *drhd)
582 {
583         struct intel_iommu *iommu;
584         int map_size;
585         u32 ver, sts;
586         static int iommu_allocated = 0;
587         int agaw = 0;
588         int msagaw = 0;
589
590         if (!drhd->reg_base_addr) {
591                 warn_invalid_dmar(0, "");
592                 return -EINVAL;
593         }
594
595         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
596         if (!iommu)
597                 return -ENOMEM;
598
599         iommu->seq_id = iommu_allocated++;
600         sprintf(iommu->name, "dmar%d", iommu->seq_id);
601
602         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
603         if (!iommu->reg) {
604                 printk(KERN_ERR "IOMMU: can't map the region\n");
605                 goto error;
606         }
607         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
608         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
609
610         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
611                 warn_invalid_dmar(drhd->reg_base_addr, " returns all ones");
612                 goto err_unmap;
613         }
614
615         agaw = iommu_calculate_agaw(iommu);
616         if (agaw < 0) {
617                 printk(KERN_ERR
618                        "Cannot get a valid agaw for iommu (seq_id = %d)\n",
619                        iommu->seq_id);
620                 goto err_unmap;
621         }
622         msagaw = iommu_calculate_max_sagaw(iommu);
623         if (msagaw < 0) {
624                 printk(KERN_ERR
625                         "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
626                         iommu->seq_id);
627                 goto err_unmap;
628         }
629         iommu->agaw = agaw;
630         iommu->msagaw = msagaw;
631
632         iommu->node = -1;
633
634         /* The register set might span more than one page. */
635         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
636                 cap_max_fault_reg_offset(iommu->cap));
637         map_size = VTD_PAGE_ALIGN(map_size);
638         if (map_size > VTD_PAGE_SIZE) {
639                 iounmap(iommu->reg);
640                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
641                 if (!iommu->reg) {
642                         printk(KERN_ERR "IOMMU: can't map the region\n");
643                         goto error;
644                 }
645         }
646
647         ver = readl(iommu->reg + DMAR_VER_REG);
648         pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
649                 iommu->seq_id,
650                 (unsigned long long)drhd->reg_base_addr,
651                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
652                 (unsigned long long)iommu->cap,
653                 (unsigned long long)iommu->ecap);
654
655         /* Reflect the current hardware status in gcmd so that features
             * already enabled (e.g. by firmware or a previous kernel) can
             * later be disabled cleanly. */
656         sts = readl(iommu->reg + DMAR_GSTS_REG);
657         if (sts & DMA_GSTS_IRES)
658                 iommu->gcmd |= DMA_GCMD_IRE;
659         if (sts & DMA_GSTS_TES)
660                 iommu->gcmd |= DMA_GCMD_TE;
661         if (sts & DMA_GSTS_QIES)
662                 iommu->gcmd |= DMA_GCMD_QIE;
663
664         raw_spin_lock_init(&iommu->register_lock);
665
666         drhd->iommu = iommu;
667         return 0;
668
669  err_unmap:
670         iounmap(iommu->reg);
671  error:
672         kfree(iommu);
673         return -1;
674 }
675
676 void free_iommu(struct intel_iommu *iommu)
677 {
678         if (!iommu)
679                 return;
680
681         free_dmar_iommu(iommu);
682
683         if (iommu->reg)
684                 iounmap(iommu->reg);
685         kfree(iommu);
686 }
687
688 /*
689  * Reclaim all the submitted descriptors which have completed their work.
690  */
691 static inline void reclaim_free_desc(struct q_inval *qi)
692 {
693         while (qi->desc_status[qi->free_tail] == QI_DONE ||
694                qi->desc_status[qi->free_tail] == QI_ABORT) {
695                 qi->desc_status[qi->free_tail] = QI_FREE;
696                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
697                 qi->free_cnt++;
698         }
699 }
700
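/*
 * Check the fault status register for queued-invalidation errors affecting
 * the descriptor at @index: on an Invalidation Queue Error (IQE) the bad
 * descriptor is replaced and -EINVAL returned; on an Invalidation Time-out
 * Error (ITE) pending descriptors are marked aborted and -EAGAIN returned
 * if our wait descriptor was among them; an Invalidation Completion Error
 * (ICE) is simply cleared.
 */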
701 static int qi_check_fault(struct intel_iommu *iommu, int index)
702 {
703         u32 fault;
704         int head, tail;
705         struct q_inval *qi = iommu->qi;
706         int wait_index = (index + 1) % QI_LENGTH;
707
708         if (qi->desc_status[wait_index] == QI_ABORT)
709                 return -EAGAIN;
710
711         fault = readl(iommu->reg + DMAR_FSTS_REG);
712
713         /*
714          * If IQE happens, the head points to the descriptor associated
715          * with the error. No new descriptors are fetched until the IQE
716          * is cleared.
717          */
718         if (fault & DMA_FSTS_IQE) {
719                 head = readl(iommu->reg + DMAR_IQH_REG);
720                 if ((head >> DMAR_IQ_SHIFT) == index) {
721                         printk(KERN_ERR "VT-d detected invalid descriptor: "
722                                 "low=%llx, high=%llx\n",
723                                 (unsigned long long)qi->desc[index].low,
724                                 (unsigned long long)qi->desc[index].high);
725                         memcpy(&qi->desc[index], &qi->desc[wait_index],
726                                         sizeof(struct qi_desc));
727                         __iommu_flush_cache(iommu, &qi->desc[index],
728                                         sizeof(struct qi_desc));
729                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
730                         return -EINVAL;
731                 }
732         }
733
734         /*
735          * If ITE happens, all pending wait_desc commands are aborted.
736          * No new descriptors are fetched until the ITE is cleared.
737          */
738         if (fault & DMA_FSTS_ITE) {
739                 head = readl(iommu->reg + DMAR_IQH_REG);
740                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
741                 head |= 1;
742                 tail = readl(iommu->reg + DMAR_IQT_REG);
743                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
744
745                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
746
747                 do {
748                         if (qi->desc_status[head] == QI_IN_USE)
749                                 qi->desc_status[head] = QI_ABORT;
750                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
751                 } while (head != tail);
752
753                 if (qi->desc_status[wait_index] == QI_ABORT)
754                         return -EAGAIN;
755         }
756
757         if (fault & DMA_FSTS_ICE)
758                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
759
760         return 0;
761 }
762
763 /*
764  * Submit the queued invalidation descriptor to the remapping
765  * hardware unit and wait for its completion.
766  */
767 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
768 {
769         int rc;
770         struct q_inval *qi = iommu->qi;
771         struct qi_desc *hw, wait_desc;
772         int wait_index, index;
773         unsigned long flags;
774
775         if (!qi)
776                 return 0;
777
778         hw = qi->desc;
779
780 restart:
781         rc = 0;
782
783         raw_spin_lock_irqsave(&qi->q_lock, flags);
784         while (qi->free_cnt < 3) {
785                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
786                 cpu_relax();
787                 raw_spin_lock_irqsave(&qi->q_lock, flags);
788         }
789
790         index = qi->free_head;
791         wait_index = (index + 1) % QI_LENGTH;
792
793         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
794
795         hw[index] = *desc;
796
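        /*
         * Queue a wait descriptor right behind the real descriptor: hardware
         * writes QI_DONE into its status slot once the preceding descriptor
         * has completed, which is what the polling loop below waits for.
         */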
797         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
798                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
799         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
800
801         hw[wait_index] = wait_desc;
802
803         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
804         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
805
806         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
807         qi->free_cnt -= 2;
808
809         /*
810          * update the HW tail register indicating the presence of
811          * new descriptors.
812          */
813         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
814
815         while (qi->desc_status[wait_index] != QI_DONE) {
816                 /*
817                  * We leave interrupts disabled to prevent interrupt
818                  * context from queueing another cmd while one is already
819                  * submitted and waiting for completion on this CPU. This avoids
820                  * a deadlock where the interrupt context can wait indefinitely
821                  * for free slots in the queue.
822                  */
823                 rc = qi_check_fault(iommu, index);
824                 if (rc)
825                         break;
826
827                 raw_spin_unlock(&qi->q_lock);
828                 cpu_relax();
829                 raw_spin_lock(&qi->q_lock);
830         }
831
832         qi->desc_status[index] = QI_DONE;
833
834         reclaim_free_desc(qi);
835         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
836
837         if (rc == -EAGAIN)
838                 goto restart;
839
840         return rc;
841 }
842
843 /*
844  * Flush the global interrupt entry cache.
845  */
846 void qi_global_iec(struct intel_iommu *iommu)
847 {
848         struct qi_desc desc;
849
850         desc.low = QI_IEC_TYPE;
851         desc.high = 0;
852
853         /* should never fail */
854         qi_submit_sync(&desc, iommu);
855 }
856
857 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
858                       u64 type)
859 {
860         struct qi_desc desc;
861
862         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
863                         | QI_CC_GRAN(type) | QI_CC_TYPE;
864         desc.high = 0;
865
866         qi_submit_sync(&desc, iommu);
867 }
868
869 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
870                     unsigned int size_order, u64 type)
871 {
872         u8 dw = 0, dr = 0;
873
874         struct qi_desc desc;
875         int ih = 0;
876
877         if (cap_write_drain(iommu->cap))
878                 dw = 1;
879
880         if (cap_read_drain(iommu->cap))
881                 dr = 1;
882
883         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
884                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
885         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
886                 | QI_IOTLB_AM(size_order);
887
888         qi_submit_sync(&desc, iommu);
889 }
890
891 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
892                         u64 addr, unsigned mask)
893 {
894         struct qi_desc desc;
895
896         if (mask) {
897                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
898                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
899                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
900         } else
901                 desc.high = QI_DEV_IOTLB_ADDR(addr);
902
903         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
904                 qdep = 0;
905
906         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
907                    QI_DIOTLB_TYPE;
908
909         qi_submit_sync(&desc, iommu);
910 }
911
912 /*
913  * Disable Queued Invalidation interface.
914  */
915 void dmar_disable_qi(struct intel_iommu *iommu)
916 {
917         unsigned long flags;
918         u32 sts;
919         cycles_t start_time = get_cycles();
920
921         if (!ecap_qis(iommu->ecap))
922                 return;
923
924         raw_spin_lock_irqsave(&iommu->register_lock, flags);
925
926         sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
927         if (!(sts & DMA_GSTS_QIES))
928                 goto end;
929
930         /*
931          * Give the hardware a chance to complete the pending invalidation requests.
932          */
933         while ((readl(iommu->reg + DMAR_IQT_REG) !=
934                 readl(iommu->reg + DMAR_IQH_REG)) &&
935                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
936                 cpu_relax();
937
938         iommu->gcmd &= ~DMA_GCMD_QIE;
939         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
940
941         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
942                       !(sts & DMA_GSTS_QIES), sts);
943 end:
944         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
945 }
946
947 /*
948  * Enable queued invalidation.
949  */
950 static void __dmar_enable_qi(struct intel_iommu *iommu)
951 {
952         u32 sts;
953         unsigned long flags;
954         struct q_inval *qi = iommu->qi;
955
956         qi->free_head = qi->free_tail = 0;
957         qi->free_cnt = QI_LENGTH;
958
959         raw_spin_lock_irqsave(&iommu->register_lock, flags);
960
961         /* write zero to the tail reg */
962         writel(0, iommu->reg + DMAR_IQT_REG);
963
964         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
965
966         iommu->gcmd |= DMA_GCMD_QIE;
967         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
968
969         /* Make sure hardware completes it */
970         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
971
972         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
973 }
974
975 /*
976  * Enable the Queued Invalidation interface. This is required to support
977  * interrupt remapping. It is also used by DMA remapping, where it replaces
978  * register-based IOTLB invalidation.
979  */
980 int dmar_enable_qi(struct intel_iommu *iommu)
981 {
982         struct q_inval *qi;
983         struct page *desc_page;
984
985         if (!ecap_qis(iommu->ecap))
986                 return -ENOENT;
987
988         /*
989          * Queued invalidation is already set up and enabled.
990          */
991         if (iommu->qi)
992                 return 0;
993
994         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
995         if (!iommu->qi)
996                 return -ENOMEM;
997
998         qi = iommu->qi;
999
1000
1001         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
1002         if (!desc_page) {
1003                 kfree(qi);
1004                 iommu->qi = NULL;
1005                 return -ENOMEM;
1006         }
1007
1008         qi->desc = page_address(desc_page);
1009
1010         qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1011         if (!qi->desc_status) {
1012                 free_page((unsigned long) qi->desc);
1013                 kfree(qi);
1014                 iommu->qi = NULL;
1015                 return -ENOMEM;
1016         }
1017
1018         qi->free_head = qi->free_tail = 0;
1019         qi->free_cnt = QI_LENGTH;
1020
1021         raw_spin_lock_init(&qi->q_lock);
1022
1023         __dmar_enable_qi(iommu);
1024
1025         return 0;
1026 }
1027
1028 /* IOMMU interrupt handling. Most of it is MSI-like. */
1029
1030 enum faulttype {
1031         DMA_REMAP,
1032         INTR_REMAP,
1033         UNKNOWN,
1034 };
1035
1036 static const char *dma_remap_fault_reasons[] =
1037 {
1038         "Software",
1039         "Present bit in root entry is clear",
1040         "Present bit in context entry is clear",
1041         "Invalid context entry",
1042         "Access beyond MGAW",
1043         "PTE Write access is not set",
1044         "PTE Read access is not set",
1045         "Next page table ptr is invalid",
1046         "Root table address invalid",
1047         "Context table ptr is invalid",
1048         "non-zero reserved fields in RTP",
1049         "non-zero reserved fields in CTP",
1050         "non-zero reserved fields in PTE",
1051 };
1052
1053 static const char *intr_remap_fault_reasons[] =
1054 {
1055         "Detected reserved fields in the decoded interrupt-remapped request",
1056         "Interrupt index exceeded the interrupt-remapping table size",
1057         "Present field in the IRTE entry is clear",
1058         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1059         "Detected reserved fields in the IRTE entry",
1060         "Blocked a compatibility format interrupt request",
1061         "Blocked an interrupt request due to source-id verification failure",
1062 };
1063
1064 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
1065
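/*
 * Map a hardware fault reason code to a descriptive string; codes 0x20 and
 * above belong to interrupt remapping, lower codes to DMA remapping.
 */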
1066 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1067 {
1068         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1069                                         ARRAY_SIZE(intr_remap_fault_reasons))) {
1070                 *fault_type = INTR_REMAP;
1071                 return intr_remap_fault_reasons[fault_reason - 0x20];
1072         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1073                 *fault_type = DMA_REMAP;
1074                 return dma_remap_fault_reasons[fault_reason];
1075         } else {
1076                 *fault_type = UNKNOWN;
1077                 return "Unknown";
1078         }
1079 }
1080
1081 void dmar_msi_unmask(struct irq_data *data)
1082 {
1083         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1084         unsigned long flag;
1085
1086         /* unmask it */
1087         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1088         writel(0, iommu->reg + DMAR_FECTL_REG);
1089         /* Read back a reg to flush the posted write */
1090         readl(iommu->reg + DMAR_FECTL_REG);
1091         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1092 }
1093
1094 void dmar_msi_mask(struct irq_data *data)
1095 {
1096         unsigned long flag;
1097         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1098
1099         /* mask it */
1100         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1101         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1102         /* Read back a reg to flush the posted write */
1103         readl(iommu->reg + DMAR_FECTL_REG);
1104         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1105 }
1106
1107 void dmar_msi_write(int irq, struct msi_msg *msg)
1108 {
1109         struct intel_iommu *iommu = irq_get_handler_data(irq);
1110         unsigned long flag;
1111
1112         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1113         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1114         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1115         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1116         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1117 }
1118
1119 void dmar_msi_read(int irq, struct msi_msg *msg)
1120 {
1121         struct intel_iommu *iommu = irq_get_handler_data(irq);
1122         unsigned long flag;
1123
1124         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1125         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1126         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1127         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1128         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1129 }
1130
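/*
 * Log one decoded fault record: translate the fault reason code into a
 * human-readable string and print the source device and faulting address.
 */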
1131 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1132                 u8 fault_reason, u16 source_id, unsigned long long addr)
1133 {
1134         const char *reason;
1135         int fault_type;
1136
1137         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1138
1139         if (fault_type == INTR_REMAP)
1140                 printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
1141                        "fault index %llx\n"
1142                         "INTR-REMAP:[fault reason %02d] %s\n",
1143                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1144                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1145                         fault_reason, reason);
1146         else
1147                 printk(KERN_ERR
1148                        "DMAR:[%s] Request device [%02x:%02x.%d] "
1149                        "fault addr %llx\n"
1150                        "DMAR:[fault reason %02d] %s\n",
1151                        (type ? "DMA Read" : "DMA Write"),
1152                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1153                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1154         return 0;
1155 }
1156
1157 #define PRIMARY_FAULT_REG_LEN (16)
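/*
 * Primary fault interrupt handler: walk the fault recording registers,
 * report each pending fault via dmar_fault_do_one() and clear it, then
 * clear any remaining fault status bits.
 */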
1158 irqreturn_t dmar_fault(int irq, void *dev_id)
1159 {
1160         struct intel_iommu *iommu = dev_id;
1161         int reg, fault_index;
1162         u32 fault_status;
1163         unsigned long flag;
1164
1165         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1166         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1167         if (fault_status)
1168                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1169                        fault_status);
1170
1171         /* TBD: ignore advanced fault log currently */
1172         if (!(fault_status & DMA_FSTS_PPF))
1173                 goto clear_rest;
1174
1175         fault_index = dma_fsts_fault_record_index(fault_status);
1176         reg = cap_fault_reg_offset(iommu->cap);
1177         while (1) {
1178                 u8 fault_reason;
1179                 u16 source_id;
1180                 u64 guest_addr;
1181                 int type;
1182                 u32 data;
1183
1184                 /* highest 32 bits */
1185                 data = readl(iommu->reg + reg +
1186                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1187                 if (!(data & DMA_FRCD_F))
1188                         break;
1189
1190                 fault_reason = dma_frcd_fault_reason(data);
1191                 type = dma_frcd_type(data);
1192
1193                 data = readl(iommu->reg + reg +
1194                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1195                 source_id = dma_frcd_source_id(data);
1196
1197                 guest_addr = dmar_readq(iommu->reg + reg +
1198                                 fault_index * PRIMARY_FAULT_REG_LEN);
1199                 guest_addr = dma_frcd_page_addr(guest_addr);
1200                 /* clear the fault */
1201                 writel(DMA_FRCD_F, iommu->reg + reg +
1202                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1203
1204                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1205
1206                 dmar_fault_do_one(iommu, type, fault_reason,
1207                                 source_id, guest_addr);
1208
1209                 fault_index++;
1210                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1211                         fault_index = 0;
1212                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1213         }
1214 clear_rest:
1215         /* clear all the other faults */
1216         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1217         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1218
1219         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1220         return IRQ_HANDLED;
1221 }
1222
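/*
 * Allocate an interrupt vector for the IOMMU fault event and hook up
 * dmar_fault() as its handler, unless one is already set up.
 */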
1223 int dmar_set_interrupt(struct intel_iommu *iommu)
1224 {
1225         int irq, ret;
1226
1227         /*
1228          * Check if the fault interrupt is already initialized.
1229          */
1230         if (iommu->irq)
1231                 return 0;
1232
1233         irq = create_irq();
1234         if (!irq) {
1235                 printk(KERN_ERR "IOMMU: no free vectors\n");
1236                 return -EINVAL;
1237         }
1238
1239         irq_set_handler_data(irq, iommu);
1240         iommu->irq = irq;
1241
1242         ret = arch_setup_dmar_msi(irq);
1243         if (ret) {
1244                 irq_set_handler_data(irq, NULL);
1245                 iommu->irq = 0;
1246                 destroy_irq(irq);
1247                 return ret;
1248         }
1249
1250         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1251         if (ret)
1252                 printk(KERN_ERR "IOMMU: can't request irq\n");
1253         return ret;
1254 }
1255
1256 int __init enable_drhd_fault_handling(void)
1257 {
1258         struct dmar_drhd_unit *drhd;
1259
1260         /*
1261          * Enable fault control interrupt.
1262          */
1263         for_each_drhd_unit(drhd) {
1264                 int ret;
1265                 struct intel_iommu *iommu = drhd->iommu;
1266                 ret = dmar_set_interrupt(iommu);
1267
1268                 if (ret) {
1269                         printk(KERN_ERR "DRHD %Lx: failed to enable fault "
1270                                "interrupt, ret %d\n",
1271                                (unsigned long long)drhd->reg_base_addr, ret);
1272                         return -1;
1273                 }
1274
1275                 /*
1276                  * Clear any previous faults.
1277                  */
1278                 dmar_fault(iommu->irq, iommu);
1279         }
1280
1281         return 0;
1282 }
1283
1284 /*
1285  * Re-enable Queued Invalidation interface.
1286  */
1287 int dmar_reenable_qi(struct intel_iommu *iommu)
1288 {
1289         if (!ecap_qis(iommu->ecap))
1290                 return -ENOENT;
1291
1292         if (!iommu->qi)
1293                 return -ENOENT;
1294
1295         /*
1296          * First disable queued invalidation.
1297          */
1298         dmar_disable_qi(iommu);
1299         /*
1300          * Then enable queued invalidation again. Since there are no pending
1301          * invalidation requests now, it's safe to re-enable queued
1302          * invalidation.
1303          */
1304         __dmar_enable_qi(iommu);
1305
1306         return 0;
1307 }
1308
1309 /*
1310  * Check interrupt remapping support in DMAR table description.
1311  */
1312 int __init dmar_ir_support(void)
1313 {
1314         struct acpi_table_dmar *dmar;
1315         dmar = (struct acpi_table_dmar *)dmar_tbl;
1316         if (!dmar)
1317                 return 0;
1318         return dmar->flags & 0x1;
1319 }
1320 IOMMU_INIT_POST(detect_intel_iommu);