1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to the OS through the BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping.
27  */
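
/*
 * Rough layout, per the ACPICA definitions used below: the DMAR table
 * starts with struct acpi_table_dmar (host address width, flags),
 * followed by a list of remapping structures, each beginning with a
 * struct acpi_dmar_header holding its type and length.  parse_dmar_table()
 * walks that list and dispatches on the type (DRHD, RMRR, ATSR).
 */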
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36 #include <linux/tboot.h>
37
38 #define PREFIX "DMAR: "
39
40 /* No locks are needed, as the DMA remapping hardware unit
41  * list is constructed at boot time and hotplug of
42  * these units is not supported by the architecture.
43  */
44 LIST_HEAD(dmar_drhd_units);
45
46 static struct acpi_table_header * __initdata dmar_tbl;
47 static acpi_size dmar_tbl_size;
48
49 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
50 {
51         /*
52          * add INCLUDE_ALL at the tail, so a scan of the list will find it at
53          * the very end.
54          */
55         if (drhd->include_all)
56                 list_add_tail(&drhd->list, &dmar_drhd_units);
57         else
58                 list_add(&drhd->list, &dmar_drhd_units);
59 }
60
61 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
62                                            struct pci_dev **dev, u16 segment)
63 {
64         struct pci_bus *bus;
65         struct pci_dev *pdev = NULL;
66         struct acpi_dmar_pci_path *path;
67         int count;
68
69         bus = pci_find_bus(segment, scope->bus);
70         path = (struct acpi_dmar_pci_path *)(scope + 1);
71         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
72                 / sizeof(struct acpi_dmar_pci_path);
73
74         while (count) {
75                 if (pdev)
76                         pci_dev_put(pdev);
77                 /*
78                  * Some BIOSes list non-existent devices in the DMAR table;
79                  * just ignore them.
80                  */
81                 if (!bus) {
82                         printk(KERN_WARNING
83                         PREFIX "Device scope bus [%d] not found\n",
84                         scope->bus);
85                         break;
86                 }
87                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
88                 if (!pdev) {
89                         printk(KERN_WARNING PREFIX
90                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
91                                 segment, bus->number, path->dev, path->fn);
92                         break;
93                 }
94                 path++;
95                 count--;
96                 bus = pdev->subordinate;
97         }
98         if (!pdev) {
99                 printk(KERN_WARNING PREFIX
100                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
101                 segment, scope->bus, path->dev, path->fn);
102                 *dev = NULL;
103                 return 0;
104         }
105         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
106                         pdev->subordinate) || (scope->entry_type ==
107                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
108                 printk(KERN_WARNING PREFIX
109                         "Device scope type does not match for %s\n",
110                         pci_name(pdev));
111                 pci_dev_put(pdev);
112                 return -EINVAL;
113         }
114         *dev = pdev;
115         return 0;
116 }
117
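/*
 * Parse all device scope entries between 'start' and 'end'.  This is done
 * in two passes: first count the endpoint/bridge entries to size the
 * devices array, then walk the entries again and resolve each one to a
 * pci_dev via dmar_parse_one_dev_scope().
 */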
118 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
119                                        struct pci_dev ***devices, u16 segment)
120 {
121         struct acpi_dmar_device_scope *scope;
122         void * tmp = start;
123         int index;
124         int ret;
125
126         *cnt = 0;
127         while (start < end) {
128                 scope = start;
129                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
130                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
131                         (*cnt)++;
132                 else
133                         printk(KERN_WARNING PREFIX
134                                 "Unsupported device scope\n");
135                 start += scope->length;
136         }
137         if (*cnt == 0)
138                 return 0;
139
140         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
141         if (!*devices)
142                 return -ENOMEM;
143
144         start = tmp;
145         index = 0;
146         while (start < end) {
147                 scope = start;
148                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
149                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
150                         ret = dmar_parse_one_dev_scope(scope,
151                                 &(*devices)[index], segment);
152                         if (ret) {
153                                 kfree(*devices);
154                                 return ret;
155                         }
156                         index++;
157                 }
158                 start += scope->length;
159         }
160
161         return 0;
162 }
163
164 /**
165  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
166  * structure which uniquely represents one DMA remapping hardware unit
167  * present in the platform
168  */
169 static int __init
170 dmar_parse_one_drhd(struct acpi_dmar_header *header)
171 {
172         struct acpi_dmar_hardware_unit *drhd;
173         struct dmar_drhd_unit *dmaru;
174         int ret = 0;
175
176         drhd = (struct acpi_dmar_hardware_unit *)header;
177         if (!drhd->address) {
178                 /* Promote an attitude of violence to a BIOS engineer today */
179                 WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
180                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
181                      dmi_get_system_info(DMI_BIOS_VENDOR),
182                      dmi_get_system_info(DMI_BIOS_VERSION),
183                      dmi_get_system_info(DMI_PRODUCT_VERSION));
184                 return -ENODEV;
185         }
186         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
187         if (!dmaru)
188                 return -ENOMEM;
189
190         dmaru->hdr = header;
191         dmaru->reg_base_addr = drhd->address;
192         dmaru->segment = drhd->segment;
193         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
194
195         ret = alloc_iommu(dmaru);
196         if (ret) {
197                 kfree(dmaru);
198                 return ret;
199         }
200         dmar_register_drhd_unit(dmaru);
201         return 0;
202 }
203
204 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
205 {
206         struct acpi_dmar_hardware_unit *drhd;
207         int ret = 0;
208
209         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
210
211         if (dmaru->include_all)
212                 return 0;
213
214         ret = dmar_parse_dev_scope((void *)(drhd + 1),
215                                 ((void *)drhd) + drhd->header.length,
216                                 &dmaru->devices_cnt, &dmaru->devices,
217                                 drhd->segment);
218         if (ret) {
219                 list_del(&dmaru->list);
220                 kfree(dmaru);
221         }
222         return ret;
223 }
224
225 #ifdef CONFIG_DMAR
226 LIST_HEAD(dmar_rmrr_units);
227
228 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
229 {
230         list_add(&rmrr->list, &dmar_rmrr_units);
231 }
232
233
234 static int __init
235 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
236 {
237         struct acpi_dmar_reserved_memory *rmrr;
238         struct dmar_rmrr_unit *rmrru;
239
240         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
241         if (!rmrru)
242                 return -ENOMEM;
243
244         rmrru->hdr = header;
245         rmrr = (struct acpi_dmar_reserved_memory *)header;
246         rmrru->base_address = rmrr->base_address;
247         rmrru->end_address = rmrr->end_address;
248
249         dmar_register_rmrr_unit(rmrru);
250         return 0;
251 }
252
253 static int __init
254 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
255 {
256         struct acpi_dmar_reserved_memory *rmrr;
257         int ret;
258
259         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
260         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
261                 ((void *)rmrr) + rmrr->header.length,
262                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
263
264         if (ret || (rmrru->devices_cnt == 0)) {
265                 list_del(&rmrru->list);
266                 kfree(rmrru);
267         }
268         return ret;
269 }
270
271 static LIST_HEAD(dmar_atsr_units);
272
273 static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
274 {
275         struct acpi_dmar_atsr *atsr;
276         struct dmar_atsr_unit *atsru;
277
278         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
279         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
280         if (!atsru)
281                 return -ENOMEM;
282
283         atsru->hdr = hdr;
284         atsru->include_all = atsr->flags & 0x1;
285
286         list_add(&atsru->list, &dmar_atsr_units);
287
288         return 0;
289 }
290
291 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
292 {
293         int rc;
294         struct acpi_dmar_atsr *atsr;
295
296         if (atsru->include_all)
297                 return 0;
298
299         atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
300         rc = dmar_parse_dev_scope((void *)(atsr + 1),
301                                 (void *)atsr + atsr->header.length,
302                                 &atsru->devices_cnt, &atsru->devices,
303                                 atsr->segment);
304         if (rc || !atsru->devices_cnt) {
305                 list_del(&atsru->list);
306                 kfree(atsru);
307         }
308
309         return rc;
310 }
311
312 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
313 {
314         int i;
315         struct pci_bus *bus;
316         struct acpi_dmar_atsr *atsr;
317         struct dmar_atsr_unit *atsru;
318
319         list_for_each_entry(atsru, &dmar_atsr_units, list) {
320                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
321                 if (atsr->segment == pci_domain_nr(dev->bus))
322                         goto found;
323         }
324
325         return 0;
326
327 found:
328         for (bus = dev->bus; bus; bus = bus->parent) {
329                 struct pci_dev *bridge = bus->self;
330
331                 if (!bridge || !bridge->is_pcie ||
332                     bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
333                         return 0;
334
335                 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
336                         for (i = 0; i < atsru->devices_cnt; i++)
337                                 if (atsru->devices[i] == bridge)
338                                         return 1;
339                         break;
340                 }
341         }
342
343         if (atsru->include_all)
344                 return 1;
345
346         return 0;
347 }
348 #endif
349
350 static void __init
351 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
352 {
353         struct acpi_dmar_hardware_unit *drhd;
354         struct acpi_dmar_reserved_memory *rmrr;
355         struct acpi_dmar_atsr *atsr;
356
357         switch (header->type) {
358         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
359                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
360                                     header);
361                 printk (KERN_INFO PREFIX
362                         "DRHD base: %#016Lx flags: %#x\n",
363                         (unsigned long long)drhd->address, drhd->flags);
364                 break;
365         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
366                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
367                                     header);
368                 printk (KERN_INFO PREFIX
369                         "RMRR base: %#016Lx end: %#016Lx\n",
370                         (unsigned long long)rmrr->base_address,
371                         (unsigned long long)rmrr->end_address);
372                 break;
373         case ACPI_DMAR_TYPE_ATSR:
374                 atsr = container_of(header, struct acpi_dmar_atsr, header);
375                 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
376                 break;
377         }
378 }
379
380 /**
381  * dmar_table_detect - checks to see if the platform supports DMAR devices
382  */
383 static int __init dmar_table_detect(void)
384 {
385         acpi_status status = AE_OK;
386
387         /* if we can find the DMAR table, then there are DMAR devices */
388         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
389                                 (struct acpi_table_header **)&dmar_tbl,
390                                 &dmar_tbl_size);
391
392         if (ACPI_SUCCESS(status) && !dmar_tbl) {
393                 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
394                 status = AE_NOT_FOUND;
395         }
396
397         return (ACPI_SUCCESS(status) ? 1 : 0);
398 }
399
400 /**
401  * parse_dmar_table - parses the DMA reporting table
402  */
403 static int __init
404 parse_dmar_table(void)
405 {
406         struct acpi_table_dmar *dmar;
407         struct acpi_dmar_header *entry_header;
408         int ret = 0;
409
410         /*
411          * Do it again; the earlier dmar_tbl mapping could have been done
412          * with the fixed map.
413          */
414         dmar_table_detect();
415
416         /*
417          * ACPI tables may not be DMA protected by tboot, so use the DMAR
418          * copy that SINIT saved in SinitMleData in the TXT heap (DMA protected)
419          */
420         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
421
422         dmar = (struct acpi_table_dmar *)dmar_tbl;
423         if (!dmar)
424                 return -ENODEV;
425
426         if (dmar->width < PAGE_SHIFT - 1) {
427                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
428                 return -EINVAL;
429         }
430
431         printk (KERN_INFO PREFIX "Host address width %d\n",
432                 dmar->width + 1);
433
434         entry_header = (struct acpi_dmar_header *)(dmar + 1);
435         while (((unsigned long)entry_header) <
436                         (((unsigned long)dmar) + dmar_tbl->length)) {
437                 /* Avoid looping forever on bad ACPI tables */
438                 if (entry_header->length == 0) {
439                         printk(KERN_WARNING PREFIX
440                                 "Invalid 0-length structure\n");
441                         ret = -EINVAL;
442                         break;
443                 }
444
445                 dmar_table_print_dmar_entry(entry_header);
446
447                 switch (entry_header->type) {
448                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
449                         ret = dmar_parse_one_drhd(entry_header);
450                         break;
451                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
452 #ifdef CONFIG_DMAR
453                         ret = dmar_parse_one_rmrr(entry_header);
454 #endif
455                         break;
456                 case ACPI_DMAR_TYPE_ATSR:
457 #ifdef CONFIG_DMAR
458                         ret = dmar_parse_one_atsr(entry_header);
459 #endif
460                         break;
461                 default:
462                         printk(KERN_WARNING PREFIX
463                                 "Unknown DMAR structure type\n");
464                         ret = 0; /* for forward compatibility */
465                         break;
466                 }
467                 if (ret)
468                         break;
469
470                 entry_header = ((void *)entry_header + entry_header->length);
471         }
472         return ret;
473 }
474
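/* Return 1 if 'dev' or any of its parent bridges is in 'devices[]'. */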
475 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
476                           struct pci_dev *dev)
477 {
478         int index;
479
480         while (dev) {
481                 for (index = 0; index < cnt; index++)
482                         if (dev == devices[index])
483                                 return 1;
484
485                 /* Check our parent */
486                 dev = dev->bus->self;
487         }
488
489         return 0;
490 }
491
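/*
 * Return the DRHD unit whose device scope contains 'dev' (or one of its
 * parent bridges).  INCLUDE_ALL units are kept at the tail of the list,
 * so explicit device scopes are always checked first.
 */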
492 struct dmar_drhd_unit *
493 dmar_find_matched_drhd_unit(struct pci_dev *dev)
494 {
495         struct dmar_drhd_unit *dmaru = NULL;
496         struct acpi_dmar_hardware_unit *drhd;
497
498         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
499                 drhd = container_of(dmaru->hdr,
500                                     struct acpi_dmar_hardware_unit,
501                                     header);
502
503                 if (dmaru->include_all &&
504                     drhd->segment == pci_domain_nr(dev->bus))
505                         return dmaru;
506
507                 if (dmar_pci_device_match(dmaru->devices,
508                                           dmaru->devices_cnt, dev))
509                         return dmaru;
510         }
511
512         return NULL;
513 }
514
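/*
 * Resolve the device scopes of all DRHD (and, with CONFIG_DMAR, RMRR and
 * ATSR) units registered by parse_dmar_table().  Units whose scope cannot
 * be resolved are dropped from their lists.
 */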
515 int __init dmar_dev_scope_init(void)
516 {
517         struct dmar_drhd_unit *drhd, *drhd_n;
518         int ret = -ENODEV;
519
520         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
521                 ret = dmar_parse_dev(drhd);
522                 if (ret)
523                         return ret;
524         }
525
526 #ifdef CONFIG_DMAR
527         {
528                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
529                 struct dmar_atsr_unit *atsr, *atsr_n;
530
531                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
532                         ret = rmrr_parse_dev(rmrr);
533                         if (ret)
534                                 return ret;
535                 }
536
537                 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
538                         ret = atsr_parse_dev(atsr);
539                         if (ret)
540                                 return ret;
541                 }
542         }
543 #endif
544
545         return ret;
546 }
547
548
549 int __init dmar_table_init(void)
550 {
551         static int dmar_table_initialized;
552         int ret;
553
554         if (dmar_table_initialized)
555                 return 0;
556
557         dmar_table_initialized = 1;
558
559         ret = parse_dmar_table();
560         if (ret) {
561                 if (ret != -ENODEV)
562                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
563                 return ret;
564         }
565
566         if (list_empty(&dmar_drhd_units)) {
567                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
568                 return -ENODEV;
569         }
570
571 #ifdef CONFIG_DMAR
572         if (list_empty(&dmar_rmrr_units))
573                 printk(KERN_INFO PREFIX "No RMRR found\n");
574
575         if (list_empty(&dmar_atsr_units))
576                 printk(KERN_INFO PREFIX "No ATSR found\n");
577 #endif
578
579 #ifdef CONFIG_INTR_REMAP
580         parse_ioapics_under_ir();
581 #endif
582         return 0;
583 }
584
585 void __init detect_intel_iommu(void)
586 {
587         int ret;
588
589         ret = dmar_table_detect();
590
591         {
592 #ifdef CONFIG_INTR_REMAP
593                 struct acpi_table_dmar *dmar;
594                 /*
595                  * for now we will disable dma-remapping when interrupt
596                  * remapping is enabled.
597                  * When support for queued invalidation for IOTLB invalidation
598                  * is added, we will not need this any more.
599                  */
600                 dmar = (struct acpi_table_dmar *) dmar_tbl;
601                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
602                         printk(KERN_INFO
603                                "Queued invalidation will be enabled to support "
604                                "x2apic and Intr-remapping.\n");
605 #endif
606 #ifdef CONFIG_DMAR
607                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
608                     !dmar_disabled)
609                         iommu_detected = 1;
610 #endif
611         }
612         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
613         dmar_tbl = NULL;
614 }
615
616
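/*
 * Allocate and minimally initialize a struct intel_iommu for one DRHD:
 * map its register window, read the capability/extended-capability
 * registers and (with CONFIG_DMAR) compute the supported AGAW values.
 */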
617 int alloc_iommu(struct dmar_drhd_unit *drhd)
618 {
619         struct intel_iommu *iommu;
620         int map_size;
621         u32 ver;
622         static int iommu_allocated = 0;
623         int agaw = 0;
624         int msagaw = 0;
625
626         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
627         if (!iommu)
628                 return -ENOMEM;
629
630         iommu->seq_id = iommu_allocated++;
631         sprintf (iommu->name, "dmar%d", iommu->seq_id);
632
633         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
634         if (!iommu->reg) {
635                 printk(KERN_ERR "IOMMU: can't map the region\n");
636                 goto error;
637         }
638         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
639         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
640
641 #ifdef CONFIG_DMAR
642         agaw = iommu_calculate_agaw(iommu);
643         if (agaw < 0) {
644                 printk(KERN_ERR
645                        "Cannot get a valid agaw for iommu (seq_id = %d)\n",
646                        iommu->seq_id);
647                 goto error;
648         }
649         msagaw = iommu_calculate_max_sagaw(iommu);
650         if (msagaw < 0) {
651                 printk(KERN_ERR
652                         "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
653                         iommu->seq_id);
654                 goto error;
655         }
656 #endif
657         iommu->agaw = agaw;
658         iommu->msagaw = msagaw;
659
660         /* the registers might be more than one page */
661         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
662                 cap_max_fault_reg_offset(iommu->cap));
663         map_size = VTD_PAGE_ALIGN(map_size);
664         if (map_size > VTD_PAGE_SIZE) {
665                 iounmap(iommu->reg);
666                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
667                 if (!iommu->reg) {
668                         printk(KERN_ERR "IOMMU: can't map the region\n");
669                         goto error;
670                 }
671         }
672
673         ver = readl(iommu->reg + DMAR_VER_REG);
674         pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
675                 (unsigned long long)drhd->reg_base_addr,
676                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
677                 (unsigned long long)iommu->cap,
678                 (unsigned long long)iommu->ecap);
679
680         spin_lock_init(&iommu->register_lock);
681
682         drhd->iommu = iommu;
683         return 0;
684 error:
685         kfree(iommu);
686         return -1;
687 }
688
689 void free_iommu(struct intel_iommu *iommu)
690 {
691         if (!iommu)
692                 return;
693
694 #ifdef CONFIG_DMAR
695         free_dmar_iommu(iommu);
696 #endif
697
698         if (iommu->reg)
699                 iounmap(iommu->reg);
700         kfree(iommu);
701 }
702
703 /*
704  * Reclaim all the submitted descriptors which have completed their work.
705  */
706 static inline void reclaim_free_desc(struct q_inval *qi)
707 {
708         while (qi->desc_status[qi->free_tail] == QI_DONE ||
709                qi->desc_status[qi->free_tail] == QI_ABORT) {
710                 qi->desc_status[qi->free_tail] = QI_FREE;
711                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
712                 qi->free_cnt++;
713         }
714 }
715
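/*
 * Check the fault status register for queued-invalidation errors affecting
 * the descriptor at 'index'.  Returns -EINVAL on an invalidation queue
 * error (IQE), -EAGAIN if the wait descriptor was aborted by an
 * invalidation time-out (ITE), and 0 otherwise.
 */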
716 static int qi_check_fault(struct intel_iommu *iommu, int index)
717 {
718         u32 fault;
719         int head, tail;
720         struct q_inval *qi = iommu->qi;
721         int wait_index = (index + 1) % QI_LENGTH;
722
723         if (qi->desc_status[wait_index] == QI_ABORT)
724                 return -EAGAIN;
725
726         fault = readl(iommu->reg + DMAR_FSTS_REG);
727
728         /*
729          * If IQE happens, the head points to the descriptor associated
730          * with the error. No new descriptors are fetched until the IQE
731          * is cleared.
732          */
733         if (fault & DMA_FSTS_IQE) {
734                 head = readl(iommu->reg + DMAR_IQH_REG);
735                 if ((head >> DMAR_IQ_SHIFT) == index) {
736                         printk(KERN_ERR "VT-d detected invalid descriptor: "
737                                 "low=%llx, high=%llx\n",
738                                 (unsigned long long)qi->desc[index].low,
739                                 (unsigned long long)qi->desc[index].high);
740                         memcpy(&qi->desc[index], &qi->desc[wait_index],
741                                         sizeof(struct qi_desc));
742                         __iommu_flush_cache(iommu, &qi->desc[index],
743                                         sizeof(struct qi_desc));
744                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
745                         return -EINVAL;
746                 }
747         }
748
749         /*
750          * If ITE happens, all pending wait_desc commands are aborted.
751          * No new descriptors are fetched until the ITE is cleared.
752          */
753         if (fault & DMA_FSTS_ITE) {
754                 head = readl(iommu->reg + DMAR_IQH_REG);
755                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
756                 head |= 1;
757                 tail = readl(iommu->reg + DMAR_IQT_REG);
758                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
759
760                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
761
762                 do {
763                         if (qi->desc_status[head] == QI_IN_USE)
764                                 qi->desc_status[head] = QI_ABORT;
765                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
766                 } while (head != tail);
767
768                 if (qi->desc_status[wait_index] == QI_ABORT)
769                         return -EAGAIN;
770         }
771
772         if (fault & DMA_FSTS_ICE)
773                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
774
775         return 0;
776 }
777
778 /*
779  * Submit the queued invalidation descriptor to the remapping
780  * hardware unit and wait for its completion.
781  */
782 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
783 {
784         int rc;
785         struct q_inval *qi = iommu->qi;
786         struct qi_desc *hw, wait_desc;
787         int wait_index, index;
788         unsigned long flags;
789
790         if (!qi)
791                 return 0;
792
793         hw = qi->desc;
794
795 restart:
796         rc = 0;
797
798         spin_lock_irqsave(&qi->q_lock, flags);
799         while (qi->free_cnt < 3) {
800                 spin_unlock_irqrestore(&qi->q_lock, flags);
801                 cpu_relax();
802                 spin_lock_irqsave(&qi->q_lock, flags);
803         }
804
805         index = qi->free_head;
806         wait_index = (index + 1) % QI_LENGTH;
807
808         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
809
810         hw[index] = *desc;
811
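        /*
         * Pair the caller's descriptor with an invalidation-wait descriptor
         * whose status-write address points at desc_status[wait_index]; the
         * hardware sets that slot to QI_DONE once everything queued before
         * it has completed.
         */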
812         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
813                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
814         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
815
816         hw[wait_index] = wait_desc;
817
818         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
819         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
820
821         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
822         qi->free_cnt -= 2;
823
824         /*
825          * update the HW tail register indicating the presence of
826          * new descriptors.
827          */
828         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
829
830         while (qi->desc_status[wait_index] != QI_DONE) {
831                 /*
832                  * We will leave the interrupts disabled, to prevent interrupt
833                  * context from queueing another cmd while a cmd is already
834                  * submitted and waiting for completion on this cpu. This avoids
835                  * a deadlock where the interrupt context can wait indefinitely
836                  * for free slots in the queue.
837                  */
838                 rc = qi_check_fault(iommu, index);
839                 if (rc)
840                         break;
841
842                 spin_unlock(&qi->q_lock);
843                 cpu_relax();
844                 spin_lock(&qi->q_lock);
845         }
846
847         qi->desc_status[index] = QI_DONE;
848
849         reclaim_free_desc(qi);
850         spin_unlock_irqrestore(&qi->q_lock, flags);
851
852         if (rc == -EAGAIN)
853                 goto restart;
854
855         return rc;
856 }
857
858 /*
859  * Flush the global interrupt entry cache.
860  */
861 void qi_global_iec(struct intel_iommu *iommu)
862 {
863         struct qi_desc desc;
864
865         desc.low = QI_IEC_TYPE;
866         desc.high = 0;
867
868         /* should never fail */
869         qi_submit_sync(&desc, iommu);
870 }
871
872 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
873                       u64 type)
874 {
875         struct qi_desc desc;
876
877         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
878                         | QI_CC_GRAN(type) | QI_CC_TYPE;
879         desc.high = 0;
880
881         qi_submit_sync(&desc, iommu);
882 }
883
884 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
885                     unsigned int size_order, u64 type)
886 {
887         u8 dw = 0, dr = 0;
888
889         struct qi_desc desc;
890         int ih = 0;
891
892         if (cap_write_drain(iommu->cap))
893                 dw = 1;
894
895         if (cap_read_drain(iommu->cap))
896                 dr = 1;
897
898         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
899                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
900         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
901                 | QI_IOTLB_AM(size_order);
902
903         qi_submit_sync(&desc, iommu);
904 }
905
906 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
907                         u64 addr, unsigned mask)
908 {
909         struct qi_desc desc;
910
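        /*
         * 'mask' gives the invalidation size as an order of VT-d pages: the
         * size is encoded by setting the low address bits below the size
         * boundary together with the S (size) bit in the descriptor.
         */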
911         if (mask) {
912                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
913                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
914                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
915         } else
916                 desc.high = QI_DEV_IOTLB_ADDR(addr);
917
918         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
919                 qdep = 0;
920
921         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
922                    QI_DIOTLB_TYPE;
923
924         qi_submit_sync(&desc, iommu);
925 }
926
927 /*
928  * Disable Queued Invalidation interface.
929  */
930 void dmar_disable_qi(struct intel_iommu *iommu)
931 {
932         unsigned long flags;
933         u32 sts;
934         cycles_t start_time = get_cycles();
935
936         if (!ecap_qis(iommu->ecap))
937                 return;
938
939         spin_lock_irqsave(&iommu->register_lock, flags);
940
941         sts = readl(iommu->reg + DMAR_GSTS_REG);
942         if (!(sts & DMA_GSTS_QIES))
943                 goto end;
944
945         /*
946          * Give the HW a chance to complete the pending invalidation requests.
947          */
948         while ((readl(iommu->reg + DMAR_IQT_REG) !=
949                 readl(iommu->reg + DMAR_IQH_REG)) &&
950                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
951                 cpu_relax();
952
953         iommu->gcmd &= ~DMA_GCMD_QIE;
954         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
955
956         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
957                       !(sts & DMA_GSTS_QIES), sts);
958 end:
959         spin_unlock_irqrestore(&iommu->register_lock, flags);
960 }
961
962 /*
963  * Enable queued invalidation.
964  */
965 static void __dmar_enable_qi(struct intel_iommu *iommu)
966 {
967         u32 sts;
968         unsigned long flags;
969         struct q_inval *qi = iommu->qi;
970
971         qi->free_head = qi->free_tail = 0;
972         qi->free_cnt = QI_LENGTH;
973
974         spin_lock_irqsave(&iommu->register_lock, flags);
975
976         /* write zero to the tail reg */
977         writel(0, iommu->reg + DMAR_IQT_REG);
978
979         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
980
981         iommu->gcmd |= DMA_GCMD_QIE;
982         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
983
984         /* Make sure hardware complete it */
985         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
986
987         spin_unlock_irqrestore(&iommu->register_lock, flags);
988 }
989
990 /*
991  * Enable the Queued Invalidation interface. This is required to support
992  * interrupt-remapping. It is also used by DMA-remapping, where it replaces
993  * register-based IOTLB invalidation.
994  */
995 int dmar_enable_qi(struct intel_iommu *iommu)
996 {
997         struct q_inval *qi;
998
999         if (!ecap_qis(iommu->ecap))
1000                 return -ENOENT;
1001
1002         /*
1003          * queued invalidation is already set up and enabled.
1004          */
1005         if (iommu->qi)
1006                 return 0;
1007
1008         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1009         if (!iommu->qi)
1010                 return -ENOMEM;
1011
1012         qi = iommu->qi;
1013
1014         qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
1015         if (!qi->desc) {
1016                 kfree(qi);
1017                 iommu->qi = NULL;
1018                 return -ENOMEM;
1019         }
1020
1021         qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1022         if (!qi->desc_status) {
1023                 free_page((unsigned long) qi->desc);
1024                 kfree(qi);
1025                 iommu->qi = NULL;
1026                 return -ENOMEM;
1027         }
1028
1029         qi->free_head = qi->free_tail = 0;
1030         qi->free_cnt = QI_LENGTH;
1031
1032         spin_lock_init(&qi->q_lock);
1033
1034         __dmar_enable_qi(iommu);
1035
1036         return 0;
1037 }
1038
1039 /* iommu interrupt handling. Most of it is MSI-like. */
1040
1041 enum faulttype {
1042         DMA_REMAP,
1043         INTR_REMAP,
1044         UNKNOWN,
1045 };
1046
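/*
 * Fault reason strings, indexed by the fault reason code reported in the
 * fault recording registers.  Reasons 0x20 and up belong to interrupt
 * remapping (see dmar_get_fault_reason()).
 */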
1047 static const char *dma_remap_fault_reasons[] =
1048 {
1049         "Software",
1050         "Present bit in root entry is clear",
1051         "Present bit in context entry is clear",
1052         "Invalid context entry",
1053         "Access beyond MGAW",
1054         "PTE Write access is not set",
1055         "PTE Read access is not set",
1056         "Next page table ptr is invalid",
1057         "Root table address invalid",
1058         "Context table ptr is invalid",
1059         "non-zero reserved fields in RTP",
1060         "non-zero reserved fields in CTP",
1061         "non-zero reserved fields in PTE",
1062 };
1063
1064 static const char *intr_remap_fault_reasons[] =
1065 {
1066         "Detected reserved fields in the decoded interrupt-remapped request",
1067         "Interrupt index exceeded the interrupt-remapping table size",
1068         "Present field in the IRTE entry is clear",
1069         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1070         "Detected reserved fields in the IRTE entry",
1071         "Blocked a compatibility format interrupt request",
1072         "Blocked an interrupt request due to source-id verification failure",
1073 };
1074
1075 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(dma_remap_fault_reasons) - 1)
1076
1077 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1078 {
1079         if (fault_reason >= 0x20 && (fault_reason < 0x20 +
1080                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
1081                 *fault_type = INTR_REMAP;
1082                 return intr_remap_fault_reasons[fault_reason - 0x20];
1083         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1084                 *fault_type = DMA_REMAP;
1085                 return dma_remap_fault_reasons[fault_reason];
1086         } else {
1087                 *fault_type = UNKNOWN;
1088                 return "Unknown";
1089         }
1090 }
1091
1092 void dmar_msi_unmask(unsigned int irq)
1093 {
1094         struct intel_iommu *iommu = get_irq_data(irq);
1095         unsigned long flag;
1096
1097         /* unmask it */
1098         spin_lock_irqsave(&iommu->register_lock, flag);
1099         writel(0, iommu->reg + DMAR_FECTL_REG);
1100         /* Read a reg to force flush the post write */
1101         readl(iommu->reg + DMAR_FECTL_REG);
1102         spin_unlock_irqrestore(&iommu->register_lock, flag);
1103 }
1104
1105 void dmar_msi_mask(unsigned int irq)
1106 {
1107         unsigned long flag;
1108         struct intel_iommu *iommu = get_irq_data(irq);
1109
1110         /* mask it */
1111         spin_lock_irqsave(&iommu->register_lock, flag);
1112         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1113         /* Read a reg to force flush the post write */
1114         readl(iommu->reg + DMAR_FECTL_REG);
1115         spin_unlock_irqrestore(&iommu->register_lock, flag);
1116 }
1117
1118 void dmar_msi_write(int irq, struct msi_msg *msg)
1119 {
1120         struct intel_iommu *iommu = get_irq_data(irq);
1121         unsigned long flag;
1122
1123         spin_lock_irqsave(&iommu->register_lock, flag);
1124         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1125         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1126         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1127         spin_unlock_irqrestore(&iommu->register_lock, flag);
1128 }
1129
1130 void dmar_msi_read(int irq, struct msi_msg *msg)
1131 {
1132         struct intel_iommu *iommu = get_irq_data(irq);
1133         unsigned long flag;
1134
1135         spin_lock_irqsave(&iommu->register_lock, flag);
1136         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1137         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1138         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1139         spin_unlock_irqrestore(&iommu->register_lock, flag);
1140 }
1141
1142 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1143                 u8 fault_reason, u16 source_id, unsigned long long addr)
1144 {
1145         const char *reason;
1146         int fault_type;
1147
1148         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1149
1150         if (fault_type == INTR_REMAP)
1151                 printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
1152                        "fault index %llx\n"
1153                         "INTR-REMAP:[fault reason %02d] %s\n",
1154                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1155                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1156                         fault_reason, reason);
1157         else
1158                 printk(KERN_ERR
1159                        "DMAR:[%s] Request device [%02x:%02x.%d] "
1160                        "fault addr %llx \n"
1161                        "DMAR:[fault reason %02d] %s\n",
1162                        (type ? "DMA Read" : "DMA Write"),
1163                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1164                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1165         return 0;
1166 }
1167
1168 #define PRIMARY_FAULT_REG_LEN (16)
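/*
 * IOMMU fault interrupt handler: walk the primary fault recording
 * registers starting at dma_fsts_fault_record_index(), log each pending
 * fault via dmar_fault_do_one(), clear it, and finally clear the
 * remaining bits in the fault status register.
 */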
1169 irqreturn_t dmar_fault(int irq, void *dev_id)
1170 {
1171         struct intel_iommu *iommu = dev_id;
1172         int reg, fault_index;
1173         u32 fault_status;
1174         unsigned long flag;
1175
1176         spin_lock_irqsave(&iommu->register_lock, flag);
1177         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1178         if (fault_status)
1179                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1180                        fault_status);
1181
1182         /* TBD: ignore advanced fault log currently */
1183         if (!(fault_status & DMA_FSTS_PPF))
1184                 goto clear_rest;
1185
1186         fault_index = dma_fsts_fault_record_index(fault_status);
1187         reg = cap_fault_reg_offset(iommu->cap);
1188         while (1) {
1189                 u8 fault_reason;
1190                 u16 source_id;
1191                 u64 guest_addr;
1192                 int type;
1193                 u32 data;
1194
1195                 /* highest 32 bits */
1196                 data = readl(iommu->reg + reg +
1197                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1198                 if (!(data & DMA_FRCD_F))
1199                         break;
1200
1201                 fault_reason = dma_frcd_fault_reason(data);
1202                 type = dma_frcd_type(data);
1203
1204                 data = readl(iommu->reg + reg +
1205                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1206                 source_id = dma_frcd_source_id(data);
1207
1208                 guest_addr = dmar_readq(iommu->reg + reg +
1209                                 fault_index * PRIMARY_FAULT_REG_LEN);
1210                 guest_addr = dma_frcd_page_addr(guest_addr);
1211                 /* clear the fault */
1212                 writel(DMA_FRCD_F, iommu->reg + reg +
1213                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1214
1215                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1216
1217                 dmar_fault_do_one(iommu, type, fault_reason,
1218                                 source_id, guest_addr);
1219
1220                 fault_index++;
1221                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1222                         fault_index = 0;
1223                 spin_lock_irqsave(&iommu->register_lock, flag);
1224         }
1225 clear_rest:
1226         /* clear all the other faults */
1227         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1228         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1229
1230         spin_unlock_irqrestore(&iommu->register_lock, flag);
1231         return IRQ_HANDLED;
1232 }
1233
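/*
 * Allocate an irq for the IOMMU fault event, wire it up as a DMAR MSI via
 * arch_setup_dmar_msi() and register dmar_fault() as the handler.
 */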
1234 int dmar_set_interrupt(struct intel_iommu *iommu)
1235 {
1236         int irq, ret;
1237
1238         /*
1239          * Check if the fault interrupt is already initialized.
1240          */
1241         if (iommu->irq)
1242                 return 0;
1243
1244         irq = create_irq();
1245         if (!irq) {
1246                 printk(KERN_ERR "IOMMU: no free vectors\n");
1247                 return -EINVAL;
1248         }
1249
1250         set_irq_data(irq, iommu);
1251         iommu->irq = irq;
1252
1253         ret = arch_setup_dmar_msi(irq);
1254         if (ret) {
1255                 set_irq_data(irq, NULL);
1256                 iommu->irq = 0;
1257                 destroy_irq(irq);
1258                 return ret;
1259         }
1260
1261         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1262         if (ret)
1263                 printk(KERN_ERR "IOMMU: can't request irq\n");
1264         return ret;
1265 }
1266
1267 int __init enable_drhd_fault_handling(void)
1268 {
1269         struct dmar_drhd_unit *drhd;
1270
1271         /*
1272          * Enable fault control interrupt.
1273          */
1274         for_each_drhd_unit(drhd) {
1275                 int ret;
1276                 struct intel_iommu *iommu = drhd->iommu;
1277                 ret = dmar_set_interrupt(iommu);
1278
1279                 if (ret) {
1280                         printk(KERN_ERR "DRHD %Lx: failed to enable fault "
1281                                "interrupt, ret %d\n",
1282                                (unsigned long long)drhd->reg_base_addr, ret);
1283                         return -1;
1284                 }
1285         }
1286
1287         return 0;
1288 }
1289
1290 /*
1291  * Re-enable Queued Invalidation interface.
1292  */
1293 int dmar_reenable_qi(struct intel_iommu *iommu)
1294 {
1295         if (!ecap_qis(iommu->ecap))
1296                 return -ENOENT;
1297
1298         if (!iommu->qi)
1299                 return -ENOENT;
1300
1301         /*
1302          * First disable queued invalidation.
1303          */
1304         dmar_disable_qi(iommu);
1305         /*
1306          * Then enable queued invalidation again. Since there is no pending
1307          * invalidation requests now, it's safe to re-enable queued
1308          * invalidation.
1309          */
1310         __dmar_enable_qi(iommu);
1311
1312         return 0;
1313 }