Merge branch 'linus' into perfcounters/core
[pandora-kernel.git] / drivers / pci / dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36 #include <linux/tboot.h>
#include <linux/dmi.h> /* dmi_get_system_info() */
37
38 #undef PREFIX
39 #define PREFIX "DMAR:"
40
41 /* No locks are needed as the DMA remapping hardware unit
42  * list is constructed at boot time and hotplug of
43  * these units is not supported by the architecture.
44  */
45 LIST_HEAD(dmar_drhd_units);
46
47 static struct acpi_table_header * __initdata dmar_tbl;
48 static acpi_size dmar_tbl_size;
49
50 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
51 {
52         /*
53          * Add INCLUDE_ALL units at the tail, so that a scan of the list
54          * will find them at the very end.
55          */
56         if (drhd->include_all)
57                 list_add_tail(&drhd->list, &dmar_drhd_units);
58         else
59                 list_add(&drhd->list, &dmar_drhd_units);
60 }
61
62 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
63                                            struct pci_dev **dev, u16 segment)
64 {
65         struct pci_bus *bus;
66         struct pci_dev *pdev = NULL;
67         struct acpi_dmar_pci_path *path;
68         int count;
69
70         bus = pci_find_bus(segment, scope->bus);
71         path = (struct acpi_dmar_pci_path *)(scope + 1);
72         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
73                 / sizeof(struct acpi_dmar_pci_path);
74
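        /*
         * Each acpi_dmar_pci_path entry names one (device, function) hop
         * below scope->bus; descend through bridges one hop at a time
         * until the device the scope entry refers to is reached.
         */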
75         while (count) {
76                 if (pdev)
77                         pci_dev_put(pdev);
78                 /*
79                  * Some BIOSes list non-existent devices in the DMAR table;
80                  * just ignore them.
81                  */
82                 if (!bus) {
83                         printk(KERN_WARNING
84                         PREFIX "Device scope bus [%d] not found\n",
85                         scope->bus);
86                         break;
87                 }
88                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
89                 if (!pdev) {
90                         printk(KERN_WARNING PREFIX
91                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
92                                 segment, bus->number, path->dev, path->fn);
93                         break;
94                 }
95                 path++;
96                 count--;
97                 bus = pdev->subordinate;
98         }
99         if (!pdev) {
100                 printk(KERN_WARNING PREFIX
101                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
102                 segment, scope->bus, path->dev, path->fn);
103                 *dev = NULL;
104                 return 0;
105         }
106         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
107                         pdev->subordinate) || (scope->entry_type ==
108                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
109                 printk(KERN_WARNING PREFIX
110                         "Device scope type does not match for %s\n",
111                         pci_name(pdev));
112                 pci_dev_put(pdev);
113                 return -EINVAL;
114         }
115         *dev = pdev;
116         return 0;
117 }
118
119 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
120                                        struct pci_dev ***devices, u16 segment)
121 {
122         struct acpi_dmar_device_scope *scope;
123         void *tmp = start;
124         int index;
125         int ret;
126
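        /*
         * Two passes over the scope entries: first count the endpoint and
         * bridge entries so the pci_dev array can be sized, then resolve
         * each entry to a struct pci_dev.
         */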
127         *cnt = 0;
128         while (start < end) {
129                 scope = start;
130                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
131                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
132                         (*cnt)++;
133                 else
134                         printk(KERN_WARNING PREFIX
135                                 "Unsupported device scope\n");
136                 start += scope->length;
137         }
138         if (*cnt == 0)
139                 return 0;
140
141         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
142         if (!*devices)
143                 return -ENOMEM;
144
145         start = tmp;
146         index = 0;
147         while (start < end) {
148                 scope = start;
149                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
150                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
151                         ret = dmar_parse_one_dev_scope(scope,
152                                 &(*devices)[index], segment);
153                         if (ret) {
154                                 kfree(*devices);
155                                 return ret;
156                         }
157                         index++;
158                 }
159                 start += scope->length;
160         }
161
162         return 0;
163 }
164
165 /**
166  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
167  * structure which uniquely represents one DMA remapping hardware unit
168  * present in the platform
169  */
170 static int __init
171 dmar_parse_one_drhd(struct acpi_dmar_header *header)
172 {
173         struct acpi_dmar_hardware_unit *drhd;
174         struct dmar_drhd_unit *dmaru;
175         int ret = 0;
176
177         drhd = (struct acpi_dmar_hardware_unit *)header;
178         if (!drhd->address) {
179                 /* Promote an attitude of violence to a BIOS engineer today */
180                 WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
181                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
182                      dmi_get_system_info(DMI_BIOS_VENDOR),
183                      dmi_get_system_info(DMI_BIOS_VERSION),
184                      dmi_get_system_info(DMI_PRODUCT_VERSION));
185                 return -ENODEV;
186         }
187         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
188         if (!dmaru)
189                 return -ENOMEM;
190
191         dmaru->hdr = header;
192         dmaru->reg_base_addr = drhd->address;
193         dmaru->segment = drhd->segment;
194         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
195
196         ret = alloc_iommu(dmaru);
197         if (ret) {
198                 kfree(dmaru);
199                 return ret;
200         }
201         dmar_register_drhd_unit(dmaru);
202         return 0;
203 }
204
205 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
206 {
207         struct acpi_dmar_hardware_unit *drhd;
208         int ret = 0;
209
210         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
211
212         if (dmaru->include_all)
213                 return 0;
214
215         ret = dmar_parse_dev_scope((void *)(drhd + 1),
216                                 ((void *)drhd) + drhd->header.length,
217                                 &dmaru->devices_cnt, &dmaru->devices,
218                                 drhd->segment);
219         if (ret) {
220                 list_del(&dmaru->list);
221                 kfree(dmaru);
222         }
223         return ret;
224 }
225
226 #ifdef CONFIG_DMAR
227 LIST_HEAD(dmar_rmrr_units);
228
229 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
230 {
231         list_add(&rmrr->list, &dmar_rmrr_units);
232 }
233
234
235 static int __init
236 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
237 {
238         struct acpi_dmar_reserved_memory *rmrr;
239         struct dmar_rmrr_unit *rmrru;
240
241         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
242         if (!rmrru)
243                 return -ENOMEM;
244
245         rmrru->hdr = header;
246         rmrr = (struct acpi_dmar_reserved_memory *)header;
247         rmrru->base_address = rmrr->base_address;
248         rmrru->end_address = rmrr->end_address;
249
250         dmar_register_rmrr_unit(rmrru);
251         return 0;
252 }
253
254 static int __init
255 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
256 {
257         struct acpi_dmar_reserved_memory *rmrr;
258         int ret;
259
260         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
261         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
262                 ((void *)rmrr) + rmrr->header.length,
263                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
264
265         if (ret || (rmrru->devices_cnt == 0)) {
266                 list_del(&rmrru->list);
267                 kfree(rmrru);
268         }
269         return ret;
270 }
271
272 static LIST_HEAD(dmar_atsr_units);
273
274 static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
275 {
276         struct acpi_dmar_atsr *atsr;
277         struct dmar_atsr_unit *atsru;
278
279         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
280         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
281         if (!atsru)
282                 return -ENOMEM;
283
284         atsru->hdr = hdr;
285         atsru->include_all = atsr->flags & 0x1;
286
287         list_add(&atsru->list, &dmar_atsr_units);
288
289         return 0;
290 }
291
292 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
293 {
294         int rc;
295         struct acpi_dmar_atsr *atsr;
296
297         if (atsru->include_all)
298                 return 0;
299
300         atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
301         rc = dmar_parse_dev_scope((void *)(atsr + 1),
302                                 (void *)atsr + atsr->header.length,
303                                 &atsru->devices_cnt, &atsru->devices,
304                                 atsr->segment);
305         if (rc || !atsru->devices_cnt) {
306                 list_del(&atsru->list);
307                 kfree(atsru);
308         }
309
310         return rc;
311 }
312
313 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
314 {
315         int i;
316         struct pci_bus *bus;
317         struct acpi_dmar_atsr *atsr;
318         struct dmar_atsr_unit *atsru;
319
320         list_for_each_entry(atsru, &dmar_atsr_units, list) {
321                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
322                 if (atsr->segment == pci_domain_nr(dev->bus))
323                         goto found;
324         }
325
326         return 0;
327
328 found:
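        /*
         * The ATSR device scope lists PCIe root ports.  Walk up from the
         * device towards the root and report a match if the root port
         * above it is in this unit's scope (or the unit is INCLUDE_ALL).
         */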
329         for (bus = dev->bus; bus; bus = bus->parent) {
330                 struct pci_dev *bridge = bus->self;
331
332                 if (!bridge || !bridge->is_pcie ||
333                     bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
334                         return 0;
335
336                 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
337                         for (i = 0; i < atsru->devices_cnt; i++)
338                                 if (atsru->devices[i] == bridge)
339                                         return 1;
340                         break;
341                 }
342         }
343
344         if (atsru->include_all)
345                 return 1;
346
347         return 0;
348 }
349 #endif
350
351 static void __init
352 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
353 {
354         struct acpi_dmar_hardware_unit *drhd;
355         struct acpi_dmar_reserved_memory *rmrr;
356         struct acpi_dmar_atsr *atsr;
357
358         switch (header->type) {
359         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
360                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
361                                     header);
362                 printk(KERN_INFO PREFIX
363                         "DRHD base: %#016Lx flags: %#x\n",
364                         (unsigned long long)drhd->address, drhd->flags);
365                 break;
366         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
367                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
368                                     header);
369                 printk(KERN_INFO PREFIX
370                         "RMRR base: %#016Lx end: %#016Lx\n",
371                         (unsigned long long)rmrr->base_address,
372                         (unsigned long long)rmrr->end_address);
373                 break;
374         case ACPI_DMAR_TYPE_ATSR:
375                 atsr = container_of(header, struct acpi_dmar_atsr, header);
376                 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
377                 break;
378         }
379 }
380
381 /**
382  * dmar_table_detect - checks to see if the platform supports DMAR devices
383  */
384 static int __init dmar_table_detect(void)
385 {
386         acpi_status status = AE_OK;
387
388         /* If we can find the DMAR table, then there are DMAR devices. */
389         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
390                                 (struct acpi_table_header **)&dmar_tbl,
391                                 &dmar_tbl_size);
392
393         if (ACPI_SUCCESS(status) && !dmar_tbl) {
394                 printk(KERN_WARNING PREFIX "Unable to map DMAR\n");
395                 status = AE_NOT_FOUND;
396         }
397
398         return (ACPI_SUCCESS(status) ? 1 : 0);
399 }
400
401 /**
402  * parse_dmar_table - parses the DMA reporting table
403  */
404 static int __init
405 parse_dmar_table(void)
406 {
407         struct acpi_table_dmar *dmar;
408         struct acpi_dmar_header *entry_header;
409         int ret = 0;
410
411         /*
412          * Do it again; the earlier dmar_tbl mapping could have been done
413          * with the fixed map.
414          */
415         dmar_table_detect();
416
417         /*
418          * ACPI tables may not be DMA protected by tboot, so use the DMAR
419          * copy that SINIT saved in SinitMleData in the TXT heap (DMA protected)
420          */
421         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
422
423         dmar = (struct acpi_table_dmar *)dmar_tbl;
424         if (!dmar)
425                 return -ENODEV;
426
427         if (dmar->width < PAGE_SHIFT - 1) {
428                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
429                 return -EINVAL;
430         }
431
432         printk(KERN_INFO PREFIX "Host address width %d\n",
433                 dmar->width + 1);
434
435         entry_header = (struct acpi_dmar_header *)(dmar + 1);
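        /*
         * The remainder of the DMAR table is a sequence of variable-length
         * remapping structures; walk them by their length field until the
         * end of the table.
         */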
436         while (((unsigned long)entry_header) <
437                         (((unsigned long)dmar) + dmar_tbl->length)) {
438                 /* Avoid looping forever on bad ACPI tables */
439                 if (entry_header->length == 0) {
440                         printk(KERN_WARNING PREFIX
441                                 "Invalid 0-length structure\n");
442                         ret = -EINVAL;
443                         break;
444                 }
445
446                 dmar_table_print_dmar_entry(entry_header);
447
448                 switch (entry_header->type) {
449                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
450                         ret = dmar_parse_one_drhd(entry_header);
451                         break;
452                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
453 #ifdef CONFIG_DMAR
454                         ret = dmar_parse_one_rmrr(entry_header);
455 #endif
456                         break;
457                 case ACPI_DMAR_TYPE_ATSR:
458 #ifdef CONFIG_DMAR
459                         ret = dmar_parse_one_atsr(entry_header);
460 #endif
461                         break;
462                 default:
463                         printk(KERN_WARNING PREFIX
464                                 "Unknown DMAR structure type\n");
465                         ret = 0; /* for forward compatibility */
466                         break;
467                 }
468                 if (ret)
469                         break;
470
471                 entry_header = ((void *)entry_header + entry_header->length);
472         }
473         return ret;
474 }
475
476 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
477                           struct pci_dev *dev)
478 {
479         int index;
480
481         while (dev) {
482                 for (index = 0; index < cnt; index++)
483                         if (dev == devices[index])
484                                 return 1;
485
486                 /* Check our parent */
487                 dev = dev->bus->self;
488         }
489
490         return 0;
491 }
492
493 struct dmar_drhd_unit *
494 dmar_find_matched_drhd_unit(struct pci_dev *dev)
495 {
496         struct dmar_drhd_unit *dmaru = NULL;
497         struct acpi_dmar_hardware_unit *drhd;
498
499         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
500                 drhd = container_of(dmaru->hdr,
501                                     struct acpi_dmar_hardware_unit,
502                                     header);
503
504                 if (dmaru->include_all &&
505                     drhd->segment == pci_domain_nr(dev->bus))
506                         return dmaru;
507
508                 if (dmar_pci_device_match(dmaru->devices,
509                                           dmaru->devices_cnt, dev))
510                         return dmaru;
511         }
512
513         return NULL;
514 }
515
516 int __init dmar_dev_scope_init(void)
517 {
518         struct dmar_drhd_unit *drhd, *drhd_n;
519         int ret = -ENODEV;
520
521         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
522                 ret = dmar_parse_dev(drhd);
523                 if (ret)
524                         return ret;
525         }
526
527 #ifdef CONFIG_DMAR
528         {
529                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
530                 struct dmar_atsr_unit *atsr, *atsr_n;
531
532                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
533                         ret = rmrr_parse_dev(rmrr);
534                         if (ret)
535                                 return ret;
536                 }
537
538                 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
539                         ret = atsr_parse_dev(atsr);
540                         if (ret)
541                                 return ret;
542                 }
543         }
544 #endif
545
546         return ret;
547 }
548
549
550 int __init dmar_table_init(void)
551 {
552         static int dmar_table_initialized;
553         int ret;
554
555         if (dmar_table_initialized)
556                 return 0;
557
558         dmar_table_initialized = 1;
559
560         ret = parse_dmar_table();
561         if (ret) {
562                 if (ret != -ENODEV)
563                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
564                 return ret;
565         }
566
567         if (list_empty(&dmar_drhd_units)) {
568                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
569                 return -ENODEV;
570         }
571
572 #ifdef CONFIG_DMAR
573         if (list_empty(&dmar_rmrr_units))
574                 printk(KERN_INFO PREFIX "No RMRR found\n");
575
576         if (list_empty(&dmar_atsr_units))
577                 printk(KERN_INFO PREFIX "No ATSR found\n");
578 #endif
579
580 #ifdef CONFIG_INTR_REMAP
581         parse_ioapics_under_ir();
582 #endif
583         return 0;
584 }
585
586 void __init detect_intel_iommu(void)
587 {
588         int ret;
589
590         ret = dmar_table_detect();
591
592         {
593 #ifdef CONFIG_INTR_REMAP
594                 struct acpi_table_dmar *dmar;
595                 /*
596                  * for now we will disable dma-remapping when interrupt
597                  * remapping is enabled.
598                  * When support for queued invalidation for IOTLB invalidation
599                  * is added, we will not need this any more.
600                  */
601                 dmar = (struct acpi_table_dmar *) dmar_tbl;
602                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
603                         printk(KERN_INFO
604                                "Queued invalidation will be enabled to support "
605                                "x2apic and Intr-remapping.\n");
606 #endif
607 #ifdef CONFIG_DMAR
608                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
609                     !dmar_disabled)
610                         iommu_detected = 1;
611 #endif
612         }
613         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
614         dmar_tbl = NULL;
615 }
616
617
618 int alloc_iommu(struct dmar_drhd_unit *drhd)
619 {
620         struct intel_iommu *iommu;
621         int map_size;
622         u32 ver;
623         static int iommu_allocated = 0;
624         int agaw = 0;
625         int msagaw = 0;
626
627         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
628         if (!iommu)
629                 return -ENOMEM;
630
631         iommu->seq_id = iommu_allocated++;
632         sprintf(iommu->name, "dmar%d", iommu->seq_id);
633
634         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
635         if (!iommu->reg) {
636                 printk(KERN_ERR "IOMMU: can't map the region\n");
637                 goto error;
638         }
639         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
640         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
641
642 #ifdef CONFIG_DMAR
643         agaw = iommu_calculate_agaw(iommu);
644         if (agaw < 0) {
645                 printk(KERN_ERR
646                        "Cannot get a valid agaw for iommu (seq_id = %d)\n",
647                        iommu->seq_id);
648                 goto error;
649         }
650         msagaw = iommu_calculate_max_sagaw(iommu);
651         if (msagaw < 0) {
652                 printk(KERN_ERR
653                         "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
654                         iommu->seq_id);
655                 goto error;
656         }
657 #endif
658         iommu->agaw = agaw;
659         iommu->msagaw = msagaw;
660
661         /* the registers might be more than one page */
662         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
663                 cap_max_fault_reg_offset(iommu->cap));
664         map_size = VTD_PAGE_ALIGN(map_size);
665         if (map_size > VTD_PAGE_SIZE) {
666                 iounmap(iommu->reg);
667                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
668                 if (!iommu->reg) {
669                         printk(KERN_ERR "IOMMU: can't map the region\n");
670                         goto error;
671                 }
672         }
673
674         ver = readl(iommu->reg + DMAR_VER_REG);
675         pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
676                 (unsigned long long)drhd->reg_base_addr,
677                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
678                 (unsigned long long)iommu->cap,
679                 (unsigned long long)iommu->ecap);
680
681         spin_lock_init(&iommu->register_lock);
682
683         drhd->iommu = iommu;
684         return 0;
685 error:
686         kfree(iommu);
687         return -1;
688 }
689
690 void free_iommu(struct intel_iommu *iommu)
691 {
692         if (!iommu)
693                 return;
694
695 #ifdef CONFIG_DMAR
696         free_dmar_iommu(iommu);
697 #endif
698
699         if (iommu->reg)
700                 iounmap(iommu->reg);
701         kfree(iommu);
702 }
703
704 /*
705  * Reclaim all the submitted descriptors which have completed their work.
706  */
707 static inline void reclaim_free_desc(struct q_inval *qi)
708 {
709         while (qi->desc_status[qi->free_tail] == QI_DONE ||
710                qi->desc_status[qi->free_tail] == QI_ABORT) {
711                 qi->desc_status[qi->free_tail] = QI_FREE;
712                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
713                 qi->free_cnt++;
714         }
715 }
716
717 static int qi_check_fault(struct intel_iommu *iommu, int index)
718 {
719         u32 fault;
720         int head, tail;
721         struct q_inval *qi = iommu->qi;
722         int wait_index = (index + 1) % QI_LENGTH;
723
724         if (qi->desc_status[wait_index] == QI_ABORT)
725                 return -EAGAIN;
726
727         fault = readl(iommu->reg + DMAR_FSTS_REG);
728
729         /*
730          * If IQE happens, the head points to the descriptor associated
731          * with the error. No new descriptors are fetched until the IQE
732          * is cleared.
733          */
734         if (fault & DMA_FSTS_IQE) {
735                 head = readl(iommu->reg + DMAR_IQH_REG);
736                 if ((head >> DMAR_IQ_SHIFT) == index) {
737                         printk(KERN_ERR "VT-d detected invalid descriptor: "
738                                 "low=%llx, high=%llx\n",
739                                 (unsigned long long)qi->desc[index].low,
740                                 (unsigned long long)qi->desc[index].high);
741                         memcpy(&qi->desc[index], &qi->desc[wait_index],
742                                         sizeof(struct qi_desc));
743                         __iommu_flush_cache(iommu, &qi->desc[index],
744                                         sizeof(struct qi_desc));
745                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
746                         return -EINVAL;
747                 }
748         }
749
750         /*
751          * If ITE happens, all pending wait_desc commands are aborted.
752          * No new descriptors are fetched until the ITE is cleared.
753          */
754         if (fault & DMA_FSTS_ITE) {
755                 head = readl(iommu->reg + DMAR_IQH_REG);
756                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
757                 head |= 1;
758                 tail = readl(iommu->reg + DMAR_IQT_REG);
759                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
760
761                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
762
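                /*
                 * Wait descriptors occupy the odd slots (hence head |= 1
                 * above); walk back from head to tail two slots at a time
                 * and mark in-flight wait descriptors as aborted so their
                 * submitters see QI_ABORT and retry.
                 */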
763                 do {
764                         if (qi->desc_status[head] == QI_IN_USE)
765                                 qi->desc_status[head] = QI_ABORT;
766                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
767                 } while (head != tail);
768
769                 if (qi->desc_status[wait_index] == QI_ABORT)
770                         return -EAGAIN;
771         }
772
773         if (fault & DMA_FSTS_ICE)
774                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
775
776         return 0;
777 }
778
779 /*
780  * Submit the queued invalidation descriptor to the remapping
781  * hardware unit and wait for its completion.
782  */
783 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
784 {
785         int rc;
786         struct q_inval *qi = iommu->qi;
787         struct qi_desc *hw, wait_desc;
788         int wait_index, index;
789         unsigned long flags;
790
791         if (!qi)
792                 return 0;
793
794         hw = qi->desc;
795
796 restart:
797         rc = 0;
798
799         spin_lock_irqsave(&qi->q_lock, flags);
800         while (qi->free_cnt < 3) {
801                 spin_unlock_irqrestore(&qi->q_lock, flags);
802                 cpu_relax();
803                 spin_lock_irqsave(&qi->q_lock, flags);
804         }
805
806         index = qi->free_head;
807         wait_index = (index + 1) % QI_LENGTH;
808
809         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
810
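        /*
         * Each request descriptor is paired with an invalidation wait
         * descriptor in the next slot; hardware writes QI_DONE into
         * desc_status[wait_index] when it processes the wait descriptor,
         * which is what the polling loop below waits for.
         */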
811         hw[index] = *desc;
812
813         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
814                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
815         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
816
817         hw[wait_index] = wait_desc;
818
819         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
820         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
821
822         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
823         qi->free_cnt -= 2;
824
825         /*
826          * update the HW tail register indicating the presence of
827          * new descriptors.
828          */
829         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
830
831         while (qi->desc_status[wait_index] != QI_DONE) {
832                 /*
833                  * We leave interrupts disabled to prevent interrupt context
834                  * from queueing another cmd while a cmd is already submitted
835                  * and waiting for completion on this cpu. This is to avoid
836                  * a deadlock where the interrupt context can wait indefinitely
837                  * for free slots in the queue.
838                  */
839                 rc = qi_check_fault(iommu, index);
840                 if (rc)
841                         break;
842
843                 spin_unlock(&qi->q_lock);
844                 cpu_relax();
845                 spin_lock(&qi->q_lock);
846         }
847
848         qi->desc_status[index] = QI_DONE;
849
850         reclaim_free_desc(qi);
851         spin_unlock_irqrestore(&qi->q_lock, flags);
852
853         if (rc == -EAGAIN)
854                 goto restart;
855
856         return rc;
857 }
858
859 /*
860  * Flush the global interrupt entry cache.
861  */
862 void qi_global_iec(struct intel_iommu *iommu)
863 {
864         struct qi_desc desc;
865
866         desc.low = QI_IEC_TYPE;
867         desc.high = 0;
868
869         /* should never fail */
870         qi_submit_sync(&desc, iommu);
871 }
872
873 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
874                       u64 type)
875 {
876         struct qi_desc desc;
877
878         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
879                         | QI_CC_GRAN(type) | QI_CC_TYPE;
880         desc.high = 0;
881
882         qi_submit_sync(&desc, iommu);
883 }
884
885 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
886                     unsigned int size_order, u64 type)
887 {
888         u8 dw = 0, dr = 0;
889
890         struct qi_desc desc;
891         int ih = 0;
892
893         if (cap_write_drain(iommu->cap))
894                 dw = 1;
895
896         if (cap_read_drain(iommu->cap))
897                 dr = 1;
898
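        /*
         * The AM field encodes the size: the request invalidates
         * 2^size_order pages starting at addr.
         */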
899         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
900                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
901         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
902                 | QI_IOTLB_AM(size_order);
903
904         qi_submit_sync(&desc, iommu);
905 }
906
907 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
908                         u64 addr, unsigned mask)
909 {
910         struct qi_desc desc;
911
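        /*
         * For a multi-page request the address must be size-aligned (hence
         * the BUG_ON); the size (2^mask pages) is encoded by setting the S
         * bit (QI_DEV_IOTLB_SIZE) and the low address bits, following the
         * device-IOTLB invalidate descriptor format.
         */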
912         if (mask) {
913                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
914                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
915                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
916         } else
917                 desc.high = QI_DEV_IOTLB_ADDR(addr);
918
919         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
920                 qdep = 0;
921
922         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
923                    QI_DIOTLB_TYPE;
924
925         qi_submit_sync(&desc, iommu);
926 }
927
928 /*
929  * Disable Queued Invalidation interface.
930  */
931 void dmar_disable_qi(struct intel_iommu *iommu)
932 {
933         unsigned long flags;
934         u32 sts;
935         cycles_t start_time = get_cycles();
936
937         if (!ecap_qis(iommu->ecap))
938                 return;
939
940         spin_lock_irqsave(&iommu->register_lock, flags);
941
942         sts = readl(iommu->reg + DMAR_GSTS_REG);
943         if (!(sts & DMA_GSTS_QIES))
944                 goto end;
945
946         /*
947          * Give the HW a chance to complete the pending invalidation requests.
948          */
949         while ((readl(iommu->reg + DMAR_IQT_REG) !=
950                 readl(iommu->reg + DMAR_IQH_REG)) &&
951                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
952                 cpu_relax();
953
954         iommu->gcmd &= ~DMA_GCMD_QIE;
955         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
956
957         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
958                       !(sts & DMA_GSTS_QIES), sts);
959 end:
960         spin_unlock_irqrestore(&iommu->register_lock, flags);
961 }
962
963 /*
964  * Enable queued invalidation.
965  */
966 static void __dmar_enable_qi(struct intel_iommu *iommu)
967 {
968         u32 sts;
969         unsigned long flags;
970         struct q_inval *qi = iommu->qi;
971
972         qi->free_head = qi->free_tail = 0;
973         qi->free_cnt = QI_LENGTH;
974
975         spin_lock_irqsave(&iommu->register_lock, flags);
976
977         /* write zero to the tail reg */
978         writel(0, iommu->reg + DMAR_IQT_REG);
979
980         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
981
982         iommu->gcmd |= DMA_GCMD_QIE;
983         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
984
985         /* Make sure hardware complete it */
986         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
987
988         spin_unlock_irqrestore(&iommu->register_lock, flags);
989 }
990
991 /*
992  * Enable the Queued Invalidation interface. This is required to support
993  * interrupt-remapping. It is also used by DMA-remapping, where it replaces
994  * register-based IOTLB invalidation.
995  */
996 int dmar_enable_qi(struct intel_iommu *iommu)
997 {
998         struct q_inval *qi;
999
1000         if (!ecap_qis(iommu->ecap))
1001                 return -ENOENT;
1002
1003         /*
1004          * queued invalidation is already set up and enabled.
1005          */
1006         if (iommu->qi)
1007                 return 0;
1008
1009         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1010         if (!iommu->qi)
1011                 return -ENOMEM;
1012
1013         qi = iommu->qi;
1014
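        /*
         * The invalidation queue is a single zeroed page; its physical
         * address is programmed into DMAR_IQA_REG by __dmar_enable_qi(),
         * so it must be page aligned.
         */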
1015         qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
1016         if (!qi->desc) {
1017                 kfree(qi);
1018                 iommu->qi = NULL;
1019                 return -ENOMEM;
1020         }
1021
1022         qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1023         if (!qi->desc_status) {
1024                 free_page((unsigned long) qi->desc);
1025                 kfree(qi);
1026                 iommu->qi = NULL;
1027                 return -ENOMEM;
1028         }
1029
1030         qi->free_head = qi->free_tail = 0;
1031         qi->free_cnt = QI_LENGTH;
1032
1033         spin_lock_init(&qi->q_lock);
1034
1035         __dmar_enable_qi(iommu);
1036
1037         return 0;
1038 }
1039
1040 /* IOMMU interrupt handling. Most of it is MSI-like. */
1041
1042 enum faulttype {
1043         DMA_REMAP,
1044         INTR_REMAP,
1045         UNKNOWN,
1046 };
1047
1048 static const char *dma_remap_fault_reasons[] =
1049 {
1050         "Software",
1051         "Present bit in root entry is clear",
1052         "Present bit in context entry is clear",
1053         "Invalid context entry",
1054         "Access beyond MGAW",
1055         "PTE Write access is not set",
1056         "PTE Read access is not set",
1057         "Next page table ptr is invalid",
1058         "Root table address invalid",
1059         "Context table ptr is invalid",
1060         "non-zero reserved fields in RTP",
1061         "non-zero reserved fields in CTP",
1062         "non-zero reserved fields in PTE",
1063 };
1064
1065 static const char *intr_remap_fault_reasons[] =
1066 {
1067         "Detected reserved fields in the decoded interrupt-remapped request",
1068         "Interrupt index exceeded the interrupt-remapping table size",
1069         "Present field in the IRTE entry is clear",
1070         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1071         "Detected reserved fields in the IRTE entry",
1072         "Blocked a compatibility format interrupt request",
1073         "Blocked an interrupt request due to source-id verification failure",
1074 };
1075
1076 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(dma_remap_fault_reasons) - 1)
1077
1078 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1079 {
1080         if (fault_reason >= 0x20 && (fault_reason < 0x20 +
1081                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
1082                 *fault_type = INTR_REMAP;
1083                 return intr_remap_fault_reasons[fault_reason - 0x20];
1084         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1085                 *fault_type = DMA_REMAP;
1086                 return dma_remap_fault_reasons[fault_reason];
1087         } else {
1088                 *fault_type = UNKNOWN;
1089                 return "Unknown";
1090         }
1091 }
1092
1093 void dmar_msi_unmask(unsigned int irq)
1094 {
1095         struct intel_iommu *iommu = get_irq_data(irq);
1096         unsigned long flag;
1097
1098         /* unmask it */
1099         spin_lock_irqsave(&iommu->register_lock, flag);
1100         writel(0, iommu->reg + DMAR_FECTL_REG);
1101         /* Read a reg to force flush the post write */
1102         readl(iommu->reg + DMAR_FECTL_REG);
1103         spin_unlock_irqrestore(&iommu->register_lock, flag);
1104 }
1105
1106 void dmar_msi_mask(unsigned int irq)
1107 {
1108         unsigned long flag;
1109         struct intel_iommu *iommu = get_irq_data(irq);
1110
1111         /* mask it */
1112         spin_lock_irqsave(&iommu->register_lock, flag);
1113         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1114         /* Read a reg to force flush the post write */
1115         readl(iommu->reg + DMAR_FECTL_REG);
1116         spin_unlock_irqrestore(&iommu->register_lock, flag);
1117 }
1118
1119 void dmar_msi_write(int irq, struct msi_msg *msg)
1120 {
1121         struct intel_iommu *iommu = get_irq_data(irq);
1122         unsigned long flag;
1123
1124         spin_lock_irqsave(&iommu->register_lock, flag);
1125         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1126         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1127         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1128         spin_unlock_irqrestore(&iommu->register_lock, flag);
1129 }
1130
1131 void dmar_msi_read(int irq, struct msi_msg *msg)
1132 {
1133         struct intel_iommu *iommu = get_irq_data(irq);
1134         unsigned long flag;
1135
1136         spin_lock_irqsave(&iommu->register_lock, flag);
1137         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1138         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1139         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1140         spin_unlock_irqrestore(&iommu->register_lock, flag);
1141 }
1142
1143 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1144                 u8 fault_reason, u16 source_id, unsigned long long addr)
1145 {
1146         const char *reason;
1147         int fault_type;
1148
1149         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1150
1151         if (fault_type == INTR_REMAP)
1152                 printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
1153                        "fault index %llx\n"
1154                         "INTR-REMAP:[fault reason %02d] %s\n",
1155                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1156                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1157                         fault_reason, reason);
1158         else
1159                 printk(KERN_ERR
1160                        "DMAR:[%s] Request device [%02x:%02x.%d] "
1161                        "fault addr %llx\n"
1162                        "DMAR:[fault reason %02d] %s\n",
1163                        (type ? "DMA Read" : "DMA Write"),
1164                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1165                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1166         return 0;
1167 }
1168
1169 #define PRIMARY_FAULT_REG_LEN (16)
1170 irqreturn_t dmar_fault(int irq, void *dev_id)
1171 {
1172         struct intel_iommu *iommu = dev_id;
1173         int reg, fault_index;
1174         u32 fault_status;
1175         unsigned long flag;
1176
1177         spin_lock_irqsave(&iommu->register_lock, flag);
1178         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1179         if (fault_status)
1180                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1181                        fault_status);
1182
1183         /* TBD: ignore advanced fault log currently */
1184         if (!(fault_status & DMA_FSTS_PPF))
1185                 goto clear_rest;
1186
1187         fault_index = dma_fsts_fault_record_index(fault_status);
1188         reg = cap_fault_reg_offset(iommu->cap);
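        /*
         * Each primary fault record is 16 bytes: the faulting address at
         * offset 0, the source-id in the dword at offset 8, and the fault
         * reason, type and F (fault) bit in the dword at offset 12.
         */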
1189         while (1) {
1190                 u8 fault_reason;
1191                 u16 source_id;
1192                 u64 guest_addr;
1193                 int type;
1194                 u32 data;
1195
1196                 /* highest 32 bits */
1197                 data = readl(iommu->reg + reg +
1198                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1199                 if (!(data & DMA_FRCD_F))
1200                         break;
1201
1202                 fault_reason = dma_frcd_fault_reason(data);
1203                 type = dma_frcd_type(data);
1204
1205                 data = readl(iommu->reg + reg +
1206                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1207                 source_id = dma_frcd_source_id(data);
1208
1209                 guest_addr = dmar_readq(iommu->reg + reg +
1210                                 fault_index * PRIMARY_FAULT_REG_LEN);
1211                 guest_addr = dma_frcd_page_addr(guest_addr);
1212                 /* clear the fault */
1213                 writel(DMA_FRCD_F, iommu->reg + reg +
1214                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1215
1216                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1217
1218                 dmar_fault_do_one(iommu, type, fault_reason,
1219                                 source_id, guest_addr);
1220
1221                 fault_index++;
1222                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1223                         fault_index = 0;
1224                 spin_lock_irqsave(&iommu->register_lock, flag);
1225         }
1226 clear_rest:
1227         /* clear all the other faults */
1228         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1229         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1230
1231         spin_unlock_irqrestore(&iommu->register_lock, flag);
1232         return IRQ_HANDLED;
1233 }
1234
1235 int dmar_set_interrupt(struct intel_iommu *iommu)
1236 {
1237         int irq, ret;
1238
1239         /*
1240          * Check if the fault interrupt is already initialized.
1241          */
1242         if (iommu->irq)
1243                 return 0;
1244
1245         irq = create_irq();
1246         if (!irq) {
1247                 printk(KERN_ERR "IOMMU: no free vectors\n");
1248                 return -EINVAL;
1249         }
1250
1251         set_irq_data(irq, iommu);
1252         iommu->irq = irq;
1253
1254         ret = arch_setup_dmar_msi(irq);
1255         if (ret) {
1256                 set_irq_data(irq, NULL);
1257                 iommu->irq = 0;
1258                 destroy_irq(irq);
1259                 return ret;
1260         }
1261
1262         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1263         if (ret)
1264                 printk(KERN_ERR "IOMMU: can't request irq\n");
1265         return ret;
1266 }
1267
1268 int __init enable_drhd_fault_handling(void)
1269 {
1270         struct dmar_drhd_unit *drhd;
1271
1272         /*
1273          * Enable fault control interrupt.
1274          */
1275         for_each_drhd_unit(drhd) {
1276                 int ret;
1277                 struct intel_iommu *iommu = drhd->iommu;
1278                 ret = dmar_set_interrupt(iommu);
1279
1280                 if (ret) {
1281                         printk(KERN_ERR "DRHD %Lx: failed to enable fault "
1282                                "interrupt, ret %d\n",
1283                                (unsigned long long)drhd->reg_base_addr, ret);
1284                         return -1;
1285                 }
1286         }
1287
1288         return 0;
1289 }
1290
1291 /*
1292  * Re-enable Queued Invalidation interface.
1293  */
1294 int dmar_reenable_qi(struct intel_iommu *iommu)
1295 {
1296         if (!ecap_qis(iommu->ecap))
1297                 return -ENOENT;
1298
1299         if (!iommu->qi)
1300                 return -ENOENT;
1301
1302         /*
1303          * First disable queued invalidation.
1304          */
1305         dmar_disable_qi(iommu);
1306         /*
1307          * Then enable queued invalidation again. Since there is no pending
1308          * invalidation requests now, it's safe to re-enable queued
1309          * invalidation.
1310          */
1311         __dmar_enable_qi(iommu);
1312
1313         return 0;
1314 }