drivers/pci/dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
#include <linux/dmi.h>		/* dmi_get_system_info() */
#include <linux/slab.h>		/* kzalloc()/kfree() */
36
37 #undef PREFIX
38 #define PREFIX "DMAR:"
39
40 /* No locks are needed, as the DMA remapping hardware unit
41  * list is constructed at boot time and hotplug of
42  * these units is not supported by the architecture.
43  */
44 LIST_HEAD(dmar_drhd_units);
45
46 static struct acpi_table_header * __initdata dmar_tbl;
47 static acpi_size dmar_tbl_size;
48
49 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
50 {
51         /*
52          * add INCLUDE_ALL at the tail, so a scan of the list will find it at
53          * the very end.
54          */
55         if (drhd->include_all)
56                 list_add_tail(&drhd->list, &dmar_drhd_units);
57         else
58                 list_add(&drhd->list, &dmar_drhd_units);
59 }
60
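/*
 * Resolve one ACPI device-scope entry to a pci_dev.  The scope carries a
 * start bus number plus a path of (device, function) pairs; walk that path
 * bridge by bridge until the target device is reached.  Devices missing
 * from the system are tolerated and simply yield *dev == NULL.
 */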
61 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
62                                            struct pci_dev **dev, u16 segment)
63 {
64         struct pci_bus *bus;
65         struct pci_dev *pdev = NULL;
66         struct acpi_dmar_pci_path *path;
67         int count;
68
69         bus = pci_find_bus(segment, scope->bus);
70         path = (struct acpi_dmar_pci_path *)(scope + 1);
71         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
72                 / sizeof(struct acpi_dmar_pci_path);
73
74         while (count) {
75                 if (pdev)
76                         pci_dev_put(pdev);
77                 /*
78                  * Some BIOSes list non-existent devices in the DMAR table;
79                  * just ignore them.
80                  */
81                 if (!bus) {
82                         printk(KERN_WARNING
83                         PREFIX "Device scope bus [%d] not found\n",
84                         scope->bus);
85                         break;
86                 }
87                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
88                 if (!pdev) {
89                         printk(KERN_WARNING PREFIX
90                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
91                                 segment, bus->number, path->dev, path->fn);
92                         break;
93                 }
94                 path++;
95                 count--;
96                 bus = pdev->subordinate;
97         }
98         if (!pdev) {
99                 printk(KERN_WARNING PREFIX
100                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
101                 segment, scope->bus, path->dev, path->fn);
102                 *dev = NULL;
103                 return 0;
104         }
105         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
106                         pdev->subordinate) || (scope->entry_type ==
107                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
108                 printk(KERN_WARNING PREFIX
109                         "Device scope type does not match for %s\n",
110                         pci_name(pdev));
111                 pci_dev_put(pdev);
112                 return -EINVAL;
113         }
114         *dev = pdev;
115         return 0;
116 }
117
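/*
 * Parse a whole device-scope list: a first pass counts the ENDPOINT/BRIDGE
 * entries, then the pci_dev pointer array is allocated and a second pass
 * resolves each entry via dmar_parse_one_dev_scope().
 */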
118 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
119                                        struct pci_dev ***devices, u16 segment)
120 {
121         struct acpi_dmar_device_scope *scope;
122         void *tmp = start;
123         int index;
124         int ret;
125
126         *cnt = 0;
127         while (start < end) {
128                 scope = start;
129                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
130                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
131                         (*cnt)++;
132                 else
133                         printk(KERN_WARNING PREFIX
134                                 "Unsupported device scope\n");
135                 start += scope->length;
136         }
137         if (*cnt == 0)
138                 return 0;
139
140         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
141         if (!*devices)
142                 return -ENOMEM;
143
144         start = tmp;
145         index = 0;
146         while (start < end) {
147                 scope = start;
148                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
149                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
150                         ret = dmar_parse_one_dev_scope(scope,
151                                 &(*devices)[index], segment);
152                         if (ret) {
153                                 kfree(*devices);
154                                 return ret;
155                         }
156                         index++;
157                 }
158                 start += scope->length;
159         }
160
161         return 0;
162 }
163
164 /**
165  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
166  * structure, which uniquely represents one DMA remapping hardware unit
167  * present in the platform
168  */
169 static int __init
170 dmar_parse_one_drhd(struct acpi_dmar_header *header)
171 {
172         struct acpi_dmar_hardware_unit *drhd;
173         struct dmar_drhd_unit *dmaru;
174         int ret = 0;
175
176         drhd = (struct acpi_dmar_hardware_unit *)header;
177         if (!drhd->address) {
178                 /* Promote an attitude of violence to a BIOS engineer today */
179                 WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
180                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
181                      dmi_get_system_info(DMI_BIOS_VENDOR),
182                      dmi_get_system_info(DMI_BIOS_VERSION),
183                      dmi_get_system_info(DMI_PRODUCT_VERSION));
184                 return -ENODEV;
185         }
186         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
187         if (!dmaru)
188                 return -ENOMEM;
189
190         dmaru->hdr = header;
191         dmaru->reg_base_addr = drhd->address;
192         dmaru->segment = drhd->segment;
193         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
194
195         ret = alloc_iommu(dmaru);
196         if (ret) {
197                 kfree(dmaru);
198                 return ret;
199         }
200         dmar_register_drhd_unit(dmaru);
201         return 0;
202 }
203
204 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
205 {
206         struct acpi_dmar_hardware_unit *drhd;
207         int ret = 0;
208
209         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
210
211         if (dmaru->include_all)
212                 return 0;
213
214         ret = dmar_parse_dev_scope((void *)(drhd + 1),
215                                 ((void *)drhd) + drhd->header.length,
216                                 &dmaru->devices_cnt, &dmaru->devices,
217                                 drhd->segment);
218         if (ret) {
219                 list_del(&dmaru->list);
220                 kfree(dmaru);
221         }
222         return ret;
223 }
224
225 #ifdef CONFIG_DMAR
226 LIST_HEAD(dmar_rmrr_units);
227
228 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
229 {
230         list_add(&rmrr->list, &dmar_rmrr_units);
231 }
232
233
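/*
 * Parse one Reserved Memory Region Reporting (RMRR) structure and queue it
 * on dmar_rmrr_units; its device scope is resolved later by rmrr_parse_dev().
 */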
234 static int __init
235 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
236 {
237         struct acpi_dmar_reserved_memory *rmrr;
238         struct dmar_rmrr_unit *rmrru;
239
240         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
241         if (!rmrru)
242                 return -ENOMEM;
243
244         rmrru->hdr = header;
245         rmrr = (struct acpi_dmar_reserved_memory *)header;
246         rmrru->base_address = rmrr->base_address;
247         rmrru->end_address = rmrr->end_address;
248
249         dmar_register_rmrr_unit(rmrru);
250         return 0;
251 }
252
253 static int __init
254 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
255 {
256         struct acpi_dmar_reserved_memory *rmrr;
257         int ret;
258
259         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
260         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
261                 ((void *)rmrr) + rmrr->header.length,
262                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
263
264         if (ret || (rmrru->devices_cnt == 0)) {
265                 list_del(&rmrru->list);
266                 kfree(rmrru);
267         }
268         return ret;
269 }
270
271 static LIST_HEAD(dmar_atsr_units);
272
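/*
 * Parse one Address Translation Services Reporting (ATSR) structure; as with
 * DRHD and RMRR, the device scope is filled in later by atsr_parse_dev().
 */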
273 static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
274 {
275         struct acpi_dmar_atsr *atsr;
276         struct dmar_atsr_unit *atsru;
277
278         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
279         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
280         if (!atsru)
281                 return -ENOMEM;
282
283         atsru->hdr = hdr;
284         atsru->include_all = atsr->flags & 0x1;
285
286         list_add(&atsru->list, &dmar_atsr_units);
287
288         return 0;
289 }
290
291 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
292 {
293         int rc;
294         struct acpi_dmar_atsr *atsr;
295
296         if (atsru->include_all)
297                 return 0;
298
299         atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
300         rc = dmar_parse_dev_scope((void *)(atsr + 1),
301                                 (void *)atsr + atsr->header.length,
302                                 &atsru->devices_cnt, &atsru->devices,
303                                 atsr->segment);
304         if (rc || !atsru->devices_cnt) {
305                 list_del(&atsru->list);
306                 kfree(atsru);
307         }
308
309         return rc;
310 }
311
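/*
 * Return 1 if @dev sits below a root port listed in an ATSR unit for the
 * device's PCI segment (or the matching ATSR is INCLUDE_ALL), 0 otherwise.
 */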
312 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
313 {
314         int i;
315         struct pci_bus *bus;
316         struct acpi_dmar_atsr *atsr;
317         struct dmar_atsr_unit *atsru;
318
319         list_for_each_entry(atsru, &dmar_atsr_units, list) {
320                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
321                 if (atsr->segment == pci_domain_nr(dev->bus))
322                         goto found;
323         }
324
325         return 0;
326
327 found:
328         for (bus = dev->bus; bus; bus = bus->parent) {
329                 struct pci_dev *bridge = bus->self;
330
331                 if (!bridge || !bridge->is_pcie ||
332                     bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
333                         return 0;
334
335                 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
336                         for (i = 0; i < atsru->devices_cnt; i++)
337                                 if (atsru->devices[i] == bridge)
338                                         return 1;
339                         break;
340                 }
341         }
342
343         if (atsru->include_all)
344                 return 1;
345
346         return 0;
347 }
348 #endif
349
350 static void __init
351 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
352 {
353         struct acpi_dmar_hardware_unit *drhd;
354         struct acpi_dmar_reserved_memory *rmrr;
355         struct acpi_dmar_atsr *atsr;
356
357         switch (header->type) {
358         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
359                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
360                                     header);
361                 printk(KERN_INFO PREFIX
362                         "DRHD base: %#016Lx flags: %#x\n",
363                         (unsigned long long)drhd->address, drhd->flags);
364                 break;
365         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
366                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
367                                     header);
368                 printk(KERN_INFO PREFIX
369                         "RMRR base: %#016Lx end: %#016Lx\n",
370                         (unsigned long long)rmrr->base_address,
371                         (unsigned long long)rmrr->end_address);
372                 break;
373         case ACPI_DMAR_TYPE_ATSR:
374                 atsr = container_of(header, struct acpi_dmar_atsr, header);
375                 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
376                 break;
377         }
378 }
379
380 /**
381  * dmar_table_detect - checks to see if the platform supports DMAR devices
382  */
383 static int __init dmar_table_detect(void)
384 {
385         acpi_status status = AE_OK;
386
387         /* if we can find the DMAR table, then there are DMAR devices */
388         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
389                                 (struct acpi_table_header **)&dmar_tbl,
390                                 &dmar_tbl_size);
391
392         if (ACPI_SUCCESS(status) && !dmar_tbl) {
393                 printk(KERN_WARNING PREFIX "Unable to map DMAR\n");
394                 status = AE_NOT_FOUND;
395         }
396
397         return (ACPI_SUCCESS(status) ? 1 : 0);
398 }
399
400 /**
401  * parse_dmar_table - parses the DMA reporting table
402  */
403 static int __init
404 parse_dmar_table(void)
405 {
406         struct acpi_table_dmar *dmar;
407         struct acpi_dmar_header *entry_header;
408         int ret = 0;
409
410         /*
411          * Do it again; the earlier dmar_tbl mapping could have been
412          * done with the fixed map.
413          */
414         dmar_table_detect();
415
416         dmar = (struct acpi_table_dmar *)dmar_tbl;
417         if (!dmar)
418                 return -ENODEV;
419
420         if (dmar->width < PAGE_SHIFT - 1) {
421                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
422                 return -EINVAL;
423         }
424
425         printk(KERN_INFO PREFIX "Host address width %d\n",
426                 dmar->width + 1);
427
428         entry_header = (struct acpi_dmar_header *)(dmar + 1);
429         while (((unsigned long)entry_header) <
430                         (((unsigned long)dmar) + dmar_tbl->length)) {
431                 /* Avoid looping forever on bad ACPI tables */
432                 if (entry_header->length == 0) {
433                         printk(KERN_WARNING PREFIX
434                                 "Invalid 0-length structure\n");
435                         ret = -EINVAL;
436                         break;
437                 }
438
439                 dmar_table_print_dmar_entry(entry_header);
440
441                 switch (entry_header->type) {
442                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
443                         ret = dmar_parse_one_drhd(entry_header);
444                         break;
445                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
446 #ifdef CONFIG_DMAR
447                         ret = dmar_parse_one_rmrr(entry_header);
448 #endif
449                         break;
450                 case ACPI_DMAR_TYPE_ATSR:
451 #ifdef CONFIG_DMAR
452                         ret = dmar_parse_one_atsr(entry_header);
453 #endif
454                         break;
455                 default:
456                         printk(KERN_WARNING PREFIX
457                                 "Unknown DMAR structure type\n");
458                         ret = 0; /* for forward compatibility */
459                         break;
460                 }
461                 if (ret)
462                         break;
463
464                 entry_header = ((void *)entry_header + entry_header->length);
465         }
466         return ret;
467 }
468
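/*
 * Walk from @dev up through its parent bridges and return 1 as soon as one
 * of them appears in @devices[], 0 if the root is reached without a match.
 */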
469 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
470                           struct pci_dev *dev)
471 {
472         int index;
473
474         while (dev) {
475                 for (index = 0; index < cnt; index++)
476                         if (dev == devices[index])
477                                 return 1;
478
479                 /* Check our parent */
480                 dev = dev->bus->self;
481         }
482
483         return 0;
484 }
485
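/*
 * Find the DRHD unit whose device scope (or INCLUDE_ALL flag, on a matching
 * segment) covers @dev.  INCLUDE_ALL units sit at the tail of the list, so
 * explicit device scopes are checked first.
 */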
486 struct dmar_drhd_unit *
487 dmar_find_matched_drhd_unit(struct pci_dev *dev)
488 {
489         struct dmar_drhd_unit *dmaru = NULL;
490         struct acpi_dmar_hardware_unit *drhd;
491
492         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
493                 drhd = container_of(dmaru->hdr,
494                                     struct acpi_dmar_hardware_unit,
495                                     header);
496
497                 if (dmaru->include_all &&
498                     drhd->segment == pci_domain_nr(dev->bus))
499                         return dmaru;
500
501                 if (dmar_pci_device_match(dmaru->devices,
502                                           dmaru->devices_cnt, dev))
503                         return dmaru;
504         }
505
506         return NULL;
507 }
508
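/*
 * Second-stage initialisation: with the PCI subsystem available, resolve
 * the device scopes recorded for every DRHD (and, under CONFIG_DMAR, every
 * RMRR and ATSR) unit.
 */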
509 int __init dmar_dev_scope_init(void)
510 {
511         struct dmar_drhd_unit *drhd, *drhd_n;
512         int ret = -ENODEV;
513
514         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
515                 ret = dmar_parse_dev(drhd);
516                 if (ret)
517                         return ret;
518         }
519
520 #ifdef CONFIG_DMAR
521         {
522                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
523                 struct dmar_atsr_unit *atsr, *atsr_n;
524
525                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
526                         ret = rmrr_parse_dev(rmrr);
527                         if (ret)
528                                 return ret;
529                 }
530
531                 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
532                         ret = atsr_parse_dev(atsr);
533                         if (ret)
534                                 return ret;
535                 }
536         }
537 #endif
538
539         return ret;
540 }
541
542
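/*
 * Parse the DMAR table once and report what was found; safe to call more
 * than once thanks to the static guard.
 */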
543 int __init dmar_table_init(void)
544 {
545         static int dmar_table_initialized;
546         int ret;
547
548         if (dmar_table_initialized)
549                 return 0;
550
551         dmar_table_initialized = 1;
552
553         ret = parse_dmar_table();
554         if (ret) {
555                 if (ret != -ENODEV)
556                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
557                 return ret;
558         }
559
560         if (list_empty(&dmar_drhd_units)) {
561                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
562                 return -ENODEV;
563         }
564
565 #ifdef CONFIG_DMAR
566         if (list_empty(&dmar_rmrr_units))
567                 printk(KERN_INFO PREFIX "No RMRR found\n");
568
569         if (list_empty(&dmar_atsr_units))
570                 printk(KERN_INFO PREFIX "No ATSR found\n");
571 #endif
572
573 #ifdef CONFIG_INTR_REMAP
574         parse_ioapics_under_ir();
575 #endif
576         return 0;
577 }
578
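/*
 * Early boot hook: probe for the ACPI DMAR table and, unless the IOMMU has
 * been disabled, set iommu_detected so the DMA setup code knows a hardware
 * IOMMU is present.  The early table mapping is dropped again here;
 * parse_dmar_table() re-detects and maps it later.
 */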
579 void __init detect_intel_iommu(void)
580 {
581         int ret;
582
583         ret = dmar_table_detect();
584
585         {
586 #ifdef CONFIG_INTR_REMAP
587                 struct acpi_table_dmar *dmar;
588                 /*
589                  * for now we will disable dma-remapping when interrupt
590                  * remapping is enabled.
591                  * When support for queued invalidation for IOTLB invalidation
592                  * is added, we will not need this any more.
593                  */
594                 dmar = (struct acpi_table_dmar *) dmar_tbl;
595                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
596                         printk(KERN_INFO
597                                "Queued invalidation will be enabled to support "
598                                "x2apic and Intr-remapping.\n");
599 #endif
600 #ifdef CONFIG_DMAR
601                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
602                     !dmar_disabled)
603                         iommu_detected = 1;
604 #endif
605         }
606         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
607         dmar_tbl = NULL;
608 }
609
610
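/*
 * Map the DRHD's register window, sanity-check and cache its CAP/ECAP
 * values, work out the (max) supported AGAW, and hang the new intel_iommu
 * off the drhd unit.  The initial VTD_PAGE_SIZE mapping is grown if the
 * IOTLB or fault registers live beyond the first page.
 */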
611 int alloc_iommu(struct dmar_drhd_unit *drhd)
612 {
613         struct intel_iommu *iommu;
614         int map_size;
615         u32 ver;
616         static int iommu_allocated = 0;
617         int agaw = 0;
618         int msagaw = 0;
619
620         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
621         if (!iommu)
622                 return -ENOMEM;
623
624         iommu->seq_id = iommu_allocated++;
625         sprintf(iommu->name, "dmar%d", iommu->seq_id);
626
627         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
628         if (!iommu->reg) {
629                 printk(KERN_ERR "IOMMU: can't map the region\n");
630                 goto error;
631         }
632         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
633         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
634
635         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
636                 /* Promote an attitude of violence to a BIOS engineer today */
637                 WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n"
638                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
639                      drhd->reg_base_addr,
640                      dmi_get_system_info(DMI_BIOS_VENDOR),
641                      dmi_get_system_info(DMI_BIOS_VERSION),
642                      dmi_get_system_info(DMI_PRODUCT_VERSION));
643                 goto err_unmap;
644         }
645
646 #ifdef CONFIG_DMAR
647         agaw = iommu_calculate_agaw(iommu);
648         if (agaw < 0) {
649                 printk(KERN_ERR
650                        "Cannot get a valid agaw for iommu (seq_id = %d)\n",
651                        iommu->seq_id);
652                 goto err_unmap;
653         }
654         msagaw = iommu_calculate_max_sagaw(iommu);
655         if (msagaw < 0) {
656                 printk(KERN_ERR
657                         "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
658                         iommu->seq_id);
659                 goto err_unmap;
660         }
661 #endif
662         iommu->agaw = agaw;
663         iommu->msagaw = msagaw;
664
665         /* the registers might be more than one page */
666         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
667                 cap_max_fault_reg_offset(iommu->cap));
668         map_size = VTD_PAGE_ALIGN(map_size);
669         if (map_size > VTD_PAGE_SIZE) {
670                 iounmap(iommu->reg);
671                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
672                 if (!iommu->reg) {
673                         printk(KERN_ERR "IOMMU: can't map the region\n");
674                         goto error;
675                 }
676         }
677
678         ver = readl(iommu->reg + DMAR_VER_REG);
679         pr_info("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
680                 (unsigned long long)drhd->reg_base_addr,
681                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
682                 (unsigned long long)iommu->cap,
683                 (unsigned long long)iommu->ecap);
684
685         spin_lock_init(&iommu->register_lock);
686
687         drhd->iommu = iommu;
688         return 0;
689
690  err_unmap:
691         iounmap(iommu->reg);
692  error:
693         kfree(iommu);
694         return -1;
695 }
696
697 void free_iommu(struct intel_iommu *iommu)
698 {
699         if (!iommu)
700                 return;
701
702 #ifdef CONFIG_DMAR
703         free_dmar_iommu(iommu);
704 #endif
705
706         if (iommu->reg)
707                 iounmap(iommu->reg);
708         kfree(iommu);
709 }
710
711 /*
712  * Reclaim all the submitted descriptors which have completed their work.
713  */
714 static inline void reclaim_free_desc(struct q_inval *qi)
715 {
716         while (qi->desc_status[qi->free_tail] == QI_DONE ||
717                qi->desc_status[qi->free_tail] == QI_ABORT) {
718                 qi->desc_status[qi->free_tail] = QI_FREE;
719                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
720                 qi->free_cnt++;
721         }
722 }
723
724 static int qi_check_fault(struct intel_iommu *iommu, int index)
725 {
726         u32 fault;
727         int head, tail;
728         struct q_inval *qi = iommu->qi;
729         int wait_index = (index + 1) % QI_LENGTH;
730
731         if (qi->desc_status[wait_index] == QI_ABORT)
732                 return -EAGAIN;
733
734         fault = readl(iommu->reg + DMAR_FSTS_REG);
735
736         /*
737          * If IQE happens, the head points to the descriptor associated
738          * with the error. No new descriptors are fetched until the IQE
739          * is cleared.
740          */
741         if (fault & DMA_FSTS_IQE) {
742                 head = readl(iommu->reg + DMAR_IQH_REG);
743                 if ((head >> DMAR_IQ_SHIFT) == index) {
744                         printk(KERN_ERR "VT-d detected invalid descriptor: "
745                                 "low=%llx, high=%llx\n",
746                                 (unsigned long long)qi->desc[index].low,
747                                 (unsigned long long)qi->desc[index].high);
748                         memcpy(&qi->desc[index], &qi->desc[wait_index],
749                                         sizeof(struct qi_desc));
750                         __iommu_flush_cache(iommu, &qi->desc[index],
751                                         sizeof(struct qi_desc));
752                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
753                         return -EINVAL;
754                 }
755         }
756
757         /*
758          * If ITE happens, all pending wait_desc commands are aborted.
759          * No new descriptors are fetched until the ITE is cleared.
760          */
761         if (fault & DMA_FSTS_ITE) {
762                 head = readl(iommu->reg + DMAR_IQH_REG);
763                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
764                 head |= 1;
765                 tail = readl(iommu->reg + DMAR_IQT_REG);
766                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
767
768                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
769
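                /*
                 * Descriptors are submitted in work/wait pairs, so the wait
                 * descriptors occupy every other slot; step through those
                 * slots between head and tail and mark any still-in-use
                 * wait descriptor as aborted.
                 */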
770                 do {
771                         if (qi->desc_status[head] == QI_IN_USE)
772                                 qi->desc_status[head] = QI_ABORT;
773                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
774                 } while (head != tail);
775
776                 if (qi->desc_status[wait_index] == QI_ABORT)
777                         return -EAGAIN;
778         }
779
780         if (fault & DMA_FSTS_ICE)
781                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
782
783         return 0;
784 }
785
786 /*
787  * Submit the queued invalidation descriptor to the remapping
788  * hardware unit and wait for its completion.
789  */
790 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
791 {
792         int rc;
793         struct q_inval *qi = iommu->qi;
794         struct qi_desc *hw, wait_desc;
795         int wait_index, index;
796         unsigned long flags;
797
798         if (!qi)
799                 return 0;
800
801         hw = qi->desc;
802
803 restart:
804         rc = 0;
805
806         spin_lock_irqsave(&qi->q_lock, flags);
807         while (qi->free_cnt < 3) {
808                 spin_unlock_irqrestore(&qi->q_lock, flags);
809                 cpu_relax();
810                 spin_lock_irqsave(&qi->q_lock, flags);
811         }
812
813         index = qi->free_head;
814         wait_index = (index + 1) % QI_LENGTH;
815
816         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
817
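        /*
         * Each submission is a pair: the caller's descriptor followed by a
         * wait descriptor that tells the hardware to store QI_DONE into the
         * corresponding status slot once the preceding work has completed.
         */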
818         hw[index] = *desc;
819
820         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
821                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
822         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
823
824         hw[wait_index] = wait_desc;
825
826         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
827         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
828
829         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
830         qi->free_cnt -= 2;
831
832         /*
833          * update the HW tail register indicating the presence of
834          * new descriptors.
835          */
836         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
837
838         while (qi->desc_status[wait_index] != QI_DONE) {
839                 /*
840                  * We leave interrupts disabled to prevent the interrupt
841                  * context from queueing another cmd while a cmd is already
842                  * submitted and waiting for completion on this cpu. This avoids
843                  * a deadlock where the interrupt context can wait indefinitely
844                  * for free slots in the queue.
845                  */
846                 rc = qi_check_fault(iommu, index);
847                 if (rc)
848                         break;
849
850                 spin_unlock(&qi->q_lock);
851                 cpu_relax();
852                 spin_lock(&qi->q_lock);
853         }
854
855         qi->desc_status[index] = QI_DONE;
856
857         reclaim_free_desc(qi);
858         spin_unlock_irqrestore(&qi->q_lock, flags);
859
860         if (rc == -EAGAIN)
861                 goto restart;
862
863         return rc;
864 }
865
866 /*
867  * Flush the global interrupt entry cache.
868  */
869 void qi_global_iec(struct intel_iommu *iommu)
870 {
871         struct qi_desc desc;
872
873         desc.low = QI_IEC_TYPE;
874         desc.high = 0;
875
876         /* should never fail */
877         qi_submit_sync(&desc, iommu);
878 }
879
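/*
 * Queue a context-cache invalidation of the given granularity for
 * (domain @did, source-id @sid, function mask @fm) and wait for it.
 */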
880 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
881                       u64 type)
882 {
883         struct qi_desc desc;
884
885         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
886                         | QI_CC_GRAN(type) | QI_CC_TYPE;
887         desc.high = 0;
888
889         qi_submit_sync(&desc, iommu);
890 }
891
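/*
 * Queue an IOTLB invalidation for domain @did covering 2^size_order pages
 * at @addr, requesting read/write draining when the hardware supports it,
 * and wait for completion.
 */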
892 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
893                     unsigned int size_order, u64 type)
894 {
895         u8 dw = 0, dr = 0;
896
897         struct qi_desc desc;
898         int ih = 0;
899
900         if (cap_write_drain(iommu->cap))
901                 dw = 1;
902
903         if (cap_read_drain(iommu->cap))
904                 dr = 1;
905
906         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
907                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
908         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
909                 | QI_IOTLB_AM(size_order);
910
911         qi_submit_sync(&desc, iommu);
912 }
913
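/*
 * Issue a device-IOTLB (ATS) invalidation for source-id @sid.  A non-zero
 * @mask requests a size-based invalidation covering 2^mask pages, encoded
 * by the size bit plus the low address bits as the VT-d spec requires.
 */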
914 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
915                         u64 addr, unsigned mask)
916 {
917         struct qi_desc desc;
918
919         if (mask) {
920                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
921                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
922                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
923         } else
924                 desc.high = QI_DEV_IOTLB_ADDR(addr);
925
926         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
927                 qdep = 0;
928
929         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
930                    QI_DIOTLB_TYPE;
931
932         qi_submit_sync(&desc, iommu);
933 }
934
935 /*
936  * Disable Queued Invalidation interface.
937  */
938 void dmar_disable_qi(struct intel_iommu *iommu)
939 {
940         unsigned long flags;
941         u32 sts;
942         cycles_t start_time = get_cycles();
943
944         if (!ecap_qis(iommu->ecap))
945                 return;
946
947         spin_lock_irqsave(&iommu->register_lock, flags);
948
949         sts = readl(iommu->reg + DMAR_GSTS_REG);
950         if (!(sts & DMA_GSTS_QIES))
951                 goto end;
952
953         /*
954          * Give the HW a chance to complete the pending invalidation requests.
955          */
956         while ((readl(iommu->reg + DMAR_IQT_REG) !=
957                 readl(iommu->reg + DMAR_IQH_REG)) &&
958                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
959                 cpu_relax();
960
961         iommu->gcmd &= ~DMA_GCMD_QIE;
962         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
963
964         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
965                       !(sts & DMA_GSTS_QIES), sts);
966 end:
967         spin_unlock_irqrestore(&iommu->register_lock, flags);
968 }
969
970 /*
971  * Enable queued invalidation.
972  */
973 static void __dmar_enable_qi(struct intel_iommu *iommu)
974 {
975         u32 sts;
976         unsigned long flags;
977         struct q_inval *qi = iommu->qi;
978
979         qi->free_head = qi->free_tail = 0;
980         qi->free_cnt = QI_LENGTH;
981
982         spin_lock_irqsave(&iommu->register_lock, flags);
983
984         /* write zero to the tail reg */
985         writel(0, iommu->reg + DMAR_IQT_REG);
986
987         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
988
989         iommu->gcmd |= DMA_GCMD_QIE;
990         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
991
992         /* Make sure hardware complete it */
993         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
994
995         spin_unlock_irqrestore(&iommu->register_lock, flags);
996 }
997
998 /*
999  * Enable Queued Invalidation interface. This is a must to support
1000  * interrupt-remapping. Also used by DMA-remapping, which replaces
1001  * register-based IOTLB invalidation.
1002  */
1003 int dmar_enable_qi(struct intel_iommu *iommu)
1004 {
1005         struct q_inval *qi;
1006
1007         if (!ecap_qis(iommu->ecap))
1008                 return -ENOENT;
1009
1010         /*
1011          * queued invalidation is already set up and enabled.
1012          */
1013         if (iommu->qi)
1014                 return 0;
1015
1016         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1017         if (!iommu->qi)
1018                 return -ENOMEM;
1019
1020         qi = iommu->qi;
1021
1022         qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
1023         if (!qi->desc) {
1024                 kfree(qi);
1025                 iommu->qi = NULL;
1026                 return -ENOMEM;
1027         }
1028
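        /* start every descriptor status slot out as QI_FREE (== 0) */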
1029         qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1030         if (!qi->desc_status) {
1031                 free_page((unsigned long) qi->desc);
1032                 kfree(qi);
1033                 iommu->qi = NULL;
1034                 return -ENOMEM;
1035         }
1036
1037         qi->free_head = qi->free_tail = 0;
1038         qi->free_cnt = QI_LENGTH;
1039
1040         spin_lock_init(&qi->q_lock);
1041
1042         __dmar_enable_qi(iommu);
1043
1044         return 0;
1045 }
1046
1047 /* iommu interrupt handling. Most of it is MSI-like. */
1048
1049 enum faulttype {
1050         DMA_REMAP,
1051         INTR_REMAP,
1052         UNKNOWN,
1053 };
1054
1055 static const char *dma_remap_fault_reasons[] =
1056 {
1057         "Software",
1058         "Present bit in root entry is clear",
1059         "Present bit in context entry is clear",
1060         "Invalid context entry",
1061         "Access beyond MGAW",
1062         "PTE Write access is not set",
1063         "PTE Read access is not set",
1064         "Next page table ptr is invalid",
1065         "Root table address invalid",
1066         "Context table ptr is invalid",
1067         "non-zero reserved fields in RTP",
1068         "non-zero reserved fields in CTP",
1069         "non-zero reserved fields in PTE",
1070 };
1071
1072 static const char *intr_remap_fault_reasons[] =
1073 {
1074         "Detected reserved fields in the decoded interrupt-remapped request",
1075         "Interrupt index exceeded the interrupt-remapping table size",
1076         "Present field in the IRTE entry is clear",
1077         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1078         "Detected reserved fields in the IRTE entry",
1079         "Blocked a compatibility format interrupt request",
1080         "Blocked an interrupt request due to source-id verification failure",
1081 };
1082
1083 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(dma_remap_fault_reasons) - 1)
1084
1085 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1086 {
1087         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1088                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
1089                 *fault_type = INTR_REMAP;
1090                 return intr_remap_fault_reasons[fault_reason - 0x20];
1091         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1092                 *fault_type = DMA_REMAP;
1093                 return dma_remap_fault_reasons[fault_reason];
1094         } else {
1095                 *fault_type = UNKNOWN;
1096                 return "Unknown";
1097         }
1098 }
1099
1100 void dmar_msi_unmask(unsigned int irq)
1101 {
1102         struct intel_iommu *iommu = get_irq_data(irq);
1103         unsigned long flag;
1104
1105         /* unmask it */
1106         spin_lock_irqsave(&iommu->register_lock, flag);
1107         writel(0, iommu->reg + DMAR_FECTL_REG);
1108         /* Read a reg to force-flush the posted write */
1109         readl(iommu->reg + DMAR_FECTL_REG);
1110         spin_unlock_irqrestore(&iommu->register_lock, flag);
1111 }
1112
1113 void dmar_msi_mask(unsigned int irq)
1114 {
1115         unsigned long flag;
1116         struct intel_iommu *iommu = get_irq_data(irq);
1117
1118         /* mask it */
1119         spin_lock_irqsave(&iommu->register_lock, flag);
1120         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1121         /* Read a reg to force-flush the posted write */
1122         readl(iommu->reg + DMAR_FECTL_REG);
1123         spin_unlock_irqrestore(&iommu->register_lock, flag);
1124 }
1125
1126 void dmar_msi_write(int irq, struct msi_msg *msg)
1127 {
1128         struct intel_iommu *iommu = get_irq_data(irq);
1129         unsigned long flag;
1130
1131         spin_lock_irqsave(&iommu->register_lock, flag);
1132         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1133         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1134         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1135         spin_unlock_irqrestore(&iommu->register_lock, flag);
1136 }
1137
1138 void dmar_msi_read(int irq, struct msi_msg *msg)
1139 {
1140         struct intel_iommu *iommu = get_irq_data(irq);
1141         unsigned long flag;
1142
1143         spin_lock_irqsave(&iommu->register_lock, flag);
1144         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1145         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1146         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1147         spin_unlock_irqrestore(&iommu->register_lock, flag);
1148 }
1149
1150 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1151                 u8 fault_reason, u16 source_id, unsigned long long addr)
1152 {
1153         const char *reason;
1154         int fault_type;
1155
1156         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1157
1158         if (fault_type == INTR_REMAP)
1159                 printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
1160                        "fault index %llx\n"
1161                         "INTR-REMAP:[fault reason %02d] %s\n",
1162                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1163                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1164                         fault_reason, reason);
1165         else
1166                 printk(KERN_ERR
1167                        "DMAR:[%s] Request device [%02x:%02x.%d] "
1168                        "fault addr %llx\n"
1169                        "DMAR:[fault reason %02d] %s\n",
1170                        (type ? "DMA Read" : "DMA Write"),
1171                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1172                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1173         return 0;
1174 }
1175
1176 #define PRIMARY_FAULT_REG_LEN (16)
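/*
 * Fault-event interrupt handler: walk the primary fault recording
 * registers, log and clear each pending fault, then clear the remaining
 * status bits in DMAR_FSTS_REG.
 */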
1177 irqreturn_t dmar_fault(int irq, void *dev_id)
1178 {
1179         struct intel_iommu *iommu = dev_id;
1180         int reg, fault_index;
1181         u32 fault_status;
1182         unsigned long flag;
1183
1184         spin_lock_irqsave(&iommu->register_lock, flag);
1185         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1186         if (fault_status)
1187                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1188                        fault_status);
1189
1190         /* TBD: ignore advanced fault log currently */
1191         if (!(fault_status & DMA_FSTS_PPF))
1192                 goto clear_rest;
1193
1194         fault_index = dma_fsts_fault_record_index(fault_status);
1195         reg = cap_fault_reg_offset(iommu->cap);
1196         while (1) {
1197                 u8 fault_reason;
1198                 u16 source_id;
1199                 u64 guest_addr;
1200                 int type;
1201                 u32 data;
1202
1203                 /* highest 32 bits */
1204                 data = readl(iommu->reg + reg +
1205                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1206                 if (!(data & DMA_FRCD_F))
1207                         break;
1208
1209                 fault_reason = dma_frcd_fault_reason(data);
1210                 type = dma_frcd_type(data);
1211
1212                 data = readl(iommu->reg + reg +
1213                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1214                 source_id = dma_frcd_source_id(data);
1215
1216                 guest_addr = dmar_readq(iommu->reg + reg +
1217                                 fault_index * PRIMARY_FAULT_REG_LEN);
1218                 guest_addr = dma_frcd_page_addr(guest_addr);
1219                 /* clear the fault */
1220                 writel(DMA_FRCD_F, iommu->reg + reg +
1221                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1222
1223                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1224
1225                 dmar_fault_do_one(iommu, type, fault_reason,
1226                                 source_id, guest_addr);
1227
1228                 fault_index++;
1229                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1230                         fault_index = 0;
1231                 spin_lock_irqsave(&iommu->register_lock, flag);
1232         }
1233 clear_rest:
1234         /* clear all the other faults */
1235         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1236         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1237
1238         spin_unlock_irqrestore(&iommu->register_lock, flag);
1239         return IRQ_HANDLED;
1240 }
1241
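/*
 * Allocate an irq for this IOMMU's fault events, program it as a DMAR MSI
 * and install dmar_fault() as the handler.
 */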
1242 int dmar_set_interrupt(struct intel_iommu *iommu)
1243 {
1244         int irq, ret;
1245
1246         /*
1247          * Check if the fault interrupt is already initialized.
1248          */
1249         if (iommu->irq)
1250                 return 0;
1251
1252         irq = create_irq();
1253         if (!irq) {
1254                 printk(KERN_ERR "IOMMU: no free vectors\n");
1255                 return -EINVAL;
1256         }
1257
1258         set_irq_data(irq, iommu);
1259         iommu->irq = irq;
1260
1261         ret = arch_setup_dmar_msi(irq);
1262         if (ret) {
1263                 set_irq_data(irq, NULL);
1264                 iommu->irq = 0;
1265                 destroy_irq(irq);
1266                 return ret;
1267         }
1268
1269         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1270         if (ret)
1271                 printk(KERN_ERR "IOMMU: can't request irq\n");
1272         return ret;
1273 }
1274
1275 int __init enable_drhd_fault_handling(void)
1276 {
1277         struct dmar_drhd_unit *drhd;
1278
1279         /*
1280          * Enable fault control interrupt.
1281          */
1282         for_each_drhd_unit(drhd) {
1283                 int ret;
1284                 struct intel_iommu *iommu = drhd->iommu;
1285                 ret = dmar_set_interrupt(iommu);
1286
1287                 if (ret) {
1288                         printk(KERN_ERR "DRHD %Lx: failed to enable fault "
1289                                "interrupt, ret %d\n",
1290                                (unsigned long long)drhd->reg_base_addr, ret);
1291                         return -1;
1292                 }
1293         }
1294
1295         return 0;
1296 }
1297
1298 /*
1299  * Re-enable Queued Invalidation interface.
1300  */
1301 int dmar_reenable_qi(struct intel_iommu *iommu)
1302 {
1303         if (!ecap_qis(iommu->ecap))
1304                 return -ENOENT;
1305
1306         if (!iommu->qi)
1307                 return -ENOENT;
1308
1309         /*
1310          * First disable queued invalidation.
1311          */
1312         dmar_disable_qi(iommu);
1313         /*
1314          * Then enable queued invalidation again. Since there is no pending
1315          * invalidation requests now, it's safe to re-enable queued
1316          * invalidation.
1317          */
1318         __dmar_enable_qi(iommu);
1319
1320         return 0;
1321 }