/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *
 * This file implements early detection/parsing of Remapping Devices
 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
 * tables.
 *
 * These routines are used by both DMA-remapping and Interrupt-remapping
 */
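
/*
 * A DMAR table is an ACPI table header (struct acpi_table_dmar, which
 * adds the host address width and flags) followed by a sequence of
 * variable-length remapping structures.  Each structure begins with a
 * common struct acpi_dmar_header { type, length }, which is what lets
 * parse_dmar_table() below walk the table generically.  A minimal sketch
 * of that walk, assuming only the ACPI definitions:
 *
 *	struct acpi_dmar_header *h = (struct acpi_dmar_header *)(dmar + 1);
 *	while ((void *)h < (void *)dmar + dmar->header.length) {
 *		... dispatch on h->type ...
 *		h = (void *)h + h->length;
 *	}
 */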

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */

#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
#include <linux/timer.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <asm/irq_remapping.h>
#include <asm/iommu_table.h>

/*
 * No locks are needed as the DMA remapping hardware unit list is
 * constructed at boot time and hotplug of these units is not supported
 * by the architecture.
 */
LIST_HEAD(dmar_drhd_units);

struct acpi_table_header * __initdata dmar_tbl;
static acpi_size dmar_tbl_size;

static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
{
        /*
         * Add INCLUDE_ALL at the tail, so that a scan of the list will
         * find it at the very end.
         */
        if (drhd->include_all)
                list_add_tail(&drhd->list, &dmar_drhd_units);
        else
                list_add(&drhd->list, &dmar_drhd_units);
}

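/*
 * A device-scope entry names a device by a start bus number plus a path
 * of (device, function) pairs that is walked hop by hop, descending into
 * each bridge's subordinate bus.  The loop below follows that path to
 * resolve the entry to a struct pci_dev.
 */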
static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
                                           struct pci_dev **dev, u16 segment)
{
        struct pci_bus *bus;
        struct pci_dev *pdev = NULL;
        struct acpi_dmar_pci_path *path;
        int count;

        bus = pci_find_bus(segment, scope->bus);
        path = (struct acpi_dmar_pci_path *)(scope + 1);
        count = (scope->length - sizeof(struct acpi_dmar_device_scope))
                / sizeof(struct acpi_dmar_pci_path);

        while (count) {
                if (pdev)
                        pci_dev_put(pdev);
                /*
                 * Some BIOSes list non-existent devices in the DMAR
                 * table; just ignore them.
                 */
                if (!bus) {
                        pr_warn("Device scope bus [%d] not found\n", scope->bus);
                        break;
                }
                pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
                if (!pdev) {
                        /* warning will be printed below */
                        break;
                }
                path++;
                count--;
                bus = pdev->subordinate;
        }
        if (!pdev) {
                pr_warn("Device scope device [%04x:%02x:%02x.%02x] not found\n",
                        segment, scope->bus, path->dev, path->fn);
                *dev = NULL;
                return 0;
        }
        if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
             pdev->subordinate) ||
            (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
             !pdev->subordinate)) {
                /* print the warning before dropping our reference to pdev */
                pr_warn("Device scope type does not match for %s\n",
                        pci_name(pdev));
                pci_dev_put(pdev);
                return -EINVAL;
        }
        *dev = pdev;
        return 0;
}

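/*
 * dmar_parse_dev_scope - resolve all device-scope entries in [start, end)
 *
 * Two passes over the same range: the first only counts ENDPOINT and
 * BRIDGE entries so a right-sized array can be allocated, the second
 * resolves each counted entry into a pci_dev via
 * dmar_parse_one_dev_scope().
 */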
int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
                                struct pci_dev ***devices, u16 segment)
{
        struct acpi_dmar_device_scope *scope;
        void *tmp = start;
        int index;
        int ret;

        *cnt = 0;
        while (start < end) {
                scope = start;
                if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
                        (*cnt)++;
                else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
                        pr_warn("Unsupported device scope\n");
                }
                start += scope->length;
        }
        if (*cnt == 0)
                return 0;

        *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
        if (!*devices)
                return -ENOMEM;

        start = tmp;
        index = 0;
        while (start < end) {
                scope = start;
                if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
                        ret = dmar_parse_one_dev_scope(scope,
                                &(*devices)[index], segment);
                        if (ret) {
                                kfree(*devices);
                                return ret;
                        }
                        index++;
                }
                start += scope->length;
        }

        return 0;
}

/**
 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
 * structure, which uniquely represents one DMA remapping hardware unit
 * present in the platform
 */
static int __init
dmar_parse_one_drhd(struct acpi_dmar_header *header)
{
        struct acpi_dmar_hardware_unit *drhd;
        struct dmar_drhd_unit *dmaru;
        int ret = 0;

        drhd = (struct acpi_dmar_hardware_unit *)header;
        dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
        if (!dmaru)
                return -ENOMEM;

        dmaru->hdr = header;
        dmaru->reg_base_addr = drhd->address;
        dmaru->segment = drhd->segment;
        dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */

        ret = alloc_iommu(dmaru);
        if (ret) {
                kfree(dmaru);
                return ret;
        }
        dmar_register_drhd_unit(dmaru);
        return 0;
}

static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
{
        struct acpi_dmar_hardware_unit *drhd;
        int ret = 0;

        drhd = (struct acpi_dmar_hardware_unit *)dmaru->hdr;

        if (dmaru->include_all)
                return 0;

        ret = dmar_parse_dev_scope((void *)(drhd + 1),
                                ((void *)drhd) + drhd->header.length,
                                &dmaru->devices_cnt, &dmaru->devices,
                                drhd->segment);
        if (ret) {
                list_del(&dmaru->list);
                kfree(dmaru);
        }
        return ret;
}

#ifdef CONFIG_ACPI_NUMA
static int __init
dmar_parse_one_rhsa(struct acpi_dmar_header *header)
{
        struct acpi_dmar_rhsa *rhsa;
        struct dmar_drhd_unit *drhd;

        rhsa = (struct acpi_dmar_rhsa *)header;
        for_each_drhd_unit(drhd) {
                if (drhd->reg_base_addr == rhsa->base_address) {
                        int node = acpi_map_pxm_to_node(rhsa->proximity_domain);

                        if (!node_online(node))
                                node = -1;
                        drhd->iommu->node = node;
                        return 0;
                }
        }
        /*
         * No DRHD matched; drhd is no longer a valid list entry here, so
         * report the address the RHSA itself claims to refer to.
         */
        WARN_TAINT(
                1, TAINT_FIRMWARE_WORKAROUND,
                "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
                "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
                rhsa->base_address,
                dmi_get_system_info(DMI_BIOS_VENDOR),
                dmi_get_system_info(DMI_BIOS_VERSION),
                dmi_get_system_info(DMI_PRODUCT_VERSION));

        return 0;
}
#endif

static void __init
dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
{
        struct acpi_dmar_hardware_unit *drhd;
        struct acpi_dmar_reserved_memory *rmrr;
        struct acpi_dmar_atsr *atsr;
        struct acpi_dmar_rhsa *rhsa;

        switch (header->type) {
        case ACPI_DMAR_TYPE_HARDWARE_UNIT:
                drhd = container_of(header, struct acpi_dmar_hardware_unit,
                                    header);
                pr_info("DRHD base: %#016Lx flags: %#x\n",
                        (unsigned long long)drhd->address, drhd->flags);
                break;
        case ACPI_DMAR_TYPE_RESERVED_MEMORY:
                rmrr = container_of(header, struct acpi_dmar_reserved_memory,
                                    header);
                pr_info("RMRR base: %#016Lx end: %#016Lx\n",
                        (unsigned long long)rmrr->base_address,
                        (unsigned long long)rmrr->end_address);
                break;
        case ACPI_DMAR_TYPE_ATSR:
                atsr = container_of(header, struct acpi_dmar_atsr, header);
                pr_info("ATSR flags: %#x\n", atsr->flags);
                break;
        case ACPI_DMAR_HARDWARE_AFFINITY:
                rhsa = container_of(header, struct acpi_dmar_rhsa, header);
                pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
                        (unsigned long long)rhsa->base_address,
                        rhsa->proximity_domain);
                break;
        }
}

/**
 * dmar_table_detect - checks to see if the platform supports DMAR devices
 */
static int __init dmar_table_detect(void)
{
        acpi_status status = AE_OK;

        /* If we can find the DMAR table, then there are DMAR devices */
        status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
                                (struct acpi_table_header **)&dmar_tbl,
                                &dmar_tbl_size);

        if (ACPI_SUCCESS(status) && !dmar_tbl) {
                pr_warn("Unable to map DMAR\n");
                status = AE_NOT_FOUND;
        }

        return (ACPI_SUCCESS(status) ? 1 : 0);
}

/**
 * parse_dmar_table - parses the DMA reporting table
 */
static int __init
parse_dmar_table(void)
{
        struct acpi_table_dmar *dmar;
        struct acpi_dmar_header *entry_header;
        int ret = 0;

        /*
         * Do it again; the earlier dmar_tbl mapping may have come from
         * the early fixmap and be stale by now.
         */
        dmar_table_detect();

        /*
         * ACPI tables may not be DMA protected by tboot, so use the DMAR
         * copy that SINIT saved in SinitMleData in the TXT heap (which is
         * DMA protected).
         */
        dmar_tbl = tboot_get_dmar_table(dmar_tbl);

        dmar = (struct acpi_table_dmar *)dmar_tbl;
        if (!dmar)
                return -ENODEV;

        if (dmar->width < PAGE_SHIFT - 1) {
                pr_warn("Invalid DMAR haw\n");
                return -EINVAL;
        }

        pr_info("Host address width %d\n", dmar->width + 1);

        entry_header = (struct acpi_dmar_header *)(dmar + 1);
        while (((unsigned long)entry_header) <
                        (((unsigned long)dmar) + dmar_tbl->length)) {
                /* Avoid looping forever on bad ACPI tables */
                if (entry_header->length == 0) {
                        pr_warn("Invalid 0-length structure\n");
                        ret = -EINVAL;
                        break;
                }

                dmar_table_print_dmar_entry(entry_header);

                switch (entry_header->type) {
                case ACPI_DMAR_TYPE_HARDWARE_UNIT:
                        ret = dmar_parse_one_drhd(entry_header);
                        break;
                case ACPI_DMAR_TYPE_RESERVED_MEMORY:
                        ret = dmar_parse_one_rmrr(entry_header);
                        break;
                case ACPI_DMAR_TYPE_ATSR:
                        ret = dmar_parse_one_atsr(entry_header);
                        break;
                case ACPI_DMAR_HARDWARE_AFFINITY:
#ifdef CONFIG_ACPI_NUMA
                        ret = dmar_parse_one_rhsa(entry_header);
#endif
                        break;
                default:
                        pr_warn("Unknown DMAR structure type %d\n",
                                entry_header->type);
                        ret = 0; /* for forward compatibility */
                        break;
                }
                if (ret)
                        break;

                entry_header = ((void *)entry_header + entry_header->length);
        }
        return ret;
}

static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
                          struct pci_dev *dev)
{
        int index;

        while (dev) {
                for (index = 0; index < cnt; index++)
                        if (dev == devices[index])
                                return 1;

                /* Check our parent */
                dev = dev->bus->self;
        }

        return 0;
}

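/*
 * dmar_find_matched_drhd_unit - find the DRHD whose device scope covers
 * @dev (or one of its parent bridges), falling back to the segment's
 * INCLUDE_ALL unit.  dmar_register_drhd_unit() keeps INCLUDE_ALL units at
 * the tail of the list, so explicit scopes are matched first.
 */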
struct dmar_drhd_unit *
dmar_find_matched_drhd_unit(struct pci_dev *dev)
{
        struct dmar_drhd_unit *dmaru = NULL;
        struct acpi_dmar_hardware_unit *drhd;

        dev = pci_physfn(dev);

        list_for_each_entry(dmaru, &dmar_drhd_units, list) {
                drhd = container_of(dmaru->hdr,
                                    struct acpi_dmar_hardware_unit,
                                    header);

                if (dmaru->include_all &&
                    drhd->segment == pci_domain_nr(dev->bus))
                        return dmaru;

                if (dmar_pci_device_match(dmaru->devices,
                                          dmaru->devices_cnt, dev))
                        return dmaru;
        }

        return NULL;
}

int __init dmar_dev_scope_init(void)
{
        static int dmar_dev_scope_initialized;
        struct dmar_drhd_unit *drhd, *drhd_n;
        int ret = -ENODEV;

        if (dmar_dev_scope_initialized)
                return dmar_dev_scope_initialized;

        if (list_empty(&dmar_drhd_units))
                goto fail;

        list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
                ret = dmar_parse_dev(drhd);
                if (ret)
                        goto fail;
        }

        ret = dmar_parse_rmrr_atsr_dev();
        if (ret)
                goto fail;

        dmar_dev_scope_initialized = 1;
        return 0;

fail:
        dmar_dev_scope_initialized = ret;
        return ret;
}


int __init dmar_table_init(void)
{
        static int dmar_table_initialized;
        int ret;

        if (dmar_table_initialized)
                return 0;

        dmar_table_initialized = 1;

        ret = parse_dmar_table();
        if (ret) {
                if (ret != -ENODEV)
                        pr_info("Failed to parse DMAR table\n");
                return ret;
        }

        if (list_empty(&dmar_drhd_units)) {
                pr_info("No DMAR devices found\n");
                return -ENODEV;
        }

        return 0;
}

static void warn_invalid_dmar(u64 addr, const char *message)
{
        WARN_TAINT_ONCE(
                1, TAINT_FIRMWARE_WORKAROUND,
                "Your BIOS is broken; DMAR reported at address %llx%s!\n"
                "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
                addr, message,
                dmi_get_system_info(DMI_BIOS_VENDOR),
                dmi_get_system_info(DMI_BIOS_VERSION),
                dmi_get_system_info(DMI_PRODUCT_VERSION));
}

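/*
 * check_zero_address - sanity-check every DRHD in the table before the
 * IOMMU code commits to using it: a zero register base, an unmappable
 * base, or capability registers that read back as all ones all indicate
 * broken firmware.  Returns 1 if all DRHDs look usable, 0 otherwise.
 */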
int __init check_zero_address(void)
{
        struct acpi_table_dmar *dmar;
        struct acpi_dmar_header *entry_header;
        struct acpi_dmar_hardware_unit *drhd;

        dmar = (struct acpi_table_dmar *)dmar_tbl;
        entry_header = (struct acpi_dmar_header *)(dmar + 1);

        while (((unsigned long)entry_header) <
                        (((unsigned long)dmar) + dmar_tbl->length)) {
                /* Avoid looping forever on bad ACPI tables */
                if (entry_header->length == 0) {
                        pr_warn("Invalid 0-length structure\n");
                        return 0;
                }

                if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
                        void __iomem *addr;
                        u64 cap, ecap;

                        drhd = (void *)entry_header;
                        if (!drhd->address) {
                                warn_invalid_dmar(0, "");
                                goto failed;
                        }

                        addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
                        if (!addr) {
                                pr_warn("IOMMU: can't validate: %llx\n",
                                        drhd->address);
                                goto failed;
                        }
                        cap = dmar_readq(addr + DMAR_CAP_REG);
                        ecap = dmar_readq(addr + DMAR_ECAP_REG);
                        early_iounmap(addr, VTD_PAGE_SIZE);
                        if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
                                warn_invalid_dmar(drhd->address,
                                                  " returns all ones");
                                goto failed;
                        }
                }

                entry_header = ((void *)entry_header + entry_header->length);
        }
        return 1;

failed:
        return 0;
}

int __init detect_intel_iommu(void)
{
        int ret;

        ret = dmar_table_detect();
        if (ret)
                ret = check_zero_address();
        {
                struct acpi_table_dmar *dmar;

                dmar = (struct acpi_table_dmar *)dmar_tbl;

                if (ret && irq_remapping_enabled && cpu_has_x2apic &&
                    dmar->flags & 0x1)
                        pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");

                if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
                        iommu_detected = 1;
                        /* Make sure ACS will be enabled */
                        pci_request_acs();
                }

#ifdef CONFIG_X86
                if (ret)
                        x86_init.iommu.iommu_init = intel_iommu_init;
#endif
        }
        early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
        dmar_tbl = NULL;

        return ret ? 1 : -ENODEV;
}


static void unmap_iommu(struct intel_iommu *iommu)
{
        iounmap(iommu->reg);
        release_mem_region(iommu->reg_phys, iommu->reg_size);
}

/**
 * map_iommu: map the iommu's registers
 * @iommu: the iommu to map
 * @phys_addr: the physical address of the base register
 *
 * Memory map the iommu's registers.  Start w/ a single page, and
 * possibly expand if that turns out to be insufficient.
 */
static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
{
        int map_size, err = 0;

        iommu->reg_phys = phys_addr;
        iommu->reg_size = VTD_PAGE_SIZE;

        if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
                pr_err("IOMMU: can't reserve memory\n");
                err = -EBUSY;
                goto out;
        }

        iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
        if (!iommu->reg) {
                pr_err("IOMMU: can't map the region\n");
                err = -ENOMEM;
                goto release;
        }

        iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
        iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);

        if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
                err = -EINVAL;
                warn_invalid_dmar(phys_addr, " returns all ones");
                goto unmap;
        }

        /* the registers might be more than one page */
        map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
                         cap_max_fault_reg_offset(iommu->cap));
        map_size = VTD_PAGE_ALIGN(map_size);
        if (map_size > iommu->reg_size) {
                iounmap(iommu->reg);
                release_mem_region(iommu->reg_phys, iommu->reg_size);
                iommu->reg_size = map_size;
                if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
                                        iommu->name)) {
                        pr_err("IOMMU: can't reserve memory\n");
                        err = -EBUSY;
                        goto out;
                }
                iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
                if (!iommu->reg) {
                        pr_err("IOMMU: can't map the region\n");
                        err = -ENOMEM;
                        goto release;
                }
        }
        err = 0;
        goto out;

unmap:
        iounmap(iommu->reg);
release:
        release_mem_region(iommu->reg_phys, iommu->reg_size);
out:
        return err;
}

int alloc_iommu(struct dmar_drhd_unit *drhd)
{
        struct intel_iommu *iommu;
        u32 ver;
        static int iommu_allocated = 0;
        int agaw = 0;
        int msagaw = 0;
        int err;

        if (!drhd->reg_base_addr) {
                warn_invalid_dmar(0, "");
                return -EINVAL;
        }

        iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
        if (!iommu)
                return -ENOMEM;

        iommu->seq_id = iommu_allocated++;
        sprintf(iommu->name, "dmar%d", iommu->seq_id);

        err = map_iommu(iommu, drhd->reg_base_addr);
        if (err) {
                pr_err("IOMMU: failed to map %s\n", iommu->name);
                goto error;
        }

        err = -EINVAL;
        agaw = iommu_calculate_agaw(iommu);
        if (agaw < 0) {
                pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
                        iommu->seq_id);
                goto err_unmap;
        }
        msagaw = iommu_calculate_max_sagaw(iommu);
        if (msagaw < 0) {
                pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
                        iommu->seq_id);
                goto err_unmap;
        }
        iommu->agaw = agaw;
        iommu->msagaw = msagaw;

        iommu->node = -1;

        ver = readl(iommu->reg + DMAR_VER_REG);
        pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
                iommu->seq_id,
                (unsigned long long)drhd->reg_base_addr,
                DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
                (unsigned long long)iommu->cap,
                (unsigned long long)iommu->ecap);

        raw_spin_lock_init(&iommu->register_lock);

        drhd->iommu = iommu;
        return 0;

 err_unmap:
        unmap_iommu(iommu);
 error:
        kfree(iommu);
        return err;
}

void free_iommu(struct intel_iommu *iommu)
{
        if (!iommu)
                return;

        free_dmar_iommu(iommu);

        if (iommu->reg)
                unmap_iommu(iommu);

        kfree(iommu);
}

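/*
 * The invalidation queue is a ring of QI_LENGTH descriptors, tracked by
 * a parallel desc_status[] array.  Slots are handed out in pairs (the
 * caller's descriptor plus a wait descriptor), and free_head/free_tail
 * delimit the region currently in flight.
 */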
/*
 * Reclaim all the submitted descriptors which have completed their work.
 */
static inline void reclaim_free_desc(struct q_inval *qi)
{
        while (qi->desc_status[qi->free_tail] == QI_DONE ||
               qi->desc_status[qi->free_tail] == QI_ABORT) {
                qi->desc_status[qi->free_tail] = QI_FREE;
                qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
                qi->free_cnt++;
        }
}

static int qi_check_fault(struct intel_iommu *iommu, int index)
{
        u32 fault;
        int head, tail;
        struct q_inval *qi = iommu->qi;
        int wait_index = (index + 1) % QI_LENGTH;

        if (qi->desc_status[wait_index] == QI_ABORT)
                return -EAGAIN;

        fault = readl(iommu->reg + DMAR_FSTS_REG);

        /*
         * If IQE happens, the head points to the descriptor associated
         * with the error. No new descriptors are fetched until the IQE
         * is cleared.
         */
        if (fault & DMA_FSTS_IQE) {
                head = readl(iommu->reg + DMAR_IQH_REG);
                if ((head >> DMAR_IQ_SHIFT) == index) {
                        pr_err("VT-d detected invalid descriptor: "
                                "low=%llx, high=%llx\n",
                                (unsigned long long)qi->desc[index].low,
                                (unsigned long long)qi->desc[index].high);
                        memcpy(&qi->desc[index], &qi->desc[wait_index],
                                        sizeof(struct qi_desc));
                        __iommu_flush_cache(iommu, &qi->desc[index],
                                        sizeof(struct qi_desc));
                        writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
                        return -EINVAL;
                }
        }

        /*
         * If ITE happens, all pending wait_desc commands are aborted.
         * No new descriptors are fetched until the ITE is cleared.
         */
        if (fault & DMA_FSTS_ITE) {
                head = readl(iommu->reg + DMAR_IQH_REG);
                head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
                head |= 1;
                tail = readl(iommu->reg + DMAR_IQT_REG);
                tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;

                writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);

                do {
                        if (qi->desc_status[head] == QI_IN_USE)
                                qi->desc_status[head] = QI_ABORT;
                        head = (head - 2 + QI_LENGTH) % QI_LENGTH;
                } while (head != tail);

                if (qi->desc_status[wait_index] == QI_ABORT)
                        return -EAGAIN;
        }

        if (fault & DMA_FSTS_ICE)
                writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);

        return 0;
}

/*
 * Submit the queued invalidation descriptor to the remapping hardware
 * unit and wait for its completion.  Completion is detected by pairing
 * the descriptor with a wait descriptor that tells the hardware to write
 * QI_DONE back into the corresponding desc_status[] slot.
 */
int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
{
        int rc;
        struct q_inval *qi = iommu->qi;
        struct qi_desc *hw, wait_desc;
        int wait_index, index;
        unsigned long flags;

        if (!qi)
                return 0;

        hw = qi->desc;

restart:
        rc = 0;

        raw_spin_lock_irqsave(&qi->q_lock, flags);
        while (qi->free_cnt < 3) {
                raw_spin_unlock_irqrestore(&qi->q_lock, flags);
                cpu_relax();
                raw_spin_lock_irqsave(&qi->q_lock, flags);
        }

        index = qi->free_head;
        wait_index = (index + 1) % QI_LENGTH;

        qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;

        hw[index] = *desc;

        wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
                        QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
        wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);

        hw[wait_index] = wait_desc;

        __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
        __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));

        qi->free_head = (qi->free_head + 2) % QI_LENGTH;
        qi->free_cnt -= 2;

        /*
         * Update the HW tail register indicating the presence of
         * new descriptors.
         */
        writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);

        while (qi->desc_status[wait_index] != QI_DONE) {
                /*
                 * We leave interrupts disabled here to keep interrupt
                 * context from queueing another cmd while one is already
                 * submitted and waiting for completion on this cpu.
                 * Otherwise the interrupt context could wait indefinitely
                 * for free slots in the queue.
                 */
                rc = qi_check_fault(iommu, index);
                if (rc)
                        break;

                raw_spin_unlock(&qi->q_lock);
                cpu_relax();
                raw_spin_lock(&qi->q_lock);
        }

        qi->desc_status[index] = QI_DONE;

        reclaim_free_desc(qi);
        raw_spin_unlock_irqrestore(&qi->q_lock, flags);

        if (rc == -EAGAIN)
                goto restart;

        return rc;
}

/*
 * Flush the global interrupt entry cache.
 */
void qi_global_iec(struct intel_iommu *iommu)
{
        struct qi_desc desc;

        desc.low = QI_IEC_TYPE;
        desc.high = 0;

        /* should never fail */
        qi_submit_sync(&desc, iommu);
}

void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
                      u64 type)
{
        struct qi_desc desc;

        desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
                        | QI_CC_GRAN(type) | QI_CC_TYPE;
        desc.high = 0;

        qi_submit_sync(&desc, iommu);
}

void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
                    unsigned int size_order, u64 type)
{
        u8 dw = 0, dr = 0;
        struct qi_desc desc;
        int ih = 0;

        if (cap_write_drain(iommu->cap))
                dw = 1;

        if (cap_read_drain(iommu->cap))
                dr = 1;

        desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
                | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
        desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
                | QI_IOTLB_AM(size_order);

        qi_submit_sync(&desc, iommu);
}

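/*
 * For a ranged device-IOTLB invalidation the size is encoded in the
 * address itself: @mask selects a 2^mask-page aligned region, and the
 * address bits below bit (VTD_PAGE_SHIFT + mask - 1) are set to ones so
 * the hardware can recover the span from the single address quadword.
 */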
void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
                        u64 addr, unsigned mask)
{
        struct qi_desc desc;

        if (mask) {
                BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
                addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
                desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
        } else
                desc.high = QI_DEV_IOTLB_ADDR(addr);

        if (qdep >= QI_DEV_IOTLB_MAX_INVS)
                qdep = 0;

        desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
                   QI_DIOTLB_TYPE;

        qi_submit_sync(&desc, iommu);
}

/*
 * Disable Queued Invalidation interface.
 */
void dmar_disable_qi(struct intel_iommu *iommu)
{
        unsigned long flags;
        u32 sts;
        cycles_t start_time = get_cycles();

        if (!ecap_qis(iommu->ecap))
                return;

        raw_spin_lock_irqsave(&iommu->register_lock, flags);

        /* DMAR_GSTS_REG is a 32-bit register; use readl */
        sts = readl(iommu->reg + DMAR_GSTS_REG);
        if (!(sts & DMA_GSTS_QIES))
                goto end;

        /*
         * Give a chance to HW to complete the pending invalidation requests.
         */
        while ((readl(iommu->reg + DMAR_IQT_REG) !=
                readl(iommu->reg + DMAR_IQH_REG)) &&
                (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
                cpu_relax();

        iommu->gcmd &= ~DMA_GCMD_QIE;
        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
                      !(sts & DMA_GSTS_QIES), sts);
end:
        raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

/*
 * Enable queued invalidation.
 */
static void __dmar_enable_qi(struct intel_iommu *iommu)
{
        u32 sts;
        unsigned long flags;
        struct q_inval *qi = iommu->qi;

        qi->free_head = qi->free_tail = 0;
        qi->free_cnt = QI_LENGTH;

        raw_spin_lock_irqsave(&iommu->register_lock, flags);

        /* write zero to the tail reg */
        writel(0, iommu->reg + DMAR_IQT_REG);

        dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));

        iommu->gcmd |= DMA_GCMD_QIE;
        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

        /* Make sure hardware completes it */
        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);

        raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

/*
 * Enable Queued Invalidation interface. This is a must to support
 * interrupt-remapping. Also used by DMA-remapping, which replaces
 * register based IOTLB invalidation.
 */
int dmar_enable_qi(struct intel_iommu *iommu)
{
        struct q_inval *qi;
        struct page *desc_page;

        if (!ecap_qis(iommu->ecap))
                return -ENOENT;

        /*
         * queued invalidation is already set up and enabled.
         */
        if (iommu->qi)
                return 0;

        iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
        if (!iommu->qi)
                return -ENOMEM;

        qi = iommu->qi;

        desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
        if (!desc_page) {
                kfree(qi);
                iommu->qi = NULL;
                return -ENOMEM;
        }

        qi->desc = page_address(desc_page);

        qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
        if (!qi->desc_status) {
                free_page((unsigned long) qi->desc);
                kfree(qi);
                iommu->qi = NULL;
                return -ENOMEM;
        }

        qi->free_head = qi->free_tail = 0;
        qi->free_cnt = QI_LENGTH;

        raw_spin_lock_init(&qi->q_lock);

        __dmar_enable_qi(iommu);

        return 0;
}

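/*
 * Typical bring-up order (a sketch, not enforced here): parse_dmar_table()
 * discovers the DRHDs and alloc_iommu() maps each one; dmar_enable_qi()
 * then switches the unit from register-based to queued invalidation, and
 * from that point flushes go through qi_submit_sync() and the qi_flush_*
 * wrappers above.
 */
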
/* iommu interrupt handling. Most of it is MSI-like. */

enum faulttype {
        DMA_REMAP,
        INTR_REMAP,
        UNKNOWN,
};

static const char *dma_remap_fault_reasons[] =
{
        "Software",
        "Present bit in root entry is clear",
        "Present bit in context entry is clear",
        "Invalid context entry",
        "Access beyond MGAW",
        "PTE Write access is not set",
        "PTE Read access is not set",
        "Next page table ptr is invalid",
        "Root table address invalid",
        "Context table ptr is invalid",
        "non-zero reserved fields in RTP",
        "non-zero reserved fields in CTP",
        "non-zero reserved fields in PTE",
};

static const char *irq_remap_fault_reasons[] =
{
        "Detected reserved fields in the decoded interrupt-remapped request",
        "Interrupt index exceeded the interrupt-remapping table size",
        "Present field in the IRTE entry is clear",
        "Error accessing interrupt-remapping table pointed by IRTA_REG",
        "Detected reserved fields in the IRTE entry",
        "Blocked a compatibility format interrupt request",
        "Blocked an interrupt request due to source-id verification failure",
};

const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
{
        if (fault_reason >= 0x20 && (fault_reason - 0x20 <
                                        ARRAY_SIZE(irq_remap_fault_reasons))) {
                *fault_type = INTR_REMAP;
                return irq_remap_fault_reasons[fault_reason - 0x20];
        } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
                *fault_type = DMA_REMAP;
                return dma_remap_fault_reasons[fault_reason];
        } else {
                *fault_type = UNKNOWN;
                return "Unknown";
        }
}

void dmar_msi_unmask(struct irq_data *data)
{
        struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
        unsigned long flag;

        /* unmask it */
        raw_spin_lock_irqsave(&iommu->register_lock, flag);
        writel(0, iommu->reg + DMAR_FECTL_REG);
        /* Read a reg to force flush the post write */
        readl(iommu->reg + DMAR_FECTL_REG);
        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_mask(struct irq_data *data)
{
        unsigned long flag;
        struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);

        /* mask it */
        raw_spin_lock_irqsave(&iommu->register_lock, flag);
        writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
        /* Read a reg to force flush the post write */
        readl(iommu->reg + DMAR_FECTL_REG);
        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_write(int irq, struct msi_msg *msg)
{
        struct intel_iommu *iommu = irq_get_handler_data(irq);
        unsigned long flag;

        raw_spin_lock_irqsave(&iommu->register_lock, flag);
        writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
        writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
        writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_read(int irq, struct msi_msg *msg)
{
        struct intel_iommu *iommu = irq_get_handler_data(irq);
        unsigned long flag;

        raw_spin_lock_irqsave(&iommu->register_lock, flag);
        msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
        msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
        msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
                u8 fault_reason, u16 source_id, unsigned long long addr)
{
        const char *reason;
        int fault_type;

        reason = dmar_get_fault_reason(fault_reason, &fault_type);

        if (fault_type == INTR_REMAP)
                pr_err("INTR-REMAP: Request device [%02x:%02x.%d] "
                       "fault index %llx\n"
                       "INTR-REMAP:[fault reason %02d] %s\n",
                       (source_id >> 8), PCI_SLOT(source_id & 0xFF),
                       PCI_FUNC(source_id & 0xFF), addr >> 48,
                       fault_reason, reason);
        else
                pr_err("DMAR:[%s] Request device [%02x:%02x.%d] "
                       "fault addr %llx\n"
                       "DMAR:[fault reason %02d] %s\n",
                       (type ? "DMA Read" : "DMA Write"),
                       (source_id >> 8), PCI_SLOT(source_id & 0xFF),
                       PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
        return 0;
}

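/*
 * Each primary fault recording register is 128 bits (PRIMARY_FAULT_REG_LEN
 * bytes): the low quadword holds the faulting page address, and the high
 * quadword holds the source-id (dword at offset 8) plus the fault bit,
 * reason and type (dword at offset 12), which is how dmar_fault() below
 * picks the pieces apart.
 */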
#define PRIMARY_FAULT_REG_LEN (16)
irqreturn_t dmar_fault(int irq, void *dev_id)
{
        struct intel_iommu *iommu = dev_id;
        int reg, fault_index;
        u32 fault_status;
        unsigned long flag;

        raw_spin_lock_irqsave(&iommu->register_lock, flag);
        fault_status = readl(iommu->reg + DMAR_FSTS_REG);
        if (fault_status)
                pr_err("DRHD: handling fault status reg %x\n", fault_status);

        /* TBD: ignore advanced fault log currently */
        if (!(fault_status & DMA_FSTS_PPF))
                goto clear_rest;

        fault_index = dma_fsts_fault_record_index(fault_status);
        reg = cap_fault_reg_offset(iommu->cap);
        while (1) {
                u8 fault_reason;
                u16 source_id;
                u64 guest_addr;
                int type;
                u32 data;

                /* highest 32 bits */
                data = readl(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN + 12);
                if (!(data & DMA_FRCD_F))
                        break;

                fault_reason = dma_frcd_fault_reason(data);
                type = dma_frcd_type(data);

                data = readl(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN + 8);
                source_id = dma_frcd_source_id(data);

                guest_addr = dmar_readq(iommu->reg + reg +
                                fault_index * PRIMARY_FAULT_REG_LEN);
                guest_addr = dma_frcd_page_addr(guest_addr);
                /* clear the fault */
                writel(DMA_FRCD_F, iommu->reg + reg +
                        fault_index * PRIMARY_FAULT_REG_LEN + 12);

                raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

                dmar_fault_do_one(iommu, type, fault_reason,
                                source_id, guest_addr);

                fault_index++;
                if (fault_index >= cap_num_fault_regs(iommu->cap))
                        fault_index = 0;
                raw_spin_lock_irqsave(&iommu->register_lock, flag);
        }
clear_rest:
        /* clear all the other faults */
        fault_status = readl(iommu->reg + DMAR_FSTS_REG);
        writel(fault_status, iommu->reg + DMAR_FSTS_REG);

        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
        return IRQ_HANDLED;
}

int dmar_set_interrupt(struct intel_iommu *iommu)
{
        int irq, ret;

        /*
         * Check if the fault interrupt is already initialized.
         */
        if (iommu->irq)
                return 0;

        irq = create_irq();
        if (!irq) {
                pr_err("IOMMU: no free vectors\n");
                return -EINVAL;
        }

        irq_set_handler_data(irq, iommu);
        iommu->irq = irq;

        ret = arch_setup_dmar_msi(irq);
        if (ret) {
                irq_set_handler_data(irq, NULL);
                iommu->irq = 0;
                destroy_irq(irq);
                return ret;
        }

        ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
        if (ret)
                pr_err("IOMMU: can't request irq\n");
        return ret;
}

int __init enable_drhd_fault_handling(void)
{
        struct dmar_drhd_unit *drhd;

        /*
         * Enable fault control interrupt.
         */
        for_each_drhd_unit(drhd) {
                int ret;
                struct intel_iommu *iommu = drhd->iommu;

                ret = dmar_set_interrupt(iommu);
                if (ret) {
                        pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
                               (unsigned long long)drhd->reg_base_addr, ret);
                        return -1;
                }

                /*
                 * Clear any previous faults.
                 */
                dmar_fault(iommu->irq, iommu);
        }

        return 0;
}

/*
 * Re-enable Queued Invalidation interface.
 */
int dmar_reenable_qi(struct intel_iommu *iommu)
{
        if (!ecap_qis(iommu->ecap))
                return -ENOENT;

        if (!iommu->qi)
                return -ENOENT;

        /*
         * First disable queued invalidation.
         */
        dmar_disable_qi(iommu);
        /*
         * Then enable queued invalidation again. Since there are no
         * pending invalidation requests now, it's safe to re-enable
         * queued invalidation.
         */
        __dmar_enable_qi(iommu);

        return 0;
}

/*
 * Check interrupt remapping support in the DMAR table description.
 */
int __init dmar_ir_support(void)
{
        struct acpi_table_dmar *dmar;

        dmar = (struct acpi_table_dmar *)dmar_tbl;
        if (!dmar)
                return 0;
        return dmar->flags & 0x1;       /* BIT0: INTR_REMAP */
}
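
/*
 * Register the detection routine in the IOMMU init table so the x86 boot
 * code runs it (and, via x86_init.iommu.iommu_init, the full Intel IOMMU
 * initialization) at the appropriate point during early boot.
 */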
IOMMU_INIT_POST(detect_intel_iommu);