[pandora-kernel.git] drivers/iommu/dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36 #include <linux/tboot.h>
37 #include <linux/dmi.h>
38 #include <linux/slab.h>
39 #include <asm/iommu_table.h>
40
41 #define PREFIX "DMAR: "
42
43 /* No locks are needed as the DMA remapping hardware unit
44  * list is constructed at boot time and hotplug of
45  * these units is not supported by the architecture.
46  */
47 LIST_HEAD(dmar_drhd_units);
48
49 struct acpi_table_header * __initdata dmar_tbl;
50 static acpi_size dmar_tbl_size;
51
52 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
53 {
54         /*
55          * add INCLUDE_ALL at the tail, so a scan of the list will find it
56          * at the very end.
57          */
58         if (drhd->include_all)
59                 list_add_tail(&drhd->list, &dmar_drhd_units);
60         else
61                 list_add(&drhd->list, &dmar_drhd_units);
62 }
63
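/*
 * Resolve one ACPI device-scope entry to a pci_dev.  The entry names a
 * start bus plus a path of (device, function) hops; each hop is looked up
 * on the current bus and, for bridges, the walk continues on the
 * subordinate bus until the final device in the path is reached.
 */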
64 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
65                                            struct pci_dev **dev, u16 segment)
66 {
67         struct pci_bus *bus;
68         struct pci_dev *pdev = NULL;
69         struct acpi_dmar_pci_path *path;
70         int count;
71
72         bus = pci_find_bus(segment, scope->bus);
73         path = (struct acpi_dmar_pci_path *)(scope + 1);
74         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
75                 / sizeof(struct acpi_dmar_pci_path);
76
77         while (count) {
78                 if (pdev)
79                         pci_dev_put(pdev);
80                 /*
81                  * Some BIOSes list non-existent devices in the DMAR
82                  * table; just ignore them.
83                  */
84                 if (!bus) {
85                         printk(KERN_WARNING
86                         PREFIX "Device scope bus [%d] not found\n",
87                         scope->bus);
88                         break;
89                 }
90                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
91                 if (!pdev) {
92                         printk(KERN_WARNING PREFIX
93                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
94                                 segment, bus->number, path->dev, path->fn);
95                         break;
96                 }
97                 path++;
98                 count--;
99                 bus = pdev->subordinate;
100         }
101         if (!pdev) {
102                 printk(KERN_WARNING PREFIX
103                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
104                 segment, scope->bus, path->dev, path->fn);
105                 *dev = NULL;
106                 return 0;
107         }
108         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
109                         pdev->subordinate) || (scope->entry_type ==
110                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
111                 printk(KERN_WARNING PREFIX
112                         "Device scope type does not match for %s\n",
113                          pci_name(pdev));
114                 pci_dev_put(pdev);
115                 return -EINVAL;
116         }
117         *dev = pdev;
118         return 0;
119 }
120
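/*
 * Parse the device-scope list that follows a DRHD/RMRR/ATSR structure.
 * This is a two-pass walk: first count the endpoint and bridge entries,
 * then allocate the pci_dev pointer array and resolve each entry via
 * dmar_parse_one_dev_scope().  IOAPIC entries are skipped without a warning.
 */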
121 int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
122                                 struct pci_dev ***devices, u16 segment)
123 {
124         struct acpi_dmar_device_scope *scope;
125         void * tmp = start;
126         int index;
127         int ret;
128
129         *cnt = 0;
130         while (start < end) {
131                 scope = start;
132                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
133                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
134                         (*cnt)++;
135                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
136                         printk(KERN_WARNING PREFIX
137                                "Unsupported device scope\n");
138                 }
139                 start += scope->length;
140         }
141         if (*cnt == 0)
142                 return 0;
143
144         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
145         if (!*devices)
146                 return -ENOMEM;
147
148         start = tmp;
149         index = 0;
150         while (start < end) {
151                 scope = start;
152                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
153                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
154                         ret = dmar_parse_one_dev_scope(scope,
155                                 &(*devices)[index], segment);
156                         if (ret) {
157                                 kfree(*devices);
158                                 return ret;
159                         }
160                         index++;
161                 }
162                 start += scope->length;
163         }
164
165         return 0;
166 }
167
168 /**
169  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
170  * structure, which uniquely represents one DMA remapping hardware unit
171  * present in the platform
172  */
173 static int __init
174 dmar_parse_one_drhd(struct acpi_dmar_header *header)
175 {
176         struct acpi_dmar_hardware_unit *drhd;
177         struct dmar_drhd_unit *dmaru;
178         int ret = 0;
179
180         drhd = (struct acpi_dmar_hardware_unit *)header;
181         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
182         if (!dmaru)
183                 return -ENOMEM;
184
185         dmaru->hdr = header;
186         dmaru->reg_base_addr = drhd->address;
187         dmaru->segment = drhd->segment;
188         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
189
190         ret = alloc_iommu(dmaru);
191         if (ret) {
192                 kfree(dmaru);
193                 return ret;
194         }
195         dmar_register_drhd_unit(dmaru);
196         return 0;
197 }
198
199 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
200 {
201         struct acpi_dmar_hardware_unit *drhd;
202         int ret = 0;
203
204         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
205
206         if (dmaru->include_all)
207                 return 0;
208
209         ret = dmar_parse_dev_scope((void *)(drhd + 1),
210                                 ((void *)drhd) + drhd->header.length,
211                                 &dmaru->devices_cnt, &dmaru->devices,
212                                 drhd->segment);
213         if (ret) {
214                 list_del(&dmaru->list);
215                 kfree(dmaru);
216         }
217         return ret;
218 }
219
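/*
 * RHSA (Remapping Hardware Static Affinity) structures associate a DRHD
 * register base address with an ACPI proximity domain.  When NUMA is
 * enabled, record the mapped node on the matching IOMMU so allocations can
 * be made node-local; warn if the RHSA points at no known DRHD unit.
 */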
220 #ifdef CONFIG_ACPI_NUMA
221 static int __init
222 dmar_parse_one_rhsa(struct acpi_dmar_header *header)
223 {
224         struct acpi_dmar_rhsa *rhsa;
225         struct dmar_drhd_unit *drhd;
226
227         rhsa = (struct acpi_dmar_rhsa *)header;
228         for_each_drhd_unit(drhd) {
229                 if (drhd->reg_base_addr == rhsa->base_address) {
230                         int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
231
232                         if (!node_online(node))
233                                 node = -1;
234                         drhd->iommu->node = node;
235                         return 0;
236                 }
237         }
238         WARN_TAINT(
239                 1, TAINT_FIRMWARE_WORKAROUND,
240                 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
241                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
242                 rhsa->base_address,
243                 dmi_get_system_info(DMI_BIOS_VENDOR),
244                 dmi_get_system_info(DMI_BIOS_VERSION),
245                 dmi_get_system_info(DMI_PRODUCT_VERSION));
246
247         return 0;
248 }
249 #endif
250
251 static void __init
252 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
253 {
254         struct acpi_dmar_hardware_unit *drhd;
255         struct acpi_dmar_reserved_memory *rmrr;
256         struct acpi_dmar_atsr *atsr;
257         struct acpi_dmar_rhsa *rhsa;
258
259         switch (header->type) {
260         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
261                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
262                                     header);
263                 printk (KERN_INFO PREFIX
264                         "DRHD base: %#016Lx flags: %#x\n",
265                         (unsigned long long)drhd->address, drhd->flags);
266                 break;
267         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
268                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
269                                     header);
270                 printk (KERN_INFO PREFIX
271                         "RMRR base: %#016Lx end: %#016Lx\n",
272                         (unsigned long long)rmrr->base_address,
273                         (unsigned long long)rmrr->end_address);
274                 break;
275         case ACPI_DMAR_TYPE_ATSR:
276                 atsr = container_of(header, struct acpi_dmar_atsr, header);
277                 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
278                 break;
279         case ACPI_DMAR_HARDWARE_AFFINITY:
280                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
281                 printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
282                        (unsigned long long)rhsa->base_address,
283                        rhsa->proximity_domain);
284                 break;
285         }
286 }
287
288 /**
289  * dmar_table_detect - checks to see if the platform supports DMAR devices
290  */
291 static int __init dmar_table_detect(void)
292 {
293         acpi_status status = AE_OK;
294
295         /* if we can find the DMAR table, then there are DMAR devices */
296         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
297                                 (struct acpi_table_header **)&dmar_tbl,
298                                 &dmar_tbl_size);
299
300         if (ACPI_SUCCESS(status) && !dmar_tbl) {
301                 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
302                 status = AE_NOT_FOUND;
303         }
304
305         return (ACPI_SUCCESS(status) ? 1 : 0);
306 }
307
308 /**
309  * parse_dmar_table - parses the DMA reporting table
310  */
311 static int __init
312 parse_dmar_table(void)
313 {
314         struct acpi_table_dmar *dmar;
315         struct acpi_dmar_header *entry_header;
316         int ret = 0;
317
318         /*
319          * Do it again; the earlier dmar_tbl mapping could have been done
320          * with the fixed map.
321          */
322         dmar_table_detect();
323
324         /*
325          * ACPI tables may not be DMA protected by tboot, so use the DMAR
326          * copy that SINIT saved in SinitMleData in the TXT heap (DMA protected)
327          */
328         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
329
330         dmar = (struct acpi_table_dmar *)dmar_tbl;
331         if (!dmar)
332                 return -ENODEV;
333
334         if (dmar->width < PAGE_SHIFT - 1) {
335                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
336                 return -EINVAL;
337         }
338
339         printk (KERN_INFO PREFIX "Host address width %d\n",
340                 dmar->width + 1);
341
342         entry_header = (struct acpi_dmar_header *)(dmar + 1);
343         while (((unsigned long)entry_header) <
344                         (((unsigned long)dmar) + dmar_tbl->length)) {
345                 /* Avoid looping forever on bad ACPI tables */
346                 if (entry_header->length == 0) {
347                         printk(KERN_WARNING PREFIX
348                                 "Invalid 0-length structure\n");
349                         ret = -EINVAL;
350                         break;
351                 }
352
353                 dmar_table_print_dmar_entry(entry_header);
354
355                 switch (entry_header->type) {
356                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
357                         ret = dmar_parse_one_drhd(entry_header);
358                         break;
359                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
360                         ret = dmar_parse_one_rmrr(entry_header);
361                         break;
362                 case ACPI_DMAR_TYPE_ATSR:
363                         ret = dmar_parse_one_atsr(entry_header);
364                         break;
365                 case ACPI_DMAR_HARDWARE_AFFINITY:
366 #ifdef CONFIG_ACPI_NUMA
367                         ret = dmar_parse_one_rhsa(entry_header);
368 #endif
369                         break;
370                 default:
371                         printk(KERN_WARNING PREFIX
372                                 "Unknown DMAR structure type %d\n",
373                                 entry_header->type);
374                         ret = 0; /* for forward compatibility */
375                         break;
376                 }
377                 if (ret)
378                         break;
379
380                 entry_header = ((void *)entry_header + entry_header->length);
381         }
382         return ret;
383 }
384
385 static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
386                           struct pci_dev *dev)
387 {
388         int index;
389
390         while (dev) {
391                 for (index = 0; index < cnt; index++)
392                         if (dev == devices[index])
393                                 return 1;
394
395                 /* Check our parent */
396                 dev = dev->bus->self;
397         }
398
399         return 0;
400 }
401
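/*
 * Find the DRHD unit that covers @dev.  SR-IOV virtual functions are mapped
 * to their physical function first.  A device matches either an explicit
 * device-scope entry (its own or one of its parent bridges) or, failing
 * that, the INCLUDE_ALL unit for its PCI segment, which sits at the tail
 * of dmar_drhd_units.
 */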
402 struct dmar_drhd_unit *
403 dmar_find_matched_drhd_unit(struct pci_dev *dev)
404 {
405         struct dmar_drhd_unit *dmaru = NULL;
406         struct acpi_dmar_hardware_unit *drhd;
407
408         dev = pci_physfn(dev);
409
410         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
411                 drhd = container_of(dmaru->hdr,
412                                     struct acpi_dmar_hardware_unit,
413                                     header);
414
415                 if (dmaru->include_all &&
416                     drhd->segment == pci_domain_nr(dev->bus))
417                         return dmaru;
418
419                 if (dmar_pci_device_match(dmaru->devices,
420                                           dmaru->devices_cnt, dev))
421                         return dmaru;
422         }
423
424         return NULL;
425 }
426
427 int __init dmar_dev_scope_init(void)
428 {
429         static int dmar_dev_scope_initialized;
430         struct dmar_drhd_unit *drhd, *drhd_n;
431         int ret = -ENODEV;
432
433         if (dmar_dev_scope_initialized)
434                 return dmar_dev_scope_initialized;
435
436         if (list_empty(&dmar_drhd_units))
437                 goto fail;
438
439         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
440                 ret = dmar_parse_dev(drhd);
441                 if (ret)
442                         goto fail;
443         }
444
445         ret = dmar_parse_rmrr_atsr_dev();
446         if (ret)
447                 goto fail;
448
449         dmar_dev_scope_initialized = 1;
450         return 0;
451
452 fail:
453         dmar_dev_scope_initialized = ret;
454         return ret;
455 }
456
457
458 int __init dmar_table_init(void)
459 {
460         static int dmar_table_initialized;
461         int ret;
462
463         if (dmar_table_initialized)
464                 return 0;
465
466         dmar_table_initialized = 1;
467
468         ret = parse_dmar_table();
469         if (ret) {
470                 if (ret != -ENODEV)
471                         printk(KERN_INFO PREFIX "Failed to parse DMAR table\n");
472                 return ret;
473         }
474
475         if (list_empty(&dmar_drhd_units)) {
476                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
477                 return -ENODEV;
478         }
479
480         return 0;
481 }
482
483 static void warn_invalid_dmar(u64 addr, const char *message)
484 {
485         WARN_TAINT_ONCE(
486                 1, TAINT_FIRMWARE_WORKAROUND,
487                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
488                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
489                 addr, message,
490                 dmi_get_system_info(DMI_BIOS_VENDOR),
491                 dmi_get_system_info(DMI_BIOS_VERSION),
492                 dmi_get_system_info(DMI_PRODUCT_VERSION));
493 }
494
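/*
 * Early sanity check of every DRHD entry in the DMAR table: a register base
 * address of zero, or CAP/ECAP registers that read back as all ones, means
 * the unit is unusable (typically broken firmware).  Returns 1 if all units
 * look valid, 0 otherwise.
 */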
495 int __init check_zero_address(void)
496 {
497         struct acpi_table_dmar *dmar;
498         struct acpi_dmar_header *entry_header;
499         struct acpi_dmar_hardware_unit *drhd;
500
501         dmar = (struct acpi_table_dmar *)dmar_tbl;
502         entry_header = (struct acpi_dmar_header *)(dmar + 1);
503
504         while (((unsigned long)entry_header) <
505                         (((unsigned long)dmar) + dmar_tbl->length)) {
506                 /* Avoid looping forever on bad ACPI tables */
507                 if (entry_header->length == 0) {
508                         printk(KERN_WARNING PREFIX
509                                 "Invalid 0-length structure\n");
510                         return 0;
511                 }
512
513                 if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
514                         void __iomem *addr;
515                         u64 cap, ecap;
516
517                         drhd = (void *)entry_header;
518                         if (!drhd->address) {
519                                 warn_invalid_dmar(0, "");
520                                 goto failed;
521                         }
522
523                         addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
524                         if (!addr) {
525                                 printk(KERN_ERR "IOMMU: can't validate: %llx\n", drhd->address);
526                                 goto failed;
527                         }
528                         cap = dmar_readq(addr + DMAR_CAP_REG);
529                         ecap = dmar_readq(addr + DMAR_ECAP_REG);
530                         early_iounmap(addr, VTD_PAGE_SIZE);
531                         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
532                                 warn_invalid_dmar(drhd->address,
533                                                   " returns all ones");
534                                 goto failed;
535                         }
536                 }
537
538                 entry_header = ((void *)entry_header + entry_header->length);
539         }
540         return 1;
541
542 failed:
543         return 0;
544 }
545
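/*
 * Early detection entry point (run from the x86 IOMMU detection table, see
 * IOMMU_INIT_POST below).  Maps the DMAR table, validates the DRHD register
 * bases, and on success marks the IOMMU as detected, requests PCI ACS and
 * hooks intel_iommu_init into x86_init.  The early table mapping is dropped
 * again before returning.
 */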
546 int __init detect_intel_iommu(void)
547 {
548         int ret;
549
550         ret = dmar_table_detect();
551         if (ret)
552                 ret = check_zero_address();
553         {
554                 struct acpi_table_dmar *dmar;
555
556                 dmar = (struct acpi_table_dmar *) dmar_tbl;
557
558                 if (ret && intr_remapping_enabled && cpu_has_x2apic &&
559                     dmar->flags & 0x1)
560                         printk(KERN_INFO
561                                "Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
562
563                 if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
564                         iommu_detected = 1;
565                         /* Make sure ACS will be enabled */
566                         pci_request_acs();
567                 }
568
569 #ifdef CONFIG_X86
570                 if (ret)
571                         x86_init.iommu.iommu_init = intel_iommu_init;
572 #endif
573         }
574         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
575         dmar_tbl = NULL;
576
577         return ret ? 1 : -ENODEV;
578 }
579
580
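/*
 * Allocate and minimally initialize an intel_iommu for one DRHD unit: map
 * its register window, read the capability/extended-capability registers,
 * compute the supported AGAW values, and remap the window if the IOTLB or
 * fault registers extend beyond the first page.
 */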
581 int alloc_iommu(struct dmar_drhd_unit *drhd)
582 {
583         struct intel_iommu *iommu;
584         int map_size;
585         u32 ver;
586         static int iommu_allocated = 0;
587         int agaw = 0;
588         int msagaw = 0;
589
590         if (!drhd->reg_base_addr) {
591                 warn_invalid_dmar(0, "");
592                 return -EINVAL;
593         }
594
595         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
596         if (!iommu)
597                 return -ENOMEM;
598
599         iommu->seq_id = iommu_allocated++;
600         sprintf (iommu->name, "dmar%d", iommu->seq_id);
601
602         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
603         if (!iommu->reg) {
604                 printk(KERN_ERR "IOMMU: can't map the region\n");
605                 goto error;
606         }
607         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
608         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
609
610         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
611                 warn_invalid_dmar(drhd->reg_base_addr, " returns all ones");
612                 goto err_unmap;
613         }
614
615         agaw = iommu_calculate_agaw(iommu);
616         if (agaw < 0) {
617                 printk(KERN_ERR
618                        "Cannot get a valid agaw for iommu (seq_id = %d)\n",
619                        iommu->seq_id);
620                 goto err_unmap;
621         }
622         msagaw = iommu_calculate_max_sagaw(iommu);
623         if (msagaw < 0) {
624                 printk(KERN_ERR
625                         "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
626                         iommu->seq_id);
627                 goto err_unmap;
628         }
629         iommu->agaw = agaw;
630         iommu->msagaw = msagaw;
631
632         iommu->node = -1;
633
634         /* the registers might be more than one page */
635         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
636                 cap_max_fault_reg_offset(iommu->cap));
637         map_size = VTD_PAGE_ALIGN(map_size);
638         if (map_size > VTD_PAGE_SIZE) {
639                 iounmap(iommu->reg);
640                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
641                 if (!iommu->reg) {
642                         printk(KERN_ERR "IOMMU: can't map the region\n");
643                         goto error;
644                 }
645         }
646
647         ver = readl(iommu->reg + DMAR_VER_REG);
648         pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
649                 iommu->seq_id,
650                 (unsigned long long)drhd->reg_base_addr,
651                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
652                 (unsigned long long)iommu->cap,
653                 (unsigned long long)iommu->ecap);
654
655         raw_spin_lock_init(&iommu->register_lock);
656
657         drhd->iommu = iommu;
658         return 0;
659
660  err_unmap:
661         iounmap(iommu->reg);
662  error:
663         kfree(iommu);
664         return -1;
665 }
666
667 void free_iommu(struct intel_iommu *iommu)
668 {
669         if (!iommu)
670                 return;
671
672         free_dmar_iommu(iommu);
673
674         if (iommu->reg)
675                 iounmap(iommu->reg);
676         kfree(iommu);
677 }
678
679 /*
680  * Reclaim all the submitted descriptors which have completed their work.
681  */
682 static inline void reclaim_free_desc(struct q_inval *qi)
683 {
684         while (qi->desc_status[qi->free_tail] == QI_DONE ||
685                qi->desc_status[qi->free_tail] == QI_ABORT) {
686                 qi->desc_status[qi->free_tail] = QI_FREE;
687                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
688                 qi->free_cnt++;
689         }
690 }
691
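/*
 * Check the fault status register while a wait descriptor is pending.  On an
 * Invalidation Queue Error (IQE) the queue head points at the bad
 * descriptor; if that is our own descriptor, replace it with the wait
 * descriptor and return -EINVAL.  On an Invalidation Time-out Error (ITE)
 * the pending wait descriptors between head and tail are marked QI_ABORT so
 * their submitters restart (-EAGAIN).  Completion errors (ICE) are cleared.
 */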
692 static int qi_check_fault(struct intel_iommu *iommu, int index)
693 {
694         u32 fault;
695         int head, tail;
696         struct q_inval *qi = iommu->qi;
697         int wait_index = (index + 1) % QI_LENGTH;
698
699         if (qi->desc_status[wait_index] == QI_ABORT)
700                 return -EAGAIN;
701
702         fault = readl(iommu->reg + DMAR_FSTS_REG);
703
704         /*
705          * If IQE happens, the head points to the descriptor associated
706          * with the error. No new descriptors are fetched until the IQE
707          * is cleared.
708          */
709         if (fault & DMA_FSTS_IQE) {
710                 head = readl(iommu->reg + DMAR_IQH_REG);
711                 if ((head >> DMAR_IQ_SHIFT) == index) {
712                         printk(KERN_ERR "VT-d detected invalid descriptor: "
713                                 "low=%llx, high=%llx\n",
714                                 (unsigned long long)qi->desc[index].low,
715                                 (unsigned long long)qi->desc[index].high);
716                         memcpy(&qi->desc[index], &qi->desc[wait_index],
717                                         sizeof(struct qi_desc));
718                         __iommu_flush_cache(iommu, &qi->desc[index],
719                                         sizeof(struct qi_desc));
720                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
721                         return -EINVAL;
722                 }
723         }
724
725         /*
726          * If ITE happens, all pending wait_desc commands are aborted.
727          * No new descriptors are fetched until the ITE is cleared.
728          */
729         if (fault & DMA_FSTS_ITE) {
730                 head = readl(iommu->reg + DMAR_IQH_REG);
731                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
732                 head |= 1;
733                 tail = readl(iommu->reg + DMAR_IQT_REG);
734                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
735
736                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
737
738                 do {
739                         if (qi->desc_status[head] == QI_IN_USE)
740                                 qi->desc_status[head] = QI_ABORT;
741                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
742                 } while (head != tail);
743
744                 if (qi->desc_status[wait_index] == QI_ABORT)
745                         return -EAGAIN;
746         }
747
748         if (fault & DMA_FSTS_ICE)
749                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
750
751         return 0;
752 }
753
754 /*
755  * Submit the queued invalidation descriptor to the remapping
756  * hardware unit and wait for its completion.
757  */
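/*
 * Each submission consumes two ring slots: the caller's descriptor plus a
 * wait descriptor that makes the hardware write QI_DONE into the matching
 * desc_status entry, which is what the polling loop below waits for.  If
 * qi_check_fault() reports -EAGAIN (the request was aborted by a time-out
 * on another descriptor), the whole submission is restarted.
 */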
758 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
759 {
760         int rc;
761         struct q_inval *qi = iommu->qi;
762         struct qi_desc *hw, wait_desc;
763         int wait_index, index;
764         unsigned long flags;
765
766         if (!qi)
767                 return 0;
768
769         hw = qi->desc;
770
771 restart:
772         rc = 0;
773
774         raw_spin_lock_irqsave(&qi->q_lock, flags);
775         while (qi->free_cnt < 3) {
776                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
777                 cpu_relax();
778                 raw_spin_lock_irqsave(&qi->q_lock, flags);
779         }
780
781         index = qi->free_head;
782         wait_index = (index + 1) % QI_LENGTH;
783
784         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
785
786         hw[index] = *desc;
787
788         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
789                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
790         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
791
792         hw[wait_index] = wait_desc;
793
794         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
795         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
796
797         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
798         qi->free_cnt -= 2;
799
800         /*
801          * update the HW tail register indicating the presence of
802          * new descriptors.
803          */
804         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
805
806         while (qi->desc_status[wait_index] != QI_DONE) {
807                 /*
808                  * We will leave the interrupts disabled, to prevent interrupt
809                  * context from queueing another cmd while a cmd is already
810                  * submitted and waiting for completion on this cpu. This is to
811                  * avoid a deadlock where the interrupt context could wait
812                  * indefinitely for free slots in the queue.
813                  */
814                 rc = qi_check_fault(iommu, index);
815                 if (rc)
816                         break;
817
818                 raw_spin_unlock(&qi->q_lock);
819                 cpu_relax();
820                 raw_spin_lock(&qi->q_lock);
821         }
822
823         qi->desc_status[index] = QI_DONE;
824
825         reclaim_free_desc(qi);
826         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
827
828         if (rc == -EAGAIN)
829                 goto restart;
830
831         return rc;
832 }
833
834 /*
835  * Flush the global interrupt entry cache.
836  */
837 void qi_global_iec(struct intel_iommu *iommu)
838 {
839         struct qi_desc desc;
840
841         desc.low = QI_IEC_TYPE;
842         desc.high = 0;
843
844         /* should never fail */
845         qi_submit_sync(&desc, iommu);
846 }
847
848 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
849                       u64 type)
850 {
851         struct qi_desc desc;
852
853         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
854                         | QI_CC_GRAN(type) | QI_CC_TYPE;
855         desc.high = 0;
856
857         qi_submit_sync(&desc, iommu);
858 }
859
860 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
861                     unsigned int size_order, u64 type)
862 {
863         u8 dw = 0, dr = 0;
864
865         struct qi_desc desc;
866         int ih = 0;
867
868         if (cap_write_drain(iommu->cap))
869                 dw = 1;
870
871         if (cap_read_drain(iommu->cap))
872                 dr = 1;
873
874         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
875                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
876         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
877                 | QI_IOTLB_AM(size_order);
878
879         qi_submit_sync(&desc, iommu);
880 }
881
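/*
 * Issue a device-TLB (ATS) invalidation.  For a non-zero @mask the address
 * must be aligned to 2^(VTD_PAGE_SHIFT + mask) bytes; the low address bits
 * are then filled with ones so the descriptor's size encoding covers
 * 2^mask pages.  Queue depths that do not fit the descriptor's field
 * (>= QI_DEV_IOTLB_MAX_INVS) are encoded as 0.
 */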
882 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
883                         u64 addr, unsigned mask)
884 {
885         struct qi_desc desc;
886
887         if (mask) {
888                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
889                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
890                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
891         } else
892                 desc.high = QI_DEV_IOTLB_ADDR(addr);
893
894         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
895                 qdep = 0;
896
897         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
898                    QI_DIOTLB_TYPE;
899
900         qi_submit_sync(&desc, iommu);
901 }
902
903 /*
904  * Disable Queued Invalidation interface.
905  */
906 void dmar_disable_qi(struct intel_iommu *iommu)
907 {
908         unsigned long flags;
909         u32 sts;
910         cycles_t start_time = get_cycles();
911
912         if (!ecap_qis(iommu->ecap))
913                 return;
914
915         raw_spin_lock_irqsave(&iommu->register_lock, flags);
916
917         sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
918         if (!(sts & DMA_GSTS_QIES))
919                 goto end;
920
921         /*
922          * Give the HW a chance to complete the pending invalidation requests.
923          */
924         while ((readl(iommu->reg + DMAR_IQT_REG) !=
925                 readl(iommu->reg + DMAR_IQH_REG)) &&
926                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
927                 cpu_relax();
928
929         iommu->gcmd &= ~DMA_GCMD_QIE;
930         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
931
932         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
933                       !(sts & DMA_GSTS_QIES), sts);
934 end:
935         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
936 }
937
938 /*
939  * Enable queued invalidation.
940  */
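/*
 * (Re)start the invalidation queue: reset the software free list, zero the
 * tail register, point IQA at the descriptor page, then set QIE in the
 * global command register and wait for QIES to assert in global status.
 */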
941 static void __dmar_enable_qi(struct intel_iommu *iommu)
942 {
943         u32 sts;
944         unsigned long flags;
945         struct q_inval *qi = iommu->qi;
946
947         qi->free_head = qi->free_tail = 0;
948         qi->free_cnt = QI_LENGTH;
949
950         raw_spin_lock_irqsave(&iommu->register_lock, flags);
951
952         /* write zero to the tail reg */
953         writel(0, iommu->reg + DMAR_IQT_REG);
954
955         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
956
957         iommu->gcmd |= DMA_GCMD_QIE;
958         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
959
960         /* Make sure the hardware completes it */
961         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
962
963         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
964 }
965
966 /*
967  * Enable the Queued Invalidation interface. This is a must to support
968  * interrupt-remapping. It is also used by DMA-remapping, which replaces
969  * register-based IOTLB invalidation.
970  */
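/*
 * The queue itself is a single zeroed page allocated on the IOMMU's NUMA
 * node (QI_LENGTH descriptor slots), with a separate kmalloc'ed status
 * array tracking each slot as QI_FREE/QI_IN_USE/QI_DONE/QI_ABORT.
 */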
971 int dmar_enable_qi(struct intel_iommu *iommu)
972 {
973         struct q_inval *qi;
974         struct page *desc_page;
975
976         if (!ecap_qis(iommu->ecap))
977                 return -ENOENT;
978
979         /*
980          * queued invalidation is already set up and enabled.
981          */
982         if (iommu->qi)
983                 return 0;
984
985         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
986         if (!iommu->qi)
987                 return -ENOMEM;
988
989         qi = iommu->qi;
990
991
992         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
993         if (!desc_page) {
994                 kfree(qi);
995                 iommu->qi = NULL;
996                 return -ENOMEM;
997         }
998
999         qi->desc = page_address(desc_page);
1000
1001         qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1002         if (!qi->desc_status) {
1003                 free_page((unsigned long) qi->desc);
1004                 kfree(qi);
1005                 iommu->qi = NULL;
1006                 return -ENOMEM;
1007         }
1008
1009         qi->free_head = qi->free_tail = 0;
1010         qi->free_cnt = QI_LENGTH;
1011
1012         raw_spin_lock_init(&qi->q_lock);
1013
1014         __dmar_enable_qi(iommu);
1015
1016         return 0;
1017 }
1018
1019 /* iommu interrupt handling. Most of it is MSI-like. */
1020
1021 enum faulttype {
1022         DMA_REMAP,
1023         INTR_REMAP,
1024         UNKNOWN,
1025 };
1026
1027 static const char *dma_remap_fault_reasons[] =
1028 {
1029         "Software",
1030         "Present bit in root entry is clear",
1031         "Present bit in context entry is clear",
1032         "Invalid context entry",
1033         "Access beyond MGAW",
1034         "PTE Write access is not set",
1035         "PTE Read access is not set",
1036         "Next page table ptr is invalid",
1037         "Root table address invalid",
1038         "Context table ptr is invalid",
1039         "non-zero reserved fields in RTP",
1040         "non-zero reserved fields in CTP",
1041         "non-zero reserved fields in PTE",
1042 };
1043
1044 static const char *intr_remap_fault_reasons[] =
1045 {
1046         "Detected reserved fields in the decoded interrupt-remapped request",
1047         "Interrupt index exceeded the interrupt-remapping table size",
1048         "Present field in the IRTE entry is clear",
1049         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1050         "Detected reserved fields in the IRTE entry",
1051         "Blocked a compatibility format interrupt request",
1052         "Blocked an interrupt request due to source-id verification failure",
1053 };
1054
1055 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
1056
1057 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1058 {
1059         if (fault_reason >= 0x20 && (fault_reason < 0x20 +
1060                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
1061                 *fault_type = INTR_REMAP;
1062                 return intr_remap_fault_reasons[fault_reason - 0x20];
1063         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1064                 *fault_type = DMA_REMAP;
1065                 return dma_remap_fault_reasons[fault_reason];
1066         } else {
1067                 *fault_type = UNKNOWN;
1068                 return "Unknown";
1069         }
1070 }
1071
1072 void dmar_msi_unmask(struct irq_data *data)
1073 {
1074         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1075         unsigned long flag;
1076
1077         /* unmask it */
1078         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1079         writel(0, iommu->reg + DMAR_FECTL_REG);
1080         /* Read a reg to force-flush the posted write */
1081         readl(iommu->reg + DMAR_FECTL_REG);
1082         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1083 }
1084
1085 void dmar_msi_mask(struct irq_data *data)
1086 {
1087         unsigned long flag;
1088         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1089
1090         /* mask it */
1091         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1092         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1093         /* Read a reg to force-flush the posted write */
1094         readl(iommu->reg + DMAR_FECTL_REG);
1095         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1096 }
1097
1098 void dmar_msi_write(int irq, struct msi_msg *msg)
1099 {
1100         struct intel_iommu *iommu = irq_get_handler_data(irq);
1101         unsigned long flag;
1102
1103         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1104         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1105         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1106         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1107         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1108 }
1109
1110 void dmar_msi_read(int irq, struct msi_msg *msg)
1111 {
1112         struct intel_iommu *iommu = irq_get_handler_data(irq);
1113         unsigned long flag;
1114
1115         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1116         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1117         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1118         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1119         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1120 }
1121
1122 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1123                 u8 fault_reason, u16 source_id, unsigned long long addr)
1124 {
1125         const char *reason;
1126         int fault_type;
1127
1128         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1129
1130         if (fault_type == INTR_REMAP)
1131                 printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
1132                        "fault index %llx\n"
1133                         "INTR-REMAP:[fault reason %02d] %s\n",
1134                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1135                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1136                         fault_reason, reason);
1137         else
1138                 printk(KERN_ERR
1139                        "DMAR:[%s] Request device [%02x:%02x.%d] "
1140                        "fault addr %llx\n"
1141                        "DMAR:[fault reason %02d] %s\n",
1142                        (type ? "DMA Read" : "DMA Write"),
1143                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1144                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1145         return 0;
1146 }
1147
1148 #define PRIMARY_FAULT_REG_LEN (16)
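/*
 * Fault interrupt handler.  Starting from the index reported in the fault
 * status register, walk the primary fault recording registers (16 bytes
 * each: faulting address, source-id, reason/type), log and clear each
 * recorded fault, then clear the summary bits in DMAR_FSTS_REG.
 */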
1149 irqreturn_t dmar_fault(int irq, void *dev_id)
1150 {
1151         struct intel_iommu *iommu = dev_id;
1152         int reg, fault_index;
1153         u32 fault_status;
1154         unsigned long flag;
1155
1156         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1157         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1158         if (fault_status)
1159                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1160                        fault_status);
1161
1162         /* TBD: ignore advanced fault log currently */
1163         if (!(fault_status & DMA_FSTS_PPF))
1164                 goto clear_rest;
1165
1166         fault_index = dma_fsts_fault_record_index(fault_status);
1167         reg = cap_fault_reg_offset(iommu->cap);
1168         while (1) {
1169                 u8 fault_reason;
1170                 u16 source_id;
1171                 u64 guest_addr;
1172                 int type;
1173                 u32 data;
1174
1175                 /* highest 32 bits */
1176                 data = readl(iommu->reg + reg +
1177                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1178                 if (!(data & DMA_FRCD_F))
1179                         break;
1180
1181                 fault_reason = dma_frcd_fault_reason(data);
1182                 type = dma_frcd_type(data);
1183
1184                 data = readl(iommu->reg + reg +
1185                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1186                 source_id = dma_frcd_source_id(data);
1187
1188                 guest_addr = dmar_readq(iommu->reg + reg +
1189                                 fault_index * PRIMARY_FAULT_REG_LEN);
1190                 guest_addr = dma_frcd_page_addr(guest_addr);
1191                 /* clear the fault */
1192                 writel(DMA_FRCD_F, iommu->reg + reg +
1193                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1194
1195                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1196
1197                 dmar_fault_do_one(iommu, type, fault_reason,
1198                                 source_id, guest_addr);
1199
1200                 fault_index++;
1201                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1202                         fault_index = 0;
1203                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1204         }
1205 clear_rest:
1206         /* clear all the other faults */
1207         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1208         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1209
1210         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1211         return IRQ_HANDLED;
1212 }
1213
1214 int dmar_set_interrupt(struct intel_iommu *iommu)
1215 {
1216         int irq, ret;
1217
1218         /*
1219          * Check if the fault interrupt is already initialized.
1220          */
1221         if (iommu->irq)
1222                 return 0;
1223
1224         irq = create_irq();
1225         if (!irq) {
1226                 printk(KERN_ERR "IOMMU: no free vectors\n");
1227                 return -EINVAL;
1228         }
1229
1230         irq_set_handler_data(irq, iommu);
1231         iommu->irq = irq;
1232
1233         ret = arch_setup_dmar_msi(irq);
1234         if (ret) {
1235                 irq_set_handler_data(irq, NULL);
1236                 iommu->irq = 0;
1237                 destroy_irq(irq);
1238                 return ret;
1239         }
1240
1241         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1242         if (ret)
1243                 printk(KERN_ERR "IOMMU: can't request irq\n");
1244         return ret;
1245 }
1246
1247 int __init enable_drhd_fault_handling(void)
1248 {
1249         struct dmar_drhd_unit *drhd;
1250
1251         /*
1252          * Enable fault control interrupt.
1253          */
1254         for_each_drhd_unit(drhd) {
1255                 int ret;
1256                 struct intel_iommu *iommu = drhd->iommu;
1257                 ret = dmar_set_interrupt(iommu);
1258
1259                 if (ret) {
1260                         printk(KERN_ERR "DRHD %Lx: failed to enable fault"
1261                                " interrupt, ret %d\n",
1262                                (unsigned long long)drhd->reg_base_addr, ret);
1263                         return -1;
1264                 }
1265
1266                 /*
1267                  * Clear any previous faults.
1268                  */
1269                 dmar_fault(iommu->irq, iommu);
1270         }
1271
1272         return 0;
1273 }
1274
1275 /*
1276  * Re-enable Queued Invalidation interface.
1277  */
1278 int dmar_reenable_qi(struct intel_iommu *iommu)
1279 {
1280         if (!ecap_qis(iommu->ecap))
1281                 return -ENOENT;
1282
1283         if (!iommu->qi)
1284                 return -ENOENT;
1285
1286         /*
1287          * First disable queued invalidation.
1288          */
1289         dmar_disable_qi(iommu);
1290         /*
1291          * Then enable queued invalidation again. Since there is no pending
1292          * invalidation requests now, it's safe to re-enable queued
1293          * invalidation.
1294          */
1295         __dmar_enable_qi(iommu);
1296
1297         return 0;
1298 }
1299
1300 /*
1301  * Check for interrupt remapping support in the DMAR table description.
1302  */
1303 int __init dmar_ir_support(void)
1304 {
1305         struct acpi_table_dmar *dmar;
1306         dmar = (struct acpi_table_dmar *)dmar_tbl;
1307         if (!dmar)
1308                 return 0;
1309         return dmar->flags & 0x1;
1310 }
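/*
 * Register detect_intel_iommu() in the x86 IOMMU detection table
 * (asm/iommu_table.h) so it runs during early IOMMU detection.
 */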
1311 IOMMU_INIT_POST(detect_intel_iommu);