1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
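/*
 * Rough shape of the table this file walks (struct names as used below;
 * every sub-table starts with a type/length header and is skipped by its
 * own length field):
 *
 *   struct acpi_table_dmar                  DMAR header, host address width
 *     struct acpi_dmar_header               type: DRHD / RMRR / ATSR
 *       <type-specific fields>
 *       struct acpi_dmar_device_scope       entry type, length, start bus
 *         struct acpi_dmar_pci_path[]       (dev, fn) hops to the device
 *     struct acpi_dmar_header
 *       ...
 */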
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36 #include <linux/dmi.h>
37
38 #undef PREFIX
39 #define PREFIX "DMAR:"
40
41 /* No locks are needed as DMA remapping hardware unit
42  * list is constructed at boot time and hotplug of
43  * these units is not supported by the architecture.
44  */
45 LIST_HEAD(dmar_drhd_units);
46
47 static struct acpi_table_header * __initdata dmar_tbl;
48 static acpi_size dmar_tbl_size;
49
50 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
51 {
52         /*
53          * Add the INCLUDE_ALL unit at the tail, so a scan of the list
54          * will find it at the very end.
55          */
56         if (drhd->include_all)
57                 list_add_tail(&drhd->list, &dmar_drhd_units);
58         else
59                 list_add(&drhd->list, &dmar_drhd_units);
60 }
61
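/*
 * dmar_parse_one_dev_scope - resolve one ACPI device scope entry to a pci_dev
 *
 * The scope entry names a start bus and is followed by an array of
 * {device, function} path elements.  Starting from (@segment, scope->bus),
 * each hop looks up the slot on the current bus with pci_get_slot() and
 * then descends into that device's subordinate bus, so the final hop lands
 * on the endpoint or bridge the BIOS meant to describe.  A reference is
 * returned in *@dev, or NULL if the BIOS described a device that does not
 * exist.
 */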
62 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
63                                            struct pci_dev **dev, u16 segment)
64 {
65         struct pci_bus *bus;
66         struct pci_dev *pdev = NULL;
67         struct acpi_dmar_pci_path *path;
68         int count;
69
70         bus = pci_find_bus(segment, scope->bus);
71         path = (struct acpi_dmar_pci_path *)(scope + 1);
72         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
73                 / sizeof(struct acpi_dmar_pci_path);
74
75         while (count) {
76                 if (pdev)
77                         pci_dev_put(pdev);
78                 /*
79                  * Some BIOSes list non-existent devices in the DMAR table;
80                  * just ignore them.
81                  */
82                 if (!bus) {
83                         printk(KERN_WARNING
84                         PREFIX "Device scope bus [%d] not found\n",
85                         scope->bus);
86                         break;
87                 }
88                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
89                 if (!pdev) {
90                         printk(KERN_WARNING PREFIX
91                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
92                                 segment, bus->number, path->dev, path->fn);
93                         break;
94                 }
95                 path++;
96                 count--;
97                 bus = pdev->subordinate;
98         }
99         if (!pdev) {
100                 printk(KERN_WARNING PREFIX
101                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
102                 segment, scope->bus, path->dev, path->fn);
103                 *dev = NULL;
104                 return 0;
105         }
106         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \
107                         pdev->subordinate) || (scope->entry_type == \
108                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
109                 printk(KERN_WARNING PREFIX
110                         "Device scope type does not match for %s\n",
111                         pci_name(pdev));
112                 pci_dev_put(pdev);
113                 return -EINVAL;
114         }
115         *dev = pdev;
116         return 0;
117 }
118
119 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
120                                        struct pci_dev ***devices, u16 segment)
121 {
122         struct acpi_dmar_device_scope *scope;
123         void * tmp = start;
124         int index;
125         int ret;
126
127         *cnt = 0;
128         while (start < end) {
129                 scope = start;
130                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
131                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
132                         (*cnt)++;
133                 else
134                         printk(KERN_WARNING PREFIX
135                                 "Unsupported device scope\n");
136                 start += scope->length;
137         }
138         if (*cnt == 0)
139                 return 0;
140
141         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
142         if (!*devices)
143                 return -ENOMEM;
144
145         start = tmp;
146         index = 0;
147         while (start < end) {
148                 scope = start;
149                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
150                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
151                         ret = dmar_parse_one_dev_scope(scope,
152                                 &(*devices)[index], segment);
153                         if (ret) {
154                                 kfree(*devices);
155                                 return ret;
156                         }
157                         index++;
158                 }
159                 start += scope->length;
160         }
161
162         return 0;
163 }
164
165 /**
166  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
167  * structure which uniquely represents one DMA remapping hardware unit
168  * present in the platform
169  */
170 static int __init
171 dmar_parse_one_drhd(struct acpi_dmar_header *header)
172 {
173         struct acpi_dmar_hardware_unit *drhd;
174         struct dmar_drhd_unit *dmaru;
175         int ret = 0;
176
177         drhd = (struct acpi_dmar_hardware_unit *)header;
178         if (!drhd->address) {
179                 /* Promote an attitude of violence to a BIOS engineer today */
180                 WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
181                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
182                      dmi_get_system_info(DMI_BIOS_VENDOR),
183                      dmi_get_system_info(DMI_BIOS_VERSION),
184                      dmi_get_system_info(DMI_PRODUCT_VERSION));
185                 return -ENODEV;
186         }
187         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
188         if (!dmaru)
189                 return -ENOMEM;
190
191         dmaru->hdr = header;
192         dmaru->reg_base_addr = drhd->address;
193         dmaru->segment = drhd->segment;
194         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
195
196         ret = alloc_iommu(dmaru);
197         if (ret) {
198                 kfree(dmaru);
199                 return ret;
200         }
201         dmar_register_drhd_unit(dmaru);
202         return 0;
203 }
204
205 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
206 {
207         struct acpi_dmar_hardware_unit *drhd;
208         int ret = 0;
209
210         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
211
212         if (dmaru->include_all)
213                 return 0;
214
215         ret = dmar_parse_dev_scope((void *)(drhd + 1),
216                                 ((void *)drhd) + drhd->header.length,
217                                 &dmaru->devices_cnt, &dmaru->devices,
218                                 drhd->segment);
219         if (ret) {
220                 list_del(&dmaru->list);
221                 kfree(dmaru);
222         }
223         return ret;
224 }
225
226 #ifdef CONFIG_DMAR
227 LIST_HEAD(dmar_rmrr_units);
228
229 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
230 {
231         list_add(&rmrr->list, &dmar_rmrr_units);
232 }
233
234
235 static int __init
236 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
237 {
238         struct acpi_dmar_reserved_memory *rmrr;
239         struct dmar_rmrr_unit *rmrru;
240
241         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
242         if (!rmrru)
243                 return -ENOMEM;
244
245         rmrru->hdr = header;
246         rmrr = (struct acpi_dmar_reserved_memory *)header;
247         rmrru->base_address = rmrr->base_address;
248         rmrru->end_address = rmrr->end_address;
249
250         dmar_register_rmrr_unit(rmrru);
251         return 0;
252 }
253
254 static int __init
255 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
256 {
257         struct acpi_dmar_reserved_memory *rmrr;
258         int ret;
259
260         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
261         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
262                 ((void *)rmrr) + rmrr->header.length,
263                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
264
265         if (ret || (rmrru->devices_cnt == 0)) {
266                 list_del(&rmrru->list);
267                 kfree(rmrru);
268         }
269         return ret;
270 }
271
272 static LIST_HEAD(dmar_atsr_units);
273
274 static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
275 {
276         struct acpi_dmar_atsr *atsr;
277         struct dmar_atsr_unit *atsru;
278
279         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
280         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
281         if (!atsru)
282                 return -ENOMEM;
283
284         atsru->hdr = hdr;
285         atsru->include_all = atsr->flags & 0x1;
286
287         list_add(&atsru->list, &dmar_atsr_units);
288
289         return 0;
290 }
291
292 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
293 {
294         int rc;
295         struct acpi_dmar_atsr *atsr;
296
297         if (atsru->include_all)
298                 return 0;
299
300         atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
301         rc = dmar_parse_dev_scope((void *)(atsr + 1),
302                                 (void *)atsr + atsr->header.length,
303                                 &atsru->devices_cnt, &atsru->devices,
304                                 atsr->segment);
305         if (rc || !atsru->devices_cnt) {
306                 list_del(&atsru->list);
307                 kfree(atsru);
308         }
309
310         return rc;
311 }
312
313 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
314 {
315         int i;
316         struct pci_bus *bus;
317         struct acpi_dmar_atsr *atsr;
318         struct dmar_atsr_unit *atsru;
319
320         list_for_each_entry(atsru, &dmar_atsr_units, list) {
321                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
322                 if (atsr->segment == pci_domain_nr(dev->bus))
323                         goto found;
324         }
325
326         return 0;
327
328 found:
329         for (bus = dev->bus; bus; bus = bus->parent) {
330                 struct pci_dev *bridge = bus->self;
331
332                 if (!bridge || !bridge->is_pcie ||
333                     bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
334                         return 0;
335
336                 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
337                         for (i = 0; i < atsru->devices_cnt; i++)
338                                 if (atsru->devices[i] == bridge)
339                                         return 1;
340                         break;
341                 }
342         }
343
344         if (atsru->include_all)
345                 return 1;
346
347         return 0;
348 }
349 #endif
350
351 static void __init
352 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
353 {
354         struct acpi_dmar_hardware_unit *drhd;
355         struct acpi_dmar_reserved_memory *rmrr;
356         struct acpi_dmar_atsr *atsr;
357
358         switch (header->type) {
359         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
360                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
361                                     header);
362                 printk (KERN_INFO PREFIX
363                         "DRHD base: %#016Lx flags: %#x\n",
364                         (unsigned long long)drhd->address, drhd->flags);
365                 break;
366         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
367                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
368                                     header);
369                 printk (KERN_INFO PREFIX
370                         "RMRR base: %#016Lx end: %#016Lx\n",
371                         (unsigned long long)rmrr->base_address,
372                         (unsigned long long)rmrr->end_address);
373                 break;
374         case ACPI_DMAR_TYPE_ATSR:
375                 atsr = container_of(header, struct acpi_dmar_atsr, header);
376                 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
377                 break;
378         }
379 }
380
381 /**
382  * dmar_table_detect - checks to see if the platform supports DMAR devices
383  */
384 static int __init dmar_table_detect(void)
385 {
386         acpi_status status = AE_OK;
387
388         /* if we can find the DMAR table, then there are DMAR devices */
389         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
390                                 (struct acpi_table_header **)&dmar_tbl,
391                                 &dmar_tbl_size);
392
393         if (ACPI_SUCCESS(status) && !dmar_tbl) {
394                 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
395                 status = AE_NOT_FOUND;
396         }
397
398         return (ACPI_SUCCESS(status) ? 1 : 0);
399 }
400
401 /**
402  * parse_dmar_table - parses the DMA reporting table
403  */
404 static int __init
405 parse_dmar_table(void)
406 {
407         struct acpi_table_dmar *dmar;
408         struct acpi_dmar_header *entry_header;
409         int ret = 0;
410
411         /*
412          * Detect the table again; the earlier dmar_tbl mapping may have
413          * been set up with the early, fixed map.
414          */
415         dmar_table_detect();
416
417         dmar = (struct acpi_table_dmar *)dmar_tbl;
418         if (!dmar)
419                 return -ENODEV;
420
421         if (dmar->width < PAGE_SHIFT - 1) {
422                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
423                 return -EINVAL;
424         }
425
426         printk (KERN_INFO PREFIX "Host address width %d\n",
427                 dmar->width + 1);
428
429         entry_header = (struct acpi_dmar_header *)(dmar + 1);
430         while (((unsigned long)entry_header) <
431                         (((unsigned long)dmar) + dmar_tbl->length)) {
432                 /* Avoid looping forever on bad ACPI tables */
433                 if (entry_header->length == 0) {
434                         printk(KERN_WARNING PREFIX
435                                 "Invalid 0-length structure\n");
436                         ret = -EINVAL;
437                         break;
438                 }
439
440                 dmar_table_print_dmar_entry(entry_header);
441
442                 switch (entry_header->type) {
443                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
444                         ret = dmar_parse_one_drhd(entry_header);
445                         break;
446                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
447 #ifdef CONFIG_DMAR
448                         ret = dmar_parse_one_rmrr(entry_header);
449 #endif
450                         break;
451                 case ACPI_DMAR_TYPE_ATSR:
452 #ifdef CONFIG_DMAR
453                         ret = dmar_parse_one_atsr(entry_header);
454 #endif
455                         break;
456                 default:
457                         printk(KERN_WARNING PREFIX
458                                 "Unknown DMAR structure type\n");
459                         ret = 0; /* for forward compatibility */
460                         break;
461                 }
462                 if (ret)
463                         break;
464
465                 entry_header = ((void *)entry_header + entry_header->length);
466         }
467         return ret;
468 }
469
470 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
471                           struct pci_dev *dev)
472 {
473         int index;
474
475         while (dev) {
476                 for (index = 0; index < cnt; index++)
477                         if (dev == devices[index])
478                                 return 1;
479
480                 /* Check our parent */
481                 dev = dev->bus->self;
482         }
483
484         return 0;
485 }
486
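/*
 * dmar_find_matched_drhd_unit - find the DRHD unit responsible for @dev
 *
 * Units with an explicit device scope are registered at the head of
 * dmar_drhd_units and INCLUDE_ALL units at the tail (see
 * dmar_register_drhd_unit()), so a plain list walk prefers an explicit
 * scope match and only falls back to the catch-all unit of the device's
 * segment.
 */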
487 struct dmar_drhd_unit *
488 dmar_find_matched_drhd_unit(struct pci_dev *dev)
489 {
490         struct dmar_drhd_unit *dmaru = NULL;
491         struct acpi_dmar_hardware_unit *drhd;
492
493         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
494                 drhd = container_of(dmaru->hdr,
495                                     struct acpi_dmar_hardware_unit,
496                                     header);
497
498                 if (dmaru->include_all &&
499                     drhd->segment == pci_domain_nr(dev->bus))
500                         return dmaru;
501
502                 if (dmar_pci_device_match(dmaru->devices,
503                                           dmaru->devices_cnt, dev))
504                         return dmaru;
505         }
506
507         return NULL;
508 }
509
510 int __init dmar_dev_scope_init(void)
511 {
512         struct dmar_drhd_unit *drhd, *drhd_n;
513         int ret = -ENODEV;
514
515         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
516                 ret = dmar_parse_dev(drhd);
517                 if (ret)
518                         return ret;
519         }
520
521 #ifdef CONFIG_DMAR
522         {
523                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
524                 struct dmar_atsr_unit *atsr, *atsr_n;
525
526                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
527                         ret = rmrr_parse_dev(rmrr);
528                         if (ret)
529                                 return ret;
530                 }
531
532                 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
533                         ret = atsr_parse_dev(atsr);
534                         if (ret)
535                                 return ret;
536                 }
537         }
538 #endif
539
540         return ret;
541 }
542
543
544 int __init dmar_table_init(void)
545 {
546         static int dmar_table_initialized;
547         int ret;
548
549         if (dmar_table_initialized)
550                 return 0;
551
552         dmar_table_initialized = 1;
553
554         ret = parse_dmar_table();
555         if (ret) {
556                 if (ret != -ENODEV)
557                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
558                 return ret;
559         }
560
561         if (list_empty(&dmar_drhd_units)) {
562                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
563                 return -ENODEV;
564         }
565
566 #ifdef CONFIG_DMAR
567         if (list_empty(&dmar_rmrr_units))
568                 printk(KERN_INFO PREFIX "No RMRR found\n");
569
570         if (list_empty(&dmar_atsr_units))
571                 printk(KERN_INFO PREFIX "No ATSR found\n");
572 #endif
573
574 #ifdef CONFIG_INTR_REMAP
575         parse_ioapics_under_ir();
576 #endif
577         return 0;
578 }
579
580 void __init detect_intel_iommu(void)
581 {
582         int ret;
583
584         ret = dmar_table_detect();
585
586         {
587 #ifdef CONFIG_INTR_REMAP
588                 struct acpi_table_dmar *dmar;
589                 /*
590                  * For now we disable DMA remapping when interrupt
591                  * remapping is enabled.
592                  * Once queued invalidation support for IOTLB invalidation
593                  * is added, this will no longer be needed.
594                  */
595                 dmar = (struct acpi_table_dmar *) dmar_tbl;
596                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
597                         printk(KERN_INFO
598                                "Queued invalidation will be enabled to support "
599                                "x2apic and Intr-remapping.\n");
600 #endif
601 #ifdef CONFIG_DMAR
602                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
603                     !dmar_disabled)
604                         iommu_detected = 1;
605 #endif
606         }
607         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
608         dmar_tbl = NULL;
609 }
610
611
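/*
 * alloc_iommu - set up the software state for one DRHD unit
 *
 * Maps the unit's register window (one VT-d page first, then remapped
 * larger if the capability registers say the IOTLB or fault-recording
 * registers live beyond it), caches the CAP/ECAP registers and the
 * address-width values derived from them, and hangs the result off
 * drhd->iommu.
 */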
612 int alloc_iommu(struct dmar_drhd_unit *drhd)
613 {
614         struct intel_iommu *iommu;
615         int map_size;
616         u32 ver;
617         static int iommu_allocated = 0;
618         int agaw = 0;
619         int msagaw = 0;
620
621         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
622         if (!iommu)
623                 return -ENOMEM;
624
625         iommu->seq_id = iommu_allocated++;
626         sprintf (iommu->name, "dmar%d", iommu->seq_id);
627
628         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
629         if (!iommu->reg) {
630                 printk(KERN_ERR "IOMMU: can't map the region\n");
631                 goto error;
632         }
633         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
634         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
635
636 #ifdef CONFIG_DMAR
637         agaw = iommu_calculate_agaw(iommu);
638         if (agaw < 0) {
639                 printk(KERN_ERR
640                        "Cannot get a valid agaw for iommu (seq_id = %d)\n",
641                        iommu->seq_id);
642                 goto error;
643         }
644         msagaw = iommu_calculate_max_sagaw(iommu);
645         if (msagaw < 0) {
646                 printk(KERN_ERR
647                         "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
648                         iommu->seq_id);
649                 goto error;
650         }
651 #endif
652         iommu->agaw = agaw;
653         iommu->msagaw = msagaw;
654
655         /* the registers might be more than one page */
656         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
657                 cap_max_fault_reg_offset(iommu->cap));
658         map_size = VTD_PAGE_ALIGN(map_size);
659         if (map_size > VTD_PAGE_SIZE) {
660                 iounmap(iommu->reg);
661                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
662                 if (!iommu->reg) {
663                         printk(KERN_ERR "IOMMU: can't map the region\n");
664                         goto error;
665                 }
666         }
667
668         ver = readl(iommu->reg + DMAR_VER_REG);
669         pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
670                 (unsigned long long)drhd->reg_base_addr,
671                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
672                 (unsigned long long)iommu->cap,
673                 (unsigned long long)iommu->ecap);
674
675         spin_lock_init(&iommu->register_lock);
676
677         drhd->iommu = iommu;
678         return 0;
679 error:
680         kfree(iommu);
681         return -1;
682 }
683
684 void free_iommu(struct intel_iommu *iommu)
685 {
686         if (!iommu)
687                 return;
688
689 #ifdef CONFIG_DMAR
690         free_dmar_iommu(iommu);
691 #endif
692
693         if (iommu->reg)
694                 iounmap(iommu->reg);
695         kfree(iommu);
696 }
697
698 /*
699  * Reclaim all the submitted descriptors which have completed their work.
700  */
701 static inline void reclaim_free_desc(struct q_inval *qi)
702 {
703         while (qi->desc_status[qi->free_tail] == QI_DONE ||
704                qi->desc_status[qi->free_tail] == QI_ABORT) {
705                 qi->desc_status[qi->free_tail] = QI_FREE;
706                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
707                 qi->free_cnt++;
708         }
709 }
710
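/*
 * qi_check_fault - see whether the descriptor at @index was killed by a
 * queue error.  By convention the slot after @index holds the wait
 * descriptor posted by qi_submit_sync(), so its status is what the caller
 * is spinning on.  Returns -EINVAL for an invalid descriptor (IQE),
 * -EAGAIN if a device-TLB invalidation timed out (ITE) and the command
 * should be resubmitted, and 0 otherwise.
 */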
711 static int qi_check_fault(struct intel_iommu *iommu, int index)
712 {
713         u32 fault;
714         int head, tail;
715         struct q_inval *qi = iommu->qi;
716         int wait_index = (index + 1) % QI_LENGTH;
717
718         if (qi->desc_status[wait_index] == QI_ABORT)
719                 return -EAGAIN;
720
721         fault = readl(iommu->reg + DMAR_FSTS_REG);
722
723         /*
724          * If IQE happens, the head points to the descriptor associated
725          * with the error. No new descriptors are fetched until the IQE
726          * is cleared.
727          */
728         if (fault & DMA_FSTS_IQE) {
729                 head = readl(iommu->reg + DMAR_IQH_REG);
730                 if ((head >> DMAR_IQ_SHIFT) == index) {
731                         printk(KERN_ERR "VT-d detected invalid descriptor: "
732                                 "low=%llx, high=%llx\n",
733                                 (unsigned long long)qi->desc[index].low,
734                                 (unsigned long long)qi->desc[index].high);
735                         memcpy(&qi->desc[index], &qi->desc[wait_index],
736                                         sizeof(struct qi_desc));
737                         __iommu_flush_cache(iommu, &qi->desc[index],
738                                         sizeof(struct qi_desc));
739                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
740                         return -EINVAL;
741                 }
742         }
743
744         /*
745          * If ITE happens, all pending wait_desc commands are aborted.
746          * No new descriptors are fetched until the ITE is cleared.
747          */
748         if (fault & DMA_FSTS_ITE) {
749                 head = readl(iommu->reg + DMAR_IQH_REG);
750                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
751                 head |= 1;
752                 tail = readl(iommu->reg + DMAR_IQT_REG);
753                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
754
755                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
756
757                 do {
758                         if (qi->desc_status[head] == QI_IN_USE)
759                                 qi->desc_status[head] = QI_ABORT;
760                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
761                 } while (head != tail);
762
763                 if (qi->desc_status[wait_index] == QI_ABORT)
764                         return -EAGAIN;
765         }
766
767         if (fault & DMA_FSTS_ICE)
768                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
769
770         return 0;
771 }
772
773 /*
774  * Submit the queued invalidation descriptor to the remapping
775  * hardware unit and wait for its completion.
776  */
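/*
 * The invalidation queue itself is a single zeroed page of 16-byte
 * descriptors (QI_LENGTH slots) used as a ring: software fills free slots
 * starting at qi->free_head and publishes them by writing the updated
 * free_head into the hardware tail register (DMAR_IQT_REG); hardware
 * consumes from its own head pointer.  Each submission below takes two
 * slots:
 *
 *     hw[index]                    = *desc;      the caller's descriptor
 *     hw[(index + 1) % QI_LENGTH]  = wait_desc;  a wait descriptor whose
 *                                                status write flips
 *                                                desc_status[wait_index]
 *                                                to QI_DONE
 *
 * so completion is detected by spinning on the status array rather than
 * on a register.
 */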
777 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
778 {
779         int rc;
780         struct q_inval *qi = iommu->qi;
781         struct qi_desc *hw, wait_desc;
782         int wait_index, index;
783         unsigned long flags;
784
785         if (!qi)
786                 return 0;
787
788         hw = qi->desc;
789
790 restart:
791         rc = 0;
792
793         spin_lock_irqsave(&qi->q_lock, flags);
794         while (qi->free_cnt < 3) {
795                 spin_unlock_irqrestore(&qi->q_lock, flags);
796                 cpu_relax();
797                 spin_lock_irqsave(&qi->q_lock, flags);
798         }
799
800         index = qi->free_head;
801         wait_index = (index + 1) % QI_LENGTH;
802
803         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
804
805         hw[index] = *desc;
806
807         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
808                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
809         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
810
811         hw[wait_index] = wait_desc;
812
813         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
814         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
815
816         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
817         qi->free_cnt -= 2;
818
819         /*
820          * update the HW tail register indicating the presence of
821          * new descriptors.
822          */
823         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
824
825         while (qi->desc_status[wait_index] != QI_DONE) {
826                 /*
827                  * We leave interrupts disabled to prevent the interrupt
828                  * context from queueing another command while one is already
829                  * submitted and waiting for completion on this cpu. This avoids
830                  * a deadlock where the interrupt context can wait indefinitely
831                  * for free slots in the queue.
832                  */
833                 rc = qi_check_fault(iommu, index);
834                 if (rc)
835                         break;
836
837                 spin_unlock(&qi->q_lock);
838                 cpu_relax();
839                 spin_lock(&qi->q_lock);
840         }
841
842         qi->desc_status[index] = QI_DONE;
843
844         reclaim_free_desc(qi);
845         spin_unlock_irqrestore(&qi->q_lock, flags);
846
847         if (rc == -EAGAIN)
848                 goto restart;
849
850         return rc;
851 }
852
853 /*
854  * Flush the global interrupt entry cache.
855  */
856 void qi_global_iec(struct intel_iommu *iommu)
857 {
858         struct qi_desc desc;
859
860         desc.low = QI_IEC_TYPE;
861         desc.high = 0;
862
863         /* should never fail */
864         qi_submit_sync(&desc, iommu);
865 }
866
867 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
868                       u64 type)
869 {
870         struct qi_desc desc;
871
872         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
873                         | QI_CC_GRAN(type) | QI_CC_TYPE;
874         desc.high = 0;
875
876         qi_submit_sync(&desc, iommu);
877 }
878
879 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
880                     unsigned int size_order, u64 type)
881 {
882         u8 dw = 0, dr = 0;
883
884         struct qi_desc desc;
885         int ih = 0;
886
887         if (cap_write_drain(iommu->cap))
888                 dw = 1;
889
890         if (cap_read_drain(iommu->cap))
891                 dr = 1;
892
893         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
894                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
895         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
896                 | QI_IOTLB_AM(size_order);
897
898         qi_submit_sync(&desc, iommu);
899 }
900
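/*
 * For a ranged device-TLB flush the size is folded into the address:
 * @addr must be naturally aligned to a 2^@mask page region (BUG_ON below),
 * the low bits are filled in to encode that size, and QI_DEV_IOTLB_SIZE
 * marks the descriptor as a ranged invalidation.  @mask == 0 invalidates
 * a single page.  Queue depths at or above QI_DEV_IOTLB_MAX_INVS fall
 * back to a qdep of 0.
 */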
901 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
902                         u64 addr, unsigned mask)
903 {
904         struct qi_desc desc;
905
906         if (mask) {
907                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
908                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
909                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
910         } else
911                 desc.high = QI_DEV_IOTLB_ADDR(addr);
912
913         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
914                 qdep = 0;
915
916         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
917                    QI_DIOTLB_TYPE;
918
919         qi_submit_sync(&desc, iommu);
920 }
921
922 /*
923  * Disable Queued Invalidation interface.
924  */
925 void dmar_disable_qi(struct intel_iommu *iommu)
926 {
927         unsigned long flags;
928         u32 sts;
929         cycles_t start_time = get_cycles();
930
931         if (!ecap_qis(iommu->ecap))
932                 return;
933
934         spin_lock_irqsave(&iommu->register_lock, flags);
935
936         sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
937         if (!(sts & DMA_GSTS_QIES))
938                 goto end;
939
940         /*
941          * Give the hardware a chance to complete the pending invalidation requests.
942          */
943         while ((readl(iommu->reg + DMAR_IQT_REG) !=
944                 readl(iommu->reg + DMAR_IQH_REG)) &&
945                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
946                 cpu_relax();
947
948         iommu->gcmd &= ~DMA_GCMD_QIE;
949         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
950
951         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
952                       !(sts & DMA_GSTS_QIES), sts);
953 end:
954         spin_unlock_irqrestore(&iommu->register_lock, flags);
955 }
956
957 /*
958  * Enable queued invalidation.
959  */
960 static void __dmar_enable_qi(struct intel_iommu *iommu)
961 {
962         u32 sts;
963         unsigned long flags;
964         struct q_inval *qi = iommu->qi;
965
966         qi->free_head = qi->free_tail = 0;
967         qi->free_cnt = QI_LENGTH;
968
969         spin_lock_irqsave(&iommu->register_lock, flags);
970
971         /* write zero to the tail reg */
972         writel(0, iommu->reg + DMAR_IQT_REG);
973
974         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
975
976         iommu->gcmd |= DMA_GCMD_QIE;
977         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
978
979         /* Make sure hardware complete it */
980         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
981
982         spin_unlock_irqrestore(&iommu->register_lock, flags);
983 }
984
985 /*
986  * Enable Queued Invalidation interface. This is a must to support
987  * interrupt-remapping. Also used by DMA-remapping, which replaces
988  * register based IOTLB invalidation.
989  */
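/*
 * Setting the interface up means allocating the struct q_inval bookkeeping,
 * one zeroed page for the descriptor ring and a parallel status array,
 * pointing the hardware at the ring via DMAR_IQA_REG, and finally setting
 * DMA_GCMD_QIE (done in __dmar_enable_qi(), which is shared with the
 * re-enable path).
 */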
990 int dmar_enable_qi(struct intel_iommu *iommu)
991 {
992         struct q_inval *qi;
993
994         if (!ecap_qis(iommu->ecap))
995                 return -ENOENT;
996
997         /*
998          * queued invalidation is already set up and enabled.
999          */
1000         if (iommu->qi)
1001                 return 0;
1002
1003         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1004         if (!iommu->qi)
1005                 return -ENOMEM;
1006
1007         qi = iommu->qi;
1008
1009         qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
1010         if (!qi->desc) {
1011                 kfree(qi);
1012                 iommu->qi = NULL;
1013                 return -ENOMEM;
1014         }
1015
1016         qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1017         if (!qi->desc_status) {
1018                 free_page((unsigned long) qi->desc);
1019                 kfree(qi);
1020                 iommu->qi = NULL;
1021                 return -ENOMEM;
1022         }
1023
1024         qi->free_head = qi->free_tail = 0;
1025         qi->free_cnt = QI_LENGTH;
1026
1027         spin_lock_init(&qi->q_lock);
1028
1029         __dmar_enable_qi(iommu);
1030
1031         return 0;
1032 }
1033
1034 /* iommu interrupt handling. Most of it is MSI-like. */
1035
1036 enum faulttype {
1037         DMA_REMAP,
1038         INTR_REMAP,
1039         UNKNOWN,
1040 };
1041
1042 static const char *dma_remap_fault_reasons[] =
1043 {
1044         "Software",
1045         "Present bit in root entry is clear",
1046         "Present bit in context entry is clear",
1047         "Invalid context entry",
1048         "Access beyond MGAW",
1049         "PTE Write access is not set",
1050         "PTE Read access is not set",
1051         "Next page table ptr is invalid",
1052         "Root table address invalid",
1053         "Context table ptr is invalid",
1054         "non-zero reserved fields in RTP",
1055         "non-zero reserved fields in CTP",
1056         "non-zero reserved fields in PTE",
1057 };
1058
1059 static const char *intr_remap_fault_reasons[] =
1060 {
1061         "Detected reserved fields in the decoded interrupt-remapped request",
1062         "Interrupt index exceeded the interrupt-remapping table size",
1063         "Present field in the IRTE entry is clear",
1064         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1065         "Detected reserved fields in the IRTE entry",
1066         "Blocked a compatibility format interrupt request",
1067         "Blocked an interrupt request due to source-id verification failure",
1068 };
1069
1070 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
1071
1072 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1073 {
1074         if (fault_reason >= 0x20 && (fault_reason < 0x20 +
1075                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
1076                 *fault_type = INTR_REMAP;
1077                 return intr_remap_fault_reasons[fault_reason - 0x20];
1078         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1079                 *fault_type = DMA_REMAP;
1080                 return dma_remap_fault_reasons[fault_reason];
1081         } else {
1082                 *fault_type = UNKNOWN;
1083                 return "Unknown";
1084         }
1085 }
1086
1087 void dmar_msi_unmask(unsigned int irq)
1088 {
1089         struct intel_iommu *iommu = get_irq_data(irq);
1090         unsigned long flag;
1091
1092         /* unmask it */
1093         spin_lock_irqsave(&iommu->register_lock, flag);
1094         writel(0, iommu->reg + DMAR_FECTL_REG);
1095         /* Read a reg to force flush the post write */
1096         readl(iommu->reg + DMAR_FECTL_REG);
1097         spin_unlock_irqrestore(&iommu->register_lock, flag);
1098 }
1099
1100 void dmar_msi_mask(unsigned int irq)
1101 {
1102         unsigned long flag;
1103         struct intel_iommu *iommu = get_irq_data(irq);
1104
1105         /* mask it */
1106         spin_lock_irqsave(&iommu->register_lock, flag);
1107         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1108         /* Read a reg to force flush the post write */
1109         readl(iommu->reg + DMAR_FECTL_REG);
1110         spin_unlock_irqrestore(&iommu->register_lock, flag);
1111 }
1112
1113 void dmar_msi_write(int irq, struct msi_msg *msg)
1114 {
1115         struct intel_iommu *iommu = get_irq_data(irq);
1116         unsigned long flag;
1117
1118         spin_lock_irqsave(&iommu->register_lock, flag);
1119         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1120         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1121         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1122         spin_unlock_irqrestore(&iommu->register_lock, flag);
1123 }
1124
1125 void dmar_msi_read(int irq, struct msi_msg *msg)
1126 {
1127         struct intel_iommu *iommu = get_irq_data(irq);
1128         unsigned long flag;
1129
1130         spin_lock_irqsave(&iommu->register_lock, flag);
1131         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1132         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1133         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1134         spin_unlock_irqrestore(&iommu->register_lock, flag);
1135 }
1136
1137 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1138                 u8 fault_reason, u16 source_id, unsigned long long addr)
1139 {
1140         const char *reason;
1141         int fault_type;
1142
1143         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1144
1145         if (fault_type == INTR_REMAP)
1146                 printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
1147                        "fault index %llx\n"
1148                         "INTR-REMAP:[fault reason %02d] %s\n",
1149                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1150                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1151                         fault_reason, reason);
1152         else
1153                 printk(KERN_ERR
1154                        "DMAR:[%s] Request device [%02x:%02x.%d] "
1155                        "fault addr %llx \n"
1156                        "DMAR:[fault reason %02d] %s\n",
1157                        (type ? "DMA Read" : "DMA Write"),
1158                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1159                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1160         return 0;
1161 }
1162
1163 #define PRIMARY_FAULT_REG_LEN (16)
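/*
 * Each primary fault record is 16 bytes, laid out as the reads below
 * expect: bytes 0-7 hold the faulting page address, bytes 8-11 the source
 * id of the requester, and bytes 12-15 the fault reason, request type and
 * the F bit that marks the record as valid.  Records are scanned starting
 * at the index reported in the fault status register, and each one is
 * cleared by writing DMA_FRCD_F back to its top word.
 */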
1164 irqreturn_t dmar_fault(int irq, void *dev_id)
1165 {
1166         struct intel_iommu *iommu = dev_id;
1167         int reg, fault_index;
1168         u32 fault_status;
1169         unsigned long flag;
1170
1171         spin_lock_irqsave(&iommu->register_lock, flag);
1172         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1173         if (fault_status)
1174                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1175                        fault_status);
1176
1177         /* TBD: ignore advanced fault log currently */
1178         if (!(fault_status & DMA_FSTS_PPF))
1179                 goto clear_rest;
1180
1181         fault_index = dma_fsts_fault_record_index(fault_status);
1182         reg = cap_fault_reg_offset(iommu->cap);
1183         while (1) {
1184                 u8 fault_reason;
1185                 u16 source_id;
1186                 u64 guest_addr;
1187                 int type;
1188                 u32 data;
1189
1190                 /* highest 32 bits */
1191                 data = readl(iommu->reg + reg +
1192                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1193                 if (!(data & DMA_FRCD_F))
1194                         break;
1195
1196                 fault_reason = dma_frcd_fault_reason(data);
1197                 type = dma_frcd_type(data);
1198
1199                 data = readl(iommu->reg + reg +
1200                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1201                 source_id = dma_frcd_source_id(data);
1202
1203                 guest_addr = dmar_readq(iommu->reg + reg +
1204                                 fault_index * PRIMARY_FAULT_REG_LEN);
1205                 guest_addr = dma_frcd_page_addr(guest_addr);
1206                 /* clear the fault */
1207                 writel(DMA_FRCD_F, iommu->reg + reg +
1208                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1209
1210                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1211
1212                 dmar_fault_do_one(iommu, type, fault_reason,
1213                                 source_id, guest_addr);
1214
1215                 fault_index++;
1216                 if (fault_index > cap_num_fault_regs(iommu->cap))
1217                         fault_index = 0;
1218                 spin_lock_irqsave(&iommu->register_lock, flag);
1219         }
1220 clear_rest:
1221         /* clear all the other faults */
1222         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1223         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1224
1225         spin_unlock_irqrestore(&iommu->register_lock, flag);
1226         return IRQ_HANDLED;
1227 }
1228
1229 int dmar_set_interrupt(struct intel_iommu *iommu)
1230 {
1231         int irq, ret;
1232
1233         /*
1234          * Check if the fault interrupt is already initialized.
1235          */
1236         if (iommu->irq)
1237                 return 0;
1238
1239         irq = create_irq();
1240         if (!irq) {
1241                 printk(KERN_ERR "IOMMU: no free vectors\n");
1242                 return -EINVAL;
1243         }
1244
1245         set_irq_data(irq, iommu);
1246         iommu->irq = irq;
1247
1248         ret = arch_setup_dmar_msi(irq);
1249         if (ret) {
1250                 set_irq_data(irq, NULL);
1251                 iommu->irq = 0;
1252                 destroy_irq(irq);
1253                 return ret;
1254         }
1255
1256         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1257         if (ret)
1258                 printk(KERN_ERR "IOMMU: can't request irq\n");
1259         return ret;
1260 }
1261
1262 int __init enable_drhd_fault_handling(void)
1263 {
1264         struct dmar_drhd_unit *drhd;
1265
1266         /*
1267          * Enable fault control interrupt.
1268          */
1269         for_each_drhd_unit(drhd) {
1270                 int ret;
1271                 struct intel_iommu *iommu = drhd->iommu;
1272                 ret = dmar_set_interrupt(iommu);
1273
1274                 if (ret) {
1275                         printk(KERN_ERR "DRHD %Lx: failed to enable fault "
1276                                "interrupt, ret %d\n",
1277                                (unsigned long long)drhd->reg_base_addr, ret);
1278                         return -1;
1279                 }
1280         }
1281
1282         return 0;
1283 }
1284
1285 /*
1286  * Re-enable Queued Invalidation interface.
1287  */
1288 int dmar_reenable_qi(struct intel_iommu *iommu)
1289 {
1290         if (!ecap_qis(iommu->ecap))
1291                 return -ENOENT;
1292
1293         if (!iommu->qi)
1294                 return -ENOENT;
1295
1296         /*
1297          * First disable queued invalidation.
1298          */
1299         dmar_disable_qi(iommu);
1300         /*
1301          * Then enable queued invalidation again. Since there is no pending
1302          * invalidation requests now, it's safe to re-enable queued
1303          * invalidation.
1304          */
1305         __dmar_enable_qi(iommu);
1306
1307         return 0;
1308 }