diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/kernel/amd_iommu_init.c | 15 | ||||
-rw-r--r-- | arch/x86/kernel/aperture_64.c | 11 | ||||
-rw-r--r-- | arch/x86/kernel/pci-calgary_64.c | 18 | ||||
-rw-r--r-- | arch/x86/kernel/pci-dma.c | 44 | ||||
-rw-r--r-- | arch/x86/kernel/pci-gart_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/pci-iommu_table.c | 89 | ||||
-rw-r--r-- | arch/x86/kernel/pci-swiotlb.c | 44 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 28 |
9 files changed, 207 insertions, 45 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 80a93dc99076..2c833d8c4141 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -45,6 +45,7 @@ obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o obj-y += tsc.o io_delay.o rtc.o +obj-y += pci-iommu_table.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 3cb482e123de..6e11c8134158 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -31,7 +31,7 @@ #include <asm/iommu.h> #include <asm/gart.h> #include <asm/x86_init.h> - +#include <asm/iommu_table.h> /* * definitions for the ACPI scanning code */ @@ -1499,13 +1499,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) return 0; } -void __init amd_iommu_detect(void) +int __init amd_iommu_detect(void) { if (no_iommu || (iommu_detected && !gart_iommu_aperture)) - return; + return -ENODEV; if (amd_iommu_disabled) - return; + return -ENODEV; if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { iommu_detected = 1; @@ -1514,7 +1514,9 @@ void __init amd_iommu_detect(void) /* Make sure ACS will be enabled */ pci_request_acs(); + return 1; } + return -ENODEV; } /**************************************************************************** @@ -1545,3 +1547,8 @@ static int __init parse_amd_iommu_options(char *str) __setup("amd_iommu_dump", parse_amd_iommu_dump); __setup("amd_iommu=", parse_amd_iommu_options); + +IOMMU_INIT_FINISH(amd_iommu_detect, + gart_iommu_hole_init, + 0, + 0); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 377f5db3b8b4..b3a16e8f0703 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -371,7 +371,7 @@ void __init early_gart_iommu_check(void) static int __initdata printed_gart_size_msg; -void __init gart_iommu_hole_init(void) +int __init gart_iommu_hole_init(void) { u32 agp_aper_base = 0, agp_aper_order = 0; u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; @@ -381,7 +381,7 @@ void __init gart_iommu_hole_init(void) if (gart_iommu_aperture_disabled || !fix_aperture || !early_pci_allowed()) - return; + return -ENODEV; printk(KERN_INFO "Checking aperture...\n"); @@ -463,8 +463,9 @@ out: unsigned long n = (32 * 1024 * 1024) << last_aper_order; insert_aperture_resource((u32)last_aper_base, n); + return 1; } - return; + return 0; } if (!fallback_aper_force) { @@ -500,7 +501,7 @@ out: panic("Not enough memory for aperture"); } } else { - return; + return 0; } /* Fix up the north bridges */ @@ -526,4 +527,6 @@ out: } set_up_gart_resume(aper_order, aper_alloc); + + return 1; } diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 078d4ec1a9d9..f56a117cef68 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -47,6 +47,7 @@ #include <asm/rio.h> #include <asm/bios_ebda.h> #include <asm/x86_init.h> +#include <asm/iommu_table.h> #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT int use_calgary __read_mostly = 1; @@ -1364,7 +1365,7 @@ static int __init calgary_iommu_init(void) return 0; } -void __init detect_calgary(void) +int __init detect_calgary(void) { int bus; void *tbl; @@ -1378,13 +1379,13 @@ void __init detect_calgary(void) * another HW IOMMU already, bail out. */ if (no_iommu || iommu_detected) - return; + return -ENODEV; if (!use_calgary) - return; + return -ENODEV; if (!early_pci_allowed()) - return; + return -ENODEV; printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n"); @@ -1410,13 +1411,13 @@ void __init detect_calgary(void) if (!rio_table_hdr) { printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table " "in EBDA - bailing!\n"); - return; + return -ENODEV; } ret = build_detail_arrays(); if (ret) { printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret); - return; + return -ENOMEM; } specified_table_size = determine_tce_table_size((is_kdump_kernel() ? @@ -1464,7 +1465,7 @@ void __init detect_calgary(void) x86_init.iommu.iommu_init = calgary_iommu_init; } - return; + return calgary_found; cleanup: for (--bus; bus >= 0; --bus) { @@ -1473,6 +1474,7 @@ cleanup: if (info->tce_space) free_tce_table(info->tce_space); } + return -ENOMEM; } static int __init calgary_parse_options(char *p) @@ -1594,3 +1596,5 @@ static int __init calgary_fixup_tce_spaces(void) * and before device_initcall. */ rootfs_initcall(calgary_fixup_tce_spaces); + +IOMMU_INIT_POST(detect_calgary); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 9f07cfcbd3a5..9ea999a4dcc1 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -11,9 +11,8 @@ #include <asm/iommu.h> #include <asm/gart.h> #include <asm/calgary.h> -#include <asm/amd_iommu.h> #include <asm/x86_init.h> -#include <asm/xen/swiotlb-xen.h> +#include <asm/iommu_table.h> static int forbid_dac __read_mostly; @@ -45,6 +44,8 @@ int iommu_detected __read_mostly = 0; */ int iommu_pass_through __read_mostly; +extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; + /* Dummy device used for NULL arguments (normally ISA). */ struct device x86_dma_fallback_dev = { .init_name = "fallback device", @@ -130,26 +131,24 @@ static void __init dma32_free_bootmem(void) void __init pci_iommu_alloc(void) { + struct iommu_table_entry *p; + /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); - if (pci_xen_swiotlb_detect() || pci_swiotlb_detect()) - goto out; - - gart_iommu_hole_init(); - - detect_calgary(); - - detect_intel_iommu(); + sort_iommu_table(__iommu_table, __iommu_table_end); + check_iommu_entries(__iommu_table, __iommu_table_end); - /* needs to be called after gart_iommu_hole_init */ - amd_iommu_detect(); -out: - pci_xen_swiotlb_init(); - - pci_swiotlb_init(); + for (p = __iommu_table; p < __iommu_table_end; p++) { + if (p && p->detect && p->detect() > 0) { + p->flags |= IOMMU_DETECTED; + if (p->early_init) + p->early_init(); + if (p->flags & IOMMU_FINISH_IF_DETECTED) + break; + } + } } - void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag) { @@ -292,6 +291,7 @@ EXPORT_SYMBOL(dma_supported); static int __init pci_iommu_init(void) { + struct iommu_table_entry *p; dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); #ifdef CONFIG_PCI @@ -299,12 +299,10 @@ static int __init pci_iommu_init(void) #endif x86_init.iommu.iommu_init(); - if (swiotlb || xen_swiotlb) { - printk(KERN_INFO "PCI-DMA: " - "Using software bounce buffering for IO (SWIOTLB)\n"); - swiotlb_print_info(); - } else - swiotlb_free(); + for (p = __iommu_table; p < __iommu_table_end; p++) { + if (p && (p->flags & IOMMU_DETECTED) && p->late_init) + p->late_init(); + } return 0; } diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index c562207b1b3d..ba0f0ca9f280 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -41,6 +41,7 @@ #include <asm/dma.h> #include <asm/amd_nb.h> #include <asm/x86_init.h> +#include <asm/iommu_table.h> static unsigned long iommu_bus_base; /* GART remapping area (physical) */ static unsigned long iommu_size; /* size of remapping area bytes */ @@ -905,3 +906,4 @@ void __init gart_parse_options(char *p) } } } +IOMMU_INIT_POST(gart_iommu_hole_init); diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c new file mode 100644 index 000000000000..55d745ec1181 --- /dev/null +++ b/arch/x86/kernel/pci-iommu_table.c @@ -0,0 +1,89 @@ +#include <linux/dma-mapping.h> +#include <asm/iommu_table.h> +#include <linux/string.h> +#include <linux/kallsyms.h> + + +#define DEBUG 1 + +static struct iommu_table_entry * __init +find_dependents_of(struct iommu_table_entry *start, + struct iommu_table_entry *finish, + struct iommu_table_entry *q) +{ + struct iommu_table_entry *p; + + if (!q) + return NULL; + + for (p = start; p < finish; p++) + if (p->detect == q->depend) + return p; + + return NULL; +} + + +void __init sort_iommu_table(struct iommu_table_entry *start, + struct iommu_table_entry *finish) { + + struct iommu_table_entry *p, *q, tmp; + + for (p = start; p < finish; p++) { +again: + q = find_dependents_of(start, finish, p); + /* We are bit sneaky here. We use the memory address to figure + * out if the node we depend on is past our point, if so, swap. + */ + if (q > p) { + tmp = *p; + memmove(p, q, sizeof(*p)); + *q = tmp; + goto again; + } + } + +} + +#ifdef DEBUG +void __init check_iommu_entries(struct iommu_table_entry *start, + struct iommu_table_entry *finish) +{ + struct iommu_table_entry *p, *q, *x; + char sym_p[KSYM_SYMBOL_LEN]; + char sym_q[KSYM_SYMBOL_LEN]; + + /* Simple cyclic dependency checker. */ + for (p = start; p < finish; p++) { + q = find_dependents_of(start, finish, p); + x = find_dependents_of(start, finish, q); + if (p == x) { + sprint_symbol(sym_p, (unsigned long)p->detect); + sprint_symbol(sym_q, (unsigned long)q->detect); + + printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %s depends" \ + " on %s and vice-versa. BREAKING IT.\n", + sym_p, sym_q); + /* Heavy handed way..*/ + x->depend = 0; + } + } + + for (p = start; p < finish; p++) { + q = find_dependents_of(p, finish, p); + if (q && q > p) { + sprint_symbol(sym_p, (unsigned long)p->detect); + sprint_symbol(sym_q, (unsigned long)q->detect); + + printk(KERN_ERR "EXECUTION ORDER INVALID! %s "\ + "should be called before %s!\n", + sym_p, sym_q); + } + } +} +#else +inline void check_iommu_entries(struct iommu_table_entry *start, + struct iommu_table_entry *finish) +{ +} +#endif diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index a5bc528d4328..8f972cbddef0 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -10,7 +10,8 @@ #include <asm/iommu.h> #include <asm/swiotlb.h> #include <asm/dma.h> - +#include <asm/xen/swiotlb-xen.h> +#include <asm/iommu_table.h> int swiotlb __read_mostly; static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, @@ -41,25 +42,42 @@ static struct dma_map_ops swiotlb_dma_ops = { }; /* - * pci_swiotlb_detect - set swiotlb to 1 if necessary + * pci_swiotlb_detect_override - set swiotlb to 1 if necessary * * This returns non-zero if we are forced to use swiotlb (by the boot * option). */ -int __init pci_swiotlb_detect(void) +int __init pci_swiotlb_detect_override(void) { int use_swiotlb = swiotlb | swiotlb_force; + if (swiotlb_force) + swiotlb = 1; + + return use_swiotlb; +} +IOMMU_INIT_FINISH(pci_swiotlb_detect_override, + pci_xen_swiotlb_detect, + pci_swiotlb_init, + pci_swiotlb_late_init); + +/* + * if 4GB or more detected (and iommu=off not set) return 1 + * and set swiotlb to 1. + */ +int __init pci_swiotlb_detect_4gb(void) +{ /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 if (!no_iommu && max_pfn > MAX_DMA32_PFN) swiotlb = 1; #endif - if (swiotlb_force) - swiotlb = 1; - - return use_swiotlb; + return swiotlb; } +IOMMU_INIT(pci_swiotlb_detect_4gb, + pci_swiotlb_detect_override, + pci_swiotlb_init, + pci_swiotlb_late_init); void __init pci_swiotlb_init(void) { @@ -68,3 +86,15 @@ void __init pci_swiotlb_init(void) dma_ops = &swiotlb_dma_ops; } } + +void __init pci_swiotlb_late_init(void) +{ + /* An IOMMU turned us off. */ + if (!swiotlb) + swiotlb_free(); + else { + printk(KERN_INFO "PCI-DMA: " + "Using software bounce buffering for IO (SWIOTLB)\n"); + swiotlb_print_info(); + } +} diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index d0bb52296fa3..38e2b67807e1 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -242,6 +242,12 @@ SECTIONS __x86_cpu_dev_end = .; } + /* + * start address and size of operations which during runtime + * can be patched with virtualization friendly instructions or + * baremetal native ones. Think page table operations. + * Details in paravirt_types.h + */ . = ALIGN(8); .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { __parainstructions = .; @@ -249,6 +255,11 @@ SECTIONS __parainstructions_end = .; } + /* + * struct alt_inst entries. From the header (alternative.h): + * "Alternative instructions for different CPU types or capabilities" + * Think locking instructions on spinlocks. + */ . = ALIGN(8); .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { __alt_instructions = .; @@ -256,11 +267,28 @@ SECTIONS __alt_instructions_end = .; } + /* + * And here are the replacement instructions. The linker sticks + * them as binary blobs. The .altinstructions has enough data to + * get the address and the length of them to patch the kernel safely. + */ .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { *(.altinstr_replacement) } /* + * struct iommu_table_entry entries are injected in this section. + * It is an array of IOMMUs which during run time gets sorted depending + * on its dependency order. After rootfs_initcall is complete + * this section can be safely removed. + */ + .iommu_table : AT(ADDR(.iommu_table) - LOAD_OFFSET) { + __iommu_table = .; + *(.iommu_table) + __iommu_table_end = .; + } + . = ALIGN(8); + /* * .exit.text is discard at runtime, not link time, to deal with * references from .altinstructions and .eh_frame */ |