diff options
Diffstat (limited to 'arch/x86')
69 files changed, 1057 insertions, 246 deletions
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index ad8ec356fb36..0e103236b754 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -14,3 +14,4 @@ obj-y += crypto/ obj-y += vdso/ obj-$(CONFIG_IA32_EMULATION) += ia32/ +obj-y += platform/ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index dfabfefc21c4..e8327686d3c5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1,6 +1,3 @@ -# x86 configuration -mainmenu "Linux Kernel Configuration for x86" - # Select 32 or 64 bit config 64BIT bool "64-bit kernel" if ARCH = "x86" @@ -347,6 +344,7 @@ endif config X86_VSMP bool "ScaleMP vSMP" + select PARAVIRT_GUEST select PARAVIRT depends on X86_64 && PCI depends on X86_EXTENDED_PLATFORM @@ -1895,6 +1893,11 @@ config PCI_OLPC def_bool y depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY) +config PCI_XEN + def_bool y + depends on PCI && XEN + select SWIOTLB_XEN + config PCI_DOMAINS def_bool y depends on PCI diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 92091de11113..55d106b5e31b 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -93,6 +93,9 @@ extern u8 acpi_sci_flags; extern int acpi_sci_override_gsi; void acpi_pic_sci_set_trigger(unsigned int, u16); +extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi, + int trigger, int polarity); + static inline void disable_acpi(void) { acpi_disabled = 1; diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index f0203f4791a8..072273082528 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -41,6 +41,8 @@ #include <asm-generic/int-ll64.h> #include <asm/page.h> +#include <xen/xen.h> + #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ { type ret; asm volatile("mov" size " %1,%0":reg (ret) \ @@ -351,6 +353,17 @@ extern void early_iounmap(void __iomem *addr, unsigned long size); extern void fixup_early_ioremap(void); extern bool is_early_ioremap_ptep(pte_t *ptep); +#ifdef CONFIG_XEN +struct bio_vec; + +extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, + const struct bio_vec *vec2); + +#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ + (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ + (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) +#endif /* CONFIG_XEN */ + #define IO_SPACE_LIMIT 0xffff #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index c8be4566c3d2..a6b28d017c2f 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -169,6 +169,7 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); extern void probe_nr_irqs_gsi(void); +extern int get_nr_irqs_gsi(void); extern void setup_ioapic_ids_from_mpc(void); diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 0bf5b0083650..13b0ebaa512f 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -21,10 +21,8 @@ static inline int irq_canonicalize(int irq) #ifdef CONFIG_X86_32 extern void irq_ctx_init(int cpu); -extern void irq_ctx_exit(int cpu); #else # define irq_ctx_init(cpu) do { } while (0) -# define irq_ctx_exit(cpu) do { } while (0) #endif #define __ARCH_HAS_DO_SOFTIRQ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 83c4bb1d917d..3ea3dc487047 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -121,6 +121,7 @@ #define MSR_AMD64_IBSDCLINAD 0xc0011038 #define MSR_AMD64_IBSDCPHYSAD 0xc0011039 #define MSR_AMD64_IBSCTL 0xc001103a +#define MSR_AMD64_IBSBRTARGET 0xc001103b /* Fam 10h MSRs */ #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index d395540ff894..ca0437c714b2 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -7,6 +7,7 @@ #include <linux/string.h> #include <asm/scatterlist.h> #include <asm/io.h> +#include <asm/x86_init.h> #ifdef __KERNEL__ @@ -94,8 +95,36 @@ static inline void early_quirks(void) { } extern void pci_iommu_alloc(void); -/* MSI arch hook */ -#define arch_setup_msi_irqs arch_setup_msi_irqs +#ifdef CONFIG_PCI_MSI +/* MSI arch specific hooks */ +static inline int x86_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + return x86_msi.setup_msi_irqs(dev, nvec, type); +} + +static inline void x86_teardown_msi_irqs(struct pci_dev *dev) +{ + x86_msi.teardown_msi_irqs(dev); +} + +static inline void x86_teardown_msi_irq(unsigned int irq) +{ + x86_msi.teardown_msi_irq(irq); +} +#define arch_setup_msi_irqs x86_setup_msi_irqs +#define arch_teardown_msi_irqs x86_teardown_msi_irqs +#define arch_teardown_msi_irq x86_teardown_msi_irq +/* implemented in arch/x86/kernel/apic/io_apic. */ +int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); +void native_teardown_msi_irq(unsigned int irq); +/* default to the implementation in drivers/lib/msi.c */ +#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS +void default_teardown_msi_irqs(struct pci_dev *dev); +#else +#define native_setup_msi_irqs NULL +#define native_teardown_msi_irq NULL +#define default_teardown_msi_irqs NULL +#endif #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 49c7219826f9..704526734bef 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -47,6 +47,7 @@ enum pci_bf_sort_state { extern unsigned int pcibios_max_latency; void pcibios_resource_survey(void); +void pcibios_set_cache_line_size(void); /* pci-pc.c */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 6e742cc4251b..550e26b1dbb3 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -111,17 +111,18 @@ union cpuid10_edx { #define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) /* IbsFetchCtl bits/masks */ -#define IBS_FETCH_RAND_EN (1ULL<<57) -#define IBS_FETCH_VAL (1ULL<<49) -#define IBS_FETCH_ENABLE (1ULL<<48) -#define IBS_FETCH_CNT 0xFFFF0000ULL -#define IBS_FETCH_MAX_CNT 0x0000FFFFULL +#define IBS_FETCH_RAND_EN (1ULL<<57) +#define IBS_FETCH_VAL (1ULL<<49) +#define IBS_FETCH_ENABLE (1ULL<<48) +#define IBS_FETCH_CNT 0xFFFF0000ULL +#define IBS_FETCH_MAX_CNT 0x0000FFFFULL /* IbsOpCtl bits */ -#define IBS_OP_CNT_CTL (1ULL<<19) -#define IBS_OP_VAL (1ULL<<18) -#define IBS_OP_ENABLE (1ULL<<17) -#define IBS_OP_MAX_CNT 0x0000FFFFULL +#define IBS_OP_CNT_CTL (1ULL<<19) +#define IBS_OP_VAL (1ULL<<18) +#define IBS_OP_ENABLE (1ULL<<17) +#define IBS_OP_MAX_CNT 0x0000FFFFULL +#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ #ifdef CONFIG_PERF_EVENTS extern void init_hw_perf_events(void); diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 4cfc90824068..4c2f63c7fc1b 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -50,7 +50,7 @@ struct smp_ops { void (*smp_prepare_cpus)(unsigned max_cpus); void (*smp_cpus_done)(unsigned max_cpus); - void (*smp_send_stop)(void); + void (*stop_other_cpus)(int wait); void (*smp_send_reschedule)(int cpu); int (*cpu_up)(unsigned cpu); @@ -73,7 +73,12 @@ extern struct smp_ops smp_ops; static inline void smp_send_stop(void) { - smp_ops.smp_send_stop(); + smp_ops.stop_other_cpus(0); +} + +static inline void stop_other_cpus(void) +{ + smp_ops.stop_other_cpus(1); } static inline void smp_prepare_boot_cpu(void) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index baa579c8e038..64642ad019fb 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -154,9 +154,18 @@ struct x86_platform_ops { int (*i8042_detect)(void); }; +struct pci_dev; + +struct x86_msi_ops { + int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); + void (*teardown_msi_irq)(unsigned int irq); + void (*teardown_msi_irqs)(struct pci_dev *dev); +}; + extern struct x86_init_ops x86_init; extern struct x86_cpuinit_ops x86_cpuinit; extern struct x86_platform_ops x86_platform; +extern struct x86_msi_ops x86_msi; extern void x86_init_noop(void); extern void x86_init_uint_noop(unsigned int unused); diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h new file mode 100644 index 000000000000..2329b3eaf8d3 --- /dev/null +++ b/arch/x86/include/asm/xen/pci.h @@ -0,0 +1,65 @@ +#ifndef _ASM_X86_XEN_PCI_H +#define _ASM_X86_XEN_PCI_H + +#if defined(CONFIG_PCI_XEN) +extern int __init pci_xen_init(void); +extern int __init pci_xen_hvm_init(void); +#define pci_xen 1 +#else +#define pci_xen 0 +#define pci_xen_init (0) +static inline int pci_xen_hvm_init(void) +{ + return -1; +} +#endif +#if defined(CONFIG_XEN_DOM0) +void __init xen_setup_pirqs(void); +#else +static inline void __init xen_setup_pirqs(void) +{ +} +#endif + +#if defined(CONFIG_PCI_MSI) +#if defined(CONFIG_PCI_XEN) +/* The drivers/pci/xen-pcifront.c sets this structure to + * its own functions. + */ +struct xen_pci_frontend_ops { + int (*enable_msi)(struct pci_dev *dev, int **vectors); + void (*disable_msi)(struct pci_dev *dev); + int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec); + void (*disable_msix)(struct pci_dev *dev); +}; + +extern struct xen_pci_frontend_ops *xen_pci_frontend; + +static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, + int **vectors) +{ + if (xen_pci_frontend && xen_pci_frontend->enable_msi) + return xen_pci_frontend->enable_msi(dev, vectors); + return -ENODEV; +} +static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev) +{ + if (xen_pci_frontend && xen_pci_frontend->disable_msi) + xen_pci_frontend->disable_msi(dev); +} +static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, + int **vectors, int nvec) +{ + if (xen_pci_frontend && xen_pci_frontend->enable_msix) + return xen_pci_frontend->enable_msix(dev, vectors, nvec); + return -ENODEV; +} +static inline void xen_pci_frontend_disable_msix(struct pci_dev *dev) +{ + if (xen_pci_frontend && xen_pci_frontend->disable_msix) + xen_pci_frontend->disable_msix(dev); +} +#endif /* CONFIG_PCI_XEN */ +#endif /* CONFIG_PCI_MSI */ + +#endif /* _ASM_X86_XEN_PCI_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 2c833d8c4141..9e13763b6092 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -36,7 +36,6 @@ obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time.o ioport.o ldt.o dumpstack.o obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o obj-$(CONFIG_IRQ_WORK) += irq_work.o -obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o @@ -58,7 +57,6 @@ obj-$(CONFIG_INTEL_TXT) += tboot.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ obj-y += acpi/ -obj-$(CONFIG_SFI) += sfi.o obj-y += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o @@ -82,7 +80,6 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_VM86) += vm86_32.o @@ -104,14 +101,6 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o -obj-$(CONFIG_SCx200) += scx200.o -scx200-y += scx200_32.o - -obj-$(CONFIG_OLPC) += olpc.o -obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o -obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o -obj-$(CONFIG_X86_MRST) += mrst.o - microcode-y := microcode_core.o microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o @@ -124,7 +113,6 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) - obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o obj-$(CONFIG_AUDIT) += audit_64.o obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c05872aa3ce0..71232b941b6c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -513,35 +513,62 @@ int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) return 0; } -/* - * success: return IRQ number (>=0) - * failure: return < 0 - */ -int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) +static int acpi_register_gsi_pic(struct device *dev, u32 gsi, + int trigger, int polarity) { - unsigned int irq; - unsigned int plat_gsi = gsi; - #ifdef CONFIG_PCI /* * Make sure all (legacy) PCI IRQs are set as level-triggered. */ - if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { - if (trigger == ACPI_LEVEL_SENSITIVE) - eisa_set_level_irq(gsi); - } + if (trigger == ACPI_LEVEL_SENSITIVE) + eisa_set_level_irq(gsi); #endif + return gsi; +} + +static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, + int trigger, int polarity) +{ #ifdef CONFIG_X86_IO_APIC - if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { - plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); - } + gsi = mp_register_gsi(dev, gsi, trigger, polarity); #endif + + return gsi; +} + +int (*__acpi_register_gsi)(struct device *dev, u32 gsi, + int trigger, int polarity) = acpi_register_gsi_pic; + +/* + * success: return IRQ number (>=0) + * failure: return < 0 + */ +int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) +{ + unsigned int irq; + unsigned int plat_gsi = gsi; + + plat_gsi = (*__acpi_register_gsi)(dev, gsi, trigger, polarity); irq = gsi_to_irq(plat_gsi); return irq; } +void __init acpi_set_irq_model_pic(void) +{ + acpi_irq_model = ACPI_IRQ_MODEL_PIC; + __acpi_register_gsi = acpi_register_gsi_pic; + acpi_ioapic = 0; +} + +void __init acpi_set_irq_model_ioapic(void) +{ + acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; + __acpi_register_gsi = acpi_register_gsi_ioapic; + acpi_ioapic = 1; +} + /* * ACPI based hotplug support for CPU */ @@ -1259,8 +1286,7 @@ static void __init acpi_process_madt(void) */ error = acpi_parse_madt_ioapic_entries(); if (!error) { - acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; - acpi_ioapic = 1; + acpi_set_irq_model_ioapic(); smp_found_config = 1; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8ae808d110f4..0929191d83cf 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3331,7 +3331,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) return 0; } -int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { int node, ret, sub_handle, index = 0; unsigned int irq, irq_want; @@ -3389,7 +3389,7 @@ error: return ret; } -void arch_teardown_msi_irq(unsigned int irq) +void native_teardown_msi_irq(unsigned int irq) { destroy_irq(irq); } @@ -3650,6 +3650,11 @@ void __init probe_nr_irqs_gsi(void) printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } +int get_nr_irqs_gsi(void) +{ + return nr_irqs_gsi; +} + #ifdef CONFIG_SPARSE_IRQ int __init arch_probe_nr_irqs(void) { diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c1e8c7a51164..ed6310183efb 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -237,6 +237,7 @@ struct x86_pmu { * Intel DebugStore bits */ int bts, pebs; + int bts_active, pebs_active; int pebs_record_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; @@ -380,7 +381,7 @@ static void release_pmc_hardware(void) {} #endif -static int reserve_ds_buffers(void); +static void reserve_ds_buffers(void); static void release_ds_buffers(void); static void hw_perf_event_destroy(struct perf_event *event) @@ -477,7 +478,7 @@ static int x86_setup_perfctr(struct perf_event *event) if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && (hwc->sample_period == 1)) { /* BTS is not supported by this architecture. */ - if (!x86_pmu.bts) + if (!x86_pmu.bts_active) return -EOPNOTSUPP; /* BTS is currently only allowed for user-mode. */ @@ -496,12 +497,13 @@ static int x86_pmu_hw_config(struct perf_event *event) int precise = 0; /* Support for constant skid */ - if (x86_pmu.pebs) + if (x86_pmu.pebs_active) { precise++; - /* Support for IP fixup */ - if (x86_pmu.lbr_nr) - precise++; + /* Support for IP fixup */ + if (x86_pmu.lbr_nr) + precise++; + } if (event->attr.precise_ip > precise) return -EOPNOTSUPP; @@ -543,11 +545,8 @@ static int __x86_pmu_event_init(struct perf_event *event) if (atomic_read(&active_events) == 0) { if (!reserve_pmc_hardware()) err = -EBUSY; - else { - err = reserve_ds_buffers(); - if (err) - release_pmc_hardware(); - } + else + reserve_ds_buffers(); } if (!err) atomic_inc(&active_events); diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 4977f9c400e5..b7dcd9f2b8a0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -74,6 +74,107 @@ static void fini_debug_store_on_cpu(int cpu) wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); } +static int alloc_pebs_buffer(int cpu) +{ + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + int node = cpu_to_node(cpu); + int max, thresh = 1; /* always use a single PEBS record */ + void *buffer; + + if (!x86_pmu.pebs) + return 0; + + buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node); + if (unlikely(!buffer)) + return -ENOMEM; + + max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; + + ds->pebs_buffer_base = (u64)(unsigned long)buffer; + ds->pebs_index = ds->pebs_buffer_base; + ds->pebs_absolute_maximum = ds->pebs_buffer_base + + max * x86_pmu.pebs_record_size; + + ds->pebs_interrupt_threshold = ds->pebs_buffer_base + + thresh * x86_pmu.pebs_record_size; + + return 0; +} + +static void release_pebs_buffer(int cpu) +{ + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + + if (!ds || !x86_pmu.pebs) + return; + + kfree((void *)(unsigned long)ds->pebs_buffer_base); + ds->pebs_buffer_base = 0; +} + +static int alloc_bts_buffer(int cpu) +{ + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + int node = cpu_to_node(cpu); + int max, thresh; + void *buffer; + + if (!x86_pmu.bts) + return 0; + + buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node); + if (unlikely(!buffer)) + return -ENOMEM; + + max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; + thresh = max / 16; + + ds->bts_buffer_base = (u64)(unsigned long)buffer; + ds->bts_index = ds->bts_buffer_base; + ds->bts_absolute_maximum = ds->bts_buffer_base + + max * BTS_RECORD_SIZE; + ds->bts_interrupt_threshold = ds->bts_absolute_maximum - + thresh * BTS_RECORD_SIZE; + + return 0; +} + +static void release_bts_buffer(int cpu) +{ + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + + if (!ds || !x86_pmu.bts) + return; + + kfree((void *)(unsigned long)ds->bts_buffer_base); + ds->bts_buffer_base = 0; +} + +static int alloc_ds_buffer(int cpu) +{ + int node = cpu_to_node(cpu); + struct debug_store *ds; + + ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node); + if (unlikely(!ds)) + return -ENOMEM; + + per_cpu(cpu_hw_events, cpu).ds = ds; + + return 0; +} + +static void release_ds_buffer(int cpu) +{ + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + + if (!ds) + return; + + per_cpu(cpu_hw_events, cpu).ds = NULL; + kfree(ds); +} + static void release_ds_buffers(void) { int cpu; @@ -82,93 +183,77 @@ static void release_ds_buffers(void) return; get_online_cpus(); - for_each_online_cpu(cpu) fini_debug_store_on_cpu(cpu); for_each_possible_cpu(cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - - if (!ds) - continue; - - per_cpu(cpu_hw_events, cpu).ds = NULL; - - kfree((void *)(unsigned long)ds->pebs_buffer_base); - kfree((void *)(unsigned long)ds->bts_buffer_base); - kfree(ds); + release_pebs_buffer(cpu); + release_bts_buffer(cpu); + release_ds_buffer(cpu); } - put_online_cpus(); } -static int reserve_ds_buffers(void) +static void reserve_ds_buffers(void) { - int cpu, err = 0; + int bts_err = 0, pebs_err = 0; + int cpu; + + x86_pmu.bts_active = 0; + x86_pmu.pebs_active = 0; if (!x86_pmu.bts && !x86_pmu.pebs) - return 0; + return; + + if (!x86_pmu.bts) + bts_err = 1; + + if (!x86_pmu.pebs) + pebs_err = 1; get_online_cpus(); for_each_possible_cpu(cpu) { - struct debug_store *ds; - void *buffer; - int max, thresh; + if (alloc_ds_buffer(cpu)) { + bts_err = 1; + pebs_err = 1; + } + + if (!bts_err && alloc_bts_buffer(cpu)) + bts_err = 1; - err = -ENOMEM; - ds = kzalloc(sizeof(*ds), GFP_KERNEL); - if (unlikely(!ds)) + if (!pebs_err && alloc_pebs_buffer(cpu)) + pebs_err = 1; + + if (bts_err && pebs_err) break; - per_cpu(cpu_hw_events, cpu).ds = ds; - - if (x86_pmu.bts) { - buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); - if (unlikely(!buffer)) - break; - - max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; - thresh = max / 16; - - ds->bts_buffer_base = (u64)(unsigned long)buffer; - ds->bts_index = ds->bts_buffer_base; - ds->bts_absolute_maximum = ds->bts_buffer_base + - max * BTS_RECORD_SIZE; - ds->bts_interrupt_threshold = ds->bts_absolute_maximum - - thresh * BTS_RECORD_SIZE; - } + } - if (x86_pmu.pebs) { - buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); - if (unlikely(!buffer)) - break; - - max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; - - ds->pebs_buffer_base = (u64)(unsigned long)buffer; - ds->pebs_index = ds->pebs_buffer_base; - ds->pebs_absolute_maximum = ds->pebs_buffer_base + - max * x86_pmu.pebs_record_size; - /* - * Always use single record PEBS - */ - ds->pebs_interrupt_threshold = ds->pebs_buffer_base + - x86_pmu.pebs_record_size; - } + if (bts_err) { + for_each_possible_cpu(cpu) + release_bts_buffer(cpu); + } - err = 0; + if (pebs_err) { + for_each_possible_cpu(cpu) + release_pebs_buffer(cpu); } - if (err) - release_ds_buffers(); - else { + if (bts_err && pebs_err) { + for_each_possible_cpu(cpu) + release_ds_buffer(cpu); + } else { + if (x86_pmu.bts && !bts_err) + x86_pmu.bts_active = 1; + + if (x86_pmu.pebs && !pebs_err) + x86_pmu.pebs_active = 1; + for_each_online_cpu(cpu) init_debug_store_on_cpu(cpu); } put_online_cpus(); - - return err; } /* @@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void) if (!event) return 0; - if (!ds) + if (!x86_pmu.bts_active) return 0; at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; @@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) struct pebs_record_core *at, *top; int n; - if (!ds || !x86_pmu.pebs) + if (!x86_pmu.pebs_active) return; at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; @@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) u64 status = 0; int bit, n; - if (!ds || !x86_pmu.pebs) + if (!x86_pmu.pebs_active) return; at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; @@ -630,9 +715,8 @@ static void intel_ds_init(void) #else /* CONFIG_CPU_SUP_INTEL */ -static int reserve_ds_buffers(void) +static void reserve_ds_buffers(void) { - return 0; } static void release_ds_buffers(void) diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 0f6376ffa2d9..1bc7f75a5bda 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -82,11 +82,11 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, if (kstack_end(stack)) break; if (i && ((i % STACKSLOTS_PER_LINE) == 0)) - printk("\n%s", log_lvl); - printk(" %08lx", *stack++); + printk(KERN_CONT "\n"); + printk(KERN_CONT " %08lx", *stack++); touch_nmi_watchdog(); } - printk("\n"); + printk(KERN_CONT "\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 57a21f11c791..6a340485249a 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -265,20 +265,20 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, if (stack >= irq_stack && stack <= irq_stack_end) { if (stack == irq_stack_end) { stack = (unsigned long *) (irq_stack_end[-1]); - printk(" <EOI> "); + printk(KERN_CONT " <EOI> "); } } else { if (((long) stack & (THREAD_SIZE-1)) == 0) break; } if (i && ((i % STACKSLOTS_PER_LINE) == 0)) - printk("\n%s", log_lvl); - printk(" %016lx", *stack++); + printk(KERN_CONT "\n"); + printk(KERN_CONT " %016lx", *stack++); touch_nmi_watchdog(); } preempt_enable(); - printk("\n"); + printk(KERN_CONT "\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 50fbbe60e507..64668dbf00a4 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -60,9 +60,6 @@ union irq_ctx { static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); -static DEFINE_PER_CPU_MULTIPAGE_ALIGNED(union irq_ctx, hardirq_stack, THREAD_SIZE); -static DEFINE_PER_CPU_MULTIPAGE_ALIGNED(union irq_ctx, softirq_stack, THREAD_SIZE); - static void call_on_stack(void *func, void *stack) { asm volatile("xchgl %%ebx,%%esp \n" @@ -128,7 +125,7 @@ void __cpuinit irq_ctx_init(int cpu) if (per_cpu(hardirq_ctx, cpu)) return; - irqctx = &per_cpu(hardirq_stack, cpu); + irqctx = (union irq_ctx *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER); irqctx->tinfo.task = NULL; irqctx->tinfo.exec_domain = NULL; irqctx->tinfo.cpu = cpu; @@ -137,7 +134,7 @@ void __cpuinit irq_ctx_init(int cpu) per_cpu(hardirq_ctx, cpu) = irqctx; - irqctx = &per_cpu(softirq_stack, cpu); + irqctx = (union irq_ctx *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER); irqctx->tinfo.task = NULL; irqctx->tinfo.exec_domain = NULL; irqctx->tinfo.cpu = cpu; @@ -150,11 +147,6 @@ void __cpuinit irq_ctx_init(int cpu) cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); } -void irq_ctx_exit(int cpu) -{ - per_cpu(hardirq_ctx, cpu) = NULL; -} - asmlinkage void do_softirq(void) { unsigned long flags; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 70c4872cd8aa..45892dc4b72a 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -801,7 +801,8 @@ void ptrace_disable(struct task_struct *child) static const struct user_regset_view user_x86_32_view; /* Initialized below. */ #endif -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (unsigned long __user *)data; @@ -812,8 +813,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) unsigned long tmp; ret = -EIO; - if ((addr & (sizeof(data) - 1)) || addr < 0 || - addr >= sizeof(struct user)) + if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user)) break; tmp = 0; /* Default return condition */ @@ -830,8 +830,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ ret = -EIO; - if ((addr & (sizeof(data) - 1)) || addr < 0 || - addr >= sizeof(struct user)) + if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user)) break; if (addr < sizeof(struct user_regs_struct)) @@ -888,17 +887,17 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION case PTRACE_GET_THREAD_AREA: - if (addr < 0) + if ((int) addr < 0) return -EIO; ret = do_get_thread_area(child, addr, - (struct user_desc __user *) data); + (struct user_desc __user *)data); break; case PTRACE_SET_THREAD_AREA: - if (addr < 0) + if ((int) addr < 0) return -EIO; ret = do_set_thread_area(child, addr, - (struct user_desc __user *) data, 0); + (struct user_desc __user *)data, 0); break; #endif diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f7f53dcd3e0a..c495aa8d4815 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -635,7 +635,7 @@ void native_machine_shutdown(void) /* O.K Now that I'm on the appropriate processor, * stop all of the others. */ - smp_send_stop(); + stop_other_cpus(); #endif lapic_shutdown(); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 95a32746fbf9..21c6746338af 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -769,6 +769,8 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.arch_setup(); + resource_alloc_from_bottom = 0; + iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; setup_memory_map(); parse_setup_data(); /* update the e820_saved too */ diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index d801210945d6..513deac7228d 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -159,10 +159,10 @@ asmlinkage void smp_reboot_interrupt(void) irq_exit(); } -static void native_smp_send_stop(void) +static void native_stop_other_cpus(int wait) { unsigned long flags; - unsigned long wait; + unsigned long timeout; if (reboot_force) return; @@ -179,9 +179,12 @@ static void native_smp_send_stop(void) if (num_online_cpus() > 1) { apic->send_IPI_allbutself(REBOOT_VECTOR); - /* Don't wait longer than a second */ - wait = USEC_PER_SEC; - while (num_online_cpus() > 1 && wait--) + /* + * Don't wait longer than a second if the caller + * didn't ask us to wait. + */ + timeout = USEC_PER_SEC; + while (num_online_cpus() > 1 && (wait || timeout--)) udelay(1); } @@ -227,7 +230,7 @@ struct smp_ops smp_ops = { .smp_prepare_cpus = native_smp_prepare_cpus, .smp_cpus_done = native_smp_cpus_done, - .smp_send_stop = native_smp_send_stop, + .stop_other_cpus = native_stop_other_cpus, .smp_send_reschedule = native_smp_send_reschedule, .cpu_up = native_cpu_up, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6c7faecd9e4a..083e99d1b7df 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1373,7 +1373,6 @@ void play_dead_common(void) { idle_task_exit(); reset_lazy_tlbstate(); - irq_ctx_exit(raw_smp_processor_id()); c1e_remove_cpu(raw_smp_processor_id()); mb(); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index cd6da6bf3eca..ceb2911aa439 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -6,10 +6,12 @@ #include <linux/init.h> #include <linux/ioport.h> #include <linux/module.h> +#include <linux/pci.h> #include <asm/bios_ebda.h> #include <asm/paravirt.h> #include <asm/pci_x86.h> +#include <asm/pci.h> #include <asm/mpspec.h> #include <asm/setup.h> #include <asm/apic.h> @@ -99,3 +101,8 @@ struct x86_platform_ops x86_platform = { }; EXPORT_SYMBOL_GPL(x86_platform); +struct x86_msi_ops x86_msi = { + .setup_msi_irqs = native_setup_msi_irqs, + .teardown_msi_irq = native_teardown_msi_irq, + .teardown_msi_irqs = default_teardown_msi_irqs, +}; diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index d723e369003c..b49962662101 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -74,7 +74,7 @@ void __kunmap_atomic(void *kvaddr) vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { int idx, type; - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); idx = type + KM_TYPE_NR * smp_processor_id(); #ifdef CONFIG_DEBUG_HIGHMEM @@ -87,6 +87,7 @@ void __kunmap_atomic(void *kvaddr) * attributes or becomes a protected page in a hypervisor. */ kpte_clear_flush(kmap_pte-idx, vaddr); + kmap_atomic_idx_pop(); } #ifdef CONFIG_DEBUG_HIGHMEM else { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 84346200e783..71a59296af80 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -51,7 +51,6 @@ #include <asm/numa.h> #include <asm/cacheflush.h> #include <asm/init.h> -#include <linux/bootmem.h> static int __init parse_direct_gbpages_off(char *arg) { diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 75a3d7f24a2c..7b179b499fa3 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -98,7 +98,7 @@ iounmap_atomic(void __iomem *kvaddr) vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { int idx, type; - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); idx = type + KM_TYPE_NR * smp_processor_id(); #ifdef CONFIG_DEBUG_HIGHMEM @@ -111,6 +111,7 @@ iounmap_atomic(void __iomem *kvaddr) * attributes or becomes a protected page in a hypervisor. */ kpte_clear_flush(kmap_pte-idx, vaddr); + kmap_atomic_idx_pop(); } pagefault_enable(); diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index bd1489c3ce09..4e8baad36d37 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -726,6 +726,12 @@ int __init op_nmi_init(struct oprofile_operations *ops) case 0x11: cpu_type = "x86-64/family11h"; break; + case 0x12: + cpu_type = "x86-64/family12h"; + break; + case 0x14: + cpu_type = "x86-64/family14h"; + break; default: return -ENODEV; } diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 42fb46f83883..a011bcc0f943 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -48,17 +48,24 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS]; static u32 ibs_caps; -struct op_ibs_config { +struct ibs_config { unsigned long op_enabled; unsigned long fetch_enabled; unsigned long max_cnt_fetch; unsigned long max_cnt_op; unsigned long rand_en; unsigned long dispatched_ops; + unsigned long branch_target; }; -static struct op_ibs_config ibs_config; -static u64 ibs_op_ctl; +struct ibs_state { + u64 ibs_op_ctl; + int branch_target; + unsigned long sample_size; +}; + +static struct ibs_config ibs_config; +static struct ibs_state ibs_state; /* * IBS cpuid feature detection @@ -71,8 +78,16 @@ static u64 ibs_op_ctl; * bit 0 is used to indicate the existence of IBS. */ #define IBS_CAPS_AVAIL (1U<<0) +#define IBS_CAPS_FETCHSAM (1U<<1) +#define IBS_CAPS_OPSAM (1U<<2) #define IBS_CAPS_RDWROPCNT (1U<<3) #define IBS_CAPS_OPCNT (1U<<4) +#define IBS_CAPS_BRNTRGT (1U<<5) +#define IBS_CAPS_OPCNTEXT (1U<<6) + +#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ + | IBS_CAPS_FETCHSAM \ + | IBS_CAPS_OPSAM) /* * IBS APIC setup @@ -99,12 +114,12 @@ static u32 get_ibs_caps(void) /* check IBS cpuid feature flags */ max_level = cpuid_eax(0x80000000); if (max_level < IBS_CPUID_FEATURES) - return IBS_CAPS_AVAIL; + return IBS_CAPS_DEFAULT; ibs_caps = cpuid_eax(IBS_CPUID_FEATURES); if (!(ibs_caps & IBS_CAPS_AVAIL)) /* cpuid flags not valid */ - return IBS_CAPS_AVAIL; + return IBS_CAPS_DEFAULT; return ibs_caps; } @@ -197,8 +212,8 @@ op_amd_handle_ibs(struct pt_regs * const regs, rdmsrl(MSR_AMD64_IBSOPCTL, ctl); if (ctl & IBS_OP_VAL) { rdmsrl(MSR_AMD64_IBSOPRIP, val); - oprofile_write_reserve(&entry, regs, val, - IBS_OP_CODE, IBS_OP_SIZE); + oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE, + ibs_state.sample_size); oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA, val); oprofile_add_data64(&entry, val); @@ -210,10 +225,14 @@ op_amd_handle_ibs(struct pt_regs * const regs, oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); oprofile_add_data64(&entry, val); + if (ibs_state.branch_target) { + rdmsrl(MSR_AMD64_IBSBRTARGET, val); + oprofile_add_data(&entry, (unsigned long)val); + } oprofile_write_commit(&entry); /* reenable the IRQ */ - ctl = op_amd_randomize_ibs_op(ibs_op_ctl); + ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl); wrmsrl(MSR_AMD64_IBSOPCTL, ctl); } } @@ -226,21 +245,32 @@ static inline void op_amd_start_ibs(void) if (!ibs_caps) return; + memset(&ibs_state, 0, sizeof(ibs_state)); + + /* + * Note: Since the max count settings may out of range we + * write back the actual used values so that userland can read + * it. + */ + if (ibs_config.fetch_enabled) { - val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT; + val = ibs_config.max_cnt_fetch >> 4; + val = min(val, IBS_FETCH_MAX_CNT); + ibs_config.max_cnt_fetch = val << 4; val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; val |= IBS_FETCH_ENABLE; wrmsrl(MSR_AMD64_IBSFETCHCTL, val); } if (ibs_config.op_enabled) { - ibs_op_ctl = ibs_config.max_cnt_op >> 4; + val = ibs_config.max_cnt_op >> 4; if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) { /* * IbsOpCurCnt not supported. See * op_amd_randomize_ibs_op() for details. */ - ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL); + val = clamp(val, 0x0081ULL, 0xFF80ULL); + ibs_config.max_cnt_op = val << 4; } else { /* * The start value is randomized with a @@ -248,13 +278,24 @@ static inline void op_amd_start_ibs(void) * with the half of the randomized range. Also * avoid underflows. */ - ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET, - IBS_OP_MAX_CNT); + val += IBS_RANDOM_MAXCNT_OFFSET; + if (ibs_caps & IBS_CAPS_OPCNTEXT) + val = min(val, IBS_OP_MAX_CNT_EXT); + else + val = min(val, IBS_OP_MAX_CNT); + ibs_config.max_cnt_op = + (val - IBS_RANDOM_MAXCNT_OFFSET) << 4; + } + val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT); + val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; + val |= IBS_OP_ENABLE; + ibs_state.ibs_op_ctl = val; + ibs_state.sample_size = IBS_OP_SIZE; + if (ibs_config.branch_target) { + ibs_state.branch_target = 1; + ibs_state.sample_size++; } - if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops) - ibs_op_ctl |= IBS_OP_CNT_CTL; - ibs_op_ctl |= IBS_OP_ENABLE; - val = op_amd_randomize_ibs_op(ibs_op_ctl); + val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl); wrmsrl(MSR_AMD64_IBSOPCTL, val); } } @@ -281,29 +322,25 @@ static inline int eilvt_is_available(int offset) static inline int ibs_eilvt_valid(void) { - u64 val; int offset; + u64 val; rdmsrl(MSR_AMD64_IBSCTL, val); + offset = val & IBSCTL_LVT_OFFSET_MASK; + if (!(val & IBSCTL_LVT_OFFSET_VALID)) { - pr_err(FW_BUG "cpu %d, invalid IBS " - "interrupt offset %d (MSR%08X=0x%016llx)", - smp_processor_id(), offset, - MSR_AMD64_IBSCTL, val); + pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", + smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); return 0; } - offset = val & IBSCTL_LVT_OFFSET_MASK; - - if (eilvt_is_available(offset)) - return !0; - - pr_err(FW_BUG "cpu %d, IBS interrupt offset %d " - "not available (MSR%08X=0x%016llx)", - smp_processor_id(), offset, - MSR_AMD64_IBSCTL, val); + if (!eilvt_is_available(offset)) { + pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", + smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); + return 0; + } - return 0; + return 1; } static inline int get_ibs_offset(void) @@ -630,28 +667,33 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) /* model specific files */ /* setup some reasonable defaults */ + memset(&ibs_config, 0, sizeof(ibs_config)); ibs_config.max_cnt_fetch = 250000; - ibs_config.fetch_enabled = 0; ibs_config.max_cnt_op = 250000; - ibs_config.op_enabled = 0; - ibs_config.dispatched_ops = 0; - - dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); - oprofilefs_create_ulong(sb, dir, "enable", - &ibs_config.fetch_enabled); - oprofilefs_create_ulong(sb, dir, "max_count", - &ibs_config.max_cnt_fetch); - oprofilefs_create_ulong(sb, dir, "rand_enable", - &ibs_config.rand_en); - - dir = oprofilefs_mkdir(sb, root, "ibs_op"); - oprofilefs_create_ulong(sb, dir, "enable", - &ibs_config.op_enabled); - oprofilefs_create_ulong(sb, dir, "max_count", - &ibs_config.max_cnt_op); - if (ibs_caps & IBS_CAPS_OPCNT) - oprofilefs_create_ulong(sb, dir, "dispatched_ops", - &ibs_config.dispatched_ops); + + if (ibs_caps & IBS_CAPS_FETCHSAM) { + dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); + oprofilefs_create_ulong(sb, dir, "enable", + &ibs_config.fetch_enabled); + oprofilefs_create_ulong(sb, dir, "max_count", + &ibs_config.max_cnt_fetch); + oprofilefs_create_ulong(sb, dir, "rand_enable", + &ibs_config.rand_en); + } + + if (ibs_caps & IBS_CAPS_OPSAM) { + dir = oprofilefs_mkdir(sb, root, "ibs_op"); + oprofilefs_create_ulong(sb, dir, "enable", + &ibs_config.op_enabled); + oprofilefs_create_ulong(sb, dir, "max_count", + &ibs_config.max_cnt_op); + if (ibs_caps & IBS_CAPS_OPCNT) + oprofilefs_create_ulong(sb, dir, "dispatched_ops", + &ibs_config.dispatched_ops); + if (ibs_caps & IBS_CAPS_BRNTRGT) + oprofilefs_create_ulong(sb, dir, "branch_target", + &ibs_config.branch_target); + } return 0; } diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index a0207a7fdf39..effd96e33f16 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_PCI_BIOS) += pcbios.o obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o obj-$(CONFIG_PCI_DIRECT) += direct.o obj-$(CONFIG_PCI_OLPC) += olpc.o +obj-$(CONFIG_PCI_XEN) += xen.o obj-y += fixup.o obj-$(CONFIG_ACPI) += acpi.o diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index a0772af64efb..f7c8a399978c 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -421,16 +421,10 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) return bus; } - -int __init pcibios_init(void) +void __init pcibios_set_cache_line_size(void) { struct cpuinfo_x86 *c = &boot_cpu_data; - if (!raw_pci_ops) { - printk(KERN_WARNING "PCI: System does not support PCI\n"); - return 0; - } - /* * Set PCI cacheline size to that of the CPU if the CPU has reported it. * (For older CPUs that don't support cpuid, we se it to 32 bytes @@ -445,7 +439,16 @@ int __init pcibios_init(void) pci_dfl_cache_line_size = 32 >> 2; printk(KERN_DEBUG "PCI: Unknown cacheline size. Setting to 32 bytes\n"); } +} + +int __init pcibios_init(void) +{ + if (!raw_pci_ops) { + printk(KERN_WARNING "PCI: System does not support PCI\n"); + return 0; + } + pcibios_set_cache_line_size(); pcibios_resource_survey(); if (pci_bf_sort >= pci_force_bf) diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 55253095be84..c4bb261c106e 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -65,16 +65,21 @@ pcibios_align_resource(void *data, const struct resource *res, resource_size_t size, resource_size_t align) { struct pci_dev *dev = data; - resource_size_t start = res->start; + resource_size_t start = round_down(res->end - size + 1, align); if (res->flags & IORESOURCE_IO) { - if (skip_isa_ioresource_align(dev)) - return start; - if (start & 0x300) - start = (start + 0x3ff) & ~0x3ff; + + /* + * If we're avoiding ISA aliases, the largest contiguous I/O + * port space is 256 bytes. Clearing bits 9 and 10 preserves + * all 256-byte and smaller alignments, so the result will + * still be correctly aligned. + */ + if (!skip_isa_ioresource_align(dev)) + start &= ~0x300; } else if (res->flags & IORESOURCE_MEM) { if (start < BIOS_END) - start = BIOS_END; + start = res->end; /* fail; no space */ } return start; } @@ -311,6 +316,8 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, */ prot |= _PAGE_CACHE_UC_MINUS; + prot |= _PAGE_IOMAP; /* creating a mapping for IO */ + vma->vm_page_prot = __pgprot(prot); if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index f547ee05f715..9f9bfb705cf9 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -584,27 +584,28 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route case PCI_DEVICE_ID_INTEL_ICH9_3: case PCI_DEVICE_ID_INTEL_ICH9_4: case PCI_DEVICE_ID_INTEL_ICH9_5: - case PCI_DEVICE_ID_INTEL_TOLAPAI_0: + case PCI_DEVICE_ID_INTEL_EP80579_0: case PCI_DEVICE_ID_INTEL_ICH10_0: case PCI_DEVICE_ID_INTEL_ICH10_1: case PCI_DEVICE_ID_INTEL_ICH10_2: case PCI_DEVICE_ID_INTEL_ICH10_3: + case PCI_DEVICE_ID_INTEL_PATSBURG_LPC: r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; return 1; } - if ((device >= PCI_DEVICE_ID_INTEL_PCH_LPC_MIN) && - (device <= PCI_DEVICE_ID_INTEL_PCH_LPC_MAX)) { + if ((device >= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN) && + (device <= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX)) { r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; return 1; } - if ((device >= PCI_DEVICE_ID_INTEL_CPT_LPC_MIN) && - (device <= PCI_DEVICE_ID_INTEL_CPT_LPC_MAX)) { + if ((device >= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN) && + (device <= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX)) { r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index a918553ebc75..e282886616a0 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -65,7 +65,6 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, int end, u64 addr) { struct pci_mmcfg_region *new; - int num_buses; struct resource *res; if (addr == 0) @@ -82,10 +81,9 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, list_add_sorted(new); - num_buses = end - start + 1; res = &new->res; res->start = addr + PCI_MMCFG_BUS_OFFSET(start); - res->end = addr + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; + res->end = addr + PCI_MMCFG_BUS_OFFSET(end + 1) - 1; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; snprintf(new->name, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end); diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c new file mode 100644 index 000000000000..117f5b8daf75 --- /dev/null +++ b/arch/x86/pci/xen.c @@ -0,0 +1,414 @@ +/* + * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux + * x86 PCI core to support the Xen PCI Frontend + * + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/acpi.h> + +#include <linux/io.h> +#include <asm/io_apic.h> +#include <asm/pci_x86.h> + +#include <asm/xen/hypervisor.h> + +#include <xen/features.h> +#include <xen/events.h> +#include <asm/xen/pci.h> + +#ifdef CONFIG_ACPI +static int xen_hvm_register_pirq(u32 gsi, int triggering) +{ + int rc, irq; + struct physdev_map_pirq map_irq; + int shareable = 0; + char *name; + + if (!xen_hvm_domain()) + return -1; + + map_irq.domid = DOMID_SELF; + map_irq.type = MAP_PIRQ_TYPE_GSI; + map_irq.index = gsi; + map_irq.pirq = -1; + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + if (rc) { + printk(KERN_WARNING "xen map irq failed %d\n", rc); + return -1; + } + + if (triggering == ACPI_EDGE_SENSITIVE) { + shareable = 0; + name = "ioapic-edge"; + } else { + shareable = 1; + name = "ioapic-level"; + } + + irq = xen_map_pirq_gsi(map_irq.pirq, gsi, shareable, name); + + printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); + + return irq; +} + +static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, + int trigger, int polarity) +{ + return xen_hvm_register_pirq(gsi, trigger); +} +#endif + +#if defined(CONFIG_PCI_MSI) +#include <linux/msi.h> +#include <asm/msidef.h> + +struct xen_pci_frontend_ops *xen_pci_frontend; +EXPORT_SYMBOL_GPL(xen_pci_frontend); + +static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, + struct msi_msg *msg) +{ + /* We set vector == 0 to tell the hypervisor we don't care about it, + * but we want a pirq setup instead. + * We use the dest_id field to pass the pirq that we want. */ + msg->address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(pirq); + msg->address_lo = + MSI_ADDR_BASE_LO | + MSI_ADDR_DEST_MODE_PHYSICAL | + MSI_ADDR_REDIRECTION_CPU | + MSI_ADDR_DEST_ID(pirq); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + /* delivery mode reserved */ + (3 << 8) | + MSI_DATA_VECTOR(0); +} + +static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, pirq, ret = 0; + struct msi_desc *msidesc; + struct msi_msg msg; + + list_for_each_entry(msidesc, &dev->msi_list, list) { + xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? + "msi-x" : "msi", &irq, &pirq); + if (irq < 0 || pirq < 0) + goto error; + printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); + xen_msi_compose_msg(dev, pirq, &msg); + ret = set_irq_msi(irq, msidesc); + if (ret < 0) + goto error_while; + write_msi_msg(irq, &msg); + } + return 0; + +error_while: + unbind_from_irqhandler(irq, NULL); +error: + if (ret == -ENODEV) + dev_err(&dev->dev, "Xen PCI frontend has not registered" \ + " MSI/MSI-X support!\n"); + + return ret; +} + +/* + * For MSI interrupts we have to use drivers/xen/event.s functions to + * allocate an irq_desc and setup the right */ + + +static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, ret, i; + struct msi_desc *msidesc; + int *v; + + v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL); + if (!v) + return -ENOMEM; + + if (type == PCI_CAP_ID_MSIX) + ret = xen_pci_frontend_enable_msix(dev, &v, nvec); + else + ret = xen_pci_frontend_enable_msi(dev, &v); + if (ret) + goto error; + i = 0; + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = xen_allocate_pirq(v[i], 0, /* not sharable */ + (type == PCI_CAP_ID_MSIX) ? + "pcifront-msi-x" : "pcifront-msi"); + if (irq < 0) + return -1; + + ret = set_irq_msi(irq, msidesc); + if (ret) + goto error_while; + i++; + } + kfree(v); + return 0; + +error_while: + unbind_from_irqhandler(irq, NULL); +error: + if (ret == -ENODEV) + dev_err(&dev->dev, "Xen PCI frontend has not registered" \ + " MSI/MSI-X support!\n"); + + kfree(v); + return ret; +} + +static void xen_teardown_msi_irqs(struct pci_dev *dev) +{ + struct msi_desc *msidesc; + + msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); + if (msidesc->msi_attrib.is_msix) + xen_pci_frontend_disable_msix(dev); + else + xen_pci_frontend_disable_msi(dev); +} + +static void xen_teardown_msi_irq(unsigned int irq) +{ + xen_destroy_irq(irq); +} + +static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, ret; + struct msi_desc *msidesc; + + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = xen_create_msi_irq(dev, msidesc, type); + if (irq < 0) + return -1; + + ret = set_irq_msi(irq, msidesc); + if (ret) + goto error; + } + return 0; + +error: + xen_destroy_irq(irq); + return ret; +} +#endif + +static int xen_pcifront_enable_irq(struct pci_dev *dev) +{ + int rc; + int share = 1; + + dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq); + + if (dev->irq < 0) + return -EINVAL; + + if (dev->irq < NR_IRQS_LEGACY) + share = 0; + + rc = xen_allocate_pirq(dev->irq, share, "pcifront"); + if (rc < 0) { + dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n", + dev->irq, rc); + return rc; + } + return 0; +} + +int __init pci_xen_init(void) +{ + if (!xen_pv_domain() || xen_initial_domain()) + return -ENODEV; + + printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n"); + + pcibios_set_cache_line_size(); + + pcibios_enable_irq = xen_pcifront_enable_irq; + pcibios_disable_irq = NULL; + +#ifdef CONFIG_ACPI + /* Keep ACPI out of the picture */ + acpi_noirq = 1; +#endif + +#ifdef CONFIG_PCI_MSI + x86_msi.setup_msi_irqs = xen_setup_msi_irqs; + x86_msi.teardown_msi_irq = xen_teardown_msi_irq; + x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs; +#endif + return 0; +} + +int __init pci_xen_hvm_init(void) +{ + if (!xen_feature(XENFEAT_hvm_pirqs)) + return 0; + +#ifdef CONFIG_ACPI + /* + * We don't want to change the actual ACPI delivery model, + * just how GSIs get registered. + */ + __acpi_register_gsi = acpi_register_gsi_xen_hvm; +#endif + +#ifdef CONFIG_PCI_MSI + x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs; + x86_msi.teardown_msi_irq = xen_teardown_msi_irq; +#endif + return 0; +} + +#ifdef CONFIG_XEN_DOM0 +static int xen_register_pirq(u32 gsi, int triggering) +{ + int rc, irq; + struct physdev_map_pirq map_irq; + int shareable = 0; + char *name; + + if (!xen_pv_domain()) + return -1; + + if (triggering == ACPI_EDGE_SENSITIVE) { + shareable = 0; + name = "ioapic-edge"; + } else { + shareable = 1; + name = "ioapic-level"; + } + + irq = xen_allocate_pirq(gsi, shareable, name); + + printk(KERN_DEBUG "xen: --> irq=%d\n", irq); + + if (irq < 0) + goto out; + + map_irq.domid = DOMID_SELF; + map_irq.type = MAP_PIRQ_TYPE_GSI; + map_irq.index = gsi; + map_irq.pirq = irq; + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + if (rc) { + printk(KERN_WARNING "xen map irq failed %d\n", rc); + return -1; + } + +out: + return irq; +} + +static int xen_register_gsi(u32 gsi, int triggering, int polarity) +{ + int rc, irq; + struct physdev_setup_gsi setup_gsi; + + if (!xen_pv_domain()) + return -1; + + printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n", + gsi, triggering, polarity); + + irq = xen_register_pirq(gsi, triggering); + + setup_gsi.gsi = gsi; + setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1); + setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1); + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); + if (rc == -EEXIST) + printk(KERN_INFO "Already setup the GSI :%d\n", gsi); + else if (rc) { + printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n", + gsi, rc); + } + + return irq; +} + +static __init void xen_setup_acpi_sci(void) +{ + int rc; + int trigger, polarity; + int gsi = acpi_sci_override_gsi; + + if (!gsi) + return; + + rc = acpi_get_override_irq(gsi, &trigger, &polarity); + if (rc) { + printk(KERN_WARNING "xen: acpi_get_override_irq failed for acpi" + " sci, rc=%d\n", rc); + return; + } + trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; + polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; + + printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d " + "polarity=%d\n", gsi, trigger, polarity); + + gsi = xen_register_gsi(gsi, trigger, polarity); + printk(KERN_INFO "xen: acpi sci %d\n", gsi); + + return; +} + +static int acpi_register_gsi_xen(struct device *dev, u32 gsi, + int trigger, int polarity) +{ + return xen_register_gsi(gsi, trigger, polarity); +} + +static int __init pci_xen_initial_domain(void) +{ +#ifdef CONFIG_PCI_MSI + x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs; + x86_msi.teardown_msi_irq = xen_teardown_msi_irq; +#endif + xen_setup_acpi_sci(); + __acpi_register_gsi = acpi_register_gsi_xen; + + return 0; +} + +void __init xen_setup_pirqs(void) +{ + int irq; + + pci_xen_initial_domain(); + + if (0 == nr_ioapics) { + for (irq = 0; irq < NR_IRQS_LEGACY; irq++) + xen_allocate_pirq(irq, 0, "xt-pic"); + return; + } + + /* Pre-allocate legacy irqs */ + for (irq = 0; irq < NR_IRQS_LEGACY; irq++) { + int trigger, polarity; + + if (acpi_get_override_irq(irq, &trigger, &polarity) == -1) + continue; + + xen_register_pirq(irq, + trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE); + } +} +#endif diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile new file mode 100644 index 000000000000..7bf70b812fa2 --- /dev/null +++ b/arch/x86/platform/Makefile @@ -0,0 +1,8 @@ +# Platform specific code goes here +obj-y += efi/ +obj-y += mrst/ +obj-y += olpc/ +obj-y += scx200/ +obj-y += sfi/ +obj-y += visws/ +obj-y += uv/ diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile new file mode 100644 index 000000000000..73b8be0f3675 --- /dev/null +++ b/arch/x86/platform/efi/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o diff --git a/arch/x86/kernel/efi.c b/arch/x86/platform/efi/efi.c index 0fe27d7c6258..0fe27d7c6258 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/platform/efi/efi.c diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/platform/efi/efi_32.c index 5cab48ee61a4..5cab48ee61a4 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/platform/efi/efi_64.c index ac0621a7ac3d..ac0621a7ac3d 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c diff --git a/arch/x86/kernel/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S index fbe66e626c09..fbe66e626c09 100644 --- a/arch/x86/kernel/efi_stub_32.S +++ b/arch/x86/platform/efi/efi_stub_32.S diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 4c07ccab8146..4c07ccab8146 100644 --- a/arch/x86/kernel/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile new file mode 100644 index 000000000000..efbbc552fa95 --- /dev/null +++ b/arch/x86/platform/mrst/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_X86_MRST) += mrst.o diff --git a/arch/x86/kernel/mrst.c b/arch/x86/platform/mrst/mrst.c index 79ae68154e87..79ae68154e87 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/platform/mrst/mrst.c diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile new file mode 100644 index 000000000000..c31b8fcb5a86 --- /dev/null +++ b/arch/x86/platform/olpc/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_OLPC) += olpc.o +obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o +obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o diff --git a/arch/x86/kernel/olpc-xo1.c b/arch/x86/platform/olpc/olpc-xo1.c index f5442c03abc3..f5442c03abc3 100644 --- a/arch/x86/kernel/olpc-xo1.c +++ b/arch/x86/platform/olpc/olpc-xo1.c diff --git a/arch/x86/kernel/olpc.c b/arch/x86/platform/olpc/olpc.c index edaf3fe8dc5e..edaf3fe8dc5e 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/platform/olpc/olpc.c diff --git a/arch/x86/kernel/olpc_ofw.c b/arch/x86/platform/olpc/olpc_ofw.c index 787320464379..787320464379 100644 --- a/arch/x86/kernel/olpc_ofw.c +++ b/arch/x86/platform/olpc/olpc_ofw.c diff --git a/arch/x86/platform/scx200/Makefile b/arch/x86/platform/scx200/Makefile new file mode 100644 index 000000000000..762b4c7f4314 --- /dev/null +++ b/arch/x86/platform/scx200/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_SCx200) += scx200.o +scx200-y += scx200_32.o diff --git a/arch/x86/kernel/scx200_32.c b/arch/x86/platform/scx200/scx200_32.c index 7e004acbe526..7e004acbe526 100644 --- a/arch/x86/kernel/scx200_32.c +++ b/arch/x86/platform/scx200/scx200_32.c diff --git a/arch/x86/platform/sfi/Makefile b/arch/x86/platform/sfi/Makefile new file mode 100644 index 000000000000..cc5db1168a5e --- /dev/null +++ b/arch/x86/platform/sfi/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SFI) += sfi.o diff --git a/arch/x86/kernel/sfi.c b/arch/x86/platform/sfi/sfi.c index dd4c281ffe57..dd4c281ffe57 100644 --- a/arch/x86/kernel/sfi.c +++ b/arch/x86/platform/sfi/sfi.c diff --git a/arch/x86/platform/uv/Makefile b/arch/x86/platform/uv/Makefile new file mode 100644 index 000000000000..6c40995fefb8 --- /dev/null +++ b/arch/x86/platform/uv/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 8bc57baaa9ad..8bc57baaa9ad 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 20ea20a39e2a..20ea20a39e2a 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 7b24460917d5..7b24460917d5 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c index 309c70fb7759..309c70fb7759 100644 --- a/arch/x86/kernel/uv_sysfs.c +++ b/arch/x86/platform/uv/uv_sysfs.c diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/platform/uv/uv_time.c index 56e421bc379b..56e421bc379b 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c diff --git a/arch/x86/platform/visws/Makefile b/arch/x86/platform/visws/Makefile new file mode 100644 index 000000000000..91bc17ab2fd5 --- /dev/null +++ b/arch/x86/platform/visws/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_X86_VISWS) += visws_quirks.o diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c index 3371bd053b89..3371bd053b89 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/platform/visws/visws_quirks.c diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 90a7f5ad6916..5b54892e4bc3 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -13,6 +13,16 @@ config XEN kernel to boot in a paravirtualized environment under the Xen hypervisor. +config XEN_DOM0 + def_bool y + depends on XEN && PCI_XEN && SWIOTLB_XEN + depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI + +# Dummy symbol since people have come to rely on the PRIVILEGED_GUEST +# name in tools. +config XEN_PRIVILEGED_GUEST + def_bool XEN_DOM0 + config XEN_PVHVM def_bool y depends on XEN diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 44ab12dc2a12..235c0f4d3861 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -46,6 +46,7 @@ #include <asm/paravirt.h> #include <asm/apic.h> #include <asm/page.h> +#include <asm/xen/pci.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> #include <asm/fixmap.h> @@ -59,7 +60,6 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/reboot.h> -#include <asm/setup.h> #include <asm/stackprotector.h> #include <asm/hypervisor.h> @@ -237,6 +237,7 @@ static __init void xen_init_cpuid_mask(void) cpuid_leaf1_edx_mask = ~((1 << X86_FEATURE_MCE) | /* disable MCE */ (1 << X86_FEATURE_MCA) | /* disable MCA */ + (1 << X86_FEATURE_MTRR) | /* disable MTRR */ (1 << X86_FEATURE_ACC)); /* thermal monitoring */ if (!xen_initial_domain()) @@ -1016,7 +1017,7 @@ static void xen_reboot(int reason) struct sched_shutdown r = { .reason = reason }; #ifdef CONFIG_SMP - smp_send_stop(); + stop_other_cpus(); #endif if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) @@ -1185,6 +1186,7 @@ asmlinkage void __init xen_start_kernel(void) xen_raw_console_write("mapping kernel into physical memory\n"); pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); + xen_ident_map_ISA(); /* Allocate and initialize top and mid mfn levels for p2m structure */ xen_build_mfn_list_list(); @@ -1223,6 +1225,8 @@ asmlinkage void __init xen_start_kernel(void) add_preferred_console("xenboot", 0, NULL); add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); + if (pci_xen) + x86_init.pci.arch_init = pci_xen_init; } else { /* Make sure ACS will be enabled */ pci_request_acs(); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 9631c90907eb..c237b810b03f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1975,6 +1975,7 @@ static void *m2v(phys_addr_t maddr) return __ka(m2p(maddr)); } +/* Set the page permissions on an identity-mapped pages */ static void set_page_prot(void *addr, pgprot_t prot) { unsigned long pfn = __pa(addr) >> PAGE_SHIFT; @@ -2159,6 +2160,8 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, } #endif /* CONFIG_X86_64 */ +static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss; + static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) { pte_t pte; @@ -2179,15 +2182,28 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #else case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: #endif -#ifdef CONFIG_X86_LOCAL_APIC - case FIX_APIC_BASE: /* maps dummy local APIC */ -#endif case FIX_TEXT_POKE0: case FIX_TEXT_POKE1: /* All local page mappings */ pte = pfn_pte(phys, prot); break; +#ifdef CONFIG_X86_LOCAL_APIC + case FIX_APIC_BASE: /* maps dummy local APIC */ + pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL); + break; +#endif + +#ifdef CONFIG_X86_IO_APIC + case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END: + /* + * We just don't map the IO APIC - all access is via + * hypercalls. Keep the address in the pte for reference. + */ + pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL); + break; +#endif + case FIX_PARAVIRT_BOOTMAP: /* This is an MFN, but it isn't an IO mapping from the IO domain */ @@ -2212,6 +2228,29 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #endif } +__init void xen_ident_map_ISA(void) +{ + unsigned long pa; + + /* + * If we're dom0, then linear map the ISA machine addresses into + * the kernel's address space. + */ + if (!xen_initial_domain()) + return; + + xen_raw_printk("Xen: setup ISA identity maps\n"); + + for (pa = ISA_START_ADDRESS; pa < ISA_END_ADDRESS; pa += PAGE_SIZE) { + pte_t pte = mfn_pte(PFN_DOWN(pa), PAGE_KERNEL_IO); + + if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + pa, pte, 0)) + BUG(); + } + + xen_flush_tlb(); +} + static __init void xen_post_allocator_init(void) { pv_mmu_ops.set_pte = xen_set_pte; @@ -2320,6 +2359,8 @@ void __init xen_init_mmu_ops(void) pv_mmu_ops = xen_mmu_ops; vmap_lazy_unmap = false; + + memset(dummy_mapping, 0xff, PAGE_SIZE); } /* Protected by xen_reservation_lock. */ diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 22471001b74c..bfd0632fe65e 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -1,6 +1,7 @@ /* Glue code to lib/swiotlb-xen.c */ #include <linux/dma-mapping.h> +#include <linux/pci.h> #include <xen/swiotlb-xen.h> #include <asm/xen/hypervisor.h> @@ -55,6 +56,9 @@ void __init pci_xen_swiotlb_init(void) if (xen_swiotlb) { xen_swiotlb_init(1); dma_ops = &xen_swiotlb_dma_ops; + + /* Make sure ACS will be enabled */ + pci_request_acs(); } } IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 105db2501050..b1dbdaa23ecc 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -204,6 +204,9 @@ char * __init xen_memory_setup(void) * Even though this is normal, usable memory under Xen, reserve * ISA memory anyway because too many things think they can poke * about in there. + * + * In a dom0 kernel, this region is identity mapped with the + * hardware ISA area, so it really is out of bounds. */ e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, E820_RESERVED); @@ -367,7 +370,5 @@ void __init xen_arch_setup(void) pm_idle = xen_idle; - paravirt_disable_iospace(); - fiddle_vdso(); } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 25f232b18a82..72a4c7959045 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -28,6 +28,7 @@ #include <asm/xen/interface.h> #include <asm/xen/hypercall.h> +#include <xen/xen.h> #include <xen/page.h> #include <xen/events.h> @@ -156,11 +157,35 @@ static void __init xen_fill_possible_map(void) { int i, rc; + if (xen_initial_domain()) + return; + + for (i = 0; i < nr_cpu_ids; i++) { + rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); + if (rc >= 0) { + num_processors++; + set_cpu_possible(i, true); + } + } +} + +static void __init xen_filter_cpu_maps(void) +{ + int i, rc; + + if (!xen_initial_domain()) + return; + + num_processors = 0; + disabled_cpus = 0; for (i = 0; i < nr_cpu_ids; i++) { rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); if (rc >= 0) { num_processors++; set_cpu_possible(i, true); + } else { + set_cpu_possible(i, false); + set_cpu_present(i, false); } } } @@ -174,6 +199,7 @@ static void __init xen_smp_prepare_boot_cpu(void) old memory can be recycled */ make_lowmem_page_readwrite(xen_initial_gdt); + xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); } @@ -400,9 +426,9 @@ static void stop_self(void *v) BUG(); } -static void xen_smp_send_stop(void) +static void xen_stop_other_cpus(int wait) { - smp_call_function(stop_self, NULL, 0); + smp_call_function(stop_self, NULL, wait); } static void xen_smp_send_reschedule(int cpu) @@ -470,7 +496,7 @@ static const struct smp_ops xen_smp_ops __initdata = { .cpu_disable = xen_cpu_disable, .play_dead = xen_play_dead, - .smp_send_stop = xen_smp_send_stop, + .stop_other_cpus = xen_stop_other_cpus, .smp_send_reschedule = xen_smp_send_reschedule, .send_call_func_ipi = xen_smp_send_call_function_ipi, |