Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/Kconfig | 16
-rw-r--r-- arch/x86/ia32/ia32_aout.c | 1
-rw-r--r-- arch/x86/include/asm/alternative.h | 4
-rw-r--r-- arch/x86/include/asm/e820.h | 5
-rw-r--r-- arch/x86/include/asm/highmem.h | 4
-rw-r--r-- arch/x86/include/asm/i8259.h | 2
-rw-r--r-- arch/x86/include/asm/io_apic.h | 1
-rw-r--r-- arch/x86/include/asm/irq.h | 1
-rw-r--r-- arch/x86/include/asm/irq_vectors.h | 48
-rw-r--r-- arch/x86/include/asm/kprobes.h | 31
-rw-r--r-- arch/x86/include/asm/local.h | 37
-rw-r--r-- arch/x86/include/asm/paravirt.h | 9
-rw-r--r-- arch/x86/include/asm/paravirt_types.h | 4
-rw-r--r-- arch/x86/include/asm/pci.h | 2
-rw-r--r-- arch/x86/include/asm/pci_64.h | 2
-rw-r--r-- arch/x86/include/asm/percpu.h | 119
-rw-r--r-- arch/x86/include/asm/pgtable_32.h | 6
-rw-r--r-- arch/x86/include/asm/pgtable_64.h | 2
-rw-r--r-- arch/x86/include/asm/proto.h | 10
-rw-r--r-- arch/x86/include/asm/system.h | 8
-rw-r--r-- arch/x86/kernel/acpi/boot.c | 9
-rw-r--r-- arch/x86/kernel/alternative.c | 60
-rw-r--r-- arch/x86/kernel/apic/io_apic.c | 258
-rw-r--r-- arch/x86/kernel/apic/nmi.c | 12
-rw-r--r-- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 6
-rw-r--r-- arch/x86/kernel/cpu/mtrr/cleanup.c | 208
-rw-r--r-- arch/x86/kernel/cpu/mtrr/main.c | 1
-rw-r--r-- arch/x86/kernel/e820.c | 349
-rw-r--r-- arch/x86/kernel/head32.c | 10
-rw-r--r-- arch/x86/kernel/head_32.S | 6
-rw-r--r-- arch/x86/kernel/hpet.c | 2
-rw-r--r-- arch/x86/kernel/i8259.c | 30
-rw-r--r-- arch/x86/kernel/irqinit.c | 35
-rw-r--r-- arch/x86/kernel/kprobes.c | 609
-rw-r--r-- arch/x86/kernel/mmconf-fam10h_64.c | 7
-rw-r--r-- arch/x86/kernel/paravirt.c | 4
-rw-r--r-- arch/x86/kernel/pci-dma.c | 13
-rw-r--r-- arch/x86/kernel/reboot.c | 8
-rw-r--r-- arch/x86/kernel/setup.c | 10
-rw-r--r-- arch/x86/kernel/setup_percpu.c | 6
-rw-r--r-- arch/x86/kernel/smpboot.c | 13
-rw-r--r-- arch/x86/kernel/time.c | 4
-rw-r--r-- arch/x86/kernel/tsc.c | 2
-rw-r--r-- arch/x86/kernel/visws_quirks.c | 6
-rw-r--r-- arch/x86/kernel/vmi_32.c | 35
-rw-r--r-- arch/x86/kernel/vmiclock_32.c | 6
-rw-r--r-- arch/x86/kernel/vmlinux.lds.S | 4
-rw-r--r-- arch/x86/kvm/Kconfig | 1
-rw-r--r-- arch/x86/mm/init_32.c | 7
-rw-r--r-- arch/x86/mm/init_64.c | 9
-rw-r--r-- arch/x86/mm/numa_32.c | 3
-rw-r--r-- arch/x86/mm/numa_64.c | 97
-rw-r--r-- arch/x86/pci/Makefile | 3
-rw-r--r-- arch/x86/pci/amd_bus.c | 127
-rw-r--r-- arch/x86/pci/bus_numa.c | 25
-rw-r--r-- arch/x86/pci/bus_numa.h | 9
-rw-r--r-- arch/x86/pci/i386.c | 4
-rw-r--r-- arch/x86/xen/enlighten.c | 7
-rw-r--r-- arch/x86/xen/mmu.c | 21
-rw-r--r-- arch/x86/xen/xen-asm_32.S | 4
60 files changed, 1188 insertions, 1154 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0896008f7509..f15f37bfbd62 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -31,6 +31,7 @@ config X86
select ARCH_WANT_FRAME_POINTERS
select HAVE_DMA_ATTRS
select HAVE_KRETPROBES
+ select HAVE_OPTPROBES
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
@@ -184,6 +185,9 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
def_bool y
+config HAVE_EARLY_RES
+ def_bool y
+
config HAVE_INTEL_TXT
def_bool y
depends on EXPERIMENTAL && DMAR && ACPI
@@ -569,6 +573,18 @@ config PARAVIRT_DEBUG
Enable to debug paravirt_ops internals. Specifically, BUG if
a paravirt_op is missing when it is called.
+config NO_BOOTMEM
+ default y
+ bool "Disable Bootmem code"
+ ---help---
+ Use early_res directly instead of bootmem before slab is ready.
+ - allocator (buddy) [generic]
+ - early allocator (bootmem) [generic]
+ - very early allocator (reserve_early*()) [x86]
+ - very very early allocator (early brk model) [x86]
+ So reduce one layer between early allocator to final allocator
+
+
config MEMTEST
bool "Memtest"
---help---
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 9046e4af66ce..280c019cfad8 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -327,7 +327,6 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
current->mm->free_area_cache = TASK_UNMAPPED_BASE;
current->mm->cached_hole_size = 0;
- current->mm->mmap = NULL;
install_exec_creds(bprm);
current->flags &= ~PF_FORKNOEXEC;
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index f1e253ceba4b..b09ec55650b3 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -165,10 +165,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
* invalid instruction possible) or if the instructions are changed from a
* consistent state to another consistent state atomically.
* More care must be taken when modifying code in the SMP case because of
- * Intel's errata.
+ * Intel's errata. text_poke_smp() takes care that errata, but still
+ * doesn't support NMI/MCE handler code modifying.
* On the local CPU you need to be protected again NMI or MCE handlers seeing an
* inconsistent instruction while you patch.
*/
extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 761249e396fe..0e22296790d3 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -111,11 +111,8 @@ extern unsigned long end_user_pfn;
extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
-extern void reserve_early(u64 start, u64 end, char *name);
-extern void reserve_early_overlap_ok(u64 start, u64 end, char *name);
-extern void free_early(u64 start, u64 end);
-extern void early_res_to_bootmem(u64 start, u64 end);
extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
+#include <linux/early_res.h>
extern unsigned long e820_end_of_ram_pfn(void);
extern unsigned long e820_end_of_low_ram_pfn(void);
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index 014c2b85ae45..a726650fc80f 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -66,10 +66,6 @@ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
struct page *kmap_atomic_to_page(void *ptr);
-#ifndef CONFIG_PARAVIRT
-#define kmap_atomic_pte(page, type) kmap_atomic(page, type)
-#endif
-
#define flush_cache_kmaps() do { } while (0)
extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn,
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 58d7091eeb1f..7ec65b18085d 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -24,7 +24,7 @@ extern unsigned int cached_irq_mask;
#define SLAVE_ICW4_DEFAULT 0x01
#define PIC_ICW4_AEOI 2
-extern spinlock_t i8259A_lock;
+extern raw_spinlock_t i8259A_lock;
extern void init_8259A(int auto_eoi);
extern void enable_8259A_irq(unsigned int irq);
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 7c7c16cde1f8..5f61f6e0ffdd 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -160,6 +160,7 @@ extern int io_apic_get_redir_entries(int ioapic);
struct io_apic_irq_attr;
extern int io_apic_set_pci_routing(struct device *dev, int irq,
struct io_apic_irq_attr *irq_attr);
+void setup_IO_APIC_irq_extra(u32 gsi);
extern int (*ioapic_renumber_irq)(int ioapic, int irq);
extern void ioapic_init_mappings(void);
extern void ioapic_insert_resources(void);
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 5458380b6ef8..262292729fc4 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -48,5 +48,6 @@ extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
extern int vector_used_by_percpu_irq(unsigned int vector);
extern void init_ISA_irqs(void);
+extern int nr_legacy_irqs;
#endif /* _ASM_X86_IRQ_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 4611f085cd43..8767d99c4f64 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -28,28 +28,33 @@
#define MCE_VECTOR 0x12
/*
- * IDT vectors usable for external interrupt sources start
- * at 0x20:
+ * IDT vectors usable for external interrupt sources start at 0x20.
+ * (0x80 is the syscall vector, 0x30-0x3f are for ISA)
*/
#define FIRST_EXTERNAL_VECTOR 0x20
-
-#ifdef CONFIG_X86_32
-# define SYSCALL_VECTOR 0x80
-# define IA32_SYSCALL_VECTOR 0x80
-#else
-# define IA32_SYSCALL_VECTOR 0x80
-#endif
+/*
+ * We start allocating at 0x21 to spread out vectors evenly between
+ * priority levels. (0x80 is the syscall vector)
+ */
+#define VECTOR_OFFSET_START 1
/*
- * Reserve the lowest usable priority level 0x20 - 0x2f for triggering
- * cleanup after irq migration.
+ * Reserve the lowest usable vector (and hence lowest priority) 0x20 for
+ * triggering cleanup after irq migration. 0x21-0x2f will still be used
+ * for device interrupts.
*/
#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
+#define IA32_SYSCALL_VECTOR 0x80
+#ifdef CONFIG_X86_32
+# define SYSCALL_VECTOR 0x80
+#endif
+
/*
* Vectors 0x30-0x3f are used for ISA interrupts.
+ * round up to the next 16-vector boundary
*/
-#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10)
+#define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15)
#define IRQ1_VECTOR (IRQ0_VECTOR + 1)
#define IRQ2_VECTOR (IRQ0_VECTOR + 2)
@@ -120,13 +125,6 @@
*/
#define MCE_SELF_VECTOR 0xeb
-/*
- * First APIC vector available to drivers: (vectors 0x30-0xee) we
- * start at 0x31(0x41) to spread out vectors evenly between priority
- * levels. (0x80 is the syscall vector)
- */
-#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2)
-
#define NR_VECTORS 256
#define FPU_IRQ 13
@@ -154,21 +152,21 @@ static inline int invalid_vm86_irq(int irq)
#define NR_IRQS_LEGACY 16
-#define CPU_VECTOR_LIMIT ( 8 * NR_CPUS )
#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS )
#ifdef CONFIG_X86_IO_APIC
# ifdef CONFIG_SPARSE_IRQ
+# define CPU_VECTOR_LIMIT (64 * NR_CPUS)
# define NR_IRQS \
(CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \
(NR_VECTORS + CPU_VECTOR_LIMIT) : \
(NR_VECTORS + IO_APIC_VECTOR_LIMIT))
# else
-# if NR_CPUS < MAX_IO_APICS
-# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT)
-# else
-# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT)
-# endif
+# define CPU_VECTOR_LIMIT (32 * NR_CPUS)
+# define NR_IRQS \
+ (CPU_VECTOR_LIMIT < IO_APIC_VECTOR_LIMIT ? \
+ (NR_VECTORS + CPU_VECTOR_LIMIT) : \
+ (NR_VECTORS + IO_APIC_VECTOR_LIMIT))
# endif
#else /* !CONFIG_X86_IO_APIC: */
# define NR_IRQS NR_IRQS_LEGACY
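[Editorial note, not part of the patch] The new IRQ0_VECTOR definition above rounds FIRST_EXTERNAL_VECTOR up to the next 16-vector boundary rather than hard-coding the +0x10 offset. With the values in this header the result is unchanged; a minimal check, assuming FIRST_EXTERNAL_VECTOR stays at 0x20:

	/* Illustrative only; mirrors the macros in the hunk above. */
	#define FIRST_EXTERNAL_VECTOR	0x20
	#define IRQ0_VECTOR	((FIRST_EXTERNAL_VECTOR + 16) & ~15)	/* (0x20 + 0x10) & ~0xf == 0x30 */
	/* old form: FIRST_EXTERNAL_VECTOR + 0x10 == 0x30, i.e. the same value,
	 * but the new form still rounds up correctly if the base ever moves
	 * off a 16-aligned vector. */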
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 4fe681de1e76..4ffa345a8ccb 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -32,7 +32,10 @@ struct kprobe;
typedef u8 kprobe_opcode_t;
#define BREAKPOINT_INSTRUCTION 0xcc
-#define RELATIVEJUMP_INSTRUCTION 0xe9
+#define RELATIVEJUMP_OPCODE 0xe9
+#define RELATIVEJUMP_SIZE 5
+#define RELATIVECALL_OPCODE 0xe8
+#define RELATIVE_ADDR_SIZE 4
#define MAX_INSN_SIZE 16
#define MAX_STACK_SIZE 64
#define MIN_STACK_SIZE(ADDR) \
@@ -44,6 +47,17 @@ typedef u8 kprobe_opcode_t;
#define flush_insn_slot(p) do { } while (0)
+/* optinsn template addresses */
+extern kprobe_opcode_t optprobe_template_entry;
+extern kprobe_opcode_t optprobe_template_val;
+extern kprobe_opcode_t optprobe_template_call;
+extern kprobe_opcode_t optprobe_template_end;
+#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
+#define MAX_OPTINSN_SIZE \
+ (((unsigned long)&optprobe_template_end - \
+ (unsigned long)&optprobe_template_entry) + \
+ MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE)
+
extern const int kretprobe_blacklist_size;
void arch_remove_kprobe(struct kprobe *p);
@@ -64,6 +78,21 @@ struct arch_specific_insn {
int boostable;
};
+struct arch_optimized_insn {
+ /* copy of the original instructions */
+ kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE];
+ /* detour code buffer */
+ kprobe_opcode_t *insn;
+ /* the size of instructions copied to detour code buffer */
+ size_t size;
+};
+
+/* Return true (!0) if optinsn is prepared for optimization. */
+static inline int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
+{
+ return optinsn->size;
+}
+
struct prev_kprobe {
struct kprobe *kp;
unsigned long status;
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 47b9b6f19057..2e9972468a5d 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -195,41 +195,4 @@ static inline long local_sub_return(long i, local_t *l)
#define __local_add(i, l) local_add((i), (l))
#define __local_sub(i, l) local_sub((i), (l))
-/* Use these for per-cpu local_t variables: on some archs they are
- * much more efficient than these naive implementations. Note they take
- * a variable, not an address.
- *
- * X86_64: This could be done better if we moved the per cpu data directly
- * after GS.
- */
-
-/* Need to disable preemption for the cpu local counters otherwise we could
- still access a variable of a previous CPU in a non atomic way. */
-#define cpu_local_wrap_v(l) \
-({ \
- local_t res__; \
- preempt_disable(); \
- res__ = (l); \
- preempt_enable(); \
- res__; \
-})
-#define cpu_local_wrap(l) \
-({ \
- preempt_disable(); \
- (l); \
- preempt_enable(); \
-}) \
-
-#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var((l))))
-#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var((l)), (i)))
-#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var((l))))
-#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var((l))))
-#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var((l))))
-#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var((l))))
-
-#define __cpu_local_inc(l) cpu_local_inc((l))
-#define __cpu_local_dec(l) cpu_local_dec((l))
-#define __cpu_local_add(i, l) cpu_local_add((i), (l))
-#define __cpu_local_sub(i, l) cpu_local_sub((i), (l))
-
#endif /* _ASM_X86_LOCAL_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index dd59a85a918f..5653f43d90e5 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -435,15 +435,6 @@ static inline void paravirt_release_pud(unsigned long pfn)
PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
}
-#ifdef CONFIG_HIGHPTE
-static inline void *kmap_atomic_pte(struct page *page, enum km_type type)
-{
- unsigned long ret;
- ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type);
- return (void *)ret;
-}
-#endif
-
static inline void pte_update(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index b1e70d51e40c..db9ef5532341 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -304,10 +304,6 @@ struct pv_mmu_ops {
#endif /* PAGETABLE_LEVELS == 4 */
#endif /* PAGETABLE_LEVELS >= 3 */
-#ifdef CONFIG_HIGHPTE
- void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
-#endif
-
struct pv_lazy_ops lazy_mode;
/* dom0 ops */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index ada8c201d513..b4a00dd4eed5 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -124,6 +124,8 @@ extern void pci_iommu_alloc(void);
#include "pci_64.h"
#endif
+void dma32_reserve_bootmem(void);
+
/* implement the pci_ DMA API in terms of the generic device dma_ one */
#include <asm-generic/pci-dma-compat.h>
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
index ae5e40f67daf..fe15cfb21b9b 100644
--- a/arch/x86/include/asm/pci_64.h
+++ b/arch/x86/include/asm/pci_64.h
@@ -22,8 +22,6 @@ extern int (*pci_config_read)(int seg, int bus, int dev, int fn,
extern int (*pci_config_write)(int seg, int bus, int dev, int fn,
int reg, int len, u32 value);
-extern void dma32_reserve_bootmem(void);
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_PCI_64_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0c44196b78ac..66a272dfd8b8 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -25,19 +25,18 @@
*/
#ifdef CONFIG_SMP
#define PER_CPU(var, reg) \
- __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \
- lea per_cpu__##var(reg), reg
-#define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var
+ __percpu_mov_op %__percpu_seg:this_cpu_off, reg; \
+ lea var(reg), reg
+#define PER_CPU_VAR(var) %__percpu_seg:var
#else /* ! SMP */
-#define PER_CPU(var, reg) \
- __percpu_mov_op $per_cpu__##var, reg
-#define PER_CPU_VAR(var) per_cpu__##var
+#define PER_CPU(var, reg) __percpu_mov_op $var, reg
+#define PER_CPU_VAR(var) var
#endif /* SMP */
#ifdef CONFIG_X86_64_SMP
#define INIT_PER_CPU_VAR(var) init_per_cpu__##var
#else
-#define INIT_PER_CPU_VAR(var) per_cpu__##var
+#define INIT_PER_CPU_VAR(var) var
#endif
#else /* ...!ASSEMBLY */
@@ -60,12 +59,12 @@
* There also must be an entry in vmlinux_64.lds.S
*/
#define DECLARE_INIT_PER_CPU(var) \
- extern typeof(per_cpu_var(var)) init_per_cpu_var(var)
+ extern typeof(var) init_per_cpu_var(var)
#ifdef CONFIG_X86_64_SMP
#define init_per_cpu_var(var) init_per_cpu__##var
#else
-#define init_per_cpu_var(var) per_cpu_var(var)
+#define init_per_cpu_var(var) var
#endif
/* For arch-specific code, we can use direct single-insn ops (they
@@ -104,6 +103,64 @@ do { \
} \
} while (0)
+/*
+ * Generate a percpu add to memory instruction and optimize code
+ * if a one is added or subtracted.
+ */
+#define percpu_add_op(var, val) \
+do { \
+ typedef typeof(var) pao_T__; \
+ const int pao_ID__ = (__builtin_constant_p(val) && \
+ ((val) == 1 || (val) == -1)) ? (val) : 0; \
+ if (0) { \
+ pao_T__ pao_tmp__; \
+ pao_tmp__ = (val); \
+ } \
+ switch (sizeof(var)) { \
+ case 1: \
+ if (pao_ID__ == 1) \
+ asm("incb "__percpu_arg(0) : "+m" (var)); \
+ else if (pao_ID__ == -1) \
+ asm("decb "__percpu_arg(0) : "+m" (var)); \
+ else \
+ asm("addb %1, "__percpu_arg(0) \
+ : "+m" (var) \
+ : "qi" ((pao_T__)(val))); \
+ break; \
+ case 2: \
+ if (pao_ID__ == 1) \
+ asm("incw "__percpu_arg(0) : "+m" (var)); \
+ else if (pao_ID__ == -1) \
+ asm("decw "__percpu_arg(0) : "+m" (var)); \
+ else \
+ asm("addw %1, "__percpu_arg(0) \
+ : "+m" (var) \
+ : "ri" ((pao_T__)(val))); \
+ break; \
+ case 4: \
+ if (pao_ID__ == 1) \
+ asm("incl "__percpu_arg(0) : "+m" (var)); \
+ else if (pao_ID__ == -1) \
+ asm("decl "__percpu_arg(0) : "+m" (var)); \
+ else \
+ asm("addl %1, "__percpu_arg(0) \
+ : "+m" (var) \
+ : "ri" ((pao_T__)(val))); \
+ break; \
+ case 8: \
+ if (pao_ID__ == 1) \
+ asm("incq "__percpu_arg(0) : "+m" (var)); \
+ else if (pao_ID__ == -1) \
+ asm("decq "__percpu_arg(0) : "+m" (var)); \
+ else \
+ asm("addq %1, "__percpu_arg(0) \
+ : "+m" (var) \
+ : "re" ((pao_T__)(val))); \
+ break; \
+ default: __bad_percpu_size(); \
+ } \
+} while (0)
+
#define percpu_from_op(op, var, constraint) \
({ \
typeof(var) pfo_ret__; \
@@ -142,16 +199,14 @@ do { \
* per-thread variables implemented as per-cpu variables and thus
* stable for the duration of the respective task.
*/
-#define percpu_read(var) percpu_from_op("mov", per_cpu__##var, \
- "m" (per_cpu__##var))
-#define percpu_read_stable(var) percpu_from_op("mov", per_cpu__##var, \
- "p" (&per_cpu__##var))
-#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val)
-#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val)
-#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val)
-#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val)
-#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val)
-#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val)
+#define percpu_read(var) percpu_from_op("mov", var, "m" (var))
+#define percpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
+#define percpu_write(var, val) percpu_to_op("mov", var, val)
+#define percpu_add(var, val) percpu_add_op(var, val)
+#define percpu_sub(var, val) percpu_add_op(var, -(val))
+#define percpu_and(var, val) percpu_to_op("and", var, val)
+#define percpu_or(var, val) percpu_to_op("or", var, val)
+#define percpu_xor(var, val) percpu_to_op("xor", var, val)
#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
@@ -160,9 +215,9 @@ do { \
#define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
#define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
#define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
-#define __this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val)
-#define __this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val)
-#define __this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val)
+#define __this_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
+#define __this_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
+#define __this_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
#define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
#define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
#define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
@@ -179,9 +234,9 @@ do { \
#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val)
-#define this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val)
-#define this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val)
+#define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
+#define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
+#define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
@@ -192,9 +247,9 @@ do { \
#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
-#define irqsafe_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val)
-#define irqsafe_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val)
-#define irqsafe_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val)
+#define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
+#define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
+#define irqsafe_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
#define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
#define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
#define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
@@ -212,19 +267,19 @@ do { \
#ifdef CONFIG_X86_64
#define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
-#define __this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val)
+#define __this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val)
+#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
-#define irqsafe_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val)
+#define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
#define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
@@ -236,7 +291,7 @@ do { \
({ \
int old__; \
asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \
- : "=r" (old__), "+m" (per_cpu__##var) \
+ : "=r" (old__), "+m" (var) \
: "dIr" (bit)); \
old__; \
})
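[Editorial note, not part of the patch] The percpu_add_op() macro introduced above selects a single inc/dec instruction when the addend is a compile-time constant 1 or -1, and falls back to an add otherwise. A minimal sketch of the intended expansions, assuming a hypothetical DEFINE_PER_CPU(int, nr_events) variable on an x86-64 SMP build:

	percpu_add(nr_events, 1);	/* emits: incl %gs:nr_events        */
	percpu_add(nr_events, -1);	/* emits: decl %gs:nr_events        */
	percpu_add(nr_events, 16);	/* emits: addl $16, %gs:nr_events   */

The dead "if (0)" block inside the macro only exists so the compiler type-checks val against the per-cpu variable; it generates no code.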
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 01fd9461d323..47339a1ac7b6 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -54,10 +54,10 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
in_irq() ? KM_IRQ_PTE : \
KM_PTE0)
#define pte_offset_map(dir, address) \
- ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \
+ ((pte_t *)kmap_atomic(pmd_page(*(dir)), __KM_PTE) + \
pte_index((address)))
#define pte_offset_map_nested(dir, address) \
- ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \
+ ((pte_t *)kmap_atomic(pmd_page(*(dir)), KM_PTE1) + \
pte_index((address)))
#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
@@ -80,7 +80,7 @@ do { \
* The i386 doesn't have any external MMU info: the kernel page
* tables contain all the necessary information.
*/
-#define update_mmu_cache(vma, address, pte) do { } while (0)
+#define update_mmu_cache(vma, address, ptep) do { } while (0)
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index c57a30117149..181be528c612 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -129,7 +129,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
#define pte_unmap(pte) /* NOP */
#define pte_unmap_nested(pte) /* NOP */
-#define update_mmu_cache(vma, address, pte) do { } while (0)
+#define update_mmu_cache(vma, address, ptep) do { } while (0)
/* Encode and de-code a swap entry */
#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 4009f6534f52..6f414ed88620 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -23,14 +23,4 @@ extern int reboot_force;
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
-/*
- * This looks more complex than it should be. But we need to
- * get the type for the ~ right in round_down (it needs to be
- * as wide as the result!), and we want to evaluate the macro
- * arguments just once each.
- */
-#define __round_mask(x,y) ((__typeof__(x))((y)-1))
-#define round_up(x,y) ((((x)-1) | __round_mask(x,y))+1)
-#define round_down(x,y) ((x) & ~__round_mask(x,y))
-
#endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index e04740f7a0bb..b8fe48ee2ed9 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -32,7 +32,7 @@ extern void show_regs_common(void);
"movl %P[task_canary](%[next]), %%ebx\n\t" \
"movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
#define __switch_canary_oparam \
- , [stack_canary] "=m" (per_cpu_var(stack_canary.canary))
+ , [stack_canary] "=m" (stack_canary.canary)
#define __switch_canary_iparam \
, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
#else /* CC_STACKPROTECTOR */
@@ -114,7 +114,7 @@ do { \
"movq %P[task_canary](%%rsi),%%r8\n\t" \
"movq %%r8,"__percpu_arg([gs_canary])"\n\t"
#define __switch_canary_oparam \
- , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
+ , [gs_canary] "=m" (irq_stack_union.stack_canary)
#define __switch_canary_iparam \
, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
#else /* CC_STACKPROTECTOR */
@@ -133,7 +133,7 @@ do { \
__switch_canary \
"movq %P[thread_info](%%rsi),%%r8\n\t" \
"movq %%rax,%%rdi\n\t" \
- "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
+ "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
"jnz ret_from_fork\n\t" \
RESTORE_CONTEXT \
: "=a" (last) \
@@ -143,7 +143,7 @@ do { \
[ti_flags] "i" (offsetof(struct thread_info, flags)), \
[_tif_fork] "i" (_TIF_FORK), \
[thread_info] "i" (offsetof(struct task_struct, stack)), \
- [current_task] "m" (per_cpu_var(current_task)) \
+ [current_task] "m" (current_task) \
__switch_canary_iparam \
: "memory", "cc" __EXTRA_CLOBBER)
#endif
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index f95703098f8d..738fcb60e708 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -447,6 +447,12 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
{
*irq = gsi;
+
+#ifdef CONFIG_X86_IO_APIC
+ if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC)
+ setup_IO_APIC_irq_extra(gsi);
+#endif
+
return 0;
}
@@ -474,7 +480,8 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity);
}
#endif
- acpi_gsi_to_irq(plat_gsi, &irq);
+ irq = plat_gsi;
+
return irq;
}
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index e6ea0342c8f8..3a4bf35c179b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -7,6 +7,7 @@
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/memory.h>
+#include <linux/stop_machine.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
@@ -572,3 +573,62 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
local_irq_restore(flags);
return addr;
}
+
+/*
+ * Cross-modifying kernel text with stop_machine().
+ * This code originally comes from immediate value.
+ */
+static atomic_t stop_machine_first;
+static int wrote_text;
+
+struct text_poke_params {
+ void *addr;
+ const void *opcode;
+ size_t len;
+};
+
+static int __kprobes stop_machine_text_poke(void *data)
+{
+ struct text_poke_params *tpp = data;
+
+ if (atomic_dec_and_test(&stop_machine_first)) {
+ text_poke(tpp->addr, tpp->opcode, tpp->len);
+ smp_wmb(); /* Make sure other cpus see that this has run */
+ wrote_text = 1;
+ } else {
+ while (!wrote_text)
+ cpu_relax();
+ smp_mb(); /* Load wrote_text before following execution */
+ }
+
+ flush_icache_range((unsigned long)tpp->addr,
+ (unsigned long)tpp->addr + tpp->len);
+ return 0;
+}
+
+/**
+ * text_poke_smp - Update instructions on a live kernel on SMP
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy
+ *
+ * Modify multi-byte instruction by using stop_machine() on SMP. This allows
+ * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying
+ * should be allowed, since stop_machine() does _not_ protect code against
+ * NMI and MCE.
+ *
+ * Note: Must be called under get_online_cpus() and text_mutex.
+ */
+void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
+{
+ struct text_poke_params tpp;
+
+ tpp.addr = addr;
+ tpp.opcode = opcode;
+ tpp.len = len;
+ atomic_set(&stop_machine_first, 1);
+ wrote_text = 0;
+ stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+ return addr;
+}
+
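[Editorial note, not part of the patch] text_poke_smp() above runs stop_machine_text_poke() on every CPU: exactly one CPU (the first to win atomic_dec_and_test()) patches the text, while the others spin until wrote_text is set. A minimal caller sketch, following the locking rules stated in the kerneldoc (patch_addr and jump_insn are hypothetical names):

	get_online_cpus();
	mutex_lock(&text_mutex);
	text_poke_smp(patch_addr, jump_insn, RELATIVEJUMP_SIZE);	/* 5-byte relative jump */
	mutex_unlock(&text_mutex);
	put_online_cpus();

As the kerneldoc notes, this is only suitable for code that is never executed from NMI or MCE context, since stop_machine() cannot fence those handlers.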
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6bdd2c7ead75..14862f11cc4a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -73,8 +73,8 @@
*/
int sis_apic_bug = -1;
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
+static DEFINE_RAW_SPINLOCK(ioapic_lock);
+static DEFINE_RAW_SPINLOCK(vector_lock);
/*
* # of IRQ routing registers
@@ -94,8 +94,6 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* # of MP IRQ source entries */
int mp_irq_entries;
-/* Number of legacy interrupts */
-static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY;
/* GSI interrupts */
static int nr_irqs_gsi = NR_IRQS_LEGACY;
@@ -140,27 +138,10 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
#ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg irq_cfgx[] = {
+static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
#else
-static struct irq_cfg irq_cfgx[NR_IRQS] = {
+static struct irq_cfg irq_cfgx[NR_IRQS];
#endif
- [0] = { .vector = IRQ0_VECTOR, },
- [1] = { .vector = IRQ1_VECTOR, },
- [2] = { .vector = IRQ2_VECTOR, },
- [3] = { .vector = IRQ3_VECTOR, },
- [4] = { .vector = IRQ4_VECTOR, },
- [5] = { .vector = IRQ5_VECTOR, },
- [6] = { .vector = IRQ6_VECTOR, },
- [7] = { .vector = IRQ7_VECTOR, },
- [8] = { .vector = IRQ8_VECTOR, },
- [9] = { .vector = IRQ9_VECTOR, },
- [10] = { .vector = IRQ10_VECTOR, },
- [11] = { .vector = IRQ11_VECTOR, },
- [12] = { .vector = IRQ12_VECTOR, },
- [13] = { .vector = IRQ13_VECTOR, },
- [14] = { .vector = IRQ14_VECTOR, },
- [15] = { .vector = IRQ15_VECTOR, },
-};
void __init io_apic_disable_legacy(void)
{
@@ -185,8 +166,14 @@ int __init arch_early_irq_init(void)
desc->chip_data = &cfg[i];
zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
- if (i < nr_legacy_irqs)
- cpumask_setall(cfg[i].domain);
+ /*
+ * For legacy IRQ's, start with assigning irq0 to irq15 to
+ * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
+ */
+ if (i < nr_legacy_irqs) {
+ cfg[i].vector = IRQ0_VECTOR + i;
+ cpumask_set_cpu(0, cfg[i].domain);
+ }
}
return 0;
@@ -406,7 +393,7 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
struct irq_pin_list *entry;
unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
for_each_irq_pin(entry, cfg->irq_2_pin) {
unsigned int reg;
int pin;
@@ -415,11 +402,11 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
reg = io_apic_read(entry->apic, 0x10 + pin*2);
/* Is the remote IRR bit set? */
if (reg & IO_APIC_REDIR_REMOTE_IRR) {
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return true;
}
}
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return false;
}
@@ -433,10 +420,10 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
{
union entry_union eu;
unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return eu.entry;
}
@@ -459,9 +446,9 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
__ioapic_write_entry(apic, pin, e);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
/*
@@ -474,10 +461,10 @@ static void ioapic_mask_entry(int apic, int pin)
unsigned long flags;
union entry_union eu = { .entry.mask = 1 };
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
io_apic_write(apic, 0x11 + 2*pin, eu.w2);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
/*
@@ -604,9 +591,9 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
BUG_ON(!cfg);
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
__mask_IO_APIC_irq(cfg);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
@@ -614,9 +601,9 @@ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
struct irq_cfg *cfg = desc->chip_data;
unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
__unmask_IO_APIC_irq(cfg);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
static void mask_IO_APIC_irq(unsigned int irq)
@@ -1140,12 +1127,12 @@ void lock_vector_lock(void)
/* Used to the online set of cpus does not change
* during assign_irq_vector.
*/
- spin_lock(&vector_lock);
+ raw_spin_lock(&vector_lock);
}
void unlock_vector_lock(void)
{
- spin_unlock(&vector_lock);
+ raw_spin_unlock(&vector_lock);
}
static int
@@ -1162,7 +1149,8 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
* Also, we've got to be careful not to trash gate
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
- static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+ static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
+ static int current_offset = VECTOR_OFFSET_START % 8;
unsigned int old_vector;
int cpu, err;
cpumask_var_t tmp_mask;
@@ -1198,7 +1186,7 @@ next:
if (vector >= first_system_vector) {
/* If out of vectors on large boxen, must share them. */
offset = (offset + 1) % 8;
- vector = FIRST_DEVICE_VECTOR + offset;
+ vector = FIRST_EXTERNAL_VECTOR + offset;
}
if (unlikely(current_vector == vector))
continue;
@@ -1232,9 +1220,9 @@ int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
int err;
unsigned long flags;
- spin_lock_irqsave(&vector_lock, flags);
+ raw_spin_lock_irqsave(&vector_lock, flags);
err = __assign_irq_vector(irq, cfg, mask);
- spin_unlock_irqrestore(&vector_lock, flags);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
return err;
}
@@ -1268,11 +1256,16 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
void __setup_vector_irq(int cpu)
{
/* Initialize vector_irq on a new cpu */
- /* This function must be called with vector_lock held */
int irq, vector;
struct irq_cfg *cfg;
struct irq_desc *desc;
+ /*
+ * vector_lock will make sure that we don't run into irq vector
+ * assignments that might be happening on another cpu in parallel,
+ * while we setup our initial vector to irq mappings.
+ */
+ raw_spin_lock(&vector_lock);
/* Mark the inuse vectors */
for_each_irq_desc(irq, desc) {
cfg = desc->chip_data;
@@ -1291,6 +1284,7 @@ void __setup_vector_irq(int cpu)
if (!cpumask_test_cpu(cpu, cfg->domain))
per_cpu(vector_irq, cpu)[vector] = -1;
}
+ raw_spin_unlock(&vector_lock);
}
static struct irq_chip ioapic_chip;
@@ -1440,6 +1434,14 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
cfg = desc->chip_data;
+ /*
+ * For legacy irqs, cfg->domain starts with cpu 0 for legacy
+ * controllers like 8259. Now that IO-APIC can handle this irq, update
+ * the cfg->domain.
+ */
+ if (irq < nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
+ apic->vector_allocation_domain(0, cfg->domain);
+
if (assign_irq_vector(irq, cfg, apic->target_cpus()))
return;
@@ -1473,7 +1475,7 @@ static struct {
static void __init setup_IO_APIC_irqs(void)
{
- int apic_id = 0, pin, idx, irq;
+ int apic_id, pin, idx, irq;
int notcon = 0;
struct irq_desc *desc;
struct irq_cfg *cfg;
@@ -1481,14 +1483,7 @@ static void __init setup_IO_APIC_irqs(void)
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
-#ifdef CONFIG_ACPI
- if (!acpi_disabled && acpi_ioapic) {
- apic_id = mp_find_ioapic(0);
- if (apic_id < 0)
- apic_id = 0;
- }
-#endif
-
+ for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
idx = find_irq_entry(apic_id, pin, mp_INT);
if (idx == -1) {
@@ -1510,6 +1505,9 @@ static void __init setup_IO_APIC_irqs(void)
irq = pin_2_irq(idx, apic_id, pin);
+ if ((apic_id > 0) && (irq > 16))
+ continue;
+
/*
* Skip the timer IRQ if there's a quirk handler
* installed and if it returns 1:
@@ -1539,6 +1537,56 @@ static void __init setup_IO_APIC_irqs(void)
}
/*
+ * for the gsit that is not in first ioapic
+ * but could not use acpi_register_gsi()
+ * like some special sci in IBM x3330
+ */
+void setup_IO_APIC_irq_extra(u32 gsi)
+{
+ int apic_id = 0, pin, idx, irq;
+ int node = cpu_to_node(boot_cpu_id);
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+
+ /*
+ * Convert 'gsi' to 'ioapic.pin'.
+ */
+ apic_id = mp_find_ioapic(gsi);
+ if (apic_id < 0)
+ return;
+
+ pin = mp_find_ioapic_pin(apic_id, gsi);
+ idx = find_irq_entry(apic_id, pin, mp_INT);
+ if (idx == -1)
+ return;
+
+ irq = pin_2_irq(idx, apic_id, pin);
+#ifdef CONFIG_SPARSE_IRQ
+ desc = irq_to_desc(irq);
+ if (desc)
+ return;
+#endif
+ desc = irq_to_desc_alloc_node(irq, node);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+ return;
+ }
+
+ cfg = desc->chip_data;
+ add_pin_to_irq_node(cfg, node, apic_id, pin);
+
+ if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
+ pr_debug("Pin %d-%d already programmed\n",
+ mp_ioapics[apic_id].apicid, pin);
+ return;
+ }
+ set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
+
+ setup_IO_APIC_irq(apic_id, pin, irq, desc,
+ irq_trigger(idx), irq_polarity(idx));
+}
+
+/*
* Set up the timer pin, possibly with the 8259A-master behind.
*/
static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
@@ -1601,14 +1649,14 @@ __apicdebuginit(void) print_IO_APIC(void)
for (apic = 0; apic < nr_ioapics; apic++) {
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(apic, 0);
reg_01.raw = io_apic_read(apic, 1);
if (reg_01.bits.version >= 0x10)
reg_02.raw = io_apic_read(apic, 2);
if (reg_01.bits.version >= 0x20)
reg_03.raw = io_apic_read(apic, 3);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
printk("\n");
printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
@@ -1830,7 +1878,7 @@ __apicdebuginit(void) print_PIC(void)
printk(KERN_DEBUG "\nprinting PIC contents\n");
- spin_lock_irqsave(&i8259A_lock, flags);
+ raw_spin_lock_irqsave(&i8259A_lock, flags);
v = inb(0xa1) << 8 | inb(0x21);
printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
@@ -1844,7 +1892,7 @@ __apicdebuginit(void) print_PIC(void)
outb(0x0a,0xa0);
outb(0x0a,0x20);
- spin_unlock_irqrestore(&i8259A_lock, flags);
+ raw_spin_unlock_irqrestore(&i8259A_lock, flags);
printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
@@ -1903,9 +1951,9 @@ void __init enable_IO_APIC(void)
* The number of IO-APIC IRQ registers (== #pins):
*/
for (apic = 0; apic < nr_ioapics; apic++) {
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_01.raw = io_apic_read(apic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
nr_ioapic_registers[apic] = reg_01.bits.entries+1;
}
@@ -2045,9 +2093,9 @@ void __init setup_ioapic_ids_from_mpc(void)
for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
/* Read the register 0 value */
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(apic_id, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
old_id = mp_ioapics[apic_id].apicid;
@@ -2106,16 +2154,16 @@ void __init setup_ioapic_ids_from_mpc(void)
mp_ioapics[apic_id].apicid);
reg_00.bits.ID = mp_ioapics[apic_id].apicid;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(apic_id, 0, reg_00.raw);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
/*
* Sanity check
*/
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(apic_id, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
printk("could not set ID!\n");
else
@@ -2198,7 +2246,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
unsigned long flags;
struct irq_cfg *cfg;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
if (irq < nr_legacy_irqs) {
disable_8259A_irq(irq);
if (i8259A_irq_pending(irq))
@@ -2206,7 +2254,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
}
cfg = irq_cfg(irq);
__unmask_IO_APIC_irq(cfg);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return was_pending;
}
@@ -2217,9 +2265,9 @@ static int ioapic_retrigger_irq(unsigned int irq)
struct irq_cfg *cfg = irq_cfg(irq);
unsigned long flags;
- spin_lock_irqsave(&vector_lock, flags);
+ raw_spin_lock_irqsave(&vector_lock, flags);
apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
- spin_unlock_irqrestore(&vector_lock, flags);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
return 1;
}
@@ -2312,14 +2360,14 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
irq = desc->irq;
cfg = desc->chip_data;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
ret = set_desc_affinity(desc, mask, &dest);
if (!ret) {
/* Only the high 8 bits are valid. */
dest = SET_APIC_LOGICAL_ID(dest);
__target_IO_APIC_irq(irq, dest, cfg);
}
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return ret;
}
@@ -2554,9 +2602,9 @@ static void eoi_ioapic_irq(struct irq_desc *desc)
irq = desc->irq;
cfg = desc->chip_data;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
__eoi_ioapic_irq(irq, cfg);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
static void ack_apic_level(unsigned int irq)
@@ -3138,13 +3186,13 @@ static int ioapic_resume(struct sys_device *dev)
data = container_of(dev, struct sysfs_ioapic_data, dev);
entry = data->entry;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(dev->id, 0);
if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
reg_00.bits.ID = mp_ioapics[dev->id].apicid;
io_apic_write(dev->id, 0, reg_00.raw);
}
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
ioapic_write_entry(dev->id, i, entry[i]);
@@ -3207,7 +3255,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
if (irq_want < nr_irqs_gsi)
irq_want = nr_irqs_gsi;
- spin_lock_irqsave(&vector_lock, flags);
+ raw_spin_lock_irqsave(&vector_lock, flags);
for (new = irq_want; new < nr_irqs; new++) {
desc_new = irq_to_desc_alloc_node(new, node);
if (!desc_new) {
@@ -3226,14 +3274,11 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
irq = new;
break;
}
- spin_unlock_irqrestore(&vector_lock, flags);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+ if (irq > 0)
+ dynamic_irq_init_keep_chip_data(irq);
- if (irq > 0) {
- dynamic_irq_init(irq);
- /* restore it, in case dynamic_irq_init clear it */
- if (desc_new)
- desc_new->chip_data = cfg_new;
- }
return irq;
}
@@ -3255,20 +3300,13 @@ int create_irq(void)
void destroy_irq(unsigned int irq)
{
unsigned long flags;
- struct irq_cfg *cfg;
- struct irq_desc *desc;
- /* store it, in case dynamic_irq_cleanup clear it */
- desc = irq_to_desc(irq);
- cfg = desc->chip_data;
- dynamic_irq_cleanup(irq);
- /* connect back irq_cfg */
- desc->chip_data = cfg;
+ dynamic_irq_cleanup_keep_chip_data(irq);
free_irte(irq);
- spin_lock_irqsave(&vector_lock, flags);
- __clear_irq_vector(irq, cfg);
- spin_unlock_irqrestore(&vector_lock, flags);
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ __clear_irq_vector(irq, get_irq_chip_data(irq));
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
}
/*
@@ -3805,9 +3843,9 @@ int __init io_apic_get_redir_entries (int ioapic)
union IO_APIC_reg_01 reg_01;
unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_01.raw = io_apic_read(ioapic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return reg_01.bits.entries;
}
@@ -3969,9 +4007,9 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
if (physids_empty(apic_id_map))
apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(ioapic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
if (apic_id >= get_physical_broadcast()) {
printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
@@ -4005,10 +4043,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
if (reg_00.bits.ID != apic_id) {
reg_00.bits.ID = apic_id;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(ioapic, 0, reg_00.raw);
reg_00.raw = io_apic_read(ioapic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
/* Sanity check */
if (reg_00.bits.ID != apic_id) {
@@ -4029,9 +4067,9 @@ int __init io_apic_get_version(int ioapic)
union IO_APIC_reg_01 reg_01;
unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_01.raw = io_apic_read(ioapic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return reg_01.bits.version;
}
@@ -4063,27 +4101,23 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
#ifdef CONFIG_SMP
void __init setup_ioapic_dest(void)
{
- int pin, ioapic = 0, irq, irq_entry;
+ int pin, ioapic, irq, irq_entry;
struct irq_desc *desc;
const struct cpumask *mask;
if (skip_ioapic_setup == 1)
return;
-#ifdef CONFIG_ACPI
- if (!acpi_disabled && acpi_ioapic) {
- ioapic = mp_find_ioapic(0);
- if (ioapic < 0)
- ioapic = 0;
- }
-#endif
-
+ for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
irq_entry = find_irq_entry(ioapic, pin, mp_INT);
if (irq_entry == -1)
continue;
irq = pin_2_irq(irq_entry, ioapic, pin);
+ if ((ioapic > 0) && (irq > 16))
+ continue;
+
desc = irq_to_desc(irq);
/*
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index 0159a69396cb..bd7c96b5e8d8 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -416,13 +416,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
/* We can be called before check_nmi_watchdog, hence NULL check. */
if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
- static DEFINE_SPINLOCK(lock); /* Serialise the printks */
+ static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
- spin_lock(&lock);
+ raw_spin_lock(&lock);
printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
show_regs(regs);
dump_stack();
- spin_unlock(&lock);
+ raw_spin_unlock(&lock);
cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
rc = 1;
@@ -438,8 +438,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
* Ayiee, looks like this CPU is stuck ...
* wait a few IRQs (5 seconds) before doing the oops ...
*/
- __this_cpu_inc(per_cpu_var(alert_counter));
- if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz)
+ __this_cpu_inc(alert_counter);
+ if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
/*
* die_nmi will return ONLY if NOTIFY_STOP happens..
*/
@@ -447,7 +447,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
regs, panic_on_timeout);
} else {
__get_cpu_var(last_irq_sum) = sum;
- __this_cpu_write(per_cpu_var(alert_counter), 0);
+ __this_cpu_write(alert_counter, 0);
}
/* see if the nmi watchdog went off */
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 6e44519960c8..d360b56e9825 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -806,7 +806,7 @@ static int find_psb_table(struct powernow_k8_data *data)
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
unsigned int index)
{
- acpi_integer control;
+ u64 control;
if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
return;
@@ -824,7 +824,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
{
struct cpufreq_frequency_table *powernow_table;
int ret_val = -ENODEV;
- acpi_integer control, status;
+ u64 control, status;
if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
dprintk("register performance failed: bad ACPI data\n");
@@ -948,7 +948,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
u32 fid;
u32 vid;
u32 freq, index;
- acpi_integer status, control;
+ u64 status, control;
if (data->exttype) {
status = data->acpi_data.states[i].status;
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 09b1698e0466..06130b52f012 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -22,10 +22,10 @@
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/cpu.h>
-#include <linux/sort.h>
#include <linux/mutex.h>
#include <linux/uaccess.h>
#include <linux/kvm_para.h>
+#include <linux/range.h>
#include <asm/processor.h>
#include <asm/e820.h>
@@ -34,11 +34,6 @@
#include "mtrr.h"
-struct res_range {
- unsigned long start;
- unsigned long end;
-};
-
struct var_mtrr_range_state {
unsigned long base_pfn;
unsigned long size_pfn;
@@ -56,7 +51,7 @@ struct var_mtrr_state {
/* Should be related to MTRR_VAR_RANGES nums */
#define RANGE_NUM 256
-static struct res_range __initdata range[RANGE_NUM];
+static struct range __initdata range[RANGE_NUM];
static int __initdata nr_range;
static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
@@ -64,152 +59,11 @@ static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
static int __initdata debug_print;
#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
-
-static int __init
-add_range(struct res_range *range, int nr_range,
- unsigned long start, unsigned long end)
-{
- /* Out of slots: */
- if (nr_range >= RANGE_NUM)
- return nr_range;
-
- range[nr_range].start = start;
- range[nr_range].end = end;
-
- nr_range++;
-
- return nr_range;
-}
-
-static int __init
-add_range_with_merge(struct res_range *range, int nr_range,
- unsigned long start, unsigned long end)
-{
- int i;
-
- /* Try to merge it with old one: */
- for (i = 0; i < nr_range; i++) {
- unsigned long final_start, final_end;
- unsigned long common_start, common_end;
-
- if (!range[i].end)
- continue;
-
- common_start = max(range[i].start, start);
- common_end = min(range[i].end, end);
- if (common_start > common_end + 1)
- continue;
-
- final_start = min(range[i].start, start);
- final_end = max(range[i].end, end);
-
- range[i].start = final_start;
- range[i].end = final_end;
- return nr_range;
- }
-
- /* Need to add it: */
- return add_range(range, nr_range, start, end);
-}
-
-static void __init
-subtract_range(struct res_range *range, unsigned long start, unsigned long end)
-{
- int i, j;
-
- for (j = 0; j < RANGE_NUM; j++) {
- if (!range[j].end)
- continue;
-
- if (start <= range[j].start && end >= range[j].end) {
- range[j].start = 0;
- range[j].end = 0;
- continue;
- }
-
- if (start <= range[j].start && end < range[j].end &&
- range[j].start < end + 1) {
- range[j].start = end + 1;
- continue;
- }
-
-
- if (start > range[j].start && end >= range[j].end &&
- range[j].end > start - 1) {
- range[j].end = start - 1;
- continue;
- }
-
- if (start > range[j].start && end < range[j].end) {
- /* Find the new spare: */
- for (i = 0; i < RANGE_NUM; i++) {
- if (range[i].end == 0)
- break;
- }
- if (i < RANGE_NUM) {
- range[i].end = range[j].end;
- range[i].start = end + 1;
- } else {
- printk(KERN_ERR "run of slot in ranges\n");
- }
- range[j].end = start - 1;
- continue;
- }
- }
-}
-
-static int __init cmp_range(const void *x1, const void *x2)
-{
- const struct res_range *r1 = x1;
- const struct res_range *r2 = x2;
- long start1, start2;
-
- start1 = r1->start;
- start2 = r2->start;
-
- return start1 - start2;
-}
-
-static int __init clean_sort_range(struct res_range *range, int az)
-{
- int i, j, k = az - 1, nr_range = 0;
-
- for (i = 0; i < k; i++) {
- if (range[i].end)
- continue;
- for (j = k; j > i; j--) {
- if (range[j].end) {
- k = j;
- break;
- }
- }
- if (j == i)
- break;
- range[i].start = range[k].start;
- range[i].end = range[k].end;
- range[k].start = 0;
- range[k].end = 0;
- k--;
- }
- /* count it */
- for (i = 0; i < az; i++) {
- if (!range[i].end) {
- nr_range = i;
- break;
- }
- }
-
- /* sort them */
- sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
-
- return nr_range;
-}
-
#define BIOS_BUG_MSG KERN_WARNING \
"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
static int __init
-x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
+x86_get_mtrr_mem_range(struct range *range, int nr_range,
unsigned long extra_remove_base,
unsigned long extra_remove_size)
{
@@ -223,14 +77,14 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
continue;
base = range_state[i].base_pfn;
size = range_state[i].size_pfn;
- nr_range = add_range_with_merge(range, nr_range, base,
- base + size - 1);
+ nr_range = add_range_with_merge(range, RANGE_NUM, nr_range,
+ base, base + size);
}
if (debug_print) {
printk(KERN_DEBUG "After WB checking\n");
for (i = 0; i < nr_range; i++)
- printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
- range[i].start, range[i].end + 1);
+ printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+ range[i].start, range[i].end);
}
/* Take out UC ranges: */
@@ -252,19 +106,19 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
size -= (1<<(20-PAGE_SHIFT)) - base;
base = 1<<(20-PAGE_SHIFT);
}
- subtract_range(range, base, base + size - 1);
+ subtract_range(range, RANGE_NUM, base, base + size);
}
if (extra_remove_size)
- subtract_range(range, extra_remove_base,
- extra_remove_base + extra_remove_size - 1);
+ subtract_range(range, RANGE_NUM, extra_remove_base,
+ extra_remove_base + extra_remove_size);
if (debug_print) {
printk(KERN_DEBUG "After UC checking\n");
for (i = 0; i < RANGE_NUM; i++) {
if (!range[i].end)
continue;
- printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
- range[i].start, range[i].end + 1);
+ printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+ range[i].start, range[i].end);
}
}
@@ -273,26 +127,22 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
if (debug_print) {
printk(KERN_DEBUG "After sorting\n");
for (i = 0; i < nr_range; i++)
- printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
- range[i].start, range[i].end + 1);
+ printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+ range[i].start, range[i].end);
}
- /* clear those is not used */
- for (i = nr_range; i < RANGE_NUM; i++)
- memset(&range[i], 0, sizeof(range[i]));
-
return nr_range;
}
#ifdef CONFIG_MTRR_SANITIZER
-static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
+static unsigned long __init sum_ranges(struct range *range, int nr_range)
{
unsigned long sum = 0;
int i;
for (i = 0; i < nr_range; i++)
- sum += range[i].end + 1 - range[i].start;
+ sum += range[i].end - range[i].start;
return sum;
}
@@ -621,7 +471,7 @@ static int __init parse_mtrr_spare_reg(char *arg)
early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
static int __init
-x86_setup_var_mtrrs(struct res_range *range, int nr_range,
+x86_setup_var_mtrrs(struct range *range, int nr_range,
u64 chunk_size, u64 gran_size)
{
struct var_mtrr_state var_state;
@@ -639,7 +489,7 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
/* Write the range: */
for (i = 0; i < nr_range; i++) {
set_var_mtrr_range(&var_state, range[i].start,
- range[i].end - range[i].start + 1);
+ range[i].end - range[i].start);
}
/* Write the last range: */
@@ -742,7 +592,7 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
unsigned long x_remove_base,
unsigned long x_remove_size, int i)
{
- static struct res_range range_new[RANGE_NUM];
+ static struct range range_new[RANGE_NUM];
unsigned long range_sums_new;
static int nr_range_new;
int num_reg;
@@ -869,10 +719,10 @@ int __init mtrr_cleanup(unsigned address_bits)
* [0, 1M) should always be covered by var mtrr with WB
* and fixed mtrrs should take effect before var mtrr for it:
*/
- nr_range = add_range_with_merge(range, nr_range, 0,
- (1ULL<<(20 - PAGE_SHIFT)) - 1);
+ nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0,
+ 1ULL<<(20 - PAGE_SHIFT));
/* Sort the ranges: */
- sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+ sort_range(range, nr_range);
range_sums = sum_ranges(range, nr_range);
printk(KERN_INFO "total RAM covered: %ldM\n",
@@ -1089,9 +939,9 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
nr_range = 0;
if (mtrr_tom2) {
range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
- range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
- if (highest_pfn < range[nr_range].end + 1)
- highest_pfn = range[nr_range].end + 1;
+ range[nr_range].end = mtrr_tom2 >> PAGE_SHIFT;
+ if (highest_pfn < range[nr_range].end)
+ highest_pfn = range[nr_range].end;
nr_range++;
}
nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
@@ -1103,15 +953,15 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
/* Check the holes: */
for (i = 0; i < nr_range - 1; i++) {
- if (range[i].end + 1 < range[i+1].start)
- total_trim_size += real_trim_memory(range[i].end + 1,
+ if (range[i].end < range[i+1].start)
+ total_trim_size += real_trim_memory(range[i].end,
range[i+1].start);
}
/* Check the top: */
i = nr_range - 1;
- if (range[i].end + 1 < end_pfn)
- total_trim_size += real_trim_memory(range[i].end + 1,
+ if (range[i].end < end_pfn)
+ total_trim_size += real_trim_memory(range[i].end,
end_pfn);
if (total_trim_size) {
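
The cleanup.c hunks above replace the driver-local res_range (inclusive end, implicit array size) with the generic struct range from <linux/range.h>, whose end is exclusive, and pass RANGE_NUM explicitly to the shared add_range_with_merge()/subtract_range()/sort_range() helpers. A minimal sketch of the exclusive-end convention; range_example and range_len() are illustrative only, not part of the patch:

/* mirrors struct range from <linux/range.h>: half-open [start, end) */
struct range_example { unsigned long long start, end; };

static unsigned long long range_len(const struct range_example *r)
{
	/* with an exclusive end there are no "+ 1"/"- 1" fixups:      */
	/* length = end - start, exactly as sum_ranges() now computes */
	return r->end - r->start;
}
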
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index fe4622e8c837..79556bd9b602 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -145,6 +145,7 @@ struct set_mtrr_data {
/**
* ipi_handler - Synchronisation handler. Executed by "other" CPUs.
+ * @info: pointer to mtrr configuration data
*
* Returns nothing.
*/
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a966b753e496..740b440fbd73 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -12,21 +12,13 @@
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bootmem.h>
-#include <linux/ioport.h>
-#include <linux/string.h>
-#include <linux/kexec.h>
-#include <linux/module.h>
-#include <linux/mm.h>
#include <linux/pfn.h>
#include <linux/suspend.h>
#include <linux/firmware-map.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/setup.h>
-#include <asm/trampoline.h>
/*
* The e820 map is the map that gets modified e.g. with command line parameters
@@ -730,319 +722,44 @@ core_initcall(e820_mark_nvs_memory);
#endif
/*
- * Early reserved memory areas.
- */
-#define MAX_EARLY_RES 32
-
-struct early_res {
- u64 start, end;
- char name[16];
- char overlap_ok;
-};
-static struct early_res early_res[MAX_EARLY_RES] __initdata = {
- { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_TRAMPOLINE)
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- */
- { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE", 1 },
-#endif
-
- {}
-};
-
-static int __init find_overlapped_early(u64 start, u64 end)
-{
- int i;
- struct early_res *r;
-
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
- r = &early_res[i];
- if (end > r->start && start < r->end)
- break;
- }
-
- return i;
-}
-
-/*
- * Drop the i-th range from the early reservation map,
- * by copying any higher ranges down one over it, and
- * clearing what had been the last slot.
- */
-static void __init drop_range(int i)
-{
- int j;
-
- for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
- ;
-
- memmove(&early_res[i], &early_res[i + 1],
- (j - 1 - i) * sizeof(struct early_res));
-
- early_res[j - 1].end = 0;
-}
-
-/*
- * Split any existing ranges that:
- * 1) are marked 'overlap_ok', and
- * 2) overlap with the stated range [start, end)
- * into whatever portion (if any) of the existing range is entirely
- * below or entirely above the stated range. Drop the portion
- * of the existing range that overlaps with the stated range,
- * which will allow the caller of this routine to then add that
- * stated range without conflicting with any existing range.
+ * Find a free area with specified alignment in a specific range.
*/
-static void __init drop_overlaps_that_are_ok(u64 start, u64 end)
+u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
{
int i;
- struct early_res *r;
- u64 lower_start, lower_end;
- u64 upper_start, upper_end;
- char name[16];
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
- r = &early_res[i];
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ u64 addr;
+ u64 ei_start, ei_last;
- /* Continue past non-overlapping ranges */
- if (end <= r->start || start >= r->end)
+ if (ei->type != E820_RAM)
continue;
- /*
- * Leave non-ok overlaps as is; let caller
- * panic "Overlapping early reservations"
- * when it hits this overlap.
- */
- if (!r->overlap_ok)
- return;
-
- /*
- * We have an ok overlap. We will drop it from the early
- * reservation map, and add back in any non-overlapping
- * portions (lower or upper) as separate, overlap_ok,
- * non-overlapping ranges.
- */
-
- /* 1. Note any non-overlapping (lower or upper) ranges. */
- strncpy(name, r->name, sizeof(name) - 1);
-
- lower_start = lower_end = 0;
- upper_start = upper_end = 0;
- if (r->start < start) {
- lower_start = r->start;
- lower_end = start;
- }
- if (r->end > end) {
- upper_start = end;
- upper_end = r->end;
- }
-
- /* 2. Drop the original ok overlapping range */
- drop_range(i);
-
- i--; /* resume for-loop on copied down entry */
-
- /* 3. Add back in any non-overlapping ranges. */
- if (lower_end)
- reserve_early_overlap_ok(lower_start, lower_end, name);
- if (upper_end)
- reserve_early_overlap_ok(upper_start, upper_end, name);
- }
-}
-
-static void __init __reserve_early(u64 start, u64 end, char *name,
- int overlap_ok)
-{
- int i;
- struct early_res *r;
-
- i = find_overlapped_early(start, end);
- if (i >= MAX_EARLY_RES)
- panic("Too many early reservations");
- r = &early_res[i];
- if (r->end)
- panic("Overlapping early reservations "
- "%llx-%llx %s to %llx-%llx %s\n",
- start, end - 1, name?name:"", r->start,
- r->end - 1, r->name);
- r->start = start;
- r->end = end;
- r->overlap_ok = overlap_ok;
- if (name)
- strncpy(r->name, name, sizeof(r->name) - 1);
-}
-
-/*
- * A few early reservtations come here.
- *
- * The 'overlap_ok' in the name of this routine does -not- mean it
- * is ok for these reservations to overlap an earlier reservation.
- * Rather it means that it is ok for subsequent reservations to
- * overlap this one.
- *
- * Use this entry point to reserve early ranges when you are doing
- * so out of "Paranoia", reserving perhaps more memory than you need,
- * just in case, and don't mind a subsequent overlapping reservation
- * that is known to be needed.
- *
- * The drop_overlaps_that_are_ok() call here isn't really needed.
- * It would be needed if we had two colliding 'overlap_ok'
- * reservations, so that the second such would not panic on the
- * overlap with the first. We don't have any such as of this
- * writing, but might as well tolerate such if it happens in
- * the future.
- */
-void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
-{
- drop_overlaps_that_are_ok(start, end);
- __reserve_early(start, end, name, 1);
-}
-
-/*
- * Most early reservations come here.
- *
- * We first have drop_overlaps_that_are_ok() drop any pre-existing
- * 'overlap_ok' ranges, so that we can then reserve this memory
- * range without risk of panic'ing on an overlapping overlap_ok
- * early reservation.
- */
-void __init reserve_early(u64 start, u64 end, char *name)
-{
- if (start >= end)
- return;
-
- drop_overlaps_that_are_ok(start, end);
- __reserve_early(start, end, name, 0);
-}
-
-void __init free_early(u64 start, u64 end)
-{
- struct early_res *r;
- int i;
-
- i = find_overlapped_early(start, end);
- r = &early_res[i];
- if (i >= MAX_EARLY_RES || r->end != end || r->start != start)
- panic("free_early on not reserved area: %llx-%llx!",
- start, end - 1);
-
- drop_range(i);
-}
-
-void __init early_res_to_bootmem(u64 start, u64 end)
-{
- int i, count;
- u64 final_start, final_end;
-
- count = 0;
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
- count++;
-
- printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n",
- count, start, end);
- for (i = 0; i < count; i++) {
- struct early_res *r = &early_res[i];
- printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i,
- r->start, r->end, r->name);
- final_start = max(start, r->start);
- final_end = min(end, r->end);
- if (final_start >= final_end) {
- printk(KERN_CONT "\n");
- continue;
- }
- printk(KERN_CONT " ==> [%010llx - %010llx]\n",
- final_start, final_end);
- reserve_bootmem_generic(final_start, final_end - final_start,
- BOOTMEM_DEFAULT);
- }
-}
+ ei_last = ei->addr + ei->size;
+ ei_start = ei->addr;
+ addr = find_early_area(ei_start, ei_last, start, end,
+ size, align);
-/* Check for already reserved areas */
-static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
-{
- int i;
- u64 addr = *addrp;
- int changed = 0;
- struct early_res *r;
-again:
- i = find_overlapped_early(addr, addr + size);
- r = &early_res[i];
- if (i < MAX_EARLY_RES && r->end) {
- *addrp = addr = round_up(r->end, align);
- changed = 1;
- goto again;
+ if (addr != -1ULL)
+ return addr;
}
- return changed;
+ return -1ULL;
}
-/* Check for already reserved areas */
-static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
+u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align)
{
- int i;
- u64 addr = *addrp, last;
- u64 size = *sizep;
- int changed = 0;
-again:
- last = addr + size;
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
- struct early_res *r = &early_res[i];
- if (last > r->start && addr < r->start) {
- size = r->start - addr;
- changed = 1;
- goto again;
- }
- if (last > r->end && addr < r->end) {
- addr = round_up(r->end, align);
- size = last - addr;
- changed = 1;
- goto again;
- }
- if (last <= r->end && addr >= r->start) {
- (*sizep)++;
- return 0;
- }
- }
- if (changed) {
- *addrp = addr;
- *sizep = size;
- }
- return changed;
+ return find_e820_area(start, end, size, align);
}
-/*
- * Find a free area with specified alignment in a specific range.
- */
-u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
+u64 __init get_max_mapped(void)
{
- int i;
+ u64 end = max_pfn_mapped;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- u64 addr, last;
- u64 ei_last;
+ end <<= PAGE_SHIFT;
- if (ei->type != E820_RAM)
- continue;
- addr = round_up(ei->addr, align);
- ei_last = ei->addr + ei->size;
- if (addr < start)
- addr = round_up(start, align);
- if (addr >= ei_last)
- continue;
- while (bad_addr(&addr, size, align) && addr+size <= ei_last)
- ;
- last = addr + size;
- if (last > ei_last)
- continue;
- if (last > end)
- continue;
- return addr;
- }
- return -1ULL;
+ return end;
}
-
/*
* Find next free range after *start
*/
@@ -1052,25 +769,19 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
for (i = 0; i < e820.nr_map; i++) {
struct e820entry *ei = &e820.map[i];
- u64 addr, last;
- u64 ei_last;
+ u64 addr;
+ u64 ei_start, ei_last;
if (ei->type != E820_RAM)
continue;
- addr = round_up(ei->addr, align);
+
ei_last = ei->addr + ei->size;
- if (addr < start)
- addr = round_up(start, align);
- if (addr >= ei_last)
- continue;
- *sizep = ei_last - addr;
- while (bad_addr_size(&addr, sizep, align) &&
- addr + *sizep <= ei_last)
- ;
- last = addr + *sizep;
- if (last > ei_last)
- continue;
- return addr;
+ ei_start = ei->addr;
+ addr = find_early_area_size(ei_start, ei_last, start,
+ sizep, align);
+
+ if (addr != -1ULL)
+ return addr;
}
return -1ULL;
@@ -1429,6 +1140,8 @@ void __init e820_reserve_resources_late(void)
end = MAX_RESOURCE_SIZE;
if (start >= end)
continue;
+ printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ",
+ start, end);
reserve_region_with_split(&iomem_resource, start, end,
"RAM buffer");
}
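
With the early_res bookkeeping moved out of e820.c, find_e820_area() above simply walks the E820 RAM entries and defers the "skip already-reserved areas" logic to find_early_area() from the shared early_res code; get_max_mapped() is a small new helper returning max_pfn_mapped in bytes. A hedged usage sketch of the find-then-reserve pattern these routines serve (alloc_early_buf() and the "EXAMPLE BUF" label are hypothetical, not from the patch):

/* sketch only: find free RAM below the mapped limit, then pin it */
static u64 __init alloc_early_buf(u64 size)
{
	u64 addr = find_e820_area(0, get_max_mapped(), size, PAGE_SIZE);

	if (addr == -1ULL)
		return 0;			/* no suitable free area */
	reserve_early(addr, addr + size, "EXAMPLE BUF");
	return addr;
}
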
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 5051b94c9069..adedeef1dedc 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -29,6 +29,16 @@ static void __init i386_default_early_setup(void)
void __init i386_start_kernel(void)
{
+#ifdef CONFIG_X86_TRAMPOLINE
+ /*
+ * But first pinch a few for the stack/trampoline stuff
+ * FIXME: Don't need the extra page at 4K, but need to fix
+ * trampoline before removing it. (see the GDT stuff)
+ */
+ reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE,
+ "EX TRAMPOLINE");
+#endif
+
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 7fd318bac59c..37c3d4b17d85 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -442,8 +442,8 @@ is386: movl $2,%ecx # set MP
*/
cmpb $0,ready
jne 1f
- movl $per_cpu__gdt_page,%eax
- movl $per_cpu__stack_canary,%ecx
+ movl $gdt_page,%eax
+ movl $stack_canary,%ecx
movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
shrl $16, %ecx
movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
@@ -706,7 +706,7 @@ idt_descr:
.word 0 # 32 bit align gdt_desc.address
ENTRY(early_gdt_descr)
.word GDT_ENTRIES*8-1
- .long per_cpu__gdt_page /* Overwritten for secondary CPUs */
+ .long gdt_page /* Overwritten for secondary CPUs */
/*
* The boot_gdt must mirror the equivalent in setup.S and is
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ad80a1c718c6..ee4fa1bfcb33 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -266,7 +266,7 @@ static void hpet_resume_device(void)
force_hpet_resume();
}
-static void hpet_resume_counter(void)
+static void hpet_resume_counter(struct clocksource *cs)
{
hpet_resume_device();
hpet_restart_counter();
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index df89102bef80..8c93a84bb627 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -32,7 +32,7 @@
*/
static int i8259A_auto_eoi;
-DEFINE_SPINLOCK(i8259A_lock);
+DEFINE_RAW_SPINLOCK(i8259A_lock);
static void mask_and_ack_8259A(unsigned int);
struct irq_chip i8259A_chip = {
@@ -68,13 +68,13 @@ void disable_8259A_irq(unsigned int irq)
unsigned int mask = 1 << irq;
unsigned long flags;
- spin_lock_irqsave(&i8259A_lock, flags);
+ raw_spin_lock_irqsave(&i8259A_lock, flags);
cached_irq_mask |= mask;
if (irq & 8)
outb(cached_slave_mask, PIC_SLAVE_IMR);
else
outb(cached_master_mask, PIC_MASTER_IMR);
- spin_unlock_irqrestore(&i8259A_lock, flags);
+ raw_spin_unlock_irqrestore(&i8259A_lock, flags);
}
void enable_8259A_irq(unsigned int irq)
@@ -82,13 +82,13 @@ void enable_8259A_irq(unsigned int irq)
unsigned int mask = ~(1 << irq);
unsigned long flags;
- spin_lock_irqsave(&i8259A_lock, flags);
+ raw_spin_lock_irqsave(&i8259A_lock, flags);
cached_irq_mask &= mask;
if (irq & 8)
outb(cached_slave_mask, PIC_SLAVE_IMR);
else
outb(cached_master_mask, PIC_MASTER_IMR);
- spin_unlock_irqrestore(&i8259A_lock, flags);
+ raw_spin_unlock_irqrestore(&i8259A_lock, flags);
}
int i8259A_irq_pending(unsigned int irq)
@@ -97,12 +97,12 @@ int i8259A_irq_pending(unsigned int irq)
unsigned long flags;
int ret;
- spin_lock_irqsave(&i8259A_lock, flags);
+ raw_spin_lock_irqsave(&i8259A_lock, flags);
if (irq < 8)
ret = inb(PIC_MASTER_CMD) & mask;
else
ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
- spin_unlock_irqrestore(&i8259A_lock, flags);
+ raw_spin_unlock_irqrestore(&i8259A_lock, flags);
return ret;
}
@@ -150,7 +150,7 @@ static void mask_and_ack_8259A(unsigned int irq)
unsigned int irqmask = 1 << irq;
unsigned long flags;
- spin_lock_irqsave(&i8259A_lock, flags);
+ raw_spin_lock_irqsave(&i8259A_lock, flags);
/*
* Lightweight spurious IRQ detection. We do not want
* to overdo spurious IRQ handling - it's usually a sign
@@ -183,7 +183,7 @@ handle_real_irq:
outb(cached_master_mask, PIC_MASTER_IMR);
outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */
}
- spin_unlock_irqrestore(&i8259A_lock, flags);
+ raw_spin_unlock_irqrestore(&i8259A_lock, flags);
return;
spurious_8259A_irq:
@@ -285,24 +285,24 @@ void mask_8259A(void)
{
unsigned long flags;
- spin_lock_irqsave(&i8259A_lock, flags);
+ raw_spin_lock_irqsave(&i8259A_lock, flags);
outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
- spin_unlock_irqrestore(&i8259A_lock, flags);
+ raw_spin_unlock_irqrestore(&i8259A_lock, flags);
}
void unmask_8259A(void)
{
unsigned long flags;
- spin_lock_irqsave(&i8259A_lock, flags);
+ raw_spin_lock_irqsave(&i8259A_lock, flags);
outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
- spin_unlock_irqrestore(&i8259A_lock, flags);
+ raw_spin_unlock_irqrestore(&i8259A_lock, flags);
}
void init_8259A(int auto_eoi)
@@ -311,7 +311,7 @@ void init_8259A(int auto_eoi)
i8259A_auto_eoi = auto_eoi;
- spin_lock_irqsave(&i8259A_lock, flags);
+ raw_spin_lock_irqsave(&i8259A_lock, flags);
outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
@@ -356,5 +356,5 @@ void init_8259A(int auto_eoi)
outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
- spin_unlock_irqrestore(&i8259A_lock, flags);
+ raw_spin_unlock_irqrestore(&i8259A_lock, flags);
}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index d5932226614f..fce55d532631 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -84,24 +84,7 @@ static struct irqaction irq2 = {
};
DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
- [0 ... IRQ0_VECTOR - 1] = -1,
- [IRQ0_VECTOR] = 0,
- [IRQ1_VECTOR] = 1,
- [IRQ2_VECTOR] = 2,
- [IRQ3_VECTOR] = 3,
- [IRQ4_VECTOR] = 4,
- [IRQ5_VECTOR] = 5,
- [IRQ6_VECTOR] = 6,
- [IRQ7_VECTOR] = 7,
- [IRQ8_VECTOR] = 8,
- [IRQ9_VECTOR] = 9,
- [IRQ10_VECTOR] = 10,
- [IRQ11_VECTOR] = 11,
- [IRQ12_VECTOR] = 12,
- [IRQ13_VECTOR] = 13,
- [IRQ14_VECTOR] = 14,
- [IRQ15_VECTOR] = 15,
- [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
+ [0 ... NR_VECTORS - 1] = -1,
};
int vector_used_by_percpu_irq(unsigned int vector)
@@ -116,6 +99,9 @@ int vector_used_by_percpu_irq(unsigned int vector)
return 0;
}
+/* Number of legacy interrupts */
+int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY;
+
void __init init_ISA_irqs(void)
{
int i;
@@ -142,6 +128,19 @@ void __init init_ISA_irqs(void)
void __init init_IRQ(void)
{
+ int i;
+
+ /*
+	 * On CPU 0, assign IRQ0_VECTOR..IRQ15_VECTOR to IRQs 0..15.
+	 * If these IRQs are handled by a legacy interrupt controller such as
+	 * the PIC, this configuration will likely remain static after boot.
+	 * If they are handled by a more modern controller such as the
+	 * IO-APIC, this vector space can be freed and re-used dynamically as
+	 * the IRQs migrate, etc.
+ */
+ for (i = 0; i < nr_legacy_irqs; i++)
+ per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i;
+
x86_init.irqs.intr_init();
}
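
The vector_irq table now starts out all -1; only the boot CPU's legacy vectors are filled in here, and secondary CPUs populate their copies in smp_callin() via __setup_vector_irq() (see the smpboot.c hunk further down). A hedged sketch of the lookup this table serves at interrupt time; vector_to_irq() is illustrative and roughly what do_IRQ() does:

static int vector_to_irq(unsigned int vector)
{
	/* -1 means no IRQ is bound to this vector on this CPU */
	return __get_cpu_var(vector_irq)[vector];
}
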
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 5de9f4a9c3fd..b43bbaebe2c0 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -49,6 +49,7 @@
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
+#include <linux/ftrace.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
@@ -106,16 +107,22 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
};
const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
-/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
-static void __kprobes set_jmp_op(void *from, void *to)
+static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
{
- struct __arch_jmp_op {
- char op;
+ struct __arch_relative_insn {
+ u8 op;
s32 raddr;
- } __attribute__((packed)) * jop;
- jop = (struct __arch_jmp_op *)from;
- jop->raddr = (s32)((long)(to) - ((long)(from) + 5));
- jop->op = RELATIVEJUMP_INSTRUCTION;
+ } __attribute__((packed)) *insn;
+
+ insn = (struct __arch_relative_insn *)from;
+ insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
+ insn->op = op;
+}
+
+/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
+static void __kprobes synthesize_reljump(void *from, void *to)
+{
+ __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
}
/*
@@ -202,7 +209,7 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
/*
* Basically, kp->ainsn.insn has an original instruction.
* However, RIP-relative instruction can not do single-stepping
- * at different place, fix_riprel() tweaks the displacement of
+ * at different place, __copy_instruction() tweaks the displacement of
* that instruction. In that case, we can't recover the instruction
* from the kp->ainsn.insn.
*
@@ -284,21 +291,37 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
}
/*
- * Adjust the displacement if the instruction uses the %rip-relative
- * addressing mode.
+ * Copy an instruction and adjust the displacement if the instruction
+ * uses the %rip-relative addressing mode.
* If it does, Return the address of the 32-bit displacement word.
* If not, return null.
* Only applicable to 64-bit x86.
*/
-static void __kprobes fix_riprel(struct kprobe *p)
+static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
{
-#ifdef CONFIG_X86_64
struct insn insn;
- kernel_insn_init(&insn, p->ainsn.insn);
+ int ret;
+ kprobe_opcode_t buf[MAX_INSN_SIZE];
+ kernel_insn_init(&insn, src);
+ if (recover) {
+ insn_get_opcode(&insn);
+ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
+ ret = recover_probed_instruction(buf,
+ (unsigned long)src);
+ if (ret)
+ return 0;
+ kernel_insn_init(&insn, buf);
+ }
+ }
+ insn_get_length(&insn);
+ memcpy(dest, insn.kaddr, insn.length);
+
+#ifdef CONFIG_X86_64
if (insn_rip_relative(&insn)) {
s64 newdisp;
u8 *disp;
+ kernel_insn_init(&insn, dest);
insn_get_displacement(&insn);
/*
* The copied instruction uses the %rip-relative addressing
@@ -312,20 +335,23 @@ static void __kprobes fix_riprel(struct kprobe *p)
* extension of the original signed 32-bit displacement would
* have given.
*/
- newdisp = (u8 *) p->addr + (s64) insn.displacement.value -
- (u8 *) p->ainsn.insn;
+ newdisp = (u8 *) src + (s64) insn.displacement.value -
+ (u8 *) dest;
BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
- disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn);
+ disp = (u8 *) dest + insn_offset_displacement(&insn);
*(s32 *) disp = (s32) newdisp;
}
#endif
+ return insn.length;
}
static void __kprobes arch_copy_kprobe(struct kprobe *p)
{
- memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
-
- fix_riprel(p);
+ /*
+ * Copy an instruction without recovering int3, because it will be
+ * put by another subsystem.
+ */
+ __copy_instruction(p->ainsn.insn, p->addr, 0);
if (can_boost(p->addr))
p->ainsn.boostable = 0;
@@ -406,18 +432,6 @@ static void __kprobes restore_btf(void)
update_debugctlmsr(current->thread.debugctlmsr);
}
-static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
-{
- clear_btf();
- regs->flags |= X86_EFLAGS_TF;
- regs->flags &= ~X86_EFLAGS_IF;
- /* single step inline if the instruction is an int3 */
- if (p->opcode == BREAKPOINT_INSTRUCTION)
- regs->ip = (unsigned long)p->addr;
- else
- regs->ip = (unsigned long)p->ainsn.insn;
-}
-
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
@@ -429,20 +443,50 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
*sara = (unsigned long) &kretprobe_trampoline;
}
+#ifdef CONFIG_OPTPROBES
+static int __kprobes setup_detour_execution(struct kprobe *p,
+ struct pt_regs *regs,
+ int reenter);
+#else
+#define setup_detour_execution(p, regs, reenter) (0)
+#endif
+
static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
+ struct kprobe_ctlblk *kcb, int reenter)
{
+ if (setup_detour_execution(p, regs, reenter))
+ return;
+
#if !defined(CONFIG_PREEMPT)
if (p->ainsn.boostable == 1 && !p->post_handler) {
/* Boost up -- we can execute copied instructions directly */
- reset_current_kprobe();
+ if (!reenter)
+ reset_current_kprobe();
+ /*
+ * Reentering boosted probe doesn't reset current_kprobe,
+ * nor set current_kprobe, because it doesn't use single
+ * stepping.
+ */
regs->ip = (unsigned long)p->ainsn.insn;
preempt_enable_no_resched();
return;
}
#endif
- prepare_singlestep(p, regs);
- kcb->kprobe_status = KPROBE_HIT_SS;
+ if (reenter) {
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, regs, kcb);
+ kcb->kprobe_status = KPROBE_REENTER;
+ } else
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ /* Prepare real single stepping */
+ clear_btf();
+ regs->flags |= X86_EFLAGS_TF;
+ regs->flags &= ~X86_EFLAGS_IF;
+ /* single step inline if the instruction is an int3 */
+ if (p->opcode == BREAKPOINT_INSTRUCTION)
+ regs->ip = (unsigned long)p->addr;
+ else
+ regs->ip = (unsigned long)p->ainsn.insn;
}
/*
@@ -456,11 +500,8 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
switch (kcb->kprobe_status) {
case KPROBE_HIT_SSDONE:
case KPROBE_HIT_ACTIVE:
- save_previous_kprobe(kcb);
- set_current_kprobe(p, regs, kcb);
kprobes_inc_nmissed_count(p);
- prepare_singlestep(p, regs);
- kcb->kprobe_status = KPROBE_REENTER;
+ setup_singlestep(p, regs, kcb, 1);
break;
case KPROBE_HIT_SS:
/* A probe has been hit in the codepath leading up to, or just
@@ -535,13 +576,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
* more here.
*/
if (!p->pre_handler || !p->pre_handler(p, regs))
- setup_singlestep(p, regs, kcb);
+ setup_singlestep(p, regs, kcb, 0);
return 1;
}
} else if (kprobe_running()) {
p = __get_cpu_var(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
- setup_singlestep(p, regs, kcb);
+ setup_singlestep(p, regs, kcb, 0);
return 1;
}
} /* else: not a kprobe fault; let the kernel handle it */
@@ -550,6 +591,69 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
return 0;
}
+#ifdef CONFIG_X86_64
+#define SAVE_REGS_STRING \
+ /* Skip cs, ip, orig_ax. */ \
+ " subq $24, %rsp\n" \
+ " pushq %rdi\n" \
+ " pushq %rsi\n" \
+ " pushq %rdx\n" \
+ " pushq %rcx\n" \
+ " pushq %rax\n" \
+ " pushq %r8\n" \
+ " pushq %r9\n" \
+ " pushq %r10\n" \
+ " pushq %r11\n" \
+ " pushq %rbx\n" \
+ " pushq %rbp\n" \
+ " pushq %r12\n" \
+ " pushq %r13\n" \
+ " pushq %r14\n" \
+ " pushq %r15\n"
+#define RESTORE_REGS_STRING \
+ " popq %r15\n" \
+ " popq %r14\n" \
+ " popq %r13\n" \
+ " popq %r12\n" \
+ " popq %rbp\n" \
+ " popq %rbx\n" \
+ " popq %r11\n" \
+ " popq %r10\n" \
+ " popq %r9\n" \
+ " popq %r8\n" \
+ " popq %rax\n" \
+ " popq %rcx\n" \
+ " popq %rdx\n" \
+ " popq %rsi\n" \
+ " popq %rdi\n" \
+ /* Skip orig_ax, ip, cs */ \
+ " addq $24, %rsp\n"
+#else
+#define SAVE_REGS_STRING \
+ /* Skip cs, ip, orig_ax and gs. */ \
+ " subl $16, %esp\n" \
+ " pushl %fs\n" \
+ " pushl %ds\n" \
+ " pushl %es\n" \
+ " pushl %eax\n" \
+ " pushl %ebp\n" \
+ " pushl %edi\n" \
+ " pushl %esi\n" \
+ " pushl %edx\n" \
+ " pushl %ecx\n" \
+ " pushl %ebx\n"
+#define RESTORE_REGS_STRING \
+ " popl %ebx\n" \
+ " popl %ecx\n" \
+ " popl %edx\n" \
+ " popl %esi\n" \
+ " popl %edi\n" \
+ " popl %ebp\n" \
+ " popl %eax\n" \
+ /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
+ " addl $24, %esp\n"
+#endif
+
/*
* When a retprobed function returns, this code saves registers and
 * then calls trampoline_handler(), which calls the kretprobe's handler.
@@ -563,65 +667,16 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
/* We don't bother saving the ss register */
" pushq %rsp\n"
" pushfq\n"
- /*
- * Skip cs, ip, orig_ax.
- * trampoline_handler() will plug in these values
- */
- " subq $24, %rsp\n"
- " pushq %rdi\n"
- " pushq %rsi\n"
- " pushq %rdx\n"
- " pushq %rcx\n"
- " pushq %rax\n"
- " pushq %r8\n"
- " pushq %r9\n"
- " pushq %r10\n"
- " pushq %r11\n"
- " pushq %rbx\n"
- " pushq %rbp\n"
- " pushq %r12\n"
- " pushq %r13\n"
- " pushq %r14\n"
- " pushq %r15\n"
+ SAVE_REGS_STRING
" movq %rsp, %rdi\n"
" call trampoline_handler\n"
/* Replace saved sp with true return address. */
" movq %rax, 152(%rsp)\n"
- " popq %r15\n"
- " popq %r14\n"
- " popq %r13\n"
- " popq %r12\n"
- " popq %rbp\n"
- " popq %rbx\n"
- " popq %r11\n"
- " popq %r10\n"
- " popq %r9\n"
- " popq %r8\n"
- " popq %rax\n"
- " popq %rcx\n"
- " popq %rdx\n"
- " popq %rsi\n"
- " popq %rdi\n"
- /* Skip orig_ax, ip, cs */
- " addq $24, %rsp\n"
+ RESTORE_REGS_STRING
" popfq\n"
#else
" pushf\n"
- /*
- * Skip cs, ip, orig_ax and gs.
- * trampoline_handler() will plug in these values
- */
- " subl $16, %esp\n"
- " pushl %fs\n"
- " pushl %es\n"
- " pushl %ds\n"
- " pushl %eax\n"
- " pushl %ebp\n"
- " pushl %edi\n"
- " pushl %esi\n"
- " pushl %edx\n"
- " pushl %ecx\n"
- " pushl %ebx\n"
+ SAVE_REGS_STRING
" movl %esp, %eax\n"
" call trampoline_handler\n"
/* Move flags to cs */
@@ -629,15 +684,7 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
" movl %edx, 52(%esp)\n"
/* Replace saved flags with true return address. */
" movl %eax, 56(%esp)\n"
- " popl %ebx\n"
- " popl %ecx\n"
- " popl %edx\n"
- " popl %esi\n"
- " popl %edi\n"
- " popl %ebp\n"
- " popl %eax\n"
- /* Skip ds, es, fs, gs, orig_ax and ip */
- " addl $24, %esp\n"
+ RESTORE_REGS_STRING
" popf\n"
#endif
" ret\n");
@@ -805,8 +852,8 @@ static void __kprobes resume_execution(struct kprobe *p,
* These instructions can be executed directly if it
* jumps back to correct address.
*/
- set_jmp_op((void *)regs->ip,
- (void *)orig_ip + (regs->ip - copy_ip));
+ synthesize_reljump((void *)regs->ip,
+ (void *)orig_ip + (regs->ip - copy_ip));
p->ainsn.boostable = 1;
} else {
p->ainsn.boostable = -1;
@@ -1033,6 +1080,358 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
+
+#ifdef CONFIG_OPTPROBES
+
+/* Insert a call instruction at address 'from', which calls address 'to'.*/
+static void __kprobes synthesize_relcall(void *from, void *to)
+{
+ __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
+}
+
+/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
+static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
+ unsigned long val)
+{
+#ifdef CONFIG_X86_64
+ *addr++ = 0x48;
+ *addr++ = 0xbf;
+#else
+ *addr++ = 0xb8;
+#endif
+ *(unsigned long *)addr = val;
+}
+
+void __kprobes kprobes_optinsn_template_holder(void)
+{
+ asm volatile (
+ ".global optprobe_template_entry\n"
+ "optprobe_template_entry: \n"
+#ifdef CONFIG_X86_64
+ /* We don't bother saving the ss register */
+ " pushq %rsp\n"
+ " pushfq\n"
+ SAVE_REGS_STRING
+ " movq %rsp, %rsi\n"
+ ".global optprobe_template_val\n"
+ "optprobe_template_val: \n"
+ ASM_NOP5
+ ASM_NOP5
+ ".global optprobe_template_call\n"
+ "optprobe_template_call: \n"
+ ASM_NOP5
+ /* Move flags to rsp */
+ " movq 144(%rsp), %rdx\n"
+ " movq %rdx, 152(%rsp)\n"
+ RESTORE_REGS_STRING
+ /* Skip flags entry */
+ " addq $8, %rsp\n"
+ " popfq\n"
+#else /* CONFIG_X86_32 */
+ " pushf\n"
+ SAVE_REGS_STRING
+ " movl %esp, %edx\n"
+ ".global optprobe_template_val\n"
+ "optprobe_template_val: \n"
+ ASM_NOP5
+ ".global optprobe_template_call\n"
+ "optprobe_template_call: \n"
+ ASM_NOP5
+ RESTORE_REGS_STRING
+ " addl $4, %esp\n" /* skip cs */
+ " popf\n"
+#endif
+ ".global optprobe_template_end\n"
+ "optprobe_template_end: \n");
+}
+
+#define TMPL_MOVE_IDX \
+ ((long)&optprobe_template_val - (long)&optprobe_template_entry)
+#define TMPL_CALL_IDX \
+ ((long)&optprobe_template_call - (long)&optprobe_template_entry)
+#define TMPL_END_IDX \
+ ((long)&optprobe_template_end - (long)&optprobe_template_entry)
+
+#define INT3_SIZE sizeof(kprobe_opcode_t)
+
+/* Optimized kprobe call back function: called from optinsn */
+static void __kprobes optimized_callback(struct optimized_kprobe *op,
+ struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ preempt_disable();
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(&op->kp);
+ } else {
+ /* Save skipped registers */
+#ifdef CONFIG_X86_64
+ regs->cs = __KERNEL_CS;
+#else
+ regs->cs = __KERNEL_CS | get_kernel_rpl();
+ regs->gs = 0;
+#endif
+ regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
+ regs->orig_ax = ~0UL;
+
+ __get_cpu_var(current_kprobe) = &op->kp;
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ opt_pre_handler(&op->kp, regs);
+ __get_cpu_var(current_kprobe) = NULL;
+ }
+ preempt_enable_no_resched();
+}
+
+static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
+{
+ int len = 0, ret;
+
+ while (len < RELATIVEJUMP_SIZE) {
+ ret = __copy_instruction(dest + len, src + len, 1);
+ if (!ret || !can_boost(dest + len))
+ return -EINVAL;
+ len += ret;
+ }
+ /* Check whether the address range is reserved */
+ if (ftrace_text_reserved(src, src + len - 1) ||
+ alternatives_text_reserved(src, src + len - 1))
+ return -EBUSY;
+
+ return len;
+}
+
+/* Check whether insn is indirect jump */
+static int __kprobes insn_is_indirect_jump(struct insn *insn)
+{
+ return ((insn->opcode.bytes[0] == 0xff &&
+ (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
+ insn->opcode.bytes[0] == 0xea); /* Segment based jump */
+}
+
+/* Check whether insn jumps into specified address range */
+static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
+{
+ unsigned long target = 0;
+
+ switch (insn->opcode.bytes[0]) {
+ case 0xe0: /* loopne */
+ case 0xe1: /* loope */
+ case 0xe2: /* loop */
+ case 0xe3: /* jcxz */
+ case 0xe9: /* near relative jump */
+ case 0xeb: /* short relative jump */
+ break;
+ case 0x0f:
+ if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
+ break;
+ return 0;
+ default:
+ if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
+ break;
+ return 0;
+ }
+ target = (unsigned long)insn->next_byte + insn->immediate.value;
+
+ return (start <= target && target <= start + len);
+}
+
+/* Decode the whole function to ensure no instruction jumps into the target */
+static int __kprobes can_optimize(unsigned long paddr)
+{
+ int ret;
+ unsigned long addr, size = 0, offset = 0;
+ struct insn insn;
+ kprobe_opcode_t buf[MAX_INSN_SIZE];
+ /* Dummy buffers for lookup_symbol_attrs */
+ static char __dummy_buf[KSYM_NAME_LEN];
+
+ /* Lookup symbol including addr */
+ if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf))
+ return 0;
+
+ /* Check there is enough space for a relative jump. */
+ if (size - offset < RELATIVEJUMP_SIZE)
+ return 0;
+
+ /* Decode instructions */
+ addr = paddr - offset;
+ while (addr < paddr - offset + size) { /* Decode until function end */
+ if (search_exception_tables(addr))
+ /*
+			 * Since some fixup code will jump into this function,
+			 * we can't optimize kprobes in this function.
+ */
+ return 0;
+ kernel_insn_init(&insn, (void *)addr);
+ insn_get_opcode(&insn);
+ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
+ ret = recover_probed_instruction(buf, addr);
+ if (ret)
+ return 0;
+ kernel_insn_init(&insn, buf);
+ }
+ insn_get_length(&insn);
+ /* Recover address */
+ insn.kaddr = (void *)addr;
+ insn.next_byte = (void *)(addr + insn.length);
+		/* Check that no instruction jumps into the target */
+ if (insn_is_indirect_jump(&insn) ||
+ insn_jump_into_range(&insn, paddr + INT3_SIZE,
+ RELATIVE_ADDR_SIZE))
+ return 0;
+ addr += insn.length;
+ }
+
+ return 1;
+}
+
+/* Check optimized_kprobe can actually be optimized. */
+int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
+{
+ int i;
+ struct kprobe *p;
+
+ for (i = 1; i < op->optinsn.size; i++) {
+ p = get_kprobe(op->kp.addr + i);
+ if (p && !kprobe_disabled(p))
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+/* Check the addr is within the optimized instructions. */
+int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op,
+ unsigned long addr)
+{
+ return ((unsigned long)op->kp.addr <= addr &&
+ (unsigned long)op->kp.addr + op->optinsn.size > addr);
+}
+
+/* Free optimized instruction slot */
+static __kprobes
+void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
+{
+ if (op->optinsn.insn) {
+ free_optinsn_slot(op->optinsn.insn, dirty);
+ op->optinsn.insn = NULL;
+ op->optinsn.size = 0;
+ }
+}
+
+void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+{
+ __arch_remove_optimized_kprobe(op, 1);
+}
+
+/*
+ * Copy replacing target instructions
+ * Target instructions MUST be relocatable (checked inside)
+ */
+int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
+{
+ u8 *buf;
+ int ret;
+ long rel;
+
+ if (!can_optimize((unsigned long)op->kp.addr))
+ return -EILSEQ;
+
+ op->optinsn.insn = get_optinsn_slot();
+ if (!op->optinsn.insn)
+ return -ENOMEM;
+
+ /*
+ * Verify if the address gap is in 2GB range, because this uses
+ * a relative jump.
+ */
+ rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
+ if (abs(rel) > 0x7fffffff)
+ return -ERANGE;
+
+ buf = (u8 *)op->optinsn.insn;
+
+ /* Copy instructions into the out-of-line buffer */
+ ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
+ if (ret < 0) {
+ __arch_remove_optimized_kprobe(op, 0);
+ return ret;
+ }
+ op->optinsn.size = ret;
+
+ /* Copy arch-dep-instance from template */
+ memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
+
+ /* Set probe information */
+ synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
+
+ /* Set probe function call */
+ synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
+
+ /* Set returning jmp instruction at the tail of out-of-line buffer */
+ synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
+ (u8 *)op->kp.addr + op->optinsn.size);
+
+ flush_icache_range((unsigned long) buf,
+ (unsigned long) buf + TMPL_END_IDX +
+ op->optinsn.size + RELATIVEJUMP_SIZE);
+ return 0;
+}
+
+/* Replace a breakpoint (int3) with a relative jump. */
+int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
+{
+ unsigned char jmp_code[RELATIVEJUMP_SIZE];
+ s32 rel = (s32)((long)op->optinsn.insn -
+ ((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
+ /* Backup instructions which will be replaced by jump address */
+ memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
+ RELATIVE_ADDR_SIZE);
+
+ jmp_code[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&jmp_code[1]) = rel;
+
+ /*
+ * text_poke_smp doesn't support NMI/MCE code modifying.
+ * However, since kprobes itself also doesn't support NMI/MCE
+ * code probing, it's not a problem.
+ */
+ text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);
+ return 0;
+}
+
+/* Replace a relative jump with a breakpoint (int3). */
+void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+ u8 buf[RELATIVEJUMP_SIZE];
+
+ /* Set int3 to first byte for kprobes */
+ buf[0] = BREAKPOINT_INSTRUCTION;
+ memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+ text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
+}
+
+static int __kprobes setup_detour_execution(struct kprobe *p,
+ struct pt_regs *regs,
+ int reenter)
+{
+ struct optimized_kprobe *op;
+
+ if (p->flags & KPROBE_FLAG_OPTIMIZED) {
+ /* This kprobe is really able to run optimized path. */
+ op = container_of(p, struct optimized_kprobe, kp);
+ /* Detour through copied instructions */
+ regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
+ if (!reenter)
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ return 1;
+ }
+ return 0;
+}
+#endif
+
int __init arch_init_kprobes(void)
{
return 0;
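
The optimized-kprobe buffer laid out in arch_prepare_optimized_kprobe() above holds, in order: the register-saving template (TMPL_END_IDX bytes), the relocated copy of the probed instructions (op->optinsn.size), and a relative jump back to the instruction following the probed region. Because the detour uses a 5-byte relative jmp, both the jump into the buffer and the jump back must fit a signed 32-bit displacement, hence the abs(rel) > 0x7fffffff check. A small sketch of that displacement math; reljump_displacement() is illustrative only:

static long reljump_displacement(unsigned long from, unsigned long to)
{
	/* rel32 is measured from the byte after the 5-byte jmp */
	return (long)to - ((long)from + 5 /* RELATIVEJUMP_SIZE */);
}
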
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index 712d15fdc416..71825806cd44 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -7,6 +7,8 @@
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/dmi.h>
+#include <linux/range.h>
+
#include <asm/pci-direct.h>
#include <linux/sort.h>
#include <asm/io.h>
@@ -30,11 +32,6 @@ static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = {
{ 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 },
};
-struct range {
- u64 start;
- u64 end;
-};
-
static int __cpuinit cmp_range(const void *x1, const void *x2)
{
const struct range *r1 = x1;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1b1739d16310..1db183ed7c01 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -428,10 +428,6 @@ struct pv_mmu_ops pv_mmu_ops = {
.ptep_modify_prot_start = __ptep_modify_prot_start,
.ptep_modify_prot_commit = __ptep_modify_prot_commit,
-#ifdef CONFIG_HIGHPTE
- .kmap_atomic_pte = kmap_atomic,
-#endif
-
#if PAGETABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
.set_pte_atomic = native_set_pte_atomic,
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 75e14e21f61a..1aa966c565f9 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -65,7 +65,7 @@ int dma_set_mask(struct device *dev, u64 mask)
}
EXPORT_SYMBOL(dma_set_mask);
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && !defined(CONFIG_NUMA)
static __initdata void *dma32_bootmem_ptr;
static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
@@ -116,14 +116,21 @@ static void __init dma32_free_bootmem(void)
dma32_bootmem_ptr = NULL;
dma32_bootmem_size = 0;
}
+#else
+void __init dma32_reserve_bootmem(void)
+{
+}
+static void __init dma32_free_bootmem(void)
+{
+}
+
#endif
void __init pci_iommu_alloc(void)
{
-#ifdef CONFIG_X86_64
/* free the range so iommu could get some range less than 4G */
dma32_free_bootmem();
-#endif
+
if (pci_swiotlb_detect())
goto out;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 704bddcdf64d..8e1aac86b50c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -461,6 +461,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"),
},
},
+ { /* Handle problems with rebooting on the iMac9,1. */
+ .callback = set_pci_reboot,
+ .ident = "Apple iMac9,1",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"),
+ },
+ },
{ }
};
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cb42109a55b4..5d7ba1a449bd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -969,15 +969,11 @@ void __init setup_arch(char **cmdline_p)
#endif
initmem_init(0, max_pfn, acpi, k8);
+#ifndef CONFIG_NO_BOOTMEM
+ early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
+#endif
-#ifdef CONFIG_X86_64
- /*
- * dma32_reserve_bootmem() allocates bootmem which may conflict
- * with the crashkernel command line, so do that after
- * reserve_crashkernel()
- */
dma32_reserve_bootmem();
-#endif
reserve_ibft_region();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 35abcb8b00e9..ef6370b00e70 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -137,7 +137,13 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
static void __init pcpu_fc_free(void *ptr, size_t size)
{
+#ifdef CONFIG_NO_BOOTMEM
+ u64 start = __pa(ptr);
+ u64 end = start + size;
+ free_early_partial(start, end);
+#else
free_bootmem(__pa(ptr), size);
+#endif
}
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9b4401115ea1..a435c76d714e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -241,6 +241,11 @@ static void __cpuinit smp_callin(void)
map_cpu_to_logical_apicid();
notify_cpu_starting(cpuid);
+
+ /*
+	 * Need to set up vector mappings before we enable interrupts.
+ */
+ __setup_vector_irq(smp_processor_id());
/*
* Get our bogomips.
*
@@ -315,7 +320,6 @@ notrace static void __cpuinit start_secondary(void *unused)
*/
ipi_call_lock();
lock_vector_lock();
- __setup_vector_irq(smp_processor_id());
set_cpu_online(smp_processor_id(), true);
unlock_vector_lock();
ipi_call_unlock();
@@ -1212,11 +1216,12 @@ __init void prefill_possible_map(void)
total_cpus = max_t(int, possible, num_processors + disabled_cpus);
- if (possible > CONFIG_NR_CPUS) {
+ /* nr_cpu_ids could be reduced via nr_cpus= */
+ if (possible > nr_cpu_ids) {
printk(KERN_WARNING
"%d Processors exceeds NR_CPUS limit of %d\n",
- possible, CONFIG_NR_CPUS);
- possible = CONFIG_NR_CPUS;
+ possible, nr_cpu_ids);
+ possible = nr_cpu_ids;
}
printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index be2573448ed9..fb5cc5e14cfa 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -70,11 +70,11 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
* manually to deassert NMI lines for the watchdog if run
* on an 82489DX-based system.
*/
- spin_lock(&i8259A_lock);
+ raw_spin_lock(&i8259A_lock);
outb(0x0c, PIC_MASTER_OCW3);
/* Ack the IRQ; AEOI will end it automatically. */
inb(PIC_MASTER_POLL);
- spin_unlock(&i8259A_lock);
+ raw_spin_unlock(&i8259A_lock);
}
global_clock_event->event_handler(global_clock_event);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 23066ecf12fa..208a857c679f 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -740,7 +740,7 @@ static cycle_t __vsyscall_fn vread_tsc(void)
}
#endif
-static void resume_tsc(void)
+static void resume_tsc(struct clocksource *cs)
{
clocksource_tsc.cycle_last = 0;
}
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 34a279a7471d..ab38ce0984fa 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -559,7 +559,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
struct irq_desc *desc;
unsigned long flags;
- spin_lock_irqsave(&i8259A_lock, flags);
+ raw_spin_lock_irqsave(&i8259A_lock, flags);
/* Find out what's interrupting in the PIIX4 master 8259 */
outb(0x0c, 0x20); /* OCW3 Poll command */
@@ -596,7 +596,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
outb(0x60 + realirq, 0x20);
}
- spin_unlock_irqrestore(&i8259A_lock, flags);
+ raw_spin_unlock_irqrestore(&i8259A_lock, flags);
desc = irq_to_desc(realirq);
@@ -614,7 +614,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
return IRQ_HANDLED;
out_unlock:
- spin_unlock_irqrestore(&i8259A_lock, flags);
+ raw_spin_unlock_irqrestore(&i8259A_lock, flags);
return IRQ_NONE;
}
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index d430e4c30193..7dd599deca4a 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -33,6 +33,7 @@
#include <asm/fixmap.h>
#include <asm/apicdef.h>
#include <asm/apic.h>
+#include <asm/pgalloc.h>
#include <asm/processor.h>
#include <asm/timer.h>
#include <asm/vmi_time.h>
@@ -266,30 +267,6 @@ static void vmi_nop(void)
{
}
-#ifdef CONFIG_HIGHPTE
-static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
-{
- void *va = kmap_atomic(page, type);
-
- /*
- * Internally, the VMI ROM must map virtual addresses to physical
- * addresses for processing MMU updates. By the time MMU updates
- * are issued, this information is typically already lost.
- * Fortunately, the VMI provides a cache of mapping slots for active
- * page tables.
- *
- * We use slot zero for the linear mapping of physical memory, and
- * in HIGHPTE kernels, slot 1 and 2 for KM_PTE0 and KM_PTE1.
- *
- * args: SLOT VA COUNT PFN
- */
- BUG_ON(type != KM_PTE0 && type != KM_PTE1);
- vmi_ops.set_linear_mapping((type - KM_PTE0)+1, va, 1, page_to_pfn(page));
-
- return va;
-}
-#endif
-
static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
{
vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
@@ -640,6 +617,12 @@ static inline int __init activate_vmi(void)
u64 reloc;
const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc;
+ /*
+ * Prevent page tables from being allocated in highmem, even if
+ * CONFIG_HIGHPTE is enabled.
+ */
+ __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
+
if (call_vrom_func(vmi_rom, vmi_init) != 0) {
printk(KERN_ERR "VMI ROM failed to initialize!");
return 0;
@@ -778,10 +761,6 @@ static inline int __init activate_vmi(void)
/* Set linear is needed in all cases */
vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
-#ifdef CONFIG_HIGHPTE
- if (vmi_ops.set_linear_mapping)
- pv_mmu_ops.kmap_atomic_pte = vmi_kmap_atomic_pte;
-#endif
/*
* These MUST always be patched. Don't support indirect jumps
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 74c92bb194df..2f1ca5614292 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -79,11 +79,7 @@ unsigned long vmi_tsc_khz(void)
static inline unsigned int vmi_get_timer_vector(void)
{
-#ifdef CONFIG_X86_IO_APIC
- return FIRST_DEVICE_VECTOR;
-#else
- return FIRST_EXTERNAL_VECTOR;
-#endif
+ return IRQ0_VECTOR;
}
/** vmi clockchip */
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index f92a0da608cb..44879df55696 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -341,7 +341,7 @@ SECTIONS
* Per-cpu symbols which need to be offset from __per_cpu_load
* for the boot processor.
*/
-#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
+#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load
INIT_PER_CPU(gdt_page);
INIT_PER_CPU(irq_stack_union);
@@ -352,7 +352,7 @@ INIT_PER_CPU(irq_stack_union);
"kernel image bigger than KERNEL_IMAGE_SIZE");
#ifdef CONFIG_SMP
-. = ASSERT((per_cpu__irq_stack_union == 0),
+. = ASSERT((irq_stack_union == 0),
"irq_stack_union is not at start of per-cpu area");
#endif
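
The head_32.S and vmlinux.lds.S hunks above reflect the removal of the per_cpu__ symbol prefix: per-CPU variables are now linked under their plain C names, so assembly and linker-script references use gdt_page, stack_canary and irq_stack_union directly. The C-level accessors are unchanged; a minimal sketch, where example_counter and bump_example() are hypothetical:

#include <linux/percpu.h>
#include <linux/smp.h>

DEFINE_PER_CPU(int, example_counter);	/* linker symbol is now plain "example_counter" */

static void bump_example(void)
{
	int cpu = get_cpu();		/* disable preemption while touching our copy */

	per_cpu(example_counter, cpu)++;
	put_cpu();
}
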
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 06871111bf54..970bbd479516 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -66,6 +66,7 @@ config KVM_AMD
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
+source drivers/vhost/Kconfig
source drivers/lguest/Kconfig
source drivers/virtio/Kconfig
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 2226f2c70ea3..5cb3f0f54f47 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -750,6 +750,7 @@ static void __init zone_sizes_init(void)
free_area_init_nodes(max_zone_pfns);
}
+#ifndef CONFIG_NO_BOOTMEM
static unsigned long __init setup_node_bootmem(int nodeid,
unsigned long start_pfn,
unsigned long end_pfn,
@@ -766,13 +767,14 @@ static unsigned long __init setup_node_bootmem(int nodeid,
printk(KERN_INFO " node %d bootmap %08lx - %08lx\n",
nodeid, bootmap, bootmap + bootmap_size);
free_bootmem_with_active_regions(nodeid, end_pfn);
- early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
return bootmap + bootmap_size;
}
+#endif
void __init setup_bootmem_allocator(void)
{
+#ifndef CONFIG_NO_BOOTMEM
int nodeid;
unsigned long bootmap_size, bootmap;
/*
@@ -784,11 +786,13 @@ void __init setup_bootmem_allocator(void)
if (bootmap == -1L)
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
+#endif
printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
max_pfn_mapped<<PAGE_SHIFT);
printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
+#ifndef CONFIG_NO_BOOTMEM
for_each_online_node(nodeid) {
unsigned long start_pfn, end_pfn;
@@ -806,6 +810,7 @@ void __init setup_bootmem_allocator(void)
bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
bootmap);
}
+#endif
after_bootmem = 1;
}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 69ddfbd91135..e9b040e1cde5 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -572,6 +572,7 @@ kernel_physical_mapping_init(unsigned long start,
void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
int acpi, int k8)
{
+#ifndef CONFIG_NO_BOOTMEM
unsigned long bootmap_size, bootmap;
bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
@@ -579,13 +580,15 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
PAGE_SIZE);
if (bootmap == -1L)
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
+ reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
/* don't touch min_low_pfn */
bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
0, end_pfn);
e820_register_active_regions(0, start_pfn, end_pfn);
free_bootmem_with_active_regions(0, end_pfn);
- early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
- reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
+#else
+ e820_register_active_regions(0, start_pfn, end_pfn);
+#endif
}
#endif
@@ -974,7 +977,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
if (pmd_none(*pmd)) {
pte_t entry;
- p = vmemmap_alloc_block(PMD_SIZE, node);
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node);
if (!p)
return -ENOMEM;
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index b20760ca7244..809baaaf48b1 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -418,7 +418,10 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
for_each_online_node(nid) {
memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+ NODE_DATA(nid)->node_id = nid;
+#ifndef CONFIG_NO_BOOTMEM
NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
+#endif
}
setup_bootmem_allocator();
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 3307ea8bd43a..8948f47fde05 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -163,30 +163,48 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
unsigned long end, unsigned long size,
unsigned long align)
{
- unsigned long mem = find_e820_area(start, end, size, align);
- void *ptr;
+ unsigned long mem;
+ /*
+	 * Put the node memory as high as possible; other early
+	 * allocations will go along with NODE_DATA.
+ */
+ if (start < (MAX_DMA_PFN<<PAGE_SHIFT))
+ start = MAX_DMA_PFN<<PAGE_SHIFT;
+ if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) &&
+ end > (MAX_DMA32_PFN<<PAGE_SHIFT))
+ start = MAX_DMA32_PFN<<PAGE_SHIFT;
+ mem = find_e820_area(start, end, size, align);
+ if (mem != -1L)
+ return __va(mem);
+
+ /* extend the search scope */
+ end = max_pfn_mapped << PAGE_SHIFT;
+ if (end > (MAX_DMA32_PFN<<PAGE_SHIFT))
+ start = MAX_DMA32_PFN<<PAGE_SHIFT;
+ else
+ start = MAX_DMA_PFN<<PAGE_SHIFT;
+ mem = find_e820_area(start, end, size, align);
if (mem != -1L)
return __va(mem);
- ptr = __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS));
- if (ptr == NULL) {
- printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
+ printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
size, nodeid);
- return NULL;
- }
- return ptr;
+
+ return NULL;
}
/* Initialize bootmem allocator for a node */
void __init
setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
{
- unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
+ unsigned long start_pfn, last_pfn, nodedata_phys;
const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
- unsigned long bootmap_start, nodedata_phys;
- void *bootmap;
int nid;
+#ifndef CONFIG_NO_BOOTMEM
+ unsigned long bootmap_start, bootmap_pages, bootmap_size;
+ void *bootmap;
+#endif
if (!end)
return;
@@ -200,7 +218,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
start = roundup(start, ZONE_ALIGN);
- printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
+ printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid,
start, end);
start_pfn = start >> PAGE_SHIFT;
@@ -211,14 +229,21 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
if (node_data[nodeid] == NULL)
return;
nodedata_phys = __pa(node_data[nodeid]);
+ reserve_early(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA");
printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
nodedata_phys + pgdat_size - 1);
+ nid = phys_to_nid(nodedata_phys);
+ if (nid != nodeid)
+ printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
- NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
+ NODE_DATA(nodeid)->node_id = nodeid;
NODE_DATA(nodeid)->node_start_pfn = start_pfn;
NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
+#ifndef CONFIG_NO_BOOTMEM
+ NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
+
/*
* Find a place for the bootmem map
* nodedata_phys could be on other nodes by alloc_bootmem,
@@ -227,11 +252,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
* of alloc_bootmem, that could clash with reserved range
*/
bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn);
- nid = phys_to_nid(nodedata_phys);
- if (nid == nodeid)
- bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
- else
- bootmap_start = roundup(start, PAGE_SIZE);
+ bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
/*
* SMP_CACHE_BYTES could be enough, but init_bootmem_node like
* to use that to align to PAGE_SIZE
@@ -239,18 +260,13 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
bootmap = early_node_mem(nodeid, bootmap_start, end,
bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
if (bootmap == NULL) {
- if (nodedata_phys < start || nodedata_phys >= end) {
- /*
- * only need to free it if it is from other node
- * bootmem
- */
- if (nid != nodeid)
- free_bootmem(nodedata_phys, pgdat_size);
- }
+ free_early(nodedata_phys, nodedata_phys + pgdat_size);
node_data[nodeid] = NULL;
return;
}
bootmap_start = __pa(bootmap);
+ reserve_early(bootmap_start, bootmap_start+(bootmap_pages<<PAGE_SHIFT),
+ "BOOTMAP");
bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
bootmap_start >> PAGE_SHIFT,
@@ -259,31 +275,12 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n",
bootmap_start, bootmap_start + bootmap_size - 1,
bootmap_pages);
-
- free_bootmem_with_active_regions(nodeid, end);
-
- /*
- * convert early reserve to bootmem reserve earlier
- * otherwise early_node_mem could use early reserved mem
- * on previous node
- */
- early_res_to_bootmem(start, end);
-
- /*
- * in some case early_node_mem could use alloc_bootmem
- * to get range on other node, don't reserve that again
- */
- if (nid != nodeid)
- printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
- else
- reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
- pgdat_size, BOOTMEM_DEFAULT);
nid = phys_to_nid(bootmap_start);
if (nid != nodeid)
printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
- else
- reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
- bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+
+ free_bootmem_with_active_regions(nodeid, end);
+#endif
node_set_online(nodeid);
}
@@ -709,6 +706,10 @@ unsigned long __init numa_free_all_bootmem(void)
for_each_online_node(i)
pages += free_all_bootmem_node(NODE_DATA(i));
+#ifdef CONFIG_NO_BOOTMEM
+ pages += free_all_memory_core_early(MAX_NUMNODES);
+#endif
+
return pages;
}
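
For orientation, the two clamps at the top of the rewritten early_node_mem() correspond to the ISA DMA and 32-bit DMA boundaries, so NODE_DATA is steered above 16 MiB and, when the node spans it, above 4 GiB. A small sketch of the arithmetic follows; PAGE_SHIFT and the PFN limits use their usual x86 values and are assumptions of this example, not values taken from the patch.

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT    12                                          /* 4 KiB pages */
    #define MAX_DMA_PFN   ((16ULL * 1024 * 1024) >> PAGE_SHIFT)       /* ISA DMA: 16 MiB */
    #define MAX_DMA32_PFN ((4ULL * 1024 * 1024 * 1024) >> PAGE_SHIFT) /* 32-bit DMA: 4 GiB */

    int main(void)
    {
        /* These are the byte addresses the search window refuses to dip below
         * (first below 16 MiB, then below 4 GiB when the node reaches past it). */
        printf("DMA   limit: %#llx\n",
               (unsigned long long)(MAX_DMA_PFN << PAGE_SHIFT));
        printf("DMA32 limit: %#llx\n",
               (unsigned long long)(MAX_DMA32_PFN << PAGE_SHIFT));
        return 0;
    }

If neither window yields space, the new code widens the search to everything already mapped instead of falling back to bootmem, and simply reports failure when that also comes up empty.
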
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 39fba37f702f..0b7d3e9593e1 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -14,8 +14,7 @@ obj-$(CONFIG_X86_VISWS) += visws.o
obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
obj-y += common.o early.o
-obj-y += amd_bus.o
-obj-$(CONFIG_X86_64) += bus_numa.o
+obj-y += amd_bus.o bus_numa.o
ifeq ($(CONFIG_PCI_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 95ecbd495955..fc1e8fe07e5c 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -2,11 +2,11 @@
#include <linux/pci.h>
#include <linux/topology.h>
#include <linux/cpu.h>
+#include <linux/range.h>
+
#include <asm/pci_x86.h>
-#ifdef CONFIG_X86_64
#include <asm/pci-direct.h>
-#endif
#include "bus_numa.h"
@@ -15,60 +15,6 @@
* also get peer root bus resource for io,mmio
*/
-#ifdef CONFIG_X86_64
-
-#define RANGE_NUM 16
-
-struct res_range {
- size_t start;
- size_t end;
-};
-
-static void __init update_range(struct res_range *range, size_t start,
- size_t end)
-{
- int i;
- int j;
-
- for (j = 0; j < RANGE_NUM; j++) {
- if (!range[j].end)
- continue;
-
- if (start <= range[j].start && end >= range[j].end) {
- range[j].start = 0;
- range[j].end = 0;
- continue;
- }
-
- if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
- range[j].start = end + 1;
- continue;
- }
-
-
- if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
- range[j].end = start - 1;
- continue;
- }
-
- if (start > range[j].start && end < range[j].end) {
- /* find the new spare */
- for (i = 0; i < RANGE_NUM; i++) {
- if (range[i].end == 0)
- break;
- }
- if (i < RANGE_NUM) {
- range[i].end = range[j].end;
- range[i].start = end + 1;
- } else {
- printk(KERN_ERR "run of slot in ranges\n");
- }
- range[j].end = start - 1;
- continue;
- }
- }
-}
-
struct pci_hostbridge_probe {
u32 bus;
u32 slot;
@@ -111,6 +57,8 @@ static void __init get_pci_mmcfg_amd_fam10h_range(void)
fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
}
+#define RANGE_NUM 16
+
/**
* early_fill_mp_bus_to_node()
* called before pcibios_scan_root and pci_scan_bus
@@ -130,16 +78,17 @@ static int __init early_fill_mp_bus_info(void)
struct pci_root_info *info;
u32 reg;
struct resource *res;
- size_t start;
- size_t end;
- struct res_range range[RANGE_NUM];
+ u64 start;
+ u64 end;
+ struct range range[RANGE_NUM];
u64 val;
u32 address;
+ bool found;
if (!early_pci_allowed())
return -1;
- found_all_numa_early = 0;
+ found = false;
for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
u32 id;
u16 device;
@@ -153,12 +102,12 @@ static int __init early_fill_mp_bus_info(void)
device = (id>>16) & 0xffff;
if (pci_probes[i].vendor == vendor &&
pci_probes[i].device == device) {
- found_all_numa_early = 1;
+ found = true;
break;
}
}
- if (!found_all_numa_early)
+ if (!found)
return 0;
pci_root_num = 0;
@@ -196,7 +145,7 @@ static int __init early_fill_mp_bus_info(void)
def_link = (reg >> 8) & 0x03;
memset(range, 0, sizeof(range));
- range[0].end = 0xffff;
+ add_range(range, RANGE_NUM, 0, 0, 0xffff + 1);
/* io port resource */
for (i = 0; i < 4; i++) {
reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3));
@@ -220,13 +169,13 @@ static int __init early_fill_mp_bus_info(void)
info = &pci_root_info[j];
printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n",
- node, link, (u64)start, (u64)end);
+ node, link, start, end);
/* kernel only handle 16 bit only */
if (end > 0xffff)
end = 0xffff;
update_res(info, start, end, IORESOURCE_IO, 1);
- update_range(range, start, end);
+ subtract_range(range, RANGE_NUM, start, end + 1);
}
/* add left over io port range to def node/link, [0, 0xffff] */
/* find the position */
@@ -241,29 +190,32 @@ static int __init early_fill_mp_bus_info(void)
if (!range[i].end)
continue;
- update_res(info, range[i].start, range[i].end,
+ update_res(info, range[i].start, range[i].end - 1,
IORESOURCE_IO, 1);
}
}
memset(range, 0, sizeof(range));
/* 0xfd00000000-0xffffffffff for HT */
- range[0].end = (0xfdULL<<32) - 1;
+ end = cap_resource((0xfdULL<<32) - 1);
+ end++;
+ add_range(range, RANGE_NUM, 0, 0, end);
/* need to take out [0, TOM) for RAM*/
address = MSR_K8_TOP_MEM1;
rdmsrl(address, val);
end = (val & 0xffffff800000ULL);
- printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20);
+ printk(KERN_INFO "TOM: %016llx aka %lldM\n", end, end>>20);
if (end < (1ULL<<32))
- update_range(range, 0, end - 1);
+ subtract_range(range, RANGE_NUM, 0, end);
/* get mmconfig */
get_pci_mmcfg_amd_fam10h_range();
/* need to take out mmconf range */
if (fam10h_mmconf_end) {
printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end);
- update_range(range, fam10h_mmconf_start, fam10h_mmconf_end);
+ subtract_range(range, RANGE_NUM, fam10h_mmconf_start,
+ fam10h_mmconf_end + 1);
}
/* mmio resource */
@@ -293,7 +245,7 @@ static int __init early_fill_mp_bus_info(void)
info = &pci_root_info[j];
printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]",
- node, link, (u64)start, (u64)end);
+ node, link, start, end);
/*
* some sick allocation would have range overlap with fam10h
* mmconf range, so need to update start and end.
@@ -318,14 +270,15 @@ static int __init early_fill_mp_bus_info(void)
/* we got a hole */
endx = fam10h_mmconf_start - 1;
update_res(info, start, endx, IORESOURCE_MEM, 0);
- update_range(range, start, endx);
- printk(KERN_CONT " ==> [%llx, %llx]", (u64)start, endx);
+ subtract_range(range, RANGE_NUM, start,
+ endx + 1);
+ printk(KERN_CONT " ==> [%llx, %llx]", start, endx);
start = fam10h_mmconf_end + 1;
changed = 1;
}
if (changed) {
if (start <= end) {
- printk(KERN_CONT " %s [%llx, %llx]", endx?"and":"==>", (u64)start, (u64)end);
+ printk(KERN_CONT " %s [%llx, %llx]", endx ? "and" : "==>", start, end);
} else {
printk(KERN_CONT "%s\n", endx?"":" ==> none");
continue;
@@ -333,8 +286,9 @@ static int __init early_fill_mp_bus_info(void)
}
}
- update_res(info, start, end, IORESOURCE_MEM, 1);
- update_range(range, start, end);
+ update_res(info, cap_resource(start), cap_resource(end),
+ IORESOURCE_MEM, 1);
+ subtract_range(range, RANGE_NUM, start, end + 1);
printk(KERN_CONT "\n");
}
@@ -348,8 +302,8 @@ static int __init early_fill_mp_bus_info(void)
address = MSR_K8_TOP_MEM2;
rdmsrl(address, val);
end = (val & 0xffffff800000ULL);
- printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20);
- update_range(range, 1ULL<<32, end - 1);
+ printk(KERN_INFO "TOM2: %016llx aka %lldM\n", end, end>>20);
+ subtract_range(range, RANGE_NUM, 1ULL<<32, end);
}
/*
@@ -368,7 +322,8 @@ static int __init early_fill_mp_bus_info(void)
if (!range[i].end)
continue;
- update_res(info, range[i].start, range[i].end,
+ update_res(info, cap_resource(range[i].start),
+ cap_resource(range[i].end - 1),
IORESOURCE_MEM, 1);
}
}
@@ -384,24 +339,14 @@ static int __init early_fill_mp_bus_info(void)
info->bus_min, info->bus_max, info->node, info->link);
for (j = 0; j < res_num; j++) {
res = &info->res[j];
- printk(KERN_DEBUG "bus: %02x index %x %s: [%llx, %llx]\n",
- busnum, j,
- (res->flags & IORESOURCE_IO)?"io port":"mmio",
- res->start, res->end);
+ printk(KERN_DEBUG "bus: %02x index %x %pR\n",
+ busnum, j, res);
}
}
return 0;
}
-#else /* !CONFIG_X86_64 */
-
-static int __init early_fill_mp_bus_info(void) { return 0; }
-
-#endif /* !CONFIG_X86_64 */
-
-/* common 32/64 bit code */
-
#define ENABLE_CF8_EXT_CFG (1ULL << 46)
static void enable_pci_io_ecs(void *unused)
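
The open-coded res_range/update_range() removed above gives way to the generic struct range helpers from <linux/range.h>. Those helpers treat the end as exclusive, which is why each former update_range(start, end) call becomes subtract_range(..., start, end + 1). Below is a compact userspace re-implementation of the subtraction semantics for illustration; it is not the kernel's implementation behind <linux/range.h>, just a sketch of the same behaviour.

    #include <stdio.h>
    #include <stdint.h>

    struct range {
        uint64_t start, end;   /* half-open: [start, end) */
    };

    /* Remove [start, end) from every range in the array, splitting an entry
     * when the hole lands in its middle.  Illustrative only. */
    static void subtract_range(struct range *range, int az, uint64_t start, uint64_t end)
    {
        int i, j;

        for (j = 0; j < az; j++) {
            if (!range[j].end || end <= range[j].start || start >= range[j].end)
                continue;                           /* empty slot or no overlap */
            if (start <= range[j].start && end >= range[j].end) {
                range[j].start = range[j].end = 0;  /* fully covered: drop it */
            } else if (start <= range[j].start) {
                range[j].start = end;               /* trim the front */
            } else if (end >= range[j].end) {
                range[j].end = start;               /* trim the tail */
            } else {
                for (i = 0; i < az; i++)            /* split: find a free slot */
                    if (!range[i].end)
                        break;
                if (i < az) {
                    range[i].start = end;
                    range[i].end = range[j].end;
                }
                range[j].end = start;
            }
        }
    }

    int main(void)
    {
        struct range r[4] = { { 0, 0x10000 } };     /* the full io port space */
        int j;

        subtract_range(r, 4, 0x2000, 0x3000);       /* punch a hole */
        for (j = 0; j < 4; j++)
            if (r[j].end)
                printf("[%#llx, %#llx)\n",
                       (unsigned long long)r[j].start,
                       (unsigned long long)r[j].end);
        return 0;
    }

Starting from [0, 0x10000) and subtracting [0x2000, 0x3000) leaves [0, 0x2000) and [0x3000, 0x10000), which is exactly how the left-over io port and mmio ranges are computed before being handed to update_res().
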
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
index 12d54ff3654d..64a122883896 100644
--- a/arch/x86/pci/bus_numa.c
+++ b/arch/x86/pci/bus_numa.c
@@ -1,11 +1,11 @@
#include <linux/init.h>
#include <linux/pci.h>
+#include <linux/range.h>
#include "bus_numa.h"
int pci_root_num;
struct pci_root_info pci_root_info[PCI_ROOT_NR];
-int found_all_numa_early;
void x86_pci_root_bus_res_quirks(struct pci_bus *b)
{
@@ -21,10 +21,6 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b)
if (!pci_root_num)
return;
- /* for amd, if only one root bus, don't need to do anything */
- if (pci_root_num < 2 && found_all_numa_early)
- return;
-
for (i = 0; i < pci_root_num; i++) {
if (pci_root_info[i].bus_min == b->number)
break;
@@ -52,8 +48,8 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b)
}
}
-void __devinit update_res(struct pci_root_info *info, size_t start,
- size_t end, unsigned long flags, int merge)
+void __devinit update_res(struct pci_root_info *info, resource_size_t start,
+ resource_size_t end, unsigned long flags, int merge)
{
int i;
struct resource *res;
@@ -61,25 +57,28 @@ void __devinit update_res(struct pci_root_info *info, size_t start,
if (start > end)
return;
+ if (start == MAX_RESOURCE)
+ return;
+
if (!merge)
goto addit;
/* try to merge it with old one */
for (i = 0; i < info->res_num; i++) {
- size_t final_start, final_end;
- size_t common_start, common_end;
+ resource_size_t final_start, final_end;
+ resource_size_t common_start, common_end;
res = &info->res[i];
if (res->flags != flags)
continue;
- common_start = max((size_t)res->start, start);
- common_end = min((size_t)res->end, end);
+ common_start = max(res->start, start);
+ common_end = min(res->end, end);
if (common_start > common_end + 1)
continue;
- final_start = min((size_t)res->start, start);
- final_end = max((size_t)res->end, end);
+ final_start = min(res->start, start);
+ final_end = max(res->end, end);
res->start = final_start;
res->end = final_end;
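
update_res() works with inclusive [start, end] resource bounds, so its merge test, common_start > common_end + 1, rejects only ranges separated by a real gap; overlapping and directly adjacent ranges are folded into one. A standalone sketch of just that test follows, using a hypothetical try_merge() helper rather than the real pci_root_info bookkeeping.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Merge inclusive [start, end] into *res_start/*res_end when the two
     * ranges overlap or touch.  Hypothetical helper for illustration. */
    static bool try_merge(uint64_t *res_start, uint64_t *res_end,
                          uint64_t start, uint64_t end)
    {
        uint64_t common_start = *res_start > start ? *res_start : start;
        uint64_t common_end   = *res_end   < end   ? *res_end   : end;

        if (common_start > common_end + 1)
            return false;                /* disjoint with a gap: keep separate */

        if (start < *res_start)
            *res_start = start;
        if (end > *res_end)
            *res_end = end;
        return true;
    }

    int main(void)
    {
        uint64_t s = 0x1000, e = 0x1fff;

        printf("adjacent: %d\n", try_merge(&s, &e, 0x2000, 0x2fff)); /* 1, now 0x1000-0x2fff */
        printf("gap:      %d\n", try_merge(&s, &e, 0x4000, 0x4fff)); /* 0, left untouched */
        return 0;
    }

The switch from size_t to resource_size_t in the real function removes the casts in the min()/max() comparisons and keeps the math correct on 32-bit builds with 64-bit resources.
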
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h
index 731b64ee8d84..804a4b40c31a 100644
--- a/arch/x86/pci/bus_numa.h
+++ b/arch/x86/pci/bus_numa.h
@@ -1,5 +1,5 @@
-#ifdef CONFIG_X86_64
-
+#ifndef __BUS_NUMA_H
+#define __BUS_NUMA_H
/*
* sub bus (transparent) will use entres from 3 to store extra from
* root, so need to make sure we have enough slot there.
@@ -19,8 +19,7 @@ struct pci_root_info {
#define PCI_ROOT_NR 4
extern int pci_root_num;
extern struct pci_root_info pci_root_info[PCI_ROOT_NR];
-extern int found_all_numa_early;
-extern void update_res(struct pci_root_info *info, size_t start,
- size_t end, unsigned long flags, int merge);
+extern void update_res(struct pci_root_info *info, resource_size_t start,
+ resource_size_t end, unsigned long flags, int merge);
#endif
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 5a8fbf8d4cac..dece3eb9c906 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -255,10 +255,6 @@ void __init pcibios_resource_survey(void)
*/
fs_initcall(pcibios_assign_resources);
-void __weak x86_pci_root_bus_res_quirks(struct pci_bus *b)
-{
-}
-
/*
* If we set up a device for bus mastering, we need to check the latency
* timer as certain crappy BIOSes forget to set it properly.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 36daccb68642..b607239c1ba8 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -50,6 +50,7 @@
#include <asm/traps.h>
#include <asm/setup.h>
#include <asm/desc.h>
+#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/reboot.h>
@@ -1094,6 +1095,12 @@ asmlinkage void __init xen_start_kernel(void)
__supported_pte_mask |= _PAGE_IOMAP;
+ /*
+ * Prevent page tables from being allocated in highmem, even
+ * if CONFIG_HIGHPTE is enabled.
+ */
+ __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
+
/* Work out if we support NX */
x86_configure_nx();
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index bf4cd6bfe959..f9eb7de74f42 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1427,23 +1427,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
#endif
}
-#ifdef CONFIG_HIGHPTE
-static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
-{
- pgprot_t prot = PAGE_KERNEL;
-
- if (PagePinned(page))
- prot = PAGE_KERNEL_RO;
-
- if (0 && PageHighMem(page))
- printk("mapping highpte %lx type %d prot %s\n",
- page_to_pfn(page), type,
- (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ");
-
- return kmap_atomic_prot(page, type, prot);
-}
-#endif
-
#ifdef CONFIG_X86_32
static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
{
@@ -1902,10 +1885,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
.alloc_pmd_clone = paravirt_nop,
.release_pmd = xen_release_pmd_init,
-#ifdef CONFIG_HIGHPTE
- .kmap_atomic_pte = xen_kmap_atomic_pte,
-#endif
-
#ifdef CONFIG_X86_64
.set_pte = xen_set_pte,
#else
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 88e15deb8b82..22a2093b5862 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -90,9 +90,9 @@ ENTRY(xen_iret)
GET_THREAD_INFO(%eax)
movl TI_cpu(%eax), %eax
movl __per_cpu_offset(,%eax,4), %eax
- mov per_cpu__xen_vcpu(%eax), %eax
+ mov xen_vcpu(%eax), %eax
#else
- movl per_cpu__xen_vcpu, %eax
+ movl xen_vcpu, %eax
#endif
/* check IF state we're restoring */