summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/arm64/include/asm/memory.h5
-rw-r--r--arch/arm64/include/asm/numa.h2
-rw-r--r--arch/arm64/kernel/setup.c8
-rw-r--r--arch/arm64/mm/numa.c2
-rw-r--r--arch/ia64/include/asm/numa.h2
-rw-r--r--arch/microblaze/include/asm/unistd.h2
-rw-r--r--arch/microblaze/include/uapi/asm/unistd.h6
-rw-r--r--arch/microblaze/kernel/cpu/cpuinfo.c6
-rw-r--r--arch/microblaze/kernel/syscall_table.S6
-rw-r--r--arch/microblaze/kernel/timer.c2
-rw-r--r--arch/mips/boot/dts/ingenic/jz4740.dtsi11
-rw-r--r--arch/mips/boot/dts/ingenic/qi_lb60.dts4
-rw-r--r--arch/mips/include/asm/mach-jz4740/platform.h1
-rw-r--r--arch/mips/jz4740/board-qi_lb60.c1
-rw-r--r--arch/mips/jz4740/platform.c21
-rw-r--r--arch/mips/jz4740/reset.c63
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/include/asm/ima.h29
-rw-r--r--arch/powerpc/include/asm/kexec.h15
-rw-r--r--arch/powerpc/kernel/Makefile4
-rw-r--r--arch/powerpc/kernel/ima_kexec.c223
-rw-r--r--arch/powerpc/kernel/kexec_elf_64.c2
-rw-r--r--arch/powerpc/kernel/machine_kexec_file_64.c15
-rw-r--r--arch/x86/Kconfig12
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/floppy.h20
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/mmu.h4
-rw-r--r--arch/x86/include/asm/mpx.h4
-rw-r--r--arch/x86/include/asm/pgtable_64.h3
-rw-r--r--arch/x86/include/asm/tsc.h9
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c2
-rw-r--r--arch/x86/kernel/apic/apic.c15
-rw-r--r--arch/x86/kernel/cpu/common.c24
-rw-r--r--arch/x86/kernel/head_64.S5
-rw-r--r--arch/x86/kernel/process.c1
-rw-r--r--arch/x86/kernel/smpboot.c59
-rw-r--r--arch/x86/kernel/tsc.c42
-rw-r--r--arch/x86/kernel/tsc_msr.c19
-rw-r--r--arch/x86/kernel/tsc_sync.c290
-rw-r--r--arch/x86/kvm/cpuid.c9
-rw-r--r--arch/x86/kvm/hyperv.c24
-rw-r--r--arch/x86/kvm/vmx.c13
-rw-r--r--arch/x86/kvm/x86.c18
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/init_64.c24
-rw-r--r--arch/x86/mm/mpx.c10
-rw-r--r--arch/x86/mm/numa.c2
-rw-r--r--arch/x86/platform/Makefile1
-rw-r--r--arch/x86/platform/intel-mid/mfld.c9
-rw-r--r--arch/x86/platform/intel-mid/mrfld.c8
-rw-r--r--arch/x86/platform/mellanox/Makefile1
-rw-r--r--arch/x86/platform/mellanox/mlx-platform.c266
-rw-r--r--arch/x86/power/cpu.c1
-rw-r--r--arch/x86/xen/smp.c6
-rw-r--r--arch/xtensa/Kconfig1
-rw-r--r--arch/xtensa/boot/dts/kc705.dts16
-rw-r--r--arch/xtensa/include/asm/Kbuild1
-rw-r--r--arch/xtensa/kernel/Makefile1
-rw-r--r--arch/xtensa/kernel/pci-dma.c21
-rw-r--r--arch/xtensa/kernel/s32c1i_selftest.c128
-rw-r--r--arch/xtensa/kernel/setup.c137
-rw-r--r--arch/xtensa/mm/init.c2
65 files changed, 975 insertions, 673 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 19483aea4bbc..99839c23d453 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -5,6 +5,9 @@
config KEXEC_CORE
bool
+config HAVE_IMA_KEXEC
+ bool
+
config OPROFILE
tristate "OProfile system profiling"
depends on PROFILING
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index b71086d25195..bfe632808d77 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -165,6 +165,11 @@ extern u64 kimage_vaddr;
/* the offset between the kernel virtual and physical mappings */
extern u64 kimage_voffset;
+static inline unsigned long kaslr_offset(void)
+{
+ return kimage_vaddr - KIMAGE_VADDR;
+}
+
/*
* Allow all memory at the discovery stage. We will clip it later.
*/
diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
index 600887e491fd..bf466d1876e3 100644
--- a/arch/arm64/include/asm/numa.h
+++ b/arch/arm64/include/asm/numa.h
@@ -15,6 +15,8 @@ int __node_distance(int from, int to);
extern nodemask_t numa_nodes_parsed __initdata;
+extern bool numa_off;
+
/* Mappings between node number and cpus on that node. */
extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
void numa_clear_node(unsigned int cpu);
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index a53f52ac81c6..b051367e2149 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -338,11 +338,11 @@ subsys_initcall(topology_init);
static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
void *p)
{
- u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR;
+ const unsigned long offset = kaslr_offset();
- if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) {
- pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n",
- kaslr_offset, KIMAGE_VADDR);
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && offset > 0) {
+ pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",
+ offset, KIMAGE_VADDR);
} else {
pr_emerg("Kernel Offset: disabled\n");
}
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 4b32168cf91a..b388a99fea7b 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -35,7 +35,7 @@ static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };
static int numa_distance_cnt;
static u8 *numa_distance;
-static bool numa_off;
+bool numa_off;
static __init int numa_parse_early_param(char *opt)
{
diff --git a/arch/ia64/include/asm/numa.h b/arch/ia64/include/asm/numa.h
index 2db0a6c6daa5..ebef7f40aabb 100644
--- a/arch/ia64/include/asm/numa.h
+++ b/arch/ia64/include/asm/numa.h
@@ -65,6 +65,8 @@ extern int paddr_to_nid(unsigned long paddr);
#define local_nodeid (cpu_to_node_map[smp_processor_id()])
+#define numa_off 0
+
extern void map_cpu_to_node(int cpu, int nid);
extern void unmap_cpu_from_node(int cpu, int nid);
extern void numa_clear_node(int cpu);
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index 805ae5d712e8..032fed71223f 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h
@@ -38,6 +38,6 @@
#endif /* __ASSEMBLY__ */
-#define __NR_syscalls 392
+#define __NR_syscalls 398
#endif /* _ASM_MICROBLAZE_UNISTD_H */
diff --git a/arch/microblaze/include/uapi/asm/unistd.h b/arch/microblaze/include/uapi/asm/unistd.h
index a8bd3fa28bc7..d8086159d996 100644
--- a/arch/microblaze/include/uapi/asm/unistd.h
+++ b/arch/microblaze/include/uapi/asm/unistd.h
@@ -407,5 +407,11 @@
#define __NR_userfaultfd 389
#define __NR_membarrier 390
#define __NR_mlock2 391
+#define __NR_copy_file_range 392
+#define __NR_preadv2 393
+#define __NR_pwritev2 394
+#define __NR_pkey_mprotect 395
+#define __NR_pkey_alloc 396
+#define __NR_pkey_free 397
#endif /* _UAPI_ASM_MICROBLAZE_UNISTD_H */
diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c
index b70bb538f001..96b3f26d16be 100644
--- a/arch/microblaze/kernel/cpu/cpuinfo.c
+++ b/arch/microblaze/kernel/cpu/cpuinfo.c
@@ -49,6 +49,8 @@ const struct cpu_ver_key cpu_ver_lookup[] = {
{"9.3", 0x20},
{"9.4", 0x21},
{"9.5", 0x22},
+ {"9.6", 0x23},
+ {"10.0", 0x24},
{NULL, 0},
};
@@ -75,6 +77,10 @@ const struct family_string_key family_string_lookup[] = {
{"zynq7000", 0x12},
{"UltraScale Virtex", 0x13},
{"UltraScale Kintex", 0x14},
+ {"UltraScale+ Zynq", 0x15},
+ {"UltraScale+ Virtex", 0x16},
+ {"UltraScale+ Kintex", 0x17},
+ {"Spartan7", 0x18},
{NULL, 0},
};
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index 6b3dd99126d7..6841c2df14d9 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -392,3 +392,9 @@ ENTRY(sys_call_table)
.long sys_userfaultfd
.long sys_membarrier /* 390 */
.long sys_mlock2
+ .long sys_copy_file_range
+ .long sys_preadv2
+ .long sys_pwritev2
+ .long sys_pkey_mprotect /* 395 */
+ .long sys_pkey_alloc
+ .long sys_pkey_free
diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c
index 5bbf38b916ef..9e954959f605 100644
--- a/arch/microblaze/kernel/timer.c
+++ b/arch/microblaze/kernel/timer.c
@@ -259,7 +259,7 @@ static int __init xilinx_timer_init(struct device_node *timer)
int ret;
if (initialized)
- return;
+ return -EINVAL;
initialized = 1;
diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi
index f6ae6ed9c4b1..3e1587f1f77a 100644
--- a/arch/mips/boot/dts/ingenic/jz4740.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi
@@ -44,6 +44,17 @@
#clock-cells = <1>;
};
+ rtc_dev: rtc@10003000 {
+ compatible = "ingenic,jz4740-rtc";
+ reg = <0x10003000 0x40>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <15>;
+
+ clocks = <&cgu JZ4740_CLK_RTC>;
+ clock-names = "rtc";
+ };
+
uart0: serial@10030000 {
compatible = "ingenic,jz4740-uart";
reg = <0x10030000 0x100>;
diff --git a/arch/mips/boot/dts/ingenic/qi_lb60.dts b/arch/mips/boot/dts/ingenic/qi_lb60.dts
index 2414d63ae818..be1a7d3a3e1b 100644
--- a/arch/mips/boot/dts/ingenic/qi_lb60.dts
+++ b/arch/mips/boot/dts/ingenic/qi_lb60.dts
@@ -13,3 +13,7 @@
&ext {
clock-frequency = <12000000>;
};
+
+&rtc_dev {
+ system-power-controller;
+};
diff --git a/arch/mips/include/asm/mach-jz4740/platform.h b/arch/mips/include/asm/mach-jz4740/platform.h
index 073b8bfbb3b3..3645974b7f65 100644
--- a/arch/mips/include/asm/mach-jz4740/platform.h
+++ b/arch/mips/include/asm/mach-jz4740/platform.h
@@ -22,7 +22,6 @@
extern struct platform_device jz4740_udc_device;
extern struct platform_device jz4740_udc_xceiv_device;
extern struct platform_device jz4740_mmc_device;
-extern struct platform_device jz4740_rtc_device;
extern struct platform_device jz4740_i2c_device;
extern struct platform_device jz4740_nand_device;
extern struct platform_device jz4740_framebuffer_device;
diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c
index 258fd03c9ef5..a5bd94b95263 100644
--- a/arch/mips/jz4740/board-qi_lb60.c
+++ b/arch/mips/jz4740/board-qi_lb60.c
@@ -438,7 +438,6 @@ static struct platform_device *jz_platform_devices[] __initdata = {
&jz4740_pcm_device,
&jz4740_i2s_device,
&jz4740_codec_device,
- &jz4740_rtc_device,
&jz4740_adc_device,
&jz4740_pwm_device,
&jz4740_dma_device,
diff --git a/arch/mips/jz4740/platform.c b/arch/mips/jz4740/platform.c
index 2f1dab35c061..5b7cdd67a9d9 100644
--- a/arch/mips/jz4740/platform.c
+++ b/arch/mips/jz4740/platform.c
@@ -88,27 +88,6 @@ struct platform_device jz4740_mmc_device = {
.resource = jz4740_mmc_resources,
};
-/* RTC controller */
-static struct resource jz4740_rtc_resources[] = {
- {
- .start = JZ4740_RTC_BASE_ADDR,
- .end = JZ4740_RTC_BASE_ADDR + 0x38 - 1,
- .flags = IORESOURCE_MEM,
- },
- {
- .start = JZ4740_IRQ_RTC,
- .end = JZ4740_IRQ_RTC,
- .flags = IORESOURCE_IRQ,
- },
-};
-
-struct platform_device jz4740_rtc_device = {
- .name = "jz4740-rtc",
- .id = -1,
- .num_resources = ARRAY_SIZE(jz4740_rtc_resources),
- .resource = jz4740_rtc_resources,
-};
-
/* I2C controller */
static struct resource jz4740_i2c_resources[] = {
{
diff --git a/arch/mips/jz4740/reset.c b/arch/mips/jz4740/reset.c
index 954e669c9e6b..67780c4b6573 100644
--- a/arch/mips/jz4740/reset.c
+++ b/arch/mips/jz4740/reset.c
@@ -57,71 +57,8 @@ static void jz4740_restart(char *command)
jz4740_halt();
}
-#define JZ_REG_RTC_CTRL 0x00
-#define JZ_REG_RTC_HIBERNATE 0x20
-#define JZ_REG_RTC_WAKEUP_FILTER 0x24
-#define JZ_REG_RTC_RESET_COUNTER 0x28
-
-#define JZ_RTC_CTRL_WRDY BIT(7)
-#define JZ_RTC_WAKEUP_FILTER_MASK 0x0000FFE0
-#define JZ_RTC_RESET_COUNTER_MASK 0x00000FE0
-
-static inline void jz4740_rtc_wait_ready(void __iomem *rtc_base)
-{
- uint32_t ctrl;
-
- do {
- ctrl = readl(rtc_base + JZ_REG_RTC_CTRL);
- } while (!(ctrl & JZ_RTC_CTRL_WRDY));
-}
-
-static void jz4740_power_off(void)
-{
- void __iomem *rtc_base = ioremap(JZ4740_RTC_BASE_ADDR, 0x38);
- unsigned long wakeup_filter_ticks;
- unsigned long reset_counter_ticks;
- struct clk *rtc_clk;
- unsigned long rtc_rate;
-
- rtc_clk = clk_get(NULL, "rtc");
- if (IS_ERR(rtc_clk))
- panic("unable to get RTC clock");
- rtc_rate = clk_get_rate(rtc_clk);
- clk_put(rtc_clk);
-
- /*
- * Set minimum wakeup pin assertion time: 100 ms.
- * Range is 0 to 2 sec if RTC is clocked at 32 kHz.
- */
- wakeup_filter_ticks = (100 * rtc_rate) / 1000;
- if (wakeup_filter_ticks < JZ_RTC_WAKEUP_FILTER_MASK)
- wakeup_filter_ticks &= JZ_RTC_WAKEUP_FILTER_MASK;
- else
- wakeup_filter_ticks = JZ_RTC_WAKEUP_FILTER_MASK;
- jz4740_rtc_wait_ready(rtc_base);
- writel(wakeup_filter_ticks, rtc_base + JZ_REG_RTC_WAKEUP_FILTER);
-
- /*
- * Set reset pin low-level assertion time after wakeup: 60 ms.
- * Range is 0 to 125 ms if RTC is clocked at 32 kHz.
- */
- reset_counter_ticks = (60 * rtc_rate) / 1000;
- if (reset_counter_ticks < JZ_RTC_RESET_COUNTER_MASK)
- reset_counter_ticks &= JZ_RTC_RESET_COUNTER_MASK;
- else
- reset_counter_ticks = JZ_RTC_RESET_COUNTER_MASK;
- jz4740_rtc_wait_ready(rtc_base);
- writel(reset_counter_ticks, rtc_base + JZ_REG_RTC_RESET_COUNTER);
-
- jz4740_rtc_wait_ready(rtc_base);
- writel(1, rtc_base + JZ_REG_RTC_HIBERNATE);
-
- jz4740_halt();
-}
-
void jz4740_reset_init(void)
{
_machine_restart = jz4740_restart;
_machine_halt = jz4740_halt;
- pm_power_off = jz4740_power_off;
}
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3da87e198878..a8ee573fe610 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -469,6 +469,7 @@ config KEXEC
config KEXEC_FILE
bool "kexec file based system call"
select KEXEC_CORE
+ select HAVE_IMA_KEXEC
select BUILD_BIN2C
depends on PPC64
depends on CRYPTO=y
diff --git a/arch/powerpc/include/asm/ima.h b/arch/powerpc/include/asm/ima.h
new file mode 100644
index 000000000000..2313bdface34
--- /dev/null
+++ b/arch/powerpc/include/asm/ima.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_POWERPC_IMA_H
+#define _ASM_POWERPC_IMA_H
+
+struct kimage;
+
+int ima_get_kexec_buffer(void **addr, size_t *size);
+int ima_free_kexec_buffer(void);
+
+#ifdef CONFIG_IMA
+void remove_ima_buffer(void *fdt, int chosen_node);
+#else
+static inline void remove_ima_buffer(void *fdt, int chosen_node) {}
+#endif
+
+#ifdef CONFIG_IMA_KEXEC
+int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
+ size_t size);
+
+int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node);
+#else
+static inline int setup_ima_buffer(const struct kimage *image, void *fdt,
+ int chosen_node)
+{
+ remove_ima_buffer(fdt, chosen_node);
+ return 0;
+}
+#endif /* CONFIG_IMA_KEXEC */
+
+#endif /* _ASM_POWERPC_IMA_H */
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 6c3b71502fbc..25668bc8cb2a 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -94,11 +94,22 @@ static inline bool kdump_in_progress(void)
#ifdef CONFIG_KEXEC_FILE
extern struct kexec_file_ops kexec_elf64_ops;
+#ifdef CONFIG_IMA_KEXEC
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+ phys_addr_t ima_buffer_addr;
+ size_t ima_buffer_size;
+};
+#endif
+
int setup_purgatory(struct kimage *image, const void *slave_code,
const void *fdt, unsigned long kernel_load_addr,
unsigned long fdt_load_addr);
-int setup_new_fdt(void *fdt, unsigned long initrd_load_addr,
- unsigned long initrd_len, const char *cmdline);
+int setup_new_fdt(const struct kimage *image, void *fdt,
+ unsigned long initrd_load_addr, unsigned long initrd_len,
+ const char *cmdline);
+int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size);
#endif /* CONFIG_KEXEC_FILE */
#else /* !CONFIG_KEXEC_CORE */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index a3a6047fd395..23f8082d7bfa 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -112,6 +112,10 @@ obj-$(CONFIG_PCI_MSI) += msi.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \
machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o
+ifeq ($(CONFIG_HAVE_IMA_KEXEC)$(CONFIG_IMA),yy)
+obj-y += ima_kexec.o
+endif
+
obj-$(CONFIG_AUDIT) += audit.o
obj64-$(CONFIG_AUDIT) += compat_audit.o
diff --git a/arch/powerpc/kernel/ima_kexec.c b/arch/powerpc/kernel/ima_kexec.c
new file mode 100644
index 000000000000..5ea42c937ca9
--- /dev/null
+++ b/arch/powerpc/kernel/ima_kexec.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2016 IBM Corporation
+ *
+ * Authors:
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/of.h>
+#include <linux/memblock.h>
+#include <linux/libfdt.h>
+
+static int get_addr_size_cells(int *addr_cells, int *size_cells)
+{
+ struct device_node *root;
+
+ root = of_find_node_by_path("/");
+ if (!root)
+ return -EINVAL;
+
+ *addr_cells = of_n_addr_cells(root);
+ *size_cells = of_n_size_cells(root);
+
+ of_node_put(root);
+
+ return 0;
+}
+
+static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr,
+ size_t *size)
+{
+ int ret, addr_cells, size_cells;
+
+ ret = get_addr_size_cells(&addr_cells, &size_cells);
+ if (ret)
+ return ret;
+
+ if (len < 4 * (addr_cells + size_cells))
+ return -ENOENT;
+
+ *addr = of_read_number(prop, addr_cells);
+ *size = of_read_number(prop + 4 * addr_cells, size_cells);
+
+ return 0;
+}
+
+/**
+ * ima_get_kexec_buffer - get IMA buffer from the previous kernel
+ * @addr: On successful return, set to point to the buffer contents.
+ * @size: On successful return, set to the buffer size.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int ima_get_kexec_buffer(void **addr, size_t *size)
+{
+ int ret, len;
+ unsigned long tmp_addr;
+ size_t tmp_size;
+ const void *prop;
+
+ prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len);
+ if (!prop)
+ return -ENOENT;
+
+ ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size);
+ if (ret)
+ return ret;
+
+ *addr = __va(tmp_addr);
+ *size = tmp_size;
+
+ return 0;
+}
+
+/**
+ * ima_free_kexec_buffer - free memory used by the IMA buffer
+ */
+int ima_free_kexec_buffer(void)
+{
+ int ret;
+ unsigned long addr;
+ size_t size;
+ struct property *prop;
+
+ prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL);
+ if (!prop)
+ return -ENOENT;
+
+ ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size);
+ if (ret)
+ return ret;
+
+ ret = of_remove_property(of_chosen, prop);
+ if (ret)
+ return ret;
+
+ return memblock_free(addr, size);
+
+}
+
+/**
+ * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt
+ *
+ * The IMA measurement buffer is of no use to a subsequent kernel, so we always
+ * remove it from the device tree.
+ */
+void remove_ima_buffer(void *fdt, int chosen_node)
+{
+ int ret, len;
+ unsigned long addr;
+ size_t size;
+ const void *prop;
+
+ prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len);
+ if (!prop)
+ return;
+
+ ret = do_get_kexec_buffer(prop, len, &addr, &size);
+ fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer");
+ if (ret)
+ return;
+
+ ret = delete_fdt_mem_rsv(fdt, addr, size);
+ if (!ret)
+ pr_debug("Removed old IMA buffer reservation.\n");
+}
+
+#ifdef CONFIG_IMA_KEXEC
+/**
+ * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer
+ *
+ * Architectures should use this function to pass on the IMA buffer
+ * information to the next kernel.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
+ size_t size)
+{
+ image->arch.ima_buffer_addr = load_addr;
+ image->arch.ima_buffer_size = size;
+
+ return 0;
+}
+
+static int write_number(void *p, u64 value, int cells)
+{
+ if (cells == 1) {
+ u32 tmp;
+
+ if (value > U32_MAX)
+ return -EINVAL;
+
+ tmp = cpu_to_be32(value);
+ memcpy(p, &tmp, sizeof(tmp));
+ } else if (cells == 2) {
+ u64 tmp;
+
+ tmp = cpu_to_be64(value);
+ memcpy(p, &tmp, sizeof(tmp));
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * setup_ima_buffer - add IMA buffer information to the fdt
+ * @image: kexec image being loaded.
+ * @fdt: Flattened device tree for the next kernel.
+ * @chosen_node: Offset to the chosen node.
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node)
+{
+ int ret, addr_cells, size_cells, entry_size;
+ u8 value[16];
+
+ remove_ima_buffer(fdt, chosen_node);
+ if (!image->arch.ima_buffer_size)
+ return 0;
+
+ ret = get_addr_size_cells(&addr_cells, &size_cells);
+ if (ret)
+ return ret;
+
+ entry_size = 4 * (addr_cells + size_cells);
+
+ if (entry_size > sizeof(value))
+ return -EINVAL;
+
+ ret = write_number(value, image->arch.ima_buffer_addr, addr_cells);
+ if (ret)
+ return ret;
+
+ ret = write_number(value + 4 * addr_cells, image->arch.ima_buffer_size,
+ size_cells);
+ if (ret)
+ return ret;
+
+ ret = fdt_setprop(fdt, chosen_node, "linux,ima-kexec-buffer", value,
+ entry_size);
+ if (ret < 0)
+ return -EINVAL;
+
+ ret = fdt_add_mem_rsv(fdt, image->arch.ima_buffer_addr,
+ image->arch.ima_buffer_size);
+ if (ret)
+ return -EINVAL;
+
+ pr_debug("IMA buffer at 0x%llx, size = 0x%zx\n",
+ image->arch.ima_buffer_addr, image->arch.ima_buffer_size);
+
+ return 0;
+}
+#endif /* CONFIG_IMA_KEXEC */
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c
index 6acffd34a70f..9a42309b091a 100644
--- a/arch/powerpc/kernel/kexec_elf_64.c
+++ b/arch/powerpc/kernel/kexec_elf_64.c
@@ -627,7 +627,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
goto out;
}
- ret = setup_new_fdt(fdt, initrd_load_addr, initrd_len, cmdline);
+ ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
if (ret)
goto out;
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c
index 7abc8a75ee48..992c0d258e5d 100644
--- a/arch/powerpc/kernel/machine_kexec_file_64.c
+++ b/arch/powerpc/kernel/machine_kexec_file_64.c
@@ -27,6 +27,7 @@
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
+#include <asm/ima.h>
#define SLAVE_CODE_SIZE 256
@@ -180,7 +181,7 @@ int setup_purgatory(struct kimage *image, const void *slave_code,
*
* Return: 0 on success, or negative errno on error.
*/
-static int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size)
+int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size)
{
int i, ret, num_rsvs = fdt_num_mem_rsv(fdt);
@@ -209,6 +210,7 @@ static int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size
/*
* setup_new_fdt - modify /chosen and memory reservation for the next kernel
+ * @image: kexec image being loaded.
* @fdt: Flattened device tree for the next kernel.
* @initrd_load_addr: Address where the next initrd will be loaded.
* @initrd_len: Size of the next initrd, or 0 if there will be none.
@@ -217,8 +219,9 @@ static int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size
*
* Return: 0 on success, or negative errno on error.
*/
-int setup_new_fdt(void *fdt, unsigned long initrd_load_addr,
- unsigned long initrd_len, const char *cmdline)
+int setup_new_fdt(const struct kimage *image, void *fdt,
+ unsigned long initrd_load_addr, unsigned long initrd_len,
+ const char *cmdline)
{
int ret, chosen_node;
const void *prop;
@@ -328,6 +331,12 @@ int setup_new_fdt(void *fdt, unsigned long initrd_load_addr,
}
}
+ ret = setup_ima_buffer(image, fdt, chosen_node);
+ if (ret) {
+ pr_err("Error setting up the new device tree.\n");
+ return ret;
+ }
+
ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0);
if (ret) {
pr_err("Error setting up the new device tree.\n");
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index dd47e60aabf5..64024c999531 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -555,18 +555,6 @@ config X86_INTEL_QUARK
Say Y here if you have a Quark based system such as the Arduino
compatible Intel Galileo.
-config MLX_PLATFORM
- tristate "Mellanox Technologies platform support"
- depends on X86_64
- depends on X86_EXTENDED_PLATFORM
- ---help---
- This option enables system support for the Mellanox Technologies
- platform.
-
- Say Y here if you are building a kernel for Mellanox system.
-
- Otherwise, say N.
-
config X86_INTEL_LPSS
bool "Intel Low Power Subsystem Support"
depends on X86 && ACPI
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 59ac427960d4..6ccbf1aaa7ce 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -105,6 +105,7 @@
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h
index 1c7eefe32502..7ec59edde154 100644
--- a/arch/x86/include/asm/floppy.h
+++ b/arch/x86/include/asm/floppy.h
@@ -229,18 +229,18 @@ static struct fd_routine_l {
int (*_dma_setup)(char *addr, unsigned long size, int mode, int io);
} fd_routine[] = {
{
- request_dma,
- free_dma,
- get_dma_residue,
- dma_mem_alloc,
- hard_dma_setup
+ ._request_dma = request_dma,
+ ._free_dma = free_dma,
+ ._get_dma_residue = get_dma_residue,
+ ._dma_mem_alloc = dma_mem_alloc,
+ ._dma_setup = hard_dma_setup
},
{
- vdma_request_dma,
- vdma_nop,
- vdma_get_dma_residue,
- vdma_mem_alloc,
- vdma_dma_setup
+ ._request_dma = vdma_request_dma,
+ ._free_dma = vdma_nop,
+ ._get_dma_residue = vdma_get_dma_residue,
+ ._dma_mem_alloc = vdma_mem_alloc,
+ ._dma_setup = vdma_dma_setup
}
};
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7892530cbacf..2e25038dbd93 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -704,6 +704,7 @@ struct kvm_apic_map {
/* Hyper-V emulation context */
struct kvm_hv {
+ struct mutex hv_lock;
u64 hv_guest_os_id;
u64 hv_hypercall;
u64 hv_tsc_page;
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 72198c64e646..f9813b6d8b80 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -31,6 +31,10 @@ typedef struct {
u16 pkey_allocation_map;
s16 execute_only_pkey;
#endif
+#ifdef CONFIG_X86_INTEL_MPX
+ /* address of the bounds directory */
+ void __user *bd_addr;
+#endif
} mm_context_t;
#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
index 7a35495275a9..0b416d4cf73b 100644
--- a/arch/x86/include/asm/mpx.h
+++ b/arch/x86/include/asm/mpx.h
@@ -59,7 +59,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs);
int mpx_handle_bd_fault(void);
static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
{
- return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR);
+ return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR);
}
static inline void mpx_mm_init(struct mm_struct *mm)
{
@@ -67,7 +67,7 @@ static inline void mpx_mm_init(struct mm_struct *mm)
* NULL is theoretically a valid place to put the bounds
* directory, so point this at an invalid address.
*/
- mm->bd_addr = MPX_INVALID_BOUNDS_DIR;
+ mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR;
}
void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long start, unsigned long end);
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 1cc82ece9ac1..62b775926045 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -116,8 +116,7 @@ static inline void native_pgd_clear(pgd_t *pgd)
native_set_pgd(pgd, native_make_pgd(0));
}
-extern void sync_global_pgds(unsigned long start, unsigned long end,
- int removed);
+extern void sync_global_pgds(unsigned long start, unsigned long end);
/*
* Conversion functions: convert a page and protection to a page entry,
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 33b6365c22fe..abb1fdcc545a 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -45,8 +45,17 @@ extern int tsc_clocksource_reliable;
* Boot-time check whether the TSCs are synchronized across
* all CPUs/cores:
*/
+#ifdef CONFIG_X86_TSC
+extern bool tsc_store_and_check_tsc_adjust(bool bootcpu);
+extern void tsc_verify_tsc_adjust(bool resume);
extern void check_tsc_sync_source(int cpu);
extern void check_tsc_sync_target(void);
+#else
+static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; }
+static inline void tsc_verify_tsc_adjust(bool resume) { }
+static inline void check_tsc_sync_source(int cpu) { }
+static inline void check_tsc_sync_target(void) { }
+#endif
extern int notsc_setup(char *);
extern void tsc_save_sched_clock_state(void);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 05110c1097ae..581386c7e429 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -75,7 +75,7 @@ apm-y := apm_32.o
obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += smpboot.o
-obj-$(CONFIG_SMP) += tsc_sync.o
+obj-$(CONFIG_X86_TSC) += tsc_sync.o
obj-$(CONFIG_SMP) += setup_percpu.o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-y += apic/
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 4764fa56924d..6f65b0eed384 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -715,7 +715,7 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
int nid;
nid = acpi_get_node(handle);
- if (nid != -1) {
+ if (nid != NUMA_NO_NODE) {
set_apicid_to_node(physid, nid);
numa_set_node(cpu, nid);
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index bb47e5eacd44..5b7e43eff139 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2160,21 +2160,6 @@ int __generic_processor_info(int apicid, int version, bool enabled)
}
/*
- * This can happen on physical hotplug. The sanity check at boot time
- * is done from native_smp_prepare_cpus() after num_possible_cpus() is
- * established.
- */
- if (topology_update_package_map(apicid, cpu) < 0) {
- int thiscpu = max + disabled_cpus;
-
- pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n",
- thiscpu, apicid);
-
- disabled_cpus++;
- return -ENOSPC;
- }
-
- /*
* Validate version
*/
if (version == 0x0) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 729f92ba8224..1f6b50a449ab 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -979,29 +979,21 @@ static void x86_init_cache_qos(struct cpuinfo_x86 *c)
}
/*
- * The physical to logical package id mapping is initialized from the
- * acpi/mptables information. Make sure that CPUID actually agrees with
- * that.
+ * Validate that ACPI/mptables have the same information about the
+ * effective APIC id and update the package map.
*/
-static void sanitize_package_id(struct cpuinfo_x86 *c)
+static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
- unsigned int pkg, apicid, cpu = smp_processor_id();
+ unsigned int apicid, cpu = smp_processor_id();
apicid = apic->cpu_present_to_apicid(cpu);
- pkg = apicid >> boot_cpu_data.x86_coreid_bits;
- if (apicid != c->initial_apicid) {
- pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x CPUID: %x\n",
+ if (apicid != c->apicid) {
+ pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x APIC: %x\n",
cpu, apicid, c->initial_apicid);
- c->initial_apicid = apicid;
}
- if (pkg != c->phys_proc_id) {
- pr_err(FW_BUG "CPU%u: Using firmware package id %u instead of %u\n",
- cpu, pkg, c->phys_proc_id);
- c->phys_proc_id = pkg;
- }
- c->logical_proc_id = topology_phys_to_logical_pkg(pkg);
+ BUG_ON(topology_update_package_map(c->phys_proc_id, cpu));
#else
c->logical_proc_id = 0;
#endif
@@ -1132,7 +1124,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
#ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
#endif
- sanitize_package_id(c);
}
/*
@@ -1187,6 +1178,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
enable_sep_cpu();
#endif
mtrr_ap_init();
+ validate_apic_and_package_id(c);
}
static __init int setup_noclflush(char *arg)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 90de28841242..b467b14b03eb 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -298,12 +298,13 @@ ENTRY(start_cpu)
* REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
* address given in m16:64.
*/
- call 1f # put return address on stack for unwinder
-1: xorq %rbp, %rbp # clear frame pointer
+ pushq $.Lafter_lret # put return address on stack for unwinder
+ xorq %rbp, %rbp # clear frame pointer
movq initial_code(%rip), %rax
pushq $__KERNEL_CS # set correct cs
pushq %rax # target address in negative space
lretq
+.Lafter_lret:
ENDPROC(start_cpu)
#include "verify_cpu.S"
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 43c36d8a6ae2..37363e46b1f0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -235,6 +235,7 @@ static inline void play_dead(void)
void arch_cpu_idle_enter(void)
{
+ tsc_verify_tsc_adjust(false);
local_touch_nmi();
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0c37d4fd01b2..46732dc3b73c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -103,7 +103,6 @@ static unsigned int max_physical_pkg_id __read_mostly;
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;
-static bool logical_packages_frozen __read_mostly;
/* Maximum number of SMT threads on any online core */
int __max_smt_threads __read_mostly;
@@ -273,9 +272,14 @@ static void notrace start_secondary(void *unused)
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
-int topology_update_package_map(unsigned int apicid, unsigned int cpu)
+/**
+ * topology_update_package_map - Update the physical to logical package map
+ * @pkg: The physical package id as retrieved via CPUID
+ * @cpu: The cpu for which this is updated
+ */
+int topology_update_package_map(unsigned int pkg, unsigned int cpu)
{
- unsigned int new, pkg = apicid >> boot_cpu_data.x86_coreid_bits;
+ unsigned int new;
/* Called from early boot ? */
if (!physical_package_map)
@@ -288,16 +292,17 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu)
if (test_and_set_bit(pkg, physical_package_map))
goto found;
- if (logical_packages_frozen) {
- physical_to_logical_pkg[pkg] = -1;
- pr_warn("APIC(%x) Package %u exceeds logical package max\n",
- apicid, pkg);
+ if (logical_packages >= __max_logical_packages) {
+ pr_warn("Package %u of CPU %u exceeds BIOS package data %u.\n",
+ logical_packages, cpu, __max_logical_packages);
return -ENOSPC;
}
new = logical_packages++;
- pr_info("APIC(%x) Converting physical %u to logical package %u\n",
- apicid, pkg, new);
+ if (new != pkg) {
+ pr_info("CPU %u Converting physical %u to logical package %u\n",
+ cpu, pkg, new);
+ }
physical_to_logical_pkg[pkg] = new;
found:
@@ -318,9 +323,9 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg)
}
EXPORT_SYMBOL(topology_phys_to_logical_pkg);
-static void __init smp_init_package_map(void)
+static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu)
{
- unsigned int ncpus, cpu;
+ unsigned int ncpus;
size_t size;
/*
@@ -365,27 +370,9 @@ static void __init smp_init_package_map(void)
size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
physical_package_map = kzalloc(size, GFP_KERNEL);
- for_each_present_cpu(cpu) {
- unsigned int apicid = apic->cpu_present_to_apicid(cpu);
-
- if (apicid == BAD_APICID || !apic->apic_id_valid(apicid))
- continue;
- if (!topology_update_package_map(apicid, cpu))
- continue;
- pr_warn("CPU %u APICId %x disabled\n", cpu, apicid);
- per_cpu(x86_bios_cpu_apicid, cpu) = BAD_APICID;
- set_cpu_possible(cpu, false);
- set_cpu_present(cpu, false);
- }
-
- if (logical_packages > __max_logical_packages) {
- pr_warn("Detected more packages (%u), then computed by BIOS data (%u).\n",
- logical_packages, __max_logical_packages);
- logical_packages_frozen = true;
- __max_logical_packages = logical_packages;
- }
-
pr_info("Max logical packages: %u\n", __max_logical_packages);
+
+ topology_update_package_map(c->phys_proc_id, cpu);
}
void __init smp_store_boot_cpu_info(void)
@@ -395,7 +382,7 @@ void __init smp_store_boot_cpu_info(void)
*c = boot_cpu_data;
c->cpu_index = id;
- smp_init_package_map();
+ smp_init_package_map(c, id);
}
/*
@@ -1476,15 +1463,15 @@ __init void prefill_possible_map(void)
possible = i;
}
+ nr_cpu_ids = possible;
+
pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
possible, max_t(int, possible - num_processors, 0));
+ reset_cpu_possible_mask();
+
for (i = 0; i < possible; i++)
set_cpu_possible(i, true);
- for (; i < NR_CPUS; i++)
- set_cpu_possible(i, false);
-
- nr_cpu_ids = possible;
}
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 46b2f41f8b05..0aed75a1e31b 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -702,6 +702,20 @@ unsigned long native_calibrate_tsc(void)
}
}
+ /*
+ * TSC frequency determined by CPUID is a "hardware reported"
+ * frequency and is the most accurate one so far we have. This
+ * is considered a known frequency.
+ */
+ setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+
+ /*
+ * For Atom SoCs TSC is the only reliable clocksource.
+ * Mark TSC reliable so no watchdog on it.
+ */
+ if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
+ setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+
return crystal_khz * ebx_numerator / eax_denominator;
}
@@ -1043,18 +1057,20 @@ static void detect_art(void)
if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
return;
- cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
- &art_to_tsc_numerator, unused, unused+1);
-
- /* Don't enable ART in a VM, non-stop TSC required */
+ /* Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required */
if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
!boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
- art_to_tsc_denominator < ART_MIN_DENOMINATOR)
+ !boot_cpu_has(X86_FEATURE_TSC_ADJUST))
return;
- if (rdmsrl_safe(MSR_IA32_TSC_ADJUST, &art_to_tsc_offset))
+ cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
+ &art_to_tsc_numerator, unused, unused+1);
+
+ if (art_to_tsc_denominator < ART_MIN_DENOMINATOR)
return;
+ rdmsrl(MSR_IA32_TSC_ADJUST, art_to_tsc_offset);
+
/* Make this sticky over multiple CPU init calls */
setup_force_cpu_cap(X86_FEATURE_ART);
}
@@ -1064,6 +1080,11 @@ static void detect_art(void)
static struct clocksource clocksource_tsc;
+static void tsc_resume(struct clocksource *cs)
+{
+ tsc_verify_tsc_adjust(true);
+}
+
/*
* We used to compare the TSC to the cycle_last value in the clocksource
* structure to avoid a nasty time-warp. This can be observed in a
@@ -1096,6 +1117,7 @@ static struct clocksource clocksource_tsc = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_MUST_VERIFY,
.archdata = { .vclock_mode = VCLOCK_TSC },
+ .resume = tsc_resume,
};
void mark_tsc_unstable(char *reason)
@@ -1283,10 +1305,10 @@ static int __init init_tsc_clocksource(void)
clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
/*
- * Trust the results of the earlier calibration on systems
- * exporting a reliable TSC.
+ * When TSC frequency is known (retrieved via MSR or CPUID), we skip
+ * the refined calibration and directly register it as a clocksource.
*/
- if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
+ if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {
clocksource_register_khz(&clocksource_tsc, tsc_khz);
return 0;
}
@@ -1363,6 +1385,8 @@ void __init tsc_init(void)
if (unsynchronized_tsc())
mark_tsc_unstable("TSCs unsynchronized");
+ else
+ tsc_store_and_check_tsc_adjust(true);
check_system_tsc_reliable();
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 0fe720d64fef..19afdbd7d0a7 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -100,5 +100,24 @@ unsigned long cpu_khz_from_msr(void)
#ifdef CONFIG_X86_LOCAL_APIC
lapic_timer_frequency = (freq * 1000) / HZ;
#endif
+
+ /*
+ * TSC frequency determined by MSR is always considered "known"
+ * because it is reported by HW.
+ * Another fact is that on MSR capable platforms, PIT/HPET is
+ * generally not available so calibration won't work at all.
+ */
+ setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+
+ /*
+ * Unfortunately there is no way for hardware to tell whether the
+ * TSC is reliable. We were told by silicon design team that TSC
+ * on Atom SoCs are always "reliable". TSC is also the only
+ * reliable clocksource on these SoCs (HPET is either not present
+ * or not functional) so mark TSC reliable which removes the
+ * requirement for a watchdog clocksource.
+ */
+ setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+
return res;
}
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 78083bf23ed1..d0db011051a5 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -14,18 +14,166 @@
* ( The serial nature of the boot logic and the CPU hotplug lock
* protects against more than 2 CPUs entering this code. )
*/
+#include <linux/topology.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/nmi.h>
#include <asm/tsc.h>
+struct tsc_adjust {
+ s64 bootval;
+ s64 adjusted;
+ unsigned long nextcheck;
+ bool warned;
+};
+
+static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust);
+
+void tsc_verify_tsc_adjust(bool resume)
+{
+ struct tsc_adjust *adj = this_cpu_ptr(&tsc_adjust);
+ s64 curval;
+
+ if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
+ return;
+
+ /* Rate limit the MSR check */
+ if (!resume && time_before(jiffies, adj->nextcheck))
+ return;
+
+ adj->nextcheck = jiffies + HZ;
+
+ rdmsrl(MSR_IA32_TSC_ADJUST, curval);
+ if (adj->adjusted == curval)
+ return;
+
+ /* Restore the original value */
+ wrmsrl(MSR_IA32_TSC_ADJUST, adj->adjusted);
+
+ if (!adj->warned || resume) {
+ pr_warn(FW_BUG "TSC ADJUST differs: CPU%u %lld --> %lld. Restoring\n",
+ smp_processor_id(), adj->adjusted, curval);
+ adj->warned = true;
+ }
+}
+
+static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval,
+ unsigned int cpu, bool bootcpu)
+{
+ /*
+ * First online CPU in a package stores the boot value in the
+ * adjustment value. This value might change later via the sync
+ * mechanism. If that fails we still can yell about boot values not
+ * being consistent.
+ *
+ * On the boot cpu we just force set the ADJUST value to 0 if it's
+ * non zero. We don't do that on non boot cpus because physical
+ * hotplug should have set the ADJUST register to a value > 0 so
+ * the TSC is in sync with the already running cpus.
+ *
+ * But we always force positive ADJUST values. Otherwise the TSC
+ * deadline timer creates an interrupt storm. We also have to
+ * prevent values > 0x7FFFFFFF as those wreckage the timer as well.
+ */
+ if ((bootcpu && bootval != 0) || (!bootcpu && bootval < 0) ||
+ (bootval > 0x7FFFFFFF)) {
+ pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n", cpu,
+ bootval);
+ wrmsrl(MSR_IA32_TSC_ADJUST, 0);
+ bootval = 0;
+ }
+ cur->adjusted = bootval;
+}
+
+#ifndef CONFIG_SMP
+bool __init tsc_store_and_check_tsc_adjust(bool bootcpu)
+{
+ struct tsc_adjust *cur = this_cpu_ptr(&tsc_adjust);
+ s64 bootval;
+
+ if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
+ return false;
+
+ rdmsrl(MSR_IA32_TSC_ADJUST, bootval);
+ cur->bootval = bootval;
+ cur->nextcheck = jiffies + HZ;
+ tsc_sanitize_first_cpu(cur, bootval, smp_processor_id(), bootcpu);
+ return false;
+}
+
+#else /* !CONFIG_SMP */
+
+/*
+ * Store and check the TSC ADJUST MSR if available
+ */
+bool tsc_store_and_check_tsc_adjust(bool bootcpu)
+{
+ struct tsc_adjust *ref, *cur = this_cpu_ptr(&tsc_adjust);
+ unsigned int refcpu, cpu = smp_processor_id();
+ struct cpumask *mask;
+ s64 bootval;
+
+ if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
+ return false;
+
+ rdmsrl(MSR_IA32_TSC_ADJUST, bootval);
+ cur->bootval = bootval;
+ cur->nextcheck = jiffies + HZ;
+ cur->warned = false;
+
+ /*
+ * Check whether this CPU is the first in a package to come up. In
+ * this case do not check the boot value against another package
+ * because the new package might have been physically hotplugged,
+ * where TSC_ADJUST is expected to be different. When called on the
+ * boot CPU topology_core_cpumask() might not be available yet.
+ */
+ mask = topology_core_cpumask(cpu);
+ refcpu = mask ? cpumask_any_but(mask, cpu) : nr_cpu_ids;
+
+ if (refcpu >= nr_cpu_ids) {
+ tsc_sanitize_first_cpu(cur, bootval, smp_processor_id(),
+ bootcpu);
+ return false;
+ }
+
+ ref = per_cpu_ptr(&tsc_adjust, refcpu);
+ /*
+ * Compare the boot value and complain if it differs in the
+ * package.
+ */
+ if (bootval != ref->bootval) {
+ pr_warn(FW_BUG "TSC ADJUST differs: Reference CPU%u: %lld CPU%u: %lld\n",
+ refcpu, ref->bootval, cpu, bootval);
+ }
+ /*
+ * The TSC_ADJUST values in a package must be the same. If the boot
+ * value on this newly upcoming CPU differs from the adjustment
+ * value of the already online CPU in this package, set it to that
+ * adjusted value.
+ */
+ if (bootval != ref->adjusted) {
+ pr_warn("TSC ADJUST synchronize: Reference CPU%u: %lld CPU%u: %lld\n",
+ refcpu, ref->adjusted, cpu, bootval);
+ cur->adjusted = ref->adjusted;
+ wrmsrl(MSR_IA32_TSC_ADJUST, ref->adjusted);
+ }
+ /*
+ * We have the TSCs forced to be in sync on this package. Skip sync
+ * test:
+ */
+ return true;
+}
+
/*
* Entry/exit counters that make sure that both CPUs
* run the measurement code at once:
*/
static atomic_t start_count;
static atomic_t stop_count;
+static atomic_t skip_test;
+static atomic_t test_runs;
/*
* We use a raw spinlock in this exceptional case, because
@@ -37,15 +185,16 @@ static arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static cycles_t last_tsc;
static cycles_t max_warp;
static int nr_warps;
+static int random_warps;
/*
* TSC-warp measurement loop running on both CPUs. This is not called
* if there is no TSC.
*/
-static void check_tsc_warp(unsigned int timeout)
+static cycles_t check_tsc_warp(unsigned int timeout)
{
- cycles_t start, now, prev, end;
- int i;
+ cycles_t start, now, prev, end, cur_max_warp = 0;
+ int i, cur_warps = 0;
start = rdtsc_ordered();
/*
@@ -85,13 +234,22 @@ static void check_tsc_warp(unsigned int timeout)
if (unlikely(prev > now)) {
arch_spin_lock(&sync_lock);
max_warp = max(max_warp, prev - now);
+ cur_max_warp = max_warp;
+ /*
+ * Check whether this bounces back and forth. Only
+ * one CPU should observe time going backwards.
+ */
+ if (cur_warps != nr_warps)
+ random_warps++;
nr_warps++;
+ cur_warps = nr_warps;
arch_spin_unlock(&sync_lock);
}
}
WARN(!(now-start),
"Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
now-start, end-start);
+ return cur_max_warp;
}
/*
@@ -136,15 +294,26 @@ void check_tsc_sync_source(int cpu)
}
/*
- * Reset it - in case this is a second bootup:
+ * Set the maximum number of test runs to
+ * 1 if the CPU does not provide the TSC_ADJUST MSR
+ * 3 if the MSR is available, so the target can try to adjust
*/
- atomic_set(&stop_count, 0);
-
+ if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
+ atomic_set(&test_runs, 1);
+ else
+ atomic_set(&test_runs, 3);
+retry:
/*
- * Wait for the target to arrive:
+ * Wait for the target to start or to skip the test:
*/
- while (atomic_read(&start_count) != cpus-1)
+ while (atomic_read(&start_count) != cpus - 1) {
+ if (atomic_read(&skip_test) > 0) {
+ atomic_set(&skip_test, 0);
+ return;
+ }
cpu_relax();
+ }
+
/*
* Trigger the target to continue into the measurement too:
*/
@@ -155,21 +324,35 @@ void check_tsc_sync_source(int cpu)
while (atomic_read(&stop_count) != cpus-1)
cpu_relax();
- if (nr_warps) {
+ /*
+ * If the test was successful set the number of runs to zero and
+ * stop. If not, decrement the number of runs an check if we can
+ * retry. In case of random warps no retry is attempted.
+ */
+ if (!nr_warps) {
+ atomic_set(&test_runs, 0);
+
+ pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
+ smp_processor_id(), cpu);
+
+ } else if (atomic_dec_and_test(&test_runs) || random_warps) {
+ /* Force it to 0 if random warps brought us here */
+ atomic_set(&test_runs, 0);
+
pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
smp_processor_id(), cpu);
pr_warning("Measured %Ld cycles TSC warp between CPUs, "
"turning off TSC clock.\n", max_warp);
+ if (random_warps)
+ pr_warning("TSC warped randomly between CPUs\n");
mark_tsc_unstable("check_tsc_sync_source failed");
- } else {
- pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
- smp_processor_id(), cpu);
}
/*
* Reset it - just in case we boot another CPU later:
*/
atomic_set(&start_count, 0);
+ random_warps = 0;
nr_warps = 0;
max_warp = 0;
last_tsc = 0;
@@ -178,6 +361,12 @@ void check_tsc_sync_source(int cpu)
* Let the target continue with the bootup:
*/
atomic_inc(&stop_count);
+
+ /*
+ * Retry, if there is a chance to do so.
+ */
+ if (atomic_read(&test_runs) > 0)
+ goto retry;
}
/*
@@ -185,6 +374,9 @@ void check_tsc_sync_source(int cpu)
*/
void check_tsc_sync_target(void)
{
+ struct tsc_adjust *cur = this_cpu_ptr(&tsc_adjust);
+ unsigned int cpu = smp_processor_id();
+ cycles_t cur_max_warp, gbl_max_warp;
int cpus = 2;
/* Also aborts if there is no TSC. */
@@ -192,6 +384,16 @@ void check_tsc_sync_target(void)
return;
/*
+ * Store, verify and sanitize the TSC adjust register. If
+ * successful skip the test.
+ */
+ if (tsc_store_and_check_tsc_adjust(false)) {
+ atomic_inc(&skip_test);
+ return;
+ }
+
+retry:
+ /*
* Register this CPU's participation and wait for the
* source CPU to start the measurement:
*/
@@ -199,7 +401,12 @@ void check_tsc_sync_target(void)
while (atomic_read(&start_count) != cpus)
cpu_relax();
- check_tsc_warp(loop_timeout(smp_processor_id()));
+ cur_max_warp = check_tsc_warp(loop_timeout(cpu));
+
+ /*
+ * Store the maximum observed warp value for a potential retry:
+ */
+ gbl_max_warp = max_warp;
/*
* Ok, we are done:
@@ -211,4 +418,61 @@ void check_tsc_sync_target(void)
*/
while (atomic_read(&stop_count) != cpus)
cpu_relax();
+
+ /*
+ * Reset it for the next sync test:
+ */
+ atomic_set(&stop_count, 0);
+
+ /*
+ * Check the number of remaining test runs. If not zero, the test
+ * failed and a retry with adjusted TSC is possible. If zero the
+ * test was either successful or failed terminally.
+ */
+ if (!atomic_read(&test_runs))
+ return;
+
+ /*
+ * If the warp value of this CPU is 0, then the other CPU
+ * observed time going backwards so this TSC was ahead and
+ * needs to move backwards.
+ */
+ if (!cur_max_warp)
+ cur_max_warp = -gbl_max_warp;
+
+ /*
+ * Add the result to the previous adjustment value.
+ *
+ * The adjustement value is slightly off by the overhead of the
+ * sync mechanism (observed values are ~200 TSC cycles), but this
+ * really depends on CPU, node distance and frequency. So
+ * compensating for this is hard to get right. Experiments show
+ * that the warp is not longer detectable when the observed warp
+ * value is used. In the worst case the adjustment needs to go
+ * through a 3rd run for fine tuning.
+ */
+ cur->adjusted += cur_max_warp;
+
+ /*
+ * TSC deadline timer stops working or creates an interrupt storm
+ * with adjust values < 0 and > x07ffffff.
+ *
+ * To allow adjust values > 0x7FFFFFFF we need to disable the
+ * deadline timer and use the local APIC timer, but that requires
+ * more intrusive changes and we do not have any useful information
+ * from Intel about the underlying HW wreckage yet.
+ */
+ if (cur->adjusted < 0)
+ cur->adjusted = 0;
+ if (cur->adjusted > 0x7FFFFFFF)
+ cur->adjusted = 0x7FFFFFFF;
+
+ pr_warn("TSC ADJUST compensate: CPU%u observed %lld warp. Adjust: %lld\n",
+ cpu, cur_max_warp, cur->adjusted);
+
+ wrmsrl(MSR_IA32_TSC_ADJUST, cur->adjusted);
+ goto retry;
+
}
+
+#endif /* CONFIG_SMP */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b2d3cf1ef54a..e85f6bd7b9d5 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -373,16 +373,17 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
const u32 kvm_cpuid_7_0_ebx_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
- F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
- F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
- F(AVX512BW) | F(AVX512VL);
+ F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
+ F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
+ F(SHA_NI) | F(AVX512BW) | F(AVX512VL);
/* cpuid 0xD.1.eax */
const u32 kvm_cpuid_D_1_eax_x86_features =
F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
/* cpuid 7.0.ecx*/
- const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/;
+ const u32 kvm_cpuid_7_0_ecx_x86_features =
+ F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/;
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 99cde5220e07..1572c35b4f1a 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -852,6 +852,10 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
return;
+ mutex_lock(&kvm->arch.hyperv.hv_lock);
+ if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+ goto out_unlock;
+
gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
/*
* Because the TSC parameters only vary when there is a
@@ -859,7 +863,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
*/
if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
&tsc_seq, sizeof(tsc_seq))))
- return;
+ goto out_unlock;
/*
* While we're computing and writing the parameters, force the
@@ -868,15 +872,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
hv->tsc_ref.tsc_sequence = 0;
if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
- return;
+ goto out_unlock;
if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
- return;
+ goto out_unlock;
/* Ensure sequence is zero before writing the rest of the struct. */
smp_wmb();
if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
- return;
+ goto out_unlock;
/*
* Now switch to the TSC page mechanism by writing the sequence.
@@ -891,6 +895,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
hv->tsc_ref.tsc_sequence = tsc_seq;
kvm_write_guest(kvm, gfn_to_gpa(gfn),
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
+out_unlock:
+ mutex_unlock(&kvm->arch.hyperv.hv_lock);
}
static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
@@ -1142,9 +1148,9 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
if (kvm_hv_msr_partition_wide(msr)) {
int r;
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
return r;
} else
return kvm_hv_set_msr(vcpu, msr, data, host);
@@ -1155,9 +1161,9 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
if (kvm_hv_msr_partition_wide(msr)) {
int r;
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
return r;
} else
return kvm_hv_get_msr(vcpu, msr, pdata);
@@ -1165,7 +1171,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
- return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
+ return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
}
static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index aae43c6f2472..24db5fb6f575 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1389,10 +1389,10 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
}
-static inline bool is_exception(u32 intr_info)
+static inline bool is_nmi(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
- == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
+ == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
}
static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
@@ -5728,7 +5728,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
if (is_machine_check(intr_info))
return handle_machine_check(vcpu);
- if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
+ if (is_nmi(intr_info))
return 1; /* already handled by vmx_vcpu_run() */
if (is_no_device(intr_info)) {
@@ -7122,7 +7122,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
if (vmptr == vmx->nested.vmxon_ptr) {
nested_vmx_failValid(vcpu,
- VMXERR_VMCLEAR_VMXON_POINTER);
+ VMXERR_VMPTRLD_VMXON_POINTER);
return kvm_skip_emulated_instruction(vcpu);
}
break;
@@ -8170,7 +8170,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
switch (exit_reason) {
case EXIT_REASON_EXCEPTION_NMI:
- if (!is_exception(intr_info))
+ if (is_nmi(intr_info))
return false;
else if (is_page_fault(intr_info))
return enable_ept;
@@ -8765,8 +8765,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
kvm_machine_check();
/* We need to handle NMIs before interrupts are enabled */
- if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
- (exit_intr_info & INTR_INFO_VALID_MASK)) {
+ if (is_nmi(exit_intr_info)) {
kvm_before_handle_nmi(&vmx->vcpu);
asm("int $2");
kvm_after_handle_nmi(&vmx->vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1f0d2383f5ee..445c51b6cf6d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2844,7 +2844,24 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ int idx;
+ /*
+ * Disable page faults because we're in atomic context here.
+ * kvm_write_guest_offset_cached() would call might_fault()
+ * that relies on pagefault_disable() to tell if there's a
+ * bug. NOTE: the write to guest memory may not go through if
+ * during postcopy live migration or if there's heavy guest
+ * paging.
+ */
+ pagefault_disable();
+ /*
+ * kvm_memslots() will be called by
+ * kvm_write_guest_offset_cached() so take the srcu lock.
+ */
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_steal_time_set_preempted(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ pagefault_enable();
kvm_x86_ops->vcpu_put(vcpu);
kvm_put_guest_fpu(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
@@ -7881,6 +7898,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock);
+ mutex_init(&kvm->arch.hyperv.hv_lock);
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 17c55a536fdd..e3254ca0eec4 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -413,7 +413,7 @@ out:
void vmalloc_sync_all(void)
{
- sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END, 0);
+ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
}
/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 14b9dd71d9e8..963895f9af7f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -89,10 +89,10 @@ static int __init nonx32_setup(char *str)
__setup("noexec32=", nonx32_setup);
/*
- * When memory was added/removed make sure all the processes MM have
+ * When memory was added make sure all the processes MM have
* suitable PGD entries in the local PGD level page.
*/
-void sync_global_pgds(unsigned long start, unsigned long end, int removed)
+void sync_global_pgds(unsigned long start, unsigned long end)
{
unsigned long address;
@@ -100,12 +100,7 @@ void sync_global_pgds(unsigned long start, unsigned long end, int removed)
const pgd_t *pgd_ref = pgd_offset_k(address);
struct page *page;
- /*
- * When it is called after memory hot remove, pgd_none()
- * returns true. In this case (removed == 1), we must clear
- * the PGD entries in the local PGD level page.
- */
- if (pgd_none(*pgd_ref) && !removed)
+ if (pgd_none(*pgd_ref))
continue;
spin_lock(&pgd_lock);
@@ -122,13 +117,8 @@ void sync_global_pgds(unsigned long start, unsigned long end, int removed)
BUG_ON(pgd_page_vaddr(*pgd)
!= pgd_page_vaddr(*pgd_ref));
- if (removed) {
- if (pgd_none(*pgd_ref) && !pgd_none(*pgd))
- pgd_clear(pgd);
- } else {
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- }
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
spin_unlock(pgt_lock);
}
@@ -596,7 +586,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
}
if (pgd_changed)
- sync_global_pgds(vaddr_start, vaddr_end - 1, 0);
+ sync_global_pgds(vaddr_start, vaddr_end - 1);
__flush_tlb_all();
@@ -1239,7 +1229,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
} else
err = vmemmap_populate_basepages(start, end, node);
if (!err)
- sync_global_pgds(start, end - 1, 0);
+ sync_global_pgds(start, end - 1);
return err;
}
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index e4f800999b32..324e5713d386 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -350,12 +350,12 @@ int mpx_enable_management(void)
* The copy_xregs_to_kernel() beneath get_xsave_field_ptr() is
* expected to be relatively expensive. Storing the bounds
* directory here means that we do not have to do xsave in the
- * unmap path; we can just use mm->bd_addr instead.
+ * unmap path; we can just use mm->context.bd_addr instead.
*/
bd_base = mpx_get_bounds_dir();
down_write(&mm->mmap_sem);
- mm->bd_addr = bd_base;
- if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR)
+ mm->context.bd_addr = bd_base;
+ if (mm->context.bd_addr == MPX_INVALID_BOUNDS_DIR)
ret = -ENXIO;
up_write(&mm->mmap_sem);
@@ -370,7 +370,7 @@ int mpx_disable_management(void)
return -ENXIO;
down_write(&mm->mmap_sem);
- mm->bd_addr = MPX_INVALID_BOUNDS_DIR;
+ mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR;
up_write(&mm->mmap_sem);
return 0;
}
@@ -947,7 +947,7 @@ static int try_unmap_single_bt(struct mm_struct *mm,
end = bta_end_vaddr;
}
- bde_vaddr = mm->bd_addr + mpx_get_bd_entry_offset(mm, start);
+ bde_vaddr = mm->context.bd_addr + mpx_get_bd_entry_offset(mm, start);
ret = get_bt_addr(mm, bde_vaddr, &bt_addr);
/*
* No bounds table there, so nothing to unmap.
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 3f35b48d1d9d..12dcad7297a5 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -19,7 +19,7 @@
#include "numa_internal.h"
-int __initdata numa_off;
+int numa_off;
nodemask_t numa_nodes_parsed __initdata;
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 3c3c19ea94df..184842ef332e 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -8,7 +8,6 @@ obj-y += iris/
obj-y += intel/
obj-y += intel-mid/
obj-y += intel-quark/
-obj-y += mellanox/
obj-y += olpc/
obj-y += scx200/
obj-y += sfi/
diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c
index 1eb47b6298c2..e793fe509971 100644
--- a/arch/x86/platform/intel-mid/mfld.c
+++ b/arch/x86/platform/intel-mid/mfld.c
@@ -49,8 +49,13 @@ static unsigned long __init mfld_calibrate_tsc(void)
fast_calibrate = ratio * fsb;
pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
lapic_timer_frequency = fsb * 1000 / HZ;
- /* mark tsc clocksource as reliable */
- set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
+
+ /*
+ * TSC on Intel Atom SoCs is reliable and of known frequency.
+ * See tsc_msr.c for details.
+ */
+ setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+ setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
return fast_calibrate;
}
diff --git a/arch/x86/platform/intel-mid/mrfld.c b/arch/x86/platform/intel-mid/mrfld.c
index 59253db41bbc..e0607c77a1bd 100644
--- a/arch/x86/platform/intel-mid/mrfld.c
+++ b/arch/x86/platform/intel-mid/mrfld.c
@@ -78,8 +78,12 @@ static unsigned long __init tangier_calibrate_tsc(void)
pr_debug("Setting lapic_timer_frequency = %d\n",
lapic_timer_frequency);
- /* mark tsc clocksource as reliable */
- set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
+ /*
+ * TSC on Intel Atom SoCs is reliable and of known frequency.
+ * See tsc_msr.c for details.
+ */
+ setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+ setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
return fast_calibrate;
}
diff --git a/arch/x86/platform/mellanox/Makefile b/arch/x86/platform/mellanox/Makefile
deleted file mode 100644
index f43c93188a1d..000000000000
--- a/arch/x86/platform/mellanox/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-obj-$(CONFIG_MLX_PLATFORM) += mlx-platform.o
diff --git a/arch/x86/platform/mellanox/mlx-platform.c b/arch/x86/platform/mellanox/mlx-platform.c
deleted file mode 100644
index 7dcfcca97399..000000000000
--- a/arch/x86/platform/mellanox/mlx-platform.c
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * arch/x86/platform/mellanox/mlx-platform.c
- * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2016 Vadim Pasternak <vadimp@mellanox.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/device.h>
-#include <linux/dmi.h>
-#include <linux/i2c.h>
-#include <linux/i2c-mux.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/platform_data/i2c-mux-reg.h>
-
-#define MLX_PLAT_DEVICE_NAME "mlxplat"
-
-/* LPC bus IO offsets */
-#define MLXPLAT_CPLD_LPC_I2C_BASE_ADRR 0x2000
-#define MLXPLAT_CPLD_LPC_REG_BASE_ADRR 0x2500
-#define MLXPLAT_CPLD_LPC_IO_RANGE 0x100
-#define MLXPLAT_CPLD_LPC_I2C_CH1_OFF 0xdb
-#define MLXPLAT_CPLD_LPC_I2C_CH2_OFF 0xda
-#define MLXPLAT_CPLD_LPC_PIO_OFFSET 0x10000UL
-#define MLXPLAT_CPLD_LPC_REG1 ((MLXPLAT_CPLD_LPC_REG_BASE_ADRR + \
- MLXPLAT_CPLD_LPC_I2C_CH1_OFF) | \
- MLXPLAT_CPLD_LPC_PIO_OFFSET)
-#define MLXPLAT_CPLD_LPC_REG2 ((MLXPLAT_CPLD_LPC_REG_BASE_ADRR + \
- MLXPLAT_CPLD_LPC_I2C_CH2_OFF) | \
- MLXPLAT_CPLD_LPC_PIO_OFFSET)
-
-/* Start channel numbers */
-#define MLXPLAT_CPLD_CH1 2
-#define MLXPLAT_CPLD_CH2 10
-
-/* Number of LPC attached MUX platform devices */
-#define MLXPLAT_CPLD_LPC_MUX_DEVS 2
-
-/* mlxplat_priv - platform private data
- * @pdev_i2c - i2c controller platform device
- * @pdev_mux - array of mux platform devices
- */
-struct mlxplat_priv {
- struct platform_device *pdev_i2c;
- struct platform_device *pdev_mux[MLXPLAT_CPLD_LPC_MUX_DEVS];
-};
-
-/* Regions for LPC I2C controller and LPC base register space */
-static const struct resource mlxplat_lpc_resources[] = {
- [0] = DEFINE_RES_NAMED(MLXPLAT_CPLD_LPC_I2C_BASE_ADRR,
- MLXPLAT_CPLD_LPC_IO_RANGE,
- "mlxplat_cpld_lpc_i2c_ctrl", IORESOURCE_IO),
- [1] = DEFINE_RES_NAMED(MLXPLAT_CPLD_LPC_REG_BASE_ADRR,
- MLXPLAT_CPLD_LPC_IO_RANGE,
- "mlxplat_cpld_lpc_regs",
- IORESOURCE_IO),
-};
-
-/* Platform default channels */
-static const int mlxplat_default_channels[][8] = {
- {
- MLXPLAT_CPLD_CH1, MLXPLAT_CPLD_CH1 + 1, MLXPLAT_CPLD_CH1 + 2,
- MLXPLAT_CPLD_CH1 + 3, MLXPLAT_CPLD_CH1 + 4, MLXPLAT_CPLD_CH1 +
- 5, MLXPLAT_CPLD_CH1 + 6, MLXPLAT_CPLD_CH1 + 7
- },
- {
- MLXPLAT_CPLD_CH2, MLXPLAT_CPLD_CH2 + 1, MLXPLAT_CPLD_CH2 + 2,
- MLXPLAT_CPLD_CH2 + 3, MLXPLAT_CPLD_CH2 + 4, MLXPLAT_CPLD_CH2 +
- 5, MLXPLAT_CPLD_CH2 + 6, MLXPLAT_CPLD_CH2 + 7
- },
-};
-
-/* Platform channels for MSN21xx system family */
-static const int mlxplat_msn21xx_channels[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
-
-/* Platform mux data */
-static struct i2c_mux_reg_platform_data mlxplat_mux_data[] = {
- {
- .parent = 1,
- .base_nr = MLXPLAT_CPLD_CH1,
- .write_only = 1,
- .reg = (void __iomem *)MLXPLAT_CPLD_LPC_REG1,
- .reg_size = 1,
- .idle_in_use = 1,
- },
- {
- .parent = 1,
- .base_nr = MLXPLAT_CPLD_CH2,
- .write_only = 1,
- .reg = (void __iomem *)MLXPLAT_CPLD_LPC_REG2,
- .reg_size = 1,
- .idle_in_use = 1,
- },
-
-};
-
-static struct platform_device *mlxplat_dev;
-
-static int __init mlxplat_dmi_default_matched(const struct dmi_system_id *dmi)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) {
- mlxplat_mux_data[i].values = mlxplat_default_channels[i];
- mlxplat_mux_data[i].n_values =
- ARRAY_SIZE(mlxplat_default_channels[i]);
- }
-
- return 1;
-};
-
-static int __init mlxplat_dmi_msn21xx_matched(const struct dmi_system_id *dmi)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) {
- mlxplat_mux_data[i].values = mlxplat_msn21xx_channels;
- mlxplat_mux_data[i].n_values =
- ARRAY_SIZE(mlxplat_msn21xx_channels);
- }
-
- return 1;
-};
-
-static struct dmi_system_id mlxplat_dmi_table[] __initdata = {
- {
- .callback = mlxplat_dmi_default_matched,
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
- DMI_MATCH(DMI_PRODUCT_NAME, "MSN24"),
- },
- },
- {
- .callback = mlxplat_dmi_default_matched,
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
- DMI_MATCH(DMI_PRODUCT_NAME, "MSN27"),
- },
- },
- {
- .callback = mlxplat_dmi_default_matched,
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
- DMI_MATCH(DMI_PRODUCT_NAME, "MSB"),
- },
- },
- {
- .callback = mlxplat_dmi_default_matched,
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
- DMI_MATCH(DMI_PRODUCT_NAME, "MSX"),
- },
- },
- {
- .callback = mlxplat_dmi_msn21xx_matched,
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
- DMI_MATCH(DMI_PRODUCT_NAME, "MSN21"),
- },
- },
- { }
-};
-
-static int __init mlxplat_init(void)
-{
- struct mlxplat_priv *priv;
- int i, err;
-
- if (!dmi_check_system(mlxplat_dmi_table))
- return -ENODEV;
-
- mlxplat_dev = platform_device_register_simple(MLX_PLAT_DEVICE_NAME, -1,
- mlxplat_lpc_resources,
- ARRAY_SIZE(mlxplat_lpc_resources));
-
- if (IS_ERR(mlxplat_dev))
- return PTR_ERR(mlxplat_dev);
-
- priv = devm_kzalloc(&mlxplat_dev->dev, sizeof(struct mlxplat_priv),
- GFP_KERNEL);
- if (!priv) {
- err = -ENOMEM;
- goto fail_alloc;
- }
- platform_set_drvdata(mlxplat_dev, priv);
-
- priv->pdev_i2c = platform_device_register_simple("i2c_mlxcpld", -1,
- NULL, 0);
- if (IS_ERR(priv->pdev_i2c)) {
- err = PTR_ERR(priv->pdev_i2c);
- goto fail_alloc;
- };
-
- for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) {
- priv->pdev_mux[i] = platform_device_register_resndata(
- &mlxplat_dev->dev,
- "i2c-mux-reg", i, NULL,
- 0, &mlxplat_mux_data[i],
- sizeof(mlxplat_mux_data[i]));
- if (IS_ERR(priv->pdev_mux[i])) {
- err = PTR_ERR(priv->pdev_mux[i]);
- goto fail_platform_mux_register;
- }
- }
-
- return 0;
-
-fail_platform_mux_register:
- for (i--; i > 0 ; i--)
- platform_device_unregister(priv->pdev_mux[i]);
- platform_device_unregister(priv->pdev_i2c);
-fail_alloc:
- platform_device_unregister(mlxplat_dev);
-
- return err;
-}
-module_init(mlxplat_init);
-
-static void __exit mlxplat_exit(void)
-{
- struct mlxplat_priv *priv = platform_get_drvdata(mlxplat_dev);
- int i;
-
- for (i = ARRAY_SIZE(mlxplat_mux_data) - 1; i >= 0 ; i--)
- platform_device_unregister(priv->pdev_mux[i]);
-
- platform_device_unregister(priv->pdev_i2c);
- platform_device_unregister(mlxplat_dev);
-}
-module_exit(mlxplat_exit);
-
-MODULE_AUTHOR("Vadim Pasternak (vadimp@mellanox.com)");
-MODULE_DESCRIPTION("Mellanox platform driver");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_ALIAS("dmi:*:*Mellanox*:MSN24*:");
-MODULE_ALIAS("dmi:*:*Mellanox*:MSN27*:");
-MODULE_ALIAS("dmi:*:*Mellanox*:MSB*:");
-MODULE_ALIAS("dmi:*:*Mellanox*:MSX*:");
-MODULE_ALIAS("dmi:*:*Mellanox*:MSN21*:");
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 53cace2ec0e2..66ade16c7693 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -252,6 +252,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
fix_processor_context();
do_fpu_end();
+ tsc_verify_tsc_adjust(true);
x86_platform.restore_sched_clock_state();
mtrr_bp_restore();
perf_restore_debug_store();
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 9fa27ceeecfd..311acad7dad2 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -87,12 +87,6 @@ static void cpu_bringup(void)
cpu_data(cpu).x86_max_cores = 1;
set_cpu_sibling_map(cpu);
- /*
- * identify_cpu() may have set logical_pkg_id to -1 due
- * to incorrect phys_proc_id. Let's re-comupte it.
- */
- topology_update_package_map(apic->cpu_present_to_apicid(cpu), cpu);
-
xen_setup_cpu_clockevents();
notify_cpu_starting(cpu);
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index f61058617ada..f4126cf997a4 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -15,6 +15,7 @@ config XTENSA
select GENERIC_SCHED_CLOCK
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_API_DEBUG
+ select HAVE_DMA_CONTIGUOUS
select HAVE_EXIT_THREAD
select HAVE_FUNCTION_TRACER
select HAVE_FUTEX_CMPXCHG if !MMU
diff --git a/arch/xtensa/boot/dts/kc705.dts b/arch/xtensa/boot/dts/kc705.dts
index b1f4ee8c9a22..6106bdc097ad 100644
--- a/arch/xtensa/boot/dts/kc705.dts
+++ b/arch/xtensa/boot/dts/kc705.dts
@@ -11,4 +11,20 @@
device_type = "memory";
reg = <0x00000000 0x38000000>;
};
+
+ reserved-memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ /* global autoconfigured region for contiguous allocations */
+ linux,cma {
+ compatible = "shared-dma-pool";
+ reusable;
+ size = <0x04000000>;
+ alignment = <0x2000>;
+ alloc-ranges = <0x00000000 0x20000000>;
+ linux,cma-default;
+ };
+ };
};
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 28cf4c5d65ef..b7fbaa56b51a 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -3,6 +3,7 @@ generic-y += bug.h
generic-y += clkdev.h
generic-y += cputime.h
generic-y += div64.h
+generic-y += dma-contiguous.h
generic-y += emergency-restart.h
generic-y += errno.h
generic-y += exec.h
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index c31f5d5afc7d..264fb89c444e 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
obj-$(CONFIG_SMP) += smp.o mxhead.o
obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_S32C1I_SELFTEST) += s32c1i_selftest.o
AFLAGS_head.o += -mtext-section-literals
AFLAGS_mxhead.o += -mtext-section-literals
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index 6a16decf278f..70e362e6038e 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -15,6 +15,7 @@
* Joe Taylor <joe@tensilica.com, joetylr@yahoo.com>
*/
+#include <linux/dma-contiguous.h>
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/mm.h>
@@ -146,6 +147,8 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
{
unsigned long ret;
unsigned long uncached = 0;
+ unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ struct page *page = NULL;
/* ignore region speicifiers */
@@ -153,11 +156,18 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
flag |= GFP_DMA;
- ret = (unsigned long)__get_free_pages(flag, get_order(size));
- if (ret == 0)
+ if (gfpflags_allow_blocking(flag))
+ page = dma_alloc_from_contiguous(dev, count, get_order(size));
+
+ if (!page)
+ page = alloc_pages(flag, get_order(size));
+
+ if (!page)
return NULL;
+ ret = (unsigned long)page_address(page);
+
/* We currently don't support coherent memory outside KSEG */
BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR ||
@@ -170,16 +180,19 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
return (void *)uncached;
}
-static void xtensa_dma_free(struct device *hwdev, size_t size, void *vaddr,
+static void xtensa_dma_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
unsigned long addr = (unsigned long)vaddr +
XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
+ struct page *page = virt_to_page(addr);
+ unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
BUG_ON(addr < XCHAL_KSEG_CACHED_VADDR ||
addr > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);
- free_pages(addr, get_order(size));
+ if (!dma_release_from_contiguous(dev, page, count))
+ __free_pages(page, get_order(size));
}
static dma_addr_t xtensa_map_page(struct device *dev, struct page *page,
diff --git a/arch/xtensa/kernel/s32c1i_selftest.c b/arch/xtensa/kernel/s32c1i_selftest.c
new file mode 100644
index 000000000000..07e56e3a9a8b
--- /dev/null
+++ b/arch/xtensa/kernel/s32c1i_selftest.c
@@ -0,0 +1,128 @@
+/*
+ * S32C1I selftest.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2016 Cadence Design Systems Inc.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+#include <asm/traps.h>
+
+#if XCHAL_HAVE_S32C1I
+
+static int __initdata rcw_word, rcw_probe_pc, rcw_exc;
+
+/*
+ * Basic atomic compare-and-swap, that records PC of S32C1I for probing.
+ *
+ * If *v == cmp, set *v = set. Return previous *v.
+ */
+static inline int probed_compare_swap(int *v, int cmp, int set)
+{
+ int tmp;
+
+ __asm__ __volatile__(
+ " movi %1, 1f\n"
+ " s32i %1, %4, 0\n"
+ " wsr %2, scompare1\n"
+ "1: s32c1i %0, %3, 0\n"
+ : "=a" (set), "=&a" (tmp)
+ : "a" (cmp), "a" (v), "a" (&rcw_probe_pc), "0" (set)
+ : "memory"
+ );
+ return set;
+}
+
+/* Handle probed exception */
+
+static void __init do_probed_exception(struct pt_regs *regs,
+ unsigned long exccause)
+{
+ if (regs->pc == rcw_probe_pc) { /* exception on s32c1i ? */
+ regs->pc += 3; /* skip the s32c1i instruction */
+ rcw_exc = exccause;
+ } else {
+ do_unhandled(regs, exccause);
+ }
+}
+
+/* Simple test of S32C1I (soc bringup assist) */
+
+static int __init check_s32c1i(void)
+{
+ int n, cause1, cause2;
+ void *handbus, *handdata, *handaddr; /* temporarily saved handlers */
+
+ rcw_probe_pc = 0;
+ handbus = trap_set_handler(EXCCAUSE_LOAD_STORE_ERROR,
+ do_probed_exception);
+ handdata = trap_set_handler(EXCCAUSE_LOAD_STORE_DATA_ERROR,
+ do_probed_exception);
+ handaddr = trap_set_handler(EXCCAUSE_LOAD_STORE_ADDR_ERROR,
+ do_probed_exception);
+
+ /* First try an S32C1I that does not store: */
+ rcw_exc = 0;
+ rcw_word = 1;
+ n = probed_compare_swap(&rcw_word, 0, 2);
+ cause1 = rcw_exc;
+
+ /* took exception? */
+ if (cause1 != 0) {
+ /* unclean exception? */
+ if (n != 2 || rcw_word != 1)
+ panic("S32C1I exception error");
+ } else if (rcw_word != 1 || n != 1) {
+ panic("S32C1I compare error");
+ }
+
+ /* Then an S32C1I that stores: */
+ rcw_exc = 0;
+ rcw_word = 0x1234567;
+ n = probed_compare_swap(&rcw_word, 0x1234567, 0xabcde);
+ cause2 = rcw_exc;
+
+ if (cause2 != 0) {
+ /* unclean exception? */
+ if (n != 0xabcde || rcw_word != 0x1234567)
+ panic("S32C1I exception error (b)");
+ } else if (rcw_word != 0xabcde || n != 0x1234567) {
+ panic("S32C1I store error");
+ }
+
+ /* Verify consistency of exceptions: */
+ if (cause1 || cause2) {
+ pr_warn("S32C1I took exception %d, %d\n", cause1, cause2);
+ /* If emulation of S32C1I upon bus error gets implemented,
+ * we can get rid of this panic for single core (not SMP)
+ */
+ panic("S32C1I exceptions not currently supported");
+ }
+ if (cause1 != cause2)
+ panic("inconsistent S32C1I exceptions");
+
+ trap_set_handler(EXCCAUSE_LOAD_STORE_ERROR, handbus);
+ trap_set_handler(EXCCAUSE_LOAD_STORE_DATA_ERROR, handdata);
+ trap_set_handler(EXCCAUSE_LOAD_STORE_ADDR_ERROR, handaddr);
+ return 0;
+}
+
+#else /* XCHAL_HAVE_S32C1I */
+
+/* This condition should not occur with a commercially deployed processor.
+ * Display reminder for early engr test or demo chips / FPGA bitstreams
+ */
+static int __init check_s32c1i(void)
+{
+ pr_warn("Processor configuration lacks atomic compare-and-swap support!\n");
+ return 0;
+}
+
+#endif /* XCHAL_HAVE_S32C1I */
+
+early_initcall(check_s32c1i);
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 88a044af7504..848e8568fb3c 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -31,10 +31,6 @@
# include <linux/console.h>
#endif
-#ifdef CONFIG_RTC
-# include <linux/timex.h>
-#endif
-
#ifdef CONFIG_PROC_FS
# include <linux/seq_file.h>
#endif
@@ -48,24 +44,22 @@
#include <asm/page.h>
#include <asm/setup.h>
#include <asm/param.h>
-#include <asm/traps.h>
#include <asm/smp.h>
#include <asm/sysmem.h>
#include <platform/hardware.h>
#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE)
-struct screen_info screen_info = { 0, 24, 0, 0, 0, 80, 0, 0, 0, 24, 1, 16};
-#endif
-
-#ifdef CONFIG_BLK_DEV_FD
-extern struct fd_ops no_fd_ops;
-struct fd_ops *fd_ops;
+struct screen_info screen_info = {
+ .orig_x = 0,
+ .orig_y = 24,
+ .orig_video_cols = 80,
+ .orig_video_lines = 24,
+ .orig_video_isVGA = 1,
+ .orig_video_points = 16,
+};
#endif
-extern struct rtc_ops no_rtc_ops;
-struct rtc_ops *rtc_ops;
-
#ifdef CONFIG_BLK_DEV_INITRD
extern unsigned long initrd_start;
extern unsigned long initrd_end;
@@ -77,7 +71,6 @@ extern int initrd_below_start_ok;
void *dtb_start = __dtb_start;
#endif
-unsigned char aux_device_present;
extern unsigned long loops_per_jiffy;
/* Command line specified as configuration option. */
@@ -317,120 +310,6 @@ extern char _SecondaryResetVector_text_start;
extern char _SecondaryResetVector_text_end;
#endif
-
-#ifdef CONFIG_S32C1I_SELFTEST
-#if XCHAL_HAVE_S32C1I
-
-static int __initdata rcw_word, rcw_probe_pc, rcw_exc;
-
-/*
- * Basic atomic compare-and-swap, that records PC of S32C1I for probing.
- *
- * If *v == cmp, set *v = set. Return previous *v.
- */
-static inline int probed_compare_swap(int *v, int cmp, int set)
-{
- int tmp;
-
- __asm__ __volatile__(
- " movi %1, 1f\n"
- " s32i %1, %4, 0\n"
- " wsr %2, scompare1\n"
- "1: s32c1i %0, %3, 0\n"
- : "=a" (set), "=&a" (tmp)
- : "a" (cmp), "a" (v), "a" (&rcw_probe_pc), "0" (set)
- : "memory"
- );
- return set;
-}
-
-/* Handle probed exception */
-
-static void __init do_probed_exception(struct pt_regs *regs,
- unsigned long exccause)
-{
- if (regs->pc == rcw_probe_pc) { /* exception on s32c1i ? */
- regs->pc += 3; /* skip the s32c1i instruction */
- rcw_exc = exccause;
- } else {
- do_unhandled(regs, exccause);
- }
-}
-
-/* Simple test of S32C1I (soc bringup assist) */
-
-static int __init check_s32c1i(void)
-{
- int n, cause1, cause2;
- void *handbus, *handdata, *handaddr; /* temporarily saved handlers */
-
- rcw_probe_pc = 0;
- handbus = trap_set_handler(EXCCAUSE_LOAD_STORE_ERROR,
- do_probed_exception);
- handdata = trap_set_handler(EXCCAUSE_LOAD_STORE_DATA_ERROR,
- do_probed_exception);
- handaddr = trap_set_handler(EXCCAUSE_LOAD_STORE_ADDR_ERROR,
- do_probed_exception);
-
- /* First try an S32C1I that does not store: */
- rcw_exc = 0;
- rcw_word = 1;
- n = probed_compare_swap(&rcw_word, 0, 2);
- cause1 = rcw_exc;
-
- /* took exception? */
- if (cause1 != 0) {
- /* unclean exception? */
- if (n != 2 || rcw_word != 1)
- panic("S32C1I exception error");
- } else if (rcw_word != 1 || n != 1) {
- panic("S32C1I compare error");
- }
-
- /* Then an S32C1I that stores: */
- rcw_exc = 0;
- rcw_word = 0x1234567;
- n = probed_compare_swap(&rcw_word, 0x1234567, 0xabcde);
- cause2 = rcw_exc;
-
- if (cause2 != 0) {
- /* unclean exception? */
- if (n != 0xabcde || rcw_word != 0x1234567)
- panic("S32C1I exception error (b)");
- } else if (rcw_word != 0xabcde || n != 0x1234567) {
- panic("S32C1I store error");
- }
-
- /* Verify consistency of exceptions: */
- if (cause1 || cause2) {
- pr_warn("S32C1I took exception %d, %d\n", cause1, cause2);
- /* If emulation of S32C1I upon bus error gets implemented,
- we can get rid of this panic for single core (not SMP) */
- panic("S32C1I exceptions not currently supported");
- }
- if (cause1 != cause2)
- panic("inconsistent S32C1I exceptions");
-
- trap_set_handler(EXCCAUSE_LOAD_STORE_ERROR, handbus);
- trap_set_handler(EXCCAUSE_LOAD_STORE_DATA_ERROR, handdata);
- trap_set_handler(EXCCAUSE_LOAD_STORE_ADDR_ERROR, handaddr);
- return 0;
-}
-
-#else /* XCHAL_HAVE_S32C1I */
-
-/* This condition should not occur with a commercially deployed processor.
- Display reminder for early engr test or demo chips / FPGA bitstreams */
-static int __init check_s32c1i(void)
-{
- pr_warn("Processor configuration lacks atomic compare-and-swap support!\n");
- return 0;
-}
-
-#endif /* XCHAL_HAVE_S32C1I */
-early_initcall(check_s32c1i);
-#endif /* CONFIG_S32C1I_SELFTEST */
-
static inline int mem_reserve(unsigned long start, unsigned long end)
{
return memblock_reserve(start, end - start);
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index 80e4cfb2471a..720fe4e8b497 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -26,6 +26,7 @@
#include <linux/nodemask.h>
#include <linux/mm.h>
#include <linux/of_fdt.h>
+#include <linux/dma-contiguous.h>
#include <asm/bootparam.h>
#include <asm/page.h>
@@ -60,6 +61,7 @@ void __init bootmem_init(void)
max_low_pfn = min(max_pfn, MAX_LOW_PFN);
memblock_set_current_limit(PFN_PHYS(max_low_pfn));
+ dma_contiguous_reserve(PFN_PHYS(max_low_pfn));
memblock_dump_all();
}