summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/boot/compressed/Makefile8
-rw-r--r--arch/x86/boot/compressed/eboot.c557
-rw-r--r--arch/x86/boot/compressed/eboot.h12
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c73
-rw-r--r--arch/x86/crypto/aegis128-aesni-asm.S1
-rw-r--r--arch/x86/crypto/aegis128-aesni-glue.c12
-rw-r--r--arch/x86/crypto/aegis128l-aesni-asm.S1
-rw-r--r--arch/x86/crypto/aegis128l-aesni-glue.c12
-rw-r--r--arch/x86/crypto/aegis256-aesni-asm.S1
-rw-r--r--arch/x86/crypto/aegis256-aesni-glue.c12
-rw-r--r--arch/x86/crypto/morus1280-avx2-asm.S1
-rw-r--r--arch/x86/crypto/morus1280-avx2-glue.c10
-rw-r--r--arch/x86/crypto/morus1280-sse2-asm.S1
-rw-r--r--arch/x86/crypto/morus1280-sse2-glue.c10
-rw-r--r--arch/x86/crypto/morus640-sse2-asm.S1
-rw-r--r--arch/x86/crypto/morus640-sse2-glue.c10
-rw-r--r--arch/x86/entry/entry_64.S18
-rw-r--r--arch/x86/events/amd/ibs.c6
-rw-r--r--arch/x86/events/intel/core.c34
-rw-r--r--arch/x86/events/intel/ds.c82
-rw-r--r--arch/x86/events/intel/lbr.c56
-rw-r--r--arch/x86/events/intel/uncore.h2
-rw-r--r--arch/x86/events/intel/uncore_snbep.c10
-rw-r--r--arch/x86/events/perf_event.h6
-rw-r--r--arch/x86/hyperv/hv_apic.c5
-rw-r--r--arch/x86/hyperv/hv_init.c5
-rw-r--r--arch/x86/include/asm/apm.h6
-rw-r--r--arch/x86/include/asm/asm.h59
-rw-r--r--arch/x86/include/asm/atomic.h32
-rw-r--r--arch/x86/include/asm/atomic64_32.h61
-rw-r--r--arch/x86/include/asm/atomic64_64.h50
-rw-r--r--arch/x86/include/asm/cmpxchg.h2
-rw-r--r--arch/x86/include/asm/cmpxchg_64.h4
-rw-r--r--arch/x86/include/asm/hw_breakpoint.h7
-rw-r--r--arch/x86/include/asm/intel_ds.h3
-rw-r--r--arch/x86/include/asm/irqflags.h2
-rw-r--r--arch/x86/include/asm/kprobes.h5
-rw-r--r--arch/x86/include/asm/mshyperv.h5
-rw-r--r--arch/x86/include/asm/qspinlock_paravirt.h2
-rw-r--r--arch/x86/include/asm/refcount.h1
-rw-r--r--arch/x86/include/asm/uaccess_64.h7
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/apic/apic.c5
-rw-r--r--arch/x86/kernel/apic/vector.c19
-rw-r--r--arch/x86/kernel/apm_32.c5
-rw-r--r--arch/x86/kernel/cpu/amd.c4
-rw-r--r--arch/x86/kernel/cpu/bugs.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c205
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c3
-rw-r--r--arch/x86/kernel/hw_breakpoint.c131
-rw-r--r--arch/x86/kernel/irqflags.S26
-rw-r--r--arch/x86/kernel/kprobes/common.h10
-rw-r--r--arch/x86/kernel/kprobes/core.c124
-rw-r--r--arch/x86/kernel/kprobes/ftrace.c49
-rw-r--r--arch/x86/kernel/kprobes/opt.c1
-rw-r--r--arch/x86/kernel/kvm.c4
-rw-r--r--arch/x86/kernel/kvmclock.c12
-rw-r--r--arch/x86/kernel/smpboot.c5
-rw-r--r--arch/x86/kvm/Kconfig2
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/mmu.c2
-rw-r--r--arch/x86/kvm/vmx.c83
-rw-r--r--arch/x86/kvm/x86.c4
-rw-r--r--arch/x86/net/bpf_jit_comp32.c8
-rw-r--r--arch/x86/platform/efi/efi_64.c103
-rw-r--r--arch/x86/platform/efi/quirks.c14
-rw-r--r--arch/x86/purgatory/Makefile2
-rw-r--r--arch/x86/um/mem_32.c2
-rw-r--r--arch/x86/xen/enlighten_pv.c25
-rw-r--r--arch/x86/xen/irq.c4
71 files changed, 980 insertions, 1077 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f1dbb4ee19d7..887d3a7bb646 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -63,7 +63,7 @@ config X86
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_REFCOUNT
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
- select ARCH_HAS_UACCESS_MCSAFE if X86_64
+ select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fa42f895fdde..169c2feda14a 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -106,9 +106,13 @@ define cmd_check_data_rel
done
endef
+# We need to run two commands under "if_changed", so merge them into a
+# single invocation.
+quiet_cmd_check-and-link-vmlinux = LD $@
+ cmd_check-and-link-vmlinux = $(cmd_check_data_rel); $(cmd_ld)
+
$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
- $(call if_changed,check_data_rel)
- $(call if_changed,ld)
+ $(call if_changed,check-and-link-vmlinux)
OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index e57665b4ba1c..1458b1700fc7 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -34,74 +34,13 @@ static void setup_boot_services##bits(struct efi_config *c) \
\
table = (typeof(table))sys_table; \
\
- c->runtime_services = table->runtime; \
- c->boot_services = table->boottime; \
- c->text_output = table->con_out; \
+ c->runtime_services = table->runtime; \
+ c->boot_services = table->boottime; \
+ c->text_output = table->con_out; \
}
BOOT_SERVICES(32);
BOOT_SERVICES(64);
-static inline efi_status_t __open_volume32(void *__image, void **__fh)
-{
- efi_file_io_interface_t *io;
- efi_loaded_image_32_t *image = __image;
- efi_file_handle_32_t *fh;
- efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
- efi_status_t status;
- void *handle = (void *)(unsigned long)image->device_handle;
- unsigned long func;
-
- status = efi_call_early(handle_protocol, handle,
- &fs_proto, (void **)&io);
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to handle fs_proto\n");
- return status;
- }
-
- func = (unsigned long)io->open_volume;
- status = efi_early->call(func, io, &fh);
- if (status != EFI_SUCCESS)
- efi_printk(sys_table, "Failed to open volume\n");
-
- *__fh = fh;
- return status;
-}
-
-static inline efi_status_t __open_volume64(void *__image, void **__fh)
-{
- efi_file_io_interface_t *io;
- efi_loaded_image_64_t *image = __image;
- efi_file_handle_64_t *fh;
- efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
- efi_status_t status;
- void *handle = (void *)(unsigned long)image->device_handle;
- unsigned long func;
-
- status = efi_call_early(handle_protocol, handle,
- &fs_proto, (void **)&io);
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to handle fs_proto\n");
- return status;
- }
-
- func = (unsigned long)io->open_volume;
- status = efi_early->call(func, io, &fh);
- if (status != EFI_SUCCESS)
- efi_printk(sys_table, "Failed to open volume\n");
-
- *__fh = fh;
- return status;
-}
-
-efi_status_t
-efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh)
-{
- if (efi_early->is64)
- return __open_volume64(__image, __fh);
-
- return __open_volume32(__image, __fh);
-}
-
void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
{
efi_call_proto(efi_simple_text_output_protocol, output_string,
@@ -109,23 +48,17 @@ void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
}
static efi_status_t
-__setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
+preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
{
struct pci_setup_rom *rom = NULL;
efi_status_t status;
unsigned long size;
- uint64_t attributes, romsize;
+ uint64_t romsize;
void *romimage;
- status = efi_call_proto(efi_pci_io_protocol, attributes, pci,
- EfiPciIoAttributeOperationGet, 0ULL,
- &attributes);
- if (status != EFI_SUCCESS)
- return status;
-
/*
- * Some firmware images contain EFI function pointers at the place where the
- * romimage and romsize fields are supposed to be. Typically the EFI
+ * Some firmware images contain EFI function pointers at the place where
+ * the romimage and romsize fields are supposed to be. Typically the EFI
* code is mapped at high addresses, translating to an unrealistically
* large romsize. The UEFI spec limits the size of option ROMs to 16
* MiB so we reject any ROMs over 16 MiB in size to catch this.
@@ -140,16 +73,16 @@ __setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to alloc mem for rom\n");
+ efi_printk(sys_table, "Failed to allocate memory for 'rom'\n");
return status;
}
memset(rom, 0, sizeof(*rom));
- rom->data.type = SETUP_PCI;
- rom->data.len = size - sizeof(struct setup_data);
- rom->data.next = 0;
- rom->pcilen = pci->romsize;
+ rom->data.type = SETUP_PCI;
+ rom->data.len = size - sizeof(struct setup_data);
+ rom->data.next = 0;
+ rom->pcilen = pci->romsize;
*__rom = rom;
status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
@@ -185,96 +118,6 @@ free_struct:
return status;
}
-static void
-setup_efi_pci32(struct boot_params *params, void **pci_handle,
- unsigned long size)
-{
- efi_pci_io_protocol_t *pci = NULL;
- efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
- u32 *handles = (u32 *)(unsigned long)pci_handle;
- efi_status_t status;
- unsigned long nr_pci;
- struct setup_data *data;
- int i;
-
- data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
-
- while (data && data->next)
- data = (struct setup_data *)(unsigned long)data->next;
-
- nr_pci = size / sizeof(u32);
- for (i = 0; i < nr_pci; i++) {
- struct pci_setup_rom *rom = NULL;
- u32 h = handles[i];
-
- status = efi_call_early(handle_protocol, h,
- &pci_proto, (void **)&pci);
-
- if (status != EFI_SUCCESS)
- continue;
-
- if (!pci)
- continue;
-
- status = __setup_efi_pci(pci, &rom);
- if (status != EFI_SUCCESS)
- continue;
-
- if (data)
- data->next = (unsigned long)rom;
- else
- params->hdr.setup_data = (unsigned long)rom;
-
- data = (struct setup_data *)rom;
-
- }
-}
-
-static void
-setup_efi_pci64(struct boot_params *params, void **pci_handle,
- unsigned long size)
-{
- efi_pci_io_protocol_t *pci = NULL;
- efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
- u64 *handles = (u64 *)(unsigned long)pci_handle;
- efi_status_t status;
- unsigned long nr_pci;
- struct setup_data *data;
- int i;
-
- data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
-
- while (data && data->next)
- data = (struct setup_data *)(unsigned long)data->next;
-
- nr_pci = size / sizeof(u64);
- for (i = 0; i < nr_pci; i++) {
- struct pci_setup_rom *rom = NULL;
- u64 h = handles[i];
-
- status = efi_call_early(handle_protocol, h,
- &pci_proto, (void **)&pci);
-
- if (status != EFI_SUCCESS)
- continue;
-
- if (!pci)
- continue;
-
- status = __setup_efi_pci(pci, &rom);
- if (status != EFI_SUCCESS)
- continue;
-
- if (data)
- data->next = (unsigned long)rom;
- else
- params->hdr.setup_data = (unsigned long)rom;
-
- data = (struct setup_data *)rom;
-
- }
-}
-
/*
* There's no way to return an informative status from this function,
* because any analysis (and printing of error messages) needs to be
@@ -290,6 +133,9 @@ static void setup_efi_pci(struct boot_params *params)
void **pci_handle = NULL;
efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
unsigned long size = 0;
+ unsigned long nr_pci;
+ struct setup_data *data;
+ int i;
status = efi_call_early(locate_handle,
EFI_LOCATE_BY_PROTOCOL,
@@ -301,7 +147,7 @@ static void setup_efi_pci(struct boot_params *params)
size, (void **)&pci_handle);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to alloc mem for pci_handle\n");
+ efi_printk(sys_table, "Failed to allocate memory for 'pci_handle'\n");
return;
}
@@ -313,10 +159,34 @@ static void setup_efi_pci(struct boot_params *params)
if (status != EFI_SUCCESS)
goto free_handle;
- if (efi_early->is64)
- setup_efi_pci64(params, pci_handle, size);
- else
- setup_efi_pci32(params, pci_handle, size);
+ data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
+
+ while (data && data->next)
+ data = (struct setup_data *)(unsigned long)data->next;
+
+ nr_pci = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32));
+ for (i = 0; i < nr_pci; i++) {
+ efi_pci_io_protocol_t *pci = NULL;
+ struct pci_setup_rom *rom;
+
+ status = efi_call_early(handle_protocol,
+ efi_is_64bit() ? ((u64 *)pci_handle)[i]
+ : ((u32 *)pci_handle)[i],
+ &pci_proto, (void **)&pci);
+ if (status != EFI_SUCCESS || !pci)
+ continue;
+
+ status = preserve_pci_rom_image(pci, &rom);
+ if (status != EFI_SUCCESS)
+ continue;
+
+ if (data)
+ data->next = (unsigned long)rom;
+ else
+ params->hdr.setup_data = (unsigned long)rom;
+
+ data = (struct setup_data *)rom;
+ }
free_handle:
efi_call_early(free_pool, pci_handle);
@@ -347,8 +217,7 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
size + sizeof(struct setup_data), &new);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table,
- "Failed to alloc mem for properties\n");
+ efi_printk(sys_table, "Failed to allocate memory for 'properties'\n");
return;
}
@@ -364,9 +233,9 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
new->next = 0;
data = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
- if (!data)
+ if (!data) {
boot_params->hdr.setup_data = (unsigned long)new;
- else {
+ } else {
while (data->next)
data = (struct setup_data *)(unsigned long)data->next;
data->next = (unsigned long)new;
@@ -386,81 +255,55 @@ static void setup_quirks(struct boot_params *boot_params)
}
}
+/*
+ * See if we have Universal Graphics Adapter (UGA) protocol
+ */
static efi_status_t
-setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height)
+setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size)
{
- struct efi_uga_draw_protocol *uga = NULL, *first_uga;
- efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
+ efi_status_t status;
+ u32 width, height;
+ void **uga_handle = NULL;
+ efi_uga_draw_protocol_t *uga = NULL, *first_uga;
unsigned long nr_ugas;
- u32 *handles = (u32 *)uga_handle;
- efi_status_t status = EFI_INVALID_PARAMETER;
int i;
- first_uga = NULL;
- nr_ugas = size / sizeof(u32);
- for (i = 0; i < nr_ugas; i++) {
- efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
- u32 w, h, depth, refresh;
- void *pciio;
- u32 handle = handles[i];
-
- status = efi_call_early(handle_protocol, handle,
- &uga_proto, (void **)&uga);
- if (status != EFI_SUCCESS)
- continue;
-
- efi_call_early(handle_protocol, handle, &pciio_proto, &pciio);
-
- status = efi_early->call((unsigned long)uga->get_mode, uga,
- &w, &h, &depth, &refresh);
- if (status == EFI_SUCCESS && (!first_uga || pciio)) {
- *width = w;
- *height = h;
-
- /*
- * Once we've found a UGA supporting PCIIO,
- * don't bother looking any further.
- */
- if (pciio)
- break;
-
- first_uga = uga;
- }
- }
+ status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+ size, (void **)&uga_handle);
+ if (status != EFI_SUCCESS)
+ return status;
- return status;
-}
+ status = efi_call_early(locate_handle,
+ EFI_LOCATE_BY_PROTOCOL,
+ uga_proto, NULL, &size, uga_handle);
+ if (status != EFI_SUCCESS)
+ goto free_handle;
-static efi_status_t
-setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height)
-{
- struct efi_uga_draw_protocol *uga = NULL, *first_uga;
- efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
- unsigned long nr_ugas;
- u64 *handles = (u64 *)uga_handle;
- efi_status_t status = EFI_INVALID_PARAMETER;
- int i;
+ height = 0;
+ width = 0;
first_uga = NULL;
- nr_ugas = size / sizeof(u64);
+ nr_ugas = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32));
for (i = 0; i < nr_ugas; i++) {
efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
u32 w, h, depth, refresh;
void *pciio;
- u64 handle = handles[i];
+ unsigned long handle = efi_is_64bit() ? ((u64 *)uga_handle)[i]
+ : ((u32 *)uga_handle)[i];
status = efi_call_early(handle_protocol, handle,
- &uga_proto, (void **)&uga);
+ uga_proto, (void **)&uga);
if (status != EFI_SUCCESS)
continue;
+ pciio = NULL;
efi_call_early(handle_protocol, handle, &pciio_proto, &pciio);
- status = efi_early->call((unsigned long)uga->get_mode, uga,
- &w, &h, &depth, &refresh);
+ status = efi_call_proto(efi_uga_draw_protocol, get_mode, uga,
+ &w, &h, &depth, &refresh);
if (status == EFI_SUCCESS && (!first_uga || pciio)) {
- *width = w;
- *height = h;
+ width = w;
+ height = h;
/*
* Once we've found a UGA supporting PCIIO,
@@ -473,59 +316,28 @@ setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height)
}
}
- return status;
-}
-
-/*
- * See if we have Universal Graphics Adapter (UGA) protocol
- */
-static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto,
- unsigned long size)
-{
- efi_status_t status;
- u32 width, height;
- void **uga_handle = NULL;
-
- status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
- size, (void **)&uga_handle);
- if (status != EFI_SUCCESS)
- return status;
-
- status = efi_call_early(locate_handle,
- EFI_LOCATE_BY_PROTOCOL,
- uga_proto, NULL, &size, uga_handle);
- if (status != EFI_SUCCESS)
- goto free_handle;
-
- height = 0;
- width = 0;
-
- if (efi_early->is64)
- status = setup_uga64(uga_handle, size, &width, &height);
- else
- status = setup_uga32(uga_handle, size, &width, &height);
-
if (!width && !height)
goto free_handle;
/* EFI framebuffer */
- si->orig_video_isVGA = VIDEO_TYPE_EFI;
+ si->orig_video_isVGA = VIDEO_TYPE_EFI;
- si->lfb_depth = 32;
- si->lfb_width = width;
- si->lfb_height = height;
+ si->lfb_depth = 32;
+ si->lfb_width = width;
+ si->lfb_height = height;
- si->red_size = 8;
- si->red_pos = 16;
- si->green_size = 8;
- si->green_pos = 8;
- si->blue_size = 8;
- si->blue_pos = 0;
- si->rsvd_size = 8;
- si->rsvd_pos = 24;
+ si->red_size = 8;
+ si->red_pos = 16;
+ si->green_size = 8;
+ si->green_pos = 8;
+ si->blue_size = 8;
+ si->blue_pos = 0;
+ si->rsvd_size = 8;
+ si->rsvd_pos = 24;
free_handle:
efi_call_early(free_pool, uga_handle);
+
return status;
}
@@ -592,7 +404,7 @@ struct boot_params *make_boot_params(struct efi_config *c)
if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
return NULL;
- if (efi_early->is64)
+ if (efi_is_64bit())
setup_boot_services64(efi_early);
else
setup_boot_services32(efi_early);
@@ -607,7 +419,7 @@ struct boot_params *make_boot_params(struct efi_config *c)
status = efi_low_alloc(sys_table, 0x4000, 1,
(unsigned long *)&boot_params);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to alloc lowmem for boot params\n");
+ efi_printk(sys_table, "Failed to allocate lowmem for boot params\n");
return NULL;
}
@@ -623,9 +435,9 @@ struct boot_params *make_boot_params(struct efi_config *c)
* Fill out some of the header fields ourselves because the
* EFI firmware loader doesn't load the first sector.
*/
- hdr->root_flags = 1;
- hdr->vid_mode = 0xffff;
- hdr->boot_flag = 0xAA55;
+ hdr->root_flags = 1;
+ hdr->vid_mode = 0xffff;
+ hdr->boot_flag = 0xAA55;
hdr->type_of_loader = 0x21;
@@ -633,6 +445,7 @@ struct boot_params *make_boot_params(struct efi_config *c)
cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size);
if (!cmdline_ptr)
goto fail;
+
hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
/* Fill in upper bits of command line address, NOP on 32 bit */
boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32;
@@ -669,10 +482,12 @@ struct boot_params *make_boot_params(struct efi_config *c)
boot_params->ext_ramdisk_size = (u64)ramdisk_size >> 32;
return boot_params;
+
fail2:
efi_free(sys_table, options_size, hdr->cmd_line_ptr);
fail:
efi_free(sys_table, 0x4000, (unsigned long)boot_params);
+
return NULL;
}
@@ -684,7 +499,7 @@ static void add_e820ext(struct boot_params *params,
unsigned long size;
e820ext->type = SETUP_E820_EXT;
- e820ext->len = nr_entries * sizeof(struct boot_e820_entry);
+ e820ext->len = nr_entries * sizeof(struct boot_e820_entry);
e820ext->next = 0;
data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
@@ -698,8 +513,8 @@ static void add_e820ext(struct boot_params *params,
params->hdr.setup_data = (unsigned long)e820ext;
}
-static efi_status_t setup_e820(struct boot_params *params,
- struct setup_data *e820ext, u32 e820ext_size)
+static efi_status_t
+setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_size)
{
struct boot_e820_entry *entry = params->e820_table;
struct efi_info *efi = &params->efi_info;
@@ -820,11 +635,10 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext,
}
struct exit_boot_struct {
- struct boot_params *boot_params;
- struct efi_info *efi;
- struct setup_data *e820ext;
- __u32 e820ext_size;
- bool is64;
+ struct boot_params *boot_params;
+ struct efi_info *efi;
+ struct setup_data *e820ext;
+ __u32 e820ext_size;
};
static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
@@ -851,25 +665,25 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
first = false;
}
- signature = p->is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE;
+ signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE
+ : EFI32_LOADER_SIGNATURE;
memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32));
- p->efi->efi_systab = (unsigned long)sys_table_arg;
- p->efi->efi_memdesc_size = *map->desc_size;
- p->efi->efi_memdesc_version = *map->desc_ver;
- p->efi->efi_memmap = (unsigned long)*map->map;
- p->efi->efi_memmap_size = *map->map_size;
+ p->efi->efi_systab = (unsigned long)sys_table_arg;
+ p->efi->efi_memdesc_size = *map->desc_size;
+ p->efi->efi_memdesc_version = *map->desc_ver;
+ p->efi->efi_memmap = (unsigned long)*map->map;
+ p->efi->efi_memmap_size = *map->map_size;
#ifdef CONFIG_X86_64
- p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32;
- p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32;
+ p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32;
+ p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32;
#endif
return EFI_SUCCESS;
}
-static efi_status_t exit_boot(struct boot_params *boot_params,
- void *handle, bool is64)
+static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
{
unsigned long map_sz, key, desc_size, buff_size;
efi_memory_desc_t *mem_map;
@@ -880,17 +694,16 @@ static efi_status_t exit_boot(struct boot_params *boot_params,
struct efi_boot_memmap map;
struct exit_boot_struct priv;
- map.map = &mem_map;
- map.map_size = &map_sz;
- map.desc_size = &desc_size;
- map.desc_ver = &desc_version;
- map.key_ptr = &key;
- map.buff_size = &buff_size;
- priv.boot_params = boot_params;
- priv.efi = &boot_params->efi_info;
- priv.e820ext = NULL;
- priv.e820ext_size = 0;
- priv.is64 = is64;
+ map.map = &mem_map;
+ map.map_size = &map_sz;
+ map.desc_size = &desc_size;
+ map.desc_ver = &desc_version;
+ map.key_ptr = &key;
+ map.buff_size = &buff_size;
+ priv.boot_params = boot_params;
+ priv.efi = &boot_params->efi_info;
+ priv.e820ext = NULL;
+ priv.e820ext_size = 0;
/* Might as well exit boot services now */
status = efi_exit_boot_services(sys_table, handle, &map, &priv,
@@ -898,10 +711,11 @@ static efi_status_t exit_boot(struct boot_params *boot_params,
if (status != EFI_SUCCESS)
return status;
- e820ext = priv.e820ext;
- e820ext_size = priv.e820ext_size;
+ e820ext = priv.e820ext;
+ e820ext_size = priv.e820ext_size;
+
/* Historic? */
- boot_params->alt_mem_k = 32 * 1024;
+ boot_params->alt_mem_k = 32 * 1024;
status = setup_e820(boot_params, e820ext, e820ext_size);
if (status != EFI_SUCCESS)
@@ -914,8 +728,8 @@ static efi_status_t exit_boot(struct boot_params *boot_params,
* On success we return a pointer to a boot_params structure, and NULL
* on failure.
*/
-struct boot_params *efi_main(struct efi_config *c,
- struct boot_params *boot_params)
+struct boot_params *
+efi_main(struct efi_config *c, struct boot_params *boot_params)
{
struct desc_ptr *gdt = NULL;
efi_loaded_image_t *image;
@@ -924,13 +738,11 @@ struct boot_params *efi_main(struct efi_config *c,
struct desc_struct *desc;
void *handle;
efi_system_table_t *_table;
- bool is64;
efi_early = c;
_table = (efi_system_table_t *)(unsigned long)efi_early->table;
handle = (void *)(unsigned long)efi_early->image_handle;
- is64 = efi_early->is64;
sys_table = _table;
@@ -938,7 +750,7 @@ struct boot_params *efi_main(struct efi_config *c,
if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
goto fail;
- if (is64)
+ if (efi_is_64bit())
setup_boot_services64(efi_early);
else
setup_boot_services32(efi_early);
@@ -963,7 +775,7 @@ struct boot_params *efi_main(struct efi_config *c,
status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
sizeof(*gdt), (void **)&gdt);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to alloc mem for gdt structure\n");
+ efi_printk(sys_table, "Failed to allocate memory for 'gdt' structure\n");
goto fail;
}
@@ -971,7 +783,7 @@ struct boot_params *efi_main(struct efi_config *c,
status = efi_low_alloc(sys_table, gdt->size, 8,
(unsigned long *)&gdt->address);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to alloc mem for gdt\n");
+ efi_printk(sys_table, "Failed to allocate memory for 'gdt'\n");
goto fail;
}
@@ -994,7 +806,7 @@ struct boot_params *efi_main(struct efi_config *c,
hdr->code32_start = bzimage_addr;
}
- status = exit_boot(boot_params, handle, is64);
+ status = exit_boot(boot_params, handle);
if (status != EFI_SUCCESS) {
efi_printk(sys_table, "exit_boot() failed!\n");
goto fail;
@@ -1008,19 +820,20 @@ struct boot_params *efi_main(struct efi_config *c,
if (IS_ENABLED(CONFIG_X86_64)) {
/* __KERNEL32_CS */
- desc->limit0 = 0xffff;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
- desc->s = DESC_TYPE_CODE_DATA;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0xf;
- desc->avl = 0;
- desc->l = 0;
- desc->d = SEG_OP_SIZE_32BIT;
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
+ desc->limit0 = 0xffff;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
+ desc->s = DESC_TYPE_CODE_DATA;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit1 = 0xf;
+ desc->avl = 0;
+ desc->l = 0;
+ desc->d = SEG_OP_SIZE_32BIT;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
+
desc++;
} else {
/* Second entry is unused on 32-bit */
@@ -1028,15 +841,16 @@ struct boot_params *efi_main(struct efi_config *c,
}
/* __KERNEL_CS */
- desc->limit0 = 0xffff;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
- desc->s = DESC_TYPE_CODE_DATA;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0xf;
- desc->avl = 0;
+ desc->limit0 = 0xffff;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
+ desc->s = DESC_TYPE_CODE_DATA;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit1 = 0xf;
+ desc->avl = 0;
+
if (IS_ENABLED(CONFIG_X86_64)) {
desc->l = 1;
desc->d = 0;
@@ -1044,41 +858,41 @@ struct boot_params *efi_main(struct efi_config *c,
desc->l = 0;
desc->d = SEG_OP_SIZE_32BIT;
}
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
desc++;
/* __KERNEL_DS */
- desc->limit0 = 0xffff;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE;
- desc->s = DESC_TYPE_CODE_DATA;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0xf;
- desc->avl = 0;
- desc->l = 0;
- desc->d = SEG_OP_SIZE_32BIT;
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
+ desc->limit0 = 0xffff;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE;
+ desc->s = DESC_TYPE_CODE_DATA;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit1 = 0xf;
+ desc->avl = 0;
+ desc->l = 0;
+ desc->d = SEG_OP_SIZE_32BIT;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
desc++;
if (IS_ENABLED(CONFIG_X86_64)) {
/* Task segment value */
- desc->limit0 = 0x0000;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_TSS;
- desc->s = 0;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0x0;
- desc->avl = 0;
- desc->l = 0;
- desc->d = 0;
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
+ desc->limit0 = 0x0000;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_TSS;
+ desc->s = 0;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit1 = 0x0;
+ desc->avl = 0;
+ desc->l = 0;
+ desc->d = 0;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
desc++;
}
@@ -1088,5 +902,6 @@ struct boot_params *efi_main(struct efi_config *c,
return boot_params;
fail:
efi_printk(sys_table, "efi_main() failed!\n");
+
return NULL;
}
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index e799dc5c6448..8297387c4676 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -12,22 +12,22 @@
#define DESC_TYPE_CODE_DATA (1 << 0)
-struct efi_uga_draw_protocol_32 {
+typedef struct {
u32 get_mode;
u32 set_mode;
u32 blt;
-};
+} efi_uga_draw_protocol_32_t;
-struct efi_uga_draw_protocol_64 {
+typedef struct {
u64 get_mode;
u64 set_mode;
u64 blt;
-};
+} efi_uga_draw_protocol_64_t;
-struct efi_uga_draw_protocol {
+typedef struct {
void *get_mode;
void *set_mode;
void *blt;
-};
+} efi_uga_draw_protocol_t;
#endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 8c5107545251..9e2157371491 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -1,3 +1,4 @@
+#include <asm/e820/types.h>
#include <asm/processor.h>
#include "pgtable.h"
#include "../string.h"
@@ -34,10 +35,62 @@ unsigned long *trampoline_32bit __section(.data);
extern struct boot_params *boot_params;
int cmdline_find_option_bool(const char *option);
+static unsigned long find_trampoline_placement(void)
+{
+ unsigned long bios_start, ebda_start;
+ unsigned long trampoline_start;
+ struct boot_e820_entry *entry;
+ int i;
+
+ /*
+ * Find a suitable spot for the trampoline.
+ * This code is based on reserve_bios_regions().
+ */
+
+ ebda_start = *(unsigned short *)0x40e << 4;
+ bios_start = *(unsigned short *)0x413 << 10;
+
+ if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
+ bios_start = BIOS_START_MAX;
+
+ if (ebda_start > BIOS_START_MIN && ebda_start < bios_start)
+ bios_start = ebda_start;
+
+ bios_start = round_down(bios_start, PAGE_SIZE);
+
+ /* Find the first usable memory region under bios_start. */
+ for (i = boot_params->e820_entries - 1; i >= 0; i--) {
+ entry = &boot_params->e820_table[i];
+
+ /* Skip all entries above bios_start. */
+ if (bios_start <= entry->addr)
+ continue;
+
+ /* Skip non-RAM entries. */
+ if (entry->type != E820_TYPE_RAM)
+ continue;
+
+ /* Adjust bios_start to the end of the entry if needed. */
+ if (bios_start > entry->addr + entry->size)
+ bios_start = entry->addr + entry->size;
+
+ /* Keep bios_start page-aligned. */
+ bios_start = round_down(bios_start, PAGE_SIZE);
+
+ /* Skip the entry if it's too small. */
+ if (bios_start - TRAMPOLINE_32BIT_SIZE < entry->addr)
+ continue;
+
+ break;
+ }
+
+ /* Place the trampoline just below the end of low memory */
+ return bios_start - TRAMPOLINE_32BIT_SIZE;
+}
+
struct paging_config paging_prepare(void *rmode)
{
struct paging_config paging_config = {};
- unsigned long bios_start, ebda_start;
/* Initialize boot_params. Required for cmdline_find_option_bool(). */
boot_params = rmode;
@@ -61,23 +114,7 @@ struct paging_config paging_prepare(void *rmode)
paging_config.l5_required = 1;
}
- /*
- * Find a suitable spot for the trampoline.
- * This code is based on reserve_bios_regions().
- */
-
- ebda_start = *(unsigned short *)0x40e << 4;
- bios_start = *(unsigned short *)0x413 << 10;
-
- if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
- bios_start = BIOS_START_MAX;
-
- if (ebda_start > BIOS_START_MIN && ebda_start < bios_start)
- bios_start = ebda_start;
-
- /* Place the trampoline just below the end of low memory, aligned to 4k */
- paging_config.trampoline_start = bios_start - TRAMPOLINE_32BIT_SIZE;
- paging_config.trampoline_start = round_down(paging_config.trampoline_start, PAGE_SIZE);
+ paging_config.trampoline_start = find_trampoline_placement();
trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index 9254e0b6cc06..717bf0776421 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -535,6 +535,7 @@ ENTRY(crypto_aegis128_aesni_enc_tail)
movdqu STATE3, 0x40(STATEP)
FRAME_END
+ ret
ENDPROC(crypto_aegis128_aesni_enc_tail)
.macro decrypt_block a s0 s1 s2 s3 s4 i
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 5de7c0d46edf..acd11b3bf639 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -375,16 +375,12 @@ static struct aead_alg crypto_aegis128_aesni_alg[] = {
}
};
-static const struct x86_cpu_id aesni_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_AES),
- X86_FEATURE_MATCH(X86_FEATURE_XMM2),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
-
static int __init crypto_aegis128_aesni_module_init(void)
{
- if (!x86_match_cpu(aesni_cpu_id))
+ if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+ !boot_cpu_has(X86_FEATURE_AES) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+ !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_aegis128_aesni_alg,
diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S
index 9263c344f2c7..4eda2b8db9e1 100644
--- a/arch/x86/crypto/aegis128l-aesni-asm.S
+++ b/arch/x86/crypto/aegis128l-aesni-asm.S
@@ -645,6 +645,7 @@ ENTRY(crypto_aegis128l_aesni_enc_tail)
state_store0
FRAME_END
+ ret
ENDPROC(crypto_aegis128l_aesni_enc_tail)
/*
diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c
index 876e4866e633..2071c3d1ae07 100644
--- a/arch/x86/crypto/aegis128l-aesni-glue.c
+++ b/arch/x86/crypto/aegis128l-aesni-glue.c
@@ -375,16 +375,12 @@ static struct aead_alg crypto_aegis128l_aesni_alg[] = {
}
};
-static const struct x86_cpu_id aesni_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_AES),
- X86_FEATURE_MATCH(X86_FEATURE_XMM2),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
-
static int __init crypto_aegis128l_aesni_module_init(void)
{
- if (!x86_match_cpu(aesni_cpu_id))
+ if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+ !boot_cpu_has(X86_FEATURE_AES) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+ !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_aegis128l_aesni_alg,
diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S
index 1d977d515bf9..32aae8397268 100644
--- a/arch/x86/crypto/aegis256-aesni-asm.S
+++ b/arch/x86/crypto/aegis256-aesni-asm.S
@@ -543,6 +543,7 @@ ENTRY(crypto_aegis256_aesni_enc_tail)
state_store0
FRAME_END
+ ret
ENDPROC(crypto_aegis256_aesni_enc_tail)
/*
diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c
index 2b5dd3af8f4d..b5f2a8fd5a71 100644
--- a/arch/x86/crypto/aegis256-aesni-glue.c
+++ b/arch/x86/crypto/aegis256-aesni-glue.c
@@ -375,16 +375,12 @@ static struct aead_alg crypto_aegis256_aesni_alg[] = {
}
};
-static const struct x86_cpu_id aesni_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_AES),
- X86_FEATURE_MATCH(X86_FEATURE_XMM2),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
-
static int __init crypto_aegis256_aesni_module_init(void)
{
- if (!x86_match_cpu(aesni_cpu_id))
+ if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+ !boot_cpu_has(X86_FEATURE_AES) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+ !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_aegis256_aesni_alg,
diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S
index 37d422e77931..07653d4582a6 100644
--- a/arch/x86/crypto/morus1280-avx2-asm.S
+++ b/arch/x86/crypto/morus1280-avx2-asm.S
@@ -453,6 +453,7 @@ ENTRY(crypto_morus1280_avx2_enc_tail)
vmovdqu STATE4, (4 * 32)(%rdi)
FRAME_END
+ ret
ENDPROC(crypto_morus1280_avx2_enc_tail)
/*
diff --git a/arch/x86/crypto/morus1280-avx2-glue.c b/arch/x86/crypto/morus1280-avx2-glue.c
index f111f36d26dc..6634907d6ccd 100644
--- a/arch/x86/crypto/morus1280-avx2-glue.c
+++ b/arch/x86/crypto/morus1280-avx2-glue.c
@@ -37,15 +37,11 @@ asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor,
MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400);
-static const struct x86_cpu_id avx2_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_AVX2),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, avx2_cpu_id);
-
static int __init crypto_morus1280_avx2_module_init(void)
{
- if (!x86_match_cpu(avx2_cpu_id))
+ if (!boot_cpu_has(X86_FEATURE_AVX2) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+ !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_morus1280_avx2_algs,
diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S
index 1fe637c7be9d..bd1aa1b60869 100644
--- a/arch/x86/crypto/morus1280-sse2-asm.S
+++ b/arch/x86/crypto/morus1280-sse2-asm.S
@@ -652,6 +652,7 @@ ENTRY(crypto_morus1280_sse2_enc_tail)
movdqu STATE4_HI, (9 * 16)(%rdi)
FRAME_END
+ ret
ENDPROC(crypto_morus1280_sse2_enc_tail)
/*
diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c
index 839270aa713c..95cf857d2cbb 100644
--- a/arch/x86/crypto/morus1280-sse2-glue.c
+++ b/arch/x86/crypto/morus1280-sse2-glue.c
@@ -37,15 +37,11 @@ asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor,
MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350);
-static const struct x86_cpu_id sse2_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_XMM2),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
-
static int __init crypto_morus1280_sse2_module_init(void)
{
- if (!x86_match_cpu(sse2_cpu_id))
+ if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+ !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_morus1280_sse2_algs,
diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S
index 71c72a0a0862..efa02816d921 100644
--- a/arch/x86/crypto/morus640-sse2-asm.S
+++ b/arch/x86/crypto/morus640-sse2-asm.S
@@ -437,6 +437,7 @@ ENTRY(crypto_morus640_sse2_enc_tail)
movdqu STATE4, (4 * 16)(%rdi)
FRAME_END
+ ret
ENDPROC(crypto_morus640_sse2_enc_tail)
/*
diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c
index 26b47e2db8d2..615fb7bc9a32 100644
--- a/arch/x86/crypto/morus640-sse2-glue.c
+++ b/arch/x86/crypto/morus640-sse2-glue.c
@@ -37,15 +37,11 @@ asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor,
MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400);
-static const struct x86_cpu_id sse2_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_XMM2),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
-
static int __init crypto_morus640_sse2_module_init(void)
{
- if (!x86_match_cpu(sse2_cpu_id))
+ if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+ !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_morus640_sse2_algs,
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 73a522d53b53..8ae7ffda8f98 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -981,7 +981,7 @@ ENTRY(\sym)
call \do_sym
- jmp error_exit /* %ebx: no swapgs flag */
+ jmp error_exit
.endif
END(\sym)
.endm
@@ -1222,7 +1222,6 @@ END(paranoid_exit)
/*
* Save all registers in pt_regs, and switch GS if needed.
- * Return: EBX=0: came from user mode; EBX=1: otherwise
*/
ENTRY(error_entry)
UNWIND_HINT_FUNC
@@ -1269,7 +1268,6 @@ ENTRY(error_entry)
* for these here too.
*/
.Lerror_kernelspace:
- incl %ebx
leaq native_irq_return_iret(%rip), %rcx
cmpq %rcx, RIP+8(%rsp)
je .Lerror_bad_iret
@@ -1303,28 +1301,20 @@ ENTRY(error_entry)
/*
* Pretend that the exception came from user mode: set up pt_regs
- * as if we faulted immediately after IRET and clear EBX so that
- * error_exit knows that we will be returning to user mode.
+ * as if we faulted immediately after IRET.
*/
mov %rsp, %rdi
call fixup_bad_iret
mov %rax, %rsp
- decl %ebx
jmp .Lerror_entry_from_usermode_after_swapgs
END(error_entry)
-
-/*
- * On entry, EBX is a "return to kernel mode" flag:
- * 1: already in kernel mode, don't need SWAPGS
- * 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode
- */
ENTRY(error_exit)
UNWIND_HINT_REGS
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
- testl %ebx, %ebx
- jnz retint_kernel
+ testb $3, CS(%rsp)
+ jz retint_kernel
jmp retint_user
END(error_exit)
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 4b98101209a1..d50bb4dc0650 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -579,7 +579,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
struct perf_event *event = pcpu->event;
- struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event *hwc;
struct perf_sample_data data;
struct perf_raw_record raw;
struct pt_regs regs;
@@ -602,6 +602,10 @@ fail:
return 0;
}
+ if (WARN_ON_ONCE(!event))
+ goto fail;
+
+ hwc = &event->hw;
msr = hwc->config_base;
buf = ibs_data.regs;
rdmsrl(msr, *buf);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 707b2a96e516..035c37481f57 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2041,15 +2041,15 @@ static void intel_pmu_disable_event(struct perf_event *event)
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
cpuc->intel_cp_status &= ~(1ull << hwc->idx);
+ if (unlikely(event->attr.precise_ip))
+ intel_pmu_pebs_disable(event);
+
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc);
return;
}
x86_pmu_disable_event(event);
-
- if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_disable(event);
}
static void intel_pmu_del_event(struct perf_event *event)
@@ -2068,17 +2068,19 @@ static void intel_pmu_read_event(struct perf_event *event)
x86_perf_event_update(event);
}
-static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
+static void intel_pmu_enable_fixed(struct perf_event *event)
{
+ struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
- u64 ctrl_val, bits, mask;
+ u64 ctrl_val, mask, bits = 0;
/*
- * Enable IRQ generation (0x8),
+ * Enable IRQ generation (0x8), if not PEBS,
* and enable ring-3 counting (0x2) and ring-0 counting (0x1)
* if requested:
*/
- bits = 0x8ULL;
+ if (!event->attr.precise_ip)
+ bits |= 0x8;
if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
bits |= 0x2;
if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
@@ -2120,14 +2122,14 @@ static void intel_pmu_enable_event(struct perf_event *event)
if (unlikely(event_is_checkpointed(event)))
cpuc->intel_cp_status |= (1ull << hwc->idx);
+ if (unlikely(event->attr.precise_ip))
+ intel_pmu_pebs_enable(event);
+
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
- intel_pmu_enable_fixed(hwc);
+ intel_pmu_enable_fixed(event);
return;
}
- if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_enable(event);
-
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}
@@ -2280,7 +2282,10 @@ again:
* counters from the GLOBAL_STATUS mask and we always process PEBS
* events via drain_pebs().
*/
- status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
+ if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+ status &= ~cpuc->pebs_enabled;
+ else
+ status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
/*
* PEBS overflow sets bit 62 in the global status register
@@ -2997,6 +3002,9 @@ static int intel_pmu_hw_config(struct perf_event *event)
}
if (x86_pmu.pebs_aliases)
x86_pmu.pebs_aliases(event);
+
+ if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+ event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
}
if (needs_branch_stack(event)) {
@@ -4069,7 +4077,6 @@ __init int intel_pmu_init(void)
intel_pmu_lbr_init_skl();
x86_pmu.event_constraints = intel_slm_event_constraints;
- x86_pmu.pebs_constraints = intel_glp_pebs_event_constraints;
x86_pmu.extra_regs = intel_glm_extra_regs;
/*
* It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
@@ -4079,6 +4086,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_prec_dist = true;
x86_pmu.lbr_pt_coexist = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_PEBS_ALL;
x86_pmu.get_event_constraints = glp_get_event_constraints;
x86_pmu.cpu_events = glm_events_attrs;
/* Goldmont Plus has 4-wide pipeline */
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8a10a045b57b..b7b01d762d32 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -408,9 +408,11 @@ static int alloc_bts_buffer(int cpu)
ds->bts_buffer_base = (unsigned long) cea;
ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
ds->bts_index = ds->bts_buffer_base;
- max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
- ds->bts_absolute_maximum = ds->bts_buffer_base + max;
- ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
+ max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
+ ds->bts_absolute_maximum = ds->bts_buffer_base +
+ max * BTS_RECORD_SIZE;
+ ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
+ (max / 16) * BTS_RECORD_SIZE;
return 0;
}
@@ -711,12 +713,6 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
-struct event_constraint intel_glp_pebs_event_constraints[] = {
- /* Allow all events as PEBS with no flags */
- INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
- EVENT_CONSTRAINT_END
-};
-
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
@@ -869,6 +865,13 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
}
}
+ /*
+ * Extended PEBS support
+ * Makes the PEBS code search the normal constraints.
+ */
+ if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+ return NULL;
+
return &emptyconstraint;
}
@@ -894,10 +897,16 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
{
struct debug_store *ds = cpuc->ds;
u64 threshold;
+ int reserved;
+
+ if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+ reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed;
+ else
+ reserved = x86_pmu.max_pebs_events;
if (cpuc->n_pebs == cpuc->n_large_pebs) {
threshold = ds->pebs_absolute_maximum -
- x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+ reserved * x86_pmu.pebs_record_size;
} else {
threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
}
@@ -961,7 +970,11 @@ void intel_pmu_pebs_enable(struct perf_event *event)
* This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
*/
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
- ds->pebs_event_reset[hwc->idx] =
+ unsigned int idx = hwc->idx;
+
+ if (idx >= INTEL_PMC_IDX_FIXED)
+ idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
+ ds->pebs_event_reset[idx] =
(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
} else {
ds->pebs_event_reset[hwc->idx] = 0;
@@ -1184,16 +1197,20 @@ static void setup_pebs_sample_data(struct perf_event *event,
}
/*
+ * We must however always use iregs for the unwinder to stay sane; the
+ * record BP,SP,IP can point into thin air when the record is from a
+ * previous PMI context or an (I)RET happend between the record and
+ * PMI.
+ */
+ if (sample_type & PERF_SAMPLE_CALLCHAIN)
+ data->callchain = perf_callchain(event, iregs);
+
+ /*
* We use the interrupt regs as a base because the PEBS record does not
* contain a full regs set, specifically it seems to lack segment
* descriptors, which get used by things like user_mode().
*
* In the simple case fix up only the IP for PERF_SAMPLE_IP.
- *
- * We must however always use BP,SP from iregs for the unwinder to stay
- * sane; the record BP,SP can point into thin air when the record is
- * from a previous PMI context or an (I)RET happend between the record
- * and PMI.
*/
*regs = *iregs;
@@ -1212,15 +1229,8 @@ static void setup_pebs_sample_data(struct perf_event *event,
regs->si = pebs->si;
regs->di = pebs->di;
- /*
- * Per the above; only set BP,SP if we don't need callchains.
- *
- * XXX: does this make sense?
- */
- if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
- regs->bp = pebs->bp;
- regs->sp = pebs->sp;
- }
+ regs->bp = pebs->bp;
+ regs->sp = pebs->sp;
#ifndef CONFIG_X86_32
regs->r8 = pebs->r8;
@@ -1482,9 +1492,10 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
struct debug_store *ds = cpuc->ds;
struct perf_event *event;
void *base, *at, *top;
- short counts[MAX_PEBS_EVENTS] = {};
- short error[MAX_PEBS_EVENTS] = {};
- int bit, i;
+ short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+ short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+ int bit, i, size;
+ u64 mask;
if (!x86_pmu.pebs_active)
return;
@@ -1494,6 +1505,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
ds->pebs_index = ds->pebs_buffer_base;
+ mask = (1ULL << x86_pmu.max_pebs_events) - 1;
+ size = x86_pmu.max_pebs_events;
+ if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
+ mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
+ size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
+ }
+
if (unlikely(base >= top)) {
/*
* The drain_pebs() could be called twice in a short period
@@ -1503,7 +1521,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
* update the event->count for this case.
*/
for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled,
- x86_pmu.max_pebs_events) {
+ size) {
event = cpuc->events[bit];
if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
intel_pmu_save_and_restart_reload(event, 0);
@@ -1516,12 +1534,12 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
u64 pebs_status;
pebs_status = p->status & cpuc->pebs_enabled;
- pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
+ pebs_status &= mask;
/* PEBS v3 has more accurate status bits */
if (x86_pmu.intel_cap.pebs_format >= 3) {
for_each_set_bit(bit, (unsigned long *)&pebs_status,
- x86_pmu.max_pebs_events)
+ size)
counts[bit]++;
continue;
@@ -1569,7 +1587,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
counts[bit]++;
}
- for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
+ for (bit = 0; bit < size; bit++) {
if ((counts[bit] == 0) && (error[bit] == 0))
continue;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index cf372b90557e..f3e006bed9a7 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -216,6 +216,8 @@ static void intel_pmu_lbr_reset_64(void)
void intel_pmu_lbr_reset(void)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
if (!x86_pmu.lbr_nr)
return;
@@ -223,6 +225,9 @@ void intel_pmu_lbr_reset(void)
intel_pmu_lbr_reset_32();
else
intel_pmu_lbr_reset_64();
+
+ cpuc->last_task_ctx = NULL;
+ cpuc->last_log_id = 0;
}
/*
@@ -334,6 +339,7 @@ static inline u64 rdlbr_to(unsigned int idx)
static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int i;
unsigned lbr_idx, mask;
u64 tos;
@@ -344,9 +350,21 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
return;
}
- mask = x86_pmu.lbr_nr - 1;
tos = task_ctx->tos;
- for (i = 0; i < tos; i++) {
+ /*
+ * Does not restore the LBR registers, if
+ * - No one else touched them, and
+ * - Did not enter C6
+ */
+ if ((task_ctx == cpuc->last_task_ctx) &&
+ (task_ctx->log_id == cpuc->last_log_id) &&
+ rdlbr_from(tos)) {
+ task_ctx->lbr_stack_state = LBR_NONE;
+ return;
+ }
+
+ mask = x86_pmu.lbr_nr - 1;
+ for (i = 0; i < task_ctx->valid_lbrs; i++) {
lbr_idx = (tos - i) & mask;
wrlbr_from(lbr_idx, task_ctx->lbr_from[i]);
wrlbr_to (lbr_idx, task_ctx->lbr_to[i]);
@@ -354,14 +372,24 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
}
+
+ for (; i < x86_pmu.lbr_nr; i++) {
+ lbr_idx = (tos - i) & mask;
+ wrlbr_from(lbr_idx, 0);
+ wrlbr_to(lbr_idx, 0);
+ if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ wrmsrl(MSR_LBR_INFO_0 + lbr_idx, 0);
+ }
+
wrmsrl(x86_pmu.lbr_tos, tos);
task_ctx->lbr_stack_state = LBR_NONE;
}
static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
unsigned lbr_idx, mask;
- u64 tos;
+ u64 tos, from;
int i;
if (task_ctx->lbr_callstack_users == 0) {
@@ -371,15 +399,22 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
mask = x86_pmu.lbr_nr - 1;
tos = intel_pmu_lbr_tos();
- for (i = 0; i < tos; i++) {
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
lbr_idx = (tos - i) & mask;
- task_ctx->lbr_from[i] = rdlbr_from(lbr_idx);
+ from = rdlbr_from(lbr_idx);
+ if (!from)
+ break;
+ task_ctx->lbr_from[i] = from;
task_ctx->lbr_to[i] = rdlbr_to(lbr_idx);
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
}
+ task_ctx->valid_lbrs = i;
task_ctx->tos = tos;
task_ctx->lbr_stack_state = LBR_VALID;
+
+ cpuc->last_task_ctx = task_ctx;
+ cpuc->last_log_id = ++task_ctx->log_id;
}
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
@@ -531,7 +566,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
*/
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
- bool need_info = false;
+ bool need_info = false, call_stack = false;
unsigned long mask = x86_pmu.lbr_nr - 1;
int lbr_format = x86_pmu.intel_cap.lbr_format;
u64 tos = intel_pmu_lbr_tos();
@@ -542,7 +577,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
if (cpuc->lbr_sel) {
need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
if (cpuc->lbr_sel->config & LBR_CALL_STACK)
- num = tos;
+ call_stack = true;
}
for (i = 0; i < num; i++) {
@@ -555,6 +590,13 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
from = rdlbr_from(lbr_idx);
to = rdlbr_to(lbr_idx);
+ /*
+ * Read LBR call stack entries
+ * until invalid entry (0s) is detected.
+ */
+ if (call_stack && !from)
+ break;
+
if (lbr_format == LBR_FORMAT_INFO && need_info) {
u64 info;
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index c9e1e0bef3c3..e17ab885b1e9 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -28,7 +28,7 @@
#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
#define UNCORE_EXTRA_PCI_DEV 0xff
-#define UNCORE_EXTRA_PCI_DEV_MAX 3
+#define UNCORE_EXTRA_PCI_DEV_MAX 4
#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 87dc0263a2e1..51d7c117e3c7 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1029,6 +1029,7 @@ void snbep_uncore_cpu_init(void)
enum {
SNBEP_PCI_QPI_PORT0_FILTER,
SNBEP_PCI_QPI_PORT1_FILTER,
+ BDX_PCI_QPI_PORT2_FILTER,
HSWEP_PCI_PCU_3,
};
@@ -3286,15 +3287,18 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = {
},
{ /* QPI Port 0 filter */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 0),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT0_FILTER),
},
{ /* QPI Port 1 filter */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 1),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT1_FILTER),
},
{ /* QPI Port 2 filter */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ BDX_PCI_QPI_PORT2_FILTER),
},
{ /* PCU.3 (for Capability registers) */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0),
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 9f3711470ec1..156286335351 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -163,6 +163,7 @@ struct intel_excl_cntrs {
unsigned core_id; /* per-core: core id */
};
+struct x86_perf_task_context;
#define MAX_LBR_ENTRIES 32
enum {
@@ -214,6 +215,8 @@ struct cpu_hw_events {
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
struct er_account *lbr_sel;
u64 br_sel;
+ struct x86_perf_task_context *last_task_ctx;
+ int last_log_id;
/*
* Intel host/guest exclude bits
@@ -648,8 +651,10 @@ struct x86_perf_task_context {
u64 lbr_to[MAX_LBR_ENTRIES];
u64 lbr_info[MAX_LBR_ENTRIES];
int tos;
+ int valid_lbrs;
int lbr_callstack_users;
int lbr_stack_state;
+ int log_id;
};
#define x86_add_quirk(func_) \
@@ -668,6 +673,7 @@ do { \
#define PMU_FL_HAS_RSP_1 0x2 /* has 2 equivalent offcore_rsp regs */
#define PMU_FL_EXCL_CNTRS 0x4 /* has exclusive counter requirements */
#define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */
+#define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index f68855499391..402338365651 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -114,6 +114,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
}
+ if (nr_bank < 0)
+ goto ipi_mask_ex_done;
if (!nr_bank)
ipi_arg->vp_set.format = HV_GENERIC_SET_ALL;
@@ -158,6 +160,9 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
for_each_cpu(cur_cpu, mask) {
vcpu = hv_cpu_number_to_vp_number(cur_cpu);
+ if (vcpu == VP_INVAL)
+ goto ipi_mask_done;
+
/*
* This particular version of the IPI hypercall can
* only target upto 64 CPUs.
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 4c431e1c1eff..1ff420217298 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -265,7 +265,7 @@ void __init hyperv_init(void)
{
u64 guest_id, required_msrs;
union hv_x64_msr_hypercall_contents hypercall_msr;
- int cpuhp;
+ int cpuhp, i;
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return;
@@ -293,6 +293,9 @@ void __init hyperv_init(void)
if (!hv_vp_index)
return;
+ for (i = 0; i < num_possible_cpus(); i++)
+ hv_vp_index[i] = VP_INVAL;
+
hv_vp_assist_page = kcalloc(num_possible_cpus(),
sizeof(*hv_vp_assist_page), GFP_KERNEL);
if (!hv_vp_assist_page) {
diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
index c356098b6fb9..4d4015ddcf26 100644
--- a/arch/x86/include/asm/apm.h
+++ b/arch/x86/include/asm/apm.h
@@ -7,8 +7,6 @@
#ifndef _ASM_X86_MACH_DEFAULT_APM_H
#define _ASM_X86_MACH_DEFAULT_APM_H
-#include <asm/nospec-branch.h>
-
#ifdef APM_ZERO_SEGS
# define APM_DO_ZERO_SEGS \
"pushl %%ds\n\t" \
@@ -34,7 +32,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
- firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -47,7 +44,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
"=S" (*esi)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
- firmware_restrict_branch_speculation_end();
}
static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@@ -60,7 +56,6 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
- firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -73,7 +68,6 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
"=S" (si)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
- firmware_restrict_branch_speculation_end();
return error;
}
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 219faaec51df..990770f9e76b 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -46,6 +46,65 @@
#define _ASM_SI __ASM_REG(si)
#define _ASM_DI __ASM_REG(di)
+#ifndef __x86_64__
+/* 32 bit */
+
+#define _ASM_ARG1 _ASM_AX
+#define _ASM_ARG2 _ASM_DX
+#define _ASM_ARG3 _ASM_CX
+
+#define _ASM_ARG1L eax
+#define _ASM_ARG2L edx
+#define _ASM_ARG3L ecx
+
+#define _ASM_ARG1W ax
+#define _ASM_ARG2W dx
+#define _ASM_ARG3W cx
+
+#define _ASM_ARG1B al
+#define _ASM_ARG2B dl
+#define _ASM_ARG3B cl
+
+#else
+/* 64 bit */
+
+#define _ASM_ARG1 _ASM_DI
+#define _ASM_ARG2 _ASM_SI
+#define _ASM_ARG3 _ASM_DX
+#define _ASM_ARG4 _ASM_CX
+#define _ASM_ARG5 r8
+#define _ASM_ARG6 r9
+
+#define _ASM_ARG1Q rdi
+#define _ASM_ARG2Q rsi
+#define _ASM_ARG3Q rdx
+#define _ASM_ARG4Q rcx
+#define _ASM_ARG5Q r8
+#define _ASM_ARG6Q r9
+
+#define _ASM_ARG1L edi
+#define _ASM_ARG2L esi
+#define _ASM_ARG3L edx
+#define _ASM_ARG4L ecx
+#define _ASM_ARG5L r8d
+#define _ASM_ARG6L r9d
+
+#define _ASM_ARG1W di
+#define _ASM_ARG2W si
+#define _ASM_ARG3W dx
+#define _ASM_ARG4W cx
+#define _ASM_ARG5W r8w
+#define _ASM_ARG6W r9w
+
+#define _ASM_ARG1B dil
+#define _ASM_ARG2B sil
+#define _ASM_ARG3B dl
+#define _ASM_ARG4B cl
+#define _ASM_ARG5B r8b
+#define _ASM_ARG6B r9b
+
+#endif
+
/*
* Macros to generate condition code outputs from inline assembly,
* The output operand must be type "bool".
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 0db6bec95489..b143717b92b3 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -80,6 +80,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
* true if the result is zero, or false for all
* other cases.
*/
+#define arch_atomic_sub_and_test arch_atomic_sub_and_test
static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
@@ -91,6 +92,7 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
*
* Atomically increments @v by 1.
*/
+#define arch_atomic_inc arch_atomic_inc
static __always_inline void arch_atomic_inc(atomic_t *v)
{
asm volatile(LOCK_PREFIX "incl %0"
@@ -103,6 +105,7 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
*
* Atomically decrements @v by 1.
*/
+#define arch_atomic_dec arch_atomic_dec
static __always_inline void arch_atomic_dec(atomic_t *v)
{
asm volatile(LOCK_PREFIX "decl %0"
@@ -117,6 +120,7 @@ static __always_inline void arch_atomic_dec(atomic_t *v)
* returns true if the result is 0, or false for all other
* cases.
*/
+#define arch_atomic_dec_and_test arch_atomic_dec_and_test
static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
@@ -130,6 +134,7 @@ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
* and returns true if the result is zero, or false for all
* other cases.
*/
+#define arch_atomic_inc_and_test arch_atomic_inc_and_test
static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
@@ -144,6 +149,7 @@ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
+#define arch_atomic_add_negative arch_atomic_add_negative
static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
@@ -173,9 +179,6 @@ static __always_inline int arch_atomic_sub_return(int i, atomic_t *v)
return arch_atomic_add_return(-i, v);
}
-#define arch_atomic_inc_return(v) (arch_atomic_add_return(1, v))
-#define arch_atomic_dec_return(v) (arch_atomic_sub_return(1, v))
-
static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
{
return xadd(&v->counter, i);
@@ -199,7 +202,7 @@ static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int n
static inline int arch_atomic_xchg(atomic_t *v, int new)
{
- return xchg(&v->counter, new);
+ return arch_xchg(&v->counter, new);
}
static inline void arch_atomic_and(int i, atomic_t *v)
@@ -253,27 +256,6 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
return val;
}
-/**
- * __arch_atomic_add_unless - add unless the number is already a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as @v was not already @u.
- * Returns the old value of @v.
- */
-static __always_inline int __arch_atomic_add_unless(atomic_t *v, int a, int u)
-{
- int c = arch_atomic_read(v);
-
- do {
- if (unlikely(c == u))
- break;
- } while (!arch_atomic_try_cmpxchg(v, &c, c + a));
-
- return c;
-}
-
#ifdef CONFIG_X86_32
# include <asm/atomic64_32.h>
#else
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 92212bf0484f..ef959f02d070 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -158,6 +158,7 @@ static inline long long arch_atomic64_inc_return(atomic64_t *v)
"S" (v) : "memory", "ecx");
return a;
}
+#define arch_atomic64_inc_return arch_atomic64_inc_return
static inline long long arch_atomic64_dec_return(atomic64_t *v)
{
@@ -166,6 +167,7 @@ static inline long long arch_atomic64_dec_return(atomic64_t *v)
"S" (v) : "memory", "ecx");
return a;
}
+#define arch_atomic64_dec_return arch_atomic64_dec_return
/**
* arch_atomic64_add - add integer to atomic64 variable
@@ -198,25 +200,12 @@ static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
}
/**
- * arch_atomic64_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer to type atomic64_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static inline int arch_atomic64_sub_and_test(long long i, atomic64_t *v)
-{
- return arch_atomic64_sub_return(i, v) == 0;
-}
-
-/**
* arch_atomic64_inc - increment atomic64 variable
* @v: pointer to type atomic64_t
*
* Atomically increments @v by 1.
*/
+#define arch_atomic64_inc arch_atomic64_inc
static inline void arch_atomic64_inc(atomic64_t *v)
{
__alternative_atomic64(inc, inc_return, /* no output */,
@@ -229,6 +218,7 @@ static inline void arch_atomic64_inc(atomic64_t *v)
*
* Atomically decrements @v by 1.
*/
+#define arch_atomic64_dec arch_atomic64_dec
static inline void arch_atomic64_dec(atomic64_t *v)
{
__alternative_atomic64(dec, dec_return, /* no output */,
@@ -236,46 +226,6 @@ static inline void arch_atomic64_dec(atomic64_t *v)
}
/**
- * arch_atomic64_dec_and_test - decrement and test
- * @v: pointer to type atomic64_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-static inline int arch_atomic64_dec_and_test(atomic64_t *v)
-{
- return arch_atomic64_dec_return(v) == 0;
-}
-
-/**
- * atomic64_inc_and_test - increment and test
- * @v: pointer to type atomic64_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static inline int arch_atomic64_inc_and_test(atomic64_t *v)
-{
- return arch_atomic64_inc_return(v) == 0;
-}
-
-/**
- * arch_atomic64_add_negative - add and test if negative
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-static inline int arch_atomic64_add_negative(long long i, atomic64_t *v)
-{
- return arch_atomic64_add_return(i, v) < 0;
-}
-
-/**
* arch_atomic64_add_unless - add unless the number is a given value
* @v: pointer of type atomic64_t
* @a: the amount to add to v...
@@ -295,7 +245,7 @@ static inline int arch_atomic64_add_unless(atomic64_t *v, long long a,
return (int)a;
}
-
+#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
{
int r;
@@ -304,6 +254,7 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
return r;
}
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
{
long long r;
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6106b59d3260..4343d9b4f30e 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -71,6 +71,7 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
* true if the result is zero, or false for all
* other cases.
*/
+#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
@@ -82,6 +83,7 @@ static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
*
* Atomically increments @v by 1.
*/
+#define arch_atomic64_inc arch_atomic64_inc
static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "incq %0"
@@ -95,6 +97,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
*
* Atomically decrements @v by 1.
*/
+#define arch_atomic64_dec arch_atomic64_dec
static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "decq %0"
@@ -110,6 +113,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
* returns true if the result is 0, or false for all other
* cases.
*/
+#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
@@ -123,6 +127,7 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
* and returns true if the result is zero, or false for all
* other cases.
*/
+#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
@@ -137,6 +142,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
+#define arch_atomic64_add_negative arch_atomic64_add_negative
static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
@@ -169,9 +175,6 @@ static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v)
return xadd(&v->counter, -i);
}
-#define arch_atomic64_inc_return(v) (arch_atomic64_add_return(1, (v)))
-#define arch_atomic64_dec_return(v) (arch_atomic64_sub_return(1, (v)))
-
static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new)
{
return arch_cmpxchg(&v->counter, old, new);
@@ -185,46 +188,7 @@ static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, l
static inline long arch_atomic64_xchg(atomic64_t *v, long new)
{
- return xchg(&v->counter, new);
-}
-
-/**
- * arch_atomic64_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static inline bool arch_atomic64_add_unless(atomic64_t *v, long a, long u)
-{
- s64 c = arch_atomic64_read(v);
- do {
- if (unlikely(c == u))
- return false;
- } while (!arch_atomic64_try_cmpxchg(v, &c, c + a));
- return true;
-}
-
-#define arch_atomic64_inc_not_zero(v) arch_atomic64_add_unless((v), 1, 0)
-
-/*
- * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
-static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
-{
- s64 dec, c = arch_atomic64_read(v);
- do {
- dec = c - 1;
- if (unlikely(dec < 0))
- break;
- } while (!arch_atomic64_try_cmpxchg(v, &c, dec));
- return dec;
+ return arch_xchg(&v->counter, new);
}
static inline void arch_atomic64_and(long i, atomic64_t *v)
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index e3efd8a06066..a55d79b233d3 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -75,7 +75,7 @@ extern void __add_wrong_size(void)
* use "asm volatile" and "memory" clobbers to prevent gcc from moving
* information around.
*/
-#define xchg(ptr, v) __xchg_op((ptr), (v), xchg, "")
+#define arch_xchg(ptr, v) __xchg_op((ptr), (v), xchg, "")
/*
* Atomic compare and exchange. Compare OLD with MEM, if identical,
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index bfca3b346c74..072e5459fe2f 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -10,13 +10,13 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
#define arch_cmpxchg64(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg((ptr), (o), (n)); \
+ arch_cmpxchg((ptr), (o), (n)); \
})
#define arch_cmpxchg64_local(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg_local((ptr), (o), (n)); \
+ arch_cmpxchg_local((ptr), (o), (n)); \
})
#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16)
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index f59c39835a5a..a1f0e90d0818 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -49,11 +49,14 @@ static inline int hw_breakpoint_slots(int type)
return HBP_NUM;
}
+struct perf_event_attr;
struct perf_event;
struct pmu;
-extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw);
extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data);
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index 62a9f4966b42..ae26df1c2789 100644
--- a/arch/x86/include/asm/intel_ds.h
+++ b/arch/x86/include/asm/intel_ds.h
@@ -8,6 +8,7 @@
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS 8
+#define MAX_FIXED_PEBS_EVENTS 3
/*
* A debug store configuration.
@@ -23,7 +24,7 @@ struct debug_store {
u64 pebs_index;
u64 pebs_absolute_maximum;
u64 pebs_interrupt_threshold;
- u64 pebs_event_reset[MAX_PEBS_EVENTS];
+ u64 pebs_event_reset[MAX_PEBS_EVENTS + MAX_FIXED_PEBS_EVENTS];
} __aligned(PAGE_SIZE);
DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 89f08955fff7..c4fc17220df9 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -13,7 +13,7 @@
* Interrupt control:
*/
-static inline unsigned long native_save_fl(void)
+extern inline unsigned long native_save_fl(void)
{
unsigned long flags;
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 367d99cff426..c8cec1b39b88 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -78,7 +78,7 @@ struct arch_specific_insn {
* boostable = true: This instruction has been boosted: we have
* added a relative jump after the instruction copy in insn,
* so no single-step and fixup are needed (unless there's
- * a post_handler or break_handler).
+ * a post_handler).
*/
bool boostable;
bool if_modifier;
@@ -111,9 +111,6 @@ struct kprobe_ctlblk {
unsigned long kprobe_status;
unsigned long kprobe_old_flags;
unsigned long kprobe_saved_flags;
- unsigned long *jprobe_saved_sp;
- struct pt_regs jprobe_saved_regs;
- kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
struct prev_kprobe prev_kprobe;
};
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 3cd14311edfa..5a7375ed5f7c 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -9,6 +9,8 @@
#include <asm/hyperv-tlfs.h>
#include <asm/nospec-branch.h>
+#define VP_INVAL U32_MAX
+
struct ms_hyperv_info {
u32 features;
u32 misc_features;
@@ -20,7 +22,6 @@ struct ms_hyperv_info {
extern struct ms_hyperv_info ms_hyperv;
-
/*
* Generate the guest ID.
*/
@@ -281,6 +282,8 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset,
*/
for_each_cpu(cpu, cpus) {
vcpu = hv_cpu_number_to_vp_number(cpu);
+ if (vcpu == VP_INVAL)
+ return -1;
vcpu_bank = vcpu / 64;
vcpu_offset = vcpu % 64;
__set_bit(vcpu_offset, (unsigned long *)
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 9ef5ee03d2d7..159622ee0674 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -43,7 +43,7 @@ asm (".pushsection .text;"
"push %rdx;"
"mov $0x1,%eax;"
"xor %edx,%edx;"
- "lock cmpxchg %dl,(%rdi);"
+ LOCK_PREFIX "cmpxchg %dl,(%rdi);"
"cmp $0x1,%al;"
"jne .slowpath;"
"pop %rdx;"
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index 4cf11d88d3b3..19b90521954c 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -5,6 +5,7 @@
* PaX/grsecurity.
*/
#include <linux/refcount.h>
+#include <asm/bug.h>
/*
* This is the first portion of the refcount error handling, which lives in
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 62acb613114b..a9d637bc301d 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -52,7 +52,12 @@ copy_to_user_mcsafe(void *to, const void *from, unsigned len)
unsigned long ret;
__uaccess_begin();
- ret = memcpy_mcsafe(to, from, len);
+ /*
+ * Note, __memcpy_mcsafe() is explicitly used since it can
+ * handle exceptions / faults. memcpy_mcsafe() may fall back to
+ * memcpy() which lacks this handling.
+ */
+ ret = __memcpy_mcsafe(to, from, len);
__uaccess_end();
return ret;
}
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 02d6f5cf4e70..8824d01c0c35 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -61,6 +61,7 @@ obj-y += alternative.o i8253.o hw_breakpoint.o
obj-y += tsc.o tsc_msr.o io_delay.o rtc.o
obj-y += pci-iommu_table.o
obj-y += resource.o
+obj-y += irqflags.o
obj-y += process.o
obj-y += fpu/
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2aabd4cb0e3f..07fa222f0c52 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -573,6 +573,9 @@ static u32 skx_deadline_rev(void)
case 0x04: return 0x02000014;
}
+ if (boot_cpu_data.x86_stepping > 4)
+ return 0;
+
return ~0U;
}
@@ -937,7 +940,7 @@ static int __init calibrate_APIC_clock(void)
if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
pr_warning("APIC timer disabled due to verification failure\n");
- return -1;
+ return -1;
}
return 0;
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 35aaee4fc028..0954315842c0 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -218,7 +218,8 @@ static int reserve_irq_vector(struct irq_data *irqd)
return 0;
}
-static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest)
+static int
+assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest)
{
struct apic_chip_data *apicd = apic_chip_data(irqd);
bool resvd = apicd->has_reserved;
@@ -245,22 +246,12 @@ static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest)
return -EBUSY;
vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu);
- if (vector > 0)
- apic_update_vector(irqd, vector, cpu);
trace_vector_alloc(irqd->irq, vector, resvd, vector);
- return vector;
-}
-
-static int assign_vector_locked(struct irq_data *irqd,
- const struct cpumask *dest)
-{
- struct apic_chip_data *apicd = apic_chip_data(irqd);
- int vector = allocate_vector(irqd, dest);
-
if (vector < 0)
return vector;
+ apic_update_vector(irqd, vector, cpu);
+ apic_update_irq_cfg(irqd, vector, cpu);
- apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu);
return 0;
}
@@ -433,7 +424,7 @@ static int activate_managed(struct irq_data *irqd)
pr_err("Managed startup irq %u, no vector available\n",
irqd->irq);
}
- return ret;
+ return ret;
}
static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 5d0de79fdab0..ec00d1ff5098 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -240,6 +240,7 @@
#include <asm/olpc.h>
#include <asm/paravirt.h>
#include <asm/reboot.h>
+#include <asm/nospec-branch.h>
#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
extern int (*console_blank_hook)(int);
@@ -614,11 +615,13 @@ static long __apm_bios_call(void *_call)
gdt[0x40 / 8] = bad_bios_desc;
apm_irq_save(flags);
+ firmware_restrict_branch_speculation_start();
APM_DO_SAVE_SEGS;
apm_bios_call_asm(call->func, call->ebx, call->ecx,
&call->eax, &call->ebx, &call->ecx, &call->edx,
&call->esi);
APM_DO_RESTORE_SEGS;
+ firmware_restrict_branch_speculation_end();
apm_irq_restore(flags);
gdt[0x40 / 8] = save_desc_40;
put_cpu();
@@ -690,10 +693,12 @@ static long __apm_bios_call_simple(void *_call)
gdt[0x40 / 8] = bad_bios_desc;
apm_irq_save(flags);
+ firmware_restrict_branch_speculation_start();
APM_DO_SAVE_SEGS;
error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx,
&call->eax);
APM_DO_RESTORE_SEGS;
+ firmware_restrict_branch_speculation_end();
apm_irq_restore(flags);
gdt[0x40 / 8] = save_desc_40;
put_cpu();
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 082d7875cef8..38915fbfae73 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -543,7 +543,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
nodes_per_socket = ((value >> 3) & 7) + 1;
}
- if (c->x86 >= 0x15 && c->x86 <= 0x17) {
+ if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) &&
+ !boot_cpu_has(X86_FEATURE_VIRT_SSBD) &&
+ c->x86 >= 0x15 && c->x86 <= 0x17) {
unsigned int bit;
switch (c->x86) {
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 404df26b7de8..5c0ea39311fe 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -155,7 +155,8 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
/* SSBD controlled in MSR_SPEC_CTRL */
- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
+ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+ static_cpu_has(X86_FEATURE_AMD_SSBD))
hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
if (hostval != guestval) {
@@ -533,9 +534,10 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
* Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may
* use a completely different MSR and bit dependent on family.
*/
- if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) &&
+ !static_cpu_has(X86_FEATURE_AMD_SSBD)) {
x86_amd_ssb_disable();
- else {
+ } else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index c102ad51025e..4b767284b7f5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -123,8 +123,8 @@ void mce_setup(struct mce *m)
{
memset(m, 0, sizeof(struct mce));
m->cpu = m->extcpu = smp_processor_id();
- /* We hope get_seconds stays lockless */
- m->time = get_seconds();
+ /* need the internal __ version to avoid deadlocks */
+ m->time = __ktime_get_real_seconds();
m->cpuvendor = boot_cpu_data.x86_vendor;
m->cpuid = cpuid_eax(1);
m->socketid = cpu_data(m->extcpu).phys_proc_id;
@@ -1104,6 +1104,101 @@ static void mce_unmap_kpfn(unsigned long pfn)
}
#endif
+
+/*
+ * Cases where we avoid rendezvous handler timeout:
+ * 1) If this CPU is offline.
+ *
+ * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
+ * skip those CPUs which remain looping in the 1st kernel - see
+ * crash_nmi_callback().
+ *
+ * Note: there still is a small window between kexec-ing and the new,
+ * kdump kernel establishing a new #MC handler where a broadcasted MCE
+ * might not get handled properly.
+ */
+static bool __mc_check_crashing_cpu(int cpu)
+{
+ if (cpu_is_offline(cpu) ||
+ (crashing_cpu != -1 && crashing_cpu != cpu)) {
+ u64 mcgstatus;
+
+ mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+ if (mcgstatus & MCG_STATUS_RIPV) {
+ mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+ return true;
+ }
+ }
+ return false;
+}
+
+static void __mc_scan_banks(struct mce *m, struct mce *final,
+ unsigned long *toclear, unsigned long *valid_banks,
+ int no_way_out, int *worst)
+{
+ struct mca_config *cfg = &mca_cfg;
+ int severity, i;
+
+ for (i = 0; i < cfg->banks; i++) {
+ __clear_bit(i, toclear);
+ if (!test_bit(i, valid_banks))
+ continue;
+
+ if (!mce_banks[i].ctl)
+ continue;
+
+ m->misc = 0;
+ m->addr = 0;
+ m->bank = i;
+
+ m->status = mce_rdmsrl(msr_ops.status(i));
+ if (!(m->status & MCI_STATUS_VAL))
+ continue;
+
+ /*
+ * Corrected or non-signaled errors are handled by
+ * machine_check_poll(). Leave them alone, unless this panics.
+ */
+ if (!(m->status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
+ !no_way_out)
+ continue;
+
+ /* Set taint even when machine check was not enabled. */
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
+ severity = mce_severity(m, cfg->tolerant, NULL, true);
+
+ /*
+ * When machine check was for corrected/deferred handler don't
+ * touch, unless we're panicking.
+ */
+ if ((severity == MCE_KEEP_SEVERITY ||
+ severity == MCE_UCNA_SEVERITY) && !no_way_out)
+ continue;
+
+ __set_bit(i, toclear);
+
+ /* Machine check event was not enabled. Clear, but ignore. */
+ if (severity == MCE_NO_SEVERITY)
+ continue;
+
+ mce_read_aux(m, i);
+
+ /* assuming valid severity level != 0 */
+ m->severity = severity;
+
+ mce_log(m);
+
+ if (severity > *worst) {
+ *final = *m;
+ *worst = severity;
+ }
+ }
+
+ /* mce_clear_state will clear *final, save locally for use later */
+ *m = *final;
+}
+
/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
@@ -1118,68 +1213,45 @@ static void mce_unmap_kpfn(unsigned long pfn)
*/
void do_machine_check(struct pt_regs *regs, long error_code)
{
+ DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
+ DECLARE_BITMAP(toclear, MAX_NR_BANKS);
struct mca_config *cfg = &mca_cfg;
+ int cpu = smp_processor_id();
+ char *msg = "Unknown";
struct mce m, *final;
- int i;
int worst = 0;
- int severity;
/*
* Establish sequential order between the CPUs entering the machine
* check handler.
*/
int order = -1;
+
/*
* If no_way_out gets set, there is no safe way to recover from this
* MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
*/
int no_way_out = 0;
+
/*
* If kill_it gets set, there might be a way to recover from this
* error.
*/
int kill_it = 0;
- DECLARE_BITMAP(toclear, MAX_NR_BANKS);
- DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
- char *msg = "Unknown";
/*
* MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
* on Intel.
*/
int lmce = 1;
- int cpu = smp_processor_id();
-
- /*
- * Cases where we avoid rendezvous handler timeout:
- * 1) If this CPU is offline.
- *
- * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
- * skip those CPUs which remain looping in the 1st kernel - see
- * crash_nmi_callback().
- *
- * Note: there still is a small window between kexec-ing and the new,
- * kdump kernel establishing a new #MC handler where a broadcasted MCE
- * might not get handled properly.
- */
- if (cpu_is_offline(cpu) ||
- (crashing_cpu != -1 && crashing_cpu != cpu)) {
- u64 mcgstatus;
- mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
- if (mcgstatus & MCG_STATUS_RIPV) {
- mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
- return;
- }
- }
+ if (__mc_check_crashing_cpu(cpu))
+ return;
ist_enter(regs);
this_cpu_inc(mce_exception_count);
- if (!cfg->banks)
- goto out;
-
mce_gather_info(&m, regs);
m.tsc = rdtsc();
@@ -1220,67 +1292,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
order = mce_start(&no_way_out);
}
- for (i = 0; i < cfg->banks; i++) {
- __clear_bit(i, toclear);
- if (!test_bit(i, valid_banks))
- continue;
- if (!mce_banks[i].ctl)
- continue;
-
- m.misc = 0;
- m.addr = 0;
- m.bank = i;
-
- m.status = mce_rdmsrl(msr_ops.status(i));
- if ((m.status & MCI_STATUS_VAL) == 0)
- continue;
-
- /*
- * Non uncorrected or non signaled errors are handled by
- * machine_check_poll. Leave them alone, unless this panics.
- */
- if (!(m.status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
- !no_way_out)
- continue;
-
- /*
- * Set taint even when machine check was not enabled.
- */
- add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
-
- severity = mce_severity(&m, cfg->tolerant, NULL, true);
-
- /*
- * When machine check was for corrected/deferred handler don't
- * touch, unless we're panicing.
- */
- if ((severity == MCE_KEEP_SEVERITY ||
- severity == MCE_UCNA_SEVERITY) && !no_way_out)
- continue;
- __set_bit(i, toclear);
- if (severity == MCE_NO_SEVERITY) {
- /*
- * Machine check event was not enabled. Clear, but
- * ignore.
- */
- continue;
- }
-
- mce_read_aux(&m, i);
-
- /* assuming valid severity level != 0 */
- m.severity = severity;
-
- mce_log(&m);
-
- if (severity > worst) {
- *final = m;
- worst = severity;
- }
- }
-
- /* mce_clear_state will clear *final, save locally for use later */
- m = *final;
+ __mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst);
if (!no_way_out)
mce_clear_state(toclear);
@@ -1319,7 +1331,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
if (worst > 0)
mce_report_event(regs);
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
-out:
+
sync_core();
if (worst != MCE_AR_SEVERITY && !kill_it)
@@ -2165,9 +2177,6 @@ static ssize_t store_int_with_restart(struct device *s,
if (check_interval == old_check_interval)
return ret;
- if (check_interval < 1)
- check_interval = 1;
-
mutex_lock(&mce_sysfs_mutex);
mce_restart();
mutex_unlock(&mce_sysfs_mutex);
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 4021d3859499..40eee6cc4124 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -106,7 +106,8 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
memset(line, 0, LINE_SIZE);
- length = strncpy_from_user(line, buf, LINE_SIZE - 1);
+ len = min_t(size_t, len, LINE_SIZE - 1);
+ length = strncpy_from_user(line, buf, len);
if (length < 0)
return length;
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 8771766d46b6..34a5c1715148 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -169,28 +169,29 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
set_dr_addr_mask(0, i);
}
-/*
- * Check for virtual address in kernel space.
- */
-int arch_check_bp_in_kernelspace(struct perf_event *bp)
+static int arch_bp_generic_len(int x86_len)
{
- unsigned int len;
- unsigned long va;
- struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-
- va = info->address;
- len = bp->attr.bp_len;
-
- /*
- * We don't need to worry about va + len - 1 overflowing:
- * we already require that va is aligned to a multiple of len.
- */
- return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
+ switch (x86_len) {
+ case X86_BREAKPOINT_LEN_1:
+ return HW_BREAKPOINT_LEN_1;
+ case X86_BREAKPOINT_LEN_2:
+ return HW_BREAKPOINT_LEN_2;
+ case X86_BREAKPOINT_LEN_4:
+ return HW_BREAKPOINT_LEN_4;
+#ifdef CONFIG_X86_64
+ case X86_BREAKPOINT_LEN_8:
+ return HW_BREAKPOINT_LEN_8;
+#endif
+ default:
+ return -EINVAL;
+ }
}
int arch_bp_generic_fields(int x86_len, int x86_type,
int *gen_len, int *gen_type)
{
+ int len;
+
/* Type */
switch (x86_type) {
case X86_BREAKPOINT_EXECUTE:
@@ -211,42 +212,47 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
}
/* Len */
- switch (x86_len) {
- case X86_BREAKPOINT_LEN_1:
- *gen_len = HW_BREAKPOINT_LEN_1;
- break;
- case X86_BREAKPOINT_LEN_2:
- *gen_len = HW_BREAKPOINT_LEN_2;
- break;
- case X86_BREAKPOINT_LEN_4:
- *gen_len = HW_BREAKPOINT_LEN_4;
- break;
-#ifdef CONFIG_X86_64
- case X86_BREAKPOINT_LEN_8:
- *gen_len = HW_BREAKPOINT_LEN_8;
- break;
-#endif
- default:
+ len = arch_bp_generic_len(x86_len);
+ if (len < 0)
return -EINVAL;
- }
+ *gen_len = len;
return 0;
}
-
-static int arch_build_bp_info(struct perf_event *bp)
+/*
+ * Check for virtual address in kernel space.
+ */
+int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
{
- struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ unsigned long va;
+ int len;
- info->address = bp->attr.bp_addr;
+ va = hw->address;
+ len = arch_bp_generic_len(hw->len);
+ WARN_ON_ONCE(len < 0);
+
+ /*
+ * We don't need to worry about va + len - 1 overflowing:
+ * we already require that va is aligned to a multiple of len.
+ */
+ return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
+}
+
+static int arch_build_bp_info(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw)
+{
+ hw->address = attr->bp_addr;
+ hw->mask = 0;
/* Type */
- switch (bp->attr.bp_type) {
+ switch (attr->bp_type) {
case HW_BREAKPOINT_W:
- info->type = X86_BREAKPOINT_WRITE;
+ hw->type = X86_BREAKPOINT_WRITE;
break;
case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
- info->type = X86_BREAKPOINT_RW;
+ hw->type = X86_BREAKPOINT_RW;
break;
case HW_BREAKPOINT_X:
/*
@@ -254,23 +260,23 @@ static int arch_build_bp_info(struct perf_event *bp)
* acceptable for kprobes. On non-kprobes kernels, we don't
* allow kernel breakpoints at all.
*/
- if (bp->attr.bp_addr >= TASK_SIZE_MAX) {
+ if (attr->bp_addr >= TASK_SIZE_MAX) {
#ifdef CONFIG_KPROBES
- if (within_kprobe_blacklist(bp->attr.bp_addr))
+ if (within_kprobe_blacklist(attr->bp_addr))
return -EINVAL;
#else
return -EINVAL;
#endif
}
- info->type = X86_BREAKPOINT_EXECUTE;
+ hw->type = X86_BREAKPOINT_EXECUTE;
/*
* x86 inst breakpoints need to have a specific undefined len.
* But we still need to check userspace is not trying to setup
* an unsupported length, to get a range breakpoint for example.
*/
- if (bp->attr.bp_len == sizeof(long)) {
- info->len = X86_BREAKPOINT_LEN_X;
+ if (attr->bp_len == sizeof(long)) {
+ hw->len = X86_BREAKPOINT_LEN_X;
return 0;
}
default:
@@ -278,28 +284,26 @@ static int arch_build_bp_info(struct perf_event *bp)
}
/* Len */
- info->mask = 0;
-
- switch (bp->attr.bp_len) {
+ switch (attr->bp_len) {
case HW_BREAKPOINT_LEN_1:
- info->len = X86_BREAKPOINT_LEN_1;
+ hw->len = X86_BREAKPOINT_LEN_1;
break;
case HW_BREAKPOINT_LEN_2:
- info->len = X86_BREAKPOINT_LEN_2;
+ hw->len = X86_BREAKPOINT_LEN_2;
break;
case HW_BREAKPOINT_LEN_4:
- info->len = X86_BREAKPOINT_LEN_4;
+ hw->len = X86_BREAKPOINT_LEN_4;
break;
#ifdef CONFIG_X86_64
case HW_BREAKPOINT_LEN_8:
- info->len = X86_BREAKPOINT_LEN_8;
+ hw->len = X86_BREAKPOINT_LEN_8;
break;
#endif
default:
/* AMD range breakpoint */
- if (!is_power_of_2(bp->attr.bp_len))
+ if (!is_power_of_2(attr->bp_len))
return -EINVAL;
- if (bp->attr.bp_addr & (bp->attr.bp_len - 1))
+ if (attr->bp_addr & (attr->bp_len - 1))
return -EINVAL;
if (!boot_cpu_has(X86_FEATURE_BPEXT))
@@ -312,8 +316,8 @@ static int arch_build_bp_info(struct perf_event *bp)
* breakpoints, then we'll have to check for kprobe-blacklisted
* addresses anywhere in the range.
*/
- info->mask = bp->attr.bp_len - 1;
- info->len = X86_BREAKPOINT_LEN_1;
+ hw->mask = attr->bp_len - 1;
+ hw->len = X86_BREAKPOINT_LEN_1;
}
return 0;
@@ -322,22 +326,23 @@ static int arch_build_bp_info(struct perf_event *bp)
/*
* Validate the arch-specific HW Breakpoint register settings
*/
-int arch_validate_hwbkpt_settings(struct perf_event *bp)
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw)
{
- struct arch_hw_breakpoint *info = counter_arch_bp(bp);
unsigned int align;
int ret;
- ret = arch_build_bp_info(bp);
+ ret = arch_build_bp_info(bp, attr, hw);
if (ret)
return ret;
- switch (info->len) {
+ switch (hw->len) {
case X86_BREAKPOINT_LEN_1:
align = 0;
- if (info->mask)
- align = info->mask;
+ if (hw->mask)
+ align = hw->mask;
break;
case X86_BREAKPOINT_LEN_2:
align = 1;
@@ -358,7 +363,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
* Check that the low-order bits of the address are appropriate
* for the alignment implied by len.
*/
- if (info->address & align)
+ if (hw->address & align)
return -EINVAL;
return 0;
diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S
new file mode 100644
index 000000000000..ddeeaac8adda
--- /dev/null
+++ b/arch/x86/kernel/irqflags.S
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/asm.h>
+#include <asm/export.h>
+#include <linux/linkage.h>
+
+/*
+ * unsigned long native_save_fl(void)
+ */
+ENTRY(native_save_fl)
+ pushf
+ pop %_ASM_AX
+ ret
+ENDPROC(native_save_fl)
+EXPORT_SYMBOL(native_save_fl)
+
+/*
+ * void native_restore_fl(unsigned long flags)
+ * %eax/%rdi: flags
+ */
+ENTRY(native_restore_fl)
+ push %_ASM_ARG1
+ popf
+ ret
+ENDPROC(native_restore_fl)
+EXPORT_SYMBOL(native_restore_fl)
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index ae38dccf0c8f..2b949f4fd4d8 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -105,14 +105,4 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig
}
#endif
-#ifdef CONFIG_KPROBES_ON_FTRACE
-extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb);
-#else
-static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
-{
- return 0;
-}
-#endif
#endif
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 6f4d42377fe5..b0d1e81c96bb 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -66,8 +66,6 @@
#include "common.h"
-void jprobe_return_end(void);
-
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
@@ -395,8 +393,6 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
- (u8 *) real;
if ((s64) (s32) newdisp != newdisp) {
pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
- pr_err("\tSrc: %p, Dest: %p, old disp: %x\n",
- src, real, insn->displacement.value);
return 0;
}
disp = (u8 *) dest + insn_offset_displacement(insn);
@@ -596,7 +592,6 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
* stepping.
*/
regs->ip = (unsigned long)p->ainsn.insn;
- preempt_enable_no_resched();
return;
}
#endif
@@ -640,8 +635,7 @@ static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
* Raise a BUG or we'll continue in an endless reentering loop
* and eventually a stack overflow.
*/
- printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
- p->addr);
+ pr_err("Unrecoverable kprobe detected.\n");
dump_kprobe(p);
BUG();
default:
@@ -669,12 +663,10 @@ int kprobe_int3_handler(struct pt_regs *regs)
addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
/*
- * We don't want to be preempted for the entire
- * duration of kprobe processing. We conditionally
- * re-enable preemption at the end of this function,
- * and also in reenter_kprobe() and setup_singlestep().
+ * We don't want to be preempted for the entire duration of kprobe
+ * processing. Since int3 and debug trap disables irqs and we clear
+ * IF while singlestepping, it must be no preemptible.
*/
- preempt_disable();
kcb = get_kprobe_ctlblk();
p = get_kprobe(addr);
@@ -690,13 +682,14 @@ int kprobe_int3_handler(struct pt_regs *regs)
/*
* If we have no pre-handler or it returned 0, we
* continue with normal processing. If we have a
- * pre-handler and it returned non-zero, it prepped
- * for calling the break_handler below on re-entry
- * for jprobe processing, so get out doing nothing
- * more here.
+ * pre-handler and it returned non-zero, that means
+ * user handler setup registers to exit to another
+ * instruction, we must skip the single stepping.
*/
if (!p->pre_handler || !p->pre_handler(p, regs))
setup_singlestep(p, regs, kcb, 0);
+ else
+ reset_current_kprobe();
return 1;
}
} else if (*addr != BREAKPOINT_INSTRUCTION) {
@@ -710,18 +703,9 @@ int kprobe_int3_handler(struct pt_regs *regs)
* the original instruction.
*/
regs->ip = (unsigned long)addr;
- preempt_enable_no_resched();
return 1;
- } else if (kprobe_running()) {
- p = __this_cpu_read(current_kprobe);
- if (p->break_handler && p->break_handler(p, regs)) {
- if (!skip_singlestep(p, regs, kcb))
- setup_singlestep(p, regs, kcb, 0);
- return 1;
- }
} /* else: not a kprobe fault; let the kernel handle it */
- preempt_enable_no_resched();
return 0;
}
NOKPROBE_SYMBOL(kprobe_int3_handler);
@@ -972,8 +956,6 @@ int kprobe_debug_handler(struct pt_regs *regs)
}
reset_current_kprobe();
out:
- preempt_enable_no_resched();
-
/*
* if somebody else is singlestepping across a probe point, flags
* will have TF set, in which case, continue the remaining processing
@@ -1020,7 +1002,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
restore_previous_kprobe(kcb);
else
reset_current_kprobe();
- preempt_enable_no_resched();
} else if (kcb->kprobe_status == KPROBE_HIT_ACTIVE ||
kcb->kprobe_status == KPROBE_HIT_SSDONE) {
/*
@@ -1083,93 +1064,6 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
}
NOKPROBE_SYMBOL(kprobe_exceptions_notify);
-int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct jprobe *jp = container_of(p, struct jprobe, kp);
- unsigned long addr;
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- kcb->jprobe_saved_regs = *regs;
- kcb->jprobe_saved_sp = stack_addr(regs);
- addr = (unsigned long)(kcb->jprobe_saved_sp);
-
- /*
- * As Linus pointed out, gcc assumes that the callee
- * owns the argument space and could overwrite it, e.g.
- * tailcall optimization. So, to be absolutely safe
- * we also save and restore enough stack bytes to cover
- * the argument area.
- * Use __memcpy() to avoid KASAN stack out-of-bounds reports as we copy
- * raw stack chunk with redzones:
- */
- __memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr));
- regs->ip = (unsigned long)(jp->entry);
-
- /*
- * jprobes use jprobe_return() which skips the normal return
- * path of the function, and this messes up the accounting of the
- * function graph tracer to get messed up.
- *
- * Pause function graph tracing while performing the jprobe function.
- */
- pause_graph_tracing();
- return 1;
-}
-NOKPROBE_SYMBOL(setjmp_pre_handler);
-
-void jprobe_return(void)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- /* Unpoison stack redzones in the frames we are going to jump over. */
- kasan_unpoison_stack_above_sp_to(kcb->jprobe_saved_sp);
-
- asm volatile (
-#ifdef CONFIG_X86_64
- " xchg %%rbx,%%rsp \n"
-#else
- " xchgl %%ebx,%%esp \n"
-#endif
- " int3 \n"
- " .globl jprobe_return_end\n"
- " jprobe_return_end: \n"
- " nop \n"::"b"
- (kcb->jprobe_saved_sp):"memory");
-}
-NOKPROBE_SYMBOL(jprobe_return);
-NOKPROBE_SYMBOL(jprobe_return_end);
-
-int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- u8 *addr = (u8 *) (regs->ip - 1);
- struct jprobe *jp = container_of(p, struct jprobe, kp);
- void *saved_sp = kcb->jprobe_saved_sp;
-
- if ((addr > (u8 *) jprobe_return) &&
- (addr < (u8 *) jprobe_return_end)) {
- if (stack_addr(regs) != saved_sp) {
- struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
- printk(KERN_ERR
- "current sp %p does not match saved sp %p\n",
- stack_addr(regs), saved_sp);
- printk(KERN_ERR "Saved registers for jprobe %p\n", jp);
- show_regs(saved_regs);
- printk(KERN_ERR "Current registers\n");
- show_regs(regs);
- BUG();
- }
- /* It's OK to start function graph tracing again */
- unpause_graph_tracing();
- *regs = kcb->jprobe_saved_regs;
- __memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp));
- preempt_enable_no_resched();
- return 1;
- }
- return 0;
-}
-NOKPROBE_SYMBOL(longjmp_break_handler);
-
bool arch_within_kprobe_blacklist(unsigned long addr)
{
bool is_in_entry_trampoline_section = false;
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 8dc0161cec8f..ef819e19650b 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -25,36 +25,6 @@
#include "common.h"
-static nokprobe_inline
-void __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb, unsigned long orig_ip)
-{
- /*
- * Emulate singlestep (and also recover regs->ip)
- * as if there is a 5byte nop
- */
- regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
- if (unlikely(p->post_handler)) {
- kcb->kprobe_status = KPROBE_HIT_SSDONE;
- p->post_handler(p, regs, 0);
- }
- __this_cpu_write(current_kprobe, NULL);
- if (orig_ip)
- regs->ip = orig_ip;
-}
-
-int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
-{
- if (kprobe_ftrace(p)) {
- __skip_singlestep(p, regs, kcb, 0);
- preempt_enable_no_resched();
- return 1;
- }
- return 0;
-}
-NOKPROBE_SYMBOL(skip_singlestep);
-
/* Ftrace callback handler for kprobes -- called under preepmt disabed */
void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *regs)
@@ -75,18 +45,25 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
regs->ip = ip + sizeof(kprobe_opcode_t);
- /* To emulate trap based kprobes, preempt_disable here */
- preempt_disable();
__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (!p->pre_handler || !p->pre_handler(p, regs)) {
- __skip_singlestep(p, regs, kcb, orig_ip);
- preempt_enable_no_resched();
+ /*
+ * Emulate singlestep (and also recover regs->ip)
+ * as if there is a 5byte nop
+ */
+ regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+ if (unlikely(p->post_handler)) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ regs->ip = orig_ip;
}
/*
- * If pre_handler returns !0, it sets regs->ip and
- * resets current kprobe, and keep preempt count +1.
+ * If pre_handler returns !0, it changes regs->ip. We have to
+ * skip emulating post_handler.
*/
+ __this_cpu_write(current_kprobe, NULL);
}
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 203d398802a3..eaf02f2e7300 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -491,7 +491,6 @@ int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
if (!reenter)
reset_current_kprobe();
- preempt_enable_no_resched();
return 1;
}
return 0;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5b2300b818af..a37bda38d205 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -154,7 +154,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
for (;;) {
if (!n.halted)
- prepare_to_swait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
if (hlist_unhashed(&n.link))
break;
@@ -188,7 +188,7 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n)
if (n->halted)
smp_send_reschedule(n->cpu);
else if (swq_has_sleeper(&n->wq))
- swake_up(&n->wq);
+ swake_up_one(&n->wq);
}
static void apf_task_wake_all(void)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index bf8d1eb7fca3..3b8e7c13c614 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -138,6 +138,7 @@ static unsigned long kvm_get_tsc_khz(void)
src = &hv_clock[cpu].pvti;
tsc_khz = pvclock_tsc_khz(src);
put_cpu();
+ setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
return tsc_khz;
}
@@ -319,6 +320,8 @@ void __init kvmclock_init(void)
printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
msr_kvm_system_time, msr_kvm_wall_clock);
+ pvclock_set_pvti_cpu0_va(hv_clock);
+
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
@@ -366,14 +369,11 @@ int __init kvm_setup_vsyscall_timeinfo(void)
vcpu_time = &hv_clock[cpu].pvti;
flags = pvclock_read_flags(vcpu_time);
- if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
- put_cpu();
- return 1;
- }
-
- pvclock_set_pvti_cpu0_va(hv_clock);
put_cpu();
+ if (!(flags & PVCLOCK_TSC_STABLE_BIT))
+ return 1;
+
kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
#endif
return 0;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c2f7d1d2a5c3..db9656e13ea0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -221,6 +221,11 @@ static void notrace start_secondary(void *unused)
#ifdef CONFIG_X86_32
/* switch away from the initial page table */
load_cr3(swapper_pg_dir);
+ /*
+ * Initialize the CR4 shadow before doing anything that could
+ * try to read it.
+ */
+ cr4_init_shadow();
__flush_tlb_all();
#endif
load_current_idt();
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 92fd433c50b9..1bbec387d289 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -85,7 +85,7 @@ config KVM_AMD_SEV
def_bool y
bool "AMD Secure Encrypted Virtualization (SEV) support"
depends on KVM_AMD && X86_64
- depends on CRYPTO_DEV_CCP && CRYPTO_DEV_CCP_DD && CRYPTO_DEV_SP_PSP
+ depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
---help---
Provides support for launching Encrypted VMs on AMD processors.
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b5cd8465d44f..d536d457517b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1379,7 +1379,7 @@ static void apic_timer_expired(struct kvm_lapic *apic)
* using swait_active() is safe.
*/
if (swait_active(q))
- swake_up(q);
+ swake_up_one(q);
if (apic_lvtt_tscdeadline(apic))
ktimer->expired_tscdeadline = ktimer->tscdeadline;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d594690d8b95..6b8f11521c41 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -890,7 +890,7 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
- page = (void *)__get_free_page(GFP_KERNEL);
+ page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
if (!page)
return -ENOMEM;
cache->objects[cache->nobjs++] = page;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1689f433f3a0..5d8e317c2b04 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2571,6 +2571,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
#ifdef CONFIG_X86_64
int cpu = raw_smp_processor_id();
+ unsigned long fs_base, kernel_gs_base;
#endif
int i;
@@ -2586,12 +2587,20 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
#ifdef CONFIG_X86_64
- save_fsgs_for_kvm();
- vmx->host_state.fs_sel = current->thread.fsindex;
- vmx->host_state.gs_sel = current->thread.gsindex;
-#else
- savesegment(fs, vmx->host_state.fs_sel);
- savesegment(gs, vmx->host_state.gs_sel);
+ if (likely(is_64bit_mm(current->mm))) {
+ save_fsgs_for_kvm();
+ vmx->host_state.fs_sel = current->thread.fsindex;
+ vmx->host_state.gs_sel = current->thread.gsindex;
+ fs_base = current->thread.fsbase;
+ kernel_gs_base = current->thread.gsbase;
+ } else {
+#endif
+ savesegment(fs, vmx->host_state.fs_sel);
+ savesegment(gs, vmx->host_state.gs_sel);
+#ifdef CONFIG_X86_64
+ fs_base = read_msr(MSR_FS_BASE);
+ kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
+ }
#endif
if (!(vmx->host_state.fs_sel & 7)) {
vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
@@ -2611,10 +2620,10 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
savesegment(ds, vmx->host_state.ds_sel);
savesegment(es, vmx->host_state.es_sel);
- vmcs_writel(HOST_FS_BASE, current->thread.fsbase);
+ vmcs_writel(HOST_FS_BASE, fs_base);
vmcs_writel(HOST_GS_BASE, cpu_kernelmode_gs_base(cpu));
- vmx->msr_host_kernel_gs_base = current->thread.gsbase;
+ vmx->msr_host_kernel_gs_base = kernel_gs_base;
if (is_long_mode(&vmx->vcpu))
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#else
@@ -4322,11 +4331,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
vmcs_conf->order = get_order(vmcs_conf->size);
vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
- /* KVM supports Enlightened VMCS v1 only */
- if (static_branch_unlikely(&enable_evmcs))
- vmcs_conf->revision_id = KVM_EVMCS_VERSION;
- else
- vmcs_conf->revision_id = vmx_msr_low;
+ vmcs_conf->revision_id = vmx_msr_low;
vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
@@ -4396,7 +4401,13 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
return NULL;
vmcs = page_address(pages);
memset(vmcs, 0, vmcs_config.size);
- vmcs->revision_id = vmcs_config.revision_id; /* vmcs revision id */
+
+ /* KVM supports Enlightened VMCS v1 only */
+ if (static_branch_unlikely(&enable_evmcs))
+ vmcs->revision_id = KVM_EVMCS_VERSION;
+ else
+ vmcs->revision_id = vmcs_config.revision_id;
+
return vmcs;
}
@@ -4564,6 +4575,19 @@ static __init int alloc_kvm_area(void)
return -ENOMEM;
}
+ /*
+ * When eVMCS is enabled, alloc_vmcs_cpu() sets
+ * vmcs->revision_id to KVM_EVMCS_VERSION instead of
+ * revision_id reported by MSR_IA32_VMX_BASIC.
+ *
+ * However, even though not explictly documented by
+ * TLFS, VMXArea passed as VMXON argument should
+ * still be marked with revision_id reported by
+ * physical CPU.
+ */
+ if (static_branch_unlikely(&enable_evmcs))
+ vmcs->revision_id = vmcs_config.revision_id;
+
per_cpu(vmxarea, cpu) = vmcs;
}
return 0;
@@ -7869,6 +7893,8 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
HRTIMER_MODE_REL_PINNED);
vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
+ vmx->nested.vpid02 = allocate_vpid();
+
vmx->nested.vmxon = true;
return 0;
@@ -8456,21 +8482,20 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
/* Emulate the VMPTRST instruction */
static int handle_vmptrst(struct kvm_vcpu *vcpu)
{
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
- gva_t vmcs_gva;
+ unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION);
+ u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
struct x86_exception e;
+ gva_t gva;
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (get_vmx_mem_address(vcpu, exit_qualification,
- vmx_instruction_info, true, &vmcs_gva))
+ if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva))
return 1;
/* *_system ok, nested_vmx_check_permission has verified cpl=0 */
- if (kvm_write_guest_virt_system(vcpu, vmcs_gva,
- (void *)&to_vmx(vcpu)->nested.current_vmptr,
- sizeof(u64), &e)) {
+ if (kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
+ sizeof(gpa_t), &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}
@@ -10346,11 +10371,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
goto free_vmcs;
}
- if (nested) {
+ if (nested)
nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
kvm_vcpu_apicv_active(&vmx->vcpu));
- vmx->nested.vpid02 = allocate_vpid();
- }
vmx->nested.posted_intr_nv = -1;
vmx->nested.current_vmptr = -1ull;
@@ -10367,7 +10390,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
return &vmx->vcpu;
free_vmcs:
- free_vpid(vmx->nested.vpid02);
free_loaded_vmcs(vmx->loaded_vmcs);
free_msrs:
kfree(vmx->guest_msrs);
@@ -11753,7 +11775,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- u32 msr_entry_idx;
u32 exit_qual;
int r;
@@ -11775,10 +11796,10 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
nested_get_vmcs12_pages(vcpu, vmcs12);
r = EXIT_REASON_MSR_LOAD_FAIL;
- msr_entry_idx = nested_vmx_load_msr(vcpu,
- vmcs12->vm_entry_msr_load_addr,
- vmcs12->vm_entry_msr_load_count);
- if (msr_entry_idx)
+ exit_qual = nested_vmx_load_msr(vcpu,
+ vmcs12->vm_entry_msr_load_addr,
+ vmcs12->vm_entry_msr_load_count);
+ if (exit_qual)
goto fail;
/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0046aa70205a..2b812b3c5088 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1097,6 +1097,7 @@ static u32 msr_based_features[] = {
MSR_F10H_DECFG,
MSR_IA32_UCODE_REV,
+ MSR_IA32_ARCH_CAPABILITIES,
};
static unsigned int num_msr_based_features;
@@ -1105,7 +1106,8 @@ static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
{
switch (msr->index) {
case MSR_IA32_UCODE_REV:
- rdmsrl(msr->index, msr->data);
+ case MSR_IA32_ARCH_CAPABILITIES:
+ rdmsrl_safe(msr->index, &msr->data);
break;
default:
if (kvm_x86_ops->get_msr_feature(msr))
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 55799873ebe5..8f6cc71e0848 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -1441,8 +1441,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth)
/* sub esp,STACK_SIZE */
EMIT2_off32(0x81, 0xEC, STACK_SIZE);
- /* sub ebp,SCRATCH_SIZE+4+12*/
- EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 16);
+ /* sub ebp,SCRATCH_SIZE+12*/
+ EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 12);
/* xor ebx,ebx */
EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));
@@ -1475,8 +1475,8 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth)
/* mov edx,dword ptr [ebp+off]*/
EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));
- /* add ebp,SCRATCH_SIZE+4+12*/
- EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 16);
+ /* add ebp,SCRATCH_SIZE+12*/
+ EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 12);
/* mov ebx,dword ptr [ebp-12]*/
EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 77873ce700ae..ee5d08f25ce4 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -417,7 +417,7 @@ static void __init __map_region(efi_memory_desc_t *md, u64 va)
if (!(md->attribute & EFI_MEMORY_WB))
flags |= _PAGE_PCD;
- if (sev_active())
+ if (sev_active() && md->type != EFI_MEMORY_MAPPED_IO)
flags |= _PAGE_ENC;
pfn = md->phys_addr >> PAGE_SHIFT;
@@ -636,6 +636,8 @@ void efi_switch_mm(struct mm_struct *mm)
#ifdef CONFIG_EFI_MIXED
extern efi_status_t efi64_thunk(u32, ...);
+static DEFINE_SPINLOCK(efi_runtime_lock);
+
#define runtime_service32(func) \
({ \
u32 table = (u32)(unsigned long)efi.systab; \
@@ -657,17 +659,14 @@ extern efi_status_t efi64_thunk(u32, ...);
#define efi_thunk(f, ...) \
({ \
efi_status_t __s; \
- unsigned long __flags; \
u32 __func; \
\
- local_irq_save(__flags); \
arch_efi_call_virt_setup(); \
\
__func = runtime_service32(f); \
__s = efi64_thunk(__func, __VA_ARGS__); \
\
arch_efi_call_virt_teardown(); \
- local_irq_restore(__flags); \
\
__s; \
})
@@ -702,14 +701,17 @@ static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc)
{
efi_status_t status;
u32 phys_tm, phys_tc;
+ unsigned long flags;
spin_lock(&rtc_lock);
+ spin_lock_irqsave(&efi_runtime_lock, flags);
phys_tm = virt_to_phys_or_null(tm);
phys_tc = virt_to_phys_or_null(tc);
status = efi_thunk(get_time, phys_tm, phys_tc);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
spin_unlock(&rtc_lock);
return status;
@@ -719,13 +721,16 @@ static efi_status_t efi_thunk_set_time(efi_time_t *tm)
{
efi_status_t status;
u32 phys_tm;
+ unsigned long flags;
spin_lock(&rtc_lock);
+ spin_lock_irqsave(&efi_runtime_lock, flags);
phys_tm = virt_to_phys_or_null(tm);
status = efi_thunk(set_time, phys_tm);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
spin_unlock(&rtc_lock);
return status;
@@ -737,8 +742,10 @@ efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
{
efi_status_t status;
u32 phys_enabled, phys_pending, phys_tm;
+ unsigned long flags;
spin_lock(&rtc_lock);
+ spin_lock_irqsave(&efi_runtime_lock, flags);
phys_enabled = virt_to_phys_or_null(enabled);
phys_pending = virt_to_phys_or_null(pending);
@@ -747,6 +754,7 @@ efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
status = efi_thunk(get_wakeup_time, phys_enabled,
phys_pending, phys_tm);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
spin_unlock(&rtc_lock);
return status;
@@ -757,13 +765,16 @@ efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
{
efi_status_t status;
u32 phys_tm;
+ unsigned long flags;
spin_lock(&rtc_lock);
+ spin_lock_irqsave(&efi_runtime_lock, flags);
phys_tm = virt_to_phys_or_null(tm);
status = efi_thunk(set_wakeup_time, enabled, phys_tm);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
spin_unlock(&rtc_lock);
return status;
@@ -781,6 +792,9 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
efi_status_t status;
u32 phys_name, phys_vendor, phys_attr;
u32 phys_data_size, phys_data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
phys_data_size = virt_to_phys_or_null(data_size);
phys_vendor = virt_to_phys_or_null(vendor);
@@ -791,6 +805,8 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
status = efi_thunk(get_variable, phys_name, phys_vendor,
phys_attr, phys_data_size, phys_data);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
return status;
}
@@ -800,6 +816,34 @@ efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor,
{
u32 phys_name, phys_vendor, phys_data;
efi_status_t status;
+ unsigned long flags;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
+
+ phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
+ phys_vendor = virt_to_phys_or_null(vendor);
+ phys_data = virt_to_phys_or_null_size(data, data_size);
+
+ /* If data_size is > sizeof(u32) we've got problems */
+ status = efi_thunk(set_variable, phys_name, phys_vendor,
+ attr, data_size, phys_data);
+
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
+ return status;
+}
+
+static efi_status_t
+efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
+ u32 attr, unsigned long data_size,
+ void *data)
+{
+ u32 phys_name, phys_vendor, phys_data;
+ efi_status_t status;
+ unsigned long flags;
+
+ if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
+ return EFI_NOT_READY;
phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
phys_vendor = virt_to_phys_or_null(vendor);
@@ -809,6 +853,8 @@ efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor,
status = efi_thunk(set_variable, phys_name, phys_vendor,
attr, data_size, phys_data);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
return status;
}
@@ -819,6 +865,9 @@ efi_thunk_get_next_variable(unsigned long *name_size,
{
efi_status_t status;
u32 phys_name_size, phys_name, phys_vendor;
+ unsigned long flags;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
phys_name_size = virt_to_phys_or_null(name_size);
phys_vendor = virt_to_phys_or_null(vendor);
@@ -827,6 +876,8 @@ efi_thunk_get_next_variable(unsigned long *name_size,
status = efi_thunk(get_next_variable, phys_name_size,
phys_name, phys_vendor);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
return status;
}
@@ -835,10 +886,15 @@ efi_thunk_get_next_high_mono_count(u32 *count)
{
efi_status_t status;
u32 phys_count;
+ unsigned long flags;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
phys_count = virt_to_phys_or_null(count);
status = efi_thunk(get_next_high_mono_count, phys_count);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
return status;
}
@@ -847,10 +903,15 @@ efi_thunk_reset_system(int reset_type, efi_status_t status,
unsigned long data_size, efi_char16_t *data)
{
u32 phys_data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
phys_data = virt_to_phys_or_null_size(data, data_size);
efi_thunk(reset_system, reset_type, status, data_size, phys_data);
+
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
}
static efi_status_t
@@ -872,10 +933,40 @@ efi_thunk_query_variable_info(u32 attr, u64 *storage_space,
{
efi_status_t status;
u32 phys_storage, phys_remaining, phys_max;
+ unsigned long flags;
+
+ if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
+ return EFI_UNSUPPORTED;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
+
+ phys_storage = virt_to_phys_or_null(storage_space);
+ phys_remaining = virt_to_phys_or_null(remaining_space);
+ phys_max = virt_to_phys_or_null(max_variable_size);
+
+ status = efi_thunk(query_variable_info, attr, phys_storage,
+ phys_remaining, phys_max);
+
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
+ return status;
+}
+
+static efi_status_t
+efi_thunk_query_variable_info_nonblocking(u32 attr, u64 *storage_space,
+ u64 *remaining_space,
+ u64 *max_variable_size)
+{
+ efi_status_t status;
+ u32 phys_storage, phys_remaining, phys_max;
+ unsigned long flags;
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
+ if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
+ return EFI_NOT_READY;
+
phys_storage = virt_to_phys_or_null(storage_space);
phys_remaining = virt_to_phys_or_null(remaining_space);
phys_max = virt_to_phys_or_null(max_variable_size);
@@ -883,6 +974,8 @@ efi_thunk_query_variable_info(u32 attr, u64 *storage_space,
status = efi_thunk(query_variable_info, attr, phys_storage,
phys_remaining, phys_max);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
return status;
}
@@ -908,9 +1001,11 @@ void efi_thunk_runtime_setup(void)
efi.get_variable = efi_thunk_get_variable;
efi.get_next_variable = efi_thunk_get_next_variable;
efi.set_variable = efi_thunk_set_variable;
+ efi.set_variable_nonblocking = efi_thunk_set_variable_nonblocking;
efi.get_next_high_mono_count = efi_thunk_get_next_high_mono_count;
efi.reset_system = efi_thunk_reset_system;
efi.query_variable_info = efi_thunk_query_variable_info;
+ efi.query_variable_info_nonblocking = efi_thunk_query_variable_info_nonblocking;
efi.update_capsule = efi_thunk_update_capsule;
efi.query_capsule_caps = efi_thunk_query_capsule_caps;
}
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 36c1f8b9f7e0..844d31cb8a0c 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -105,12 +105,11 @@ early_param("efi_no_storage_paranoia", setup_storage_paranoia);
*/
void efi_delete_dummy_variable(void)
{
- efi.set_variable((efi_char16_t *)efi_dummy_name,
- &EFI_DUMMY_GUID,
- EFI_VARIABLE_NON_VOLATILE |
- EFI_VARIABLE_BOOTSERVICE_ACCESS |
- EFI_VARIABLE_RUNTIME_ACCESS,
- 0, NULL);
+ efi.set_variable_nonblocking((efi_char16_t *)efi_dummy_name,
+ &EFI_DUMMY_GUID,
+ EFI_VARIABLE_NON_VOLATILE |
+ EFI_VARIABLE_BOOTSERVICE_ACCESS |
+ EFI_VARIABLE_RUNTIME_ACCESS, 0, NULL);
}
/*
@@ -249,7 +248,8 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
int num_entries;
void *new;
- if (efi_mem_desc_lookup(addr, &md)) {
+ if (efi_mem_desc_lookup(addr, &md) ||
+ md.type != EFI_BOOT_SERVICES_DATA) {
pr_err("Failed to lookup EFI memory descriptor for %pa\n", &addr);
return;
}
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 2e9ee023e6bc..81a8e33115ad 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -6,7 +6,7 @@ purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string
targets += $(purgatory-y)
PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
-$(obj)/sha256.o: $(srctree)/lib/sha256.c
+$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE
$(call if_changed_rule,cc_o_c)
LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c
index 744afdc18cf3..56c44d865f7b 100644
--- a/arch/x86/um/mem_32.c
+++ b/arch/x86/um/mem_32.c
@@ -16,7 +16,7 @@ static int __init gate_vma_init(void)
if (!FIXADDR_USER_START)
return 0;
- gate_vma.vm_mm = NULL;
+ vma_init(&gate_vma, NULL);
gate_vma.vm_start = FIXADDR_USER_START;
gate_vma.vm_end = FIXADDR_USER_END;
gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 8d4e2e1ae60b..439a94bf89ad 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1207,12 +1207,20 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_setup_features();
- xen_setup_machphys_mapping();
-
/* Install Xen paravirt ops */
pv_info = xen_info;
pv_init_ops.patch = paravirt_patch_default;
pv_cpu_ops = xen_cpu_ops;
+ xen_init_irq_ops();
+
+ /*
+ * Setup xen_vcpu early because it is needed for
+ * local_irq_disable(), irqs_disabled(), e.g. in printk().
+ *
+ * Don't do the full vcpu_info placement stuff until we have
+ * the cpu_possible_mask and a non-dummy shared_info.
+ */
+ xen_vcpu_info_reset(0);
x86_platform.get_nmi_reason = xen_get_nmi_reason;
@@ -1225,10 +1233,12 @@ asmlinkage __visible void __init xen_start_kernel(void)
* Set up some pagetable state before starting to set any ptes.
*/
+ xen_setup_machphys_mapping();
xen_init_mmu_ops();
/* Prevent unwanted bits from being set in PTEs. */
__supported_pte_mask &= ~_PAGE_GLOBAL;
+ __default_kernel_pte_mask &= ~_PAGE_GLOBAL;
/*
* Prevent page tables from being allocated in highmem, even
@@ -1249,20 +1259,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
get_cpu_cap(&boot_cpu_data);
x86_configure_nx();
- xen_init_irq_ops();
-
/* Let's presume PV guests always boot on vCPU with id 0. */
per_cpu(xen_vcpu_id, 0) = 0;
- /*
- * Setup xen_vcpu early because idt_setup_early_handler needs it for
- * local_irq_disable(), irqs_disabled().
- *
- * Don't do the full vcpu_info placement stuff until we have
- * the cpu_possible_mask and a non-dummy shared_info.
- */
- xen_vcpu_info_reset(0);
-
idt_setup_early_handler();
xen_init_capabilities();
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 74179852e46c..7515a19fd324 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -128,8 +128,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
void __init xen_init_irq_ops(void)
{
- /* For PVH we use default pv_irq_ops settings. */
- if (!xen_feature(XENFEAT_hvm_callback_vector))
- pv_irq_ops = xen_irq_ops;
+ pv_irq_ops = xen_irq_ops;
x86_init.irqs.intr_init = xen_init_IRQ;
}