diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-21 02:58:18 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-21 02:58:18 +0100 |
commit | 24b5e20f11a75866bbffc46c30a22fa50612a769 (patch) | |
tree | 2dab5fc6714a5ad1e31bdea1e954fbd69704ce72 | |
parent | Merge branch 'core-objtool-for-linus' of git://git.kernel.org/pub/scm/linux/k... (diff) | |
parent | x86/mm/pat: Fix boot crash when 1GB pages are not supported by the CPU (diff) | |
download | linux-24b5e20f11a75866bbffc46c30a22fa50612a769.tar.xz linux-24b5e20f11a75866bbffc46c30a22fa50612a769.zip |
Merge branch 'efi-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull EFI updates from Ingo Molnar:
"The main changes are:
- Use separate EFI page tables when executing EFI firmware code.
This isolates the EFI context from the rest of the kernel, which
has security and general robustness advantages. (Matt Fleming)
- Run regular UEFI firmware with interrupts enabled. This is already
the status quo under other OSs. (Ard Biesheuvel)
- Various x86 EFI enhancements, such as the use of non-executable
attributes for EFI memory mappings. (Sai Praneeth Prakhya)
- Various arm64 UEFI enhancements. (Ard Biesheuvel)
- ... various fixes and cleanups.
The separate EFI page tables feature got delayed twice already,
because it's an intrusive change and we didn't feel confident about
it - third time's the charm we hope!"
* 'efi-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (37 commits)
x86/mm/pat: Fix boot crash when 1GB pages are not supported by the CPU
x86/efi: Only map kernel text for EFI mixed mode
x86/efi: Map EFI_MEMORY_{XP,RO} memory region bits to EFI page tables
x86/mm/pat: Don't implicitly allow _PAGE_RW in kernel_map_pages_in_pgd()
efi/arm*: Perform hardware compatibility check
efi/arm64: Check for h/w support before booting a >4 KB granular kernel
efi/arm: Check for LPAE support before booting a LPAE kernel
efi/arm-init: Use read-only early mappings
efi/efistub: Prevent __init annotations from being used
arm64/vmlinux.lds.S: Handle .init.rodata.xxx and .init.bss sections
efi/arm64: Drop __init annotation from handle_kernel_image()
x86/mm/pat: Use _PAGE_GLOBAL bit for EFI page table mappings
efi/runtime-wrappers: Run UEFI Runtime Services with interrupts enabled
efi: Reformat GUID tables to follow the format in UEFI spec
efi: Add Persistent Memory type name
efi: Add NV memory attribute
x86/efi: Show actual ending addresses in efi_print_memmap
x86/efi/bgrt: Don't ignore the BGRT if the 'valid' bit is 0
efivars: Use to_efivar_entry
efi: Runtime-wrapper: Get rid of the rtc_lock spinlock
...
23 files changed, 512 insertions, 323 deletions
diff --git a/Documentation/efi-stub.txt b/Documentation/efi-stub.txt index 7747024d3bb7..e15746988261 100644 --- a/Documentation/efi-stub.txt +++ b/Documentation/efi-stub.txt @@ -10,12 +10,12 @@ arch/x86/boot/header.S and arch/x86/boot/compressed/eboot.c, respectively. For ARM the EFI stub is implemented in arch/arm/boot/compressed/efi-header.S and arch/arm/boot/compressed/efi-stub.c. EFI stub code that is shared -between architectures is in drivers/firmware/efi/efi-stub-helper.c. +between architectures is in drivers/firmware/efi/libstub. For arm64, there is no compressed kernel support, so the Image itself masquerades as a PE/COFF image and the EFI stub is linked into the kernel. The arm64 EFI stub lives in arch/arm64/kernel/efi-entry.S -and arch/arm64/kernel/efi-stub.c. +and drivers/firmware/efi/libstub/arm64-stub.c. By using the EFI boot stub it's possible to boot a Linux kernel without the use of a conventional EFI boot loader, such as grub or diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 05712ac83e38..c518dce7da4d 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -16,6 +16,8 @@ ffffec0000000000 - fffffc0000000000 (=44 bits) kasan shadow memory (16TB) ... unused hole ... ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... +ffffffef00000000 - ffffffff00000000 (=64 GB) EFI region mapping space +... unused hole ... ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 ffffffffa0000000 - ffffffffff5fffff (=1525 MB) module mapping space ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls @@ -32,11 +34,9 @@ reference. Current X86-64 implementations only support 40 bits of address space, but we support up to 46 bits. This expands into MBZ space in the page tables. -->trampoline_pgd: - -We map EFI runtime services in the aforementioned PGD in the virtual -range of 64Gb (arbitrarily set, can be raised if needed) - -0xffffffef00000000 - 0xffffffff00000000 +We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual +memory window (this size is arbitrary, it can be raised later if needed). +The mappings are not part of any other kernel PGD and are only available +during EFI runtime calls. -Andi Kleen, Jul 2004 diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 4c56e7a0621b..37f624df68fa 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -135,6 +135,7 @@ SECTIONS CON_INITCALL SECURITY_INITCALL INIT_RAM_FS + *(.init.rodata.* .init.bss) /* from the EFI stub */ } .exit.data : { ARM_EXIT_KEEP(EXIT_DATA) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 08b1f2f6ea50..53748c45e488 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -3,6 +3,7 @@ #include <asm/fpu/api.h> #include <asm/pgtable.h> +#include <asm/tlb.h> /* * We map the EFI regions needed for runtime services non-contiguously, @@ -66,6 +67,17 @@ extern u64 asmlinkage efi_call(void *fp, ...); #define efi_call_phys(f, args...) efi_call((f), args) +/* + * Scratch space used for switching the pagetable in the EFI stub + */ +struct efi_scratch { + u64 r15; + u64 prev_cr3; + pgd_t *efi_pgt; + bool use_pgd; + u64 phys_stack; +} __packed; + #define efi_call_virt(f, ...) \ ({ \ efi_status_t __s; \ @@ -73,7 +85,20 @@ extern u64 asmlinkage efi_call(void *fp, ...); efi_sync_low_kernel_mappings(); \ preempt_disable(); \ __kernel_fpu_begin(); \ + \ + if (efi_scratch.use_pgd) { \ + efi_scratch.prev_cr3 = read_cr3(); \ + write_cr3((unsigned long)efi_scratch.efi_pgt); \ + __flush_tlb_all(); \ + } \ + \ __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \ + \ + if (efi_scratch.use_pgd) { \ + write_cr3(efi_scratch.prev_cr3); \ + __flush_tlb_all(); \ + } \ + \ __kernel_fpu_end(); \ preempt_enable(); \ __s; \ @@ -113,11 +138,12 @@ extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); +extern int __init efi_alloc_page_tables(void); extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init old_map_region(efi_memory_desc_t *md); extern void __init runtime_code_page_mkexec(void); -extern void __init efi_runtime_mkexec(void); +extern void __init efi_runtime_update_mappings(void); extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); extern int __init efi_reuse_config(u64 tables, int nr_tables); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 73de2604d779..d239639e0c1d 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -333,6 +333,7 @@ SECTIONS __brk_limit = .; } + . = ALIGN(PAGE_SIZE); _end = .; STABS_DEBUG diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 4d0b26253042..01be9ec3bf79 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -909,16 +909,25 @@ static void populate_pte(struct cpa_data *cpa, pte = pte_offset_kernel(pmd, start); - while (num_pages-- && start < end) { + /* + * Set the GLOBAL flags only if the PRESENT flag is + * set otherwise pte_present will return true even on + * a non present pte. The canon_pgprot will clear + * _PAGE_GLOBAL for the ancient hardware that doesn't + * support it. + */ + if (pgprot_val(pgprot) & _PAGE_PRESENT) + pgprot_val(pgprot) |= _PAGE_GLOBAL; + else + pgprot_val(pgprot) &= ~_PAGE_GLOBAL; - /* deal with the NX bit */ - if (!(pgprot_val(pgprot) & _PAGE_NX)) - cpa->pfn &= ~_PAGE_NX; + pgprot = canon_pgprot(pgprot); - set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot)); + while (num_pages-- && start < end) { + set_pte(pte, pfn_pte(cpa->pfn, pgprot)); start += PAGE_SIZE; - cpa->pfn += PAGE_SIZE; + cpa->pfn++; pte++; } } @@ -974,11 +983,11 @@ static int populate_pmd(struct cpa_data *cpa, pmd = pmd_offset(pud, start); - set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | + set_pmd(pmd, __pmd(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | massage_pgprot(pmd_pgprot))); start += PMD_SIZE; - cpa->pfn += PMD_SIZE; + cpa->pfn += PMD_SIZE >> PAGE_SHIFT; cur_pages += PMD_SIZE >> PAGE_SHIFT; } @@ -1046,12 +1055,12 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, /* * Map everything starting from the Gb boundary, possibly with 1G pages */ - while (end - start >= PUD_SIZE) { - set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | + while (cpu_has_gbpages && end - start >= PUD_SIZE) { + set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | massage_pgprot(pud_pgprot))); start += PUD_SIZE; - cpa->pfn += PUD_SIZE; + cpa->pfn += PUD_SIZE >> PAGE_SHIFT; cur_pages += PUD_SIZE >> PAGE_SHIFT; pud++; } @@ -1964,6 +1973,9 @@ int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, if (!(page_flags & _PAGE_NX)) cpa.mask_clr = __pgprot(_PAGE_NX); + if (!(page_flags & _PAGE_RW)) + cpa.mask_clr = __pgprot(_PAGE_RW); + cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags); retval = __change_page_attr_set_clr(&cpa, 0); diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index ea48449b2e63..a2433817c987 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -10,6 +10,9 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/init.h> #include <linux/acpi.h> @@ -28,8 +31,7 @@ struct bmp_header { void __init efi_bgrt_init(void) { acpi_status status; - void __iomem *image; - bool ioremapped = false; + void *image; struct bmp_header bmp_header; if (acpi_disabled) @@ -55,11 +57,6 @@ void __init efi_bgrt_init(void) bgrt_tab->status); return; } - if (bgrt_tab->status != 1) { - pr_debug("Ignoring BGRT: invalid status %u (expected 1)\n", - bgrt_tab->status); - return; - } if (bgrt_tab->image_type != 0) { pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n", bgrt_tab->image_type); @@ -70,20 +67,19 @@ void __init efi_bgrt_init(void) return; } - image = efi_lookup_mapped_addr(bgrt_tab->image_address); + image = memremap(bgrt_tab->image_address, sizeof(bmp_header), MEMREMAP_WB); if (!image) { - image = early_ioremap(bgrt_tab->image_address, - sizeof(bmp_header)); - ioremapped = true; - if (!image) { - pr_err("Ignoring BGRT: failed to map image header memory\n"); - return; - } + pr_err("Ignoring BGRT: failed to map image header memory\n"); + return; } - memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); - if (ioremapped) - early_iounmap(image, sizeof(bmp_header)); + memcpy(&bmp_header, image, sizeof(bmp_header)); + memunmap(image); + if (bmp_header.id != 0x4d42) { + pr_err("Ignoring BGRT: Incorrect BMP magic number 0x%x (expected 0x4d42)\n", + bmp_header.id); + return; + } bgrt_image_size = bmp_header.size; bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN); @@ -93,18 +89,14 @@ void __init efi_bgrt_init(void) return; } - if (ioremapped) { - image = early_ioremap(bgrt_tab->image_address, - bmp_header.size); - if (!image) { - pr_err("Ignoring BGRT: failed to map image memory\n"); - kfree(bgrt_image); - bgrt_image = NULL; - return; - } + image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB); + if (!image) { + pr_err("Ignoring BGRT: failed to map image memory\n"); + kfree(bgrt_image); + bgrt_image = NULL; + return; } - memcpy_fromio(bgrt_image, image, bgrt_image_size); - if (ioremapped) - early_iounmap(image, bmp_header.size); + memcpy(bgrt_image, image, bgrt_image_size); + memunmap(image); } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index ad285404ea7f..994a7df84a7b 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -235,10 +235,10 @@ void __init efi_print_memmap(void) char buf[64]; md = p; - pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n", + pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n", i, efi_md_typeattr_format(buf, sizeof(buf), md), md->phys_addr, - md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1, (md->num_pages >> (20 - EFI_PAGE_SHIFT))); } #endif /* EFI_DEBUG */ @@ -815,6 +815,7 @@ static void __init kexec_enter_virtual_mode(void) { #ifdef CONFIG_KEXEC_CORE efi_memory_desc_t *md; + unsigned int num_pages; void *p; efi.systab = NULL; @@ -829,6 +830,12 @@ static void __init kexec_enter_virtual_mode(void) return; } + if (efi_alloc_page_tables()) { + pr_err("Failed to allocate EFI page tables\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + /* * Map efi regions which were passed via setup_data. The virt_addr is a * fixed addr which was used in first kernel of a kexec boot. @@ -843,6 +850,14 @@ static void __init kexec_enter_virtual_mode(void) BUG_ON(!efi.systab); + num_pages = ALIGN(memmap.nr_map * memmap.desc_size, PAGE_SIZE); + num_pages >>= PAGE_SHIFT; + + if (efi_setup_page_tables(memmap.phys_map, num_pages)) { + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + efi_sync_low_kernel_mappings(); /* @@ -869,7 +884,7 @@ static void __init kexec_enter_virtual_mode(void) * This function will switch the EFI runtime services to virtual mode. * Essentially, we look through the EFI memmap and map every region that * has the runtime attribute bit set in its memory descriptor into the - * ->trampoline_pgd page table using a top-down VA allocation scheme. + * efi_pgd page table. * * The old method which used to update that memory descriptor with the * virtual address obtained from ioremap() is still supported when the @@ -879,8 +894,8 @@ static void __init kexec_enter_virtual_mode(void) * * The new method does a pagetable switch in a preemption-safe manner * so that we're in a different address space when calling a runtime - * function. For function arguments passing we do copy the PGDs of the - * kernel page table into ->trampoline_pgd prior to each call. + * function. For function arguments passing we do copy the PUDs of the + * kernel page table into efi_pgd prior to each call. * * Specially for kexec boot, efi runtime maps in previous kernel should * be passed in via setup_data. In that case runtime ranges will be mapped @@ -895,6 +910,12 @@ static void __init __efi_enter_virtual_mode(void) efi.systab = NULL; + if (efi_alloc_page_tables()) { + pr_err("Failed to allocate EFI page tables\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + efi_merge_regions(); new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { @@ -913,7 +934,6 @@ static void __init __efi_enter_virtual_mode(void) } efi_sync_low_kernel_mappings(); - efi_dump_pagetable(); if (efi_is_native()) { status = phys_efi_set_virtual_address_map( @@ -951,31 +971,20 @@ static void __init __efi_enter_virtual_mode(void) efi.set_virtual_address_map = NULL; - efi_runtime_mkexec(); + /* + * Apply more restrictive page table mapping attributes now that + * SVAM() has been called and the firmware has performed all + * necessary relocation fixups for the new virtual addresses. + */ + efi_runtime_update_mappings(); + efi_dump_pagetable(); /* - * We mapped the descriptor array into the EFI pagetable above but we're - * not unmapping it here. Here's why: - * - * We're copying select PGDs from the kernel page table to the EFI page - * table and when we do so and make changes to those PGDs like unmapping - * stuff from them, those changes appear in the kernel page table and we - * go boom. - * - * From setup_real_mode(): - * - * ... - * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; - * - * In this particular case, our allocation is in PGD 0 of the EFI page - * table but we've copied that PGD from PGD[272] of the EFI page table: - * - * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272 - * - * where the direct memory mapping in kernel space is. - * - * new_memmap's VA comes from that direct mapping and thus clearing it, - * it would get cleared in the kernel page table too. + * We mapped the descriptor array into the EFI pagetable above + * but we're not unmapping it here because if we're running in + * EFI mixed mode we need all of memory to be accessible when + * we pass parameters to the EFI runtime services in the + * thunking code. * * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); */ diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index ed5b67338294..338402b91d2e 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -38,6 +38,11 @@ * say 0 - 3G. */ +int __init efi_alloc_page_tables(void) +{ + return 0; +} + void efi_sync_low_kernel_mappings(void) {} void __init efi_dump_pagetable(void) {} int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) @@ -85,7 +90,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) __flush_tlb_all(); } -void __init efi_runtime_mkexec(void) +void __init efi_runtime_update_mappings(void) { if (__supported_pte_mask & _PAGE_NX) runtime_code_page_mkexec(); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index a0ac0f9c307f..49e4dd4a1f58 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -15,6 +15,8 @@ * */ +#define pr_fmt(fmt) "efi: " fmt + #include <linux/kernel.h> #include <linux/init.h> #include <linux/mm.h> @@ -40,6 +42,7 @@ #include <asm/fixmap.h> #include <asm/realmode.h> #include <asm/time.h> +#include <asm/pgalloc.h> /* * We allocate runtime services regions bottom-up, starting from -4G, i.e. @@ -47,16 +50,7 @@ */ static u64 efi_va = EFI_VA_START; -/* - * Scratch space used for switching the pagetable in the EFI stub - */ -struct efi_scratch { - u64 r15; - u64 prev_cr3; - pgd_t *efi_pgt; - bool use_pgd; - u64 phys_stack; -} __packed; +struct efi_scratch efi_scratch; static void __init early_code_mapping_set_exec(int executable) { @@ -83,8 +77,11 @@ pgd_t * __init efi_call_phys_prolog(void) int pgd; int n_pgds; - if (!efi_enabled(EFI_OLD_MEMMAP)) - return NULL; + if (!efi_enabled(EFI_OLD_MEMMAP)) { + save_pgd = (pgd_t *)read_cr3(); + write_cr3((unsigned long)efi_scratch.efi_pgt); + goto out; + } early_code_mapping_set_exec(1); @@ -96,6 +93,7 @@ pgd_t * __init efi_call_phys_prolog(void) vaddress = (unsigned long)__va(pgd * PGDIR_SIZE); set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); } +out: __flush_tlb_all(); return save_pgd; @@ -109,8 +107,11 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) int pgd_idx; int nr_pgds; - if (!save_pgd) + if (!efi_enabled(EFI_OLD_MEMMAP)) { + write_cr3((unsigned long)save_pgd); + __flush_tlb_all(); return; + } nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); @@ -123,27 +124,98 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) early_code_mapping_set_exec(0); } +static pgd_t *efi_pgd; + +/* + * We need our own copy of the higher levels of the page tables + * because we want to avoid inserting EFI region mappings (EFI_VA_END + * to EFI_VA_START) into the standard kernel page tables. Everything + * else can be shared, see efi_sync_low_kernel_mappings(). + */ +int __init efi_alloc_page_tables(void) +{ + pgd_t *pgd; + pud_t *pud; + gfp_t gfp_mask; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return 0; + + gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO; + efi_pgd = (pgd_t *)__get_free_page(gfp_mask); + if (!efi_pgd) + return -ENOMEM; + + pgd = efi_pgd + pgd_index(EFI_VA_END); + + pud = pud_alloc_one(NULL, 0); + if (!pud) { + free_page((unsigned long)efi_pgd); + return -ENOMEM; + } + + pgd_populate(NULL, pgd, pud); + + return 0; +} + /* * Add low kernel mappings for passing arguments to EFI functions. */ void efi_sync_low_kernel_mappings(void) { - unsigned num_pgds; - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + unsigned num_entries; + pgd_t *pgd_k, *pgd_efi; + pud_t *pud_k, *pud_efi; if (efi_enabled(EFI_OLD_MEMMAP)) return; - num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET); + /* + * We can share all PGD entries apart from the one entry that + * covers the EFI runtime mapping space. + * + * Make sure the EFI runtime region mappings are guaranteed to + * only span a single PGD entry and that the entry also maps + * other important kernel regions. + */ + BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); + BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != + (EFI_VA_END & PGDIR_MASK)); + + pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); + pgd_k = pgd_offset_k(PAGE_OFFSET); - memcpy(pgd + pgd_index(PAGE_OFFSET), - init_mm.pgd + pgd_index(PAGE_OFFSET), - sizeof(pgd_t) * num_pgds); + num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET); + memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); + + /* + * We share all the PUD entries apart from those that map the + * EFI regions. Copy around them. + */ + BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0); + BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0); + + pgd_efi = efi_pgd + pgd_index(EFI_VA_END); + pud_efi = pud_offset(pgd_efi, 0); + + pgd_k = pgd_offset_k(EFI_VA_END); + pud_k = pud_offset(pgd_k, 0); + + num_entries = pud_index(EFI_VA_END); + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + + pud_efi = pud_offset(pgd_efi, EFI_VA_START); + pud_k = pud_offset(pgd_k, EFI_VA_START); + + num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START); + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); } int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - unsigned long text; + unsigned long pfn, text; + efi_memory_desc_t *md; struct page *page; unsigned npages; pgd_t *pgd; @@ -151,8 +223,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) if (efi_enabled(EFI_OLD_MEMMAP)) return 0; - efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; - pgd = __va(efi_scratch.efi_pgt); + efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd); + pgd = efi_pgd; /* * It can happen that the physical address of new_memmap lands in memory @@ -160,7 +232,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * and ident-map those pages containing the map before calling * phys_efi_set_virtual_address_map(). */ - if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) { + pfn = pa_memmap >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW)) { pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); return 1; } @@ -176,6 +249,25 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) if (!IS_ENABLED(CONFIG_EFI_MIXED)) return 0; + /* + * Map all of RAM so that we can access arguments in the 1:1 + * mapping when making EFI runtime calls. + */ + for_each_efi_memory_desc(&memmap, md) { + if (md->type != EFI_CONVENTIONAL_MEMORY && + md->type != EFI_LOADER_DATA && + md->type != EFI_LOADER_CODE) + continue; + + pfn = md->phys_addr >> PAGE_SHIFT; + npages = md->num_pages; + + if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, npages, _PAGE_RW)) { + pr_err("Failed to map 1:1 memory\n"); + return 1; + } + } + page = alloc_page(GFP_KERNEL|__GFP_DMA32); if (!page) panic("Unable to allocate EFI runtime stack < 4GB\n"); @@ -183,10 +275,11 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) efi_scratch.phys_stack = virt_to_phys(page_address(page)); efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ - npages = (_end - _text) >> PAGE_SHIFT; + npages = (_etext - _text) >> PAGE_SHIFT; text = __pa(_text); + pfn = text >> PAGE_SHIFT; - if (kernel_map_pages_in_pgd(pgd, text >> PAGE_SHIFT, text, npages, 0)) { + if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW)) { pr_err("Failed to map kernel text 1:1\n"); return 1; } @@ -196,20 +289,20 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); - - kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages); + kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages); } static void __init __map_region(efi_memory_desc_t *md, u64 va) { - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); - unsigned long pf = 0; + unsigned long flags = _PAGE_RW; + unsigned long pfn; + pgd_t *pgd = efi_pgd; if (!(md->attribute & EFI_MEMORY_WB)) - pf |= _PAGE_PCD; + flags |= _PAGE_PCD; - if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf)) + pfn = md->phys_addr >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, va, md->num_pages, flags)) pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", md->phys_addr, va); } @@ -300,21 +393,56 @@ void __init parse_efi_setup(u64 phys_addr, u32 data_len) efi_setup = phys_addr + sizeof(struct setup_data); } -void __init efi_runtime_mkexec(void) +void __init efi_runtime_update_mappings(void) { - if (!efi_enabled(EFI_OLD_MEMMAP)) + unsigned long pfn; + pgd_t *pgd = efi_pgd; + efi_memory_desc_t *md; + void *p; + + if (efi_enabled(EFI_OLD_MEMMAP)) { + if (__supported_pte_mask & _PAGE_NX) + runtime_code_page_mkexec(); return; + } + + if (!efi_enabled(EFI_NX_PE_DATA)) + return; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + unsigned long pf = 0; + md = p; + + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; - if (__supported_pte_mask & _PAGE_NX) - runtime_code_page_mkexec(); + if (!(md->attribute & EFI_MEMORY_WB)) + pf |= _PAGE_PCD; + + if ((md->attribute & EFI_MEMORY_XP) || + (md->type == EFI_RUNTIME_SERVICES_DATA)) + pf |= _PAGE_NX; + + if (!(md->attribute & EFI_MEMORY_RO) && + (md->type != EFI_RUNTIME_SERVICES_CODE)) + pf |= _PAGE_RW; + + /* Update the 1:1 mapping */ + pfn = md->phys_addr >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, md->num_pages, pf)) + pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, md->virt_addr); + + if (kernel_map_pages_in_pgd(pgd, pfn, md->virt_addr, md->num_pages, pf)) + pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, md->virt_addr); + } } void __init efi_dump_pagetable(void) { #ifdef CONFIG_EFI_PGT_DUMP - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); - - ptdump_walk_pgd_level(NULL, pgd); + ptdump_walk_pgd_level(NULL, efi_pgd); #endif } diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 0df2dcc18404..92723aeae0f9 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -39,41 +39,6 @@ mov %rsi, %cr0; \ mov (%rsp), %rsp - /* stolen from gcc */ - .macro FLUSH_TLB_ALL - movq %r15, efi_scratch(%rip) - movq %r14, efi_scratch+8(%rip) - movq %cr4, %r15 - movq %r15, %r14 - andb $0x7f, %r14b - movq %r14, %cr4 - movq %r15, %cr4 - movq efi_scratch+8(%rip), %r14 - movq efi_scratch(%rip), %r15 - .endm - - .macro SWITCH_PGT - cmpb $0, efi_scratch+24(%rip) - je 1f - movq %r15, efi_scratch(%rip) # r15 - # save previous CR3 - movq %cr3, %r15 - movq %r15, efi_scratch+8(%rip) # prev_cr3 - movq efi_scratch+16(%rip), %r15 # EFI pgt - movq %r15, %cr3 - 1: - .endm - - .macro RESTORE_PGT - cmpb $0, efi_scratch+24(%rip) - je 2f - movq efi_scratch+8(%rip), %r15 - movq %r15, %cr3 - movq efi_scratch(%rip), %r15 - FLUSH_TLB_ALL - 2: - .endm - ENTRY(efi_call) FRAME_BEGIN SAVE_XMM @@ -85,17 +50,9 @@ ENTRY(efi_call) mov %r8, %r9 mov %rcx, %r8 mov %rsi, %rcx - SWITCH_PGT call *%rdi - RESTORE_PGT addq $48, %rsp RESTORE_XMM FRAME_END ret ENDPROC(efi_call) - - .data -ENTRY(efi_scratch) - .fill 3,8,0 - .byte 0 - .quad 0 diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index ed30e79347e8..ab50ada1d56e 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) "efi: " fmt + #include <linux/init.h> #include <linux/kernel.h> #include <linux/string.h> @@ -55,13 +57,41 @@ void efi_delete_dummy_variable(void) } /* + * In the nonblocking case we do not attempt to perform garbage + * collection if we do not have enough free space. Rather, we do the + * bare minimum check and give up immediately if the available space + * is below EFI_MIN_RESERVE. + * + * This function is intended to be small and simple because it is + * invoked from crash handler paths. + */ +static efi_status_t +query_variable_store_nonblocking(u32 attributes, unsigned long size) +{ + efi_status_t status; + u64 storage_size, remaining_size, max_size; + + status = efi.query_variable_info_nonblocking(attributes, &storage_size, + &remaining_size, + &max_size); + if (status != EFI_SUCCESS) + return status; + + if (remaining_size - size < EFI_MIN_RESERVE) + return EFI_OUT_OF_RESOURCES; + + return EFI_SUCCESS; +} + +/* * Some firmware implementations refuse to boot if there's insufficient space * in the variable store. Ensure that we never use more than a safe limit. * * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable * store. */ -efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) +efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, + bool nonblocking) { efi_status_t status; u64 storage_size, remaining_size, max_size; @@ -69,6 +99,9 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) if (!(attributes & EFI_VARIABLE_NON_VOLATILE)) return 0; + if (nonblocking) + return query_variable_store_nonblocking(attributes, size); + status = efi.query_variable_info(attributes, &storage_size, &remaining_size, &max_size); if (status != EFI_SUCCESS) @@ -312,7 +345,7 @@ void __init efi_apply_memmap_quirks(void) * services. */ if (!efi_runtime_supported()) { - pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); + pr_info("Setup done, disabling due to 32/64-bit mismatch\n"); efi_unmap_memmap(); } diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index 9e15d571b53c..aa1f743152a2 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -61,8 +61,8 @@ static int __init uefi_init(void) char vendor[100] = "unknown"; int i, retval; - efi.systab = early_memremap(efi_system_table, - sizeof(efi_system_table_t)); + efi.systab = early_memremap_ro(efi_system_table, + sizeof(efi_system_table_t)); if (efi.systab == NULL) { pr_warn("Unable to map EFI system table.\n"); return -ENOMEM; @@ -86,8 +86,8 @@ static int __init uefi_init(void) efi.systab->hdr.revision & 0xffff); /* Show what we know for posterity */ - c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor), - sizeof(vendor) * sizeof(efi_char16_t)); + c16 = early_memremap_ro(efi_to_phys(efi.systab->fw_vendor), + sizeof(vendor) * sizeof(efi_char16_t)); if (c16) { for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) vendor[i] = c16[i]; @@ -100,8 +100,8 @@ static int __init uefi_init(void) efi.systab->hdr.revision & 0xffff, vendor); table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables; - config_tables = early_memremap(efi_to_phys(efi.systab->tables), - table_size); + config_tables = early_memremap_ro(efi_to_phys(efi.systab->tables), + table_size); if (config_tables == NULL) { pr_warn("Unable to map EFI config table array.\n"); retval = -ENOMEM; @@ -185,7 +185,7 @@ void __init efi_init(void) efi_system_table = params.system_table; memmap.phys_map = params.mmap; - memmap.map = early_memremap(params.mmap, params.mmap_size); + memmap.map = early_memremap_ro(params.mmap, params.mmap_size); if (memmap.map == NULL) { /* * If we are booting via UEFI, the UEFI memory map is the only diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 2cd37dad67a6..3a69ed5ecfcb 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -182,6 +182,7 @@ static int generic_ops_register(void) { generic_ops.get_variable = efi.get_variable; generic_ops.set_variable = efi.set_variable; + generic_ops.set_variable_nonblocking = efi.set_variable_nonblocking; generic_ops.get_next_variable = efi.get_next_variable; generic_ops.query_variable_store = efi_query_variable_store; @@ -326,38 +327,6 @@ u64 __init efi_mem_desc_end(efi_memory_desc_t *md) return end; } -/* - * We can't ioremap data in EFI boot services RAM, because we've already mapped - * it as RAM. So, look it up in the existing EFI memory map instead. Only - * callable after efi_enter_virtual_mode and before efi_free_boot_services. - */ -void __iomem *efi_lookup_mapped_addr(u64 phys_addr) -{ - struct efi_memory_map *map; - void *p; - map = efi.memmap; - if (!map) - return NULL; - if (WARN_ON(!map->map)) - return NULL; - for (p = map->map; p < map->map_end; p += map->desc_size) { - efi_memory_desc_t *md = p; - u64 size = md->num_pages << EFI_PAGE_SHIFT; - u64 end = md->phys_addr + size; - if (!(md->attribute & EFI_MEMORY_RUNTIME) && - md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) - continue; - if (!md->virt_addr) - continue; - if (phys_addr >= md->phys_addr && phys_addr < end) { - phys_addr += md->virt_addr - md->phys_addr; - return (__force void __iomem *)(unsigned long)phys_addr; - } - } - return NULL; -} - static __initdata efi_config_table_type_t common_tables[] = { {ACPI_20_TABLE_GUID, "ACPI 2.0", &efi.acpi20}, {ACPI_TABLE_GUID, "ACPI", &efi.acpi}, @@ -586,7 +555,8 @@ static __initdata char memory_type_name[][20] = { "ACPI Memory NVS", "Memory Mapped I/O", "MMIO Port Space", - "PAL Code" + "PAL Code", + "Persistent Memory", }; char * __init efi_md_typeattr_format(char *buf, size_t size, @@ -613,13 +583,16 @@ char * __init efi_md_typeattr_format(char *buf, size_t size, if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT | EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO | EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP | + EFI_MEMORY_NV | EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE)) snprintf(pos, size, "|attr=0x%016llx]", (unsigned long long)attr); else - snprintf(pos, size, "|%3s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", + snprintf(pos, size, + "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", attr & EFI_MEMORY_RUNTIME ? "RUN" : "", attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "", + attr & EFI_MEMORY_NV ? "NV" : "", attr & EFI_MEMORY_XP ? "XP" : "", attr & EFI_MEMORY_RP ? "RP" : "", attr & EFI_MEMORY_WP ? "WP" : "", diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 10e6774ab2a2..b23a271c6ae5 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -386,7 +386,7 @@ static const struct sysfs_ops efivar_attr_ops = { static void efivar_release(struct kobject *kobj) { - struct efivar_entry *var = container_of(kobj, struct efivar_entry, kobj); + struct efivar_entry *var = to_efivar_entry(kobj); kfree(var); } diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index 22c5285f7705..75feb3f5829b 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -167,14 +167,11 @@ static struct kset *esrt_kset; static int esre_create_sysfs_entry(void *esre, int entry_num) { struct esre_entry *entry; - char name[20]; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; - sprintf(name, "entry%d", entry_num); - entry->kobj.kset = esrt_kset; if (esrt->fw_resource_version == 1) { @@ -182,7 +179,7 @@ static int esre_create_sysfs_entry(void *esre, int entry_num) entry->esre.esre1 = esre; rc = kobject_init_and_add(&entry->kobj, &esre1_ktype, NULL, - "%s", name); + "entry%d", entry_num); if (rc) { kfree(entry); return rc; diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 4deb3e7faa0e..414deb85c2e5 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -192,6 +192,10 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, pr_efi(sys_table, "Booting Linux Kernel...\n"); + status = check_platform_features(sys_table); + if (status != EFI_SUCCESS) + goto fail; + /* * Get a handle to the loaded image protocol. This is used to get * information about the running image, such as size and the command diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c index 495ebd657e38..6f42be4d0084 100644 --- a/drivers/firmware/efi/libstub/arm32-stub.c +++ b/drivers/firmware/efi/libstub/arm32-stub.c @@ -9,6 +9,23 @@ #include <linux/efi.h> #include <asm/efi.h> +efi_status_t check_platform_features(efi_system_table_t *sys_table_arg) +{ + int block; + + /* non-LPAE kernels can run anywhere */ + if (!IS_ENABLED(CONFIG_ARM_LPAE)) + return EFI_SUCCESS; + + /* LPAE kernels need compatible hardware */ + block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0); + if (block < 5) { + pr_efi_err(sys_table_arg, "This LPAE kernel is not supported by your CPU\n"); + return EFI_UNSUPPORTED; + } + return EFI_SUCCESS; +} + efi_status_t handle_kernel_image(efi_system_table_t *sys_table, unsigned long *image_addr, unsigned long *image_size, diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index e0e6b74fef8f..a90f6459f5c6 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -12,18 +12,38 @@ #include <linux/efi.h> #include <asm/efi.h> #include <asm/sections.h> +#include <asm/sysreg.h> #include "efistub.h" extern bool __nokaslr; -efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg, - unsigned long *image_addr, - unsigned long *image_size, - unsigned long *reserve_addr, - unsigned long *reserve_size, - unsigned long dram_base, - efi_loaded_image_t *image) +efi_status_t check_platform_features(efi_system_table_t *sys_table_arg) +{ + u64 tg; + + /* UEFI mandates support for 4 KB granularity, no need to check */ + if (IS_ENABLED(CONFIG_ARM64_4K_PAGES)) + return EFI_SUCCESS; + + tg = (read_cpuid(ID_AA64MMFR0_EL1) >> ID_AA64MMFR0_TGRAN_SHIFT) & 0xf; + if (tg != ID_AA64MMFR0_TGRAN_SUPPORTED) { + if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) + pr_efi_err(sys_table_arg, "This 64 KB granular kernel is not supported by your CPU\n"); + else + pr_efi_err(sys_table_arg, "This 16 KB granular kernel is not supported by your CPU\n"); + return EFI_UNSUPPORTED; + } + return EFI_SUCCESS; +} + +efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg, + unsigned long *image_addr, + unsigned long *image_size, + unsigned long *reserve_addr, + unsigned long *reserve_size, + unsigned long dram_base, + efi_loaded_image_t *image) { efi_status_t status; unsigned long kernel_size, kernel_memsize = 0; diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 5ed3d3f38166..ee49cd23ee63 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -5,6 +5,16 @@ /* error code which can't be mistaken for valid address */ #define EFI_ERROR (~0UL) +/* + * __init annotations should not be used in the EFI stub, since the code is + * either included in the decompressor (x86, ARM) where they have no effect, + * or the whole stub is __init annotated at the section level (arm64), by + * renaming the sections, in which case the __init annotation will be + * redundant, and will result in section names like .init.init.text, and our + * linker script does not expect that. + */ +#undef __init + void efi_char16_printk(efi_system_table_t *, efi_char16_t *); efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image, @@ -50,4 +60,6 @@ efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg, unsigned long size, unsigned long align, unsigned long *addr, unsigned long random_seed); +efi_status_t check_platform_features(efi_system_table_t *sys_table_arg); + #endif diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 228bbf910461..de6953039af6 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -61,63 +61,23 @@ */ static DEFINE_SPINLOCK(efi_runtime_lock); -/* - * Some runtime services calls can be reentrant under NMI, even if the table - * above says they are not. (source: UEFI Specification v2.4A) - * - * Table 32. Functions that may be called after Machine Check, INIT and NMI - * +----------------------------+------------------------------------------+ - * | Function | Called after Machine Check, INIT and NMI | - * +----------------------------+------------------------------------------+ - * | GetTime() | Yes, even if previously busy. | - * | GetVariable() | Yes, even if previously busy | - * | GetNextVariableName() | Yes, even if previously busy | - * | QueryVariableInfo() | Yes, even if previously busy | - * | SetVariable() | Yes, even if previously busy | - * | UpdateCapsule() | Yes, even if previously busy | - * | QueryCapsuleCapabilities() | Yes, even if previously busy | - * | ResetSystem() | Yes, even if previously busy | - * +----------------------------+------------------------------------------+ - * - * In order to prevent deadlocks under NMI, the wrappers for these functions - * may only grab the efi_runtime_lock or rtc_lock spinlocks if !efi_in_nmi(). - * However, not all of the services listed are reachable through NMI code paths, - * so the the special handling as suggested by the UEFI spec is only implemented - * for QueryVariableInfo() and SetVariable(), as these can be reached in NMI - * context through efi_pstore_write(). - */ - -/* - * As per commit ef68c8f87ed1 ("x86: Serialize EFI time accesses on rtc_lock"), - * the EFI specification requires that callers of the time related runtime - * functions serialize with other CMOS accesses in the kernel, as the EFI time - * functions may choose to also use the legacy CMOS RTC. - */ -__weak DEFINE_SPINLOCK(rtc_lock); - static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { - unsigned long flags; efi_status_t status; - spin_lock_irqsave(&rtc_lock, flags); spin_lock(&efi_runtime_lock); status = efi_call_virt(get_time, tm, tc); spin_unlock(&efi_runtime_lock); - spin_unlock_irqrestore(&rtc_lock, flags); return status; } static efi_status_t virt_efi_set_time(efi_time_t *tm) { - unsigned long flags; efi_status_t status; - spin_lock_irqsave(&rtc_lock, flags); spin_lock(&efi_runtime_lock); status = efi_call_virt(set_time, tm); spin_unlock(&efi_runtime_lock); - spin_unlock_irqrestore(&rtc_lock, flags); return status; } @@ -125,27 +85,21 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm) { - unsigned long flags; efi_status_t status; - spin_lock_irqsave(&rtc_lock, flags); spin_lock(&efi_runtime_lock); status = efi_call_virt(get_wakeup_time, enabled, pending, tm); spin_unlock(&efi_runtime_lock); - spin_unlock_irqrestore(&rtc_lock, flags); return status; } static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) { - unsigned long flags; efi_status_t status; - spin_lock_irqsave(&rtc_lock, flags); spin_lock(&efi_runtime_lock); status = efi_call_virt(set_wakeup_time, enabled, tm); spin_unlock(&efi_runtime_lock); - spin_unlock_irqrestore(&rtc_lock, flags); return status; } @@ -155,13 +109,12 @@ static efi_status_t virt_efi_get_variable(efi_char16_t *name, unsigned long *data_size, void *data) { - unsigned long flags; efi_status_t status; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(get_variable, name, vendor, attr, data_size, data); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } @@ -169,12 +122,11 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) { - unsigned long flags; efi_status_t status; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(get_next_variable, name_size, name, vendor); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } @@ -184,13 +136,12 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name, unsigned long data_size, void *data) { - unsigned long flags; efi_status_t status; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(set_variable, name, vendor, attr, data_size, data); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } @@ -199,15 +150,14 @@ virt_efi_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data) { - unsigned long flags; efi_status_t status; - if (!spin_trylock_irqsave(&efi_runtime_lock, flags)) + if (!spin_trylock(&efi_runtime_lock)) return EFI_NOT_READY; status = efi_call_virt(set_variable, name, vendor, attr, data_size, data); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } @@ -217,27 +167,45 @@ static efi_status_t virt_efi_query_variable_info(u32 attr, u64 *remaining_space, u64 *max_variable_size) { - unsigned long flags; efi_status_t status; if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(query_variable_info, attr, storage_space, remaining_space, max_variable_size); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); + return status; +} + +static efi_status_t +virt_efi_query_variable_info_nonblocking(u32 attr, + u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) +{ + efi_status_t status; + + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + if (!spin_trylock(&efi_runtime_lock)) + return EFI_NOT_READY; + + status = efi_call_virt(query_variable_info, attr, storage_space, + remaining_space, max_variable_size); + spin_unlock(&efi_runtime_lock); return status; } static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) { - unsigned long flags; efi_status_t status; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(get_next_high_mono_count, count); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } @@ -246,26 +214,23 @@ static void virt_efi_reset_system(int reset_type, unsigned long data_size, efi_char16_t *data) { - unsigned long flags; - - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); __efi_call_virt(reset_system, reset_type, status, data_size, data); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); } static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules, unsigned long count, unsigned long sg_list) { - unsigned long flags; efi_status_t status; if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(update_capsule, capsules, count, sg_list); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } @@ -274,16 +239,15 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules, u64 *max_size, int *reset_type) { - unsigned long flags; efi_status_t status; if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(query_capsule_caps, capsules, count, max_size, reset_type); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } @@ -300,6 +264,7 @@ void efi_native_runtime_setup(void) efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; efi.reset_system = virt_efi_reset_system; efi.query_variable_info = virt_efi_query_variable_info; + efi.query_variable_info_nonblocking = virt_efi_query_variable_info_nonblocking; efi.update_capsule = virt_efi_update_capsule; efi.query_capsule_caps = virt_efi_query_capsule_caps; } diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 7f2ea21c730d..0ac594c0a234 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -300,7 +300,18 @@ check_var_size(u32 attributes, unsigned long size) if (!fops->query_variable_store) return EFI_UNSUPPORTED; - return fops->query_variable_store(attributes, size); + return fops->query_variable_store(attributes, size, false); +} + +static efi_status_t +check_var_size_nonblocking(u32 attributes, unsigned long size) +{ + const struct efivar_operations *fops = __efivars->ops; + + if (!fops->query_variable_store) + return EFI_UNSUPPORTED; + + return fops->query_variable_store(attributes, size, true); } static int efi_status_to_err(efi_status_t status) @@ -681,7 +692,8 @@ efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor, if (!spin_trylock_irqsave(&__efivars->lock, flags)) return -EBUSY; - status = check_var_size(attributes, size + ucs2_strsize(name, 1024)); + status = check_var_size_nonblocking(attributes, + size + ucs2_strsize(name, 1024)); if (status != EFI_SUCCESS) { spin_unlock_irqrestore(&__efivars->lock, flags); return -ENOSPC; diff --git a/include/linux/efi.h b/include/linux/efi.h index 333d0ca6940f..1626474567ac 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -97,6 +97,7 @@ typedef struct { #define EFI_MEMORY_WP ((u64)0x0000000000001000ULL) /* write-protect */ #define EFI_MEMORY_RP ((u64)0x0000000000002000ULL) /* read-protect */ #define EFI_MEMORY_XP ((u64)0x0000000000004000ULL) /* execute-protect */ +#define EFI_MEMORY_NV ((u64)0x0000000000008000ULL) /* non-volatile */ #define EFI_MEMORY_MORE_RELIABLE \ ((u64)0x0000000000010000ULL) /* higher reliability */ #define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */ @@ -507,10 +508,6 @@ typedef efi_status_t efi_get_next_variable_t (unsigned long *name_size, efi_char typedef efi_status_t efi_set_variable_t (efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data); -typedef efi_status_t -efi_set_variable_nonblocking_t(efi_char16_t *name, efi_guid_t *vendor, - u32 attr, unsigned long data_size, void *data); - typedef efi_status_t efi_get_next_high_mono_count_t (u32 *count); typedef void efi_reset_system_t (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data); @@ -529,7 +526,9 @@ typedef efi_status_t efi_query_capsule_caps_t(efi_capsule_header_t **capsules, unsigned long count, u64 *max_size, int *reset_type); -typedef efi_status_t efi_query_variable_store_t(u32 attributes, unsigned long size); +typedef efi_status_t efi_query_variable_store_t(u32 attributes, + unsigned long size, + bool nonblocking); void efi_native_runtime_setup(void); @@ -537,67 +536,88 @@ void efi_native_runtime_setup(void); * EFI Configuration Table and GUID definitions */ #define NULL_GUID \ - EFI_GUID( 0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ) + EFI_GUID(0x00000000, 0x0000, 0x0000, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) #define MPS_TABLE_GUID \ - EFI_GUID( 0xeb9d2d2f, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d ) + EFI_GUID(0xeb9d2d2f, 0x2d88, 0x11d3, \ + 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_TABLE_GUID \ - EFI_GUID( 0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d ) + EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, \ + 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_20_TABLE_GUID \ - EFI_GUID( 0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81 ) + EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, \ + 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81) #define SMBIOS_TABLE_GUID \ - EFI_GUID( 0xeb9d2d31, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d ) + EFI_GUID(0xeb9d2d31, 0x2d88, 0x11d3, \ + 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define SMBIOS3_TABLE_GUID \ - EFI_GUID( 0xf2fd1544, 0x9794, 0x4a2c, 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94 ) + EFI_GUID(0xf2fd1544, 0x9794, 0x4a2c, \ + 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94) #define SAL_SYSTEM_TABLE_GUID \ - EFI_GUID( 0xeb9d2d32, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d ) + EFI_GUID(0xeb9d2d32, 0x2d88, 0x11d3, \ + 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define HCDP_TABLE_GUID \ - EFI_GUID( 0xf951938d, 0x620b, 0x42ef, 0x82, 0x79, 0xa8, 0x4b, 0x79, 0x61, 0x78, 0x98 ) + EFI_GUID(0xf951938d, 0x620b, 0x42ef, \ + 0x82, 0x79, 0xa8, 0x4b, 0x79, 0x61, 0x78, 0x98) #define UGA_IO_PROTOCOL_GUID \ - EFI_GUID( 0x61a4d49e, 0x6f68, 0x4f1b, 0xb9, 0x22, 0xa8, 0x6e, 0xed, 0xb, 0x7, 0xa2 ) + EFI_GUID(0x61a4d49e, 0x6f68, 0x4f1b, \ + 0xb9, 0x22, 0xa8, 0x6e, 0xed, 0x0b, 0x07, 0xa2) #define EFI_GLOBAL_VARIABLE_GUID \ - EFI_GUID( 0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c ) + EFI_GUID(0x8be4df61, 0x93ca, 0x11d2, \ + 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c) #define UV_SYSTEM_TABLE_GUID \ - EFI_GUID( 0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93 ) + EFI_GUID(0x3b13a7d4, 0x633e, 0x11dd, \ + 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93) #define LINUX_EFI_CRASH_GUID \ - EFI_GUID( 0xcfc8fc79, 0xbe2e, 0x4ddc, 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0 ) + EFI_GUID(0xcfc8fc79, 0xbe2e, 0x4ddc, \ + 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0) #define LOADED_IMAGE_PROTOCOL_GUID \ - EFI_GUID( 0x5b1b31a1, 0x9562, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b ) + EFI_GUID(0x5b1b31a1, 0x9562, 0x11d2, \ + 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID \ - EFI_GUID( 0x9042a9de, 0x23dc, 0x4a38, 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a ) + EFI_GUID(0x9042a9de, 0x23dc, 0x4a38, \ + 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a) #define EFI_UGA_PROTOCOL_GUID \ - EFI_GUID( 0x982c298b, 0xf4fa, 0x41cb, 0xb8, 0x38, 0x77, 0xaa, 0x68, 0x8f, 0xb8, 0x39 ) + EFI_GUID(0x982c298b, 0xf4fa, 0x41cb, \ + 0xb8, 0x38, 0x77, 0xaa, 0x68, 0x8f, 0xb8, 0x39) #define EFI_PCI_IO_PROTOCOL_GUID \ - EFI_GUID( 0x4cf5b200, 0x68b8, 0x4ca5, 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x2, 0x9a ) + EFI_GUID(0x4cf5b200, 0x68b8, 0x4ca5, \ + 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x02, 0x9a) #define EFI_FILE_INFO_ID \ - EFI_GUID( 0x9576e92, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b ) + EFI_GUID(0x9576e92, 0x6d3f, 0x11d2, \ + 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define EFI_SYSTEM_RESOURCE_TABLE_GUID \ - EFI_GUID( 0xb122a263, 0x3661, 0x4f68, 0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80 ) + EFI_GUID(0xb122a263, 0x3661, 0x4f68, \ + 0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80) #define EFI_FILE_SYSTEM_GUID \ - EFI_GUID( 0x964e5b22, 0x6459, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b ) + EFI_GUID(0x964e5b22, 0x6459, 0x11d2, \ + 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define DEVICE_TREE_GUID \ - EFI_GUID( 0xb1b621d5, 0xf19c, 0x41a5, 0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0 ) + EFI_GUID(0xb1b621d5, 0xf19c, 0x41a5, \ + 0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0) #define EFI_PROPERTIES_TABLE_GUID \ - EFI_GUID( 0x880aaca3, 0x4adc, 0x4a04, 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5 ) + EFI_GUID(0x880aaca3, 0x4adc, 0x4a04, \ + 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5) #define EFI_RNG_PROTOCOL_GUID \ EFI_GUID(0x3152bca5, 0xeade, 0x433d, \ @@ -855,8 +875,9 @@ extern struct efi { efi_get_variable_t *get_variable; efi_get_next_variable_t *get_next_variable; efi_set_variable_t *set_variable; - efi_set_variable_nonblocking_t *set_variable_nonblocking; + efi_set_variable_t *set_variable_nonblocking; efi_query_variable_info_t *query_variable_info; + efi_query_variable_info_t *query_variable_info_nonblocking; efi_update_capsule_t *update_capsule; efi_query_capsule_caps_t *query_capsule_caps; efi_get_next_high_mono_count_t *get_next_high_mono_count; @@ -888,13 +909,17 @@ extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if pos #ifdef CONFIG_X86 extern void efi_late_init(void); extern void efi_free_boot_services(void); -extern efi_status_t efi_query_variable_store(u32 attributes, unsigned long size); +extern efi_status_t efi_query_variable_store(u32 attributes, + unsigned long size, + bool nonblocking); extern void efi_find_mirror(void); #else static inline void efi_late_init(void) {} static inline void efi_free_boot_services(void) {} -static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) +static inline efi_status_t efi_query_variable_store(u32 attributes, + unsigned long size, + bool nonblocking) { return EFI_SUCCESS; } @@ -1095,7 +1120,7 @@ struct efivar_operations { efi_get_variable_t *get_variable; efi_get_next_variable_t *get_next_variable; efi_set_variable_t *set_variable; - efi_set_variable_nonblocking_t *set_variable_nonblocking; + efi_set_variable_t *set_variable_nonblocking; efi_query_variable_store_t *query_variable_store; }; |