diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-12 17:58:57 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-12 17:58:57 +0100 |
commit | b29f377119f68b942369a9366bdcb1fec82b2cda (patch) | |
tree | 1d5bca16e70aa3c1a0660825d0d8fc3f09abc262 /arch/x86/kernel | |
parent | Merge tag 'x86-apic-2024-03-12' of git://git.kernel.org/pub/scm/linux/kernel/... (diff) | |
parent | Merge branch 'linus' into x86/boot, to resolve conflict (diff) | |
download | linux-b29f377119f68b942369a9366bdcb1fec82b2cda.tar.xz linux-b29f377119f68b942369a9366bdcb1fec82b2cda.zip |
Merge tag 'x86-boot-2024-03-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 boot updates from Ingo Molnar:
- Continuing work by Ard Biesheuvel to improve the x86 early startup
code, with the long-term goal to make it position independent:
- Get rid of early accesses to global objects, either by moving
them to the stack, deferring the access until later, or dropping
the globals entirely
- Move all code that runs early via the 1:1 mapping into
.head.text, and move code that does not out of it, so that build
time checks can be added later to ensure that no inadvertent
absolute references were emitted into code that does not
tolerate them
- Remove fixup_pointer() and occurrences of __pa_symbol(), which
rely on the compiler emitting absolute references, which is not
guaranteed
- Improve the early console code
- Add early console message about ignored NMIs, so that users are at
least warned about their existence - even if we cannot do anything
about them
- Improve the kexec code's kernel load address handling
- Enable more X86S (simplified x86) bits
- Simplify early boot GDT handling
- Micro-optimize the boot code a bit
- Misc cleanups
* tag 'x86-boot-2024-03-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (22 commits)
x86/sev: Move early startup code into .head.text section
x86/sme: Move early SME kernel encryption handling into .head.text
x86/boot: Move mem_encrypt= parsing to the decompressor
efi/libstub: Add generic support for parsing mem_encrypt=
x86/startup_64: Simplify virtual switch on primary boot
x86/startup_64: Simplify calculation of initial page table address
x86/startup_64: Defer assignment of 5-level paging global variables
x86/startup_64: Simplify CR4 handling in startup code
x86/boot: Use 32-bit XOR to clear registers
efi/x86: Set the PE/COFF header's NX compat flag unconditionally
x86/boot/64: Load the final kernel GDT during early boot directly, remove startup_gdt[]
x86/boot/64: Use RIP_REL_REF() to access early_top_pgt[]
x86/boot/64: Use RIP_REL_REF() to access early page tables
x86/boot/64: Use RIP_REL_REF() to access '__supported_pte_mask'
x86/boot/64: Use RIP_REL_REF() to access early_dynamic_pgts[]
x86/boot/64: Use RIP_REL_REF() to assign 'phys_base'
x86/boot/64: Simplify global variable accesses in GDT/IDT programming
x86/trampoline: Bypass compat mode in trampoline_start64() if not needed
kexec: Allocate kernel above bzImage's pref_address
x86/boot: Add a message about ignored early NMIs
...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/head64.c | 179 | ||||
-rw-r--r-- | arch/x86/kernel/head_64.S | 144 | ||||
-rw-r--r-- | arch/x86/kernel/kexec-bzimage64.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/sev-shared.c | 23 | ||||
-rw-r--r-- | arch/x86/kernel/sev.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/sev_verify_cbit.S | 2 |
6 files changed, 137 insertions, 230 deletions
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index dc0956067944..212e8e06aeba 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -22,6 +22,8 @@ #include <linux/cc_platform.h> #include <linux/pgtable.h> +#include <asm/asm.h> +#include <asm/page_64.h> #include <asm/processor.h> #include <asm/proto.h> #include <asm/smp.h> @@ -67,42 +69,11 @@ unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4; EXPORT_SYMBOL(vmemmap_base); #endif -/* - * GDT used on the boot CPU before switching to virtual addresses. - */ -static struct desc_struct startup_gdt[GDT_ENTRIES] __initdata = { - [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(DESC_CODE32, 0, 0xfffff), - [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(DESC_CODE64, 0, 0xfffff), - [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(DESC_DATA64, 0, 0xfffff), -}; - -/* - * Address needs to be set at runtime because it references the startup_gdt - * while the kernel still uses a direct mapping. - */ -static struct desc_ptr startup_gdt_descr __initdata = { - .size = sizeof(startup_gdt)-1, - .address = 0, -}; - -static void __head *fixup_pointer(void *ptr, unsigned long physaddr) -{ - return ptr - (void *)_text + (void *)physaddr; -} - -static unsigned long __head *fixup_long(void *ptr, unsigned long physaddr) -{ - return fixup_pointer(ptr, physaddr); -} - -#ifdef CONFIG_X86_5LEVEL -static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr) +static inline bool check_la57_support(void) { - return fixup_pointer(ptr, physaddr); -} + if (!IS_ENABLED(CONFIG_X86_5LEVEL)) + return false; -static bool __head check_la57_support(unsigned long physaddr) -{ /* * 5-level paging is detected and enabled at kernel decompression * stage. Only check if it has been enabled there. @@ -110,21 +81,8 @@ static bool __head check_la57_support(unsigned long physaddr) if (!(native_read_cr4() & X86_CR4_LA57)) return false; - *fixup_int(&__pgtable_l5_enabled, physaddr) = 1; - *fixup_int(&pgdir_shift, physaddr) = 48; - *fixup_int(&ptrs_per_p4d, physaddr) = 512; - *fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5; - *fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5; - *fixup_long(&vmemmap_base, physaddr) = __VMEMMAP_BASE_L5; - return true; } -#else -static bool __head check_la57_support(unsigned long physaddr) -{ - return false; -} -#endif static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd) { @@ -173,23 +131,22 @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdv * doesn't have to generate PC-relative relocations when accessing globals from * that function. Clang actually does not generate them, which leads to * boot-time crashes. To work around this problem, every global pointer must - * be adjusted using fixup_pointer(). + * be accessed using RIP_REL_REF(). */ unsigned long __head __startup_64(unsigned long physaddr, struct boot_params *bp) { - unsigned long load_delta, *p; + pmd_t (*early_pgts)[PTRS_PER_PMD] = RIP_REL_REF(early_dynamic_pgts); unsigned long pgtable_flags; + unsigned long load_delta; pgdval_t *pgd; p4dval_t *p4d; pudval_t *pud; pmdval_t *pmd, pmd_entry; - pteval_t *mask_ptr; bool la57; int i; - unsigned int *next_pgt_ptr; - la57 = check_la57_support(physaddr); + la57 = check_la57_support(); /* Is the address too large? */ if (physaddr >> MAX_PHYSMEM_BITS) @@ -200,6 +157,7 @@ unsigned long __head __startup_64(unsigned long physaddr, * and the address I am actually running at. */ load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map); + RIP_REL_REF(phys_base) = load_delta; /* Is the address not 2M aligned? */ if (load_delta & ~PMD_MASK) @@ -210,26 +168,21 @@ unsigned long __head __startup_64(unsigned long physaddr, /* Fixup the physical addresses in the page table */ - pgd = fixup_pointer(early_top_pgt, physaddr); - p = pgd + pgd_index(__START_KERNEL_map); - if (la57) - *p = (unsigned long)level4_kernel_pgt; - else - *p = (unsigned long)level3_kernel_pgt; - *p += _PAGE_TABLE_NOENC - __START_KERNEL_map + load_delta; + pgd = &RIP_REL_REF(early_top_pgt)->pgd; + pgd[pgd_index(__START_KERNEL_map)] += load_delta; if (la57) { - p4d = fixup_pointer(level4_kernel_pgt, physaddr); - p4d[511] += load_delta; + p4d = (p4dval_t *)&RIP_REL_REF(level4_kernel_pgt); + p4d[MAX_PTRS_PER_P4D - 1] += load_delta; + + pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE_NOENC; } - pud = fixup_pointer(level3_kernel_pgt, physaddr); - pud[510] += load_delta; - pud[511] += load_delta; + RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 2].pud += load_delta; + RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 1].pud += load_delta; - pmd = fixup_pointer(level2_fixmap_pgt, physaddr); for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--) - pmd[i] += load_delta; + RIP_REL_REF(level2_fixmap_pgt)[i].pmd += load_delta; /* * Set up the identity mapping for the switchover. These @@ -238,15 +191,14 @@ unsigned long __head __startup_64(unsigned long physaddr, * it avoids problems around wraparound. */ - next_pgt_ptr = fixup_pointer(&next_early_pgt, physaddr); - pud = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr); - pmd = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr); + pud = &early_pgts[0]->pmd; + pmd = &early_pgts[1]->pmd; + RIP_REL_REF(next_early_pgt) = 2; pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask(); if (la57) { - p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], - physaddr); + p4d = &early_pgts[RIP_REL_REF(next_early_pgt)++]->pmd; i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; pgd[i + 0] = (pgdval_t)p4d + pgtable_flags; @@ -267,8 +219,7 @@ unsigned long __head __startup_64(unsigned long physaddr, pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; /* Filter out unsupported __PAGE_KERNEL_* bits: */ - mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr); - pmd_entry &= *mask_ptr; + pmd_entry &= RIP_REL_REF(__supported_pte_mask); pmd_entry += sme_get_me_mask(); pmd_entry += physaddr; @@ -294,7 +245,7 @@ unsigned long __head __startup_64(unsigned long physaddr, * error, causing the BIOS to halt the system. */ - pmd = fixup_pointer(level2_kernel_pgt, physaddr); + pmd = &RIP_REL_REF(level2_kernel_pgt)->pmd; /* invalidate pages before the kernel image */ for (i = 0; i < pmd_index((unsigned long)_text); i++) @@ -309,12 +260,6 @@ unsigned long __head __startup_64(unsigned long physaddr, for (; i < PTRS_PER_PMD; i++) pmd[i] &= ~_PAGE_PRESENT; - /* - * Fixup phys_base - remove the memory encryption mask to obtain - * the true physical address. - */ - *fixup_long(&phys_base, physaddr) += load_delta - sme_get_me_mask(); - return sme_postprocess_startup(bp, pmd); } @@ -486,6 +431,15 @@ asmlinkage __visible void __init __noreturn x86_64_start_kernel(char * real_mode (__START_KERNEL & PGDIR_MASK))); BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); + if (check_la57_support()) { + __pgtable_l5_enabled = 1; + pgdir_shift = 48; + ptrs_per_p4d = 512; + page_offset_base = __PAGE_OFFSET_BASE_L5; + vmalloc_base = __VMALLOC_BASE_L5; + vmemmap_base = __VMEMMAP_BASE_L5; + } + cr4_init_shadow(); /* Kill off the identity-map trampoline */ @@ -569,62 +523,52 @@ void __init __noreturn x86_64_start_reservations(char *real_mode_data) */ static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data; -static struct desc_ptr bringup_idt_descr = { - .size = (NUM_EXCEPTION_VECTORS * sizeof(gate_desc)) - 1, - .address = 0, /* Set at runtime */ -}; - -static void set_bringup_idt_handler(gate_desc *idt, int n, void *handler) +/* This may run while still in the direct mapping */ +static void __head startup_64_load_idt(void *vc_handler) { -#ifdef CONFIG_AMD_MEM_ENCRYPT + struct desc_ptr desc = { + .address = (unsigned long)&RIP_REL_REF(bringup_idt_table), + .size = sizeof(bringup_idt_table) - 1, + }; struct idt_data data; - gate_desc desc; - - init_idt_data(&data, n, handler); - idt_init_desc(&desc, &data); - native_write_idt_entry(idt, n, &desc); -#endif -} + gate_desc idt_desc; -/* This runs while still in the direct mapping */ -static void __head startup_64_load_idt(unsigned long physbase) -{ - struct desc_ptr *desc = fixup_pointer(&bringup_idt_descr, physbase); - gate_desc *idt = fixup_pointer(bringup_idt_table, physbase); - - - if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { - void *handler; - - /* VMM Communication Exception */ - handler = fixup_pointer(vc_no_ghcb, physbase); - set_bringup_idt_handler(idt, X86_TRAP_VC, handler); + /* @vc_handler is set only for a VMM Communication Exception */ + if (vc_handler) { + init_idt_data(&data, X86_TRAP_VC, vc_handler); + idt_init_desc(&idt_desc, &data); + native_write_idt_entry((gate_desc *)desc.address, X86_TRAP_VC, &idt_desc); } - desc->address = (unsigned long)idt; - native_load_idt(desc); + native_load_idt(&desc); } /* This is used when running on kernel addresses */ void early_setup_idt(void) { - /* VMM Communication Exception */ + void *handler = NULL; + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { setup_ghcb(); - set_bringup_idt_handler(bringup_idt_table, X86_TRAP_VC, vc_boot_ghcb); + handler = vc_boot_ghcb; } - bringup_idt_descr.address = (unsigned long)bringup_idt_table; - native_load_idt(&bringup_idt_descr); + startup_64_load_idt(handler); } /* * Setup boot CPU state needed before kernel switches to virtual addresses. */ -void __head startup_64_setup_env(unsigned long physbase) +void __head startup_64_setup_gdt_idt(void) { + void *handler = NULL; + + struct desc_ptr startup_gdt_descr = { + .address = (unsigned long)&RIP_REL_REF(init_per_cpu_var(gdt_page.gdt)), + .size = GDT_SIZE - 1, + }; + /* Load GDT */ - startup_gdt_descr.address = (unsigned long)fixup_pointer(startup_gdt, physbase); native_load_gdt(&startup_gdt_descr); /* New GDT is live - reload data segment registers */ @@ -632,5 +576,8 @@ void __head startup_64_setup_env(unsigned long physbase) "movl %%eax, %%ss\n" "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory"); - startup_64_load_idt(physbase); + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) + handler = &RIP_REL_REF(vc_no_ghcb); + + startup_64_load_idt(handler); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 3dbd05f93859..d8198fbd70e5 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -40,7 +40,6 @@ L4_START_KERNEL = l4_index(__START_KERNEL_map) L3_START_KERNEL = pud_index(__START_KERNEL_map) - .text __HEAD .code64 SYM_CODE_START_NOALIGN(startup_64) @@ -69,8 +68,6 @@ SYM_CODE_START_NOALIGN(startup_64) /* Set up the stack for verify_cpu() */ leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp - leaq _text(%rip), %rdi - /* Setup GSBASE to allow stack canary access for C code */ movl $MSR_GS_BASE, %ecx leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx @@ -78,7 +75,7 @@ SYM_CODE_START_NOALIGN(startup_64) shrq $32, %rdx wrmsr - call startup_64_setup_env + call startup_64_setup_gdt_idt /* Now switch to __KERNEL_CS so IRET works reliably */ pushq $__KERNEL_CS @@ -114,13 +111,11 @@ SYM_CODE_START_NOALIGN(startup_64) call __startup_64 /* Form the CR3 value being sure to include the CR3 modifier */ - addq $(early_top_pgt - __START_KERNEL_map), %rax + leaq early_top_pgt(%rip), %rcx + addq %rcx, %rax #ifdef CONFIG_AMD_MEM_ENCRYPT mov %rax, %rdi - mov %rax, %r14 - - addq phys_base(%rip), %rdi /* * For SEV guests: Verify that the C-bit is correct. A malicious @@ -129,17 +124,23 @@ SYM_CODE_START_NOALIGN(startup_64) * the next RET instruction. */ call sev_verify_cbit +#endif /* - * Restore CR3 value without the phys_base which will be added - * below, before writing %cr3. + * Switch to early_top_pgt which still has the identity mappings + * present. */ - mov %r14, %rax -#endif + movq %rax, %cr3 - jmp 1f + /* Branch to the common startup code at its kernel virtual address */ + ANNOTATE_RETPOLINE_SAFE + jmp *0f(%rip) SYM_CODE_END(startup_64) + __INITRODATA +0: .quad common_startup_64 + + .text SYM_CODE_START(secondary_startup_64) UNWIND_HINT_END_OF_STACK ANNOTATE_NOENDBR @@ -172,22 +173,39 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) ANNOTATE_NOENDBR /* Clear %R15 which holds the boot_params pointer on the boot CPU */ - xorq %r15, %r15 + xorl %r15d, %r15d + + /* Derive the runtime physical address of init_top_pgt[] */ + movq phys_base(%rip), %rax + addq $(init_top_pgt - __START_KERNEL_map), %rax /* * Retrieve the modifier (SME encryption mask if SME is active) to be * added to the initial pgdir entry that will be programmed into CR3. */ #ifdef CONFIG_AMD_MEM_ENCRYPT - movq sme_me_mask, %rax -#else - xorq %rax, %rax + addq sme_me_mask(%rip), %rax #endif + /* + * Switch to the init_top_pgt here, away from the trampoline_pgd and + * unmap the identity mapped ranges. + */ + movq %rax, %cr3 - /* Form the CR3 value being sure to include the CR3 modifier */ - addq $(init_top_pgt - __START_KERNEL_map), %rax -1: +SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL) + UNWIND_HINT_END_OF_STACK + ANNOTATE_NOENDBR + /* + * Create a mask of CR4 bits to preserve. Omit PGE in order to flush + * global 1:1 translations from the TLBs. + * + * From the SDM: + * "If CR4.PGE is changing from 0 to 1, there were no global TLB + * entries before the execution; if CR4.PGE is changing from 1 to 0, + * there will be no global TLB entries after the execution." + */ + movl $(X86_CR4_PAE | X86_CR4_LA57), %edx #ifdef CONFIG_X86_MCE /* * Preserve CR4.MCE if the kernel will enable #MC support. @@ -196,52 +214,20 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) * configured will crash the system regardless of the CR4.MCE value set * here. */ - movq %cr4, %rcx - andl $X86_CR4_MCE, %ecx -#else - movl $0, %ecx + orl $X86_CR4_MCE, %edx #endif + movq %cr4, %rcx + andl %edx, %ecx - /* Enable PAE mode, PSE, PGE and LA57 */ - orl $(X86_CR4_PAE | X86_CR4_PSE | X86_CR4_PGE), %ecx -#ifdef CONFIG_X86_5LEVEL - testb $1, __pgtable_l5_enabled(%rip) - jz 1f - orl $X86_CR4_LA57, %ecx -1: -#endif + /* Even if ignored in long mode, set PSE uniformly on all logical CPUs. */ + btsl $X86_CR4_PSE_BIT, %ecx movq %rcx, %cr4 - /* Setup early boot stage 4-/5-level pagetables. */ - addq phys_base(%rip), %rax - /* - * Switch to new page-table - * - * For the boot CPU this switches to early_top_pgt which still has the - * identity mappings present. The secondary CPUs will switch to the - * init_top_pgt here, away from the trampoline_pgd and unmap the - * identity mapped ranges. + * Set CR4.PGE to re-enable global translations. */ - movq %rax, %cr3 - - /* - * Do a global TLB flush after the CR3 switch to make sure the TLB - * entries from the identity mapping are flushed. - */ - movq %cr4, %rcx - movq %rcx, %rax - xorq $X86_CR4_PGE, %rcx + btsl $X86_CR4_PGE_BIT, %ecx movq %rcx, %cr4 - movq %rax, %cr4 - - /* Ensure I am executing from virtual addresses */ - movq $1f, %rax - ANNOTATE_RETPOLINE_SAFE - jmp *%rax -1: - UNWIND_HINT_END_OF_STACK - ANNOTATE_NOENDBR // above #ifdef CONFIG_SMP /* @@ -298,7 +284,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) .Llookup_AP: /* EAX contains the APIC ID of the current CPU */ - xorq %rcx, %rcx + xorl %ecx, %ecx leaq cpuid_to_apicid(%rip), %rbx .Lfind_cpunr: @@ -429,39 +415,10 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) movq %r15, %rdi .Ljump_to_C_code: - /* - * Jump to run C code and to be on a real kernel address. - * Since we are running on identity-mapped space we have to jump - * to the full 64bit address, this is only possible as indirect - * jump. In addition we need to ensure %cs is set so we make this - * a far return. - * - * Note: do not change to far jump indirect with 64bit offset. - * - * AMD does not support far jump indirect with 64bit offset. - * AMD64 Architecture Programmer's Manual, Volume 3: states only - * JMP FAR mem16:16 FF /5 Far jump indirect, - * with the target specified by a far pointer in memory. - * JMP FAR mem16:32 FF /5 Far jump indirect, - * with the target specified by a far pointer in memory. - * - * Intel64 does support 64bit offset. - * Software Developer Manual Vol 2: states: - * FF /5 JMP m16:16 Jump far, absolute indirect, - * address given in m16:16 - * FF /5 JMP m16:32 Jump far, absolute indirect, - * address given in m16:32. - * REX.W + FF /5 JMP m16:64 Jump far, absolute indirect, - * address given in m16:64. - */ - pushq $.Lafter_lret # put return address on stack for unwinder xorl %ebp, %ebp # clear frame pointer - movq initial_code(%rip), %rax - pushq $__KERNEL_CS # set correct cs - pushq %rax # target address in negative space - lretq -.Lafter_lret: - ANNOTATE_NOENDBR + ANNOTATE_RETPOLINE_SAFE + callq *initial_code(%rip) + ud2 SYM_CODE_END(secondary_startup_64) #include "verify_cpu.S" @@ -656,7 +613,8 @@ SYM_CODE_END(vc_no_ghcb) .balign 4 SYM_DATA_START_PTI_ALIGNED(early_top_pgt) - .fill 512,8,0 + .fill 511,8,0 + .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC .fill PTI_USER_PGD_FILL,8,0 SYM_DATA_END(early_top_pgt) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 2a422e00ed4b..cde167b0ea92 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -503,7 +503,10 @@ static void *bzImage64_load(struct kimage *image, char *kernel, kbuf.bufsz = kernel_len - kern16_size; kbuf.memsz = PAGE_ALIGN(header->init_size); kbuf.buf_align = header->kernel_alignment; - kbuf.buf_min = MIN_KERNEL_LOAD_ADDR; + if (header->pref_address < MIN_KERNEL_LOAD_ADDR) + kbuf.buf_min = MIN_KERNEL_LOAD_ADDR; + else + kbuf.buf_min = header->pref_address; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); if (ret) diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index a200bd72fadc..8b04958da5e7 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -95,7 +95,8 @@ static bool __init sev_es_check_cpu_features(void) return true; } -static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason) +static void __head __noreturn +sev_es_terminate(unsigned int set, unsigned int reason) { u64 val = GHCB_MSR_TERM_REQ; @@ -332,13 +333,7 @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid */ static const struct snp_cpuid_table *snp_cpuid_get_table(void) { - void *ptr; - - asm ("lea cpuid_table_copy(%%rip), %0" - : "=r" (ptr) - : "p" (&cpuid_table_copy)); - - return ptr; + return &RIP_REL_REF(cpuid_table_copy); } /* @@ -397,7 +392,7 @@ static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted) return xsave_size; } -static bool +static bool __head snp_cpuid_get_validated_func(struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -534,7 +529,8 @@ static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt, * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value * should be treated as fatal by caller. */ -static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +static int __head +snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -576,7 +572,7 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le * page yet, so it only supports the MSR based communication with the * hypervisor and only the CPUID exit-code. */ -void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) +void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) { unsigned int subfn = lower_bits(regs->cx, 32); unsigned int fn = lower_bits(regs->ax, 32); @@ -1027,7 +1023,8 @@ struct cc_setup_data { * Search for a Confidential Computing blob passed in as a setup_data entry * via the Linux Boot Protocol. */ -static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) +static __head +struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) { struct cc_setup_data *sd = NULL; struct setup_data *hdr; @@ -1054,7 +1051,7 @@ static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) * mapping needs to be updated in sync with all the changes to virtual memory * layout and related mapping facilities throughout the boot process. */ -static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) +static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info) { const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table; int i; diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 7d242898852f..b59b09c2f284 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -25,6 +25,7 @@ #include <linux/psp-sev.h> #include <uapi/linux/sev-guest.h> +#include <asm/init.h> #include <asm/cpu_entry_area.h> #include <asm/stacktrace.h> #include <asm/sev.h> @@ -701,8 +702,9 @@ static u64 __init get_jump_table_addr(void) return ret; } -static void early_set_pages_state(unsigned long vaddr, unsigned long paddr, - unsigned long npages, enum psc_op op) +static void __head +early_set_pages_state(unsigned long vaddr, unsigned long paddr, + unsigned long npages, enum psc_op op) { unsigned long paddr_end; u64 val; @@ -758,7 +760,7 @@ e_term: sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); } -void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, +void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { /* @@ -2081,7 +2083,7 @@ fail: * * Scan for the blob in that order. */ -static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) +static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) { struct cc_blob_sev_info *cc_info; @@ -2107,7 +2109,7 @@ found_cc_info: return cc_info; } -bool __init snp_init(struct boot_params *bp) +bool __head snp_init(struct boot_params *bp) { struct cc_blob_sev_info *cc_info; @@ -2129,7 +2131,7 @@ bool __init snp_init(struct boot_params *bp) return true; } -void __init __noreturn snp_abort(void) +void __head __noreturn snp_abort(void) { sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } diff --git a/arch/x86/kernel/sev_verify_cbit.S b/arch/x86/kernel/sev_verify_cbit.S index 3355e27c69eb..1ab65f6c6ae7 100644 --- a/arch/x86/kernel/sev_verify_cbit.S +++ b/arch/x86/kernel/sev_verify_cbit.S @@ -77,7 +77,7 @@ SYM_FUNC_START(sev_verify_cbit) * The check failed, prevent any forward progress to prevent ROP * attacks, invalidate the stack and go into a hlt loop. */ - xorq %rsp, %rsp + xorl %esp, %esp subq $0x1000, %rsp 2: hlt jmp 2b |