diff options
-rw-r--r-- | Documentation/x86/x86_64/mm.txt | 6 | ||||
-rw-r--r-- | arch/x86/Kconfig | 10 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 6 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 55 | ||||
-rw-r--r-- | arch/x86/mm/kaslr.c | 94 |
5 files changed, 66 insertions, 105 deletions
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 804f9426ed17..6cbe652d7a49 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -72,7 +72,7 @@ Complete virtual memory map with 5-level page tables Notes: - With 56-bit addresses, user-space memory gets expanded by a factor of 512x, - from 0.125 PB to 64 PB. All kernel mappings shift down to the -64 PT starting + from 0.125 PB to 64 PB. All kernel mappings shift down to the -64 PB starting offset and many of the regions expand to support the much larger physical memory supported. @@ -83,7 +83,7 @@ Notes: 0000000000000000 | 0 | 00ffffffffffffff | 64 PB | user-space virtual memory, different per mm __________________|____________|__________________|_________|___________________________________________________________ | | | | - 0000800000000000 | +64 PB | ffff7fffffffffff | ~16K PB | ... huge, still almost 64 bits wide hole of non-canonical + 0100000000000000 | +64 PB | feffffffffffffff | ~16K PB | ... huge, still almost 64 bits wide hole of non-canonical | | | | virtual memory addresses up to the -64 PB | | | | starting offset of kernel mappings. __________________|____________|__________________|_________|___________________________________________________________ @@ -99,7 +99,7 @@ ____________________________________________________________|___________________ ffd2000000000000 | -11.5 PB | ffd3ffffffffffff | 0.5 PB | ... unused hole ffd4000000000000 | -11 PB | ffd5ffffffffffff | 0.5 PB | virtual memory map (vmemmap_base) ffd6000000000000 | -10.5 PB | ffdeffffffffffff | 2.25 PB | ... unused hole - ffdf000000000000 | -8.25 PB | fffffdffffffffff | ~8 PB | KASAN shadow memory + ffdf000000000000 | -8.25 PB | fffffbffffffffff | ~8 PB | KASAN shadow memory __________________|____________|__________________|_________|____________________________________________________________ | | Identical layout to the 47-bit one from here on: diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 62fc3fda1a05..bd6f93ce0633 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1606,12 +1606,9 @@ config ARCH_FLATMEM_ENABLE depends on X86_32 && !NUMA config ARCH_DISCONTIGMEM_ENABLE - def_bool y - depends on NUMA && X86_32 - -config ARCH_DISCONTIGMEM_DEFAULT - def_bool y + def_bool n depends on NUMA && X86_32 + depends on BROKEN config ARCH_SPARSEMEM_ENABLE def_bool y @@ -1620,8 +1617,7 @@ config ARCH_SPARSEMEM_ENABLE select SPARSEMEM_VMEMMAP_ENABLE if X86_64 config ARCH_SPARSEMEM_DEFAULT - def_bool y - depends on X86_64 + def_bool X86_64 || (NUMA && X86_32) config ARCH_SELECT_MEMORY_MODEL def_bool y diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index a5127b2c195f..834659288ba9 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -141,11 +141,11 @@ SECTIONS *(.text.__x86.indirect_thunk) __indirect_thunk_end = .; #endif - - /* End of text section */ - _etext = .; } :text = 0x9090 + /* End of text section */ + _etext = .; + NOTES :text :note EXCEPTION_TABLE(16) :text = 0x9090 diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 667f1da36208..bd20de9db1a8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -359,8 +359,6 @@ static noinline int vmalloc_fault(unsigned long address) if (!(address >= VMALLOC_START && address < VMALLOC_END)) return -1; - WARN_ON_ONCE(in_nmi()); - /* * Copy kernel mappings over when needed. This can also * happen within a race in page table update. In the later @@ -603,24 +601,9 @@ static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index) name, index, addr, (desc.limit0 | (desc.limit1 << 16))); } -/* - * This helper function transforms the #PF error_code bits into - * "[PROT] [USER]" type of descriptive, almost human-readable error strings: - */ -static void err_str_append(unsigned long error_code, char *buf, unsigned long mask, const char *txt) -{ - if (error_code & mask) { - if (buf[0]) - strcat(buf, " "); - strcat(buf, txt); - } -} - static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address) { - char err_txt[64]; - if (!oops_may_print()) return; @@ -644,31 +627,29 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad from_kuid(&init_user_ns, current_uid())); } - pr_alert("BUG: unable to handle kernel %s at %px\n", - address < PAGE_SIZE ? "NULL pointer dereference" : "paging request", - (void *)address); - - err_txt[0] = 0; - - /* - * Note: length of these appended strings including the separation space and the - * zero delimiter must fit into err_txt[]. - */ - err_str_append(error_code, err_txt, X86_PF_PROT, "[PROT]" ); - err_str_append(error_code, err_txt, X86_PF_WRITE, "[WRITE]"); - err_str_append(error_code, err_txt, X86_PF_USER, "[USER]" ); - err_str_append(error_code, err_txt, X86_PF_RSVD, "[RSVD]" ); - err_str_append(error_code, err_txt, X86_PF_INSTR, "[INSTR]"); - err_str_append(error_code, err_txt, X86_PF_PK, "[PK]" ); - - pr_alert("#PF error: %s\n", error_code ? err_txt : "[normal kernel read fault]"); + if (address < PAGE_SIZE && !user_mode(regs)) + pr_alert("BUG: kernel NULL pointer dereference, address: %px\n", + (void *)address); + else + pr_alert("BUG: unable to handle page fault for address: %px\n", + (void *)address); + + pr_alert("#PF: %s %s in %s mode\n", + (error_code & X86_PF_USER) ? "user" : "supervisor", + (error_code & X86_PF_INSTR) ? "instruction fetch" : + (error_code & X86_PF_WRITE) ? "write access" : + "read access", + user_mode(regs) ? "user" : "kernel"); + pr_alert("#PF: error_code(0x%04lx) - %s\n", error_code, + !(error_code & X86_PF_PROT) ? "not-present page" : + (error_code & X86_PF_RSVD) ? "reserved bit violation" : + (error_code & X86_PF_PK) ? "protection keys violation" : + "permissions violation"); if (!(error_code & X86_PF_USER) && user_mode(regs)) { struct desc_ptr idt, gdt; u16 ldtr, tr; - pr_alert("This was a system access from user code\n"); - /* * This can happen for quite a few reasons. The more obvious * ones are faults accessing the GDT, or LDT. Perhaps diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index d669c5e797e0..dc3f058bdf9b 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -125,10 +125,7 @@ void __init kernel_randomize_memory(void) */ entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i); prandom_bytes_state(&rand_state, &rand, sizeof(rand)); - if (pgtable_l5_enabled()) - entropy = (rand % (entropy + 1)) & P4D_MASK; - else - entropy = (rand % (entropy + 1)) & PUD_MASK; + entropy = (rand % (entropy + 1)) & PUD_MASK; vaddr += entropy; *kaslr_regions[i].base = vaddr; @@ -137,84 +134,71 @@ void __init kernel_randomize_memory(void) * randomization alignment. */ vaddr += get_padding(&kaslr_regions[i]); - if (pgtable_l5_enabled()) - vaddr = round_up(vaddr + 1, P4D_SIZE); - else - vaddr = round_up(vaddr + 1, PUD_SIZE); + vaddr = round_up(vaddr + 1, PUD_SIZE); remain_entropy -= entropy; } } static void __meminit init_trampoline_pud(void) { - unsigned long paddr, paddr_next; + pud_t *pud_page_tramp, *pud, *pud_tramp; + p4d_t *p4d_page_tramp, *p4d, *p4d_tramp; + unsigned long paddr, vaddr; pgd_t *pgd; - pud_t *pud_page, *pud_page_tramp; - int i; pud_page_tramp = alloc_low_page(); + /* + * There are two mappings for the low 1MB area, the direct mapping + * and the 1:1 mapping for the real mode trampoline: + * + * Direct mapping: virt_addr = phys_addr + PAGE_OFFSET + * 1:1 mapping: virt_addr = phys_addr + */ paddr = 0; - pgd = pgd_offset_k((unsigned long)__va(paddr)); - pud_page = (pud_t *) pgd_page_vaddr(*pgd); - - for (i = pud_index(paddr); i < PTRS_PER_PUD; i++, paddr = paddr_next) { - pud_t *pud, *pud_tramp; - unsigned long vaddr = (unsigned long)__va(paddr); + vaddr = (unsigned long)__va(paddr); + pgd = pgd_offset_k(vaddr); - pud_tramp = pud_page_tramp + pud_index(paddr); - pud = pud_page + pud_index(vaddr); - paddr_next = (paddr & PUD_MASK) + PUD_SIZE; - - *pud_tramp = *pud; - } + p4d = p4d_offset(pgd, vaddr); + pud = pud_offset(p4d, vaddr); - set_pgd(&trampoline_pgd_entry, - __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); -} - -static void __meminit init_trampoline_p4d(void) -{ - unsigned long paddr, paddr_next; - pgd_t *pgd; - p4d_t *p4d_page, *p4d_page_tramp; - int i; + pud_tramp = pud_page_tramp + pud_index(paddr); + *pud_tramp = *pud; - p4d_page_tramp = alloc_low_page(); - - paddr = 0; - pgd = pgd_offset_k((unsigned long)__va(paddr)); - p4d_page = (p4d_t *) pgd_page_vaddr(*pgd); - - for (i = p4d_index(paddr); i < PTRS_PER_P4D; i++, paddr = paddr_next) { - p4d_t *p4d, *p4d_tramp; - unsigned long vaddr = (unsigned long)__va(paddr); + if (pgtable_l5_enabled()) { + p4d_page_tramp = alloc_low_page(); p4d_tramp = p4d_page_tramp + p4d_index(paddr); - p4d = p4d_page + p4d_index(vaddr); - paddr_next = (paddr & P4D_MASK) + P4D_SIZE; - *p4d_tramp = *p4d; - } + set_p4d(p4d_tramp, + __p4d(_KERNPG_TABLE | __pa(pud_page_tramp))); - set_pgd(&trampoline_pgd_entry, - __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp))); + set_pgd(&trampoline_pgd_entry, + __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp))); + } else { + set_pgd(&trampoline_pgd_entry, + __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); + } } /* - * Create PGD aligned trampoline table to allow real mode initialization - * of additional CPUs. Consume only 1 low memory page. + * The real mode trampoline, which is required for bootstrapping CPUs + * occupies only a small area under the low 1MB. See reserve_real_mode() + * for details. + * + * If KASLR is disabled the first PGD entry of the direct mapping is copied + * to map the real mode trampoline. + * + * If KASLR is enabled, copy only the PUD which covers the low 1MB + * area. This limits the randomization granularity to 1GB for both 4-level + * and 5-level paging. */ void __meminit init_trampoline(void) { - if (!kaslr_memory_enabled()) { init_trampoline_default(); return; } - if (pgtable_l5_enabled()) - init_trampoline_p4d(); - else - init_trampoline_pud(); + init_trampoline_pud(); } |