From ae74589cb381cc2838107f92d4e0e1f178c6825d Mon Sep 17 00:00:00 2001 From: "bibo,mao" Date: Sat, 28 Oct 2006 10:38:29 -0700 Subject: [PATCH] fix efi_memory_present_wrapper() efi_memory_present_wrapper() parameter start/end is physical address, but function memory_present parameter is PFN, this patch converts physical address to PFN. Signed-off-by: bibo, mao Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 519e63c3c130..141041dde74d 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -846,7 +846,7 @@ efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) static int __init efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) { - memory_present(0, start, end); + memory_present(0, PFN_UP(start), PFN_DOWN(end)); return 0; } -- cgit v1.2.3 From 8621b81c744ff8880a1efe095a4dcd09763ddb5a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 7 Dec 2006 02:14:03 +0100 Subject: [PATCH] i386: Reserve kernel memory starting from _text Currently when we are reserving the memory the kernel text resides in we start at __PHYSICAL_START which happens to be correct but not very obvious. In addition when we start relocating the kernel __PHYSICAL_START is the wrong value, as it is an absolute symbol that does not get relocated. By starting the reservation at __pa_symbol(_text) the code is clearer and will be correct when relocated. Signed-off-by: Eric W. Biederman Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- arch/i386/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 141041dde74d..61539afbdf23 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1118,8 +1118,8 @@ void __init setup_bootmem_allocator(void) * the (very unlikely) case of us accidentally initializing the * bootmem allocator with an invalid RAM area. */ - reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) + - bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START)); + reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) + + bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text)); /* * reserve physical page 0 - it's a special BIOS page on many boxes, -- cgit v1.2.3 From 269c2d81ed66af7c09a1619ffe165f03e7470a5b Mon Sep 17 00:00:00 2001 From: "bibo,mao" Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] i386: i386 create e820.c to handle standard io/mem resources This patch creates new file named e820.c to hanle standard io/mem resources, moving request_standard_resources function from setup.c to e820.c. Also this patch modifies Makfile to compile file e820.c. Signed-off-by: bibo,mao Signed-off-by: Andi Kleen Makefile | 2 arch/i386/kernel/Makefile | 2 arch/i386/kernel/e820.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 276 ------------------------------------------- 3 files changed, 293 insertions(+), 274 deletions(-) --- arch/i386/kernel/Makefile | 2 +- arch/i386/kernel/e820.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 276 +------------------------------------------ 3 files changed, 293 insertions(+), 274 deletions(-) create mode 100644 arch/i386/kernel/e820.c (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 1a884b6e6e5c..f614854bd71f 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -6,7 +6,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y := process.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ - pci-dma.o i386_ksyms.o i387.o bootflag.o \ + pci-dma.o i386_ksyms.o i387.o bootflag.o e820.o\ quirks.o i8237.o topology.o alternative.o i8253.o tsc.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c new file mode 100644 index 000000000000..cce706049488 --- /dev/null +++ b/arch/i386/kernel/e820.c @@ -0,0 +1,289 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#ifdef CONFIG_EFI +int efi_enabled = 0; +EXPORT_SYMBOL(efi_enabled); +#endif + +struct e820map e820; +struct resource data_resource = { + .name = "Kernel data", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +struct resource code_resource = { + .name = "Kernel code", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource system_rom_resource = { + .name = "System ROM", + .start = 0xf0000, + .end = 0xfffff, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}; + +static struct resource extension_rom_resource = { + .name = "Extension ROM", + .start = 0xe0000, + .end = 0xeffff, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}; + +static struct resource adapter_rom_resources[] = { { + .name = "Adapter ROM", + .start = 0xc8000, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +} }; + +static struct resource video_rom_resource = { + .name = "Video ROM", + .start = 0xc0000, + .end = 0xc7fff, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}; + +static struct resource video_ram_resource = { + .name = "Video RAM area", + .start = 0xa0000, + .end = 0xbffff, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource standard_io_resources[] = { { + .name = "dma1", + .start = 0x0000, + .end = 0x001f, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "pic1", + .start = 0x0020, + .end = 0x0021, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "timer0", + .start = 0x0040, + .end = 0x0043, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "timer1", + .start = 0x0050, + .end = 0x0053, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "keyboard", + .start = 0x0060, + .end = 0x006f, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "dma page reg", + .start = 0x0080, + .end = 0x008f, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "pic2", + .start = 0x00a0, + .end = 0x00a1, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "dma2", + .start = 0x00c0, + .end = 0x00df, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "fpu", + .start = 0x00f0, + .end = 0x00ff, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +} }; + +#define romsignature(x) (*(unsigned short *)(x) == 0xaa55) + +static int __init romchecksum(unsigned char *rom, unsigned long length) +{ + unsigned char *p, sum = 0; + + for (p = rom; p < rom + length; p++) + sum += *p; + return sum == 0; +} + +static void __init probe_roms(void) +{ + unsigned long start, length, upper; + unsigned char *rom; + int i; + + /* video rom */ + upper = adapter_rom_resources[0].start; + for (start = video_rom_resource.start; start < upper; start += 2048) { + rom = isa_bus_to_virt(start); + if (!romsignature(rom)) + continue; + + video_rom_resource.start = start; + + /* 0 < length <= 0x7f * 512, historically */ + length = rom[2] * 512; + + /* if checksum okay, trust length byte */ + if (length && romchecksum(rom, length)) + video_rom_resource.end = start + length - 1; + + request_resource(&iomem_resource, &video_rom_resource); + break; + } + + start = (video_rom_resource.end + 1 + 2047) & ~2047UL; + if (start < upper) + start = upper; + + /* system rom */ + request_resource(&iomem_resource, &system_rom_resource); + upper = system_rom_resource.start; + + /* check for extension rom (ignore length byte!) */ + rom = isa_bus_to_virt(extension_rom_resource.start); + if (romsignature(rom)) { + length = extension_rom_resource.end - extension_rom_resource.start + 1; + if (romchecksum(rom, length)) { + request_resource(&iomem_resource, &extension_rom_resource); + upper = extension_rom_resource.start; + } + } + + /* check for adapter roms on 2k boundaries */ + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { + rom = isa_bus_to_virt(start); + if (!romsignature(rom)) + continue; + + /* 0 < length <= 0x7f * 512, historically */ + length = rom[2] * 512; + + /* but accept any length that fits if checksum okay */ + if (!length || start + length > upper || !romchecksum(rom, length)) + continue; + + adapter_rom_resources[i].start = start; + adapter_rom_resources[i].end = start + length - 1; + request_resource(&iomem_resource, &adapter_rom_resources[i]); + + start = adapter_rom_resources[i++].end & ~2047UL; + } +} + +/* + * Request address space for all standard RAM and ROM resources + * and also for regions reported as reserved by the e820. + */ +static void __init +legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) +{ + int i; + + probe_roms(); + for (i = 0; i < e820.nr_map; i++) { + struct resource *res; +#ifndef CONFIG_RESOURCES_64BIT + if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) + continue; +#endif + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); + switch (e820.map[i].type) { + case E820_RAM: res->name = "System RAM"; break; + case E820_ACPI: res->name = "ACPI Tables"; break; + case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; + default: res->name = "reserved"; + } + res->start = e820.map[i].addr; + res->end = res->start + e820.map[i].size - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + if (request_resource(&iomem_resource, res)) { + kfree(res); + continue; + } + if (e820.map[i].type == E820_RAM) { + /* + * We don't know which RAM region contains kernel data, + * so we try it repeatedly and let the resource manager + * test it. + */ + request_resource(res, code_resource); + request_resource(res, data_resource); +#ifdef CONFIG_KEXEC + request_resource(res, &crashk_res); +#endif + } + } +} + +/* + * Request address space for all standard resources + * + * This is called just before pcibios_init(), which is also a + * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). + */ +static int __init request_standard_resources(void) +{ + int i; + + printk("Setting up standard PCI resources\n"); + if (efi_enabled) + efi_initialize_iomem_resources(&code_resource, &data_resource); + else + legacy_init_iomem_resources(&code_resource, &data_resource); + + /* EFI systems may still have VGA */ + request_resource(&iomem_resource, &video_ram_resource); + + /* request I/O space for devices used on all i[345]86 PCs */ + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) + request_resource(&ioport_resource, &standard_io_resources[i]); + return 0; +} + +subsys_initcall(request_standard_resources); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 61539afbdf23..acd2d9392abb 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -76,11 +76,9 @@ int disable_pse __devinitdata = 0; /* * Machine setup.. */ - -#ifdef CONFIG_EFI -int efi_enabled = 0; -EXPORT_SYMBOL(efi_enabled); -#endif +extern struct e820map e820; +extern struct resource code_resource; +extern struct resource data_resource; /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; @@ -134,7 +132,6 @@ struct ist_info ist_info; defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) EXPORT_SYMBOL(ist_info); #endif -struct e820map e820; extern void early_cpu_init(void); extern int root_mountflags; @@ -149,203 +146,6 @@ static char command_line[COMMAND_LINE_SIZE]; unsigned char __initdata boot_params[PARAM_SIZE]; -static struct resource data_resource = { - .name = "Kernel data", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource code_resource = { - .name = "Kernel code", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource system_rom_resource = { - .name = "System ROM", - .start = 0xf0000, - .end = 0xfffff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource extension_rom_resource = { - .name = "Extension ROM", - .start = 0xe0000, - .end = 0xeffff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource adapter_rom_resources[] = { { - .name = "Adapter ROM", - .start = 0xc8000, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -} }; - -static struct resource video_rom_resource = { - .name = "Video ROM", - .start = 0xc0000, - .end = 0xc7fff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource video_ram_resource = { - .name = "Video RAM area", - .start = 0xa0000, - .end = 0xbffff, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource standard_io_resources[] = { { - .name = "dma1", - .start = 0x0000, - .end = 0x001f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic1", - .start = 0x0020, - .end = 0x0021, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer0", - .start = 0x0040, - .end = 0x0043, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer1", - .start = 0x0050, - .end = 0x0053, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "keyboard", - .start = 0x0060, - .end = 0x006f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma page reg", - .start = 0x0080, - .end = 0x008f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic2", - .start = 0x00a0, - .end = 0x00a1, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma2", - .start = 0x00c0, - .end = 0x00df, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "fpu", - .start = 0x00f0, - .end = 0x00ff, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -} }; - -#define romsignature(x) (*(unsigned short *)(x) == 0xaa55) - -static int __init romchecksum(unsigned char *rom, unsigned long length) -{ - unsigned char *p, sum = 0; - - for (p = rom; p < rom + length; p++) - sum += *p; - return sum == 0; -} - -static void __init probe_roms(void) -{ - unsigned long start, length, upper; - unsigned char *rom; - int i; - - /* video rom */ - upper = adapter_rom_resources[0].start; - for (start = video_rom_resource.start; start < upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - video_rom_resource.start = start; - - /* 0 < length <= 0x7f * 512, historically */ - length = rom[2] * 512; - - /* if checksum okay, trust length byte */ - if (length && romchecksum(rom, length)) - video_rom_resource.end = start + length - 1; - - request_resource(&iomem_resource, &video_rom_resource); - break; - } - - start = (video_rom_resource.end + 1 + 2047) & ~2047UL; - if (start < upper) - start = upper; - - /* system rom */ - request_resource(&iomem_resource, &system_rom_resource); - upper = system_rom_resource.start; - - /* check for extension rom (ignore length byte!) */ - rom = isa_bus_to_virt(extension_rom_resource.start); - if (romsignature(rom)) { - length = extension_rom_resource.end - extension_rom_resource.start + 1; - if (romchecksum(rom, length)) { - request_resource(&iomem_resource, &extension_rom_resource); - upper = extension_rom_resource.start; - } - } - - /* check for adapter roms on 2k boundaries */ - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - /* 0 < length <= 0x7f * 512, historically */ - length = rom[2] * 512; - - /* but accept any length that fits if checksum okay */ - if (!length || start + length > upper || !romchecksum(rom, length)) - continue; - - adapter_rom_resources[i].start = start; - adapter_rom_resources[i].end = start + length - 1; - request_resource(&iomem_resource, &adapter_rom_resources[i]); - - start = adapter_rom_resources[i++].end & ~2047UL; - } -} - static void __init limit_regions(unsigned long long size) { unsigned long long current_addr = 0; @@ -1200,77 +1000,7 @@ void __init remapped_pgdat_init(void) } } -/* - * Request address space for all standard RAM and ROM resources - * and also for regions reported as reserved by the e820. - */ -static void __init -legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) -{ - int i; - - probe_roms(); - for (i = 0; i < e820.nr_map; i++) { - struct resource *res; -#ifndef CONFIG_RESOURCES_64BIT - if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) - continue; -#endif - res = kzalloc(sizeof(struct resource), GFP_ATOMIC); - switch (e820.map[i].type) { - case E820_RAM: res->name = "System RAM"; break; - case E820_ACPI: res->name = "ACPI Tables"; break; - case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; - default: res->name = "reserved"; - } - res->start = e820.map[i].addr; - res->end = res->start + e820.map[i].size - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - if (request_resource(&iomem_resource, res)) { - kfree(res); - continue; - } - if (e820.map[i].type == E820_RAM) { - /* - * We don't know which RAM region contains kernel data, - * so we try it repeatedly and let the resource manager - * test it. - */ - request_resource(res, code_resource); - request_resource(res, data_resource); -#ifdef CONFIG_KEXEC - request_resource(res, &crashk_res); -#endif - } - } -} - -/* - * Request address space for all standard resources - * - * This is called just before pcibios_init(), which is also a - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). - */ -static int __init request_standard_resources(void) -{ - int i; - - printk("Setting up standard PCI resources\n"); - if (efi_enabled) - efi_initialize_iomem_resources(&code_resource, &data_resource); - else - legacy_init_iomem_resources(&code_resource, &data_resource); - - /* EFI systems may still have VGA */ - request_resource(&iomem_resource, &video_ram_resource); - - /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) - request_resource(&ioport_resource, &standard_io_resources[i]); - return 0; -} -subsys_initcall(request_standard_resources); static void __init register_memory(void) { -- cgit v1.2.3 From 8e3342f736dd1c19ce7c28625dedd7d8730fc7ad Mon Sep 17 00:00:00 2001 From: "bibo,mao" Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] i386: create e820.c for e820 map sanitize and copy function This patch moves bios e820 map sanitize and copy function from setup.c to e820.c Signed-off-by: bibo,mao Signed-off-by: Andi Kleen arch/i386/kernel/e820.c | 252 +++++++++++++++++++++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 240 -------------------------------------------- 2 files changed, 252 insertions(+), 240 deletions(-) --- arch/i386/kernel/e820.c | 252 +++++++++++++++++++++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 240 -------------------------------------------- 2 files changed, 252 insertions(+), 240 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index cce706049488..0db95760b073 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c @@ -19,6 +19,14 @@ EXPORT_SYMBOL(efi_enabled); #endif struct e820map e820; +struct change_member { + struct e820entry *pbios; /* pointer to original bios entry */ + unsigned long long addr; /* address for this change point */ +}; +static struct change_member change_point_list[2*E820MAX] __initdata; +static struct change_member *change_point[2*E820MAX] __initdata; +static struct e820entry *overlap_list[E820MAX] __initdata; +static struct e820entry new_bios[E820MAX] __initdata; struct resource data_resource = { .name = "Kernel data", .start = 0, @@ -287,3 +295,247 @@ static int __init request_standard_resources(void) } subsys_initcall(request_standard_resources); + +void __init add_memory_region(unsigned long long start, + unsigned long long size, int type) +{ + int x; + + if (!efi_enabled) { + x = e820.nr_map; + + if (x == E820MAX) { + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); + return; + } + + e820.map[x].addr = start; + e820.map[x].size = size; + e820.map[x].type = type; + e820.nr_map++; + } +} /* add_memory_region */ + +/* + * Sanitize the BIOS e820 map. + * + * Some e820 responses include overlapping entries. The following + * replaces the original e820 map with a new one, removing overlaps. + * + */ +int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) +{ + struct change_member *change_tmp; + unsigned long current_type, last_type; + unsigned long long last_addr; + int chgidx, still_changing; + int overlap_entries; + int new_bios_entry; + int old_nr, new_nr, chg_nr; + int i; + + /* + Visually we're performing the following (1,2,3,4 = memory types)... + + Sample memory map (w/overlaps): + ____22__________________ + ______________________4_ + ____1111________________ + _44_____________________ + 11111111________________ + ____________________33__ + ___________44___________ + __________33333_________ + ______________22________ + ___________________2222_ + _________111111111______ + _____________________11_ + _________________4______ + + Sanitized equivalent (no overlap): + 1_______________________ + _44_____________________ + ___1____________________ + ____22__________________ + ______11________________ + _________1______________ + __________3_____________ + ___________44___________ + _____________33_________ + _______________2________ + ________________1_______ + _________________4______ + ___________________2____ + ____________________33__ + ______________________4_ + */ + printk("sanitize start\n"); + /* if there's only one memory region, don't bother */ + if (*pnr_map < 2) { + printk("sanitize bail 0\n"); + return -1; + } + + old_nr = *pnr_map; + + /* bail out if we find any unreasonable addresses in bios map */ + for (i=0; iaddr = biosmap[i].addr; + change_point[chgidx++]->pbios = &biosmap[i]; + change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; + change_point[chgidx++]->pbios = &biosmap[i]; + } + } + chg_nr = chgidx; /* true number of change-points */ + + /* sort change-point list by memory addresses (low -> high) */ + still_changing = 1; + while (still_changing) { + still_changing = 0; + for (i=1; i < chg_nr; i++) { + /* if > , swap */ + /* or, if current= & last=, swap */ + if ((change_point[i]->addr < change_point[i-1]->addr) || + ((change_point[i]->addr == change_point[i-1]->addr) && + (change_point[i]->addr == change_point[i]->pbios->addr) && + (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) + ) + { + change_tmp = change_point[i]; + change_point[i] = change_point[i-1]; + change_point[i-1] = change_tmp; + still_changing=1; + } + } + } + + /* create a new bios memory map, removing overlaps */ + overlap_entries=0; /* number of entries in the overlap table */ + new_bios_entry=0; /* index for creating new bios map entries */ + last_type = 0; /* start with undefined memory type */ + last_addr = 0; /* start with 0 as last starting address */ + /* loop through change-points, determining affect on the new bios map */ + for (chgidx=0; chgidx < chg_nr; chgidx++) + { + /* keep track of all overlapping bios entries */ + if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) + { + /* add map entry to overlap list (> 1 entry implies an overlap) */ + overlap_list[overlap_entries++]=change_point[chgidx]->pbios; + } + else + { + /* remove entry from list (order independent, so swap with last) */ + for (i=0; ipbios) + overlap_list[i] = overlap_list[overlap_entries-1]; + } + overlap_entries--; + } + /* if there are overlapping entries, decide which "type" to use */ + /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ + current_type = 0; + for (i=0; itype > current_type) + current_type = overlap_list[i]->type; + /* continue building up new bios map based on this information */ + if (current_type != last_type) { + if (last_type != 0) { + new_bios[new_bios_entry].size = + change_point[chgidx]->addr - last_addr; + /* move forward only if the new size was non-zero */ + if (new_bios[new_bios_entry].size != 0) + if (++new_bios_entry >= E820MAX) + break; /* no more space left for new bios entries */ + } + if (current_type != 0) { + new_bios[new_bios_entry].addr = change_point[chgidx]->addr; + new_bios[new_bios_entry].type = current_type; + last_addr=change_point[chgidx]->addr; + } + last_type = current_type; + } + } + new_nr = new_bios_entry; /* retain count for new bios entries */ + + /* copy new bios mapping into original location */ + memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); + *pnr_map = new_nr; + + printk("sanitize end\n"); + return 0; +} + +/* + * Copy the BIOS e820 map into a safe place. + * + * Sanity-check it while we're at it.. + * + * If we're lucky and live on a modern system, the setup code + * will have given us a memory map that we can use to properly + * set up memory. If we aren't, we'll fake a memory map. + * + * We check to see that the memory map contains at least 2 elements + * before we'll use it, because the detection code in setup.S may + * not be perfect and most every PC known to man has two memory + * regions: one from 0 to 640k, and one from 1mb up. (The IBM + * thinkpad 560x, for example, does not cooperate with the memory + * detection code.) + */ +int __init copy_e820_map(struct e820entry * biosmap, int nr_map) +{ + /* Only one memory region (or negative)? Ignore it */ + if (nr_map < 2) + return -1; + + do { + unsigned long long start = biosmap->addr; + unsigned long long size = biosmap->size; + unsigned long long end = start + size; + unsigned long type = biosmap->type; + printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type); + + /* Overflow in 64 bits? Ignore the memory map. */ + if (start > end) + return -1; + + /* + * Some BIOSes claim RAM in the 640k - 1M region. + * Not right. Fix it up. + */ + if (type == E820_RAM) { + printk("copy_e820_map() type is E820_RAM\n"); + if (start < 0x100000ULL && end > 0xA0000ULL) { + printk("copy_e820_map() lies in range...\n"); + if (start < 0xA0000ULL) { + printk("copy_e820_map() start < 0xA0000ULL\n"); + add_memory_region(start, 0xA0000ULL-start, type); + } + if (end <= 0x100000ULL) { + printk("copy_e820_map() end <= 0x100000ULL\n"); + continue; + } + start = 0x100000ULL; + size = end - start; + } + } + add_memory_region(start, size, type); + } while (biosmap++,--nr_map); + return 0; +} + diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index acd2d9392abb..b7509aec0eb1 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -191,26 +191,6 @@ static void __init limit_regions(unsigned long long size) } } -void __init add_memory_region(unsigned long long start, - unsigned long long size, int type) -{ - int x; - - if (!efi_enabled) { - x = e820.nr_map; - - if (x == E820MAX) { - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); - return; - } - - e820.map[x].addr = start; - e820.map[x].size = size; - e820.map[x].type = type; - e820.nr_map++; - } -} /* add_memory_region */ - #define E820_DEBUG 1 static void __init print_memory_map(char *who) @@ -239,226 +219,6 @@ static void __init print_memory_map(char *who) } } -/* - * Sanitize the BIOS e820 map. - * - * Some e820 responses include overlapping entries. The following - * replaces the original e820 map with a new one, removing overlaps. - * - */ -struct change_member { - struct e820entry *pbios; /* pointer to original bios entry */ - unsigned long long addr; /* address for this change point */ -}; -static struct change_member change_point_list[2*E820MAX] __initdata; -static struct change_member *change_point[2*E820MAX] __initdata; -static struct e820entry *overlap_list[E820MAX] __initdata; -static struct e820entry new_bios[E820MAX] __initdata; - -int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) -{ - struct change_member *change_tmp; - unsigned long current_type, last_type; - unsigned long long last_addr; - int chgidx, still_changing; - int overlap_entries; - int new_bios_entry; - int old_nr, new_nr, chg_nr; - int i; - - /* - Visually we're performing the following (1,2,3,4 = memory types)... - - Sample memory map (w/overlaps): - ____22__________________ - ______________________4_ - ____1111________________ - _44_____________________ - 11111111________________ - ____________________33__ - ___________44___________ - __________33333_________ - ______________22________ - ___________________2222_ - _________111111111______ - _____________________11_ - _________________4______ - - Sanitized equivalent (no overlap): - 1_______________________ - _44_____________________ - ___1____________________ - ____22__________________ - ______11________________ - _________1______________ - __________3_____________ - ___________44___________ - _____________33_________ - _______________2________ - ________________1_______ - _________________4______ - ___________________2____ - ____________________33__ - ______________________4_ - */ - - /* if there's only one memory region, don't bother */ - if (*pnr_map < 2) - return -1; - - old_nr = *pnr_map; - - /* bail out if we find any unreasonable addresses in bios map */ - for (i=0; iaddr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; - } - } - chg_nr = chgidx; /* true number of change-points */ - - /* sort change-point list by memory addresses (low -> high) */ - still_changing = 1; - while (still_changing) { - still_changing = 0; - for (i=1; i < chg_nr; i++) { - /* if > , swap */ - /* or, if current= & last=, swap */ - if ((change_point[i]->addr < change_point[i-1]->addr) || - ((change_point[i]->addr == change_point[i-1]->addr) && - (change_point[i]->addr == change_point[i]->pbios->addr) && - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) - ) - { - change_tmp = change_point[i]; - change_point[i] = change_point[i-1]; - change_point[i-1] = change_tmp; - still_changing=1; - } - } - } - - /* create a new bios memory map, removing overlaps */ - overlap_entries=0; /* number of entries in the overlap table */ - new_bios_entry=0; /* index for creating new bios map entries */ - last_type = 0; /* start with undefined memory type */ - last_addr = 0; /* start with 0 as last starting address */ - /* loop through change-points, determining affect on the new bios map */ - for (chgidx=0; chgidx < chg_nr; chgidx++) - { - /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) - { - /* add map entry to overlap list (> 1 entry implies an overlap) */ - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; - } - else - { - /* remove entry from list (order independent, so swap with last) */ - for (i=0; ipbios) - overlap_list[i] = overlap_list[overlap_entries-1]; - } - overlap_entries--; - } - /* if there are overlapping entries, decide which "type" to use */ - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ - current_type = 0; - for (i=0; itype > current_type) - current_type = overlap_list[i]->type; - /* continue building up new bios map based on this information */ - if (current_type != last_type) { - if (last_type != 0) { - new_bios[new_bios_entry].size = - change_point[chgidx]->addr - last_addr; - /* move forward only if the new size was non-zero */ - if (new_bios[new_bios_entry].size != 0) - if (++new_bios_entry >= E820MAX) - break; /* no more space left for new bios entries */ - } - if (current_type != 0) { - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; - new_bios[new_bios_entry].type = current_type; - last_addr=change_point[chgidx]->addr; - } - last_type = current_type; - } - } - new_nr = new_bios_entry; /* retain count for new bios entries */ - - /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); - *pnr_map = new_nr; - - return 0; -} - -/* - * Copy the BIOS e820 map into a safe place. - * - * Sanity-check it while we're at it.. - * - * If we're lucky and live on a modern system, the setup code - * will have given us a memory map that we can use to properly - * set up memory. If we aren't, we'll fake a memory map. - * - * We check to see that the memory map contains at least 2 elements - * before we'll use it, because the detection code in setup.S may - * not be perfect and most every PC known to man has two memory - * regions: one from 0 to 640k, and one from 1mb up. (The IBM - * thinkpad 560x, for example, does not cooperate with the memory - * detection code.) - */ -int __init copy_e820_map(struct e820entry * biosmap, int nr_map) -{ - /* Only one memory region (or negative)? Ignore it */ - if (nr_map < 2) - return -1; - - do { - unsigned long long start = biosmap->addr; - unsigned long long size = biosmap->size; - unsigned long long end = start + size; - unsigned long type = biosmap->type; - - /* Overflow in 64 bits? Ignore the memory map. */ - if (start > end) - return -1; - - /* - * Some BIOSes claim RAM in the 640k - 1M region. - * Not right. Fix it up. - */ - if (type == E820_RAM) { - if (start < 0x100000ULL && end > 0xA0000ULL) { - if (start < 0xA0000ULL) - add_memory_region(start, 0xA0000ULL-start, type); - if (end <= 0x100000ULL) - continue; - start = 0x100000ULL; - size = end - start; - } - } - add_memory_region(start, size, type); - } while (biosmap++,--nr_map); - return 0; -} - #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) struct edd edd; #ifdef CONFIG_EDD_MODULE -- cgit v1.2.3 From b2dff6a88cbed59d787a8ca7367c76ba385e1187 Mon Sep 17 00:00:00 2001 From: "bibo,mao" Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] i386: Move find_max_pfn function to e820.c Move more code from setup.c into e820.c Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- arch/i386/kernel/e820.c | 52 +++++++++++++++++++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 55 ------------------------------------------------ include/asm-i386/e820.h | 1 + 3 files changed, 53 insertions(+), 55 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index 0db95760b073..be4934f6f85b 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -539,3 +540,54 @@ int __init copy_e820_map(struct e820entry * biosmap, int nr_map) return 0; } +/* + * Callback for efi_memory_walk. + */ +static int __init +efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) +{ + unsigned long *max_pfn = arg, pfn; + + if (start < end) { + pfn = PFN_UP(end -1); + if (pfn > *max_pfn) + *max_pfn = pfn; + } + return 0; +} + +static int __init +efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) +{ + memory_present(0, PFN_UP(start), PFN_DOWN(end)); + return 0; +} + +/* + * Find the highest page frame number we have available + */ +void __init find_max_pfn(void) +{ + int i; + + max_pfn = 0; + if (efi_enabled) { + efi_memmap_walk(efi_find_max_pfn, &max_pfn); + efi_memmap_walk(efi_memory_present_wrapper, NULL); + return; + } + + for (i = 0; i < e820.nr_map; i++) { + unsigned long start, end; + /* RAM? */ + if (e820.map[i].type != E820_RAM) + continue; + start = PFN_UP(e820.map[i].addr); + end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); + if (start >= end) + continue; + if (end > max_pfn) + max_pfn = end; + memory_present(0, start, end); + } +} diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index b7509aec0eb1..3d808054fdf7 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -63,9 +63,6 @@ #include #include -/* Forward Declaration. */ -void __init find_max_pfn(void); - /* This value is set up by the early boot code to point to the value immediately after the boot time page tables. It contains a *physical* address, and must not be in the .bss segment! */ @@ -387,29 +384,6 @@ static int __init parse_reservetop(char *arg) } early_param("reservetop", parse_reservetop); -/* - * Callback for efi_memory_walk. - */ -static int __init -efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) -{ - unsigned long *max_pfn = arg, pfn; - - if (start < end) { - pfn = PFN_UP(end -1); - if (pfn > *max_pfn) - *max_pfn = pfn; - } - return 0; -} - -static int __init -efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) -{ - memory_present(0, PFN_UP(start), PFN_DOWN(end)); - return 0; -} - /* * This function checks if the entire range is mapped with type. * @@ -442,35 +416,6 @@ e820_all_mapped(unsigned long s, unsigned long e, unsigned type) return 0; } -/* - * Find the highest page frame number we have available - */ -void __init find_max_pfn(void) -{ - int i; - - max_pfn = 0; - if (efi_enabled) { - efi_memmap_walk(efi_find_max_pfn, &max_pfn); - efi_memmap_walk(efi_memory_present_wrapper, NULL); - return; - } - - for (i = 0; i < e820.nr_map; i++) { - unsigned long start, end; - /* RAM? */ - if (e820.map[i].type != E820_RAM) - continue; - start = PFN_UP(e820.map[i].addr); - end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); - if (start >= end) - continue; - if (end > max_pfn) - max_pfn = end; - memory_present(0, start, end); - } -} - /* * Determine low and high memory ranges: */ diff --git a/include/asm-i386/e820.h b/include/asm-i386/e820.h index f7514fb6e8e4..147569425152 100644 --- a/include/asm-i386/e820.h +++ b/include/asm-i386/e820.h @@ -38,6 +38,7 @@ extern struct e820map e820; extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); +extern void find_max_pfn(void); #endif/*!__ASSEMBLY__*/ -- cgit v1.2.3 From b5b2405706005cc7765f6ecd00965d29e93f090a Mon Sep 17 00:00:00 2001 From: "bibo,mao" Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] i386: Move e820/efi memmap walking code to e820.c This patch moves e820/efi memmap table walking function from setup.c to e820.c, also this patch adds extern declaration in header file. Signed-off-by: bibo,mao Signed-off-by: Andi Kleen arch/i386/kernel/e820.c | 115 +++++++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 118 ----------------------------------- include/asm-i386/e820.h | 2 arch/i386/kernel/e820.c | 115 +++++++++++++++++++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 118 ----------------------------------------------- include/asm-i386/e820.h | 2 3 files changed, 117 insertions(+), 118 deletions(-) --- arch/i386/kernel/e820.c | 115 +++++++++++++++++++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 118 ----------------------------------------------- include/asm-i386/e820.h | 2 + 3 files changed, 117 insertions(+), 118 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index be4934f6f85b..47c495bf0cbc 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c @@ -28,6 +28,11 @@ static struct change_member change_point_list[2*E820MAX] __initdata; static struct change_member *change_point[2*E820MAX] __initdata; static struct e820entry *overlap_list[E820MAX] __initdata; static struct e820entry new_bios[E820MAX] __initdata; +/* For PCI or other memory-mapped resources */ +unsigned long pci_mem_start = 0x10000000; +#ifdef CONFIG_PCI +EXPORT_SYMBOL(pci_mem_start); +#endif struct resource data_resource = { .name = "Kernel data", .start = 0, @@ -591,3 +596,113 @@ void __init find_max_pfn(void) memory_present(0, start, end); } } + +/* + * Free all available memory for boot time allocation. Used + * as a callback function by efi_memory_walk() + */ + +static int __init +free_available_memory(unsigned long start, unsigned long end, void *arg) +{ + /* check max_low_pfn */ + if (start >= (max_low_pfn << PAGE_SHIFT)) + return 0; + if (end >= (max_low_pfn << PAGE_SHIFT)) + end = max_low_pfn << PAGE_SHIFT; + if (start < end) + free_bootmem(start, end - start); + + return 0; +} +/* + * Register fully available low RAM pages with the bootmem allocator. + */ +void __init register_bootmem_low_pages(unsigned long max_low_pfn) +{ + int i; + + if (efi_enabled) { + efi_memmap_walk(free_available_memory, NULL); + return; + } + for (i = 0; i < e820.nr_map; i++) { + unsigned long curr_pfn, last_pfn, size; + /* + * Reserve usable low memory + */ + if (e820.map[i].type != E820_RAM) + continue; + /* + * We are rounding up the start address of usable memory: + */ + curr_pfn = PFN_UP(e820.map[i].addr); + if (curr_pfn >= max_low_pfn) + continue; + /* + * ... and at the end of the usable range downwards: + */ + last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); + + if (last_pfn > max_low_pfn) + last_pfn = max_low_pfn; + + /* + * .. finally, did all the rounding and playing + * around just make the area go away? + */ + if (last_pfn <= curr_pfn) + continue; + + size = last_pfn - curr_pfn; + free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); + } +} + +void __init register_memory(void) +{ + unsigned long gapstart, gapsize, round; + unsigned long long last; + int i; + + /* + * Search for the bigest gap in the low 32 bits of the e820 + * memory space. + */ + last = 0x100000000ull; + gapstart = 0x10000000; + gapsize = 0x400000; + i = e820.nr_map; + while (--i >= 0) { + unsigned long long start = e820.map[i].addr; + unsigned long long end = start + e820.map[i].size; + + /* + * Since "last" is at most 4GB, we know we'll + * fit in 32 bits if this condition is true + */ + if (last > end) { + unsigned long gap = last - end; + + if (gap > gapsize) { + gapsize = gap; + gapstart = end; + } + } + if (start < last) + last = start; + } + + /* + * See how much we want to round up: start off with + * rounding to the next 1MB area. + */ + round = 0x100000; + while ((gapsize >> 4) > round) + round += round; + /* Fun with two's complement */ + pci_mem_start = (gapstart + round) & -round; + + printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", + pci_mem_start, gapstart, gapsize); +} diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 3d808054fdf7..51ed015a1f35 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -94,12 +94,6 @@ unsigned int machine_submodel_id; unsigned int BIOS_revision; unsigned int mca_pentium_flag; -/* For PCI or other memory-mapped resources */ -unsigned long pci_mem_start = 0x10000000; -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_mem_start); -#endif - /* Boot loader ID as an integer, for the benefit of proc_dointvec */ int bootloader_type; @@ -475,68 +469,6 @@ unsigned long __init find_max_low_pfn(void) return max_low_pfn; } -/* - * Free all available memory for boot time allocation. Used - * as a callback function by efi_memory_walk() - */ - -static int __init -free_available_memory(unsigned long start, unsigned long end, void *arg) -{ - /* check max_low_pfn */ - if (start >= (max_low_pfn << PAGE_SHIFT)) - return 0; - if (end >= (max_low_pfn << PAGE_SHIFT)) - end = max_low_pfn << PAGE_SHIFT; - if (start < end) - free_bootmem(start, end - start); - - return 0; -} -/* - * Register fully available low RAM pages with the bootmem allocator. - */ -static void __init register_bootmem_low_pages(unsigned long max_low_pfn) -{ - int i; - - if (efi_enabled) { - efi_memmap_walk(free_available_memory, NULL); - return; - } - for (i = 0; i < e820.nr_map; i++) { - unsigned long curr_pfn, last_pfn, size; - /* - * Reserve usable low memory - */ - if (e820.map[i].type != E820_RAM) - continue; - /* - * We are rounding up the start address of usable memory: - */ - curr_pfn = PFN_UP(e820.map[i].addr); - if (curr_pfn >= max_low_pfn) - continue; - /* - * ... and at the end of the usable range downwards: - */ - last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); - - if (last_pfn > max_low_pfn) - last_pfn = max_low_pfn; - - /* - * .. finally, did all the rounding and playing - * around just make the area go away? - */ - if (last_pfn <= curr_pfn) - continue; - - size = last_pfn - curr_pfn; - free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); - } -} - /* * workaround for Dell systems that neglect to reserve EBDA */ @@ -705,56 +637,6 @@ void __init remapped_pgdat_init(void) } } - - -static void __init register_memory(void) -{ - unsigned long gapstart, gapsize, round; - unsigned long long last; - int i; - - /* - * Search for the bigest gap in the low 32 bits of the e820 - * memory space. - */ - last = 0x100000000ull; - gapstart = 0x10000000; - gapsize = 0x400000; - i = e820.nr_map; - while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; - - /* - * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true - */ - if (last > end) { - unsigned long gap = last - end; - - if (gap > gapsize) { - gapsize = gap; - gapstart = end; - } - } - if (start < last) - last = start; - } - - /* - * See how much we want to round up: start off with - * rounding to the next 1MB area. - */ - round = 0x100000; - while ((gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gapstart + round) & -round; - - printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", - pci_mem_start, gapstart, gapsize); -} - #ifdef CONFIG_MCA static void set_mca_bus(int x) { diff --git a/include/asm-i386/e820.h b/include/asm-i386/e820.h index 147569425152..8da4175a5533 100644 --- a/include/asm-i386/e820.h +++ b/include/asm-i386/e820.h @@ -39,6 +39,8 @@ extern struct e820map e820; extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); extern void find_max_pfn(void); +extern void register_bootmem_low_pages(unsigned long max_low_pfn); +extern void register_memory(void); #endif/*!__ASSEMBLY__*/ -- cgit v1.2.3 From cef518e88b8ed94ea483c436ef5e5b151a3fabc6 Mon Sep 17 00:00:00 2001 From: "bibo,mao" Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] i386: Move memory map printing and other code to e820.c This patch moves e820 memory map print and memmap boot param parsing function from setup.c to e820.c, also adds limit_regions and print_memory_map declaration in header file. Signed-off-by: bibo,mao Signed-off-by: Andi Kleen arch/i386/kernel/e820.c | 152 +++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 158 --------------------------------- include/asm-i386/e820.h | 2 arch/i386/kernel/e820.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 153 ----------------------------------------------- include/asm-i386/e820.h | 2 3 files changed, 155 insertions(+), 152 deletions(-) --- arch/i386/kernel/e820.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 153 +---------------------------------------------- include/asm-i386/e820.h | 2 + 3 files changed, 155 insertions(+), 152 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index 47c495bf0cbc..b755255f2721 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c @@ -33,6 +33,7 @@ unsigned long pci_mem_start = 0x10000000; #ifdef CONFIG_PCI EXPORT_SYMBOL(pci_mem_start); #endif +extern int user_defined_memmap; struct resource data_resource = { .name = "Kernel data", .start = 0, @@ -706,3 +707,154 @@ void __init register_memory(void) printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", pci_mem_start, gapstart, gapsize); } + +void __init print_memory_map(char *who) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + printk(" %s: %016Lx - %016Lx ", who, + e820.map[i].addr, + e820.map[i].addr + e820.map[i].size); + switch (e820.map[i].type) { + case E820_RAM: printk("(usable)\n"); + break; + case E820_RESERVED: + printk("(reserved)\n"); + break; + case E820_ACPI: + printk("(ACPI data)\n"); + break; + case E820_NVS: + printk("(ACPI NVS)\n"); + break; + default: printk("type %lu\n", e820.map[i].type); + break; + } + } +} + +void __init limit_regions(unsigned long long size) +{ + unsigned long long current_addr = 0; + int i; + + print_memory_map("limit_regions start"); + if (efi_enabled) { + efi_memory_desc_t *md; + void *p; + + for (p = memmap.map, i = 0; p < memmap.map_end; + p += memmap.desc_size, i++) { + md = p; + current_addr = md->phys_addr + (md->num_pages << 12); + if (md->type == EFI_CONVENTIONAL_MEMORY) { + if (current_addr >= size) { + md->num_pages -= + (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); + memmap.nr_map = i + 1; + return; + } + } + } + } + for (i = 0; i < e820.nr_map; i++) { + current_addr = e820.map[i].addr + e820.map[i].size; + if (current_addr < size) + continue; + + if (e820.map[i].type != E820_RAM) + continue; + + if (e820.map[i].addr >= size) { + /* + * This region starts past the end of the + * requested size, skip it completely. + */ + e820.nr_map = i; + } else { + e820.nr_map = i + 1; + e820.map[i].size -= current_addr - size; + } + print_memory_map("limit_regions endfor"); + return; + } + print_memory_map("limit_regions endfunc"); +} + + /* + * This function checks if the entire range is mapped with type. + * + * Note: this function only works correct if the e820 table is sorted and + * not-overlapping, which is the case + */ +int __init +e820_all_mapped(unsigned long s, unsigned long e, unsigned type) +{ + u64 start = s; + u64 end = e; + int i; + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + if (type && ei->type != type) + continue; + /* is the region (part) in overlap with the current region ?*/ + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + /* if the region is at the beginning of we move + * start to the end of the region since it's ok until there + */ + if (ei->addr <= start) + start = ei->addr + ei->size; + /* if start is now at or beyond end, we're done, full + * coverage */ + if (start >= end) + return 1; /* we're done */ + } + return 0; +} + +static int __init parse_memmap(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strcmp(arg, "exactmap") == 0) { +#ifdef CONFIG_CRASH_DUMP + /* If we are doing a crash dump, we + * still need to know the real mem + * size before original memory map is + * reset. + */ + find_max_pfn(); + saved_max_pfn = max_pfn; +#endif + e820.nr_map = 0; + user_defined_memmap = 1; + } else { + /* If the user specifies memory size, we + * limit the BIOS-provided memory map to + * that size. exactmap can be used to specify + * the exact map. mem=number can be used to + * trim the existing memory map. + */ + unsigned long long start_at, mem_size; + + mem_size = memparse(arg, &arg); + if (*arg == '@') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_RAM); + } else if (*arg == '#') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_ACPI); + } else if (*arg == '$') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_RESERVED); + } else { + limit_regions(mem_size); + user_defined_memmap = 1; + } + } + return 0; +} +early_param("memmap", parse_memmap); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 51ed015a1f35..e5bb87aa5a45 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -73,7 +73,6 @@ int disable_pse __devinitdata = 0; /* * Machine setup.. */ -extern struct e820map e820; extern struct resource code_resource; extern struct resource data_resource; @@ -137,79 +136,6 @@ static char command_line[COMMAND_LINE_SIZE]; unsigned char __initdata boot_params[PARAM_SIZE]; -static void __init limit_regions(unsigned long long size) -{ - unsigned long long current_addr = 0; - int i; - - if (efi_enabled) { - efi_memory_desc_t *md; - void *p; - - for (p = memmap.map, i = 0; p < memmap.map_end; - p += memmap.desc_size, i++) { - md = p; - current_addr = md->phys_addr + (md->num_pages << 12); - if (md->type == EFI_CONVENTIONAL_MEMORY) { - if (current_addr >= size) { - md->num_pages -= - (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); - memmap.nr_map = i + 1; - return; - } - } - } - } - for (i = 0; i < e820.nr_map; i++) { - current_addr = e820.map[i].addr + e820.map[i].size; - if (current_addr < size) - continue; - - if (e820.map[i].type != E820_RAM) - continue; - - if (e820.map[i].addr >= size) { - /* - * This region starts past the end of the - * requested size, skip it completely. - */ - e820.nr_map = i; - } else { - e820.nr_map = i + 1; - e820.map[i].size -= current_addr - size; - } - return; - } -} - -#define E820_DEBUG 1 - -static void __init print_memory_map(char *who) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - printk(" %s: %016Lx - %016Lx ", who, - e820.map[i].addr, - e820.map[i].addr + e820.map[i].size); - switch (e820.map[i].type) { - case E820_RAM: printk("(usable)\n"); - break; - case E820_RESERVED: - printk("(reserved)\n"); - break; - case E820_ACPI: - printk("(ACPI data)\n"); - break; - case E820_NVS: - printk("(ACPI NVS)\n"); - break; - default: printk("type %lu\n", e820.map[i].type); - break; - } - } -} - #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) struct edd edd; #ifdef CONFIG_EDD_MODULE @@ -233,7 +159,7 @@ static inline void copy_edd(void) } #endif -static int __initdata user_defined_memmap = 0; +int __initdata user_defined_memmap = 0; /* * "mem=nopentium" disables the 4MB page tables. @@ -270,51 +196,6 @@ static int __init parse_mem(char *arg) } early_param("mem", parse_mem); -static int __init parse_memmap(char *arg) -{ - if (!arg) - return -EINVAL; - - if (strcmp(arg, "exactmap") == 0) { -#ifdef CONFIG_CRASH_DUMP - /* If we are doing a crash dump, we - * still need to know the real mem - * size before original memory map is - * reset. - */ - find_max_pfn(); - saved_max_pfn = max_pfn; -#endif - e820.nr_map = 0; - user_defined_memmap = 1; - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long start_at, mem_size; - - mem_size = memparse(arg, &arg); - if (*arg == '@') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_RAM); - } else if (*arg == '#') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_ACPI); - } else if (*arg == '$') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_RESERVED); - } else { - limit_regions(mem_size); - user_defined_memmap = 1; - } - } - return 0; -} -early_param("memmap", parse_memmap); - #ifdef CONFIG_PROC_VMCORE /* elfcorehdr= specifies the location of elf core header * stored by the crashed kernel. @@ -378,38 +259,6 @@ static int __init parse_reservetop(char *arg) } early_param("reservetop", parse_reservetop); - /* - * This function checks if the entire range is mapped with type. - * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case - */ -int __init -e820_all_mapped(unsigned long s, unsigned long e, unsigned type) -{ - u64 start = s; - u64 end = e; - int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) - continue; - /* is the region (part) in overlap with the current region ?*/ - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - /* if the region is at the beginning of we move - * start to the end of the region since it's ok until there - */ - if (ei->addr <= start) - start = ei->addr + ei->size; - /* if start is now at or beyond end, we're done, full - * coverage */ - if (start >= end) - return 1; /* we're done */ - } - return 0; -} - /* * Determine low and high memory ranges: */ diff --git a/include/asm-i386/e820.h b/include/asm-i386/e820.h index 8da4175a5533..395077aba583 100644 --- a/include/asm-i386/e820.h +++ b/include/asm-i386/e820.h @@ -41,6 +41,8 @@ extern int e820_all_mapped(unsigned long start, unsigned long end, extern void find_max_pfn(void); extern void register_bootmem_low_pages(unsigned long max_low_pfn); extern void register_memory(void); +extern void limit_regions(unsigned long long size); +extern void print_memory_map(char *who); #endif/*!__ASSEMBLY__*/ -- cgit v1.2.3 From d3561b7fa0fb0fc583bab0eeda32bec9e4c4056d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 7 Dec 2006 02:14:07 +0100 Subject: [PATCH] paravirt: header and stubs for paravirtualisation Create a paravirt.h header for all the critical operations which need to be replaced with hypervisor calls, and include that instead of defining native operations, when CONFIG_PARAVIRT. This patch does the dumbest possible replacement of paravirtualized instructions: calls through a "paravirt_ops" structure. Currently these are function implementations of native hardware: hypervisors will override the ops structure with their own variants. All the pv-ops functions are declared "fastcall" so that a specific register-based ABI is used, to make inlining assember easier. And: +From: Andy Whitcroft The paravirt ops introduce a 'weak' attribute onto memory_setup(). Code ordering leads to the following warnings on x86: arch/i386/kernel/setup.c:651: warning: weak declaration of `memory_setup' after first use results in unspecified behavior Move memory_setup() to avoid this. Signed-off-by: Rusty Russell Signed-off-by: Chris Wright Signed-off-by: Andi Kleen Cc: Jeremy Fitzhardinge Cc: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Andy Whitcroft --- arch/i386/Kconfig | 11 + arch/i386/boot/compressed/misc.c | 1 + arch/i386/kernel/Makefile | 1 + arch/i386/kernel/asm-offsets.c | 10 + arch/i386/kernel/entry.S | 34 ++- arch/i386/kernel/i8259.c | 5 +- arch/i386/kernel/paravirt.c | 404 +++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 8 +- arch/i386/kernel/smpboot.c | 5 + arch/i386/kernel/time.c | 15 +- arch/i386/power/cpu.c | 8 +- drivers/net/de600.c | 1 - include/asm-i386/delay.h | 8 + include/asm-i386/desc.h | 9 +- include/asm-i386/io.h | 8 +- include/asm-i386/irq.h | 3 + include/asm-i386/irqflags.h | 42 +-- include/asm-i386/mach-default/setup_arch.h | 2 + include/asm-i386/msr.h | 5 + include/asm-i386/paravirt.h | 281 ++++++++++++++++++++ include/asm-i386/processor.h | 15 +- include/asm-i386/segment.h | 2 + include/asm-i386/setup.h | 1 + include/asm-i386/spinlock.h | 4 + include/asm-i386/suspend.h | 8 +- include/asm-i386/system.h | 16 +- include/asm-i386/time.h | 41 +++ 27 files changed, 890 insertions(+), 58 deletions(-) create mode 100644 arch/i386/kernel/paravirt.c create mode 100644 include/asm-i386/paravirt.h create mode 100644 include/asm-i386/time.h (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 1f0f7b60995e..bb1fa061c6cf 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -182,6 +182,17 @@ config X86_ES7000 endchoice +config PARAVIRT + bool "Paravirtualization support (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + Paravirtualization is a way of running multiple instances of + Linux on the same machine, under a hypervisor. This option + changes the kernel so it can modify itself when it is run + under a hypervisor, improving performance significantly. + However, when run without a hypervisor the kernel is + theoretically slower. If in doubt, say N. + config ACPI_SRAT bool default y diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index dc1538931555..c6798c75c67d 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c @@ -9,6 +9,7 @@ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 */ +#undef CONFIG_PARAVIRT #include #include #include diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index f614854bd71f..406612136049 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_VM86) += vm86.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_K8_NB) += k8.o +obj-$(CONFIG_PARAVIRT) += paravirt.o EXTRA_AFLAGS := -traditional diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 0666eb0ed7bc..1b2f3cd33270 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c @@ -101,4 +101,14 @@ void foo(void) BLANK(); OFFSET(PDA_cpu, i386_pda, cpu_number); OFFSET(PDA_pcurrent, i386_pda, pcurrent); + +#ifdef CONFIG_PARAVIRT + BLANK(); + OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled); + OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable); + OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable); + OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit); + OFFSET(PARAVIRT_iret, paravirt_ops, iret); + OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0); +#endif } diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 0220bc8cbb43..d274612e05cd 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -62,13 +62,6 @@ DF_MASK = 0x00000400 NT_MASK = 0x00004000 VM_MASK = 0x00020000 -/* These are replaces for paravirtualization */ -#define DISABLE_INTERRUPTS cli -#define ENABLE_INTERRUPTS sti -#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit -#define INTERRUPT_RETURN iret -#define GET_CR0_INTO_EAX movl %cr0, %eax - #ifdef CONFIG_PREEMPT #define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF #else @@ -416,6 +409,20 @@ ldt_ss: jnz restore_nocheck testl $0x00400000, %eax # returning to 32bit stack? jnz restore_nocheck # allright, normal return + +#ifdef CONFIG_PARAVIRT + /* + * The kernel can't run on a non-flat stack if paravirt mode + * is active. Rather than try to fixup the high bits of + * ESP, bypass this code entirely. This may break DOSemu + * and/or Wine support in a paravirt VM, although the option + * is still available to implement the setting of the high + * 16-bits in the INTERRUPT_RETURN paravirt-op. + */ + cmpl $0, paravirt_ops+PARAVIRT_enabled + jne restore_nocheck +#endif + /* If returning to userspace with 16bit stack, * try to fix the higher word of ESP, as the CPU * won't restore it. @@ -833,6 +840,19 @@ nmi_espfix_stack: .previous KPROBE_END(nmi) +#ifdef CONFIG_PARAVIRT +ENTRY(native_iret) +1: iret +.section __ex_table,"a" + .align 4 + .long 1b,iret_exc +.previous + +ENTRY(native_irq_enable_sysexit) + sti + sysexit +#endif + KPROBE_ENTRY(int3) RING0_INT_FRAME pushl $-1 # mark this as an int diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index 62996cd17084..c8d45821c788 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -381,7 +381,10 @@ void __init init_ISA_irqs (void) } } -void __init init_IRQ(void) +/* Overridden in paravirt.c */ +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); + +void __init native_init_IRQ(void) { int i; diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c new file mode 100644 index 000000000000..478192cd4b90 --- /dev/null +++ b/arch/i386/kernel/paravirt.c @@ -0,0 +1,404 @@ +/* Paravirtualization interfaces + Copyright (C) 2006 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/* nop stub */ +static void native_nop(void) +{ +} + +static void __init default_banner(void) +{ + printk(KERN_INFO "Booting paravirtualized kernel on %s\n", + paravirt_ops.name); +} + +char *memory_setup(void) +{ + return paravirt_ops.memory_setup(); +} + +static fastcall unsigned long native_get_debugreg(int regno) +{ + unsigned long val = 0; /* Damn you, gcc! */ + + switch (regno) { + case 0: + asm("movl %%db0, %0" :"=r" (val)); break; + case 1: + asm("movl %%db1, %0" :"=r" (val)); break; + case 2: + asm("movl %%db2, %0" :"=r" (val)); break; + case 3: + asm("movl %%db3, %0" :"=r" (val)); break; + case 6: + asm("movl %%db6, %0" :"=r" (val)); break; + case 7: + asm("movl %%db7, %0" :"=r" (val)); break; + default: + BUG(); + } + return val; +} + +static fastcall void native_set_debugreg(int regno, unsigned long value) +{ + switch (regno) { + case 0: + asm("movl %0,%%db0" : /* no output */ :"r" (value)); + break; + case 1: + asm("movl %0,%%db1" : /* no output */ :"r" (value)); + break; + case 2: + asm("movl %0,%%db2" : /* no output */ :"r" (value)); + break; + case 3: + asm("movl %0,%%db3" : /* no output */ :"r" (value)); + break; + case 6: + asm("movl %0,%%db6" : /* no output */ :"r" (value)); + break; + case 7: + asm("movl %0,%%db7" : /* no output */ :"r" (value)); + break; + default: + BUG(); + } +} + +void init_IRQ(void) +{ + paravirt_ops.init_IRQ(); +} + +static fastcall void native_clts(void) +{ + asm volatile ("clts"); +} + +static fastcall unsigned long native_read_cr0(void) +{ + unsigned long val; + asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); + return val; +} + +static fastcall void native_write_cr0(unsigned long val) +{ + asm volatile("movl %0,%%cr0": :"r" (val)); +} + +static fastcall unsigned long native_read_cr2(void) +{ + unsigned long val; + asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); + return val; +} + +static fastcall void native_write_cr2(unsigned long val) +{ + asm volatile("movl %0,%%cr2": :"r" (val)); +} + +static fastcall unsigned long native_read_cr3(void) +{ + unsigned long val; + asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); + return val; +} + +static fastcall void native_write_cr3(unsigned long val) +{ + asm volatile("movl %0,%%cr3": :"r" (val)); +} + +static fastcall unsigned long native_read_cr4(void) +{ + unsigned long val; + asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); + return val; +} + +static fastcall unsigned long native_read_cr4_safe(void) +{ + unsigned long val; + /* This could fault if %cr4 does not exist */ + asm("1: movl %%cr4, %0 \n" + "2: \n" + ".section __ex_table,\"a\" \n" + ".long 1b,2b \n" + ".previous \n" + : "=r" (val): "0" (0)); + return val; +} + +static fastcall void native_write_cr4(unsigned long val) +{ + asm volatile("movl %0,%%cr4": :"r" (val)); +} + +static fastcall unsigned long native_save_fl(void) +{ + unsigned long f; + asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); + return f; +} + +static fastcall void native_restore_fl(unsigned long f) +{ + asm volatile("pushl %0 ; popfl": /* no output */ + :"g" (f) + :"memory", "cc"); +} + +static fastcall void native_irq_disable(void) +{ + asm volatile("cli": : :"memory"); +} + +static fastcall void native_irq_enable(void) +{ + asm volatile("sti": : :"memory"); +} + +static fastcall void native_safe_halt(void) +{ + asm volatile("sti; hlt": : :"memory"); +} + +static fastcall void native_halt(void) +{ + asm volatile("hlt": : :"memory"); +} + +static fastcall void native_wbinvd(void) +{ + asm volatile("wbinvd": : :"memory"); +} + +static fastcall unsigned long long native_read_msr(unsigned int msr, int *err) +{ + unsigned long long val; + + asm volatile("2: rdmsr ; xorl %0,%0\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: movl %3,%0 ; jmp 1b\n\t" + ".previous\n\t" + ".section __ex_table,\"a\"\n" + " .align 4\n\t" + " .long 2b,3b\n\t" + ".previous" + : "=r" (*err), "=A" (val) + : "c" (msr), "i" (-EFAULT)); + + return val; +} + +static fastcall int native_write_msr(unsigned int msr, unsigned long long val) +{ + int err; + asm volatile("2: wrmsr ; xorl %0,%0\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: movl %4,%0 ; jmp 1b\n\t" + ".previous\n\t" + ".section __ex_table,\"a\"\n" + " .align 4\n\t" + " .long 2b,3b\n\t" + ".previous" + : "=a" (err) + : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)), + "i" (-EFAULT)); + return err; +} + +static fastcall unsigned long long native_read_tsc(void) +{ + unsigned long long val; + asm volatile("rdtsc" : "=A" (val)); + return val; +} + +static fastcall unsigned long long native_read_pmc(void) +{ + unsigned long long val; + asm volatile("rdpmc" : "=A" (val)); + return val; +} + +static fastcall void native_load_tr_desc(void) +{ + asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); +} + +static fastcall void native_load_gdt(const struct Xgt_desc_struct *dtr) +{ + asm volatile("lgdt %0"::"m" (*dtr)); +} + +static fastcall void native_load_idt(const struct Xgt_desc_struct *dtr) +{ + asm volatile("lidt %0"::"m" (*dtr)); +} + +static fastcall void native_store_gdt(struct Xgt_desc_struct *dtr) +{ + asm ("sgdt %0":"=m" (*dtr)); +} + +static fastcall void native_store_idt(struct Xgt_desc_struct *dtr) +{ + asm ("sidt %0":"=m" (*dtr)); +} + +static fastcall unsigned long native_store_tr(void) +{ + unsigned long tr; + asm ("str %0":"=r" (tr)); + return tr; +} + +static fastcall void native_load_tls(struct thread_struct *t, unsigned int cpu) +{ +#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] + C(0); C(1); C(2); +#undef C +} + +static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high) +{ + u32 *lp = (u32 *)((char *)dt + entry*8); + lp[0] = entry_low; + lp[1] = entry_high; +} + +static fastcall void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) +{ + native_write_dt_entry(dt, entrynum, low, high); +} + +static fastcall void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high) +{ + native_write_dt_entry(dt, entrynum, low, high); +} + +static fastcall void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high) +{ + native_write_dt_entry(dt, entrynum, low, high); +} + +static fastcall void native_load_esp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + tss->esp0 = thread->esp0; + + /* This can only happen when SEP is enabled, no need to test "SEP"arately */ + if (unlikely(tss->ss1 != thread->sysenter_cs)) { + tss->ss1 = thread->sysenter_cs; + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); + } +} + +static fastcall void native_io_delay(void) +{ + asm volatile("outb %al,$0x80"); +} + +/* These are in entry.S */ +extern fastcall void native_iret(void); +extern fastcall void native_irq_enable_sysexit(void); + +static int __init print_banner(void) +{ + paravirt_ops.banner(); + return 0; +} +core_initcall(print_banner); + +struct paravirt_ops paravirt_ops = { + .name = "bare hardware", + .paravirt_enabled = 0, + .kernel_rpl = 0, + + .banner = default_banner, + .arch_setup = native_nop, + .memory_setup = machine_specific_memory_setup, + .get_wallclock = native_get_wallclock, + .set_wallclock = native_set_wallclock, + .time_init = time_init_hook, + .init_IRQ = native_init_IRQ, + + .cpuid = native_cpuid, + .get_debugreg = native_get_debugreg, + .set_debugreg = native_set_debugreg, + .clts = native_clts, + .read_cr0 = native_read_cr0, + .write_cr0 = native_write_cr0, + .read_cr2 = native_read_cr2, + .write_cr2 = native_write_cr2, + .read_cr3 = native_read_cr3, + .write_cr3 = native_write_cr3, + .read_cr4 = native_read_cr4, + .read_cr4_safe = native_read_cr4_safe, + .write_cr4 = native_write_cr4, + .save_fl = native_save_fl, + .restore_fl = native_restore_fl, + .irq_disable = native_irq_disable, + .irq_enable = native_irq_enable, + .safe_halt = native_safe_halt, + .halt = native_halt, + .wbinvd = native_wbinvd, + .read_msr = native_read_msr, + .write_msr = native_write_msr, + .read_tsc = native_read_tsc, + .read_pmc = native_read_pmc, + .load_tr_desc = native_load_tr_desc, + .set_ldt = native_set_ldt, + .load_gdt = native_load_gdt, + .load_idt = native_load_idt, + .store_gdt = native_store_gdt, + .store_idt = native_store_idt, + .store_tr = native_store_tr, + .load_tls = native_load_tls, + .write_ldt_entry = native_write_ldt_entry, + .write_gdt_entry = native_write_gdt_entry, + .write_idt_entry = native_write_idt_entry, + .load_esp0 = native_load_esp0, + + .set_iopl_mask = native_set_iopl_mask, + .io_delay = native_io_delay, + .const_udelay = __const_udelay, + + .irq_enable_sysexit = native_irq_enable_sysexit, + .iret = native_iret, +}; +EXPORT_SYMBOL(paravirt_ops); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index e5bb87aa5a45..695d53fd14de 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -495,6 +495,12 @@ static void set_mca_bus(int x) static void set_mca_bus(int x) { } #endif +/* Overridden in paravirt.c if CONFIG_PARAVIRT */ +char * __attribute__((weak)) memory_setup(void) +{ + return machine_specific_memory_setup(); +} + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -547,7 +553,7 @@ void __init setup_arch(char **cmdline_p) efi_init(); else { printk(KERN_INFO "BIOS-provided physical RAM map:\n"); - print_memory_map(machine_specific_memory_setup()); + print_memory_map(memory_setup()); } copy_edd(); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 095636620fa2..cd7de9c9654b 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -33,6 +33,11 @@ * Dave Jones : Report invalid combinations of Athlon CPUs. * Rusty Russell : Hacked into shape for new "hotplug" boot process. */ + +/* SMP boot always wants to use real time delay to allow sufficient time for + * the APs to come online */ +#define USE_REAL_TIME_DELAY + #include #include #include diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 78af572fd17c..c505b16c0990 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -56,6 +56,7 @@ #include #include #include +#include #include "mach_time.h" @@ -116,10 +117,7 @@ static int set_rtc_mmss(unsigned long nowtime) /* gets recalled with irq locally disabled */ /* XXX - does irqsave resolve this? -johnstul */ spin_lock_irqsave(&rtc_lock, flags); - if (efi_enabled) - retval = efi_set_rtc_mmss(nowtime); - else - retval = mach_set_rtc_mmss(nowtime); + retval = set_wallclock(nowtime); spin_unlock_irqrestore(&rtc_lock, flags); return retval; @@ -223,10 +221,7 @@ unsigned long get_cmos_time(void) spin_lock_irqsave(&rtc_lock, flags); - if (efi_enabled) - retval = efi_get_time(); - else - retval = mach_get_cmos_time(); + retval = get_wallclock(); spin_unlock_irqrestore(&rtc_lock, flags); @@ -370,7 +365,7 @@ static void __init hpet_time_init(void) printk("Using HPET for base-timer\n"); } - time_init_hook(); + do_time_init(); } #endif @@ -392,5 +387,5 @@ void __init time_init(void) do_settimeofday(&ts); - time_init_hook(); + do_time_init(); } diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index 5a1abeff033b..2c15500f8713 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c @@ -26,8 +26,8 @@ void __save_processor_state(struct saved_context *ctxt) /* * descriptor tables */ - store_gdt(&ctxt->gdt_limit); - store_idt(&ctxt->idt_limit); + store_gdt(&ctxt->gdt); + store_idt(&ctxt->idt); store_tr(ctxt->tr); /* @@ -99,8 +99,8 @@ void __restore_processor_state(struct saved_context *ctxt) * now restore the descriptor tables to their proper values * ltr is done i fix_processor_context(). */ - load_gdt(&ctxt->gdt_limit); - load_idt(&ctxt->idt_limit); + load_gdt(&ctxt->gdt); + load_idt(&ctxt->idt); /* * segment registers diff --git a/drivers/net/de600.c b/drivers/net/de600.c index 690bb40b353d..8396e411f1ce 100644 --- a/drivers/net/de600.c +++ b/drivers/net/de600.c @@ -43,7 +43,6 @@ static const char version[] = "de600.c: $Revision: 1.41-2.5 $, Bjorn Ekwall (bj * modify the following "#define": (see for more info) #define REALLY_SLOW_IO */ -#define SLOW_IO_BY_JUMPING /* Looks "better" than dummy write to port 0x80 :-) */ /* use 0 for production, 1 for verification, >2 for debug */ #ifdef DE600_DEBUG diff --git a/include/asm-i386/delay.h b/include/asm-i386/delay.h index 9ae5e3782ed8..32d6678d0bbf 100644 --- a/include/asm-i386/delay.h +++ b/include/asm-i386/delay.h @@ -16,6 +16,13 @@ extern void __ndelay(unsigned long nsecs); extern void __const_udelay(unsigned long usecs); extern void __delay(unsigned long loops); +#if defined(CONFIG_PARAVIRT) && !defined(USE_REAL_TIME_DELAY) +#define udelay(n) paravirt_ops.const_udelay((n) * 0x10c7ul) + +#define ndelay(n) paravirt_ops.const_udelay((n) * 5ul) + +#else /* !PARAVIRT || USE_REAL_TIME_DELAY */ + /* 0x10c7 is 2**32 / 1000000 (rounded up) */ #define udelay(n) (__builtin_constant_p(n) ? \ ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \ @@ -25,6 +32,7 @@ extern void __delay(unsigned long loops); #define ndelay(n) (__builtin_constant_p(n) ? \ ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ __ndelay(n)) +#endif void use_tsc_delay(void); diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index 6cf2ac2bfde7..f19820f0834f 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -55,6 +55,9 @@ static inline void pack_gate(__u32 *a, __u32 *b, #define DESCTYPE_DPL3 0x60 /* DPL-3 */ #define DESCTYPE_S 0x10 /* !system */ +#ifdef CONFIG_PARAVIRT +#include +#else #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) @@ -105,7 +108,11 @@ static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const vo write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b); } -static inline void set_ldt(void *addr, unsigned int entries) +#define set_ldt native_set_ldt +#endif /* CONFIG_PARAVIRT */ + +static inline fastcall void native_set_ldt(const void *addr, + unsigned int entries) { if (likely(entries == 0)) __asm__ __volatile__("lldt %w0"::"q" (0)); diff --git a/include/asm-i386/io.h b/include/asm-i386/io.h index 68df0dc3ab8f..86ff5e83be2f 100644 --- a/include/asm-i386/io.h +++ b/include/asm-i386/io.h @@ -256,11 +256,11 @@ static inline void flush_write_buffers(void) #endif /* __KERNEL__ */ -#ifdef SLOW_IO_BY_JUMPING -#define __SLOW_DOWN_IO "jmp 1f; 1: jmp 1f; 1:" +#if defined(CONFIG_PARAVIRT) +#include #else + #define __SLOW_DOWN_IO "outb %%al,$0x80;" -#endif static inline void slow_down_io(void) { __asm__ __volatile__( @@ -271,6 +271,8 @@ static inline void slow_down_io(void) { : : ); } +#endif + #ifdef CONFIG_X86_NUMAQ extern void *xquad_portio; /* Where the IO area was mapped */ #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h index 331726b41128..9e15ce0006eb 100644 --- a/include/asm-i386/irq.h +++ b/include/asm-i386/irq.h @@ -41,4 +41,7 @@ extern int irqbalance_disable(char *str); extern void fixup_irqs(cpumask_t map); #endif +void init_IRQ(void); +void __init native_init_IRQ(void); + #endif /* _ASM_IRQ_H */ diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h index e1bdb97c07fa..9ce01f3fb7bc 100644 --- a/include/asm-i386/irqflags.h +++ b/include/asm-i386/irqflags.h @@ -10,6 +10,9 @@ #ifndef _ASM_IRQFLAGS_H #define _ASM_IRQFLAGS_H +#ifdef CONFIG_PARAVIRT +#include +#else #ifndef __ASSEMBLY__ static inline unsigned long __raw_local_save_flags(void) @@ -25,9 +28,6 @@ static inline unsigned long __raw_local_save_flags(void) return flags; } -#define raw_local_save_flags(flags) \ - do { (flags) = __raw_local_save_flags(); } while (0) - static inline void raw_local_irq_restore(unsigned long flags) { __asm__ __volatile__( @@ -66,18 +66,6 @@ static inline void halt(void) __asm__ __volatile__("hlt": : :"memory"); } -static inline int raw_irqs_disabled_flags(unsigned long flags) -{ - return !(flags & (1 << 9)); -} - -static inline int raw_irqs_disabled(void) -{ - unsigned long flags = __raw_local_save_flags(); - - return raw_irqs_disabled_flags(flags); -} - /* * For spinlocks, etc: */ @@ -90,9 +78,33 @@ static inline unsigned long __raw_local_irq_save(void) return flags; } +#else +#define DISABLE_INTERRUPTS cli +#define ENABLE_INTERRUPTS sti +#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit +#define INTERRUPT_RETURN iret +#define GET_CR0_INTO_EAX movl %cr0, %eax +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ + +#ifndef __ASSEMBLY__ +#define raw_local_save_flags(flags) \ + do { (flags) = __raw_local_save_flags(); } while (0) + #define raw_local_irq_save(flags) \ do { (flags) = __raw_local_irq_save(); } while (0) +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & (1 << 9)); +} + +static inline int raw_irqs_disabled(void) +{ + unsigned long flags = __raw_local_save_flags(); + + return raw_irqs_disabled_flags(flags); +} #endif /* __ASSEMBLY__ */ /* diff --git a/include/asm-i386/mach-default/setup_arch.h b/include/asm-i386/mach-default/setup_arch.h index fb42099e7bd4..605e3ccb991b 100644 --- a/include/asm-i386/mach-default/setup_arch.h +++ b/include/asm-i386/mach-default/setup_arch.h @@ -2,4 +2,6 @@ /* no action for generic */ +#ifndef ARCH_SETUP #define ARCH_SETUP +#endif diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h index 1820d9d73af3..5679d4993072 100644 --- a/include/asm-i386/msr.h +++ b/include/asm-i386/msr.h @@ -1,6 +1,10 @@ #ifndef __ASM_MSR_H #define __ASM_MSR_H +#ifdef CONFIG_PARAVIRT +#include +#else + /* * Access to machine-specific registers (available on 586 and better only) * Note: the rd* operations modify the parameters directly (without using @@ -77,6 +81,7 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val) __asm__ __volatile__("rdpmc" \ : "=a" (low), "=d" (high) \ : "c" (counter)) +#endif /* !CONFIG_PARAVIRT */ /* symbolic names for some interesting MSRs */ /* Intel defined MSRs. */ diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h new file mode 100644 index 000000000000..a7551a44686f --- /dev/null +++ b/include/asm-i386/paravirt.h @@ -0,0 +1,281 @@ +#ifndef __ASM_PARAVIRT_H +#define __ASM_PARAVIRT_H +/* Various instructions on x86 need to be replaced for + * para-virtualization: those hooks are defined here. */ +#include + +#ifdef CONFIG_PARAVIRT +#ifndef __ASSEMBLY__ +struct thread_struct; +struct Xgt_desc_struct; +struct tss_struct; +struct paravirt_ops +{ + unsigned int kernel_rpl; + int paravirt_enabled; + const char *name; + + void (*arch_setup)(void); + char *(*memory_setup)(void); + void (*init_IRQ)(void); + + void (*banner)(void); + + unsigned long (*get_wallclock)(void); + int (*set_wallclock)(unsigned long); + void (*time_init)(void); + + /* All the function pointers here are declared as "fastcall" + so that we get a specific register-based calling + convention. This makes it easier to implement inline + assembler replacements. */ + + void (fastcall *cpuid)(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); + + unsigned long (fastcall *get_debugreg)(int regno); + void (fastcall *set_debugreg)(int regno, unsigned long value); + + void (fastcall *clts)(void); + + unsigned long (fastcall *read_cr0)(void); + void (fastcall *write_cr0)(unsigned long); + + unsigned long (fastcall *read_cr2)(void); + void (fastcall *write_cr2)(unsigned long); + + unsigned long (fastcall *read_cr3)(void); + void (fastcall *write_cr3)(unsigned long); + + unsigned long (fastcall *read_cr4_safe)(void); + unsigned long (fastcall *read_cr4)(void); + void (fastcall *write_cr4)(unsigned long); + + unsigned long (fastcall *save_fl)(void); + void (fastcall *restore_fl)(unsigned long); + void (fastcall *irq_disable)(void); + void (fastcall *irq_enable)(void); + void (fastcall *safe_halt)(void); + void (fastcall *halt)(void); + void (fastcall *wbinvd)(void); + + /* err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ + u64 (fastcall *read_msr)(unsigned int msr, int *err); + int (fastcall *write_msr)(unsigned int msr, u64 val); + + u64 (fastcall *read_tsc)(void); + u64 (fastcall *read_pmc)(void); + + void (fastcall *load_tr_desc)(void); + void (fastcall *load_gdt)(const struct Xgt_desc_struct *); + void (fastcall *load_idt)(const struct Xgt_desc_struct *); + void (fastcall *store_gdt)(struct Xgt_desc_struct *); + void (fastcall *store_idt)(struct Xgt_desc_struct *); + void (fastcall *set_ldt)(const void *desc, unsigned entries); + unsigned long (fastcall *store_tr)(void); + void (fastcall *load_tls)(struct thread_struct *t, unsigned int cpu); + void (fastcall *write_ldt_entry)(void *dt, int entrynum, + u32 low, u32 high); + void (fastcall *write_gdt_entry)(void *dt, int entrynum, + u32 low, u32 high); + void (fastcall *write_idt_entry)(void *dt, int entrynum, + u32 low, u32 high); + void (fastcall *load_esp0)(struct tss_struct *tss, + struct thread_struct *thread); + + void (fastcall *set_iopl_mask)(unsigned mask); + + void (fastcall *io_delay)(void); + void (*const_udelay)(unsigned long loops); + + /* These two are jmp to, not actually called. */ + void (fastcall *irq_enable_sysexit)(void); + void (fastcall *iret)(void); +}; + +extern struct paravirt_ops paravirt_ops; + +#define paravirt_enabled() (paravirt_ops.paravirt_enabled) + +static inline void load_esp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + paravirt_ops.load_esp0(tss, thread); +} + +#define ARCH_SETUP paravirt_ops.arch_setup(); +static inline unsigned long get_wallclock(void) +{ + return paravirt_ops.get_wallclock(); +} + +static inline int set_wallclock(unsigned long nowtime) +{ + return paravirt_ops.set_wallclock(nowtime); +} + +static inline void do_time_init(void) +{ + return paravirt_ops.time_init(); +} + +/* The paravirtualized CPUID instruction. */ +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + paravirt_ops.cpuid(eax, ebx, ecx, edx); +} + +/* + * These special macros can be used to get or set a debugging register + */ +#define get_debugreg(var, reg) var = paravirt_ops.get_debugreg(reg) +#define set_debugreg(val, reg) paravirt_ops.set_debugreg(reg, val) + +#define clts() paravirt_ops.clts() + +#define read_cr0() paravirt_ops.read_cr0() +#define write_cr0(x) paravirt_ops.write_cr0(x) + +#define read_cr2() paravirt_ops.read_cr2() +#define write_cr2(x) paravirt_ops.write_cr2(x) + +#define read_cr3() paravirt_ops.read_cr3() +#define write_cr3(x) paravirt_ops.write_cr3(x) + +#define read_cr4() paravirt_ops.read_cr4() +#define read_cr4_safe(x) paravirt_ops.read_cr4_safe() +#define write_cr4(x) paravirt_ops.write_cr4(x) + +static inline unsigned long __raw_local_save_flags(void) +{ + return paravirt_ops.save_fl(); +} + +static inline void raw_local_irq_restore(unsigned long flags) +{ + return paravirt_ops.restore_fl(flags); +} + +static inline void raw_local_irq_disable(void) +{ + paravirt_ops.irq_disable(); +} + +static inline void raw_local_irq_enable(void) +{ + paravirt_ops.irq_enable(); +} + +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long flags = paravirt_ops.save_fl(); + + paravirt_ops.irq_disable(); + + return flags; +} + +static inline void raw_safe_halt(void) +{ + paravirt_ops.safe_halt(); +} + +static inline void halt(void) +{ + paravirt_ops.safe_halt(); +} +#define wbinvd() paravirt_ops.wbinvd() + +#define get_kernel_rpl() (paravirt_ops.kernel_rpl) + +#define rdmsr(msr,val1,val2) do { \ + int _err; \ + u64 _l = paravirt_ops.read_msr(msr,&_err); \ + val1 = (u32)_l; \ + val2 = _l >> 32; \ +} while(0) + +#define wrmsr(msr,val1,val2) do { \ + u64 _l = ((u64)(val2) << 32) | (val1); \ + paravirt_ops.write_msr((msr), _l); \ +} while(0) + +#define rdmsrl(msr,val) do { \ + int _err; \ + val = paravirt_ops.read_msr((msr),&_err); \ +} while(0) + +#define wrmsrl(msr,val) (paravirt_ops.write_msr((msr),(val))) +#define wrmsr_safe(msr,a,b) ({ \ + u64 _l = ((u64)(b) << 32) | (a); \ + paravirt_ops.write_msr((msr),_l); \ +}) + +/* rdmsr with exception handling */ +#define rdmsr_safe(msr,a,b) ({ \ + int _err; \ + u64 _l = paravirt_ops.read_msr(msr,&_err); \ + (*a) = (u32)_l; \ + (*b) = _l >> 32; \ + _err; }) + +#define rdtsc(low,high) do { \ + u64 _l = paravirt_ops.read_tsc(); \ + low = (u32)_l; \ + high = _l >> 32; \ +} while(0) + +#define rdtscl(low) do { \ + u64 _l = paravirt_ops.read_tsc(); \ + low = (int)_l; \ +} while(0) + +#define rdtscll(val) (val = paravirt_ops.read_tsc()) + +#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) + +#define rdpmc(counter,low,high) do { \ + u64 _l = paravirt_ops.read_pmc(); \ + low = (u32)_l; \ + high = _l >> 32; \ +} while(0) + +#define load_TR_desc() (paravirt_ops.load_tr_desc()) +#define load_gdt(dtr) (paravirt_ops.load_gdt(dtr)) +#define load_idt(dtr) (paravirt_ops.load_idt(dtr)) +#define set_ldt(addr, entries) (paravirt_ops.set_ldt((addr), (entries))) +#define store_gdt(dtr) (paravirt_ops.store_gdt(dtr)) +#define store_idt(dtr) (paravirt_ops.store_idt(dtr)) +#define store_tr(tr) ((tr) = paravirt_ops.store_tr()) +#define load_TLS(t,cpu) (paravirt_ops.load_tls((t),(cpu))) +#define write_ldt_entry(dt, entry, low, high) \ + (paravirt_ops.write_ldt_entry((dt), (entry), (low), (high))) +#define write_gdt_entry(dt, entry, low, high) \ + (paravirt_ops.write_gdt_entry((dt), (entry), (low), (high))) +#define write_idt_entry(dt, entry, low, high) \ + (paravirt_ops.write_idt_entry((dt), (entry), (low), (high))) +#define set_iopl_mask(mask) (paravirt_ops.set_iopl_mask(mask)) + +/* The paravirtualized I/O functions */ +static inline void slow_down_io(void) { + paravirt_ops.io_delay(); +#ifdef REALLY_SLOW_IO + paravirt_ops.io_delay(); + paravirt_ops.io_delay(); + paravirt_ops.io_delay(); +#endif +} + +#define CLI_STRING "pushl %eax; pushl %ecx; pushl %edx; call *paravirt_ops+PARAVIRT_irq_disable; popl %edx; popl %ecx; popl %eax" +#define STI_STRING "pushl %eax; pushl %ecx; pushl %edx; call *paravirt_ops+PARAVIRT_irq_enable; popl %edx; popl %ecx; popl %eax" +#else /* __ASSEMBLY__ */ + +#define INTERRUPT_RETURN jmp *%cs:paravirt_ops+PARAVIRT_iret +#define DISABLE_INTERRUPTS pushl %eax; pushl %ecx; pushl %edx; call *paravirt_ops+PARAVIRT_irq_disable; popl %edx; popl %ecx; popl %eax +#define ENABLE_INTERRUPTS pushl %eax; pushl %ecx; pushl %edx; call *%cs:paravirt_ops+PARAVIRT_irq_enable; popl %edx; popl %ecx; popl %eax +#define ENABLE_INTERRUPTS_SYSEXIT jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit +#define GET_CR0_INTO_EAX call *paravirt_ops+PARAVIRT_read_cr0 +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ +#endif /* __ASM_PARAVIRT_H */ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 98fa73b71760..6c2c4457be0a 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -144,8 +144,8 @@ static inline void detect_ht(struct cpuinfo_x86 *c) {} #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) +static inline fastcall void native_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) { /* ecx is often an input as well as an output. */ __asm__("cpuid" @@ -491,6 +491,12 @@ struct thread_struct { .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \ } +#ifdef CONFIG_PARAVIRT +#include +#else +#define paravirt_enabled() 0 +#define __cpuid native_cpuid + static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) { tss->esp0 = thread->esp0; @@ -524,10 +530,13 @@ static inline void load_esp0(struct tss_struct *tss, struct thread_struct *threa : /* no output */ \ :"r" (value)) +#define set_iopl_mask native_set_iopl_mask +#endif /* CONFIG_PARAVIRT */ + /* * Set IOPL bits in EFLAGS from given mask */ -static inline void set_iopl_mask(unsigned mask) +static fastcall inline void native_set_iopl_mask(unsigned mask) { unsigned int reg; __asm__ __volatile__ ("pushfl;" diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h index 5bdda79b6b53..3c796af33776 100644 --- a/include/asm-i386/segment.h +++ b/include/asm-i386/segment.h @@ -131,5 +131,7 @@ #define SEGMENT_LDT 0x4 #define SEGMENT_GDT 0x0 +#ifndef CONFIG_PARAVIRT #define get_kernel_rpl() 0 #endif +#endif diff --git a/include/asm-i386/setup.h b/include/asm-i386/setup.h index 2734909eff84..9930c5a355fc 100644 --- a/include/asm-i386/setup.h +++ b/include/asm-i386/setup.h @@ -70,6 +70,7 @@ extern unsigned char boot_params[PARAM_SIZE]; struct e820entry; char * __init machine_specific_memory_setup(void); +char *memory_setup(void); int __init copy_e820_map(struct e820entry * biosmap, int nr_map); int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map); diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h index c18b71fae6b3..dea60709db29 100644 --- a/include/asm-i386/spinlock.h +++ b/include/asm-i386/spinlock.h @@ -7,8 +7,12 @@ #include #include +#ifdef CONFIG_PARAVIRT +#include +#else #define CLI_STRING "cli" #define STI_STRING "sti" +#endif /* CONFIG_PARAVIRT */ /* * Your basic SMP spinlocks, allowing only a single CPU anywhere diff --git a/include/asm-i386/suspend.h b/include/asm-i386/suspend.h index 08be1e5009d4..30361526d568 100644 --- a/include/asm-i386/suspend.h +++ b/include/asm-i386/suspend.h @@ -23,12 +23,8 @@ arch_prepare_suspend(void) struct saved_context { u16 es, fs, gs, ss; unsigned long cr0, cr2, cr3, cr4; - u16 gdt_pad; - u16 gdt_limit; - unsigned long gdt_base; - u16 idt_pad; - u16 idt_limit; - unsigned long idt_base; + struct Xgt_desc_struct gdt; + struct Xgt_desc_struct idt; u16 ldt; u16 tss; unsigned long tr; diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h index a6dabbcd6e6a..a6d20d9a1a30 100644 --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -88,6 +88,9 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" \ #define savesegment(seg, value) \ asm volatile("mov %%" #seg ",%0":"=rm" (value)) +#ifdef CONFIG_PARAVIRT +#include +#else #define read_cr0() ({ \ unsigned int __dummy; \ __asm__ __volatile__( \ @@ -139,17 +142,18 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" \ #define write_cr4(x) \ __asm__ __volatile__("movl %0,%%cr4": :"r" (x)) -/* - * Clear and set 'TS' bit respectively - */ +#define wbinvd() \ + __asm__ __volatile__ ("wbinvd": : :"memory") + +/* Clear the 'TS' bit */ #define clts() __asm__ __volatile__ ("clts") +#endif/* CONFIG_PARAVIRT */ + +/* Set the 'TS' bit */ #define stts() write_cr0(8 | read_cr0()) #endif /* __KERNEL__ */ -#define wbinvd() \ - __asm__ __volatile__ ("wbinvd": : :"memory") - static inline unsigned long get_limit(unsigned long segment) { unsigned long __limit; diff --git a/include/asm-i386/time.h b/include/asm-i386/time.h new file mode 100644 index 000000000000..ea8065af825a --- /dev/null +++ b/include/asm-i386/time.h @@ -0,0 +1,41 @@ +#ifndef _ASMi386_TIME_H +#define _ASMi386_TIME_H + +#include +#include "mach_time.h" + +static inline unsigned long native_get_wallclock(void) +{ + unsigned long retval; + + if (efi_enabled) + retval = efi_get_time(); + else + retval = mach_get_cmos_time(); + + return retval; +} + +static inline int native_set_wallclock(unsigned long nowtime) +{ + int retval; + + if (efi_enabled) + retval = efi_set_rtc_mmss(nowtime); + else + retval = mach_set_rtc_mmss(nowtime); + + return retval; +} + +#ifdef CONFIG_PARAVIRT +#include +#else /* !CONFIG_PARAVIRT */ + +#define get_wallclock() native_get_wallclock() +#define set_wallclock(x) native_set_wallclock(x) +#define do_time_init() time_init_hook() + +#endif /* CONFIG_PARAVIRT */ + +#endif -- cgit v1.2.3 From 19e5d9c0d2194b4b47189cbec2921cbf72b0bd1c Mon Sep 17 00:00:00 2001 From: Henry Nestler Date: Wed, 6 Dec 2006 20:37:45 -0800 Subject: [PATCH] initrd: remove unused false condition for initrd_start After LOADER_TYPE && INITRD_START are true, the short if-condition for INITRD_START can never be false. Remove unused code from the else condition. Signed-off-by: Henry Nestler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/kernel/setup.c | 2 +- arch/i386/kernel/setup.c | 3 +-- arch/m32r/kernel/setup.c | 4 +--- arch/m32r/mm/discontig.c | 4 +--- arch/sh/kernel/setup.c | 3 +-- arch/sh64/kernel/setup.c | 4 +--- arch/x86_64/kernel/setup.c | 3 +-- 7 files changed, 7 insertions(+), 16 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c index a8c61dac1cee..1a5eb6c301c9 100644 --- a/arch/frv/kernel/setup.c +++ b/arch/frv/kernel/setup.c @@ -947,7 +947,7 @@ static void __init setup_linux_memory(void) if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (low_top_pfn << PAGE_SHIFT)) { reserve_bootmem(INITRD_START, INITRD_SIZE); - initrd_start = INITRD_START ? INITRD_START + PAGE_OFFSET : 0; + initrd_start = INITRD_START + PAGE_OFFSET; initrd_end = initrd_start + INITRD_SIZE; } else { diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 141041dde74d..97bb869307bc 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1162,8 +1162,7 @@ void __init setup_bootmem_allocator(void) if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { reserve_bootmem(INITRD_START, INITRD_SIZE); - initrd_start = - INITRD_START ? INITRD_START + PAGE_OFFSET : 0; + initrd_start = INITRD_START + PAGE_OFFSET; initrd_end = initrd_start+INITRD_SIZE; } else { diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c index 0e7778be33cc..936205f7aba0 100644 --- a/arch/m32r/kernel/setup.c +++ b/arch/m32r/kernel/setup.c @@ -196,9 +196,7 @@ static unsigned long __init setup_memory(void) if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { reserve_bootmem(INITRD_START, INITRD_SIZE); - initrd_start = INITRD_START ? - INITRD_START + PAGE_OFFSET : 0; - + initrd_start = INITRD_START + PAGE_OFFSET; initrd_end = initrd_start + INITRD_SIZE; printk("initrd:start[%08lx],size[%08lx]\n", initrd_start, INITRD_SIZE); diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c index abb34ccd5986..c7efdb0aefc5 100644 --- a/arch/m32r/mm/discontig.c +++ b/arch/m32r/mm/discontig.c @@ -105,9 +105,7 @@ unsigned long __init setup_memory(void) if (INITRD_START + INITRD_SIZE <= PFN_PHYS(max_low_pfn)) { reserve_bootmem_node(NODE_DATA(0), INITRD_START, INITRD_SIZE); - initrd_start = INITRD_START ? - INITRD_START + PAGE_OFFSET : 0; - + initrd_start = INITRD_START + PAGE_OFFSET; initrd_end = initrd_start + INITRD_SIZE; printk("initrd:start[%08lx],size[%08lx]\n", initrd_start, INITRD_SIZE); diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 696ca75752d9..f8dd6b7bfab0 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -332,8 +332,7 @@ void __init setup_arch(char **cmdline_p) if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { reserve_bootmem_node(NODE_DATA(0), INITRD_START+__MEMORY_START, INITRD_SIZE); - initrd_start = - INITRD_START ? INITRD_START + PAGE_OFFSET + __MEMORY_START : 0; + initrd_start = INITRD_START + PAGE_OFFSET + __MEMORY_START; initrd_end = initrd_start + INITRD_SIZE; } else { printk("initrd extends beyond end of memory " diff --git a/arch/sh64/kernel/setup.c b/arch/sh64/kernel/setup.c index ffb310e33cef..b9e7d54d7b85 100644 --- a/arch/sh64/kernel/setup.c +++ b/arch/sh64/kernel/setup.c @@ -243,9 +243,7 @@ void __init setup_arch(char **cmdline_p) if (INITRD_START + INITRD_SIZE <= (PFN_PHYS(last_pfn))) { reserve_bootmem_node(NODE_DATA(0), INITRD_START + __MEMORY_START, INITRD_SIZE); - initrd_start = - (long) INITRD_START ? INITRD_START + PAGE_OFFSET + __MEMORY_START : 0; - + initrd_start = (long) INITRD_START + PAGE_OFFSET + __MEMORY_START; initrd_end = initrd_start + INITRD_SIZE; } else { printk("initrd extends beyond end of memory " diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index fc944b5e8f4a..f12f266f3e98 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -471,8 +471,7 @@ void __init setup_arch(char **cmdline_p) if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { reserve_bootmem_generic(INITRD_START, INITRD_SIZE); - initrd_start = - INITRD_START ? INITRD_START + PAGE_OFFSET : 0; + initrd_start = INITRD_START + PAGE_OFFSET; initrd_end = initrd_start+INITRD_SIZE; } else { -- cgit v1.2.3