summaryrefslogtreecommitdiffstats
path: root/mm/memblock.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memblock.c')
-rw-r--r--mm/memblock.c144
1 files changed, 95 insertions, 49 deletions
diff --git a/mm/memblock.c b/mm/memblock.c
index 165f40a8a254..8d9b5f1e7040 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -48,12 +48,12 @@
* boot regardless of the possible restrictions and memory hot(un)plug;
* the ``physmem`` type is only available on some architectures.
*
- * Each region is represented by :c:type:`struct memblock_region` that
+ * Each region is represented by struct memblock_region that
* defines the region extents, its attributes and NUMA node id on NUMA
- * systems. Every memory type is described by the :c:type:`struct
- * memblock_type` which contains an array of memory regions along with
+ * systems. Every memory type is described by the struct memblock_type
+ * which contains an array of memory regions along with
* the allocator metadata. The "memory" and "reserved" types are nicely
- * wrapped with :c:type:`struct memblock`. This structure is statically
+ * wrapped with struct memblock. This structure is statically
* initialized at build time. The region arrays are initially sized to
* %INIT_MEMBLOCK_REGIONS for "memory" and %INIT_MEMBLOCK_RESERVED_REGIONS
* for "reserved". The region array for "physmem" is initially sized to
@@ -275,14 +275,6 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
*
* Find @size free area aligned to @align in the specified range and node.
*
- * When allocation direction is bottom-up, the @start should be greater
- * than the end of the kernel image. Otherwise, it will be trimmed. The
- * reason is that we want the bottom-up allocation just near the kernel
- * image so it is highly likely that the allocated memory and the kernel
- * will reside in the same node.
- *
- * If bottom-up allocation failed, will try to allocate memory top-down.
- *
* Return:
* Found address on success, 0 on failure.
*/
@@ -291,8 +283,6 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
phys_addr_t end, int nid,
enum memblock_flags flags)
{
- phys_addr_t kernel_end, ret;
-
/* pump up @end */
if (end == MEMBLOCK_ALLOC_ACCESSIBLE ||
end == MEMBLOCK_ALLOC_KASAN)
@@ -301,40 +291,13 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
/* avoid allocating the first page */
start = max_t(phys_addr_t, start, PAGE_SIZE);
end = max(start, end);
- kernel_end = __pa_symbol(_end);
-
- /*
- * try bottom-up allocation only when bottom-up mode
- * is set and @end is above the kernel image.
- */
- if (memblock_bottom_up() && end > kernel_end) {
- phys_addr_t bottom_up_start;
-
- /* make sure we will allocate above the kernel */
- bottom_up_start = max(start, kernel_end);
- /* ok, try bottom-up allocation first */
- ret = __memblock_find_range_bottom_up(bottom_up_start, end,
- size, align, nid, flags);
- if (ret)
- return ret;
-
- /*
- * we always limit bottom-up allocation above the kernel,
- * but top-down allocation doesn't have the limit, so
- * retrying top-down allocation may succeed when bottom-up
- * allocation failed.
- *
- * bottom-up allocation is expected to be fail very rarely,
- * so we use WARN_ONCE() here to see the stack trace if
- * fail happens.
- */
- WARN_ONCE(IS_ENABLED(CONFIG_MEMORY_HOTREMOVE),
- "memblock: bottom-up allocation failed, memory hotremove may be affected\n");
- }
-
- return __memblock_find_range_top_down(start, end, size, align, nid,
- flags);
+ if (memblock_bottom_up())
+ return __memblock_find_range_bottom_up(start, end, size, align,
+ nid, flags);
+ else
+ return __memblock_find_range_top_down(start, end, size, align,
+ nid, flags);
}
/**
@@ -871,7 +834,7 @@ int __init_memblock memblock_physmem_add(phys_addr_t base, phys_addr_t size)
* @base: base address of the region
* @size: size of the region
* @set: set or clear the flag
- * @flag: the flag to udpate
+ * @flag: the flag to update
*
* This function isolates region [@base, @base + @size), and sets/clears flag
*
@@ -1419,12 +1382,15 @@ phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
phys_addr_t start,
phys_addr_t end)
{
+ memblock_dbg("%s: %llu bytes align=0x%llx from=%pa max_addr=%pa %pS\n",
+ __func__, (u64)size, (u64)align, &start, &end,
+ (void *)_RET_IP_);
return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
false);
}
/**
- * memblock_phys_alloc_try_nid - allocate a memory block from specified MUMA node
+ * memblock_phys_alloc_try_nid - allocate a memory block from specified NUMA node
* @size: size of memory block to be allocated in bytes
* @align: alignment of the region and block's size
* @nid: nid of the free area to find, %NUMA_NO_NODE for any node
@@ -1926,6 +1892,85 @@ static int __init early_memblock(char *p)
}
early_param("memblock", early_memblock);
+static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn)
+{
+ struct page *start_pg, *end_pg;
+ phys_addr_t pg, pgend;
+
+ /*
+ * Convert start_pfn/end_pfn to a struct page pointer.
+ */
+ start_pg = pfn_to_page(start_pfn - 1) + 1;
+ end_pg = pfn_to_page(end_pfn - 1) + 1;
+
+ /*
+ * Convert to physical addresses, and round start upwards and end
+ * downwards.
+ */
+ pg = PAGE_ALIGN(__pa(start_pg));
+ pgend = __pa(end_pg) & PAGE_MASK;
+
+ /*
+ * If there are free pages between these, free the section of the
+ * memmap array.
+ */
+ if (pg < pgend)
+ memblock_free(pg, pgend - pg);
+}
+
+/*
+ * The mem_map array can get very big. Free the unused area of the memory map.
+ */
+static void __init free_unused_memmap(void)
+{
+ unsigned long start, end, prev_end = 0;
+ int i;
+
+ if (!IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) ||
+ IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
+ return;
+
+ /*
+ * This relies on each bank being in address order.
+ * The banks are sorted previously in bootmem_init().
+ */
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
+#ifdef CONFIG_SPARSEMEM
+ /*
+ * Take care not to free memmap entries that don't exist
+ * due to SPARSEMEM sections which aren't present.
+ */
+ start = min(start, ALIGN(prev_end, PAGES_PER_SECTION));
+#else
+ /*
+ * Align down here since the VM subsystem insists that the
+ * memmap entries are valid from the bank start aligned to
+ * MAX_ORDER_NR_PAGES.
+ */
+ start = round_down(start, MAX_ORDER_NR_PAGES);
+#endif
+
+ /*
+ * If we had a previous bank, and there is a space
+ * between the current bank and the previous, free it.
+ */
+ if (prev_end && prev_end < start)
+ free_memmap(prev_end, start);
+
+ /*
+ * Align up here since the VM subsystem insists that the
+ * memmap entries are valid from the bank end aligned to
+ * MAX_ORDER_NR_PAGES.
+ */
+ prev_end = ALIGN(end, MAX_ORDER_NR_PAGES);
+ }
+
+#ifdef CONFIG_SPARSEMEM
+ if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION))
+ free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION));
+#endif
+}
+
static void __init __free_pages_memory(unsigned long start, unsigned long end)
{
int order;
@@ -2012,6 +2057,7 @@ unsigned long __init memblock_free_all(void)
{
unsigned long pages;
+ free_unused_memmap();
reset_all_zones_managed_pages();
pages = free_low_memory_core_early();