1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
|
/*
* PPC Huge TLB Page Support for Kernel.
*
* Copyright (C) 2003 David Gibson, IBM Corporation.
* Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
*
* Based on the IA-32 version:
* Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
*/
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/export.h>
#include <linux/of_fdt.h>
#include <linux/memblock.h>
#include <linux/moduleparam.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/kmemleak.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/setup.h>
#include <asm/hugetlb.h>
#include <asm/pte-walk.h>
#include <asm/firmware.h>
bool hugetlb_disabled = false;
#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_basic_t)) - \
__builtin_ffs(sizeof(void *)))
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
/*
* Only called for hugetlbfs pages, hence can ignore THP and the
* irq disabled walk.
*/
return __find_linux_pte(mm->pgd, addr, NULL, NULL);
}
pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, unsigned long sz)
{
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
addr &= ~(sz - 1);
p4d = p4d_offset(pgd_offset(mm, addr), addr);
if (!mm_pud_folded(mm) && sz >= P4D_SIZE)
return (pte_t *)p4d;
pud = pud_alloc(mm, p4d, addr);
if (!pud)
return NULL;
if (!mm_pmd_folded(mm) && sz >= PUD_SIZE)
return (pte_t *)pud;
pmd = pmd_alloc(mm, pud, addr);
if (!pmd)
return NULL;
if (sz >= PMD_SIZE) {
/* On 8xx, all hugepages are handled as contiguous PTEs */
if (IS_ENABLED(CONFIG_PPC_8xx)) {
int i;
for (i = 0; i < sz / PMD_SIZE; i++) {
if (!pte_alloc_huge(mm, pmd + i, addr))
return NULL;
}
}
return (pte_t *)pmd;
}
return pte_alloc_huge(mm, pmd, addr);
}
#ifdef CONFIG_PPC_BOOK3S_64
/*
* Tracks gpages after the device tree is scanned and before the
* huge_boot_pages list is ready on pseries.
*/
#define MAX_NUMBER_GPAGES 1024
__initdata static u64 gpage_freearray[MAX_NUMBER_GPAGES];
__initdata static unsigned nr_gpages;
/*
* Build list of addresses of gigantic pages. This function is used in early
* boot before the buddy allocator is setup.
*/
void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{
if (!addr)
return;
while (number_of_pages > 0) {
gpage_freearray[nr_gpages] = addr;
nr_gpages++;
number_of_pages--;
addr += page_size;
}
}
static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
{
struct huge_bootmem_page *m;
if (nr_gpages == 0)
return 0;
m = phys_to_virt(gpage_freearray[--nr_gpages]);
gpage_freearray[nr_gpages] = 0;
list_add(&m->list, &huge_boot_pages[0]);
m->hstate = hstate;
return 1;
}
bool __init hugetlb_node_alloc_supported(void)
{
return false;
}
#endif
int __init alloc_bootmem_huge_page(struct hstate *h, int nid)
{
#ifdef CONFIG_PPC_BOOK3S_64
if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled())
return pseries_alloc_bootmem_huge_page(h);
#endif
return __alloc_bootmem_huge_page(h, nid);
}
bool __init arch_hugetlb_valid_size(unsigned long size)
{
int shift = __ffs(size);
int mmu_psize;
/* Check that it is a page size supported by the hardware and
* that it fits within pagetable and slice limits. */
if (size <= PAGE_SIZE || !is_power_of_2(size))
return false;
mmu_psize = check_and_get_huge_psize(shift);
if (mmu_psize < 0)
return false;
BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);
return true;
}
static int __init add_huge_page_size(unsigned long long size)
{
int shift = __ffs(size);
if (!arch_hugetlb_valid_size((unsigned long)size))
return -EINVAL;
hugetlb_add_hstate(shift - PAGE_SHIFT);
return 0;
}
static int __init hugetlbpage_init(void)
{
bool configured = false;
int psize;
if (hugetlb_disabled) {
pr_info("HugeTLB support is disabled!\n");
return 0;
}
if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled() &&
!mmu_has_feature(MMU_FTR_16M_PAGE))
return -ENODEV;
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
unsigned shift;
if (!mmu_psize_defs[psize].shift)
continue;
shift = mmu_psize_to_shift(psize);
if (add_huge_page_size(1ULL << shift) < 0)
continue;
configured = true;
}
if (!configured)
pr_info("Failed to initialize. Disabling HugeTLB");
return 0;
}
arch_initcall(hugetlbpage_init);
void __init gigantic_hugetlb_cma_reserve(void)
{
unsigned long order = 0;
if (radix_enabled())
order = PUD_SHIFT - PAGE_SHIFT;
else if (!firmware_has_feature(FW_FEATURE_LPAR) && mmu_psize_defs[MMU_PAGE_16G].shift)
/*
* For pseries we do use ibm,expected#pages for reserving 16G pages.
*/
order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT;
if (order)
hugetlb_cma_reserve(order);
}
|