// SPDX-License-Identifier: GPL-2.0
#include <linux/hugetlb.h>
#include <linux/err.h>

#ifdef CONFIG_RISCV_ISA_SVNAPOT
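/*
 * For a NAPOT mapping, fold the dirty and accessed bits of every PTE in
 * the contiguous group into the value returned to the caller, so the
 * group is reported dirty/young if any member is.
 */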
pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long pte_num;
	int i;
	pte_t orig_pte = ptep_get(ptep);

	if (!pte_present(orig_pte) || !pte_napot(orig_pte))
		return orig_pte;

	pte_num = napot_pte_num(napot_cont_order(orig_pte));

	for (i = 0; i < pte_num; i++, ptep++) {
		pte_t pte = ptep_get(ptep);

		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}

	return orig_pte;
}

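/*
 * Walk (and allocate as needed) the page table levels for @addr and
 * return the entry matching the requested huge page size: the PUD itself
 * for PUD_SIZE, a (possibly shared) PMD for PMD_SIZE, or a PTE aligned
 * to the NAPOT region for the contiguous sizes.
 */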
pte_t *huge_pte_alloc(struct mm_struct *mm,
		      struct vm_area_struct *vma,
		      unsigned long addr,
		      unsigned long sz)
{
	unsigned long order;
	pte_t *pte = NULL;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	p4d = p4d_alloc(mm, pgd, addr);
	if (!p4d)
		return NULL;

	pud = pud_alloc(mm, p4d, addr);
	if (!pud)
		return NULL;

	if (sz == PUD_SIZE) {
		pte = (pte_t *)pud;
		goto out;
	}

	if (sz == PMD_SIZE) {
		if (want_pmd_share(vma, addr) && pud_none(pudp_get(pud)))
			pte = huge_pmd_share(mm, vma, addr, pud);
		else
			pte = (pte_t *)pmd_alloc(mm, pud, addr);
		goto out;
	}

	pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		return NULL;

	for_each_napot_order(order) {
		if (napot_cont_size(order) == sz) {
			pte = pte_alloc_huge(mm, pmd, addr & napot_cont_mask(order));
			break;
		}
	}

out:
	if (pte) {
		pte_t pteval = ptep_get_lockless(pte);

		WARN_ON_ONCE(pte_present(pteval) && !pte_huge(pteval));
	}
	return pte;
}

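/*
 * Look up an existing huge mapping for @addr without allocating: return
 * the page table entry at the level implied by @sz, or NULL if an upper
 * level is not present.
 */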
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr,
		       unsigned long sz)
{
	unsigned long order;
	pte_t *pte = NULL;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	if (!pgd_present(pgdp_get(pgd)))
		return NULL;

	p4d = p4d_offset(pgd, addr);
	if (!p4d_present(p4dp_get(p4d)))
		return NULL;

	pud = pud_offset(p4d, addr);
	if (sz == PUD_SIZE)
		/* must be pud huge, non-present or none */
		return (pte_t *)pud;

	if (!pud_present(pudp_get(pud)))
		return NULL;

	pmd = pmd_offset(pud, addr);
	if (sz == PMD_SIZE)
		/* must be pmd huge, non-present or none */
		return (pte_t *)pmd;

	if (!pmd_present(pmdp_get(pmd)))
		return NULL;

	for_each_napot_order(order) {
		if (napot_cont_size(order) == sz) {
			pte = pte_offset_huge(pmd, addr & napot_cont_mask(order));
			break;
		}
	}
	return pte;
}

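/*
 * For each supported huge page size, return the size of the region
 * covered by one entry at the enclosing page table level minus one huge
 * page; unknown sizes return 0.
 */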
unsigned long hugetlb_mask_last_page(struct hstate *h)
{
	unsigned long hp_size = huge_page_size(h);

	switch (hp_size) {
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SIZE:
		return P4D_SIZE - PUD_SIZE;
#endif
	case PMD_SIZE:
		return PUD_SIZE - PMD_SIZE;
	case napot_cont_size(NAPOT_CONT64KB_ORDER):
		return PMD_SIZE - napot_cont_size(NAPOT_CONT64KB_ORDER);
	default:
		break;
	}

	return 0UL;
}

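/*
 * Clear all @ncontig PTEs backing a contiguous mapping and, if the
 * mapping was present, accumulate their dirty/accessed bits into the
 * first (returned) PTE value.
 */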
static pte_t get_clear_contig(struct mm_struct *mm,
			      unsigned long addr,
			      pte_t *ptep,
			      unsigned long ncontig)
{
	pte_t pte, tmp_pte;
	bool present;

	pte = ptep_get_and_clear(mm, addr, ptep);
	present = pte_present(pte);
	while (--ncontig) {
		ptep++;
		addr += PAGE_SIZE;
		tmp_pte = ptep_get_and_clear(mm, addr, ptep);
		if (present) {
			if (pte_dirty(tmp_pte))
				pte = pte_mkdirty(pte);
			if (pte_young(tmp_pte))
				pte = pte_mkyoung(pte);
		}
	}
	return pte;
}

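/* As above, but also flush the TLB for the cleared range when it was valid. */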
static pte_t get_clear_contig_flush(struct mm_struct *mm,
				    unsigned long addr,
				    pte_t *ptep,
				    unsigned long pte_num)
{
	pte_t orig_pte = get_clear_contig(mm, addr, ptep, pte_num);
	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
	bool valid = !pte_none(orig_pte);

	if (valid)
		flush_tlb_range(&vma, addr, addr + (PAGE_SIZE * pte_num));

	return orig_pte;
}

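/*
 * Encode @entry for the requested @shift: mark it NAPOT for a matching
 * contiguous order, otherwise fall back to a regular huge (leaf) PTE.
 */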
pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
	unsigned long order;

	for_each_napot_order(order) {
		if (shift == napot_cont_shift(order)) {
			entry = pte_mknapot(entry, order);
			break;
		}
	}
	if (order == NAPOT_ORDER_MAX)
		entry = pte_mkhuge(entry);

	return entry;
}

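/*
 * Invalidate @ncontig PTEs of size @pgsize starting at @addr and flush
 * the TLB for the whole range: the "break" half of break-before-make.
 */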
static void clear_flush(struct mm_struct *mm,
			unsigned long addr,
			pte_t *ptep,
			unsigned long pgsize,
			unsigned long ncontig)
{
	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		ptep_get_and_clear(mm, addr, ptep);

	flush_tlb_range(&vma, saddr, addr);
}

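/*
 * Derive the base page table size backing a huge page of @sz and return
 * how many entries of that size make it up (e.g. a 64 KiB NAPOT page is
 * sixteen 4 KiB PTEs).
 */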
static int num_contig_ptes_from_size(unsigned long sz, size_t *pgsize)
{
	unsigned long hugepage_shift;

	if (sz >= PGDIR_SIZE)
		hugepage_shift = PGDIR_SHIFT;
	else if (sz >= P4D_SIZE)
		hugepage_shift = P4D_SHIFT;
	else if (sz >= PUD_SIZE)
		hugepage_shift = PUD_SHIFT;
	else if (sz >= PMD_SIZE)
		hugepage_shift = PMD_SHIFT;
	else
		hugepage_shift = PAGE_SHIFT;

	*pgsize = 1 << hugepage_shift;

	return sz >> hugepage_shift;
}

/*
 * When dealing with NAPOT mappings, the privileged specification indicates that
 * "if an update needs to be made, the OS generally should first mark all of the
 * PTEs invalid, then issue SFENCE.VMA instruction(s) covering all 4 KiB regions
 * within the range, [...] then update the PTE(s), as described in Section
 * 4.2.1.". That's the equivalent of the Break-Before-Make approach used by
 * arm64.
 */
void set_huge_pte_at(struct mm_struct *mm,
		     unsigned long addr,
		     pte_t *ptep,
		     pte_t pte,
		     unsigned long sz)
{
	size_t pgsize;
	int i, pte_num;

	pte_num = num_contig_ptes_from_size(sz, &pgsize);

	if (!pte_present(pte)) {
		for (i = 0; i < pte_num; i++, ptep++, addr += pgsize)
			set_ptes(mm, addr, ptep, pte, 1);
		return;
	}

	if (!pte_napot(pte)) {
		set_ptes(mm, addr, ptep, pte, 1);
		return;
	}

	clear_flush(mm, addr, ptep, pgsize, pte_num);

	for (i = 0; i < pte_num; i++, ptep++, addr += pgsize)
		set_pte_at(mm, addr, ptep, pte);
}

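/*
 * Update access flags on a huge mapping. For NAPOT this clears and
 * flushes the whole contiguous group, merges any dirty/accessed bits it
 * carried into the new value, then rewrites every PTE.
 */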
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr,
			       pte_t *ptep,
			       pte_t pte,
			       int dirty)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long order;
	pte_t orig_pte;
	int i, pte_num;

	if (!pte_napot(pte))
		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	order = napot_cont_order(pte);
	pte_num = napot_pte_num(order);
	ptep = huge_pte_offset(mm, addr, napot_cont_size(order));
	orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);

	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	if (pte_young(orig_pte))
		pte = pte_mkyoung(pte);

	for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
		set_pte_at(mm, addr, ptep, pte);

	return true;
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
			      unsigned long addr,
			      pte_t *ptep, unsigned long sz)
{
	size_t pgsize;
	pte_t orig_pte = ptep_get(ptep);
	int pte_num;

	if (!pte_napot(orig_pte))
		return ptep_get_and_clear(mm, addr, ptep);

	pte_num = num_contig_ptes_from_size(sz, &pgsize);

	return get_clear_contig(mm, addr, ptep, pte_num);
}

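/*
 * Write-protect a huge mapping. NAPOT groups are cleared, flushed and
 * rewritten with the write bit removed so all member PTEs stay
 * consistent.
 */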
void huge_ptep_set_wrprotect(struct mm_struct *mm,
			     unsigned long addr,
			     pte_t *ptep)
{
	pte_t pte = ptep_get(ptep);
	unsigned long order;
	pte_t orig_pte;
	int i, pte_num;

	if (!pte_napot(pte)) {
		ptep_set_wrprotect(mm, addr, ptep);
		return;
	}

	order = napot_cont_order(pte);
	pte_num = napot_pte_num(order);
	ptep = huge_pte_offset(mm, addr, napot_cont_size(order));
	orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);

	orig_pte = pte_wrprotect(orig_pte);

	for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
		set_pte_at(mm, addr, ptep, orig_pte);
}

pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
			    unsigned long addr,
			    pte_t *ptep)
{
	pte_t pte = ptep_get(ptep);
	int pte_num;

	if (!pte_napot(pte))
		return ptep_clear_flush(vma, addr, ptep);

	pte_num = napot_pte_num(napot_cont_order(pte));

	return get_clear_contig_flush(vma->vm_mm, addr, ptep, pte_num);
}

void huge_pte_clear(struct mm_struct *mm,
		    unsigned long addr,
		    pte_t *ptep,
		    unsigned long sz)
{
	size_t pgsize;
	pte_t pte = ptep_get(ptep);
	int i, pte_num;

	if (!pte_napot(pte)) {
		pte_clear(mm, addr, ptep);
		return;
	}

	pte_num = num_contig_ptes_from_size(sz, &pgsize);

	for (i = 0; i < pte_num; i++, addr += pgsize, ptep++)
		pte_clear(mm, addr, ptep);
}

static bool is_napot_size(unsigned long size)
{
	unsigned long order;

	if (!has_svnapot())
		return false;

	for_each_napot_order(order) {
		if (size == napot_cont_size(order))
			return true;
	}
	return false;
}

static __init int napot_hugetlbpages_init(void)
{
	if (has_svnapot()) {
		unsigned long order;

		for_each_napot_order(order)
			hugetlb_add_hstate(order);
	}
	return 0;
}
arch_initcall(napot_hugetlbpages_init);

#else

static bool is_napot_size(unsigned long size)
{
	return false;
}

#endif /* CONFIG_RISCV_ISA_SVNAPOT */

static bool __hugetlb_valid_size(unsigned long size)
{
	if (size == HPAGE_SIZE)
		return true;
	else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE)
		return true;
	else if (is_napot_size(size))
		return true;
	else
		return false;
}

bool __init arch_hugetlb_valid_size(unsigned long size)
{
	return __hugetlb_valid_size(size);
}

#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
bool arch_hugetlb_migration_supported(struct hstate *h)
{
	return __hugetlb_valid_size(huge_page_size(h));
}
#endif

#ifdef CONFIG_CONTIG_ALLOC
static __init int gigantic_pages_init(void)
{
	/* With CONTIG_ALLOC, we can allocate gigantic pages at runtime */
	if (IS_ENABLED(CONFIG_64BIT))
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
	return 0;
}
arch_initcall(gigantic_pages_init);
#endif