1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/hugetlb.h> 3 #include <linux/err.h> 4 5 #ifdef CONFIG_RISCV_ISA_SVNAPOT 6 pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 7 { 8 unsigned long pte_num; 9 int i; 10 pte_t orig_pte = ptep_get(ptep); 11 12 if (!pte_present(orig_pte) || !pte_napot(orig_pte)) 13 return orig_pte; 14 15 pte_num = napot_pte_num(napot_cont_order(orig_pte)); 16 17 for (i = 0; i < pte_num; i++, ptep++) { 18 pte_t pte = ptep_get(ptep); 19 20 if (pte_dirty(pte)) 21 orig_pte = pte_mkdirty(orig_pte); 22 23 if (pte_young(pte)) 24 orig_pte = pte_mkyoung(orig_pte); 25 } 26 27 return orig_pte; 28 } 29 30 pte_t *huge_pte_alloc(struct mm_struct *mm, 31 struct vm_area_struct *vma, 32 unsigned long addr, 33 unsigned long sz) 34 { 35 unsigned long order; 36 pte_t *pte = NULL; 37 pgd_t *pgd; 38 p4d_t *p4d; 39 pud_t *pud; 40 pmd_t *pmd; 41 42 pgd = pgd_offset(mm, addr); 43 p4d = p4d_alloc(mm, pgd, addr); 44 if (!p4d) 45 return NULL; 46 47 pud = pud_alloc(mm, p4d, addr); 48 if (!pud) 49 return NULL; 50 51 if (sz == PUD_SIZE) { 52 pte = (pte_t *)pud; 53 goto out; 54 } 55 56 if (sz == PMD_SIZE) { 57 if (want_pmd_share(vma, addr) && pud_none(pudp_get(pud))) 58 pte = huge_pmd_share(mm, vma, addr, pud); 59 else 60 pte = (pte_t *)pmd_alloc(mm, pud, addr); 61 goto out; 62 } 63 64 pmd = pmd_alloc(mm, pud, addr); 65 if (!pmd) 66 return NULL; 67 68 for_each_napot_order(order) { 69 if (napot_cont_size(order) == sz) { 70 pte = pte_alloc_huge(mm, pmd, addr & napot_cont_mask(order)); 71 break; 72 } 73 } 74 75 out: 76 if (pte) { 77 pte_t pteval = ptep_get_lockless(pte); 78 79 WARN_ON_ONCE(pte_present(pteval) && !pte_huge(pteval)); 80 } 81 return pte; 82 } 83 84 pte_t *huge_pte_offset(struct mm_struct *mm, 85 unsigned long addr, 86 unsigned long sz) 87 { 88 unsigned long order; 89 pte_t *pte = NULL; 90 pgd_t *pgd; 91 p4d_t *p4d; 92 pud_t *pud; 93 pmd_t *pmd; 94 95 pgd = pgd_offset(mm, addr); 96 if (!pgd_present(pgdp_get(pgd))) 97 return NULL; 98 99 p4d = p4d_offset(pgd, addr); 100 if (!p4d_present(p4dp_get(p4d))) 101 return NULL; 102 103 pud = pud_offset(p4d, addr); 104 if (sz == PUD_SIZE) 105 /* must be pud huge, non-present or none */ 106 return (pte_t *)pud; 107 108 if (!pud_present(pudp_get(pud))) 109 return NULL; 110 111 pmd = pmd_offset(pud, addr); 112 if (sz == PMD_SIZE) 113 /* must be pmd huge, non-present or none */ 114 return (pte_t *)pmd; 115 116 if (!pmd_present(pmdp_get(pmd))) 117 return NULL; 118 119 for_each_napot_order(order) { 120 if (napot_cont_size(order) == sz) { 121 pte = pte_offset_huge(pmd, addr & napot_cont_mask(order)); 122 break; 123 } 124 } 125 return pte; 126 } 127 128 unsigned long hugetlb_mask_last_page(struct hstate *h) 129 { 130 unsigned long hp_size = huge_page_size(h); 131 132 switch (hp_size) { 133 #ifndef __PAGETABLE_PMD_FOLDED 134 case PUD_SIZE: 135 return P4D_SIZE - PUD_SIZE; 136 #endif 137 case PMD_SIZE: 138 return PUD_SIZE - PMD_SIZE; 139 case napot_cont_size(NAPOT_CONT64KB_ORDER): 140 return PMD_SIZE - napot_cont_size(NAPOT_CONT64KB_ORDER); 141 default: 142 break; 143 } 144 145 return 0UL; 146 } 147 148 static pte_t get_clear_contig(struct mm_struct *mm, 149 unsigned long addr, 150 pte_t *ptep, 151 unsigned long pte_num) 152 { 153 pte_t orig_pte = ptep_get(ptep); 154 unsigned long i; 155 156 for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) { 157 pte_t pte = ptep_get_and_clear(mm, addr, ptep); 158 159 if (pte_dirty(pte)) 160 orig_pte = pte_mkdirty(orig_pte); 161 162 if (pte_young(pte)) 163 orig_pte = pte_mkyoung(orig_pte); 164 } 165 166 return orig_pte; 167 } 168 169 static pte_t get_clear_contig_flush(struct mm_struct *mm, 170 unsigned long addr, 171 pte_t *ptep, 172 unsigned long pte_num) 173 { 174 pte_t orig_pte = get_clear_contig(mm, addr, ptep, pte_num); 175 struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); 176 bool valid = !pte_none(orig_pte); 177 178 if (valid) 179 flush_tlb_range(&vma, addr, addr + (PAGE_SIZE * pte_num)); 180 181 return orig_pte; 182 } 183 184 pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) 185 { 186 unsigned long order; 187 188 for_each_napot_order(order) { 189 if (shift == napot_cont_shift(order)) { 190 entry = pte_mknapot(entry, order); 191 break; 192 } 193 } 194 if (order == NAPOT_ORDER_MAX) 195 entry = pte_mkhuge(entry); 196 197 return entry; 198 } 199 200 static void clear_flush(struct mm_struct *mm, 201 unsigned long addr, 202 pte_t *ptep, 203 unsigned long pgsize, 204 unsigned long ncontig) 205 { 206 struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); 207 unsigned long i, saddr = addr; 208 209 for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) 210 ptep_get_and_clear(mm, addr, ptep); 211 212 flush_tlb_range(&vma, saddr, addr); 213 } 214 215 /* 216 * When dealing with NAPOT mappings, the privileged specification indicates that 217 * "if an update needs to be made, the OS generally should first mark all of the 218 * PTEs invalid, then issue SFENCE.VMA instruction(s) covering all 4 KiB regions 219 * within the range, [...] then update the PTE(s), as described in Section 220 * 4.2.1.". That's the equivalent of the Break-Before-Make approach used by 221 * arm64. 222 */ 223 void set_huge_pte_at(struct mm_struct *mm, 224 unsigned long addr, 225 pte_t *ptep, 226 pte_t pte, 227 unsigned long sz) 228 { 229 unsigned long hugepage_shift, pgsize; 230 int i, pte_num; 231 232 if (sz >= PGDIR_SIZE) 233 hugepage_shift = PGDIR_SHIFT; 234 else if (sz >= P4D_SIZE) 235 hugepage_shift = P4D_SHIFT; 236 else if (sz >= PUD_SIZE) 237 hugepage_shift = PUD_SHIFT; 238 else if (sz >= PMD_SIZE) 239 hugepage_shift = PMD_SHIFT; 240 else 241 hugepage_shift = PAGE_SHIFT; 242 243 pte_num = sz >> hugepage_shift; 244 pgsize = 1 << hugepage_shift; 245 246 if (!pte_present(pte)) { 247 for (i = 0; i < pte_num; i++, ptep++, addr += pgsize) 248 set_ptes(mm, addr, ptep, pte, 1); 249 return; 250 } 251 252 if (!pte_napot(pte)) { 253 set_ptes(mm, addr, ptep, pte, 1); 254 return; 255 } 256 257 clear_flush(mm, addr, ptep, pgsize, pte_num); 258 259 for (i = 0; i < pte_num; i++, ptep++, addr += pgsize) 260 set_pte_at(mm, addr, ptep, pte); 261 } 262 263 int huge_ptep_set_access_flags(struct vm_area_struct *vma, 264 unsigned long addr, 265 pte_t *ptep, 266 pte_t pte, 267 int dirty) 268 { 269 struct mm_struct *mm = vma->vm_mm; 270 unsigned long order; 271 pte_t orig_pte; 272 int i, pte_num; 273 274 if (!pte_napot(pte)) 275 return ptep_set_access_flags(vma, addr, ptep, pte, dirty); 276 277 order = napot_cont_order(pte); 278 pte_num = napot_pte_num(order); 279 ptep = huge_pte_offset(mm, addr, napot_cont_size(order)); 280 orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num); 281 282 if (pte_dirty(orig_pte)) 283 pte = pte_mkdirty(pte); 284 285 if (pte_young(orig_pte)) 286 pte = pte_mkyoung(pte); 287 288 for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) 289 set_pte_at(mm, addr, ptep, pte); 290 291 return true; 292 } 293 294 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, 295 unsigned long addr, 296 pte_t *ptep) 297 { 298 pte_t orig_pte = ptep_get(ptep); 299 int pte_num; 300 301 if (!pte_napot(orig_pte)) 302 return ptep_get_and_clear(mm, addr, ptep); 303 304 pte_num = napot_pte_num(napot_cont_order(orig_pte)); 305 306 return get_clear_contig(mm, addr, ptep, pte_num); 307 } 308 309 void huge_ptep_set_wrprotect(struct mm_struct *mm, 310 unsigned long addr, 311 pte_t *ptep) 312 { 313 pte_t pte = ptep_get(ptep); 314 unsigned long order; 315 pte_t orig_pte; 316 int i, pte_num; 317 318 if (!pte_napot(pte)) { 319 ptep_set_wrprotect(mm, addr, ptep); 320 return; 321 } 322 323 order = napot_cont_order(pte); 324 pte_num = napot_pte_num(order); 325 ptep = huge_pte_offset(mm, addr, napot_cont_size(order)); 326 orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num); 327 328 orig_pte = pte_wrprotect(orig_pte); 329 330 for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) 331 set_pte_at(mm, addr, ptep, orig_pte); 332 } 333 334 pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, 335 unsigned long addr, 336 pte_t *ptep) 337 { 338 pte_t pte = ptep_get(ptep); 339 int pte_num; 340 341 if (!pte_napot(pte)) 342 return ptep_clear_flush(vma, addr, ptep); 343 344 pte_num = napot_pte_num(napot_cont_order(pte)); 345 346 return get_clear_contig_flush(vma->vm_mm, addr, ptep, pte_num); 347 } 348 349 void huge_pte_clear(struct mm_struct *mm, 350 unsigned long addr, 351 pte_t *ptep, 352 unsigned long sz) 353 { 354 pte_t pte = ptep_get(ptep); 355 int i, pte_num; 356 357 if (!pte_napot(pte)) { 358 pte_clear(mm, addr, ptep); 359 return; 360 } 361 362 pte_num = napot_pte_num(napot_cont_order(pte)); 363 for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) 364 pte_clear(mm, addr, ptep); 365 } 366 367 static bool is_napot_size(unsigned long size) 368 { 369 unsigned long order; 370 371 if (!has_svnapot()) 372 return false; 373 374 for_each_napot_order(order) { 375 if (size == napot_cont_size(order)) 376 return true; 377 } 378 return false; 379 } 380 381 static __init int napot_hugetlbpages_init(void) 382 { 383 if (has_svnapot()) { 384 unsigned long order; 385 386 for_each_napot_order(order) 387 hugetlb_add_hstate(order); 388 } 389 return 0; 390 } 391 arch_initcall(napot_hugetlbpages_init); 392 393 #else 394 395 static bool is_napot_size(unsigned long size) 396 { 397 return false; 398 } 399 400 #endif /*CONFIG_RISCV_ISA_SVNAPOT*/ 401 402 static bool __hugetlb_valid_size(unsigned long size) 403 { 404 if (size == HPAGE_SIZE) 405 return true; 406 else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE) 407 return true; 408 else if (is_napot_size(size)) 409 return true; 410 else 411 return false; 412 } 413 414 bool __init arch_hugetlb_valid_size(unsigned long size) 415 { 416 return __hugetlb_valid_size(size); 417 } 418 419 #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION 420 bool arch_hugetlb_migration_supported(struct hstate *h) 421 { 422 return __hugetlb_valid_size(huge_page_size(h)); 423 } 424 #endif 425 426 #ifdef CONFIG_CONTIG_ALLOC 427 static __init int gigantic_pages_init(void) 428 { 429 /* With CONTIG_ALLOC, we can allocate gigantic pages at runtime */ 430 if (IS_ENABLED(CONFIG_64BIT)) 431 hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); 432 return 0; 433 } 434 arch_initcall(gigantic_pages_init); 435 #endif 436