// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2023 ARM Ltd.
 */

#include <linux/mm.h>
#include <linux/efi.h>
#include <linux/export.h>
#include <asm/tlbflush.h>

static inline bool mm_is_user(struct mm_struct *mm)
{
	/*
	 * Don't attempt to apply the contig bit to kernel mappings, because
	 * dynamically adding/removing the contig bit can cause page faults.
	 * These racing faults are ok for user space, since they get serialized
	 * on the PTL. But kernel mappings can't tolerate faults.
	 */
	if (unlikely(mm_is_efi(mm)))
		return false;
	return mm != &init_mm;
}

static inline pte_t *contpte_align_down(pte_t *ptep)
{
	return PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * CONT_PTES);
}

static inline pte_t *contpte_align_addr_ptep(unsigned long *start,
					     unsigned long *end, pte_t *ptep,
					     unsigned int nr)
{
	/*
	 * Note: caller must ensure these nr PTEs are consecutive (present)
	 * PTEs that map consecutive pages of the same large folio within a
	 * single VMA and a single page table.
	 */
	if (pte_cont(__ptep_get(ptep + nr - 1)))
		*end = ALIGN(*end, CONT_PTE_SIZE);

	if (pte_cont(__ptep_get(ptep))) {
		*start = ALIGN_DOWN(*start, CONT_PTE_SIZE);
		ptep = contpte_align_down(ptep);
	}

	return ptep;
}

static void contpte_try_unfold_partial(struct mm_struct *mm, unsigned long addr,
				       pte_t *ptep, unsigned int nr)
{
	/*
	 * Unfold any partially covered contpte block at the beginning and end
	 * of the range.
	 */

	if (ptep != contpte_align_down(ptep) || nr < CONT_PTES)
		contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));

	if (ptep + nr != contpte_align_down(ptep + nr)) {
		unsigned long last_addr = addr + PAGE_SIZE * (nr - 1);
		pte_t *last_ptep = ptep + nr - 1;

		contpte_try_unfold(mm, last_addr, last_ptep,
				   __ptep_get(last_ptep));
	}
}

static void contpte_convert(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep, pte_t pte)
{
	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
	unsigned long start_addr;
	pte_t *start_ptep;
	int i;

	start_ptep = ptep = contpte_align_down(ptep);
	start_addr = addr = ALIGN_DOWN(addr, CONT_PTE_SIZE);
	pte = pfn_pte(ALIGN_DOWN(pte_pfn(pte), CONT_PTES), pte_pgprot(pte));

	for (i = 0; i < CONT_PTES; i++, ptep++, addr += PAGE_SIZE) {
		pte_t ptent = __ptep_get_and_clear(mm, addr, ptep);

		if (pte_dirty(ptent))
			pte = pte_mkdirty(pte);

		if (pte_young(ptent))
			pte = pte_mkyoung(pte);
	}

	/*
	 * On eliding the __flush_tlb_range() under BBML2+noabort:
	 *
	 * NOTE: Instead of using N=16 as the contiguous block length, we use
	 * N=4 for clarity.
	 *
	 * NOTE: 'n' and 'c' are used to denote the "contiguous bit" being
	 * unset and set, respectively.
	 *
	 * We worry about two cases where the contiguous bit is used:
	 * - When folding N smaller non-contiguous ptes as 1 contiguous block.
	 * - When unfolding a contiguous block into N smaller non-contiguous ptes.
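	 *
	 * Both cases are handled by this function: __contpte_try_fold() and
	 * __contpte_try_unfold() below each call contpte_convert() once they
	 * have decided that a repaint is needed.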
	 *
	 * Currently, the BBML0 folding case looks as follows:
	 *
	 * 0) Initial page-table layout:
	 *
	 *   +----+----+----+----+
	 *   |RO,n|RO,n|RO,n|RW,n| <--- last page being set as RO
	 *   +----+----+----+----+
	 *
	 * 1) Aggregate AF + dirty flags using __ptep_get_and_clear():
	 *
	 *   +----+----+----+----+
	 *   |  0 |  0 |  0 |  0 |
	 *   +----+----+----+----+
	 *
	 * 2) __flush_tlb_range():
	 *
	 *   |____ tlbi + dsb ____|
	 *
	 * 3) __set_ptes() to repaint contiguous block:
	 *
	 *   +----+----+----+----+
	 *   |RO,c|RO,c|RO,c|RO,c|
	 *   +----+----+----+----+
	 *
	 * 4) The kernel will eventually __flush_tlb() for changed page:
	 *
	 *                  |____| <--- tlbi + dsb
	 *
	 * As expected, the intermediate tlbi+dsb ensures that other PEs
	 * only ever see an invalid (0) entry, or the new contiguous TLB entry.
	 * The final tlbi+dsb will always throw away the newly installed
	 * contiguous TLB entry, which is a micro-optimisation opportunity,
	 * but does not affect correctness.
	 *
	 * In the BBML2 case, the change is avoiding the intermediate tlbi+dsb.
	 * This means a few things, but notably other PEs will still "see" any
	 * stale cached TLB entries. This could lead to a "contiguous bit
	 * misprogramming" issue until the final tlbi+dsb of the changed page,
	 * which would clear out both the stale (RW,n) entry and the new (RO,c)
	 * contiguous entry installed in its place.
	 *
	 * What this is saying is the following:
	 *
	 *   +----+----+----+----+
	 *   |RO,n|RO,n|RO,n|RW,n| <--- old page tables, all non-contiguous
	 *   +----+----+----+----+
	 *
	 *   +----+----+----+----+
	 *   |RO,c|RO,c|RO,c|RO,c| <--- new page tables, all contiguous
	 *   +----+----+----+----+
	 *                    /\
	 *                    ||
	 *
	 * If both the old single (RW,n) and new contiguous (RO,c) TLB entries
	 * are present, and a write is made to this address, do we fault or
	 * is the write permitted (via amalgamation)?
	 *
	 * The relevant Arm ARM DDI 0487L.a requirements are RNGLXZ and RJQQTC,
	 * and together state that when BBML1 or BBML2 is implemented, either
	 * a TLB conflict abort is raised (which we expressly forbid), or the
	 * hardware will "produce an OA, access permissions, and memory
	 * attributes that are consistent with any of the programmed
	 * translation table values".
	 *
	 * That is to say, the hardware will either raise a TLB conflict abort,
	 * or produce one of the cached TLB entries, but never amalgamate.
	 *
	 * Thus, as the page tables are only considered "consistent" after
	 * the final tlbi+dsb (which evicts both the single stale (RW,n) TLB
	 * entry as well as the new contiguous (RO,c) TLB entry), omitting the
	 * initial tlbi+dsb is correct.
	 *
	 * It is also important to note that at the end of the BBML2 folding
	 * case, we are still left with potentially all N TLB entries still
	 * cached (the N-1 non-contiguous ptes, and the single contiguous
	 * block). However, over time, natural TLB pressure will cause the
	 * non-contiguous pte TLB entries to be flushed, leaving only the
	 * contiguous block TLB entry. This means that omitting the tlbi+dsb is
	 * not only correct, but also keeps our eventual performance benefits.
	 *
	 * For the unfolding case, BBML0 looks as follows:
	 *
	 * 0) Initial page-table layout:
	 *
	 *   +----+----+----+----+
	 *   |RW,c|RW,c|RW,c|RW,c| <--- last page being set as RO
	 *   +----+----+----+----+
	 *
	 * 1) Aggregate AF + dirty flags using __ptep_get_and_clear():
	 *
	 *   +----+----+----+----+
	 *   |  0 |  0 |  0 |  0 |
	 *   +----+----+----+----+
	 *
	 * 2) __flush_tlb_range():
	 *
	 *   |____ tlbi + dsb ____|
	 *
	 * 3) __set_ptes() to repaint as non-contiguous:
	 *
	 *   +----+----+----+----+
	 *   |RW,n|RW,n|RW,n|RW,n|
	 *   +----+----+----+----+
	 *
	 * 4) Update changed page permissions:
	 *
	 *   +----+----+----+----+
	 *   |RW,n|RW,n|RW,n|RO,n| <--- last page permissions set
	 *   +----+----+----+----+
	 *
	 * 5) The kernel will eventually __flush_tlb() for changed page:
	 *
	 *                  |____| <--- tlbi + dsb
	 *
	 * For BBML2, we again remove the intermediate tlbi+dsb. Here, there
	 * are no issues, as the final tlbi+dsb covering the changed page is
	 * guaranteed to remove the original large contiguous (RW,c) TLB entry,
	 * as well as the intermediate (RW,n) TLB entry; the next access will
	 * install the new (RO,n) TLB entry and the page tables are only
	 * considered "consistent" after the final tlbi+dsb, so software must
	 * be prepared for this inconsistency prior to finishing the mm dance
	 * regardless.
	 */

	if (!system_supports_bbml2_noabort())
		__flush_tlb_range(&vma, start_addr, addr, PAGE_SIZE, 3,
				  TLBF_NOWALKCACHE);

	__set_ptes(mm, start_addr, start_ptep, pte, CONT_PTES);
}

void __contpte_try_fold(struct mm_struct *mm, unsigned long addr,
			pte_t *ptep, pte_t pte)
{
	/*
	 * We have already checked that the virtual and physical addresses are
	 * correctly aligned for a contpte mapping in contpte_try_fold() so the
	 * remaining checks are to ensure that the contpte range is fully
	 * covered by a single folio, and ensure that all the ptes are valid
	 * with contiguous PFNs and matching prots. We ignore the state of the
	 * access and dirty bits for the purpose of deciding if it's a
	 * contiguous range; the folding process will generate a single contpte
	 * entry which has a single access and dirty bit. Those 2 bits are the
	 * logical OR of their respective bits in the constituent pte entries.
	 * In order to ensure the contpte range is covered by a single folio,
	 * we must recover the folio from the pfn, but special mappings don't
	 * have a folio backing them. Fortunately contpte_try_fold() already
	 * checked that the pte is not special - we never try to fold special
	 * mappings. Note we can't use vm_normal_page() for this since we don't
	 * have the vma.
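	 *
	 * In the loop below, pte_advance_pfn() builds the expected pte for
	 * each successive pfn, so a single pte_same() comparison checks that
	 * the pfns are contiguous and that the prots match.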
	 */

	unsigned long folio_start, folio_end;
	unsigned long cont_start, cont_end;
	pte_t expected_pte, subpte;
	struct folio *folio;
	struct page *page;
	unsigned long pfn;
	pte_t *orig_ptep;
	pgprot_t prot;
	int i;

	if (!mm_is_user(mm))
		return;

	page = pte_page(pte);
	folio = page_folio(page);
	folio_start = addr - (page - &folio->page) * PAGE_SIZE;
	folio_end = folio_start + folio_nr_pages(folio) * PAGE_SIZE;
	cont_start = ALIGN_DOWN(addr, CONT_PTE_SIZE);
	cont_end = cont_start + CONT_PTE_SIZE;

	if (folio_start > cont_start || folio_end < cont_end)
		return;

	pfn = ALIGN_DOWN(pte_pfn(pte), CONT_PTES);
	prot = pte_pgprot(pte_mkold(pte_mkclean(pte)));
	expected_pte = pfn_pte(pfn, prot);
	orig_ptep = ptep;
	ptep = contpte_align_down(ptep);

	for (i = 0; i < CONT_PTES; i++) {
		subpte = pte_mkold(pte_mkclean(__ptep_get(ptep)));
		if (!pte_same(subpte, expected_pte))
			return;
		expected_pte = pte_advance_pfn(expected_pte, 1);
		ptep++;
	}

	pte = pte_mkcont(pte);
	contpte_convert(mm, addr, orig_ptep, pte);
}
EXPORT_SYMBOL_GPL(__contpte_try_fold);

void __contpte_try_unfold(struct mm_struct *mm, unsigned long addr,
			  pte_t *ptep, pte_t pte)
{
	/*
	 * We have already checked that the ptes are contiguous in
	 * contpte_try_unfold(), so just check that the mm is user space.
	 */
	if (!mm_is_user(mm))
		return;

	pte = pte_mknoncont(pte);
	contpte_convert(mm, addr, ptep, pte);
}
EXPORT_SYMBOL_GPL(__contpte_try_unfold);

pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte)
{
	/*
	 * Gather access/dirty bits, which may be populated in any of the ptes
	 * of the contig range. We are guaranteed to be holding the PTL, so any
	 * contiguous range cannot be unfolded or otherwise modified under our
	 * feet.
	 */

	pte_t pte;
	int i;

	ptep = contpte_align_down(ptep);

	for (i = 0; i < CONT_PTES; i++, ptep++) {
		pte = __ptep_get(ptep);

		if (pte_dirty(pte)) {
			orig_pte = pte_mkdirty(orig_pte);
			for (; i < CONT_PTES; i++, ptep++) {
				pte = __ptep_get(ptep);
				if (pte_young(pte)) {
					orig_pte = pte_mkyoung(orig_pte);
					break;
				}
			}
			break;
		}

		if (pte_young(pte)) {
			orig_pte = pte_mkyoung(orig_pte);
			i++;
			ptep++;
			for (; i < CONT_PTES; i++, ptep++) {
				pte = __ptep_get(ptep);
				if (pte_dirty(pte)) {
					orig_pte = pte_mkdirty(orig_pte);
					break;
				}
			}
			break;
		}
	}

	return orig_pte;
}
EXPORT_SYMBOL_GPL(contpte_ptep_get);

static inline bool contpte_is_consistent(pte_t pte, unsigned long pfn,
					 pgprot_t orig_prot)
{
	pgprot_t prot = pte_pgprot(pte_mkold(pte_mkclean(pte)));

	return pte_valid_cont(pte) && pte_pfn(pte) == pfn &&
	       pgprot_val(prot) == pgprot_val(orig_prot);
}

pte_t contpte_ptep_get_lockless(pte_t *orig_ptep)
{
	/*
	 * The ptep_get_lockless() API requires us to read and return *orig_ptep
	 * so that it is self-consistent, without the PTL held, so we may be
	 * racing with other threads modifying the pte. Usually a READ_ONCE()
	 * would suffice, but for the contpte case, we also need to gather the
	 * access and dirty bits from across all ptes in the contiguous block,
	 * and we can't read all of those neighbouring ptes atomically, so any
	 * contiguous range may be unfolded/modified/refolded under our feet.
	 * Therefore we ensure we read a _consistent_ contpte range by checking
	 * that all ptes in the range are valid and have CONT_PTE set, that all
	 * pfns are contiguous and that all pgprots are the same (ignoring
	 * access/dirty). If we find a pte that is not consistent, then we must
	 * be racing with an update so start again. If the target pte does not
	 * have CONT_PTE set then that is considered consistent on its own
	 * because it is not part of a contpte range.
	 */

	pgprot_t orig_prot;
	unsigned long pfn;
	pte_t orig_pte;
	pte_t *ptep;
	pte_t pte;
	int i;

retry:
	orig_pte = __ptep_get(orig_ptep);

	if (!pte_valid_cont(orig_pte))
		return orig_pte;

	orig_prot = pte_pgprot(pte_mkold(pte_mkclean(orig_pte)));
	ptep = contpte_align_down(orig_ptep);
	pfn = pte_pfn(orig_pte) - (orig_ptep - ptep);

	for (i = 0; i < CONT_PTES; i++, ptep++, pfn++) {
		pte = __ptep_get(ptep);

		if (!contpte_is_consistent(pte, pfn, orig_prot))
			goto retry;

		if (pte_dirty(pte)) {
			orig_pte = pte_mkdirty(orig_pte);
			for (; i < CONT_PTES; i++, ptep++, pfn++) {
				pte = __ptep_get(ptep);

				if (!contpte_is_consistent(pte, pfn, orig_prot))
					goto retry;

				if (pte_young(pte)) {
					orig_pte = pte_mkyoung(orig_pte);
					break;
				}
			}
			break;
		}

		if (pte_young(pte)) {
			orig_pte = pte_mkyoung(orig_pte);
			i++;
			ptep++;
			pfn++;
			for (; i < CONT_PTES; i++, ptep++, pfn++) {
				pte = __ptep_get(ptep);

				if (!contpte_is_consistent(pte, pfn, orig_prot))
					goto retry;

				if (pte_dirty(pte)) {
					orig_pte = pte_mkdirty(orig_pte);
					break;
				}
			}
			break;
		}
	}

	return orig_pte;
}
EXPORT_SYMBOL_GPL(contpte_ptep_get_lockless);

void contpte_set_ptes(struct mm_struct *mm, unsigned long addr,
		      pte_t *ptep, pte_t pte, unsigned int nr)
{
	unsigned long next;
	unsigned long end;
	unsigned long pfn;
	pgprot_t prot;

	/*
	 * The set_ptes() spec guarantees that when nr > 1, the initial state of
	 * all ptes is not-present. Therefore we never need to unfold or
	 * otherwise invalidate a range before we set the new ptes.
	 * contpte_set_ptes() should never be called for nr < 2.
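	 *
	 * The loop below walks the range in pte_cont_addr_end() chunks and
	 * sets the contiguous bit only for chunks whose virtual addresses and
	 * pfn are all naturally CONT_PTE_SIZE aligned; any other chunk is
	 * mapped with non-contiguous ptes.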
	 */
	VM_WARN_ON(nr == 1);

	if (!mm_is_user(mm))
		return __set_ptes(mm, addr, ptep, pte, nr);

	end = addr + (nr << PAGE_SHIFT);
	pfn = pte_pfn(pte);
	prot = pte_pgprot(pte);

	do {
		next = pte_cont_addr_end(addr, end);
		nr = (next - addr) >> PAGE_SHIFT;
		pte = pfn_pte(pfn, prot);

		if (((addr | next | (pfn << PAGE_SHIFT)) & ~CONT_PTE_MASK) == 0)
			pte = pte_mkcont(pte);
		else
			pte = pte_mknoncont(pte);

		__set_ptes(mm, addr, ptep, pte, nr);

		addr = next;
		ptep += nr;
		pfn += nr;

	} while (addr != end);
}
EXPORT_SYMBOL_GPL(contpte_set_ptes);

void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep, unsigned int nr, int full)
{
	contpte_try_unfold_partial(mm, addr, ptep, nr);
	__clear_full_ptes(mm, addr, ptep, nr, full);
}
EXPORT_SYMBOL_GPL(contpte_clear_full_ptes);

pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep,
				      unsigned int nr, int full)
{
	contpte_try_unfold_partial(mm, addr, ptep, nr);
	return __get_and_clear_full_ptes(mm, addr, ptep, nr, full);
}
EXPORT_SYMBOL_GPL(contpte_get_and_clear_full_ptes);

bool contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
				       unsigned long addr, pte_t *ptep,
				       unsigned int nr)
{
	/*
	 * ptep_clear_flush_young() technically requires us to clear the access
	 * flag for a _single_ pte. However, the core-mm code actually tracks
	 * access/dirty per folio, not per page. And since we only create a
	 * contig range when the range is covered by a single folio, we can get
	 * away with clearing young for the whole contig range here, so we avoid
	 * having to unfold.
	 *
	 * Here, 'nr' is a count of consecutive (present) PTEs that map
	 * consecutive pages of the same large folio in a single VMA and a
	 * single page table.
	 */

	unsigned long end = addr + nr * PAGE_SIZE;
	bool young = false;

	ptep = contpte_align_addr_ptep(&addr, &end, ptep, nr);
	for (; addr != end; ptep++, addr += PAGE_SIZE)
		young |= __ptep_test_and_clear_young(vma, addr, ptep);

	return young;
}
EXPORT_SYMBOL_GPL(contpte_test_and_clear_young_ptes);

bool contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep,
				    unsigned int nr)
{
	bool young;

	young = contpte_test_and_clear_young_ptes(vma, addr, ptep, nr);

	if (young) {
		unsigned long end = addr + nr * PAGE_SIZE;

		contpte_align_addr_ptep(&addr, &end, ptep, nr);
		/*
		 * See comment in __ptep_clear_flush_young(); same rationale for
		 * eliding the trailing DSB applies here.
		 */
		__flush_tlb_range(vma, addr, end, PAGE_SIZE, 3,
				  TLBF_NOWALKCACHE | TLBF_NOSYNC);
	}

	return young;
}
EXPORT_SYMBOL_GPL(contpte_clear_flush_young_ptes);

void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep, unsigned int nr)
{
	/*
	 * If wrprotecting an entire contig range, we can avoid unfolding. Just
	 * set wrprotect and wait for the later mmu_gather flush to invalidate
	 * the tlb. Until the flush, the page may or may not be wrprotected.
	 * After the flush, it is guaranteed wrprotected. If it's a partial
	 * range though, we must unfold, because we can't have a case where
	 * CONT_PTE is set but wrprotect applies to a subset of the PTEs; this
	 * would cause it to continue to be unpredictable after the flush.
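	 *
	 * contpte_try_unfold_partial() below only unfolds contpte blocks that
	 * are partially covered by the range; fully covered blocks keep the
	 * contiguous bit and are simply wrprotected in place by
	 * __wrprotect_ptes().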
	 */

	contpte_try_unfold_partial(mm, addr, ptep, nr);
	__wrprotect_ptes(mm, addr, ptep, nr);
}
EXPORT_SYMBOL_GPL(contpte_wrprotect_ptes);

void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep,
				    unsigned int nr, cydp_t flags)
{
	/*
	 * We can safely clear access/dirty without needing to unfold from
	 * the architecture's perspective, even when contpte is set. If the
	 * range starts or ends midway through a contpte block, we can just
	 * expand to include the full contpte block. While this is not
	 * exactly what the core-mm asked for, it tracks access/dirty per
	 * folio, not per page. And since we only create a contpte block
	 * when it is covered by a single folio, we can get away with
	 * clearing access/dirty for the whole block.
	 */
	unsigned long start = addr;
	unsigned long end = start + nr * PAGE_SIZE;

	ptep = contpte_align_addr_ptep(&start, &end, ptep, nr);
	__clear_young_dirty_ptes(vma, start, ptep, (end - start) / PAGE_SIZE, flags);
}
EXPORT_SYMBOL_GPL(contpte_clear_young_dirty_ptes);

static bool contpte_all_subptes_match_access_flags(pte_t *ptep, pte_t entry)
{
	pte_t *cont_ptep = contpte_align_down(ptep);
	/*
	 * PFNs differ per sub-PTE. Match only bits consumed by
	 * __ptep_set_access_flags(): AF, DIRTY and write permission.
	 */
	const pteval_t cmp_mask = PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY;
	pteval_t entry_cmp = pte_val(entry) & cmp_mask;
	int i;

	for (i = 0; i < CONT_PTES; i++) {
		pteval_t pte_cmp = pte_val(__ptep_get(cont_ptep + i)) & cmp_mask;

		if (pte_cmp != entry_cmp)
			return false;
	}

	return true;
}

int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
				  unsigned long addr, pte_t *ptep,
				  pte_t entry, int dirty)
{
	unsigned long start_addr;
	pte_t orig_pte;
	int i;

	/*
	 * Check whether all sub-PTEs in the CONT block already match the
	 * requested access flags/write permission, using raw per-PTE values
	 * rather than the gathered ptep_get() view.
	 *
	 * __ptep_set_access_flags() can update AF, dirty and write
	 * permission, but only to make the mapping more permissive.
	 *
	 * ptep_get() gathers AF/dirty state across the whole CONT block,
	 * which is correct for a CPU with FEAT_HAFDBS. But page-table
	 * walkers that evaluate each descriptor individually (e.g. a CPU
	 * without DBM support, or an SMMU without HTTU, or with HA/HD
	 * disabled in CD.TCR) can keep faulting on the target sub-PTE if
	 * only a sibling has been updated. Gathering can therefore cause
	 * false no-ops:
	 * - write faults: the target still has PTE_RDONLY (needs PTE_RDONLY cleared)
	 * - read faults: the target still lacks PTE_AF
	 *
	 * Per Arm ARM (DDI 0487) D8.7.1, any sub-PTE in a CONT range may
	 * become the effective cached translation, so all entries must have
	 * consistent attributes. Check the full CONT block before returning
	 * a no-op, and when any sub-PTE mismatches, proceed to update the
	 * whole range.
	 */
	if (contpte_all_subptes_match_access_flags(ptep, entry))
		return 0;

	/*
	 * Use the raw target pte (not the gathered view) for the write-bit
	 * unfold decision.
	 */
	orig_pte = pte_mknoncont(__ptep_get(ptep));

	/*
	 * We can fix up access/dirty bits without having to unfold the contig
	 * range. But if the write bit is changing, we must unfold.
	 */
	if (pte_write(orig_pte) == pte_write(entry)) {
		/*
		 * For HW access management, we technically only need to update
		 * the flag on a single pte in the range. But for SW access
		 * management, we need to update all the ptes to prevent extra
		 * faults. Avoid the per-page tlb flush in
		 * __ptep_set_access_flags() and instead flush the whole range
		 * at the end.
		 */
		ptep = contpte_align_down(ptep);
		start_addr = addr = ALIGN_DOWN(addr, CONT_PTE_SIZE);

		/*
		 * We are not advancing entry because __ptep_set_access_flags()
		 * only consumes the access flags from entry. And since we
		 * checked the whole contpte block above and would have
		 * returned early if everything matched, pte_same() within
		 * __ptep_set_access_flags() is likely false.
		 */
		for (i = 0; i < CONT_PTES; i++, ptep++, addr += PAGE_SIZE)
			__ptep_set_access_flags(vma, addr, ptep, entry, 0);

		if (dirty)
			__flush_tlb_range(vma, start_addr,
					  start_addr + CONT_PTE_SIZE,
					  PAGE_SIZE, 3,
					  TLBF_NOWALKCACHE | TLBF_NOBROADCAST);
	} else {
		__contpte_try_unfold(vma->vm_mm, addr, ptep, orig_pte);
		__ptep_set_access_flags(vma, addr, ptep, entry, dirty);
	}

	return 1;
}
EXPORT_SYMBOL_GPL(contpte_ptep_set_access_flags);