1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Kernel Physical Mapping (segkpm) hat interface routines for sun4u. 28 */ 29 30 #include <sys/types.h> 31 #include <vm/hat.h> 32 #include <vm/hat_sfmmu.h> 33 #include <vm/page.h> 34 #include <sys/sysmacros.h> 35 #include <sys/cmn_err.h> 36 #include <sys/machsystm.h> 37 #include <vm/seg_kpm.h> 38 #include <sys/cpu_module.h> 39 #include <vm/mach_kpm.h> 40 41 /* kpm prototypes */ 42 static caddr_t sfmmu_kpm_mapin(page_t *); 43 static void sfmmu_kpm_mapout(page_t *, caddr_t); 44 static int sfmmu_kpme_lookup(struct kpme *, page_t *); 45 static void sfmmu_kpme_add(struct kpme *, page_t *); 46 static void sfmmu_kpme_sub(struct kpme *, page_t *); 47 static caddr_t sfmmu_kpm_getvaddr(page_t *, int *); 48 static int sfmmu_kpm_fault(caddr_t, struct memseg *, page_t *); 49 static int sfmmu_kpm_fault_small(caddr_t, struct memseg *, page_t *); 50 static void sfmmu_kpm_vac_conflict(page_t *, caddr_t); 51 void sfmmu_kpm_pageunload(page_t *); 52 void sfmmu_kpm_vac_unload(page_t *, caddr_t); 53 static void sfmmu_kpm_demap_large(caddr_t); 54 static void sfmmu_kpm_demap_small(caddr_t); 55 static void sfmmu_kpm_demap_tlbs(caddr_t); 56 void sfmmu_kpm_hme_unload(page_t *); 57 kpm_hlk_t *sfmmu_kpm_kpmp_enter(page_t *, pgcnt_t); 58 void sfmmu_kpm_kpmp_exit(kpm_hlk_t *kpmp); 59 void sfmmu_kpm_page_cache(page_t *, int, int); 60 61 extern uint_t vac_colors; 62 63 /* 64 * Kernel Physical Mapping (kpm) facility 65 */ 66 67 void 68 mach_kpm_init() 69 {} 70 71 /* -- hat_kpm interface section -- */ 72 73 /* 74 * Mapin a locked page and return the vaddr. 75 * When a kpme is provided by the caller it is added to 76 * the page p_kpmelist. The page to be mapped in must 77 * be at least read locked (p_selock). 78 */ 79 caddr_t 80 hat_kpm_mapin(struct page *pp, struct kpme *kpme) 81 { 82 kmutex_t *pml; 83 caddr_t vaddr; 84 85 if (kpm_enable == 0) { 86 cmn_err(CE_WARN, "hat_kpm_mapin: kpm_enable not set"); 87 return ((caddr_t)NULL); 88 } 89 90 if (pp == NULL || PAGE_LOCKED(pp) == 0) { 91 cmn_err(CE_WARN, "hat_kpm_mapin: pp zero or not locked"); 92 return ((caddr_t)NULL); 93 } 94 95 pml = sfmmu_mlist_enter(pp); 96 ASSERT(pp->p_kpmref >= 0); 97 98 vaddr = (pp->p_kpmref == 0) ? 99 sfmmu_kpm_mapin(pp) : hat_kpm_page2va(pp, 1); 100 101 if (kpme != NULL) { 102 /* 103 * Tolerate multiple mapins for the same kpme to avoid 104 * the need for an extra serialization. 
105 */ 106 if ((sfmmu_kpme_lookup(kpme, pp)) == 0) 107 sfmmu_kpme_add(kpme, pp); 108 109 ASSERT(pp->p_kpmref > 0); 110 111 } else { 112 pp->p_kpmref++; 113 } 114 115 sfmmu_mlist_exit(pml); 116 return (vaddr); 117 } 118 119 /* 120 * Mapout a locked page. 121 * When a kpme is provided by the caller it is removed from 122 * the page p_kpmelist. The page to be mapped out must be at 123 * least read locked (p_selock). 124 * Note: The seg_kpm layer provides a mapout interface for the 125 * case that a kpme is used and the underlying page is unlocked. 126 * This can be used instead of calling this function directly. 127 */ 128 void 129 hat_kpm_mapout(struct page *pp, struct kpme *kpme, caddr_t vaddr) 130 { 131 kmutex_t *pml; 132 133 if (kpm_enable == 0) { 134 cmn_err(CE_WARN, "hat_kpm_mapout: kpm_enable not set"); 135 return; 136 } 137 138 if (IS_KPM_ADDR(vaddr) == 0) { 139 cmn_err(CE_WARN, "hat_kpm_mapout: no kpm address"); 140 return; 141 } 142 143 if (pp == NULL || PAGE_LOCKED(pp) == 0) { 144 cmn_err(CE_WARN, "hat_kpm_mapout: page zero or not locked"); 145 return; 146 } 147 148 if (kpme != NULL) { 149 ASSERT(pp == kpme->kpe_page); 150 pp = kpme->kpe_page; 151 pml = sfmmu_mlist_enter(pp); 152 153 if (sfmmu_kpme_lookup(kpme, pp) == 0) 154 panic("hat_kpm_mapout: kpme not found pp=%p", 155 (void *)pp); 156 157 ASSERT(pp->p_kpmref > 0); 158 sfmmu_kpme_sub(kpme, pp); 159 160 } else { 161 pml = sfmmu_mlist_enter(pp); 162 pp->p_kpmref--; 163 } 164 165 ASSERT(pp->p_kpmref >= 0); 166 if (pp->p_kpmref == 0) 167 sfmmu_kpm_mapout(pp, vaddr); 168 169 sfmmu_mlist_exit(pml); 170 } 171 172 /* 173 * hat_kpm_mapin_pfn is used to obtain a kpm mapping for physical 174 * memory addresses that are not described by a page_t. It can 175 * only be supported if vac_colors=1, because there is no page_t 176 * and corresponding kpm_page_t to track VAC conflicts. Currently, 177 * this may not be used on pfn's backed by page_t's, because the 178 * kpm state may not be consistent in hat_kpm_fault if the page is 179 * mapped using both this routine and hat_kpm_mapin. KPM should be 180 * cleaned up on sun4u/vac_colors=1 to be minimal as on sun4v. 181 * The caller must only pass pfn's for valid physical addresses; violation 182 * of this rule will cause panic. 183 */ 184 caddr_t 185 hat_kpm_mapin_pfn(pfn_t pfn) 186 { 187 caddr_t paddr, vaddr; 188 tte_t tte; 189 uint_t szc = kpm_smallpages ? TTE8K : TTE4M; 190 uint_t shift = kpm_smallpages ? MMU_PAGESHIFT : MMU_PAGESHIFT4M; 191 192 if (kpm_enable == 0 || vac_colors > 1 || 193 page_numtomemseg_nolock(pfn) != NULL) 194 return ((caddr_t)NULL); 195 196 paddr = (caddr_t)ptob(pfn); 197 vaddr = (uintptr_t)kpm_vbase + paddr; 198 199 KPM_TTE_VCACHED(tte.ll, pfn, szc); 200 sfmmu_kpm_load_tsb(vaddr, &tte, shift); 201 202 return (vaddr); 203 } 204 205 /*ARGSUSED*/ 206 void 207 hat_kpm_mapout_pfn(pfn_t pfn) 208 { 209 /* empty */ 210 } 211 212 /* 213 * Return the kpm virtual address for the page at pp. 214 * If checkswap is non zero and the page is backed by a 215 * swap vnode the physical address is used rather than 216 * p_offset to determine the kpm region. 217 * Note: The function has to be used w/ extreme care. The 218 * stability of the page identity is in the responsibility 219 * of the caller. 
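 *
 * A minimal usage sketch (illustrative only; it assumes the caller keeps
 * pp at least read locked so that the page identity stays stable):
 *
 *	ASSERT(PAGE_LOCKED(pp));
 *	vaddr = hat_kpm_page2va(pp, 1);
 *
 * The returned address is only a candidate kpm address; whether it is
 * actually backed by a translation depends on the kpm state of the page
 * (see hat_kpm_mapin() above).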
220 */ 221 /*ARGSUSED*/ 222 caddr_t 223 hat_kpm_page2va(struct page *pp, int checkswap) 224 { 225 int vcolor, vcolor_pa; 226 uintptr_t paddr, vaddr; 227 228 ASSERT(kpm_enable); 229 230 paddr = ptob(pp->p_pagenum); 231 vcolor_pa = addr_to_vcolor(paddr); 232 233 if (checkswap && pp->p_vnode && IS_SWAPFSVP(pp->p_vnode)) 234 vcolor = (PP_ISNC(pp)) ? vcolor_pa : PP_GET_VCOLOR(pp); 235 else 236 vcolor = addr_to_vcolor(pp->p_offset); 237 238 vaddr = (uintptr_t)kpm_vbase + paddr; 239 240 if (vcolor_pa != vcolor) { 241 vaddr += ((uintptr_t)(vcolor - vcolor_pa) << MMU_PAGESHIFT); 242 vaddr += (vcolor_pa > vcolor) ? 243 ((uintptr_t)vcolor_pa << kpm_size_shift) : 244 ((uintptr_t)(vcolor - vcolor_pa) << kpm_size_shift); 245 } 246 247 return ((caddr_t)vaddr); 248 } 249 250 /* 251 * Return the page for the kpm virtual address vaddr. 252 * Caller is responsible for the kpm mapping and lock 253 * state of the page. 254 */ 255 page_t * 256 hat_kpm_vaddr2page(caddr_t vaddr) 257 { 258 uintptr_t paddr; 259 pfn_t pfn; 260 261 ASSERT(IS_KPM_ADDR(vaddr)); 262 263 SFMMU_KPM_VTOP(vaddr, paddr); 264 pfn = (pfn_t)btop(paddr); 265 266 return (page_numtopp_nolock(pfn)); 267 } 268 269 /* page to kpm_page */ 270 #define PP2KPMPG(pp, kp) { \ 271 struct memseg *mseg; \ 272 pgcnt_t inx; \ 273 pfn_t pfn; \ 274 \ 275 pfn = pp->p_pagenum; \ 276 mseg = page_numtomemseg_nolock(pfn); \ 277 ASSERT(mseg); \ 278 inx = ptokpmp(kpmptop(ptokpmp(pfn)) - mseg->kpm_pbase); \ 279 ASSERT(inx < mseg->kpm_nkpmpgs); \ 280 kp = &mseg->kpm_pages[inx]; \ 281 } 282 283 /* page to kpm_spage */ 284 #define PP2KPMSPG(pp, ksp) { \ 285 struct memseg *mseg; \ 286 pgcnt_t inx; \ 287 pfn_t pfn; \ 288 \ 289 pfn = pp->p_pagenum; \ 290 mseg = page_numtomemseg_nolock(pfn); \ 291 ASSERT(mseg); \ 292 inx = pfn - mseg->kpm_pbase; \ 293 ksp = &mseg->kpm_spages[inx]; \ 294 } 295 296 /* 297 * hat_kpm_fault is called from segkpm_fault when a kpm tsbmiss occurred 298 * which could not be resolved by the trap level tsbmiss handler for the 299 * following reasons: 300 * . The vaddr is in VAC alias range (always PAGESIZE mapping size). 301 * . The kpm (s)page range of vaddr is in a VAC alias prevention state. 302 * . tsbmiss handling at trap level is not desired (DEBUG kernel only, 303 * kpm_tsbmtl == 0). 304 */ 305 int 306 hat_kpm_fault(struct hat *hat, caddr_t vaddr) 307 { 308 int error; 309 uintptr_t paddr; 310 pfn_t pfn; 311 struct memseg *mseg; 312 page_t *pp; 313 314 if (kpm_enable == 0) { 315 cmn_err(CE_WARN, "hat_kpm_fault: kpm_enable not set"); 316 return (ENOTSUP); 317 } 318 319 ASSERT(hat == ksfmmup); 320 ASSERT(IS_KPM_ADDR(vaddr)); 321 322 SFMMU_KPM_VTOP(vaddr, paddr); 323 pfn = (pfn_t)btop(paddr); 324 if ((mseg = page_numtomemseg_nolock(pfn)) != NULL) { 325 pp = &mseg->pages[(pgcnt_t)(pfn - mseg->pages_base)]; 326 ASSERT((pfn_t)pp->p_pagenum == pfn); 327 } 328 329 /* 330 * hat_kpm_mapin_pfn may add a kpm translation for memory that falls 331 * outside of memsegs. Check for this case and provide the translation 332 * here. 333 */ 334 if (vac_colors == 1 && mseg == NULL) { 335 tte_t tte; 336 uint_t szc = kpm_smallpages ? TTE8K : TTE4M; 337 uint_t shift = kpm_smallpages ? 
MMU_PAGESHIFT : MMU_PAGESHIFT4M; 338 339 ASSERT(address_in_memlist(phys_install, paddr, 1)); 340 KPM_TTE_VCACHED(tte.ll, pfn, szc); 341 sfmmu_kpm_load_tsb(vaddr, &tte, shift); 342 error = 0; 343 } else if (mseg == NULL || !PAGE_LOCKED(pp)) 344 error = EFAULT; 345 else if (kpm_smallpages == 0) 346 error = sfmmu_kpm_fault(vaddr, mseg, pp); 347 else 348 error = sfmmu_kpm_fault_small(vaddr, mseg, pp); 349 350 return (error); 351 } 352 353 /* 354 * memseg_hash[] was cleared, need to clear memseg_phash[] too. 355 */ 356 void 357 hat_kpm_mseghash_clear(int nentries) 358 { 359 pgcnt_t i; 360 361 if (kpm_enable == 0) 362 return; 363 364 for (i = 0; i < nentries; i++) 365 memseg_phash[i] = MSEG_NULLPTR_PA; 366 } 367 368 /* 369 * Update memseg_phash[inx] when memseg_hash[inx] was changed. 370 */ 371 void 372 hat_kpm_mseghash_update(pgcnt_t inx, struct memseg *msp) 373 { 374 if (kpm_enable == 0) 375 return; 376 377 memseg_phash[inx] = (msp) ? va_to_pa(msp) : MSEG_NULLPTR_PA; 378 } 379 380 /* 381 * Update kpm memseg members from basic memseg info. 382 */ 383 void 384 hat_kpm_addmem_mseg_update(struct memseg *msp, pgcnt_t nkpmpgs, 385 offset_t kpm_pages_off) 386 { 387 if (kpm_enable == 0) 388 return; 389 390 msp->kpm_pages = (kpm_page_t *)((caddr_t)msp->pages + kpm_pages_off); 391 msp->kpm_nkpmpgs = nkpmpgs; 392 msp->kpm_pbase = kpmptop(ptokpmp(msp->pages_base)); 393 msp->pagespa = va_to_pa(msp->pages); 394 msp->epagespa = va_to_pa(msp->epages); 395 msp->kpm_pagespa = va_to_pa(msp->kpm_pages); 396 } 397 398 /* 399 * Setup nextpa when a memseg is inserted. 400 * Assumes that the memsegslock is already held. 401 */ 402 void 403 hat_kpm_addmem_mseg_insert(struct memseg *msp) 404 { 405 if (kpm_enable == 0) 406 return; 407 408 ASSERT(memsegs_lock_held()); 409 msp->nextpa = (memsegs) ? va_to_pa(memsegs) : MSEG_NULLPTR_PA; 410 } 411 412 /* 413 * Setup memsegspa when a memseg is (head) inserted. 414 * Called before memsegs is updated to complete a 415 * memseg insert operation. 416 * Assumes that the memsegslock is already held. 417 */ 418 void 419 hat_kpm_addmem_memsegs_update(struct memseg *msp) 420 { 421 if (kpm_enable == 0) 422 return; 423 424 ASSERT(memsegs_lock_held()); 425 ASSERT(memsegs); 426 memsegspa = va_to_pa(msp); 427 } 428 429 /* 430 * Return end of metadata for an already setup memseg. 431 * 432 * Note: kpm_pages and kpm_spages are aliases and the underlying 433 * member of struct memseg is a union, therefore they always have 434 * the same address within a memseg. They must be differentiated 435 * when pointer arithmetic is used with them. 436 */ 437 caddr_t 438 hat_kpm_mseg_reuse(struct memseg *msp) 439 { 440 caddr_t end; 441 442 if (kpm_smallpages == 0) 443 end = (caddr_t)(msp->kpm_pages + msp->kpm_nkpmpgs); 444 else 445 end = (caddr_t)(msp->kpm_spages + msp->kpm_nkpmpgs); 446 447 return (end); 448 } 449 450 /* 451 * Update memsegspa (when first memseg in list 452 * is deleted) or nextpa when a memseg deleted. 453 * Assumes that the memsegslock is already held. 454 */ 455 void 456 hat_kpm_delmem_mseg_update(struct memseg *msp, struct memseg **mspp) 457 { 458 struct memseg *lmsp; 459 460 if (kpm_enable == 0) 461 return; 462 463 ASSERT(memsegs_lock_held()); 464 465 if (mspp == &memsegs) { 466 memsegspa = (msp->next) ? 467 va_to_pa(msp->next) : MSEG_NULLPTR_PA; 468 } else { 469 lmsp = (struct memseg *) 470 ((uint64_t)mspp - offsetof(struct memseg, next)); 471 lmsp->nextpa = (msp->next) ? 
472 va_to_pa(msp->next) : MSEG_NULLPTR_PA; 473 } 474 } 475 476 /* 477 * Update kpm members for all memseg's involved in a split operation 478 * and do the atomic update of the physical memseg chain. 479 * 480 * Note: kpm_pages and kpm_spages are aliases and the underlying member 481 * of struct memseg is a union, therefore they always have the same 482 * address within a memseg. With that the direct assignments and 483 * va_to_pa conversions below don't have to be distinguished wrt. to 484 * kpm_smallpages. They must be differentiated when pointer arithmetic 485 * is used with them. 486 * 487 * Assumes that the memsegslock is already held. 488 */ 489 void 490 hat_kpm_split_mseg_update(struct memseg *msp, struct memseg **mspp, 491 struct memseg *lo, struct memseg *mid, struct memseg *hi) 492 { 493 pgcnt_t start, end, kbase, kstart, num; 494 struct memseg *lmsp; 495 496 if (kpm_enable == 0) 497 return; 498 499 ASSERT(memsegs_lock_held()); 500 ASSERT(msp && mid && msp->kpm_pages); 501 502 kbase = ptokpmp(msp->kpm_pbase); 503 504 if (lo) { 505 num = lo->pages_end - lo->pages_base; 506 start = kpmptop(ptokpmp(lo->pages_base)); 507 /* align end to kpm page size granularity */ 508 end = kpmptop(ptokpmp(start + num - 1)) + kpmpnpgs; 509 lo->kpm_pbase = start; 510 lo->kpm_nkpmpgs = ptokpmp(end - start); 511 lo->kpm_pages = msp->kpm_pages; 512 lo->kpm_pagespa = va_to_pa(lo->kpm_pages); 513 lo->pagespa = va_to_pa(lo->pages); 514 lo->epagespa = va_to_pa(lo->epages); 515 lo->nextpa = va_to_pa(lo->next); 516 } 517 518 /* mid */ 519 num = mid->pages_end - mid->pages_base; 520 kstart = ptokpmp(mid->pages_base); 521 start = kpmptop(kstart); 522 /* align end to kpm page size granularity */ 523 end = kpmptop(ptokpmp(start + num - 1)) + kpmpnpgs; 524 mid->kpm_pbase = start; 525 mid->kpm_nkpmpgs = ptokpmp(end - start); 526 if (kpm_smallpages == 0) { 527 mid->kpm_pages = msp->kpm_pages + (kstart - kbase); 528 } else { 529 mid->kpm_spages = msp->kpm_spages + (kstart - kbase); 530 } 531 mid->kpm_pagespa = va_to_pa(mid->kpm_pages); 532 mid->pagespa = va_to_pa(mid->pages); 533 mid->epagespa = va_to_pa(mid->epages); 534 mid->nextpa = (mid->next) ? va_to_pa(mid->next) : MSEG_NULLPTR_PA; 535 536 if (hi) { 537 num = hi->pages_end - hi->pages_base; 538 kstart = ptokpmp(hi->pages_base); 539 start = kpmptop(kstart); 540 /* align end to kpm page size granularity */ 541 end = kpmptop(ptokpmp(start + num - 1)) + kpmpnpgs; 542 hi->kpm_pbase = start; 543 hi->kpm_nkpmpgs = ptokpmp(end - start); 544 if (kpm_smallpages == 0) { 545 hi->kpm_pages = msp->kpm_pages + (kstart - kbase); 546 } else { 547 hi->kpm_spages = msp->kpm_spages + (kstart - kbase); 548 } 549 hi->kpm_pagespa = va_to_pa(hi->kpm_pages); 550 hi->pagespa = va_to_pa(hi->pages); 551 hi->epagespa = va_to_pa(hi->epages); 552 hi->nextpa = (hi->next) ? va_to_pa(hi->next) : MSEG_NULLPTR_PA; 553 } 554 555 /* 556 * Atomic update of the physical memseg chain 557 */ 558 if (mspp == &memsegs) { 559 memsegspa = (lo) ? va_to_pa(lo) : va_to_pa(mid); 560 } else { 561 lmsp = (struct memseg *) 562 ((uint64_t)mspp - offsetof(struct memseg, next)); 563 lmsp->nextpa = (lo) ? va_to_pa(lo) : va_to_pa(mid); 564 } 565 } 566 567 /* 568 * Walk the memsegs chain, applying func to each memseg span and vcolor. 
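 *
 * A sketch of a possible callback (illustrative only; the callback name
 * and the accumulator are hypothetical, not part of this file):
 *
 *	static void
 *	kpm_span_size(void *arg, void *base, size_t size)
 *	{
 *		size_t *totalp = arg;
 *
 *		*totalp += size;
 *	}
 *
 *	size_t total = 0;
 *	hat_kpm_walk(kpm_span_size, &total);
 *
 * Note that each memseg span is reported once per VAC color, at that
 * color's kpm address (kpm_vbase + ptob(pages_base) + kpm_size * vcolor).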
 */
void
hat_kpm_walk(void (*func)(void *, void *, size_t), void *arg)
{
	pfn_t pbase, pend;
	int vcolor;
	void *base;
	size_t size;
	struct memseg *msp;

	for (msp = memsegs; msp; msp = msp->next) {
		pbase = msp->pages_base;
		pend = msp->pages_end;
		for (vcolor = 0; vcolor < vac_colors; vcolor++) {
			base = ptob(pbase) + kpm_vbase + kpm_size * vcolor;
			size = ptob(pend - pbase);
			func(arg, base, size);
		}
	}
}


/* -- sfmmu_kpm internal section -- */

/*
 * Return the page frame number if a valid segkpm mapping exists
 * for vaddr, otherwise return PFN_INVALID. No locks are grabbed.
 * Should only be used by other sfmmu routines.
 */
pfn_t
sfmmu_kpm_vatopfn(caddr_t vaddr)
{
	uintptr_t paddr;
	pfn_t pfn;
	page_t *pp;

	ASSERT(kpm_enable && IS_KPM_ADDR(vaddr));

	SFMMU_KPM_VTOP(vaddr, paddr);
	pfn = (pfn_t)btop(paddr);
	pp = page_numtopp_nolock(pfn);
	if (pp && pp->p_kpmref)
		return (pfn);
	else
		return ((pfn_t)PFN_INVALID);
}

/*
 * Lookup a kpme in the p_kpmelist.
 */
static int
sfmmu_kpme_lookup(struct kpme *kpme, page_t *pp)
{
	struct kpme *p;

	for (p = pp->p_kpmelist; p; p = p->kpe_next) {
		if (p == kpme)
			return (1);
	}
	return (0);
}

/*
 * Insert a kpme into the p_kpmelist and increment
 * the per page kpm reference count.
 */
static void
sfmmu_kpme_add(struct kpme *kpme, page_t *pp)
{
	ASSERT(pp->p_kpmref >= 0);

	/* head insert */
	kpme->kpe_prev = NULL;
	kpme->kpe_next = pp->p_kpmelist;

	if (pp->p_kpmelist)
		pp->p_kpmelist->kpe_prev = kpme;

	pp->p_kpmelist = kpme;
	kpme->kpe_page = pp;
	pp->p_kpmref++;
}

/*
 * Remove a kpme from the p_kpmelist and decrement
 * the per page kpm reference count.
 */
static void
sfmmu_kpme_sub(struct kpme *kpme, page_t *pp)
{
	ASSERT(pp->p_kpmref > 0);

	if (kpme->kpe_prev) {
		ASSERT(pp->p_kpmelist != kpme);
		ASSERT(kpme->kpe_prev->kpe_page == pp);
		kpme->kpe_prev->kpe_next = kpme->kpe_next;
	} else {
		ASSERT(pp->p_kpmelist == kpme);
		pp->p_kpmelist = kpme->kpe_next;
	}

	if (kpme->kpe_next) {
		ASSERT(kpme->kpe_next->kpe_page == pp);
		kpme->kpe_next->kpe_prev = kpme->kpe_prev;
	}

	kpme->kpe_next = kpme->kpe_prev = NULL;
	kpme->kpe_page = NULL;
	pp->p_kpmref--;
}

/*
 * Mapin a single page; it is called every time a page changes its state
 * from kpm-unmapped to kpm-mapped. It is not called when an additional
 * kpm instance does a mapin and merely shares the existing mapping.
 * Assumes that the mlist mutex is already grabbed.
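 *
 * Caller pattern, as used by hat_kpm_mapin() above (a sketch; the kpm
 * reference count is incremented either directly or via sfmmu_kpme_add()):
 *
 *	pml = sfmmu_mlist_enter(pp);
 *	vaddr = (pp->p_kpmref == 0) ?
 *	    sfmmu_kpm_mapin(pp) : hat_kpm_page2va(pp, 1);
 *	pp->p_kpmref++;
 *	sfmmu_mlist_exit(pml);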
685 */ 686 static caddr_t 687 sfmmu_kpm_mapin(page_t *pp) 688 { 689 kpm_page_t *kp; 690 kpm_hlk_t *kpmp; 691 caddr_t vaddr; 692 int kpm_vac_range; 693 pfn_t pfn; 694 tte_t tte; 695 kmutex_t *pmtx; 696 int uncached; 697 kpm_spage_t *ksp; 698 kpm_shlk_t *kpmsp; 699 int oldval; 700 701 ASSERT(sfmmu_mlist_held(pp)); 702 ASSERT(pp->p_kpmref == 0); 703 704 vaddr = sfmmu_kpm_getvaddr(pp, &kpm_vac_range); 705 706 ASSERT(IS_KPM_ADDR(vaddr)); 707 uncached = PP_ISNC(pp); 708 pfn = pp->p_pagenum; 709 710 if (kpm_smallpages) 711 goto smallpages_mapin; 712 713 PP2KPMPG(pp, kp); 714 715 kpmp = KPMP_HASH(kp); 716 mutex_enter(&kpmp->khl_mutex); 717 718 ASSERT(PP_ISKPMC(pp) == 0); 719 ASSERT(PP_ISKPMS(pp) == 0); 720 721 if (uncached) { 722 /* ASSERT(pp->p_share); XXX use hat_page_getshare */ 723 if (kpm_vac_range == 0) { 724 if (kp->kp_refcnts == 0) { 725 /* 726 * Must remove large page mapping if it exists. 727 * Pages in uncached state can only be mapped 728 * small (PAGESIZE) within the regular kpm 729 * range. 730 */ 731 if (kp->kp_refcntc == -1) { 732 /* remove go indication */ 733 sfmmu_kpm_tsbmtl(&kp->kp_refcntc, 734 &kpmp->khl_lock, KPMTSBM_STOP); 735 } 736 if (kp->kp_refcnt > 0 && kp->kp_refcntc == 0) 737 sfmmu_kpm_demap_large(vaddr); 738 } 739 ASSERT(kp->kp_refcntc >= 0); 740 kp->kp_refcntc++; 741 } 742 pmtx = sfmmu_page_enter(pp); 743 PP_SETKPMC(pp); 744 sfmmu_page_exit(pmtx); 745 } 746 747 if ((kp->kp_refcntc > 0 || kp->kp_refcnts > 0) && kpm_vac_range == 0) { 748 /* 749 * Have to do a small (PAGESIZE) mapin within this kpm_page 750 * range since it is marked to be in VAC conflict mode or 751 * when there are still other small mappings around. 752 */ 753 754 /* tte assembly */ 755 if (uncached == 0) 756 KPM_TTE_VCACHED(tte.ll, pfn, TTE8K); 757 else 758 KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K); 759 760 /* tsb dropin */ 761 sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT); 762 763 pmtx = sfmmu_page_enter(pp); 764 PP_SETKPMS(pp); 765 sfmmu_page_exit(pmtx); 766 767 kp->kp_refcnts++; 768 ASSERT(kp->kp_refcnts > 0); 769 goto exit; 770 } 771 772 if (kpm_vac_range == 0) { 773 /* 774 * Fast path / regular case, no VAC conflict handling 775 * in progress within this kpm_page range. 776 */ 777 if (kp->kp_refcnt == 0) { 778 779 /* tte assembly */ 780 KPM_TTE_VCACHED(tte.ll, pfn, TTE4M); 781 782 /* tsb dropin */ 783 sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT4M); 784 785 /* Set go flag for TL tsbmiss handler */ 786 if (kp->kp_refcntc == 0) 787 sfmmu_kpm_tsbmtl(&kp->kp_refcntc, 788 &kpmp->khl_lock, KPMTSBM_START); 789 790 ASSERT(kp->kp_refcntc == -1); 791 } 792 kp->kp_refcnt++; 793 ASSERT(kp->kp_refcnt); 794 795 } else { 796 /* 797 * The page is not setup according to the common VAC 798 * prevention rules for the regular and kpm mapping layer 799 * E.g. the page layer was not able to deliver a right 800 * vcolor'ed page for a given vaddr corresponding to 801 * the wanted p_offset. It has to be mapped in small in 802 * within the corresponding kpm vac range in order to 803 * prevent VAC alias conflicts. 
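		 *
		 * In this case the 8K tte assembled below is dropped into
		 * the TSB at the vaddr delivered by sfmmu_kpm_getvaddr(),
		 * which lies within the kpm vac (alias) range, and
		 * kp_refcnta keeps track of these alias range mappings.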
 */

		/* tte assembly */
		if (uncached == 0) {
			KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
		} else {
			KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
		}

		/* tsb dropin */
		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);

		kp->kp_refcnta++;
		if (kp->kp_refcntc == -1) {
			ASSERT(kp->kp_refcnt > 0);

			/* remove go indication */
			sfmmu_kpm_tsbmtl(&kp->kp_refcntc, &kpmp->khl_lock,
			    KPMTSBM_STOP);
		}
		ASSERT(kp->kp_refcntc >= 0);
	}
exit:
	mutex_exit(&kpmp->khl_mutex);
	return (vaddr);

smallpages_mapin:
	if (uncached == 0) {
		/* tte assembly */
		KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
	} else {
		/*
		 * Just in case this same page was mapped cacheable prior to
		 * this and the old tte remains in tlb.
		 */
		sfmmu_kpm_demap_small(vaddr);

		/* ASSERT(pp->p_share); XXX use hat_page_getshare */
		pmtx = sfmmu_page_enter(pp);
		PP_SETKPMC(pp);
		sfmmu_page_exit(pmtx);
		/* tte assembly */
		KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
	}

	/* tsb dropin */
	sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);

	PP2KPMSPG(pp, ksp);
	kpmsp = KPMP_SHASH(ksp);

	oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag, &kpmsp->kshl_lock,
	    (uncached) ? (KPM_MAPPED_GO | KPM_MAPPEDSC) :
	    (KPM_MAPPED_GO | KPM_MAPPEDS));

	if (oldval != 0)
		panic("sfmmu_kpm_mapin: stale smallpages mapping");

	return (vaddr);
}

/*
 * Mapout a single page; it is called every time a page changes its state
 * from kpm-mapped to kpm-unmapped. It is not called when a kpm instance
 * calls mapout while other instances are still mapping the page.
 * Assumes that the mlist mutex is already grabbed.
 *
 * Note: In normal mode (no VAC conflict prevention pending) TLBs are
 * not flushed. This is the core segkpm behavior to avoid xcalls. It is
 * no problem because a translation from a segkpm virtual address to a
 * physical address is always the same. The only downside is a slightly
 * increased window of vulnerability for misbehaving _kernel_ modules.
 */
static void
sfmmu_kpm_mapout(page_t *pp, caddr_t vaddr)
{
	kpm_page_t *kp;
	kpm_hlk_t *kpmp;
	int alias_range;
	kmutex_t *pmtx;
	kpm_spage_t *ksp;
	kpm_shlk_t *kpmsp;
	int oldval;

	ASSERT(sfmmu_mlist_held(pp));
	ASSERT(pp->p_kpmref == 0);

	alias_range = IS_KPM_ALIAS_RANGE(vaddr);

	if (kpm_smallpages)
		goto smallpages_mapout;

	PP2KPMPG(pp, kp);
	kpmp = KPMP_HASH(kp);
	mutex_enter(&kpmp->khl_mutex);

	if (alias_range) {
		ASSERT(PP_ISKPMS(pp) == 0);
		if (kp->kp_refcnta <= 0) {
			panic("sfmmu_kpm_mapout: bad refcnta kp=%p",
			    (void *)kp);
		}

		if (PP_ISTNC(pp)) {
			if (PP_ISKPMC(pp) == 0) {
				/*
				 * Uncached kpm mappings must always have
				 * forced "small page" mode.
				 */
				panic("sfmmu_kpm_mapout: uncached page not "
				    "kpm marked");
			}
			sfmmu_kpm_demap_small(vaddr);

			pmtx = sfmmu_page_enter(pp);
			PP_CLRKPMC(pp);
			sfmmu_page_exit(pmtx);

			/*
			 * Check if we can resume cached mode. This might
			 * be the case if the kpm mapping was the only
			 * mapping in conflict with other non rule
			 * compliant mappings. The page is no longer marked
			 * as kpm mapped, so the conv_tnc path will not
			 * change kpm state.
929 */ 930 conv_tnc(pp, TTE8K); 931 932 } else if (PP_ISKPMC(pp) == 0) { 933 /* remove TSB entry only */ 934 sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT); 935 936 } else { 937 /* already demapped */ 938 pmtx = sfmmu_page_enter(pp); 939 PP_CLRKPMC(pp); 940 sfmmu_page_exit(pmtx); 941 } 942 kp->kp_refcnta--; 943 goto exit; 944 } 945 946 if (kp->kp_refcntc <= 0 && kp->kp_refcnts == 0) { 947 /* 948 * Fast path / regular case. 949 */ 950 ASSERT(kp->kp_refcntc >= -1); 951 ASSERT(!(pp->p_nrm & (P_KPMC | P_KPMS | P_TNC | P_PNC))); 952 953 if (kp->kp_refcnt <= 0) 954 panic("sfmmu_kpm_mapout: bad refcnt kp=%p", (void *)kp); 955 956 if (--kp->kp_refcnt == 0) { 957 /* remove go indication */ 958 if (kp->kp_refcntc == -1) { 959 sfmmu_kpm_tsbmtl(&kp->kp_refcntc, 960 &kpmp->khl_lock, KPMTSBM_STOP); 961 } 962 ASSERT(kp->kp_refcntc == 0); 963 964 /* remove TSB entry */ 965 sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT4M); 966 #ifdef DEBUG 967 if (kpm_tlb_flush) 968 sfmmu_kpm_demap_tlbs(vaddr); 969 #endif 970 } 971 972 } else { 973 /* 974 * The VAC alias path. 975 * We come here if the kpm vaddr is not in any alias_range 976 * and we are unmapping a page within the regular kpm_page 977 * range. The kpm_page either holds conflict pages and/or 978 * is in "small page" mode. If the page is not marked 979 * P_KPMS it couldn't have a valid PAGESIZE sized TSB 980 * entry. Dcache flushing is done lazy and follows the 981 * rules of the regular virtual page coloring scheme. 982 * 983 * Per page states and required actions: 984 * P_KPMC: remove a kpm mapping that is conflicting. 985 * P_KPMS: remove a small kpm mapping within a kpm_page. 986 * P_TNC: check if we can re-cache the page. 987 * P_PNC: we cannot re-cache, sorry. 988 * Per kpm_page: 989 * kp_refcntc > 0: page is part of a kpm_page with conflicts. 990 * kp_refcnts > 0: rm a small mapped page within a kpm_page. 991 */ 992 993 if (PP_ISKPMS(pp)) { 994 if (kp->kp_refcnts < 1) { 995 panic("sfmmu_kpm_mapout: bad refcnts kp=%p", 996 (void *)kp); 997 } 998 sfmmu_kpm_demap_small(vaddr); 999 1000 /* 1001 * Check if we can resume cached mode. This might 1002 * be the case if the kpm mapping was the only 1003 * mapping in conflict with other non rule 1004 * compliant mappings. The page is no more marked 1005 * as kpm mapped, so the conv_tnc path will not 1006 * change kpm state. 1007 */ 1008 if (PP_ISTNC(pp)) { 1009 if (!PP_ISKPMC(pp)) { 1010 /* 1011 * Uncached kpm mappings must always 1012 * have forced "small page" mode. 1013 */ 1014 panic("sfmmu_kpm_mapout: uncached " 1015 "page not kpm marked"); 1016 } 1017 conv_tnc(pp, TTE8K); 1018 } 1019 kp->kp_refcnts--; 1020 kp->kp_refcnt++; 1021 pmtx = sfmmu_page_enter(pp); 1022 PP_CLRKPMS(pp); 1023 sfmmu_page_exit(pmtx); 1024 } 1025 1026 if (PP_ISKPMC(pp)) { 1027 if (kp->kp_refcntc < 1) { 1028 panic("sfmmu_kpm_mapout: bad refcntc kp=%p", 1029 (void *)kp); 1030 } 1031 pmtx = sfmmu_page_enter(pp); 1032 PP_CLRKPMC(pp); 1033 sfmmu_page_exit(pmtx); 1034 kp->kp_refcntc--; 1035 } 1036 1037 if (kp->kp_refcnt-- < 1) 1038 panic("sfmmu_kpm_mapout: bad refcnt kp=%p", (void *)kp); 1039 } 1040 exit: 1041 mutex_exit(&kpmp->khl_mutex); 1042 return; 1043 1044 smallpages_mapout: 1045 PP2KPMSPG(pp, ksp); 1046 kpmsp = KPMP_SHASH(ksp); 1047 1048 if (PP_ISKPMC(pp) == 0) { 1049 oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag, 1050 &kpmsp->kshl_lock, 0); 1051 1052 if (oldval != KPM_MAPPEDS) { 1053 /* 1054 * When we're called after sfmmu_kpm_hme_unload, 1055 * KPM_MAPPEDSC is valid too. 
1056 */ 1057 if (oldval != KPM_MAPPEDSC) 1058 panic("sfmmu_kpm_mapout: incorrect mapping"); 1059 } 1060 1061 /* remove TSB entry */ 1062 sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT); 1063 #ifdef DEBUG 1064 if (kpm_tlb_flush) 1065 sfmmu_kpm_demap_tlbs(vaddr); 1066 #endif 1067 1068 } else if (PP_ISTNC(pp)) { 1069 oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag, 1070 &kpmsp->kshl_lock, 0); 1071 1072 if (oldval != KPM_MAPPEDSC || PP_ISKPMC(pp) == 0) 1073 panic("sfmmu_kpm_mapout: inconsistent TNC mapping"); 1074 1075 sfmmu_kpm_demap_small(vaddr); 1076 1077 pmtx = sfmmu_page_enter(pp); 1078 PP_CLRKPMC(pp); 1079 sfmmu_page_exit(pmtx); 1080 1081 /* 1082 * Check if we can resume cached mode. This might be 1083 * the case if the kpm mapping was the only mapping 1084 * in conflict with other non rule compliant mappings. 1085 * The page is no more marked as kpm mapped, so the 1086 * conv_tnc path will not change the kpm state. 1087 */ 1088 conv_tnc(pp, TTE8K); 1089 1090 } else { 1091 oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag, 1092 &kpmsp->kshl_lock, 0); 1093 1094 if (oldval != KPM_MAPPEDSC) 1095 panic("sfmmu_kpm_mapout: inconsistent mapping"); 1096 1097 pmtx = sfmmu_page_enter(pp); 1098 PP_CLRKPMC(pp); 1099 sfmmu_page_exit(pmtx); 1100 } 1101 } 1102 1103 #define abs(x) ((x) < 0 ? -(x) : (x)) 1104 1105 /* 1106 * Determine appropriate kpm mapping address and handle any kpm/hme 1107 * conflicts. Page mapping list and its vcolor parts must be protected. 1108 */ 1109 static caddr_t 1110 sfmmu_kpm_getvaddr(page_t *pp, int *kpm_vac_rangep) 1111 { 1112 int vcolor, vcolor_pa; 1113 caddr_t vaddr; 1114 uintptr_t paddr; 1115 1116 1117 ASSERT(sfmmu_mlist_held(pp)); 1118 1119 paddr = ptob(pp->p_pagenum); 1120 vcolor_pa = addr_to_vcolor(paddr); 1121 1122 if (pp->p_vnode && IS_SWAPFSVP(pp->p_vnode)) { 1123 vcolor = (PP_NEWPAGE(pp) || PP_ISNC(pp)) ? 1124 vcolor_pa : PP_GET_VCOLOR(pp); 1125 } else { 1126 vcolor = addr_to_vcolor(pp->p_offset); 1127 } 1128 1129 vaddr = kpm_vbase + paddr; 1130 *kpm_vac_rangep = 0; 1131 1132 if (vcolor_pa != vcolor) { 1133 *kpm_vac_rangep = abs(vcolor - vcolor_pa); 1134 vaddr += ((uintptr_t)(vcolor - vcolor_pa) << MMU_PAGESHIFT); 1135 vaddr += (vcolor_pa > vcolor) ? 1136 ((uintptr_t)vcolor_pa << kpm_size_shift) : 1137 ((uintptr_t)(vcolor - vcolor_pa) << kpm_size_shift); 1138 1139 ASSERT(!PP_ISMAPPED_LARGE(pp)); 1140 } 1141 1142 if (PP_ISNC(pp)) 1143 return (vaddr); 1144 1145 if (PP_NEWPAGE(pp)) { 1146 PP_SET_VCOLOR(pp, vcolor); 1147 return (vaddr); 1148 } 1149 1150 if (PP_GET_VCOLOR(pp) == vcolor) 1151 return (vaddr); 1152 1153 ASSERT(!PP_ISMAPPED_KPM(pp)); 1154 sfmmu_kpm_vac_conflict(pp, vaddr); 1155 1156 return (vaddr); 1157 } 1158 1159 /* 1160 * VAC conflict state bit values. 1161 * The following defines are used to make the handling of the 1162 * various input states more concise. For that the kpm states 1163 * per kpm_page and per page are combined in a summary state. 1164 * Each single state has a corresponding bit value in the 1165 * summary state. These defines only apply for kpm large page 1166 * mappings. Within comments the abbreviations "kc, c, ks, s" 1167 * are used as short form of the actual state, e.g. "kc" for 1168 * "kp_refcntc > 0", etc. 1169 */ 1170 #define KPM_KC 0x00000008 /* kpm_page: kp_refcntc > 0 */ 1171 #define KPM_C 0x00000004 /* page: P_KPMC set */ 1172 #define KPM_KS 0x00000002 /* kpm_page: kp_refcnts > 0 */ 1173 #define KPM_S 0x00000001 /* page: P_KPMS set */ 1174 1175 /* 1176 * Summary states used in sfmmu_kpm_fault (KPM_TSBM_*). 
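 *
 * The summary state is the bitwise OR of KPM_KC/KPM_C/KPM_KS/KPM_S as
 * derived from the current kpm_page and page state. For example, a
 * tsbmiss on a page with neither P_KPMC nor P_KPMS set, while its
 * kpm_page has kp_refcntc > 0 and kp_refcnts > 0, yields the summary
 * state (KPM_KC | KPM_KS) == KPM_TSBM_MAPS.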
 * See also more detailed comments within the sfmmu_kpm_fault switch.
 * Abbreviations used:
 * CONFL: VAC conflict(s) within a kpm_page.
 * MAPS: Mapped small: Page mapped in using a regular page size kpm mapping.
 * RASM: Re-assembling of a large page mapping possible.
 * RPLS: Replace: TSB miss due to TSB replacement only.
 * BRKO: Breakup Other: A large kpm mapping has to be broken because another
 *	page within the kpm_page is already involved in a VAC conflict.
 * BRKT: Breakup This: A large kpm mapping has to be broken because this page
 *	is involved in a VAC conflict.
 */
#define	KPM_TSBM_CONFL_GONE	(0)
#define	KPM_TSBM_MAPS_RASM	(KPM_KS)
#define	KPM_TSBM_RPLS_RASM	(KPM_KS | KPM_S)
#define	KPM_TSBM_MAPS_BRKO	(KPM_KC)
#define	KPM_TSBM_MAPS		(KPM_KC | KPM_KS)
#define	KPM_TSBM_RPLS		(KPM_KC | KPM_KS | KPM_S)
#define	KPM_TSBM_MAPS_BRKT	(KPM_KC | KPM_C)
#define	KPM_TSBM_MAPS_CONFL	(KPM_KC | KPM_C | KPM_KS)
#define	KPM_TSBM_RPLS_CONFL	(KPM_KC | KPM_C | KPM_KS | KPM_S)

/*
 * kpm fault handler for mappings with large page size.
 */
int
sfmmu_kpm_fault(caddr_t vaddr, struct memseg *mseg, page_t *pp)
{
	int error;
	pgcnt_t inx;
	kpm_page_t *kp;
	tte_t tte;
	pfn_t pfn = pp->p_pagenum;
	kpm_hlk_t *kpmp;
	kmutex_t *pml;
	int alias_range;
	int uncached = 0;
	kmutex_t *pmtx;
	int badstate;
	uint_t tsbmcase;

	alias_range = IS_KPM_ALIAS_RANGE(vaddr);

	inx = ptokpmp(kpmptop(ptokpmp(pfn)) - mseg->kpm_pbase);
	if (inx >= mseg->kpm_nkpmpgs) {
		cmn_err(CE_PANIC, "sfmmu_kpm_fault: kpm overflow in memseg "
		    "0x%p pp 0x%p", (void *)mseg, (void *)pp);
	}

	kp = &mseg->kpm_pages[inx];
	kpmp = KPMP_HASH(kp);

	pml = sfmmu_mlist_enter(pp);

	if (!PP_ISMAPPED_KPM(pp)) {
		sfmmu_mlist_exit(pml);
		return (EFAULT);
	}

	mutex_enter(&kpmp->khl_mutex);

	if (alias_range) {
		ASSERT(!PP_ISMAPPED_LARGE(pp));
		if (kp->kp_refcnta > 0) {
			if (PP_ISKPMC(pp)) {
				pmtx = sfmmu_page_enter(pp);
				PP_CLRKPMC(pp);
				sfmmu_page_exit(pmtx);
			}
			/*
			 * Check for vcolor conflicts. Return here
			 * w/ either no conflict (fast path), removed hme
			 * mapping chains (unload conflict) or uncached
			 * (uncache conflict). VACaches are cleaned and
			 * p_vcolor and PP_TNC are set accordingly for the
			 * conflict cases. Drop kpmp for uncache conflict
			 * cases since it will be grabbed within
			 * sfmmu_kpm_page_cache in case of an uncache
			 * conflict.
			 */
			mutex_exit(&kpmp->khl_mutex);
			sfmmu_kpm_vac_conflict(pp, vaddr);
			mutex_enter(&kpmp->khl_mutex);

			if (PP_ISNC(pp)) {
				uncached = 1;
				pmtx = sfmmu_page_enter(pp);
				PP_SETKPMC(pp);
				sfmmu_page_exit(pmtx);
			}
			goto smallexit;

		} else {
			/*
			 * We got a tsbmiss on a not active kpm_page range.
			 * Let segkpm_fault decide how to panic.
			 */
			error = EFAULT;
		}
		goto exit;
	}

	badstate = (kp->kp_refcnt < 0 || kp->kp_refcnts < 0);
	if (kp->kp_refcntc == -1) {
		/*
		 * We should come here only if trap level tsb miss
		 * handler is disabled.
1283 */ 1284 badstate |= (kp->kp_refcnt == 0 || kp->kp_refcnts > 0 || 1285 PP_ISKPMC(pp) || PP_ISKPMS(pp) || PP_ISNC(pp)); 1286 1287 if (badstate == 0) 1288 goto largeexit; 1289 } 1290 1291 if (badstate || kp->kp_refcntc < 0) 1292 goto badstate_exit; 1293 1294 /* 1295 * Combine the per kpm_page and per page kpm VAC states to 1296 * a summary state in order to make the kpm fault handling 1297 * more concise. 1298 */ 1299 tsbmcase = (((kp->kp_refcntc > 0) ? KPM_KC : 0) | 1300 ((kp->kp_refcnts > 0) ? KPM_KS : 0) | 1301 (PP_ISKPMC(pp) ? KPM_C : 0) | 1302 (PP_ISKPMS(pp) ? KPM_S : 0)); 1303 1304 switch (tsbmcase) { 1305 case KPM_TSBM_CONFL_GONE: /* - - - - */ 1306 /* 1307 * That's fine, we either have no more vac conflict in 1308 * this kpm page or someone raced in and has solved the 1309 * vac conflict for us -- call sfmmu_kpm_vac_conflict 1310 * to take care for correcting the vcolor and flushing 1311 * the dcache if required. 1312 */ 1313 mutex_exit(&kpmp->khl_mutex); 1314 sfmmu_kpm_vac_conflict(pp, vaddr); 1315 mutex_enter(&kpmp->khl_mutex); 1316 1317 if (PP_ISNC(pp) || kp->kp_refcnt <= 0 || 1318 addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) { 1319 panic("sfmmu_kpm_fault: inconsistent CONFL_GONE " 1320 "state, pp=%p", (void *)pp); 1321 } 1322 goto largeexit; 1323 1324 case KPM_TSBM_MAPS_RASM: /* - - ks - */ 1325 /* 1326 * All conflicts in this kpm page are gone but there are 1327 * already small mappings around, so we also map this 1328 * page small. This could be the trigger case for a 1329 * small mapping reaper, if this is really needed. 1330 * For now fall thru to the KPM_TSBM_MAPS handling. 1331 */ 1332 1333 case KPM_TSBM_MAPS: /* kc - ks - */ 1334 /* 1335 * Large page mapping is already broken, this page is not 1336 * conflicting, so map it small. Call sfmmu_kpm_vac_conflict 1337 * to take care for correcting the vcolor and flushing 1338 * the dcache if required. 1339 */ 1340 mutex_exit(&kpmp->khl_mutex); 1341 sfmmu_kpm_vac_conflict(pp, vaddr); 1342 mutex_enter(&kpmp->khl_mutex); 1343 1344 if (PP_ISNC(pp) || kp->kp_refcnt <= 0 || 1345 addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) { 1346 panic("sfmmu_kpm_fault: inconsistent MAPS state, " 1347 "pp=%p", (void *)pp); 1348 } 1349 kp->kp_refcnt--; 1350 kp->kp_refcnts++; 1351 pmtx = sfmmu_page_enter(pp); 1352 PP_SETKPMS(pp); 1353 sfmmu_page_exit(pmtx); 1354 goto smallexit; 1355 1356 case KPM_TSBM_RPLS_RASM: /* - - ks s */ 1357 /* 1358 * All conflicts in this kpm page are gone but this page 1359 * is mapped small. This could be the trigger case for a 1360 * small mapping reaper, if this is really needed. 1361 * For now we drop it in small again. Fall thru to the 1362 * KPM_TSBM_RPLS handling. 1363 */ 1364 1365 case KPM_TSBM_RPLS: /* kc - ks s */ 1366 /* 1367 * Large page mapping is already broken, this page is not 1368 * conflicting but already mapped small, so drop it in 1369 * small again. 1370 */ 1371 if (PP_ISNC(pp) || 1372 addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) { 1373 panic("sfmmu_kpm_fault: inconsistent RPLS state, " 1374 "pp=%p", (void *)pp); 1375 } 1376 goto smallexit; 1377 1378 case KPM_TSBM_MAPS_BRKO: /* kc - - - */ 1379 /* 1380 * The kpm page where we live in is marked conflicting 1381 * but this page is not conflicting. So we have to map it 1382 * in small. Call sfmmu_kpm_vac_conflict to take care for 1383 * correcting the vcolor and flushing the dcache if required. 
1384 */ 1385 mutex_exit(&kpmp->khl_mutex); 1386 sfmmu_kpm_vac_conflict(pp, vaddr); 1387 mutex_enter(&kpmp->khl_mutex); 1388 1389 if (PP_ISNC(pp) || kp->kp_refcnt <= 0 || 1390 addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) { 1391 panic("sfmmu_kpm_fault: inconsistent MAPS_BRKO state, " 1392 "pp=%p", (void *)pp); 1393 } 1394 kp->kp_refcnt--; 1395 kp->kp_refcnts++; 1396 pmtx = sfmmu_page_enter(pp); 1397 PP_SETKPMS(pp); 1398 sfmmu_page_exit(pmtx); 1399 goto smallexit; 1400 1401 case KPM_TSBM_MAPS_BRKT: /* kc c - - */ 1402 case KPM_TSBM_MAPS_CONFL: /* kc c ks - */ 1403 if (!PP_ISMAPPED(pp)) { 1404 /* 1405 * We got a tsbmiss on kpm large page range that is 1406 * marked to contain vac conflicting pages introduced 1407 * by hme mappings. The hme mappings are all gone and 1408 * must have bypassed the kpm alias prevention logic. 1409 */ 1410 panic("sfmmu_kpm_fault: stale VAC conflict, pp=%p", 1411 (void *)pp); 1412 } 1413 1414 /* 1415 * Check for vcolor conflicts. Return here w/ either no 1416 * conflict (fast path), removed hme mapping chains 1417 * (unload conflict) or uncached (uncache conflict). 1418 * Dcache is cleaned and p_vcolor and P_TNC are set 1419 * accordingly. Drop kpmp for uncache conflict cases 1420 * since it will be grabbed within sfmmu_kpm_page_cache 1421 * in case of an uncache conflict. 1422 */ 1423 mutex_exit(&kpmp->khl_mutex); 1424 sfmmu_kpm_vac_conflict(pp, vaddr); 1425 mutex_enter(&kpmp->khl_mutex); 1426 1427 if (kp->kp_refcnt <= 0) 1428 panic("sfmmu_kpm_fault: bad refcnt kp=%p", (void *)kp); 1429 1430 if (PP_ISNC(pp)) { 1431 uncached = 1; 1432 } else { 1433 /* 1434 * When an unload conflict is solved and there are 1435 * no other small mappings around, we can resume 1436 * largepage mode. Otherwise we have to map or drop 1437 * in small. This could be a trigger for a small 1438 * mapping reaper when this was the last conflict 1439 * within the kpm page and when there are only 1440 * other small mappings around. 1441 */ 1442 ASSERT(addr_to_vcolor(vaddr) == PP_GET_VCOLOR(pp)); 1443 ASSERT(kp->kp_refcntc > 0); 1444 kp->kp_refcntc--; 1445 pmtx = sfmmu_page_enter(pp); 1446 PP_CLRKPMC(pp); 1447 sfmmu_page_exit(pmtx); 1448 ASSERT(PP_ISKPMS(pp) == 0); 1449 if (kp->kp_refcntc == 0 && kp->kp_refcnts == 0) 1450 goto largeexit; 1451 } 1452 1453 kp->kp_refcnt--; 1454 kp->kp_refcnts++; 1455 pmtx = sfmmu_page_enter(pp); 1456 PP_SETKPMS(pp); 1457 sfmmu_page_exit(pmtx); 1458 goto smallexit; 1459 1460 case KPM_TSBM_RPLS_CONFL: /* kc c ks s */ 1461 if (!PP_ISMAPPED(pp)) { 1462 /* 1463 * We got a tsbmiss on kpm large page range that is 1464 * marked to contain vac conflicting pages introduced 1465 * by hme mappings. They are all gone and must have 1466 * somehow bypassed the kpm alias prevention logic. 1467 */ 1468 panic("sfmmu_kpm_fault: stale VAC conflict, pp=%p", 1469 (void *)pp); 1470 } 1471 1472 /* 1473 * This state is only possible for an uncached mapping. 
1474 */ 1475 if (!PP_ISNC(pp)) { 1476 panic("sfmmu_kpm_fault: page not uncached, pp=%p", 1477 (void *)pp); 1478 } 1479 uncached = 1; 1480 goto smallexit; 1481 1482 default: 1483 badstate_exit: 1484 panic("sfmmu_kpm_fault: inconsistent VAC state, vaddr=%p kp=%p " 1485 "pp=%p", (void *)vaddr, (void *)kp, (void *)pp); 1486 } 1487 1488 smallexit: 1489 /* tte assembly */ 1490 if (uncached == 0) 1491 KPM_TTE_VCACHED(tte.ll, pfn, TTE8K); 1492 else 1493 KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K); 1494 1495 /* tsb dropin */ 1496 sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT); 1497 1498 error = 0; 1499 goto exit; 1500 1501 largeexit: 1502 if (kp->kp_refcnt > 0) { 1503 1504 /* tte assembly */ 1505 KPM_TTE_VCACHED(tte.ll, pfn, TTE4M); 1506 1507 /* tsb dropin */ 1508 sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT4M); 1509 1510 if (kp->kp_refcntc == 0) { 1511 /* Set "go" flag for TL tsbmiss handler */ 1512 sfmmu_kpm_tsbmtl(&kp->kp_refcntc, &kpmp->khl_lock, 1513 KPMTSBM_START); 1514 } 1515 ASSERT(kp->kp_refcntc == -1); 1516 error = 0; 1517 1518 } else 1519 error = EFAULT; 1520 exit: 1521 mutex_exit(&kpmp->khl_mutex); 1522 sfmmu_mlist_exit(pml); 1523 return (error); 1524 } 1525 1526 /* 1527 * kpm fault handler for mappings with small page size. 1528 */ 1529 int 1530 sfmmu_kpm_fault_small(caddr_t vaddr, struct memseg *mseg, page_t *pp) 1531 { 1532 int error = 0; 1533 pgcnt_t inx; 1534 kpm_spage_t *ksp; 1535 kpm_shlk_t *kpmsp; 1536 kmutex_t *pml; 1537 pfn_t pfn = pp->p_pagenum; 1538 tte_t tte; 1539 kmutex_t *pmtx; 1540 int oldval; 1541 1542 inx = pfn - mseg->kpm_pbase; 1543 ksp = &mseg->kpm_spages[inx]; 1544 kpmsp = KPMP_SHASH(ksp); 1545 1546 pml = sfmmu_mlist_enter(pp); 1547 1548 if (!PP_ISMAPPED_KPM(pp)) { 1549 sfmmu_mlist_exit(pml); 1550 return (EFAULT); 1551 } 1552 1553 /* 1554 * kp_mapped lookup protected by mlist mutex 1555 */ 1556 if (ksp->kp_mapped == KPM_MAPPEDS) { 1557 /* 1558 * Fast path tsbmiss 1559 */ 1560 ASSERT(!PP_ISKPMC(pp)); 1561 ASSERT(!PP_ISNC(pp)); 1562 1563 /* tte assembly */ 1564 KPM_TTE_VCACHED(tte.ll, pfn, TTE8K); 1565 1566 /* tsb dropin */ 1567 sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT); 1568 1569 } else if (ksp->kp_mapped == KPM_MAPPEDSC) { 1570 /* 1571 * Got here due to existing or gone kpm/hme VAC conflict. 1572 * Recheck for vcolor conflicts. Return here w/ either 1573 * no conflict, removed hme mapping chain (unload 1574 * conflict) or uncached (uncache conflict). VACaches 1575 * are cleaned and p_vcolor and PP_TNC are set accordingly 1576 * for the conflict cases. 
		 */
		sfmmu_kpm_vac_conflict(pp, vaddr);

		if (PP_ISNC(pp)) {
			/* ASSERT(pp->p_share); XXX use hat_page_getshare */

			/* tte assembly */
			KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);

			/* tsb dropin */
			sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);

			oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag,
			    &kpmsp->kshl_lock, (KPM_MAPPED_GO | KPM_MAPPEDSC));

			if (oldval != KPM_MAPPEDSC)
				panic("sfmmu_kpm_fault_small: "
				    "stale smallpages mapping");
		} else {
			if (PP_ISKPMC(pp)) {
				pmtx = sfmmu_page_enter(pp);
				PP_CLRKPMC(pp);
				sfmmu_page_exit(pmtx);
			}

			/* tte assembly */
			KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);

			/* tsb dropin */
			sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);

			oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag,
			    &kpmsp->kshl_lock, (KPM_MAPPED_GO | KPM_MAPPEDS));

			if (oldval != KPM_MAPPEDSC)
				panic("sfmmu_kpm_fault_small: "
				    "stale smallpages mapping");
		}

	} else {
		/*
		 * We got a tsbmiss on a not active kpm_page range.
		 * Let segkpm_fault decide how to panic.
		 */
		error = EFAULT;
	}

	sfmmu_mlist_exit(pml);
	return (error);
}

/*
 * Check/handle potential hme/kpm mapping conflicts
 */
static void
sfmmu_kpm_vac_conflict(page_t *pp, caddr_t vaddr)
{
	int vcolor;
	struct sf_hment *sfhmep;
	struct hat *tmphat;
	struct sf_hment *tmphme = NULL;
	struct hme_blk *hmeblkp;
	tte_t tte;

	ASSERT(sfmmu_mlist_held(pp));

	if (PP_ISNC(pp))
		return;

	vcolor = addr_to_vcolor(vaddr);
	if (PP_GET_VCOLOR(pp) == vcolor)
		return;

	/*
	 * There cannot be a vcolor conflict between a large cached
	 * hme page and a non alias range kpm page (neither large nor
	 * small mapped). So if a hme conflict already exists between
	 * a constituent page of a large hme mapping and a shared small
	 * conflicting hme mapping, both mappings must be already
	 * uncached at this point.
	 */
	ASSERT(!PP_ISMAPPED_LARGE(pp));

	if (!PP_ISMAPPED(pp)) {
		/*
		 * Previous hme user of page had a different color
		 * but since there are no current users
		 * we just flush the cache and change the color.
		 */
		SFMMU_STAT(sf_pgcolor_conflict);
		sfmmu_cache_flush(pp->p_pagenum, PP_GET_VCOLOR(pp));
		PP_SET_VCOLOR(pp, vcolor);
		return;
	}

	/*
	 * If we get here we have a vac conflict with a current hme
	 * mapping. This must have been established by forcing a wrong
	 * colored mapping, e.g. by using mmap(2) with MAP_FIXED.
	 */

	/*
	 * Check if any mapping belongs to the kernel hat or is locked,
	 * since in that case we need to uncache.
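	 * A mapping owned by the kernel hat (ksfmmup) or held in a locked
	 * hme_blk cannot simply be unloaded, so such a page is marked
	 * temporary non cacheable (uncache conflict) instead; all other
	 * cases are handled below as an unload conflict.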
1681 */ 1682 for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) { 1683 tmphme = sfhmep->hme_next; 1684 if (IS_PAHME(sfhmep)) 1685 continue; 1686 hmeblkp = sfmmu_hmetohblk(sfhmep); 1687 tmphat = hblktosfmmu(hmeblkp); 1688 sfmmu_copytte(&sfhmep->hme_tte, &tte); 1689 ASSERT(TTE_IS_VALID(&tte)); 1690 if ((tmphat == ksfmmup) || hmeblkp->hblk_lckcnt) { 1691 /* 1692 * We have an uncache conflict 1693 */ 1694 SFMMU_STAT(sf_uncache_conflict); 1695 sfmmu_page_cache_array(pp, HAT_TMPNC, CACHE_FLUSH, 1); 1696 return; 1697 } 1698 } 1699 1700 /* 1701 * We have an unload conflict 1702 */ 1703 SFMMU_STAT(sf_unload_conflict); 1704 1705 for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) { 1706 tmphme = sfhmep->hme_next; 1707 if (IS_PAHME(sfhmep)) 1708 continue; 1709 hmeblkp = sfmmu_hmetohblk(sfhmep); 1710 (void) sfmmu_pageunload(pp, sfhmep, TTE8K); 1711 } 1712 1713 /* 1714 * Unloads only does tlb flushes so we need to flush the 1715 * dcache vcolor here. 1716 */ 1717 sfmmu_cache_flush(pp->p_pagenum, PP_GET_VCOLOR(pp)); 1718 PP_SET_VCOLOR(pp, vcolor); 1719 } 1720 1721 /* 1722 * Remove all kpm mappings using kpme's for pp and check that 1723 * all kpm mappings (w/ and w/o kpme's) are gone. 1724 */ 1725 void 1726 sfmmu_kpm_pageunload(page_t *pp) 1727 { 1728 caddr_t vaddr; 1729 struct kpme *kpme, *nkpme; 1730 1731 ASSERT(pp != NULL); 1732 ASSERT(pp->p_kpmref); 1733 ASSERT(sfmmu_mlist_held(pp)); 1734 1735 vaddr = hat_kpm_page2va(pp, 1); 1736 1737 for (kpme = pp->p_kpmelist; kpme; kpme = nkpme) { 1738 ASSERT(kpme->kpe_page == pp); 1739 1740 if (pp->p_kpmref == 0) 1741 panic("sfmmu_kpm_pageunload: stale p_kpmref pp=%p " 1742 "kpme=%p", (void *)pp, (void *)kpme); 1743 1744 nkpme = kpme->kpe_next; 1745 1746 /* Add instance callback here here if needed later */ 1747 sfmmu_kpme_sub(kpme, pp); 1748 } 1749 1750 /* 1751 * Also correct after mixed kpme/nonkpme mappings. If nonkpme 1752 * segkpm clients have unlocked the page and forgot to mapout 1753 * we panic here. 1754 */ 1755 if (pp->p_kpmref != 0) 1756 panic("sfmmu_kpm_pageunload: bad refcnt pp=%p", (void *)pp); 1757 1758 sfmmu_kpm_mapout(pp, vaddr); 1759 } 1760 1761 /* 1762 * Remove a large kpm mapping from kernel TSB and all TLB's. 1763 */ 1764 static void 1765 sfmmu_kpm_demap_large(caddr_t vaddr) 1766 { 1767 sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT4M); 1768 sfmmu_kpm_demap_tlbs(vaddr); 1769 } 1770 1771 /* 1772 * Remove a small kpm mapping from kernel TSB and all TLB's. 1773 */ 1774 static void 1775 sfmmu_kpm_demap_small(caddr_t vaddr) 1776 { 1777 sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT); 1778 sfmmu_kpm_demap_tlbs(vaddr); 1779 } 1780 1781 /* 1782 * Demap a kpm mapping in all TLB's. 1783 */ 1784 static void 1785 sfmmu_kpm_demap_tlbs(caddr_t vaddr) 1786 { 1787 cpuset_t cpuset; 1788 1789 kpreempt_disable(); 1790 cpuset = ksfmmup->sfmmu_cpusran; 1791 CPUSET_AND(cpuset, cpu_ready_set); 1792 CPUSET_DEL(cpuset, CPU->cpu_id); 1793 SFMMU_XCALL_STATS(ksfmmup); 1794 1795 xt_some(cpuset, vtag_flushpage_tl1, (uint64_t)vaddr, 1796 (uint64_t)ksfmmup); 1797 vtag_flushpage(vaddr, (uint64_t)ksfmmup); 1798 1799 kpreempt_enable(); 1800 } 1801 1802 /* 1803 * Summary states used in sfmmu_kpm_vac_unload (KPM_VUL__*). 1804 * See also more detailed comments within in the sfmmu_kpm_vac_unload switch. 1805 * Abbreviations used: 1806 * BIG: Large page kpm mapping in use. 1807 * CONFL: VAC conflict(s) within a kpm_page. 1808 * INCR: Count of conflicts within a kpm_page is going to be incremented. 1809 * DECR: Count of conflicts within a kpm_page is going to be decremented. 
1810 * UNMAP_SMALL: A small (regular page size) mapping is going to be unmapped. 1811 * TNC: Temporary non cached: a kpm mapped page is mapped in TNC state. 1812 */ 1813 #define KPM_VUL_BIG (0) 1814 #define KPM_VUL_CONFL_INCR1 (KPM_KS) 1815 #define KPM_VUL_UNMAP_SMALL1 (KPM_KS | KPM_S) 1816 #define KPM_VUL_CONFL_INCR2 (KPM_KC) 1817 #define KPM_VUL_CONFL_INCR3 (KPM_KC | KPM_KS) 1818 #define KPM_VUL_UNMAP_SMALL2 (KPM_KC | KPM_KS | KPM_S) 1819 #define KPM_VUL_CONFL_DECR1 (KPM_KC | KPM_C) 1820 #define KPM_VUL_CONFL_DECR2 (KPM_KC | KPM_C | KPM_KS) 1821 #define KPM_VUL_TNC (KPM_KC | KPM_C | KPM_KS | KPM_S) 1822 1823 /* 1824 * Handle VAC unload conflicts introduced by hme mappings or vice 1825 * versa when a hme conflict mapping is replaced by a non conflict 1826 * one. Perform actions and state transitions according to the 1827 * various page and kpm_page entry states. VACache flushes are in 1828 * the responsibiliy of the caller. We still hold the mlist lock. 1829 */ 1830 void 1831 sfmmu_kpm_vac_unload(page_t *pp, caddr_t vaddr) 1832 { 1833 kpm_page_t *kp; 1834 kpm_hlk_t *kpmp; 1835 caddr_t kpmvaddr = hat_kpm_page2va(pp, 1); 1836 int newcolor; 1837 kmutex_t *pmtx; 1838 uint_t vacunlcase; 1839 int badstate = 0; 1840 kpm_spage_t *ksp; 1841 kpm_shlk_t *kpmsp; 1842 1843 ASSERT(PAGE_LOCKED(pp)); 1844 ASSERT(sfmmu_mlist_held(pp)); 1845 ASSERT(!PP_ISNC(pp)); 1846 1847 newcolor = addr_to_vcolor(kpmvaddr) != addr_to_vcolor(vaddr); 1848 if (kpm_smallpages) 1849 goto smallpages_vac_unload; 1850 1851 PP2KPMPG(pp, kp); 1852 kpmp = KPMP_HASH(kp); 1853 mutex_enter(&kpmp->khl_mutex); 1854 1855 if (IS_KPM_ALIAS_RANGE(kpmvaddr)) { 1856 if (kp->kp_refcnta < 1) { 1857 panic("sfmmu_kpm_vac_unload: bad refcnta kpm_page=%p\n", 1858 (void *)kp); 1859 } 1860 1861 if (PP_ISKPMC(pp) == 0) { 1862 if (newcolor == 0) 1863 goto exit; 1864 sfmmu_kpm_demap_small(kpmvaddr); 1865 pmtx = sfmmu_page_enter(pp); 1866 PP_SETKPMC(pp); 1867 sfmmu_page_exit(pmtx); 1868 1869 } else if (newcolor == 0) { 1870 pmtx = sfmmu_page_enter(pp); 1871 PP_CLRKPMC(pp); 1872 sfmmu_page_exit(pmtx); 1873 1874 } else { 1875 badstate++; 1876 } 1877 1878 goto exit; 1879 } 1880 1881 badstate = (kp->kp_refcnt < 0 || kp->kp_refcnts < 0); 1882 if (kp->kp_refcntc == -1) { 1883 /* 1884 * We should come here only if trap level tsb miss 1885 * handler is disabled. 1886 */ 1887 badstate |= (kp->kp_refcnt == 0 || kp->kp_refcnts > 0 || 1888 PP_ISKPMC(pp) || PP_ISKPMS(pp) || PP_ISNC(pp)); 1889 } else { 1890 badstate |= (kp->kp_refcntc < 0); 1891 } 1892 1893 if (badstate) 1894 goto exit; 1895 1896 if (PP_ISKPMC(pp) == 0 && newcolor == 0) { 1897 ASSERT(PP_ISKPMS(pp) == 0); 1898 goto exit; 1899 } 1900 1901 /* 1902 * Combine the per kpm_page and per page kpm VAC states 1903 * to a summary state in order to make the vac unload 1904 * handling more concise. 1905 */ 1906 vacunlcase = (((kp->kp_refcntc > 0) ? KPM_KC : 0) | 1907 ((kp->kp_refcnts > 0) ? KPM_KS : 0) | 1908 (PP_ISKPMC(pp) ? KPM_C : 0) | 1909 (PP_ISKPMS(pp) ? KPM_S : 0)); 1910 1911 switch (vacunlcase) { 1912 case KPM_VUL_BIG: /* - - - - */ 1913 /* 1914 * Have to breakup the large page mapping to be 1915 * able to handle the conflicting hme vaddr. 
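		 *
		 * Breaking up means: stop trap level tsbmiss handling for
		 * this kpm_page (kp_refcntc goes from -1 to 0), demap the
		 * large TSB/TLB entry, and let subsequent kpm tsbmisses
		 * re-establish small per page mappings via sfmmu_kpm_fault().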
1916 */ 1917 if (kp->kp_refcntc == -1) { 1918 /* remove go indication */ 1919 sfmmu_kpm_tsbmtl(&kp->kp_refcntc, 1920 &kpmp->khl_lock, KPMTSBM_STOP); 1921 } 1922 sfmmu_kpm_demap_large(kpmvaddr); 1923 1924 ASSERT(kp->kp_refcntc == 0); 1925 kp->kp_refcntc++; 1926 pmtx = sfmmu_page_enter(pp); 1927 PP_SETKPMC(pp); 1928 sfmmu_page_exit(pmtx); 1929 break; 1930 1931 case KPM_VUL_UNMAP_SMALL1: /* - - ks s */ 1932 case KPM_VUL_UNMAP_SMALL2: /* kc - ks s */ 1933 /* 1934 * New conflict w/ an active kpm page, actually mapped 1935 * in by small TSB/TLB entries. Remove the mapping and 1936 * update states. 1937 */ 1938 ASSERT(newcolor); 1939 sfmmu_kpm_demap_small(kpmvaddr); 1940 kp->kp_refcnts--; 1941 kp->kp_refcnt++; 1942 kp->kp_refcntc++; 1943 pmtx = sfmmu_page_enter(pp); 1944 PP_CLRKPMS(pp); 1945 PP_SETKPMC(pp); 1946 sfmmu_page_exit(pmtx); 1947 break; 1948 1949 case KPM_VUL_CONFL_INCR1: /* - - ks - */ 1950 case KPM_VUL_CONFL_INCR2: /* kc - - - */ 1951 case KPM_VUL_CONFL_INCR3: /* kc - ks - */ 1952 /* 1953 * New conflict on a active kpm mapped page not yet in 1954 * TSB/TLB. Mark page and increment the kpm_page conflict 1955 * count. 1956 */ 1957 ASSERT(newcolor); 1958 kp->kp_refcntc++; 1959 pmtx = sfmmu_page_enter(pp); 1960 PP_SETKPMC(pp); 1961 sfmmu_page_exit(pmtx); 1962 break; 1963 1964 case KPM_VUL_CONFL_DECR1: /* kc c - - */ 1965 case KPM_VUL_CONFL_DECR2: /* kc c ks - */ 1966 /* 1967 * A conflicting hme mapping is removed for an active 1968 * kpm page not yet in TSB/TLB. Unmark page and decrement 1969 * the kpm_page conflict count. 1970 */ 1971 ASSERT(newcolor == 0); 1972 kp->kp_refcntc--; 1973 pmtx = sfmmu_page_enter(pp); 1974 PP_CLRKPMC(pp); 1975 sfmmu_page_exit(pmtx); 1976 break; 1977 1978 case KPM_VUL_TNC: /* kc c ks s */ 1979 cmn_err(CE_NOTE, "sfmmu_kpm_vac_unload: " 1980 "page not in NC state"); 1981 /* FALLTHRU */ 1982 1983 default: 1984 badstate++; 1985 } 1986 exit: 1987 if (badstate) { 1988 panic("sfmmu_kpm_vac_unload: inconsistent VAC state, " 1989 "kpmvaddr=%p kp=%p pp=%p", 1990 (void *)kpmvaddr, (void *)kp, (void *)pp); 1991 } 1992 mutex_exit(&kpmp->khl_mutex); 1993 1994 return; 1995 1996 smallpages_vac_unload: 1997 if (newcolor == 0) 1998 return; 1999 2000 PP2KPMSPG(pp, ksp); 2001 kpmsp = KPMP_SHASH(ksp); 2002 2003 if (PP_ISKPMC(pp) == 0) { 2004 if (ksp->kp_mapped == KPM_MAPPEDS) { 2005 /* 2006 * Stop TL tsbmiss handling 2007 */ 2008 (void) sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag, 2009 &kpmsp->kshl_lock, KPM_MAPPEDSC); 2010 2011 sfmmu_kpm_demap_small(kpmvaddr); 2012 2013 } else if (ksp->kp_mapped != KPM_MAPPEDSC) { 2014 panic("sfmmu_kpm_vac_unload: inconsistent mapping"); 2015 } 2016 2017 pmtx = sfmmu_page_enter(pp); 2018 PP_SETKPMC(pp); 2019 sfmmu_page_exit(pmtx); 2020 2021 } else { 2022 if (ksp->kp_mapped != KPM_MAPPEDSC) 2023 panic("sfmmu_kpm_vac_unload: inconsistent mapping"); 2024 } 2025 } 2026 2027 /* 2028 * Page is marked to be in VAC conflict to an existing kpm mapping 2029 * or is kpm mapped using only the regular pagesize. Called from 2030 * sfmmu_hblk_unload when a mlist is completely removed. 
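 *
 * On entry the caller still holds the mlist lock and the page is still
 * kpm mapped (PP_ISMAPPED_KPM); the P_KPMC/P_KPMS bits of the page
 * select the case handled below.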
/*
 * Page is marked to be in VAC conflict with an existing kpm mapping
 * or is kpm mapped using only the regular pagesize. Called from
 * sfmmu_hblk_unload when an mlist is completely removed.
 */
void
sfmmu_kpm_hme_unload(page_t *pp)
{
	/* tte assembly */
	kpm_page_t *kp;
	kpm_hlk_t *kpmp;
	caddr_t vaddr;
	kmutex_t *pmtx;
	uint_t flags;
	kpm_spage_t *ksp;

	ASSERT(sfmmu_mlist_held(pp));
	ASSERT(PP_ISMAPPED_KPM(pp));

	flags = pp->p_nrm & (P_KPMC | P_KPMS);
	if (kpm_smallpages)
		goto smallpages_hme_unload;

	if (flags == (P_KPMC | P_KPMS)) {
		panic("sfmmu_kpm_hme_unload: page should be uncached");

	} else if (flags == P_KPMS) {
		/*
		 * Page mapped small but not involved in VAC conflict
		 */
		return;
	}

	vaddr = hat_kpm_page2va(pp, 1);

	PP2KPMPG(pp, kp);
	kpmp = KPMP_HASH(kp);
	mutex_enter(&kpmp->khl_mutex);

	if (IS_KPM_ALIAS_RANGE(vaddr)) {
		if (kp->kp_refcnta < 1) {
			panic("sfmmu_kpm_hme_unload: bad refcnta kpm_page=%p\n",
			    (void *)kp);
		}
	} else {
		if (kp->kp_refcntc < 1) {
			panic("sfmmu_kpm_hme_unload: bad refcntc kpm_page=%p\n",
			    (void *)kp);
		}
		kp->kp_refcntc--;
	}

	pmtx = sfmmu_page_enter(pp);
	PP_CLRKPMC(pp);
	sfmmu_page_exit(pmtx);

	mutex_exit(&kpmp->khl_mutex);
	return;

smallpages_hme_unload:
	if (flags != P_KPMC)
		panic("sfmmu_kpm_hme_unload: page should be uncached");

	vaddr = hat_kpm_page2va(pp, 1);
	PP2KPMSPG(pp, ksp);

	if (ksp->kp_mapped != KPM_MAPPEDSC)
		panic("sfmmu_kpm_hme_unload: inconsistent mapping");

	/*
	 * Keep KPM_MAPPEDSC until the next kpm tsbmiss where it
	 * prevents TL tsbmiss handling and forces a hat_kpm_fault.
	 * There we can start over again.
	 */

	pmtx = sfmmu_page_enter(pp);
	PP_CLRKPMC(pp);
	sfmmu_page_exit(pmtx);
}

/*
 * Special hooks for sfmmu_page_cache_array() when changing the
 * cacheability of a page. They are used to obey the hat_kpm lock
 * ordering (mlist -> kpmp -> spl, and back). A hypothetical usage
 * sketch follows sfmmu_kpm_kpmp_exit() below.
 */
kpm_hlk_t *
sfmmu_kpm_kpmp_enter(page_t *pp, pgcnt_t npages)
{
	kpm_page_t *kp;
	kpm_hlk_t *kpmp;

	ASSERT(sfmmu_mlist_held(pp));

	if (kpm_smallpages || PP_ISMAPPED_KPM(pp) == 0)
		return (NULL);

	ASSERT(npages <= kpmpnpgs);

	PP2KPMPG(pp, kp);
	kpmp = KPMP_HASH(kp);
	mutex_enter(&kpmp->khl_mutex);

	return (kpmp);
}

void
sfmmu_kpm_kpmp_exit(kpm_hlk_t *kpmp)
{
	if (kpm_smallpages || kpmp == NULL)
		return;

	mutex_exit(&kpmp->khl_mutex);
}
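/*
 * Hypothetical usage sketch for the two hooks above (editorial
 * addition; this is not taken from the real caller, which lives in the
 * common sfmmu code). A caller such as sfmmu_page_cache_array() is
 * expected to honor the mlist -> kpmp -> spl lock ordering roughly
 * like this:
 *
 *	kmutex_t *pml;
 *	kpm_hlk_t *kpmp;
 *
 *	pml = sfmmu_mlist_enter(pp);			   mlist lock first
 *	kpmp = sfmmu_kpm_kpmp_enter(pp, npages);	   then kpmp
 *	... grab the spl hash lock, change the cacheability of the
 *	... page(s), e.g. via sfmmu_kpm_page_cache(), drop the spl lock
 *	sfmmu_kpm_kpmp_exit(kpmp);			   reverse order out
 *	sfmmu_mlist_exit(pml);
 *
 * sfmmu_kpm_kpmp_enter() returns NULL when there is nothing to lock
 * (kpm_smallpages, or the page has no kpm mapping), and
 * sfmmu_kpm_kpmp_exit() tolerates that NULL.
 */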
/*
 * Summary states used in sfmmu_kpm_page_cache (KPM_*).
 * See also more detailed comments within the sfmmu_kpm_page_cache switch.
 * Abbreviations used:
 * UNC: Input state for an uncache request.
 * BIG: Large page kpm mapping in use.
 * SMALL: Page has a small kpm mapping within a kpm_page range.
 * NODEMAP: No demap needed.
 * NOP: No operation needed on this input state.
 * CACHE: Input state for a re-cache request.
 * MAPS: Page is in TNC and kpm VAC conflict state and kpm mapped small.
 * NOMAP: Page is in TNC and kpm VAC conflict state, but not small kpm
 *	mapped.
 * NOMAPO: Page is in TNC and kpm VAC conflict state, but not small kpm
 *	mapped. There are also other small kpm mappings within this
 *	kpm_page.
 */
#define	KPM_UNC_BIG		(0)
#define	KPM_UNC_NODEMAP1	(KPM_KS)
#define	KPM_UNC_SMALL1		(KPM_KS | KPM_S)
#define	KPM_UNC_NODEMAP2	(KPM_KC)
#define	KPM_UNC_NODEMAP3	(KPM_KC | KPM_KS)
#define	KPM_UNC_SMALL2		(KPM_KC | KPM_KS | KPM_S)
#define	KPM_UNC_NOP1		(KPM_KC | KPM_C)
#define	KPM_UNC_NOP2		(KPM_KC | KPM_C | KPM_KS)
#define	KPM_CACHE_NOMAP		(KPM_KC | KPM_C)
#define	KPM_CACHE_NOMAPO	(KPM_KC | KPM_C | KPM_KS)
#define	KPM_CACHE_MAPS		(KPM_KC | KPM_C | KPM_KS | KPM_S)

/*
 * This function is called when the virtual cacheability of a page
 * is changed and the page has an active kpm mapping. The mlist mutex,
 * the spl hash lock and the kpmp mutex (if needed) are already grabbed.
 */
/*ARGSUSED2*/
void
sfmmu_kpm_page_cache(page_t *pp, int flags, int cache_flush_tag)
{
	kpm_page_t *kp;
	kpm_hlk_t *kpmp;
	caddr_t kpmvaddr;
	int badstate = 0;
	uint_t pgcacase;
	kpm_spage_t *ksp;
	kpm_shlk_t *kpmsp;
	int oldval;

	ASSERT(PP_ISMAPPED_KPM(pp));
	ASSERT(sfmmu_mlist_held(pp));
	ASSERT(sfmmu_page_spl_held(pp));

	if (flags != HAT_TMPNC && flags != HAT_CACHE)
		panic("sfmmu_kpm_page_cache: bad flags");

	kpmvaddr = hat_kpm_page2va(pp, 1);

	if (flags == HAT_TMPNC && cache_flush_tag == CACHE_FLUSH) {
		pfn_t pfn = pp->p_pagenum;
		int vcolor = addr_to_vcolor(kpmvaddr);
		cpuset_t cpuset = cpu_ready_set;

		/* Flush vcolor in DCache */
		CPUSET_DEL(cpuset, CPU->cpu_id);
		SFMMU_XCALL_STATS(ksfmmup);
		xt_some(cpuset, vac_flushpage_tl1, pfn, vcolor);
		vac_flushpage(pfn, vcolor);
	}

	if (kpm_smallpages)
		goto smallpages_page_cache;

	PP2KPMPG(pp, kp);
	kpmp = KPMP_HASH(kp);
	ASSERT(MUTEX_HELD(&kpmp->khl_mutex));

	if (IS_KPM_ALIAS_RANGE(kpmvaddr)) {
		if (kp->kp_refcnta < 1) {
			panic("sfmmu_kpm_page_cache: bad refcnta "
			    "kpm_page=%p\n", (void *)kp);
		}
		sfmmu_kpm_demap_small(kpmvaddr);
		if (flags == HAT_TMPNC) {
			PP_SETKPMC(pp);
			ASSERT(!PP_ISKPMS(pp));
		} else {
			ASSERT(PP_ISKPMC(pp));
			PP_CLRKPMC(pp);
		}
		goto exit;
	}

	badstate = (kp->kp_refcnt < 0 || kp->kp_refcnts < 0);
	if (kp->kp_refcntc == -1) {
		/*
		 * We should come here only if trap level tsb miss
		 * handler is disabled.
		 */
		badstate |= (kp->kp_refcnt == 0 || kp->kp_refcnts > 0 ||
		    PP_ISKPMC(pp) || PP_ISKPMS(pp) || PP_ISNC(pp));
	} else {
		badstate |= (kp->kp_refcntc < 0);
	}

	if (badstate)
		goto exit;

	/*
	 * Combine the per kpm_page and per page kpm VAC states to
	 * a summary state in order to make the VAC cache/uncache
	 * handling more concise.
	 */
	pgcacase = (((kp->kp_refcntc > 0) ? KPM_KC : 0) |
	    ((kp->kp_refcnts > 0) ? KPM_KS : 0) |
	    (PP_ISKPMC(pp) ? KPM_C : 0) |
	    (PP_ISKPMS(pp) ? KPM_S : 0));
	if (flags == HAT_CACHE) {
		switch (pgcacase) {
		case KPM_CACHE_MAPS:			/* kc c ks s */
			sfmmu_kpm_demap_small(kpmvaddr);
			if (kp->kp_refcnts < 1) {
				panic("sfmmu_kpm_page_cache: bad refcnts "
				    "kpm_page=%p\n", (void *)kp);
			}
			kp->kp_refcnts--;
			kp->kp_refcnt++;
			PP_CLRKPMS(pp);
			/* FALLTHRU */

		case KPM_CACHE_NOMAP:			/* kc c -  - */
		case KPM_CACHE_NOMAPO:			/* kc c ks - */
			kp->kp_refcntc--;
			PP_CLRKPMC(pp);
			break;

		default:
			badstate++;
		}
		goto exit;
	}

	switch (pgcacase) {
	case KPM_UNC_BIG:				/* - - - - */
		if (kp->kp_refcnt < 1) {
			panic("sfmmu_kpm_page_cache: bad refcnt "
			    "kpm_page=%p\n", (void *)kp);
		}

		/*
		 * Have to break up the large page mapping in preparation
		 * for the upcoming TNC mode handled by small mappings.
		 * The demap may already have been done due to another
		 * conflict within the kpm_page.
		 */
		if (kp->kp_refcntc == -1) {
			/* remove go indication */
			sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
			    &kpmp->khl_lock, KPMTSBM_STOP);
		}
		ASSERT(kp->kp_refcntc == 0);
		sfmmu_kpm_demap_large(kpmvaddr);
		kp->kp_refcntc++;
		PP_SETKPMC(pp);
		break;

	case KPM_UNC_SMALL1:				/* -  - ks s */
	case KPM_UNC_SMALL2:				/* kc - ks s */
		/*
		 * Have to demap an already small kpm mapping in preparation
		 * for the upcoming TNC mode. The demap may already have been
		 * done due to another conflict within the kpm_page.
		 */
		sfmmu_kpm_demap_small(kpmvaddr);
		kp->kp_refcntc++;
		kp->kp_refcnts--;
		kp->kp_refcnt++;
		PP_CLRKPMS(pp);
		PP_SETKPMC(pp);
		break;

	case KPM_UNC_NODEMAP1:				/* -  - ks - */
		/* fallthru */

	case KPM_UNC_NODEMAP2:				/* kc - -  - */
	case KPM_UNC_NODEMAP3:				/* kc - ks - */
		kp->kp_refcntc++;
		PP_SETKPMC(pp);
		break;

	case KPM_UNC_NOP1:				/* kc c -  - */
	case KPM_UNC_NOP2:				/* kc c ks - */
		break;

	default:
		badstate++;
	}
exit:
	if (badstate) {
		panic("sfmmu_kpm_page_cache: inconsistent VAC state "
		    "kpmvaddr=%p kp=%p pp=%p", (void *)kpmvaddr,
		    (void *)kp, (void *)pp);
	}
	return;

smallpages_page_cache:
	PP2KPMSPG(pp, ksp);
	kpmsp = KPMP_SHASH(ksp);

	/*
	 * Mark the mapping as no-go; we will fault in and resolve it
	 * through sfmmu_kpm_fault_small.
	 */
	oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped_flag, &kpmsp->kshl_lock,
	    KPM_MAPPEDSC);

	if (!(oldval == KPM_MAPPEDS || oldval == KPM_MAPPEDSC))
		panic("smallpages_page_cache: inconsistent mapping");

	sfmmu_kpm_demap_small(kpmvaddr);

	if (flags == HAT_TMPNC) {
		PP_SETKPMC(pp);
		ASSERT(!PP_ISKPMS(pp));

	} else {
		ASSERT(PP_ISKPMC(pp));
		PP_CLRKPMC(pp);
	}

	/*
	 * Keep KPM_MAPPEDSC until the next kpm tsbmiss where it
	 * prevents TL tsbmiss handling and forces a hat_kpm_fault.
	 * There we can start over again.
	 */
}
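/*
 * Illustrative example (editorial addition, derived only from the code
 * above): an uncache/re-cache cycle in sfmmu_kpm_page_cache() for a
 * page covered by the large (4M) kpm mapping with no conflicts yet
 * recorded in its kpm_page.
 *
 * Uncache (flags == HAT_TMPNC): with kp_refcntc == 0, kp_refcnts == 0
 * and P_KPMC/P_KPMS clear, the summary state is KPM_UNC_BIG; the large
 * kpm mapping is demapped, kp_refcntc goes 0 -> 1 and P_KPMC is set,
 * so the page is now accounted as a VAC conflict within its kpm_page.
 *
 * Re-cache (flags == HAT_CACHE): with the conflict still recorded and
 * no page of this kpm_page mapped small, the summary state is
 * KPM_CACHE_NOMAP (KPM_KC | KPM_C); kp_refcntc goes back to 0 and
 * P_KPMC is cleared, returning the kpm_page to its unconflicted state.
 * (If other pages of the kpm_page were mapped small, KPM_CACHE_NOMAPO
 * would apply instead.)
 */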