/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

/*
 * VM - generic vnode mapping segment.
 *
 * The segmap driver is used only by the kernel to get faster (than seg_vn)
 * mappings [lower routine overhead; more persistent cache] to random
 * vnode/offsets.  Note that the kernel may (and does) use seg_vn as well.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>

/*
 * Private seg op routines.
 */
static void	segmap_free(struct seg *seg);
faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
		size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int	segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
		uint_t prot);
static int	segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int	segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
		uint_t *protv);
static u_offset_t	segmap_getoffset(struct seg *seg, caddr_t addr);
static int	segmap_gettype(struct seg *seg, caddr_t addr);
static int	segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void	segmap_dump(struct seg *seg);
static int	segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
			struct page ***ppp, enum lock_type type,
			enum seg_rw rw);
static void	segmap_badop(void);
static int	segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
static lgrp_mem_policy_info_t	*segmap_getpolicy(struct seg *seg,
    caddr_t addr);
static int	segmap_capable(struct seg *seg, segcapability_t capability);

/* segkpm support */
static caddr_t	segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
			struct smap *, enum seg_rw);
struct smap	*get_smap_kpm(caddr_t, page_t **);

#define	SEGMAP_BADOP(t)	(t(*)())segmap_badop

static struct seg_ops segmap_ops = {
	SEGMAP_BADOP(int),	/* dup */
	SEGMAP_BADOP(int),	/* unmap */
	segmap_free,
	segmap_fault,
	segmap_faulta,
	SEGMAP_BADOP(int),	/* setprot */
	segmap_checkprot,
	segmap_kluster,
	SEGMAP_BADOP(size_t),	/* swapout */
	SEGMAP_BADOP(int),	/* sync */
	SEGMAP_BADOP(size_t),	/* incore */
	SEGMAP_BADOP(int),	/* lockop */
	segmap_getprot,
	segmap_getoffset,
	segmap_gettype,
	segmap_getvp,
	SEGMAP_BADOP(int),	/* advise */
	segmap_dump,
	segmap_pagelock,	/* pagelock */
	SEGMAP_BADOP(int),	/* setpgsz */
	segmap_getmemid,	/* getmemid */
	segmap_getpolicy,	/* getpolicy */
	segmap_capable,		/* capable */
};

/*
 * Private segmap routines.
 */
static void	segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
	size_t len, enum seg_rw rw, struct smap *smp);
static void	segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
	u_offset_t off, int hashid);
static void	segmap_hashout(struct smap *smp);


/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
 */
struct segmapcnt segmapcnt = {
	{ "fault",		KSTAT_DATA_ULONG },
	{ "faulta",		KSTAT_DATA_ULONG },
	{ "getmap",		KSTAT_DATA_ULONG },
	{ "get_use",		KSTAT_DATA_ULONG },
	{ "get_reclaim",	KSTAT_DATA_ULONG },
	{ "get_reuse",		KSTAT_DATA_ULONG },
	{ "get_unused",		KSTAT_DATA_ULONG },
	{ "get_nofree",		KSTAT_DATA_ULONG },
	{ "rel_async",		KSTAT_DATA_ULONG },
	{ "rel_write",		KSTAT_DATA_ULONG },
	{ "rel_free",		KSTAT_DATA_ULONG },
	{ "rel_abort",		KSTAT_DATA_ULONG },
	{ "rel_dontneed",	KSTAT_DATA_ULONG },
	{ "release",		KSTAT_DATA_ULONG },
	{ "pagecreate",		KSTAT_DATA_ULONG },
	{ "free_notfree",	KSTAT_DATA_ULONG },
	{ "free_dirty",		KSTAT_DATA_ULONG },
	{ "free",		KSTAT_DATA_ULONG },
	{ "stolen",		KSTAT_DATA_ULONG },
	{ "get_nomtx",		KSTAT_DATA_ULONG }
};

kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);

/*
 * Return number of map pages in segment.
 */
#define	MAP_PAGES(seg)		((seg)->s_size >> MAXBSHIFT)

/*
 * Translate addr into smap number within segment.
 */
#define	MAP_PAGE(seg, addr)	(((addr) - (seg)->s_base) >> MAXBSHIFT)

/*
 * Translate addr in seg into struct smap pointer.
 */
#define	GET_SMAP(seg, addr)	\
	&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])

/*
 * Bit in map (16 bit bitmap).
 */
#define	SMAP_BIT_MASK(bitindex)	(1 << ((bitindex) & 0xf))

static int smd_colormsk = 0;
static int smd_ncolor = 0;
static int smd_nfree = 0;
static int smd_freemsk = 0;
#ifdef DEBUG
static int *colors_used;
#endif
static struct smap *smd_smap;
static struct smaphash *smd_hash;
#ifdef SEGMAP_HASHSTATS
static unsigned int *smd_hash_len;
#endif
static struct smfree *smd_free;
static ulong_t smd_hashmsk = 0;

#define	SEGMAP_MAXCOLOR		2
#define	SEGMAP_CACHE_PAD	64

union segmap_cpu {
	struct {
		uint32_t	scpu_free_ndx[SEGMAP_MAXCOLOR];
		struct smap	*scpu_last_smap;
		ulong_t		scpu_getmap;
		ulong_t		scpu_release;
		ulong_t		scpu_get_reclaim;
		ulong_t		scpu_fault;
		ulong_t		scpu_pagecreate;
		ulong_t		scpu_get_reuse;
	} scpu;
	char	scpu_pad[SEGMAP_CACHE_PAD];
};
static union segmap_cpu *smd_cpu;

/*
 * There are three locks in seg_map:
 *	- per freelist mutexes
 *	- per hashchain mutexes
 *	- per smap mutexes
 *
 * The lock ordering is to get the smap mutex to lock down the slot
 * first, then the hash lock (for hash in/out (vp, off) list) or the
 * freelist lock to put the slot back on the free list.
 *
 * The hash search is done holding only the hashchain lock; when a wanted
 * slot is found, we drop the hashchain lock and then lock the slot, so there
 * is no overlapping of hashchain and smap locks.  After the slot is
 * locked, we verify again that the slot is still what we are looking
 * for.
 *
 * Allocation of a free slot is done by holding the freelist lock,
 * then locking the smap slot at the head of the freelist.  This is
 * in reversed lock order so mutex_tryenter() is used.
 *
 * The smap lock protects all fields in the smap structure except for
 * the link fields for hash/free lists, which are protected by the
 * hashchain and freelist locks.
 */

#define	SHASHMTX(hashid)	(&smd_hash[hashid].sh_mtx)

#define	SMP2SMF(smp)		(&smd_free[(smp - smd_smap) & smd_freemsk])
#define	SMP2SMF_NDX(smp)	(ushort_t)((smp - smd_smap) & smd_freemsk)

#define	SMAPMTX(smp) (&smp->sm_mtx)

#define	SMAP_HASHFUNC(vp, off, hashid) \
	{ \
	hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
		((off) >> MAXBSHIFT)) & smd_hashmsk); \
	}

/*
 * The most frequently updated kstat counters are kept in the
 * per cpu array to avoid hot cache blocks.  The update function
 * sums the cpu local counters to update the global counters.
 */

/* ARGSUSED */
int
segmap_kstat_update(kstat_t *ksp, int rw)
{
	int i;
	ulong_t	getmap, release, get_reclaim;
	ulong_t	fault, pagecreate, get_reuse;

	if (rw == KSTAT_WRITE)
		return (EACCES);
	getmap = release = get_reclaim = (ulong_t)0;
	fault = pagecreate = get_reuse = (ulong_t)0;
	for (i = 0; i < max_ncpus; i++) {
		getmap += smd_cpu[i].scpu.scpu_getmap;
		release += smd_cpu[i].scpu.scpu_release;
		get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
		fault += smd_cpu[i].scpu.scpu_fault;
		pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
		get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
	}
	segmapcnt.smp_getmap.value.ul = getmap;
	segmapcnt.smp_release.value.ul = release;
	segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
	segmapcnt.smp_fault.value.ul = fault;
	segmapcnt.smp_pagecreate.value.ul = pagecreate;
	segmapcnt.smp_get_reuse.value.ul = get_reuse;
	return (0);
}

int
segmap_create(struct seg *seg, void *argsp)
{
	struct segmap_data *smd;
	struct smap *smp;
	struct smfree *sm;
	struct segmap_crargs *a = (struct segmap_crargs *)argsp;
	struct smaphash *shashp;
	union segmap_cpu *scpu;
	long i, npages;
	size_t hashsz;
	uint_t nfreelist;
	extern void prefetch_smap_w(void *);
	extern int max_ncpus;

	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));

	if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
		panic("segkmap not MAXBSIZE aligned");
		/*NOTREACHED*/
	}

	smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);

	seg->s_data = (void *)smd;
	seg->s_ops = &segmap_ops;
	smd->smd_prot = a->prot;

	/*
	 * Scale the number of smap freelists to be
	 * proportional to max_ncpus * number of virtual colors.
	 * The caller can over-ride this scaling by providing
	 * a non-zero a->nfreelist argument.
	 */
	nfreelist = a->nfreelist;
	if (nfreelist == 0)
		nfreelist = max_ncpus;
	else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
		cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
		    "%d, using %d", nfreelist, max_ncpus);
		nfreelist = max_ncpus;
	}
	if (nfreelist & (nfreelist - 1)) {
		/* round up nfreelist to the next power of two. */
		nfreelist = 1 << (highbit(nfreelist));
	}

	/*
	 * Get the number of virtual colors - must be a power of 2.
	 */
	if (a->shmsize)
		smd_ncolor = a->shmsize >> MAXBSHIFT;
	else
		smd_ncolor = 1;
	ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
	ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
	smd_colormsk = smd_ncolor - 1;
	smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
	smd_freemsk = smd_nfree - 1;

	/*
	 * Allocate and initialize the freelist headers.
	 * Note that sm_freeq[1] starts out as the release queue.  This
	 * is known when the smap structures are initialized below.
	 */
	smd_free = smd->smd_free =
	    kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
	for (i = 0; i < smd_nfree; i++) {
		sm = &smd->smd_free[i];
		mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		sm->sm_allocq = &sm->sm_freeq[0];
		sm->sm_releq = &sm->sm_freeq[1];
	}

	/*
	 * Allocate and initialize the smap hash chain headers.
	 * Compute hash size rounding down to the next power of two.
	 */
	npages = MAP_PAGES(seg);
	smd->smd_npages = npages;
	hashsz = npages / SMAP_HASHAVELEN;
	hashsz = 1 << (highbit(hashsz)-1);
	smd_hashmsk = hashsz - 1;
	smd_hash = smd->smd_hash =
	    kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
#ifdef SEGMAP_HASHSTATS
	smd_hash_len =
	    kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
#endif
	for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
		shashp->sh_hash_list = NULL;
		mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
	}

	/*
	 * Allocate and initialize the smap structures.
	 * Link all slots onto the appropriate freelist.
	 * The smap array is large enough to affect boot time
	 * on large systems, so use memory prefetching and only
	 * go through the array 1 time.  Inline an optimized version
	 * of segmap_smapadd to add structures to freelists with
	 * knowledge that no locks are needed here.
	 */
	smd_smap = smd->smd_sm =
	    kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);

	for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
	    smp >= smd->smd_sm; smp--) {
		struct smap *smpfreelist;
		struct sm_freeq *releq;

		prefetch_smap_w((char *)smp);

		smp->sm_vp = NULL;
		smp->sm_hash = NULL;
		smp->sm_off = 0;
		smp->sm_bitmap = 0;
		smp->sm_refcnt = 0;
		mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
		smp->sm_free_ndx = SMP2SMF_NDX(smp);

		sm = SMP2SMF(smp);
		releq = sm->sm_releq;

		smpfreelist = releq->smq_free;
		if (smpfreelist == 0) {
			releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		} else {
			smp->sm_next = smpfreelist;
			smp->sm_prev = smpfreelist->sm_prev;
			smpfreelist->sm_prev = smp;
			smp->sm_prev->sm_next = smp;
			releq->smq_free = smp->sm_next;
		}

		/*
		 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
		 */
		smp->sm_flags = 0;

#ifdef	SEGKPM_SUPPORT
		/*
		 * Due to the fragile prefetch loop no
		 * separate function is used here.
		 */
		smp->sm_kpme_next = NULL;
		smp->sm_kpme_prev = NULL;
		smp->sm_kpme_page = NULL;
#endif
	}

	/*
	 * Allocate the per color indices that distribute allocation
	 * requests over the free lists.  Each cpu will have a private
	 * rotor index to spread the allocations even across the available
	 * smap freelists.  Init the scpu_last_smap field to the first
	 * smap element so there is no need to check for NULL.
	 */
	smd_cpu =
	    kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
	for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
		int j;
		for (j = 0; j < smd_ncolor; j++)
			scpu->scpu.scpu_free_ndx[j] = j;
		scpu->scpu.scpu_last_smap = smd_smap;
	}

	if (vpm_enable) {
		vpm_init();
	}

#ifdef DEBUG
	/*
	 * Keep track of which colors are used more often.
	 */
	colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
#endif /* DEBUG */

	return (0);
}

static void
segmap_free(seg)
	struct seg *seg;
{
	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
}

/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 */
static void
segmap_unlock(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum seg_rw rw,
	struct smap *smp)
{
	page_t *pp;
	caddr_t adr;
	u_offset_t off;
	struct vnode *vp;
	kmutex_t *smtx;

	ASSERT(smp->sm_refcnt > 0);

#ifdef lint
	seg = seg;
#endif

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		/*
		 * We're called only from segmap_fault and this was a
		 * NOP in case of a kpm based smap, so dangerous things
		 * must have happened in the meantime.  Pages are prefaulted
		 * and locked in segmap_getmapflt and they will not be
		 * unlocked until segmap_release.
		 */
		panic("segmap_unlock: called with kpm addr %p", (void *)addr);
		/*NOTREACHED*/
	}

	vp = smp->sm_vp;
	off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);

	hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
	for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
		ushort_t bitmask;

		/*
		 * Use page_find() instead of page_lookup() to
		 * find the page since we know that it has
		 * a "shared" lock.
		 */
		pp = page_find(vp, off);
		if (pp == NULL) {
			panic("segmap_unlock: page not found");
			/*NOTREACHED*/
		}

		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else if (rw != S_OTHER) {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, off);
			hat_setref(pp);
		}

		/*
		 * Clear bitmap, if the bit corresponding to "off" is set,
		 * since the page and translation are being unlocked.
		 */
		bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		if (smp->sm_bitmap & bitmask) {
			smp->sm_bitmap &= ~bitmask;
		}
		mutex_exit(smtx);

		page_unlock(pp);
	}
}

#define	MAXPPB	(MAXBSIZE/4096)	/* assumes minimum page size of 4k */

/*
 * This routine is called via a machine specific fault handling
 * routine.  It is also called by software routines wishing to
 * lock or unlock a range of addresses.
 *
 * Note that this routine expects a page-aligned "addr".
 */
faultcode_t
segmap_fault(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum fault_type type,
	enum seg_rw rw)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	struct smap *smp;
	page_t *pp, **ppp;
	struct vnode *vp;
	u_offset_t off;
	page_t *pl[MAXPPB + 1];
	uint_t prot;
	u_offset_t addroff;
	caddr_t adr;
	int err;
	u_offset_t sm_off;
	int hat_flag;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release.  No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifndef DEBUG
		if (type != F_SOFTUNLOCK)
			return (0);
#endif

		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_fault: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
#ifdef	DEBUG
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		if (newpage) {
			cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
			    (void *)smp);
		}

		if (type != F_SOFTUNLOCK) {
			mutex_exit(smtx);
			return (0);
		}
#endif
		mutex_exit(smtx);
		vp = smp->sm_vp;
		sm_off = smp->sm_off;

		if (vp == NULL)
			return (FC_MAKE_ERR(EIO));

		ASSERT(smp->sm_refcnt > 0);

		addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
		if (addroff + len > MAXBSIZE)
			panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
			    (void *)(addr + len));

		off = sm_off + addroff;

		pp = page_find(vp, off);

		if (pp == NULL)
			panic("segmap_fault: softunlock page not found");

		/*
		 * Set ref bit also here in case of S_OTHER to avoid the
		 * overhead of supporting other cases than F_SOFTUNLOCK
		 * with segkpm.  We can do this because the underlying
		 * pages are locked anyway.
		 */
		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, off);
			hat_setref(pp);
		}

		return (0);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
	smp = GET_SMAP(seg, addr);
	vp = smp->sm_vp;
	sm_off = smp->sm_off;

	if (vp == NULL)
		return (FC_MAKE_ERR(EIO));

	ASSERT(smp->sm_refcnt > 0);

	addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
	if (addroff + len > MAXBSIZE) {
		panic("segmap_fault: endaddr %p "
		    "exceeds MAXBSIZE chunk", (void *)(addr + len));
		/*NOTREACHED*/
	}
	off = sm_off + addroff;

	/*
	 * First handle the easy stuff
	 */
	if (type == F_SOFTUNLOCK) {
		segmap_unlock(hat, seg, addr, len, rw, smp);
		return (0);
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
	err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
	    seg, addr, rw, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));

	prot &= smd->smd_prot;

	/*
	 * Handle all pages returned in the pl[] array.
	 * This loop is coded on the assumption that, if
	 * there was no error from the VOP_GETPAGE routine,
	 * the page list returned will contain all the
	 * needed pages for the vp from [off..off + len].
	 */
	ppp = pl;
	while ((pp = *ppp++) != NULL) {
		u_offset_t poff;
		ASSERT(pp->p_vnode == vp);
		hat_flag = HAT_LOAD;

		/*
		 * Verify that the pages returned are within the range
		 * of this segmap region.  Note that it is theoretically
		 * possible for pages outside this range to be returned,
		 * but it is not very likely.  If we cannot use the
		 * page here, just release it and go on to the next one.
		 */
		if (pp->p_offset < sm_off ||
		    pp->p_offset >= sm_off + MAXBSIZE) {
			(void) page_release(pp, 1);
			continue;
		}

		ASSERT(hat == kas.a_hat);
		poff = pp->p_offset;
		adr = addr + (poff - off);
		if (adr >= addr && adr < addr + len) {
			hat_setref(pp);
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, poff);
			if (type == F_SOFTLOCK)
				hat_flag = HAT_LOAD_LOCK;
		}

		/*
		 * Deal with VMODSORT pages here. If we know this is a write
		 * do the setmod now and allow write protection.
		 * As long as it's modified or not S_OTHER, remove write
		 * protection. With S_OTHER it's up to the FS to deal with this.
		 */
		if (IS_VMODSORT(vp)) {
			if (rw == S_WRITE)
				hat_setmod(pp);
			else if (rw != S_OTHER && !hat_ismod(pp))
				prot &= ~PROT_WRITE;
		}

		hat_memload(hat, adr, pp, prot, hat_flag);
		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);
	}
	return (0);
}

/*
 * This routine is used to start I/O on pages asynchronously.
 */
static faultcode_t
segmap_faulta(struct seg *seg, caddr_t addr)
{
	struct smap *smp;
	struct vnode *vp;
	u_offset_t off;
	int err;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release.  No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifdef	DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_faulta: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		mutex_exit(smtx);
		if (newpage)
			cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
			    (void *)smp);
#endif
		return (0);
	}

	segmapcnt.smp_faulta.value.ul++;
	smp = GET_SMAP(seg, addr);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off;

	if (vp == NULL) {
		cmn_err(CE_WARN, "segmap_faulta - no vp");
		return (FC_MAKE_ERR(EIO));
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);

	err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
	    & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
	    seg, addr, S_READ, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));
	return (0);
}

/*ARGSUSED*/
static int
segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));

	/*
	 * Need not acquire the segment lock since
	 * "smd_prot" is a read-only field.
	 */
	return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
}

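/*
 * Illustrative sketch, not part of the original source: the typical
 * consumer of this driver maps a MAXBSIZE window over a vnode, copies
 * through it with uiomove(), and then releases the window.  The function
 * name and the SEGMAP_EXAMPLE guard below are hypothetical and the code
 * is never compiled; EOF checks and locking done by real filesystems are
 * elided.  segmap_getmapflt() and segmap_release() are defined later in
 * this file and declared in <vm/seg_map.h>.
 */
#ifdef SEGMAP_EXAMPLE
static int
segmap_example_read(struct vnode *vp, struct uio *uio)
{
	u_offset_t off = (u_offset_t)uio->uio_loffset;
	size_t mapon = (size_t)(off & MAXBOFFSET);	/* offset in chunk */
	size_t n = MIN(MAXBSIZE - mapon, (size_t)uio->uio_resid);
	caddr_t base;
	int error;

	/* Map <vp, off>; a nonzero forcefault prefaults the translations. */
	base = segmap_getmapflt(segkmap, vp, off, n, 1, S_READ);

	/* Copy out of the window at the offset within the MAXBSIZE chunk. */
	error = uiomove(base + mapon, n, UIO_READ, uio);

	if (error) {
		(void) segmap_release(segkmap, base, 0);
		return (error);
	}

	/* SM_DONTNEED hints that the window will not be reused soon. */
	return (segmap_release(segkmap, base, SM_DONTNEED));
}
#endif	/* SEGMAP_EXAMPLE */
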
static int
segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	if (pgno != 0) {
		do
			protv[--pgno] = smd->smd_prot;
		while (pgno != 0);
	}
	return (0);
}

static u_offset_t
segmap_getoffset(struct seg *seg, caddr_t addr)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
}

/*ARGSUSED*/
static int
segmap_gettype(struct seg *seg, caddr_t addr)
{
	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return (MAP_SHARED);
}

/*ARGSUSED*/
static int
segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	/* XXX - This doesn't make any sense */
	*vpp = smd->smd_sm->sm_vp;
	return (0);
}

/*
 * Check to see if it makes sense to do kluster/read ahead to
 * addr + delta relative to the mapping at addr.  We assume here
 * that delta is a signed PAGESIZE'd multiple (which can be negative).
 *
 * For segmap we always "approve" of this action from our standpoint.
 */
/*ARGSUSED*/
static int
segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (0);
}

static void
segmap_badop()
{
	panic("segmap_badop");
	/*NOTREACHED*/
}

/*
 * Special private segmap operations
 */

/*
 * Add smap to the appropriate free list.
 */
static void
segmap_smapadd(struct smap *smp)
{
	struct smfree *sm;
	struct smap *smpfreelist;
	struct sm_freeq *releq;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	if (smp->sm_refcnt != 0) {
		panic("segmap_smapadd");
		/*NOTREACHED*/
	}

	sm = &smd_free[smp->sm_free_ndx];
	/*
	 * Add to the tail of the release queue
	 * Note that sm_releq and sm_allocq could toggle
	 * before we get the lock. This does not affect
	 * correctness as the 2 queues are only maintained
	 * to reduce lock pressure.
	 */
	releq = sm->sm_releq;
	if (releq == &sm->sm_freeq[0])
		smp->sm_flags |= SM_QNDX_ZERO;
	else
		smp->sm_flags &= ~SM_QNDX_ZERO;
	mutex_enter(&releq->smq_mtx);
	smpfreelist = releq->smq_free;
	if (smpfreelist == 0) {
		int want;

		releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		/*
		 * Both queue mutexes held to set sm_want;
		 * snapshot the value before dropping releq mutex.
		 * If sm_want appears after the releq mutex is dropped,
		 * then the smap just freed is already gone.
		 */
		want = sm->sm_want;
		mutex_exit(&releq->smq_mtx);
		/*
		 * See if there was a waiter before dropping the releq mutex,
		 * then recheck after obtaining the sm_freeq[0] mutex as
		 * another thread may have already signaled.
		 */
		if (want) {
			mutex_enter(&sm->sm_freeq[0].smq_mtx);
			if (sm->sm_want)
				cv_signal(&sm->sm_free_cv);
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
		}
	} else {
		smp->sm_next = smpfreelist;
		smp->sm_prev = smpfreelist->sm_prev;
		smpfreelist->sm_prev = smp;
		smp->sm_prev->sm_next = smp;
		mutex_exit(&releq->smq_mtx);
	}
}


static struct smap *
segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
{
	struct smap **hpp;
	struct smap *tmp;
	kmutex_t *hmtx;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_vp == NULL);
	ASSERT(smp->sm_hash == NULL);
	ASSERT(smp->sm_prev == NULL);
	ASSERT(smp->sm_next == NULL);
	ASSERT(hashid >= 0 && hashid <= smd_hashmsk);

	hmtx = SHASHMTX(hashid);

	mutex_enter(hmtx);
	/*
	 * First we need to verify that no one has created a smp
	 * with (vp, off) as its tag before we do.
	 */
	for (tmp = smd_hash[hashid].sh_hash_list;
	    tmp != NULL; tmp = tmp->sm_hash)
		if (tmp->sm_vp == vp && tmp->sm_off == off)
			break;

	if (tmp == NULL) {
		/*
		 * No one created one yet.
		 *
		 * Funniness here - we don't increment the ref count on the
		 * vnode * even though we have another pointer to it here.
		 * The reason for this is that we don't want the fact that
		 * a seg_map entry somewhere refers to a vnode to prevent the
		 * vnode * itself from going away.  This is because this
		 * reference to the vnode is a "soft one".  In the case where
		 * a mapping is being used by a rdwr [or directory routine?]
		 * there already has to be a non-zero ref count on the vnode.
		 * In the case where the vp has been freed and the smap
		 * structure is on the free list, there are no pages in memory
		 * that can refer to the vnode.  Thus even if we reuse the same
		 * vnode/smap structure for a vnode which has the same
		 * address but represents a different object, we are ok.
		 */
		smp->sm_vp = vp;
		smp->sm_off = off;

		hpp = &smd_hash[hashid].sh_hash_list;
		smp->sm_hash = *hpp;
		*hpp = smp;
#ifdef SEGMAP_HASHSTATS
		smd_hash_len[hashid]++;
#endif
	}
	mutex_exit(hmtx);

	return (tmp);
}

static void
segmap_hashout(struct smap *smp)
{
	struct smap **hpp, *hp;
	struct vnode *vp;
	kmutex_t *mtx;
	int hashid;
	u_offset_t off;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	vp = smp->sm_vp;
	off = smp->sm_off;

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	mtx = SHASHMTX(hashid);
	mutex_enter(mtx);

	hpp = &smd_hash[hashid].sh_hash_list;
	for (;;) {
		hp = *hpp;
		if (hp == NULL) {
			panic("segmap_hashout");
			/*NOTREACHED*/
		}
		if (hp == smp)
			break;
		hpp = &hp->sm_hash;
	}

	*hpp = smp->sm_hash;
	smp->sm_hash = NULL;
#ifdef SEGMAP_HASHSTATS
	smd_hash_len[hashid]--;
#endif
	mutex_exit(mtx);

	smp->sm_vp = NULL;
	smp->sm_off = (u_offset_t)0;

}

/*
 * Attempt to free unmodified, unmapped, and non-locked segmap
 * pages.
 */
void
segmap_pagefree(struct vnode *vp, u_offset_t off)
{
	u_offset_t pgoff;
	page_t *pp;

	for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {

		if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
			continue;

		switch (page_release(pp, 1)) {
		case PGREL_NOTREL:
			segmapcnt.smp_free_notfree.value.ul++;
			break;
		case PGREL_MOD:
			segmapcnt.smp_free_dirty.value.ul++;
			break;
		case PGREL_CLEAN:
			segmapcnt.smp_free.value.ul++;
			break;
		}
	}
}

/*
 * Locks held on entry: smap lock
 * Locks held on exit:  smap lock.
 */

static void
grab_smp(struct smap *smp, page_t *pp)
{
	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_refcnt == 0);

	if (smp->sm_vp != (struct vnode *)NULL) {
		struct vnode *vp = smp->sm_vp;
		u_offset_t off = smp->sm_off;
		/*
		 * Destroy old vnode association and
		 * unload any hardware translations to
		 * the old object.
		 */
		smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
		segmap_hashout(smp);

		/*
		 * This node is off freelist and hashlist,
		 * so there is no reason to drop/reacquire sm_mtx
		 * across calls to hat_unload.
		 */
		if (segmap_kpm) {
			caddr_t vaddr;
			int hat_unload_needed = 0;

			/*
			 * unload kpm mapping
			 */
			if (pp != NULL) {
				vaddr = hat_kpm_page2va(pp, 1);
				hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
				page_unlock(pp);
			}

			/*
			 * Check if we have (also) the rare case of a
			 * non kpm mapping.
			 */
			if (smp->sm_flags & SM_NOTKPM_RELEASED) {
				hat_unload_needed = 1;
				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			}

			if (hat_unload_needed) {
				hat_unload(kas.a_hat, segkmap->s_base +
				    ((smp - smd_smap) * MAXBSIZE),
				    MAXBSIZE, HAT_UNLOAD);
			}

		} else {
			ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
			smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			hat_unload(kas.a_hat, segkmap->s_base +
			    ((smp - smd_smap) * MAXBSIZE),
			    MAXBSIZE, HAT_UNLOAD);
		}
		segmap_pagefree(vp, off);
	}
}

static struct smap *
get_free_smp(int free_ndx)
{
	struct smfree *sm;
	kmutex_t *smtx;
	struct smap *smp, *first;
	struct sm_freeq *allocq, *releq;
	struct kpme *kpme;
	page_t *pp = NULL;
	int end_ndx, page_locked = 0;

	end_ndx = free_ndx;
	sm = &smd_free[free_ndx];

retry_queue:
	allocq = sm->sm_allocq;
	mutex_enter(&allocq->smq_mtx);

	if ((smp = allocq->smq_free) == NULL) {

skip_queue:
		/*
		 * The alloc list is empty or this queue is being skipped;
		 * first see if the allocq toggled.
		 */
		if (sm->sm_allocq != allocq) {
			/* queue changed */
			mutex_exit(&allocq->smq_mtx);
			goto retry_queue;
		}
		releq = sm->sm_releq;
		if (!mutex_tryenter(&releq->smq_mtx)) {
			/* cannot get releq; a free smp may be there now */
			mutex_exit(&allocq->smq_mtx);

			/*
			 * This loop could spin forever if this thread has
			 * higher priority than the thread that is holding
			 * releq->smq_mtx. In order to force the other thread
			 * to run, we'll lock/unlock the mutex which is safe
			 * since we just unlocked the allocq mutex.
			 */
			mutex_enter(&releq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			goto retry_queue;
		}
		if (releq->smq_free == NULL) {
			/*
			 * This freelist is empty.
			 * This should not happen unless clients
			 * are failing to release the segmap
			 * window after accessing the data.
			 * Before resorting to sleeping, try
			 * the next list of the same color.
			 */
			free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
			if (free_ndx != end_ndx) {
				mutex_exit(&releq->smq_mtx);
				mutex_exit(&allocq->smq_mtx);
				sm = &smd_free[free_ndx];
				goto retry_queue;
			}
			/*
			 * Tried all freelists of the same color once,
			 * wait on this list and hope something gets freed.
			 */
			segmapcnt.smp_get_nofree.value.ul++;
			sm->sm_want++;
			mutex_exit(&sm->sm_freeq[1].smq_mtx);
			cv_wait(&sm->sm_free_cv,
			    &sm->sm_freeq[0].smq_mtx);
			sm->sm_want--;
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
			sm = &smd_free[free_ndx];
			goto retry_queue;
		} else {
			/*
			 * Something on the rele queue; flip the alloc
			 * and rele queues and retry.
			 */
			sm->sm_allocq = releq;
			sm->sm_releq = allocq;
			mutex_exit(&allocq->smq_mtx);
			mutex_exit(&releq->smq_mtx);
			if (page_locked) {
				delay(hz >> 2);
				page_locked = 0;
			}
			goto retry_queue;
		}
	} else {
		/*
		 * Fastpath the case we get the smap mutex
		 * on the first try.
		 */
		first = smp;
next_smap:
		smtx = SMAPMTX(smp);
		if (!mutex_tryenter(smtx)) {
			/*
			 * Another thread is trying to reclaim this slot.
			 * Skip to the next queue or smap.
			 */
			if ((smp = smp->sm_next) == first) {
				goto skip_queue;
			} else {
				goto next_smap;
			}
		} else {
			/*
			 * if kpme exists, get shared lock on the page
			 */
			if (segmap_kpm && smp->sm_vp != NULL) {

				kpme = GET_KPME(smp);
				pp = kpme->kpe_page;

				if (pp != NULL) {
					if (!page_trylock(pp, SE_SHARED)) {
						smp = smp->sm_next;
						mutex_exit(smtx);
						page_locked = 1;

						pp = NULL;

						if (smp == first) {
							goto skip_queue;
						} else {
							goto next_smap;
						}
					} else {
						if (kpme->kpe_page == NULL) {
							page_unlock(pp);
							pp = NULL;
						}
					}
				}
			}

			/*
			 * At this point, we've selected smp.  Remove smp
			 * from its freelist.  If smp is the first one in
			 * the freelist, update the head of the freelist.
			 */
			if (first == smp) {
				ASSERT(first == allocq->smq_free);
				allocq->smq_free = smp->sm_next;
			}

			/*
			 * if the head of the freelist still points to smp,
			 * then there are no more free smaps in that list.
			 */
			if (allocq->smq_free == smp)
				/*
				 * Took the last one
				 */
				allocq->smq_free = NULL;
			else {
				smp->sm_prev->sm_next = smp->sm_next;
				smp->sm_next->sm_prev = smp->sm_prev;
			}
			mutex_exit(&allocq->smq_mtx);
			smp->sm_prev = smp->sm_next = NULL;

			/*
			 * if pp != NULL, pp must have been locked;
			 * grab_smp() unlocks pp.
			 */
			ASSERT((pp == NULL) || PAGE_LOCKED(pp));
			grab_smp(smp, pp);
			/* return smp locked. */
			ASSERT(SMAPMTX(smp) == smtx);
			ASSERT(MUTEX_HELD(smtx));
			return (smp);
		}
	}
}

/*
 * Special public segmap operations
 */

/*
 * Create pages (without using VOP_GETPAGE) and load up translations to them.
 * If softlock is TRUE, then set things up so that it looks like a call
 * to segmap_fault with F_SOFTLOCK.
 *
 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
 *
 * All fields in the generic segment (struct seg) are considered to be
 * read-only for "segmap" even though the kernel address space (kas) may
 * not be locked, hence no lock is needed to access them.
 */
int
segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	page_t *pp;
	u_offset_t off;
	struct smap *smp;
	struct vnode *vp;
	caddr_t eaddr;
	int newpage = 0;
	uint_t prot;
	kmutex_t *smtx;
	int hat_flag;

	ASSERT(seg->s_as == &kas);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release.  The SM_KPM_NEWPAGE flag is set
		 * in segmap_pagecreate_kpm when new pages are created,
		 * and it is returned as the "newpage" indication here.
		 */
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pagecreate: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		mutex_exit(smtx);

		return (newpage);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	smp = GET_SMAP(seg, addr);

	/*
	 * We don't grab smp mutex here since we assume the smp
	 * has a refcnt set already which prevents the slot from
	 * changing its id.
	 */
	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
	prot = smd->smd_prot;

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		hat_flag = HAT_LOAD;
		pp = page_lookup(vp, off, SE_SHARED);
		if (pp == NULL) {
			ushort_t bitindex;

			if ((pp = page_create_va(vp, off,
			    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
				panic("segmap_pagecreate: page_create failed");
				/*NOTREACHED*/
			}
			newpage = 1;
			page_io_unlock(pp);

			/*
			 * Since pages created here do not contain valid
			 * data until the caller writes into them, the
			 * "exclusive" lock will not be dropped to prevent
			 * other users from accessing the page.  We also
			 * have to lock the translation to prevent a fault
			 * from occurring when the virtual address mapped by
			 * this page is written into.  This is necessary to
			 * avoid a deadlock since we haven't dropped the
			 * "exclusive" lock.
			 */
			bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);

			/*
			 * Large Files: The following assertion is to
			 * verify the cast above.
			 */
			ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
			smtx = SMAPMTX(smp);
			mutex_enter(smtx);
			smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
			mutex_exit(smtx);

			hat_flag = HAT_LOAD_LOCK;
		} else if (softlock) {
			hat_flag = HAT_LOAD_LOCK;
		}

		if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
			hat_setmod(pp);

		hat_memload(kas.a_hat, addr, pp, prot, hat_flag);

		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);

		TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
		    "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
		    seg, addr, pp, vp, off);
	}

	return (newpage);
}

void
segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
	struct smap *smp;
	ushort_t bitmask;
	page_t *pp;
	struct vnode *vp;
	u_offset_t off;
	caddr_t eaddr;
	kmutex_t *smtx;

	ASSERT(seg->s_as == &kas);

	eaddr = addr + len;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release, so no pages or hat mappings have
		 * to be unlocked at this point.
		 */
#ifdef DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_pageunlock: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		ASSERT(smp->sm_refcnt > 0);
		mutex_exit(SMAPMTX(smp));
#endif
		return;
	}

	smp = GET_SMAP(seg, addr);
	smtx = SMAPMTX(smp);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));

	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
		bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * If the bit corresponding to "off" is set,
		 * clear this bit in the bitmap, unlock translations,
		 * and release the "exclusive" lock on the page.
		 */
		if (smp->sm_bitmap & bitmask) {
			mutex_enter(smtx);
			smp->sm_bitmap &= ~bitmask;
			mutex_exit(smtx);

			hat_unlock(kas.a_hat, addr, PAGESIZE);

			/*
			 * Use page_find() instead of page_lookup() to
			 * find the page since we know that it has
			 * an "exclusive" lock.
			 */
			pp = page_find(vp, off);
			if (pp == NULL) {
				panic("segmap_pageunlock: page not found");
				/*NOTREACHED*/
			}
			if (rw == S_WRITE) {
				hat_setrefmod(pp);
			} else if (rw != S_OTHER) {
				hat_setref(pp);
			}

			page_unlock(pp);
		}
	}
}

caddr_t
segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
{
	return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
}

/*
 * This is the magic virtual address that offset 0 of an ELF
 * file gets mapped to in user space. This is used to pick
 * the vac color on the freelist.
 */
#define	ELF_OFFZERO_VA	(0x10000)
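
/*
 * Illustrative sketch, not part of the original source: a write through
 * segmap differs from the read case mainly in the seg_rw hint and in the
 * release flags; SM_WRITE asks segmap_release() to push the dirty window
 * out via VOP_PUTPAGE().  The function name and the SEGMAP_EXAMPLE guard
 * are hypothetical and the code is never compiled.  Callers that overwrite
 * whole pages would additionally use segmap_pagecreate() to avoid reading
 * the old data in, and real filesystems do more careful cleanup on error.
 */
#ifdef SEGMAP_EXAMPLE
static int
segmap_example_write(struct vnode *vp, struct uio *uio)
{
	u_offset_t off = (u_offset_t)uio->uio_loffset;
	size_t mapon = (size_t)(off & MAXBOFFSET);	/* offset in chunk */
	size_t n = MIN(MAXBSIZE - mapon, (size_t)uio->uio_resid);
	caddr_t base;
	int error;

	/* Map the window for writing; prefault the translations. */
	base = segmap_getmapflt(segkmap, vp, off, n, 1, S_WRITE);

	/* Copy the caller's data into the window. */
	error = uiomove(base + mapon, n, UIO_WRITE, uio);

	if (error) {
		(void) segmap_release(segkmap, base, 0);
		return (error);
	}

	/* SM_WRITE pushes the dirty chunk out; SM_ASYNC would defer it. */
	return (segmap_release(segkmap, base, SM_WRITE));
}
#endif	/* SEGMAP_EXAMPLE */
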
/*
 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
 * in the range <off, off + len).  off doesn't need to be MAXBSIZE aligned.
 * The return address is always MAXBSIZE aligned.
 *
 * If forcefault is nonzero and the MMU translations haven't yet been created,
 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
 */
caddr_t
segmap_getmapflt(
	struct seg *seg,
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int forcefault,
	enum seg_rw rw)
{
	struct smap *smp, *nsmp;
	extern struct vnode *common_specvp();
	caddr_t baseaddr;			/* MAXBSIZE aligned */
	u_offset_t baseoff;
	int newslot;
	caddr_t vaddr;
	int color, hashid;
	kmutex_t *hashmtx, *smapmtx;
	struct smfree *sm;
	page_t *pp;
	struct kpme *kpme;
	uint_t prot;
	caddr_t base;
	page_t *pl[MAXPPB + 1];
	int error;
	int is_kpm = 1;

	ASSERT(seg->s_as == &kas);
	ASSERT(seg == segkmap);

	baseoff = off & (offset_t)MAXBMASK;
	if (off + len > baseoff + MAXBSIZE) {
		panic("segmap_getmap bad len");
		/*NOTREACHED*/
	}

	/*
	 * If this is a block device we have to be sure to use the
	 * "common" block device vnode for the mapping.
	 */
	if (vp->v_type == VBLK)
		vp = common_specvp(vp);

	smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;

	if (segmap_kpm == 0 ||
	    (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
		is_kpm = 0;
	}

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	hashmtx = SHASHMTX(hashid);

retry_hash:
	mutex_enter(hashmtx);
	for (smp = smd_hash[hashid].sh_hash_list;
	    smp != NULL; smp = smp->sm_hash)
		if (smp->sm_vp == vp && smp->sm_off == baseoff)
			break;
	mutex_exit(hashmtx);

vrfy_smp:
	if (smp != NULL) {

		ASSERT(vp->v_count != 0);

		/*
		 * Get smap lock and recheck its tag. The hash lock
		 * is dropped since the hash is based on (vp, off)
		 * and (vp, off) won't change when we have smap mtx.
		 */
		smapmtx = SMAPMTX(smp);
		mutex_enter(smapmtx);
		if (smp->sm_vp != vp || smp->sm_off != baseoff) {
			mutex_exit(smapmtx);
			goto retry_hash;
		}

		if (smp->sm_refcnt == 0) {

			smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;

			/*
			 * Could still be on the free list. However, this
			 * could also be an smp that is transitioning from
			 * the free list when we have too much contention
			 * for the smapmtx's. In this case, we have an
			 * unlocked smp that is not on the free list any
			 * longer, but still has a 0 refcnt. The only way
			 * to be sure is to check the freelist pointers.
			 * Since we now have the smapmtx, we are guaranteed
			 * that the (vp, off) won't change, so we are safe
			 * to reclaim it.  get_free_smp() knows that this
			 * can happen, and it will check the refcnt.
			 */

			if ((smp->sm_next != NULL)) {
				struct sm_freeq *freeq;

				ASSERT(smp->sm_prev != NULL);
				sm = &smd_free[smp->sm_free_ndx];

				if (smp->sm_flags & SM_QNDX_ZERO)
					freeq = &sm->sm_freeq[0];
				else
					freeq = &sm->sm_freeq[1];

				mutex_enter(&freeq->smq_mtx);
				if (freeq->smq_free != smp) {
					/*
					 * fastpath normal case
					 */
					smp->sm_prev->sm_next = smp->sm_next;
					smp->sm_next->sm_prev = smp->sm_prev;
				} else if (smp == smp->sm_next) {
					/*
					 * Taking the last smap on freelist
					 */
					freeq->smq_free = NULL;
				} else {
					/*
					 * Reclaiming 1st smap on list
					 */
					freeq->smq_free = smp->sm_next;
					smp->sm_prev->sm_next = smp->sm_next;
					smp->sm_next->sm_prev = smp->sm_prev;
				}
				mutex_exit(&freeq->smq_mtx);
				smp->sm_prev = smp->sm_next = NULL;
			} else {
				ASSERT(smp->sm_prev == NULL);
				segmapcnt.smp_stolen.value.ul++;
			}

		} else {
			segmapcnt.smp_get_use.value.ul++;
		}
		smp->sm_refcnt++;		/* another user */

		/*
		 * We don't invoke segmap_fault via TLB miss, so we set ref
		 * and mod bits in advance. For S_OTHER we set them in
		 * segmap_fault F_SOFTUNLOCK.
		 */
		if (is_kpm) {
			if (rw == S_WRITE) {
				smp->sm_flags |= SM_WRITE_DATA;
			} else if (rw == S_READ) {
				smp->sm_flags |= SM_READ_DATA;
			}
		}
		mutex_exit(smapmtx);

		newslot = 0;
	} else {

		uint32_t free_ndx, *free_ndxp;
		union segmap_cpu *scpu;

		/*
		 * On a PAC machine or a machine with anti-alias
		 * hardware, smd_colormsk will be zero.
		 *
		 * On a VAC machine- pick color by offset in the file
		 * so we won't get VAC conflicts on elf files.
		 * On data files, color does not matter but we
		 * don't know what kind of file it is so we always
		 * pick color by offset. This causes color
		 * corresponding to file offset zero to be used more
		 * heavily.
		 */
		color = (baseoff >> MAXBSHIFT) & smd_colormsk;
		scpu = smd_cpu+CPU->cpu_seqid;
		free_ndxp = &scpu->scpu.scpu_free_ndx[color];
		free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
#ifdef DEBUG
		colors_used[free_ndx]++;
#endif /* DEBUG */

		/*
		 * Get a locked smp slot from the free list.
		 */
		smp = get_free_smp(free_ndx);
		smapmtx = SMAPMTX(smp);

		ASSERT(smp->sm_vp == NULL);

		if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
			/*
			 * Failed to hashin, there exists one now.
			 * Return the smp we just allocated.
			 */
			segmap_smapadd(smp);
			mutex_exit(smapmtx);

			smp = nsmp;
			goto vrfy_smp;
		}
		smp->sm_refcnt++;		/* another user */

		/*
		 * We don't invoke segmap_fault via TLB miss, so we set ref
		 * and mod bits in advance. For S_OTHER we set them in
		 * segmap_fault F_SOFTUNLOCK.
		 */
		if (is_kpm) {
			if (rw == S_WRITE) {
				smp->sm_flags |= SM_WRITE_DATA;
			} else if (rw == S_READ) {
				smp->sm_flags |= SM_READ_DATA;
			}
		}
		mutex_exit(smapmtx);

		newslot = 1;
	}

	if (!is_kpm)
		goto use_segmap_range;

	/*
	 * Use segkpm
	 */
	/* Lint directive required until 6746211 is fixed */
	/*CONSTCOND*/
	ASSERT(PAGESIZE == MAXBSIZE);

	/*
	 * remember the last smp faulted on this cpu.
	 */
	(smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;

	if (forcefault == SM_PAGECREATE) {
		baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
		return (baseaddr);
	}

	if (newslot == 0 &&
	    (pp = GET_KPME(smp)->kpe_page) != NULL) {

		/* fastpath */
		switch (rw) {
		case S_READ:
		case S_WRITE:
			if (page_trylock(pp, SE_SHARED)) {
				if (PP_ISFREE(pp) ||
				    !(pp->p_vnode == vp &&
				    pp->p_offset == baseoff)) {
					page_unlock(pp);
					pp = page_lookup(vp, baseoff,
					    SE_SHARED);
				}
			} else {
				pp = page_lookup(vp, baseoff, SE_SHARED);
			}

			if (pp == NULL) {
				ASSERT(GET_KPME(smp)->kpe_page == NULL);
				break;
			}

			if (rw == S_WRITE &&
			    hat_page_getattr(pp, P_MOD | P_REF) !=
			    (P_MOD | P_REF)) {
				page_unlock(pp);
				break;
			}

			/*
			 * We have the p_selock as reader, grab_smp
			 * can't hit us, we have bumped the smap
			 * refcnt and hat_pageunload needs the
			 * p_selock exclusive.
			 */
			kpme = GET_KPME(smp);
			if (kpme->kpe_page == pp) {
				baseaddr = hat_kpm_page2va(pp, 0);
			} else if (kpme->kpe_page == NULL) {
				baseaddr = hat_kpm_mapin(pp, kpme);
			} else {
				panic("segmap_getmapflt: stale "
				    "kpme page, kpme %p", (void *)kpme);
				/*NOTREACHED*/
			}

			/*
			 * We don't invoke segmap_fault via TLB miss,
			 * so we set ref and mod bits in advance.
			 * For S_OTHER we set them in segmap_fault
			 * F_SOFTUNLOCK.
			 */
			if (rw == S_READ && !hat_isref(pp))
				hat_setref(pp);

			return (baseaddr);
		default:
			break;
		}
	}

	base = segkpm_create_va(baseoff);
	error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
	    seg, base, rw, CRED(), NULL);

	pp = pl[0];
	if (error || pp == NULL) {
		/*
		 * Use segmap address slot and let segmap_fault deal
		 * with the error cases. There is no error return
		 * possible here.
		 */
		goto use_segmap_range;
	}

	ASSERT(pl[1] == NULL);

	/*
	 * When prot is not returned w/ PROT_ALL the returned pages
	 * are not backed by fs blocks. For most of the segmap users
	 * this is no problem, they don't write to the pages in the
	 * same request and therefore don't rely on a following
	 * trap driven segmap_fault. With SM_LOCKPROTO users it
	 * is more secure to use segkmap addresses to allow
	 * protection via segmap_fault.
	 */
	if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
		/*
		 * Use segmap address slot and let segmap_fault
		 * do the error return.
		 */
		ASSERT(rw != S_WRITE);
		ASSERT(PAGE_LOCKED(pp));
		page_unlock(pp);
		forcefault = 0;
		goto use_segmap_range;
	}

	/*
	 * We have the p_selock as reader, grab_smp can't hit us, we
	 * have bumped the smap refcnt and hat_pageunload needs the
	 * p_selock exclusive.
	 */
	kpme = GET_KPME(smp);
	if (kpme->kpe_page == pp) {
		baseaddr = hat_kpm_page2va(pp, 0);
	} else if (kpme->kpe_page == NULL) {
		baseaddr = hat_kpm_mapin(pp, kpme);
	} else {
		panic("segmap_getmapflt: stale kpme page after "
		    "VOP_GETPAGE, kpme %p", (void *)kpme);
		/*NOTREACHED*/
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;

	return (baseaddr);


use_segmap_range:
	baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
	TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
	    "segmap_getmap:seg %p addr %p vp %p offset %llx",
	    seg, baseaddr, vp, baseoff);

	/*
	 * Prefault the translations
	 */
	vaddr = baseaddr + (off - baseoff);
	if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {

		caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
		    (uintptr_t)PAGEMASK);

		(void) segmap_fault(kas.a_hat, seg, pgaddr,
		    (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
		    F_INVAL, rw);
	}

	return (baseaddr);
}

int
segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
{
	struct smap *smp;
	int error;
	int bflags = 0;
	struct vnode *vp;
	u_offset_t offset;
	kmutex_t *smtx;
	int is_kpm = 0;
	page_t *pp;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		if (((uintptr_t)addr & MAXBOFFSET) != 0) {
			panic("segmap_release: addr %p not "
			    "MAXBSIZE aligned", (void *)addr);
			/*NOTREACHED*/
		}

		if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
			panic("segmap_release: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
		    "segmap_relmap:seg %p addr %p smp %p",
		    seg, addr, smp);

		smtx = SMAPMTX(smp);

		/*
		 * For compatibility reasons segmap_pagecreate_kpm sets this
		 * flag to allow a following segmap_pagecreate to return
		 * this as "newpage" flag. When segmap_pagecreate is not
		 * called at all we clear it now.
		 */
		smp->sm_flags &= ~SM_KPM_NEWPAGE;
		is_kpm = 1;
		if (smp->sm_flags & SM_WRITE_DATA) {
			hat_setrefmod(pp);
		} else if (smp->sm_flags & SM_READ_DATA) {
			hat_setref(pp);
		}
	} else {
		if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
		    ((uintptr_t)addr & MAXBOFFSET) != 0) {
			panic("segmap_release: bad addr %p", (void *)addr);
			/*NOTREACHED*/
		}
		smp = GET_SMAP(seg, addr);

		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
		    "segmap_relmap:seg %p addr %p smp %p",
		    seg, addr, smp);

		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_NOTKPM_RELEASED;
	}

	ASSERT(smp->sm_refcnt > 0);

	/*
	 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
	 * are set.
	 */
	if ((flags & ~SM_DONTNEED) != 0) {
		if (flags & SM_WRITE)
			segmapcnt.smp_rel_write.value.ul++;
		if (flags & SM_ASYNC) {
			bflags |= B_ASYNC;
			segmapcnt.smp_rel_async.value.ul++;
		}
		if (flags & SM_INVAL) {
			bflags |= B_INVAL;
			segmapcnt.smp_rel_abort.value.ul++;
		}
		if (flags & SM_DESTROY) {
			bflags |= (B_INVAL|B_TRUNC);
			segmapcnt.smp_rel_abort.value.ul++;
		}
		if (smp->sm_refcnt == 1) {
			/*
			 * We only bother doing the FREE and DONTNEED flags
			 * if no one else is still referencing this mapping.
2067 */ 2068 if (flags & SM_FREE) { 2069 bflags |= B_FREE; 2070 segmapcnt.smp_rel_free.value.ul++; 2071 } 2072 if (flags & SM_DONTNEED) { 2073 bflags |= B_DONTNEED; 2074 segmapcnt.smp_rel_dontneed.value.ul++; 2075 } 2076 } 2077 } else { 2078 smd_cpu[CPU->cpu_seqid].scpu.scpu_release++; 2079 } 2080 2081 vp = smp->sm_vp; 2082 offset = smp->sm_off; 2083 2084 if (--smp->sm_refcnt == 0) { 2085 2086 smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA); 2087 2088 if (flags & (SM_INVAL|SM_DESTROY)) { 2089 segmap_hashout(smp); /* remove map info */ 2090 if (is_kpm) { 2091 hat_kpm_mapout(pp, GET_KPME(smp), addr); 2092 if (smp->sm_flags & SM_NOTKPM_RELEASED) { 2093 smp->sm_flags &= ~SM_NOTKPM_RELEASED; 2094 hat_unload(kas.a_hat, addr, MAXBSIZE, 2095 HAT_UNLOAD); 2096 } 2097 2098 } else { 2099 if (segmap_kpm) 2100 segkpm_mapout_validkpme(GET_KPME(smp)); 2101 2102 smp->sm_flags &= ~SM_NOTKPM_RELEASED; 2103 hat_unload(kas.a_hat, addr, MAXBSIZE, 2104 HAT_UNLOAD); 2105 } 2106 } 2107 segmap_smapadd(smp); /* add to free list */ 2108 } 2109 2110 mutex_exit(smtx); 2111 2112 if (is_kpm) 2113 page_unlock(pp); 2114 /* 2115 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED) 2116 * are set. 2117 */ 2118 if ((flags & ~SM_DONTNEED) != 0) { 2119 error = VOP_PUTPAGE(vp, offset, MAXBSIZE, 2120 bflags, CRED(), NULL); 2121 } else { 2122 error = 0; 2123 } 2124 2125 return (error); 2126 } 2127 2128 /* 2129 * Dump the pages belonging to this segmap segment. 2130 */ 2131 static void 2132 segmap_dump(struct seg *seg) 2133 { 2134 struct segmap_data *smd; 2135 struct smap *smp, *smp_end; 2136 page_t *pp; 2137 pfn_t pfn; 2138 u_offset_t off; 2139 caddr_t addr; 2140 2141 smd = (struct segmap_data *)seg->s_data; 2142 addr = seg->s_base; 2143 for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages; 2144 smp < smp_end; smp++) { 2145 2146 if (smp->sm_refcnt) { 2147 for (off = 0; off < MAXBSIZE; off += PAGESIZE) { 2148 int we_own_it = 0; 2149 2150 /* 2151 * If pp == NULL, the page either does 2152 * not exist or is exclusively locked. 2153 * So determine if it exists before 2154 * searching for it. 
2155 */ 2156 if ((pp = page_lookup_nowait(smp->sm_vp, 2157 smp->sm_off + off, SE_SHARED))) 2158 we_own_it = 1; 2159 else 2160 pp = page_exists(smp->sm_vp, 2161 smp->sm_off + off); 2162 2163 if (pp) { 2164 pfn = page_pptonum(pp); 2165 dump_addpage(seg->s_as, 2166 addr + off, pfn); 2167 if (we_own_it) 2168 page_unlock(pp); 2169 } 2170 dump_timeleft = dump_timeout; 2171 } 2172 } 2173 addr += MAXBSIZE; 2174 } 2175 } 2176 2177 /*ARGSUSED*/ 2178 static int 2179 segmap_pagelock(struct seg *seg, caddr_t addr, size_t len, 2180 struct page ***ppp, enum lock_type type, enum seg_rw rw) 2181 { 2182 return (ENOTSUP); 2183 } 2184 2185 static int 2186 segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp) 2187 { 2188 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 2189 2190 memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp; 2191 memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base); 2192 return (0); 2193 } 2194 2195 /*ARGSUSED*/ 2196 static lgrp_mem_policy_info_t * 2197 segmap_getpolicy(struct seg *seg, caddr_t addr) 2198 { 2199 return (NULL); 2200 } 2201 2202 /*ARGSUSED*/ 2203 static int 2204 segmap_capable(struct seg *seg, segcapability_t capability) 2205 { 2206 return (0); 2207 } 2208 2209 2210 #ifdef SEGKPM_SUPPORT 2211 2212 /* 2213 * segkpm support routines 2214 */ 2215 2216 static caddr_t 2217 segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off, 2218 struct smap *smp, enum seg_rw rw) 2219 { 2220 caddr_t base; 2221 page_t *pp; 2222 int newpage = 0; 2223 struct kpme *kpme; 2224 2225 ASSERT(smp->sm_refcnt > 0); 2226 2227 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) { 2228 kmutex_t *smtx; 2229 2230 base = segkpm_create_va(off); 2231 2232 if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT, 2233 seg, base)) == NULL) { 2234 panic("segmap_pagecreate_kpm: " 2235 "page_create failed"); 2236 /*NOTREACHED*/ 2237 } 2238 2239 newpage = 1; 2240 page_io_unlock(pp); 2241 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX); 2242 2243 /* 2244 * Mark this here until the following segmap_pagecreate 2245 * or segmap_release. 2246 */ 2247 smtx = SMAPMTX(smp); 2248 mutex_enter(smtx); 2249 smp->sm_flags |= SM_KPM_NEWPAGE; 2250 mutex_exit(smtx); 2251 } 2252 2253 kpme = GET_KPME(smp); 2254 if (!newpage && kpme->kpe_page == pp) 2255 base = hat_kpm_page2va(pp, 0); 2256 else 2257 base = hat_kpm_mapin(pp, kpme); 2258 2259 /* 2260 * FS code may decide not to call segmap_pagecreate and we 2261 * don't invoke segmap_fault via TLB miss, so we have to set 2262 * ref and mod bits in advance. 2263 */ 2264 if (rw == S_WRITE) { 2265 hat_setrefmod(pp); 2266 } else { 2267 ASSERT(rw == S_READ); 2268 hat_setref(pp); 2269 } 2270 2271 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++; 2272 2273 return (base); 2274 } 2275 2276 /* 2277 * Find the smap structure corresponding to the 2278 * KPM addr and return it locked. 2279 */ 2280 struct smap * 2281 get_smap_kpm(caddr_t addr, page_t **ppp) 2282 { 2283 struct smap *smp; 2284 struct vnode *vp; 2285 u_offset_t offset; 2286 caddr_t baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK); 2287 int hashid; 2288 kmutex_t *hashmtx; 2289 page_t *pp; 2290 union segmap_cpu *scpu; 2291 2292 pp = hat_kpm_vaddr2page(baseaddr); 2293 2294 ASSERT(pp && !PP_ISFREE(pp)); 2295 ASSERT(PAGE_LOCKED(pp)); 2296 ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0); 2297 2298 vp = pp->p_vnode; 2299 offset = pp->p_offset; 2300 ASSERT(vp != NULL); 2301 2302 /* 2303 * Assume the last smap used on this cpu is the one needed. 
2304 */ 2305 scpu = smd_cpu+CPU->cpu_seqid; 2306 smp = scpu->scpu.scpu_last_smap; 2307 mutex_enter(&smp->sm_mtx); 2308 if (smp->sm_vp == vp && smp->sm_off == offset) { 2309 ASSERT(smp->sm_refcnt > 0); 2310 } else { 2311 /* 2312 * Assumption wrong, find the smap on the hash chain. 2313 */ 2314 mutex_exit(&smp->sm_mtx); 2315 SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */ 2316 hashmtx = SHASHMTX(hashid); 2317 2318 mutex_enter(hashmtx); 2319 smp = smd_hash[hashid].sh_hash_list; 2320 for (; smp != NULL; smp = smp->sm_hash) { 2321 if (smp->sm_vp == vp && smp->sm_off == offset) 2322 break; 2323 } 2324 mutex_exit(hashmtx); 2325 if (smp) { 2326 mutex_enter(&smp->sm_mtx); 2327 ASSERT(smp->sm_vp == vp && smp->sm_off == offset); 2328 } 2329 } 2330 2331 if (ppp) 2332 *ppp = smp ? pp : NULL; 2333 2334 return (smp); 2335 } 2336 2337 #else /* SEGKPM_SUPPORT */ 2338 2339 /* segkpm stubs */ 2340 2341 /*ARGSUSED*/ 2342 static caddr_t 2343 segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off, 2344 struct smap *smp, enum seg_rw rw) 2345 { 2346 return (NULL); 2347 } 2348 2349 /*ARGSUSED*/ 2350 struct smap * 2351 get_smap_kpm(caddr_t addr, page_t **ppp) 2352 { 2353 return (NULL); 2354 } 2355 2356 #endif /* SEGKPM_SUPPORT */ 2357
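/*
 * Illustrative usage sketch (not part of the original source, kept under
 * "#if 0" so it is never compiled): the typical way a file system read
 * path consumes the interfaces above is to map a MAXBSIZE-aligned
 * vnode/offset chunk with segmap_getmapflt(), uiomove() the data through
 * the returned mapping, and then drop the slot with segmap_release().
 * The function name example_fs_read() and the simplified size/error
 * handling are hypothetical; a real file system does considerably more
 * work (file size checks, rwlocks, EOF handling) around this loop.
 */
#if 0
static int
example_fs_read(struct vnode *vp, struct uio *uiop)
{
	caddr_t base;
	size_t mapon;		/* offset of the request within the block */
	size_t n;		/* bytes to move in this iteration */
	int error = 0;

	while (uiop->uio_resid > 0 && error == 0) {
		mapon = uiop->uio_loffset & (offset_t)MAXBOFFSET;
		n = MIN(MAXBSIZE - mapon, uiop->uio_resid);

		/*
		 * forcefault != 0 asks segmap_getmapflt() to make the
		 * translations valid before returning, so the uiomove()
		 * below normally does not take a fault.
		 */
		base = segmap_getmapflt(segkmap, vp,
		    (u_offset_t)uiop->uio_loffset, n, 1, S_READ);

		error = uiomove(base + mapon, n, UIO_READ, uiop);

		/*
		 * No flags: just drop the reference; segmap_release()
		 * issues no VOP_PUTPAGE() on this path.
		 */
		if (error == 0)
			error = segmap_release(segkmap, base, 0);
		else
			(void) segmap_release(segkmap, base, 0);
	}
	return (error);
}
#endif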
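/*
 * A companion sketch for the write side, likewise hypothetical and never
 * compiled: when SM_WRITE (optionally combined with SM_ASYNC and
 * SM_DONTNEED) is passed to segmap_release(), the VOP_PUTPAGE() call is
 * issued by segmap_release() itself with the matching B_* flags, as the
 * code above shows. example_fs_write() and its error handling are
 * invented for illustration only.
 */
#if 0
static int
example_fs_write(struct vnode *vp, struct uio *uiop)
{
	caddr_t base;
	size_t mapon, n;
	int error = 0;

	while (uiop->uio_resid > 0 && error == 0) {
		mapon = uiop->uio_loffset & (offset_t)MAXBOFFSET;
		n = MIN(MAXBSIZE - mapon, uiop->uio_resid);

		/* Map and prefault the block that will be modified. */
		base = segmap_getmapflt(segkmap, vp,
		    (u_offset_t)uiop->uio_loffset, n, 1, S_WRITE);

		error = uiomove(base + mapon, n, UIO_WRITE, uiop);

		if (error == 0) {
			/*
			 * Push the dirty block asynchronously and let the
			 * pages age out of the cache.
			 */
			error = segmap_release(segkmap, base,
			    SM_WRITE | SM_ASYNC | SM_DONTNEED);
		} else {
			/* Invalidate the possibly half-filled block. */
			(void) segmap_release(segkmap, base, SM_INVAL);
		}
	}
	return (error);
}
#endif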