1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * Portions of this source code were derived from Berkeley 4.3 BSD 31 * under license from the Regents of the University of California. 32 */ 33 34 /* 35 * VM - generic vnode mapping segment. 36 * 37 * The segmap driver is used only by the kernel to get faster (than seg_vn) 38 * mappings [lower routine overhead; more persistent cache] to random 39 * vnode/offsets. Note than the kernel may (and does) use seg_vn as well. 40 */ 41 42 #include <sys/types.h> 43 #include <sys/t_lock.h> 44 #include <sys/param.h> 45 #include <sys/sysmacros.h> 46 #include <sys/buf.h> 47 #include <sys/systm.h> 48 #include <sys/vnode.h> 49 #include <sys/mman.h> 50 #include <sys/errno.h> 51 #include <sys/cred.h> 52 #include <sys/kmem.h> 53 #include <sys/vtrace.h> 54 #include <sys/cmn_err.h> 55 #include <sys/debug.h> 56 #include <sys/thread.h> 57 #include <sys/dumphdr.h> 58 #include <sys/bitmap.h> 59 #include <sys/lgrp.h> 60 61 #include <vm/seg_kmem.h> 62 #include <vm/hat.h> 63 #include <vm/as.h> 64 #include <vm/seg.h> 65 #include <vm/seg_kpm.h> 66 #include <vm/seg_map.h> 67 #include <vm/page.h> 68 #include <vm/pvn.h> 69 #include <vm/rm.h> 70 71 /* 72 * Private seg op routines. 
73 */ 74 static void segmap_free(struct seg *seg); 75 faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr, 76 size_t len, enum fault_type type, enum seg_rw rw); 77 static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr); 78 static int segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, 79 uint_t prot); 80 static int segmap_kluster(struct seg *seg, caddr_t addr, ssize_t); 81 static int segmap_getprot(struct seg *seg, caddr_t addr, size_t len, 82 uint_t *protv); 83 static u_offset_t segmap_getoffset(struct seg *seg, caddr_t addr); 84 static int segmap_gettype(struct seg *seg, caddr_t addr); 85 static int segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp); 86 static void segmap_dump(struct seg *seg); 87 static int segmap_pagelock(struct seg *seg, caddr_t addr, size_t len, 88 struct page ***ppp, enum lock_type type, 89 enum seg_rw rw); 90 static void segmap_badop(void); 91 static int segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp); 92 static lgrp_mem_policy_info_t *segmap_getpolicy(struct seg *seg, 93 caddr_t addr); 94 static int segmap_capable(struct seg *seg, segcapability_t capability); 95 96 /* segkpm support */ 97 static caddr_t segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t, 98 struct smap *, enum seg_rw); 99 struct smap *get_smap_kpm(caddr_t, page_t **); 100 101 #define SEGMAP_BADOP(t) (t(*)())segmap_badop 102 103 static struct seg_ops segmap_ops = { 104 SEGMAP_BADOP(int), /* dup */ 105 SEGMAP_BADOP(int), /* unmap */ 106 segmap_free, 107 segmap_fault, 108 segmap_faulta, 109 SEGMAP_BADOP(int), /* setprot */ 110 segmap_checkprot, 111 segmap_kluster, 112 SEGMAP_BADOP(size_t), /* swapout */ 113 SEGMAP_BADOP(int), /* sync */ 114 SEGMAP_BADOP(size_t), /* incore */ 115 SEGMAP_BADOP(int), /* lockop */ 116 segmap_getprot, 117 segmap_getoffset, 118 segmap_gettype, 119 segmap_getvp, 120 SEGMAP_BADOP(int), /* advise */ 121 segmap_dump, 122 segmap_pagelock, /* pagelock */ 123 SEGMAP_BADOP(int), /* setpgsz */ 124 segmap_getmemid, /* getmemid */ 125 segmap_getpolicy, /* getpolicy */ 126 segmap_capable, /* capable */ 127 seg_inherit_notsup /* inherit */ 128 }; 129 130 /* 131 * Private segmap routines. 132 */ 133 static void segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr, 134 size_t len, enum seg_rw rw, struct smap *smp); 135 static void segmap_smapadd(struct smap *smp); 136 static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp, 137 u_offset_t off, int hashid); 138 static void segmap_hashout(struct smap *smp); 139 140 141 /* 142 * Statistics for segmap operations. 143 * 144 * No explicit locking to protect these stats. 
145 */ 146 struct segmapcnt segmapcnt = { 147 { "fault", KSTAT_DATA_ULONG }, 148 { "faulta", KSTAT_DATA_ULONG }, 149 { "getmap", KSTAT_DATA_ULONG }, 150 { "get_use", KSTAT_DATA_ULONG }, 151 { "get_reclaim", KSTAT_DATA_ULONG }, 152 { "get_reuse", KSTAT_DATA_ULONG }, 153 { "get_unused", KSTAT_DATA_ULONG }, 154 { "get_nofree", KSTAT_DATA_ULONG }, 155 { "rel_async", KSTAT_DATA_ULONG }, 156 { "rel_write", KSTAT_DATA_ULONG }, 157 { "rel_free", KSTAT_DATA_ULONG }, 158 { "rel_abort", KSTAT_DATA_ULONG }, 159 { "rel_dontneed", KSTAT_DATA_ULONG }, 160 { "release", KSTAT_DATA_ULONG }, 161 { "pagecreate", KSTAT_DATA_ULONG }, 162 { "free_notfree", KSTAT_DATA_ULONG }, 163 { "free_dirty", KSTAT_DATA_ULONG }, 164 { "free", KSTAT_DATA_ULONG }, 165 { "stolen", KSTAT_DATA_ULONG }, 166 { "get_nomtx", KSTAT_DATA_ULONG } 167 }; 168 169 kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt; 170 uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t); 171 172 /* 173 * Return number of map pages in segment. 174 */ 175 #define MAP_PAGES(seg) ((seg)->s_size >> MAXBSHIFT) 176 177 /* 178 * Translate addr into smap number within segment. 179 */ 180 #define MAP_PAGE(seg, addr) (((addr) - (seg)->s_base) >> MAXBSHIFT) 181 182 /* 183 * Translate addr in seg into struct smap pointer. 184 */ 185 #define GET_SMAP(seg, addr) \ 186 &(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)]) 187 188 /* 189 * Bit in map (16 bit bitmap). 190 */ 191 #define SMAP_BIT_MASK(bitindex) (1 << ((bitindex) & 0xf)) 192 193 static int smd_colormsk = 0; 194 static int smd_ncolor = 0; 195 static int smd_nfree = 0; 196 static int smd_freemsk = 0; 197 #ifdef DEBUG 198 static int *colors_used; 199 #endif 200 static struct smap *smd_smap; 201 static struct smaphash *smd_hash; 202 #ifdef SEGMAP_HASHSTATS 203 static unsigned int *smd_hash_len; 204 #endif 205 static struct smfree *smd_free; 206 static ulong_t smd_hashmsk = 0; 207 208 #define SEGMAP_MAXCOLOR 2 209 #define SEGMAP_CACHE_PAD 64 210 211 union segmap_cpu { 212 struct { 213 uint32_t scpu_free_ndx[SEGMAP_MAXCOLOR]; 214 struct smap *scpu_last_smap; 215 ulong_t scpu_getmap; 216 ulong_t scpu_release; 217 ulong_t scpu_get_reclaim; 218 ulong_t scpu_fault; 219 ulong_t scpu_pagecreate; 220 ulong_t scpu_get_reuse; 221 } scpu; 222 char scpu_pad[SEGMAP_CACHE_PAD]; 223 }; 224 static union segmap_cpu *smd_cpu; 225 226 /* 227 * There are three locks in seg_map: 228 * - per freelist mutexes 229 * - per hashchain mutexes 230 * - per smap mutexes 231 * 232 * The lock ordering is to get the smap mutex to lock down the slot 233 * first then the hash lock (for hash in/out (vp, off) list) or the 234 * freelist lock to put the slot back on the free list. 235 * 236 * The hash search is done by only holding the hashchain lock, when a wanted 237 * slot is found, we drop the hashchain lock then lock the slot so there 238 * is no overlapping of hashchain and smap locks. After the slot is 239 * locked, we verify again if the slot is still what we are looking 240 * for. 241 * 242 * Allocation of a free slot is done by holding the freelist lock, 243 * then locking the smap slot at the head of the freelist. This is 244 * in reversed lock order so mutex_tryenter() is used. 245 * 246 * The smap lock protects all fields in smap structure except for 247 * the link fields for hash/free lists which are protected by 248 * hashchain and freelist locks. 
249 */ 250 251 #define SHASHMTX(hashid) (&smd_hash[hashid].sh_mtx) 252 253 #define SMP2SMF(smp) (&smd_free[(smp - smd_smap) & smd_freemsk]) 254 #define SMP2SMF_NDX(smp) (ushort_t)((smp - smd_smap) & smd_freemsk) 255 256 #define SMAPMTX(smp) (&smp->sm_mtx) 257 258 #define SMAP_HASHFUNC(vp, off, hashid) \ 259 { \ 260 hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \ 261 ((off) >> MAXBSHIFT)) & smd_hashmsk); \ 262 } 263 264 /* 265 * The most frequently updated kstat counters are kept in the 266 * per cpu array to avoid hot cache blocks. The update function 267 * sums the cpu local counters to update the global counters. 268 */ 269 270 /* ARGSUSED */ 271 int 272 segmap_kstat_update(kstat_t *ksp, int rw) 273 { 274 int i; 275 ulong_t getmap, release, get_reclaim; 276 ulong_t fault, pagecreate, get_reuse; 277 278 if (rw == KSTAT_WRITE) 279 return (EACCES); 280 getmap = release = get_reclaim = (ulong_t)0; 281 fault = pagecreate = get_reuse = (ulong_t)0; 282 for (i = 0; i < max_ncpus; i++) { 283 getmap += smd_cpu[i].scpu.scpu_getmap; 284 release += smd_cpu[i].scpu.scpu_release; 285 get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim; 286 fault += smd_cpu[i].scpu.scpu_fault; 287 pagecreate += smd_cpu[i].scpu.scpu_pagecreate; 288 get_reuse += smd_cpu[i].scpu.scpu_get_reuse; 289 } 290 segmapcnt.smp_getmap.value.ul = getmap; 291 segmapcnt.smp_release.value.ul = release; 292 segmapcnt.smp_get_reclaim.value.ul = get_reclaim; 293 segmapcnt.smp_fault.value.ul = fault; 294 segmapcnt.smp_pagecreate.value.ul = pagecreate; 295 segmapcnt.smp_get_reuse.value.ul = get_reuse; 296 return (0); 297 } 298 299 int 300 segmap_create(struct seg *seg, void *argsp) 301 { 302 struct segmap_data *smd; 303 struct smap *smp; 304 struct smfree *sm; 305 struct segmap_crargs *a = (struct segmap_crargs *)argsp; 306 struct smaphash *shashp; 307 union segmap_cpu *scpu; 308 long i, npages; 309 size_t hashsz; 310 uint_t nfreelist; 311 extern void prefetch_smap_w(void *); 312 extern int max_ncpus; 313 314 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock)); 315 316 if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) { 317 panic("segkmap not MAXBSIZE aligned"); 318 /*NOTREACHED*/ 319 } 320 321 smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP); 322 323 seg->s_data = (void *)smd; 324 seg->s_ops = &segmap_ops; 325 smd->smd_prot = a->prot; 326 327 /* 328 * Scale the number of smap freelists to be 329 * proportional to max_ncpus * number of virtual colors. 330 * The caller can over-ride this scaling by providing 331 * a non-zero a->nfreelist argument. 332 */ 333 nfreelist = a->nfreelist; 334 if (nfreelist == 0) 335 nfreelist = max_ncpus; 336 else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) { 337 cmn_err(CE_WARN, "segmap_create: nfreelist out of range " 338 "%d, using %d", nfreelist, max_ncpus); 339 nfreelist = max_ncpus; 340 } 341 if (!ISP2(nfreelist)) { 342 /* round up nfreelist to the next power of two. */ 343 nfreelist = 1 << (highbit(nfreelist)); 344 } 345 346 /* 347 * Get the number of virtual colors - must be a power of 2. 348 */ 349 if (a->shmsize) 350 smd_ncolor = a->shmsize >> MAXBSHIFT; 351 else 352 smd_ncolor = 1; 353 ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0); 354 ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR); 355 smd_colormsk = smd_ncolor - 1; 356 smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist; 357 smd_freemsk = smd_nfree - 1; 358 359 /* 360 * Allocate and initialize the freelist headers. 361 * Note that sm_freeq[1] starts out as the release queue. 
This 362 * is known when the smap structures are initialized below. 363 */ 364 smd_free = smd->smd_free = 365 kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP); 366 for (i = 0; i < smd_nfree; i++) { 367 sm = &smd->smd_free[i]; 368 mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL); 369 mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL); 370 sm->sm_allocq = &sm->sm_freeq[0]; 371 sm->sm_releq = &sm->sm_freeq[1]; 372 } 373 374 /* 375 * Allocate and initialize the smap hash chain headers. 376 * Compute hash size rounding down to the next power of two. 377 */ 378 npages = MAP_PAGES(seg); 379 smd->smd_npages = npages; 380 hashsz = npages / SMAP_HASHAVELEN; 381 hashsz = 1 << (highbit(hashsz)-1); 382 smd_hashmsk = hashsz - 1; 383 smd_hash = smd->smd_hash = 384 kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP); 385 #ifdef SEGMAP_HASHSTATS 386 smd_hash_len = 387 kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP); 388 #endif 389 for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) { 390 shashp->sh_hash_list = NULL; 391 mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL); 392 } 393 394 /* 395 * Allocate and initialize the smap structures. 396 * Link all slots onto the appropriate freelist. 397 * The smap array is large enough to affect boot time 398 * on large systems, so use memory prefetching and only 399 * go through the array 1 time. Inline a optimized version 400 * of segmap_smapadd to add structures to freelists with 401 * knowledge that no locks are needed here. 402 */ 403 smd_smap = smd->smd_sm = 404 kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP); 405 406 for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1]; 407 smp >= smd->smd_sm; smp--) { 408 struct smap *smpfreelist; 409 struct sm_freeq *releq; 410 411 prefetch_smap_w((char *)smp); 412 413 smp->sm_vp = NULL; 414 smp->sm_hash = NULL; 415 smp->sm_off = 0; 416 smp->sm_bitmap = 0; 417 smp->sm_refcnt = 0; 418 mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL); 419 smp->sm_free_ndx = SMP2SMF_NDX(smp); 420 421 sm = SMP2SMF(smp); 422 releq = sm->sm_releq; 423 424 smpfreelist = releq->smq_free; 425 if (smpfreelist == 0) { 426 releq->smq_free = smp->sm_next = smp->sm_prev = smp; 427 } else { 428 smp->sm_next = smpfreelist; 429 smp->sm_prev = smpfreelist->sm_prev; 430 smpfreelist->sm_prev = smp; 431 smp->sm_prev->sm_next = smp; 432 releq->smq_free = smp->sm_next; 433 } 434 435 /* 436 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1] 437 */ 438 smp->sm_flags = 0; 439 440 #ifdef SEGKPM_SUPPORT 441 /* 442 * Due to the fragile prefetch loop no 443 * separate function is used here. 444 */ 445 smp->sm_kpme_next = NULL; 446 smp->sm_kpme_prev = NULL; 447 smp->sm_kpme_page = NULL; 448 #endif 449 } 450 451 /* 452 * Allocate the per color indices that distribute allocation 453 * requests over the free lists. Each cpu will have a private 454 * rotor index to spread the allocations even across the available 455 * smap freelists. Init the scpu_last_smap field to the first 456 * smap element so there is no need to check for NULL. 457 */ 458 smd_cpu = 459 kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP); 460 for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) { 461 int j; 462 for (j = 0; j < smd_ncolor; j++) 463 scpu->scpu.scpu_free_ndx[j] = j; 464 scpu->scpu.scpu_last_smap = smd_smap; 465 } 466 467 vpm_init(); 468 469 #ifdef DEBUG 470 /* 471 * Keep track of which colors are used more often. 
472 */ 473 colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP); 474 #endif /* DEBUG */ 475 476 return (0); 477 } 478 479 static void 480 segmap_free(seg) 481 struct seg *seg; 482 { 483 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock)); 484 } 485 486 /* 487 * Do a F_SOFTUNLOCK call over the range requested. 488 * The range must have already been F_SOFTLOCK'ed. 489 */ 490 static void 491 segmap_unlock( 492 struct hat *hat, 493 struct seg *seg, 494 caddr_t addr, 495 size_t len, 496 enum seg_rw rw, 497 struct smap *smp) 498 { 499 page_t *pp; 500 caddr_t adr; 501 u_offset_t off; 502 struct vnode *vp; 503 kmutex_t *smtx; 504 505 ASSERT(smp->sm_refcnt > 0); 506 507 #ifdef lint 508 seg = seg; 509 #endif 510 511 if (segmap_kpm && IS_KPM_ADDR(addr)) { 512 513 /* 514 * We're called only from segmap_fault and this was a 515 * NOP in case of a kpm based smap, so dangerous things 516 * must have happened in the meantime. Pages are prefaulted 517 * and locked in segmap_getmapflt and they will not be 518 * unlocked until segmap_release. 519 */ 520 panic("segmap_unlock: called with kpm addr %p", (void *)addr); 521 /*NOTREACHED*/ 522 } 523 524 vp = smp->sm_vp; 525 off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET); 526 527 hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE)); 528 for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) { 529 ushort_t bitmask; 530 531 /* 532 * Use page_find() instead of page_lookup() to 533 * find the page since we know that it has 534 * "shared" lock. 535 */ 536 pp = page_find(vp, off); 537 if (pp == NULL) { 538 panic("segmap_unlock: page not found"); 539 /*NOTREACHED*/ 540 } 541 542 if (rw == S_WRITE) { 543 hat_setrefmod(pp); 544 } else if (rw != S_OTHER) { 545 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT, 546 "segmap_fault:pp %p vp %p offset %llx", pp, vp, off); 547 hat_setref(pp); 548 } 549 550 /* 551 * Clear bitmap, if the bit corresponding to "off" is set, 552 * since the page and translation are being unlocked. 553 */ 554 bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT); 555 556 /* 557 * Large Files: Following assertion is to verify 558 * the correctness of the cast to (int) above. 559 */ 560 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX); 561 smtx = SMAPMTX(smp); 562 mutex_enter(smtx); 563 if (smp->sm_bitmap & bitmask) { 564 smp->sm_bitmap &= ~bitmask; 565 } 566 mutex_exit(smtx); 567 568 page_unlock(pp); 569 } 570 } 571 572 #define MAXPPB (MAXBSIZE/4096) /* assumes minimum page size of 4k */ 573 574 /* 575 * This routine is called via a machine specific fault handling 576 * routine. It is also called by software routines wishing to 577 * lock or unlock a range of addresses. 578 * 579 * Note that this routine expects a page-aligned "addr". 580 */ 581 faultcode_t 582 segmap_fault( 583 struct hat *hat, 584 struct seg *seg, 585 caddr_t addr, 586 size_t len, 587 enum fault_type type, 588 enum seg_rw rw) 589 { 590 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 591 struct smap *smp; 592 page_t *pp, **ppp; 593 struct vnode *vp; 594 u_offset_t off; 595 page_t *pl[MAXPPB + 1]; 596 uint_t prot; 597 u_offset_t addroff; 598 caddr_t adr; 599 int err; 600 u_offset_t sm_off; 601 int hat_flag; 602 603 if (segmap_kpm && IS_KPM_ADDR(addr)) { 604 int newpage; 605 kmutex_t *smtx; 606 607 /* 608 * Pages are successfully prefaulted and locked in 609 * segmap_getmapflt and can't be unlocked until 610 * segmap_release. No hat mappings have to be locked 611 * and they also can't be unlocked as long as the 612 * caller owns an active kpm addr. 
613 */ 614 #ifndef DEBUG 615 if (type != F_SOFTUNLOCK) 616 return (0); 617 #endif 618 619 if ((smp = get_smap_kpm(addr, NULL)) == NULL) { 620 panic("segmap_fault: smap not found " 621 "for addr %p", (void *)addr); 622 /*NOTREACHED*/ 623 } 624 625 smtx = SMAPMTX(smp); 626 #ifdef DEBUG 627 newpage = smp->sm_flags & SM_KPM_NEWPAGE; 628 if (newpage) { 629 cmn_err(CE_WARN, "segmap_fault: newpage? smp %p", 630 (void *)smp); 631 } 632 633 if (type != F_SOFTUNLOCK) { 634 mutex_exit(smtx); 635 return (0); 636 } 637 #endif 638 mutex_exit(smtx); 639 vp = smp->sm_vp; 640 sm_off = smp->sm_off; 641 642 if (vp == NULL) 643 return (FC_MAKE_ERR(EIO)); 644 645 ASSERT(smp->sm_refcnt > 0); 646 647 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET); 648 if (addroff + len > MAXBSIZE) 649 panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk", 650 (void *)(addr + len)); 651 652 off = sm_off + addroff; 653 654 pp = page_find(vp, off); 655 656 if (pp == NULL) 657 panic("segmap_fault: softunlock page not found"); 658 659 /* 660 * Set ref bit also here in case of S_OTHER to avoid the 661 * overhead of supporting other cases than F_SOFTUNLOCK 662 * with segkpm. We can do this because the underlying 663 * pages are locked anyway. 664 */ 665 if (rw == S_WRITE) { 666 hat_setrefmod(pp); 667 } else { 668 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT, 669 "segmap_fault:pp %p vp %p offset %llx", 670 pp, vp, off); 671 hat_setref(pp); 672 } 673 674 return (0); 675 } 676 677 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++; 678 smp = GET_SMAP(seg, addr); 679 vp = smp->sm_vp; 680 sm_off = smp->sm_off; 681 682 if (vp == NULL) 683 return (FC_MAKE_ERR(EIO)); 684 685 ASSERT(smp->sm_refcnt > 0); 686 687 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET); 688 if (addroff + len > MAXBSIZE) { 689 panic("segmap_fault: endaddr %p " 690 "exceeds MAXBSIZE chunk", (void *)(addr + len)); 691 /*NOTREACHED*/ 692 } 693 off = sm_off + addroff; 694 695 /* 696 * First handle the easy stuff 697 */ 698 if (type == F_SOFTUNLOCK) { 699 segmap_unlock(hat, seg, addr, len, rw, smp); 700 return (0); 701 } 702 703 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE, 704 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp); 705 err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE, 706 seg, addr, rw, CRED(), NULL); 707 708 if (err) 709 return (FC_MAKE_ERR(err)); 710 711 prot &= smd->smd_prot; 712 713 /* 714 * Handle all pages returned in the pl[] array. 715 * This loop is coded on the assumption that if 716 * there was no error from the VOP_GETPAGE routine, 717 * that the page list returned will contain all the 718 * needed pages for the vp from [off..off + len]. 719 */ 720 ppp = pl; 721 while ((pp = *ppp++) != NULL) { 722 u_offset_t poff; 723 ASSERT(pp->p_vnode == vp); 724 hat_flag = HAT_LOAD; 725 726 /* 727 * Verify that the pages returned are within the range 728 * of this segmap region. Note that it is theoretically 729 * possible for pages outside this range to be returned, 730 * but it is not very likely. If we cannot use the 731 * page here, just release it and go on to the next one. 
732 */ 733 if (pp->p_offset < sm_off || 734 pp->p_offset >= sm_off + MAXBSIZE) { 735 (void) page_release(pp, 1); 736 continue; 737 } 738 739 ASSERT(hat == kas.a_hat); 740 poff = pp->p_offset; 741 adr = addr + (poff - off); 742 if (adr >= addr && adr < addr + len) { 743 hat_setref(pp); 744 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT, 745 "segmap_fault:pp %p vp %p offset %llx", 746 pp, vp, poff); 747 if (type == F_SOFTLOCK) 748 hat_flag = HAT_LOAD_LOCK; 749 } 750 751 /* 752 * Deal with VMODSORT pages here. If we know this is a write 753 * do the setmod now and allow write protection. 754 * As long as it's modified or not S_OTHER, remove write 755 * protection. With S_OTHER it's up to the FS to deal with this. 756 */ 757 if (IS_VMODSORT(vp)) { 758 if (rw == S_WRITE) 759 hat_setmod(pp); 760 else if (rw != S_OTHER && !hat_ismod(pp)) 761 prot &= ~PROT_WRITE; 762 } 763 764 hat_memload(hat, adr, pp, prot, hat_flag); 765 if (hat_flag != HAT_LOAD_LOCK) 766 page_unlock(pp); 767 } 768 return (0); 769 } 770 771 /* 772 * This routine is used to start I/O on pages asynchronously. 773 */ 774 static faultcode_t 775 segmap_faulta(struct seg *seg, caddr_t addr) 776 { 777 struct smap *smp; 778 struct vnode *vp; 779 u_offset_t off; 780 int err; 781 782 if (segmap_kpm && IS_KPM_ADDR(addr)) { 783 int newpage; 784 kmutex_t *smtx; 785 786 /* 787 * Pages are successfully prefaulted and locked in 788 * segmap_getmapflt and can't be unlocked until 789 * segmap_release. No hat mappings have to be locked 790 * and they also can't be unlocked as long as the 791 * caller owns an active kpm addr. 792 */ 793 #ifdef DEBUG 794 if ((smp = get_smap_kpm(addr, NULL)) == NULL) { 795 panic("segmap_faulta: smap not found " 796 "for addr %p", (void *)addr); 797 /*NOTREACHED*/ 798 } 799 800 smtx = SMAPMTX(smp); 801 newpage = smp->sm_flags & SM_KPM_NEWPAGE; 802 mutex_exit(smtx); 803 if (newpage) 804 cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p", 805 (void *)smp); 806 #endif 807 return (0); 808 } 809 810 segmapcnt.smp_faulta.value.ul++; 811 smp = GET_SMAP(seg, addr); 812 813 ASSERT(smp->sm_refcnt > 0); 814 815 vp = smp->sm_vp; 816 off = smp->sm_off; 817 818 if (vp == NULL) { 819 cmn_err(CE_WARN, "segmap_faulta - no vp"); 820 return (FC_MAKE_ERR(EIO)); 821 } 822 823 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE, 824 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp); 825 826 err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr 827 & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0, 828 seg, addr, S_READ, CRED(), NULL); 829 830 if (err) 831 return (FC_MAKE_ERR(err)); 832 return (0); 833 } 834 835 /*ARGSUSED*/ 836 static int 837 segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 838 { 839 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 840 841 ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); 842 843 /* 844 * Need not acquire the segment lock since 845 * "smd_prot" is a read-only field. 846 */ 847 return (((smd->smd_prot & prot) != prot) ? 
EACCES : 0); 848 } 849 850 static int 851 segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) 852 { 853 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 854 size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1; 855 856 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 857 858 if (pgno != 0) { 859 do { 860 protv[--pgno] = smd->smd_prot; 861 } while (pgno != 0); 862 } 863 return (0); 864 } 865 866 static u_offset_t 867 segmap_getoffset(struct seg *seg, caddr_t addr) 868 { 869 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 870 871 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock)); 872 873 return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base)); 874 } 875 876 /*ARGSUSED*/ 877 static int 878 segmap_gettype(struct seg *seg, caddr_t addr) 879 { 880 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock)); 881 882 return (MAP_SHARED); 883 } 884 885 /*ARGSUSED*/ 886 static int 887 segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp) 888 { 889 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 890 891 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock)); 892 893 /* XXX - This doesn't make any sense */ 894 *vpp = smd->smd_sm->sm_vp; 895 return (0); 896 } 897 898 /* 899 * Check to see if it makes sense to do kluster/read ahead to 900 * addr + delta relative to the mapping at addr. We assume here 901 * that delta is a signed PAGESIZE'd multiple (which can be negative). 902 * 903 * For segmap we always "approve" of this action from our standpoint. 904 */ 905 /*ARGSUSED*/ 906 static int 907 segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta) 908 { 909 return (0); 910 } 911 912 static void 913 segmap_badop() 914 { 915 panic("segmap_badop"); 916 /*NOTREACHED*/ 917 } 918 919 /* 920 * Special private segmap operations 921 */ 922 923 /* 924 * Add smap to the appropriate free list. 925 */ 926 static void 927 segmap_smapadd(struct smap *smp) 928 { 929 struct smfree *sm; 930 struct smap *smpfreelist; 931 struct sm_freeq *releq; 932 933 ASSERT(MUTEX_HELD(SMAPMTX(smp))); 934 935 if (smp->sm_refcnt != 0) { 936 panic("segmap_smapadd"); 937 /*NOTREACHED*/ 938 } 939 940 sm = &smd_free[smp->sm_free_ndx]; 941 /* 942 * Add to the tail of the release queue 943 * Note that sm_releq and sm_allocq could toggle 944 * before we get the lock. This does not affect 945 * correctness as the 2 queues are only maintained 946 * to reduce lock pressure. 947 */ 948 releq = sm->sm_releq; 949 if (releq == &sm->sm_freeq[0]) 950 smp->sm_flags |= SM_QNDX_ZERO; 951 else 952 smp->sm_flags &= ~SM_QNDX_ZERO; 953 mutex_enter(&releq->smq_mtx); 954 smpfreelist = releq->smq_free; 955 if (smpfreelist == 0) { 956 int want; 957 958 releq->smq_free = smp->sm_next = smp->sm_prev = smp; 959 /* 960 * Both queue mutexes held to set sm_want; 961 * snapshot the value before dropping releq mutex. 962 * If sm_want appears after the releq mutex is dropped, 963 * then the smap just freed is already gone. 964 */ 965 want = sm->sm_want; 966 mutex_exit(&releq->smq_mtx); 967 /* 968 * See if there was a waiter before dropping the releq mutex 969 * then recheck after obtaining sm_freeq[0] mutex as 970 * the another thread may have already signaled. 
971 */ 972 if (want) { 973 mutex_enter(&sm->sm_freeq[0].smq_mtx); 974 if (sm->sm_want) 975 cv_signal(&sm->sm_free_cv); 976 mutex_exit(&sm->sm_freeq[0].smq_mtx); 977 } 978 } else { 979 smp->sm_next = smpfreelist; 980 smp->sm_prev = smpfreelist->sm_prev; 981 smpfreelist->sm_prev = smp; 982 smp->sm_prev->sm_next = smp; 983 mutex_exit(&releq->smq_mtx); 984 } 985 } 986 987 988 static struct smap * 989 segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid) 990 { 991 struct smap **hpp; 992 struct smap *tmp; 993 kmutex_t *hmtx; 994 995 ASSERT(MUTEX_HELD(SMAPMTX(smp))); 996 ASSERT(smp->sm_vp == NULL); 997 ASSERT(smp->sm_hash == NULL); 998 ASSERT(smp->sm_prev == NULL); 999 ASSERT(smp->sm_next == NULL); 1000 ASSERT(hashid >= 0 && hashid <= smd_hashmsk); 1001 1002 hmtx = SHASHMTX(hashid); 1003 1004 mutex_enter(hmtx); 1005 /* 1006 * First we need to verify that no one has created a smp 1007 * with (vp,off) as its tag before we us. 1008 */ 1009 for (tmp = smd_hash[hashid].sh_hash_list; 1010 tmp != NULL; tmp = tmp->sm_hash) 1011 if (tmp->sm_vp == vp && tmp->sm_off == off) 1012 break; 1013 1014 if (tmp == NULL) { 1015 /* 1016 * No one created one yet. 1017 * 1018 * Funniness here - we don't increment the ref count on the 1019 * vnode * even though we have another pointer to it here. 1020 * The reason for this is that we don't want the fact that 1021 * a seg_map entry somewhere refers to a vnode to prevent the 1022 * vnode * itself from going away. This is because this 1023 * reference to the vnode is a "soft one". In the case where 1024 * a mapping is being used by a rdwr [or directory routine?] 1025 * there already has to be a non-zero ref count on the vnode. 1026 * In the case where the vp has been freed and the the smap 1027 * structure is on the free list, there are no pages in memory 1028 * that can refer to the vnode. Thus even if we reuse the same 1029 * vnode/smap structure for a vnode which has the same 1030 * address but represents a different object, we are ok. 1031 */ 1032 smp->sm_vp = vp; 1033 smp->sm_off = off; 1034 1035 hpp = &smd_hash[hashid].sh_hash_list; 1036 smp->sm_hash = *hpp; 1037 *hpp = smp; 1038 #ifdef SEGMAP_HASHSTATS 1039 smd_hash_len[hashid]++; 1040 #endif 1041 } 1042 mutex_exit(hmtx); 1043 1044 return (tmp); 1045 } 1046 1047 static void 1048 segmap_hashout(struct smap *smp) 1049 { 1050 struct smap **hpp, *hp; 1051 struct vnode *vp; 1052 kmutex_t *mtx; 1053 int hashid; 1054 u_offset_t off; 1055 1056 ASSERT(MUTEX_HELD(SMAPMTX(smp))); 1057 1058 vp = smp->sm_vp; 1059 off = smp->sm_off; 1060 1061 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */ 1062 mtx = SHASHMTX(hashid); 1063 mutex_enter(mtx); 1064 1065 hpp = &smd_hash[hashid].sh_hash_list; 1066 for (;;) { 1067 hp = *hpp; 1068 if (hp == NULL) { 1069 panic("segmap_hashout"); 1070 /*NOTREACHED*/ 1071 } 1072 if (hp == smp) 1073 break; 1074 hpp = &hp->sm_hash; 1075 } 1076 1077 *hpp = smp->sm_hash; 1078 smp->sm_hash = NULL; 1079 #ifdef SEGMAP_HASHSTATS 1080 smd_hash_len[hashid]--; 1081 #endif 1082 mutex_exit(mtx); 1083 1084 smp->sm_vp = NULL; 1085 smp->sm_off = (u_offset_t)0; 1086 1087 } 1088 1089 /* 1090 * Attempt to free unmodified, unmapped, and non locked segmap 1091 * pages. 
1092 */ 1093 void 1094 segmap_pagefree(struct vnode *vp, u_offset_t off) 1095 { 1096 u_offset_t pgoff; 1097 page_t *pp; 1098 1099 for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) { 1100 1101 if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL) 1102 continue; 1103 1104 switch (page_release(pp, 1)) { 1105 case PGREL_NOTREL: 1106 segmapcnt.smp_free_notfree.value.ul++; 1107 break; 1108 case PGREL_MOD: 1109 segmapcnt.smp_free_dirty.value.ul++; 1110 break; 1111 case PGREL_CLEAN: 1112 segmapcnt.smp_free.value.ul++; 1113 break; 1114 } 1115 } 1116 } 1117 1118 /* 1119 * Locks held on entry: smap lock 1120 * Locks held on exit : smap lock. 1121 */ 1122 1123 static void 1124 grab_smp(struct smap *smp, page_t *pp) 1125 { 1126 ASSERT(MUTEX_HELD(SMAPMTX(smp))); 1127 ASSERT(smp->sm_refcnt == 0); 1128 1129 if (smp->sm_vp != (struct vnode *)NULL) { 1130 struct vnode *vp = smp->sm_vp; 1131 u_offset_t off = smp->sm_off; 1132 /* 1133 * Destroy old vnode association and 1134 * unload any hardware translations to 1135 * the old object. 1136 */ 1137 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++; 1138 segmap_hashout(smp); 1139 1140 /* 1141 * This node is off freelist and hashlist, 1142 * so there is no reason to drop/reacquire sm_mtx 1143 * across calls to hat_unload. 1144 */ 1145 if (segmap_kpm) { 1146 caddr_t vaddr; 1147 int hat_unload_needed = 0; 1148 1149 /* 1150 * unload kpm mapping 1151 */ 1152 if (pp != NULL) { 1153 vaddr = hat_kpm_page2va(pp, 1); 1154 hat_kpm_mapout(pp, GET_KPME(smp), vaddr); 1155 page_unlock(pp); 1156 } 1157 1158 /* 1159 * Check if we have (also) the rare case of a 1160 * non kpm mapping. 1161 */ 1162 if (smp->sm_flags & SM_NOTKPM_RELEASED) { 1163 hat_unload_needed = 1; 1164 smp->sm_flags &= ~SM_NOTKPM_RELEASED; 1165 } 1166 1167 if (hat_unload_needed) { 1168 hat_unload(kas.a_hat, segkmap->s_base + 1169 ((smp - smd_smap) * MAXBSIZE), 1170 MAXBSIZE, HAT_UNLOAD); 1171 } 1172 1173 } else { 1174 ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED); 1175 smp->sm_flags &= ~SM_NOTKPM_RELEASED; 1176 hat_unload(kas.a_hat, segkmap->s_base + 1177 ((smp - smd_smap) * MAXBSIZE), 1178 MAXBSIZE, HAT_UNLOAD); 1179 } 1180 segmap_pagefree(vp, off); 1181 } 1182 } 1183 1184 static struct smap * 1185 get_free_smp(int free_ndx) 1186 { 1187 struct smfree *sm; 1188 kmutex_t *smtx; 1189 struct smap *smp, *first; 1190 struct sm_freeq *allocq, *releq; 1191 struct kpme *kpme; 1192 page_t *pp = NULL; 1193 int end_ndx, page_locked = 0; 1194 1195 end_ndx = free_ndx; 1196 sm = &smd_free[free_ndx]; 1197 1198 retry_queue: 1199 allocq = sm->sm_allocq; 1200 mutex_enter(&allocq->smq_mtx); 1201 1202 if ((smp = allocq->smq_free) == NULL) { 1203 1204 skip_queue: 1205 /* 1206 * The alloc list is empty or this queue is being skipped; 1207 * first see if the allocq toggled. 1208 */ 1209 if (sm->sm_allocq != allocq) { 1210 /* queue changed */ 1211 mutex_exit(&allocq->smq_mtx); 1212 goto retry_queue; 1213 } 1214 releq = sm->sm_releq; 1215 if (!mutex_tryenter(&releq->smq_mtx)) { 1216 /* cannot get releq; a free smp may be there now */ 1217 mutex_exit(&allocq->smq_mtx); 1218 1219 /* 1220 * This loop could spin forever if this thread has 1221 * higher priority than the thread that is holding 1222 * releq->smq_mtx. In order to force the other thread 1223 * to run, we'll lock/unlock the mutex which is safe 1224 * since we just unlocked the allocq mutex. 
1225 */ 1226 mutex_enter(&releq->smq_mtx); 1227 mutex_exit(&releq->smq_mtx); 1228 goto retry_queue; 1229 } 1230 if (releq->smq_free == NULL) { 1231 /* 1232 * This freelist is empty. 1233 * This should not happen unless clients 1234 * are failing to release the segmap 1235 * window after accessing the data. 1236 * Before resorting to sleeping, try 1237 * the next list of the same color. 1238 */ 1239 free_ndx = (free_ndx + smd_ncolor) & smd_freemsk; 1240 if (free_ndx != end_ndx) { 1241 mutex_exit(&releq->smq_mtx); 1242 mutex_exit(&allocq->smq_mtx); 1243 sm = &smd_free[free_ndx]; 1244 goto retry_queue; 1245 } 1246 /* 1247 * Tried all freelists of the same color once, 1248 * wait on this list and hope something gets freed. 1249 */ 1250 segmapcnt.smp_get_nofree.value.ul++; 1251 sm->sm_want++; 1252 mutex_exit(&sm->sm_freeq[1].smq_mtx); 1253 cv_wait(&sm->sm_free_cv, 1254 &sm->sm_freeq[0].smq_mtx); 1255 sm->sm_want--; 1256 mutex_exit(&sm->sm_freeq[0].smq_mtx); 1257 sm = &smd_free[free_ndx]; 1258 goto retry_queue; 1259 } else { 1260 /* 1261 * Something on the rele queue; flip the alloc 1262 * and rele queues and retry. 1263 */ 1264 sm->sm_allocq = releq; 1265 sm->sm_releq = allocq; 1266 mutex_exit(&allocq->smq_mtx); 1267 mutex_exit(&releq->smq_mtx); 1268 if (page_locked) { 1269 delay(hz >> 2); 1270 page_locked = 0; 1271 } 1272 goto retry_queue; 1273 } 1274 } else { 1275 /* 1276 * Fastpath the case we get the smap mutex 1277 * on the first try. 1278 */ 1279 first = smp; 1280 next_smap: 1281 smtx = SMAPMTX(smp); 1282 if (!mutex_tryenter(smtx)) { 1283 /* 1284 * Another thread is trying to reclaim this slot. 1285 * Skip to the next queue or smap. 1286 */ 1287 if ((smp = smp->sm_next) == first) { 1288 goto skip_queue; 1289 } else { 1290 goto next_smap; 1291 } 1292 } else { 1293 /* 1294 * if kpme exists, get shared lock on the page 1295 */ 1296 if (segmap_kpm && smp->sm_vp != NULL) { 1297 1298 kpme = GET_KPME(smp); 1299 pp = kpme->kpe_page; 1300 1301 if (pp != NULL) { 1302 if (!page_trylock(pp, SE_SHARED)) { 1303 smp = smp->sm_next; 1304 mutex_exit(smtx); 1305 page_locked = 1; 1306 1307 pp = NULL; 1308 1309 if (smp == first) { 1310 goto skip_queue; 1311 } else { 1312 goto next_smap; 1313 } 1314 } else { 1315 if (kpme->kpe_page == NULL) { 1316 page_unlock(pp); 1317 pp = NULL; 1318 } 1319 } 1320 } 1321 } 1322 1323 /* 1324 * At this point, we've selected smp. Remove smp 1325 * from its freelist. If smp is the first one in 1326 * the freelist, update the head of the freelist. 1327 */ 1328 if (first == smp) { 1329 ASSERT(first == allocq->smq_free); 1330 allocq->smq_free = smp->sm_next; 1331 } 1332 1333 /* 1334 * if the head of the freelist still points to smp, 1335 * then there are no more free smaps in that list. 1336 */ 1337 if (allocq->smq_free == smp) 1338 /* 1339 * Took the last one 1340 */ 1341 allocq->smq_free = NULL; 1342 else { 1343 smp->sm_prev->sm_next = smp->sm_next; 1344 smp->sm_next->sm_prev = smp->sm_prev; 1345 } 1346 mutex_exit(&allocq->smq_mtx); 1347 smp->sm_prev = smp->sm_next = NULL; 1348 1349 /* 1350 * if pp != NULL, pp must have been locked; 1351 * grab_smp() unlocks pp. 1352 */ 1353 ASSERT((pp == NULL) || PAGE_LOCKED(pp)); 1354 grab_smp(smp, pp); 1355 /* return smp locked. */ 1356 ASSERT(SMAPMTX(smp) == smtx); 1357 ASSERT(MUTEX_HELD(smtx)); 1358 return (smp); 1359 } 1360 } 1361 } 1362 1363 /* 1364 * Special public segmap operations 1365 */ 1366 1367 /* 1368 * Create pages (without using VOP_GETPAGE) and load up translations to them. 
1369 * If softlock is TRUE, then set things up so that it looks like a call 1370 * to segmap_fault with F_SOFTLOCK. 1371 * 1372 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise. 1373 * 1374 * All fields in the generic segment (struct seg) are considered to be 1375 * read-only for "segmap" even though the kernel address space (kas) may 1376 * not be locked, hence no lock is needed to access them. 1377 */ 1378 int 1379 segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock) 1380 { 1381 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 1382 page_t *pp; 1383 u_offset_t off; 1384 struct smap *smp; 1385 struct vnode *vp; 1386 caddr_t eaddr; 1387 int newpage = 0; 1388 uint_t prot; 1389 kmutex_t *smtx; 1390 int hat_flag; 1391 1392 ASSERT(seg->s_as == &kas); 1393 1394 if (segmap_kpm && IS_KPM_ADDR(addr)) { 1395 /* 1396 * Pages are successfully prefaulted and locked in 1397 * segmap_getmapflt and can't be unlocked until 1398 * segmap_release. The SM_KPM_NEWPAGE flag is set 1399 * in segmap_pagecreate_kpm when new pages are created. 1400 * and it is returned as "newpage" indication here. 1401 */ 1402 if ((smp = get_smap_kpm(addr, NULL)) == NULL) { 1403 panic("segmap_pagecreate: smap not found " 1404 "for addr %p", (void *)addr); 1405 /*NOTREACHED*/ 1406 } 1407 1408 smtx = SMAPMTX(smp); 1409 newpage = smp->sm_flags & SM_KPM_NEWPAGE; 1410 smp->sm_flags &= ~SM_KPM_NEWPAGE; 1411 mutex_exit(smtx); 1412 1413 return (newpage); 1414 } 1415 1416 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++; 1417 1418 eaddr = addr + len; 1419 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 1420 1421 smp = GET_SMAP(seg, addr); 1422 1423 /* 1424 * We don't grab smp mutex here since we assume the smp 1425 * has a refcnt set already which prevents the slot from 1426 * changing its id. 1427 */ 1428 ASSERT(smp->sm_refcnt > 0); 1429 1430 vp = smp->sm_vp; 1431 off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET)); 1432 prot = smd->smd_prot; 1433 1434 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) { 1435 hat_flag = HAT_LOAD; 1436 pp = page_lookup(vp, off, SE_SHARED); 1437 if (pp == NULL) { 1438 ushort_t bitindex; 1439 1440 if ((pp = page_create_va(vp, off, 1441 PAGESIZE, PG_WAIT, seg, addr)) == NULL) { 1442 panic("segmap_pagecreate: page_create failed"); 1443 /*NOTREACHED*/ 1444 } 1445 newpage = 1; 1446 page_io_unlock(pp); 1447 1448 /* 1449 * Since pages created here do not contain valid 1450 * data until the caller writes into them, the 1451 * "exclusive" lock will not be dropped to prevent 1452 * other users from accessing the page. We also 1453 * have to lock the translation to prevent a fault 1454 * from occurring when the virtual address mapped by 1455 * this page is written into. This is necessary to 1456 * avoid a deadlock since we haven't dropped the 1457 * "exclusive" lock. 1458 */ 1459 bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT); 1460 1461 /* 1462 * Large Files: The following assertion is to 1463 * verify the cast above. 
1464 */ 1465 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX); 1466 smtx = SMAPMTX(smp); 1467 mutex_enter(smtx); 1468 smp->sm_bitmap |= SMAP_BIT_MASK(bitindex); 1469 mutex_exit(smtx); 1470 1471 hat_flag = HAT_LOAD_LOCK; 1472 } else if (softlock) { 1473 hat_flag = HAT_LOAD_LOCK; 1474 } 1475 1476 if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE)) 1477 hat_setmod(pp); 1478 1479 hat_memload(kas.a_hat, addr, pp, prot, hat_flag); 1480 1481 if (hat_flag != HAT_LOAD_LOCK) 1482 page_unlock(pp); 1483 1484 TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE, 1485 "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx", 1486 seg, addr, pp, vp, off); 1487 } 1488 1489 return (newpage); 1490 } 1491 1492 void 1493 segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw) 1494 { 1495 struct smap *smp; 1496 ushort_t bitmask; 1497 page_t *pp; 1498 struct vnode *vp; 1499 u_offset_t off; 1500 caddr_t eaddr; 1501 kmutex_t *smtx; 1502 1503 ASSERT(seg->s_as == &kas); 1504 1505 eaddr = addr + len; 1506 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 1507 1508 if (segmap_kpm && IS_KPM_ADDR(addr)) { 1509 /* 1510 * Pages are successfully prefaulted and locked in 1511 * segmap_getmapflt and can't be unlocked until 1512 * segmap_release, so no pages or hat mappings have 1513 * to be unlocked at this point. 1514 */ 1515 #ifdef DEBUG 1516 if ((smp = get_smap_kpm(addr, NULL)) == NULL) { 1517 panic("segmap_pageunlock: smap not found " 1518 "for addr %p", (void *)addr); 1519 /*NOTREACHED*/ 1520 } 1521 1522 ASSERT(smp->sm_refcnt > 0); 1523 mutex_exit(SMAPMTX(smp)); 1524 #endif 1525 return; 1526 } 1527 1528 smp = GET_SMAP(seg, addr); 1529 smtx = SMAPMTX(smp); 1530 1531 ASSERT(smp->sm_refcnt > 0); 1532 1533 vp = smp->sm_vp; 1534 off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET)); 1535 1536 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) { 1537 bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT); 1538 1539 /* 1540 * Large Files: Following assertion is to verify 1541 * the correctness of the cast to (int) above. 1542 */ 1543 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX); 1544 1545 /* 1546 * If the bit corresponding to "off" is set, 1547 * clear this bit in the bitmap, unlock translations, 1548 * and release the "exclusive" lock on the page. 1549 */ 1550 if (smp->sm_bitmap & bitmask) { 1551 mutex_enter(smtx); 1552 smp->sm_bitmap &= ~bitmask; 1553 mutex_exit(smtx); 1554 1555 hat_unlock(kas.a_hat, addr, PAGESIZE); 1556 1557 /* 1558 * Use page_find() instead of page_lookup() to 1559 * find the page since we know that it has 1560 * "exclusive" lock. 1561 */ 1562 pp = page_find(vp, off); 1563 if (pp == NULL) { 1564 panic("segmap_pageunlock: page not found"); 1565 /*NOTREACHED*/ 1566 } 1567 if (rw == S_WRITE) { 1568 hat_setrefmod(pp); 1569 } else if (rw != S_OTHER) { 1570 hat_setref(pp); 1571 } 1572 1573 page_unlock(pp); 1574 } 1575 } 1576 } 1577 1578 caddr_t 1579 segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off) 1580 { 1581 return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER)); 1582 } 1583 1584 /* 1585 * This is the magic virtual address that offset 0 of an ELF 1586 * file gets mapped to in user space. This is used to pick 1587 * the vac color on the freelist. 1588 */ 1589 #define ELF_OFFZERO_VA (0x10000) 1590 /* 1591 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp 1592 * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned. 1593 * The return address is always MAXBSIZE aligned. 
1594 * 1595 * If forcefault is nonzero and the MMU translations haven't yet been created, 1596 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them. 1597 */ 1598 caddr_t 1599 segmap_getmapflt( 1600 struct seg *seg, 1601 struct vnode *vp, 1602 u_offset_t off, 1603 size_t len, 1604 int forcefault, 1605 enum seg_rw rw) 1606 { 1607 struct smap *smp, *nsmp; 1608 extern struct vnode *common_specvp(); 1609 caddr_t baseaddr; /* MAXBSIZE aligned */ 1610 u_offset_t baseoff; 1611 int newslot; 1612 caddr_t vaddr; 1613 int color, hashid; 1614 kmutex_t *hashmtx, *smapmtx; 1615 struct smfree *sm; 1616 page_t *pp; 1617 struct kpme *kpme; 1618 uint_t prot; 1619 caddr_t base; 1620 page_t *pl[MAXPPB + 1]; 1621 int error; 1622 int is_kpm = 1; 1623 1624 ASSERT(seg->s_as == &kas); 1625 ASSERT(seg == segkmap); 1626 1627 baseoff = off & (offset_t)MAXBMASK; 1628 if (off + len > baseoff + MAXBSIZE) { 1629 panic("segmap_getmap bad len"); 1630 /*NOTREACHED*/ 1631 } 1632 1633 /* 1634 * If this is a block device we have to be sure to use the 1635 * "common" block device vnode for the mapping. 1636 */ 1637 if (vp->v_type == VBLK) 1638 vp = common_specvp(vp); 1639 1640 smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++; 1641 1642 if (segmap_kpm == 0 || 1643 (forcefault == SM_PAGECREATE && rw != S_WRITE)) { 1644 is_kpm = 0; 1645 } 1646 1647 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */ 1648 hashmtx = SHASHMTX(hashid); 1649 1650 retry_hash: 1651 mutex_enter(hashmtx); 1652 for (smp = smd_hash[hashid].sh_hash_list; 1653 smp != NULL; smp = smp->sm_hash) 1654 if (smp->sm_vp == vp && smp->sm_off == baseoff) 1655 break; 1656 mutex_exit(hashmtx); 1657 1658 vrfy_smp: 1659 if (smp != NULL) { 1660 1661 ASSERT(vp->v_count != 0); 1662 1663 /* 1664 * Get smap lock and recheck its tag. The hash lock 1665 * is dropped since the hash is based on (vp, off) 1666 * and (vp, off) won't change when we have smap mtx. 1667 */ 1668 smapmtx = SMAPMTX(smp); 1669 mutex_enter(smapmtx); 1670 if (smp->sm_vp != vp || smp->sm_off != baseoff) { 1671 mutex_exit(smapmtx); 1672 goto retry_hash; 1673 } 1674 1675 if (smp->sm_refcnt == 0) { 1676 1677 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++; 1678 1679 /* 1680 * Could still be on the free list. However, this 1681 * could also be an smp that is transitioning from 1682 * the free list when we have too much contention 1683 * for the smapmtx's. In this case, we have an 1684 * unlocked smp that is not on the free list any 1685 * longer, but still has a 0 refcnt. The only way 1686 * to be sure is to check the freelist pointers. 1687 * Since we now have the smapmtx, we are guaranteed 1688 * that the (vp, off) won't change, so we are safe 1689 * to reclaim it. get_free_smp() knows that this 1690 * can happen, and it will check the refcnt. 
1691 */ 1692 1693 if ((smp->sm_next != NULL)) { 1694 struct sm_freeq *freeq; 1695 1696 ASSERT(smp->sm_prev != NULL); 1697 sm = &smd_free[smp->sm_free_ndx]; 1698 1699 if (smp->sm_flags & SM_QNDX_ZERO) 1700 freeq = &sm->sm_freeq[0]; 1701 else 1702 freeq = &sm->sm_freeq[1]; 1703 1704 mutex_enter(&freeq->smq_mtx); 1705 if (freeq->smq_free != smp) { 1706 /* 1707 * fastpath normal case 1708 */ 1709 smp->sm_prev->sm_next = smp->sm_next; 1710 smp->sm_next->sm_prev = smp->sm_prev; 1711 } else if (smp == smp->sm_next) { 1712 /* 1713 * Taking the last smap on freelist 1714 */ 1715 freeq->smq_free = NULL; 1716 } else { 1717 /* 1718 * Reclaiming 1st smap on list 1719 */ 1720 freeq->smq_free = smp->sm_next; 1721 smp->sm_prev->sm_next = smp->sm_next; 1722 smp->sm_next->sm_prev = smp->sm_prev; 1723 } 1724 mutex_exit(&freeq->smq_mtx); 1725 smp->sm_prev = smp->sm_next = NULL; 1726 } else { 1727 ASSERT(smp->sm_prev == NULL); 1728 segmapcnt.smp_stolen.value.ul++; 1729 } 1730 1731 } else { 1732 segmapcnt.smp_get_use.value.ul++; 1733 } 1734 smp->sm_refcnt++; /* another user */ 1735 1736 /* 1737 * We don't invoke segmap_fault via TLB miss, so we set ref 1738 * and mod bits in advance. For S_OTHER we set them in 1739 * segmap_fault F_SOFTUNLOCK. 1740 */ 1741 if (is_kpm) { 1742 if (rw == S_WRITE) { 1743 smp->sm_flags |= SM_WRITE_DATA; 1744 } else if (rw == S_READ) { 1745 smp->sm_flags |= SM_READ_DATA; 1746 } 1747 } 1748 mutex_exit(smapmtx); 1749 1750 newslot = 0; 1751 } else { 1752 1753 uint32_t free_ndx, *free_ndxp; 1754 union segmap_cpu *scpu; 1755 1756 /* 1757 * On a PAC machine or a machine with anti-alias 1758 * hardware, smd_colormsk will be zero. 1759 * 1760 * On a VAC machine- pick color by offset in the file 1761 * so we won't get VAC conflicts on elf files. 1762 * On data files, color does not matter but we 1763 * don't know what kind of file it is so we always 1764 * pick color by offset. This causes color 1765 * corresponding to file offset zero to be used more 1766 * heavily. 1767 */ 1768 color = (baseoff >> MAXBSHIFT) & smd_colormsk; 1769 scpu = smd_cpu+CPU->cpu_seqid; 1770 free_ndxp = &scpu->scpu.scpu_free_ndx[color]; 1771 free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk; 1772 #ifdef DEBUG 1773 colors_used[free_ndx]++; 1774 #endif /* DEBUG */ 1775 1776 /* 1777 * Get a locked smp slot from the free list. 1778 */ 1779 smp = get_free_smp(free_ndx); 1780 smapmtx = SMAPMTX(smp); 1781 1782 ASSERT(smp->sm_vp == NULL); 1783 1784 if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) { 1785 /* 1786 * Failed to hashin, there exists one now. 1787 * Return the smp we just allocated. 1788 */ 1789 segmap_smapadd(smp); 1790 mutex_exit(smapmtx); 1791 1792 smp = nsmp; 1793 goto vrfy_smp; 1794 } 1795 smp->sm_refcnt++; /* another user */ 1796 1797 /* 1798 * We don't invoke segmap_fault via TLB miss, so we set ref 1799 * and mod bits in advance. For S_OTHER we set them in 1800 * segmap_fault F_SOFTUNLOCK. 1801 */ 1802 if (is_kpm) { 1803 if (rw == S_WRITE) { 1804 smp->sm_flags |= SM_WRITE_DATA; 1805 } else if (rw == S_READ) { 1806 smp->sm_flags |= SM_READ_DATA; 1807 } 1808 } 1809 mutex_exit(smapmtx); 1810 1811 newslot = 1; 1812 } 1813 1814 if (!is_kpm) 1815 goto use_segmap_range; 1816 1817 /* 1818 * Use segkpm 1819 */ 1820 /* Lint directive required until 6746211 is fixed */ 1821 /*CONSTCOND*/ 1822 ASSERT(PAGESIZE == MAXBSIZE); 1823 1824 /* 1825 * remember the last smp faulted on this cpu. 
1826 */ 1827 (smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp; 1828 1829 if (forcefault == SM_PAGECREATE) { 1830 baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw); 1831 return (baseaddr); 1832 } 1833 1834 if (newslot == 0 && 1835 (pp = GET_KPME(smp)->kpe_page) != NULL) { 1836 1837 /* fastpath */ 1838 switch (rw) { 1839 case S_READ: 1840 case S_WRITE: 1841 if (page_trylock(pp, SE_SHARED)) { 1842 if (PP_ISFREE(pp) || 1843 !(pp->p_vnode == vp && 1844 pp->p_offset == baseoff)) { 1845 page_unlock(pp); 1846 pp = page_lookup(vp, baseoff, 1847 SE_SHARED); 1848 } 1849 } else { 1850 pp = page_lookup(vp, baseoff, SE_SHARED); 1851 } 1852 1853 if (pp == NULL) { 1854 ASSERT(GET_KPME(smp)->kpe_page == NULL); 1855 break; 1856 } 1857 1858 if (rw == S_WRITE && 1859 hat_page_getattr(pp, P_MOD | P_REF) != 1860 (P_MOD | P_REF)) { 1861 page_unlock(pp); 1862 break; 1863 } 1864 1865 /* 1866 * We have the p_selock as reader, grab_smp 1867 * can't hit us, we have bumped the smap 1868 * refcnt and hat_pageunload needs the 1869 * p_selock exclusive. 1870 */ 1871 kpme = GET_KPME(smp); 1872 if (kpme->kpe_page == pp) { 1873 baseaddr = hat_kpm_page2va(pp, 0); 1874 } else if (kpme->kpe_page == NULL) { 1875 baseaddr = hat_kpm_mapin(pp, kpme); 1876 } else { 1877 panic("segmap_getmapflt: stale " 1878 "kpme page, kpme %p", (void *)kpme); 1879 /*NOTREACHED*/ 1880 } 1881 1882 /* 1883 * We don't invoke segmap_fault via TLB miss, 1884 * so we set ref and mod bits in advance. 1885 * For S_OTHER and we set them in segmap_fault 1886 * F_SOFTUNLOCK. 1887 */ 1888 if (rw == S_READ && !hat_isref(pp)) 1889 hat_setref(pp); 1890 1891 return (baseaddr); 1892 default: 1893 break; 1894 } 1895 } 1896 1897 base = segkpm_create_va(baseoff); 1898 error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE, 1899 seg, base, rw, CRED(), NULL); 1900 1901 pp = pl[0]; 1902 if (error || pp == NULL) { 1903 /* 1904 * Use segmap address slot and let segmap_fault deal 1905 * with the error cases. There is no error return 1906 * possible here. 1907 */ 1908 goto use_segmap_range; 1909 } 1910 1911 ASSERT(pl[1] == NULL); 1912 1913 /* 1914 * When prot is not returned w/ PROT_ALL the returned pages 1915 * are not backed by fs blocks. For most of the segmap users 1916 * this is no problem, they don't write to the pages in the 1917 * same request and therefore don't rely on a following 1918 * trap driven segmap_fault. With SM_LOCKPROTO users it 1919 * is more secure to use segkmap adresses to allow 1920 * protection segmap_fault's. 1921 */ 1922 if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) { 1923 /* 1924 * Use segmap address slot and let segmap_fault 1925 * do the error return. 1926 */ 1927 ASSERT(rw != S_WRITE); 1928 ASSERT(PAGE_LOCKED(pp)); 1929 page_unlock(pp); 1930 forcefault = 0; 1931 goto use_segmap_range; 1932 } 1933 1934 /* 1935 * We have the p_selock as reader, grab_smp can't hit us, we 1936 * have bumped the smap refcnt and hat_pageunload needs the 1937 * p_selock exclusive. 
1938 */ 1939 kpme = GET_KPME(smp); 1940 if (kpme->kpe_page == pp) { 1941 baseaddr = hat_kpm_page2va(pp, 0); 1942 } else if (kpme->kpe_page == NULL) { 1943 baseaddr = hat_kpm_mapin(pp, kpme); 1944 } else { 1945 panic("segmap_getmapflt: stale kpme page after " 1946 "VOP_GETPAGE, kpme %p", (void *)kpme); 1947 /*NOTREACHED*/ 1948 } 1949 1950 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++; 1951 1952 return (baseaddr); 1953 1954 1955 use_segmap_range: 1956 baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE); 1957 TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP, 1958 "segmap_getmap:seg %p addr %p vp %p offset %llx", 1959 seg, baseaddr, vp, baseoff); 1960 1961 /* 1962 * Prefault the translations 1963 */ 1964 vaddr = baseaddr + (off - baseoff); 1965 if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) { 1966 1967 caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr & 1968 (uintptr_t)PAGEMASK); 1969 1970 (void) segmap_fault(kas.a_hat, seg, pgaddr, 1971 (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK, 1972 F_INVAL, rw); 1973 } 1974 1975 return (baseaddr); 1976 } 1977 1978 int 1979 segmap_release(struct seg *seg, caddr_t addr, uint_t flags) 1980 { 1981 struct smap *smp; 1982 int error; 1983 int bflags = 0; 1984 struct vnode *vp; 1985 u_offset_t offset; 1986 kmutex_t *smtx; 1987 int is_kpm = 0; 1988 page_t *pp; 1989 1990 if (segmap_kpm && IS_KPM_ADDR(addr)) { 1991 1992 if (((uintptr_t)addr & MAXBOFFSET) != 0) { 1993 panic("segmap_release: addr %p not " 1994 "MAXBSIZE aligned", (void *)addr); 1995 /*NOTREACHED*/ 1996 } 1997 1998 if ((smp = get_smap_kpm(addr, &pp)) == NULL) { 1999 panic("segmap_release: smap not found " 2000 "for addr %p", (void *)addr); 2001 /*NOTREACHED*/ 2002 } 2003 2004 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP, 2005 "segmap_relmap:seg %p addr %p smp %p", 2006 seg, addr, smp); 2007 2008 smtx = SMAPMTX(smp); 2009 2010 /* 2011 * For compatibility reasons segmap_pagecreate_kpm sets this 2012 * flag to allow a following segmap_pagecreate to return 2013 * this as "newpage" flag. When segmap_pagecreate is not 2014 * called at all we clear it now. 2015 */ 2016 smp->sm_flags &= ~SM_KPM_NEWPAGE; 2017 is_kpm = 1; 2018 if (smp->sm_flags & SM_WRITE_DATA) { 2019 hat_setrefmod(pp); 2020 } else if (smp->sm_flags & SM_READ_DATA) { 2021 hat_setref(pp); 2022 } 2023 } else { 2024 if (addr < seg->s_base || addr >= seg->s_base + seg->s_size || 2025 ((uintptr_t)addr & MAXBOFFSET) != 0) { 2026 panic("segmap_release: bad addr %p", (void *)addr); 2027 /*NOTREACHED*/ 2028 } 2029 smp = GET_SMAP(seg, addr); 2030 2031 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP, 2032 "segmap_relmap:seg %p addr %p smp %p", 2033 seg, addr, smp); 2034 2035 smtx = SMAPMTX(smp); 2036 mutex_enter(smtx); 2037 smp->sm_flags |= SM_NOTKPM_RELEASED; 2038 } 2039 2040 ASSERT(smp->sm_refcnt > 0); 2041 2042 /* 2043 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED) 2044 * are set. 2045 */ 2046 if ((flags & ~SM_DONTNEED) != 0) { 2047 if (flags & SM_WRITE) 2048 segmapcnt.smp_rel_write.value.ul++; 2049 if (flags & SM_ASYNC) { 2050 bflags |= B_ASYNC; 2051 segmapcnt.smp_rel_async.value.ul++; 2052 } 2053 if (flags & SM_INVAL) { 2054 bflags |= B_INVAL; 2055 segmapcnt.smp_rel_abort.value.ul++; 2056 } 2057 if (flags & SM_DESTROY) { 2058 bflags |= (B_INVAL|B_TRUNC); 2059 segmapcnt.smp_rel_abort.value.ul++; 2060 } 2061 if (smp->sm_refcnt == 1) { 2062 /* 2063 * We only bother doing the FREE and DONTNEED flags 2064 * if no one else is still referencing this mapping. 

/*
 * Dump the pages belonging to this segmap segment.
 */
static void
segmap_dump(struct seg *seg)
{
	struct segmap_data *smd;
	struct smap *smp, *smp_end;
	page_t *pp;
	pfn_t pfn;
	u_offset_t off;
	caddr_t addr;

	smd = (struct segmap_data *)seg->s_data;
	addr = seg->s_base;
	for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
	    smp < smp_end; smp++) {

		if (smp->sm_refcnt) {
			for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
				int we_own_it = 0;

				/*
				 * If pp == NULL, the page either does
				 * not exist or is exclusively locked.
				 * So determine if it exists before
				 * searching for it.
				 */
				if ((pp = page_lookup_nowait(smp->sm_vp,
				    smp->sm_off + off, SE_SHARED)))
					we_own_it = 1;
				else
					pp = page_exists(smp->sm_vp,
					    smp->sm_off + off);

				if (pp) {
					pfn = page_pptonum(pp);
					dump_addpage(seg->s_as,
					    addr + off, pfn);
					if (we_own_it)
						page_unlock(pp);
				}
				dump_timeleft = dump_timeout;
			}
		}
		addr += MAXBSIZE;
	}
}

/*ARGSUSED*/
static int
segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

static int
segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
	memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
	return (0);
}

/*ARGSUSED*/
static lgrp_mem_policy_info_t *
segmap_getpolicy(struct seg *seg, caddr_t addr)
{
	return (NULL);
}

/*ARGSUSED*/
static int
segmap_capable(struct seg *seg, segcapability_t capability)
{
	return (0);
}


#ifdef	SEGKPM_SUPPORT

/*
 * segkpm support routines
 */

static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
	struct smap *smp, enum seg_rw rw)
{
	caddr_t	base;
	page_t	*pp;
	int	newpage = 0;
	struct kpme	*kpme;

	ASSERT(smp->sm_refcnt > 0);

	if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
		kmutex_t *smtx;

		base = segkpm_create_va(off);

		if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
		    seg, base)) == NULL) {
			panic("segmap_pagecreate_kpm: "
			    "page_create failed");
			/*NOTREACHED*/
		}

		newpage = 1;
		page_io_unlock(pp);
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * Mark this here until the following segmap_pagecreate
		 * or segmap_release.
		 */
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_KPM_NEWPAGE;
		mutex_exit(smtx);
	}

	kpme = GET_KPME(smp);
	if (!newpage && kpme->kpe_page == pp)
		base = hat_kpm_page2va(pp, 0);
	else
		base = hat_kpm_mapin(pp, kpme);

	/*
	 * FS code may decide not to call segmap_pagecreate and we
	 * don't invoke segmap_fault via TLB miss, so we have to set
	 * ref and mod bits in advance.
	 */
	if (rw == S_WRITE) {
		hat_setrefmod(pp);
	} else {
		ASSERT(rw == S_READ);
		hat_setref(pp);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	return (base);
}
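
/*
 * Locking note on the routine below, added for reference: get_smap_kpm
 * expects the caller to already hold the MAXBSIZE-aligned page SE_SHARED
 * locked (see the ASSERT(PAGE_LOCKED(pp))) and returns the smap with its
 * sm_mtx held.  This is why the kpm branch of segmap_release above does
 * not call mutex_enter on SMAPMTX itself and drops the page lock only
 * after releasing the slot.
 */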

/*
 * Find the smap structure corresponding to the
 * KPM addr and return it locked.
 */
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	struct smap	*smp;
	struct vnode	*vp;
	u_offset_t	offset;
	caddr_t		baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
	int		hashid;
	kmutex_t	*hashmtx;
	page_t		*pp;
	union segmap_cpu *scpu;

	pp = hat_kpm_vaddr2page(baseaddr);

	ASSERT(pp && !PP_ISFREE(pp));
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);

	vp = pp->p_vnode;
	offset = pp->p_offset;
	ASSERT(vp != NULL);

	/*
	 * Assume the last smap used on this cpu is the one needed.
	 */
	scpu = smd_cpu+CPU->cpu_seqid;
	smp = scpu->scpu.scpu_last_smap;
	mutex_enter(&smp->sm_mtx);
	if (smp->sm_vp == vp && smp->sm_off == offset) {
		ASSERT(smp->sm_refcnt > 0);
	} else {
		/*
		 * Assumption wrong, find the smap on the hash chain.
		 */
		mutex_exit(&smp->sm_mtx);
		SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
		hashmtx = SHASHMTX(hashid);

		mutex_enter(hashmtx);
		smp = smd_hash[hashid].sh_hash_list;
		for (; smp != NULL; smp = smp->sm_hash) {
			if (smp->sm_vp == vp && smp->sm_off == offset)
				break;
		}
		mutex_exit(hashmtx);
		if (smp) {
			mutex_enter(&smp->sm_mtx);
			ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
		}
	}

	if (ppp)
		*ppp = smp ? pp : NULL;

	return (smp);
}

#else	/* SEGKPM_SUPPORT */

/* segkpm stubs */

/*ARGSUSED*/
static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
	struct smap *smp, enum seg_rw rw)
{
	return (NULL);
}

/*ARGSUSED*/
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	return (NULL);
}

#endif	/* SEGKPM_SUPPORT */