1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * Portions of this source code were derived from Berkeley 4.3 BSD 32 * under license from the Regents of the University of California. 33 */ 34 35 #pragma ident "%Z%%M% %I% %E% SMI" 36 37 /* 38 * VM - generic vnode mapping segment. 39 * 40 * The segmap driver is used only by the kernel to get faster (than seg_vn) 41 * mappings [lower routine overhead; more persistent cache] to random 42 * vnode/offsets. Note than the kernel may (and does) use seg_vn as well. 43 */ 44 45 #include <sys/types.h> 46 #include <sys/t_lock.h> 47 #include <sys/param.h> 48 #include <sys/sysmacros.h> 49 #include <sys/buf.h> 50 #include <sys/systm.h> 51 #include <sys/vnode.h> 52 #include <sys/mman.h> 53 #include <sys/errno.h> 54 #include <sys/cred.h> 55 #include <sys/kmem.h> 56 #include <sys/vtrace.h> 57 #include <sys/cmn_err.h> 58 #include <sys/debug.h> 59 #include <sys/thread.h> 60 #include <sys/dumphdr.h> 61 #include <sys/bitmap.h> 62 #include <sys/lgrp.h> 63 64 #include <vm/seg_kmem.h> 65 #include <vm/hat.h> 66 #include <vm/as.h> 67 #include <vm/seg.h> 68 #include <vm/seg_kpm.h> 69 #include <vm/seg_map.h> 70 #include <vm/page.h> 71 #include <vm/pvn.h> 72 #include <vm/rm.h> 73 74 /* 75 * Private seg op routines. 
76 */ 77 static void segmap_free(struct seg *seg); 78 faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr, 79 size_t len, enum fault_type type, enum seg_rw rw); 80 static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr); 81 static int segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, 82 uint_t prot); 83 static int segmap_kluster(struct seg *seg, caddr_t addr, ssize_t); 84 static int segmap_getprot(struct seg *seg, caddr_t addr, size_t len, 85 uint_t *protv); 86 static u_offset_t segmap_getoffset(struct seg *seg, caddr_t addr); 87 static int segmap_gettype(struct seg *seg, caddr_t addr); 88 static int segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp); 89 static void segmap_dump(struct seg *seg); 90 static int segmap_pagelock(struct seg *seg, caddr_t addr, size_t len, 91 struct page ***ppp, enum lock_type type, 92 enum seg_rw rw); 93 static void segmap_badop(void); 94 static int segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp); 95 static lgrp_mem_policy_info_t *segmap_getpolicy(struct seg *seg, 96 caddr_t addr); 97 static int segmap_capable(struct seg *seg, segcapability_t capability); 98 99 /* segkpm support */ 100 static caddr_t segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t, 101 struct smap *, enum seg_rw); 102 struct smap *get_smap_kpm(caddr_t, page_t **); 103 104 #define SEGMAP_BADOP(t) (t(*)())segmap_badop 105 106 static struct seg_ops segmap_ops = { 107 SEGMAP_BADOP(int), /* dup */ 108 SEGMAP_BADOP(int), /* unmap */ 109 segmap_free, 110 segmap_fault, 111 segmap_faulta, 112 SEGMAP_BADOP(int), /* setprot */ 113 segmap_checkprot, 114 segmap_kluster, 115 SEGMAP_BADOP(size_t), /* swapout */ 116 SEGMAP_BADOP(int), /* sync */ 117 SEGMAP_BADOP(size_t), /* incore */ 118 SEGMAP_BADOP(int), /* lockop */ 119 segmap_getprot, 120 segmap_getoffset, 121 segmap_gettype, 122 segmap_getvp, 123 SEGMAP_BADOP(int), /* advise */ 124 segmap_dump, 125 segmap_pagelock, /* pagelock */ 126 SEGMAP_BADOP(int), /* setpgsz */ 127 segmap_getmemid, /* getmemid */ 128 segmap_getpolicy, /* getpolicy */ 129 segmap_capable, /* capable */ 130 }; 131 132 /* 133 * Private segmap routines. 134 */ 135 static void segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr, 136 size_t len, enum seg_rw rw, struct smap *smp); 137 static void segmap_smapadd(struct smap *smp); 138 static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp, 139 u_offset_t off, int hashid); 140 static void segmap_hashout(struct smap *smp); 141 142 143 /* 144 * Statistics for segmap operations. 145 * 146 * No explicit locking to protect these stats. 
147 */ 148 struct segmapcnt segmapcnt = { 149 { "fault", KSTAT_DATA_ULONG }, 150 { "faulta", KSTAT_DATA_ULONG }, 151 { "getmap", KSTAT_DATA_ULONG }, 152 { "get_use", KSTAT_DATA_ULONG }, 153 { "get_reclaim", KSTAT_DATA_ULONG }, 154 { "get_reuse", KSTAT_DATA_ULONG }, 155 { "get_unused", KSTAT_DATA_ULONG }, 156 { "get_nofree", KSTAT_DATA_ULONG }, 157 { "rel_async", KSTAT_DATA_ULONG }, 158 { "rel_write", KSTAT_DATA_ULONG }, 159 { "rel_free", KSTAT_DATA_ULONG }, 160 { "rel_abort", KSTAT_DATA_ULONG }, 161 { "rel_dontneed", KSTAT_DATA_ULONG }, 162 { "release", KSTAT_DATA_ULONG }, 163 { "pagecreate", KSTAT_DATA_ULONG }, 164 { "free_notfree", KSTAT_DATA_ULONG }, 165 { "free_dirty", KSTAT_DATA_ULONG }, 166 { "free", KSTAT_DATA_ULONG }, 167 { "stolen", KSTAT_DATA_ULONG }, 168 { "get_nomtx", KSTAT_DATA_ULONG } 169 }; 170 171 kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt; 172 uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t); 173 174 /* 175 * Return number of map pages in segment. 176 */ 177 #define MAP_PAGES(seg) ((seg)->s_size >> MAXBSHIFT) 178 179 /* 180 * Translate addr into smap number within segment. 181 */ 182 #define MAP_PAGE(seg, addr) (((addr) - (seg)->s_base) >> MAXBSHIFT) 183 184 /* 185 * Translate addr in seg into struct smap pointer. 186 */ 187 #define GET_SMAP(seg, addr) \ 188 &(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)]) 189 190 /* 191 * Bit in map (16 bit bitmap). 192 */ 193 #define SMAP_BIT_MASK(bitindex) (1 << ((bitindex) & 0xf)) 194 195 static int smd_colormsk = 0; 196 static int smd_ncolor = 0; 197 static int smd_nfree = 0; 198 static int smd_freemsk = 0; 199 #ifdef DEBUG 200 static int *colors_used; 201 #endif 202 static struct smap *smd_smap; 203 static struct smaphash *smd_hash; 204 #ifdef SEGMAP_HASHSTATS 205 static unsigned int *smd_hash_len; 206 #endif 207 static struct smfree *smd_free; 208 static ulong_t smd_hashmsk = 0; 209 210 #define SEGMAP_MAXCOLOR 2 211 #define SEGMAP_CACHE_PAD 64 212 213 union segmap_cpu { 214 struct { 215 uint32_t scpu_free_ndx[SEGMAP_MAXCOLOR]; 216 struct smap *scpu_last_smap; 217 ulong_t scpu_getmap; 218 ulong_t scpu_release; 219 ulong_t scpu_get_reclaim; 220 ulong_t scpu_fault; 221 ulong_t scpu_pagecreate; 222 ulong_t scpu_get_reuse; 223 } scpu; 224 char scpu_pad[SEGMAP_CACHE_PAD]; 225 }; 226 static union segmap_cpu *smd_cpu; 227 228 /* 229 * There are three locks in seg_map: 230 * - per freelist mutexes 231 * - per hashchain mutexes 232 * - per smap mutexes 233 * 234 * The lock ordering is to get the smap mutex to lock down the slot 235 * first then the hash lock (for hash in/out (vp, off) list) or the 236 * freelist lock to put the slot back on the free list. 237 * 238 * The hash search is done by only holding the hashchain lock, when a wanted 239 * slot is found, we drop the hashchain lock then lock the slot so there 240 * is no overlapping of hashchain and smap locks. After the slot is 241 * locked, we verify again if the slot is still what we are looking 242 * for. 243 * 244 * Allocation of a free slot is done by holding the freelist lock, 245 * then locking the smap slot at the head of the freelist. This is 246 * in reversed lock order so mutex_tryenter() is used. 247 * 248 * The smap lock protects all fields in smap structure except for 249 * the link fields for hash/free lists which are protected by 250 * hashchain and freelist locks. 
251 */ 252 253 #define SHASHMTX(hashid) (&smd_hash[hashid].sh_mtx) 254 255 #define SMP2SMF(smp) (&smd_free[(smp - smd_smap) & smd_freemsk]) 256 #define SMP2SMF_NDX(smp) (ushort_t)((smp - smd_smap) & smd_freemsk) 257 258 #define SMAPMTX(smp) (&smp->sm_mtx) 259 260 #define SMAP_HASHFUNC(vp, off, hashid) \ 261 { \ 262 hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \ 263 ((off) >> MAXBSHIFT)) & smd_hashmsk); \ 264 } 265 266 /* 267 * The most frequently updated kstat counters are kept in the 268 * per cpu array to avoid hot cache blocks. The update function 269 * sums the cpu local counters to update the global counters. 270 */ 271 272 /* ARGSUSED */ 273 int 274 segmap_kstat_update(kstat_t *ksp, int rw) 275 { 276 int i; 277 ulong_t getmap, release, get_reclaim; 278 ulong_t fault, pagecreate, get_reuse; 279 280 if (rw == KSTAT_WRITE) 281 return (EACCES); 282 getmap = release = get_reclaim = (ulong_t)0; 283 fault = pagecreate = get_reuse = (ulong_t)0; 284 for (i = 0; i < max_ncpus; i++) { 285 getmap += smd_cpu[i].scpu.scpu_getmap; 286 release += smd_cpu[i].scpu.scpu_release; 287 get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim; 288 fault += smd_cpu[i].scpu.scpu_fault; 289 pagecreate += smd_cpu[i].scpu.scpu_pagecreate; 290 get_reuse += smd_cpu[i].scpu.scpu_get_reuse; 291 } 292 segmapcnt.smp_getmap.value.ul = getmap; 293 segmapcnt.smp_release.value.ul = release; 294 segmapcnt.smp_get_reclaim.value.ul = get_reclaim; 295 segmapcnt.smp_fault.value.ul = fault; 296 segmapcnt.smp_pagecreate.value.ul = pagecreate; 297 segmapcnt.smp_get_reuse.value.ul = get_reuse; 298 return (0); 299 } 300 301 int 302 segmap_create(struct seg *seg, void *argsp) 303 { 304 struct segmap_data *smd; 305 struct smap *smp; 306 struct smfree *sm; 307 struct segmap_crargs *a = (struct segmap_crargs *)argsp; 308 struct smaphash *shashp; 309 union segmap_cpu *scpu; 310 long i, npages; 311 size_t hashsz; 312 uint_t nfreelist; 313 extern void prefetch_smap_w(void *); 314 extern int max_ncpus; 315 316 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock)); 317 318 if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) { 319 panic("segkmap not MAXBSIZE aligned"); 320 /*NOTREACHED*/ 321 } 322 323 smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP); 324 325 seg->s_data = (void *)smd; 326 seg->s_ops = &segmap_ops; 327 smd->smd_prot = a->prot; 328 329 /* 330 * Scale the number of smap freelists to be 331 * proportional to max_ncpus * number of virtual colors. 332 * The caller can over-ride this scaling by providing 333 * a non-zero a->nfreelist argument. 334 */ 335 nfreelist = a->nfreelist; 336 if (nfreelist == 0) 337 nfreelist = max_ncpus; 338 else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) { 339 cmn_err(CE_WARN, "segmap_create: nfreelist out of range " 340 "%d, using %d", nfreelist, max_ncpus); 341 nfreelist = max_ncpus; 342 } 343 if (nfreelist & (nfreelist - 1)) { 344 /* round up nfreelist to the next power of two. */ 345 nfreelist = 1 << (highbit(nfreelist)); 346 } 347 348 /* 349 * Get the number of virtual colors - must be a power of 2. 350 */ 351 if (a->shmsize) 352 smd_ncolor = a->shmsize >> MAXBSHIFT; 353 else 354 smd_ncolor = 1; 355 ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0); 356 ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR); 357 smd_colormsk = smd_ncolor - 1; 358 smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist; 359 smd_freemsk = smd_nfree - 1; 360 361 /* 362 * Allocate and initialize the freelist headers. 363 * Note that sm_freeq[1] starts out as the release queue. 
This 364 * is known when the smap structures are initialized below. 365 */ 366 smd_free = smd->smd_free = 367 kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP); 368 for (i = 0; i < smd_nfree; i++) { 369 sm = &smd->smd_free[i]; 370 mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL); 371 mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL); 372 sm->sm_allocq = &sm->sm_freeq[0]; 373 sm->sm_releq = &sm->sm_freeq[1]; 374 } 375 376 /* 377 * Allocate and initialize the smap hash chain headers. 378 * Compute hash size rounding down to the next power of two. 379 */ 380 npages = MAP_PAGES(seg); 381 smd->smd_npages = npages; 382 hashsz = npages / SMAP_HASHAVELEN; 383 hashsz = 1 << (highbit(hashsz)-1); 384 smd_hashmsk = hashsz - 1; 385 smd_hash = smd->smd_hash = 386 kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP); 387 #ifdef SEGMAP_HASHSTATS 388 smd_hash_len = 389 kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP); 390 #endif 391 for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) { 392 shashp->sh_hash_list = NULL; 393 mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL); 394 } 395 396 /* 397 * Allocate and initialize the smap structures. 398 * Link all slots onto the appropriate freelist. 399 * The smap array is large enough to affect boot time 400 * on large systems, so use memory prefetching and only 401 * go through the array 1 time. Inline a optimized version 402 * of segmap_smapadd to add structures to freelists with 403 * knowledge that no locks are needed here. 404 */ 405 smd_smap = smd->smd_sm = 406 kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP); 407 408 for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1]; 409 smp >= smd->smd_sm; smp--) { 410 struct smap *smpfreelist; 411 struct sm_freeq *releq; 412 413 prefetch_smap_w((char *)smp); 414 415 smp->sm_vp = NULL; 416 smp->sm_hash = NULL; 417 smp->sm_off = 0; 418 smp->sm_bitmap = 0; 419 smp->sm_refcnt = 0; 420 mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL); 421 smp->sm_free_ndx = SMP2SMF_NDX(smp); 422 423 sm = SMP2SMF(smp); 424 releq = sm->sm_releq; 425 426 smpfreelist = releq->smq_free; 427 if (smpfreelist == 0) { 428 releq->smq_free = smp->sm_next = smp->sm_prev = smp; 429 } else { 430 smp->sm_next = smpfreelist; 431 smp->sm_prev = smpfreelist->sm_prev; 432 smpfreelist->sm_prev = smp; 433 smp->sm_prev->sm_next = smp; 434 releq->smq_free = smp->sm_next; 435 } 436 437 /* 438 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1] 439 */ 440 smp->sm_flags = 0; 441 442 #ifdef SEGKPM_SUPPORT 443 /* 444 * Due to the fragile prefetch loop no 445 * separate function is used here. 446 */ 447 smp->sm_kpme_next = NULL; 448 smp->sm_kpme_prev = NULL; 449 smp->sm_kpme_page = NULL; 450 #endif 451 } 452 453 /* 454 * Allocate the per color indices that distribute allocation 455 * requests over the free lists. Each cpu will have a private 456 * rotor index to spread the allocations even across the available 457 * smap freelists. Init the scpu_last_smap field to the first 458 * smap element so there is no need to check for NULL. 459 */ 460 smd_cpu = 461 kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP); 462 for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) { 463 int j; 464 for (j = 0; j < smd_ncolor; j++) 465 scpu->scpu.scpu_free_ndx[j] = j; 466 scpu->scpu.scpu_last_smap = smd_smap; 467 } 468 469 #ifdef DEBUG 470 /* 471 * Keep track of which colors are used more often. 
472 */ 473 colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP); 474 #endif /* DEBUG */ 475 476 return (0); 477 } 478 479 static void 480 segmap_free(seg) 481 struct seg *seg; 482 { 483 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock)); 484 } 485 486 /* 487 * Do a F_SOFTUNLOCK call over the range requested. 488 * The range must have already been F_SOFTLOCK'ed. 489 */ 490 static void 491 segmap_unlock( 492 struct hat *hat, 493 struct seg *seg, 494 caddr_t addr, 495 size_t len, 496 enum seg_rw rw, 497 struct smap *smp) 498 { 499 page_t *pp; 500 caddr_t adr; 501 u_offset_t off; 502 struct vnode *vp; 503 kmutex_t *smtx; 504 505 ASSERT(smp->sm_refcnt > 0); 506 507 #ifdef lint 508 seg = seg; 509 #endif 510 511 if (segmap_kpm && IS_KPM_ADDR(addr)) { 512 513 /* 514 * We're called only from segmap_fault and this was a 515 * NOP in case of a kpm based smap, so dangerous things 516 * must have happened in the meantime. Pages are prefaulted 517 * and locked in segmap_getmapflt and they will not be 518 * unlocked until segmap_release. 519 */ 520 panic("segmap_unlock: called with kpm addr %p", (void *)addr); 521 /*NOTREACHED*/ 522 } 523 524 vp = smp->sm_vp; 525 off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET); 526 527 hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE)); 528 for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) { 529 ushort_t bitmask; 530 531 /* 532 * Use page_find() instead of page_lookup() to 533 * find the page since we know that it has 534 * "shared" lock. 535 */ 536 pp = page_find(vp, off); 537 if (pp == NULL) { 538 panic("segmap_unlock: page not found"); 539 /*NOTREACHED*/ 540 } 541 542 if (rw == S_WRITE) { 543 hat_setrefmod(pp); 544 } else if (rw != S_OTHER) { 545 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT, 546 "segmap_fault:pp %p vp %p offset %llx", 547 pp, vp, off); 548 hat_setref(pp); 549 } 550 551 /* 552 * Clear bitmap, if the bit corresponding to "off" is set, 553 * since the page and translation are being unlocked. 554 */ 555 bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT); 556 557 /* 558 * Large Files: Following assertion is to verify 559 * the correctness of the cast to (int) above. 560 */ 561 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX); 562 smtx = SMAPMTX(smp); 563 mutex_enter(smtx); 564 if (smp->sm_bitmap & bitmask) { 565 smp->sm_bitmap &= ~bitmask; 566 } 567 mutex_exit(smtx); 568 569 page_unlock(pp); 570 } 571 } 572 573 #define MAXPPB (MAXBSIZE/4096) /* assumes minimum page size of 4k */ 574 575 /* 576 * This routine is called via a machine specific fault handling 577 * routine. It is also called by software routines wishing to 578 * lock or unlock a range of addresses. 579 * 580 * Note that this routine expects a page-aligned "addr". 581 */ 582 faultcode_t 583 segmap_fault( 584 struct hat *hat, 585 struct seg *seg, 586 caddr_t addr, 587 size_t len, 588 enum fault_type type, 589 enum seg_rw rw) 590 { 591 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 592 struct smap *smp; 593 page_t *pp, **ppp; 594 struct vnode *vp; 595 u_offset_t off; 596 page_t *pl[MAXPPB + 1]; 597 uint_t prot; 598 u_offset_t addroff; 599 caddr_t adr; 600 int err; 601 u_offset_t sm_off; 602 int hat_flag; 603 604 if (segmap_kpm && IS_KPM_ADDR(addr)) { 605 int newpage; 606 kmutex_t *smtx; 607 608 /* 609 * Pages are successfully prefaulted and locked in 610 * segmap_getmapflt and can't be unlocked until 611 * segmap_release. 
No hat mappings have to be locked 612 * and they also can't be unlocked as long as the 613 * caller owns an active kpm addr. 614 */ 615 #ifndef DEBUG 616 if (type != F_SOFTUNLOCK) 617 return (0); 618 #endif 619 620 if ((smp = get_smap_kpm(addr, NULL)) == NULL) { 621 panic("segmap_fault: smap not found " 622 "for addr %p", (void *)addr); 623 /*NOTREACHED*/ 624 } 625 626 smtx = SMAPMTX(smp); 627 #ifdef DEBUG 628 newpage = smp->sm_flags & SM_KPM_NEWPAGE; 629 if (newpage) { 630 cmn_err(CE_WARN, "segmap_fault: newpage? smp %p", 631 (void *)smp); 632 } 633 634 if (type != F_SOFTUNLOCK) { 635 mutex_exit(smtx); 636 return (0); 637 } 638 #endif 639 mutex_exit(smtx); 640 vp = smp->sm_vp; 641 sm_off = smp->sm_off; 642 643 if (vp == NULL) 644 return (FC_MAKE_ERR(EIO)); 645 646 ASSERT(smp->sm_refcnt > 0); 647 648 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET); 649 if (addroff + len > MAXBSIZE) 650 panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk", 651 (void *)(addr + len)); 652 653 off = sm_off + addroff; 654 655 pp = page_find(vp, off); 656 657 if (pp == NULL) 658 panic("segmap_fault: softunlock page not found"); 659 660 /* 661 * Set ref bit also here in case of S_OTHER to avoid the 662 * overhead of supporting other cases than F_SOFTUNLOCK 663 * with segkpm. We can do this because the underlying 664 * pages are locked anyway. 665 */ 666 if (rw == S_WRITE) { 667 hat_setrefmod(pp); 668 } else { 669 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT, 670 "segmap_fault:pp %p vp %p offset %llx", 671 pp, vp, off); 672 hat_setref(pp); 673 } 674 675 return (0); 676 } 677 678 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++; 679 smp = GET_SMAP(seg, addr); 680 vp = smp->sm_vp; 681 sm_off = smp->sm_off; 682 683 if (vp == NULL) 684 return (FC_MAKE_ERR(EIO)); 685 686 ASSERT(smp->sm_refcnt > 0); 687 688 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET); 689 if (addroff + len > MAXBSIZE) { 690 panic("segmap_fault: endaddr %p " 691 "exceeds MAXBSIZE chunk", (void *)(addr + len)); 692 /*NOTREACHED*/ 693 } 694 off = sm_off + addroff; 695 696 /* 697 * First handle the easy stuff 698 */ 699 if (type == F_SOFTUNLOCK) { 700 segmap_unlock(hat, seg, addr, len, rw, smp); 701 return (0); 702 } 703 704 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE, 705 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp); 706 err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE, 707 seg, addr, rw, CRED()); 708 709 if (err) 710 return (FC_MAKE_ERR(err)); 711 712 prot &= smd->smd_prot; 713 714 /* 715 * Handle all pages returned in the pl[] array. 716 * This loop is coded on the assumption that if 717 * there was no error from the VOP_GETPAGE routine, 718 * that the page list returned will contain all the 719 * needed pages for the vp from [off..off + len]. 720 */ 721 ppp = pl; 722 while ((pp = *ppp++) != NULL) { 723 u_offset_t poff; 724 ASSERT(pp->p_vnode == vp); 725 hat_flag = HAT_LOAD; 726 727 /* 728 * Verify that the pages returned are within the range 729 * of this segmap region. Note that it is theoretically 730 * possible for pages outside this range to be returned, 731 * but it is not very likely. If we cannot use the 732 * page here, just release it and go on to the next one. 
733 */ 734 if (pp->p_offset < sm_off || 735 pp->p_offset >= sm_off + MAXBSIZE) { 736 (void) page_release(pp, 1); 737 continue; 738 } 739 740 ASSERT(hat == kas.a_hat); 741 poff = pp->p_offset; 742 adr = addr + (poff - off); 743 if (adr >= addr && adr < addr + len) { 744 hat_setref(pp); 745 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT, 746 "segmap_fault:pp %p vp %p offset %llx", 747 pp, vp, poff); 748 if (type == F_SOFTLOCK) 749 hat_flag = HAT_LOAD_LOCK; 750 } 751 752 /* 753 * Deal with VMODSORT pages here. If we know this is a write 754 * do the setmod now and allow write protection. 755 * As long as it's modified or not S_OTHER, remove write 756 * protection. With S_OTHER it's up to the FS to deal with this. 757 */ 758 if (IS_VMODSORT(vp)) { 759 if (rw == S_WRITE) 760 hat_setmod(pp); 761 else if (rw != S_OTHER && !hat_ismod(pp)) 762 prot &= ~PROT_WRITE; 763 } 764 765 hat_memload(hat, adr, pp, prot, hat_flag); 766 if (hat_flag != HAT_LOAD_LOCK) 767 page_unlock(pp); 768 } 769 return (0); 770 } 771 772 /* 773 * This routine is used to start I/O on pages asynchronously. 774 */ 775 static faultcode_t 776 segmap_faulta(struct seg *seg, caddr_t addr) 777 { 778 struct smap *smp; 779 struct vnode *vp; 780 u_offset_t off; 781 int err; 782 783 if (segmap_kpm && IS_KPM_ADDR(addr)) { 784 int newpage; 785 kmutex_t *smtx; 786 787 /* 788 * Pages are successfully prefaulted and locked in 789 * segmap_getmapflt and can't be unlocked until 790 * segmap_release. No hat mappings have to be locked 791 * and they also can't be unlocked as long as the 792 * caller owns an active kpm addr. 793 */ 794 #ifdef DEBUG 795 if ((smp = get_smap_kpm(addr, NULL)) == NULL) { 796 panic("segmap_faulta: smap not found " 797 "for addr %p", (void *)addr); 798 /*NOTREACHED*/ 799 } 800 801 smtx = SMAPMTX(smp); 802 newpage = smp->sm_flags & SM_KPM_NEWPAGE; 803 mutex_exit(smtx); 804 if (newpage) 805 cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p", 806 (void *)smp); 807 #endif 808 return (0); 809 } 810 811 segmapcnt.smp_faulta.value.ul++; 812 smp = GET_SMAP(seg, addr); 813 814 ASSERT(smp->sm_refcnt > 0); 815 816 vp = smp->sm_vp; 817 off = smp->sm_off; 818 819 if (vp == NULL) { 820 cmn_err(CE_WARN, "segmap_faulta - no vp"); 821 return (FC_MAKE_ERR(EIO)); 822 } 823 824 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE, 825 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp); 826 827 err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr 828 & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0, 829 seg, addr, S_READ, CRED()); 830 831 if (err) 832 return (FC_MAKE_ERR(err)); 833 return (0); 834 } 835 836 /*ARGSUSED*/ 837 static int 838 segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 839 { 840 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 841 842 ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); 843 844 /* 845 * Need not acquire the segment lock since 846 * "smd_prot" is a read-only field. 847 */ 848 return (((smd->smd_prot & prot) != prot) ? 
EACCES : 0); 849 } 850 851 static int 852 segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) 853 { 854 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 855 size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1; 856 857 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 858 859 if (pgno != 0) { 860 do 861 protv[--pgno] = smd->smd_prot; 862 while (pgno != 0); 863 } 864 return (0); 865 } 866 867 static u_offset_t 868 segmap_getoffset(struct seg *seg, caddr_t addr) 869 { 870 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 871 872 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock)); 873 874 return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base)); 875 } 876 877 /*ARGSUSED*/ 878 static int 879 segmap_gettype(struct seg *seg, caddr_t addr) 880 { 881 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock)); 882 883 return (MAP_SHARED); 884 } 885 886 /*ARGSUSED*/ 887 static int 888 segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp) 889 { 890 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 891 892 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock)); 893 894 /* XXX - This doesn't make any sense */ 895 *vpp = smd->smd_sm->sm_vp; 896 return (0); 897 } 898 899 /* 900 * Check to see if it makes sense to do kluster/read ahead to 901 * addr + delta relative to the mapping at addr. We assume here 902 * that delta is a signed PAGESIZE'd multiple (which can be negative). 903 * 904 * For segmap we always "approve" of this action from our standpoint. 905 */ 906 /*ARGSUSED*/ 907 static int 908 segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta) 909 { 910 return (0); 911 } 912 913 static void 914 segmap_badop() 915 { 916 panic("segmap_badop"); 917 /*NOTREACHED*/ 918 } 919 920 /* 921 * Special private segmap operations 922 */ 923 924 /* 925 * Add smap to the appropriate free list. 926 */ 927 static void 928 segmap_smapadd(struct smap *smp) 929 { 930 struct smfree *sm; 931 struct smap *smpfreelist; 932 struct sm_freeq *releq; 933 934 ASSERT(MUTEX_HELD(SMAPMTX(smp))); 935 936 if (smp->sm_refcnt != 0) { 937 panic("segmap_smapadd"); 938 /*NOTREACHED*/ 939 } 940 941 sm = &smd_free[smp->sm_free_ndx]; 942 /* 943 * Add to the tail of the release queue 944 * Note that sm_releq and sm_allocq could toggle 945 * before we get the lock. This does not affect 946 * correctness as the 2 queues are only maintained 947 * to reduce lock pressure. 948 */ 949 releq = sm->sm_releq; 950 if (releq == &sm->sm_freeq[0]) 951 smp->sm_flags |= SM_QNDX_ZERO; 952 else 953 smp->sm_flags &= ~SM_QNDX_ZERO; 954 mutex_enter(&releq->smq_mtx); 955 smpfreelist = releq->smq_free; 956 if (smpfreelist == 0) { 957 int want; 958 959 releq->smq_free = smp->sm_next = smp->sm_prev = smp; 960 /* 961 * Both queue mutexes held to set sm_want; 962 * snapshot the value before dropping releq mutex. 963 * If sm_want appears after the releq mutex is dropped, 964 * then the smap just freed is already gone. 965 */ 966 want = sm->sm_want; 967 mutex_exit(&releq->smq_mtx); 968 /* 969 * See if there was a waiter before dropping the releq mutex 970 * then recheck after obtaining sm_freeq[0] mutex as 971 * the another thread may have already signaled. 
972 */ 973 if (want) { 974 mutex_enter(&sm->sm_freeq[0].smq_mtx); 975 if (sm->sm_want) 976 cv_signal(&sm->sm_free_cv); 977 mutex_exit(&sm->sm_freeq[0].smq_mtx); 978 } 979 } else { 980 smp->sm_next = smpfreelist; 981 smp->sm_prev = smpfreelist->sm_prev; 982 smpfreelist->sm_prev = smp; 983 smp->sm_prev->sm_next = smp; 984 mutex_exit(&releq->smq_mtx); 985 } 986 } 987 988 989 static struct smap * 990 segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid) 991 { 992 struct smap **hpp; 993 struct smap *tmp; 994 kmutex_t *hmtx; 995 996 ASSERT(MUTEX_HELD(SMAPMTX(smp))); 997 ASSERT(smp->sm_vp == NULL); 998 ASSERT(smp->sm_hash == NULL); 999 ASSERT(smp->sm_prev == NULL); 1000 ASSERT(smp->sm_next == NULL); 1001 ASSERT(hashid >= 0 && hashid <= smd_hashmsk); 1002 1003 hmtx = SHASHMTX(hashid); 1004 1005 mutex_enter(hmtx); 1006 /* 1007 * First we need to verify that no one has created a smp 1008 * with (vp,off) as its tag before we us. 1009 */ 1010 for (tmp = smd_hash[hashid].sh_hash_list; 1011 tmp != NULL; tmp = tmp->sm_hash) 1012 if (tmp->sm_vp == vp && tmp->sm_off == off) 1013 break; 1014 1015 if (tmp == NULL) { 1016 /* 1017 * No one created one yet. 1018 * 1019 * Funniness here - we don't increment the ref count on the 1020 * vnode * even though we have another pointer to it here. 1021 * The reason for this is that we don't want the fact that 1022 * a seg_map entry somewhere refers to a vnode to prevent the 1023 * vnode * itself from going away. This is because this 1024 * reference to the vnode is a "soft one". In the case where 1025 * a mapping is being used by a rdwr [or directory routine?] 1026 * there already has to be a non-zero ref count on the vnode. 1027 * In the case where the vp has been freed and the the smap 1028 * structure is on the free list, there are no pages in memory 1029 * that can refer to the vnode. Thus even if we reuse the same 1030 * vnode/smap structure for a vnode which has the same 1031 * address but represents a different object, we are ok. 1032 */ 1033 smp->sm_vp = vp; 1034 smp->sm_off = off; 1035 1036 hpp = &smd_hash[hashid].sh_hash_list; 1037 smp->sm_hash = *hpp; 1038 *hpp = smp; 1039 #ifdef SEGMAP_HASHSTATS 1040 smd_hash_len[hashid]++; 1041 #endif 1042 } 1043 mutex_exit(hmtx); 1044 1045 return (tmp); 1046 } 1047 1048 static void 1049 segmap_hashout(struct smap *smp) 1050 { 1051 struct smap **hpp, *hp; 1052 struct vnode *vp; 1053 kmutex_t *mtx; 1054 int hashid; 1055 u_offset_t off; 1056 1057 ASSERT(MUTEX_HELD(SMAPMTX(smp))); 1058 1059 vp = smp->sm_vp; 1060 off = smp->sm_off; 1061 1062 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */ 1063 mtx = SHASHMTX(hashid); 1064 mutex_enter(mtx); 1065 1066 hpp = &smd_hash[hashid].sh_hash_list; 1067 for (;;) { 1068 hp = *hpp; 1069 if (hp == NULL) { 1070 panic("segmap_hashout"); 1071 /*NOTREACHED*/ 1072 } 1073 if (hp == smp) 1074 break; 1075 hpp = &hp->sm_hash; 1076 } 1077 1078 *hpp = smp->sm_hash; 1079 smp->sm_hash = NULL; 1080 #ifdef SEGMAP_HASHSTATS 1081 smd_hash_len[hashid]--; 1082 #endif 1083 mutex_exit(mtx); 1084 1085 smp->sm_vp = NULL; 1086 smp->sm_off = (u_offset_t)0; 1087 1088 } 1089 1090 /* 1091 * Attempt to free unmodified, unmapped, and non locked segmap 1092 * pages. 
1093 */ 1094 void 1095 segmap_pagefree(struct vnode *vp, u_offset_t off) 1096 { 1097 u_offset_t pgoff; 1098 page_t *pp; 1099 1100 for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) { 1101 1102 if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL) 1103 continue; 1104 1105 switch (page_release(pp, 1)) { 1106 case PGREL_NOTREL: 1107 segmapcnt.smp_free_notfree.value.ul++; 1108 break; 1109 case PGREL_MOD: 1110 segmapcnt.smp_free_dirty.value.ul++; 1111 break; 1112 case PGREL_CLEAN: 1113 segmapcnt.smp_free.value.ul++; 1114 break; 1115 } 1116 } 1117 } 1118 1119 /* 1120 * Locks held on entry: smap lock 1121 * Locks held on exit : smap lock. 1122 */ 1123 1124 static void 1125 grab_smp(struct smap *smp, page_t *pp) 1126 { 1127 ASSERT(MUTEX_HELD(SMAPMTX(smp))); 1128 ASSERT(smp->sm_refcnt == 0); 1129 1130 if (smp->sm_vp != (struct vnode *)NULL) { 1131 struct vnode *vp = smp->sm_vp; 1132 u_offset_t off = smp->sm_off; 1133 /* 1134 * Destroy old vnode association and 1135 * unload any hardware translations to 1136 * the old object. 1137 */ 1138 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++; 1139 segmap_hashout(smp); 1140 1141 /* 1142 * This node is off freelist and hashlist, 1143 * so there is no reason to drop/reacquire sm_mtx 1144 * across calls to hat_unload. 1145 */ 1146 if (segmap_kpm) { 1147 caddr_t vaddr; 1148 int hat_unload_needed = 0; 1149 1150 /* 1151 * unload kpm mapping 1152 */ 1153 if (pp != NULL) { 1154 vaddr = hat_kpm_page2va(pp, 1); 1155 hat_kpm_mapout(pp, GET_KPME(smp), vaddr); 1156 page_unlock(pp); 1157 } 1158 1159 /* 1160 * Check if we have (also) the rare case of a 1161 * non kpm mapping. 1162 */ 1163 if (smp->sm_flags & SM_NOTKPM_RELEASED) { 1164 hat_unload_needed = 1; 1165 smp->sm_flags &= ~SM_NOTKPM_RELEASED; 1166 } 1167 1168 if (hat_unload_needed) { 1169 hat_unload(kas.a_hat, segkmap->s_base + 1170 ((smp - smd_smap) * MAXBSIZE), 1171 MAXBSIZE, HAT_UNLOAD); 1172 } 1173 1174 } else { 1175 ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED); 1176 smp->sm_flags &= ~SM_NOTKPM_RELEASED; 1177 hat_unload(kas.a_hat, segkmap->s_base + 1178 ((smp - smd_smap) * MAXBSIZE), 1179 MAXBSIZE, HAT_UNLOAD); 1180 } 1181 segmap_pagefree(vp, off); 1182 } 1183 } 1184 1185 static struct smap * 1186 get_free_smp(int free_ndx) 1187 { 1188 struct smfree *sm; 1189 kmutex_t *smtx; 1190 struct smap *smp, *first; 1191 struct sm_freeq *allocq, *releq; 1192 struct kpme *kpme; 1193 page_t *pp = NULL; 1194 int end_ndx, page_locked = 0; 1195 1196 end_ndx = free_ndx; 1197 sm = &smd_free[free_ndx]; 1198 1199 retry_queue: 1200 allocq = sm->sm_allocq; 1201 mutex_enter(&allocq->smq_mtx); 1202 1203 if ((smp = allocq->smq_free) == NULL) { 1204 1205 skip_queue: 1206 /* 1207 * The alloc list is empty or this queue is being skipped; 1208 * first see if the allocq toggled. 1209 */ 1210 if (sm->sm_allocq != allocq) { 1211 /* queue changed */ 1212 mutex_exit(&allocq->smq_mtx); 1213 goto retry_queue; 1214 } 1215 releq = sm->sm_releq; 1216 if (!mutex_tryenter(&releq->smq_mtx)) { 1217 /* cannot get releq; a free smp may be there now */ 1218 mutex_exit(&allocq->smq_mtx); 1219 1220 /* 1221 * This loop could spin forever if this thread has 1222 * higher priority than the thread that is holding 1223 * releq->smq_mtx. In order to force the other thread 1224 * to run, we'll lock/unlock the mutex which is safe 1225 * since we just unlocked the allocq mutex. 
1226 */ 1227 mutex_enter(&releq->smq_mtx); 1228 mutex_exit(&releq->smq_mtx); 1229 goto retry_queue; 1230 } 1231 if (releq->smq_free == NULL) { 1232 /* 1233 * This freelist is empty. 1234 * This should not happen unless clients 1235 * are failing to release the segmap 1236 * window after accessing the data. 1237 * Before resorting to sleeping, try 1238 * the next list of the same color. 1239 */ 1240 free_ndx = (free_ndx + smd_ncolor) & smd_freemsk; 1241 if (free_ndx != end_ndx) { 1242 mutex_exit(&releq->smq_mtx); 1243 mutex_exit(&allocq->smq_mtx); 1244 sm = &smd_free[free_ndx]; 1245 goto retry_queue; 1246 } 1247 /* 1248 * Tried all freelists of the same color once, 1249 * wait on this list and hope something gets freed. 1250 */ 1251 segmapcnt.smp_get_nofree.value.ul++; 1252 sm->sm_want++; 1253 mutex_exit(&sm->sm_freeq[1].smq_mtx); 1254 cv_wait(&sm->sm_free_cv, 1255 &sm->sm_freeq[0].smq_mtx); 1256 sm->sm_want--; 1257 mutex_exit(&sm->sm_freeq[0].smq_mtx); 1258 sm = &smd_free[free_ndx]; 1259 goto retry_queue; 1260 } else { 1261 /* 1262 * Something on the rele queue; flip the alloc 1263 * and rele queues and retry. 1264 */ 1265 sm->sm_allocq = releq; 1266 sm->sm_releq = allocq; 1267 mutex_exit(&allocq->smq_mtx); 1268 mutex_exit(&releq->smq_mtx); 1269 if (page_locked) { 1270 delay(hz >> 2); 1271 page_locked = 0; 1272 } 1273 goto retry_queue; 1274 } 1275 } else { 1276 /* 1277 * Fastpath the case we get the smap mutex 1278 * on the first try. 1279 */ 1280 first = smp; 1281 next_smap: 1282 smtx = SMAPMTX(smp); 1283 if (!mutex_tryenter(smtx)) { 1284 /* 1285 * Another thread is trying to reclaim this slot. 1286 * Skip to the next queue or smap. 1287 */ 1288 if ((smp = smp->sm_next) == first) { 1289 goto skip_queue; 1290 } else { 1291 goto next_smap; 1292 } 1293 } else { 1294 /* 1295 * if kpme exists, get shared lock on the page 1296 */ 1297 if (segmap_kpm && smp->sm_vp != NULL) { 1298 1299 kpme = GET_KPME(smp); 1300 pp = kpme->kpe_page; 1301 1302 if (pp != NULL) { 1303 if (!page_trylock(pp, SE_SHARED)) { 1304 smp = smp->sm_next; 1305 mutex_exit(smtx); 1306 page_locked = 1; 1307 1308 pp = NULL; 1309 1310 if (smp == first) { 1311 goto skip_queue; 1312 } else { 1313 goto next_smap; 1314 } 1315 } else { 1316 if (kpme->kpe_page == NULL) { 1317 page_unlock(pp); 1318 pp = NULL; 1319 } 1320 } 1321 } 1322 } 1323 1324 /* 1325 * At this point, we've selected smp. Remove smp 1326 * from its freelist. If smp is the first one in 1327 * the freelist, update the head of the freelist. 1328 */ 1329 if (first == smp) { 1330 ASSERT(first == allocq->smq_free); 1331 allocq->smq_free = smp->sm_next; 1332 } 1333 1334 /* 1335 * if the head of the freelist still points to smp, 1336 * then there are no more free smaps in that list. 1337 */ 1338 if (allocq->smq_free == smp) 1339 /* 1340 * Took the last one 1341 */ 1342 allocq->smq_free = NULL; 1343 else { 1344 smp->sm_prev->sm_next = smp->sm_next; 1345 smp->sm_next->sm_prev = smp->sm_prev; 1346 } 1347 mutex_exit(&allocq->smq_mtx); 1348 smp->sm_prev = smp->sm_next = NULL; 1349 1350 /* 1351 * if pp != NULL, pp must have been locked; 1352 * grab_smp() unlocks pp. 1353 */ 1354 ASSERT((pp == NULL) || PAGE_LOCKED(pp)); 1355 grab_smp(smp, pp); 1356 /* return smp locked. */ 1357 ASSERT(SMAPMTX(smp) == smtx); 1358 ASSERT(MUTEX_HELD(smtx)); 1359 return (smp); 1360 } 1361 } 1362 } 1363 1364 /* 1365 * Special public segmap operations 1366 */ 1367 1368 /* 1369 * Create pages (without using VOP_GETPAGE) and load up tranlations to them. 
1370 * If softlock is TRUE, then set things up so that it looks like a call 1371 * to segmap_fault with F_SOFTLOCK. 1372 * 1373 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise. 1374 * 1375 * All fields in the generic segment (struct seg) are considered to be 1376 * read-only for "segmap" even though the kernel address space (kas) may 1377 * not be locked, hence no lock is needed to access them. 1378 */ 1379 int 1380 segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock) 1381 { 1382 struct segmap_data *smd = (struct segmap_data *)seg->s_data; 1383 page_t *pp; 1384 u_offset_t off; 1385 struct smap *smp; 1386 struct vnode *vp; 1387 caddr_t eaddr; 1388 int newpage = 0; 1389 uint_t prot; 1390 kmutex_t *smtx; 1391 int hat_flag; 1392 1393 ASSERT(seg->s_as == &kas); 1394 1395 if (segmap_kpm && IS_KPM_ADDR(addr)) { 1396 /* 1397 * Pages are successfully prefaulted and locked in 1398 * segmap_getmapflt and can't be unlocked until 1399 * segmap_release. The SM_KPM_NEWPAGE flag is set 1400 * in segmap_pagecreate_kpm when new pages are created. 1401 * and it is returned as "newpage" indication here. 1402 */ 1403 if ((smp = get_smap_kpm(addr, NULL)) == NULL) { 1404 panic("segmap_pagecreate: smap not found " 1405 "for addr %p", (void *)addr); 1406 /*NOTREACHED*/ 1407 } 1408 1409 smtx = SMAPMTX(smp); 1410 newpage = smp->sm_flags & SM_KPM_NEWPAGE; 1411 smp->sm_flags &= ~SM_KPM_NEWPAGE; 1412 mutex_exit(smtx); 1413 1414 return (newpage); 1415 } 1416 1417 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++; 1418 1419 eaddr = addr + len; 1420 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 1421 1422 smp = GET_SMAP(seg, addr); 1423 1424 /* 1425 * We don't grab smp mutex here since we assume the smp 1426 * has a refcnt set already which prevents the slot from 1427 * changing its id. 1428 */ 1429 ASSERT(smp->sm_refcnt > 0); 1430 1431 vp = smp->sm_vp; 1432 off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET)); 1433 prot = smd->smd_prot; 1434 1435 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) { 1436 hat_flag = HAT_LOAD; 1437 pp = page_lookup(vp, off, SE_SHARED); 1438 if (pp == NULL) { 1439 ushort_t bitindex; 1440 1441 if ((pp = page_create_va(vp, off, 1442 PAGESIZE, PG_WAIT, seg, addr)) == NULL) { 1443 panic("segmap_pagecreate: page_create failed"); 1444 /*NOTREACHED*/ 1445 } 1446 newpage = 1; 1447 page_io_unlock(pp); 1448 1449 /* 1450 * Since pages created here do not contain valid 1451 * data until the caller writes into them, the 1452 * "exclusive" lock will not be dropped to prevent 1453 * other users from accessing the page. We also 1454 * have to lock the translation to prevent a fault 1455 * from occuring when the virtual address mapped by 1456 * this page is written into. This is necessary to 1457 * avoid a deadlock since we haven't dropped the 1458 * "exclusive" lock. 1459 */ 1460 bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT); 1461 1462 /* 1463 * Large Files: The following assertion is to 1464 * verify the cast above. 
1465 */ 1466 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX); 1467 smtx = SMAPMTX(smp); 1468 mutex_enter(smtx); 1469 smp->sm_bitmap |= SMAP_BIT_MASK(bitindex); 1470 mutex_exit(smtx); 1471 1472 hat_flag = HAT_LOAD_LOCK; 1473 } else if (softlock) { 1474 hat_flag = HAT_LOAD_LOCK; 1475 } 1476 1477 if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE)) 1478 hat_setmod(pp); 1479 1480 hat_memload(kas.a_hat, addr, pp, prot, hat_flag); 1481 1482 if (hat_flag != HAT_LOAD_LOCK) 1483 page_unlock(pp); 1484 1485 TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE, 1486 "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx", 1487 seg, addr, pp, vp, off); 1488 } 1489 1490 return (newpage); 1491 } 1492 1493 void 1494 segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw) 1495 { 1496 struct smap *smp; 1497 ushort_t bitmask; 1498 page_t *pp; 1499 struct vnode *vp; 1500 u_offset_t off; 1501 caddr_t eaddr; 1502 kmutex_t *smtx; 1503 1504 ASSERT(seg->s_as == &kas); 1505 1506 eaddr = addr + len; 1507 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 1508 1509 if (segmap_kpm && IS_KPM_ADDR(addr)) { 1510 /* 1511 * Pages are successfully prefaulted and locked in 1512 * segmap_getmapflt and can't be unlocked until 1513 * segmap_release, so no pages or hat mappings have 1514 * to be unlocked at this point. 1515 */ 1516 #ifdef DEBUG 1517 if ((smp = get_smap_kpm(addr, NULL)) == NULL) { 1518 panic("segmap_pageunlock: smap not found " 1519 "for addr %p", (void *)addr); 1520 /*NOTREACHED*/ 1521 } 1522 1523 ASSERT(smp->sm_refcnt > 0); 1524 mutex_exit(SMAPMTX(smp)); 1525 #endif 1526 return; 1527 } 1528 1529 smp = GET_SMAP(seg, addr); 1530 smtx = SMAPMTX(smp); 1531 1532 ASSERT(smp->sm_refcnt > 0); 1533 1534 vp = smp->sm_vp; 1535 off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET)); 1536 1537 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) { 1538 bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT); 1539 1540 /* 1541 * Large Files: Following assertion is to verify 1542 * the correctness of the cast to (int) above. 1543 */ 1544 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX); 1545 1546 /* 1547 * If the bit corresponding to "off" is set, 1548 * clear this bit in the bitmap, unlock translations, 1549 * and release the "exclusive" lock on the page. 1550 */ 1551 if (smp->sm_bitmap & bitmask) { 1552 mutex_enter(smtx); 1553 smp->sm_bitmap &= ~bitmask; 1554 mutex_exit(smtx); 1555 1556 hat_unlock(kas.a_hat, addr, PAGESIZE); 1557 1558 /* 1559 * Use page_find() instead of page_lookup() to 1560 * find the page since we know that it has 1561 * "exclusive" lock. 1562 */ 1563 pp = page_find(vp, off); 1564 if (pp == NULL) { 1565 panic("segmap_pageunlock: page not found"); 1566 /*NOTREACHED*/ 1567 } 1568 if (rw == S_WRITE) { 1569 hat_setrefmod(pp); 1570 } else if (rw != S_OTHER) { 1571 hat_setref(pp); 1572 } 1573 1574 page_unlock(pp); 1575 } 1576 } 1577 } 1578 1579 caddr_t 1580 segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off) 1581 { 1582 return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER)); 1583 } 1584 1585 /* 1586 * This is the magic virtual address that offset 0 of an ELF 1587 * file gets mapped to in user space. This is used to pick 1588 * the vac color on the freelist. 1589 */ 1590 #define ELF_OFFZERO_VA (0x10000) 1591 /* 1592 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp 1593 * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned. 1594 * The return address is always MAXBSIZE aligned. 
1595 * 1596 * If forcefault is nonzero and the MMU translations haven't yet been created, 1597 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them. 1598 */ 1599 caddr_t 1600 segmap_getmapflt( 1601 struct seg *seg, 1602 struct vnode *vp, 1603 u_offset_t off, 1604 size_t len, 1605 int forcefault, 1606 enum seg_rw rw) 1607 { 1608 struct smap *smp, *nsmp; 1609 extern struct vnode *common_specvp(); 1610 caddr_t baseaddr; /* MAXBSIZE aligned */ 1611 u_offset_t baseoff; 1612 int newslot; 1613 caddr_t vaddr; 1614 int color, hashid; 1615 kmutex_t *hashmtx, *smapmtx; 1616 struct smfree *sm; 1617 page_t *pp; 1618 struct kpme *kpme; 1619 uint_t prot; 1620 caddr_t base; 1621 page_t *pl[MAXPPB + 1]; 1622 int error; 1623 int is_kpm = 1; 1624 1625 ASSERT(seg->s_as == &kas); 1626 ASSERT(seg == segkmap); 1627 1628 baseoff = off & (offset_t)MAXBMASK; 1629 if (off + len > baseoff + MAXBSIZE) { 1630 panic("segmap_getmap bad len"); 1631 /*NOTREACHED*/ 1632 } 1633 1634 /* 1635 * If this is a block device we have to be sure to use the 1636 * "common" block device vnode for the mapping. 1637 */ 1638 if (vp->v_type == VBLK) 1639 vp = common_specvp(vp); 1640 1641 smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++; 1642 1643 if (segmap_kpm == 0 || 1644 (forcefault == SM_PAGECREATE && rw != S_WRITE)) { 1645 is_kpm = 0; 1646 } 1647 1648 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */ 1649 hashmtx = SHASHMTX(hashid); 1650 1651 retry_hash: 1652 mutex_enter(hashmtx); 1653 for (smp = smd_hash[hashid].sh_hash_list; 1654 smp != NULL; smp = smp->sm_hash) 1655 if (smp->sm_vp == vp && smp->sm_off == baseoff) 1656 break; 1657 mutex_exit(hashmtx); 1658 1659 vrfy_smp: 1660 if (smp != NULL) { 1661 1662 ASSERT(vp->v_count != 0); 1663 1664 /* 1665 * Get smap lock and recheck its tag. The hash lock 1666 * is dropped since the hash is based on (vp, off) 1667 * and (vp, off) won't change when we have smap mtx. 1668 */ 1669 smapmtx = SMAPMTX(smp); 1670 mutex_enter(smapmtx); 1671 if (smp->sm_vp != vp || smp->sm_off != baseoff) { 1672 mutex_exit(smapmtx); 1673 goto retry_hash; 1674 } 1675 1676 if (smp->sm_refcnt == 0) { 1677 1678 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++; 1679 1680 /* 1681 * Could still be on the free list. However, this 1682 * could also be an smp that is transitioning from 1683 * the free list when we have too much contention 1684 * for the smapmtx's. In this case, we have an 1685 * unlocked smp that is not on the free list any 1686 * longer, but still has a 0 refcnt. The only way 1687 * to be sure is to check the freelist pointers. 1688 * Since we now have the smapmtx, we are guaranteed 1689 * that the (vp, off) won't change, so we are safe 1690 * to reclaim it. get_free_smp() knows that this 1691 * can happen, and it will check the refcnt. 
1692 */ 1693 1694 if ((smp->sm_next != NULL)) { 1695 struct sm_freeq *freeq; 1696 1697 ASSERT(smp->sm_prev != NULL); 1698 sm = &smd_free[smp->sm_free_ndx]; 1699 1700 if (smp->sm_flags & SM_QNDX_ZERO) 1701 freeq = &sm->sm_freeq[0]; 1702 else 1703 freeq = &sm->sm_freeq[1]; 1704 1705 mutex_enter(&freeq->smq_mtx); 1706 if (freeq->smq_free != smp) { 1707 /* 1708 * fastpath normal case 1709 */ 1710 smp->sm_prev->sm_next = smp->sm_next; 1711 smp->sm_next->sm_prev = smp->sm_prev; 1712 } else if (smp == smp->sm_next) { 1713 /* 1714 * Taking the last smap on freelist 1715 */ 1716 freeq->smq_free = NULL; 1717 } else { 1718 /* 1719 * Reclaiming 1st smap on list 1720 */ 1721 freeq->smq_free = smp->sm_next; 1722 smp->sm_prev->sm_next = smp->sm_next; 1723 smp->sm_next->sm_prev = smp->sm_prev; 1724 } 1725 mutex_exit(&freeq->smq_mtx); 1726 smp->sm_prev = smp->sm_next = NULL; 1727 } else { 1728 ASSERT(smp->sm_prev == NULL); 1729 segmapcnt.smp_stolen.value.ul++; 1730 } 1731 1732 } else { 1733 segmapcnt.smp_get_use.value.ul++; 1734 } 1735 smp->sm_refcnt++; /* another user */ 1736 1737 /* 1738 * We don't invoke segmap_fault via TLB miss, so we set ref 1739 * and mod bits in advance. For S_OTHER we set them in 1740 * segmap_fault F_SOFTUNLOCK. 1741 */ 1742 if (is_kpm) { 1743 if (rw == S_WRITE) { 1744 smp->sm_flags |= SM_WRITE_DATA; 1745 } else if (rw == S_READ) { 1746 smp->sm_flags |= SM_READ_DATA; 1747 } 1748 } 1749 mutex_exit(smapmtx); 1750 1751 newslot = 0; 1752 } else { 1753 1754 uint32_t free_ndx, *free_ndxp; 1755 union segmap_cpu *scpu; 1756 1757 /* 1758 * On a PAC machine or a machine with anti-alias 1759 * hardware, smd_colormsk will be zero. 1760 * 1761 * On a VAC machine- pick color by offset in the file 1762 * so we won't get VAC conflicts on elf files. 1763 * On data files, color does not matter but we 1764 * don't know what kind of file it is so we always 1765 * pick color by offset. This causes color 1766 * corresponding to file offset zero to be used more 1767 * heavily. 1768 */ 1769 color = (baseoff >> MAXBSHIFT) & smd_colormsk; 1770 scpu = smd_cpu+CPU->cpu_seqid; 1771 free_ndxp = &scpu->scpu.scpu_free_ndx[color]; 1772 free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk; 1773 #ifdef DEBUG 1774 colors_used[free_ndx]++; 1775 #endif /* DEBUG */ 1776 1777 /* 1778 * Get a locked smp slot from the free list. 1779 */ 1780 smp = get_free_smp(free_ndx); 1781 smapmtx = SMAPMTX(smp); 1782 1783 ASSERT(smp->sm_vp == NULL); 1784 1785 if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) { 1786 /* 1787 * Failed to hashin, there exists one now. 1788 * Return the smp we just allocated. 1789 */ 1790 segmap_smapadd(smp); 1791 mutex_exit(smapmtx); 1792 1793 smp = nsmp; 1794 goto vrfy_smp; 1795 } 1796 smp->sm_refcnt++; /* another user */ 1797 1798 /* 1799 * We don't invoke segmap_fault via TLB miss, so we set ref 1800 * and mod bits in advance. For S_OTHER we set them in 1801 * segmap_fault F_SOFTUNLOCK. 1802 */ 1803 if (is_kpm) { 1804 if (rw == S_WRITE) { 1805 smp->sm_flags |= SM_WRITE_DATA; 1806 } else if (rw == S_READ) { 1807 smp->sm_flags |= SM_READ_DATA; 1808 } 1809 } 1810 mutex_exit(smapmtx); 1811 1812 newslot = 1; 1813 } 1814 1815 if (!is_kpm) 1816 goto use_segmap_range; 1817 1818 /* 1819 * Use segkpm 1820 */ 1821 ASSERT(PAGESIZE == MAXBSIZE); 1822 1823 /* 1824 * remember the last smp faulted on this cpu. 
1825 */ 1826 (smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp; 1827 1828 if (forcefault == SM_PAGECREATE) { 1829 baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw); 1830 return (baseaddr); 1831 } 1832 1833 if (newslot == 0 && 1834 (pp = GET_KPME(smp)->kpe_page) != NULL) { 1835 1836 /* fastpath */ 1837 switch (rw) { 1838 case S_READ: 1839 case S_WRITE: 1840 if (page_trylock(pp, SE_SHARED)) { 1841 if (PP_ISFREE(pp) || 1842 !(pp->p_vnode == vp && 1843 pp->p_offset == baseoff)) { 1844 page_unlock(pp); 1845 pp = page_lookup(vp, baseoff, 1846 SE_SHARED); 1847 } 1848 } else { 1849 pp = page_lookup(vp, baseoff, SE_SHARED); 1850 } 1851 1852 if (pp == NULL) { 1853 ASSERT(GET_KPME(smp)->kpe_page == NULL); 1854 break; 1855 } 1856 1857 if (rw == S_WRITE && 1858 hat_page_getattr(pp, P_MOD | P_REF) != 1859 (P_MOD | P_REF)) { 1860 page_unlock(pp); 1861 break; 1862 } 1863 1864 /* 1865 * We have the p_selock as reader, grab_smp 1866 * can't hit us, we have bumped the smap 1867 * refcnt and hat_pageunload needs the 1868 * p_selock exclusive. 1869 */ 1870 kpme = GET_KPME(smp); 1871 if (kpme->kpe_page == pp) { 1872 baseaddr = hat_kpm_page2va(pp, 0); 1873 } else if (kpme->kpe_page == NULL) { 1874 baseaddr = hat_kpm_mapin(pp, kpme); 1875 } else { 1876 panic("segmap_getmapflt: stale " 1877 "kpme page, kpme %p", (void *)kpme); 1878 /*NOTREACHED*/ 1879 } 1880 1881 /* 1882 * We don't invoke segmap_fault via TLB miss, 1883 * so we set ref and mod bits in advance. 1884 * For S_OTHER and we set them in segmap_fault 1885 * F_SOFTUNLOCK. 1886 */ 1887 if (rw == S_READ && !hat_isref(pp)) 1888 hat_setref(pp); 1889 1890 return (baseaddr); 1891 default: 1892 break; 1893 } 1894 } 1895 1896 base = segkpm_create_va(baseoff); 1897 error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE, 1898 seg, base, rw, CRED()); 1899 1900 pp = pl[0]; 1901 if (error || pp == NULL) { 1902 /* 1903 * Use segmap address slot and let segmap_fault deal 1904 * with the error cases. There is no error return 1905 * possible here. 1906 */ 1907 goto use_segmap_range; 1908 } 1909 1910 ASSERT(pl[1] == NULL); 1911 1912 /* 1913 * When prot is not returned w/ PROT_ALL the returned pages 1914 * are not backed by fs blocks. For most of the segmap users 1915 * this is no problem, they don't write to the pages in the 1916 * same request and therefore don't rely on a following 1917 * trap driven segmap_fault. With SM_LOCKPROTO users it 1918 * is more secure to use segkmap adresses to allow 1919 * protection segmap_fault's. 1920 */ 1921 if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) { 1922 /* 1923 * Use segmap address slot and let segmap_fault 1924 * do the error return. 1925 */ 1926 ASSERT(rw != S_WRITE); 1927 ASSERT(PAGE_LOCKED(pp)); 1928 page_unlock(pp); 1929 forcefault = 0; 1930 goto use_segmap_range; 1931 } 1932 1933 /* 1934 * We have the p_selock as reader, grab_smp can't hit us, we 1935 * have bumped the smap refcnt and hat_pageunload needs the 1936 * p_selock exclusive. 
1937 */ 1938 kpme = GET_KPME(smp); 1939 if (kpme->kpe_page == pp) { 1940 baseaddr = hat_kpm_page2va(pp, 0); 1941 } else if (kpme->kpe_page == NULL) { 1942 baseaddr = hat_kpm_mapin(pp, kpme); 1943 } else { 1944 panic("segmap_getmapflt: stale kpme page after " 1945 "VOP_GETPAGE, kpme %p", (void *)kpme); 1946 /*NOTREACHED*/ 1947 } 1948 1949 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++; 1950 1951 return (baseaddr); 1952 1953 1954 use_segmap_range: 1955 baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE); 1956 TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP, 1957 "segmap_getmap:seg %p addr %p vp %p offset %llx", 1958 seg, baseaddr, vp, baseoff); 1959 1960 /* 1961 * Prefault the translations 1962 */ 1963 vaddr = baseaddr + (off - baseoff); 1964 if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) { 1965 1966 caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr & 1967 (uintptr_t)PAGEMASK); 1968 1969 (void) segmap_fault(kas.a_hat, seg, pgaddr, 1970 (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK, 1971 F_INVAL, rw); 1972 } 1973 1974 return (baseaddr); 1975 } 1976 1977 int 1978 segmap_release(struct seg *seg, caddr_t addr, uint_t flags) 1979 { 1980 struct smap *smp; 1981 int error; 1982 int bflags = 0; 1983 struct vnode *vp; 1984 u_offset_t offset; 1985 kmutex_t *smtx; 1986 int is_kpm = 0; 1987 page_t *pp; 1988 1989 if (segmap_kpm && IS_KPM_ADDR(addr)) { 1990 1991 if (((uintptr_t)addr & MAXBOFFSET) != 0) { 1992 panic("segmap_release: addr %p not " 1993 "MAXBSIZE aligned", (void *)addr); 1994 /*NOTREACHED*/ 1995 } 1996 1997 if ((smp = get_smap_kpm(addr, &pp)) == NULL) { 1998 panic("segmap_release: smap not found " 1999 "for addr %p", (void *)addr); 2000 /*NOTREACHED*/ 2001 } 2002 2003 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP, 2004 "segmap_relmap:seg %p addr %p smp %p", 2005 seg, addr, smp); 2006 2007 smtx = SMAPMTX(smp); 2008 2009 /* 2010 * For compatibilty reasons segmap_pagecreate_kpm sets this 2011 * flag to allow a following segmap_pagecreate to return 2012 * this as "newpage" flag. When segmap_pagecreate is not 2013 * called at all we clear it now. 2014 */ 2015 smp->sm_flags &= ~SM_KPM_NEWPAGE; 2016 is_kpm = 1; 2017 if (smp->sm_flags & SM_WRITE_DATA) { 2018 hat_setrefmod(pp); 2019 } else if (smp->sm_flags & SM_READ_DATA) { 2020 hat_setref(pp); 2021 } 2022 } else { 2023 if (addr < seg->s_base || addr >= seg->s_base + seg->s_size || 2024 ((uintptr_t)addr & MAXBOFFSET) != 0) { 2025 panic("segmap_release: bad addr %p", (void *)addr); 2026 /*NOTREACHED*/ 2027 } 2028 smp = GET_SMAP(seg, addr); 2029 2030 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP, 2031 "segmap_relmap:seg %p addr %p smp %p", 2032 seg, addr, smp); 2033 2034 smtx = SMAPMTX(smp); 2035 mutex_enter(smtx); 2036 smp->sm_flags |= SM_NOTKPM_RELEASED; 2037 } 2038 2039 ASSERT(smp->sm_refcnt > 0); 2040 2041 /* 2042 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED) 2043 * are set. 2044 */ 2045 if ((flags & ~SM_DONTNEED) != 0) { 2046 if (flags & SM_WRITE) 2047 segmapcnt.smp_rel_write.value.ul++; 2048 if (flags & SM_ASYNC) { 2049 bflags |= B_ASYNC; 2050 segmapcnt.smp_rel_async.value.ul++; 2051 } 2052 if (flags & SM_INVAL) { 2053 bflags |= B_INVAL; 2054 segmapcnt.smp_rel_abort.value.ul++; 2055 } 2056 if (flags & SM_DESTROY) { 2057 bflags |= (B_INVAL|B_TRUNC); 2058 segmapcnt.smp_rel_abort.value.ul++; 2059 } 2060 if (smp->sm_refcnt == 1) { 2061 /* 2062 * We only bother doing the FREE and DONTNEED flags 2063 * if no one else is still referencing this mapping. 

/*
 * Dump the pages belonging to this segmap segment.
 */
static void
segmap_dump(struct seg *seg)
{
	struct segmap_data *smd;
	struct smap *smp, *smp_end;
	page_t *pp;
	pfn_t pfn;
	u_offset_t off;
	caddr_t addr;

	smd = (struct segmap_data *)seg->s_data;
	addr = seg->s_base;
	for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
	    smp < smp_end; smp++) {

		if (smp->sm_refcnt) {
			for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
				int we_own_it = 0;

				/*
				 * If pp == NULL, the page either does
				 * not exist or is exclusively locked.
				 * So determine if it exists before
				 * searching for it.
				 */
				if ((pp = page_lookup_nowait(smp->sm_vp,
				    smp->sm_off + off, SE_SHARED)))
					we_own_it = 1;
				else
					pp = page_exists(smp->sm_vp,
					    smp->sm_off + off);

				if (pp) {
					pfn = page_pptonum(pp);
					dump_addpage(seg->s_as,
					    addr + off, pfn);
					if (we_own_it)
						page_unlock(pp);
				}
				dump_timeleft = dump_timeout;
			}
		}
		addr += MAXBSIZE;
	}
}

/*ARGSUSED*/
static int
segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

static int
segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
	memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
	return (0);
}

/*ARGSUSED*/
static lgrp_mem_policy_info_t *
segmap_getpolicy(struct seg *seg, caddr_t addr)
{
	return (NULL);
}

/*ARGSUSED*/
static int
segmap_capable(struct seg *seg, segcapability_t capability)
{
	return (0);
}


#ifdef SEGKPM_SUPPORT

/*
 * segkpm support routines
 */

static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
    struct smap *smp, enum seg_rw rw)
{
	caddr_t base;
	page_t *pp;
	int newpage = 0;
	struct kpme *kpme;

	ASSERT(smp->sm_refcnt > 0);

	if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
		kmutex_t *smtx;

		base = segkpm_create_va(off);

		if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
		    seg, base)) == NULL) {
			panic("segmap_pagecreate_kpm: "
			    "page_create failed");
			/*NOTREACHED*/
		}

		newpage = 1;
		page_io_unlock(pp);
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

		/*
		 * Mark the smap with SM_KPM_NEWPAGE here; the flag stays
		 * set until the following segmap_pagecreate or
		 * segmap_release.
		 */
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		smp->sm_flags |= SM_KPM_NEWPAGE;
		mutex_exit(smtx);
	}

	kpme = GET_KPME(smp);
	if (!newpage && kpme->kpe_page == pp)
		base = hat_kpm_page2va(pp, 0);
	else
		base = hat_kpm_mapin(pp, kpme);

	/*
	 * FS code may decide not to call segmap_pagecreate and we
	 * don't invoke segmap_fault via TLB miss, so we have to set
	 * ref and mod bits in advance.
	 */
	if (rw == S_WRITE) {
		hat_setrefmod(pp);
	} else {
		ASSERT(rw == S_READ);
		hat_setref(pp);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

	return (base);
}

/*
 * Find the smap structure corresponding to the
 * KPM addr and return it locked.
 */
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	struct smap *smp;
	struct vnode *vp;
	u_offset_t offset;
	caddr_t baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
	int hashid;
	kmutex_t *hashmtx;
	page_t *pp;
	union segmap_cpu *scpu;

	pp = hat_kpm_vaddr2page(baseaddr);

	ASSERT(pp && !PP_ISFREE(pp));
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);

	vp = pp->p_vnode;
	offset = pp->p_offset;
	ASSERT(vp != NULL);

	/*
	 * Assume the last smap used on this cpu is the one needed.
	 */
	scpu = smd_cpu+CPU->cpu_seqid;
	smp = scpu->scpu.scpu_last_smap;
	mutex_enter(&smp->sm_mtx);
	if (smp->sm_vp == vp && smp->sm_off == offset) {
		ASSERT(smp->sm_refcnt > 0);
	} else {
		/*
		 * Assumption wrong, find the smap on the hash chain.
		 */
		mutex_exit(&smp->sm_mtx);
		SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
		hashmtx = SHASHMTX(hashid);

		mutex_enter(hashmtx);
		smp = smd_hash[hashid].sh_hash_list;
		for (; smp != NULL; smp = smp->sm_hash) {
			if (smp->sm_vp == vp && smp->sm_off == offset)
				break;
		}
		mutex_exit(hashmtx);
		if (smp) {
			mutex_enter(&smp->sm_mtx);
			ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
		}
	}

	if (ppp)
		*ppp = smp ? pp : NULL;

	return (smp);
}

#else	/* SEGKPM_SUPPORT */

/* segkpm stubs */

/*ARGSUSED*/
static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
    struct smap *smp, enum seg_rw rw)
{
	return (NULL);
}

/*ARGSUSED*/
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	return (NULL);
}

#endif	/* SEGKPM_SUPPORT */
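
/*
 * Illustrative sketch (not part of the original driver): when a file
 * system is about to overwrite an entire, MAXBSIZE aligned block, it can
 * pass SM_PAGECREATE as the forcefault argument so that
 * segmap_getmapflt() creates the page through segmap_pagecreate_kpm()
 * instead of reading the old contents from the backing store.  This
 * shortcut assumes a segkpm configuration (segmap_kpm != 0, where the
 * MAXBSIZE window is a single page); non-kpm callers use
 * segmap_pagecreate()/segmap_pageunlock() instead.  The function name and
 * the SEGMAP_USAGE_EXAMPLE guard are hypothetical.
 */
#ifdef	SEGMAP_USAGE_EXAMPLE

extern struct seg *segkmap;		/* the kernel's segmap segment */

static int
segmap_example_fullblock_write(struct vnode *vp, u_offset_t off,
    caddr_t kbuf)
{
	caddr_t base;

	ASSERT(segmap_kpm);
	ASSERT((off & (offset_t)MAXBOFFSET) == 0);

	/* Create (or find) the block; no read from the backing store. */
	base = segmap_getmapflt(segkmap, vp, off, MAXBSIZE, SM_PAGECREATE,
	    S_WRITE);

	/* Fill the whole window so no stale data is left behind. */
	bcopy(kbuf, base, MAXBSIZE);

	/* Push the new contents out synchronously. */
	return (segmap_release(segkmap, base, SM_WRITE));
}

#endif	/* SEGMAP_USAGE_EXAMPLE */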