/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989  AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * VM - segment management.
 */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/vmsystm.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/mem_config.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>

/*
 * kstats for segment advise
 */
segadvstat_t segadvstat = {
	{ "MADV_FREE_hit", KSTAT_DATA_ULONG },
	{ "MADV_FREE_miss", KSTAT_DATA_ULONG },
};

kstat_named_t *segadvstat_ptr = (kstat_named_t *)&segadvstat;
uint_t segadvstat_ndata = sizeof (segadvstat) / sizeof (kstat_named_t);

/* #define	PDEBUG */
#if defined(PDEBUG) || defined(lint) || defined(__lint)
int pdebug = 0;
#else
#define	pdebug	0
#endif	/* PDEBUG */

#define	PPRINTF				if (pdebug) printf
#define	PPRINT(x)			PPRINTF(x)
#define	PPRINT1(x, a)			PPRINTF(x, a)
#define	PPRINT2(x, a, b)		PPRINTF(x, a, b)
#define	PPRINT3(x, a, b, c)		PPRINTF(x, a, b, c)
#define	PPRINT4(x, a, b, c, d)		PPRINTF(x, a, b, c, d)
#define	PPRINT5(x, a, b, c, d, e)	PPRINTF(x, a, b, c, d, e)

#define	P_HASHMASK		(p_hashsize - 1)
#define	P_BASESHIFT		6

/*
 * entry in the segment page cache
 */
struct seg_pcache {
	struct seg_pcache *p_hnext;	/* list for hashed blocks */
	struct seg_pcache *p_hprev;
	int	p_active;		/* active count */
	int	p_ref;			/* ref bit */
	size_t	p_len;			/* segment length */
	caddr_t	p_addr;			/* base address */
	struct seg *p_seg;		/* segment */
	struct page **p_pp;		/* pp shadow list */
	enum seg_rw p_rw;		/* rw */
	uint_t	p_flags;		/* bit flags */
	int	(*p_callback)(struct seg *, caddr_t, size_t,
	    struct page **, enum seg_rw);
};

struct seg_phash {
	struct seg_pcache *p_hnext;	/* list for hashed blocks */
	struct seg_pcache *p_hprev;
	int p_qlen;			/* Q length */
	kmutex_t p_hmutex;		/* protects hash bucket */
};
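/*
 * Tunables controlling the pagelock cache (summary added for clarity).
 * p_hashsize and seg_pwindow default to 0 and are sized from physmem in
 * seg_pinit() unless they were explicitly set in /etc/system; seg_plazy
 * turns the cache off entirely when cleared.
 */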
static int seg_preap_time = 20;	/* reclaim every 20 secs */
static int seg_pmaxqlen = 5;	/* max Q length in hash list */
static int seg_ppcount = 5;	/* max # of purges per reclaim interval */
static int seg_plazy = 1;	/* if 1, pages are cached after pageunlock */
static pgcnt_t seg_pwindow;	/* max # of pages that can be cached */
static pgcnt_t seg_plocked;	/* # of pages which are cached by pagelock */
static pgcnt_t seg_plocked_window; /* # pages from window */
int seg_preapahead;

static uint_t seg_pdisable = 0;	/* if not 0, caching temporarily disabled */

static int seg_pupdate_active = 1;	/* background reclaim thread */
static clock_t seg_preap_interval;	/* reap interval in ticks */

static kmutex_t seg_pcache;	/* protects the whole pagelock cache */
static kmutex_t seg_pmem;	/* protects window counter */
static ksema_t seg_psaync_sem;	/* sema for reclaim thread */
static struct seg_phash *p_hashtab;
static int p_hashsize = 0;

#define	p_hash(seg) \
	(P_HASHMASK & \
	((uintptr_t)(seg) >> P_BASESHIFT))

#define	p_match(pcp, seg, addr, len, rw) \
	(((pcp)->p_seg == (seg) && \
	(pcp)->p_addr == (addr) && \
	(pcp)->p_rw == (rw) && \
	(pcp)->p_len == (len)) ? 1 : 0)

#define	p_match_pp(pcp, seg, addr, len, pp, rw) \
	(((pcp)->p_seg == (seg) && \
	(pcp)->p_addr == (addr) && \
	(pcp)->p_pp == (pp) && \
	(pcp)->p_rw == (rw) && \
	(pcp)->p_len == (len)) ? 1 : 0)

/*
 * lookup an address range in the pagelock cache. Return the shadow list
 * and bump up the active count.
 */
struct page **
seg_plookup(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;

	/*
	 * Skip the pagelock cache while DR is in progress or
	 * seg_pcache is off.
	 */
	if (seg_pdisable || seg_plazy == 0) {
		return (NULL);
	}

	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
	    pcp = pcp->p_hnext) {
		if (p_match(pcp, seg, addr, len, rw)) {
			pcp->p_active++;
			mutex_exit(&hp->p_hmutex);

			PPRINT5("seg_plookup hit: seg %p, addr %p, "
			    "len %lx, count %d, pplist %p \n",
			    (void *)seg, (void *)addr, len, pcp->p_active,
			    (void *)pcp->p_pp);

			return (pcp->p_pp);
		}
	}
	mutex_exit(&hp->p_hmutex);

	PPRINT("seg_plookup miss:\n");

	return (NULL);
}
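/*
 * Note on the caller protocol (a summary of existing behavior, not a new
 * interface): a hit from seg_plookup() leaves the entry's active count
 * raised, so the shadow list cannot be purged out from under the caller.
 * Every successful seg_plookup() must therefore be paired with a later
 * seg_pinactive() on the same (seg, addr, len, rw) tuple to drop the
 * count again.
 */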
/*
 * mark address range inactive. If the cache is off or the address
 * range is not in the cache we call the segment driver to reclaim
 * the pages. Otherwise just decrement the active count and set the
 * ref bit.
 */
void
seg_pinactive(struct seg *seg, caddr_t addr, size_t len, struct page **pp,
    enum seg_rw rw, int (*callback)(struct seg *, caddr_t, size_t,
    struct page **, enum seg_rw))
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;

	if (seg_plazy == 0) {
		(void) (*callback)(seg, addr, len, pp, rw);
		return;
	}
	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
	    pcp = pcp->p_hnext) {
		if (p_match_pp(pcp, seg, addr, len, pp, rw)) {
			pcp->p_active--;
			ASSERT(pcp->p_active >= 0);
			if (pcp->p_active == 0 && seg_pdisable) {
				int npages;

				ASSERT(callback == pcp->p_callback);
				/* free the entry */
				hp->p_qlen--;
				pcp->p_hprev->p_hnext = pcp->p_hnext;
				pcp->p_hnext->p_hprev = pcp->p_hprev;
				mutex_exit(&hp->p_hmutex);
				npages = pcp->p_len >> PAGESHIFT;
				mutex_enter(&seg_pmem);
				seg_plocked -= npages;
				if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
					seg_plocked_window -= npages;
				}
				mutex_exit(&seg_pmem);
				kmem_free(pcp, sizeof (struct seg_pcache));
				goto out;
			}
			pcp->p_ref = 1;
			mutex_exit(&hp->p_hmutex);
			return;
		}
	}
	mutex_exit(&hp->p_hmutex);
out:
	(void) (*callback)(seg, addr, len, pp, rw);
}

/*
 * seg_pinsert_check() is used by segment drivers to predict whether
 * a call to seg_pinsert will fail and thereby avoid wasteful pre-processing.
 */
int
seg_pinsert_check(struct seg *seg, size_t len, uint_t flags)
{
	struct seg_phash *hp;

	if (seg_plazy == 0) {
		return (SEGP_FAIL);
	}
	if (seg_pdisable != 0) {
		return (SEGP_FAIL);
	}
	ASSERT((len & PAGEOFFSET) == 0);
	hp = &p_hashtab[p_hash(seg)];
	if (hp->p_qlen > seg_pmaxqlen && (flags & SEGP_FORCE_WIRED) == 0) {
		return (SEGP_FAIL);
	}
	/*
	 * If the SEGP_FORCE_WIRED flag is set,
	 * we skip the check for seg_pwindow.
	 */
	if ((flags & SEGP_FORCE_WIRED) == 0) {
		pgcnt_t npages;

		npages = len >> PAGESHIFT;
		if ((seg_plocked_window + npages) > seg_pwindow) {
			return (SEGP_FAIL);
		}
	}
	return (SEGP_SUCCESS);
}
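/*
 * Usage sketch (illustrative only, not a new interface): a segment
 * driver's pagelock routine would typically call seg_pinsert_check()
 * before it goes to the expense of building the shadow page list:
 *
 *	if (seg_pinsert_check(seg, len, flags) == SEGP_FAIL) {
 *		... take the uncached path; don't build the shadow list
 *	}
 *
 * The check is purely advisory: the cache can still fill up or be
 * disabled between this check and the matching seg_pinsert(), which
 * re-checks everything under the proper locks.
 */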
/*
 * insert an address range with its shadow list into the pagelock cache.
 * If the cache is off, caching is temporarily disabled, or the allowed
 * 'window' is exceeded - return SEGP_FAIL. Otherwise return
 * SEGP_SUCCESS.
 */
int
seg_pinsert(struct seg *seg, caddr_t addr, size_t len, struct page **pp,
    enum seg_rw rw, uint_t flags, int (*callback)(struct seg *, caddr_t,
    size_t, struct page **, enum seg_rw))
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	pgcnt_t npages;

	if (seg_plazy == 0) {
		return (SEGP_FAIL);
	}
	if (seg_pdisable != 0) {
		return (SEGP_FAIL);
	}
	ASSERT((len & PAGEOFFSET) == 0);
	hp = &p_hashtab[p_hash(seg)];
	if (hp->p_qlen > seg_pmaxqlen && (flags & SEGP_FORCE_WIRED) == 0) {
		return (SEGP_FAIL);
	}
	npages = len >> PAGESHIFT;
	mutex_enter(&seg_pmem);
	/*
	 * If the SEGP_FORCE_WIRED flag is set,
	 * we skip the check for seg_pwindow.
	 */
	if ((flags & SEGP_FORCE_WIRED) == 0) {
		seg_plocked_window += npages;
		if (seg_plocked_window > seg_pwindow) {
			seg_plocked_window -= npages;
			mutex_exit(&seg_pmem);
			return (SEGP_FAIL);
		}
	}
	seg_plocked += npages;
	mutex_exit(&seg_pmem);

	pcp = kmem_alloc(sizeof (struct seg_pcache), KM_SLEEP);
	pcp->p_seg = seg;
	pcp->p_addr = addr;
	pcp->p_len = len;
	pcp->p_pp = pp;
	pcp->p_rw = rw;
	pcp->p_callback = callback;
	pcp->p_active = 1;
	pcp->p_flags = flags;

	PPRINT4("seg_pinsert: seg %p, addr %p, len %lx, pplist %p\n",
	    (void *)seg, (void *)addr, len, (void *)pp);

	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	hp->p_qlen++;
	pcp->p_hnext = hp->p_hnext;
	pcp->p_hprev = (struct seg_pcache *)hp;
	hp->p_hnext->p_hprev = pcp;
	hp->p_hnext = pcp;
	mutex_exit(&hp->p_hmutex);
	return (SEGP_SUCCESS);
}

/*
 * purge all entries from the pagelock cache if not active
 * and not recently used. Drop all locks and call through
 * the address space into the segment driver to reclaim
 * the pages. This makes sure we get the address space
 * and segment driver locking right.
 */
static void
seg_ppurge_all(int force)
{
	struct seg_pcache *delcallb_list = NULL;
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	int purge_count = 0;
	pgcnt_t npages = 0;
	pgcnt_t npages_window = 0;

	/*
	 * if the cache is off or empty, return
	 */
	if (seg_plazy == 0 || seg_plocked == 0) {
		return;
	}
	for (hp = p_hashtab; hp < &p_hashtab[p_hashsize]; hp++) {
		mutex_enter(&hp->p_hmutex);
		pcp = hp->p_hnext;

		/*
		 * While 'force' is set, seg_pasync_thread is not
		 * throttled.  This is to speed up flushing of seg_pcache
		 * in preparation for DR.
		 *
		 * In the normal case, when 'force' is not set, we throttle
		 * seg_pasync_thread so that we don't spend all our time
		 * purging the cache.
		 */
		while ((pcp != (struct seg_pcache *)hp) &&
		    (force || (purge_count <= seg_ppcount))) {

			/*
			 * purge entries which are not active and
			 * have not been used recently and
			 * have the SEGP_ASYNC_FLUSH flag.
			 *
			 * In the 'force' case, we ignore the
			 * SEGP_ASYNC_FLUSH flag.
			 */
			if (!(pcp->p_flags & SEGP_ASYNC_FLUSH))
				pcp->p_ref = 1;
			if (force)
				pcp->p_ref = 0;
			if (!pcp->p_ref && !pcp->p_active) {
				struct as *as = pcp->p_seg->s_as;

				/*
				 * try to get the readers lock on the address
				 * space before taking out the cache element.
				 * This ensures as_pagereclaim() can actually
				 * call through the address space and free
				 * the pages. If we don't get the lock, just
				 * skip this entry. The pages will be reclaimed
				 * by the segment driver at unmap time.
				 */
				if (AS_LOCK_TRYENTER(as, &as->a_lock,
				    RW_READER)) {
					hp->p_qlen--;
					pcp->p_hprev->p_hnext = pcp->p_hnext;
					pcp->p_hnext->p_hprev = pcp->p_hprev;
					pcp->p_hprev = delcallb_list;
					delcallb_list = pcp;
					purge_count++;
				}
			} else {
				pcp->p_ref = 0;
			}
			pcp = pcp->p_hnext;
		}
		mutex_exit(&hp->p_hmutex);
		if (!force && purge_count > seg_ppcount)
			break;
	}

	/*
	 * run the delayed callback list. We don't want to hold the
	 * cache lock during a call through the address space.
	 */
	while (delcallb_list != NULL) {
		struct as *as;

		pcp = delcallb_list;
		delcallb_list = pcp->p_hprev;
		as = pcp->p_seg->s_as;

		PPRINT4("seg_ppurge_all: purge seg %p, addr %p, len %lx, "
		    "pplist %p\n", (void *)pcp->p_seg, (void *)pcp->p_addr,
		    pcp->p_len, (void *)pcp->p_pp);

		as_pagereclaim(as, pcp->p_pp, pcp->p_addr,
		    pcp->p_len, pcp->p_rw);
		AS_LOCK_EXIT(as, &as->a_lock);
		npages += pcp->p_len >> PAGESHIFT;
		if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
			npages_window += pcp->p_len >> PAGESHIFT;
		}
		kmem_free(pcp, sizeof (struct seg_pcache));
	}
	mutex_enter(&seg_pmem);
	seg_plocked -= npages;
	seg_plocked_window -= npages_window;
	mutex_exit(&seg_pmem);
}
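/*
 * Implementation note (added for clarity): once an entry has been
 * unlinked from its hash bucket, its p_hprev field is dead, so the
 * purge routines above and below reuse it as the link of a singly
 * linked "delayed callback" list.  This lets them collect victims
 * while holding the bucket mutex and run the reclaim callbacks only
 * after that mutex has been dropped.
 */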
/*
 * Remove cached page entries for segment(s) from the hashtable.
 * The segments are identified by a given client's callback
 * function.
 * This is useful for multiple segs cached on behalf of a
 * dummy segment (ISM/DISM) with a common callback function.
 * The client's callback function may return status indicating
 * that the last seg's entry has been purged. In such a case
 * seg_ppurge_seg() stops searching the hashtable and exits.
 * Otherwise all hashtable entries are scanned.
 */
void
seg_ppurge_seg(int (*callback)(struct seg *, caddr_t, size_t,
    struct page **, enum seg_rw))
{
	struct seg_pcache *pcp, *npcp;
	struct seg_phash *hp;
	pgcnt_t npages = 0;
	pgcnt_t npages_window = 0;
	int done = 0;

	/*
	 * if the cache is off or empty, return
	 */
	if (seg_plazy == 0 || seg_plocked == 0) {
		return;
	}
	mutex_enter(&seg_pcache);
	seg_pdisable++;
	mutex_exit(&seg_pcache);

	for (hp = p_hashtab; hp < &p_hashtab[p_hashsize]; hp++) {

		mutex_enter(&hp->p_hmutex);
		pcp = hp->p_hnext;
		while (pcp != (struct seg_pcache *)hp) {

			/*
			 * purge entries which are not active
			 */
			npcp = pcp->p_hnext;
			if (!pcp->p_active && pcp->p_callback == callback) {
				hp->p_qlen--;
				pcp->p_hprev->p_hnext = pcp->p_hnext;
				pcp->p_hnext->p_hprev = pcp->p_hprev;

				if ((*pcp->p_callback)(pcp->p_seg, pcp->p_addr,
				    pcp->p_len, pcp->p_pp, pcp->p_rw)) {
					done = 1;
				}

				npages += pcp->p_len >> PAGESHIFT;
				if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
					npages_window +=
					    pcp->p_len >> PAGESHIFT;
				}
				kmem_free(pcp, sizeof (struct seg_pcache));
			}
			pcp = npcp;
			if (done)
				break;
		}
		mutex_exit(&hp->p_hmutex);
		if (done)
			break;
	}

	mutex_enter(&seg_pcache);
	seg_pdisable--;
	mutex_exit(&seg_pcache);

	mutex_enter(&seg_pmem);
	seg_plocked -= npages;
	seg_plocked_window -= npages_window;
	mutex_exit(&seg_pmem);
}
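/*
 * Note on seg_pdisable (a summary of existing behavior): it is a
 * counter, not a flag.  seg_ppurge_seg() above and the DR pre/post
 * delete callbacks below bump it while they work, which makes
 * seg_plookup() miss and seg_pinsert() fail, so no new entries can
 * race into the cache while a purge is in progress.  Updates to it
 * are serialized by the seg_pcache mutex.
 */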
/*
 * purge all entries for a given segment. Since we
 * call back into the segment driver directly for page
 * reclaim, the caller needs to hold the right locks.
 */
void
seg_ppurge(struct seg *seg)
{
	struct seg_pcache *delcallb_list = NULL;
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	pgcnt_t npages = 0;
	pgcnt_t npages_window = 0;

	if (seg_plazy == 0) {
		return;
	}
	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	pcp = hp->p_hnext;
	while (pcp != (struct seg_pcache *)hp) {
		if (pcp->p_seg == seg) {
			if (pcp->p_active) {
				break;
			}
			hp->p_qlen--;
			pcp->p_hprev->p_hnext = pcp->p_hnext;
			pcp->p_hnext->p_hprev = pcp->p_hprev;
			pcp->p_hprev = delcallb_list;
			delcallb_list = pcp;
		}
		pcp = pcp->p_hnext;
	}
	mutex_exit(&hp->p_hmutex);
	while (delcallb_list != NULL) {
		pcp = delcallb_list;
		delcallb_list = pcp->p_hprev;

		PPRINT4("seg_ppurge: purge seg %p, addr %p, len %lx, "
		    "pplist %p\n", (void *)seg, (void *)pcp->p_addr,
		    pcp->p_len, (void *)pcp->p_pp);

		ASSERT(seg == pcp->p_seg);
		(void) (*pcp->p_callback)(seg, pcp->p_addr,
		    pcp->p_len, pcp->p_pp, pcp->p_rw);
		npages += pcp->p_len >> PAGESHIFT;
		if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
			npages_window += pcp->p_len >> PAGESHIFT;
		}
		kmem_free(pcp, sizeof (struct seg_pcache));
	}
	mutex_enter(&seg_pmem);
	seg_plocked -= npages;
	seg_plocked_window -= npages_window;
	mutex_exit(&seg_pmem);
}

static void seg_pinit_mem_config(void);
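/*
 * Worked example of the sizing policy below (illustrative numbers
 * only): on a machine with 512MB of physical memory, physmegs is 512,
 * so p_hashsize defaults to 1024 buckets and seg_pwindow to
 * physmem >> 3, i.e. roughly 12% of memory may be held by the cache.
 */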
/*
 * set up the pagelock cache
 */
static void
seg_pinit(void)
{
	struct seg_phash *hp;
	int i;
	uint_t physmegs;

	sema_init(&seg_psaync_sem, 0, NULL, SEMA_DEFAULT, NULL);

	mutex_enter(&seg_pcache);
	if (p_hashtab == NULL) {
		physmegs = physmem >> (20 - PAGESHIFT);

		/* If p_hashsize was not set in /etc/system ... */
		if (p_hashsize == 0) {
			/*
			 * Choose p_hashsize based on physmem.
			 */
			if (physmegs < 64) {
				p_hashsize = 64;
			} else if (physmegs < 1024) {
				p_hashsize = 1024;
			} else if (physmegs < 10 * 1024) {
				p_hashsize = 8192;
			} else if (physmegs < 20 * 1024) {
				p_hashsize = 2 * 8192;
				seg_pmaxqlen = 16;
			} else {
				p_hashsize = 128 * 1024;
				seg_pmaxqlen = 128;
			}
		}

		p_hashtab = kmem_zalloc(
		    p_hashsize * sizeof (struct seg_phash), KM_SLEEP);
		for (i = 0; i < p_hashsize; i++) {
			hp = (struct seg_phash *)&p_hashtab[i];
			hp->p_hnext = (struct seg_pcache *)hp;
			hp->p_hprev = (struct seg_pcache *)hp;
			mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
		}
		if (seg_pwindow == 0) {
			if (physmegs < 24) {
				/* don't use cache */
				seg_plazy = 0;
			} else if (physmegs < 64) {
				seg_pwindow = physmem >> 5; /* 3% of memory */
			} else if (physmegs < 10 * 1024) {
				seg_pwindow = physmem >> 3; /* 12% of memory */
			} else {
				seg_pwindow = physmem >> 1;
			}
		}
	}
	mutex_exit(&seg_pcache);

	seg_pinit_mem_config();
}

/*
 * called by pageout if memory is low
 */
void
seg_preap(void)
{
	/*
	 * if the cache is off or empty, return
	 */
	if (seg_plocked == 0 || seg_plazy == 0) {
		return;
	}
	sema_v(&seg_psaync_sem);
}

static void seg_pupdate(void *);

/*
 * run as a background thread and reclaim pagelock
 * pages which have not been used recently
 */
void
seg_pasync_thread(void)
{
	callb_cpr_t cpr_info;
	kmutex_t pasync_lock;	/* just for CPR stuff */

	mutex_init(&pasync_lock, NULL, MUTEX_DEFAULT, NULL);

	CALLB_CPR_INIT(&cpr_info, &pasync_lock,
	    callb_generic_cpr, "seg_pasync");

	if (seg_preap_interval == 0) {
		seg_preap_interval = seg_preap_time * hz;
	} else {
		seg_preap_interval *= hz;
	}
	if (seg_plazy && seg_pupdate_active) {
		(void) timeout(seg_pupdate, NULL, seg_preap_interval);
	}

	for (;;) {
		mutex_enter(&pasync_lock);
		CALLB_CPR_SAFE_BEGIN(&cpr_info);
		mutex_exit(&pasync_lock);
		sema_p(&seg_psaync_sem);
		mutex_enter(&pasync_lock);
		CALLB_CPR_SAFE_END(&cpr_info, &pasync_lock);
		mutex_exit(&pasync_lock);

		seg_ppurge_all(0);
	}
}

static void
seg_pupdate(void *dummy)
{
	sema_v(&seg_psaync_sem);

	if (seg_plazy && seg_pupdate_active) {
		(void) timeout(seg_pupdate, dummy, seg_preap_interval);
	}
}

static struct kmem_cache *seg_cache;

/*
 * Initialize segment management data structures.
 */
void
seg_init(void)
{
	kstat_t *ksp;

	seg_cache = kmem_cache_create("seg_cache", sizeof (struct seg),
	    0, NULL, NULL, NULL, NULL, NULL, 0);

	ksp = kstat_create("unix", 0, "segadvstat", "vm", KSTAT_TYPE_NAMED,
	    segadvstat_ndata, KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *)segadvstat_ptr;
		kstat_install(ksp);
	}

	seg_pinit();
}
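/*
 * Usage sketch for the allocation interface below (illustrative only;
 * segvn_ops stands in for whatever ops vector the caller actually
 * uses):
 *
 *	struct seg *seg = seg_alloc(as, addr, len);
 *	if (seg == NULL)
 *		return (ENOMEM);
 *	seg->s_ops = &segvn_ops;
 *	seg->s_data = ...;	(driver-private data)
 *
 * seg_alloc() leaves s_ops and s_data NULL; the segment is not usable
 * until the caller fills them in.
 */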
/*
 * Allocate a segment to cover [base, base+size]
 * and attach it to the specified address space.
 */
struct seg *
seg_alloc(struct as *as, caddr_t base, size_t size)
{
	struct seg *new;
	caddr_t segbase;
	size_t segsize;

	segbase = (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK);
	segsize = (((uintptr_t)(base + size) + PAGEOFFSET) & PAGEMASK) -
	    (uintptr_t)segbase;

	if (!valid_va_range(&segbase, &segsize, segsize, AH_LO))
		return ((struct seg *)NULL);	/* bad virtual addr range */

	if (as != &kas &&
	    valid_usr_range(segbase, segsize, 0, as,
	    as->a_userlimit) != RANGE_OKAY)
		return ((struct seg *)NULL);	/* bad virtual addr range */

	new = kmem_cache_alloc(seg_cache, KM_SLEEP);
	new->s_ops = NULL;
	new->s_data = NULL;
	new->s_szc = 0;
	new->s_flags = 0;
	if (seg_attach(as, segbase, segsize, new) < 0) {
		kmem_cache_free(seg_cache, new);
		return ((struct seg *)NULL);
	}
	/* caller must fill in ops, data */
	return (new);
}

/*
 * Attach a segment to the address space. Used by seg_alloc()
 * and for kernel startup to attach to static segments.
 */
int
seg_attach(struct as *as, caddr_t base, size_t size, struct seg *seg)
{
	seg->s_as = as;
	seg->s_base = base;
	seg->s_size = size;

	/*
	 * as_addseg() will add the segment at the appropriate point
	 * in the list. It will return -1 if there is overlap with
	 * an already existing segment.
	 */
	return (as_addseg(as, seg));
}

/*
 * Unmap a segment and free it from its associated address space.
 * This should be called by anybody who's finished with a whole segment's
 * mapping. Just calls SEGOP_UNMAP() on the whole mapping. It is the
 * responsibility of the segment driver to unlink the segment
 * from the address space, and to free public and private data structures
 * associated with the segment. (This is typically done by a call to
 * seg_free()).
 */
void
seg_unmap(struct seg *seg)
{
#ifdef DEBUG
	int ret;
#endif /* DEBUG */

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

	/* Shouldn't have called seg_unmap if mapping isn't yet established */
	ASSERT(seg->s_data != NULL);

	/* Unmap the whole mapping */
#ifdef DEBUG
	ret = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
	ASSERT(ret == 0);
#else
	SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
#endif /* DEBUG */
}

/*
 * Free the segment from its associated as. This should only be called
 * if a mapping to the segment has not yet been established (e.g., if
 * an error occurs in the middle of doing an as_map when the segment
 * has already been partially set up) or if it has already been deleted
 * (e.g., from a segment driver unmap routine if the unmap applies to the
 * entire segment). If the mapping is currently set up then seg_unmap() should
 * be called instead.
 */
void
seg_free(struct seg *seg)
{
	register struct as *as = seg->s_as;
	struct seg *tseg = as_removeseg(as, seg);

	ASSERT(tseg == seg);

	/*
	 * If the segment private data field is NULL,
	 * then segment driver is not attached yet.
	 */
	if (seg->s_data != NULL)
		SEGOP_FREE(seg);

	kmem_cache_free(seg_cache, seg);
}

/*ARGSUSED*/
static void
seg_p_mem_config_post_add(
	void *arg,
	pgcnt_t delta_pages)
{
	/* Nothing to do. */
}
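/*
 * Memory DR callback protocol, summarized (details in the comments
 * below): post-add needs no work; pre-delete disables the pagelock
 * cache via seg_pdisable and tries to drain it so that no cached
 * shadow lists pin pages on the board being deleted; post-delete
 * re-enables caching whether or not the delete was cancelled.
 */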
/*
 * Attempt to purge seg_pcache.  May need to return before this has
 * completed to allow other pre_del callbacks to unlock pages. This is
 * ok because:
 *	1) The seg_pdisable flag has been set so at least we won't
 *	cache any more locks and the locks we couldn't purge
 *	will not be held if they do get released by a subsequent
 *	pre-delete callback.
 *
 *	2) The rest of the memory delete thread processing does not
 *	depend on the changes made in this pre-delete callback. No
 *	panics will result, the worst that will happen is that the
 *	DR code will time out and cancel the delete.
 */
/*ARGSUSED*/
static int
seg_p_mem_config_pre_del(
	void *arg,
	pgcnt_t delta_pages)
{
	pgcnt_t old_plocked;
	int stall_count = 0;

	mutex_enter(&seg_pcache);
	seg_pdisable++;
	ASSERT(seg_pdisable != 0);
	mutex_exit(&seg_pcache);

	/*
	 * Attempt to empty the cache. Terminate if seg_plocked does not
	 * diminish with SEGP_STALL_THRESHOLD consecutive attempts.
	 */
	while (seg_plocked != 0) {
		old_plocked = seg_plocked;
		seg_ppurge_all(1);
		if (seg_plocked == old_plocked) {
			if (stall_count++ > SEGP_STALL_THRESHOLD) {
				cmn_err(CE_NOTE, "!Pre-delete couldn't purge"
				    " pagelock cache - continuing");
				break;
			}
		} else
			stall_count = 0;
		if (seg_plocked != 0)
			delay(hz/SEGP_PREDEL_DELAY_FACTOR);
	}
	return (0);
}

/*ARGSUSED*/
static void
seg_p_mem_config_post_del(
	void *arg,
	pgcnt_t delta_pages,
	int cancelled)
{
	mutex_enter(&seg_pcache);
	ASSERT(seg_pdisable != 0);
	seg_pdisable--;
	mutex_exit(&seg_pcache);
}

static kphysm_setup_vector_t seg_p_mem_config_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	seg_p_mem_config_post_add,
	seg_p_mem_config_pre_del,
	seg_p_mem_config_post_del,
};

static void
seg_pinit_mem_config(void)
{
	int ret;

	ret = kphysm_setup_func_register(&seg_p_mem_config_vec, (void *)NULL);
	/*
	 * Want to catch this in the debug kernel. At run time, if the
	 * callbacks don't get run all will be OK as the disable just makes
	 * it more likely that the pages can be collected.
	 */
	ASSERT(ret == 0);
}