1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright (c) 2015, Joyent, Inc.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 /*
31 * University Copyright- Copyright (c) 1982, 1986, 1988
32 * The Regents of the University of California
33 * All Rights Reserved
34 *
35 * University Acknowledgment- Portions of this document are derived from
36 * software developed by the University of California, Berkeley, and its
37 * contributors.
38 */
39
40 /*
41 * VM - segment management.
42 */
43
44 #include <sys/types.h>
45 #include <sys/inttypes.h>
46 #include <sys/t_lock.h>
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/kmem.h>
50 #include <sys/sysmacros.h>
51 #include <sys/vmsystm.h>
52 #include <sys/tuneable.h>
53 #include <sys/debug.h>
54 #include <sys/fs/swapnode.h>
55 #include <sys/cmn_err.h>
56 #include <sys/callb.h>
57 #include <sys/mem_config.h>
58 #include <sys/mman.h>
59
60 #include <vm/hat.h>
61 #include <vm/as.h>
62 #include <vm/seg.h>
63 #include <vm/seg_kmem.h>
64 #include <vm/seg_spt.h>
65 #include <vm/seg_vn.h>
66 #include <vm/anon.h>
67
68 /*
69 * kstats for segment advise
70 */
71 segadvstat_t segadvstat = {
72 { "MADV_FREE_hit", KSTAT_DATA_ULONG },
73 { "MADV_FREE_miss", KSTAT_DATA_ULONG },
74 };
75
76 kstat_named_t *segadvstat_ptr = (kstat_named_t *)&segadvstat;
77 uint_t segadvstat_ndata = sizeof (segadvstat) / sizeof (kstat_named_t);
78
79 /*
80 * entry in the segment page cache
81 */
82 struct seg_pcache {
83 struct seg_pcache *p_hnext; /* list for hashed blocks */
84 struct seg_pcache *p_hprev;
85 pcache_link_t p_plink; /* per segment/amp list */
86 void *p_htag0; /* segment/amp pointer */
87 caddr_t p_addr; /* base address/anon_idx */
88 size_t p_len; /* total bytes */
89 	size_t	p_wlen;			/* writable bytes at p_addr */
90 struct page **p_pp; /* pp shadow list */
91 seg_preclaim_cbfunc_t p_callback; /* reclaim callback function */
92 clock_t p_lbolt; /* lbolt from last use */
93 struct seg_phash *p_hashp; /* our pcache hash bucket */
94 uint_t p_active; /* active count */
95 uchar_t p_write; /* true if S_WRITE */
96 uchar_t p_ref; /* reference byte */
97 ushort_t p_flags; /* bit flags */
98 };
99
100 struct seg_phash {
101 struct seg_pcache *p_hnext; /* list for hashed blocks */
102 struct seg_pcache *p_hprev;
103 kmutex_t p_hmutex; /* protects hash bucket */
104 pcache_link_t p_halink[2]; /* active bucket linkages */
105 };
106
107 struct seg_phash_wired {
108 struct seg_pcache *p_hnext; /* list for hashed blocks */
109 struct seg_pcache *p_hprev;
110 kmutex_t p_hmutex; /* protects hash bucket */
111 };
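
/*
 * Note on the layout above: seg_phash and seg_phash_wired deliberately begin
 * with the same p_hnext/p_hprev pair as seg_pcache, so a bucket header can be
 * cast to (struct seg_pcache *) and used as the sentinel of its own circular
 * list. A minimal sketch of the idiom used throughout this file (illustrative
 * only, it mirrors the loops further down):
 *
 *	if (hp->p_hnext == (struct seg_pcache *)hp) {
 *		... the bucket is empty ...
 *	}
 *
 *	for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
 *	    pcp = pcp->p_hnext) {
 *		... visit each cached shadow list in the bucket ...
 *	}
 */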
112
113 /*
114 * A parameter to control a maximum number of bytes that can be
115 * purged from pcache at a time.
116 */
117 #define P_MAX_APURGE_BYTES (1024 * 1024 * 1024)
118
119 /*
120 * log2(fraction of pcache to reclaim at a time).
121 */
122 #define P_SHRINK_SHFT (5)
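
/*
 * For example (illustrative arithmetic, assuming 4K pages): with the default
 * shift of 5 a single non-forced pass of seg_ppurge_async() targets
 * seg_plocked_window / 32, i.e. about 3% of the cached window. An 8 GB window
 * would be trimmed by roughly 256 MB per pass; the 1 GB P_MAX_APURGE_BYTES
 * cap only starts to matter once the window exceeds about 32 GB.
 */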
123
124 /*
125 * The following variables can be tuned via /etc/system.
126 */
127
128 int segpcache_enabled = 1; /* if 1, shadow lists are cached */
129 pgcnt_t segpcache_maxwindow = 0; /* max # of pages that can be cached */
130 ulong_t segpcache_hashsize_win = 0; /* # of non wired buckets */
131 ulong_t segpcache_hashsize_wired = 0; /* # of wired buckets */
132 int segpcache_reap_sec = 1; /* reap check rate in secs */
133 clock_t segpcache_reap_ticks = 0; /* reap interval in ticks */
134 int segpcache_pcp_maxage_sec = 1; /* pcp max age in secs */
135 clock_t segpcache_pcp_maxage_ticks = 0; /* pcp max age in ticks */
136 int segpcache_shrink_shift = P_SHRINK_SHFT; /* log2 reap fraction */
137 pgcnt_t segpcache_maxapurge_bytes = P_MAX_APURGE_BYTES; /* max purge bytes */
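
/*
 * For example, an administrator who wants to disable shadow list caching, or
 * to cap the window at 1 GB worth of 4K pages, might add lines like the
 * following to /etc/system (the values shown are illustrative only):
 *
 *	set segpcache_enabled = 0
 *	set segpcache_maxwindow = 0x40000
 */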
138
139 static kmutex_t seg_pcache_mtx; /* protects seg_pdisabled counter */
140 static kmutex_t seg_pasync_mtx; /* protects async thread scheduling */
141 static kcondvar_t seg_pasync_cv;
142
143 #pragma align 64(pctrl1)
144 #pragma align 64(pctrl2)
145 #pragma align 64(pctrl3)
146
147 /*
148 * Keep frequently used variables together in one cache line.
149 */
150 static struct p_ctrl1 {
151 uint_t p_disabled; /* if not 0, caching temporarily off */
152 pgcnt_t p_maxwin; /* max # of pages that can be cached */
153 size_t p_hashwin_sz; /* # of non wired buckets */
154 struct seg_phash *p_htabwin; /* hash table for non wired entries */
155 size_t p_hashwired_sz; /* # of wired buckets */
156 struct seg_phash_wired *p_htabwired; /* hash table for wired entries */
157 kmem_cache_t *p_kmcache; /* kmem cache for seg_pcache structs */
158 #ifdef _LP64
159 ulong_t pad[1];
160 #endif /* _LP64 */
161 } pctrl1;
162
163 static struct p_ctrl2 {
164 kmutex_t p_mem_mtx; /* protects window counter and p_halinks */
165 pgcnt_t p_locked_win; /* # pages from window */
166 pgcnt_t p_locked; /* # of pages cached by pagelock */
167 uchar_t p_ahcur; /* current active links for insert/delete */
168 uchar_t p_athr_on; /* async reclaim thread is running. */
169 pcache_link_t p_ahhead[2]; /* active buckets linkages */
170 } pctrl2;
171
172 static struct p_ctrl3 {
173 clock_t p_pcp_maxage; /* max pcp age in ticks */
174 ulong_t p_athr_empty_ahb; /* athread walk stats */
175 ulong_t p_athr_full_ahb; /* athread walk stats */
176 pgcnt_t p_maxapurge_npages; /* max pages to purge at a time */
177 int p_shrink_shft; /* reap shift factor */
178 #ifdef _LP64
179 ulong_t pad[3];
180 #endif /* _LP64 */
181 } pctrl3;
182
183 #define seg_pdisabled pctrl1.p_disabled
184 #define seg_pmaxwindow pctrl1.p_maxwin
185 #define seg_phashsize_win pctrl1.p_hashwin_sz
186 #define seg_phashtab_win pctrl1.p_htabwin
187 #define seg_phashsize_wired pctrl1.p_hashwired_sz
188 #define seg_phashtab_wired pctrl1.p_htabwired
189 #define seg_pkmcache pctrl1.p_kmcache
190 #define seg_pmem_mtx pctrl2.p_mem_mtx
191 #define seg_plocked_window pctrl2.p_locked_win
192 #define seg_plocked pctrl2.p_locked
193 #define seg_pahcur pctrl2.p_ahcur
194 #define seg_pathr_on pctrl2.p_athr_on
195 #define seg_pahhead pctrl2.p_ahhead
196 #define seg_pmax_pcpage pctrl3.p_pcp_maxage
197 #define seg_pathr_empty_ahb pctrl3.p_athr_empty_ahb
198 #define seg_pathr_full_ahb pctrl3.p_athr_full_ahb
199 #define seg_pshrink_shift pctrl3.p_shrink_shft
200 #define seg_pmaxapurge_npages pctrl3.p_maxapurge_npages
201
202 #define P_HASHWIN_MASK (seg_phashsize_win - 1)
203 #define P_HASHWIRED_MASK (seg_phashsize_wired - 1)
204 #define P_BASESHIFT (6)
205
206 kthread_t *seg_pasync_thr;
207
208 extern struct seg_ops segvn_ops;
209 extern struct seg_ops segspt_shmops;
210
211 #define IS_PFLAGS_WIRED(flags) ((flags) & SEGP_FORCE_WIRED)
212 #define IS_PCP_WIRED(pcp) IS_PFLAGS_WIRED((pcp)->p_flags)
213
214 #define LBOLT_DELTA(t) ((ulong_t)(ddi_get_lbolt() - (t)))
215
216 #define PCP_AGE(pcp) LBOLT_DELTA((pcp)->p_lbolt)
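
/*
 * PCP_AGE() yields the number of clock ticks since the entry last went
 * inactive (p_lbolt is refreshed in seg_pinactive()). seg_ppurge_async() uses
 * it to give referenced entries a second chance: an inactive entry with p_ref
 * set is skipped (and its reference bit cleared) while PCP_AGE(pcp) is below
 * seg_pmax_pcpage, which with the default segpcache_pcp_maxage_sec of 1 is hz
 * ticks, i.e. about one second.
 */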
217
218 /*
219 * htag0 argument can be a seg or amp pointer.
220 */
221 #define P_HASHBP(seg, htag0, addr, flags) \
222 (IS_PFLAGS_WIRED((flags)) ? \
223 ((struct seg_phash *)&seg_phashtab_wired[P_HASHWIRED_MASK & \
224 ((uintptr_t)(htag0) >> P_BASESHIFT)]) : \
225 (&seg_phashtab_win[P_HASHWIN_MASK & \
226 (((uintptr_t)(htag0) >> 3) ^ \
227 ((uintptr_t)(addr) >> ((flags & SEGP_PSHIFT) ? \
228 (flags >> 16) : page_get_shift((seg)->s_szc))))]))
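
/*
 * A rough sketch of how the two tables are indexed (illustrative only): wired
 * entries hash on the htag0 pointer alone, shifted by P_BASESHIFT to discard
 * the low bits that are identical for kmem-allocated objects; non wired
 * entries mix htag0 with the shadow list address so the many entries of a
 * single large segment spread across buckets. The address shift comes either
 * from bits 16 and up of flags (when SEGP_PSHIFT is set) or from the
 * segment's page size.
 *
 *	hp = P_HASHBP(seg, (void *)seg, addr, 0);		non wired lookup
 *	hp = P_HASHBP(seg, (void *)amp, 0, SEGP_FORCE_WIRED);	wired lookup
 */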
229
230 /*
231 * htag0 argument can be a seg or amp pointer.
232 */
233 #define P_MATCH(pcp, htag0, addr, len) \
234 ((pcp)->p_htag0 == (htag0) && \
235 (pcp)->p_addr == (addr) && \
236 (pcp)->p_len >= (len))
237
238 #define P_MATCH_PP(pcp, htag0, addr, len, pp) \
239 ((pcp)->p_pp == (pp) && \
240 (pcp)->p_htag0 == (htag0) && \
241 (pcp)->p_addr == (addr) && \
242 (pcp)->p_len >= (len))
243
244 #define plink2pcache(pl) ((struct seg_pcache *)((uintptr_t)(pl) - \
245 offsetof(struct seg_pcache, p_plink)))
246
247 #define hlink2phash(hl, l) ((struct seg_phash *)((uintptr_t)(hl) - \
248 offsetof(struct seg_phash, p_halink[l])))
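
/*
 * plink2pcache() and hlink2phash() are container-of style conversions: given
 * a pointer to an embedded pcache_link_t they recover the enclosing structure
 * by subtracting the member offset. A minimal sketch of the intended use:
 *
 *	pcache_link_t *plinkp = pheadp->p_lnext;
 *	struct seg_pcache *pcp = plink2pcache(plinkp);
 *
 *	pcache_link_t *hlinkp = seg_pahhead[lix].p_lnext;
 *	struct seg_phash *hp = hlink2phash(hlinkp, lix);
 */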
249
250 /*
251 * seg_padd_abuck()/seg_premove_abuck() link and unlink hash buckets from
252 * active hash bucket lists. We maintain active bucket lists to reduce the
253 * overhead of finding active buckets during asynchronous purging since there
254 * can be 10s of millions of buckets on a large system but only a small subset
255 * of them in actual use.
256 *
257 * There're 2 active bucket lists. Current active list (as per seg_pahcur) is
258 * used by seg_pinsert()/seg_pinactive()/seg_ppurge() to add and delete
259 * buckets. The other list is used by asynchronous purge thread. This allows
260 * the purge thread to walk its active list without holding seg_pmem_mtx for a
261  * long time. When the asynchronous thread is done with its list it switches to
262  * the current active list and makes the list it just finished processing the
263  * current active list.
264 *
265 * seg_padd_abuck() only adds the bucket to current list if the bucket is not
266 * yet on any list. seg_premove_abuck() may remove the bucket from either
267  * list. If the bucket is on the current list it will always be removed. Otherwise
268 * the bucket is only removed if asynchronous purge thread is not currently
269 * running or seg_premove_abuck() is called by asynchronous purge thread
270 * itself. A given bucket can only be on one of active lists at a time. These
271 * routines should be called with per bucket lock held. The routines use
272 * seg_pmem_mtx to protect list updates. seg_padd_abuck() must be called after
273 * the first entry is added to the bucket chain and seg_premove_abuck() must
274 * be called after the last pcp entry is deleted from its chain. Per bucket
275 * lock should be held by the callers. This avoids a potential race condition
276 * when seg_premove_abuck() removes a bucket after pcp entries are added to
277 * its list after the caller checked that the bucket has no entries. (this
278 * race would cause a loss of an active bucket from the active lists).
279 *
280 * Both lists are circular doubly linked lists anchored at seg_pahhead heads.
281 * New entries are added to the end of the list since LRU is used as the
282 * purging policy.
283 */
284 static void
285 seg_padd_abuck(struct seg_phash *hp)
286 {
287 int lix;
288
289 ASSERT(MUTEX_HELD(&hp->p_hmutex));
290 ASSERT((struct seg_phash *)hp->p_hnext != hp);
291 ASSERT((struct seg_phash *)hp->p_hprev != hp);
292 ASSERT(hp->p_hnext == hp->p_hprev);
293 ASSERT(!IS_PCP_WIRED(hp->p_hnext));
294 ASSERT(hp->p_hnext->p_hnext == (struct seg_pcache *)hp);
295 ASSERT(hp->p_hprev->p_hprev == (struct seg_pcache *)hp);
296 ASSERT(hp >= seg_phashtab_win &&
297 hp < &seg_phashtab_win[seg_phashsize_win]);
298
299 /*
300 * This bucket can already be on one of active lists
301 * since seg_premove_abuck() may have failed to remove it
302 * before.
303 */
304 mutex_enter(&seg_pmem_mtx);
305 lix = seg_pahcur;
306 ASSERT(lix >= 0 && lix <= 1);
307 if (hp->p_halink[lix].p_lnext != NULL) {
308 ASSERT(hp->p_halink[lix].p_lprev != NULL);
309 ASSERT(hp->p_halink[!lix].p_lnext == NULL);
310 ASSERT(hp->p_halink[!lix].p_lprev == NULL);
311 mutex_exit(&seg_pmem_mtx);
312 return;
313 }
314 ASSERT(hp->p_halink[lix].p_lprev == NULL);
315
316 /*
317 * If this bucket is still on list !lix async thread can't yet remove
318 * it since we hold here per bucket lock. In this case just return
319 * since async thread will eventually find and process this bucket.
320 */
321 if (hp->p_halink[!lix].p_lnext != NULL) {
322 ASSERT(hp->p_halink[!lix].p_lprev != NULL);
323 mutex_exit(&seg_pmem_mtx);
324 return;
325 }
326 ASSERT(hp->p_halink[!lix].p_lprev == NULL);
327 /*
328 * This bucket is not on any active bucket list yet.
329 * Add the bucket to the tail of current active list.
330 */
331 hp->p_halink[lix].p_lnext = &seg_pahhead[lix];
332 hp->p_halink[lix].p_lprev = seg_pahhead[lix].p_lprev;
333 seg_pahhead[lix].p_lprev->p_lnext = &hp->p_halink[lix];
334 seg_pahhead[lix].p_lprev = &hp->p_halink[lix];
335 mutex_exit(&seg_pmem_mtx);
336 }
337
338 static void
339 seg_premove_abuck(struct seg_phash *hp, int athr)
340 {
341 int lix;
342
343 ASSERT(MUTEX_HELD(&hp->p_hmutex));
344 ASSERT((struct seg_phash *)hp->p_hnext == hp);
345 ASSERT((struct seg_phash *)hp->p_hprev == hp);
346 ASSERT(hp >= seg_phashtab_win &&
347 hp < &seg_phashtab_win[seg_phashsize_win]);
348
349 if (athr) {
350 ASSERT(seg_pathr_on);
351 ASSERT(seg_pahcur <= 1);
352 /*
353 * We are called by asynchronous thread that found this bucket
354 * on not currently active (i.e. !seg_pahcur) list. Remove it
355 * from there. Per bucket lock we are holding makes sure
356 * seg_pinsert() can't sneak in and add pcp entries to this
357 * bucket right before we remove the bucket from its list.
358 */
359 lix = !seg_pahcur;
360 ASSERT(hp->p_halink[lix].p_lnext != NULL);
361 ASSERT(hp->p_halink[lix].p_lprev != NULL);
362 ASSERT(hp->p_halink[!lix].p_lnext == NULL);
363 ASSERT(hp->p_halink[!lix].p_lprev == NULL);
364 hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
365 hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
366 hp->p_halink[lix].p_lnext = NULL;
367 hp->p_halink[lix].p_lprev = NULL;
368 return;
369 }
370
371 mutex_enter(&seg_pmem_mtx);
372 lix = seg_pahcur;
373 ASSERT(lix >= 0 && lix <= 1);
374
375 /*
376 * If the bucket is on currently active list just remove it from
377 * there.
378 */
379 if (hp->p_halink[lix].p_lnext != NULL) {
380 ASSERT(hp->p_halink[lix].p_lprev != NULL);
381 ASSERT(hp->p_halink[!lix].p_lnext == NULL);
382 ASSERT(hp->p_halink[!lix].p_lprev == NULL);
383 hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
384 hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
385 hp->p_halink[lix].p_lnext = NULL;
386 hp->p_halink[lix].p_lprev = NULL;
387 mutex_exit(&seg_pmem_mtx);
388 return;
389 }
390 ASSERT(hp->p_halink[lix].p_lprev == NULL);
391
392 /*
393 * If asynchronous thread is not running we can remove the bucket from
394 * not currently active list. The bucket must be on this list since we
395 * already checked that it's not on the other list and the bucket from
396 * which we just deleted the last pcp entry must be still on one of the
397 * active bucket lists.
398 */
399 lix = !lix;
400 ASSERT(hp->p_halink[lix].p_lnext != NULL);
401 ASSERT(hp->p_halink[lix].p_lprev != NULL);
402
403 if (!seg_pathr_on) {
404 hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
405 hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
406 hp->p_halink[lix].p_lnext = NULL;
407 hp->p_halink[lix].p_lprev = NULL;
408 }
409 mutex_exit(&seg_pmem_mtx);
410 }
411
412 /*
413 * Check if bucket pointed by hp already has a pcp entry that matches request
414 * htag0, addr and len. Set *found to 1 if match is found and to 0 otherwise.
415 * Also delete matching entries that cover smaller address range but start
416 * at the same address as addr argument. Return the list of deleted entries if
417 * any. This is an internal helper function called from seg_pinsert() only
418 * for non wired shadow lists. The caller already holds a per seg/amp list
419 * lock.
420 */
421 static struct seg_pcache *
422 seg_plookup_checkdup(struct seg_phash *hp, void *htag0,
423 caddr_t addr, size_t len, int *found)
424 {
425 struct seg_pcache *pcp;
426 struct seg_pcache *delcallb_list = NULL;
427
428 ASSERT(MUTEX_HELD(&hp->p_hmutex));
429
430 *found = 0;
431 for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
432 pcp = pcp->p_hnext) {
433 ASSERT(pcp->p_hashp == hp);
434 if (pcp->p_htag0 == htag0 && pcp->p_addr == addr) {
435 ASSERT(!IS_PCP_WIRED(pcp));
436 if (pcp->p_len < len) {
437 pcache_link_t *plinkp;
438 if (pcp->p_active) {
439 continue;
440 }
441 plinkp = &pcp->p_plink;
442 plinkp->p_lprev->p_lnext = plinkp->p_lnext;
443 plinkp->p_lnext->p_lprev = plinkp->p_lprev;
444 pcp->p_hprev->p_hnext = pcp->p_hnext;
445 pcp->p_hnext->p_hprev = pcp->p_hprev;
446 pcp->p_hprev = delcallb_list;
447 delcallb_list = pcp;
448 } else {
449 *found = 1;
450 break;
451 }
452 }
453 }
454 return (delcallb_list);
455 }
456
457 /*
458 * lookup an address range in pagelock cache. Return shadow list and bump up
459 * active count. If amp is not NULL use amp as a lookup tag otherwise use seg
460 * as a lookup tag.
461 */
462 struct page **
463 seg_plookup(struct seg *seg, struct anon_map *amp, caddr_t addr, size_t len,
464 enum seg_rw rw, uint_t flags)
465 {
466 struct seg_pcache *pcp;
467 struct seg_phash *hp;
468 void *htag0;
469
470 ASSERT(seg != NULL);
471 ASSERT(rw == S_READ || rw == S_WRITE);
472
473 /*
474 	 * Skip the pagelock cache while DR is in progress or
475 * seg_pcache is off.
476 */
477 if (seg_pdisabled) {
478 return (NULL);
479 }
480 ASSERT(seg_phashsize_win != 0);
481
482 htag0 = (amp == NULL ? (void *)seg : (void *)amp);
483 hp = P_HASHBP(seg, htag0, addr, flags);
484 mutex_enter(&hp->p_hmutex);
485 for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
486 pcp = pcp->p_hnext) {
487 ASSERT(pcp->p_hashp == hp);
488 if (P_MATCH(pcp, htag0, addr, len)) {
489 ASSERT(IS_PFLAGS_WIRED(flags) == IS_PCP_WIRED(pcp));
490 /*
491 * If this request wants to write pages
492 * but write permissions starting from
493 * addr don't cover the entire length len
494 * return lookup failure back to the caller.
495 * It will check protections and fail this
496 			 * pagelock operation with EACCES error.
497 */
498 if (rw == S_WRITE && pcp->p_wlen < len) {
499 break;
500 }
501 if (pcp->p_active == UINT_MAX) {
502 break;
503 }
504 pcp->p_active++;
505 if (rw == S_WRITE && !pcp->p_write) {
506 pcp->p_write = 1;
507 }
508 mutex_exit(&hp->p_hmutex);
509 return (pcp->p_pp);
510 }
511 }
512 mutex_exit(&hp->p_hmutex);
513 return (NULL);
514 }
515
516 /*
517 * mark address range inactive. If the cache is off or the address range is
518  * not in the cache, or another shadow list that covers a bigger range is found,
519 * we call the segment driver to reclaim the pages. Otherwise just decrement
520 * active count and set ref bit. If amp is not NULL use amp as a lookup tag
521 * otherwise use seg as a lookup tag.
522 */
523 void
524 seg_pinactive(struct seg *seg, struct anon_map *amp, caddr_t addr,
525 size_t len, struct page **pp, enum seg_rw rw, uint_t flags,
526 seg_preclaim_cbfunc_t callback)
527 {
528 struct seg_pcache *pcp;
529 struct seg_phash *hp;
530 kmutex_t *pmtx = NULL;
531 pcache_link_t *pheadp;
532 void *htag0;
533 pgcnt_t npages = 0;
534 int keep = 0;
535
536 ASSERT(seg != NULL);
537 ASSERT(rw == S_READ || rw == S_WRITE);
538
539 htag0 = (amp == NULL ? (void *)seg : (void *)amp);
540
541 /*
542 * Skip lookup if pcache is not configured.
543 */
544 if (seg_phashsize_win == 0) {
545 goto out;
546 }
547
548 /*
549 * Grab per seg/amp lock before hash lock if we are going to remove
550 * inactive entry from pcache.
551 */
552 if (!IS_PFLAGS_WIRED(flags) && seg_pdisabled) {
553 if (amp == NULL) {
554 pheadp = &seg->s_phead;
555 pmtx = &seg->s_pmtx;
556 } else {
557 			pheadp = &amp->a_phead;
558 			pmtx = &amp->a_pmtx;
559 }
560 mutex_enter(pmtx);
561 }
562
563 hp = P_HASHBP(seg, htag0, addr, flags);
564 mutex_enter(&hp->p_hmutex);
565 again:
566 for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
567 pcp = pcp->p_hnext) {
568 ASSERT(pcp->p_hashp == hp);
569 if (P_MATCH_PP(pcp, htag0, addr, len, pp)) {
570 ASSERT(IS_PFLAGS_WIRED(flags) == IS_PCP_WIRED(pcp));
571 ASSERT(pcp->p_active);
572 if (keep) {
573 /*
574 * Don't remove this pcp entry
575 * if we didn't find duplicate
576 * shadow lists on second search.
577 * Somebody removed those duplicates
578 * since we dropped hash lock after first
579 * search.
580 */
581 ASSERT(pmtx != NULL);
582 ASSERT(!IS_PFLAGS_WIRED(flags));
583 mutex_exit(pmtx);
584 pmtx = NULL;
585 }
586 pcp->p_active--;
587 if (pcp->p_active == 0 && (pmtx != NULL ||
588 (seg_pdisabled && IS_PFLAGS_WIRED(flags)))) {
589
590 /*
591 * This entry is no longer active. Remove it
592 * now either because pcaching is temporarily
593 * disabled or there're other pcp entries that
594 * can match this pagelock request (i.e. this
595 * entry is a duplicate).
596 */
597
598 ASSERT(callback == pcp->p_callback);
599 if (pmtx != NULL) {
600 pcache_link_t *plinkp = &pcp->p_plink;
601 ASSERT(!IS_PCP_WIRED(pcp));
602 ASSERT(pheadp->p_lnext != pheadp);
603 ASSERT(pheadp->p_lprev != pheadp);
604 plinkp->p_lprev->p_lnext =
605 plinkp->p_lnext;
606 plinkp->p_lnext->p_lprev =
607 plinkp->p_lprev;
608 }
609 pcp->p_hprev->p_hnext = pcp->p_hnext;
610 pcp->p_hnext->p_hprev = pcp->p_hprev;
611 if (!IS_PCP_WIRED(pcp) &&
612 hp->p_hnext == (struct seg_pcache *)hp) {
613 /*
614 * We removed the last entry from this
615 * bucket. Now remove the bucket from
616 * its active list.
617 */
618 seg_premove_abuck(hp, 0);
619 }
620 mutex_exit(&hp->p_hmutex);
621 if (pmtx != NULL) {
622 mutex_exit(pmtx);
623 }
624 len = pcp->p_len;
625 npages = btop(len);
626 if (rw != S_WRITE && pcp->p_write) {
627 rw = S_WRITE;
628 }
629 kmem_cache_free(seg_pkmcache, pcp);
630 goto out;
631 } else {
632 /*
633 * We found a matching pcp entry but will not
634 * free it right away even if it's no longer
635 * active.
636 */
637 if (!pcp->p_active && !IS_PCP_WIRED(pcp)) {
638 /*
639 * Set the reference bit and mark the
640 * time of last access to this pcp
641 * so that asynchronous thread doesn't
642 * free it immediately since
643 * it may be reactivated very soon.
644 */
645 pcp->p_lbolt = ddi_get_lbolt();
646 pcp->p_ref = 1;
647 }
648 mutex_exit(&hp->p_hmutex);
649 if (pmtx != NULL) {
650 mutex_exit(pmtx);
651 }
652 return;
653 }
654 } else if (!IS_PFLAGS_WIRED(flags) &&
655 P_MATCH(pcp, htag0, addr, len)) {
656 /*
657 * This is a duplicate pcp entry. This situation may
658 * happen if a bigger shadow list that covers our
659 * range was added while our entry was still active.
660 * Now we can free our pcp entry if it becomes
661 * inactive.
662 */
663 if (!pcp->p_active) {
664 /*
665 * Mark this entry as referenced just in case
666 * we'll free our own pcp entry soon.
667 */
668 pcp->p_lbolt = ddi_get_lbolt();
669 pcp->p_ref = 1;
670 }
671 if (pmtx != NULL) {
672 /*
673 * we are already holding pmtx and found a
674 * duplicate. Don't keep our own pcp entry.
675 */
676 keep = 0;
677 continue;
678 }
679 /*
680 * We have to use mutex_tryenter to attempt to lock
681 * seg/amp list lock since we already hold hash lock
682 * and seg/amp list lock is above hash lock in lock
683 * order. If mutex_tryenter fails drop hash lock and
684 			 * retake both locks in correct order and re-search
685 * this hash chain.
686 */
687 ASSERT(keep == 0);
688 if (amp == NULL) {
689 pheadp = &seg->s_phead;
690 pmtx = &seg->s_pmtx;
691 } else {
692 				pheadp = &amp->a_phead;
693 				pmtx = &amp->a_pmtx;
694 }
695 if (!mutex_tryenter(pmtx)) {
696 mutex_exit(&hp->p_hmutex);
697 mutex_enter(pmtx);
698 mutex_enter(&hp->p_hmutex);
699 /*
700 * If we don't find bigger shadow list on
701 * second search (it may happen since we
702 * dropped bucket lock) keep the entry that
703 * matches our own shadow list.
704 */
705 keep = 1;
706 goto again;
707 }
708 }
709 }
710 mutex_exit(&hp->p_hmutex);
711 if (pmtx != NULL) {
712 mutex_exit(pmtx);
713 }
714 out:
715 (*callback)(htag0, addr, len, pp, rw, 0);
716 if (npages) {
717 mutex_enter(&seg_pmem_mtx);
718 ASSERT(seg_plocked >= npages);
719 seg_plocked -= npages;
720 if (!IS_PFLAGS_WIRED(flags)) {
721 ASSERT(seg_plocked_window >= npages);
722 seg_plocked_window -= npages;
723 }
724 mutex_exit(&seg_pmem_mtx);
725 }
726
727 }
728
729 #ifdef DEBUG
730 static uint32_t p_insert_chk_mtbf = 0;
731 #endif
732
733 /*
734 * The seg_pinsert_check() is used by segment drivers to predict whether
735 * a call to seg_pinsert will fail and thereby avoid wasteful pre-processing.
736 */
737 /*ARGSUSED*/
738 int
739 seg_pinsert_check(struct seg *seg, struct anon_map *amp, caddr_t addr,
740 size_t len, uint_t flags)
741 {
742 ASSERT(seg != NULL);
743
744 #ifdef DEBUG
745 if (p_insert_chk_mtbf && !(gethrtime() % p_insert_chk_mtbf)) {
746 return (SEGP_FAIL);
747 }
748 #endif
749
750 if (seg_pdisabled) {
751 return (SEGP_FAIL);
752 }
753 ASSERT(seg_phashsize_win != 0);
754
755 if (IS_PFLAGS_WIRED(flags)) {
756 return (SEGP_SUCCESS);
757 }
758
759 if (seg_plocked_window + btop(len) > seg_pmaxwindow) {
760 return (SEGP_FAIL);
761 }
762
763 if (freemem < desfree) {
764 return (SEGP_FAIL);
765 }
766
767 return (SEGP_SUCCESS);
768 }
769
770 #ifdef DEBUG
771 static uint32_t p_insert_mtbf = 0;
772 #endif
773
774 /*
775 * Insert address range with shadow list into pagelock cache if there's no
776 * shadow list already cached for this address range. If the cache is off or
777 * caching is temporarily disabled or the allowed 'window' is exceeded return
778 * SEGP_FAIL. Otherwise return SEGP_SUCCESS.
779 *
780 * For non wired shadow lists (segvn case) include address in the hashing
781 * function to avoid linking all the entries from the same segment or amp on
782 * the same bucket. amp is used instead of seg if amp is not NULL. Non wired
783 * pcache entries are also linked on a per segment/amp list so that all
784 * entries can be found quickly during seg/amp purge without walking the
785 * entire pcache hash table. For wired shadow lists (segspt case) we
786 * don't use address hashing and per segment linking because the caller
787 * currently inserts only one entry per segment that covers the entire
788 * segment. If we used per segment linking even for segspt it would complicate
789 * seg_ppurge_wiredpp() locking.
790 *
791 * Both hash bucket and per seg/amp locks need to be held before adding a non
792 * wired entry to hash and per seg/amp lists. per seg/amp lock should be taken
793 * first.
794 *
795 * This function will also remove from pcache old inactive shadow lists that
796 * overlap with this request but cover smaller range for the same start
797 * address.
798 */
799 int
800 seg_pinsert(struct seg *seg, struct anon_map *amp, caddr_t addr, size_t len,
801 size_t wlen, struct page **pp, enum seg_rw rw, uint_t flags,
802 seg_preclaim_cbfunc_t callback)
803 {
804 struct seg_pcache *pcp;
805 struct seg_phash *hp;
806 pgcnt_t npages;
807 pcache_link_t *pheadp;
808 kmutex_t *pmtx;
809 struct seg_pcache *delcallb_list = NULL;
810
811 ASSERT(seg != NULL);
812 ASSERT(rw == S_READ || rw == S_WRITE);
813 ASSERT(rw == S_READ || wlen == len);
814 ASSERT(rw == S_WRITE || wlen <= len);
815 ASSERT(amp == NULL || wlen == len);
816
817 #ifdef DEBUG
818 if (p_insert_mtbf && !(gethrtime() % p_insert_mtbf)) {
819 return (SEGP_FAIL);
820 }
821 #endif
822
823 if (seg_pdisabled) {
824 return (SEGP_FAIL);
825 }
826 ASSERT(seg_phashsize_win != 0);
827
828 ASSERT((len & PAGEOFFSET) == 0);
829 npages = btop(len);
830 mutex_enter(&seg_pmem_mtx);
831 if (!IS_PFLAGS_WIRED(flags)) {
832 if (seg_plocked_window + npages > seg_pmaxwindow) {
833 mutex_exit(&seg_pmem_mtx);
834 return (SEGP_FAIL);
835 }
836 seg_plocked_window += npages;
837 }
838 seg_plocked += npages;
839 mutex_exit(&seg_pmem_mtx);
840
841 pcp = kmem_cache_alloc(seg_pkmcache, KM_SLEEP);
842 /*
843 * If amp is not NULL set htag0 to amp otherwise set it to seg.
844 */
845 if (amp == NULL) {
846 pcp->p_htag0 = (void *)seg;
847 pcp->p_flags = flags & 0xffff;
848 } else {
849 pcp->p_htag0 = (void *)amp;
850 pcp->p_flags = (flags & 0xffff) | SEGP_AMP;
851 }
852 pcp->p_addr = addr;
853 pcp->p_len = len;
854 pcp->p_wlen = wlen;
855 pcp->p_pp = pp;
856 pcp->p_write = (rw == S_WRITE);
857 pcp->p_callback = callback;
858 pcp->p_active = 1;
859
860 hp = P_HASHBP(seg, pcp->p_htag0, addr, flags);
861 if (!IS_PFLAGS_WIRED(flags)) {
862 int found;
863 void *htag0;
864 if (amp == NULL) {
865 pheadp = &seg->s_phead;
866 pmtx = &seg->s_pmtx;
867 htag0 = (void *)seg;
868 } else {
869 			pheadp = &amp->a_phead;
870 			pmtx = &amp->a_pmtx;
871 htag0 = (void *)amp;
872 }
873 mutex_enter(pmtx);
874 mutex_enter(&hp->p_hmutex);
875 delcallb_list = seg_plookup_checkdup(hp, htag0, addr,
876 len, &found);
877 if (found) {
878 mutex_exit(&hp->p_hmutex);
879 mutex_exit(pmtx);
880 mutex_enter(&seg_pmem_mtx);
881 seg_plocked -= npages;
882 seg_plocked_window -= npages;
883 mutex_exit(&seg_pmem_mtx);
884 kmem_cache_free(seg_pkmcache, pcp);
885 goto out;
886 }
887 pcp->p_plink.p_lnext = pheadp->p_lnext;
888 pcp->p_plink.p_lprev = pheadp;
889 pheadp->p_lnext->p_lprev = &pcp->p_plink;
890 pheadp->p_lnext = &pcp->p_plink;
891 } else {
892 mutex_enter(&hp->p_hmutex);
893 }
894 pcp->p_hashp = hp;
895 pcp->p_hnext = hp->p_hnext;
896 pcp->p_hprev = (struct seg_pcache *)hp;
897 hp->p_hnext->p_hprev = pcp;
898 hp->p_hnext = pcp;
899 if (!IS_PFLAGS_WIRED(flags) &&
900 hp->p_hprev == pcp) {
901 seg_padd_abuck(hp);
902 }
903 mutex_exit(&hp->p_hmutex);
904 if (!IS_PFLAGS_WIRED(flags)) {
905 mutex_exit(pmtx);
906 }
907
908 out:
909 npages = 0;
910 while (delcallb_list != NULL) {
911 pcp = delcallb_list;
912 delcallb_list = pcp->p_hprev;
913 ASSERT(!IS_PCP_WIRED(pcp) && !pcp->p_active);
914 (void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
915 pcp->p_len, pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 0);
916 npages += btop(pcp->p_len);
917 kmem_cache_free(seg_pkmcache, pcp);
918 }
919 if (npages) {
920 ASSERT(!IS_PFLAGS_WIRED(flags));
921 mutex_enter(&seg_pmem_mtx);
922 ASSERT(seg_plocked >= npages);
923 ASSERT(seg_plocked_window >= npages);
924 seg_plocked -= npages;
925 seg_plocked_window -= npages;
926 mutex_exit(&seg_pmem_mtx);
927 }
928
929 return (SEGP_SUCCESS);
930 }
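
/*
 * A minimal sketch of how a segment driver is expected to use the entry
 * points above from its pagelock and pageunlock paths. The callback name
 * my_reclaim_cb and the flags value 0 are placeholders, not names or values
 * taken from any real driver (segvn and segspt have their own callbacks and
 * may pass SEGP_FORCE_WIRED or SEGP_PSHIFT); this illustrates the calling
 * pattern only:
 *
 *	pagelock:
 *		pplist = seg_plookup(seg, amp, addr, len, rw, 0);
 *		if (pplist != NULL)
 *			return pplist;		cache hit, p_active was bumped
 *		if (seg_pinsert_check(seg, amp, addr, len, 0) == SEGP_FAIL)
 *			lock the pages without caching the shadow list
 *		otherwise build pplist, lock the pages, then:
 *		(void) seg_pinsert(seg, amp, addr, len, wlen, pplist, rw, 0,
 *		    my_reclaim_cb);
 *
 *	pageunlock:
 *		seg_pinactive(seg, amp, addr, len, pplist, rw, 0,
 *		    my_reclaim_cb);
 *
 * my_reclaim_cb() must have the seg_preclaim_cbfunc_t signature; it is what
 * actually unlocks the pages and frees the shadow list when pcache finally
 * drops the entry.
 */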
931
932 /*
933 * purge entries from the pagelock cache if not active
934 * and not recently used.
935 */
936 static void
937 seg_ppurge_async(int force)
938 {
939 struct seg_pcache *delcallb_list = NULL;
940 struct seg_pcache *pcp;
941 struct seg_phash *hp;
942 pgcnt_t npages = 0;
943 pgcnt_t npages_window = 0;
944 pgcnt_t npgs_to_purge;
945 pgcnt_t npgs_purged = 0;
946 int hlinks = 0;
947 int hlix;
948 pcache_link_t *hlinkp;
949 pcache_link_t *hlnextp = NULL;
950 int lowmem;
951 int trim;
952
953 ASSERT(seg_phashsize_win != 0);
954
955 /*
956 * if the cache is off or empty, return
957 */
958 if (seg_plocked == 0 || (!force && seg_plocked_window == 0)) {
959 return;
960 }
961
962 if (!force) {
963 lowmem = 0;
964 trim = 0;
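		/*
		 * Decide whether we are purging because memory is low
		 * (lowmem) or merely because the pcache window is close to
		 * its limit (trim). Roughly: free memory (less needfree)
		 * under 1.25 * desfree always counts as low; under 7/8 of
		 * lotsfree it counts as low only if the window holds at
		 * least half of initial available memory; under lotsfree
		 * only if it holds at least three quarters of it. Trim mode
		 * kicks in once the window exceeds 7/8 of seg_pmaxwindow.
		 */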
965 if (freemem < lotsfree + needfree) {
966 spgcnt_t fmem = MAX((spgcnt_t)(freemem - needfree), 0);
967 if (fmem <= 5 * (desfree >> 2)) {
968 lowmem = 1;
969 } else if (fmem <= 7 * (lotsfree >> 3)) {
970 if (seg_plocked_window >=
971 (availrmem_initial >> 1)) {
972 lowmem = 1;
973 }
974 } else if (fmem < lotsfree) {
975 if (seg_plocked_window >=
976 3 * (availrmem_initial >> 2)) {
977 lowmem = 1;
978 }
979 }
980 }
981 if (seg_plocked_window >= 7 * (seg_pmaxwindow >> 3)) {
982 trim = 1;
983 }
984 if (!lowmem && !trim) {
985 return;
986 }
987 npgs_to_purge = seg_plocked_window >>
988 seg_pshrink_shift;
989 if (lowmem) {
990 npgs_to_purge = MIN(npgs_to_purge,
991 MAX(seg_pmaxapurge_npages, desfree));
992 } else {
993 npgs_to_purge = MIN(npgs_to_purge,
994 seg_pmaxapurge_npages);
995 }
996 if (npgs_to_purge == 0) {
997 return;
998 }
999 } else {
1000 struct seg_phash_wired *hpw;
1001
1002 ASSERT(seg_phashsize_wired != 0);
1003
1004 for (hpw = seg_phashtab_wired;
1005 hpw < &seg_phashtab_wired[seg_phashsize_wired]; hpw++) {
1006
1007 if (hpw->p_hnext == (struct seg_pcache *)hpw) {
1008 continue;
1009 }
1010
1011 mutex_enter(&hpw->p_hmutex);
1012
1013 for (pcp = hpw->p_hnext;
1014 pcp != (struct seg_pcache *)hpw;
1015 pcp = pcp->p_hnext) {
1016
1017 ASSERT(IS_PCP_WIRED(pcp));
1018 ASSERT(pcp->p_hashp ==
1019 (struct seg_phash *)hpw);
1020
1021 if (pcp->p_active) {
1022 continue;
1023 }
1024 pcp->p_hprev->p_hnext = pcp->p_hnext;
1025 pcp->p_hnext->p_hprev = pcp->p_hprev;
1026 pcp->p_hprev = delcallb_list;
1027 delcallb_list = pcp;
1028 }
1029 mutex_exit(&hpw->p_hmutex);
1030 }
1031 }
1032
1033 mutex_enter(&seg_pmem_mtx);
1034 if (seg_pathr_on) {
1035 mutex_exit(&seg_pmem_mtx);
1036 goto runcb;
1037 }
1038 seg_pathr_on = 1;
1039 mutex_exit(&seg_pmem_mtx);
1040 ASSERT(seg_pahcur <= 1);
1041 hlix = !seg_pahcur;
1042
1043 again:
1044 for (hlinkp = seg_pahhead[hlix].p_lnext; hlinkp != &seg_pahhead[hlix];
1045 hlinkp = hlnextp) {
1046
1047 hlnextp = hlinkp->p_lnext;
1048 ASSERT(hlnextp != NULL);
1049
1050 hp = hlink2phash(hlinkp, hlix);
1051 if (hp->p_hnext == (struct seg_pcache *)hp) {
1052 seg_pathr_empty_ahb++;
1053 continue;
1054 }
1055 seg_pathr_full_ahb++;
1056 mutex_enter(&hp->p_hmutex);
1057
1058 for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
1059 pcp = pcp->p_hnext) {
1060 pcache_link_t *pheadp;
1061 pcache_link_t *plinkp;
1062 void *htag0;
1063 kmutex_t *pmtx;
1064
1065 ASSERT(!IS_PCP_WIRED(pcp));
1066 ASSERT(pcp->p_hashp == hp);
1067
1068 if (pcp->p_active) {
1069 continue;
1070 }
1071 if (!force && pcp->p_ref &&
1072 PCP_AGE(pcp) < seg_pmax_pcpage) {
1073 pcp->p_ref = 0;
1074 continue;
1075 }
1076 plinkp = &pcp->p_plink;
1077 htag0 = pcp->p_htag0;
1078 if (pcp->p_flags & SEGP_AMP) {
1079 pheadp = &((amp_t *)htag0)->a_phead;
1080 pmtx = &((amp_t *)htag0)->a_pmtx;
1081 } else {
1082 pheadp = &((seg_t *)htag0)->s_phead;
1083 pmtx = &((seg_t *)htag0)->s_pmtx;
1084 }
1085 if (!mutex_tryenter(pmtx)) {
1086 continue;
1087 }
1088 ASSERT(pheadp->p_lnext != pheadp);
1089 ASSERT(pheadp->p_lprev != pheadp);
1090 plinkp->p_lprev->p_lnext =
1091 plinkp->p_lnext;
1092 plinkp->p_lnext->p_lprev =
1093 plinkp->p_lprev;
1094 pcp->p_hprev->p_hnext = pcp->p_hnext;
1095 pcp->p_hnext->p_hprev = pcp->p_hprev;
1096 mutex_exit(pmtx);
1097 pcp->p_hprev = delcallb_list;
1098 delcallb_list = pcp;
1099 npgs_purged += btop(pcp->p_len);
1100 }
1101 if (hp->p_hnext == (struct seg_pcache *)hp) {
1102 seg_premove_abuck(hp, 1);
1103 }
1104 mutex_exit(&hp->p_hmutex);
1105 if (npgs_purged >= seg_plocked_window) {
1106 break;
1107 }
1108 if (!force) {
1109 if (npgs_purged >= npgs_to_purge) {
1110 break;
1111 }
1112 if (!trim && !(seg_pathr_full_ahb & 15)) {
1113 ASSERT(lowmem);
1114 if (freemem >= lotsfree + needfree) {
1115 break;
1116 }
1117 }
1118 }
1119 }
1120
1121 if (hlinkp == &seg_pahhead[hlix]) {
1122 /*
1123 * We processed the entire hlix active bucket list
1124 * but didn't find enough pages to reclaim.
1125 * Switch the lists and walk the other list
1126 * if we haven't done it yet.
1127 */
1128 mutex_enter(&seg_pmem_mtx);
1129 ASSERT(seg_pathr_on);
1130 ASSERT(seg_pahcur == !hlix);
1131 seg_pahcur = hlix;
1132 mutex_exit(&seg_pmem_mtx);
1133 if (++hlinks < 2) {
1134 hlix = !hlix;
1135 goto again;
1136 }
1137 } else if ((hlinkp = hlnextp) != &seg_pahhead[hlix] &&
1138 seg_pahhead[hlix].p_lnext != hlinkp) {
1139 ASSERT(hlinkp != NULL);
1140 ASSERT(hlinkp->p_lprev != &seg_pahhead[hlix]);
1141 ASSERT(seg_pahhead[hlix].p_lnext != &seg_pahhead[hlix]);
1142 ASSERT(seg_pahhead[hlix].p_lprev != &seg_pahhead[hlix]);
1143
1144 /*
1145 * Reinsert the header to point to hlinkp
1146 * so that we start from hlinkp bucket next time around.
1147 */
1148 seg_pahhead[hlix].p_lnext->p_lprev = seg_pahhead[hlix].p_lprev;
1149 seg_pahhead[hlix].p_lprev->p_lnext = seg_pahhead[hlix].p_lnext;
1150 seg_pahhead[hlix].p_lnext = hlinkp;
1151 seg_pahhead[hlix].p_lprev = hlinkp->p_lprev;
1152 hlinkp->p_lprev->p_lnext = &seg_pahhead[hlix];
1153 hlinkp->p_lprev = &seg_pahhead[hlix];
1154 }
1155
1156 mutex_enter(&seg_pmem_mtx);
1157 ASSERT(seg_pathr_on);
1158 seg_pathr_on = 0;
1159 mutex_exit(&seg_pmem_mtx);
1160
1161 runcb:
1162 /*
1163 * Run the delayed callback list. segments/amps can't go away until
1164 * callback is executed since they must have non 0 softlockcnt. That's
1165 * why we don't need to hold as/seg/amp locks to execute the callback.
1166 */
1167 while (delcallb_list != NULL) {
1168 pcp = delcallb_list;
1169 delcallb_list = pcp->p_hprev;
1170 ASSERT(!pcp->p_active);
1171 (void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
1172 pcp->p_len, pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 1);
1173 npages += btop(pcp->p_len);
1174 if (!IS_PCP_WIRED(pcp)) {
1175 npages_window += btop(pcp->p_len);
1176 }
1177 kmem_cache_free(seg_pkmcache, pcp);
1178 }
1179 if (npages) {
1180 mutex_enter(&seg_pmem_mtx);
1181 ASSERT(seg_plocked >= npages);
1182 ASSERT(seg_plocked_window >= npages_window);
1183 seg_plocked -= npages;
1184 seg_plocked_window -= npages_window;
1185 mutex_exit(&seg_pmem_mtx);
1186 }
1187 }
1188
1189 /*
1190 * Remove cached pages for segment(s) entries from hashtable. The segments
1191 * are identified by pp array. This is useful for multiple seg's cached on
1192 * behalf of dummy segment (ISM/DISM) with common pp array.
1193 */
1194 void
1195 seg_ppurge_wiredpp(struct page **pp)
1196 {
1197 struct seg_pcache *pcp;
1198 struct seg_phash_wired *hp;
1199 pgcnt_t npages = 0;
1200 struct seg_pcache *delcallb_list = NULL;
1201
1202 /*
1203 * if the cache is empty, return
1204 */
1205 if (seg_plocked == 0) {
1206 return;
1207 }
1208 ASSERT(seg_phashsize_wired != 0);
1209
1210 for (hp = seg_phashtab_wired;
1211 hp < &seg_phashtab_wired[seg_phashsize_wired]; hp++) {
1212 if (hp->p_hnext == (struct seg_pcache *)hp) {
1213 continue;
1214 }
1215 mutex_enter(&hp->p_hmutex);
1216 pcp = hp->p_hnext;
1217 while (pcp != (struct seg_pcache *)hp) {
1218 ASSERT(pcp->p_hashp == (struct seg_phash *)hp);
1219 ASSERT(IS_PCP_WIRED(pcp));
1220 /*
1221 * purge entries which are not active
1222 */
1223 if (!pcp->p_active && pcp->p_pp == pp) {
1224 ASSERT(pcp->p_htag0 != NULL);
1225 pcp->p_hprev->p_hnext = pcp->p_hnext;
1226 pcp->p_hnext->p_hprev = pcp->p_hprev;
1227 pcp->p_hprev = delcallb_list;
1228 delcallb_list = pcp;
1229 }
1230 pcp = pcp->p_hnext;
1231 }
1232 mutex_exit(&hp->p_hmutex);
1233 /*
1234 * segments can't go away until callback is executed since
1235 * they must have non 0 softlockcnt. That's why we don't
1236 * need to hold as/seg locks to execute the callback.
1237 */
1238 while (delcallb_list != NULL) {
1239 int done;
1240 pcp = delcallb_list;
1241 delcallb_list = pcp->p_hprev;
1242 ASSERT(!pcp->p_active);
1243 done = (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
1244 pcp->p_len, pcp->p_pp,
1245 pcp->p_write ? S_WRITE : S_READ, 1);
1246 npages += btop(pcp->p_len);
1247 ASSERT(IS_PCP_WIRED(pcp));
1248 kmem_cache_free(seg_pkmcache, pcp);
1249 if (done) {
1250 ASSERT(delcallb_list == NULL);
1251 goto out;
1252 }
1253 }
1254 }
1255
1256 out:
1257 mutex_enter(&seg_pmem_mtx);
1258 ASSERT(seg_plocked >= npages);
1259 seg_plocked -= npages;
1260 mutex_exit(&seg_pmem_mtx);
1261 }
1262
1263 /*
1264 * purge all entries for a given segment. Since we
1265 * callback into the segment driver directly for page
1266 * reclaim the caller needs to hold the right locks.
1267 */
1268 void
1269 seg_ppurge(struct seg *seg, struct anon_map *amp, uint_t flags)
1270 {
1271 struct seg_pcache *delcallb_list = NULL;
1272 struct seg_pcache *pcp;
1273 struct seg_phash *hp;
1274 pgcnt_t npages = 0;
1275 void *htag0;
1276
1277 if (seg_plocked == 0) {
1278 return;
1279 }
1280 ASSERT(seg_phashsize_win != 0);
1281
1282 /*
1283 * If amp is not NULL use amp as a lookup tag otherwise use seg
1284 * as a lookup tag.
1285 */
1286 htag0 = (amp == NULL ? (void *)seg : (void *)amp);
1287 ASSERT(htag0 != NULL);
1288 if (IS_PFLAGS_WIRED(flags)) {
1289 hp = P_HASHBP(seg, htag0, 0, flags);
1290 mutex_enter(&hp->p_hmutex);
1291 pcp = hp->p_hnext;
1292 while (pcp != (struct seg_pcache *)hp) {
1293 ASSERT(pcp->p_hashp == hp);
1294 ASSERT(IS_PCP_WIRED(pcp));
1295 if (pcp->p_htag0 == htag0) {
1296 if (pcp->p_active) {
1297 break;
1298 }
1299 pcp->p_hprev->p_hnext = pcp->p_hnext;
1300 pcp->p_hnext->p_hprev = pcp->p_hprev;
1301 pcp->p_hprev = delcallb_list;
1302 delcallb_list = pcp;
1303 }
1304 pcp = pcp->p_hnext;
1305 }
1306 mutex_exit(&hp->p_hmutex);
1307 } else {
1308 pcache_link_t *plinkp;
1309 pcache_link_t *pheadp;
1310 kmutex_t *pmtx;
1311
1312 if (amp == NULL) {
1313 ASSERT(seg != NULL);
1314 pheadp = &seg->s_phead;
1315 pmtx = &seg->s_pmtx;
1316 } else {
1317 			pheadp = &amp->a_phead;
1318 			pmtx = &amp->a_pmtx;
1319 }
1320 mutex_enter(pmtx);
1321 while ((plinkp = pheadp->p_lnext) != pheadp) {
1322 pcp = plink2pcache(plinkp);
1323 ASSERT(!IS_PCP_WIRED(pcp));
1324 ASSERT(pcp->p_htag0 == htag0);
1325 hp = pcp->p_hashp;
1326 mutex_enter(&hp->p_hmutex);
1327 if (pcp->p_active) {
1328 mutex_exit(&hp->p_hmutex);
1329 break;
1330 }
1331 ASSERT(plinkp->p_lprev == pheadp);
1332 pheadp->p_lnext = plinkp->p_lnext;
1333 plinkp->p_lnext->p_lprev = pheadp;
1334 pcp->p_hprev->p_hnext = pcp->p_hnext;
1335 pcp->p_hnext->p_hprev = pcp->p_hprev;
1336 pcp->p_hprev = delcallb_list;
1337 delcallb_list = pcp;
1338 if (hp->p_hnext == (struct seg_pcache *)hp) {
1339 seg_premove_abuck(hp, 0);
1340 }
1341 mutex_exit(&hp->p_hmutex);
1342 }
1343 mutex_exit(pmtx);
1344 }
1345 while (delcallb_list != NULL) {
1346 pcp = delcallb_list;
1347 delcallb_list = pcp->p_hprev;
1348 ASSERT(!pcp->p_active);
1349 (void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr, pcp->p_len,
1350 pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 0);
1351 npages += btop(pcp->p_len);
1352 kmem_cache_free(seg_pkmcache, pcp);
1353 }
1354 mutex_enter(&seg_pmem_mtx);
1355 ASSERT(seg_plocked >= npages);
1356 seg_plocked -= npages;
1357 if (!IS_PFLAGS_WIRED(flags)) {
1358 ASSERT(seg_plocked_window >= npages);
1359 seg_plocked_window -= npages;
1360 }
1361 mutex_exit(&seg_pmem_mtx);
1362 }
1363
1364 static void seg_pinit_mem_config(void);
1365
1366 /*
1367 * setup the pagelock cache
1368 */
1369 static void
1370 seg_pinit(void)
1371 {
1372 struct seg_phash *hp;
1373 ulong_t i;
1374 pgcnt_t physmegs;
1375
1376 seg_plocked = 0;
1377 seg_plocked_window = 0;
1378
1379 if (segpcache_enabled == 0) {
1380 seg_phashsize_win = 0;
1381 seg_phashsize_wired = 0;
1382 seg_pdisabled = 1;
1383 return;
1384 }
1385
1386 seg_pdisabled = 0;
1387 seg_pkmcache = kmem_cache_create("seg_pcache",
1388 sizeof (struct seg_pcache), 0, NULL, NULL, NULL, NULL, NULL, 0);
1389 if (segpcache_pcp_maxage_ticks <= 0) {
1390 segpcache_pcp_maxage_ticks = segpcache_pcp_maxage_sec * hz;
1391 }
1392 seg_pmax_pcpage = segpcache_pcp_maxage_ticks;
1393 seg_pathr_empty_ahb = 0;
1394 seg_pathr_full_ahb = 0;
1395 seg_pshrink_shift = segpcache_shrink_shift;
1396 seg_pmaxapurge_npages = btop(segpcache_maxapurge_bytes);
1397
1398 mutex_init(&seg_pcache_mtx, NULL, MUTEX_DEFAULT, NULL);
1399 mutex_init(&seg_pmem_mtx, NULL, MUTEX_DEFAULT, NULL);
1400 mutex_init(&seg_pasync_mtx, NULL, MUTEX_DEFAULT, NULL);
1401 cv_init(&seg_pasync_cv, NULL, CV_DEFAULT, NULL);
1402
1403 physmegs = physmem >> (20 - PAGESHIFT);
1404
1405 /*
1406 * If segpcache_hashsize_win was not set in /etc/system or it has
1407 	 * an absurd value, set it to a default.
1408 */
1409 if (segpcache_hashsize_win == 0 || segpcache_hashsize_win > physmem) {
1410 /*
1411 * Create one bucket per 32K (or at least per 8 pages) of
1412 * available memory.
1413 */
1414 pgcnt_t pages_per_bucket = MAX(btop(32 * 1024), 8);
1415 segpcache_hashsize_win = MAX(1024, physmem / pages_per_bucket);
1416 }
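	/*
	 * Not a power of two: round to the nearest power of two (the
	 * remainder above the highest set bit is added back in, so anything
	 * at or above the halfway point rounds up, the rest rounds down).
	 */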
1417 if (!ISP2(segpcache_hashsize_win)) {
1418 ulong_t rndfac = ~(1UL <<
1419 (highbit(segpcache_hashsize_win) - 1));
1420 rndfac &= segpcache_hashsize_win;
1421 segpcache_hashsize_win += rndfac;
1422 segpcache_hashsize_win = 1 <<
1423 (highbit(segpcache_hashsize_win) - 1);
1424 }
1425 seg_phashsize_win = segpcache_hashsize_win;
1426 seg_phashtab_win = kmem_zalloc(
1427 seg_phashsize_win * sizeof (struct seg_phash),
1428 KM_SLEEP);
1429 for (i = 0; i < seg_phashsize_win; i++) {
1430 hp = &seg_phashtab_win[i];
1431 hp->p_hnext = (struct seg_pcache *)hp;
1432 hp->p_hprev = (struct seg_pcache *)hp;
1433 mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
1434 }
1435
1436 seg_pahcur = 0;
1437 seg_pathr_on = 0;
1438 seg_pahhead[0].p_lnext = &seg_pahhead[0];
1439 seg_pahhead[0].p_lprev = &seg_pahhead[0];
1440 seg_pahhead[1].p_lnext = &seg_pahhead[1];
1441 seg_pahhead[1].p_lprev = &seg_pahhead[1];
1442
1443 /*
1444 * If segpcache_hashsize_wired was not set in /etc/system or it has
1445 	 * an absurd value, set it to a default.
1446 */
1447 if (segpcache_hashsize_wired == 0 ||
1448 segpcache_hashsize_wired > physmem / 4) {
1449 /*
1450 * Choose segpcache_hashsize_wired based on physmem.
1451 		 * Create a bucket per 128K bytes, up to 256K buckets.
1452 */
1453 if (physmegs < 20 * 1024) {
1454 segpcache_hashsize_wired = MAX(1024, physmegs << 3);
1455 } else {
1456 segpcache_hashsize_wired = 256 * 1024;
1457 }
1458 }
1459 if (!ISP2(segpcache_hashsize_wired)) {
1460 segpcache_hashsize_wired = 1 <<
1461 highbit(segpcache_hashsize_wired);
1462 }
1463 seg_phashsize_wired = segpcache_hashsize_wired;
1464 seg_phashtab_wired = kmem_zalloc(
1465 seg_phashsize_wired * sizeof (struct seg_phash_wired), KM_SLEEP);
1466 for (i = 0; i < seg_phashsize_wired; i++) {
1467 hp = (struct seg_phash *)&seg_phashtab_wired[i];
1468 hp->p_hnext = (struct seg_pcache *)hp;
1469 hp->p_hprev = (struct seg_pcache *)hp;
1470 mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
1471 }
1472
1473 if (segpcache_maxwindow == 0) {
1474 if (physmegs < 64) {
1475 /* 3% of memory */
1476 segpcache_maxwindow = availrmem >> 5;
1477 } else if (physmegs < 512) {
1478 /* 12% of memory */
1479 segpcache_maxwindow = availrmem >> 3;
1480 } else if (physmegs < 1024) {
1481 /* 25% of memory */
1482 segpcache_maxwindow = availrmem >> 2;
1483 } else if (physmegs < 2048) {
1484 /* 50% of memory */
1485 segpcache_maxwindow = availrmem >> 1;
1486 } else {
1487 /* no limit */
1488 segpcache_maxwindow = (pgcnt_t)-1;
1489 }
1490 }
1491 seg_pmaxwindow = segpcache_maxwindow;
1492 seg_pinit_mem_config();
1493 }
1494
1495 /*
1496 * called by pageout if memory is low
1497 */
1498 void
1499 seg_preap(void)
1500 {
1501 /*
1502 * if the cache is off or empty, return
1503 */
1504 if (seg_plocked_window == 0) {
1505 return;
1506 }
1507 ASSERT(seg_phashsize_win != 0);
1508
1509 /*
1510 * If somebody is already purging pcache
1511 * just return.
1512 */
1513 if (seg_pdisabled) {
1514 return;
1515 }
1516
1517 cv_signal(&seg_pasync_cv);
1518 }
1519
1520 /*
1521  * run as a background thread and reclaim pagelock
1522 * pages which have not been used recently
1523 */
1524 void
1525 seg_pasync_thread(void)
1526 {
1527 callb_cpr_t cpr_info;
1528
1529 if (seg_phashsize_win == 0) {
1530 thread_exit();
1531 /*NOTREACHED*/
1532 }
1533
1534 seg_pasync_thr = curthread;
1535
1536 CALLB_CPR_INIT(&cpr_info, &seg_pasync_mtx,
1537 callb_generic_cpr, "seg_pasync");
1538
1539 if (segpcache_reap_ticks <= 0) {
1540 segpcache_reap_ticks = segpcache_reap_sec * hz;
1541 }
1542
1543 mutex_enter(&seg_pasync_mtx);
1544 for (;;) {
1545 CALLB_CPR_SAFE_BEGIN(&cpr_info);
1546 (void) cv_reltimedwait(&seg_pasync_cv, &seg_pasync_mtx,
1547 segpcache_reap_ticks, TR_CLOCK_TICK);
1548 CALLB_CPR_SAFE_END(&cpr_info, &seg_pasync_mtx);
1549 if (seg_pdisabled == 0) {
1550 seg_ppurge_async(0);
1551 }
1552 }
1553 }
1554
1555 static struct kmem_cache *seg_cache;
1556
1557 /*
1558 * Initialize segment management data structures.
1559 */
1560 void
1561 seg_init(void)
1562 {
1563 kstat_t *ksp;
1564
1565 seg_cache = kmem_cache_create("seg_cache", sizeof (struct seg),
1566 0, NULL, NULL, NULL, NULL, NULL, 0);
1567
1568 ksp = kstat_create("unix", 0, "segadvstat", "vm", KSTAT_TYPE_NAMED,
1569 segadvstat_ndata, KSTAT_FLAG_VIRTUAL);
1570 if (ksp) {
1571 ksp->ks_data = (void *)segadvstat_ptr;
1572 kstat_install(ksp);
1573 }
1574
1575 seg_pinit();
1576 }
1577
1578 /*
1579 * Allocate a segment to cover [base, base+size]
1580 * and attach it to the specified address space.
1581 */
1582 struct seg *
1583 seg_alloc(struct as *as, caddr_t base, size_t size)
1584 {
1585 struct seg *new;
1586 caddr_t segbase;
1587 size_t segsize;
1588
1589 segbase = (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK);
1590 segsize = (((uintptr_t)(base + size) + PAGEOFFSET) & PAGEMASK) -
1591 (uintptr_t)segbase;
1592
1593 if (!valid_va_range(&segbase, &segsize, segsize, AH_LO))
1594 return ((struct seg *)NULL); /* bad virtual addr range */
1595
1596 if (as != &kas &&
1597 valid_usr_range(segbase, segsize, 0, as,
1598 as->a_userlimit) != RANGE_OKAY)
1599 return ((struct seg *)NULL); /* bad virtual addr range */
1600
1601 new = kmem_cache_alloc(seg_cache, KM_SLEEP);
1602 new->s_ops = NULL;
1603 new->s_data = NULL;
1604 new->s_szc = 0;
1605 new->s_flags = 0;
1606 mutex_init(&new->s_pmtx, NULL, MUTEX_DEFAULT, NULL);
1607 new->s_phead.p_lnext = &new->s_phead;
1608 new->s_phead.p_lprev = &new->s_phead;
1609 if (seg_attach(as, segbase, segsize, new) < 0) {
1610 kmem_cache_free(seg_cache, new);
1611 return ((struct seg *)NULL);
1612 }
1613 /* caller must fill in ops, data */
1614 return (new);
1615 }
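
/*
 * A minimal sketch of the intended calling pattern (loosely modeled on what
 * as_map()-style callers do; crfp and argsp are placeholders for a segment
 * driver's create function and its argument, not names defined here):
 *
 *	seg = seg_alloc(as, addr, len);
 *	if (seg == NULL)
 *		return (ENOMEM);
 *	if ((err = (*crfp)(seg, argsp)) != 0)
 *		seg_free(seg);		driver never set s_ops/s_data
 *
 * The driver's create function is what fills in seg->s_ops and seg->s_data.
 */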
1616
1617 /*
1618 * Attach a segment to the address space. Used by seg_alloc()
1619 * and for kernel startup to attach to static segments.
1620 */
1621 int
1622 seg_attach(struct as *as, caddr_t base, size_t size, struct seg *seg)
1623 {
1624 seg->s_as = as;
1625 seg->s_base = base;
1626 seg->s_size = size;
1627
1628 /*
1629 	 * as_addseg() will add the segment at the appropriate point
1630 * in the list. It will return -1 if there is overlap with
1631 * an already existing segment.
1632 */
1633 return (as_addseg(as, seg));
1634 }
1635
1636 /*
1637 * Unmap a segment and free it from its associated address space.
1638 * This should be called by anybody who's finished with a whole segment's
1639  * mapping. Just calls SEGOP_UNMAP() on the whole mapping. It is the
1640  * responsibility of the segment driver to unlink the segment
1641 * from the address space, and to free public and private data structures
1642 * associated with the segment. (This is typically done by a call to
1643 * seg_free()).
1644 */
1645 void
1646 seg_unmap(struct seg *seg)
1647 {
1648 #ifdef DEBUG
1649 int ret;
1650 #endif /* DEBUG */
1651
1652 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
1653
1654 /* Shouldn't have called seg_unmap if mapping isn't yet established */
1655 ASSERT(seg->s_data != NULL);
1656
1657 /* Unmap the whole mapping */
1658 #ifdef DEBUG
1659 ret = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
1660 ASSERT(ret == 0);
1661 #else
1662 SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
1663 #endif /* DEBUG */
1664 }
1665
1666 /*
1667 * Free the segment from its associated as. This should only be called
1668 * if a mapping to the segment has not yet been established (e.g., if
1669 * an error occurs in the middle of doing an as_map when the segment
1670 * has already been partially set up) or if it has already been deleted
1671 * (e.g., from a segment driver unmap routine if the unmap applies to the
1672 * entire segment). If the mapping is currently set up then seg_unmap() should
1673 * be called instead.
1674 */
1675 void
1676 seg_free(struct seg *seg)
1677 {
1678 register struct as *as = seg->s_as;
1679 struct seg *tseg = as_removeseg(as, seg);
1680
1681 ASSERT(tseg == seg);
1682
1683 /*
1684 * If the segment private data field is NULL,
1685 * then segment driver is not attached yet.
1686 */
1687 if (seg->s_data != NULL)
1688 SEGOP_FREE(seg);
1689
1690 mutex_destroy(&seg->s_pmtx);
1691 ASSERT(seg->s_phead.p_lnext == &seg->s_phead);
1692 ASSERT(seg->s_phead.p_lprev == &seg->s_phead);
1693 kmem_cache_free(seg_cache, seg);
1694 }
1695
1696 /*ARGSUSED*/
1697 static void
1698 seg_p_mem_config_post_add(
1699 void *arg,
1700 pgcnt_t delta_pages)
1701 {
1702 /* Nothing to do. */
1703 }
1704
1705 void
1706 seg_p_enable(void)
1707 {
1708 mutex_enter(&seg_pcache_mtx);
1709 ASSERT(seg_pdisabled != 0);
1710 seg_pdisabled--;
1711 mutex_exit(&seg_pcache_mtx);
1712 }
1713
1714 /*
1715 * seg_p_disable - disables seg_pcache, and then attempts to empty the
1716 * cache.
1717 * Returns SEGP_SUCCESS if the cache was successfully emptied, or
1718 * SEGP_FAIL if the cache could not be emptied.
1719 */
1720 int
1721 seg_p_disable(void)
1722 {
1723 pgcnt_t old_plocked;
1724 int stall_count = 0;
1725
1726 mutex_enter(&seg_pcache_mtx);
1727 seg_pdisabled++;
1728 ASSERT(seg_pdisabled != 0);
1729 mutex_exit(&seg_pcache_mtx);
1730
1731 /*
1732 * Attempt to empty the cache. Terminate if seg_plocked does not
1733 * diminish with SEGP_STALL_THRESHOLD consecutive attempts.
1734 */
1735 while (seg_plocked != 0) {
1736 ASSERT(seg_phashsize_win != 0);
1737 old_plocked = seg_plocked;
1738 seg_ppurge_async(1);
1739 if (seg_plocked == old_plocked) {
1740 if (stall_count++ > SEGP_STALL_THRESHOLD) {
1741 return (SEGP_FAIL);
1742 }
1743 } else
1744 stall_count = 0;
1745 if (seg_plocked != 0)
1746 delay(hz/SEGP_PREDEL_DELAY_FACTOR);
1747 }
1748 return (SEGP_SUCCESS);
1749 }
1750
1751 /*
1752 * Attempt to purge seg_pcache. May need to return before this has
1753 * completed to allow other pre_del callbacks to unlock pages. This is
1754 * ok because:
1755 * 1) The seg_pdisabled flag has been set so at least we won't
1756  *	cache any more locks and the locks we couldn't purge
1757 * will not be held if they do get released by a subsequent
1758 * pre-delete callback.
1759 *
1760 * 2) The rest of the memory delete thread processing does not
1761 * depend on the changes made in this pre-delete callback. No
1762 * panics will result, the worst that will happen is that the
1763 * DR code will timeout and cancel the delete.
1764 */
1765 /*ARGSUSED*/
1766 static int
1767 seg_p_mem_config_pre_del(
1768 void *arg,
1769 pgcnt_t delta_pages)
1770 {
1771 if (seg_phashsize_win == 0) {
1772 return (0);
1773 }
1774 if (seg_p_disable() != SEGP_SUCCESS)
1775 cmn_err(CE_NOTE,
1776 "!Pre-delete couldn't purge"" pagelock cache - continuing");
1777 return (0);
1778 }
1779
1780 /*ARGSUSED*/
1781 static void
1782 seg_p_mem_config_post_del(
1783 void *arg,
1784 pgcnt_t delta_pages,
1785 int cancelled)
1786 {
1787 if (seg_phashsize_win == 0) {
1788 return;
1789 }
1790 seg_p_enable();
1791 }
1792
1793 static kphysm_setup_vector_t seg_p_mem_config_vec = {
1794 KPHYSM_SETUP_VECTOR_VERSION,
1795 seg_p_mem_config_post_add,
1796 seg_p_mem_config_pre_del,
1797 seg_p_mem_config_post_del,
1798 };
1799
1800 static void
1801 seg_pinit_mem_config(void)
1802 {
1803 int ret;
1804
1805 ret = kphysm_setup_func_register(&seg_p_mem_config_vec, (void *)NULL);
1806 /*
1807 * Want to catch this in the debug kernel. At run time, if the
1808 * callbacks don't get run all will be OK as the disable just makes
1809 * it more likely that the pages can be collected.
1810 */
1811 ASSERT(ret == 0);
1812 }
1813
1814 /*
1815 * Verify that segment is not a shared anonymous segment which reserves
1816  * swap. zone.max-swap accounting (zone->zone_max_swap) cannot be transferred
1817 * from one zone to another if any segments are shared. This is because the
1818 * last process to exit will credit the swap reservation. This could lead
1819 * to the swap being reserved by one zone, and credited to another.
1820 */
1821 boolean_t
1822 seg_can_change_zones(struct seg *seg)
1823 {
1824 struct segvn_data *svd;
1825
1826 if (seg->s_ops == &segspt_shmops)
1827 return (B_FALSE);
1828
1829 if (seg->s_ops == &segvn_ops) {
1830 svd = (struct segvn_data *)seg->s_data;
1831 if (svd->type == MAP_SHARED &&
1832 svd->amp != NULL &&
1833 svd->amp->swresv > 0)
1834 return (B_FALSE);
1835 }
1836 return (B_TRUE);
1837 }
1838
1839 /*
1840 * Return swap reserved by a segment backing a private mapping.
1841 */
1842 size_t
1843 seg_swresv(struct seg *seg)
1844 {
1845 struct segvn_data *svd;
1846 size_t swap = 0;
1847
1848 if (seg->s_ops == &segvn_ops) {
1849 svd = (struct segvn_data *)seg->s_data;
1850 if (svd->type == MAP_PRIVATE && svd->swresv > 0)
1851 swap = svd->swresv;
1852 }
1853 return (swap);
1854 }
1855
1856 /*
1857 * General not supported function for SEGOP_INHERIT
1858 */
1859 /* ARGSUSED */
1860 int
1861 seg_inherit_notsup(struct seg *seg, caddr_t addr, size_t len, uint_t op)
1862 {
1863 return (ENOTSUP);
1864 }
1865