1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 26 /* All Rights Reserved */ 27 28 /* 29 * University Copyright- Copyright (c) 1982, 1986, 1988 30 * The Regents of the University of California 31 * All Rights Reserved 32 * 33 * University Acknowledgment- Portions of this document are derived from 34 * software developed by the University of California, Berkeley, and its 35 * contributors. 36 */ 37 38 #ifndef _VM_ANON_H 39 #define _VM_ANON_H 40 41 #include <sys/cred.h> 42 #include <sys/zone.h> 43 #include <vm/seg.h> 44 #include <vm/vpage.h> 45 46 #ifdef __cplusplus 47 extern "C" { 48 #endif 49 50 /* 51 * VM - Anonymous pages. 52 */ 53 54 typedef unsigned long anoff_t; /* anon offsets */ 55 56 /* 57 * Each anonymous page, either in memory or in swap, has an anon structure. 58 * The structure (slot) provides a level of indirection between anonymous pages 59 * and their backing store. 60 * 61 * (an_vp, an_off) names the vnode of the anonymous page for this slot. 62 * 63 * (an_pvp, an_poff) names the location of the physical backing store 64 * for the page this slot represents. If the name is null there is no 65 * associated physical store. The physical backing store location can 66 * change while the slot is in use. 67 * 68 * an_hash is a hash list of anon slots. The list is hashed by 69 * (an_vp, an_off) of the associated anonymous page and provides a 70 * method of going from the name of an anonymous page to its 71 * associated anon slot. 72 * 73 * an_refcnt holds a reference count which is the number of separate 74 * copies that will need to be created in case of copy-on-write. 75 * A refcnt > 0 protects the existence of the slot. The refcnt is 76 * initialized to 1 when the anon slot is created in anon_alloc(). 77 * If a client obtains an anon slot and allows multiple threads to 78 * share it, then it is the client's responsibility to insure that 79 * it does not allow one thread to try to reference the slot at the 80 * same time as another is trying to decrement the last count and 81 * destroy the anon slot. E.g., the seg_vn segment type protects 82 * against this with higher level locks. 83 */ 84 85 struct anon { 86 struct vnode *an_vp; /* vnode of anon page */ 87 struct vnode *an_pvp; /* vnode of physical backing store */ 88 anoff_t an_off; /* offset of anon page */ 89 anoff_t an_poff; /* offset in vnode */ 90 struct anon *an_hash; /* hash table of anon slots */ 91 int an_refcnt; /* # of people sharing slot */ 92 }; 93 94 #define AN_CACHE_ALIGN_LOG2 4 /* log2(AN_CACHE_ALIGN) */ 95 #define AN_CACHE_ALIGN (1U << AN_CACHE_ALIGN_LOG2) /* anon address aligned */ 96 /* 16 bytes */ 97 98 99 #ifdef _KERNEL 100 /* 101 * The swapinfo_lock protects: 102 * swapinfo list 103 * individual swapinfo structures 104 * 105 * The anoninfo_lock protects: 106 * anoninfo counters 107 * 108 * The anonhash_lock protects: 109 * anon hash lists 110 * anon slot fields 111 * 112 * Fields in the anon slot which are read-only for the life of the slot 113 * (an_vp, an_off) do not require the anonhash_lock be held to access them. 114 * If you access a field without the anonhash_lock held you must be holding 115 * the slot with an_refcnt to make sure it isn't destroyed. 116 * To write (an_pvp, an_poff) in a given slot you must also hold the 117 * p_iolock of the anonymous page for slot. 118 */ 119 extern kmutex_t anoninfo_lock; 120 extern kmutex_t swapinfo_lock; 121 extern pad_mutex_t *anonhash_lock; 122 extern pad_mutex_t anon_array_lock[]; 123 extern kcondvar_t anon_array_cv[]; 124 125 /* 126 * Global hash table to provide a function from (vp, off) -> ap 127 */ 128 extern size_t anon_hash_size; 129 extern unsigned int anon_hash_shift; 130 extern struct anon **anon_hash; 131 #define ANON_HASH_SIZE anon_hash_size 132 #define ANON_HASHAVELEN 4 133 /* 134 * Try to use as many bits of randomness from both vp and off as we can. 135 * This should help spreading evenly for a variety of workloads. See comments 136 * for PAGE_HASH_FUNC for more explanation. 137 */ 138 #define ANON_HASH(vp, off) \ 139 (((((uintptr_t)(off) >> PAGESHIFT) ^ \ 140 ((uintptr_t)(off) >> (PAGESHIFT + anon_hash_shift))) ^ \ 141 (((uintptr_t)(vp) >> 3) ^ \ 142 ((uintptr_t)(vp) >> (3 + anon_hash_shift)) ^ \ 143 ((uintptr_t)(vp) >> (3 + 2 * anon_hash_shift)) ^ \ 144 ((uintptr_t)(vp) << \ 145 (anon_hash_shift - AN_VPSHIFT - VNODE_ALIGN_LOG2)))) & \ 146 (anon_hash_size - 1)) 147 148 #define AH_LOCK_SIZE (2 << NCPU_LOG2) 149 150 #define AH_MUTEX(vp, off) \ 151 (&anonhash_lock[(ANON_HASH((vp), (off)) & \ 152 (AH_LOCK_SIZE - 1))].pad_mutex) 153 154 #endif /* _KERNEL */ 155 156 /* 157 * Declaration for the Global counters to accurately 158 * track the kernel foot print in memory. 159 */ 160 extern pgcnt_t pages_locked; 161 extern pgcnt_t pages_claimed; 162 extern pgcnt_t pages_useclaim; 163 extern pgcnt_t obp_pages; 164 165 /* 166 * Anonymous backing store accounting structure for swapctl. 167 * 168 * ani_max = maximum amount of swap space 169 * (including potentially available physical memory) 170 * ani_free = amount of unallocated anonymous memory 171 * (some of which might be reserved and including 172 * potentially available physical memory) 173 * ani_resv = amount of claimed (reserved) anonymous memory 174 * 175 * The swap data can be aquired more efficiently through the 176 * kstats interface. 177 * Total slots currently available for reservation = 178 * MAX(ani_max - ani_resv, 0) + (availrmem - swapfs_minfree) 179 */ 180 struct anoninfo { 181 pgcnt_t ani_max; 182 pgcnt_t ani_free; 183 pgcnt_t ani_resv; 184 }; 185 186 #ifdef _SYSCALL32 187 struct anoninfo32 { 188 size32_t ani_max; 189 size32_t ani_free; 190 size32_t ani_resv; 191 }; 192 #endif /* _SYSCALL32 */ 193 194 /* 195 * Define the NCPU pool of the ani_free counters. Update the counter 196 * of the cpu on which the thread is running and in every clock intr 197 * sync anoninfo.ani_free with the current total off all the NCPU entries. 198 */ 199 200 typedef struct ani_free { 201 kmutex_t ani_lock; 202 pgcnt_t ani_count; 203 uchar_t pad[64 - sizeof (kmutex_t) - sizeof (pgcnt_t)]; 204 /* XXX 64 = cacheline size */ 205 } ani_free_t; 206 207 #define ANI_MAX_POOL 128 208 extern ani_free_t ani_free_pool[]; 209 210 #define ANI_ADD(inc) { \ 211 ani_free_t *anifp; \ 212 int index; \ 213 index = (CPU->cpu_id & (ANI_MAX_POOL - 1)); \ 214 anifp = &ani_free_pool[index]; \ 215 mutex_enter(&anifp->ani_lock); \ 216 anifp->ani_count += inc; \ 217 mutex_exit(&anifp->ani_lock); \ 218 } 219 220 /* 221 * Anon array pointers are allocated in chunks. Each chunk 222 * has PAGESIZE/sizeof(u_long *) of anon pointers. 223 * There are two levels of arrays for anon array pointers larger 224 * than a chunk. The first level points to anon array chunks. 225 * The second level consists of chunks of anon pointers. 226 * 227 * If anon array is smaller than a chunk then the whole anon array 228 * is created (memory is allocated for whole anon array). 229 * If anon array is larger than a chunk only first level array is 230 * allocated. Then other arrays (chunks) are allocated only when 231 * they are initialized with anon pointers. 232 */ 233 struct anon_hdr { 234 kmutex_t serial_lock; /* serialize array chunk allocation */ 235 pgcnt_t size; /* number of pointers to (anon) pages */ 236 void **array_chunk; /* pointers to anon pointers or chunks of */ 237 /* anon pointers */ 238 int flags; /* ANON_ALLOC_FORCE force preallocation of */ 239 /* whole anon array */ 240 }; 241 242 #ifdef _LP64 243 #define ANON_PTRSHIFT 3 244 #define ANON_PTRMASK ~7 245 #else 246 #define ANON_PTRSHIFT 2 247 #define ANON_PTRMASK ~3 248 #endif 249 250 #define ANON_CHUNK_SIZE (PAGESIZE >> ANON_PTRSHIFT) 251 #define ANON_CHUNK_SHIFT (PAGESHIFT - ANON_PTRSHIFT) 252 #define ANON_CHUNK_OFF (ANON_CHUNK_SIZE - 1) 253 254 /* 255 * Anon flags. 256 */ 257 #define ANON_SLEEP 0x0 /* ok to block */ 258 #define ANON_NOSLEEP 0x1 /* non-blocking call */ 259 #define ANON_ALLOC_FORCE 0x2 /* force single level anon array */ 260 #define ANON_GROWDOWN 0x4 /* anon array should grow downward */ 261 262 struct kshmid; 263 264 /* 265 * The anon_map structure is used by various clients of the anon layer to 266 * manage anonymous memory. When anonymous memory is shared, 267 * then the different clients sharing it will point to the 268 * same anon_map structure. Also, if a segment is unmapped 269 * in the middle where an anon_map structure exists, the 270 * newly created segment will also share the anon_map structure, 271 * although the two segments will use different ranges of the 272 * anon array. When mappings are private (or shared with 273 * a reference count of 1), an unmap operation will free up 274 * a range of anon slots in the array given by the anon_map 275 * structure. Because of fragmentation due to this unmapping, 276 * we have to store the size of the anon array in the anon_map 277 * structure so that we can free everything when the referernce 278 * count goes to zero. 279 * 280 * A new rangelock scheme is introduced to make the anon layer scale. 281 * A reader/writer lock per anon_amp and an array of system-wide hash 282 * locks, anon_array_lock[] are introduced to replace serial_lock and 283 * anonmap lock. The writer lock is held when we want to singlethreaD 284 * the reference to the anon array pointers or when references to 285 * anon_map's members, whereas reader lock and anon_array_lock are 286 * held to allows multiple threads to reference different part of 287 * anon array. A global set of condition variables, anon_array_cv, 288 * are used with anon_array_lock[] to make the hold time of the locks 289 * short. 290 * 291 * szc is used to calculate the index of hash locks and cv's. We 292 * could've just used seg->s_szc if not for the possible sharing of 293 * anon_amp between SYSV shared memory and ISM, so now we introduce 294 * szc in the anon_map structure. For MAP_SHARED, the amp->szc is either 295 * 0 (base page size) or page_num_pagesizes() - 1, while MAP_PRIVATE 296 * the amp->szc could be anything in [0, page_num_pagesizes() - 1]. 297 */ 298 typedef struct anon_map { 299 krwlock_t a_rwlock; /* protect anon_map and anon array */ 300 size_t size; /* size in bytes mapped by the anon array */ 301 struct anon_hdr *ahp; /* anon array header pointer, containing */ 302 /* anon pointer array(s) */ 303 size_t swresv; /* swap space reserved for this anon_map */ 304 ulong_t refcnt; /* reference count on this structure */ 305 ushort_t a_szc; /* max szc among shared processes */ 306 void *locality; /* lgroup locality info */ 307 struct kshmid *a_sp; /* kshmid if amp backs sysV, or NULL */ 308 int a_purgewait; /* somebody waits for slocks to go away */ 309 kcondvar_t a_purgecv; /* cv for waiting for slocks to go away */ 310 kmutex_t a_purgemtx; /* mutex for anonmap_purge() */ 311 spgcnt_t a_softlockcnt; /* number of pages locked in pcache */ 312 kmutex_t a_pmtx; /* protects amp's pcache list */ 313 pcache_link_t a_phead; /* head of amp's pcache list */ 314 } amp_t; 315 316 #ifdef _KERNEL 317 318 #define ANON_BUSY 0x1 319 #define ANON_ISBUSY(slot) (*(slot) & ANON_BUSY) 320 #define ANON_SETBUSY(slot) (*(slot) |= ANON_BUSY) 321 #define ANON_CLRBUSY(slot) (*(slot) &= ~ANON_BUSY) 322 323 #define ANON_MAP_SHIFT 6 /* log2(sizeof (struct anon_map)) */ 324 #define ANON_ARRAY_SHIFT 7 /* log2(ANON_LOCKSIZE) */ 325 #define ANON_LOCKSIZE 128 326 327 #define ANON_LOCK_ENTER(lock, type) rw_enter((lock), (type)) 328 #define ANON_LOCK_EXIT(lock) rw_exit((lock)) 329 #define ANON_LOCK_HELD(lock) RW_LOCK_HELD((lock)) 330 #define ANON_READ_HELD(lock) RW_READ_HELD((lock)) 331 #define ANON_WRITE_HELD(lock) RW_WRITE_HELD((lock)) 332 333 #define ANON_ARRAY_HASH(amp, idx)\ 334 ((((idx) + ((idx) >> ANON_ARRAY_SHIFT) +\ 335 ((idx) >> (ANON_ARRAY_SHIFT << 1)) +\ 336 ((idx) >> (ANON_ARRAY_SHIFT + (ANON_ARRAY_SHIFT << 1)))) ^\ 337 ((uintptr_t)(amp) >> ANON_MAP_SHIFT)) & (ANON_LOCKSIZE - 1)) 338 339 typedef struct anon_sync_obj { 340 kmutex_t *sync_mutex; 341 kcondvar_t *sync_cv; 342 ulong_t *sync_data; 343 } anon_sync_obj_t; 344 345 /* 346 * Anonymous backing store accounting structure for kernel. 347 * ani_max = total reservable slots on physical (disk-backed) swap 348 * ani_phys_resv = total phys slots reserved for use by clients 349 * ani_mem_resv = total mem slots reserved for use by clients 350 * ani_free = # unallocated physical slots + # of reserved unallocated 351 * memory slots 352 */ 353 354 /* 355 * Initial total swap slots available for reservation 356 */ 357 #define TOTAL_AVAILABLE_SWAP \ 358 (k_anoninfo.ani_max + MAX((spgcnt_t)(availrmem - swapfs_minfree), 0)) 359 360 /* 361 * Swap slots currently available for reservation 362 */ 363 #define CURRENT_TOTAL_AVAILABLE_SWAP \ 364 ((k_anoninfo.ani_max - k_anoninfo.ani_phys_resv) + \ 365 MAX((spgcnt_t)(availrmem - swapfs_minfree), 0)) 366 367 struct k_anoninfo { 368 pgcnt_t ani_max; /* total reservable slots on phys */ 369 /* (disk) swap */ 370 pgcnt_t ani_free; /* # of unallocated phys and mem slots */ 371 pgcnt_t ani_phys_resv; /* # of reserved phys (disk) slots */ 372 pgcnt_t ani_mem_resv; /* # of reserved mem slots */ 373 pgcnt_t ani_locked_swap; /* # of swap slots locked in reserved */ 374 /* mem swap */ 375 }; 376 377 extern struct k_anoninfo k_anoninfo; 378 379 extern void anon_init(void); 380 extern struct anon *anon_alloc(struct vnode *, anoff_t); 381 extern void anon_dup(struct anon_hdr *, ulong_t, 382 struct anon_hdr *, ulong_t, size_t); 383 extern void anon_dup_fill_holes(struct anon_hdr *, ulong_t, 384 struct anon_hdr *, ulong_t, size_t, uint_t, int); 385 extern int anon_fill_cow_holes(struct seg *, caddr_t, struct anon_hdr *, 386 ulong_t, struct vnode *, u_offset_t, size_t, uint_t, 387 uint_t, struct vpage [], struct cred *); 388 extern void anon_free(struct anon_hdr *, ulong_t, size_t); 389 extern void anon_free_pages(struct anon_hdr *, ulong_t, size_t, uint_t); 390 extern void anon_disclaim(struct anon_map *, ulong_t, size_t); 391 extern int anon_getpage(struct anon **, uint_t *, struct page **, 392 size_t, struct seg *, caddr_t, enum seg_rw, struct cred *); 393 extern int swap_getconpage(struct vnode *, u_offset_t, size_t, 394 uint_t *, page_t *[], size_t, page_t *, uint_t *, 395 spgcnt_t *, struct seg *, caddr_t, 396 enum seg_rw, struct cred *); 397 extern int anon_map_getpages(struct anon_map *, ulong_t, 398 uint_t, struct seg *, caddr_t, uint_t, 399 uint_t *, page_t *[], uint_t *, 400 struct vpage [], enum seg_rw, int, int, int, struct cred *); 401 extern int anon_map_privatepages(struct anon_map *, ulong_t, 402 uint_t, struct seg *, caddr_t, uint_t, 403 page_t *[], struct vpage [], int, int, struct cred *); 404 extern struct page *anon_private(struct anon **, struct seg *, 405 caddr_t, uint_t, struct page *, 406 int, struct cred *); 407 extern struct page *anon_zero(struct seg *, caddr_t, 408 struct anon **, struct cred *); 409 extern int anon_map_createpages(struct anon_map *, ulong_t, 410 size_t, struct page **, 411 struct seg *, caddr_t, 412 enum seg_rw, struct cred *); 413 extern int anon_map_demotepages(struct anon_map *, ulong_t, 414 struct seg *, caddr_t, uint_t, 415 struct vpage [], struct cred *); 416 extern void anon_shmap_free_pages(struct anon_map *, ulong_t, size_t); 417 extern int anon_resvmem(size_t, boolean_t, zone_t *, int); 418 extern void anon_unresvmem(size_t, zone_t *); 419 extern struct anon_map *anonmap_alloc(size_t, size_t, int); 420 extern void anonmap_free(struct anon_map *); 421 extern void anonmap_purge(struct anon_map *); 422 extern void anon_swap_free(struct anon *, struct page *); 423 extern void anon_decref(struct anon *); 424 extern int non_anon(struct anon_hdr *, ulong_t, u_offset_t *, size_t *); 425 extern pgcnt_t anon_pages(struct anon_hdr *, ulong_t, pgcnt_t); 426 extern int anon_swap_adjust(pgcnt_t); 427 extern void anon_swap_restore(pgcnt_t); 428 extern struct anon_hdr *anon_create(pgcnt_t, int); 429 extern void anon_release(struct anon_hdr *, pgcnt_t); 430 extern struct anon *anon_get_ptr(struct anon_hdr *, ulong_t); 431 extern ulong_t *anon_get_slot(struct anon_hdr *, ulong_t); 432 extern struct anon *anon_get_next_ptr(struct anon_hdr *, ulong_t *); 433 extern int anon_set_ptr(struct anon_hdr *, ulong_t, struct anon *, int); 434 extern int anon_copy_ptr(struct anon_hdr *, ulong_t, 435 struct anon_hdr *, ulong_t, pgcnt_t, int); 436 extern pgcnt_t anon_grow(struct anon_hdr *, ulong_t *, pgcnt_t, pgcnt_t, int); 437 extern void anon_array_enter(struct anon_map *, ulong_t, 438 anon_sync_obj_t *); 439 extern int anon_array_try_enter(struct anon_map *, ulong_t, 440 anon_sync_obj_t *); 441 extern void anon_array_exit(anon_sync_obj_t *); 442 443 /* 444 * anon_resv checks to see if there is enough swap space to fulfill a 445 * request and if so, reserves the appropriate anonymous memory resources. 446 * anon_checkspace just checks to see if there is space to fulfill the request, 447 * without taking any resources. Both return 1 if successful and 0 if not. 448 * 449 * Macros are provided as anon reservation is usually charged to the zone of 450 * the current process. In some cases (such as anon reserved by tmpfs), a 451 * zone pointer is needed to charge the appropriate zone. 452 */ 453 #define anon_unresv(size) anon_unresvmem(size, curproc->p_zone) 454 #define anon_unresv_zone(size, zone) anon_unresvmem(size, zone) 455 #define anon_resv(size) \ 456 anon_resvmem((size), 1, curproc->p_zone, 1) 457 #define anon_resv_zone(size, zone) anon_resvmem((size), 1, zone, 1) 458 #define anon_checkspace(size, zone) anon_resvmem((size), 0, zone, 0) 459 #define anon_try_resv_zone(size, zone) anon_resvmem((size), 1, zone, 0) 460 461 /* 462 * Flags to anon_private 463 */ 464 #define STEAL_PAGE 0x1 /* page can be stolen */ 465 #define LOCK_PAGE 0x2 /* page must be ``logically'' locked */ 466 467 /* 468 * SEGKP ANON pages that are locked are assumed to be LWP stack pages 469 * and thus count towards the user pages locked count. 470 * This value is protected by the same lock as availrmem. 471 */ 472 extern pgcnt_t anon_segkp_pages_locked; 473 474 extern int anon_debug; 475 476 #ifdef ANON_DEBUG 477 478 #define A_ANON 0x01 479 #define A_RESV 0x02 480 #define A_MRESV 0x04 481 482 /* vararg-like debugging macro. */ 483 #define ANON_PRINT(f, printf_args) \ 484 if (anon_debug & f) \ 485 printf printf_args 486 487 #else /* ANON_DEBUG */ 488 489 #define ANON_PRINT(f, printf_args) 490 491 #endif /* ANON_DEBUG */ 492 493 #endif /* _KERNEL */ 494 495 #ifdef __cplusplus 496 } 497 #endif 498 499 #endif /* _VM_ANON_H */ 500