1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * University Copyright- Copyright (c) 1982, 1986, 1988 31 * The Regents of the University of California 32 * All Rights Reserved 33 * 34 * University Acknowledgment- Portions of this document are derived from 35 * software developed by the University of California, Berkeley, and its 36 * contributors. 37 */ 38 39 #ifndef _VM_ANON_H 40 #define _VM_ANON_H 41 42 #pragma ident "%Z%%M% %I% %E% SMI" 43 44 #include <sys/cred.h> 45 #include <vm/seg.h> 46 #include <vm/vpage.h> 47 48 #ifdef __cplusplus 49 extern "C" { 50 #endif 51 52 /* 53 * VM - Anonymous pages. 54 */ 55 56 typedef unsigned long anoff_t; /* anon offsets */ 57 58 /* 59 * Each anonymous page, either in memory or in swap, has an anon structure. 60 * The structure (slot) provides a level of indirection between anonymous pages 61 * and their backing store. 62 * 63 * (an_vp, an_off) names the vnode of the anonymous page for this slot. 64 * 65 * (an_pvp, an_poff) names the location of the physical backing store 66 * for the page this slot represents. If the name is null there is no 67 * associated physical store. The physical backing store location can 68 * change while the slot is in use. 69 * 70 * an_hash is a hash list of anon slots. The list is hashed by 71 * (an_vp, an_off) of the associated anonymous page and provides a 72 * method of going from the name of an anonymous page to its 73 * associated anon slot. 74 * 75 * an_refcnt holds a reference count which is the number of separate 76 * copies that will need to be created in case of copy-on-write. 77 * A refcnt > 0 protects the existence of the slot. The refcnt is 78 * initialized to 1 when the anon slot is created in anon_alloc(). 79 * If a client obtains an anon slot and allows multiple threads to 80 * share it, then it is the client's responsibility to insure that 81 * it does not allow one thread to try to reference the slot at the 82 * same time as another is trying to decrement the last count and 83 * destroy the anon slot. E.g., the seg_vn segment type protects 84 * against this with higher level locks. 85 */ 86 87 struct anon { 88 struct vnode *an_vp; /* vnode of anon page */ 89 struct vnode *an_pvp; /* vnode of physical backing store */ 90 anoff_t an_off; /* offset of anon page */ 91 anoff_t an_poff; /* offset in vnode */ 92 struct anon *an_hash; /* hash table of anon slots */ 93 int an_refcnt; /* # of people sharing slot */ 94 }; 95 96 #ifdef _KERNEL 97 /* 98 * The swapinfo_lock protects: 99 * swapinfo list 100 * individual swapinfo structures 101 * 102 * The anoninfo_lock protects: 103 * anoninfo counters 104 * 105 * The anonhash_lock protects: 106 * anon hash lists 107 * anon slot fields 108 * 109 * Fields in the anon slot which are read-only for the life of the slot 110 * (an_vp, an_off) do not require the anonhash_lock be held to access them. 111 * If you access a field without the anonhash_lock held you must be holding 112 * the slot with an_refcnt to make sure it isn't destroyed. 113 * To write (an_pvp, an_poff) in a given slot you must also hold the 114 * p_iolock of the anonymous page for slot. 115 */ 116 extern kmutex_t anoninfo_lock; 117 extern kmutex_t swapinfo_lock; 118 extern kmutex_t anonhash_lock[]; 119 extern pad_mutex_t anon_array_lock[]; 120 extern kcondvar_t anon_array_cv[]; 121 122 /* 123 * Global hash table to provide a function from (vp, off) -> ap 124 */ 125 extern size_t anon_hash_size; 126 extern struct anon **anon_hash; 127 #define ANON_HASH_SIZE anon_hash_size 128 #define ANON_HASHAVELEN 4 129 #define ANON_HASH(VP, OFF) \ 130 ((((uintptr_t)(VP) >> 7) ^ ((OFF) >> PAGESHIFT)) & (ANON_HASH_SIZE - 1)) 131 132 #define AH_LOCK_SIZE 64 133 #define AH_LOCK(vp, off) (ANON_HASH((vp), (off)) & (AH_LOCK_SIZE -1)) 134 135 #endif /* _KERNEL */ 136 137 /* 138 * Declaration for the Global counters to accurately 139 * track the kernel foot print in memory. 140 */ 141 extern pgcnt_t segvn_pages_locked; 142 extern pgcnt_t pages_locked; 143 extern pgcnt_t pages_claimed; 144 extern pgcnt_t pages_useclaim; 145 extern pgcnt_t obp_pages; 146 147 /* 148 * Anonymous backing store accounting structure for swapctl. 149 * 150 * ani_max = maximum amount of swap space 151 * (including potentially available physical memory) 152 * ani_free = amount of unallocated anonymous memory 153 * (some of which might be reserved and including 154 * potentially available physical memory) 155 * ani_resv = amount of claimed (reserved) anonymous memory 156 * 157 * The swap data can be aquired more efficiently through the 158 * kstats interface. 159 * Total slots currently available for reservation = 160 * MAX(ani_max - ani_resv, 0) + (availrmem - swapfs_minfree) 161 */ 162 struct anoninfo { 163 pgcnt_t ani_max; 164 pgcnt_t ani_free; 165 pgcnt_t ani_resv; 166 }; 167 168 #ifdef _SYSCALL32 169 struct anoninfo32 { 170 size32_t ani_max; 171 size32_t ani_free; 172 size32_t ani_resv; 173 }; 174 #endif /* _SYSCALL32 */ 175 176 /* 177 * Define the NCPU pool of the ani_free counters. Update the counter 178 * of the cpu on which the thread is running and in every clock intr 179 * sync anoninfo.ani_free with the current total off all the NCPU entries. 180 */ 181 182 typedef struct ani_free { 183 kmutex_t ani_lock; 184 pgcnt_t ani_count; 185 uchar_t pad[64 - sizeof (kmutex_t) - sizeof (pgcnt_t)]; 186 /* XXX 64 = cacheline size */ 187 } ani_free_t; 188 189 #define ANI_MAX_POOL 128 190 extern ani_free_t ani_free_pool[]; 191 192 #define ANI_ADD(inc) { \ 193 ani_free_t *anifp; \ 194 int index; \ 195 index = (CPU->cpu_id & (ANI_MAX_POOL - 1)); \ 196 anifp = &ani_free_pool[index]; \ 197 mutex_enter(&anifp->ani_lock); \ 198 anifp->ani_count += inc; \ 199 mutex_exit(&anifp->ani_lock); \ 200 } 201 202 /* 203 * Anon array pointers are allocated in chunks. Each chunk 204 * has PAGESIZE/sizeof(u_long *) of anon pointers. 205 * There are two levels of arrays for anon array pointers larger 206 * than a chunk. The first level points to anon array chunks. 207 * The second level consists of chunks of anon pointers. 208 * 209 * If anon array is smaller than a chunk then the whole anon array 210 * is created (memory is allocated for whole anon array). 211 * If anon array is larger than a chunk only first level array is 212 * allocated. Then other arrays (chunks) are allocated only when 213 * they are initialized with anon pointers. 214 */ 215 struct anon_hdr { 216 kmutex_t serial_lock; /* serialize array chunk allocation */ 217 pgcnt_t size; /* number of pointers to (anon) pages */ 218 void **array_chunk; /* pointers to anon pointers or chunks of */ 219 /* anon pointers */ 220 int flags; /* ANON_ALLOC_FORCE force preallocation of */ 221 /* whole anon array */ 222 }; 223 224 #ifdef _LP64 225 #define ANON_PTRSHIFT 3 226 #define ANON_PTRMASK ~7 227 #else 228 #define ANON_PTRSHIFT 2 229 #define ANON_PTRMASK ~3 230 #endif 231 232 #define ANON_CHUNK_SIZE (PAGESIZE >> ANON_PTRSHIFT) 233 #define ANON_CHUNK_SHIFT (PAGESHIFT - ANON_PTRSHIFT) 234 #define ANON_CHUNK_OFF (ANON_CHUNK_SIZE - 1) 235 236 /* 237 * Anon flags. 238 */ 239 #define ANON_SLEEP 0x0 /* ok to block */ 240 #define ANON_NOSLEEP 0x1 /* non-blocking call */ 241 #define ANON_ALLOC_FORCE 0x2 /* force single level anon array */ 242 #define ANON_GROWDOWN 0x4 /* anon array should grow downward */ 243 244 struct kshmid; 245 246 /* 247 * The anon_map structure is used by various clients of the anon layer to 248 * manage anonymous memory. When anonymous memory is shared, 249 * then the different clients sharing it will point to the 250 * same anon_map structure. Also, if a segment is unmapped 251 * in the middle where an anon_map structure exists, the 252 * newly created segment will also share the anon_map structure, 253 * although the two segments will use different ranges of the 254 * anon array. When mappings are private (or shared with 255 * a reference count of 1), an unmap operation will free up 256 * a range of anon slots in the array given by the anon_map 257 * structure. Because of fragmentation due to this unmapping, 258 * we have to store the size of the anon array in the anon_map 259 * structure so that we can free everything when the referernce 260 * count goes to zero. 261 * 262 * A new rangelock scheme is introduced to make the anon layer scale. 263 * A reader/writer lock per anon_amp and an array of system-wide hash 264 * locks, anon_array_lock[] are introduced to replace serial_lock and 265 * anonmap lock. The writer lock is held when we want to singlethreaD 266 * the reference to the anon array pointers or when references to 267 * anon_map's members, whereas reader lock and anon_array_lock are 268 * held to allows multiple threads to reference different part of 269 * anon array. A global set of condition variables, anon_array_cv, 270 * are used with anon_array_lock[] to make the hold time of the locks 271 * short. 272 * 273 * szc is used to calculate the index of hash locks and cv's. We 274 * could've just used seg->s_szc if not for the possible sharing of 275 * anon_amp between SYSV shared memory and ISM, so now we introduce 276 * szc in the anon_map structure. For MAP_SHARED, the amp->szc is either 277 * 0 (base page size) or page_num_pagesizes() - 1, while MAP_PRIVATE 278 * the amp->szc could be anything in [0, page_num_pagesizes() - 1]. 279 */ 280 struct anon_map { 281 krwlock_t a_rwlock; /* protect anon_map and anon array */ 282 size_t size; /* size in bytes mapped by the anon array */ 283 struct anon_hdr *ahp; /* anon array header pointer, containing */ 284 /* anon pointer array(s) */ 285 size_t swresv; /* swap space reserved for this anon_map */ 286 ulong_t refcnt; /* reference count on this structure */ 287 ushort_t a_szc; /* max szc among shared processes */ 288 void *locality; /* lgroup locality info */ 289 struct kshmid *a_sp; /* kshmid if amp backs sysV, or NULL */ 290 }; 291 292 #ifdef _KERNEL 293 294 #define ANON_BUSY 0x1 295 #define ANON_ISBUSY(slot) (*(slot) & ANON_BUSY) 296 #define ANON_SETBUSY(slot) (*(slot) |= ANON_BUSY) 297 #define ANON_CLRBUSY(slot) (*(slot) &= ~ANON_BUSY) 298 299 #define ANON_MAP_SHIFT 6 /* log2(sizeof (struct anon_map)) */ 300 #define ANON_ARRAY_SHIFT 7 /* log2(ANON_LOCKSIZE) */ 301 #define ANON_LOCKSIZE 128 302 303 #define ANON_LOCK_ENTER(lock, type) rw_enter((lock), (type)) 304 #define ANON_LOCK_EXIT(lock) rw_exit((lock)) 305 306 #define ANON_ARRAY_HASH(amp, idx)\ 307 ((((idx) + ((idx) >> ANON_ARRAY_SHIFT) +\ 308 ((idx) >> (ANON_ARRAY_SHIFT << 1)) +\ 309 ((idx) >> (ANON_ARRAY_SHIFT + (ANON_ARRAY_SHIFT << 1)))) ^\ 310 ((uintptr_t)(amp) >> ANON_MAP_SHIFT)) & (ANON_LOCKSIZE - 1)) 311 312 typedef struct anon_sync_obj { 313 kmutex_t *sync_mutex; 314 kcondvar_t *sync_cv; 315 ulong_t *sync_data; 316 } anon_sync_obj_t; 317 318 /* 319 * Anonymous backing store accounting structure for kernel. 320 * ani_max = total reservable slots on physical (disk-backed) swap 321 * ani_phys_resv = total phys slots reserved for use by clients 322 * ani_mem_resv = total mem slots reserved for use by clients 323 * ani_free = # unallocated physical slots + # of reserved unallocated 324 * memory slots 325 */ 326 327 /* 328 * Initial total swap slots available for reservation 329 */ 330 #define TOTAL_AVAILABLE_SWAP \ 331 (k_anoninfo.ani_max + MAX((spgcnt_t)(availrmem - swapfs_minfree), 0)) 332 333 /* 334 * Swap slots currently available for reservation 335 */ 336 #define CURRENT_TOTAL_AVAILABLE_SWAP \ 337 ((k_anoninfo.ani_max - k_anoninfo.ani_phys_resv) + \ 338 MAX((spgcnt_t)(availrmem - swapfs_minfree), 0)) 339 340 struct k_anoninfo { 341 pgcnt_t ani_max; /* total reservable slots on phys */ 342 /* (disk) swap */ 343 pgcnt_t ani_free; /* # of unallocated phys and mem slots */ 344 pgcnt_t ani_phys_resv; /* # of reserved phys (disk) slots */ 345 pgcnt_t ani_mem_resv; /* # of reserved mem slots */ 346 pgcnt_t ani_locked_swap; /* # of swap slots locked in reserved */ 347 /* mem swap */ 348 }; 349 350 extern struct k_anoninfo k_anoninfo; 351 352 extern void anon_init(void); 353 extern struct anon *anon_alloc(struct vnode *, anoff_t); 354 extern void anon_dup(struct anon_hdr *, ulong_t, 355 struct anon_hdr *, ulong_t, size_t); 356 extern void anon_dup_fill_holes(struct anon_hdr *, ulong_t, 357 struct anon_hdr *, ulong_t, size_t, uint_t, int); 358 extern int anon_fill_cow_holes(struct seg *, caddr_t, struct anon_hdr *, 359 ulong_t, struct vnode *, u_offset_t, size_t, uint_t, 360 uint_t, struct vpage [], struct cred *); 361 extern void anon_free(struct anon_hdr *, ulong_t, size_t); 362 extern void anon_free_pages(struct anon_hdr *, ulong_t, size_t, uint_t); 363 extern void anon_disclaim(struct anon_map *, ulong_t, size_t, int); 364 extern int anon_getpage(struct anon **, uint_t *, struct page **, 365 size_t, struct seg *, caddr_t, enum seg_rw, struct cred *); 366 extern int swap_getconpage(struct vnode *, u_offset_t, size_t, 367 uint_t *, page_t *[], size_t, page_t *, uint_t *, 368 spgcnt_t *, struct seg *, caddr_t, 369 enum seg_rw, struct cred *); 370 extern int anon_map_getpages(struct anon_map *, ulong_t, 371 uint_t, struct seg *, caddr_t, uint_t, 372 uint_t *, page_t *[], uint_t *, 373 struct vpage [], enum seg_rw, int, int, struct cred *); 374 extern int anon_map_privatepages(struct anon_map *, ulong_t, 375 uint_t, struct seg *, caddr_t, uint_t, 376 page_t *[], struct vpage [], int, struct cred *); 377 extern struct page *anon_private(struct anon **, struct seg *, 378 caddr_t, uint_t, struct page *, 379 int, struct cred *); 380 extern struct page *anon_zero(struct seg *, caddr_t, 381 struct anon **, struct cred *); 382 extern int anon_map_createpages(struct anon_map *, ulong_t, 383 size_t, struct page **, 384 struct seg *, caddr_t, 385 enum seg_rw, struct cred *); 386 extern int anon_map_demotepages(struct anon_map *, ulong_t, 387 struct seg *, caddr_t, uint_t, 388 struct vpage [], struct cred *); 389 extern void anon_shmap_free_pages(struct anon_map *, ulong_t, size_t); 390 extern int anon_resvmem(size_t, uint_t); 391 extern void anon_unresv(size_t); 392 extern struct anon_map *anonmap_alloc(size_t, size_t); 393 extern void anonmap_free(struct anon_map *); 394 extern void anon_decref(struct anon *); 395 extern int non_anon(struct anon_hdr *, ulong_t, u_offset_t *, size_t *); 396 extern pgcnt_t anon_pages(struct anon_hdr *, ulong_t, pgcnt_t); 397 extern int anon_swap_adjust(pgcnt_t); 398 extern void anon_swap_restore(pgcnt_t); 399 extern struct anon_hdr *anon_create(pgcnt_t, int); 400 extern void anon_release(struct anon_hdr *, pgcnt_t); 401 extern struct anon *anon_get_ptr(struct anon_hdr *, ulong_t); 402 extern ulong_t *anon_get_slot(struct anon_hdr *, ulong_t); 403 extern struct anon *anon_get_next_ptr(struct anon_hdr *, ulong_t *); 404 extern int anon_set_ptr(struct anon_hdr *, ulong_t, struct anon *, int); 405 extern int anon_copy_ptr(struct anon_hdr *, ulong_t, 406 struct anon_hdr *, ulong_t, pgcnt_t, int); 407 extern pgcnt_t anon_grow(struct anon_hdr *, ulong_t *, pgcnt_t, pgcnt_t, int); 408 extern void anon_array_enter(struct anon_map *, ulong_t, 409 anon_sync_obj_t *); 410 extern int anon_array_try_enter(struct anon_map *, ulong_t, 411 anon_sync_obj_t *); 412 extern void anon_array_exit(anon_sync_obj_t *); 413 414 /* 415 * anon_resv checks to see if there is enough swap space to fulfill a 416 * request and if so, reserves the appropriate anonymous memory resources. 417 * anon_checkspace just checks to see if there is space to fulfill the request, 418 * without taking any resources. Both return 1 if successful and 0 if not. 419 */ 420 #define anon_resv(size) anon_resvmem((size), 1) 421 #define anon_checkspace(size) anon_resvmem((size), 0) 422 423 /* 424 * Flags to anon_private 425 */ 426 #define STEAL_PAGE 0x1 /* page can be stolen */ 427 #define LOCK_PAGE 0x2 /* page must be ``logically'' locked */ 428 429 /* 430 * Flags to anon_disclaim 431 */ 432 #define ANON_PGLOOKUP_BLK 0x1 /* block on locked pages */ 433 434 /* 435 * SEGKP ANON pages that are locked are assumed to be LWP stack pages 436 * and thus count towards the user pages locked count. 437 * This value is protected by the same lock as availrmem. 438 */ 439 extern pgcnt_t anon_segkp_pages_locked; 440 441 extern int anon_debug; 442 443 #ifdef ANON_DEBUG 444 445 #define A_ANON 0x01 446 #define A_RESV 0x02 447 #define A_MRESV 0x04 448 449 /* vararg-like debugging macro. */ 450 #define ANON_PRINT(f, printf_args) \ 451 if (anon_debug & f) \ 452 printf printf_args 453 454 #else /* ANON_DEBUG */ 455 456 #define ANON_PRINT(f, printf_args) 457 458 #endif /* ANON_DEBUG */ 459 460 #endif /* _KERNEL */ 461 462 #ifdef __cplusplus 463 } 464 #endif 465 466 #endif /* _VM_ANON_H */ 467