1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * The following routines implement the hat layer's 28 * recording of the referenced and modified bits. 29 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/debug.h> 35 #include <sys/kmem.h> 36 37 /* 38 * Note, usage of cmn_err requires you not hold any hat layer locks. 39 */ 40 #include <sys/cmn_err.h> 41 42 #include <vm/as.h> 43 #include <vm/hat.h> 44 45 kmutex_t hat_statlock; /* protects all hat statistics data */ 46 struct hrmstat *hrm_memlist; /* tracks memory alloced for hrm_blist blocks */ 47 struct hrmstat **hrm_hashtab; /* hash table for finding blocks quickly */ 48 struct hrmstat *hrm_blist; 49 int hrm_blist_incr = HRM_BLIST_INCR; 50 int hrm_blist_lowater = HRM_BLIST_INCR/2; 51 int hrm_blist_num = 0; 52 int hrm_blist_total = 0; 53 int hrm_mlockinited = 0; 54 int hrm_allocfailmsg = 0; /* print a message when allocations fail */ 55 int hrm_allocfail = 0; 56 57 static struct hrmstat *hrm_balloc(void); 58 static void hrm_link(struct hrmstat *); 59 static void hrm_setbits(struct hrmstat *, caddr_t, uint_t); 60 static void hrm_hashout(struct hrmstat *); 61 static void hrm_getblk(int); 62 63 #define hrm_hash(as, addr) \ 64 (HRM_HASHMASK & \ 65 (((uintptr_t)(addr) >> HRM_BASESHIFT) ^ ((uintptr_t)(as) >> 2))) 66 67 #define hrm_match(hrm, as, addr) \ 68 (((hrm)->hrm_as == (as) && \ 69 ((hrm)->hrm_base == ((uintptr_t)(addr) & HRM_BASEMASK))) ? 1 : 0) 70 71 /* 72 * Called when an address space maps in more pages while stats are being 73 * collected. 74 */ 75 /* ARGSUSED */ 76 void 77 hat_resvstat(size_t chunk, struct as *as, caddr_t addr) 78 { 79 } 80 81 /* 82 * Start the statistics gathering for an address space. 83 * Return -1 if we can't do it, otherwise return an opaque 84 * identifier to be used when querying for the gathered statistics. 85 * The identifier is an unused bit in a_vbits. 86 * Bit 0 is reserved for swsmon. 87 */ 88 int 89 hat_startstat(struct as *as) 90 { 91 uint_t nbits; /* number of bits */ 92 uint_t bn; /* bit number */ 93 uint_t id; /* new vbit, identifier */ 94 uint_t vbits; /* used vbits of address space */ 95 size_t chunk; /* mapped size for stats */ 96 97 /* 98 * If the refmod saving memory allocator runs out, print 99 * a warning message about how to fix it, see comment at 100 * the beginning of hat_setstat. 101 */ 102 if (hrm_allocfailmsg) { 103 cmn_err(CE_WARN, 104 "hrm_balloc failures occured, increase hrm_blist_incr"); 105 hrm_allocfailmsg = 0; 106 } 107 108 /* 109 * Verify that a buffer of statistics blocks exists 110 * and allocate more, if needed. 111 */ 112 113 chunk = hat_get_mapped_size(as->a_hat); 114 chunk = (btop(chunk)/HRM_PAGES); 115 if (chunk < HRM_BLIST_INCR) 116 chunk = 0; 117 118 hrm_getblk((int)chunk); 119 120 /* 121 * Find a unused id in the given address space. 122 */ 123 hat_enter(as->a_hat); 124 vbits = as->a_vbits; 125 nbits = sizeof (as->a_vbits) * NBBY; 126 for (bn = 1, id = 2; bn < (nbits - 1); bn++, id <<= 1) 127 if ((id & vbits) == 0) 128 break; 129 if (bn >= (nbits - 1)) { 130 hat_exit(as->a_hat); 131 return (-1); 132 } 133 as->a_vbits |= id; 134 hat_exit(as->a_hat); 135 (void) hat_stats_enable(as->a_hat); 136 return (id); 137 } 138 139 /* 140 * Record referenced and modified information for an address space. 141 * Rmbits is a word containing the referenced bit in bit position 1 142 * and the modified bit in bit position 0. 143 * 144 * For current informational uses, one can rerun any program using 145 * this facility after modifying the hrm_blist_incr to be a larger 146 * amount so that a larger buffer of blocks will be maintained. 147 */ 148 void 149 hat_setstat(struct as *as, caddr_t addr, size_t len, uint_t rmbits) 150 { 151 struct hrmstat *hrm; 152 uint_t vbits, newbits, nb; 153 int h; 154 155 ASSERT(len == PAGESIZE); 156 ASSERT((rmbits & ~(P_MOD|P_REF)) == 0); 157 158 if (rmbits == 0) 159 return; 160 161 mutex_enter(&hat_statlock); 162 163 /* 164 * Search the hash list for the as and addr we are looking for 165 * and set the ref and mod bits in every block that matches. 166 */ 167 vbits = 0; 168 h = hrm_hash(as, addr); 169 for (hrm = hrm_hashtab[h]; hrm; hrm = hrm->hrm_hnext) { 170 if (hrm_match(hrm, as, addr)) { 171 hrm_setbits(hrm, addr, rmbits); 172 vbits |= hrm->hrm_id; 173 } 174 } 175 176 /* 177 * If we didn't find a block for all of the enabled 178 * vpages bits, then allocate and initialize a block 179 * for each bit that was not found. 180 */ 181 if (vbits != as->a_vbits) { 182 newbits = (vbits ^ as->a_vbits) & as->a_vbits; 183 nb = 0; 184 while (newbits) { 185 if (ffs(newbits)) 186 nb = 1 << (ffs(newbits)-1); 187 hrm = (struct hrmstat *)hrm_balloc(); 188 if (hrm == NULL) { 189 hrm_allocfailmsg = 1; 190 hrm_allocfail++; 191 mutex_exit(&hat_statlock); 192 return; 193 } 194 hrm->hrm_as = as; 195 hrm->hrm_base = (uintptr_t)addr & HRM_BASEMASK; 196 hrm->hrm_id = nb; 197 hrm_link(hrm); 198 hrm_setbits(hrm, addr, rmbits); 199 newbits &= ~nb; 200 } 201 } 202 mutex_exit(&hat_statlock); 203 } 204 205 /* 206 * Free the resources used to maintain the referenced and modified 207 * statistics for the virtual page view of an address space 208 * identified by id. 209 */ 210 void 211 hat_freestat(struct as *as, int id) 212 { 213 struct hrmstat *hrm; 214 struct hrmstat *prev_ahrm; 215 struct hrmstat *hrm_tmplist; 216 struct hrmstat *hrm_next; 217 218 hat_stats_disable(as->a_hat); /* tell the hat layer to stop */ 219 hat_enter(as->a_hat); 220 if (id == 0) 221 as->a_vbits = 0; 222 else 223 as->a_vbits &= ~id; 224 225 if ((hrm = as->a_hrm) == NULL) { 226 hat_exit(as->a_hat); 227 return; 228 } 229 hat_exit(as->a_hat); 230 231 mutex_enter(&hat_statlock); 232 233 for (prev_ahrm = NULL; hrm; hrm = hrm->hrm_anext) { 234 if ((id == hrm->hrm_id) || (id == 0)) { 235 236 hrm_hashout(hrm); 237 hrm->hrm_hnext = hrm_blist; 238 hrm_blist = hrm; 239 hrm_blist_num++; 240 241 if (prev_ahrm == NULL) 242 as->a_hrm = hrm->hrm_anext; 243 else 244 prev_ahrm->hrm_anext = hrm->hrm_anext; 245 246 } else 247 prev_ahrm = hrm; 248 } 249 250 /* 251 * If all statistics blocks are free, 252 * return the memory to the system. 253 */ 254 if (hrm_blist_num == hrm_blist_total) { 255 /* zero the block list since we are giving back its memory */ 256 hrm_blist = NULL; 257 hrm_blist_num = 0; 258 hrm_blist_total = 0; 259 hrm_tmplist = hrm_memlist; 260 hrm_memlist = NULL; 261 } else { 262 hrm_tmplist = NULL; 263 } 264 265 mutex_exit(&hat_statlock); 266 267 /* 268 * If there are any hrmstat structures to be freed, this must only 269 * be done after we've released hat_statlock. 270 */ 271 while (hrm_tmplist != NULL) { 272 hrm_next = hrm_tmplist->hrm_hnext; 273 kmem_free(hrm_tmplist, hrm_tmplist->hrm_base); 274 hrm_tmplist = hrm_next; 275 } 276 } 277 278 /* 279 * Grab memory for statistics gathering of the hat layer. 280 */ 281 static void 282 hrm_getblk(int chunk) 283 { 284 struct hrmstat *hrm, *l; 285 int i; 286 int hrm_incr; 287 288 mutex_enter(&hat_statlock); 289 /* 290 * XXX The whole private freelist management here really should be 291 * overhauled. 292 * 293 * The freelist should have some knowledge of how much memory is 294 * needed by a process and thus when hat_resvstat get's called, we can 295 * increment the freelist needs for that process within this subsystem. 296 * Thus there will be reservations for all processes which are being 297 * watched which should be accurate, and consume less memory overall. 298 * 299 * For now, just make sure there's enough entries on the freelist to 300 * handle the current chunk. 301 */ 302 if ((hrm_blist == NULL) || 303 (hrm_blist_num <= hrm_blist_lowater) || 304 (chunk && (hrm_blist_num < chunk + hrm_blist_incr))) { 305 mutex_exit(&hat_statlock); 306 307 hrm_incr = chunk + hrm_blist_incr; 308 hrm = kmem_zalloc(sizeof (struct hrmstat) * hrm_incr, KM_SLEEP); 309 hrm->hrm_base = sizeof (struct hrmstat) * hrm_incr; 310 311 /* 312 * thread the allocated blocks onto a freelist 313 * using the first block to hold information for 314 * freeing them all later 315 */ 316 mutex_enter(&hat_statlock); 317 hrm->hrm_hnext = hrm_memlist; 318 hrm_memlist = hrm; 319 320 hrm_blist_total += (hrm_incr - 1); 321 for (i = 1; i < hrm_incr; i++) { 322 l = &hrm[i]; 323 l->hrm_hnext = hrm_blist; 324 hrm_blist = l; 325 hrm_blist_num++; 326 } 327 } 328 mutex_exit(&hat_statlock); 329 } 330 331 static void 332 hrm_hashin(struct hrmstat *hrm) 333 { 334 int h; 335 336 ASSERT(MUTEX_HELD(&hat_statlock)); 337 h = hrm_hash(hrm->hrm_as, hrm->hrm_base); 338 339 hrm->hrm_hnext = hrm_hashtab[h]; 340 hrm_hashtab[h] = hrm; 341 } 342 343 static void 344 hrm_hashout(struct hrmstat *hrm) 345 { 346 struct hrmstat *list, **prev_hrm; 347 int h; 348 349 ASSERT(MUTEX_HELD(&hat_statlock)); 350 h = hrm_hash(hrm->hrm_as, hrm->hrm_base); 351 list = hrm_hashtab[h]; 352 prev_hrm = &hrm_hashtab[h]; 353 354 while (list) { 355 if (list == hrm) { 356 *prev_hrm = list->hrm_hnext; 357 return; 358 } 359 prev_hrm = &list->hrm_hnext; 360 list = list->hrm_hnext; 361 } 362 } 363 364 365 /* 366 * Link a statistic block into an address space and also put it 367 * on the hash list for future references. 368 */ 369 static void 370 hrm_link(struct hrmstat *hrm) 371 { 372 struct as *as = hrm->hrm_as; 373 374 ASSERT(MUTEX_HELD(&hat_statlock)); 375 hrm->hrm_anext = as->a_hrm; 376 as->a_hrm = hrm; 377 hrm_hashin(hrm); 378 } 379 380 /* 381 * Allocate a block for statistics keeping. 382 * Returns NULL if blocks are unavailable. 383 */ 384 static struct hrmstat * 385 hrm_balloc(void) 386 { 387 struct hrmstat *hrm; 388 389 ASSERT(MUTEX_HELD(&hat_statlock)); 390 391 hrm = hrm_blist; 392 if (hrm != NULL) { 393 hrm_blist = hrm->hrm_hnext; 394 hrm_blist_num--; 395 hrm->hrm_hnext = NULL; 396 } 397 return (hrm); 398 } 399 400 /* 401 * Set the ref and mod bits for addr within statistics block hrm. 402 */ 403 static void 404 hrm_setbits(struct hrmstat *hrm, caddr_t addr, uint_t bits) 405 { 406 uint_t po, bo, spb; 407 uint_t nbits; 408 409 po = ((uintptr_t)addr & HRM_BASEOFFSET) >> MMU_PAGESHIFT; /* pg off */ 410 bo = po / (NBBY / 2); /* which byte in bit array */ 411 spb = (3 - (po & 3)) * 2; /* shift position within byte */ 412 nbits = bits << spb; /* bit mask */ 413 hrm->hrm_bits[bo] |= nbits; 414 } 415 416 /* 417 * Return collected statistics about an address space. 418 * If clearflag is set, atomically read and zero the bits. 419 * 420 * Fill in the data array supplied with the referenced and 421 * modified bits collected for address range [addr ... addr + len] 422 * in address space, as, uniquely identified by id. 423 * The destination is a byte array. We fill in three bits per byte: 424 * referenced, modified, and hwmapped bits. 425 * Kernel only interface, can't fault on destination data array. 426 * 427 */ 428 void 429 hat_getstat(struct as *as, caddr_t addr, size_t len, uint_t id, 430 caddr_t datap, int clearflag) 431 { 432 size_t np; /* number of pages */ 433 caddr_t a; 434 char *dp; 435 436 np = btop(len); 437 bzero(datap, np); 438 439 /* allocate enough statistics blocks to cover the len passed in */ 440 hrm_getblk(np / HRM_PAGES); 441 442 hat_sync(as->a_hat, addr, len, clearflag); 443 444 /* allocate more statistics blocks if needed */ 445 hrm_getblk(0); 446 447 mutex_enter(&hat_statlock); 448 if (hrm_hashtab == NULL) { 449 /* can happen when victim process exits */ 450 mutex_exit(&hat_statlock); 451 return; 452 } 453 dp = datap; 454 a = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 455 while (a < addr + len) { 456 struct hrmstat *hrm; 457 size_t n; /* number of pages, temp */ 458 int h; /* hash index */ 459 uint_t po; 460 461 h = hrm_hash(as, a); 462 n = (HRM_PAGES - 463 (((uintptr_t)a & HRM_PAGEMASK) >> MMU_PAGESHIFT)); 464 if (n > np) 465 n = np; 466 po = ((uintptr_t)a & HRM_BASEOFFSET) >> MMU_PAGESHIFT; 467 468 for (hrm = hrm_hashtab[h]; hrm; hrm = hrm->hrm_hnext) { 469 if (hrm->hrm_as == as && 470 hrm->hrm_base == ((uintptr_t)a & HRM_BASEMASK) && 471 id == hrm->hrm_id) { 472 int i, nr; 473 uint_t bo, spb; 474 475 /* 476 * Extract leading unaligned bits. 477 */ 478 i = 0; 479 while (i < n && (po & 3)) { 480 bo = po / (NBBY / 2); 481 spb = (3 - (po & 3)) * 2; 482 *dp++ |= (hrm->hrm_bits[bo] >> spb) & 3; 483 if (clearflag) 484 hrm->hrm_bits[bo] &= ~(3<<spb); 485 po++; 486 i++; 487 } 488 /* 489 * Extract aligned bits. 490 */ 491 nr = n/4*4; 492 bo = po / (NBBY / 2); 493 while (i < nr) { 494 int bits = hrm->hrm_bits[bo]; 495 *dp++ |= (bits >> 6) & 3; 496 *dp++ |= (bits >> 4) & 3; 497 *dp++ |= (bits >> 2) & 3; 498 *dp++ |= (bits >> 0) & 3; 499 if (clearflag) 500 hrm->hrm_bits[bo] = 0; 501 bo++; 502 po += 4; 503 i += 4; 504 } 505 /* 506 * Extract trailing unaligned bits. 507 */ 508 while (i < n) { 509 bo = po / (NBBY / 2); 510 spb = (3 - (po & 3)) * 2; 511 *dp++ |= (hrm->hrm_bits[bo] >> spb) & 3; 512 if (clearflag) 513 hrm->hrm_bits[bo] &= ~(3<<spb); 514 po++; 515 i++; 516 } 517 518 break; 519 } 520 } 521 if (hrm == NULL) 522 dp += n; 523 np -= n; 524 a += n * MMU_PAGESIZE; 525 } 526 mutex_exit(&hat_statlock); 527 } 528