1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * The following routines implement the hat layer's
28 * recording of the referenced and modified bits.
29 */
30
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/debug.h>
35 #include <sys/kmem.h>
36
37 /*
38 * Note, usage of cmn_err requires you not hold any hat layer locks.
39 */
40 #include <sys/cmn_err.h>
41
42 #include <vm/as.h>
43 #include <vm/hat.h>
44
/*
 * Global state for the ref/mod statistics block allocator.
 */
kmutex_t hat_statlock; /* protects all hat statistics data */
/* chain (via hrm_hnext) of the raw kmem allocations backing hrm_blist */
struct hrmstat *hrm_memlist; /* tracks memory alloced for hrm_blist blocks */
struct hrmstat **hrm_hashtab; /* hash table for finding blocks quickly */
/* free list of statistics blocks, threaded through hrm_hnext */
struct hrmstat *hrm_blist;
/* number of extra blocks hrm_getblk() adds on each allocation */
int hrm_blist_incr = HRM_BLIST_INCR;
/* hrm_getblk() refills once hrm_blist_num drops to this value or below */
int hrm_blist_lowater = HRM_BLIST_INCR/2;
/* number of blocks currently on hrm_blist */
int hrm_blist_num = 0;
/* number of usable blocks allocated so far (free or in use) */
int hrm_blist_total = 0;
/* NOTE(review): not referenced anywhere in this part of the file */
int hrm_mlockinited = 0;
/* set by hat_setstat() on failure, reported and cleared by hat_startstat() */
int hrm_allocfailmsg = 0; /* print a message when allocations fail */
/* running count of hrm_balloc() failures seen by hat_setstat() */
int hrm_allocfail = 0;

/* Forward declarations of the file-local helpers defined below. */
static struct hrmstat *hrm_balloc(void);
static void hrm_link(struct hrmstat *);
static void hrm_setbits(struct hrmstat *, caddr_t, uint_t);
static void hrm_hashout(struct hrmstat *);
static void hrm_getblk(int);
62
/*
 * Hash bucket index for the statistics block covering (as, addr): the
 * block number of the address is mixed with the address-space pointer
 * and the result is masked down to the table size.
 */
#define	hrm_hash(as, addr)						\
	((((uintptr_t)(as) >> 2) ^					\
	    ((uintptr_t)(addr) >> HRM_BASESHIFT)) & HRM_HASHMASK)

/*
 * Evaluates to 1 when block hrm belongs to address space as and its base
 * is the block-aligned base of addr, otherwise to 0.
 */
#define	hrm_match(hrm, as, addr)					\
	((hrm)->hrm_as == (as) &&					\
	    (hrm)->hrm_base == ((uintptr_t)(addr) & HRM_BASEMASK))
70
71 /*
72 * Called when an address space maps in more pages while stats are being
73 * collected.
74 */
75 /* ARGSUSED */
76 void
hat_resvstat(size_t chunk,struct as * as,caddr_t addr)77 hat_resvstat(size_t chunk, struct as *as, caddr_t addr)
78 {
79 }
80
81 /*
82 * Start the statistics gathering for an address space.
83 * Return -1 if we can't do it, otherwise return an opaque
84 * identifier to be used when querying for the gathered statistics.
85 * The identifier is an unused bit in a_vbits.
86 * Bit 0 is reserved for swsmon.
87 */
88 int
hat_startstat(struct as * as)89 hat_startstat(struct as *as)
90 {
91 uint_t nbits; /* number of bits */
92 uint_t bn; /* bit number */
93 uint_t id; /* new vbit, identifier */
94 uint_t vbits; /* used vbits of address space */
95 size_t chunk; /* mapped size for stats */
96
97 /*
98 * If the refmod saving memory allocator runs out, print
99 * a warning message about how to fix it, see comment at
100 * the beginning of hat_setstat.
101 */
102 if (hrm_allocfailmsg) {
103 cmn_err(CE_WARN,
104 "hrm_balloc failures occured, increase hrm_blist_incr");
105 hrm_allocfailmsg = 0;
106 }
107
108 /*
109 * Verify that a buffer of statistics blocks exists
110 * and allocate more, if needed.
111 */
112
113 chunk = hat_get_mapped_size(as->a_hat);
114 chunk = (btop(chunk)/HRM_PAGES);
115 if (chunk < HRM_BLIST_INCR)
116 chunk = 0;
117
118 hrm_getblk((int)chunk);
119
120 /*
121 * Find a unused id in the given address space.
122 */
123 hat_enter(as->a_hat);
124 vbits = as->a_vbits;
125 nbits = sizeof (as->a_vbits) * NBBY;
126 for (bn = 1, id = 2; bn < (nbits - 1); bn++, id <<= 1)
127 if ((id & vbits) == 0)
128 break;
129 if (bn >= (nbits - 1)) {
130 hat_exit(as->a_hat);
131 return (-1);
132 }
133 as->a_vbits |= id;
134 hat_exit(as->a_hat);
135 (void) hat_stats_enable(as->a_hat);
136 return (id);
137 }
138
139 /*
140 * Record referenced and modified information for an address space.
141 * Rmbits is a word containing the referenced bit in bit position 1
142 * and the modified bit in bit position 0.
143 *
144 * For current informational uses, one can rerun any program using
145 * this facility after modifying the hrm_blist_incr to be a larger
146 * amount so that a larger buffer of blocks will be maintained.
147 */
148 void
hat_setstat(struct as * as,caddr_t addr,size_t len,uint_t rmbits)149 hat_setstat(struct as *as, caddr_t addr, size_t len, uint_t rmbits)
150 {
151 struct hrmstat *hrm;
152 uint_t vbits, newbits, nb;
153 int h;
154
155 ASSERT(len == PAGESIZE);
156 ASSERT((rmbits & ~(P_MOD|P_REF)) == 0);
157
158 if (rmbits == 0)
159 return;
160
161 mutex_enter(&hat_statlock);
162
163 /*
164 * Search the hash list for the as and addr we are looking for
165 * and set the ref and mod bits in every block that matches.
166 */
167 vbits = 0;
168 h = hrm_hash(as, addr);
169 for (hrm = hrm_hashtab[h]; hrm; hrm = hrm->hrm_hnext) {
170 if (hrm_match(hrm, as, addr)) {
171 hrm_setbits(hrm, addr, rmbits);
172 vbits |= hrm->hrm_id;
173 }
174 }
175
176 /*
177 * If we didn't find a block for all of the enabled
178 * vpages bits, then allocate and initialize a block
179 * for each bit that was not found.
180 */
181 if (vbits != as->a_vbits) {
182 newbits = (vbits ^ as->a_vbits) & as->a_vbits;
183 while (newbits) {
184 if (ffs(newbits))
185 nb = 1 << (ffs(newbits)-1);
186 hrm = (struct hrmstat *)hrm_balloc();
187 if (hrm == NULL) {
188 hrm_allocfailmsg = 1;
189 hrm_allocfail++;
190 mutex_exit(&hat_statlock);
191 return;
192 }
193 hrm->hrm_as = as;
194 hrm->hrm_base = (uintptr_t)addr & HRM_BASEMASK;
195 hrm->hrm_id = nb;
196 hrm_link(hrm);
197 hrm_setbits(hrm, addr, rmbits);
198 newbits &= ~nb;
199 }
200 }
201 mutex_exit(&hat_statlock);
202 }
203
204 /*
205 * Free the resources used to maintain the referenced and modified
206 * statistics for the virtual page view of an address space
207 * identified by id.
208 */
209 void
hat_freestat(struct as * as,int id)210 hat_freestat(struct as *as, int id)
211 {
212 struct hrmstat *hrm;
213 struct hrmstat *prev_ahrm;
214 struct hrmstat *hrm_tmplist;
215 struct hrmstat *hrm_next;
216
217 hat_stats_disable(as->a_hat); /* tell the hat layer to stop */
218 hat_enter(as->a_hat);
219 if (id == 0)
220 as->a_vbits = 0;
221 else
222 as->a_vbits &= ~id;
223
224 if ((hrm = as->a_hrm) == NULL) {
225 hat_exit(as->a_hat);
226 return;
227 }
228 hat_exit(as->a_hat);
229
230 mutex_enter(&hat_statlock);
231
232 for (prev_ahrm = NULL; hrm; hrm = hrm->hrm_anext) {
233 if ((id == hrm->hrm_id) || (id == NULL)) {
234
235 hrm_hashout(hrm);
236 hrm->hrm_hnext = hrm_blist;
237 hrm_blist = hrm;
238 hrm_blist_num++;
239
240 if (prev_ahrm == NULL)
241 as->a_hrm = hrm->hrm_anext;
242 else
243 prev_ahrm->hrm_anext = hrm->hrm_anext;
244
245 } else
246 prev_ahrm = hrm;
247 }
248
249 /*
250 * If all statistics blocks are free,
251 * return the memory to the system.
252 */
253 if (hrm_blist_num == hrm_blist_total) {
254 /* zero the block list since we are giving back its memory */
255 hrm_blist = NULL;
256 hrm_blist_num = 0;
257 hrm_blist_total = 0;
258 hrm_tmplist = hrm_memlist;
259 hrm_memlist = NULL;
260 } else {
261 hrm_tmplist = NULL;
262 }
263
264 mutex_exit(&hat_statlock);
265
266 /*
267 * If there are any hrmstat structures to be freed, this must only
268 * be done after we've released hat_statlock.
269 */
270 while (hrm_tmplist != NULL) {
271 hrm_next = hrm_tmplist->hrm_hnext;
272 kmem_free(hrm_tmplist, hrm_tmplist->hrm_base);
273 hrm_tmplist = hrm_next;
274 }
275 }
276
277 /*
278 * Grab memory for statistics gathering of the hat layer.
279 */
280 static void
hrm_getblk(int chunk)281 hrm_getblk(int chunk)
282 {
283 struct hrmstat *hrm, *l;
284 int i;
285 int hrm_incr;
286
287 mutex_enter(&hat_statlock);
288 /*
289 * XXX The whole private freelist management here really should be
290 * overhauled.
291 *
292 * The freelist should have some knowledge of how much memory is
293 * needed by a process and thus when hat_resvstat get's called, we can
294 * increment the freelist needs for that process within this subsystem.
295 * Thus there will be reservations for all processes which are being
296 * watched which should be accurate, and consume less memory overall.
297 *
298 * For now, just make sure there's enough entries on the freelist to
299 * handle the current chunk.
300 */
301 if ((hrm_blist == NULL) ||
302 (hrm_blist_num <= hrm_blist_lowater) ||
303 (chunk && (hrm_blist_num < chunk + hrm_blist_incr))) {
304 mutex_exit(&hat_statlock);
305
306 hrm_incr = chunk + hrm_blist_incr;
307 hrm = kmem_zalloc(sizeof (struct hrmstat) * hrm_incr, KM_SLEEP);
308 hrm->hrm_base = sizeof (struct hrmstat) * hrm_incr;
309
310 /*
311 * thread the allocated blocks onto a freelist
312 * using the first block to hold information for
313 * freeing them all later
314 */
315 mutex_enter(&hat_statlock);
316 hrm->hrm_hnext = hrm_memlist;
317 hrm_memlist = hrm;
318
319 hrm_blist_total += (hrm_incr - 1);
320 for (i = 1; i < hrm_incr; i++) {
321 l = &hrm[i];
322 l->hrm_hnext = hrm_blist;
323 hrm_blist = l;
324 hrm_blist_num++;
325 }
326 }
327 mutex_exit(&hat_statlock);
328 }
329
330 static void
hrm_hashin(struct hrmstat * hrm)331 hrm_hashin(struct hrmstat *hrm)
332 {
333 int h;
334
335 ASSERT(MUTEX_HELD(&hat_statlock));
336 h = hrm_hash(hrm->hrm_as, hrm->hrm_base);
337
338 hrm->hrm_hnext = hrm_hashtab[h];
339 hrm_hashtab[h] = hrm;
340 }
341
342 static void
hrm_hashout(struct hrmstat * hrm)343 hrm_hashout(struct hrmstat *hrm)
344 {
345 struct hrmstat *list, **prev_hrm;
346 int h;
347
348 ASSERT(MUTEX_HELD(&hat_statlock));
349 h = hrm_hash(hrm->hrm_as, hrm->hrm_base);
350 list = hrm_hashtab[h];
351 prev_hrm = &hrm_hashtab[h];
352
353 while (list) {
354 if (list == hrm) {
355 *prev_hrm = list->hrm_hnext;
356 return;
357 }
358 prev_hrm = &list->hrm_hnext;
359 list = list->hrm_hnext;
360 }
361 }
362
363
364 /*
365 * Link a statistic block into an address space and also put it
366 * on the hash list for future references.
367 */
368 static void
hrm_link(struct hrmstat * hrm)369 hrm_link(struct hrmstat *hrm)
370 {
371 struct as *as = hrm->hrm_as;
372
373 ASSERT(MUTEX_HELD(&hat_statlock));
374 hrm->hrm_anext = as->a_hrm;
375 as->a_hrm = hrm;
376 hrm_hashin(hrm);
377 }
378
379 /*
380 * Allocate a block for statistics keeping.
381 * Returns NULL if blocks are unavailable.
382 */
383 static struct hrmstat *
hrm_balloc(void)384 hrm_balloc(void)
385 {
386 struct hrmstat *hrm;
387
388 ASSERT(MUTEX_HELD(&hat_statlock));
389
390 hrm = hrm_blist;
391 if (hrm != NULL) {
392 hrm_blist = hrm->hrm_hnext;
393 hrm_blist_num--;
394 hrm->hrm_hnext = NULL;
395 }
396 return (hrm);
397 }
398
399 /*
400 * Set the ref and mod bits for addr within statistics block hrm.
401 */
402 static void
hrm_setbits(struct hrmstat * hrm,caddr_t addr,uint_t bits)403 hrm_setbits(struct hrmstat *hrm, caddr_t addr, uint_t bits)
404 {
405 uint_t po, bo, spb;
406 uint_t nbits;
407
408 po = ((uintptr_t)addr & HRM_BASEOFFSET) >> MMU_PAGESHIFT; /* pg off */
409 bo = po / (NBBY / 2); /* which byte in bit array */
410 spb = (3 - (po & 3)) * 2; /* shift position within byte */
411 nbits = bits << spb; /* bit mask */
412 hrm->hrm_bits[bo] |= nbits;
413 }
414
415 /*
416 * Return collected statistics about an address space.
417 * If clearflag is set, atomically read and zero the bits.
418 *
419 * Fill in the data array supplied with the referenced and
420 * modified bits collected for address range [addr ... addr + len]
421 * in address space, as, uniquely identified by id.
422 * The destination is a byte array. We fill in three bits per byte:
423 * referenced, modified, and hwmapped bits.
424 * Kernel only interface, can't fault on destination data array.
425 *
426 */
427 void
hat_getstat(struct as * as,caddr_t addr,size_t len,uint_t id,caddr_t datap,int clearflag)428 hat_getstat(struct as *as, caddr_t addr, size_t len, uint_t id,
429 caddr_t datap, int clearflag)
430 {
431 size_t np; /* number of pages */
432 caddr_t a;
433 char *dp;
434
435 np = btop(len);
436 bzero(datap, np);
437
438 /* allocate enough statistics blocks to cover the len passed in */
439 hrm_getblk(np / HRM_PAGES);
440
441 hat_sync(as->a_hat, addr, len, clearflag);
442
443 /* allocate more statistics blocks if needed */
444 hrm_getblk(0);
445
446 mutex_enter(&hat_statlock);
447 if (hrm_hashtab == NULL) {
448 /* can happen when victim process exits */
449 mutex_exit(&hat_statlock);
450 return;
451 }
452 dp = datap;
453 a = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
454 while (a < addr + len) {
455 struct hrmstat *hrm;
456 size_t n; /* number of pages, temp */
457 int h; /* hash index */
458 uint_t po;
459
460 h = hrm_hash(as, a);
461 n = (HRM_PAGES -
462 (((uintptr_t)a & HRM_PAGEMASK) >> MMU_PAGESHIFT));
463 if (n > np)
464 n = np;
465 po = ((uintptr_t)a & HRM_BASEOFFSET) >> MMU_PAGESHIFT;
466
467 for (hrm = hrm_hashtab[h]; hrm; hrm = hrm->hrm_hnext) {
468 if (hrm->hrm_as == as &&
469 hrm->hrm_base == ((uintptr_t)a & HRM_BASEMASK) &&
470 id == hrm->hrm_id) {
471 int i, nr;
472 uint_t bo, spb;
473
474 /*
475 * Extract leading unaligned bits.
476 */
477 i = 0;
478 while (i < n && (po & 3)) {
479 bo = po / (NBBY / 2);
480 spb = (3 - (po & 3)) * 2;
481 *dp++ |= (hrm->hrm_bits[bo] >> spb) & 3;
482 if (clearflag)
483 hrm->hrm_bits[bo] &= ~(3<<spb);
484 po++;
485 i++;
486 }
487 /*
488 * Extract aligned bits.
489 */
490 nr = n/4*4;
491 bo = po / (NBBY / 2);
492 while (i < nr) {
493 int bits = hrm->hrm_bits[bo];
494 *dp++ |= (bits >> 6) & 3;
495 *dp++ |= (bits >> 4) & 3;
496 *dp++ |= (bits >> 2) & 3;
497 *dp++ |= (bits >> 0) & 3;
498 if (clearflag)
499 hrm->hrm_bits[bo] = 0;
500 bo++;
501 po += 4;
502 i += 4;
503 }
504 /*
505 * Extract trailing unaligned bits.
506 */
507 while (i < n) {
508 bo = po / (NBBY / 2);
509 spb = (3 - (po & 3)) * 2;
510 *dp++ |= (hrm->hrm_bits[bo] >> spb) & 3;
511 if (clearflag)
512 hrm->hrm_bits[bo] &= ~(3<<spb);
513 po++;
514 i++;
515 }
516
517 break;
518 }
519 }
520 if (hrm == NULL)
521 dp += n;
522 np -= n;
523 a += n * MMU_PAGESIZE;
524 }
525 mutex_exit(&hat_statlock);
526 }
527