xref: /illumos-gate/usr/src/uts/common/vm/hat_refmod.c (revision a359d6b10060521dfb798db6da99bff792cb55cb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * The following routines implement the hat layer's
30  * recording of the referenced and modified bits.
31  */
32 
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/debug.h>
37 #include <sys/kmem.h>
38 
39 /*
40  * Note, usage of cmn_err requires you not hold any hat layer locks.
41  */
42 #include <sys/cmn_err.h>
43 
44 #include <vm/as.h>
45 #include <vm/hat.h>
46 
/*
 * Global state for the ref/mod statistics machinery.  The routines in
 * this file manipulate the lists and counters below while holding
 * hat_statlock.
 */
kmutex_t hat_statlock;		/* protects all hat statistics data */
struct hrmstat *hrm_memlist;	/* tracks memory alloced for hrm_blist blocks */
struct hrmstat **hrm_hashtab;	/* hash table for finding blocks quickly */
struct hrmstat *hrm_blist;	/* free list of blocks, chained by hrm_hnext */
int hrm_blist_incr = HRM_BLIST_INCR;	/* default blocks per allocation */
int hrm_blist_lowater = HRM_BLIST_INCR/2; /* refill at or below this count */
int hrm_blist_num = 0;		/* blocks currently on hrm_blist */
int hrm_blist_total = 0;	/* usable blocks allocated so far */
int hrm_mlockinited = 0;	/* not referenced by the code in this file */
int hrm_allocfailmsg = 0;	/* print a message when allocations fail */
int hrm_allocfail = 0;		/* count of failed hrm_balloc() calls */

/*
 * Forward declarations for the file-local helpers.
 */
static struct hrmstat	*hrm_balloc(void);
static void	hrm_init(void);
static void	hrm_link(struct hrmstat *);
static void	hrm_setbits(struct hrmstat *, caddr_t, uint_t);
static void	hrm_hashout(struct hrmstat *);
static void	hrm_getblk(int);
65 
/*
 * Hash bucket index for the statistics block covering addr in
 * address space as.
 */
#define	hrm_hash(as, addr) \
	((((uintptr_t)(as) >> 2) ^ ((uintptr_t)(addr) >> HRM_BASESHIFT)) & \
	HRM_HASHMASK)

/*
 * Evaluates to 1 when block hrm belongs to address space as and covers
 * addr, otherwise to 0.
 */
#define	hrm_match(hrm, as, addr) \
	((hrm)->hrm_as == (as) && \
	(hrm)->hrm_base == ((uintptr_t)(addr) & HRM_BASEMASK))
73 
74 /*
75  * reserve enough statistic blocks for
76  * chunk of bytes (pages) in a given as.
77  */
78 /* ARGSUSED */
79 void
80 hat_resvstat(size_t chunk, struct as *as, caddr_t addr)
81 {
82 	int nhrm = btop(chunk)/HRM_PAGES;
83 
84 	if (nhrm < HRM_BLIST_INCR)
85 		nhrm = 0;	/* preallocate at least HRM_BLIST_INCR */
86 	hrm_getblk(nhrm);
87 }
88 
89 /*
90  * Start the statistics gathering for an address space.
91  * Return -1 if we can't do it, otherwise return an opaque
92  * identifier to be used when querying for the gathered statistics.
93  * The identifier is an unused bit in a_vbits.
94  * Bit 0 is reserved for swsmon.
95  */
96 int
97 hat_startstat(struct as *as)
98 {
99 	uint_t nbits;		/* number of bits */
100 	uint_t bn;		/* bit number */
101 	uint_t id;		/* new vbit, identifier */
102 	uint_t vbits;		/* used vbits of address space */
103 	size_t chunk;		/* mapped size for stats */
104 	/*
105 	 * Initialize global data, if needed.
106 	 */
107 	hrm_init();
108 
109 	/*
110 	 * If the refmod saving memory allocator runs out, print
111 	 * a warning message about how to fix it, see comment at
112 	 * the beginning of hat_setstat.
113 	 */
114 	if (hrm_allocfailmsg) {
115 		cmn_err(CE_WARN,
116 		    "hrm_balloc failures occured, increase hrm_blist_incr");
117 		hrm_allocfailmsg = 0;
118 	}
119 
120 	/*
121 	 * Verify that a buffer of statistics blocks exists
122 	 * and allocate more, if needed.
123 	 */
124 
125 	chunk = hat_get_mapped_size(as->a_hat);
126 	chunk = (btop(chunk)/HRM_PAGES);
127 	if (chunk < HRM_BLIST_INCR)
128 		chunk = 0;
129 
130 	hrm_getblk((int)chunk);
131 
132 	/*
133 	 * Find a unused id in the given address space.
134 	 */
135 	hat_enter(as->a_hat);
136 	vbits = as->a_vbits;
137 	nbits = sizeof (as->a_vbits) * NBBY;
138 	for (bn = 1, id = 2; bn < (nbits - 1); bn++, id <<= 1)
139 		if ((id & vbits) == 0)
140 			break;
141 	if (bn >= (nbits - 1)) {
142 		hat_exit(as->a_hat);
143 		return (-1);
144 	}
145 	as->a_vbits |= id;
146 	hat_exit(as->a_hat);
147 	(void) hat_stats_enable(as->a_hat);
148 	return (id);
149 }
150 
151 /*
152  * Record referenced and modified information for an address space.
153  * Rmbits is a word containing the referenced bit in bit position 1
154  * and the modified bit in bit position 0.
155  *
156  * For current informational uses, one can rerun any program using
157  * this facility after modifying the hrm_blist_incr to be a larger
158  * amount so that a larger buffer of blocks will be maintained.
159  */
160 void
161 hat_setstat(struct as *as, caddr_t addr, size_t len, uint_t rmbits)
162 {
163 	struct hrmstat	*hrm;
164 	uint_t		vbits, newbits, nb;
165 	int		h;
166 
167 	ASSERT(len == PAGESIZE);
168 	ASSERT((rmbits & ~(P_MOD|P_REF)) == 0);
169 
170 	if (rmbits == 0)
171 		return;
172 
173 	/*
174 	 * Initialize global data, if needed.
175 	 */
176 	hrm_init();
177 
178 	mutex_enter(&hat_statlock);
179 
180 	/*
181 	 * The previous owner of hat_statlock could have been
182 	 * hat_freestat(). Check whether hrm_hashtab is NULL, if it is,
183 	 * we bail out.
184 	 */
185 	if (hrm_hashtab == NULL) {
186 		mutex_exit(&hat_statlock);
187 		return;
188 	}
189 
190 	/*
191 	 * Search the hash list for the as and addr we are looking for
192 	 * and set the ref and mod bits in every block that matches.
193 	 */
194 	vbits = 0;
195 	h = hrm_hash(as, addr);
196 	for (hrm = hrm_hashtab[h]; hrm; hrm = hrm->hrm_hnext) {
197 		if (hrm_match(hrm, as, addr)) {
198 			hrm_setbits(hrm, addr, rmbits);
199 			vbits |= hrm->hrm_id;
200 		}
201 	}
202 
203 	/*
204 	 * If we didn't find a block for all of the enabled
205 	 * vpages bits, then allocate and initialize a block
206 	 * for each bit that was not found.
207 	 */
208 	if (vbits != as->a_vbits) {
209 		newbits = vbits ^ as->a_vbits;
210 		while (newbits) {
211 			if (ffs(newbits))
212 				nb = 1 << (ffs(newbits)-1);
213 			hrm = (struct hrmstat *)hrm_balloc();
214 			if (hrm == NULL) {
215 				hrm_allocfailmsg = 1;
216 				hrm_allocfail++;
217 				mutex_exit(&hat_statlock);
218 				return;
219 			}
220 			hrm->hrm_as = as;
221 			hrm->hrm_base = (uintptr_t)addr & HRM_BASEMASK;
222 			hrm->hrm_id = nb;
223 			hrm_link(hrm);
224 			hrm_setbits(hrm, addr, rmbits);
225 			newbits &= ~nb;
226 		}
227 	}
228 	mutex_exit(&hat_statlock);
229 }
230 
231 /*
232  * Free the resources used to maintain the referenced and modified
233  * statistics for the virtual page view of an address space
234  * identified by id.
235  */
236 void
237 hat_freestat(struct as *as, int id)
238 {
239 	struct hrmstat *hrm, *prev_ahrm;
240 	struct hrmstat **hashtab;
241 
242 	hat_stats_disable(as->a_hat);	/* tell the hat layer to stop */
243 	hat_enter(as->a_hat);
244 	if (id == 0)
245 		as->a_vbits = 0;
246 	else
247 		as->a_vbits &= ~id;
248 
249 	if ((hrm = as->a_hrm) == NULL) {
250 		hat_exit(as->a_hat);
251 		return;
252 	}
253 	hat_exit(as->a_hat);
254 
255 	mutex_enter(&hat_statlock);
256 	if (hrm_hashtab == NULL) {
257 		/* can't happen? */
258 		mutex_exit(&hat_statlock);
259 		return;
260 	}
261 	for (prev_ahrm = NULL; hrm; hrm = hrm->hrm_anext) {
262 		if ((id == hrm->hrm_id) || (id == NULL)) {
263 
264 			hrm_hashout(hrm);
265 			hrm->hrm_hnext = hrm_blist;
266 			hrm_blist = hrm;
267 			hrm_blist_num++;
268 
269 			if (prev_ahrm == NULL)
270 				as->a_hrm = hrm->hrm_anext;
271 			else
272 				prev_ahrm->hrm_anext = hrm->hrm_anext;
273 
274 		} else
275 			prev_ahrm = hrm;
276 	}
277 
278 	/*
279 	 * If all statistics blocks are free,
280 	 * return the memory to the system.
281 	 */
282 	if (hrm_blist_num == hrm_blist_total) {
283 		/* zero the block list since we are giving back its memory */
284 		hrm_blist = NULL;
285 		hrm_blist_num = 0;
286 		hrm_blist_total = 0;
287 		hrm = hrm_memlist;
288 		hrm_memlist = NULL;
289 		hashtab = hrm_hashtab;
290 		hrm_hashtab = NULL;
291 	} else {
292 		hashtab = NULL;
293 	}
294 
295 	mutex_exit(&hat_statlock);
296 
297 	if (hashtab != NULL) {
298 		struct hrmstat *next;
299 
300 		kmem_free(hashtab, HRM_HASHSIZE * sizeof (char *));
301 		while (hrm != NULL) {
302 			next = hrm->hrm_hnext;
303 			kmem_free(hrm, hrm->hrm_base);
304 			hrm = next;
305 		}
306 	}
307 }
308 
309 /*
310  * Initialize any global state for the statistics handling.
311  * Hrm_lock protects the globally allocted memory:
312  *	hrm_memlist and hrm_hashtab.
313  */
314 static void
315 hrm_init(void)
316 {
317 	/*
318 	 * Alloacte the hashtable if it doesn't exist yet.
319 	 */
320 	mutex_enter(&hat_statlock);
321 	if (hrm_hashtab == NULL)
322 		hrm_hashtab =
323 			kmem_zalloc(HRM_HASHSIZE * sizeof (char *), KM_SLEEP);
324 	mutex_exit(&hat_statlock);
325 }
326 
327 /*
328  * Grab memory for statistics gathering of the hat layer.
329  */
330 static void
331 hrm_getblk(int chunk)
332 {
333 	struct hrmstat *hrm, *l;
334 	int i;
335 	int hrm_incr;
336 
337 	mutex_enter(&hat_statlock);
338 	if ((hrm_blist == NULL) ||
339 	    (hrm_blist_num <= hrm_blist_lowater) ||
340 	    (chunk && (hrm_blist_num < chunk))) {
341 
342 		mutex_exit(&hat_statlock);
343 
344 		hrm_incr = chunk? chunk : hrm_blist_incr;
345 		hrm = kmem_zalloc(sizeof (struct hrmstat) * hrm_incr, KM_SLEEP);
346 		hrm->hrm_base = sizeof (struct hrmstat) * hrm_incr;
347 
348 		/*
349 		 * thread the allocated blocks onto a freelist
350 		 * using the first block to hold information for
351 		 * freeing them all later
352 		 */
353 		mutex_enter(&hat_statlock);
354 		hrm->hrm_hnext = hrm_memlist;
355 		hrm_memlist = hrm;
356 
357 		hrm_blist_total += (hrm_incr - 1);
358 		for (i = 1; i < hrm_incr; i++) {
359 			l = &hrm[i];
360 			l->hrm_hnext = hrm_blist;
361 			hrm_blist = l;
362 			hrm_blist_num++;
363 		}
364 	}
365 	mutex_exit(&hat_statlock);
366 }
367 
368 static void
369 hrm_hashin(struct hrmstat *hrm)
370 {
371 	int 		h;
372 
373 	ASSERT(MUTEX_HELD(&hat_statlock));
374 	h = hrm_hash(hrm->hrm_as, hrm->hrm_base);
375 
376 	hrm->hrm_hnext = hrm_hashtab[h];
377 	hrm_hashtab[h] = hrm;
378 }
379 
380 static void
381 hrm_hashout(struct hrmstat *hrm)
382 {
383 	struct hrmstat	*list, **prev_hrm;
384 	int		h;
385 
386 	ASSERT(MUTEX_HELD(&hat_statlock));
387 	h = hrm_hash(hrm->hrm_as, hrm->hrm_base);
388 	list = hrm_hashtab[h];
389 	prev_hrm = &hrm_hashtab[h];
390 
391 	while (list) {
392 		if (list == hrm) {
393 			*prev_hrm = list->hrm_hnext;
394 			return;
395 		}
396 		prev_hrm = &list->hrm_hnext;
397 		list = list->hrm_hnext;
398 	}
399 }
400 
401 
402 /*
403  * Link a statistic block into an address space and also put it
404  * on the hash list for future references.
405  */
406 static void
407 hrm_link(struct hrmstat *hrm)
408 {
409 	struct as *as = hrm->hrm_as;
410 
411 	ASSERT(MUTEX_HELD(&hat_statlock));
412 	hrm->hrm_anext = as->a_hrm;
413 	as->a_hrm = hrm;
414 	hrm_hashin(hrm);
415 }
416 
417 /*
418  * Allocate a block for statistics keeping.
419  * Returns NULL if blocks are unavailable.
420  */
421 static struct hrmstat *
422 hrm_balloc(void)
423 {
424 	struct hrmstat *hrm;
425 
426 	ASSERT(MUTEX_HELD(&hat_statlock));
427 
428 	hrm = hrm_blist;
429 	if (hrm != NULL) {
430 		hrm_blist = hrm->hrm_hnext;
431 		hrm_blist_num--;
432 		hrm->hrm_hnext = NULL;
433 	}
434 	return (hrm);
435 }
436 
437 /*
438  * Set the ref and mod bits for addr within statistics block hrm.
439  */
440 static void
441 hrm_setbits(struct hrmstat *hrm, caddr_t addr, uint_t bits)
442 {
443 	uint_t po, bo, spb;
444 	uint_t nbits;
445 
446 	po = ((uintptr_t)addr & HRM_BASEOFFSET) >> MMU_PAGESHIFT; /* pg off */
447 	bo = po / (NBBY / 2);			/* which byte in bit array */
448 	spb = (3 - (po & 3)) * 2;		/* shift position within byte */
449 	nbits = bits << spb;			/* bit mask */
450 	hrm->hrm_bits[bo] |= nbits;
451 }
452 
453 /*
454  * Return collected statistics about an address space.
455  * If clearflag is set, atomically read and zero the bits.
456  *
457  * Fill in the data array supplied with the referenced and
458  * modified bits collected for address range [addr ... addr + len]
459  * in address space, as, uniquely identified by id.
460  * The destination is a byte array.  We fill in three bits per byte:
461  * referenced, modified, and hwmapped bits.
462  * Kernel only interface, can't fault on destination data array.
463  *
464  */
465 void
466 hat_getstat(struct as *as, caddr_t addr, size_t len, uint_t id,
467     caddr_t datap, int clearflag)
468 {
469 	size_t	np;		/* number of pages */
470 	caddr_t	a;
471 	char 	*dp;
472 
473 	np = btop(len);
474 	bzero(datap, np);
475 
476 	hat_sync(as->a_hat, addr, len, clearflag);
477 
478 	/* allocate more statistics blocks if needed */
479 	hrm_getblk(0);
480 
481 	mutex_enter(&hat_statlock);
482 	if (hrm_hashtab == NULL) {
483 		/* can happen when victim process exits */
484 		mutex_exit(&hat_statlock);
485 		return;
486 	}
487 	dp = datap;
488 	a = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
489 	while (a < addr + len) {
490 		struct hrmstat	*hrm;
491 		size_t	n;		/* number of pages, temp */
492 		int	h;		/* hash index */
493 		uint_t	po;
494 
495 		h = hrm_hash(as, a);
496 		n = (HRM_PAGES -
497 			(((uintptr_t)a & HRM_PAGEMASK) >> MMU_PAGESHIFT));
498 		if (n > np)
499 			n = np;
500 		po = ((uintptr_t)a & HRM_BASEOFFSET) >> MMU_PAGESHIFT;
501 
502 		for (hrm = hrm_hashtab[h]; hrm; hrm = hrm->hrm_hnext) {
503 			if (hrm->hrm_as == as &&
504 			    hrm->hrm_base == ((uintptr_t)a & HRM_BASEMASK) &&
505 			    id == hrm->hrm_id) {
506 				int i, nr;
507 				uint_t bo, spb;
508 
509 				/*
510 				 * Extract leading unaligned bits.
511 				 */
512 				i = 0;
513 				while (i < n && (po & 3)) {
514 					bo = po / (NBBY / 2);
515 					spb = (3 - (po & 3)) * 2;
516 					*dp++ |= (hrm->hrm_bits[bo] >> spb) & 3;
517 					if (clearflag)
518 						hrm->hrm_bits[bo] &= ~(3<<spb);
519 					po++;
520 					i++;
521 				}
522 				/*
523 				 * Extract aligned bits.
524 				 */
525 				nr = n/4*4;
526 				bo = po / (NBBY / 2);
527 				while (i < nr) {
528 					int bits = hrm->hrm_bits[bo];
529 					*dp++ |= (bits >> 6) & 3;
530 					*dp++ |= (bits >> 4) & 3;
531 					*dp++ |= (bits >> 2) & 3;
532 					*dp++ |= (bits >> 0) & 3;
533 					if (clearflag)
534 						hrm->hrm_bits[bo] = 0;
535 					bo++;
536 					po += 4;
537 					i += 4;
538 				}
539 				/*
540 				 * Extract trailing unaligned bits.
541 				 */
542 				while (i < n) {
543 					bo = po / (NBBY / 2);
544 					spb = (3 - (po & 3)) * 2;
545 					*dp++ |= (hrm->hrm_bits[bo] >> spb) & 3;
546 					if (clearflag)
547 						hrm->hrm_bits[bo] &= ~(3<<spb);
548 					po++;
549 					i++;
550 				}
551 
552 				break;
553 			}
554 		}
555 		if (hrm == NULL)
556 			dp += n;
557 		np -= n;
558 		a += n * MMU_PAGESIZE;
559 	}
560 	mutex_exit(&hat_statlock);
561 }
562