xref: /titanic_51/usr/src/uts/common/vm/hat_refmod.c (revision ebd1706e95186ddae1d4c0d63c47544cf33832ee)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * The following routines implement the hat layer's
31  * recording of the referenced and modified bits.
32  */
33 
34 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/debug.h>
38 #include <sys/kmem.h>
39 
40 /*
41  * Note, usage of cmn_err requires you not hold any hat layer locks.
42  */
43 #include <sys/cmn_err.h>
44 
45 #include <vm/as.h>
46 #include <vm/hat.h>
47 
48 kmutex_t hat_statlock;		/* protects all hat statistics data */
49 struct hrmstat *hrm_memlist;	/* tracks memory alloced for hrm_blist blocks */
50 struct hrmstat **hrm_hashtab;	/* hash table for finding blocks quickly */
51 struct hrmstat *hrm_blist;
52 int hrm_blist_incr = HRM_BLIST_INCR;
53 int hrm_blist_lowater = HRM_BLIST_INCR/2;
54 int hrm_blist_num = 0;
55 int hrm_blist_total = 0;
56 int hrm_mlockinited = 0;
57 int hrm_allocfailmsg = 0;	/* print a message when allocations fail */
58 int hrm_allocfail = 0;
59 
60 static struct hrmstat	*hrm_balloc(void);
61 static void	hrm_init(void);
62 static void	hrm_link(struct hrmstat *);
63 static void	hrm_setbits(struct hrmstat *, caddr_t, uint_t);
64 static void	hrm_hashout(struct hrmstat *);
65 static void	hrm_getblk(int);
66 
/*
 * Map an (address space, address) pair to a bucket index in hrm_hashtab.
 * The address contributes its bits above HRM_BASESHIFT, the as pointer
 * its bits above bit 1; the result is masked with HRM_HASHMASK.
 */
#define	hrm_hash(as, addr) \
	((((uintptr_t)(addr) >> HRM_BASESHIFT) ^ ((uintptr_t)(as) >> 2)) & \
	HRM_HASHMASK)

/*
 * Evaluates to 1 when block hrm belongs to as and its base equals addr
 * rounded down with HRM_BASEMASK, 0 otherwise.
 */
#define	hrm_match(hrm, as, addr) \
	((hrm)->hrm_as == (as) && \
	(hrm)->hrm_base == ((uintptr_t)(addr) & HRM_BASEMASK))
74 
75 /*
76  * reserve enough statistic blocks for
77  * chunk of bytes (pages) in a given as.
78  */
79 /* ARGSUSED */
80 void
81 hat_resvstat(size_t chunk, struct as *as, caddr_t addr)
82 {
83 	int nhrm = btop(chunk)/HRM_PAGES;
84 
85 	if (nhrm < HRM_BLIST_INCR)
86 		nhrm = 0;	/* preallocate at least HRM_BLIST_INCR */
87 	hrm_getblk(nhrm);
88 }
89 
90 /*
91  * Start the statistics gathering for an address space.
92  * Return -1 if we can't do it, otherwise return an opaque
93  * identifier to be used when querying for the gathered statistics.
94  * The identifier is an unused bit in a_vbits.
95  * Bit 0 is reserved for swsmon.
96  */
97 int
98 hat_startstat(struct as *as)
99 {
100 	uint_t nbits;		/* number of bits */
101 	uint_t bn;		/* bit number */
102 	uint_t id;		/* new vbit, identifier */
103 	uint_t vbits;		/* used vbits of address space */
104 	size_t chunk;		/* mapped size for stats */
105 	/*
106 	 * Initialize global data, if needed.
107 	 */
108 	hrm_init();
109 
110 	/*
111 	 * If the refmod saving memory allocator runs out, print
112 	 * a warning message about how to fix it, see comment at
113 	 * the beginning of hat_setstat.
114 	 */
115 	if (hrm_allocfailmsg) {
116 		cmn_err(CE_WARN,
117 		    "hrm_balloc failures occured, increase hrm_blist_incr");
118 		hrm_allocfailmsg = 0;
119 	}
120 
121 	/*
122 	 * Verify that a buffer of statistics blocks exists
123 	 * and allocate more, if needed.
124 	 */
125 
126 	chunk = hat_get_mapped_size(as->a_hat);
127 	chunk = (btop(chunk)/HRM_PAGES);
128 	if (chunk < HRM_BLIST_INCR)
129 		chunk = 0;
130 
131 	hrm_getblk((int)chunk);
132 
133 	/*
134 	 * Find a unused id in the given address space.
135 	 */
136 	hat_enter(as->a_hat);
137 	vbits = as->a_vbits;
138 	nbits = sizeof (as->a_vbits) * NBBY;
139 	for (bn = 1, id = 2; bn < (nbits - 1); bn++, id <<= 1)
140 		if ((id & vbits) == 0)
141 			break;
142 	if (bn >= (nbits - 1)) {
143 		hat_exit(as->a_hat);
144 		return (-1);
145 	}
146 	as->a_vbits |= id;
147 	hat_exit(as->a_hat);
148 	(void) hat_stats_enable(as->a_hat);
149 	return (id);
150 }
151 
152 /*
153  * Record referenced and modified information for an address space.
154  * Rmbits is a word containing the referenced bit in bit position 1
155  * and the modified bit in bit position 0.
156  *
157  * For current informational uses, one can rerun any program using
158  * this facility after modifying the hrm_blist_incr to be a larger
159  * amount so that a larger buffer of blocks will be maintained.
160  */
161 void
162 hat_setstat(struct as *as, caddr_t addr, size_t len, uint_t rmbits)
163 {
164 	struct hrmstat	*hrm;
165 	uint_t		vbits, newbits, nb;
166 	int		h;
167 
168 	ASSERT(len == PAGESIZE);
169 	ASSERT((rmbits & ~(P_MOD|P_REF)) == 0);
170 
171 	if (rmbits == 0)
172 		return;
173 
174 	/*
175 	 * Initialize global data, if needed.
176 	 */
177 	hrm_init();
178 
179 	mutex_enter(&hat_statlock);
180 
181 	/*
182 	 * The previous owner of hat_statlock could have been
183 	 * hat_freestat(). Check whether hrm_hashtab is NULL, if it is,
184 	 * we bail out.
185 	 */
186 	if (hrm_hashtab == NULL) {
187 		mutex_exit(&hat_statlock);
188 		return;
189 	}
190 
191 	/*
192 	 * Search the hash list for the as and addr we are looking for
193 	 * and set the ref and mod bits in every block that matches.
194 	 */
195 	vbits = 0;
196 	h = hrm_hash(as, addr);
197 	for (hrm = hrm_hashtab[h]; hrm; hrm = hrm->hrm_hnext) {
198 		if (hrm_match(hrm, as, addr)) {
199 			hrm_setbits(hrm, addr, rmbits);
200 			vbits |= hrm->hrm_id;
201 		}
202 	}
203 
204 	/*
205 	 * If we didn't find a block for all of the enabled
206 	 * vpages bits, then allocate and initialize a block
207 	 * for each bit that was not found.
208 	 */
209 	if (vbits != as->a_vbits) {
210 		newbits = vbits ^ as->a_vbits;
211 		while (newbits) {
212 			if (ffs(newbits))
213 				nb = 1 << (ffs(newbits)-1);
214 			hrm = (struct hrmstat *)hrm_balloc();
215 			if (hrm == NULL) {
216 				hrm_allocfailmsg = 1;
217 				hrm_allocfail++;
218 				mutex_exit(&hat_statlock);
219 				return;
220 			}
221 			hrm->hrm_as = as;
222 			hrm->hrm_base = (uintptr_t)addr & HRM_BASEMASK;
223 			hrm->hrm_id = nb;
224 			hrm_link(hrm);
225 			hrm_setbits(hrm, addr, rmbits);
226 			newbits &= ~nb;
227 		}
228 	}
229 	mutex_exit(&hat_statlock);
230 }
231 
232 /*
233  * Free the resources used to maintain the referenced and modified
234  * statistics for the virtual page view of an address space
235  * identified by id.
236  */
237 void
238 hat_freestat(struct as *as, int id)
239 {
240 	struct hrmstat *hrm, *prev_ahrm;
241 
242 	hat_stats_disable(as->a_hat);	/* tell the hat layer to stop */
243 	hat_enter(as->a_hat);
244 	if (id == 0)
245 		as->a_vbits = 0;
246 	else
247 		as->a_vbits &= ~id;
248 
249 	if ((hrm = as->a_hrm) == NULL) {
250 		hat_exit(as->a_hat);
251 		return;
252 	}
253 	hat_exit(as->a_hat);
254 
255 	mutex_enter(&hat_statlock);
256 	if (hrm_hashtab == NULL) {
257 		/* can't happen? */
258 		mutex_exit(&hat_statlock);
259 		return;
260 	}
261 	for (prev_ahrm = NULL; hrm; hrm = hrm->hrm_anext) {
262 		if ((id == hrm->hrm_id) || (id == NULL)) {
263 
264 			hrm_hashout(hrm);
265 			hrm->hrm_hnext = hrm_blist;
266 			hrm_blist = hrm;
267 			hrm_blist_num++;
268 
269 			if (prev_ahrm == NULL)
270 				as->a_hrm = hrm->hrm_anext;
271 			else
272 				prev_ahrm->hrm_anext = hrm->hrm_anext;
273 
274 		} else
275 			prev_ahrm = hrm;
276 	}
277 
278 	/*
279 	 * If all statistics blocks are free,
280 	 * return the memory to the system.
281 	 */
282 	if (hrm_blist_num == hrm_blist_total) {
283 		/* zero the block list since we are giving back its memory */
284 		hrm_blist = NULL;
285 		hrm_blist_num = 0;
286 		hrm_blist_total = 0;
287 		while (hrm_memlist) {
288 			hrm = hrm_memlist;
289 			hrm_memlist = hrm->hrm_hnext;
290 			kmem_free(hrm, hrm->hrm_base);
291 		}
292 		ASSERT(hrm_memlist == NULL);
293 		kmem_free(hrm_hashtab, HRM_HASHSIZE * sizeof (char *));
294 		hrm_hashtab = NULL;
295 	}
296 	mutex_exit(&hat_statlock);
297 }
298 
299 /*
300  * Initialize any global state for the statistics handling.
301  * Hrm_lock protects the globally allocted memory:
302  *	hrm_memlist and hrm_hashtab.
303  */
304 static void
305 hrm_init(void)
306 {
307 	/*
308 	 * Alloacte the hashtable if it doesn't exist yet.
309 	 */
310 	mutex_enter(&hat_statlock);
311 	if (hrm_hashtab == NULL)
312 		hrm_hashtab =
313 			kmem_zalloc(HRM_HASHSIZE * sizeof (char *), KM_SLEEP);
314 	mutex_exit(&hat_statlock);
315 }
316 
317 /*
318  * Grab memory for statistics gathering of the hat layer.
319  */
320 static void
321 hrm_getblk(int chunk)
322 {
323 	struct hrmstat *hrm, *l;
324 	int i;
325 	int hrm_incr;
326 
327 	mutex_enter(&hat_statlock);
328 	if ((hrm_blist == NULL) ||
329 	    (hrm_blist_num <= hrm_blist_lowater) ||
330 	    (chunk && (hrm_blist_num < chunk))) {
331 
332 		mutex_exit(&hat_statlock);
333 
334 		hrm_incr = chunk? chunk : hrm_blist_incr;
335 		hrm = kmem_zalloc(sizeof (struct hrmstat) * hrm_incr, KM_SLEEP);
336 		hrm->hrm_base = sizeof (struct hrmstat) * hrm_incr;
337 
338 		/*
339 		 * thread the allocated blocks onto a freelist
340 		 * using the first block to hold information for
341 		 * freeing them all later
342 		 */
343 		mutex_enter(&hat_statlock);
344 		hrm->hrm_hnext = hrm_memlist;
345 		hrm_memlist = hrm;
346 
347 		hrm_blist_total += (hrm_incr - 1);
348 		for (i = 1; i < hrm_incr; i++) {
349 			l = &hrm[i];
350 			l->hrm_hnext = hrm_blist;
351 			hrm_blist = l;
352 			hrm_blist_num++;
353 		}
354 	}
355 	mutex_exit(&hat_statlock);
356 }
357 
358 static void
359 hrm_hashin(struct hrmstat *hrm)
360 {
361 	int 		h;
362 
363 	ASSERT(MUTEX_HELD(&hat_statlock));
364 	h = hrm_hash(hrm->hrm_as, hrm->hrm_base);
365 
366 	hrm->hrm_hnext = hrm_hashtab[h];
367 	hrm_hashtab[h] = hrm;
368 }
369 
370 static void
371 hrm_hashout(struct hrmstat *hrm)
372 {
373 	struct hrmstat	*list, **prev_hrm;
374 	int		h;
375 
376 	ASSERT(MUTEX_HELD(&hat_statlock));
377 	h = hrm_hash(hrm->hrm_as, hrm->hrm_base);
378 	list = hrm_hashtab[h];
379 	prev_hrm = &hrm_hashtab[h];
380 
381 	while (list) {
382 		if (list == hrm) {
383 			*prev_hrm = list->hrm_hnext;
384 			return;
385 		}
386 		prev_hrm = &list->hrm_hnext;
387 		list = list->hrm_hnext;
388 	}
389 }
390 
391 
392 /*
393  * Link a statistic block into an address space and also put it
394  * on the hash list for future references.
395  */
396 static void
397 hrm_link(struct hrmstat *hrm)
398 {
399 	struct as *as = hrm->hrm_as;
400 
401 	ASSERT(MUTEX_HELD(&hat_statlock));
402 	hrm->hrm_anext = as->a_hrm;
403 	as->a_hrm = hrm;
404 	hrm_hashin(hrm);
405 }
406 
407 /*
408  * Allocate a block for statistics keeping.
409  * Returns NULL if blocks are unavailable.
410  */
411 static struct hrmstat *
412 hrm_balloc(void)
413 {
414 	struct hrmstat *hrm;
415 
416 	ASSERT(MUTEX_HELD(&hat_statlock));
417 
418 	hrm = hrm_blist;
419 	if (hrm != NULL) {
420 		hrm_blist = hrm->hrm_hnext;
421 		hrm_blist_num--;
422 		hrm->hrm_hnext = NULL;
423 	}
424 	return (hrm);
425 }
426 
427 /*
428  * Set the ref and mod bits for addr within statistics block hrm.
429  */
430 static void
431 hrm_setbits(struct hrmstat *hrm, caddr_t addr, uint_t bits)
432 {
433 	uint_t po, bo, spb;
434 	uint_t nbits;
435 
436 	po = ((uintptr_t)addr & HRM_BASEOFFSET) >> MMU_PAGESHIFT; /* pg off */
437 	bo = po / (NBBY / 2);			/* which byte in bit array */
438 	spb = (3 - (po & 3)) * 2;		/* shift position within byte */
439 	nbits = bits << spb;			/* bit mask */
440 	hrm->hrm_bits[bo] |= nbits;
441 }
442 
443 /*
444  * Return collected statistics about an address space.
445  * If clearflag is set, atomically read and zero the bits.
446  *
447  * Fill in the data array supplied with the referenced and
448  * modified bits collected for address range [addr ... addr + len]
449  * in address space, as, uniquely identified by id.
450  * The destination is a byte array.  We fill in three bits per byte:
451  * referenced, modified, and hwmapped bits.
452  * Kernel only interface, can't fault on destination data array.
453  *
454  */
455 void
456 hat_getstat(struct as *as, caddr_t addr, size_t len, uint_t id,
457     caddr_t datap, int clearflag)
458 {
459 	size_t	np;		/* number of pages */
460 	caddr_t	a;
461 	char 	*dp;
462 
463 	np = btop(len);
464 	bzero(datap, np);
465 
466 	hat_sync(as->a_hat, addr, len, clearflag);
467 
468 	/* allocate more statistics blocks if needed */
469 	hrm_getblk(0);
470 
471 	mutex_enter(&hat_statlock);
472 	if (hrm_hashtab == NULL) {
473 		/* can happen when victim process exits */
474 		mutex_exit(&hat_statlock);
475 		return;
476 	}
477 	dp = datap;
478 	a = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
479 	while (a < addr + len) {
480 		struct hrmstat	*hrm;
481 		size_t	n;		/* number of pages, temp */
482 		int	h;		/* hash index */
483 		uint_t	po;
484 
485 		h = hrm_hash(as, a);
486 		n = (HRM_PAGES -
487 			(((uintptr_t)a & HRM_PAGEMASK) >> MMU_PAGESHIFT));
488 		if (n > np)
489 			n = np;
490 		po = ((uintptr_t)a & HRM_BASEOFFSET) >> MMU_PAGESHIFT;
491 
492 		for (hrm = hrm_hashtab[h]; hrm; hrm = hrm->hrm_hnext) {
493 			if (hrm->hrm_as == as &&
494 			    hrm->hrm_base == ((uintptr_t)a & HRM_BASEMASK) &&
495 			    id == hrm->hrm_id) {
496 				int i, nr;
497 				uint_t bo, spb;
498 
499 				/*
500 				 * Extract leading unaligned bits.
501 				 */
502 				i = 0;
503 				while (i < n && (po & 3)) {
504 					bo = po / (NBBY / 2);
505 					spb = (3 - (po & 3)) * 2;
506 					*dp++ |= (hrm->hrm_bits[bo] >> spb) & 3;
507 					if (clearflag)
508 						hrm->hrm_bits[bo] &= ~(3<<spb);
509 					po++;
510 					i++;
511 				}
512 				/*
513 				 * Extract aligned bits.
514 				 */
515 				nr = n/4*4;
516 				bo = po / (NBBY / 2);
517 				while (i < nr) {
518 					int bits = hrm->hrm_bits[bo];
519 					*dp++ |= (bits >> 6) & 3;
520 					*dp++ |= (bits >> 4) & 3;
521 					*dp++ |= (bits >> 2) & 3;
522 					*dp++ |= (bits >> 0) & 3;
523 					if (clearflag)
524 						hrm->hrm_bits[bo] = 0;
525 					bo++;
526 					po += 4;
527 					i += 4;
528 				}
529 				/*
530 				 * Extract trailing unaligned bits.
531 				 */
532 				while (i < n) {
533 					bo = po / (NBBY / 2);
534 					spb = (3 - (po & 3)) * 2;
535 					*dp++ |= (hrm->hrm_bits[bo] >> spb) & 3;
536 					if (clearflag)
537 						hrm->hrm_bits[bo] &= ~(3<<spb);
538 					po++;
539 					i++;
540 				}
541 
542 				break;
543 			}
544 		}
545 		if (hrm == NULL)
546 			dp += n;
547 		np -= n;
548 		a += n * MMU_PAGESIZE;
549 	}
550 	mutex_exit(&hat_statlock);
551 }
552