xref: /titanic_52/usr/src/uts/common/vm/hat_refmod.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * The following routines implement the hat layer's
31  * recording of the referenced and modified bits.
32  */
33 
34 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/debug.h>
38 #include <sys/kmem.h>
39 
40 /*
41  * Note, usage of cmn_err requires you not hold any hat layer locks.
42  */
43 #include <sys/cmn_err.h>
44 
45 #include <vm/as.h>
46 #include <vm/hat.h>
47 
48 kmutex_t hat_statlock;		/* protects all hat statistics data */
49 struct hrmstat *hrm_memlist;	/* tracks memory alloced for hrm_blist blocks */
50 struct hrmstat **hrm_hashtab;	/* hash table for finding blocks quickly */
51 struct hrmstat *hrm_blist;
52 int hrm_blist_incr = HRM_BLIST_INCR;
53 int hrm_blist_lowater = HRM_BLIST_INCR/2;
54 int hrm_blist_num = 0;
55 int hrm_blist_total = 0;
56 int hrm_mlockinited = 0;
57 int hrm_allocfailmsg = 0;	/* print a message when allocations fail */
58 int hrm_allocfail = 0;
59 
60 static struct hrmstat	*hrm_balloc(void);
61 static int	hrm_init(void);
62 static void	hrm_link(struct hrmstat *);
63 static void	hrm_setbits(struct hrmstat *, caddr_t, uint_t);
64 static void	hrm_hashout(struct hrmstat *);
65 static void	hrm_getblk(int);
66 
/*
 * Map an (address space, address) pair to a bucket index of hrm_hashtab:
 * the address shifted down by HRM_BASESHIFT is XORed with the as pointer
 * shifted right by two, and the result is masked with HRM_HASHMASK.
 */
#define	hrm_hash(as, addr) \
	((((uintptr_t)(as) >> 2) ^ ((uintptr_t)(addr) >> HRM_BASESHIFT)) & \
	HRM_HASHMASK)

/*
 * Evaluates to 1 when block hrm belongs to address space as and its base
 * equals addr masked with HRM_BASEMASK; evaluates to 0 otherwise.
 */
#define	hrm_match(hrm, as, addr) \
	((hrm)->hrm_as == (as) && \
	(hrm)->hrm_base == ((uintptr_t)(addr) & HRM_BASEMASK))
74 
75 /*
76  * reserve enough statistic blocks for
77  * chunk of bytes (pages) in a given as.
78  */
79 /* ARGSUSED */
80 void
81 hat_resvstat(size_t chunk, struct as *as, caddr_t addr)
82 {
83 	int nhrm = btop(chunk)/HRM_PAGES;
84 
85 	if (nhrm < HRM_BLIST_INCR)
86 		nhrm = 0;	/* preallocate at least HRM_BLIST_INCR */
87 	hrm_getblk(nhrm);
88 }
89 
90 /*
91  * Start the statistics gathering for an address space.
92  * Return -1 if we can't do it, otherwise return an opaque
93  * identifier to be used when querying for the gathered statistics.
94  * The identifier is an unused bit in a_vbits.
95  * Bit 0 is reserved for swsmon.
96  */
97 int
98 hat_startstat(struct as *as)
99 {
100 	uint_t nbits;		/* number of bits */
101 	uint_t bn;		/* bit number */
102 	uint_t id;		/* new vbit, identifier */
103 	uint_t vbits;		/* used vbits of address space */
104 	size_t chunk;		/* mapped size for stats */
105 	/*
106 	 * Initialize global data, if needed.
107 	 */
108 	if (hrm_init() == -1)
109 		return (-1);
110 
111 	/*
112 	 * If the refmod saving memory allocator runs out, print
113 	 * a warning message about how to fix it, see comment at
114 	 * the beginning of hat_setstat.
115 	 */
116 	if (hrm_allocfailmsg) {
117 		cmn_err(CE_WARN,
118 		    "hrm_balloc failures occured, increase hrm_blist_incr");
119 		hrm_allocfailmsg = 0;
120 	}
121 
122 	/*
123 	 * Verify that a buffer of statistics blocks exists
124 	 * and allocate more, if needed.
125 	 */
126 
127 	chunk = hat_get_mapped_size(as->a_hat);
128 	chunk = (btop(chunk)/HRM_PAGES);
129 	if (chunk < HRM_BLIST_INCR)
130 		chunk = 0;
131 
132 	hrm_getblk((int)chunk);
133 
134 	/*
135 	 * Find a unused id in the given address space.
136 	 */
137 	hat_enter(as->a_hat);
138 	vbits = as->a_vbits;
139 	nbits = sizeof (as->a_vbits) * NBBY;
140 	for (bn = 1, id = 2; bn < (nbits - 1); bn++, id <<= 1)
141 		if ((id & vbits) == 0)
142 			break;
143 	if (bn >= (nbits - 1)) {
144 		hat_exit(as->a_hat);
145 		return (-1);
146 	}
147 	as->a_vbits |= id;
148 	hat_exit(as->a_hat);
149 	(void) hat_stats_enable(as->a_hat);
150 	return (id);
151 }
152 
153 /*
154  * Record referenced and modified information for an address space.
155  * Rmbits is a word containing the referenced bit in bit position 1
156  * and the modified bit in bit position 0.
157  *
158  * For current informational uses, one can rerun any program using
159  * this facility after modifying the hrm_blist_incr to be a larger
160  * amount so that a larger buffer of blocks will be maintained.
161  */
162 void
163 hat_setstat(struct as *as, caddr_t addr, size_t len, uint_t rmbits)
164 {
165 	struct hrmstat	*hrm;
166 	uint_t		vbits, newbits, nb;
167 	int		h;
168 
169 	ASSERT(len == PAGESIZE);
170 	ASSERT((rmbits & ~(P_MOD|P_REF)) == 0);
171 
172 	if (rmbits == 0)
173 		return;
174 
175 	/*
176 	 * Initialize global data, if needed.
177 	 */
178 	if (hrm_init() == -1)
179 		return;
180 
181 	mutex_enter(&hat_statlock);
182 
183 	/*
184 	 * Search the hash list for the as and addr we are looking for
185 	 * and set the ref and mod bits in every block that matches.
186 	 */
187 	vbits = 0;
188 	h = hrm_hash(as, addr);
189 	for (hrm = hrm_hashtab[h]; hrm; hrm = hrm->hrm_hnext) {
190 		if (hrm_match(hrm, as, addr)) {
191 			hrm_setbits(hrm, addr, rmbits);
192 			vbits |= hrm->hrm_id;
193 		}
194 	}
195 
196 	/*
197 	 * If we didn't find a block for all of the enabled
198 	 * vpages bits, then allocate and initialize a block
199 	 * for each bit that was not found.
200 	 */
201 	if (vbits != as->a_vbits) {
202 		newbits = vbits ^ as->a_vbits;
203 		while (newbits) {
204 			if (ffs(newbits))
205 				nb = 1 << (ffs(newbits)-1);
206 			hrm = (struct hrmstat *)hrm_balloc();
207 			if (hrm == NULL) {
208 				hrm_allocfailmsg = 1;
209 				hrm_allocfail++;
210 				mutex_exit(&hat_statlock);
211 				return;
212 			}
213 			hrm->hrm_as = as;
214 			hrm->hrm_base = (uintptr_t)addr & HRM_BASEMASK;
215 			hrm->hrm_id = nb;
216 			hrm_link(hrm);
217 			hrm_setbits(hrm, addr, rmbits);
218 			newbits &= ~nb;
219 		}
220 	}
221 	mutex_exit(&hat_statlock);
222 }
223 
224 /*
225  * Free the resources used to maintain the referenced and modified
226  * statistics for the virtual page view of an address space
227  * identified by id.
228  */
229 void
230 hat_freestat(struct as *as, int id)
231 {
232 	struct hrmstat *hrm, *prev_ahrm;
233 
234 	hat_stats_disable(as->a_hat);	/* tell the hat layer to stop */
235 	hat_enter(as->a_hat);
236 	if (id == 0)
237 		as->a_vbits = 0;
238 	else
239 		as->a_vbits &= ~id;
240 
241 	if ((hrm = as->a_hrm) == NULL) {
242 		hat_exit(as->a_hat);
243 		return;
244 	}
245 	hat_exit(as->a_hat);
246 
247 	mutex_enter(&hat_statlock);
248 	if (hrm_hashtab == NULL) {
249 		/* can't happen? */
250 		mutex_exit(&hat_statlock);
251 		return;
252 	}
253 	for (prev_ahrm = NULL; hrm; hrm = hrm->hrm_anext) {
254 		if ((id == hrm->hrm_id) || (id == NULL)) {
255 
256 			hrm_hashout(hrm);
257 			hrm->hrm_hnext = hrm_blist;
258 			hrm_blist = hrm;
259 			hrm_blist_num++;
260 
261 			if (prev_ahrm == NULL)
262 				as->a_hrm = hrm->hrm_anext;
263 			else
264 				prev_ahrm->hrm_anext = hrm->hrm_anext;
265 
266 		} else
267 			prev_ahrm = hrm;
268 	}
269 
270 	/*
271 	 * If all statistics blocks are free,
272 	 * return the memory to the system.
273 	 */
274 	if (hrm_blist_num == hrm_blist_total) {
275 		/* zero the block list since we are giving back its memory */
276 		hrm_blist = NULL;
277 		hrm_blist_num = 0;
278 		hrm_blist_total = 0;
279 		while (hrm_memlist) {
280 			hrm = hrm_memlist;
281 			hrm_memlist = hrm->hrm_hnext;
282 			kmem_free(hrm, hrm->hrm_base);
283 		}
284 		ASSERT(hrm_memlist == NULL);
285 		kmem_free(hrm_hashtab, HRM_HASHSIZE * sizeof (char *));
286 		hrm_hashtab = NULL;
287 	}
288 	mutex_exit(&hat_statlock);
289 }
290 
291 /*
292  * Initialize any global state for the statistics handling.
293  * Hrm_lock protects the globally allocted memory:
294  *	hrm_memlist and hrm_hashtab.
295  */
296 static int
297 hrm_init(void)
298 {
299 	/*
300 	 * Alloacte the hashtable if it doesn't exist yet.
301 	 */
302 	mutex_enter(&hat_statlock);
303 	if (hrm_hashtab == NULL)
304 		hrm_hashtab =
305 			kmem_zalloc(HRM_HASHSIZE * sizeof (char *), KM_SLEEP);
306 	mutex_exit(&hat_statlock);
307 	return (0);
308 }
309 
310 /*
311  * Grab memory for statistics gathering of the hat layer.
312  */
313 static void
314 hrm_getblk(int chunk)
315 {
316 	struct hrmstat *hrm, *l;
317 	int i;
318 	int hrm_incr;
319 
320 	mutex_enter(&hat_statlock);
321 	if ((hrm_blist == NULL) ||
322 	    (hrm_blist_num <= hrm_blist_lowater) ||
323 	    chunk) {
324 
325 		mutex_exit(&hat_statlock);
326 
327 		hrm_incr = chunk? chunk : hrm_blist_incr;
328 		hrm = kmem_zalloc(sizeof (struct hrmstat) * hrm_incr, KM_SLEEP);
329 		hrm->hrm_base = sizeof (struct hrmstat) * hrm_incr;
330 
331 		/*
332 		 * thread the allocated blocks onto a freelist
333 		 * using the first block to hold information for
334 		 * freeing them all later
335 		 */
336 		mutex_enter(&hat_statlock);
337 		hrm->hrm_hnext = hrm_memlist;
338 		hrm_memlist = hrm;
339 
340 		hrm_blist_total += (hrm_incr - 1);
341 		for (i = 1; i < hrm_incr; i++) {
342 			l = &hrm[i];
343 			l->hrm_hnext = hrm_blist;
344 			hrm_blist = l;
345 			hrm_blist_num++;
346 		}
347 	}
348 	mutex_exit(&hat_statlock);
349 }
350 
351 static void
352 hrm_hashin(struct hrmstat *hrm)
353 {
354 	int 		h;
355 
356 	ASSERT(MUTEX_HELD(&hat_statlock));
357 	h = hrm_hash(hrm->hrm_as, hrm->hrm_base);
358 
359 	hrm->hrm_hnext = hrm_hashtab[h];
360 	hrm_hashtab[h] = hrm;
361 }
362 
363 static void
364 hrm_hashout(struct hrmstat *hrm)
365 {
366 	struct hrmstat	*list, **prev_hrm;
367 	int		h;
368 
369 	ASSERT(MUTEX_HELD(&hat_statlock));
370 	h = hrm_hash(hrm->hrm_as, hrm->hrm_base);
371 	list = hrm_hashtab[h];
372 	prev_hrm = &hrm_hashtab[h];
373 
374 	while (list) {
375 		if (list == hrm) {
376 			*prev_hrm = list->hrm_hnext;
377 			return;
378 		}
379 		prev_hrm = &list->hrm_hnext;
380 		list = list->hrm_hnext;
381 	}
382 }
383 
384 
385 /*
386  * Link a statistic block into an address space and also put it
387  * on the hash list for future references.
388  */
389 static void
390 hrm_link(struct hrmstat *hrm)
391 {
392 	struct as *as = hrm->hrm_as;
393 
394 	ASSERT(MUTEX_HELD(&hat_statlock));
395 	hrm->hrm_anext = as->a_hrm;
396 	as->a_hrm = hrm;
397 	hrm_hashin(hrm);
398 }
399 
400 /*
401  * Allocate a block for statistics keeping.
402  * Returns NULL if blocks are unavailable.
403  */
404 static struct hrmstat *
405 hrm_balloc(void)
406 {
407 	struct hrmstat *hrm;
408 
409 	ASSERT(MUTEX_HELD(&hat_statlock));
410 
411 	hrm = hrm_blist;
412 	if (hrm != NULL) {
413 		hrm_blist = hrm->hrm_hnext;
414 		hrm_blist_num--;
415 		hrm->hrm_hnext = NULL;
416 	}
417 	return (hrm);
418 }
419 
420 /*
421  * Set the ref and mod bits for addr within statistics block hrm.
422  */
423 static void
424 hrm_setbits(struct hrmstat *hrm, caddr_t addr, uint_t bits)
425 {
426 	uint_t po, bo, spb;
427 	uint_t nbits;
428 
429 	po = ((uintptr_t)addr & HRM_BASEOFFSET) >> MMU_PAGESHIFT; /* pg off */
430 	bo = po / (NBBY / 2);			/* which byte in bit array */
431 	spb = (3 - (po & 3)) * 2;		/* shift position within byte */
432 	nbits = bits << spb;			/* bit mask */
433 	hrm->hrm_bits[bo] |= nbits;
434 }
435 
436 /*
437  * Return collected statistics about an address space.
438  * If clearflag is set, atomically read and zero the bits.
439  *
440  * Fill in the data array supplied with the referenced and
441  * modified bits collected for address range [addr ... addr + len]
442  * in address space, as, uniquely identified by id.
443  * The destination is a byte array.  We fill in three bits per byte:
444  * referenced, modified, and hwmapped bits.
445  * Kernel only interface, can't fault on destination data array.
446  *
447  */
448 void
449 hat_getstat(struct as *as, caddr_t addr, size_t len, uint_t id,
450     caddr_t datap, int clearflag)
451 {
452 	size_t	np;		/* number of pages */
453 	caddr_t	a;
454 	char 	*dp;
455 
456 	np = btop(len);
457 	bzero(datap, np);
458 
459 	hat_sync(as->a_hat, addr, len, clearflag);
460 
461 	/* allocate more statistics blocks if needed */
462 	hrm_getblk(0);
463 
464 	mutex_enter(&hat_statlock);
465 	if (hrm_hashtab == NULL) {
466 		/* can happen when victim process exits */
467 		mutex_exit(&hat_statlock);
468 		return;
469 	}
470 	dp = datap;
471 	a = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
472 	while (a < addr + len) {
473 		struct hrmstat	*hrm;
474 		size_t	n;		/* number of pages, temp */
475 		int	h;		/* hash index */
476 		uint_t	po;
477 
478 		h = hrm_hash(as, a);
479 		n = (HRM_PAGES -
480 			(((uintptr_t)a & HRM_PAGEMASK) >> MMU_PAGESHIFT));
481 		if (n > np)
482 			n = np;
483 		po = ((uintptr_t)a & HRM_BASEOFFSET) >> MMU_PAGESHIFT;
484 
485 		for (hrm = hrm_hashtab[h]; hrm; hrm = hrm->hrm_hnext) {
486 			if (hrm->hrm_as == as &&
487 			    hrm->hrm_base == ((uintptr_t)a & HRM_BASEMASK) &&
488 			    id == hrm->hrm_id) {
489 				int i, nr;
490 				uint_t bo, spb;
491 
492 				/*
493 				 * Extract leading unaligned bits.
494 				 */
495 				i = 0;
496 				while (i < n && (po & 3)) {
497 					bo = po / (NBBY / 2);
498 					spb = (3 - (po & 3)) * 2;
499 					*dp++ |= (hrm->hrm_bits[bo] >> spb) & 3;
500 					if (clearflag)
501 						hrm->hrm_bits[bo] &= ~(3<<spb);
502 					po++;
503 					i++;
504 				}
505 				/*
506 				 * Extract aligned bits.
507 				 */
508 				nr = n/4*4;
509 				bo = po / (NBBY / 2);
510 				while (i < nr) {
511 					int bits = hrm->hrm_bits[bo];
512 					*dp++ |= (bits >> 6) & 3;
513 					*dp++ |= (bits >> 4) & 3;
514 					*dp++ |= (bits >> 2) & 3;
515 					*dp++ |= (bits >> 0) & 3;
516 					if (clearflag)
517 						hrm->hrm_bits[bo] = 0;
518 					bo++;
519 					po += 4;
520 					i += 4;
521 				}
522 				/*
523 				 * Extract trailing unaligned bits.
524 				 */
525 				while (i < n) {
526 					bo = po / (NBBY / 2);
527 					spb = (3 - (po & 3)) * 2;
528 					*dp++ |= (hrm->hrm_bits[bo] >> spb) & 3;
529 					if (clearflag)
530 						hrm->hrm_bits[bo] &= ~(3<<spb);
531 					po++;
532 					i++;
533 				}
534 
535 				break;
536 			}
537 		}
538 		if (hrm == NULL)
539 			dp += n;
540 		np -= n;
541 		a += n * MMU_PAGESIZE;
542 	}
543 	mutex_exit(&hat_statlock);
544 }
545