xref: /illumos-gate/usr/src/uts/common/vm/hat_refmod.c (revision c6f039c73ee9eb7e4acb232afaca51cdf9d30ff3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * The following routines implement the hat layer's
28  * recording of the referenced and modified bits.
29  */
30 
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/debug.h>
35 #include <sys/kmem.h>
36 
37 /*
38  * Note, usage of cmn_err requires you not hold any hat layer locks.
39  */
40 #include <sys/cmn_err.h>
41 
42 #include <vm/as.h>
43 #include <vm/hat.h>
44 
45 kmutex_t hat_statlock;		/* protects all hat statistics data */
46 struct hrmstat *hrm_memlist;	/* tracks memory alloced for hrm_blist blocks */
47 struct hrmstat **hrm_hashtab;	/* hash table for finding blocks quickly */
48 struct hrmstat *hrm_blist;	/* free list of blocks, linked via hrm_hnext */
49 int hrm_blist_incr = HRM_BLIST_INCR;	/* extra blocks per hrm_getblk alloc */
50 int hrm_blist_lowater = HRM_BLIST_INCR/2; /* hrm_getblk refills at/below this */
51 int hrm_blist_num = 0;		/* number of blocks currently on hrm_blist */
52 int hrm_blist_total = 0;	/* usable blocks handed to hrm_blist so far */
53 int hrm_mlockinited = 0;	/* not referenced by the routines in this file */
54 int hrm_allocfailmsg = 0;	/* print a message when allocations fail */
55 int hrm_allocfail = 0;		/* count of hrm_balloc failures in hat_setstat */
56 
/*
 * Forward declarations for file-local helpers that are used before
 * their definitions appear below.
 */
57 static struct hrmstat	*hrm_balloc(void);
58 static void	hrm_link(struct hrmstat *);
59 static void	hrm_setbits(struct hrmstat *, caddr_t, uint_t);
60 static void	hrm_hashout(struct hrmstat *);
61 static void	hrm_getblk(int);
62 
/*
 * Compute the hrm_hashtab index for an (address space, address) pair:
 * the address shifted right by HRM_BASESHIFT is XORed with the as pointer
 * shifted right by 2, and the result is masked with HRM_HASHMASK.
 */
63 #define	hrm_hash(as, addr) \
64 	(HRM_HASHMASK & \
65 	(((uintptr_t)(addr) >> HRM_BASESHIFT) ^ ((uintptr_t)(as) >> 2)))
66 
/*
 * Evaluates to 1 when block hrm belongs to address space as and its
 * hrm_base equals addr masked with HRM_BASEMASK; otherwise 0.  Note that
 * the block's hrm_id is not part of the comparison.
 */
67 #define	hrm_match(hrm, as, addr) \
68 	(((hrm)->hrm_as == (as) && \
69 	((hrm)->hrm_base == ((uintptr_t)(addr) & HRM_BASEMASK))) ? 1 : 0)
70 
71 /*
72  * Called when an address space maps in more pages while stats are being
73  * collected.
74  */
75 /* ARGSUSED */
76 void
hat_resvstat(size_t chunk,struct as * as,caddr_t addr)77 hat_resvstat(size_t chunk, struct as *as, caddr_t addr)
78 {
79 }
80 
81 /*
82  * Start the statistics gathering for an address space.
83  * Return -1 if we can't do it, otherwise return an opaque
84  * identifier to be used when querying for the gathered statistics.
85  * The identifier is an unused bit in a_vbits.
86  * Bit 0 is reserved for swsmon.
87  */
88 int
hat_startstat(struct as * as)89 hat_startstat(struct as *as)
90 {
91 	uint_t nbits;		/* number of bits */
92 	uint_t bn;		/* bit number */
93 	uint_t id;		/* new vbit, identifier */
94 	uint_t vbits;		/* used vbits of address space */
95 	size_t chunk;		/* mapped size for stats */
96 
97 	/*
98 	 * If the refmod saving memory allocator runs out, print
99 	 * a warning message about how to fix it, see comment at
100 	 * the beginning of hat_setstat.
101 	 */
102 	if (hrm_allocfailmsg) {
103 		cmn_err(CE_WARN,
104 		    "hrm_balloc failures occured, increase hrm_blist_incr");
105 		hrm_allocfailmsg = 0;
106 	}
107 
108 	/*
109 	 * Verify that a buffer of statistics blocks exists
110 	 * and allocate more, if needed.
111 	 */
112 
113 	chunk = hat_get_mapped_size(as->a_hat);
114 	chunk = (btop(chunk)/HRM_PAGES);
115 	if (chunk < HRM_BLIST_INCR)
116 		chunk = 0;
117 
118 	hrm_getblk((int)chunk);
119 
120 	/*
121 	 * Find a unused id in the given address space.
122 	 */
123 	hat_enter(as->a_hat);
124 	vbits = as->a_vbits;
125 	nbits = sizeof (as->a_vbits) * NBBY;
126 	for (bn = 1, id = 2; bn < (nbits - 1); bn++, id <<= 1)
127 		if ((id & vbits) == 0)
128 			break;
129 	if (bn >= (nbits - 1)) {
130 		hat_exit(as->a_hat);
131 		return (-1);
132 	}
133 	as->a_vbits |= id;
134 	hat_exit(as->a_hat);
135 	(void) hat_stats_enable(as->a_hat);
136 	return (id);
137 }
138 
139 /*
140  * Record referenced and modified information for an address space.
141  * Rmbits is a word containing the referenced bit in bit position 1
142  * and the modified bit in bit position 0.
143  *
144  * For current informational uses, one can rerun any program using
145  * this facility after modifying the hrm_blist_incr to be a larger
146  * amount so that a larger buffer of blocks will be maintained.
147  */
148 void
hat_setstat(struct as * as,caddr_t addr,size_t len,uint_t rmbits)149 hat_setstat(struct as *as, caddr_t addr, size_t len, uint_t rmbits)
150 {
151 	struct hrmstat	*hrm;
152 	uint_t		vbits, newbits, nb;
153 	int		h;
154 
155 	ASSERT(len == PAGESIZE);
156 	ASSERT((rmbits & ~(P_MOD|P_REF)) == 0);
157 
158 	if (rmbits == 0)
159 		return;
160 
161 	mutex_enter(&hat_statlock);
162 
163 	/*
164 	 * Search the hash list for the as and addr we are looking for
165 	 * and set the ref and mod bits in every block that matches.
166 	 */
167 	vbits = 0;
168 	h = hrm_hash(as, addr);
169 	for (hrm = hrm_hashtab[h]; hrm; hrm = hrm->hrm_hnext) {
170 		if (hrm_match(hrm, as, addr)) {
171 			hrm_setbits(hrm, addr, rmbits);
172 			vbits |= hrm->hrm_id;
173 		}
174 	}
175 
176 	/*
177 	 * If we didn't find a block for all of the enabled
178 	 * vpages bits, then allocate and initialize a block
179 	 * for each bit that was not found.
180 	 */
181 	if (vbits != as->a_vbits) {
182 		newbits = (vbits ^ as->a_vbits) & as->a_vbits;
183 		nb = 0;
184 		while (newbits) {
185 			if (ffs(newbits))
186 				nb = 1 << (ffs(newbits)-1);
187 			hrm = (struct hrmstat *)hrm_balloc();
188 			if (hrm == NULL) {
189 				hrm_allocfailmsg = 1;
190 				hrm_allocfail++;
191 				mutex_exit(&hat_statlock);
192 				return;
193 			}
194 			hrm->hrm_as = as;
195 			hrm->hrm_base = (uintptr_t)addr & HRM_BASEMASK;
196 			hrm->hrm_id = nb;
197 			hrm_link(hrm);
198 			hrm_setbits(hrm, addr, rmbits);
199 			newbits &= ~nb;
200 		}
201 	}
202 	mutex_exit(&hat_statlock);
203 }
204 
205 /*
206  * Free the resources used to maintain the referenced and modified
207  * statistics for the virtual page view of an address space
208  * identified by id.
209  */
210 void
hat_freestat(struct as * as,int id)211 hat_freestat(struct as *as, int id)
212 {
213 	struct hrmstat *hrm;
214 	struct hrmstat *prev_ahrm;
215 	struct hrmstat *hrm_tmplist;
216 	struct hrmstat *hrm_next;
217 
218 	hat_stats_disable(as->a_hat);	/* tell the hat layer to stop */
219 	hat_enter(as->a_hat);
220 	if (id == 0)
221 		as->a_vbits = 0;
222 	else
223 		as->a_vbits &= ~id;
224 
225 	if ((hrm = as->a_hrm) == NULL) {
226 		hat_exit(as->a_hat);
227 		return;
228 	}
229 	hat_exit(as->a_hat);
230 
231 	mutex_enter(&hat_statlock);
232 
233 	for (prev_ahrm = NULL; hrm; hrm = hrm->hrm_anext) {
234 		if ((id == hrm->hrm_id) || (id == 0)) {
235 
236 			hrm_hashout(hrm);
237 			hrm->hrm_hnext = hrm_blist;
238 			hrm_blist = hrm;
239 			hrm_blist_num++;
240 
241 			if (prev_ahrm == NULL)
242 				as->a_hrm = hrm->hrm_anext;
243 			else
244 				prev_ahrm->hrm_anext = hrm->hrm_anext;
245 
246 		} else
247 			prev_ahrm = hrm;
248 	}
249 
250 	/*
251 	 * If all statistics blocks are free,
252 	 * return the memory to the system.
253 	 */
254 	if (hrm_blist_num == hrm_blist_total) {
255 		/* zero the block list since we are giving back its memory */
256 		hrm_blist = NULL;
257 		hrm_blist_num = 0;
258 		hrm_blist_total = 0;
259 		hrm_tmplist = hrm_memlist;
260 		hrm_memlist = NULL;
261 	} else {
262 		hrm_tmplist = NULL;
263 	}
264 
265 	mutex_exit(&hat_statlock);
266 
267 	/*
268 	 * If there are any hrmstat structures to be freed, this must only
269 	 * be done after we've released hat_statlock.
270 	 */
271 	while (hrm_tmplist != NULL) {
272 		hrm_next = hrm_tmplist->hrm_hnext;
273 		kmem_free(hrm_tmplist, hrm_tmplist->hrm_base);
274 		hrm_tmplist = hrm_next;
275 	}
276 }
277 
278 /*
279  * Grab memory for statistics gathering of the hat layer.
280  */
281 static void
hrm_getblk(int chunk)282 hrm_getblk(int chunk)
283 {
284 	struct hrmstat *hrm, *l;
285 	int i;
286 	int hrm_incr;
287 
288 	mutex_enter(&hat_statlock);
289 	/*
290 	 * XXX The whole private freelist management here really should be
291 	 * overhauled.
292 	 *
293 	 * The freelist should have some knowledge of how much memory is
294 	 * needed by a process and thus when hat_resvstat get's called, we can
295 	 * increment the freelist needs for that process within this subsystem.
296 	 * Thus there will be reservations for all processes which are being
297 	 * watched which should be accurate, and consume less memory overall.
298 	 *
299 	 * For now, just make sure there's enough entries on the freelist to
300 	 * handle the current chunk.
301 	 */
302 	if ((hrm_blist == NULL) ||
303 	    (hrm_blist_num <= hrm_blist_lowater) ||
304 	    (chunk && (hrm_blist_num < chunk + hrm_blist_incr))) {
305 		mutex_exit(&hat_statlock);
306 
307 		hrm_incr = chunk  + hrm_blist_incr;
308 		hrm = kmem_zalloc(sizeof (struct hrmstat) * hrm_incr, KM_SLEEP);
309 		hrm->hrm_base = sizeof (struct hrmstat) * hrm_incr;
310 
311 		/*
312 		 * thread the allocated blocks onto a freelist
313 		 * using the first block to hold information for
314 		 * freeing them all later
315 		 */
316 		mutex_enter(&hat_statlock);
317 		hrm->hrm_hnext = hrm_memlist;
318 		hrm_memlist = hrm;
319 
320 		hrm_blist_total += (hrm_incr - 1);
321 		for (i = 1; i < hrm_incr; i++) {
322 			l = &hrm[i];
323 			l->hrm_hnext = hrm_blist;
324 			hrm_blist = l;
325 			hrm_blist_num++;
326 		}
327 	}
328 	mutex_exit(&hat_statlock);
329 }
330 
331 static void
hrm_hashin(struct hrmstat * hrm)332 hrm_hashin(struct hrmstat *hrm)
333 {
334 	int 		h;
335 
336 	ASSERT(MUTEX_HELD(&hat_statlock));
337 	h = hrm_hash(hrm->hrm_as, hrm->hrm_base);
338 
339 	hrm->hrm_hnext = hrm_hashtab[h];
340 	hrm_hashtab[h] = hrm;
341 }
342 
343 static void
hrm_hashout(struct hrmstat * hrm)344 hrm_hashout(struct hrmstat *hrm)
345 {
346 	struct hrmstat	*list, **prev_hrm;
347 	int		h;
348 
349 	ASSERT(MUTEX_HELD(&hat_statlock));
350 	h = hrm_hash(hrm->hrm_as, hrm->hrm_base);
351 	list = hrm_hashtab[h];
352 	prev_hrm = &hrm_hashtab[h];
353 
354 	while (list) {
355 		if (list == hrm) {
356 			*prev_hrm = list->hrm_hnext;
357 			return;
358 		}
359 		prev_hrm = &list->hrm_hnext;
360 		list = list->hrm_hnext;
361 	}
362 }
363 
364 
365 /*
366  * Link a statistic block into an address space and also put it
367  * on the hash list for future references.
368  */
369 static void
hrm_link(struct hrmstat * hrm)370 hrm_link(struct hrmstat *hrm)
371 {
372 	struct as *as = hrm->hrm_as;
373 
374 	ASSERT(MUTEX_HELD(&hat_statlock));
375 	hrm->hrm_anext = as->a_hrm;
376 	as->a_hrm = hrm;
377 	hrm_hashin(hrm);
378 }
379 
380 /*
381  * Allocate a block for statistics keeping.
382  * Returns NULL if blocks are unavailable.
383  */
384 static struct hrmstat *
hrm_balloc(void)385 hrm_balloc(void)
386 {
387 	struct hrmstat *hrm;
388 
389 	ASSERT(MUTEX_HELD(&hat_statlock));
390 
391 	hrm = hrm_blist;
392 	if (hrm != NULL) {
393 		hrm_blist = hrm->hrm_hnext;
394 		hrm_blist_num--;
395 		hrm->hrm_hnext = NULL;
396 	}
397 	return (hrm);
398 }
399 
400 /*
401  * Set the ref and mod bits for addr within statistics block hrm.
402  */
403 static void
hrm_setbits(struct hrmstat * hrm,caddr_t addr,uint_t bits)404 hrm_setbits(struct hrmstat *hrm, caddr_t addr, uint_t bits)
405 {
406 	uint_t po, bo, spb;
407 	uint_t nbits;
408 
409 	po = ((uintptr_t)addr & HRM_BASEOFFSET) >> MMU_PAGESHIFT; /* pg off */
410 	bo = po / (NBBY / 2);			/* which byte in bit array */
411 	spb = (3 - (po & 3)) * 2;		/* shift position within byte */
412 	nbits = bits << spb;			/* bit mask */
413 	hrm->hrm_bits[bo] |= nbits;
414 }
415 
416 /*
417  * Return collected statistics about an address space.
418  * If clearflag is set, atomically read and zero the bits.
419  *
420  * Fill in the data array supplied with the referenced and
421  * modified bits collected for address range [addr ... addr + len]
422  * in address space, as, uniquely identified by id.
423  * The destination is a byte array.  We fill in three bits per byte:
424  * referenced, modified, and hwmapped bits.
425  * Kernel only interface, can't fault on destination data array.
426  *
427  */
428 void
hat_getstat(struct as * as,caddr_t addr,size_t len,uint_t id,caddr_t datap,int clearflag)429 hat_getstat(struct as *as, caddr_t addr, size_t len, uint_t id,
430     caddr_t datap, int clearflag)
431 {
432 	size_t	np;		/* number of pages */
433 	caddr_t	a;
434 	char 	*dp;
435 
436 	np = btop(len);
437 	bzero(datap, np);
438 
439 	/* allocate enough statistics blocks to cover the len passed in */
440 	hrm_getblk(np / HRM_PAGES);
441 
442 	hat_sync(as->a_hat, addr, len, clearflag);
443 
444 	/* allocate more statistics blocks if needed */
445 	hrm_getblk(0);
446 
447 	mutex_enter(&hat_statlock);
448 	if (hrm_hashtab == NULL) {
449 		/* can happen when victim process exits */
450 		mutex_exit(&hat_statlock);
451 		return;
452 	}
453 	dp = datap;
454 	a = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
455 	while (a < addr + len) {
456 		struct hrmstat	*hrm;
457 		size_t	n;		/* number of pages, temp */
458 		int	h;		/* hash index */
459 		uint_t	po;
460 
461 		h = hrm_hash(as, a);
462 		n = (HRM_PAGES -
463 		    (((uintptr_t)a & HRM_PAGEMASK) >> MMU_PAGESHIFT));
464 		if (n > np)
465 			n = np;
466 		po = ((uintptr_t)a & HRM_BASEOFFSET) >> MMU_PAGESHIFT;
467 
468 		for (hrm = hrm_hashtab[h]; hrm; hrm = hrm->hrm_hnext) {
469 			if (hrm->hrm_as == as &&
470 			    hrm->hrm_base == ((uintptr_t)a & HRM_BASEMASK) &&
471 			    id == hrm->hrm_id) {
472 				int i, nr;
473 				uint_t bo, spb;
474 
475 				/*
476 				 * Extract leading unaligned bits.
477 				 */
478 				i = 0;
479 				while (i < n && (po & 3)) {
480 					bo = po / (NBBY / 2);
481 					spb = (3 - (po & 3)) * 2;
482 					*dp++ |= (hrm->hrm_bits[bo] >> spb) & 3;
483 					if (clearflag)
484 						hrm->hrm_bits[bo] &= ~(3<<spb);
485 					po++;
486 					i++;
487 				}
488 				/*
489 				 * Extract aligned bits.
490 				 */
491 				nr = n/4*4;
492 				bo = po / (NBBY / 2);
493 				while (i < nr) {
494 					int bits = hrm->hrm_bits[bo];
495 					*dp++ |= (bits >> 6) & 3;
496 					*dp++ |= (bits >> 4) & 3;
497 					*dp++ |= (bits >> 2) & 3;
498 					*dp++ |= (bits >> 0) & 3;
499 					if (clearflag)
500 						hrm->hrm_bits[bo] = 0;
501 					bo++;
502 					po += 4;
503 					i += 4;
504 				}
505 				/*
506 				 * Extract trailing unaligned bits.
507 				 */
508 				while (i < n) {
509 					bo = po / (NBBY / 2);
510 					spb = (3 - (po & 3)) * 2;
511 					*dp++ |= (hrm->hrm_bits[bo] >> spb) & 3;
512 					if (clearflag)
513 						hrm->hrm_bits[bo] &= ~(3<<spb);
514 					po++;
515 					i++;
516 				}
517 
518 				break;
519 			}
520 		}
521 		if (hrm == NULL)
522 			dp += n;
523 		np -= n;
524 		a += n * MMU_PAGESIZE;
525 	}
526 	mutex_exit(&hat_statlock);
527 }
528