xref: /titanic_52/usr/src/uts/common/vm/hat_refmod.c (revision df8bdeb362277e8d95a74d6c097341fe97409948)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * The following routines implement the hat layer's
30  * recording of the referenced and modified bits.
31  */
32 
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/debug.h>
37 #include <sys/kmem.h>
38 
39 /*
40  * Note, usage of cmn_err requires you not hold any hat layer locks.
41  */
42 #include <sys/cmn_err.h>
43 
44 #include <vm/as.h>
45 #include <vm/hat.h>
46 
47 kmutex_t hat_statlock;		/* protects all hat statistics data */
48 struct hrmstat *hrm_memlist;	/* tracks memory alloced for hrm_blist blocks */
49 struct hrmstat **hrm_hashtab;	/* hash table for finding blocks quickly */
50 struct hrmstat *hrm_blist;
51 int hrm_blist_incr = HRM_BLIST_INCR;
52 int hrm_blist_lowater = HRM_BLIST_INCR/2;
53 int hrm_blist_num = 0;
54 int hrm_blist_total = 0;
55 int hrm_mlockinited = 0;
56 int hrm_allocfailmsg = 0;	/* print a message when allocations fail */
57 int hrm_allocfail = 0;
58 
59 static struct hrmstat	*hrm_balloc(void);
60 static void	hrm_link(struct hrmstat *);
61 static void	hrm_setbits(struct hrmstat *, caddr_t, uint_t);
62 static void	hrm_hashout(struct hrmstat *);
63 static void	hrm_getblk(int);
64 
/*
 * Hash bucket index for (as, addr): the address shifted down by
 * HRM_BASESHIFT is mixed with the as pointer (shifted right by 2) and
 * the result is masked with HRM_HASHMASK.
 */
#define	hrm_hash(as, addr) \
	((((uintptr_t)(as) >> 2) ^ ((uintptr_t)(addr) >> HRM_BASESHIFT)) & \
	HRM_HASHMASK)
68 
/*
 * Evaluates to 1 when block hrm belongs to address space as and its
 * base equals addr masked with HRM_BASEMASK; evaluates to 0 otherwise.
 */
#define	hrm_match(hrm, as, addr) \
	((hrm)->hrm_as == (as) && \
	(hrm)->hrm_base == ((uintptr_t)(addr) & HRM_BASEMASK))
72 
73 /*
74  * reserve enough statistic blocks for
75  * chunk of bytes (pages) in a given as.
76  */
77 /* ARGSUSED */
78 void
79 hat_resvstat(size_t chunk, struct as *as, caddr_t addr)
80 {
81 	int nhrm = btop(chunk)/HRM_PAGES;
82 
83 	if (nhrm < HRM_BLIST_INCR)
84 		nhrm = 0;	/* preallocate at least HRM_BLIST_INCR */
85 	hrm_getblk(nhrm);
86 }
87 
88 /*
89  * Start the statistics gathering for an address space.
90  * Return -1 if we can't do it, otherwise return an opaque
91  * identifier to be used when querying for the gathered statistics.
92  * The identifier is an unused bit in a_vbits.
93  * Bit 0 is reserved for swsmon.
94  */
95 int
96 hat_startstat(struct as *as)
97 {
98 	uint_t nbits;		/* number of bits */
99 	uint_t bn;		/* bit number */
100 	uint_t id;		/* new vbit, identifier */
101 	uint_t vbits;		/* used vbits of address space */
102 	size_t chunk;		/* mapped size for stats */
103 
104 	/*
105 	 * If the refmod saving memory allocator runs out, print
106 	 * a warning message about how to fix it, see comment at
107 	 * the beginning of hat_setstat.
108 	 */
109 	if (hrm_allocfailmsg) {
110 		cmn_err(CE_WARN,
111 		    "hrm_balloc failures occured, increase hrm_blist_incr");
112 		hrm_allocfailmsg = 0;
113 	}
114 
115 	/*
116 	 * Verify that a buffer of statistics blocks exists
117 	 * and allocate more, if needed.
118 	 */
119 
120 	chunk = hat_get_mapped_size(as->a_hat);
121 	chunk = (btop(chunk)/HRM_PAGES);
122 	if (chunk < HRM_BLIST_INCR)
123 		chunk = 0;
124 
125 	hrm_getblk((int)chunk);
126 
127 	/*
128 	 * Find a unused id in the given address space.
129 	 */
130 	hat_enter(as->a_hat);
131 	vbits = as->a_vbits;
132 	nbits = sizeof (as->a_vbits) * NBBY;
133 	for (bn = 1, id = 2; bn < (nbits - 1); bn++, id <<= 1)
134 		if ((id & vbits) == 0)
135 			break;
136 	if (bn >= (nbits - 1)) {
137 		hat_exit(as->a_hat);
138 		return (-1);
139 	}
140 	as->a_vbits |= id;
141 	hat_exit(as->a_hat);
142 	(void) hat_stats_enable(as->a_hat);
143 	return (id);
144 }
145 
146 /*
147  * Record referenced and modified information for an address space.
148  * Rmbits is a word containing the referenced bit in bit position 1
149  * and the modified bit in bit position 0.
150  *
151  * For current informational uses, one can rerun any program using
152  * this facility after modifying the hrm_blist_incr to be a larger
153  * amount so that a larger buffer of blocks will be maintained.
154  */
155 void
156 hat_setstat(struct as *as, caddr_t addr, size_t len, uint_t rmbits)
157 {
158 	struct hrmstat	*hrm;
159 	uint_t		vbits, newbits, nb;
160 	int		h;
161 
162 	ASSERT(len == PAGESIZE);
163 	ASSERT((rmbits & ~(P_MOD|P_REF)) == 0);
164 
165 	if (rmbits == 0)
166 		return;
167 
168 	mutex_enter(&hat_statlock);
169 
170 	/*
171 	 * Search the hash list for the as and addr we are looking for
172 	 * and set the ref and mod bits in every block that matches.
173 	 */
174 	vbits = 0;
175 	h = hrm_hash(as, addr);
176 	for (hrm = hrm_hashtab[h]; hrm; hrm = hrm->hrm_hnext) {
177 		if (hrm_match(hrm, as, addr)) {
178 			hrm_setbits(hrm, addr, rmbits);
179 			vbits |= hrm->hrm_id;
180 		}
181 	}
182 
183 	/*
184 	 * If we didn't find a block for all of the enabled
185 	 * vpages bits, then allocate and initialize a block
186 	 * for each bit that was not found.
187 	 */
188 	if (vbits != as->a_vbits) {
189 		newbits = (vbits ^ as->a_vbits) & as->a_vbits;
190 		while (newbits) {
191 			if (ffs(newbits))
192 				nb = 1 << (ffs(newbits)-1);
193 			hrm = (struct hrmstat *)hrm_balloc();
194 			if (hrm == NULL) {
195 				hrm_allocfailmsg = 1;
196 				hrm_allocfail++;
197 				mutex_exit(&hat_statlock);
198 				return;
199 			}
200 			hrm->hrm_as = as;
201 			hrm->hrm_base = (uintptr_t)addr & HRM_BASEMASK;
202 			hrm->hrm_id = nb;
203 			hrm_link(hrm);
204 			hrm_setbits(hrm, addr, rmbits);
205 			newbits &= ~nb;
206 		}
207 	}
208 	mutex_exit(&hat_statlock);
209 }
210 
211 /*
212  * Free the resources used to maintain the referenced and modified
213  * statistics for the virtual page view of an address space
214  * identified by id.
215  */
216 void
217 hat_freestat(struct as *as, int id)
218 {
219 	struct hrmstat *hrm;
220 	struct hrmstat *prev_ahrm;
221 	struct hrmstat *hrm_tmplist;
222 	struct hrmstat *hrm_next;
223 
224 	hat_stats_disable(as->a_hat);	/* tell the hat layer to stop */
225 	hat_enter(as->a_hat);
226 	if (id == 0)
227 		as->a_vbits = 0;
228 	else
229 		as->a_vbits &= ~id;
230 
231 	if ((hrm = as->a_hrm) == NULL) {
232 		hat_exit(as->a_hat);
233 		return;
234 	}
235 	hat_exit(as->a_hat);
236 
237 	mutex_enter(&hat_statlock);
238 
239 	for (prev_ahrm = NULL; hrm; hrm = hrm->hrm_anext) {
240 		if ((id == hrm->hrm_id) || (id == NULL)) {
241 
242 			hrm_hashout(hrm);
243 			hrm->hrm_hnext = hrm_blist;
244 			hrm_blist = hrm;
245 			hrm_blist_num++;
246 
247 			if (prev_ahrm == NULL)
248 				as->a_hrm = hrm->hrm_anext;
249 			else
250 				prev_ahrm->hrm_anext = hrm->hrm_anext;
251 
252 		} else
253 			prev_ahrm = hrm;
254 	}
255 
256 	/*
257 	 * If all statistics blocks are free,
258 	 * return the memory to the system.
259 	 */
260 	if (hrm_blist_num == hrm_blist_total) {
261 		/* zero the block list since we are giving back its memory */
262 		hrm_blist = NULL;
263 		hrm_blist_num = 0;
264 		hrm_blist_total = 0;
265 		hrm_tmplist = hrm_memlist;
266 		hrm_memlist = NULL;
267 	} else {
268 		hrm_tmplist = NULL;
269 	}
270 
271 	mutex_exit(&hat_statlock);
272 
273 	/*
274 	 * If there are any hrmstat structures to be freed, this must only
275 	 * be done after we've released hat_statlock.
276 	 */
277 	while (hrm_tmplist != NULL) {
278 		hrm_next = hrm_tmplist->hrm_hnext;
279 		kmem_free(hrm_tmplist, hrm_tmplist->hrm_base);
280 		hrm_tmplist = hrm_next;
281 	}
282 }
283 
284 /*
285  * Grab memory for statistics gathering of the hat layer.
286  */
287 static void
288 hrm_getblk(int chunk)
289 {
290 	struct hrmstat *hrm, *l;
291 	int i;
292 	int hrm_incr;
293 
294 	mutex_enter(&hat_statlock);
295 	if ((hrm_blist == NULL) ||
296 	    (hrm_blist_num <= hrm_blist_lowater) ||
297 	    (chunk && (hrm_blist_num < chunk))) {
298 
299 		mutex_exit(&hat_statlock);
300 
301 		hrm_incr = chunk? chunk : hrm_blist_incr;
302 		hrm = kmem_zalloc(sizeof (struct hrmstat) * hrm_incr, KM_SLEEP);
303 		hrm->hrm_base = sizeof (struct hrmstat) * hrm_incr;
304 
305 		/*
306 		 * thread the allocated blocks onto a freelist
307 		 * using the first block to hold information for
308 		 * freeing them all later
309 		 */
310 		mutex_enter(&hat_statlock);
311 		hrm->hrm_hnext = hrm_memlist;
312 		hrm_memlist = hrm;
313 
314 		hrm_blist_total += (hrm_incr - 1);
315 		for (i = 1; i < hrm_incr; i++) {
316 			l = &hrm[i];
317 			l->hrm_hnext = hrm_blist;
318 			hrm_blist = l;
319 			hrm_blist_num++;
320 		}
321 	}
322 	mutex_exit(&hat_statlock);
323 }
324 
325 static void
326 hrm_hashin(struct hrmstat *hrm)
327 {
328 	int 		h;
329 
330 	ASSERT(MUTEX_HELD(&hat_statlock));
331 	h = hrm_hash(hrm->hrm_as, hrm->hrm_base);
332 
333 	hrm->hrm_hnext = hrm_hashtab[h];
334 	hrm_hashtab[h] = hrm;
335 }
336 
337 static void
338 hrm_hashout(struct hrmstat *hrm)
339 {
340 	struct hrmstat	*list, **prev_hrm;
341 	int		h;
342 
343 	ASSERT(MUTEX_HELD(&hat_statlock));
344 	h = hrm_hash(hrm->hrm_as, hrm->hrm_base);
345 	list = hrm_hashtab[h];
346 	prev_hrm = &hrm_hashtab[h];
347 
348 	while (list) {
349 		if (list == hrm) {
350 			*prev_hrm = list->hrm_hnext;
351 			return;
352 		}
353 		prev_hrm = &list->hrm_hnext;
354 		list = list->hrm_hnext;
355 	}
356 }
357 
358 
359 /*
360  * Link a statistic block into an address space and also put it
361  * on the hash list for future references.
362  */
363 static void
364 hrm_link(struct hrmstat *hrm)
365 {
366 	struct as *as = hrm->hrm_as;
367 
368 	ASSERT(MUTEX_HELD(&hat_statlock));
369 	hrm->hrm_anext = as->a_hrm;
370 	as->a_hrm = hrm;
371 	hrm_hashin(hrm);
372 }
373 
374 /*
375  * Allocate a block for statistics keeping.
376  * Returns NULL if blocks are unavailable.
377  */
378 static struct hrmstat *
379 hrm_balloc(void)
380 {
381 	struct hrmstat *hrm;
382 
383 	ASSERT(MUTEX_HELD(&hat_statlock));
384 
385 	hrm = hrm_blist;
386 	if (hrm != NULL) {
387 		hrm_blist = hrm->hrm_hnext;
388 		hrm_blist_num--;
389 		hrm->hrm_hnext = NULL;
390 	}
391 	return (hrm);
392 }
393 
394 /*
395  * Set the ref and mod bits for addr within statistics block hrm.
396  */
397 static void
398 hrm_setbits(struct hrmstat *hrm, caddr_t addr, uint_t bits)
399 {
400 	uint_t po, bo, spb;
401 	uint_t nbits;
402 
403 	po = ((uintptr_t)addr & HRM_BASEOFFSET) >> MMU_PAGESHIFT; /* pg off */
404 	bo = po / (NBBY / 2);			/* which byte in bit array */
405 	spb = (3 - (po & 3)) * 2;		/* shift position within byte */
406 	nbits = bits << spb;			/* bit mask */
407 	hrm->hrm_bits[bo] |= nbits;
408 }
409 
410 /*
411  * Return collected statistics about an address space.
412  * If clearflag is set, atomically read and zero the bits.
413  *
414  * Fill in the data array supplied with the referenced and
415  * modified bits collected for address range [addr ... addr + len]
416  * in address space, as, uniquely identified by id.
417  * The destination is a byte array.  We fill in three bits per byte:
418  * referenced, modified, and hwmapped bits.
419  * Kernel only interface, can't fault on destination data array.
420  *
421  */
422 void
423 hat_getstat(struct as *as, caddr_t addr, size_t len, uint_t id,
424     caddr_t datap, int clearflag)
425 {
426 	size_t	np;		/* number of pages */
427 	caddr_t	a;
428 	char 	*dp;
429 
430 	np = btop(len);
431 	bzero(datap, np);
432 
433 	hat_sync(as->a_hat, addr, len, clearflag);
434 
435 	/* allocate more statistics blocks if needed */
436 	hrm_getblk(0);
437 
438 	mutex_enter(&hat_statlock);
439 	if (hrm_hashtab == NULL) {
440 		/* can happen when victim process exits */
441 		mutex_exit(&hat_statlock);
442 		return;
443 	}
444 	dp = datap;
445 	a = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
446 	while (a < addr + len) {
447 		struct hrmstat	*hrm;
448 		size_t	n;		/* number of pages, temp */
449 		int	h;		/* hash index */
450 		uint_t	po;
451 
452 		h = hrm_hash(as, a);
453 		n = (HRM_PAGES -
454 			(((uintptr_t)a & HRM_PAGEMASK) >> MMU_PAGESHIFT));
455 		if (n > np)
456 			n = np;
457 		po = ((uintptr_t)a & HRM_BASEOFFSET) >> MMU_PAGESHIFT;
458 
459 		for (hrm = hrm_hashtab[h]; hrm; hrm = hrm->hrm_hnext) {
460 			if (hrm->hrm_as == as &&
461 			    hrm->hrm_base == ((uintptr_t)a & HRM_BASEMASK) &&
462 			    id == hrm->hrm_id) {
463 				int i, nr;
464 				uint_t bo, spb;
465 
466 				/*
467 				 * Extract leading unaligned bits.
468 				 */
469 				i = 0;
470 				while (i < n && (po & 3)) {
471 					bo = po / (NBBY / 2);
472 					spb = (3 - (po & 3)) * 2;
473 					*dp++ |= (hrm->hrm_bits[bo] >> spb) & 3;
474 					if (clearflag)
475 						hrm->hrm_bits[bo] &= ~(3<<spb);
476 					po++;
477 					i++;
478 				}
479 				/*
480 				 * Extract aligned bits.
481 				 */
482 				nr = n/4*4;
483 				bo = po / (NBBY / 2);
484 				while (i < nr) {
485 					int bits = hrm->hrm_bits[bo];
486 					*dp++ |= (bits >> 6) & 3;
487 					*dp++ |= (bits >> 4) & 3;
488 					*dp++ |= (bits >> 2) & 3;
489 					*dp++ |= (bits >> 0) & 3;
490 					if (clearflag)
491 						hrm->hrm_bits[bo] = 0;
492 					bo++;
493 					po += 4;
494 					i += 4;
495 				}
496 				/*
497 				 * Extract trailing unaligned bits.
498 				 */
499 				while (i < n) {
500 					bo = po / (NBBY / 2);
501 					spb = (3 - (po & 3)) * 2;
502 					*dp++ |= (hrm->hrm_bits[bo] >> spb) & 3;
503 					if (clearflag)
504 						hrm->hrm_bits[bo] &= ~(3<<spb);
505 					po++;
506 					i++;
507 				}
508 
509 				break;
510 			}
511 		}
512 		if (hrm == NULL)
513 			dp += n;
514 		np -= n;
515 		a += n * MMU_PAGESIZE;
516 	}
517 	mutex_exit(&hat_statlock);
518 }
519