xref: /illumos-gate/usr/src/uts/common/syscall/lgrpsys.c (revision 753d2d2e8e7fd0c9bcf736d9bf2f2faf4d6234cc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * lgroup system calls
31  */
32 
33 #include <sys/types.h>
34 #include <sys/errno.h>
35 #include <sys/sunddi.h>
36 #include <sys/systm.h>
37 #include <sys/mman.h>
38 #include <sys/cpupart.h>
39 #include <sys/lgrp.h>
40 #include <sys/lgrp_user.h>
41 #include <sys/promif.h>		/* for prom_printf() */
42 #include <sys/sysmacros.h>
43 
44 #include <vm/as.h>
45 
46 
47 /* definitions for mi_validity */
48 #define	VALID_ADDR	1
49 #define	VALID_REQ	2
50 
51 /*
52  * run through the given number of addresses and requests and return the
53  * corresponding memory information for each address
54  */
55 static int
56 meminfo(int addr_count, struct meminfo *mip)
57 {
58 	size_t		in_size, out_size, req_size, val_size;
59 	struct as	*as;
60 	struct hat	*hat;
61 	int		i, j, out_idx, info_count;
62 	lgrp_t		*lgrp;
63 	pfn_t		pfn;
64 	ssize_t		pgsz;
65 	int		*req_array, *val_array;
66 	uint64_t	*in_array, *out_array;
67 	uint64_t	addr, paddr;
68 	uintptr_t	vaddr;
69 	int		ret = 0;
70 	struct meminfo minfo;
71 #if defined(_SYSCALL32_IMPL)
72 	struct meminfo32 minfo32;
73 #endif
74 
75 	/*
76 	 * Make sure that there is at least one address to translate and
77 	 * limit how many virtual addresses the kernel can do per call
78 	 */
79 	if (addr_count < 1)
80 		return (set_errno(EINVAL));
81 	else if (addr_count > MAX_MEMINFO_CNT)
82 		addr_count = MAX_MEMINFO_CNT;
83 
84 	if (get_udatamodel() == DATAMODEL_NATIVE) {
85 		if (copyin(mip, &minfo, sizeof (struct meminfo)))
86 			return (set_errno(EFAULT));
87 	}
88 #if defined(_SYSCALL32_IMPL)
89 	else {
90 		bzero(&minfo, sizeof (minfo));
91 		if (copyin(mip, &minfo32, sizeof (struct meminfo32)))
92 			return (set_errno(EFAULT));
93 		minfo.mi_inaddr = (const uint64_t *)(uintptr_t)
94 		    minfo32.mi_inaddr;
95 		minfo.mi_info_req = (const uint_t *)(uintptr_t)
96 		    minfo32.mi_info_req;
97 		minfo.mi_info_count = minfo32.mi_info_count;
98 		minfo.mi_outdata = (uint64_t *)(uintptr_t)
99 		    minfo32.mi_outdata;
100 		minfo.mi_validity = (uint_t *)(uintptr_t)
101 		    minfo32.mi_validity;
102 	}
103 #endif
104 	/*
105 	 * all the input parameters have been copied in:-
106 	 * addr_count - number of input addresses
107 	 * minfo.mi_inaddr - array of input addresses
108 	 * minfo.mi_info_req - array of types of information requested
109 	 * minfo.mi_info_count - no. of pieces of info requested for each addr
110 	 * minfo.mi_outdata - array into which the results are placed
111 	 * minfo.mi_validity -  array containing bitwise result codes; 0th bit
112 	 *			evaluates validity of corresponding input
113 	 *			address, 1st bit validity of response to first
114 	 *			member of info_req, etc.
115 	 */
116 
117 	/* make sure mi_info_count is within limit */
118 	info_count = minfo.mi_info_count;
119 	if (info_count < 1 || info_count > MAX_MEMINFO_REQ)
120 		return (set_errno(EINVAL));
121 
122 	/*
123 	 * allocate buffer in_array for the input addresses and copy them in
124 	 */
125 	in_size = sizeof (uint64_t) * addr_count;
126 	in_array = kmem_alloc(in_size, KM_SLEEP);
127 	if (copyin(minfo.mi_inaddr, in_array, in_size)) {
128 		kmem_free(in_array, in_size);
129 		return (set_errno(EFAULT));
130 	}
131 
132 	/*
133 	 * allocate buffer req_array for the input info_reqs and copy them in
134 	 */
135 	req_size = sizeof (uint_t) * info_count;
136 	req_array = kmem_alloc(req_size, KM_SLEEP);
137 	if (copyin(minfo.mi_info_req, req_array, req_size)) {
138 		kmem_free(req_array, req_size);
139 		kmem_free(in_array, in_size);
140 		return (set_errno(EFAULT));
141 	}
142 
143 	/*
144 	 * allocate buffer out_array which holds the results and will have
145 	 * to be copied out later
146 	 */
147 	out_size = sizeof (uint64_t) * addr_count * info_count;
148 	out_array = kmem_alloc(out_size, KM_SLEEP);
149 
150 	/*
151 	 * allocate buffer val_array which holds the validity bits and will
152 	 * have to be copied out later
153 	 */
154 	val_size = sizeof (uint_t) * addr_count;
155 	val_array = kmem_alloc(val_size, KM_SLEEP);
156 
157 	if ((req_array[0] & MEMINFO_MASK) == MEMINFO_PLGRP) {
158 		/* find the corresponding lgroup for each physical address */
159 		for (i = 0; i < addr_count; i++) {
160 			paddr = in_array[i];
161 			pfn = btop(paddr);
162 			lgrp = lgrp_pfn_to_lgrp(pfn);
163 			if (lgrp) {
164 				out_array[i] = lgrp->lgrp_id;
165 				val_array[i] = VALID_ADDR | VALID_REQ;
166 			} else {
167 				out_array[i] = NULL;
168 				val_array[i] = 0;
169 			}
170 		}
171 	} else {
172 		/* get the corresponding memory info for each virtual address */
173 		as = curproc->p_as;
174 
175 		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
176 		hat = as->a_hat;
177 		for (i = out_idx = 0; i < addr_count; i++, out_idx +=
178 		    info_count) {
179 			addr = in_array[i];
180 			vaddr = (uintptr_t)(addr & ~PAGEOFFSET);
181 			if (!as_segat(as, (caddr_t)vaddr)) {
182 				val_array[i] = 0;
183 				continue;
184 			}
185 			val_array[i] = VALID_ADDR;
186 			pfn = hat_getpfnum(hat, (caddr_t)vaddr);
187 			if (pfn != PFN_INVALID) {
188 				paddr = (uint64_t)((pfn << PAGESHIFT) |
189 					(addr & PAGEOFFSET));
190 				for (j = 0; j < info_count; j++) {
191 					switch (req_array[j] & MEMINFO_MASK) {
192 					case MEMINFO_VPHYSICAL:
193 						/*
194 						 * return the physical address
195 						 * corresponding to the input
196 						 * virtual address
197 						 */
198 						out_array[out_idx + j] = paddr;
199 						val_array[i] |= VALID_REQ << j;
200 						break;
201 					case MEMINFO_VLGRP:
202 						/*
203 						 * return the lgroup of physical
204 						 * page corresponding to the
205 						 * input virtual address
206 						 */
207 						lgrp = lgrp_pfn_to_lgrp(pfn);
208 						if (lgrp) {
209 							out_array[out_idx + j] =
210 								lgrp->lgrp_id;
211 							val_array[i] |=
212 								VALID_REQ << j;
213 						}
214 						break;
215 					case MEMINFO_VPAGESIZE:
216 						/*
217 						 * return the size of physical
218 						 * page corresponding to the
219 						 * input virtual address
220 						 */
221 						pgsz = hat_getpagesize(hat,
222 							(caddr_t)vaddr);
223 						if (pgsz != -1) {
224 							out_array[out_idx + j] =
225 									pgsz;
226 							val_array[i] |=
227 								VALID_REQ << j;
228 						}
229 						break;
230 					case MEMINFO_VREPLCNT:
231 						/*
232 						 * for future use:-
233 						 * return the no. replicated
234 						 * physical pages corresponding
235 						 * to the input virtual address,
236 						 * so it is always 0 at the
237 						 * moment
238 						 */
239 						out_array[out_idx + j] = 0;
240 						val_array[i] |= VALID_REQ << j;
241 						break;
242 					case MEMINFO_VREPL:
243 						/*
244 						 * for future use:-
245 						 * return the nth physical
246 						 * replica of the specified
247 						 * virtual address
248 						 */
249 						break;
250 					case MEMINFO_VREPL_LGRP:
251 						/*
252 						 * for future use:-
253 						 * return the lgroup of nth
254 						 * physical replica of the
255 						 * specified virtual address
256 						 */
257 						break;
258 					case MEMINFO_PLGRP:
259 						/*
260 						 * this is for physical address
261 						 * only, shouldn't mix with
262 						 * virtual address
263 						 */
264 						break;
265 					default:
266 						break;
267 					}
268 				}
269 			}
270 		}
271 		AS_LOCK_EXIT(as, &as->a_lock);
272 	}
273 
274 	/* copy out the results and validity bits and free the buffers */
275 	if ((copyout(out_array, minfo.mi_outdata, out_size) != 0) ||
276 		(copyout(val_array, minfo.mi_validity, val_size) != 0))
277 		ret = set_errno(EFAULT);
278 
279 	kmem_free(in_array, in_size);
280 	kmem_free(out_array, out_size);
281 	kmem_free(req_array, req_size);
282 	kmem_free(val_array, val_size);
283 
284 	return (ret);
285 }
286 
287 
288 /*
289  * Initialize lgroup affinities for thread
290  */
291 void
292 lgrp_affinity_init(lgrp_affinity_t **bufaddr)
293 {
294 	if (bufaddr)
295 		*bufaddr = NULL;
296 }
297 
298 
299 /*
300  * Free lgroup affinities for thread and set to NULL
301  * just in case thread gets recycled
302  */
303 void
304 lgrp_affinity_free(lgrp_affinity_t **bufaddr)
305 {
306 	if (bufaddr && *bufaddr) {
307 		kmem_free(*bufaddr, nlgrpsmax * sizeof (lgrp_affinity_t));
308 		*bufaddr = NULL;
309 	}
310 }
311 
312 
#define	P_ANY	(-2)	/* cookie specifying any ID */
314 
315 
316 /*
317  * Find LWP with given ID in specified process and get its affinity for
318  * specified lgroup
319  */
320 lgrp_affinity_t
321 lgrp_affinity_get_thread(proc_t *p, id_t lwpid, lgrp_id_t lgrp)
322 {
323 	lgrp_affinity_t aff;
324 	int		found;
325 	kthread_t	*t;
326 
327 	ASSERT(MUTEX_HELD(&p->p_lock));
328 
329 	aff = LGRP_AFF_NONE;
330 	found = 0;
331 	t = p->p_tlist;
332 	/*
333 	 * The process may be executing in proc_exit() and its p->p_list may be
334 	 * already NULL.
335 	 */
336 	if (t == NULL)
337 		return (set_errno(ESRCH));
338 
339 	do {
340 		if (t->t_tid == lwpid || lwpid == P_ANY) {
341 			thread_lock(t);
342 			/*
343 			 * Check to see whether caller has permission to set
344 			 * affinity for LWP
345 			 */
346 			if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
347 				thread_unlock(t);
348 				return (set_errno(EPERM));
349 			}
350 
351 			if (t->t_lgrp_affinity)
352 				aff = t->t_lgrp_affinity[lgrp];
353 			thread_unlock(t);
354 			found = 1;
355 			break;
356 		}
357 	} while ((t = t->t_forw) != p->p_tlist);
358 	if (!found)
359 		aff = set_errno(ESRCH);
360 
361 	return (aff);
362 }
363 
364 
365 /*
366  * Get lgroup affinity for given LWP
367  */
368 lgrp_affinity_t
369 lgrp_affinity_get(lgrp_affinity_args_t *ap)
370 {
371 	lgrp_affinity_t		aff;
372 	lgrp_affinity_args_t	args;
373 	id_t			id;
374 	idtype_t		idtype;
375 	lgrp_id_t		lgrp;
376 	proc_t			*p;
377 	kthread_t		*t;
378 
379 	/*
380 	 * Copyin arguments
381 	 */
382 	if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
383 		return (set_errno(EFAULT));
384 
385 	id = args.id;
386 	idtype = args.idtype;
387 	lgrp = args.lgrp;
388 
389 	/*
390 	 * Check for invalid lgroup
391 	 */
392 	if (lgrp < 0 || lgrp == LGRP_NONE)
393 		return (set_errno(EINVAL));
394 
395 	/*
396 	 * Check for existing lgroup
397 	 */
398 	if (lgrp > lgrp_alloc_max)
399 		return (set_errno(ESRCH));
400 
401 	/*
402 	 * Get lgroup affinity for given LWP or process
403 	 */
404 	switch (idtype) {
405 
406 	case P_LWPID:
407 		/*
408 		 * LWP in current process
409 		 */
410 		p = curproc;
411 		mutex_enter(&p->p_lock);
412 		if (id != P_MYID)	/* different thread */
413 			aff = lgrp_affinity_get_thread(p, id, lgrp);
414 		else {			/* current thread */
415 			aff = LGRP_AFF_NONE;
416 			t = curthread;
417 			thread_lock(t);
418 			if (t->t_lgrp_affinity)
419 				aff = t->t_lgrp_affinity[lgrp];
420 			thread_unlock(t);
421 		}
422 		mutex_exit(&p->p_lock);
423 		break;
424 
425 	case P_PID:
426 		/*
427 		 * Process
428 		 */
429 		mutex_enter(&pidlock);
430 
431 		if (id == P_MYID)
432 			p = curproc;
433 		else {
434 			p = prfind(id);
435 			if (p == NULL) {
436 				mutex_exit(&pidlock);
437 				return (set_errno(ESRCH));
438 			}
439 		}
440 
441 		mutex_enter(&p->p_lock);
442 		aff = lgrp_affinity_get_thread(p, P_ANY, lgrp);
443 		mutex_exit(&p->p_lock);
444 
445 		mutex_exit(&pidlock);
446 		break;
447 
448 	default:
449 		aff = set_errno(EINVAL);
450 		break;
451 	}
452 
453 	return (aff);
454 }
455 
456 
457 /*
458  * Find lgroup for which this thread has most affinity in specified partition
459  */
460 lpl_t *
461 lgrp_affinity_best(kthread_t *t, struct cpupart *cpupart, lgrp_id_t start)
462 {
463 	lgrp_affinity_t	*affs;
464 	lgrp_affinity_t	best_aff;
465 	lpl_t		*best_lpl;
466 	lgrp_id_t	home;
467 	lgrp_id_t	lgrpid;
468 	lpl_t		*lpl;
469 
470 	ASSERT(t != NULL);
471 	ASSERT((MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0) ||
472 	    (MUTEX_HELD(&ttoproc(t)->p_lock) && THREAD_LOCK_HELD(t)));
473 	ASSERT(cpupart != NULL);
474 
475 	if (t->t_lgrp_affinity == NULL)
476 		return (NULL);
477 
478 	affs = t->t_lgrp_affinity;
479 
480 	/*
481 	 * Thread bound to CPU
482 	 */
483 	if (t->t_bind_cpu != PBIND_NONE) {
484 		cpu_t	*cp;
485 
486 		/*
487 		 * See whether thread has more affinity for root lgroup
488 		 * than lgroup containing CPU
489 		 */
490 		cp = cpu[t->t_bind_cpu];
491 		lpl = cp->cpu_lpl;
492 		lgrpid = LGRP_ROOTID;
493 		if (affs[lgrpid] > affs[lpl->lpl_lgrpid])
494 			return (&cpupart->cp_lgrploads[lgrpid]);
495 		return (lpl);
496 	}
497 
498 	/*
499 	 * Start searching at given lgroup
500 	 */
501 	ASSERT(start >= 0 && start <= lgrp_alloc_max);
502 	lgrpid = start;
503 
504 	/*
505 	 * Use starting lgroup given above as best first
506 	 */
507 	home = t->t_lpl->lpl_lgrpid;
508 	if (LGRP_CPUS_IN_PART(lgrpid, cpupart))
509 		best_lpl = &cpupart->cp_lgrploads[lgrpid];
510 	else
511 		best_lpl = &cpupart->cp_lgrploads[home];
512 
513 	best_aff = affs[best_lpl->lpl_lgrpid];
514 
515 	do {
516 		/*
517 		 * Skip any lgroups that don't have CPU resources
518 		 * in this processor set.
519 		 */
520 		if (!LGRP_CPUS_IN_PART(lgrpid, cpupart)) {
521 			if (++lgrpid > lgrp_alloc_max)
522 				lgrpid = 0;	/* wrap the search */
523 			continue;
524 		}
525 
526 		/*
527 		 * Find lgroup with most affinity
528 		 */
529 		lpl = &cpupart->cp_lgrploads[lgrpid];
530 		if (affs[lgrpid] > best_aff) {
531 			best_aff = affs[lgrpid];
532 			best_lpl = lpl;
533 		}
534 
535 		if (++lgrpid > lgrp_alloc_max)
536 			lgrpid = 0;	/* wrap the search */
537 
538 	} while (lgrpid != start);
539 
540 	/*
541 	 * No lgroup (in this pset) with any affinity
542 	 */
543 	if (best_aff == LGRP_AFF_NONE)
544 		return (NULL);
545 
546 	lgrpid = best_lpl->lpl_lgrpid;
547 	ASSERT(LGRP_CPUS_IN_PART(lgrpid, cpupart) && best_lpl->lpl_ncpu > 0);
548 
549 	return (best_lpl);
550 }
551 
552 
553 /*
554  * Set thread's affinity for given lgroup
555  */
556 int
557 lgrp_affinity_set_thread(kthread_t *t, lgrp_id_t lgrp, lgrp_affinity_t aff,
558     lgrp_affinity_t **aff_buf)
559 {
560 	lgrp_affinity_t	*affs;
561 	lgrp_id_t	best;
562 	lpl_t		*best_lpl;
563 	lgrp_id_t	home;
564 	int		retval;
565 
566 	ASSERT(t != NULL);
567 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
568 
569 	retval = 0;
570 
571 	thread_lock(t);
572 
573 	/*
574 	 * Check to see whether caller has permission to set affinity for
575 	 * thread
576 	 */
577 	if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
578 		thread_unlock(t);
579 		return (set_errno(EPERM));
580 	}
581 
582 	if (t->t_lgrp_affinity == NULL) {
583 		if (aff == LGRP_AFF_NONE) {
584 			thread_unlock(t);
585 			return (0);
586 		}
587 		ASSERT(aff_buf != NULL && *aff_buf != NULL);
588 		t->t_lgrp_affinity = *aff_buf;
589 		*aff_buf = NULL;
590 	}
591 
592 	affs = t->t_lgrp_affinity;
593 	affs[lgrp] = aff;
594 
595 	/*
596 	 * Find lgroup for which thread has most affinity,
597 	 * starting with lgroup for which affinity being set
598 	 */
599 	best_lpl = lgrp_affinity_best(t, t->t_cpupart, lgrp);
600 
601 	/*
602 	 * Rehome if found lgroup with more affinity than home or lgroup for
603 	 * which affinity is being set has same affinity as home
604 	 */
605 	home = t->t_lpl->lpl_lgrpid;
606 	if (best_lpl != NULL && best_lpl != t->t_lpl) {
607 		best = best_lpl->lpl_lgrpid;
608 		if (affs[best] > affs[home] || (affs[best] == affs[home] &&
609 		    best == lgrp))
610 			lgrp_move_thread(t, best_lpl, 1);
611 	}
612 
613 	thread_unlock(t);
614 
615 	return (retval);
616 }
617 
618 
619 /*
620  * Set process' affinity for specified lgroup
621  */
622 int
623 lgrp_affinity_set_proc(proc_t *p, lgrp_id_t lgrp, lgrp_affinity_t aff,
624     lgrp_affinity_t **aff_buf_array)
625 {
626 	lgrp_affinity_t	*buf;
627 	int		err = 0;
628 	int		i;
629 	int		retval;
630 	kthread_t	*t;
631 
632 	ASSERT(MUTEX_HELD(&pidlock) && MUTEX_HELD(&p->p_lock));
633 	ASSERT(aff_buf_array != NULL);
634 
635 	i = 0;
636 	t = p->p_tlist;
637 	if (t != NULL) {
638 		do {
639 			/*
640 			 * Set lgroup affinity for thread
641 			 */
642 			buf = aff_buf_array[i];
643 			retval = lgrp_affinity_set_thread(t, lgrp, aff, &buf);
644 
645 			if (err == 0 && retval != 0)
646 				err = retval;
647 
648 			/*
649 			 * Advance pointer to next buffer
650 			 */
651 			if (buf == NULL) {
652 				ASSERT(i < p->p_lwpcnt);
653 				aff_buf_array[i] = NULL;
654 				i++;
655 			}
656 
657 		} while ((t = t->t_forw) != p->p_tlist);
658 	}
659 	return (err);
660 }
661 
662 
663 /*
664  * Set LWP's or process' affinity for specified lgroup
665  *
666  * When setting affinities, pidlock, process p_lock, and thread_lock()
667  * need to be held in that order to protect target thread's pset, process,
668  * process contents, and thread contents.  thread_lock() does splhigh(),
669  * so it ends up having similiar effect as kpreempt_disable(), so it will
670  * protect calls to lgrp_move_thread() and lgrp_choose() from pset changes.
671  */
672 int
673 lgrp_affinity_set(lgrp_affinity_args_t *ap)
674 {
675 	lgrp_affinity_t		aff;
676 	lgrp_affinity_t		*aff_buf;
677 	lgrp_affinity_args_t	args;
678 	id_t			id;
679 	idtype_t		idtype;
680 	lgrp_id_t		lgrp;
681 	int			nthreads;
682 	proc_t			*p;
683 	int			retval;
684 
685 	/*
686 	 * Copyin arguments
687 	 */
688 	if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
689 		return (set_errno(EFAULT));
690 
691 	idtype = args.idtype;
692 	id = args.id;
693 	lgrp = args.lgrp;
694 	aff = args.aff;
695 
696 	/*
697 	 * Check for invalid lgroup
698 	 */
699 	if (lgrp < 0 || lgrp == LGRP_NONE)
700 		return (set_errno(EINVAL));
701 
702 	/*
703 	 * Check for existing lgroup
704 	 */
705 	if (lgrp > lgrp_alloc_max)
706 		return (set_errno(ESRCH));
707 
708 	/*
709 	 * Check for legal affinity
710 	 */
711 	if (aff != LGRP_AFF_NONE && aff != LGRP_AFF_WEAK &&
712 	    aff != LGRP_AFF_STRONG)
713 		return (set_errno(EINVAL));
714 
715 	/*
716 	 * Must be process or LWP ID
717 	 */
718 	if (idtype != P_LWPID && idtype != P_PID)
719 		return (set_errno(EINVAL));
720 
721 	/*
722 	 * Set given LWP's or process' affinity for specified lgroup
723 	 */
724 	switch (idtype) {
725 
726 	case P_LWPID:
727 		/*
728 		 * Allocate memory for thread's lgroup affinities
729 		 * ahead of time w/o holding locks
730 		 */
731 		aff_buf = kmem_zalloc(nlgrpsmax * sizeof (lgrp_affinity_t),
732 		    KM_SLEEP);
733 
734 		p = curproc;
735 
736 		/*
737 		 * Set affinity for thread
738 		 */
739 		mutex_enter(&p->p_lock);
740 		if (id == P_MYID) {		/* current thread */
741 			retval = lgrp_affinity_set_thread(curthread, lgrp, aff,
742 			    &aff_buf);
743 		} else if (p->p_tlist == NULL) {
744 			retval = set_errno(ESRCH);
745 		} else {			/* other thread */
746 			int		found = 0;
747 			kthread_t	*t;
748 
749 			t = p->p_tlist;
750 			do {
751 				if (t->t_tid == id) {
752 					retval = lgrp_affinity_set_thread(t,
753 					    lgrp, aff, &aff_buf);
754 					found = 1;
755 					break;
756 				}
757 			} while ((t = t->t_forw) != p->p_tlist);
758 			if (!found)
759 				retval = set_errno(ESRCH);
760 		}
761 		mutex_exit(&p->p_lock);
762 
763 		/*
764 		 * Free memory for lgroup affinities,
765 		 * since thread didn't need it
766 		 */
767 		if (aff_buf)
768 			kmem_free(aff_buf,
769 			    nlgrpsmax * sizeof (lgrp_affinity_t));
770 
771 		break;
772 
773 	case P_PID:
774 
775 		do {
776 			lgrp_affinity_t	**aff_buf_array;
777 			int		i;
778 			size_t		size;
779 
780 			/*
781 			 * Get process
782 			 */
783 			mutex_enter(&pidlock);
784 
785 			if (id == P_MYID)
786 				p = curproc;
787 			else
788 				p = prfind(id);
789 
790 			if (p == NULL) {
791 				mutex_exit(&pidlock);
792 				return (set_errno(ESRCH));
793 			}
794 
795 			/*
796 			 * Get number of threads in process
797 			 *
798 			 * NOTE: Only care about user processes,
799 			 *	 so p_lwpcnt should be number of threads.
800 			 */
801 			mutex_enter(&p->p_lock);
802 			nthreads = p->p_lwpcnt;
803 			mutex_exit(&p->p_lock);
804 
805 			mutex_exit(&pidlock);
806 
807 			if (nthreads < 1)
808 				return (set_errno(ESRCH));
809 
810 			/*
811 			 * Preallocate memory for lgroup affinities for
812 			 * each thread in process now to avoid holding
813 			 * any locks.  Allocate an array to hold a buffer
814 			 * for each thread.
815 			 */
816 			aff_buf_array = kmem_zalloc(nthreads *
817 			    sizeof (lgrp_affinity_t *), KM_SLEEP);
818 
819 			size = nlgrpsmax * sizeof (lgrp_affinity_t);
820 			for (i = 0; i < nthreads; i++)
821 				aff_buf_array[i] = kmem_zalloc(size, KM_SLEEP);
822 
823 			mutex_enter(&pidlock);
824 
825 			/*
826 			 * Get process again since dropped locks to allocate
827 			 * memory (except current process)
828 			 */
829 			if (id != P_MYID)
830 				p = prfind(id);
831 
832 			/*
833 			 * Process went away after we dropped locks and before
834 			 * reacquiring them, so drop locks, free memory, and
835 			 * return.
836 			 */
837 			if (p == NULL) {
838 				mutex_exit(&pidlock);
839 				for (i = 0; i < nthreads; i++)
840 					kmem_free(aff_buf_array[i], size);
841 				kmem_free(aff_buf_array,
842 				    nthreads * sizeof (lgrp_affinity_t *));
843 				return (set_errno(ESRCH));
844 			}
845 
846 			mutex_enter(&p->p_lock);
847 
848 			/*
849 			 * See whether number of threads is same
850 			 * If not, drop locks, free memory, and try again
851 			 */
852 			if (nthreads != p->p_lwpcnt) {
853 				mutex_exit(&p->p_lock);
854 				mutex_exit(&pidlock);
855 				for (i = 0; i < nthreads; i++)
856 					kmem_free(aff_buf_array[i], size);
857 				kmem_free(aff_buf_array,
858 				    nthreads * sizeof (lgrp_affinity_t *));
859 				continue;
860 			}
861 
862 			/*
863 			 * Set lgroup affinity for threads in process
864 			 */
865 			retval = lgrp_affinity_set_proc(p, lgrp, aff,
866 			    aff_buf_array);
867 
868 			mutex_exit(&p->p_lock);
869 			mutex_exit(&pidlock);
870 
871 			/*
872 			 * Free any leftover memory, since some threads may
873 			 * have already allocated memory and set lgroup
874 			 * affinities before
875 			 */
876 			for (i = 0; i < nthreads; i++)
877 				if (aff_buf_array[i] != NULL)
878 					kmem_free(aff_buf_array[i], size);
879 			kmem_free(aff_buf_array,
880 			    nthreads * sizeof (lgrp_affinity_t *));
881 
882 			break;
883 
884 		} while (nthreads != p->p_lwpcnt);
885 
886 		break;
887 
888 	default:
889 		retval = set_errno(EINVAL);
890 		break;
891 	}
892 
893 	return (retval);
894 }
895 
896 
897 /*
898  * Return the latest generation number for the lgroup hierarchy
899  * with the given view
900  */
901 lgrp_gen_t
902 lgrp_generation(lgrp_view_t view)
903 {
904 	cpupart_t	*cpupart;
905 	uint_t		gen;
906 
907 	kpreempt_disable();
908 
909 	/*
910 	 * Determine generation number for given view
911 	 */
912 	if (view == LGRP_VIEW_OS)
913 		/*
914 		 * Return generation number of lgroup hierarchy for OS view
915 		 */
916 		gen = lgrp_gen;
917 	else {
918 		/*
919 		 * For caller's view, use generation numbers for lgroup
920 		 * hierarchy and caller's pset
921 		 * NOTE: Caller needs to check for change in pset ID
922 		 */
923 		cpupart = curthread->t_cpupart;
924 		ASSERT(cpupart);
925 		gen = lgrp_gen + cpupart->cp_gen;
926 	}
927 
928 	kpreempt_enable();
929 
930 	return (gen);
931 }
932 
933 
934 lgrp_id_t
935 lgrp_home_thread(kthread_t *t)
936 {
937 	lgrp_id_t	home;
938 
939 	ASSERT(t != NULL);
940 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
941 
942 	thread_lock(t);
943 
944 	/*
945 	 * Check to see whether caller has permission to set affinity for
946 	 * thread
947 	 */
948 	if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
949 		thread_unlock(t);
950 		return (set_errno(EPERM));
951 	}
952 
953 	home = lgrp_home_id(t);
954 
955 	thread_unlock(t);
956 	return (home);
957 }
958 
959 
960 /*
961  * Get home lgroup of given process or thread
962  */
963 lgrp_id_t
964 lgrp_home_get(idtype_t idtype, id_t id)
965 {
966 	proc_t		*p;
967 	lgrp_id_t	retval;
968 	kthread_t	*t;
969 
970 	/*
971 	 * Get home lgroup of given LWP or process
972 	 */
973 	switch (idtype) {
974 
975 	case P_LWPID:
976 		p = curproc;
977 
978 		/*
979 		 * Set affinity for thread
980 		 */
981 		mutex_enter(&p->p_lock);
982 		if (id == P_MYID) {		/* current thread */
983 			retval = lgrp_home_thread(curthread);
984 		} else if (p->p_tlist == NULL) {
985 			retval = set_errno(ESRCH);
986 		} else {			/* other thread */
987 			int	found = 0;
988 
989 			t = p->p_tlist;
990 			do {
991 				if (t->t_tid == id) {
992 					retval = lgrp_home_thread(t);
993 					found = 1;
994 					break;
995 				}
996 			} while ((t = t->t_forw) != p->p_tlist);
997 			if (!found)
998 				retval = set_errno(ESRCH);
999 		}
1000 		mutex_exit(&p->p_lock);
1001 		break;
1002 
1003 	case P_PID:
1004 		/*
1005 		 * Get process
1006 		 */
1007 		mutex_enter(&pidlock);
1008 
1009 		if (id == P_MYID)
1010 			p = curproc;
1011 		else
1012 			p = prfind(id);
1013 
1014 		if (p == NULL) {
1015 			mutex_exit(&pidlock);
1016 			return (set_errno(ESRCH));
1017 		}
1018 
1019 		mutex_enter(&p->p_lock);
1020 		t = p->p_tlist;
1021 		if (t == NULL)
1022 			retval = set_errno(ESRCH);
1023 		else
1024 			retval = lgrp_home_thread(t);
1025 		mutex_exit(&p->p_lock);
1026 
1027 		mutex_exit(&pidlock);
1028 
1029 		break;
1030 
1031 	default:
1032 		retval = set_errno(EINVAL);
1033 		break;
1034 	}
1035 
1036 	return (retval);
1037 }
1038 
1039 
1040 /*
1041  * Return latency between "from" and "to" lgroups
1042  *
1043  * This latency number can only be used for relative comparison
1044  * between lgroups on the running system, cannot be used across platforms,
1045  * and may not reflect the actual latency.  It is platform and implementation
1046  * specific, so platform gets to decide its value.  It would be nice if the
1047  * number was at least proportional to make comparisons more meaningful though.
1048  */
1049 int
1050 lgrp_latency(lgrp_id_t from, lgrp_id_t to)
1051 {
1052 	lgrp_t		*from_lgrp;
1053 	int		i;
1054 	int		latency;
1055 	int		latency_max;
1056 	lgrp_t		*to_lgrp;
1057 
1058 	ASSERT(MUTEX_HELD(&cpu_lock));
1059 
1060 	if (from < 0 || to < 0)
1061 		return (set_errno(EINVAL));
1062 
1063 	if (from > lgrp_alloc_max || to > lgrp_alloc_max)
1064 		return (set_errno(ESRCH));
1065 
1066 	from_lgrp = lgrp_table[from];
1067 	to_lgrp = lgrp_table[to];
1068 
1069 	if (!LGRP_EXISTS(from_lgrp) || !LGRP_EXISTS(to_lgrp)) {
1070 		return (set_errno(ESRCH));
1071 	}
1072 
1073 	/*
1074 	 * Get latency for same lgroup
1075 	 */
1076 	if (from == to) {
1077 		latency = from_lgrp->lgrp_latency;
1078 		return (latency);
1079 	}
1080 
1081 	/*
1082 	 * Get latency between leaf lgroups
1083 	 */
1084 	if (from_lgrp->lgrp_childcnt == 0 && to_lgrp->lgrp_childcnt == 0)
1085 		return (lgrp_plat_latency(from_lgrp->lgrp_plathand,
1086 		    to_lgrp->lgrp_plathand));
1087 
1088 	/*
1089 	 * Determine max latency between resources in two lgroups
1090 	 */
1091 	latency_max = 0;
1092 	for (i = 0; i <= lgrp_alloc_max; i++) {
1093 		lgrp_t	*from_rsrc;
1094 		int	j;
1095 		lgrp_t	*to_rsrc;
1096 
1097 		from_rsrc = lgrp_table[i];
1098 		if (!LGRP_EXISTS(from_rsrc) ||
1099 		    !klgrpset_ismember(from_lgrp->lgrp_set[LGRP_RSRC_CPU], i))
1100 			continue;
1101 
1102 		for (j = 0; j <= lgrp_alloc_max; j++) {
1103 			to_rsrc = lgrp_table[j];
1104 			if (!LGRP_EXISTS(to_rsrc) ||
1105 			    klgrpset_ismember(to_lgrp->lgrp_set[LGRP_RSRC_MEM],
1106 			    j) == 0)
1107 				continue;
1108 			latency = lgrp_plat_latency(from_rsrc->lgrp_plathand,
1109 			    to_rsrc->lgrp_plathand);
1110 			if (latency > latency_max)
1111 				latency_max = latency;
1112 		}
1113 	}
1114 	return (latency_max);
1115 }
1116 
1117 
1118 /*
1119  * Return lgroup interface version number
1120  * 0 - none
1121  * 1 - original
1122  * 2 - lgrp_latency_cookie() and lgrp_resources() added
1123  */
1124 int
1125 lgrp_version(int version)
1126 {
1127 	/*
1128 	 * Return LGRP_VER_NONE when requested version isn't supported
1129 	 */
1130 	if (version < LGRP_VER_NONE || version > LGRP_VER_CURRENT)
1131 		return (LGRP_VER_NONE);
1132 
1133 	/*
1134 	 * Return current version when LGRP_VER_NONE passed in
1135 	 */
1136 	if (version == LGRP_VER_NONE)
1137 		return (LGRP_VER_CURRENT);
1138 
1139 	/*
1140 	 * Otherwise, return supported version.
1141 	 */
1142 	return (version);
1143 }
1144 
1145 
1146 /*
1147  * Snapshot of lgroup hierarchy
1148  *
1149  * One snapshot is kept and is based on the kernel's native data model, so
1150  * a 32-bit snapshot is kept for the 32-bit kernel and a 64-bit one for the
1151  * 64-bit kernel.  If a 32-bit user wants a snapshot from the 64-bit kernel,
1152  * the kernel generates a 32-bit snapshot from the data in its 64-bit snapshot.
1153  *
1154  * The format is defined by lgroup snapshot header and the layout of
1155  * the snapshot in memory is as follows:
1156  * 1) lgroup snapshot header
1157  *    - specifies format of snapshot
1158  *    - defined by lgrp_snapshot_header_t
1159  * 2) lgroup info array
1160  *    - contains information about each lgroup
1161  *    - one element for each lgroup
1162  *    - each element is defined by lgrp_info_t
1163  * 3) lgroup CPU ID array
1164  *    - contains list (array) of CPU IDs for each lgroup
1165  *    - lgrp_info_t points into array and specifies how many CPUs belong to
1166  *      given lgroup
1167  * 4) lgroup parents array
1168  *    - contains lgroup bitmask of parents for each lgroup
1169  *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1170  * 5) lgroup children array
1171  *    - contains lgroup bitmask of children for each lgroup
1172  *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1173  * 6) lgroup resources array
1174  *    - contains lgroup bitmask of resources for each lgroup
1175  *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1176  * 7) lgroup latency table
1177  *    - contains latency from each lgroup to each of other lgroups
1178  *
1179  * NOTE:  Must use nlgrpsmax for per lgroup data structures because lgroups
1180  *	  may be sparsely allocated.
1181  */
1182 lgrp_snapshot_header_t	*lgrp_snap = NULL;	/* lgroup snapshot */
1183 static kmutex_t		lgrp_snap_lock;		/* snapshot lock */
1184 
1185 
1186 /*
1187  * Take a snapshot of lgroup hierarchy and return size of buffer
1188  * needed to hold snapshot
1189  */
1190 static int
1191 lgrp_snapshot(void)
1192 {
1193 	size_t		bitmask_size;
1194 	size_t		bitmasks_size;
1195 	size_t		bufsize;
1196 	int		cpu_index;
1197 	size_t		cpuids_size;
1198 	int		i;
1199 	int		j;
1200 	size_t		info_size;
1201 	size_t		lats_size;
1202 	ulong_t		*lgrp_children;
1203 	processorid_t	*lgrp_cpuids;
1204 	lgrp_info_t	*lgrp_info;
1205 	int		**lgrp_lats;
1206 	ulong_t		*lgrp_parents;
1207 	ulong_t		*lgrp_rsets;
1208 	ulong_t		*lgrpset;
1209 	int		snap_ncpus;
1210 	int		snap_nlgrps;
1211 	int		snap_nlgrpsmax;
1212 	size_t		snap_hdr_size;
1213 #ifdef	_SYSCALL32_IMPL
1214 	model_t		model = DATAMODEL_NATIVE;
1215 
1216 	/*
1217 	 * Have up-to-date snapshot, so check to see whether caller is 32-bit
1218 	 * program and need to return size of 32-bit snapshot now.
1219 	 */
1220 	model = get_udatamodel();
1221 	if (model == DATAMODEL_ILP32 && lgrp_snap &&
1222 	    lgrp_snap->ss_gen == lgrp_gen) {
1223 
1224 		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1225 
1226 		/*
1227 		 * Calculate size of buffer needed for 32-bit snapshot,
1228 		 * rounding up size of each object to allow for alignment
1229 		 * of next object in buffer.
1230 		 */
1231 		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
1232 		    sizeof (caddr32_t));
1233 		info_size =
1234 		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
1235 		    sizeof (processorid_t));
1236 		cpuids_size =
1237 		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
1238 		    sizeof (ulong_t));
1239 
1240 		/*
1241 		 * lgroup bitmasks needed for parents, children, and resources
1242 		 * for each lgroup and pset lgroup set
1243 		 */
1244 		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1245 		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
1246 		    snap_nlgrpsmax) + 1) * bitmask_size;
1247 
1248 		/*
1249 		 * Size of latency table and buffer
1250 		 */
1251 		lats_size = snap_nlgrpsmax * sizeof (caddr32_t) +
1252 		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);
1253 
1254 		bufsize = snap_hdr_size + info_size + cpuids_size +
1255 		    bitmasks_size + lats_size;
1256 		return (bufsize);
1257 	}
1258 #endif	/* _SYSCALL32_IMPL */
1259 
1260 	/*
1261 	 * Check whether snapshot is up-to-date
1262 	 * Free it and take another one if not
1263 	 */
1264 	if (lgrp_snap) {
1265 		if (lgrp_snap->ss_gen == lgrp_gen)
1266 			return (lgrp_snap->ss_size);
1267 
1268 		kmem_free(lgrp_snap, lgrp_snap->ss_size);
1269 		lgrp_snap = NULL;
1270 	}
1271 
1272 	/*
1273 	 * Allocate memory for snapshot
1274 	 * w/o holding cpu_lock while waiting for memory
1275 	 */
1276 	while (lgrp_snap == NULL) {
1277 		int	old_generation;
1278 
1279 		/*
1280 		 * Take snapshot of lgroup generation number
1281 		 * and configuration size dependent information
1282 		 * NOTE: Only count number of online CPUs,
1283 		 * since only online CPUs appear in lgroups.
1284 		 */
1285 		mutex_enter(&cpu_lock);
1286 		old_generation = lgrp_gen;
1287 		snap_ncpus = ncpus_online;
1288 		snap_nlgrps = nlgrps;
1289 		snap_nlgrpsmax = nlgrpsmax;
1290 		mutex_exit(&cpu_lock);
1291 
1292 		/*
1293 		 * Calculate size of buffer needed for snapshot,
1294 		 * rounding up size of each object to allow for alignment
1295 		 * of next object in buffer.
1296 		 */
1297 		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
1298 		    sizeof (void *));
1299 		info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
1300 		    sizeof (processorid_t));
1301 		cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
1302 		    sizeof (ulong_t));
1303 		/*
1304 		 * lgroup bitmasks needed for pset lgroup set and  parents,
1305 		 * children, and resource sets for each lgroup
1306 		 */
1307 		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1308 		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
1309 		    snap_nlgrpsmax) + 1) * bitmask_size;
1310 
1311 		/*
1312 		 * Size of latency table and buffer
1313 		 */
1314 		lats_size = snap_nlgrpsmax * sizeof (int *) +
1315 		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);
1316 
1317 		bufsize = snap_hdr_size + info_size + cpuids_size +
1318 		    bitmasks_size + lats_size;
1319 
1320 		/*
1321 		 * Allocate memory for buffer
1322 		 */
1323 		lgrp_snap = kmem_zalloc(bufsize, KM_NOSLEEP);
1324 		if (lgrp_snap == NULL)
1325 			return (set_errno(ENOMEM));
1326 
1327 		/*
1328 		 * Check whether generation number has changed
1329 		 */
1330 		mutex_enter(&cpu_lock);
1331 		if (lgrp_gen == old_generation)
1332 			break;		/* hasn't change, so done. */
1333 
1334 		/*
1335 		 * Generation number changed, so free memory and try again.
1336 		 */
1337 		mutex_exit(&cpu_lock);
1338 		kmem_free(lgrp_snap, bufsize);
1339 		lgrp_snap = NULL;
1340 	}
1341 
1342 	/*
1343 	 * Fill in lgroup snapshot header
1344 	 * (including pointers to tables of lgroup info, CPU IDs, and parents
1345 	 * and children)
1346 	 */
1347 	lgrp_snap->ss_version = LGRP_VER_CURRENT;
1348 
1349 	/*
1350 	 * XXX For now, liblgrp only needs to know whether the hierarchy
1351 	 * XXX only has one level or not
1352 	 */
1353 	if (snap_nlgrps == 1)
1354 		lgrp_snap->ss_levels = 1;
1355 	else
1356 		lgrp_snap->ss_levels = 2;
1357 
1358 	lgrp_snap->ss_root = LGRP_ROOTID;
1359 
1360 	lgrp_snap->ss_nlgrps = lgrp_snap->ss_nlgrps_os = snap_nlgrps;
1361 	lgrp_snap->ss_nlgrps_max = snap_nlgrpsmax;
1362 	lgrp_snap->ss_ncpus = snap_ncpus;
1363 	lgrp_snap->ss_gen = lgrp_gen;
1364 	lgrp_snap->ss_view = LGRP_VIEW_OS;
1365 	lgrp_snap->ss_pset = 0;		/* NOTE: caller should set if needed */
1366 	lgrp_snap->ss_size = bufsize;
1367 	lgrp_snap->ss_magic = (uintptr_t)lgrp_snap;
1368 
1369 	lgrp_snap->ss_info = lgrp_info =
1370 	    (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
1371 
1372 	lgrp_snap->ss_cpuids = lgrp_cpuids =
1373 	    (processorid_t *)((uintptr_t)lgrp_info + info_size);
1374 
1375 	lgrp_snap->ss_lgrpset = lgrpset =
1376 	    (ulong_t *)((uintptr_t)lgrp_cpuids + cpuids_size);
1377 
1378 	lgrp_snap->ss_parents = lgrp_parents =
1379 	    (ulong_t *)((uintptr_t)lgrpset + bitmask_size);
1380 
1381 	lgrp_snap->ss_children = lgrp_children =
1382 	    (ulong_t *)((uintptr_t)lgrp_parents + (snap_nlgrpsmax *
1383 	    bitmask_size));
1384 
1385 	lgrp_snap->ss_rsets = lgrp_rsets =
1386 	    (ulong_t *)((uintptr_t)lgrp_children + (snap_nlgrpsmax *
1387 	    bitmask_size));
1388 
1389 	lgrp_snap->ss_latencies = lgrp_lats =
1390 	    (int **)((uintptr_t)lgrp_rsets + (LGRP_RSRC_COUNT *
1391 		snap_nlgrpsmax * bitmask_size));
1392 
1393 	/*
1394 	 * Fill in lgroup information
1395 	 */
1396 	cpu_index = 0;
1397 	for (i = 0; i < snap_nlgrpsmax; i++) {
1398 		struct cpu	*cp;
1399 		int		cpu_count;
1400 		struct cpu	*head;
1401 		int		k;
1402 		lgrp_t		*lgrp;
1403 
1404 		lgrp = lgrp_table[i];
1405 		if (!LGRP_EXISTS(lgrp)) {
1406 			bzero(&lgrp_info[i], sizeof (lgrp_info[i]));
1407 			lgrp_info[i].info_lgrpid = LGRP_NONE;
1408 			continue;
1409 		}
1410 
1411 		lgrp_info[i].info_lgrpid = i;
1412 		lgrp_info[i].info_latency = lgrp->lgrp_latency;
1413 
1414 		/*
1415 		 * Fill in parents, children, and lgroup resources
1416 		 */
1417 		lgrp_info[i].info_parents =
1418 		    (ulong_t *)((uintptr_t)lgrp_parents + (i * bitmask_size));
1419 
1420 		if (lgrp->lgrp_parent)
1421 			BT_SET(lgrp_info[i].info_parents,
1422 			    lgrp->lgrp_parent->lgrp_id);
1423 
1424 		lgrp_info[i].info_children =
1425 		    (ulong_t *)((uintptr_t)lgrp_children + (i * bitmask_size));
1426 
1427 		for (j = 0; j < snap_nlgrpsmax; j++)
1428 			if (klgrpset_ismember(lgrp->lgrp_children, j))
1429 				BT_SET(lgrp_info[i].info_children, j);
1430 
1431 		lgrp_info[i].info_rset =
1432 		    (ulong_t *)((uintptr_t)lgrp_rsets +
1433 		    (i * LGRP_RSRC_COUNT * bitmask_size));
1434 
1435 		for (j = 0; j < LGRP_RSRC_COUNT; j++) {
1436 			ulong_t	*rset;
1437 
1438 			rset = (ulong_t *)((uintptr_t)lgrp_info[i].info_rset +
1439 			    (j * bitmask_size));
1440 			for (k = 0; k < snap_nlgrpsmax; k++)
1441 				if (klgrpset_ismember(lgrp->lgrp_set[j], k))
1442 					BT_SET(rset, k);
1443 		}
1444 
1445 		/*
1446 		 * Fill in CPU IDs
1447 		 */
1448 		cpu_count = 0;
1449 		lgrp_info[i].info_cpuids = NULL;
1450 		cp = head = lgrp->lgrp_cpu;
1451 		if (head != NULL) {
1452 			lgrp_info[i].info_cpuids = &lgrp_cpuids[cpu_index];
1453 			do {
1454 				lgrp_cpuids[cpu_index] = cp->cpu_id;
1455 				cpu_index++;
1456 				cpu_count++;
1457 				cp = cp->cpu_next_lgrp;
1458 			} while (cp != head);
1459 		}
1460 		ASSERT(cpu_count == lgrp->lgrp_cpucnt);
1461 		lgrp_info[i].info_ncpus = cpu_count;
1462 
1463 		/*
1464 		 * Fill in memory sizes for lgroups that directly contain
1465 		 * memory
1466 		 */
1467 		if (klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM], i)) {
1468 			lgrp_info[i].info_mem_free =
1469 			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
1470 			lgrp_info[i].info_mem_install =
1471 			    lgrp_mem_size(i, LGRP_MEM_SIZE_INSTALL);
1472 		}
1473 
1474 		/*
1475 		 * Fill in latency table and buffer
1476 		 */
1477 		lgrp_lats[i] = (int *)((uintptr_t)lgrp_lats + snap_nlgrpsmax *
1478 		    sizeof (int *) + i * snap_nlgrpsmax * sizeof (int));
1479 		for (j = 0; j < snap_nlgrpsmax; j++) {
1480 			lgrp_t	*to;
1481 
1482 			to = lgrp_table[j];
1483 			if (!LGRP_EXISTS(to))
1484 				continue;
1485 			lgrp_lats[i][j] = lgrp_latency(lgrp->lgrp_id,
1486 			    to->lgrp_id);
1487 		}
1488 	}
1489 	ASSERT(cpu_index == snap_ncpus);
1490 
1491 
1492 	mutex_exit(&cpu_lock);
1493 
1494 #ifdef	_SYSCALL32_IMPL
1495 	/*
1496 	 * Check to see whether caller is 32-bit program and need to return
1497 	 * size of 32-bit snapshot now that snapshot has been taken/updated.
1498 	 * May not have been able to do this earlier if snapshot was out of
1499 	 * date or didn't exist yet.
1500 	 */
1501 	if (model == DATAMODEL_ILP32) {
1502 
1503 		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1504 
1505 		/*
1506 		 * Calculate size of buffer needed for 32-bit snapshot,
1507 		 * rounding up size of each object to allow for alignment
1508 		 * of next object in buffer.
1509 		 */
1510 		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
1511 		    sizeof (caddr32_t));
1512 		info_size =
1513 		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
1514 		    sizeof (processorid_t));
1515 		cpuids_size =
1516 		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
1517 		    sizeof (ulong_t));
1518 
1519 		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1520 		bitmasks_size = (((2 + LGRP_RSRC_COUNT) * snap_nlgrpsmax) +
1521 		    1) * bitmask_size;
1522 
1523 
1524 		/*
1525 		 * Size of latency table and buffer
1526 		 */
1527 		lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
1528 		    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));
1529 
1530 		bufsize = snap_hdr_size + info_size + cpuids_size +
1531 		    bitmasks_size + lats_size;
1532 		return (bufsize);
1533 	}
1534 #endif	/* _SYSCALL32_IMPL */
1535 
1536 	return (lgrp_snap->ss_size);
1537 }
1538 
1539 
1540 /*
1541  * Copy snapshot into given user buffer, fix up any pointers in buffer to point
1542  * into user instead of kernel address space, and return size of buffer
1543  * needed to hold snapshot
1544  */
1545 static int
1546 lgrp_snapshot_copy(char *buf, size_t bufsize)
1547 {
1548 	size_t			bitmask_size;
1549 	int			cpu_index;
1550 	size_t			cpuids_size;
1551 	int			i;
1552 	size_t			info_size;
1553 	lgrp_info_t		*lgrp_info;
1554 	int			retval;
1555 	size_t			snap_hdr_size;
1556 	int			snap_ncpus;
1557 	int			snap_nlgrpsmax;
1558 	lgrp_snapshot_header_t	*user_snap;
1559 	lgrp_info_t		*user_info;
1560 	lgrp_info_t		*user_info_buffer;
1561 	processorid_t		*user_cpuids;
1562 	ulong_t			*user_lgrpset;
1563 	ulong_t			*user_parents;
1564 	ulong_t			*user_children;
1565 	int			**user_lats;
1566 	int			**user_lats_buffer;
1567 	ulong_t			*user_rsets;
1568 
1569 	if (lgrp_snap == NULL)
1570 		return (0);
1571 
1572 	if (buf == NULL || bufsize <= 0)
1573 		return (lgrp_snap->ss_size);
1574 
1575 	/*
1576 	 * User needs to try getting size of buffer again
1577 	 * because given buffer size is too small.
1578 	 * The lgroup hierarchy may have changed after they asked for the size
1579 	 * but before the snapshot was taken.
1580 	 */
1581 	if (bufsize < lgrp_snap->ss_size)
1582 		return (set_errno(EAGAIN));
1583 
1584 	snap_ncpus = lgrp_snap->ss_ncpus;
1585 	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1586 
1587 	/*
1588 	 * Fill in lgrpset now because caller may have change psets
1589 	 */
1590 	kpreempt_disable();
1591 	for (i = 0; i < snap_nlgrpsmax; i++) {
1592 		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
1593 		    i)) {
1594 			BT_SET(lgrp_snap->ss_lgrpset, i);
1595 		}
1596 	}
1597 	kpreempt_enable();
1598 
1599 	/*
1600 	 * Copy lgroup snapshot (snapshot header, lgroup info, and CPU IDs)
1601 	 * into user buffer all at once
1602 	 */
1603 	if (copyout(lgrp_snap, buf, lgrp_snap->ss_size) != 0)
1604 		return (set_errno(EFAULT));
1605 
1606 	/*
1607 	 * Round up sizes of lgroup snapshot header and info for alignment
1608 	 */
1609 	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
1610 	    sizeof (void *));
1611 	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
1612 	    sizeof (processorid_t));
1613 	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
1614 	    sizeof (ulong_t));
1615 
1616 	bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1617 
1618 	/*
1619 	 * Calculate pointers into user buffer for lgroup snapshot header,
1620 	 * info, and CPU IDs
1621 	 */
1622 	user_snap = (lgrp_snapshot_header_t *)buf;
1623 	user_info = (lgrp_info_t *)((uintptr_t)user_snap + snap_hdr_size);
1624 	user_cpuids = (processorid_t *)((uintptr_t)user_info + info_size);
1625 	user_lgrpset = (ulong_t *)((uintptr_t)user_cpuids + cpuids_size);
1626 	user_parents = (ulong_t *)((uintptr_t)user_lgrpset + bitmask_size);
1627 	user_children = (ulong_t *)((uintptr_t)user_parents +
1628 	    (snap_nlgrpsmax * bitmask_size));
1629 	user_rsets = (ulong_t *)((uintptr_t)user_children +
1630 	    (snap_nlgrpsmax * bitmask_size));
1631 	user_lats = (int **)((uintptr_t)user_rsets +
1632 	    (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size));
1633 
1634 	/*
1635 	 * Copyout magic number (ie. pointer to beginning of buffer)
1636 	 */
1637 	if (copyout(&buf, &user_snap->ss_magic, sizeof (buf)) != 0)
1638 		return (set_errno(EFAULT));
1639 
1640 	/*
1641 	 * Fix up pointers in user buffer to point into user buffer
1642 	 * not kernel snapshot
1643 	 */
1644 	if (copyout(&user_info, &user_snap->ss_info, sizeof (user_info)) != 0)
1645 		return (set_errno(EFAULT));
1646 
1647 	if (copyout(&user_cpuids, &user_snap->ss_cpuids,
1648 	    sizeof (user_cpuids)) != 0)
1649 		return (set_errno(EFAULT));
1650 
1651 	if (copyout(&user_lgrpset, &user_snap->ss_lgrpset,
1652 	    sizeof (user_lgrpset)) != 0)
1653 		return (set_errno(EFAULT));
1654 
1655 	if (copyout(&user_parents, &user_snap->ss_parents,
1656 	    sizeof (user_parents)) != 0)
1657 		return (set_errno(EFAULT));
1658 
1659 	if (copyout(&user_children, &user_snap->ss_children,
1660 	    sizeof (user_children)) != 0)
1661 		return (set_errno(EFAULT));
1662 
1663 	if (copyout(&user_rsets, &user_snap->ss_rsets,
1664 	    sizeof (user_rsets)) != 0)
1665 		return (set_errno(EFAULT));
1666 
1667 	if (copyout(&user_lats, &user_snap->ss_latencies,
1668 	    sizeof (user_lats)) != 0)
1669 		return (set_errno(EFAULT));
1670 
1671 	/*
1672 	 * Make copies of lgroup info and latency table, fix up pointers,
1673 	 * and then copy them into user buffer
1674 	 */
1675 	user_info_buffer = kmem_zalloc(info_size, KM_NOSLEEP);
1676 	if (user_info_buffer == NULL)
1677 		return (set_errno(ENOMEM));
1678 
1679 	user_lats_buffer = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
1680 	    KM_NOSLEEP);
1681 	if (user_lats_buffer == NULL) {
1682 		kmem_free(user_info_buffer, info_size);
1683 		return (set_errno(ENOMEM));
1684 	}
1685 
1686 	lgrp_info = (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
1687 	bcopy(lgrp_info, user_info_buffer, info_size);
1688 
1689 	cpu_index = 0;
1690 	for (i = 0; i < snap_nlgrpsmax; i++) {
1691 		ulong_t	*snap_rset;
1692 
1693 		/*
1694 		 * Skip non-existent lgroups
1695 		 */
1696 		if (user_info_buffer[i].info_lgrpid == LGRP_NONE)
1697 			continue;
1698 
1699 		/*
1700 		 * Update free memory size since it changes frequently
1701 		 * Only do so for lgroups directly containing memory
1702 		 *
1703 		 * NOTE: This must be done before changing the pointers to
1704 		 *	 point into user space since we need to dereference
1705 		 *	 lgroup resource set
1706 		 */
1707 		snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
1708 		    BT_BITOUL(snap_nlgrpsmax)];
1709 		if (BT_TEST(snap_rset, i))
1710 			user_info_buffer[i].info_mem_free =
1711 			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
1712 
1713 		/*
1714 		 * Fix up pointers to parents, children, resources, and
1715 		 * latencies
1716 		 */
1717 		user_info_buffer[i].info_parents =
1718 		    (ulong_t *)((uintptr_t)user_parents + (i * bitmask_size));
1719 		user_info_buffer[i].info_children =
1720 		    (ulong_t *)((uintptr_t)user_children + (i * bitmask_size));
1721 		user_info_buffer[i].info_rset =
1722 		    (ulong_t *)((uintptr_t)user_rsets +
1723 		    (i * LGRP_RSRC_COUNT * bitmask_size));
1724 		user_lats_buffer[i] = (int *)((uintptr_t)user_lats +
1725 		    (snap_nlgrpsmax * sizeof (int *)) + (i * snap_nlgrpsmax *
1726 		    sizeof (int)));
1727 
1728 		/*
1729 		 * Fix up pointer to CPU IDs
1730 		 */
1731 		if (user_info_buffer[i].info_ncpus == 0) {
1732 			user_info_buffer[i].info_cpuids = NULL;
1733 			continue;
1734 		}
1735 		user_info_buffer[i].info_cpuids = &user_cpuids[cpu_index];
1736 		cpu_index += user_info_buffer[i].info_ncpus;
1737 	}
1738 	ASSERT(cpu_index == snap_ncpus);
1739 
1740 	/*
1741 	 * Copy lgroup info and latency table with pointers fixed up to point
1742 	 * into user buffer out to user buffer now
1743 	 */
1744 	retval = lgrp_snap->ss_size;
1745 	if (copyout(user_info_buffer, user_info, info_size) != 0)
1746 		retval = set_errno(EFAULT);
1747 	kmem_free(user_info_buffer, info_size);
1748 
1749 	if (copyout(user_lats_buffer, user_lats, snap_nlgrpsmax *
1750 	    sizeof (int *)) != 0)
1751 		retval = set_errno(EFAULT);
1752 	kmem_free(user_lats_buffer, snap_nlgrpsmax * sizeof (int *));
1753 
1754 	return (retval);
1755 }
1756 
1757 
#ifdef	_SYSCALL32_IMPL
/*
 * Make 32-bit copy of snapshot, fix up any pointers in buffer to point
 * into user instead of kernel address space, copy 32-bit snapshot into
 * given user buffer, and return size of buffer needed to hold snapshot
 *
 * buf		32-bit user address of buffer to receive the snapshot
 * bufsize	size of that buffer in bytes
 *
 * Returns 0 if there is no snapshot, the 32-bit snapshot size if no buffer
 * was given or once the copy has succeeded, and the result of set_errno()
 * with EAGAIN (buffer too small), ENOMEM (no memory for temporary buffers),
 * or EFAULT (copyout failed) otherwise.
 */
static int
lgrp_snapshot_copy32(caddr32_t buf, size32_t bufsize)
{
	size32_t			bitmask_size;
	size32_t			bitmasks_size;
	size32_t			children_size;
	int				cpu_index;
	size32_t			cpuids_size;
	int				i;
	int				j;
	size32_t			info_size;
	size32_t			lats_size;
	lgrp_info_t			*lgrp_info;
	lgrp_snapshot_header32_t	*lgrp_snap32;
	lgrp_info32_t			*lgrp_info32;
	processorid_t			*lgrp_cpuids32;
	caddr32_t			*lgrp_lats32;
	int				**lgrp_lats32_kernel;
	uint_t				*lgrp_set32;
	uint_t				*lgrp_parents32;
	uint_t				*lgrp_children32;
	uint_t				*lgrp_rsets32;
	size32_t			parents_size;
	size32_t			rsets_size;
	size32_t			set_size;
	size32_t			snap_hdr_size;
	int				snap_ncpus;
	int				snap_nlgrpsmax;
	size32_t			snap_size;

	if (lgrp_snap == NULL)
		return (0);

	snap_ncpus = lgrp_snap->ss_ncpus;
	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

	/*
	 * Calculate size of buffer needed for 32-bit snapshot,
	 * rounding up size of each object to allow for alignment
	 * of next object in buffer.
	 */
	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
	    sizeof (caddr32_t));
	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
	    sizeof (processorid_t));
	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
	    sizeof (ulong_t));

	bitmask_size = BT_SIZEOFMAP32(snap_nlgrpsmax);

	set_size = bitmask_size;
	parents_size = snap_nlgrpsmax * bitmask_size;
	children_size = snap_nlgrpsmax * bitmask_size;
	rsets_size = P2ROUNDUP(LGRP_RSRC_COUNT * snap_nlgrpsmax *
	    (int)bitmask_size, sizeof (caddr32_t));

	bitmasks_size = set_size + parents_size + children_size + rsets_size;

	/*
	 * Size of latency table and buffer
	 */
	lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
	    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));

	snap_size = snap_hdr_size + info_size + cpuids_size + bitmasks_size +
	    lats_size;

	if (buf == 0 || bufsize == 0) {
		return (snap_size);
	}

	/*
	 * User needs to try getting size of buffer again
	 * because given buffer size is too small.
	 * The lgroup hierarchy may have changed after they asked for the size
	 * but before the snapshot was taken.
	 */
	if (bufsize < snap_size)
		return (set_errno(EAGAIN));

	/*
	 * Make 32-bit copy of snapshot, fix up pointers to point into user
	 * buffer not kernel, and then copy whole thing into user buffer
	 */
	lgrp_snap32 = kmem_zalloc(snap_size, KM_NOSLEEP);
	if (lgrp_snap32 == NULL)
		return (set_errno(ENOMEM));

	/*
	 * Calculate pointers into 32-bit copy of snapshot
	 * for lgroup info, CPU IDs, pset lgroup bitmask, parents, children,
	 * resources, and latency table and buffer
	 */
	lgrp_info32 = (lgrp_info32_t *)((uintptr_t)lgrp_snap32 +
	    snap_hdr_size);
	lgrp_cpuids32 = (processorid_t *)((uintptr_t)lgrp_info32 + info_size);
	lgrp_set32 = (uint_t *)((uintptr_t)lgrp_cpuids32 + cpuids_size);
	lgrp_parents32 = (uint_t *)((uintptr_t)lgrp_set32 + set_size);
	lgrp_children32 = (uint_t *)((uintptr_t)lgrp_parents32 + parents_size);
	lgrp_rsets32 = (uint_t *)((uintptr_t)lgrp_children32 + children_size);
	lgrp_lats32 = (caddr32_t *)((uintptr_t)lgrp_rsets32 + rsets_size);

	/*
	 * Make temporary lgroup latency table of pointers for kernel to use
	 * to fill in rows of table with latencies from each lgroup
	 */
	lgrp_lats32_kernel =  kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
	    KM_NOSLEEP);
	if (lgrp_lats32_kernel == NULL) {
		kmem_free(lgrp_snap32, snap_size);
		return (set_errno(ENOMEM));
	}

	/*
	 * Fill in 32-bit lgroup snapshot header
	 * (with pointers into user's buffer for lgroup info, CPU IDs,
	 * bit masks, and latencies)
	 */
	lgrp_snap32->ss_version = lgrp_snap->ss_version;
	lgrp_snap32->ss_levels = lgrp_snap->ss_levels;
	lgrp_snap32->ss_nlgrps = lgrp_snap32->ss_nlgrps_os =
	    lgrp_snap->ss_nlgrps;
	lgrp_snap32->ss_nlgrps_max = snap_nlgrpsmax;
	lgrp_snap32->ss_root = lgrp_snap->ss_root;
	lgrp_snap32->ss_ncpus = lgrp_snap->ss_ncpus;
	lgrp_snap32->ss_gen = lgrp_snap->ss_gen;
	lgrp_snap32->ss_view = LGRP_VIEW_OS;
	lgrp_snap32->ss_size = snap_size;
	lgrp_snap32->ss_magic = buf;
	lgrp_snap32->ss_info = buf + snap_hdr_size;
	lgrp_snap32->ss_cpuids = lgrp_snap32->ss_info + info_size;
	lgrp_snap32->ss_lgrpset = lgrp_snap32->ss_cpuids + cpuids_size;
	lgrp_snap32->ss_parents = lgrp_snap32->ss_lgrpset + bitmask_size;
	lgrp_snap32->ss_children = lgrp_snap32->ss_parents +
	    (snap_nlgrpsmax * bitmask_size);
	lgrp_snap32->ss_rsets = lgrp_snap32->ss_children +
	    (snap_nlgrpsmax * bitmask_size);
	lgrp_snap32->ss_latencies = lgrp_snap32->ss_rsets +
	    (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size);

	/*
	 * Fill in lgrpset now because caller may have change psets
	 * (the 32-bit copy was just zero-filled, so only set bits here)
	 */
	kpreempt_disable();
	for (i = 0; i < snap_nlgrpsmax; i++) {
		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
		    i)) {
			BT_SET32(lgrp_set32, i);
		}
	}
	kpreempt_enable();

	/*
	 * Fill in 32-bit copy of lgroup info and fix up pointers
	 * to point into user's buffer instead of kernel's
	 */
	cpu_index = 0;
	lgrp_info = lgrp_snap->ss_info;
	for (i = 0; i < snap_nlgrpsmax; i++) {
		uint_t	*children;
		uint_t	*lgrp_rset;
		ulong_t	*mem_rset;
		uint_t	*parents;
		ulong_t	*snap_children;
		ulong_t	*snap_parents;
		ulong_t	*snap_rset;

		/*
		 * Skip non-existent lgroups
		 */
		if (lgrp_info[i].info_lgrpid == LGRP_NONE) {
			bzero(&lgrp_info32[i], sizeof (lgrp_info32[i]));
			lgrp_info32[i].info_lgrpid = LGRP_NONE;
			continue;
		}

		/*
		 * Fill in parents, children, lgroup resource set, and
		 * latencies from snapshot
		 *
		 * NOTE: Each bitmask in the kernel snapshot occupies
		 *	 BT_BITOUL(snap_nlgrpsmax) unsigned longs, so the
		 *	 bitmasks must be located using that stride (or the
		 *	 per-lgroup pointers saved in the snapshot) rather
		 *	 than by indexing one unsigned long per lgroup.
		 */
		snap_parents = lgrp_info[i].info_parents;
		snap_children = lgrp_info[i].info_children;
		snap_rset = (ulong_t *)((uintptr_t)lgrp_snap->ss_rsets +
		    (i * LGRP_RSRC_COUNT * BT_SIZEOFMAP(snap_nlgrpsmax)));

		parents = (uint_t *)((uintptr_t)lgrp_parents32 +
		    i * bitmask_size);
		children = (uint_t *)((uintptr_t)lgrp_children32 +
		    i * bitmask_size);
		lgrp_rset = (uint_t *)((uintptr_t)lgrp_rsets32 +
		    (i * LGRP_RSRC_COUNT * bitmask_size));
		lgrp_lats32_kernel[i] = (int *)((uintptr_t)lgrp_lats32 +
		    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
		    sizeof (int));
		for (j = 0; j < snap_nlgrpsmax; j++) {
			int	k;
			uint_t	*rset;

			if (BT_TEST(snap_parents, j))
				BT_SET32(parents, j);

			if (BT_TEST(snap_children, j))
				BT_SET32(children, j);

			for (k = 0; k < LGRP_RSRC_COUNT; k++) {
				rset = (uint_t *)((uintptr_t)lgrp_rset +
				    k * bitmask_size);
				if (BT_TEST(snap_rset +
				    k * BT_BITOUL(snap_nlgrpsmax), j))
					BT_SET32(rset, j);
			}

			lgrp_lats32_kernel[i][j] =
			    lgrp_snap->ss_latencies[i][j];
		}

		/*
		 * Fix up pointer to latency buffer
		 */
		lgrp_lats32[i] = lgrp_snap32->ss_latencies +
		    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
		    sizeof (int);

		/*
		 * Fix up pointers for parents, children, and resources
		 */
		lgrp_info32[i].info_parents = lgrp_snap32->ss_parents +
		    (i * bitmask_size);
		lgrp_info32[i].info_children = lgrp_snap32->ss_children +
		    (i * bitmask_size);
		lgrp_info32[i].info_rset = lgrp_snap32->ss_rsets +
		    (i * LGRP_RSRC_COUNT * bitmask_size);

		/*
		 * Fill in memory and CPU info
		 * Only fill in memory for lgroups directly containing memory
		 */
		mem_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
		    BT_BITOUL(snap_nlgrpsmax)];
		if (BT_TEST(mem_rset, i)) {
			lgrp_info32[i].info_mem_free = lgrp_mem_size(i,
			    LGRP_MEM_SIZE_FREE);
			lgrp_info32[i].info_mem_install =
			    lgrp_info[i].info_mem_install;
		}

		lgrp_info32[i].info_ncpus = lgrp_info[i].info_ncpus;

		lgrp_info32[i].info_lgrpid = lgrp_info[i].info_lgrpid;
		lgrp_info32[i].info_latency = lgrp_info[i].info_latency;

		if (lgrp_info32[i].info_ncpus == 0) {
			lgrp_info32[i].info_cpuids = 0;
			continue;
		}

		/*
		 * Fix up pointer for CPU IDs
		 */
		lgrp_info32[i].info_cpuids = lgrp_snap32->ss_cpuids +
		    (cpu_index * sizeof (processorid_t));
		cpu_index += lgrp_info32[i].info_ncpus;
	}
	ASSERT(cpu_index == snap_ncpus);

	/*
	 * Copy lgroup CPU IDs into 32-bit snapshot
	 * before copying it out into user's buffer
	 */
	bcopy(lgrp_snap->ss_cpuids, lgrp_cpuids32, cpuids_size);

	/*
	 * Copy 32-bit lgroup snapshot into user's buffer all at once
	 */
	if (copyout(lgrp_snap32, (void *)(uintptr_t)buf, snap_size) != 0) {
		kmem_free(lgrp_snap32, snap_size);
		kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));
		return (set_errno(EFAULT));
	}

	kmem_free(lgrp_snap32, snap_size);
	kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));

	return (snap_size);
}
#endif	/* _SYSCALL32_IMPL */
2043 
2044 
2045 int
2046 lgrpsys(int subcode, long ia, void *ap)
2047 {
2048 	size_t	bufsize;
2049 	int	latency;
2050 
2051 	switch (subcode) {
2052 
2053 	case LGRP_SYS_AFFINITY_GET:
2054 		return (lgrp_affinity_get((lgrp_affinity_args_t *)ap));
2055 
2056 	case LGRP_SYS_AFFINITY_SET:
2057 		return (lgrp_affinity_set((lgrp_affinity_args_t *)ap));
2058 
2059 	case LGRP_SYS_GENERATION:
2060 		return (lgrp_generation(ia));
2061 
2062 	case LGRP_SYS_HOME:
2063 		return (lgrp_home_get((idtype_t)ia, (id_t)(uintptr_t)ap));
2064 
2065 	case LGRP_SYS_LATENCY:
2066 		mutex_enter(&cpu_lock);
2067 		latency = lgrp_latency(ia, (lgrp_id_t)(uintptr_t)ap);
2068 		mutex_exit(&cpu_lock);
2069 		return (latency);
2070 
2071 	case LGRP_SYS_MEMINFO:
2072 		return (meminfo(ia, (struct meminfo *)ap));
2073 
2074 	case LGRP_SYS_VERSION:
2075 		return (lgrp_version(ia));
2076 
2077 	case LGRP_SYS_SNAPSHOT:
2078 		mutex_enter(&lgrp_snap_lock);
2079 		bufsize = lgrp_snapshot();
2080 		if (ap && ia > 0) {
2081 			if (get_udatamodel() == DATAMODEL_NATIVE)
2082 				bufsize = lgrp_snapshot_copy(ap, ia);
2083 #ifdef	_SYSCALL32_IMPL
2084 			else
2085 				bufsize = lgrp_snapshot_copy32(
2086 				    (caddr32_t)(uintptr_t)ap, ia);
2087 #endif	/* _SYSCALL32_IMPL */
2088 		}
2089 		mutex_exit(&lgrp_snap_lock);
2090 		return (bufsize);
2091 
2092 	default:
2093 		break;
2094 
2095 	}
2096 
2097 	return (set_errno(EINVAL));
2098 }
2099