xref: /titanic_50/usr/src/uts/common/syscall/lgrpsys.c (revision f841f6ad96ea6675d6c6b35c749eaac601799fdf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * lgroup system calls
31  */
32 
33 #include <sys/types.h>
34 #include <sys/errno.h>
35 #include <sys/sunddi.h>
36 #include <sys/systm.h>
37 #include <sys/mman.h>
38 #include <sys/cpupart.h>
39 #include <sys/lgrp.h>
40 #include <sys/lgrp_user.h>
41 #include <sys/promif.h>		/* for prom_printf() */
42 #include <sys/sysmacros.h>
43 
44 #include <vm/as.h>
45 
46 
47 /* definitions for mi_validity */
48 #define	VALID_ADDR	1
49 #define	VALID_REQ	2
50 
51 /*
52  * run through the given number of addresses and requests and return the
53  * corresponding memory information for each address
54  */
55 static int
56 meminfo(int addr_count, struct meminfo *mip)
57 {
58 	size_t		in_size, out_size, req_size, val_size;
59 	struct as	*as;
60 	struct hat	*hat;
61 	int		i, j, out_idx, info_count;
62 	lgrp_t		*lgrp;
63 	pfn_t		pfn;
64 	ssize_t		pgsz;
65 	int		*req_array, *val_array;
66 	uint64_t	*in_array, *out_array;
67 	uint64_t	addr, paddr;
68 	uintptr_t	vaddr;
69 	int		ret = 0;
70 	struct meminfo minfo;
71 #if defined(_SYSCALL32_IMPL)
72 	struct meminfo32 minfo32;
73 #endif
74 
75 	/*
76 	 * Make sure that there is at least one address to translate and
77 	 * limit how many virtual addresses the kernel can do per call
78 	 */
79 	if (addr_count < 1)
80 		return (set_errno(EINVAL));
81 	else if (addr_count > MAX_MEMINFO_CNT)
82 		addr_count = MAX_MEMINFO_CNT;
83 
84 	if (get_udatamodel() == DATAMODEL_NATIVE) {
85 		if (copyin(mip, &minfo, sizeof (struct meminfo)))
86 			return (set_errno(EFAULT));
87 	}
88 #if defined(_SYSCALL32_IMPL)
89 	else {
90 		bzero(&minfo, sizeof (minfo));
91 		if (copyin(mip, &minfo32, sizeof (struct meminfo32)))
92 			return (set_errno(EFAULT));
93 		minfo.mi_inaddr = (const uint64_t *)(uintptr_t)
94 		    minfo32.mi_inaddr;
95 		minfo.mi_info_req = (const uint_t *)(uintptr_t)
96 		    minfo32.mi_info_req;
97 		minfo.mi_info_count = minfo32.mi_info_count;
98 		minfo.mi_outdata = (uint64_t *)(uintptr_t)
99 		    minfo32.mi_outdata;
100 		minfo.mi_validity = (uint_t *)(uintptr_t)
101 		    minfo32.mi_validity;
102 	}
103 #endif
104 	/*
105 	 * all the input parameters have been copied in:-
106 	 * addr_count - number of input addresses
107 	 * minfo.mi_inaddr - array of input addresses
108 	 * minfo.mi_info_req - array of types of information requested
109 	 * minfo.mi_info_count - no. of pieces of info requested for each addr
110 	 * minfo.mi_outdata - array into which the results are placed
111 	 * minfo.mi_validity -  array containing bitwise result codes; 0th bit
112 	 *			evaluates validity of corresponding input
113 	 *			address, 1st bit validity of response to first
114 	 *			member of info_req, etc.
115 	 */
116 
117 	/* make sure mi_info_count is within limit */
118 	info_count = minfo.mi_info_count;
119 	if (info_count < 1 || info_count > MAX_MEMINFO_REQ)
120 		return (set_errno(EINVAL));
121 
122 	/*
123 	 * allocate buffer in_array for the input addresses and copy them in
124 	 */
125 	in_size = sizeof (uint64_t) * addr_count;
126 	in_array = kmem_alloc(in_size, KM_SLEEP);
127 	if (copyin(minfo.mi_inaddr, in_array, in_size)) {
128 		kmem_free(in_array, in_size);
129 		return (set_errno(EFAULT));
130 	}
131 
132 	/*
133 	 * allocate buffer req_array for the input info_reqs and copy them in
134 	 */
135 	req_size = sizeof (uint_t) * info_count;
136 	req_array = kmem_alloc(req_size, KM_SLEEP);
137 	if (copyin(minfo.mi_info_req, req_array, req_size)) {
138 		kmem_free(req_array, req_size);
139 		kmem_free(in_array, in_size);
140 		return (set_errno(EFAULT));
141 	}
142 
143 	/*
144 	 * allocate buffer out_array which holds the results and will have
145 	 * to be copied out later
146 	 */
147 	out_size = sizeof (uint64_t) * addr_count * info_count;
148 	out_array = kmem_alloc(out_size, KM_SLEEP);
149 
150 	/*
151 	 * allocate buffer val_array which holds the validity bits and will
152 	 * have to be copied out later
153 	 */
154 	val_size = sizeof (uint_t) * addr_count;
155 	val_array = kmem_alloc(val_size, KM_SLEEP);
156 
157 	if ((req_array[0] & MEMINFO_MASK) == MEMINFO_PLGRP) {
158 		/* find the corresponding lgroup for each physical address */
159 		for (i = 0; i < addr_count; i++) {
160 			paddr = in_array[i];
161 			pfn = btop(paddr);
162 			lgrp = lgrp_pfn_to_lgrp(pfn);
163 			if (lgrp) {
164 				out_array[i] = lgrp->lgrp_id;
165 				val_array[i] = VALID_ADDR | VALID_REQ;
166 			} else {
167 				out_array[i] = NULL;
168 				val_array[i] = 0;
169 			}
170 		}
171 	} else {
172 		/* get the corresponding memory info for each virtual address */
173 		as = curproc->p_as;
174 
175 		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
176 		hat = as->a_hat;
177 		for (i = out_idx = 0; i < addr_count; i++, out_idx +=
178 		    info_count) {
179 			addr = in_array[i];
180 			vaddr = (uintptr_t)(addr & ~PAGEOFFSET);
181 			if (!as_segat(as, (caddr_t)vaddr)) {
182 				val_array[i] = 0;
183 				continue;
184 			}
185 			val_array[i] = VALID_ADDR;
186 			pfn = hat_getpfnum(hat, (caddr_t)vaddr);
187 			if (pfn != PFN_INVALID) {
188 				paddr = (uint64_t)((pfn << PAGESHIFT) |
189 					(addr & PAGEOFFSET));
190 				for (j = 0; j < info_count; j++) {
191 					switch (req_array[j] & MEMINFO_MASK) {
192 					case MEMINFO_VPHYSICAL:
193 						/*
194 						 * return the physical address
195 						 * corresponding to the input
196 						 * virtual address
197 						 */
198 						out_array[out_idx + j] = paddr;
199 						val_array[i] |= VALID_REQ << j;
200 						break;
201 					case MEMINFO_VLGRP:
202 						/*
203 						 * return the lgroup of physical
204 						 * page corresponding to the
205 						 * input virtual address
206 						 */
207 						lgrp = lgrp_pfn_to_lgrp(pfn);
208 						if (lgrp) {
209 							out_array[out_idx + j] =
210 								lgrp->lgrp_id;
211 							val_array[i] |=
212 								VALID_REQ << j;
213 						}
214 						break;
215 					case MEMINFO_VPAGESIZE:
216 						/*
217 						 * return the size of physical
218 						 * page corresponding to the
219 						 * input virtual address
220 						 */
221 						pgsz = hat_getpagesize(hat,
222 							(caddr_t)vaddr);
223 						if (pgsz != -1) {
224 							out_array[out_idx + j] =
225 									pgsz;
226 							val_array[i] |=
227 								VALID_REQ << j;
228 						}
229 						break;
230 					case MEMINFO_VREPLCNT:
231 						/*
232 						 * for future use:-
233 						 * return the no. replicated
234 						 * physical pages corresponding
235 						 * to the input virtual address,
236 						 * so it is always 0 at the
237 						 * moment
238 						 */
239 						out_array[out_idx + j] = 0;
240 						val_array[i] |= VALID_REQ << j;
241 						break;
242 					case MEMINFO_VREPL:
243 						/*
244 						 * for future use:-
245 						 * return the nth physical
246 						 * replica of the specified
247 						 * virtual address
248 						 */
249 						break;
250 					case MEMINFO_VREPL_LGRP:
251 						/*
252 						 * for future use:-
253 						 * return the lgroup of nth
254 						 * physical replica of the
255 						 * specified virtual address
256 						 */
257 						break;
258 					case MEMINFO_PLGRP:
259 						/*
260 						 * this is for physical address
261 						 * only, shouldn't mix with
262 						 * virtual address
263 						 */
264 						break;
265 					default:
266 						break;
267 					}
268 				}
269 			}
270 		}
271 		AS_LOCK_EXIT(as, &as->a_lock);
272 	}
273 
274 	/* copy out the results and validity bits and free the buffers */
275 	if ((copyout(out_array, minfo.mi_outdata, out_size) != 0) ||
276 		(copyout(val_array, minfo.mi_validity, val_size) != 0))
277 		ret = set_errno(EFAULT);
278 
279 	kmem_free(in_array, in_size);
280 	kmem_free(out_array, out_size);
281 	kmem_free(req_array, req_size);
282 	kmem_free(val_array, val_size);
283 
284 	return (ret);
285 }
286 
287 
288 /*
289  * Initialize lgroup affinities for thread
290  */
291 void
292 lgrp_affinity_init(lgrp_affinity_t **bufaddr)
293 {
294 	if (bufaddr)
295 		*bufaddr = NULL;
296 }
297 
298 
299 /*
300  * Free lgroup affinities for thread and set to NULL
301  * just in case thread gets recycled
302  */
303 void
304 lgrp_affinity_free(lgrp_affinity_t **bufaddr)
305 {
306 	if (bufaddr && *bufaddr) {
307 		kmem_free(*bufaddr, nlgrpsmax * sizeof (lgrp_affinity_t));
308 		*bufaddr = NULL;
309 	}
310 }
311 
312 
#define	P_ANY	(-2)	/* cookie specifying any ID */
314 
315 
316 /*
317  * Find LWP with given ID in specified process and get its affinity for
318  * specified lgroup
319  */
320 lgrp_affinity_t
321 lgrp_affinity_get_thread(proc_t *p, id_t lwpid, lgrp_id_t lgrp)
322 {
323 	lgrp_affinity_t aff;
324 	int		found;
325 	kthread_t	*t;
326 
327 	ASSERT(MUTEX_HELD(&p->p_lock));
328 
329 	aff = LGRP_AFF_NONE;
330 	found = 0;
331 	t = p->p_tlist;
332 	/*
333 	 * The process may be executing in proc_exit() and its p->p_list may be
334 	 * already NULL.
335 	 */
336 	if (t == NULL)
337 		return (set_errno(ESRCH));
338 
339 	do {
340 		if (t->t_tid == lwpid || lwpid == P_ANY) {
341 			thread_lock(t);
342 			/*
343 			 * Check to see whether caller has permission to set
344 			 * affinity for LWP
345 			 */
346 			if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
347 				thread_unlock(t);
348 				return (set_errno(EPERM));
349 			}
350 
351 			if (t->t_lgrp_affinity)
352 				aff = t->t_lgrp_affinity[lgrp];
353 			thread_unlock(t);
354 			found = 1;
355 			break;
356 		}
357 	} while ((t = t->t_forw) != p->p_tlist);
358 	if (!found)
359 		aff = set_errno(ESRCH);
360 
361 	return (aff);
362 }
363 
364 
365 /*
366  * Get lgroup affinity for given LWP
367  */
368 lgrp_affinity_t
369 lgrp_affinity_get(lgrp_affinity_args_t *ap)
370 {
371 	lgrp_affinity_t		aff;
372 	lgrp_affinity_args_t	args;
373 	id_t			id;
374 	idtype_t		idtype;
375 	lgrp_id_t		lgrp;
376 	proc_t			*p;
377 	kthread_t		*t;
378 
379 	/*
380 	 * Copyin arguments
381 	 */
382 	if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
383 		return (set_errno(EFAULT));
384 
385 	id = args.id;
386 	idtype = args.idtype;
387 	lgrp = args.lgrp;
388 
389 	/*
390 	 * Check for invalid lgroup
391 	 */
392 	if (lgrp < 0 || lgrp == LGRP_NONE)
393 		return (set_errno(EINVAL));
394 
395 	/*
396 	 * Check for existing lgroup
397 	 */
398 	if (lgrp > lgrp_alloc_max)
399 		return (set_errno(ESRCH));
400 
401 	/*
402 	 * Get lgroup affinity for given LWP or process
403 	 */
404 	switch (idtype) {
405 
406 	case P_LWPID:
407 		/*
408 		 * LWP in current process
409 		 */
410 		p = curproc;
411 		mutex_enter(&p->p_lock);
412 		if (id != P_MYID)	/* different thread */
413 			aff = lgrp_affinity_get_thread(p, id, lgrp);
414 		else {			/* current thread */
415 			aff = LGRP_AFF_NONE;
416 			t = curthread;
417 			thread_lock(t);
418 			if (t->t_lgrp_affinity)
419 				aff = t->t_lgrp_affinity[lgrp];
420 			thread_unlock(t);
421 		}
422 		mutex_exit(&p->p_lock);
423 		break;
424 
425 	case P_PID:
426 		/*
427 		 * Process
428 		 */
429 		mutex_enter(&pidlock);
430 
431 		if (id == P_MYID)
432 			p = curproc;
433 		else {
434 			p = prfind(id);
435 			if (p == NULL) {
436 				mutex_exit(&pidlock);
437 				return (set_errno(ESRCH));
438 			}
439 		}
440 
441 		mutex_enter(&p->p_lock);
442 		aff = lgrp_affinity_get_thread(p, P_ANY, lgrp);
443 		mutex_exit(&p->p_lock);
444 
445 		mutex_exit(&pidlock);
446 		break;
447 
448 	default:
449 		aff = set_errno(EINVAL);
450 		break;
451 	}
452 
453 	return (aff);
454 }
455 
456 
457 /*
458  * Find lgroup for which this thread has most affinity in specified partition
459  */
460 lpl_t *
461 lgrp_affinity_best(kthread_t *t, struct cpupart *cpupart, lgrp_id_t start)
462 {
463 	lgrp_affinity_t	*affs;
464 	lgrp_affinity_t	best_aff;
465 	lpl_t		*best_lpl;
466 	lgrp_id_t	home;
467 	lgrp_id_t	lgrpid;
468 	lpl_t		*lpl;
469 
470 	ASSERT(t != NULL);
471 	ASSERT((MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0) ||
472 	    (MUTEX_HELD(&ttoproc(t)->p_lock) && THREAD_LOCK_HELD(t)));
473 	ASSERT(cpupart != NULL);
474 
475 	if (t->t_lgrp_affinity == NULL)
476 		return (NULL);
477 
478 	affs = t->t_lgrp_affinity;
479 
480 	/*
481 	 * Thread bound to CPU
482 	 */
483 	if (t->t_bind_cpu != PBIND_NONE) {
484 		cpu_t	*cp;
485 
486 		/*
487 		 * See whether thread has more affinity for root lgroup
488 		 * than lgroup containing CPU
489 		 */
490 		cp = cpu[t->t_bind_cpu];
491 		lpl = cp->cpu_lpl;
492 		lgrpid = LGRP_ROOTID;
493 		if (affs[lgrpid] > affs[lpl->lpl_lgrpid])
494 			return (&cpupart->cp_lgrploads[lgrpid]);
495 		return (lpl);
496 	}
497 
498 	/*
499 	 * Start searching at given lgroup
500 	 */
501 	ASSERT(start >= 0 && start <= lgrp_alloc_max);
502 	lgrpid = start;
503 
504 	/*
505 	 * Begin with home as best lgroup if it's root or in this pset
506 	 * Otherwise, use starting lgroup given above as best first.
507 	 */
508 	home = t->t_lpl->lpl_lgrpid;
509 	if (LGRP_CPUS_IN_PART(home, cpupart))
510 		best_lpl = &cpupart->cp_lgrploads[home];
511 	else
512 		best_lpl = &cpupart->cp_lgrploads[lgrpid];
513 
514 	best_aff = affs[best_lpl->lpl_lgrpid];
515 
516 	do {
517 		/*
518 		 * Skip any lgroups that don't have CPU resources
519 		 * in this processor set.
520 		 */
521 		if (!LGRP_CPUS_IN_PART(lgrpid, cpupart)) {
522 			if (++lgrpid > lgrp_alloc_max)
523 				lgrpid = 0;	/* wrap the search */
524 			continue;
525 		}
526 
527 		/*
528 		 * Find lgroup with most affinity
529 		 */
530 		lpl = &cpupart->cp_lgrploads[lgrpid];
531 		if (affs[lgrpid] > best_aff) {
532 			best_aff =  affs[lgrpid];
533 			best_lpl = lpl;
534 		}
535 
536 		if (++lgrpid > lgrp_alloc_max)
537 			lgrpid = 0;	/* wrap the search */
538 
539 	} while (lgrpid != start);
540 
541 	/*
542 	 * No lgroup (in this pset) with any affinity
543 	 */
544 	if (best_aff == LGRP_AFF_NONE)
545 		return (NULL);
546 
547 	lgrpid = best_lpl->lpl_lgrpid;
548 	ASSERT(LGRP_CPUS_IN_PART(lgrpid, cpupart) && best_lpl->lpl_ncpu > 0);
549 
550 	return (best_lpl);
551 }
552 
553 
554 /*
555  * Set thread's affinity for given lgroup
556  */
557 int
558 lgrp_affinity_set_thread(kthread_t *t, lgrp_id_t lgrp, lgrp_affinity_t aff,
559     lgrp_affinity_t **aff_buf)
560 {
561 	lpl_t		*best_lpl;
562 	lgrp_id_t	home;
563 	int		retval;
564 	lgrp_id_t	start;
565 
566 	ASSERT(t != NULL);
567 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
568 
569 	retval = 0;
570 
571 	thread_lock(t);
572 
573 	/*
574 	 * Check to see whether caller has permission to set affinity for
575 	 * thread
576 	 */
577 	if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
578 		thread_unlock(t);
579 		return (set_errno(EPERM));
580 	}
581 
582 	if (t->t_lgrp_affinity == NULL) {
583 		if (aff == LGRP_AFF_NONE) {
584 			thread_unlock(t);
585 			return (0);
586 		}
587 		ASSERT(aff_buf != NULL && *aff_buf != NULL);
588 		t->t_lgrp_affinity = *aff_buf;
589 		*aff_buf = NULL;
590 	}
591 
592 	t->t_lgrp_affinity[lgrp] = aff;
593 
594 	/*
595 	 * Select a new home if the thread's affinity is being cleared
596 	 */
597 	if (aff == LGRP_AFF_NONE) {
598 		lgrp_move_thread(t, lgrp_choose(t, t->t_cpupart), 1);
599 		thread_unlock(t);
600 		return (retval);
601 	}
602 
603 	/*
604 	 * Find lgroup for which thread has most affinity,
605 	 * starting after home
606 	 */
607 	home = t->t_lpl->lpl_lgrpid;
608 	start = home + 1;
609 	if (start > lgrp_alloc_max)
610 		start = 0;
611 
612 	best_lpl = lgrp_affinity_best(t, t->t_cpupart, start);
613 
614 	/*
615 	 * Rehome if found lgroup with more affinity than home
616 	 */
617 	if (best_lpl != NULL && best_lpl != t->t_lpl)
618 		lgrp_move_thread(t, best_lpl, 1);
619 
620 	thread_unlock(t);
621 
622 	return (retval);
623 }
624 
625 
626 /*
627  * Set process' affinity for specified lgroup
628  */
629 int
630 lgrp_affinity_set_proc(proc_t *p, lgrp_id_t lgrp, lgrp_affinity_t aff,
631     lgrp_affinity_t **aff_buf_array)
632 {
633 	lgrp_affinity_t	*buf;
634 	int		err = 0;
635 	int		i;
636 	int		retval;
637 	kthread_t	*t;
638 
639 	ASSERT(MUTEX_HELD(&pidlock) && MUTEX_HELD(&p->p_lock));
640 	ASSERT(aff_buf_array != NULL);
641 
642 	i = 0;
643 	t = p->p_tlist;
644 	if (t != NULL) {
645 		do {
646 			/*
647 			 * Set lgroup affinity for thread
648 			 */
649 			buf = aff_buf_array[i];
650 			retval = lgrp_affinity_set_thread(t, lgrp, aff, &buf);
651 
652 			if (err == 0 && retval != 0)
653 				err = retval;
654 
655 			/*
656 			 * Advance pointer to next buffer
657 			 */
658 			if (buf == NULL) {
659 				ASSERT(i < p->p_lwpcnt);
660 				aff_buf_array[i] = NULL;
661 				i++;
662 			}
663 
664 		} while ((t = t->t_forw) != p->p_tlist);
665 	}
666 	return (err);
667 }
668 
669 
670 /*
671  * Set LWP's or process' affinity for specified lgroup
672  *
673  * When setting affinities, pidlock, process p_lock, and thread_lock()
674  * need to be held in that order to protect target thread's pset, process,
675  * process contents, and thread contents.  thread_lock() does splhigh(),
676  * so it ends up having similiar effect as kpreempt_disable(), so it will
677  * protect calls to lgrp_move_thread() and lgrp_choose() from pset changes.
678  */
679 int
680 lgrp_affinity_set(lgrp_affinity_args_t *ap)
681 {
682 	lgrp_affinity_t		aff;
683 	lgrp_affinity_t		*aff_buf;
684 	lgrp_affinity_args_t	args;
685 	id_t			id;
686 	idtype_t		idtype;
687 	lgrp_id_t		lgrp;
688 	int			nthreads;
689 	proc_t			*p;
690 	int			retval;
691 
692 	/*
693 	 * Copyin arguments
694 	 */
695 	if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
696 		return (set_errno(EFAULT));
697 
698 	idtype = args.idtype;
699 	id = args.id;
700 	lgrp = args.lgrp;
701 	aff = args.aff;
702 
703 	/*
704 	 * Check for invalid lgroup
705 	 */
706 	if (lgrp < 0 || lgrp == LGRP_NONE)
707 		return (set_errno(EINVAL));
708 
709 	/*
710 	 * Check for existing lgroup
711 	 */
712 	if (lgrp > lgrp_alloc_max)
713 		return (set_errno(ESRCH));
714 
715 	/*
716 	 * Check for legal affinity
717 	 */
718 	if (aff != LGRP_AFF_NONE && aff != LGRP_AFF_WEAK &&
719 	    aff != LGRP_AFF_STRONG)
720 		return (set_errno(EINVAL));
721 
722 	/*
723 	 * Must be process or LWP ID
724 	 */
725 	if (idtype != P_LWPID && idtype != P_PID)
726 		return (set_errno(EINVAL));
727 
728 	/*
729 	 * Set given LWP's or process' affinity for specified lgroup
730 	 */
731 	switch (idtype) {
732 
733 	case P_LWPID:
734 		/*
735 		 * Allocate memory for thread's lgroup affinities
736 		 * ahead of time w/o holding locks
737 		 */
738 		aff_buf = kmem_zalloc(nlgrpsmax * sizeof (lgrp_affinity_t),
739 		    KM_SLEEP);
740 
741 		p = curproc;
742 
743 		/*
744 		 * Set affinity for thread
745 		 */
746 		mutex_enter(&p->p_lock);
747 		if (id == P_MYID) {		/* current thread */
748 			retval = lgrp_affinity_set_thread(curthread, lgrp, aff,
749 			    &aff_buf);
750 		} else if (p->p_tlist == NULL) {
751 			retval = set_errno(ESRCH);
752 		} else {			/* other thread */
753 			int		found = 0;
754 			kthread_t	*t;
755 
756 			t = p->p_tlist;
757 			do {
758 				if (t->t_tid == id) {
759 					retval = lgrp_affinity_set_thread(t,
760 					    lgrp, aff, &aff_buf);
761 					found = 1;
762 					break;
763 				}
764 			} while ((t = t->t_forw) != p->p_tlist);
765 			if (!found)
766 				retval = set_errno(ESRCH);
767 		}
768 		mutex_exit(&p->p_lock);
769 
770 		/*
771 		 * Free memory for lgroup affinities,
772 		 * since thread didn't need it
773 		 */
774 		if (aff_buf)
775 			kmem_free(aff_buf,
776 			    nlgrpsmax * sizeof (lgrp_affinity_t));
777 
778 		break;
779 
780 	case P_PID:
781 
782 		do {
783 			lgrp_affinity_t	**aff_buf_array;
784 			int		i;
785 			size_t		size;
786 
787 			/*
788 			 * Get process
789 			 */
790 			mutex_enter(&pidlock);
791 
792 			if (id == P_MYID)
793 				p = curproc;
794 			else
795 				p = prfind(id);
796 
797 			if (p == NULL) {
798 				mutex_exit(&pidlock);
799 				return (set_errno(ESRCH));
800 			}
801 
802 			/*
803 			 * Get number of threads in process
804 			 *
805 			 * NOTE: Only care about user processes,
806 			 *	 so p_lwpcnt should be number of threads.
807 			 */
808 			mutex_enter(&p->p_lock);
809 			nthreads = p->p_lwpcnt;
810 			mutex_exit(&p->p_lock);
811 
812 			mutex_exit(&pidlock);
813 
814 			if (nthreads < 1)
815 				return (set_errno(ESRCH));
816 
817 			/*
818 			 * Preallocate memory for lgroup affinities for
819 			 * each thread in process now to avoid holding
820 			 * any locks.  Allocate an array to hold a buffer
821 			 * for each thread.
822 			 */
823 			aff_buf_array = kmem_zalloc(nthreads *
824 			    sizeof (lgrp_affinity_t *), KM_SLEEP);
825 
826 			size = nlgrpsmax * sizeof (lgrp_affinity_t);
827 			for (i = 0; i < nthreads; i++)
828 				aff_buf_array[i] = kmem_zalloc(size, KM_SLEEP);
829 
830 			mutex_enter(&pidlock);
831 
832 			/*
833 			 * Get process again since dropped locks to allocate
834 			 * memory (except current process)
835 			 */
836 			if (id != P_MYID)
837 				p = prfind(id);
838 
839 			/*
840 			 * Process went away after we dropped locks and before
841 			 * reacquiring them, so drop locks, free memory, and
842 			 * return.
843 			 */
844 			if (p == NULL) {
845 				mutex_exit(&pidlock);
846 				for (i = 0; i < nthreads; i++)
847 					kmem_free(aff_buf_array[i], size);
848 				kmem_free(aff_buf_array,
849 				    nthreads * sizeof (lgrp_affinity_t *));
850 				return (set_errno(ESRCH));
851 			}
852 
853 			mutex_enter(&p->p_lock);
854 
855 			/*
856 			 * See whether number of threads is same
857 			 * If not, drop locks, free memory, and try again
858 			 */
859 			if (nthreads != p->p_lwpcnt) {
860 				mutex_exit(&p->p_lock);
861 				mutex_exit(&pidlock);
862 				for (i = 0; i < nthreads; i++)
863 					kmem_free(aff_buf_array[i], size);
864 				kmem_free(aff_buf_array,
865 				    nthreads * sizeof (lgrp_affinity_t *));
866 				continue;
867 			}
868 
869 			/*
870 			 * Set lgroup affinity for threads in process
871 			 */
872 			retval = lgrp_affinity_set_proc(p, lgrp, aff,
873 			    aff_buf_array);
874 
875 			mutex_exit(&p->p_lock);
876 			mutex_exit(&pidlock);
877 
878 			/*
879 			 * Free any leftover memory, since some threads may
880 			 * have already allocated memory and set lgroup
881 			 * affinities before
882 			 */
883 			for (i = 0; i < nthreads; i++)
884 				if (aff_buf_array[i] != NULL)
885 					kmem_free(aff_buf_array[i], size);
886 			kmem_free(aff_buf_array,
887 			    nthreads * sizeof (lgrp_affinity_t *));
888 
889 			break;
890 
891 		} while (nthreads != p->p_lwpcnt);
892 
893 		break;
894 
895 	default:
896 		retval = set_errno(EINVAL);
897 		break;
898 	}
899 
900 	return (retval);
901 }
902 
903 
904 /*
905  * Return the latest generation number for the lgroup hierarchy
906  * with the given view
907  */
908 lgrp_gen_t
909 lgrp_generation(lgrp_view_t view)
910 {
911 	cpupart_t	*cpupart;
912 	uint_t		gen;
913 
914 	kpreempt_disable();
915 
916 	/*
917 	 * Determine generation number for given view
918 	 */
919 	if (view == LGRP_VIEW_OS)
920 		/*
921 		 * Return generation number of lgroup hierarchy for OS view
922 		 */
923 		gen = lgrp_gen;
924 	else {
925 		/*
926 		 * For caller's view, use generation numbers for lgroup
927 		 * hierarchy and caller's pset
928 		 * NOTE: Caller needs to check for change in pset ID
929 		 */
930 		cpupart = curthread->t_cpupart;
931 		ASSERT(cpupart);
932 		gen = lgrp_gen + cpupart->cp_gen;
933 	}
934 
935 	kpreempt_enable();
936 
937 	return (gen);
938 }
939 
940 
941 lgrp_id_t
942 lgrp_home_thread(kthread_t *t)
943 {
944 	lgrp_id_t	home;
945 
946 	ASSERT(t != NULL);
947 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
948 
949 	thread_lock(t);
950 
951 	/*
952 	 * Check to see whether caller has permission to set affinity for
953 	 * thread
954 	 */
955 	if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
956 		thread_unlock(t);
957 		return (set_errno(EPERM));
958 	}
959 
960 	home = lgrp_home_id(t);
961 
962 	thread_unlock(t);
963 	return (home);
964 }
965 
966 
967 /*
968  * Get home lgroup of given process or thread
969  */
970 lgrp_id_t
971 lgrp_home_get(idtype_t idtype, id_t id)
972 {
973 	proc_t		*p;
974 	lgrp_id_t	retval;
975 	kthread_t	*t;
976 
977 	/*
978 	 * Get home lgroup of given LWP or process
979 	 */
980 	switch (idtype) {
981 
982 	case P_LWPID:
983 		p = curproc;
984 
985 		/*
986 		 * Set affinity for thread
987 		 */
988 		mutex_enter(&p->p_lock);
989 		if (id == P_MYID) {		/* current thread */
990 			retval = lgrp_home_thread(curthread);
991 		} else if (p->p_tlist == NULL) {
992 			retval = set_errno(ESRCH);
993 		} else {			/* other thread */
994 			int	found = 0;
995 
996 			t = p->p_tlist;
997 			do {
998 				if (t->t_tid == id) {
999 					retval = lgrp_home_thread(t);
1000 					found = 1;
1001 					break;
1002 				}
1003 			} while ((t = t->t_forw) != p->p_tlist);
1004 			if (!found)
1005 				retval = set_errno(ESRCH);
1006 		}
1007 		mutex_exit(&p->p_lock);
1008 		break;
1009 
1010 	case P_PID:
1011 		/*
1012 		 * Get process
1013 		 */
1014 		mutex_enter(&pidlock);
1015 
1016 		if (id == P_MYID)
1017 			p = curproc;
1018 		else
1019 			p = prfind(id);
1020 
1021 		if (p == NULL) {
1022 			mutex_exit(&pidlock);
1023 			return (set_errno(ESRCH));
1024 		}
1025 
1026 		mutex_enter(&p->p_lock);
1027 		t = p->p_tlist;
1028 		if (t == NULL)
1029 			retval = set_errno(ESRCH);
1030 		else
1031 			retval = lgrp_home_thread(t);
1032 		mutex_exit(&p->p_lock);
1033 
1034 		mutex_exit(&pidlock);
1035 
1036 		break;
1037 
1038 	default:
1039 		retval = set_errno(EINVAL);
1040 		break;
1041 	}
1042 
1043 	return (retval);
1044 }
1045 
1046 
1047 /*
1048  * Return latency between "from" and "to" lgroups
1049  *
1050  * This latency number can only be used for relative comparison
1051  * between lgroups on the running system, cannot be used across platforms,
1052  * and may not reflect the actual latency.  It is platform and implementation
1053  * specific, so platform gets to decide its value.  It would be nice if the
1054  * number was at least proportional to make comparisons more meaningful though.
1055  */
1056 int
1057 lgrp_latency(lgrp_id_t from, lgrp_id_t to)
1058 {
1059 	lgrp_t		*from_lgrp;
1060 	int		i;
1061 	int		latency;
1062 	int		latency_max;
1063 	lgrp_t		*to_lgrp;
1064 
1065 	ASSERT(MUTEX_HELD(&cpu_lock));
1066 
1067 	if (from < 0 || to < 0)
1068 		return (set_errno(EINVAL));
1069 
1070 	if (from > lgrp_alloc_max || to > lgrp_alloc_max)
1071 		return (set_errno(ESRCH));
1072 
1073 	from_lgrp = lgrp_table[from];
1074 	to_lgrp = lgrp_table[to];
1075 
1076 	if (!LGRP_EXISTS(from_lgrp) || !LGRP_EXISTS(to_lgrp)) {
1077 		return (set_errno(ESRCH));
1078 	}
1079 
1080 	/*
1081 	 * Get latency for same lgroup
1082 	 */
1083 	if (from == to) {
1084 		latency = from_lgrp->lgrp_latency;
1085 		return (latency);
1086 	}
1087 
1088 	/*
1089 	 * Get latency between leaf lgroups
1090 	 */
1091 	if (from_lgrp->lgrp_childcnt == 0 && to_lgrp->lgrp_childcnt == 0)
1092 		return (lgrp_plat_latency(from_lgrp->lgrp_plathand,
1093 		    to_lgrp->lgrp_plathand));
1094 
1095 	/*
1096 	 * Determine max latency between resources in two lgroups
1097 	 */
1098 	latency_max = 0;
1099 	for (i = 0; i <= lgrp_alloc_max; i++) {
1100 		lgrp_t	*from_rsrc;
1101 		int	j;
1102 		lgrp_t	*to_rsrc;
1103 
1104 		from_rsrc = lgrp_table[i];
1105 		if (!LGRP_EXISTS(from_rsrc) ||
1106 		    !klgrpset_ismember(from_lgrp->lgrp_set[LGRP_RSRC_CPU], i))
1107 			continue;
1108 
1109 		for (j = 0; j <= lgrp_alloc_max; j++) {
1110 			to_rsrc = lgrp_table[j];
1111 			if (!LGRP_EXISTS(to_rsrc) ||
1112 			    klgrpset_ismember(to_lgrp->lgrp_set[LGRP_RSRC_MEM],
1113 			    j) == 0)
1114 				continue;
1115 			latency = lgrp_plat_latency(from_rsrc->lgrp_plathand,
1116 			    to_rsrc->lgrp_plathand);
1117 			if (latency > latency_max)
1118 				latency_max = latency;
1119 		}
1120 	}
1121 	return (latency_max);
1122 }
1123 
1124 
1125 /*
1126  * Return lgroup interface version number
1127  * 0 - none
1128  * 1 - original
1129  * 2 - lgrp_latency_cookie() and lgrp_resources() added
1130  */
1131 int
1132 lgrp_version(int version)
1133 {
1134 	/*
1135 	 * Return LGRP_VER_NONE when requested version isn't supported
1136 	 */
1137 	if (version < LGRP_VER_NONE || version > LGRP_VER_CURRENT)
1138 		return (LGRP_VER_NONE);
1139 
1140 	/*
1141 	 * Return current version when LGRP_VER_NONE passed in
1142 	 */
1143 	if (version == LGRP_VER_NONE)
1144 		return (LGRP_VER_CURRENT);
1145 
1146 	/*
1147 	 * Otherwise, return supported version.
1148 	 */
1149 	return (version);
1150 }
1151 
1152 
1153 /*
1154  * Snapshot of lgroup hierarchy
1155  *
1156  * One snapshot is kept and is based on the kernel's native data model, so
1157  * a 32-bit snapshot is kept for the 32-bit kernel and a 64-bit one for the
1158  * 64-bit kernel.  If a 32-bit user wants a snapshot from the 64-bit kernel,
1159  * the kernel generates a 32-bit snapshot from the data in its 64-bit snapshot.
1160  *
1161  * The format is defined by lgroup snapshot header and the layout of
1162  * the snapshot in memory is as follows:
1163  * 1) lgroup snapshot header
1164  *    - specifies format of snapshot
1165  *    - defined by lgrp_snapshot_header_t
1166  * 2) lgroup info array
1167  *    - contains information about each lgroup
1168  *    - one element for each lgroup
1169  *    - each element is defined by lgrp_info_t
1170  * 3) lgroup CPU ID array
1171  *    - contains list (array) of CPU IDs for each lgroup
1172  *    - lgrp_info_t points into array and specifies how many CPUs belong to
1173  *      given lgroup
1174  * 4) lgroup parents array
1175  *    - contains lgroup bitmask of parents for each lgroup
1176  *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1177  * 5) lgroup children array
1178  *    - contains lgroup bitmask of children for each lgroup
1179  *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1180  * 6) lgroup resources array
1181  *    - contains lgroup bitmask of resources for each lgroup
1182  *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1183  * 7) lgroup latency table
1184  *    - contains latency from each lgroup to each of other lgroups
1185  *
1186  * NOTE:  Must use nlgrpsmax for per lgroup data structures because lgroups
1187  *	  may be sparsely allocated.
1188  */
1189 lgrp_snapshot_header_t	*lgrp_snap = NULL;	/* lgroup snapshot */
1190 static kmutex_t		lgrp_snap_lock;		/* snapshot lock */
1191 
1192 
1193 /*
1194  * Take a snapshot of lgroup hierarchy and return size of buffer
1195  * needed to hold snapshot
1196  */
1197 static int
1198 lgrp_snapshot(void)
1199 {
1200 	size_t		bitmask_size;
1201 	size_t		bitmasks_size;
1202 	size_t		bufsize;
1203 	int		cpu_index;
1204 	size_t		cpuids_size;
1205 	int		i;
1206 	int		j;
1207 	size_t		info_size;
1208 	size_t		lats_size;
1209 	ulong_t		*lgrp_children;
1210 	processorid_t	*lgrp_cpuids;
1211 	lgrp_info_t	*lgrp_info;
1212 	int		**lgrp_lats;
1213 	ulong_t		*lgrp_parents;
1214 	ulong_t		*lgrp_rsets;
1215 	ulong_t		*lgrpset;
1216 	int		snap_ncpus;
1217 	int		snap_nlgrps;
1218 	int		snap_nlgrpsmax;
1219 	size_t		snap_hdr_size;
1220 #ifdef	_SYSCALL32_IMPL
1221 	model_t		model = DATAMODEL_NATIVE;
1222 
1223 	/*
1224 	 * Have up-to-date snapshot, so check to see whether caller is 32-bit
1225 	 * program and need to return size of 32-bit snapshot now.
1226 	 */
1227 	model = get_udatamodel();
1228 	if (model == DATAMODEL_ILP32 && lgrp_snap &&
1229 	    lgrp_snap->ss_gen == lgrp_gen) {
1230 
1231 		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1232 
1233 		/*
1234 		 * Calculate size of buffer needed for 32-bit snapshot,
1235 		 * rounding up size of each object to allow for alignment
1236 		 * of next object in buffer.
1237 		 */
1238 		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
1239 		    sizeof (caddr32_t));
1240 		info_size =
1241 		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
1242 		    sizeof (processorid_t));
1243 		cpuids_size =
1244 		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
1245 		    sizeof (ulong_t));
1246 
1247 		/*
1248 		 * lgroup bitmasks needed for parents, children, and resources
1249 		 * for each lgroup and pset lgroup set
1250 		 */
1251 		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1252 		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
1253 		    snap_nlgrpsmax) + 1) * bitmask_size;
1254 
1255 		/*
1256 		 * Size of latency table and buffer
1257 		 */
1258 		lats_size = snap_nlgrpsmax * sizeof (caddr32_t) +
1259 		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);
1260 
1261 		bufsize = snap_hdr_size + info_size + cpuids_size +
1262 		    bitmasks_size + lats_size;
1263 		return (bufsize);
1264 	}
1265 #endif	/* _SYSCALL32_IMPL */
1266 
1267 	/*
1268 	 * Check whether snapshot is up-to-date
1269 	 * Free it and take another one if not
1270 	 */
1271 	if (lgrp_snap) {
1272 		if (lgrp_snap->ss_gen == lgrp_gen)
1273 			return (lgrp_snap->ss_size);
1274 
1275 		kmem_free(lgrp_snap, lgrp_snap->ss_size);
1276 		lgrp_snap = NULL;
1277 	}
1278 
1279 	/*
1280 	 * Allocate memory for snapshot
1281 	 * w/o holding cpu_lock while waiting for memory
1282 	 */
1283 	while (lgrp_snap == NULL) {
1284 		int	old_generation;
1285 
1286 		/*
1287 		 * Take snapshot of lgroup generation number
1288 		 * and configuration size dependent information
1289 		 * NOTE: Only count number of online CPUs,
1290 		 * since only online CPUs appear in lgroups.
1291 		 */
1292 		mutex_enter(&cpu_lock);
1293 		old_generation = lgrp_gen;
1294 		snap_ncpus = ncpus_online;
1295 		snap_nlgrps = nlgrps;
1296 		snap_nlgrpsmax = nlgrpsmax;
1297 		mutex_exit(&cpu_lock);
1298 
1299 		/*
1300 		 * Calculate size of buffer needed for snapshot,
1301 		 * rounding up size of each object to allow for alignment
1302 		 * of next object in buffer.
1303 		 */
1304 		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
1305 		    sizeof (void *));
1306 		info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
1307 		    sizeof (processorid_t));
1308 		cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
1309 		    sizeof (ulong_t));
1310 		/*
1311 		 * lgroup bitmasks needed for pset lgroup set and  parents,
1312 		 * children, and resource sets for each lgroup
1313 		 */
1314 		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1315 		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
1316 		    snap_nlgrpsmax) + 1) * bitmask_size;
1317 
1318 		/*
1319 		 * Size of latency table and buffer
1320 		 */
1321 		lats_size = snap_nlgrpsmax * sizeof (int *) +
1322 		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);
1323 
1324 		bufsize = snap_hdr_size + info_size + cpuids_size +
1325 		    bitmasks_size + lats_size;
1326 
1327 		/*
1328 		 * Allocate memory for buffer
1329 		 */
1330 		lgrp_snap = kmem_zalloc(bufsize, KM_NOSLEEP);
1331 		if (lgrp_snap == NULL)
1332 			return (set_errno(ENOMEM));
1333 
1334 		/*
1335 		 * Check whether generation number has changed
1336 		 */
1337 		mutex_enter(&cpu_lock);
1338 		if (lgrp_gen == old_generation)
1339 			break;		/* hasn't change, so done. */
1340 
1341 		/*
1342 		 * Generation number changed, so free memory and try again.
1343 		 */
1344 		mutex_exit(&cpu_lock);
1345 		kmem_free(lgrp_snap, bufsize);
1346 		lgrp_snap = NULL;
1347 	}
1348 
1349 	/*
1350 	 * Fill in lgroup snapshot header
1351 	 * (including pointers to tables of lgroup info, CPU IDs, and parents
1352 	 * and children)
1353 	 */
1354 	lgrp_snap->ss_version = LGRP_VER_CURRENT;
1355 
1356 	/*
1357 	 * XXX For now, liblgrp only needs to know whether the hierarchy
1358 	 * XXX only has one level or not
1359 	 */
1360 	if (snap_nlgrps == 1)
1361 		lgrp_snap->ss_levels = 1;
1362 	else
1363 		lgrp_snap->ss_levels = 2;
1364 
1365 	lgrp_snap->ss_root = LGRP_ROOTID;
1366 
1367 	lgrp_snap->ss_nlgrps = lgrp_snap->ss_nlgrps_os = snap_nlgrps;
1368 	lgrp_snap->ss_nlgrps_max = snap_nlgrpsmax;
1369 	lgrp_snap->ss_ncpus = snap_ncpus;
1370 	lgrp_snap->ss_gen = lgrp_gen;
1371 	lgrp_snap->ss_view = LGRP_VIEW_OS;
1372 	lgrp_snap->ss_pset = 0;		/* NOTE: caller should set if needed */
1373 	lgrp_snap->ss_size = bufsize;
1374 	lgrp_snap->ss_magic = (uintptr_t)lgrp_snap;
1375 
1376 	lgrp_snap->ss_info = lgrp_info =
1377 	    (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
1378 
1379 	lgrp_snap->ss_cpuids = lgrp_cpuids =
1380 	    (processorid_t *)((uintptr_t)lgrp_info + info_size);
1381 
1382 	lgrp_snap->ss_lgrpset = lgrpset =
1383 	    (ulong_t *)((uintptr_t)lgrp_cpuids + cpuids_size);
1384 
1385 	lgrp_snap->ss_parents = lgrp_parents =
1386 	    (ulong_t *)((uintptr_t)lgrpset + bitmask_size);
1387 
1388 	lgrp_snap->ss_children = lgrp_children =
1389 	    (ulong_t *)((uintptr_t)lgrp_parents + (snap_nlgrpsmax *
1390 	    bitmask_size));
1391 
1392 	lgrp_snap->ss_rsets = lgrp_rsets =
1393 	    (ulong_t *)((uintptr_t)lgrp_children + (snap_nlgrpsmax *
1394 	    bitmask_size));
1395 
1396 	lgrp_snap->ss_latencies = lgrp_lats =
1397 	    (int **)((uintptr_t)lgrp_rsets + (LGRP_RSRC_COUNT *
1398 		snap_nlgrpsmax * bitmask_size));
1399 
1400 	/*
1401 	 * Fill in lgroup information
1402 	 */
1403 	cpu_index = 0;
1404 	for (i = 0; i < snap_nlgrpsmax; i++) {
1405 		struct cpu	*cp;
1406 		int		cpu_count;
1407 		struct cpu	*head;
1408 		int		k;
1409 		lgrp_t		*lgrp;
1410 
1411 		lgrp = lgrp_table[i];
1412 		if (!LGRP_EXISTS(lgrp)) {
1413 			bzero(&lgrp_info[i], sizeof (lgrp_info[i]));
1414 			lgrp_info[i].info_lgrpid = LGRP_NONE;
1415 			continue;
1416 		}
1417 
1418 		lgrp_info[i].info_lgrpid = i;
1419 		lgrp_info[i].info_latency = lgrp->lgrp_latency;
1420 
1421 		/*
1422 		 * Fill in parents, children, and lgroup resources
1423 		 */
1424 		lgrp_info[i].info_parents =
1425 		    (ulong_t *)((uintptr_t)lgrp_parents + (i * bitmask_size));
1426 
1427 		if (lgrp->lgrp_parent)
1428 			BT_SET(lgrp_info[i].info_parents,
1429 			    lgrp->lgrp_parent->lgrp_id);
1430 
1431 		lgrp_info[i].info_children =
1432 		    (ulong_t *)((uintptr_t)lgrp_children + (i * bitmask_size));
1433 
1434 		for (j = 0; j < snap_nlgrpsmax; j++)
1435 			if (klgrpset_ismember(lgrp->lgrp_children, j))
1436 				BT_SET(lgrp_info[i].info_children, j);
1437 
1438 		lgrp_info[i].info_rset =
1439 		    (ulong_t *)((uintptr_t)lgrp_rsets +
1440 		    (i * LGRP_RSRC_COUNT * bitmask_size));
1441 
1442 		for (j = 0; j < LGRP_RSRC_COUNT; j++) {
1443 			ulong_t	*rset;
1444 
1445 			rset = (ulong_t *)((uintptr_t)lgrp_info[i].info_rset +
1446 			    (j * bitmask_size));
1447 			for (k = 0; k < snap_nlgrpsmax; k++)
1448 				if (klgrpset_ismember(lgrp->lgrp_set[j], k))
1449 					BT_SET(rset, k);
1450 		}
1451 
1452 		/*
1453 		 * Fill in CPU IDs
1454 		 */
1455 		cpu_count = 0;
1456 		lgrp_info[i].info_cpuids = NULL;
1457 		cp = head = lgrp->lgrp_cpu;
1458 		if (head != NULL) {
1459 			lgrp_info[i].info_cpuids = &lgrp_cpuids[cpu_index];
1460 			do {
1461 				lgrp_cpuids[cpu_index] = cp->cpu_id;
1462 				cpu_index++;
1463 				cpu_count++;
1464 				cp = cp->cpu_next_lgrp;
1465 			} while (cp != head);
1466 		}
1467 		ASSERT(cpu_count == lgrp->lgrp_cpucnt);
1468 		lgrp_info[i].info_ncpus = cpu_count;
1469 
1470 		/*
1471 		 * Fill in memory sizes for lgroups that directly contain
1472 		 * memory
1473 		 */
1474 		if (klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM], i)) {
1475 			lgrp_info[i].info_mem_free =
1476 			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
1477 			lgrp_info[i].info_mem_install =
1478 			    lgrp_mem_size(i, LGRP_MEM_SIZE_INSTALL);
1479 		}
1480 
1481 		/*
1482 		 * Fill in latency table and buffer
1483 		 */
1484 		lgrp_lats[i] = (int *)((uintptr_t)lgrp_lats + snap_nlgrpsmax *
1485 		    sizeof (int *) + i * snap_nlgrpsmax * sizeof (int));
1486 		for (j = 0; j < snap_nlgrpsmax; j++) {
1487 			lgrp_t	*to;
1488 
1489 			to = lgrp_table[j];
1490 			if (!LGRP_EXISTS(to))
1491 				continue;
1492 			lgrp_lats[i][j] = lgrp_latency(lgrp->lgrp_id,
1493 			    to->lgrp_id);
1494 		}
1495 	}
1496 	ASSERT(cpu_index == snap_ncpus);
1497 
1498 
1499 	mutex_exit(&cpu_lock);
1500 
1501 #ifdef	_SYSCALL32_IMPL
1502 	/*
1503 	 * Check to see whether caller is 32-bit program and need to return
1504 	 * size of 32-bit snapshot now that snapshot has been taken/updated.
1505 	 * May not have been able to do this earlier if snapshot was out of
1506 	 * date or didn't exist yet.
1507 	 */
1508 	if (model == DATAMODEL_ILP32) {
1509 
1510 		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1511 
1512 		/*
1513 		 * Calculate size of buffer needed for 32-bit snapshot,
1514 		 * rounding up size of each object to allow for alignment
1515 		 * of next object in buffer.
1516 		 */
1517 		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
1518 		    sizeof (caddr32_t));
1519 		info_size =
1520 		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
1521 		    sizeof (processorid_t));
1522 		cpuids_size =
1523 		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
1524 		    sizeof (ulong_t));
1525 
1526 		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1527 		bitmasks_size = (((2 + LGRP_RSRC_COUNT) * snap_nlgrpsmax) +
1528 		    1) * bitmask_size;
1529 
1530 
1531 		/*
1532 		 * Size of latency table and buffer
1533 		 */
1534 		lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
1535 		    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));
1536 
1537 		bufsize = snap_hdr_size + info_size + cpuids_size +
1538 		    bitmasks_size + lats_size;
1539 		return (bufsize);
1540 	}
1541 #endif	/* _SYSCALL32_IMPL */
1542 
1543 	return (lgrp_snap->ss_size);
1544 }
1545 
1546 
1547 /*
1548  * Copy snapshot into given user buffer, fix up any pointers in buffer to point
1549  * into user instead of kernel address space, and return size of buffer
1550  * needed to hold snapshot
1551  */
1552 static int
1553 lgrp_snapshot_copy(char *buf, size_t bufsize)
1554 {
1555 	size_t			bitmask_size;
1556 	int			cpu_index;
1557 	size_t			cpuids_size;
1558 	int			i;
1559 	size_t			info_size;
1560 	lgrp_info_t		*lgrp_info;
1561 	int			retval;
1562 	size_t			snap_hdr_size;
1563 	int			snap_ncpus;
1564 	int			snap_nlgrpsmax;
1565 	lgrp_snapshot_header_t	*user_snap;
1566 	lgrp_info_t		*user_info;
1567 	lgrp_info_t		*user_info_buffer;
1568 	processorid_t		*user_cpuids;
1569 	ulong_t			*user_lgrpset;
1570 	ulong_t			*user_parents;
1571 	ulong_t			*user_children;
1572 	int			**user_lats;
1573 	int			**user_lats_buffer;
1574 	ulong_t			*user_rsets;
1575 
1576 	if (lgrp_snap == NULL)
1577 		return (0);
1578 
1579 	if (buf == NULL || bufsize <= 0)
1580 		return (lgrp_snap->ss_size);
1581 
1582 	/*
1583 	 * User needs to try getting size of buffer again
1584 	 * because given buffer size is too small.
1585 	 * The lgroup hierarchy may have changed after they asked for the size
1586 	 * but before the snapshot was taken.
1587 	 */
1588 	if (bufsize < lgrp_snap->ss_size)
1589 		return (set_errno(EAGAIN));
1590 
1591 	snap_ncpus = lgrp_snap->ss_ncpus;
1592 	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1593 
1594 	/*
1595 	 * Fill in lgrpset now because caller may have change psets
1596 	 */
1597 	kpreempt_disable();
1598 	for (i = 0; i < snap_nlgrpsmax; i++) {
1599 		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
1600 		    i)) {
1601 			BT_SET(lgrp_snap->ss_lgrpset, i);
1602 		}
1603 	}
1604 	kpreempt_enable();
1605 
1606 	/*
1607 	 * Copy lgroup snapshot (snapshot header, lgroup info, and CPU IDs)
1608 	 * into user buffer all at once
1609 	 */
1610 	if (copyout(lgrp_snap, buf, lgrp_snap->ss_size) != 0)
1611 		return (set_errno(EFAULT));
1612 
1613 	/*
1614 	 * Round up sizes of lgroup snapshot header and info for alignment
1615 	 */
1616 	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
1617 	    sizeof (void *));
1618 	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
1619 	    sizeof (processorid_t));
1620 	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
1621 	    sizeof (ulong_t));
1622 
1623 	bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1624 
1625 	/*
1626 	 * Calculate pointers into user buffer for lgroup snapshot header,
1627 	 * info, and CPU IDs
1628 	 */
1629 	user_snap = (lgrp_snapshot_header_t *)buf;
1630 	user_info = (lgrp_info_t *)((uintptr_t)user_snap + snap_hdr_size);
1631 	user_cpuids = (processorid_t *)((uintptr_t)user_info + info_size);
1632 	user_lgrpset = (ulong_t *)((uintptr_t)user_cpuids + cpuids_size);
1633 	user_parents = (ulong_t *)((uintptr_t)user_lgrpset + bitmask_size);
1634 	user_children = (ulong_t *)((uintptr_t)user_parents +
1635 	    (snap_nlgrpsmax * bitmask_size));
1636 	user_rsets = (ulong_t *)((uintptr_t)user_children +
1637 	    (snap_nlgrpsmax * bitmask_size));
1638 	user_lats = (int **)((uintptr_t)user_rsets +
1639 	    (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size));
1640 
1641 	/*
1642 	 * Copyout magic number (ie. pointer to beginning of buffer)
1643 	 */
1644 	if (copyout(&buf, &user_snap->ss_magic, sizeof (buf)) != 0)
1645 		return (set_errno(EFAULT));
1646 
1647 	/*
1648 	 * Fix up pointers in user buffer to point into user buffer
1649 	 * not kernel snapshot
1650 	 */
1651 	if (copyout(&user_info, &user_snap->ss_info, sizeof (user_info)) != 0)
1652 		return (set_errno(EFAULT));
1653 
1654 	if (copyout(&user_cpuids, &user_snap->ss_cpuids,
1655 	    sizeof (user_cpuids)) != 0)
1656 		return (set_errno(EFAULT));
1657 
1658 	if (copyout(&user_lgrpset, &user_snap->ss_lgrpset,
1659 	    sizeof (user_lgrpset)) != 0)
1660 		return (set_errno(EFAULT));
1661 
1662 	if (copyout(&user_parents, &user_snap->ss_parents,
1663 	    sizeof (user_parents)) != 0)
1664 		return (set_errno(EFAULT));
1665 
1666 	if (copyout(&user_children, &user_snap->ss_children,
1667 	    sizeof (user_children)) != 0)
1668 		return (set_errno(EFAULT));
1669 
1670 	if (copyout(&user_rsets, &user_snap->ss_rsets,
1671 	    sizeof (user_rsets)) != 0)
1672 		return (set_errno(EFAULT));
1673 
1674 	if (copyout(&user_lats, &user_snap->ss_latencies,
1675 	    sizeof (user_lats)) != 0)
1676 		return (set_errno(EFAULT));
1677 
1678 	/*
1679 	 * Make copies of lgroup info and latency table, fix up pointers,
1680 	 * and then copy them into user buffer
1681 	 */
1682 	user_info_buffer = kmem_zalloc(info_size, KM_NOSLEEP);
1683 	if (user_info_buffer == NULL)
1684 		return (set_errno(ENOMEM));
1685 
1686 	user_lats_buffer = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
1687 	    KM_NOSLEEP);
1688 	if (user_lats_buffer == NULL) {
1689 		kmem_free(user_info_buffer, info_size);
1690 		return (set_errno(ENOMEM));
1691 	}
1692 
1693 	lgrp_info = (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
1694 	bcopy(lgrp_info, user_info_buffer, info_size);
1695 
1696 	cpu_index = 0;
1697 	for (i = 0; i < snap_nlgrpsmax; i++) {
1698 		ulong_t	*snap_rset;
1699 
1700 		/*
1701 		 * Skip non-existent lgroups
1702 		 */
1703 		if (user_info_buffer[i].info_lgrpid == LGRP_NONE)
1704 			continue;
1705 
1706 		/*
1707 		 * Update free memory size since it changes frequently
1708 		 * Only do so for lgroups directly containing memory
1709 		 *
1710 		 * NOTE: This must be done before changing the pointers to
1711 		 *	 point into user space since we need to dereference
1712 		 *	 lgroup resource set
1713 		 */
1714 		snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
1715 		    BT_BITOUL(snap_nlgrpsmax)];
1716 		if (BT_TEST(snap_rset, i))
1717 			user_info_buffer[i].info_mem_free =
1718 			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
1719 
1720 		/*
1721 		 * Fix up pointers to parents, children, resources, and
1722 		 * latencies
1723 		 */
1724 		user_info_buffer[i].info_parents =
1725 		    (ulong_t *)((uintptr_t)user_parents + (i * bitmask_size));
1726 		user_info_buffer[i].info_children =
1727 		    (ulong_t *)((uintptr_t)user_children + (i * bitmask_size));
1728 		user_info_buffer[i].info_rset =
1729 		    (ulong_t *)((uintptr_t)user_rsets +
1730 		    (i * LGRP_RSRC_COUNT * bitmask_size));
1731 		user_lats_buffer[i] = (int *)((uintptr_t)user_lats +
1732 		    (snap_nlgrpsmax * sizeof (int *)) + (i * snap_nlgrpsmax *
1733 		    sizeof (int)));
1734 
1735 		/*
1736 		 * Fix up pointer to CPU IDs
1737 		 */
1738 		if (user_info_buffer[i].info_ncpus == 0) {
1739 			user_info_buffer[i].info_cpuids = NULL;
1740 			continue;
1741 		}
1742 		user_info_buffer[i].info_cpuids = &user_cpuids[cpu_index];
1743 		cpu_index += user_info_buffer[i].info_ncpus;
1744 	}
1745 	ASSERT(cpu_index == snap_ncpus);
1746 
1747 	/*
1748 	 * Copy lgroup info and latency table with pointers fixed up to point
1749 	 * into user buffer out to user buffer now
1750 	 */
1751 	retval = lgrp_snap->ss_size;
1752 	if (copyout(user_info_buffer, user_info, info_size) != 0)
1753 		retval = set_errno(EFAULT);
1754 	kmem_free(user_info_buffer, info_size);
1755 
1756 	if (copyout(user_lats_buffer, user_lats, snap_nlgrpsmax *
1757 	    sizeof (int *)) != 0)
1758 		retval = set_errno(EFAULT);
1759 	kmem_free(user_lats_buffer, snap_nlgrpsmax * sizeof (int *));
1760 
1761 	return (retval);
1762 }
1763 
1764 
#ifdef	_SYSCALL32_IMPL
/*
 * Make 32-bit copy of snapshot, fix up any pointers in buffer to point
 * into user instead of kernel address space, copy 32-bit snapshot into
 * given user buffer, and return size of buffer needed to hold snapshot
 *
 * Returns:
 *	0			if there is no kernel snapshot
 *	size of 32-bit snapshot	if no buffer was given (nothing is copied)
 *				or if the copy succeeded
 *	set_errno(EAGAIN)	if given buffer is smaller than snapshot
 *	set_errno(EFAULT)	if copying out to user buffer failed
 *	set_errno(ENOMEM)	if temporary memory couldn't be allocated
 *
 * lgrpsys() calls this with lgrp_snap_lock held.
 */
static int
lgrp_snapshot_copy32(caddr32_t buf, size32_t bufsize)
{
	size32_t			bitmask_size;
	size32_t			bitmasks_size;
	size32_t			children_size;
	int				cpu_index;
	size32_t			cpuids_size;
	int				i;
	int				j;
	size32_t			info_size;
	size32_t			lats_size;
	lgrp_info_t			*lgrp_info;
	lgrp_snapshot_header32_t	*lgrp_snap32;
	lgrp_info32_t			*lgrp_info32;
	processorid_t			*lgrp_cpuids32;
	caddr32_t			*lgrp_lats32;
	int				**lgrp_lats32_kernel;
	uint_t				*lgrp_set32;
	uint_t				*lgrp_parents32;
	uint_t				*lgrp_children32;
	uint_t				*lgrp_rsets32;
	size32_t			parents_size;
	int				retval;
	size32_t			rsets_size;
	size32_t			set_size;
	size32_t			snap_hdr_size;
	size_t				snap_mask_words;
	int				snap_ncpus;
	int				snap_nlgrpsmax;
	size32_t			snap_size;

	if (lgrp_snap == NULL)
		return (0);

	snap_ncpus = lgrp_snap->ss_ncpus;
	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

	/*
	 * Number of unsigned longs in each lgroup bitmask of kernel snapshot
	 */
	snap_mask_words = BT_BITOUL(snap_nlgrpsmax);

	/*
	 * Calculate size of buffer needed for 32-bit snapshot,
	 * rounding up size of each object to allow for alignment
	 * of next object in buffer.
	 */
	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
	    sizeof (caddr32_t));
	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
	    sizeof (processorid_t));
	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
	    sizeof (ulong_t));

	bitmask_size = BT_SIZEOFMAP32(snap_nlgrpsmax);

	set_size = bitmask_size;
	parents_size = snap_nlgrpsmax * bitmask_size;
	children_size = snap_nlgrpsmax * bitmask_size;
	rsets_size = P2ROUNDUP(LGRP_RSRC_COUNT * snap_nlgrpsmax *
	    (int)bitmask_size, sizeof (caddr32_t));

	bitmasks_size = set_size + parents_size + children_size + rsets_size;

	/*
	 * Size of latency table and buffer
	 */
	lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
	    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));

	snap_size = snap_hdr_size + info_size + cpuids_size + bitmasks_size +
	    lats_size;

	/*
	 * NOTE: buf is a 32-bit user address kept in an integer type,
	 *	 so compare it against 0 rather than NULL
	 */
	if (buf == 0 || bufsize == 0) {
		return (snap_size);
	}

	/*
	 * User needs to try getting size of buffer again
	 * because given buffer size is too small.
	 * The lgroup hierarchy may have changed after they asked for the size
	 * but before the snapshot was taken.
	 */
	if (bufsize < snap_size)
		return (set_errno(EAGAIN));

	/*
	 * Make 32-bit copy of snapshot, fix up pointers to point into user
	 * buffer not kernel, and then copy whole thing into user buffer
	 */
	lgrp_snap32 = kmem_zalloc(snap_size, KM_NOSLEEP);
	if (lgrp_snap32 == NULL)
		return (set_errno(ENOMEM));

	/*
	 * Calculate pointers into 32-bit copy of snapshot
	 * for lgroup info, CPU IDs, pset lgroup bitmask, parents, children,
	 * resources, and latency table and buffer
	 */
	lgrp_info32 = (lgrp_info32_t *)((uintptr_t)lgrp_snap32 +
	    snap_hdr_size);
	lgrp_cpuids32 = (processorid_t *)((uintptr_t)lgrp_info32 + info_size);
	lgrp_set32 = (uint_t *)((uintptr_t)lgrp_cpuids32 + cpuids_size);
	lgrp_parents32 = (uint_t *)((uintptr_t)lgrp_set32 + set_size);
	lgrp_children32 = (uint_t *)((uintptr_t)lgrp_parents32 + parents_size);
	lgrp_rsets32 = (uint_t *)((uintptr_t)lgrp_children32 + children_size);
	lgrp_lats32 = (caddr32_t *)((uintptr_t)lgrp_rsets32 + rsets_size);

	/*
	 * Make temporary lgroup latency table of pointers for kernel to use
	 * to fill in rows of table with latencies from each lgroup
	 */
	lgrp_lats32_kernel =  kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
	    KM_NOSLEEP);
	if (lgrp_lats32_kernel == NULL) {
		kmem_free(lgrp_snap32, snap_size);
		return (set_errno(ENOMEM));
	}

	/*
	 * Fill in 32-bit lgroup snapshot header
	 * (with pointers into user's buffer for lgroup info, CPU IDs,
	 * bit masks, and latencies)
	 *
	 * User addresses are advanced by the same object sizes used for the
	 * kernel pointers above, so that both describe the same layout.
	 */
	lgrp_snap32->ss_version = lgrp_snap->ss_version;
	lgrp_snap32->ss_levels = lgrp_snap->ss_levels;
	lgrp_snap32->ss_nlgrps = lgrp_snap32->ss_nlgrps_os =
	    lgrp_snap->ss_nlgrps;
	lgrp_snap32->ss_nlgrps_max = snap_nlgrpsmax;
	lgrp_snap32->ss_root = lgrp_snap->ss_root;
	lgrp_snap32->ss_ncpus = lgrp_snap->ss_ncpus;
	lgrp_snap32->ss_gen = lgrp_snap->ss_gen;
	lgrp_snap32->ss_view = LGRP_VIEW_OS;
	lgrp_snap32->ss_size = snap_size;
	lgrp_snap32->ss_magic = buf;
	lgrp_snap32->ss_info = buf + snap_hdr_size;
	lgrp_snap32->ss_cpuids = lgrp_snap32->ss_info + info_size;
	lgrp_snap32->ss_lgrpset = lgrp_snap32->ss_cpuids + cpuids_size;
	lgrp_snap32->ss_parents = lgrp_snap32->ss_lgrpset + set_size;
	lgrp_snap32->ss_children = lgrp_snap32->ss_parents + parents_size;
	lgrp_snap32->ss_rsets = lgrp_snap32->ss_children + children_size;
	lgrp_snap32->ss_latencies = lgrp_snap32->ss_rsets + rsets_size;

	/*
	 * Fill in lgrpset now because caller may have change psets
	 * (32-bit copy was zeroed when allocated, so only need to set bits)
	 */
	kpreempt_disable();
	for (i = 0; i < snap_nlgrpsmax; i++) {
		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
		    i)) {
			BT_SET32(lgrp_set32, i);
		}
	}
	kpreempt_enable();

	/*
	 * Fill in 32-bit copy of lgroup info and fix up pointers
	 * to point into user's buffer instead of kernel's
	 */
	cpu_index = 0;
	lgrp_info = lgrp_snap->ss_info;
	for (i = 0; i < snap_nlgrpsmax; i++) {
		uint_t	*children;
		uint_t	*lgrp_rset;
		uint_t	*parents;
		ulong_t	*snap_children;
		ulong_t	*snap_parents;
		ulong_t	*snap_rset;

		/*
		 * Skip non-existent lgroups
		 */
		if (lgrp_info[i].info_lgrpid == LGRP_NONE) {
			bzero(&lgrp_info32[i], sizeof (lgrp_info32[i]));
			lgrp_info32[i].info_lgrpid = LGRP_NONE;
			continue;
		}

		/*
		 * Fill in parents, children, lgroup resource set, and
		 * latencies from snapshot
		 *
		 * NOTE: Use the bitmask pointers which lgrp_snapshot() stored
		 *	 in the kernel lgroup info for this lgroup.  Each
		 *	 bitmask in the kernel snapshot is snap_mask_words
		 *	 unsigned longs, so indexing ss_parents, ss_children,
		 *	 or the resource sets by one unsigned long per lgroup
		 *	 or resource is only right when a bitmask fits in a
		 *	 single unsigned long.
		 */
		snap_parents = lgrp_info[i].info_parents;
		snap_children = lgrp_info[i].info_children;
		snap_rset = lgrp_info[i].info_rset;

		parents = (uint_t *)((uintptr_t)lgrp_parents32 +
		    i * bitmask_size);
		children = (uint_t *)((uintptr_t)lgrp_children32 +
		    i * bitmask_size);
		lgrp_rset = (uint_t *)((uintptr_t)lgrp_rsets32 +
		    (i * LGRP_RSRC_COUNT * bitmask_size));
		lgrp_lats32_kernel[i] = (int *)((uintptr_t)lgrp_lats32 +
		    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
		    sizeof (int));
		for (j = 0; j < snap_nlgrpsmax; j++) {
			int	k;
			uint_t	*rset;

			if (BT_TEST(snap_parents, j)) {
				BT_SET32(parents, j);
			}

			if (BT_TEST(snap_children, j)) {
				BT_SET32(children, j);
			}

			for (k = 0; k < LGRP_RSRC_COUNT; k++) {
				rset = (uint_t *)((uintptr_t)lgrp_rset +
				    k * bitmask_size);
				if (BT_TEST(&snap_rset[k * snap_mask_words],
				    j)) {
					BT_SET32(rset, j);
				}
			}

			lgrp_lats32_kernel[i][j] =
			    lgrp_snap->ss_latencies[i][j];
		}

		/*
		 * Fix up pointer to latency buffer
		 */
		lgrp_lats32[i] = lgrp_snap32->ss_latencies +
		    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
		    sizeof (int);

		/*
		 * Fix up pointers for parents, children, and resources
		 */
		lgrp_info32[i].info_parents = lgrp_snap32->ss_parents +
		    (i * bitmask_size);
		lgrp_info32[i].info_children = lgrp_snap32->ss_children +
		    (i * bitmask_size);
		lgrp_info32[i].info_rset = lgrp_snap32->ss_rsets +
		    (i * LGRP_RSRC_COUNT * bitmask_size);

		/*
		 * Fill in memory and CPU info
		 * Only fill in memory for lgroups directly containing memory
		 * (free memory is looked up now, installed memory is taken
		 * from snapshot)
		 */
		if (BT_TEST(&snap_rset[LGRP_RSRC_MEM * snap_mask_words], i)) {
			lgrp_info32[i].info_mem_free = lgrp_mem_size(i,
			    LGRP_MEM_SIZE_FREE);
			lgrp_info32[i].info_mem_install =
			    lgrp_info[i].info_mem_install;
		}

		lgrp_info32[i].info_ncpus = lgrp_info[i].info_ncpus;

		lgrp_info32[i].info_lgrpid = lgrp_info[i].info_lgrpid;
		lgrp_info32[i].info_latency = lgrp_info[i].info_latency;

		if (lgrp_info32[i].info_ncpus == 0) {
			lgrp_info32[i].info_cpuids = 0;
			continue;
		}

		/*
		 * Fix up pointer for CPU IDs
		 */
		lgrp_info32[i].info_cpuids = lgrp_snap32->ss_cpuids +
		    (cpu_index * sizeof (processorid_t));
		cpu_index += lgrp_info32[i].info_ncpus;
	}
	ASSERT(cpu_index == snap_ncpus);

	/*
	 * Copy lgroup CPU IDs into 32-bit snapshot
	 * before copying it out into user's buffer
	 */
	bcopy(lgrp_snap->ss_cpuids, lgrp_cpuids32, cpuids_size);

	/*
	 * Copy 32-bit lgroup snapshot into user's buffer all at once
	 * and free temporary buffers whether or not that succeeds
	 */
	retval = snap_size;
	if (copyout(lgrp_snap32, (void *)(uintptr_t)buf, snap_size) != 0)
		retval = set_errno(EFAULT);

	kmem_free(lgrp_snap32, snap_size);
	kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));

	return (retval);
}
#endif	/* _SYSCALL32_IMPL */
2050 
2051 
2052 int
2053 lgrpsys(int subcode, long ia, void *ap)
2054 {
2055 	size_t	bufsize;
2056 	int	latency;
2057 
2058 	switch (subcode) {
2059 
2060 	case LGRP_SYS_AFFINITY_GET:
2061 		return (lgrp_affinity_get((lgrp_affinity_args_t *)ap));
2062 
2063 	case LGRP_SYS_AFFINITY_SET:
2064 		return (lgrp_affinity_set((lgrp_affinity_args_t *)ap));
2065 
2066 	case LGRP_SYS_GENERATION:
2067 		return (lgrp_generation(ia));
2068 
2069 	case LGRP_SYS_HOME:
2070 		return (lgrp_home_get((idtype_t)ia, (id_t)(uintptr_t)ap));
2071 
2072 	case LGRP_SYS_LATENCY:
2073 		mutex_enter(&cpu_lock);
2074 		latency = lgrp_latency(ia, (lgrp_id_t)(uintptr_t)ap);
2075 		mutex_exit(&cpu_lock);
2076 		return (latency);
2077 
2078 	case LGRP_SYS_MEMINFO:
2079 		return (meminfo(ia, (struct meminfo *)ap));
2080 
2081 	case LGRP_SYS_VERSION:
2082 		return (lgrp_version(ia));
2083 
2084 	case LGRP_SYS_SNAPSHOT:
2085 		mutex_enter(&lgrp_snap_lock);
2086 		bufsize = lgrp_snapshot();
2087 		if (ap && ia > 0) {
2088 			if (get_udatamodel() == DATAMODEL_NATIVE)
2089 				bufsize = lgrp_snapshot_copy(ap, ia);
2090 #ifdef	_SYSCALL32_IMPL
2091 			else
2092 				bufsize = lgrp_snapshot_copy32(
2093 				    (caddr32_t)(uintptr_t)ap, ia);
2094 #endif	/* _SYSCALL32_IMPL */
2095 		}
2096 		mutex_exit(&lgrp_snap_lock);
2097 		return (bufsize);
2098 
2099 	default:
2100 		break;
2101 
2102 	}
2103 
2104 	return (set_errno(EINVAL));
2105 }
2106