xref: /illumos-gate/usr/src/uts/common/syscall/lgrpsys.c (revision 4e9cfc9a015e8ca7d41f7d018c74dc8a692305b3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * lgroup system calls
31  */
32 
33 #include <sys/types.h>
34 #include <sys/errno.h>
35 #include <sys/sunddi.h>
36 #include <sys/systm.h>
37 #include <sys/mman.h>
38 #include <sys/cpupart.h>
39 #include <sys/lgrp.h>
40 #include <sys/lgrp_user.h>
41 #include <sys/promif.h>		/* for prom_printf() */
42 #include <sys/sysmacros.h>
43 
44 #include <vm/as.h>
45 
46 
47 /* definitions for mi_validity */
48 #define	VALID_ADDR	1
49 #define	VALID_REQ	2
50 
51 /*
52  * run through the given number of addresses and requests and return the
53  * corresponding memory information for each address
54  */
55 static int
56 meminfo(int addr_count, struct meminfo *mip)
57 {
58 	size_t		in_size, out_size, req_size, val_size;
59 	struct as	*as;
60 	struct hat	*hat;
61 	int		i, j, out_idx, info_count;
62 	lgrp_t		*lgrp;
63 	pfn_t		pfn;
64 	ssize_t		pgsz;
65 	int		*req_array, *val_array;
66 	uint64_t	*in_array, *out_array;
67 	uint64_t	addr, paddr;
68 	uintptr_t	vaddr;
69 	int		ret = 0;
70 	struct meminfo minfo;
71 #if defined(_SYSCALL32_IMPL)
72 	struct meminfo32 minfo32;
73 #endif
74 
75 	/*
76 	 * Make sure that there is at least one address to translate and
77 	 * limit how many virtual addresses the kernel can do per call
78 	 */
79 	if (addr_count < 1)
80 		return (set_errno(EINVAL));
81 	else if (addr_count > MAX_MEMINFO_CNT)
82 		addr_count = MAX_MEMINFO_CNT;
83 
84 	if (get_udatamodel() == DATAMODEL_NATIVE) {
85 		if (copyin(mip, &minfo, sizeof (struct meminfo)))
86 			return (set_errno(EFAULT));
87 	}
88 #if defined(_SYSCALL32_IMPL)
89 	else {
90 		bzero(&minfo, sizeof (minfo));
91 		if (copyin(mip, &minfo32, sizeof (struct meminfo32)))
92 			return (set_errno(EFAULT));
93 		minfo.mi_inaddr = (const uint64_t *)(uintptr_t)
94 		    minfo32.mi_inaddr;
95 		minfo.mi_info_req = (const uint_t *)(uintptr_t)
96 		    minfo32.mi_info_req;
97 		minfo.mi_info_count = minfo32.mi_info_count;
98 		minfo.mi_outdata = (uint64_t *)(uintptr_t)
99 		    minfo32.mi_outdata;
100 		minfo.mi_validity = (uint_t *)(uintptr_t)
101 		    minfo32.mi_validity;
102 	}
103 #endif
104 	/*
105 	 * all the input parameters have been copied in:-
106 	 * addr_count - number of input addresses
107 	 * minfo.mi_inaddr - array of input addresses
108 	 * minfo.mi_info_req - array of types of information requested
109 	 * minfo.mi_info_count - no. of pieces of info requested for each addr
110 	 * minfo.mi_outdata - array into which the results are placed
111 	 * minfo.mi_validity -  array containing bitwise result codes; 0th bit
112 	 *			evaluates validity of corresponding input
113 	 *			address, 1st bit validity of response to first
114 	 *			member of info_req, etc.
115 	 */
116 
117 	/* make sure mi_info_count is within limit */
118 	info_count = minfo.mi_info_count;
119 	if (info_count < 1 || info_count > MAX_MEMINFO_REQ)
120 		return (set_errno(EINVAL));
121 
122 	/*
123 	 * allocate buffer in_array for the input addresses and copy them in
124 	 */
125 	in_size = sizeof (uint64_t) * addr_count;
126 	in_array = kmem_alloc(in_size, KM_SLEEP);
127 	if (copyin(minfo.mi_inaddr, in_array, in_size)) {
128 		kmem_free(in_array, in_size);
129 		return (set_errno(EFAULT));
130 	}
131 
132 	/*
133 	 * allocate buffer req_array for the input info_reqs and copy them in
134 	 */
135 	req_size = sizeof (uint_t) * info_count;
136 	req_array = kmem_alloc(req_size, KM_SLEEP);
137 	if (copyin(minfo.mi_info_req, req_array, req_size)) {
138 		kmem_free(req_array, req_size);
139 		kmem_free(in_array, in_size);
140 		return (set_errno(EFAULT));
141 	}
142 
143 	/*
144 	 * allocate buffer out_array which holds the results and will have
145 	 * to be copied out later
146 	 */
147 	out_size = sizeof (uint64_t) * addr_count * info_count;
148 	out_array = kmem_alloc(out_size, KM_SLEEP);
149 
150 	/*
151 	 * allocate buffer val_array which holds the validity bits and will
152 	 * have to be copied out later
153 	 */
154 	val_size = sizeof (uint_t) * addr_count;
155 	val_array = kmem_alloc(val_size, KM_SLEEP);
156 
157 	if ((req_array[0] & MEMINFO_MASK) == MEMINFO_PLGRP) {
158 		/* find the corresponding lgroup for each physical address */
159 		for (i = 0; i < addr_count; i++) {
160 			paddr = in_array[i];
161 			pfn = btop(paddr);
162 			lgrp = lgrp_pfn_to_lgrp(pfn);
163 			if (lgrp) {
164 				out_array[i] = lgrp->lgrp_id;
165 				val_array[i] = VALID_ADDR | VALID_REQ;
166 			} else {
167 				out_array[i] = NULL;
168 				val_array[i] = 0;
169 			}
170 		}
171 	} else {
172 		/* get the corresponding memory info for each virtual address */
173 		as = curproc->p_as;
174 
175 		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
176 		hat = as->a_hat;
177 		for (i = out_idx = 0; i < addr_count; i++, out_idx +=
178 		    info_count) {
179 			addr = in_array[i];
180 			vaddr = (uintptr_t)(addr & ~PAGEOFFSET);
181 			if (!as_segat(as, (caddr_t)vaddr)) {
182 				val_array[i] = 0;
183 				continue;
184 			}
185 			val_array[i] = VALID_ADDR;
186 			pfn = hat_getpfnum(hat, (caddr_t)vaddr);
187 			if (pfn != PFN_INVALID) {
188 				paddr = (uint64_t)((pfn << PAGESHIFT) |
189 					(addr & PAGEOFFSET));
190 				for (j = 0; j < info_count; j++) {
191 					switch (req_array[j] & MEMINFO_MASK) {
192 					case MEMINFO_VPHYSICAL:
193 						/*
194 						 * return the physical address
195 						 * corresponding to the input
196 						 * virtual address
197 						 */
198 						out_array[out_idx + j] = paddr;
199 						val_array[i] |= VALID_REQ << j;
200 						break;
201 					case MEMINFO_VLGRP:
202 						/*
203 						 * return the lgroup of physical
204 						 * page corresponding to the
205 						 * input virtual address
206 						 */
207 						lgrp = lgrp_pfn_to_lgrp(pfn);
208 						if (lgrp) {
209 							out_array[out_idx + j] =
210 								lgrp->lgrp_id;
211 							val_array[i] |=
212 								VALID_REQ << j;
213 						}
214 						break;
215 					case MEMINFO_VPAGESIZE:
216 						/*
217 						 * return the size of physical
218 						 * page corresponding to the
219 						 * input virtual address
220 						 */
221 						pgsz = hat_getpagesize(hat,
222 							(caddr_t)vaddr);
223 						if (pgsz != -1) {
224 							out_array[out_idx + j] =
225 									pgsz;
226 							val_array[i] |=
227 								VALID_REQ << j;
228 						}
229 						break;
230 					case MEMINFO_VREPLCNT:
231 						/*
232 						 * for future use:-
233 						 * return the no. replicated
234 						 * physical pages corresponding
235 						 * to the input virtual address,
236 						 * so it is always 0 at the
237 						 * moment
238 						 */
239 						out_array[out_idx + j] = 0;
240 						val_array[i] |= VALID_REQ << j;
241 						break;
242 					case MEMINFO_VREPL:
243 						/*
244 						 * for future use:-
245 						 * return the nth physical
246 						 * replica of the specified
247 						 * virtual address
248 						 */
249 						break;
250 					case MEMINFO_VREPL_LGRP:
251 						/*
252 						 * for future use:-
253 						 * return the lgroup of nth
254 						 * physical replica of the
255 						 * specified virtual address
256 						 */
257 						break;
258 					case MEMINFO_PLGRP:
259 						/*
260 						 * this is for physical address
261 						 * only, shouldn't mix with
262 						 * virtual address
263 						 */
264 						break;
265 					default:
266 						break;
267 					}
268 				}
269 			}
270 		}
271 		AS_LOCK_EXIT(as, &as->a_lock);
272 	}
273 
274 	/* copy out the results and validity bits and free the buffers */
275 	if ((copyout(out_array, minfo.mi_outdata, out_size) != 0) ||
276 		(copyout(val_array, minfo.mi_validity, val_size) != 0))
277 		ret = set_errno(EFAULT);
278 
279 	kmem_free(in_array, in_size);
280 	kmem_free(out_array, out_size);
281 	kmem_free(req_array, req_size);
282 	kmem_free(val_array, val_size);
283 
284 	return (ret);
285 }
286 
287 
288 /*
289  * Initialize lgroup affinities for thread
290  */
291 void
292 lgrp_affinity_init(lgrp_affinity_t **bufaddr)
293 {
294 	if (bufaddr)
295 		*bufaddr = NULL;
296 }
297 
298 
299 /*
300  * Free lgroup affinities for thread and set to NULL
301  * just in case thread gets recycled
302  */
303 void
304 lgrp_affinity_free(lgrp_affinity_t **bufaddr)
305 {
306 	if (bufaddr && *bufaddr) {
307 		kmem_free(*bufaddr, nlgrpsmax * sizeof (lgrp_affinity_t));
308 		*bufaddr = NULL;
309 	}
310 }
311 
312 
/* parenthesized so the negative value expands safely inside expressions */
#define	P_ANY	(-2)	/* cookie specifying any ID */
314 
315 
316 /*
317  * Find LWP with given ID in specified process and get its affinity for
318  * specified lgroup
319  */
320 lgrp_affinity_t
321 lgrp_affinity_get_thread(proc_t *p, id_t lwpid, lgrp_id_t lgrp)
322 {
323 	lgrp_affinity_t aff;
324 	int		found;
325 	kthread_t	*t;
326 
327 	ASSERT(MUTEX_HELD(&p->p_lock));
328 
329 	aff = LGRP_AFF_NONE;
330 	found = 0;
331 	t = p->p_tlist;
332 	/*
333 	 * The process may be executing in proc_exit() and its p->p_list may be
334 	 * already NULL.
335 	 */
336 	if (t == NULL)
337 		return (set_errno(ESRCH));
338 
339 	do {
340 		if (t->t_tid == lwpid || lwpid == P_ANY) {
341 			thread_lock(t);
342 			/*
343 			 * Check to see whether caller has permission to set
344 			 * affinity for LWP
345 			 */
346 			if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
347 				thread_unlock(t);
348 				return (set_errno(EPERM));
349 			}
350 
351 			if (t->t_lgrp_affinity)
352 				aff = t->t_lgrp_affinity[lgrp];
353 			thread_unlock(t);
354 			found = 1;
355 			break;
356 		}
357 	} while ((t = t->t_forw) != p->p_tlist);
358 	if (!found)
359 		aff = set_errno(ESRCH);
360 
361 	return (aff);
362 }
363 
364 
365 /*
366  * Get lgroup affinity for given LWP
367  */
368 lgrp_affinity_t
369 lgrp_affinity_get(lgrp_affinity_args_t *ap)
370 {
371 	lgrp_affinity_t		aff;
372 	lgrp_affinity_args_t	args;
373 	id_t			id;
374 	idtype_t		idtype;
375 	lgrp_id_t		lgrp;
376 	proc_t			*p;
377 	kthread_t		*t;
378 
379 	/*
380 	 * Copyin arguments
381 	 */
382 	if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
383 		return (set_errno(EFAULT));
384 
385 	id = args.id;
386 	idtype = args.idtype;
387 	lgrp = args.lgrp;
388 
389 	/*
390 	 * Check for invalid lgroup
391 	 */
392 	if (lgrp < 0 || lgrp == LGRP_NONE)
393 		return (set_errno(EINVAL));
394 
395 	/*
396 	 * Check for existing lgroup
397 	 */
398 	if (lgrp > lgrp_alloc_max)
399 		return (set_errno(ESRCH));
400 
401 	/*
402 	 * Get lgroup affinity for given LWP or process
403 	 */
404 	switch (idtype) {
405 
406 	case P_LWPID:
407 		/*
408 		 * LWP in current process
409 		 */
410 		p = curproc;
411 		mutex_enter(&p->p_lock);
412 		if (id != P_MYID)	/* different thread */
413 			aff = lgrp_affinity_get_thread(p, id, lgrp);
414 		else {			/* current thread */
415 			aff = LGRP_AFF_NONE;
416 			t = curthread;
417 			thread_lock(t);
418 			if (t->t_lgrp_affinity)
419 				aff = t->t_lgrp_affinity[lgrp];
420 			thread_unlock(t);
421 		}
422 		mutex_exit(&p->p_lock);
423 		break;
424 
425 	case P_PID:
426 		/*
427 		 * Process
428 		 */
429 		mutex_enter(&pidlock);
430 
431 		if (id == P_MYID)
432 			p = curproc;
433 		else {
434 			p = prfind(id);
435 			if (p == NULL) {
436 				mutex_exit(&pidlock);
437 				return (set_errno(ESRCH));
438 			}
439 		}
440 
441 		mutex_enter(&p->p_lock);
442 		aff = lgrp_affinity_get_thread(p, P_ANY, lgrp);
443 		mutex_exit(&p->p_lock);
444 
445 		mutex_exit(&pidlock);
446 		break;
447 
448 	default:
449 		aff = set_errno(EINVAL);
450 		break;
451 	}
452 
453 	return (aff);
454 }
455 
456 
457 /*
458  * Find lgroup for which this thread has most affinity in specified partition
459  * starting from home lgroup unless specified starting lgroup is preferred
460  */
461 lpl_t *
462 lgrp_affinity_best(kthread_t *t, struct cpupart *cpupart, lgrp_id_t start,
463     boolean_t prefer_start)
464 {
465 	lgrp_affinity_t	*affs;
466 	lgrp_affinity_t	best_aff;
467 	lpl_t		*best_lpl;
468 	lgrp_id_t	finish;
469 	lgrp_id_t	home;
470 	lgrp_id_t	lgrpid;
471 	lpl_t		*lpl;
472 
473 	ASSERT(t != NULL);
474 	ASSERT((MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0) ||
475 	    (MUTEX_HELD(&ttoproc(t)->p_lock) && THREAD_LOCK_HELD(t)));
476 	ASSERT(cpupart != NULL);
477 
478 	if (t->t_lgrp_affinity == NULL)
479 		return (NULL);
480 
481 	affs = t->t_lgrp_affinity;
482 
483 	/*
484 	 * Thread bound to CPU
485 	 */
486 	if (t->t_bind_cpu != PBIND_NONE) {
487 		cpu_t	*cp;
488 
489 		/*
490 		 * Find which lpl has most affinity among leaf lpl directly
491 		 * containing CPU and its ancestor lpls
492 		 */
493 		cp = cpu[t->t_bind_cpu];
494 
495 		best_lpl = lpl = cp->cpu_lpl;
496 		best_aff = affs[best_lpl->lpl_lgrpid];
497 		while (lpl->lpl_parent != NULL) {
498 			lpl = lpl->lpl_parent;
499 			lgrpid = lpl->lpl_lgrpid;
500 			if (affs[lgrpid] > best_aff) {
501 				best_lpl = lpl;
502 				best_aff = affs[lgrpid];
503 			}
504 		}
505 		return (best_lpl);
506 	}
507 
508 	/*
509 	 * Start searching from home lgroup unless given starting lgroup is
510 	 * preferred or home lgroup isn't in given pset.  Use root lgroup as
511 	 * starting point if both home and starting lgroups aren't in given
512 	 * pset.
513 	 */
514 	ASSERT(start >= 0 && start <= lgrp_alloc_max);
515 	home = t->t_lpl->lpl_lgrpid;
516 	if (!prefer_start && LGRP_CPUS_IN_PART(home, cpupart))
517 		lgrpid = home;
518 	else if (start != LGRP_NONE && LGRP_CPUS_IN_PART(start, cpupart))
519 		lgrpid = start;
520 	else
521 		lgrpid = LGRP_ROOTID;
522 
523 	best_lpl = &cpupart->cp_lgrploads[lgrpid];
524 	best_aff = affs[lgrpid];
525 	finish = lgrpid;
526 	do {
527 		/*
528 		 * Skip any lgroups that don't have CPU resources
529 		 * in this processor set.
530 		 */
531 		if (!LGRP_CPUS_IN_PART(lgrpid, cpupart)) {
532 			if (++lgrpid > lgrp_alloc_max)
533 				lgrpid = 0;	/* wrap the search */
534 			continue;
535 		}
536 
537 		/*
538 		 * Find lgroup with most affinity
539 		 */
540 		lpl = &cpupart->cp_lgrploads[lgrpid];
541 		if (affs[lgrpid] > best_aff) {
542 			best_aff = affs[lgrpid];
543 			best_lpl = lpl;
544 		}
545 
546 		if (++lgrpid > lgrp_alloc_max)
547 			lgrpid = 0;	/* wrap the search */
548 
549 	} while (lgrpid != finish);
550 
551 	/*
552 	 * No lgroup (in this pset) with any affinity
553 	 */
554 	if (best_aff == LGRP_AFF_NONE)
555 		return (NULL);
556 
557 	lgrpid = best_lpl->lpl_lgrpid;
558 	ASSERT(LGRP_CPUS_IN_PART(lgrpid, cpupart) && best_lpl->lpl_ncpu > 0);
559 
560 	return (best_lpl);
561 }
562 
563 
564 /*
565  * Set thread's affinity for given lgroup
566  */
567 int
568 lgrp_affinity_set_thread(kthread_t *t, lgrp_id_t lgrp, lgrp_affinity_t aff,
569     lgrp_affinity_t **aff_buf)
570 {
571 	lgrp_affinity_t	*affs;
572 	lgrp_id_t	best;
573 	lpl_t		*best_lpl;
574 	lgrp_id_t	home;
575 	int		retval;
576 
577 	ASSERT(t != NULL);
578 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
579 
580 	retval = 0;
581 
582 	thread_lock(t);
583 
584 	/*
585 	 * Check to see whether caller has permission to set affinity for
586 	 * thread
587 	 */
588 	if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
589 		thread_unlock(t);
590 		return (set_errno(EPERM));
591 	}
592 
593 	if (t->t_lgrp_affinity == NULL) {
594 		if (aff == LGRP_AFF_NONE) {
595 			thread_unlock(t);
596 			return (0);
597 		}
598 		ASSERT(aff_buf != NULL && *aff_buf != NULL);
599 		t->t_lgrp_affinity = *aff_buf;
600 		*aff_buf = NULL;
601 	}
602 
603 	affs = t->t_lgrp_affinity;
604 	affs[lgrp] = aff;
605 
606 	/*
607 	 * Find lgroup for which thread has most affinity,
608 	 * starting with lgroup for which affinity being set
609 	 */
610 	best_lpl = lgrp_affinity_best(t, t->t_cpupart, lgrp, B_TRUE);
611 
612 	/*
613 	 * Rehome if found lgroup with more affinity than home or lgroup for
614 	 * which affinity is being set has same affinity as home
615 	 */
616 	home = t->t_lpl->lpl_lgrpid;
617 	if (best_lpl != NULL && best_lpl != t->t_lpl) {
618 		best = best_lpl->lpl_lgrpid;
619 		if (affs[best] > affs[home] || (affs[best] == affs[home] &&
620 		    best == lgrp))
621 			lgrp_move_thread(t, best_lpl, 1);
622 	}
623 
624 	thread_unlock(t);
625 
626 	return (retval);
627 }
628 
629 
630 /*
631  * Set process' affinity for specified lgroup
632  */
633 int
634 lgrp_affinity_set_proc(proc_t *p, lgrp_id_t lgrp, lgrp_affinity_t aff,
635     lgrp_affinity_t **aff_buf_array)
636 {
637 	lgrp_affinity_t	*buf;
638 	int		err = 0;
639 	int		i;
640 	int		retval;
641 	kthread_t	*t;
642 
643 	ASSERT(MUTEX_HELD(&pidlock) && MUTEX_HELD(&p->p_lock));
644 	ASSERT(aff_buf_array != NULL);
645 
646 	i = 0;
647 	t = p->p_tlist;
648 	if (t != NULL) {
649 		do {
650 			/*
651 			 * Set lgroup affinity for thread
652 			 */
653 			buf = aff_buf_array[i];
654 			retval = lgrp_affinity_set_thread(t, lgrp, aff, &buf);
655 
656 			if (err == 0 && retval != 0)
657 				err = retval;
658 
659 			/*
660 			 * Advance pointer to next buffer
661 			 */
662 			if (buf == NULL) {
663 				ASSERT(i < p->p_lwpcnt);
664 				aff_buf_array[i] = NULL;
665 				i++;
666 			}
667 
668 		} while ((t = t->t_forw) != p->p_tlist);
669 	}
670 	return (err);
671 }
672 
673 
674 /*
675  * Set LWP's or process' affinity for specified lgroup
676  *
677  * When setting affinities, pidlock, process p_lock, and thread_lock()
678  * need to be held in that order to protect target thread's pset, process,
679  * process contents, and thread contents.  thread_lock() does splhigh(),
680  * so it ends up having similiar effect as kpreempt_disable(), so it will
681  * protect calls to lgrp_move_thread() and lgrp_choose() from pset changes.
682  */
683 int
684 lgrp_affinity_set(lgrp_affinity_args_t *ap)
685 {
686 	lgrp_affinity_t		aff;
687 	lgrp_affinity_t		*aff_buf;
688 	lgrp_affinity_args_t	args;
689 	id_t			id;
690 	idtype_t		idtype;
691 	lgrp_id_t		lgrp;
692 	int			nthreads;
693 	proc_t			*p;
694 	int			retval;
695 
696 	/*
697 	 * Copyin arguments
698 	 */
699 	if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
700 		return (set_errno(EFAULT));
701 
702 	idtype = args.idtype;
703 	id = args.id;
704 	lgrp = args.lgrp;
705 	aff = args.aff;
706 
707 	/*
708 	 * Check for invalid lgroup
709 	 */
710 	if (lgrp < 0 || lgrp == LGRP_NONE)
711 		return (set_errno(EINVAL));
712 
713 	/*
714 	 * Check for existing lgroup
715 	 */
716 	if (lgrp > lgrp_alloc_max)
717 		return (set_errno(ESRCH));
718 
719 	/*
720 	 * Check for legal affinity
721 	 */
722 	if (aff != LGRP_AFF_NONE && aff != LGRP_AFF_WEAK &&
723 	    aff != LGRP_AFF_STRONG)
724 		return (set_errno(EINVAL));
725 
726 	/*
727 	 * Must be process or LWP ID
728 	 */
729 	if (idtype != P_LWPID && idtype != P_PID)
730 		return (set_errno(EINVAL));
731 
732 	/*
733 	 * Set given LWP's or process' affinity for specified lgroup
734 	 */
735 	switch (idtype) {
736 
737 	case P_LWPID:
738 		/*
739 		 * Allocate memory for thread's lgroup affinities
740 		 * ahead of time w/o holding locks
741 		 */
742 		aff_buf = kmem_zalloc(nlgrpsmax * sizeof (lgrp_affinity_t),
743 		    KM_SLEEP);
744 
745 		p = curproc;
746 
747 		/*
748 		 * Set affinity for thread
749 		 */
750 		mutex_enter(&p->p_lock);
751 		if (id == P_MYID) {		/* current thread */
752 			retval = lgrp_affinity_set_thread(curthread, lgrp, aff,
753 			    &aff_buf);
754 		} else if (p->p_tlist == NULL) {
755 			retval = set_errno(ESRCH);
756 		} else {			/* other thread */
757 			int		found = 0;
758 			kthread_t	*t;
759 
760 			t = p->p_tlist;
761 			do {
762 				if (t->t_tid == id) {
763 					retval = lgrp_affinity_set_thread(t,
764 					    lgrp, aff, &aff_buf);
765 					found = 1;
766 					break;
767 				}
768 			} while ((t = t->t_forw) != p->p_tlist);
769 			if (!found)
770 				retval = set_errno(ESRCH);
771 		}
772 		mutex_exit(&p->p_lock);
773 
774 		/*
775 		 * Free memory for lgroup affinities,
776 		 * since thread didn't need it
777 		 */
778 		if (aff_buf)
779 			kmem_free(aff_buf,
780 			    nlgrpsmax * sizeof (lgrp_affinity_t));
781 
782 		break;
783 
784 	case P_PID:
785 
786 		do {
787 			lgrp_affinity_t	**aff_buf_array;
788 			int		i;
789 			size_t		size;
790 
791 			/*
792 			 * Get process
793 			 */
794 			mutex_enter(&pidlock);
795 
796 			if (id == P_MYID)
797 				p = curproc;
798 			else
799 				p = prfind(id);
800 
801 			if (p == NULL) {
802 				mutex_exit(&pidlock);
803 				return (set_errno(ESRCH));
804 			}
805 
806 			/*
807 			 * Get number of threads in process
808 			 *
809 			 * NOTE: Only care about user processes,
810 			 *	 so p_lwpcnt should be number of threads.
811 			 */
812 			mutex_enter(&p->p_lock);
813 			nthreads = p->p_lwpcnt;
814 			mutex_exit(&p->p_lock);
815 
816 			mutex_exit(&pidlock);
817 
818 			if (nthreads < 1)
819 				return (set_errno(ESRCH));
820 
821 			/*
822 			 * Preallocate memory for lgroup affinities for
823 			 * each thread in process now to avoid holding
824 			 * any locks.  Allocate an array to hold a buffer
825 			 * for each thread.
826 			 */
827 			aff_buf_array = kmem_zalloc(nthreads *
828 			    sizeof (lgrp_affinity_t *), KM_SLEEP);
829 
830 			size = nlgrpsmax * sizeof (lgrp_affinity_t);
831 			for (i = 0; i < nthreads; i++)
832 				aff_buf_array[i] = kmem_zalloc(size, KM_SLEEP);
833 
834 			mutex_enter(&pidlock);
835 
836 			/*
837 			 * Get process again since dropped locks to allocate
838 			 * memory (except current process)
839 			 */
840 			if (id != P_MYID)
841 				p = prfind(id);
842 
843 			/*
844 			 * Process went away after we dropped locks and before
845 			 * reacquiring them, so drop locks, free memory, and
846 			 * return.
847 			 */
848 			if (p == NULL) {
849 				mutex_exit(&pidlock);
850 				for (i = 0; i < nthreads; i++)
851 					kmem_free(aff_buf_array[i], size);
852 				kmem_free(aff_buf_array,
853 				    nthreads * sizeof (lgrp_affinity_t *));
854 				return (set_errno(ESRCH));
855 			}
856 
857 			mutex_enter(&p->p_lock);
858 
859 			/*
860 			 * See whether number of threads is same
861 			 * If not, drop locks, free memory, and try again
862 			 */
863 			if (nthreads != p->p_lwpcnt) {
864 				mutex_exit(&p->p_lock);
865 				mutex_exit(&pidlock);
866 				for (i = 0; i < nthreads; i++)
867 					kmem_free(aff_buf_array[i], size);
868 				kmem_free(aff_buf_array,
869 				    nthreads * sizeof (lgrp_affinity_t *));
870 				continue;
871 			}
872 
873 			/*
874 			 * Set lgroup affinity for threads in process
875 			 */
876 			retval = lgrp_affinity_set_proc(p, lgrp, aff,
877 			    aff_buf_array);
878 
879 			mutex_exit(&p->p_lock);
880 			mutex_exit(&pidlock);
881 
882 			/*
883 			 * Free any leftover memory, since some threads may
884 			 * have already allocated memory and set lgroup
885 			 * affinities before
886 			 */
887 			for (i = 0; i < nthreads; i++)
888 				if (aff_buf_array[i] != NULL)
889 					kmem_free(aff_buf_array[i], size);
890 			kmem_free(aff_buf_array,
891 			    nthreads * sizeof (lgrp_affinity_t *));
892 
893 			break;
894 
895 		} while (nthreads != p->p_lwpcnt);
896 
897 		break;
898 
899 	default:
900 		retval = set_errno(EINVAL);
901 		break;
902 	}
903 
904 	return (retval);
905 }
906 
907 
908 /*
909  * Return the latest generation number for the lgroup hierarchy
910  * with the given view
911  */
912 lgrp_gen_t
913 lgrp_generation(lgrp_view_t view)
914 {
915 	cpupart_t	*cpupart;
916 	uint_t		gen;
917 
918 	kpreempt_disable();
919 
920 	/*
921 	 * Determine generation number for given view
922 	 */
923 	if (view == LGRP_VIEW_OS)
924 		/*
925 		 * Return generation number of lgroup hierarchy for OS view
926 		 */
927 		gen = lgrp_gen;
928 	else {
929 		/*
930 		 * For caller's view, use generation numbers for lgroup
931 		 * hierarchy and caller's pset
932 		 * NOTE: Caller needs to check for change in pset ID
933 		 */
934 		cpupart = curthread->t_cpupart;
935 		ASSERT(cpupart);
936 		gen = lgrp_gen + cpupart->cp_gen;
937 	}
938 
939 	kpreempt_enable();
940 
941 	return (gen);
942 }
943 
944 
945 lgrp_id_t
946 lgrp_home_thread(kthread_t *t)
947 {
948 	lgrp_id_t	home;
949 
950 	ASSERT(t != NULL);
951 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
952 
953 	thread_lock(t);
954 
955 	/*
956 	 * Check to see whether caller has permission to set affinity for
957 	 * thread
958 	 */
959 	if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
960 		thread_unlock(t);
961 		return (set_errno(EPERM));
962 	}
963 
964 	home = lgrp_home_id(t);
965 
966 	thread_unlock(t);
967 	return (home);
968 }
969 
970 
971 /*
972  * Get home lgroup of given process or thread
973  */
974 lgrp_id_t
975 lgrp_home_get(idtype_t idtype, id_t id)
976 {
977 	proc_t		*p;
978 	lgrp_id_t	retval;
979 	kthread_t	*t;
980 
981 	/*
982 	 * Get home lgroup of given LWP or process
983 	 */
984 	switch (idtype) {
985 
986 	case P_LWPID:
987 		p = curproc;
988 
989 		/*
990 		 * Set affinity for thread
991 		 */
992 		mutex_enter(&p->p_lock);
993 		if (id == P_MYID) {		/* current thread */
994 			retval = lgrp_home_thread(curthread);
995 		} else if (p->p_tlist == NULL) {
996 			retval = set_errno(ESRCH);
997 		} else {			/* other thread */
998 			int	found = 0;
999 
1000 			t = p->p_tlist;
1001 			do {
1002 				if (t->t_tid == id) {
1003 					retval = lgrp_home_thread(t);
1004 					found = 1;
1005 					break;
1006 				}
1007 			} while ((t = t->t_forw) != p->p_tlist);
1008 			if (!found)
1009 				retval = set_errno(ESRCH);
1010 		}
1011 		mutex_exit(&p->p_lock);
1012 		break;
1013 
1014 	case P_PID:
1015 		/*
1016 		 * Get process
1017 		 */
1018 		mutex_enter(&pidlock);
1019 
1020 		if (id == P_MYID)
1021 			p = curproc;
1022 		else
1023 			p = prfind(id);
1024 
1025 		if (p == NULL) {
1026 			mutex_exit(&pidlock);
1027 			return (set_errno(ESRCH));
1028 		}
1029 
1030 		mutex_enter(&p->p_lock);
1031 		t = p->p_tlist;
1032 		if (t == NULL)
1033 			retval = set_errno(ESRCH);
1034 		else
1035 			retval = lgrp_home_thread(t);
1036 		mutex_exit(&p->p_lock);
1037 
1038 		mutex_exit(&pidlock);
1039 
1040 		break;
1041 
1042 	default:
1043 		retval = set_errno(EINVAL);
1044 		break;
1045 	}
1046 
1047 	return (retval);
1048 }
1049 
1050 
1051 /*
1052  * Return latency between "from" and "to" lgroups
1053  *
1054  * This latency number can only be used for relative comparison
1055  * between lgroups on the running system, cannot be used across platforms,
1056  * and may not reflect the actual latency.  It is platform and implementation
1057  * specific, so platform gets to decide its value.  It would be nice if the
1058  * number was at least proportional to make comparisons more meaningful though.
1059  */
1060 int
1061 lgrp_latency(lgrp_id_t from, lgrp_id_t to)
1062 {
1063 	lgrp_t		*from_lgrp;
1064 	int		i;
1065 	int		latency;
1066 	int		latency_max;
1067 	lgrp_t		*to_lgrp;
1068 
1069 	ASSERT(MUTEX_HELD(&cpu_lock));
1070 
1071 	if (from < 0 || to < 0)
1072 		return (set_errno(EINVAL));
1073 
1074 	if (from > lgrp_alloc_max || to > lgrp_alloc_max)
1075 		return (set_errno(ESRCH));
1076 
1077 	from_lgrp = lgrp_table[from];
1078 	to_lgrp = lgrp_table[to];
1079 
1080 	if (!LGRP_EXISTS(from_lgrp) || !LGRP_EXISTS(to_lgrp)) {
1081 		return (set_errno(ESRCH));
1082 	}
1083 
1084 	/*
1085 	 * Get latency for same lgroup
1086 	 */
1087 	if (from == to) {
1088 		latency = from_lgrp->lgrp_latency;
1089 		return (latency);
1090 	}
1091 
1092 	/*
1093 	 * Get latency between leaf lgroups
1094 	 */
1095 	if (from_lgrp->lgrp_childcnt == 0 && to_lgrp->lgrp_childcnt == 0)
1096 		return (lgrp_plat_latency(from_lgrp->lgrp_plathand,
1097 		    to_lgrp->lgrp_plathand));
1098 
1099 	/*
1100 	 * Determine max latency between resources in two lgroups
1101 	 */
1102 	latency_max = 0;
1103 	for (i = 0; i <= lgrp_alloc_max; i++) {
1104 		lgrp_t	*from_rsrc;
1105 		int	j;
1106 		lgrp_t	*to_rsrc;
1107 
1108 		from_rsrc = lgrp_table[i];
1109 		if (!LGRP_EXISTS(from_rsrc) ||
1110 		    !klgrpset_ismember(from_lgrp->lgrp_set[LGRP_RSRC_CPU], i))
1111 			continue;
1112 
1113 		for (j = 0; j <= lgrp_alloc_max; j++) {
1114 			to_rsrc = lgrp_table[j];
1115 			if (!LGRP_EXISTS(to_rsrc) ||
1116 			    klgrpset_ismember(to_lgrp->lgrp_set[LGRP_RSRC_MEM],
1117 			    j) == 0)
1118 				continue;
1119 			latency = lgrp_plat_latency(from_rsrc->lgrp_plathand,
1120 			    to_rsrc->lgrp_plathand);
1121 			if (latency > latency_max)
1122 				latency_max = latency;
1123 		}
1124 	}
1125 	return (latency_max);
1126 }
1127 
1128 
1129 /*
1130  * Return lgroup interface version number
1131  * 0 - none
1132  * 1 - original
1133  * 2 - lgrp_latency_cookie() and lgrp_resources() added
1134  */
1135 int
1136 lgrp_version(int version)
1137 {
1138 	/*
1139 	 * Return LGRP_VER_NONE when requested version isn't supported
1140 	 */
1141 	if (version < LGRP_VER_NONE || version > LGRP_VER_CURRENT)
1142 		return (LGRP_VER_NONE);
1143 
1144 	/*
1145 	 * Return current version when LGRP_VER_NONE passed in
1146 	 */
1147 	if (version == LGRP_VER_NONE)
1148 		return (LGRP_VER_CURRENT);
1149 
1150 	/*
1151 	 * Otherwise, return supported version.
1152 	 */
1153 	return (version);
1154 }
1155 
1156 
1157 /*
1158  * Snapshot of lgroup hierarchy
1159  *
1160  * One snapshot is kept and is based on the kernel's native data model, so
1161  * a 32-bit snapshot is kept for the 32-bit kernel and a 64-bit one for the
1162  * 64-bit kernel.  If a 32-bit user wants a snapshot from the 64-bit kernel,
1163  * the kernel generates a 32-bit snapshot from the data in its 64-bit snapshot.
1164  *
1165  * The format is defined by lgroup snapshot header and the layout of
1166  * the snapshot in memory is as follows:
1167  * 1) lgroup snapshot header
1168  *    - specifies format of snapshot
1169  *    - defined by lgrp_snapshot_header_t
1170  * 2) lgroup info array
1171  *    - contains information about each lgroup
1172  *    - one element for each lgroup
1173  *    - each element is defined by lgrp_info_t
1174  * 3) lgroup CPU ID array
1175  *    - contains list (array) of CPU IDs for each lgroup
1176  *    - lgrp_info_t points into array and specifies how many CPUs belong to
1177  *      given lgroup
1178  * 4) lgroup parents array
1179  *    - contains lgroup bitmask of parents for each lgroup
1180  *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1181  * 5) lgroup children array
1182  *    - contains lgroup bitmask of children for each lgroup
1183  *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1184  * 6) lgroup resources array
1185  *    - contains lgroup bitmask of resources for each lgroup
1186  *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1187  * 7) lgroup latency table
1188  *    - contains latency from each lgroup to each of other lgroups
1189  *
1190  * NOTE:  Must use nlgrpsmax for per lgroup data structures because lgroups
1191  *	  may be sparsely allocated.
1192  */
1193 lgrp_snapshot_header_t	*lgrp_snap = NULL;	/* lgroup snapshot */
1194 static kmutex_t		lgrp_snap_lock;		/* snapshot lock */
1195 
1196 
1197 /*
1198  * Take a snapshot of lgroup hierarchy and return size of buffer
1199  * needed to hold snapshot
1200  */
1201 static int
1202 lgrp_snapshot(void)
1203 {
1204 	size_t		bitmask_size;
1205 	size_t		bitmasks_size;
1206 	size_t		bufsize;
1207 	int		cpu_index;
1208 	size_t		cpuids_size;
1209 	int		i;
1210 	int		j;
1211 	size_t		info_size;
1212 	size_t		lats_size;
1213 	ulong_t		*lgrp_children;
1214 	processorid_t	*lgrp_cpuids;
1215 	lgrp_info_t	*lgrp_info;
1216 	int		**lgrp_lats;
1217 	ulong_t		*lgrp_parents;
1218 	ulong_t		*lgrp_rsets;
1219 	ulong_t		*lgrpset;
1220 	int		snap_ncpus;
1221 	int		snap_nlgrps;
1222 	int		snap_nlgrpsmax;
1223 	size_t		snap_hdr_size;
1224 #ifdef	_SYSCALL32_IMPL
1225 	model_t		model = DATAMODEL_NATIVE;
1226 
1227 	/*
1228 	 * Have up-to-date snapshot, so check to see whether caller is 32-bit
1229 	 * program and need to return size of 32-bit snapshot now.
1230 	 */
1231 	model = get_udatamodel();
1232 	if (model == DATAMODEL_ILP32 && lgrp_snap &&
1233 	    lgrp_snap->ss_gen == lgrp_gen) {
1234 
1235 		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1236 
1237 		/*
1238 		 * Calculate size of buffer needed for 32-bit snapshot,
1239 		 * rounding up size of each object to allow for alignment
1240 		 * of next object in buffer.
1241 		 */
1242 		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
1243 		    sizeof (caddr32_t));
1244 		info_size =
1245 		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
1246 		    sizeof (processorid_t));
1247 		cpuids_size =
1248 		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
1249 		    sizeof (ulong_t));
1250 
1251 		/*
1252 		 * lgroup bitmasks needed for parents, children, and resources
1253 		 * for each lgroup and pset lgroup set
1254 		 */
1255 		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1256 		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
1257 		    snap_nlgrpsmax) + 1) * bitmask_size;
1258 
1259 		/*
1260 		 * Size of latency table and buffer
1261 		 */
1262 		lats_size = snap_nlgrpsmax * sizeof (caddr32_t) +
1263 		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);
1264 
1265 		bufsize = snap_hdr_size + info_size + cpuids_size +
1266 		    bitmasks_size + lats_size;
1267 		return (bufsize);
1268 	}
1269 #endif	/* _SYSCALL32_IMPL */
1270 
1271 	/*
1272 	 * Check whether snapshot is up-to-date
1273 	 * Free it and take another one if not
1274 	 */
1275 	if (lgrp_snap) {
1276 		if (lgrp_snap->ss_gen == lgrp_gen)
1277 			return (lgrp_snap->ss_size);
1278 
1279 		kmem_free(lgrp_snap, lgrp_snap->ss_size);
1280 		lgrp_snap = NULL;
1281 	}
1282 
1283 	/*
1284 	 * Allocate memory for snapshot
1285 	 * w/o holding cpu_lock while waiting for memory
1286 	 */
1287 	while (lgrp_snap == NULL) {
1288 		int	old_generation;
1289 
1290 		/*
1291 		 * Take snapshot of lgroup generation number
1292 		 * and configuration size dependent information
1293 		 * NOTE: Only count number of online CPUs,
1294 		 * since only online CPUs appear in lgroups.
1295 		 */
1296 		mutex_enter(&cpu_lock);
1297 		old_generation = lgrp_gen;
1298 		snap_ncpus = ncpus_online;
1299 		snap_nlgrps = nlgrps;
1300 		snap_nlgrpsmax = nlgrpsmax;
1301 		mutex_exit(&cpu_lock);
1302 
1303 		/*
1304 		 * Calculate size of buffer needed for snapshot,
1305 		 * rounding up size of each object to allow for alignment
1306 		 * of next object in buffer.
1307 		 */
1308 		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
1309 		    sizeof (void *));
1310 		info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
1311 		    sizeof (processorid_t));
1312 		cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
1313 		    sizeof (ulong_t));
1314 		/*
1315 		 * lgroup bitmasks needed for pset lgroup set and  parents,
1316 		 * children, and resource sets for each lgroup
1317 		 */
1318 		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1319 		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
1320 		    snap_nlgrpsmax) + 1) * bitmask_size;
1321 
1322 		/*
1323 		 * Size of latency table and buffer
1324 		 */
1325 		lats_size = snap_nlgrpsmax * sizeof (int *) +
1326 		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);
1327 
1328 		bufsize = snap_hdr_size + info_size + cpuids_size +
1329 		    bitmasks_size + lats_size;
1330 
1331 		/*
1332 		 * Allocate memory for buffer
1333 		 */
1334 		lgrp_snap = kmem_zalloc(bufsize, KM_NOSLEEP);
1335 		if (lgrp_snap == NULL)
1336 			return (set_errno(ENOMEM));
1337 
1338 		/*
1339 		 * Check whether generation number has changed
1340 		 */
1341 		mutex_enter(&cpu_lock);
1342 		if (lgrp_gen == old_generation)
1343 			break;		/* hasn't change, so done. */
1344 
1345 		/*
1346 		 * Generation number changed, so free memory and try again.
1347 		 */
1348 		mutex_exit(&cpu_lock);
1349 		kmem_free(lgrp_snap, bufsize);
1350 		lgrp_snap = NULL;
1351 	}
1352 
1353 	/*
1354 	 * Fill in lgroup snapshot header
1355 	 * (including pointers to tables of lgroup info, CPU IDs, and parents
1356 	 * and children)
1357 	 */
1358 	lgrp_snap->ss_version = LGRP_VER_CURRENT;
1359 
1360 	/*
1361 	 * XXX For now, liblgrp only needs to know whether the hierarchy
1362 	 * XXX only has one level or not
1363 	 */
1364 	if (snap_nlgrps == 1)
1365 		lgrp_snap->ss_levels = 1;
1366 	else
1367 		lgrp_snap->ss_levels = 2;
1368 
1369 	lgrp_snap->ss_root = LGRP_ROOTID;
1370 
1371 	lgrp_snap->ss_nlgrps = lgrp_snap->ss_nlgrps_os = snap_nlgrps;
1372 	lgrp_snap->ss_nlgrps_max = snap_nlgrpsmax;
1373 	lgrp_snap->ss_ncpus = snap_ncpus;
1374 	lgrp_snap->ss_gen = lgrp_gen;
1375 	lgrp_snap->ss_view = LGRP_VIEW_OS;
1376 	lgrp_snap->ss_pset = 0;		/* NOTE: caller should set if needed */
1377 	lgrp_snap->ss_size = bufsize;
1378 	lgrp_snap->ss_magic = (uintptr_t)lgrp_snap;
1379 
1380 	lgrp_snap->ss_info = lgrp_info =
1381 	    (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
1382 
1383 	lgrp_snap->ss_cpuids = lgrp_cpuids =
1384 	    (processorid_t *)((uintptr_t)lgrp_info + info_size);
1385 
1386 	lgrp_snap->ss_lgrpset = lgrpset =
1387 	    (ulong_t *)((uintptr_t)lgrp_cpuids + cpuids_size);
1388 
1389 	lgrp_snap->ss_parents = lgrp_parents =
1390 	    (ulong_t *)((uintptr_t)lgrpset + bitmask_size);
1391 
1392 	lgrp_snap->ss_children = lgrp_children =
1393 	    (ulong_t *)((uintptr_t)lgrp_parents + (snap_nlgrpsmax *
1394 	    bitmask_size));
1395 
1396 	lgrp_snap->ss_rsets = lgrp_rsets =
1397 	    (ulong_t *)((uintptr_t)lgrp_children + (snap_nlgrpsmax *
1398 	    bitmask_size));
1399 
1400 	lgrp_snap->ss_latencies = lgrp_lats =
1401 	    (int **)((uintptr_t)lgrp_rsets + (LGRP_RSRC_COUNT *
1402 		snap_nlgrpsmax * bitmask_size));
1403 
1404 	/*
1405 	 * Fill in lgroup information
1406 	 */
1407 	cpu_index = 0;
1408 	for (i = 0; i < snap_nlgrpsmax; i++) {
1409 		struct cpu	*cp;
1410 		int		cpu_count;
1411 		struct cpu	*head;
1412 		int		k;
1413 		lgrp_t		*lgrp;
1414 
1415 		lgrp = lgrp_table[i];
1416 		if (!LGRP_EXISTS(lgrp)) {
1417 			bzero(&lgrp_info[i], sizeof (lgrp_info[i]));
1418 			lgrp_info[i].info_lgrpid = LGRP_NONE;
1419 			continue;
1420 		}
1421 
1422 		lgrp_info[i].info_lgrpid = i;
1423 		lgrp_info[i].info_latency = lgrp->lgrp_latency;
1424 
1425 		/*
1426 		 * Fill in parents, children, and lgroup resources
1427 		 */
1428 		lgrp_info[i].info_parents =
1429 		    (ulong_t *)((uintptr_t)lgrp_parents + (i * bitmask_size));
1430 
1431 		if (lgrp->lgrp_parent)
1432 			BT_SET(lgrp_info[i].info_parents,
1433 			    lgrp->lgrp_parent->lgrp_id);
1434 
1435 		lgrp_info[i].info_children =
1436 		    (ulong_t *)((uintptr_t)lgrp_children + (i * bitmask_size));
1437 
1438 		for (j = 0; j < snap_nlgrpsmax; j++)
1439 			if (klgrpset_ismember(lgrp->lgrp_children, j))
1440 				BT_SET(lgrp_info[i].info_children, j);
1441 
1442 		lgrp_info[i].info_rset =
1443 		    (ulong_t *)((uintptr_t)lgrp_rsets +
1444 		    (i * LGRP_RSRC_COUNT * bitmask_size));
1445 
1446 		for (j = 0; j < LGRP_RSRC_COUNT; j++) {
1447 			ulong_t	*rset;
1448 
1449 			rset = (ulong_t *)((uintptr_t)lgrp_info[i].info_rset +
1450 			    (j * bitmask_size));
1451 			for (k = 0; k < snap_nlgrpsmax; k++)
1452 				if (klgrpset_ismember(lgrp->lgrp_set[j], k))
1453 					BT_SET(rset, k);
1454 		}
1455 
1456 		/*
1457 		 * Fill in CPU IDs
1458 		 */
1459 		cpu_count = 0;
1460 		lgrp_info[i].info_cpuids = NULL;
1461 		cp = head = lgrp->lgrp_cpu;
1462 		if (head != NULL) {
1463 			lgrp_info[i].info_cpuids = &lgrp_cpuids[cpu_index];
1464 			do {
1465 				lgrp_cpuids[cpu_index] = cp->cpu_id;
1466 				cpu_index++;
1467 				cpu_count++;
1468 				cp = cp->cpu_next_lgrp;
1469 			} while (cp != head);
1470 		}
1471 		ASSERT(cpu_count == lgrp->lgrp_cpucnt);
1472 		lgrp_info[i].info_ncpus = cpu_count;
1473 
1474 		/*
1475 		 * Fill in memory sizes for lgroups that directly contain
1476 		 * memory
1477 		 */
1478 		if (klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM], i)) {
1479 			lgrp_info[i].info_mem_free =
1480 			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
1481 			lgrp_info[i].info_mem_install =
1482 			    lgrp_mem_size(i, LGRP_MEM_SIZE_INSTALL);
1483 		}
1484 
1485 		/*
1486 		 * Fill in latency table and buffer
1487 		 */
1488 		lgrp_lats[i] = (int *)((uintptr_t)lgrp_lats + snap_nlgrpsmax *
1489 		    sizeof (int *) + i * snap_nlgrpsmax * sizeof (int));
1490 		for (j = 0; j < snap_nlgrpsmax; j++) {
1491 			lgrp_t	*to;
1492 
1493 			to = lgrp_table[j];
1494 			if (!LGRP_EXISTS(to))
1495 				continue;
1496 			lgrp_lats[i][j] = lgrp_latency(lgrp->lgrp_id,
1497 			    to->lgrp_id);
1498 		}
1499 	}
1500 	ASSERT(cpu_index == snap_ncpus);
1501 
1502 
1503 	mutex_exit(&cpu_lock);
1504 
1505 #ifdef	_SYSCALL32_IMPL
1506 	/*
1507 	 * Check to see whether caller is 32-bit program and need to return
1508 	 * size of 32-bit snapshot now that snapshot has been taken/updated.
1509 	 * May not have been able to do this earlier if snapshot was out of
1510 	 * date or didn't exist yet.
1511 	 */
1512 	if (model == DATAMODEL_ILP32) {
1513 
1514 		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1515 
1516 		/*
1517 		 * Calculate size of buffer needed for 32-bit snapshot,
1518 		 * rounding up size of each object to allow for alignment
1519 		 * of next object in buffer.
1520 		 */
1521 		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
1522 		    sizeof (caddr32_t));
1523 		info_size =
1524 		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
1525 		    sizeof (processorid_t));
1526 		cpuids_size =
1527 		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
1528 		    sizeof (ulong_t));
1529 
1530 		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1531 		bitmasks_size = (((2 + LGRP_RSRC_COUNT) * snap_nlgrpsmax) +
1532 		    1) * bitmask_size;
1533 
1534 
1535 		/*
1536 		 * Size of latency table and buffer
1537 		 */
1538 		lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
1539 		    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));
1540 
1541 		bufsize = snap_hdr_size + info_size + cpuids_size +
1542 		    bitmasks_size + lats_size;
1543 		return (bufsize);
1544 	}
1545 #endif	/* _SYSCALL32_IMPL */
1546 
1547 	return (lgrp_snap->ss_size);
1548 }
1549 
1550 
1551 /*
1552  * Copy snapshot into given user buffer, fix up any pointers in buffer to point
1553  * into user instead of kernel address space, and return size of buffer
1554  * needed to hold snapshot
1555  */
1556 static int
1557 lgrp_snapshot_copy(char *buf, size_t bufsize)
1558 {
1559 	size_t			bitmask_size;
1560 	int			cpu_index;
1561 	size_t			cpuids_size;
1562 	int			i;
1563 	size_t			info_size;
1564 	lgrp_info_t		*lgrp_info;
1565 	int			retval;
1566 	size_t			snap_hdr_size;
1567 	int			snap_ncpus;
1568 	int			snap_nlgrpsmax;
1569 	lgrp_snapshot_header_t	*user_snap;
1570 	lgrp_info_t		*user_info;
1571 	lgrp_info_t		*user_info_buffer;
1572 	processorid_t		*user_cpuids;
1573 	ulong_t			*user_lgrpset;
1574 	ulong_t			*user_parents;
1575 	ulong_t			*user_children;
1576 	int			**user_lats;
1577 	int			**user_lats_buffer;
1578 	ulong_t			*user_rsets;
1579 
1580 	if (lgrp_snap == NULL)
1581 		return (0);
1582 
1583 	if (buf == NULL || bufsize <= 0)
1584 		return (lgrp_snap->ss_size);
1585 
1586 	/*
1587 	 * User needs to try getting size of buffer again
1588 	 * because given buffer size is too small.
1589 	 * The lgroup hierarchy may have changed after they asked for the size
1590 	 * but before the snapshot was taken.
1591 	 */
1592 	if (bufsize < lgrp_snap->ss_size)
1593 		return (set_errno(EAGAIN));
1594 
1595 	snap_ncpus = lgrp_snap->ss_ncpus;
1596 	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1597 
1598 	/*
1599 	 * Fill in lgrpset now because caller may have change psets
1600 	 */
1601 	kpreempt_disable();
1602 	for (i = 0; i < snap_nlgrpsmax; i++) {
1603 		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
1604 		    i)) {
1605 			BT_SET(lgrp_snap->ss_lgrpset, i);
1606 		}
1607 	}
1608 	kpreempt_enable();
1609 
1610 	/*
1611 	 * Copy lgroup snapshot (snapshot header, lgroup info, and CPU IDs)
1612 	 * into user buffer all at once
1613 	 */
1614 	if (copyout(lgrp_snap, buf, lgrp_snap->ss_size) != 0)
1615 		return (set_errno(EFAULT));
1616 
1617 	/*
1618 	 * Round up sizes of lgroup snapshot header and info for alignment
1619 	 */
1620 	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
1621 	    sizeof (void *));
1622 	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
1623 	    sizeof (processorid_t));
1624 	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
1625 	    sizeof (ulong_t));
1626 
1627 	bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1628 
1629 	/*
1630 	 * Calculate pointers into user buffer for lgroup snapshot header,
1631 	 * info, and CPU IDs
1632 	 */
1633 	user_snap = (lgrp_snapshot_header_t *)buf;
1634 	user_info = (lgrp_info_t *)((uintptr_t)user_snap + snap_hdr_size);
1635 	user_cpuids = (processorid_t *)((uintptr_t)user_info + info_size);
1636 	user_lgrpset = (ulong_t *)((uintptr_t)user_cpuids + cpuids_size);
1637 	user_parents = (ulong_t *)((uintptr_t)user_lgrpset + bitmask_size);
1638 	user_children = (ulong_t *)((uintptr_t)user_parents +
1639 	    (snap_nlgrpsmax * bitmask_size));
1640 	user_rsets = (ulong_t *)((uintptr_t)user_children +
1641 	    (snap_nlgrpsmax * bitmask_size));
1642 	user_lats = (int **)((uintptr_t)user_rsets +
1643 	    (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size));
1644 
1645 	/*
1646 	 * Copyout magic number (ie. pointer to beginning of buffer)
1647 	 */
1648 	if (copyout(&buf, &user_snap->ss_magic, sizeof (buf)) != 0)
1649 		return (set_errno(EFAULT));
1650 
1651 	/*
1652 	 * Fix up pointers in user buffer to point into user buffer
1653 	 * not kernel snapshot
1654 	 */
1655 	if (copyout(&user_info, &user_snap->ss_info, sizeof (user_info)) != 0)
1656 		return (set_errno(EFAULT));
1657 
1658 	if (copyout(&user_cpuids, &user_snap->ss_cpuids,
1659 	    sizeof (user_cpuids)) != 0)
1660 		return (set_errno(EFAULT));
1661 
1662 	if (copyout(&user_lgrpset, &user_snap->ss_lgrpset,
1663 	    sizeof (user_lgrpset)) != 0)
1664 		return (set_errno(EFAULT));
1665 
1666 	if (copyout(&user_parents, &user_snap->ss_parents,
1667 	    sizeof (user_parents)) != 0)
1668 		return (set_errno(EFAULT));
1669 
1670 	if (copyout(&user_children, &user_snap->ss_children,
1671 	    sizeof (user_children)) != 0)
1672 		return (set_errno(EFAULT));
1673 
1674 	if (copyout(&user_rsets, &user_snap->ss_rsets,
1675 	    sizeof (user_rsets)) != 0)
1676 		return (set_errno(EFAULT));
1677 
1678 	if (copyout(&user_lats, &user_snap->ss_latencies,
1679 	    sizeof (user_lats)) != 0)
1680 		return (set_errno(EFAULT));
1681 
1682 	/*
1683 	 * Make copies of lgroup info and latency table, fix up pointers,
1684 	 * and then copy them into user buffer
1685 	 */
1686 	user_info_buffer = kmem_zalloc(info_size, KM_NOSLEEP);
1687 	if (user_info_buffer == NULL)
1688 		return (set_errno(ENOMEM));
1689 
1690 	user_lats_buffer = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
1691 	    KM_NOSLEEP);
1692 	if (user_lats_buffer == NULL) {
1693 		kmem_free(user_info_buffer, info_size);
1694 		return (set_errno(ENOMEM));
1695 	}
1696 
1697 	lgrp_info = (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
1698 	bcopy(lgrp_info, user_info_buffer, info_size);
1699 
1700 	cpu_index = 0;
1701 	for (i = 0; i < snap_nlgrpsmax; i++) {
1702 		ulong_t	*snap_rset;
1703 
1704 		/*
1705 		 * Skip non-existent lgroups
1706 		 */
1707 		if (user_info_buffer[i].info_lgrpid == LGRP_NONE)
1708 			continue;
1709 
1710 		/*
1711 		 * Update free memory size since it changes frequently
1712 		 * Only do so for lgroups directly containing memory
1713 		 *
1714 		 * NOTE: This must be done before changing the pointers to
1715 		 *	 point into user space since we need to dereference
1716 		 *	 lgroup resource set
1717 		 */
1718 		snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
1719 		    BT_BITOUL(snap_nlgrpsmax)];
1720 		if (BT_TEST(snap_rset, i))
1721 			user_info_buffer[i].info_mem_free =
1722 			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
1723 
1724 		/*
1725 		 * Fix up pointers to parents, children, resources, and
1726 		 * latencies
1727 		 */
1728 		user_info_buffer[i].info_parents =
1729 		    (ulong_t *)((uintptr_t)user_parents + (i * bitmask_size));
1730 		user_info_buffer[i].info_children =
1731 		    (ulong_t *)((uintptr_t)user_children + (i * bitmask_size));
1732 		user_info_buffer[i].info_rset =
1733 		    (ulong_t *)((uintptr_t)user_rsets +
1734 		    (i * LGRP_RSRC_COUNT * bitmask_size));
1735 		user_lats_buffer[i] = (int *)((uintptr_t)user_lats +
1736 		    (snap_nlgrpsmax * sizeof (int *)) + (i * snap_nlgrpsmax *
1737 		    sizeof (int)));
1738 
1739 		/*
1740 		 * Fix up pointer to CPU IDs
1741 		 */
1742 		if (user_info_buffer[i].info_ncpus == 0) {
1743 			user_info_buffer[i].info_cpuids = NULL;
1744 			continue;
1745 		}
1746 		user_info_buffer[i].info_cpuids = &user_cpuids[cpu_index];
1747 		cpu_index += user_info_buffer[i].info_ncpus;
1748 	}
1749 	ASSERT(cpu_index == snap_ncpus);
1750 
1751 	/*
1752 	 * Copy lgroup info and latency table with pointers fixed up to point
1753 	 * into user buffer out to user buffer now
1754 	 */
1755 	retval = lgrp_snap->ss_size;
1756 	if (copyout(user_info_buffer, user_info, info_size) != 0)
1757 		retval = set_errno(EFAULT);
1758 	kmem_free(user_info_buffer, info_size);
1759 
1760 	if (copyout(user_lats_buffer, user_lats, snap_nlgrpsmax *
1761 	    sizeof (int *)) != 0)
1762 		retval = set_errno(EFAULT);
1763 	kmem_free(user_lats_buffer, snap_nlgrpsmax * sizeof (int *));
1764 
1765 	return (retval);
1766 }
1767 
1768 
#ifdef	_SYSCALL32_IMPL
/*
 * Make 32-bit copy of snapshot, fix up any pointers in buffer to point
 * into user instead of kernel address space, copy 32-bit snapshot into
 * given user buffer, and return size of buffer needed to hold snapshot
 *
 * buf		32-bit user address of buffer to receive the snapshot
 * bufsize	size of that buffer in bytes
 *
 * Returns:
 *  - 0 if no snapshot has been taken
 *  - size of the 32-bit snapshot if buf is 0 or bufsize is 0 (nothing copied)
 *  - set_errno(EAGAIN) if bufsize is smaller than the 32-bit snapshot
 *  - set_errno(ENOMEM) if temporary kernel buffers cannot be allocated
 *  - set_errno(EFAULT) if the copyout to the user buffer fails
 *  - size of the 32-bit snapshot on success
 */
static int
lgrp_snapshot_copy32(caddr32_t buf, size32_t bufsize)
{
	size32_t			bitmask_size;
	size32_t			bitmasks_size;
	size32_t			children_size;
	int				cpu_index;
	size32_t			cpuids_size;
	int				i;
	int				j;
	size32_t			info_size;
	size32_t			lats_size;
	lgrp_info_t			*lgrp_info;
	lgrp_snapshot_header32_t	*lgrp_snap32;
	lgrp_info32_t			*lgrp_info32;
	processorid_t			*lgrp_cpuids32;
	caddr32_t			*lgrp_lats32;
	int				**lgrp_lats32_kernel;
	uint_t				*lgrp_set32;
	uint_t				*lgrp_parents32;
	uint_t				*lgrp_children32;
	uint_t				*lgrp_rsets32;
	size32_t			parents_size;
	int				retval;
	size32_t			rsets_size;
	size32_t			set_size;
	size_t				snap_bitmask_size;
	size32_t			snap_hdr_size;
	int				snap_ncpus;
	int				snap_nlgrpsmax;
	size32_t			snap_size;

	if (lgrp_snap == NULL)
		return (0);

	snap_ncpus = lgrp_snap->ss_ncpus;
	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

	/*
	 * Size in bytes of one lgroup bitmask in the kernel's (native)
	 * snapshot, as opposed to bitmask_size which is the size of one
	 * bitmask in the 32-bit copy
	 */
	snap_bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);

	/*
	 * Calculate size of buffer needed for 32-bit snapshot,
	 * rounding up size of each object to allow for alignment
	 * of next object in buffer.
	 */
	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
	    sizeof (caddr32_t));
	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
	    sizeof (processorid_t));
	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
	    sizeof (ulong_t));

	bitmask_size = BT_SIZEOFMAP32(snap_nlgrpsmax);

	set_size = bitmask_size;
	parents_size = snap_nlgrpsmax * bitmask_size;
	children_size = snap_nlgrpsmax * bitmask_size;
	rsets_size = P2ROUNDUP(LGRP_RSRC_COUNT * snap_nlgrpsmax *
	    (int)bitmask_size, sizeof (caddr32_t));

	bitmasks_size = set_size + parents_size + children_size + rsets_size;

	/*
	 * Size of latency table and buffer
	 */
	lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
	    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));

	snap_size = snap_hdr_size + info_size + cpuids_size + bitmasks_size +
	    lats_size;

	if (buf == 0 || bufsize <= 0) {
		return (snap_size);
	}

	/*
	 * User needs to try getting size of buffer again
	 * because given buffer size is too small.
	 * The lgroup hierarchy may have changed after they asked for the size
	 * but before the snapshot was taken.
	 */
	if (bufsize < snap_size)
		return (set_errno(EAGAIN));

	/*
	 * Make 32-bit copy of snapshot, fix up pointers to point into user
	 * buffer not kernel, and then copy whole thing into user buffer
	 */
	lgrp_snap32 = kmem_zalloc(snap_size, KM_NOSLEEP);
	if (lgrp_snap32 == NULL)
		return (set_errno(ENOMEM));

	/*
	 * Calculate pointers into 32-bit copy of snapshot
	 * for lgroup info, CPU IDs, pset lgroup bitmask, parents, children,
	 * resources, and latency table and buffer
	 */
	lgrp_info32 = (lgrp_info32_t *)((uintptr_t)lgrp_snap32 +
	    snap_hdr_size);
	lgrp_cpuids32 = (processorid_t *)((uintptr_t)lgrp_info32 + info_size);
	lgrp_set32 = (uint_t *)((uintptr_t)lgrp_cpuids32 + cpuids_size);
	lgrp_parents32 = (uint_t *)((uintptr_t)lgrp_set32 + set_size);
	lgrp_children32 = (uint_t *)((uintptr_t)lgrp_parents32 + parents_size);
	lgrp_rsets32 = (uint_t *)((uintptr_t)lgrp_children32 + children_size);
	lgrp_lats32 = (caddr32_t *)((uintptr_t)lgrp_rsets32 + rsets_size);

	/*
	 * Make temporary lgroup latency table of pointers for kernel to use
	 * to fill in rows of table with latencies from each lgroup
	 */
	lgrp_lats32_kernel =  kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
	    KM_NOSLEEP);
	if (lgrp_lats32_kernel == NULL) {
		kmem_free(lgrp_snap32, snap_size);
		return (set_errno(ENOMEM));
	}

	/*
	 * Fill in 32-bit lgroup snapshot header
	 * (with pointers into user's buffer for lgroup info, CPU IDs,
	 * bit masks, and latencies)
	 *
	 * The user-space offsets are derived from the same size variables
	 * used for the kernel-side pointers above so the two layouts cannot
	 * drift apart.
	 */
	lgrp_snap32->ss_version = lgrp_snap->ss_version;
	lgrp_snap32->ss_levels = lgrp_snap->ss_levels;
	lgrp_snap32->ss_nlgrps = lgrp_snap32->ss_nlgrps_os =
	    lgrp_snap->ss_nlgrps;
	lgrp_snap32->ss_nlgrps_max = snap_nlgrpsmax;
	lgrp_snap32->ss_root = lgrp_snap->ss_root;
	lgrp_snap32->ss_ncpus = lgrp_snap->ss_ncpus;
	lgrp_snap32->ss_gen = lgrp_snap->ss_gen;
	lgrp_snap32->ss_view = LGRP_VIEW_OS;
	lgrp_snap32->ss_size = snap_size;
	lgrp_snap32->ss_magic = buf;
	lgrp_snap32->ss_info = buf + snap_hdr_size;
	lgrp_snap32->ss_cpuids = lgrp_snap32->ss_info + info_size;
	lgrp_snap32->ss_lgrpset = lgrp_snap32->ss_cpuids + cpuids_size;
	lgrp_snap32->ss_parents = lgrp_snap32->ss_lgrpset + set_size;
	lgrp_snap32->ss_children = lgrp_snap32->ss_parents + parents_size;
	lgrp_snap32->ss_rsets = lgrp_snap32->ss_children + children_size;
	lgrp_snap32->ss_latencies = lgrp_snap32->ss_rsets + rsets_size;

	/*
	 * Fill in lgrpset now because caller may have changed psets
	 * (lgrp_set32 lives in the freshly zeroed 32-bit copy, so only
	 * member bits need to be set)
	 */
	kpreempt_disable();
	for (i = 0; i < snap_nlgrpsmax; i++) {
		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
		    i)) {
			BT_SET32(lgrp_set32, i);
		}
	}
	kpreempt_enable();

	/*
	 * Fill in 32-bit copy of lgroup info and fix up pointers
	 * to point into user's buffer instead of kernel's
	 */
	cpu_index = 0;
	lgrp_info = lgrp_snap->ss_info;
	for (i = 0; i < snap_nlgrpsmax; i++) {
		uint_t	*children;
		uint_t	*lgrp_rset;
		uint_t	*parents;
		ulong_t	*snap_children;
		ulong_t	*snap_mem_rset;
		ulong_t	*snap_parents;
		ulong_t	*snap_rset;

		/*
		 * Skip non-existent lgroups
		 */
		if (lgrp_info[i].info_lgrpid == LGRP_NONE) {
			bzero(&lgrp_info32[i], sizeof (lgrp_info32[i]));
			lgrp_info32[i].info_lgrpid = LGRP_NONE;
			continue;
		}

		/*
		 * Locate this lgroup's bitmasks in the 32-bit copy ...
		 */
		parents = (uint_t *)((uintptr_t)lgrp_parents32 +
		    i * bitmask_size);
		children = (uint_t *)((uintptr_t)lgrp_children32 +
		    i * bitmask_size);
		lgrp_rset = (uint_t *)((uintptr_t)lgrp_rsets32 +
		    (i * LGRP_RSRC_COUNT * bitmask_size));

		/*
		 * ... and in the kernel snapshot.  Each lgroup owns a whole
		 * bitmask of snap_bitmask_size bytes (which may be more than
		 * one ulong_t), so step by the bitmask size rather than by
		 * array element.
		 */
		snap_parents = (ulong_t *)((uintptr_t)lgrp_snap->ss_parents +
		    (i * snap_bitmask_size));
		snap_children = (ulong_t *)((uintptr_t)lgrp_snap->ss_children +
		    (i * snap_bitmask_size));
		snap_rset = (ulong_t *)((uintptr_t)lgrp_snap->ss_rsets +
		    (i * LGRP_RSRC_COUNT * snap_bitmask_size));

		/*
		 * Row i of the latency buffer follows the table of row
		 * pointers in the 32-bit copy
		 */
		lgrp_lats32_kernel[i] = (int *)((uintptr_t)lgrp_lats32 +
		    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
		    sizeof (int));

		/*
		 * Fill in parents, children, lgroup resource set, and
		 * latencies from snapshot
		 */
		for (j = 0; j < snap_nlgrpsmax; j++) {
			int	k;

			if (BT_TEST(snap_parents, j))
				BT_SET32(parents, j);

			if (BT_TEST(snap_children, j))
				BT_SET32(children, j);

			for (k = 0; k < LGRP_RSRC_COUNT; k++) {
				ulong_t	*from;
				uint_t	*to;

				from = (ulong_t *)((uintptr_t)snap_rset +
				    k * snap_bitmask_size);
				to = (uint_t *)((uintptr_t)lgrp_rset +
				    k * bitmask_size);
				if (BT_TEST(from, j))
					BT_SET32(to, j);
			}

			lgrp_lats32_kernel[i][j] =
			    lgrp_snap->ss_latencies[i][j];
		}

		/*
		 * Fix up pointer to latency buffer
		 */
		lgrp_lats32[i] = lgrp_snap32->ss_latencies +
		    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
		    sizeof (int);

		/*
		 * Fix up pointers for parents, children, and resources
		 */
		lgrp_info32[i].info_parents = lgrp_snap32->ss_parents +
		    (i * bitmask_size);
		lgrp_info32[i].info_children = lgrp_snap32->ss_children +
		    (i * bitmask_size);
		lgrp_info32[i].info_rset = lgrp_snap32->ss_rsets +
		    (i * LGRP_RSRC_COUNT * bitmask_size);

		/*
		 * Fill in memory and CPU info
		 * Only fill in memory for lgroups directly containing memory
		 * (free memory is re-read since it changes frequently)
		 */
		snap_mem_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
		    BT_BITOUL(snap_nlgrpsmax)];
		if (BT_TEST(snap_mem_rset, i)) {
			lgrp_info32[i].info_mem_free = lgrp_mem_size(i,
			    LGRP_MEM_SIZE_FREE);
			lgrp_info32[i].info_mem_install =
			    lgrp_info[i].info_mem_install;
		}

		lgrp_info32[i].info_ncpus = lgrp_info[i].info_ncpus;

		lgrp_info32[i].info_lgrpid = lgrp_info[i].info_lgrpid;
		lgrp_info32[i].info_latency = lgrp_info[i].info_latency;

		if (lgrp_info32[i].info_ncpus == 0) {
			lgrp_info32[i].info_cpuids = 0;
			continue;
		}

		/*
		 * Fix up pointer for CPU IDs
		 */
		lgrp_info32[i].info_cpuids = lgrp_snap32->ss_cpuids +
		    (cpu_index * sizeof (processorid_t));
		cpu_index += lgrp_info32[i].info_ncpus;
	}
	ASSERT(cpu_index == snap_ncpus);

	/*
	 * Copy lgroup CPU IDs into 32-bit snapshot
	 * before copying it out into user's buffer
	 */
	bcopy(lgrp_snap->ss_cpuids, lgrp_cpuids32, cpuids_size);

	/*
	 * Copy 32-bit lgroup snapshot into user's buffer all at once and
	 * release the temporary buffers regardless of the outcome
	 */
	retval = snap_size;
	if (copyout(lgrp_snap32, (void *)(uintptr_t)buf, snap_size) != 0)
		retval = set_errno(EFAULT);

	kmem_free(lgrp_snap32, snap_size);
	kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));

	return (retval);
}
#endif	/* _SYSCALL32_IMPL */
2054 
2055 
2056 int
2057 lgrpsys(int subcode, long ia, void *ap)
2058 {
2059 	size_t	bufsize;
2060 	int	latency;
2061 
2062 	switch (subcode) {
2063 
2064 	case LGRP_SYS_AFFINITY_GET:
2065 		return (lgrp_affinity_get((lgrp_affinity_args_t *)ap));
2066 
2067 	case LGRP_SYS_AFFINITY_SET:
2068 		return (lgrp_affinity_set((lgrp_affinity_args_t *)ap));
2069 
2070 	case LGRP_SYS_GENERATION:
2071 		return (lgrp_generation(ia));
2072 
2073 	case LGRP_SYS_HOME:
2074 		return (lgrp_home_get((idtype_t)ia, (id_t)(uintptr_t)ap));
2075 
2076 	case LGRP_SYS_LATENCY:
2077 		mutex_enter(&cpu_lock);
2078 		latency = lgrp_latency(ia, (lgrp_id_t)(uintptr_t)ap);
2079 		mutex_exit(&cpu_lock);
2080 		return (latency);
2081 
2082 	case LGRP_SYS_MEMINFO:
2083 		return (meminfo(ia, (struct meminfo *)ap));
2084 
2085 	case LGRP_SYS_VERSION:
2086 		return (lgrp_version(ia));
2087 
2088 	case LGRP_SYS_SNAPSHOT:
2089 		mutex_enter(&lgrp_snap_lock);
2090 		bufsize = lgrp_snapshot();
2091 		if (ap && ia > 0) {
2092 			if (get_udatamodel() == DATAMODEL_NATIVE)
2093 				bufsize = lgrp_snapshot_copy(ap, ia);
2094 #ifdef	_SYSCALL32_IMPL
2095 			else
2096 				bufsize = lgrp_snapshot_copy32(
2097 				    (caddr32_t)(uintptr_t)ap, ia);
2098 #endif	/* _SYSCALL32_IMPL */
2099 		}
2100 		mutex_exit(&lgrp_snap_lock);
2101 		return (bufsize);
2102 
2103 	default:
2104 		break;
2105 
2106 	}
2107 
2108 	return (set_errno(EINVAL));
2109 }
2110