xref: /illumos-gate/usr/src/uts/common/syscall/lgrpsys.c (revision c6f039c73ee9eb7e4acb232afaca51cdf9d30ff3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  * Copyright 2015 Joyent, Inc.
26  */
27 
28 /*
29  * lgroup system calls
30  */
31 
32 #include <sys/types.h>
33 #include <sys/errno.h>
34 #include <sys/sunddi.h>
35 #include <sys/systm.h>
36 #include <sys/mman.h>
37 #include <sys/cpupart.h>
38 #include <sys/lgrp.h>
39 #include <sys/lgrp_user.h>
40 #include <sys/promif.h>		/* for prom_printf() */
41 #include <sys/sysmacros.h>
42 #include <sys/policy.h>
43 
44 #include <vm/as.h>
45 
46 
/*
 * Bit flags stored in each mi_validity word: VALID_ADDR marks the input
 * address itself as valid, and VALID_REQ is shifted left by the request
 * index to mark each request that was answered.
 */
#define	VALID_ADDR	0x1
#define	VALID_REQ	0x2
50 
51 /*
52  * run through the given number of addresses and requests and return the
53  * corresponding memory information for each address
54  */
55 static int
meminfo(int addr_count,struct meminfo * mip)56 meminfo(int addr_count, struct meminfo *mip)
57 {
58 	size_t		in_size, out_size, req_size, val_size;
59 	struct as	*as;
60 	struct hat	*hat;
61 	int		i, j, out_idx, info_count;
62 	lgrp_t		*lgrp;
63 	pfn_t		pfn;
64 	ssize_t		pgsz;
65 	int		*req_array, *val_array;
66 	uint64_t	*in_array, *out_array;
67 	uint64_t	addr, paddr;
68 	uintptr_t	vaddr;
69 	int		ret = 0;
70 	struct meminfo minfo;
71 #if defined(_SYSCALL32_IMPL)
72 	struct meminfo32 minfo32;
73 #endif
74 
75 	/*
76 	 * Make sure that there is at least one address to translate and
77 	 * limit how many virtual addresses the kernel can do per call
78 	 */
79 	if (addr_count < 1)
80 		return (set_errno(EINVAL));
81 	else if (addr_count > MAX_MEMINFO_CNT)
82 		addr_count = MAX_MEMINFO_CNT;
83 
84 	if (get_udatamodel() == DATAMODEL_NATIVE) {
85 		if (copyin(mip, &minfo, sizeof (struct meminfo)))
86 			return (set_errno(EFAULT));
87 	}
88 #if defined(_SYSCALL32_IMPL)
89 	else {
90 		bzero(&minfo, sizeof (minfo));
91 		if (copyin(mip, &minfo32, sizeof (struct meminfo32)))
92 			return (set_errno(EFAULT));
93 		minfo.mi_inaddr = (const uint64_t *)(uintptr_t)
94 		    minfo32.mi_inaddr;
95 		minfo.mi_info_req = (const uint_t *)(uintptr_t)
96 		    minfo32.mi_info_req;
97 		minfo.mi_info_count = minfo32.mi_info_count;
98 		minfo.mi_outdata = (uint64_t *)(uintptr_t)
99 		    minfo32.mi_outdata;
100 		minfo.mi_validity = (uint_t *)(uintptr_t)
101 		    minfo32.mi_validity;
102 	}
103 #endif
104 	/*
105 	 * all the input parameters have been copied in:-
106 	 * addr_count - number of input addresses
107 	 * minfo.mi_inaddr - array of input addresses
108 	 * minfo.mi_info_req - array of types of information requested
109 	 * minfo.mi_info_count - no. of pieces of info requested for each addr
110 	 * minfo.mi_outdata - array into which the results are placed
111 	 * minfo.mi_validity -  array containing bitwise result codes; 0th bit
112 	 *			evaluates validity of corresponding input
113 	 *			address, 1st bit validity of response to first
114 	 *			member of info_req, etc.
115 	 */
116 
117 	/* make sure mi_info_count is within limit */
118 	info_count = minfo.mi_info_count;
119 	if (info_count < 1 || info_count > MAX_MEMINFO_REQ)
120 		return (set_errno(EINVAL));
121 
122 	/*
123 	 * allocate buffer in_array for the input addresses and copy them in
124 	 */
125 	in_size = sizeof (uint64_t) * addr_count;
126 	in_array = kmem_alloc(in_size, KM_SLEEP);
127 	if (copyin(minfo.mi_inaddr, in_array, in_size)) {
128 		kmem_free(in_array, in_size);
129 		return (set_errno(EFAULT));
130 	}
131 
132 	/*
133 	 * allocate buffer req_array for the input info_reqs and copy them in
134 	 */
135 	req_size = sizeof (uint_t) * info_count;
136 	req_array = kmem_alloc(req_size, KM_SLEEP);
137 	if (copyin(minfo.mi_info_req, req_array, req_size)) {
138 		kmem_free(req_array, req_size);
139 		kmem_free(in_array, in_size);
140 		return (set_errno(EFAULT));
141 	}
142 
143 	/*
144 	 * Validate privs for each req.
145 	 */
146 	for (i = 0; i < info_count; i++) {
147 		switch (req_array[i] & MEMINFO_MASK) {
148 		case MEMINFO_VLGRP:
149 		case MEMINFO_VPAGESIZE:
150 			break;
151 		default:
152 			if (secpolicy_meminfo(CRED()) != 0) {
153 				kmem_free(req_array, req_size);
154 				kmem_free(in_array, in_size);
155 				return (set_errno(EPERM));
156 			}
157 			break;
158 		}
159 	}
160 
161 	/*
162 	 * allocate buffer out_array which holds the results and will have
163 	 * to be copied out later
164 	 */
165 	out_size = sizeof (uint64_t) * addr_count * info_count;
166 	out_array = kmem_alloc(out_size, KM_SLEEP);
167 
168 	/*
169 	 * allocate buffer val_array which holds the validity bits and will
170 	 * have to be copied out later
171 	 */
172 	val_size = sizeof (uint_t) * addr_count;
173 	val_array = kmem_alloc(val_size, KM_SLEEP);
174 
175 	if ((req_array[0] & MEMINFO_MASK) == MEMINFO_PLGRP) {
176 		/* find the corresponding lgroup for each physical address */
177 		for (i = 0; i < addr_count; i++) {
178 			paddr = in_array[i];
179 			pfn = btop(paddr);
180 			lgrp = lgrp_pfn_to_lgrp(pfn);
181 			if (lgrp) {
182 				out_array[i] = lgrp->lgrp_id;
183 				val_array[i] = VALID_ADDR | VALID_REQ;
184 			} else {
185 				out_array[i] = 0;
186 				val_array[i] = 0;
187 			}
188 		}
189 	} else {
190 		/* get the corresponding memory info for each virtual address */
191 		as = curproc->p_as;
192 
193 		AS_LOCK_ENTER(as, RW_READER);
194 		hat = as->a_hat;
195 		for (i = out_idx = 0; i < addr_count; i++, out_idx +=
196 		    info_count) {
197 			addr = in_array[i];
198 			vaddr = (uintptr_t)(addr & ~PAGEOFFSET);
199 			if (!as_segat(as, (caddr_t)vaddr)) {
200 				val_array[i] = 0;
201 				continue;
202 			}
203 			val_array[i] = VALID_ADDR;
204 			pfn = hat_getpfnum(hat, (caddr_t)vaddr);
205 			if (pfn != PFN_INVALID) {
206 				paddr = (uint64_t)((pfn << PAGESHIFT) |
207 				    (addr & PAGEOFFSET));
208 				for (j = 0; j < info_count; j++) {
209 					switch (req_array[j] & MEMINFO_MASK) {
210 					case MEMINFO_VPHYSICAL:
211 						/*
212 						 * return the physical address
213 						 * corresponding to the input
214 						 * virtual address
215 						 */
216 						out_array[out_idx + j] = paddr;
217 						val_array[i] |= VALID_REQ << j;
218 						break;
219 					case MEMINFO_VLGRP:
220 						/*
221 						 * return the lgroup of physical
222 						 * page corresponding to the
223 						 * input virtual address
224 						 */
225 						lgrp = lgrp_pfn_to_lgrp(pfn);
226 						if (lgrp) {
227 							out_array[out_idx + j] =
228 							    lgrp->lgrp_id;
229 							val_array[i] |=
230 							    VALID_REQ << j;
231 						}
232 						break;
233 					case MEMINFO_VPAGESIZE:
234 						/*
235 						 * return the size of physical
236 						 * page corresponding to the
237 						 * input virtual address
238 						 */
239 						pgsz = hat_getpagesize(hat,
240 						    (caddr_t)vaddr);
241 						if (pgsz != -1) {
242 							out_array[out_idx + j] =
243 							    pgsz;
244 							val_array[i] |=
245 							    VALID_REQ << j;
246 						}
247 						break;
248 					case MEMINFO_VREPLCNT:
249 						/*
250 						 * for future use:-
251 						 * return the no. replicated
252 						 * physical pages corresponding
253 						 * to the input virtual address,
254 						 * so it is always 0 at the
255 						 * moment
256 						 */
257 						out_array[out_idx + j] = 0;
258 						val_array[i] |= VALID_REQ << j;
259 						break;
260 					case MEMINFO_VREPL:
261 						/*
262 						 * for future use:-
263 						 * return the nth physical
264 						 * replica of the specified
265 						 * virtual address
266 						 */
267 						break;
268 					case MEMINFO_VREPL_LGRP:
269 						/*
270 						 * for future use:-
271 						 * return the lgroup of nth
272 						 * physical replica of the
273 						 * specified virtual address
274 						 */
275 						break;
276 					case MEMINFO_PLGRP:
277 						/*
278 						 * this is for physical address
279 						 * only, shouldn't mix with
280 						 * virtual address
281 						 */
282 						break;
283 					default:
284 						break;
285 					}
286 				}
287 			}
288 		}
289 		AS_LOCK_EXIT(as);
290 	}
291 
292 	/* copy out the results and validity bits and free the buffers */
293 	if ((copyout(out_array, minfo.mi_outdata, out_size) != 0) ||
294 	    (copyout(val_array, minfo.mi_validity, val_size) != 0))
295 		ret = set_errno(EFAULT);
296 
297 	kmem_free(in_array, in_size);
298 	kmem_free(out_array, out_size);
299 	kmem_free(req_array, req_size);
300 	kmem_free(val_array, val_size);
301 
302 	return (ret);
303 }
304 
305 
306 /*
307  * Initialize lgroup affinities for thread
308  */
309 void
lgrp_affinity_init(lgrp_affinity_t ** bufaddr)310 lgrp_affinity_init(lgrp_affinity_t **bufaddr)
311 {
312 	if (bufaddr)
313 		*bufaddr = NULL;
314 }
315 
316 
317 /*
318  * Free lgroup affinities for thread and set to NULL
319  * just in case thread gets recycled
320  */
321 void
lgrp_affinity_free(lgrp_affinity_t ** bufaddr)322 lgrp_affinity_free(lgrp_affinity_t **bufaddr)
323 {
324 	if (bufaddr && *bufaddr) {
325 		kmem_free(*bufaddr, nlgrpsmax * sizeof (lgrp_affinity_t));
326 		*bufaddr = NULL;
327 	}
328 }
329 
330 
/*
 * Cookie specifying any ID.  Parenthesized so the negative constant stays
 * a single operand wherever the macro is expanded.
 */
#define	P_ANY	(-2)
332 
333 
334 /*
335  * Find LWP with given ID in specified process and get its affinity for
336  * specified lgroup
337  */
338 lgrp_affinity_t
lgrp_affinity_get_thread(proc_t * p,id_t lwpid,lgrp_id_t lgrp)339 lgrp_affinity_get_thread(proc_t *p, id_t lwpid, lgrp_id_t lgrp)
340 {
341 	lgrp_affinity_t aff;
342 	int		found;
343 	kthread_t	*t;
344 
345 	ASSERT(MUTEX_HELD(&p->p_lock));
346 
347 	aff = LGRP_AFF_NONE;
348 	found = 0;
349 	t = p->p_tlist;
350 	/*
351 	 * The process may be executing in proc_exit() and its p->p_list may be
352 	 * already NULL.
353 	 */
354 	if (t == NULL)
355 		return (set_errno(ESRCH));
356 
357 	do {
358 		if (t->t_tid == lwpid || lwpid == P_ANY) {
359 			thread_lock(t);
360 			/*
361 			 * Check to see whether caller has permission to set
362 			 * affinity for LWP
363 			 */
364 			if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
365 				thread_unlock(t);
366 				return (set_errno(EPERM));
367 			}
368 
369 			if (t->t_lgrp_affinity)
370 				aff = t->t_lgrp_affinity[lgrp];
371 			thread_unlock(t);
372 			found = 1;
373 			break;
374 		}
375 	} while ((t = t->t_forw) != p->p_tlist);
376 	if (!found)
377 		aff = set_errno(ESRCH);
378 
379 	return (aff);
380 }
381 
382 
383 /*
384  * Get lgroup affinity for given LWP
385  */
386 lgrp_affinity_t
lgrp_affinity_get(lgrp_affinity_args_t * ap)387 lgrp_affinity_get(lgrp_affinity_args_t *ap)
388 {
389 	lgrp_affinity_t		aff;
390 	lgrp_affinity_args_t	args;
391 	id_t			id;
392 	idtype_t		idtype;
393 	lgrp_id_t		lgrp;
394 	proc_t			*p;
395 	kthread_t		*t;
396 
397 	/*
398 	 * Copyin arguments
399 	 */
400 	if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
401 		return (set_errno(EFAULT));
402 
403 	id = args.id;
404 	idtype = args.idtype;
405 	lgrp = args.lgrp;
406 
407 	/*
408 	 * Check for invalid lgroup
409 	 */
410 	if (lgrp < 0 || lgrp == LGRP_NONE)
411 		return (set_errno(EINVAL));
412 
413 	/*
414 	 * Check for existing lgroup
415 	 */
416 	if (lgrp > lgrp_alloc_max)
417 		return (set_errno(ESRCH));
418 
419 	/*
420 	 * Get lgroup affinity for given LWP or process
421 	 */
422 	switch (idtype) {
423 
424 	case P_LWPID:
425 		/*
426 		 * LWP in current process
427 		 */
428 		p = curproc;
429 		mutex_enter(&p->p_lock);
430 		if (id != P_MYID)	/* different thread */
431 			aff = lgrp_affinity_get_thread(p, id, lgrp);
432 		else {			/* current thread */
433 			aff = LGRP_AFF_NONE;
434 			t = curthread;
435 			thread_lock(t);
436 			if (t->t_lgrp_affinity)
437 				aff = t->t_lgrp_affinity[lgrp];
438 			thread_unlock(t);
439 		}
440 		mutex_exit(&p->p_lock);
441 		break;
442 
443 	case P_PID:
444 		/*
445 		 * Process
446 		 */
447 		mutex_enter(&pidlock);
448 
449 		if (id == P_MYID)
450 			p = curproc;
451 		else {
452 			p = prfind(id);
453 			if (p == NULL) {
454 				mutex_exit(&pidlock);
455 				return (set_errno(ESRCH));
456 			}
457 		}
458 
459 		mutex_enter(&p->p_lock);
460 		aff = lgrp_affinity_get_thread(p, P_ANY, lgrp);
461 		mutex_exit(&p->p_lock);
462 
463 		mutex_exit(&pidlock);
464 		break;
465 
466 	default:
467 		aff = set_errno(EINVAL);
468 		break;
469 	}
470 
471 	return (aff);
472 }
473 
474 
475 /*
476  * Find lgroup for which this thread has most affinity in specified partition
477  * starting from home lgroup unless specified starting lgroup is preferred
478  */
479 lpl_t *
lgrp_affinity_best(kthread_t * t,struct cpupart * cpupart,lgrp_id_t start,boolean_t prefer_start)480 lgrp_affinity_best(kthread_t *t, struct cpupart *cpupart, lgrp_id_t start,
481     boolean_t prefer_start)
482 {
483 	lgrp_affinity_t	*affs;
484 	lgrp_affinity_t	best_aff;
485 	lpl_t		*best_lpl;
486 	lgrp_id_t	finish;
487 	lgrp_id_t	home;
488 	lgrp_id_t	lgrpid;
489 	lpl_t		*lpl;
490 
491 	ASSERT(t != NULL);
492 	ASSERT((MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0) ||
493 	    (MUTEX_HELD(&ttoproc(t)->p_lock) && THREAD_LOCK_HELD(t)));
494 	ASSERT(cpupart != NULL);
495 
496 	if (t->t_lgrp_affinity == NULL)
497 		return (NULL);
498 
499 	affs = t->t_lgrp_affinity;
500 
501 	/*
502 	 * Thread bound to CPU
503 	 */
504 	if (t->t_bind_cpu != PBIND_NONE) {
505 		cpu_t	*cp;
506 
507 		/*
508 		 * Find which lpl has most affinity among leaf lpl directly
509 		 * containing CPU and its ancestor lpls
510 		 */
511 		cp = cpu[t->t_bind_cpu];
512 
513 		best_lpl = lpl = cp->cpu_lpl;
514 		best_aff = affs[best_lpl->lpl_lgrpid];
515 		while (lpl->lpl_parent != NULL) {
516 			lpl = lpl->lpl_parent;
517 			lgrpid = lpl->lpl_lgrpid;
518 			if (affs[lgrpid] > best_aff) {
519 				best_lpl = lpl;
520 				best_aff = affs[lgrpid];
521 			}
522 		}
523 		return (best_lpl);
524 	}
525 
526 	/*
527 	 * Start searching from home lgroup unless given starting lgroup is
528 	 * preferred or home lgroup isn't in given pset.  Use root lgroup as
529 	 * starting point if both home and starting lgroups aren't in given
530 	 * pset.
531 	 */
532 	ASSERT(start >= 0 && start <= lgrp_alloc_max);
533 	home = t->t_lpl->lpl_lgrpid;
534 	if (!prefer_start && LGRP_CPUS_IN_PART(home, cpupart))
535 		lgrpid = home;
536 	else if (start != LGRP_NONE && LGRP_CPUS_IN_PART(start, cpupart))
537 		lgrpid = start;
538 	else
539 		lgrpid = LGRP_ROOTID;
540 
541 	best_lpl = &cpupart->cp_lgrploads[lgrpid];
542 	best_aff = affs[lgrpid];
543 	finish = lgrpid;
544 	do {
545 		/*
546 		 * Skip any lgroups that don't have CPU resources
547 		 * in this processor set.
548 		 */
549 		if (!LGRP_CPUS_IN_PART(lgrpid, cpupart)) {
550 			if (++lgrpid > lgrp_alloc_max)
551 				lgrpid = 0;	/* wrap the search */
552 			continue;
553 		}
554 
555 		/*
556 		 * Find lgroup with most affinity
557 		 */
558 		lpl = &cpupart->cp_lgrploads[lgrpid];
559 		if (affs[lgrpid] > best_aff) {
560 			best_aff = affs[lgrpid];
561 			best_lpl = lpl;
562 		}
563 
564 		if (++lgrpid > lgrp_alloc_max)
565 			lgrpid = 0;	/* wrap the search */
566 
567 	} while (lgrpid != finish);
568 
569 	/*
570 	 * No lgroup (in this pset) with any affinity
571 	 */
572 	if (best_aff == LGRP_AFF_NONE)
573 		return (NULL);
574 
575 	lgrpid = best_lpl->lpl_lgrpid;
576 	ASSERT(LGRP_CPUS_IN_PART(lgrpid, cpupart) && best_lpl->lpl_ncpu > 0);
577 
578 	return (best_lpl);
579 }
580 
581 
582 /*
583  * Set thread's affinity for given lgroup
584  */
585 int
lgrp_affinity_set_thread(kthread_t * t,lgrp_id_t lgrp,lgrp_affinity_t aff,lgrp_affinity_t ** aff_buf)586 lgrp_affinity_set_thread(kthread_t *t, lgrp_id_t lgrp, lgrp_affinity_t aff,
587     lgrp_affinity_t **aff_buf)
588 {
589 	lgrp_affinity_t	*affs;
590 	lgrp_id_t	best;
591 	lpl_t		*best_lpl;
592 	lgrp_id_t	home;
593 	int		retval;
594 
595 	ASSERT(t != NULL);
596 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
597 
598 	retval = 0;
599 
600 	thread_lock(t);
601 
602 	/*
603 	 * Check to see whether caller has permission to set affinity for
604 	 * thread
605 	 */
606 	if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
607 		thread_unlock(t);
608 		return (set_errno(EPERM));
609 	}
610 
611 	if (t->t_lgrp_affinity == NULL) {
612 		if (aff == LGRP_AFF_NONE) {
613 			thread_unlock(t);
614 			return (0);
615 		}
616 		ASSERT(aff_buf != NULL && *aff_buf != NULL);
617 		t->t_lgrp_affinity = *aff_buf;
618 		*aff_buf = NULL;
619 	}
620 
621 	affs = t->t_lgrp_affinity;
622 	affs[lgrp] = aff;
623 
624 	/*
625 	 * Find lgroup for which thread has most affinity,
626 	 * starting with lgroup for which affinity being set
627 	 */
628 	best_lpl = lgrp_affinity_best(t, t->t_cpupart, lgrp, B_TRUE);
629 
630 	/*
631 	 * Rehome if found lgroup with more affinity than home or lgroup for
632 	 * which affinity is being set has same affinity as home
633 	 */
634 	home = t->t_lpl->lpl_lgrpid;
635 	if (best_lpl != NULL && best_lpl != t->t_lpl) {
636 		best = best_lpl->lpl_lgrpid;
637 		if (affs[best] > affs[home] || (affs[best] == affs[home] &&
638 		    best == lgrp))
639 			lgrp_move_thread(t, best_lpl, 1);
640 	}
641 
642 	thread_unlock(t);
643 
644 	return (retval);
645 }
646 
647 
648 /*
649  * Set process' affinity for specified lgroup
650  */
651 int
lgrp_affinity_set_proc(proc_t * p,lgrp_id_t lgrp,lgrp_affinity_t aff,lgrp_affinity_t ** aff_buf_array)652 lgrp_affinity_set_proc(proc_t *p, lgrp_id_t lgrp, lgrp_affinity_t aff,
653     lgrp_affinity_t **aff_buf_array)
654 {
655 	lgrp_affinity_t	*buf;
656 	int		err = 0;
657 	int		i;
658 	int		retval;
659 	kthread_t	*t;
660 
661 	ASSERT(MUTEX_HELD(&pidlock) && MUTEX_HELD(&p->p_lock));
662 	ASSERT(aff_buf_array != NULL);
663 
664 	i = 0;
665 	t = p->p_tlist;
666 	if (t != NULL) {
667 		do {
668 			/*
669 			 * Set lgroup affinity for thread
670 			 */
671 			buf = aff_buf_array[i];
672 			retval = lgrp_affinity_set_thread(t, lgrp, aff, &buf);
673 
674 			if (err == 0 && retval != 0)
675 				err = retval;
676 
677 			/*
678 			 * Advance pointer to next buffer
679 			 */
680 			if (buf == NULL) {
681 				ASSERT(i < p->p_lwpcnt);
682 				aff_buf_array[i] = NULL;
683 				i++;
684 			}
685 
686 		} while ((t = t->t_forw) != p->p_tlist);
687 	}
688 	return (err);
689 }
690 
691 
692 /*
693  * Set LWP's or process' affinity for specified lgroup
694  *
695  * When setting affinities, pidlock, process p_lock, and thread_lock()
696  * need to be held in that order to protect target thread's pset, process,
697  * process contents, and thread contents.  thread_lock() does splhigh(),
698  * so it ends up having similiar effect as kpreempt_disable(), so it will
699  * protect calls to lgrp_move_thread() and lgrp_choose() from pset changes.
700  */
701 int
lgrp_affinity_set(lgrp_affinity_args_t * ap)702 lgrp_affinity_set(lgrp_affinity_args_t *ap)
703 {
704 	lgrp_affinity_t		aff;
705 	lgrp_affinity_t		*aff_buf;
706 	lgrp_affinity_args_t	args;
707 	id_t			id;
708 	idtype_t		idtype;
709 	lgrp_id_t		lgrp;
710 	int			nthreads;
711 	proc_t			*p;
712 	int			retval;
713 
714 	/*
715 	 * Copyin arguments
716 	 */
717 	if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
718 		return (set_errno(EFAULT));
719 
720 	idtype = args.idtype;
721 	id = args.id;
722 	lgrp = args.lgrp;
723 	aff = args.aff;
724 
725 	/*
726 	 * Check for invalid lgroup
727 	 */
728 	if (lgrp < 0 || lgrp == LGRP_NONE)
729 		return (set_errno(EINVAL));
730 
731 	/*
732 	 * Check for existing lgroup
733 	 */
734 	if (lgrp > lgrp_alloc_max)
735 		return (set_errno(ESRCH));
736 
737 	/*
738 	 * Check for legal affinity
739 	 */
740 	if (aff != LGRP_AFF_NONE && aff != LGRP_AFF_WEAK &&
741 	    aff != LGRP_AFF_STRONG)
742 		return (set_errno(EINVAL));
743 
744 	/*
745 	 * Must be process or LWP ID
746 	 */
747 	if (idtype != P_LWPID && idtype != P_PID)
748 		return (set_errno(EINVAL));
749 
750 	retval = EINVAL;
751 	/*
752 	 * Set given LWP's or process' affinity for specified lgroup
753 	 */
754 	switch (idtype) {
755 
756 	case P_LWPID:
757 		/*
758 		 * Allocate memory for thread's lgroup affinities
759 		 * ahead of time w/o holding locks
760 		 */
761 		aff_buf = kmem_zalloc(nlgrpsmax * sizeof (lgrp_affinity_t),
762 		    KM_SLEEP);
763 
764 		p = curproc;
765 
766 		/*
767 		 * Set affinity for thread
768 		 */
769 		mutex_enter(&p->p_lock);
770 		if (id == P_MYID) {		/* current thread */
771 			retval = lgrp_affinity_set_thread(curthread, lgrp, aff,
772 			    &aff_buf);
773 		} else if (p->p_tlist == NULL) {
774 			retval = set_errno(ESRCH);
775 		} else {			/* other thread */
776 			int		found = 0;
777 			kthread_t	*t;
778 
779 			t = p->p_tlist;
780 			do {
781 				if (t->t_tid == id) {
782 					retval = lgrp_affinity_set_thread(t,
783 					    lgrp, aff, &aff_buf);
784 					found = 1;
785 					break;
786 				}
787 			} while ((t = t->t_forw) != p->p_tlist);
788 			if (!found)
789 				retval = set_errno(ESRCH);
790 		}
791 		mutex_exit(&p->p_lock);
792 
793 		/*
794 		 * Free memory for lgroup affinities,
795 		 * since thread didn't need it
796 		 */
797 		if (aff_buf)
798 			kmem_free(aff_buf,
799 			    nlgrpsmax * sizeof (lgrp_affinity_t));
800 
801 		break;
802 
803 	case P_PID:
804 
805 		do {
806 			lgrp_affinity_t	**aff_buf_array;
807 			int		i;
808 			size_t		size;
809 
810 			/*
811 			 * Get process
812 			 */
813 			mutex_enter(&pidlock);
814 
815 			if (id == P_MYID)
816 				p = curproc;
817 			else
818 				p = prfind(id);
819 
820 			if (p == NULL) {
821 				mutex_exit(&pidlock);
822 				return (set_errno(ESRCH));
823 			}
824 
825 			/*
826 			 * Get number of threads in process
827 			 *
828 			 * NOTE: Only care about user processes,
829 			 *	 so p_lwpcnt should be number of threads.
830 			 */
831 			mutex_enter(&p->p_lock);
832 			nthreads = p->p_lwpcnt;
833 			mutex_exit(&p->p_lock);
834 
835 			mutex_exit(&pidlock);
836 
837 			if (nthreads < 1)
838 				return (set_errno(ESRCH));
839 
840 			/*
841 			 * Preallocate memory for lgroup affinities for
842 			 * each thread in process now to avoid holding
843 			 * any locks.  Allocate an array to hold a buffer
844 			 * for each thread.
845 			 */
846 			aff_buf_array = kmem_zalloc(nthreads *
847 			    sizeof (lgrp_affinity_t *), KM_SLEEP);
848 
849 			size = nlgrpsmax * sizeof (lgrp_affinity_t);
850 			for (i = 0; i < nthreads; i++)
851 				aff_buf_array[i] = kmem_zalloc(size, KM_SLEEP);
852 
853 			mutex_enter(&pidlock);
854 
855 			/*
856 			 * Get process again since dropped locks to allocate
857 			 * memory (except current process)
858 			 */
859 			if (id != P_MYID)
860 				p = prfind(id);
861 
862 			/*
863 			 * Process went away after we dropped locks and before
864 			 * reacquiring them, so drop locks, free memory, and
865 			 * return.
866 			 */
867 			if (p == NULL) {
868 				mutex_exit(&pidlock);
869 				for (i = 0; i < nthreads; i++)
870 					kmem_free(aff_buf_array[i], size);
871 				kmem_free(aff_buf_array,
872 				    nthreads * sizeof (lgrp_affinity_t *));
873 				return (set_errno(ESRCH));
874 			}
875 
876 			mutex_enter(&p->p_lock);
877 
878 			/*
879 			 * See whether number of threads is same
880 			 * If not, drop locks, free memory, and try again
881 			 */
882 			if (nthreads != p->p_lwpcnt) {
883 				mutex_exit(&p->p_lock);
884 				mutex_exit(&pidlock);
885 				for (i = 0; i < nthreads; i++)
886 					kmem_free(aff_buf_array[i], size);
887 				kmem_free(aff_buf_array,
888 				    nthreads * sizeof (lgrp_affinity_t *));
889 				continue;
890 			}
891 
892 			/*
893 			 * Set lgroup affinity for threads in process
894 			 */
895 			retval = lgrp_affinity_set_proc(p, lgrp, aff,
896 			    aff_buf_array);
897 
898 			mutex_exit(&p->p_lock);
899 			mutex_exit(&pidlock);
900 
901 			/*
902 			 * Free any leftover memory, since some threads may
903 			 * have already allocated memory and set lgroup
904 			 * affinities before
905 			 */
906 			for (i = 0; i < nthreads; i++)
907 				if (aff_buf_array[i] != NULL)
908 					kmem_free(aff_buf_array[i], size);
909 			kmem_free(aff_buf_array,
910 			    nthreads * sizeof (lgrp_affinity_t *));
911 
912 			break;
913 
914 		} while (nthreads != p->p_lwpcnt);
915 
916 		break;
917 
918 	default:
919 		retval = set_errno(EINVAL);
920 		break;
921 	}
922 
923 	return (retval);
924 }
925 
926 
927 /*
928  * Return the latest generation number for the lgroup hierarchy
929  * with the given view
930  */
931 lgrp_gen_t
lgrp_generation(lgrp_view_t view)932 lgrp_generation(lgrp_view_t view)
933 {
934 	cpupart_t	*cpupart;
935 	uint_t		gen;
936 
937 	kpreempt_disable();
938 
939 	/*
940 	 * Determine generation number for given view
941 	 */
942 	if (view == LGRP_VIEW_OS)
943 		/*
944 		 * Return generation number of lgroup hierarchy for OS view
945 		 */
946 		gen = lgrp_gen;
947 	else {
948 		/*
949 		 * For caller's view, use generation numbers for lgroup
950 		 * hierarchy and caller's pset
951 		 * NOTE: Caller needs to check for change in pset ID
952 		 */
953 		cpupart = curthread->t_cpupart;
954 		ASSERT(cpupart);
955 		gen = lgrp_gen + cpupart->cp_gen;
956 	}
957 
958 	kpreempt_enable();
959 
960 	return (gen);
961 }
962 
963 
964 lgrp_id_t
lgrp_home_thread(kthread_t * t)965 lgrp_home_thread(kthread_t *t)
966 {
967 	lgrp_id_t	home;
968 
969 	ASSERT(t != NULL);
970 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
971 
972 	thread_lock(t);
973 
974 	/*
975 	 * Check to see whether caller has permission to set affinity for
976 	 * thread
977 	 */
978 	if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
979 		thread_unlock(t);
980 		return (set_errno(EPERM));
981 	}
982 
983 	home = lgrp_home_id(t);
984 
985 	thread_unlock(t);
986 	return (home);
987 }
988 
989 
990 /*
991  * Get home lgroup of given process or thread
992  */
993 lgrp_id_t
lgrp_home_get(idtype_t idtype,id_t id)994 lgrp_home_get(idtype_t idtype, id_t id)
995 {
996 	proc_t		*p;
997 	lgrp_id_t	retval;
998 	kthread_t	*t;
999 
1000 	/*
1001 	 * Get home lgroup of given LWP or process
1002 	 */
1003 	switch (idtype) {
1004 
1005 	case P_LWPID:
1006 		p = curproc;
1007 
1008 		/*
1009 		 * Set affinity for thread
1010 		 */
1011 		mutex_enter(&p->p_lock);
1012 		if (id == P_MYID) {		/* current thread */
1013 			retval = lgrp_home_thread(curthread);
1014 		} else if (p->p_tlist == NULL) {
1015 			retval = set_errno(ESRCH);
1016 		} else {			/* other thread */
1017 			int	found = 0;
1018 
1019 			t = p->p_tlist;
1020 			do {
1021 				if (t->t_tid == id) {
1022 					retval = lgrp_home_thread(t);
1023 					found = 1;
1024 					break;
1025 				}
1026 			} while ((t = t->t_forw) != p->p_tlist);
1027 			if (!found)
1028 				retval = set_errno(ESRCH);
1029 		}
1030 		mutex_exit(&p->p_lock);
1031 		break;
1032 
1033 	case P_PID:
1034 		/*
1035 		 * Get process
1036 		 */
1037 		mutex_enter(&pidlock);
1038 
1039 		if (id == P_MYID)
1040 			p = curproc;
1041 		else
1042 			p = prfind(id);
1043 
1044 		if (p == NULL) {
1045 			mutex_exit(&pidlock);
1046 			return (set_errno(ESRCH));
1047 		}
1048 
1049 		mutex_enter(&p->p_lock);
1050 		t = p->p_tlist;
1051 		if (t == NULL)
1052 			retval = set_errno(ESRCH);
1053 		else
1054 			retval = lgrp_home_thread(t);
1055 		mutex_exit(&p->p_lock);
1056 
1057 		mutex_exit(&pidlock);
1058 
1059 		break;
1060 
1061 	default:
1062 		retval = set_errno(EINVAL);
1063 		break;
1064 	}
1065 
1066 	return (retval);
1067 }
1068 
1069 
1070 /*
1071  * Return latency between "from" and "to" lgroups
1072  *
1073  * This latency number can only be used for relative comparison
1074  * between lgroups on the running system, cannot be used across platforms,
1075  * and may not reflect the actual latency.  It is platform and implementation
1076  * specific, so platform gets to decide its value.  It would be nice if the
1077  * number was at least proportional to make comparisons more meaningful though.
1078  */
1079 int
lgrp_latency(lgrp_id_t from,lgrp_id_t to)1080 lgrp_latency(lgrp_id_t from, lgrp_id_t to)
1081 {
1082 	lgrp_t		*from_lgrp;
1083 	int		i;
1084 	int		latency;
1085 	int		latency_max;
1086 	lgrp_t		*to_lgrp;
1087 
1088 	ASSERT(MUTEX_HELD(&cpu_lock));
1089 
1090 	if (from < 0 || to < 0)
1091 		return (set_errno(EINVAL));
1092 
1093 	if (from > lgrp_alloc_max || to > lgrp_alloc_max)
1094 		return (set_errno(ESRCH));
1095 
1096 	from_lgrp = lgrp_table[from];
1097 	to_lgrp = lgrp_table[to];
1098 
1099 	if (!LGRP_EXISTS(from_lgrp) || !LGRP_EXISTS(to_lgrp)) {
1100 		return (set_errno(ESRCH));
1101 	}
1102 
1103 	/*
1104 	 * Get latency for same lgroup
1105 	 */
1106 	if (from == to) {
1107 		latency = from_lgrp->lgrp_latency;
1108 		return (latency);
1109 	}
1110 
1111 	/*
1112 	 * Get latency between leaf lgroups
1113 	 */
1114 	if (from_lgrp->lgrp_childcnt == 0 && to_lgrp->lgrp_childcnt == 0)
1115 		return (lgrp_plat_latency(from_lgrp->lgrp_plathand,
1116 		    to_lgrp->lgrp_plathand));
1117 
1118 	/*
1119 	 * Determine max latency between resources in two lgroups
1120 	 */
1121 	latency_max = 0;
1122 	for (i = 0; i <= lgrp_alloc_max; i++) {
1123 		lgrp_t	*from_rsrc;
1124 		int	j;
1125 		lgrp_t	*to_rsrc;
1126 
1127 		from_rsrc = lgrp_table[i];
1128 		if (!LGRP_EXISTS(from_rsrc) ||
1129 		    !klgrpset_ismember(from_lgrp->lgrp_set[LGRP_RSRC_CPU], i))
1130 			continue;
1131 
1132 		for (j = 0; j <= lgrp_alloc_max; j++) {
1133 			to_rsrc = lgrp_table[j];
1134 			if (!LGRP_EXISTS(to_rsrc) ||
1135 			    klgrpset_ismember(to_lgrp->lgrp_set[LGRP_RSRC_MEM],
1136 			    j) == 0)
1137 				continue;
1138 			latency = lgrp_plat_latency(from_rsrc->lgrp_plathand,
1139 			    to_rsrc->lgrp_plathand);
1140 			if (latency > latency_max)
1141 				latency_max = latency;
1142 		}
1143 	}
1144 	return (latency_max);
1145 }
1146 
1147 
1148 /*
1149  * Return lgroup interface version number
1150  * 0 - none
1151  * 1 - original
1152  * 2 - lgrp_latency_cookie() and lgrp_resources() added
1153  */
1154 int
lgrp_version(int version)1155 lgrp_version(int version)
1156 {
1157 	/*
1158 	 * Return LGRP_VER_NONE when requested version isn't supported
1159 	 */
1160 	if (version < LGRP_VER_NONE || version > LGRP_VER_CURRENT)
1161 		return (LGRP_VER_NONE);
1162 
1163 	/*
1164 	 * Return current version when LGRP_VER_NONE passed in
1165 	 */
1166 	if (version == LGRP_VER_NONE)
1167 		return (LGRP_VER_CURRENT);
1168 
1169 	/*
1170 	 * Otherwise, return supported version.
1171 	 */
1172 	return (version);
1173 }
1174 
1175 
1176 /*
1177  * Snapshot of lgroup hierarchy
1178  *
1179  * One snapshot is kept and is based on the kernel's native data model, so
1180  * a 32-bit snapshot is kept for the 32-bit kernel and a 64-bit one for the
1181  * 64-bit kernel.  If a 32-bit user wants a snapshot from the 64-bit kernel,
1182  * the kernel generates a 32-bit snapshot from the data in its 64-bit snapshot.
1183  *
1184  * The format is defined by lgroup snapshot header and the layout of
1185  * the snapshot in memory is as follows:
1186  * 1) lgroup snapshot header
1187  *    - specifies format of snapshot
1188  *    - defined by lgrp_snapshot_header_t
1189  * 2) lgroup info array
1190  *    - contains information about each lgroup
1191  *    - one element for each lgroup
1192  *    - each element is defined by lgrp_info_t
1193  * 3) lgroup CPU ID array
1194  *    - contains list (array) of CPU IDs for each lgroup
1195  *    - lgrp_info_t points into array and specifies how many CPUs belong to
1196  *      given lgroup
1197  * 4) lgroup parents array
1198  *    - contains lgroup bitmask of parents for each lgroup
1199  *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1200  * 5) lgroup children array
1201  *    - contains lgroup bitmask of children for each lgroup
1202  *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1203  * 6) lgroup resources array
1204  *    - contains lgroup bitmask of resources for each lgroup
1205  *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1206  * 7) lgroup latency table
1207  *    - contains latency from each lgroup to each of other lgroups
1208  *
1209  * NOTE:  Must use nlgrpsmax for per lgroup data structures because lgroups
1210  *	  may be sparsely allocated.
1211  */
/*
 * lgrp_snap points at the single cached kernel-native snapshot and is NULL
 * when no snapshot is currently cached.  lgrpsys() holds lgrp_snap_lock
 * while the snapshot is taken and copied out to the caller.
 */
lgrp_snapshot_header_t	*lgrp_snap = NULL;	/* lgroup snapshot */
static kmutex_t		lgrp_snap_lock;		/* snapshot lock */
1214 
1215 
1216 /*
1217  * Take a snapshot of lgroup hierarchy and return size of buffer
1218  * needed to hold snapshot
1219  */
1220 static int
lgrp_snapshot(void)1221 lgrp_snapshot(void)
1222 {
1223 	size_t		bitmask_size;
1224 	size_t		bitmasks_size;
1225 	size_t		bufsize;
1226 	int		cpu_index;
1227 	size_t		cpuids_size;
1228 	int		i;
1229 	int		j;
1230 	size_t		info_size;
1231 	size_t		lats_size;
1232 	ulong_t		*lgrp_children;
1233 	processorid_t	*lgrp_cpuids;
1234 	lgrp_info_t	*lgrp_info;
1235 	int		**lgrp_lats;
1236 	ulong_t		*lgrp_parents;
1237 	ulong_t		*lgrp_rsets;
1238 	ulong_t		*lgrpset;
1239 	int		snap_ncpus;
1240 	int		snap_nlgrps;
1241 	int		snap_nlgrpsmax;
1242 	size_t		snap_hdr_size;
1243 #ifdef	_SYSCALL32_IMPL
1244 	model_t		model = DATAMODEL_NATIVE;
1245 
1246 	/*
1247 	 * Have up-to-date snapshot, so check to see whether caller is 32-bit
1248 	 * program and need to return size of 32-bit snapshot now.
1249 	 */
1250 	model = get_udatamodel();
1251 	if (model == DATAMODEL_ILP32 && lgrp_snap &&
1252 	    lgrp_snap->ss_gen == lgrp_gen) {
1253 
1254 		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1255 
1256 		/*
1257 		 * Calculate size of buffer needed for 32-bit snapshot,
1258 		 * rounding up size of each object to allow for alignment
1259 		 * of next object in buffer.
1260 		 */
1261 		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
1262 		    sizeof (caddr32_t));
1263 		info_size =
1264 		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
1265 		    sizeof (processorid_t));
1266 		cpuids_size =
1267 		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
1268 		    sizeof (ulong_t));
1269 
1270 		/*
1271 		 * lgroup bitmasks needed for parents, children, and resources
1272 		 * for each lgroup and pset lgroup set
1273 		 */
1274 		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1275 		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
1276 		    snap_nlgrpsmax) + 1) * bitmask_size;
1277 
1278 		/*
1279 		 * Size of latency table and buffer
1280 		 */
1281 		lats_size = snap_nlgrpsmax * sizeof (caddr32_t) +
1282 		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);
1283 
1284 		bufsize = snap_hdr_size + info_size + cpuids_size +
1285 		    bitmasks_size + lats_size;
1286 		return (bufsize);
1287 	}
1288 #endif	/* _SYSCALL32_IMPL */
1289 
1290 	/*
1291 	 * Check whether snapshot is up-to-date
1292 	 * Free it and take another one if not
1293 	 */
1294 	if (lgrp_snap) {
1295 		if (lgrp_snap->ss_gen == lgrp_gen)
1296 			return (lgrp_snap->ss_size);
1297 
1298 		kmem_free(lgrp_snap, lgrp_snap->ss_size);
1299 		lgrp_snap = NULL;
1300 	}
1301 
1302 	/*
1303 	 * Allocate memory for snapshot
1304 	 * w/o holding cpu_lock while waiting for memory
1305 	 */
1306 	while (lgrp_snap == NULL) {
1307 		int	old_generation;
1308 
1309 		/*
1310 		 * Take snapshot of lgroup generation number
1311 		 * and configuration size dependent information
1312 		 * NOTE: Only count number of online CPUs,
1313 		 * since only online CPUs appear in lgroups.
1314 		 */
1315 		mutex_enter(&cpu_lock);
1316 		old_generation = lgrp_gen;
1317 		snap_ncpus = ncpus_online;
1318 		snap_nlgrps = nlgrps;
1319 		snap_nlgrpsmax = nlgrpsmax;
1320 		mutex_exit(&cpu_lock);
1321 
1322 		/*
1323 		 * Calculate size of buffer needed for snapshot,
1324 		 * rounding up size of each object to allow for alignment
1325 		 * of next object in buffer.
1326 		 */
1327 		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
1328 		    sizeof (void *));
1329 		info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
1330 		    sizeof (processorid_t));
1331 		cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
1332 		    sizeof (ulong_t));
1333 		/*
1334 		 * lgroup bitmasks needed for pset lgroup set and  parents,
1335 		 * children, and resource sets for each lgroup
1336 		 */
1337 		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1338 		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
1339 		    snap_nlgrpsmax) + 1) * bitmask_size;
1340 
1341 		/*
1342 		 * Size of latency table and buffer
1343 		 */
1344 		lats_size = snap_nlgrpsmax * sizeof (int *) +
1345 		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);
1346 
1347 		bufsize = snap_hdr_size + info_size + cpuids_size +
1348 		    bitmasks_size + lats_size;
1349 
1350 		/*
1351 		 * Allocate memory for buffer
1352 		 */
1353 		lgrp_snap = kmem_zalloc(bufsize, KM_NOSLEEP);
1354 		if (lgrp_snap == NULL)
1355 			return (set_errno(ENOMEM));
1356 
1357 		/*
1358 		 * Check whether generation number has changed
1359 		 */
1360 		mutex_enter(&cpu_lock);
1361 		if (lgrp_gen == old_generation)
1362 			break;		/* hasn't change, so done. */
1363 
1364 		/*
1365 		 * Generation number changed, so free memory and try again.
1366 		 */
1367 		mutex_exit(&cpu_lock);
1368 		kmem_free(lgrp_snap, bufsize);
1369 		lgrp_snap = NULL;
1370 	}
1371 
1372 	/*
1373 	 * Fill in lgroup snapshot header
1374 	 * (including pointers to tables of lgroup info, CPU IDs, and parents
1375 	 * and children)
1376 	 */
1377 	lgrp_snap->ss_version = LGRP_VER_CURRENT;
1378 
1379 	/*
1380 	 * XXX For now, liblgrp only needs to know whether the hierarchy
1381 	 * XXX only has one level or not
1382 	 */
1383 	if (snap_nlgrps == 1)
1384 		lgrp_snap->ss_levels = 1;
1385 	else
1386 		lgrp_snap->ss_levels = 2;
1387 
1388 	lgrp_snap->ss_root = LGRP_ROOTID;
1389 
1390 	lgrp_snap->ss_nlgrps = lgrp_snap->ss_nlgrps_os = snap_nlgrps;
1391 	lgrp_snap->ss_nlgrps_max = snap_nlgrpsmax;
1392 	lgrp_snap->ss_ncpus = snap_ncpus;
1393 	lgrp_snap->ss_gen = lgrp_gen;
1394 	lgrp_snap->ss_view = LGRP_VIEW_OS;
1395 	lgrp_snap->ss_pset = 0;		/* NOTE: caller should set if needed */
1396 	lgrp_snap->ss_size = bufsize;
1397 	lgrp_snap->ss_magic = (uintptr_t)lgrp_snap;
1398 
1399 	lgrp_snap->ss_info = lgrp_info =
1400 	    (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
1401 
1402 	lgrp_snap->ss_cpuids = lgrp_cpuids =
1403 	    (processorid_t *)((uintptr_t)lgrp_info + info_size);
1404 
1405 	lgrp_snap->ss_lgrpset = lgrpset =
1406 	    (ulong_t *)((uintptr_t)lgrp_cpuids + cpuids_size);
1407 
1408 	lgrp_snap->ss_parents = lgrp_parents =
1409 	    (ulong_t *)((uintptr_t)lgrpset + bitmask_size);
1410 
1411 	lgrp_snap->ss_children = lgrp_children =
1412 	    (ulong_t *)((uintptr_t)lgrp_parents + (snap_nlgrpsmax *
1413 	    bitmask_size));
1414 
1415 	lgrp_snap->ss_rsets = lgrp_rsets =
1416 	    (ulong_t *)((uintptr_t)lgrp_children + (snap_nlgrpsmax *
1417 	    bitmask_size));
1418 
1419 	lgrp_snap->ss_latencies = lgrp_lats =
1420 	    (int **)((uintptr_t)lgrp_rsets + (LGRP_RSRC_COUNT *
1421 	    snap_nlgrpsmax * bitmask_size));
1422 
1423 	/*
1424 	 * Fill in lgroup information
1425 	 */
1426 	cpu_index = 0;
1427 	for (i = 0; i < snap_nlgrpsmax; i++) {
1428 		struct cpu	*cp;
1429 		int		cpu_count;
1430 		struct cpu	*head;
1431 		int		k;
1432 		lgrp_t		*lgrp;
1433 
1434 		lgrp = lgrp_table[i];
1435 		if (!LGRP_EXISTS(lgrp)) {
1436 			bzero(&lgrp_info[i], sizeof (lgrp_info[i]));
1437 			lgrp_info[i].info_lgrpid = LGRP_NONE;
1438 			continue;
1439 		}
1440 
1441 		lgrp_info[i].info_lgrpid = i;
1442 		lgrp_info[i].info_latency = lgrp->lgrp_latency;
1443 
1444 		/*
1445 		 * Fill in parents, children, and lgroup resources
1446 		 */
1447 		lgrp_info[i].info_parents =
1448 		    (ulong_t *)((uintptr_t)lgrp_parents + (i * bitmask_size));
1449 
1450 		if (lgrp->lgrp_parent)
1451 			BT_SET(lgrp_info[i].info_parents,
1452 			    lgrp->lgrp_parent->lgrp_id);
1453 
1454 		lgrp_info[i].info_children =
1455 		    (ulong_t *)((uintptr_t)lgrp_children + (i * bitmask_size));
1456 
1457 		for (j = 0; j < snap_nlgrpsmax; j++)
1458 			if (klgrpset_ismember(lgrp->lgrp_children, j))
1459 				BT_SET(lgrp_info[i].info_children, j);
1460 
1461 		lgrp_info[i].info_rset =
1462 		    (ulong_t *)((uintptr_t)lgrp_rsets +
1463 		    (i * LGRP_RSRC_COUNT * bitmask_size));
1464 
1465 		for (j = 0; j < LGRP_RSRC_COUNT; j++) {
1466 			ulong_t	*rset;
1467 
1468 			rset = (ulong_t *)((uintptr_t)lgrp_info[i].info_rset +
1469 			    (j * bitmask_size));
1470 			for (k = 0; k < snap_nlgrpsmax; k++)
1471 				if (klgrpset_ismember(lgrp->lgrp_set[j], k))
1472 					BT_SET(rset, k);
1473 		}
1474 
1475 		/*
1476 		 * Fill in CPU IDs
1477 		 */
1478 		cpu_count = 0;
1479 		lgrp_info[i].info_cpuids = NULL;
1480 		cp = head = lgrp->lgrp_cpu;
1481 		if (head != NULL) {
1482 			lgrp_info[i].info_cpuids = &lgrp_cpuids[cpu_index];
1483 			do {
1484 				lgrp_cpuids[cpu_index] = cp->cpu_id;
1485 				cpu_index++;
1486 				cpu_count++;
1487 				cp = cp->cpu_next_lgrp;
1488 			} while (cp != head);
1489 		}
1490 		ASSERT(cpu_count == lgrp->lgrp_cpucnt);
1491 		lgrp_info[i].info_ncpus = cpu_count;
1492 
1493 		/*
1494 		 * Fill in memory sizes for lgroups that directly contain
1495 		 * memory
1496 		 */
1497 		if (klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM], i)) {
1498 			lgrp_info[i].info_mem_free =
1499 			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
1500 			lgrp_info[i].info_mem_install =
1501 			    lgrp_mem_size(i, LGRP_MEM_SIZE_INSTALL);
1502 		}
1503 
1504 		/*
1505 		 * Fill in latency table and buffer
1506 		 */
1507 		lgrp_lats[i] = (int *)((uintptr_t)lgrp_lats + snap_nlgrpsmax *
1508 		    sizeof (int *) + i * snap_nlgrpsmax * sizeof (int));
1509 		for (j = 0; j < snap_nlgrpsmax; j++) {
1510 			lgrp_t	*to;
1511 
1512 			to = lgrp_table[j];
1513 			if (!LGRP_EXISTS(to))
1514 				continue;
1515 			lgrp_lats[i][j] = lgrp_latency(lgrp->lgrp_id,
1516 			    to->lgrp_id);
1517 		}
1518 	}
1519 	ASSERT(cpu_index == snap_ncpus);
1520 
1521 
1522 	mutex_exit(&cpu_lock);
1523 
1524 #ifdef	_SYSCALL32_IMPL
1525 	/*
1526 	 * Check to see whether caller is 32-bit program and need to return
1527 	 * size of 32-bit snapshot now that snapshot has been taken/updated.
1528 	 * May not have been able to do this earlier if snapshot was out of
1529 	 * date or didn't exist yet.
1530 	 */
1531 	if (model == DATAMODEL_ILP32) {
1532 
1533 		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1534 
1535 		/*
1536 		 * Calculate size of buffer needed for 32-bit snapshot,
1537 		 * rounding up size of each object to allow for alignment
1538 		 * of next object in buffer.
1539 		 */
1540 		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
1541 		    sizeof (caddr32_t));
1542 		info_size =
1543 		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
1544 		    sizeof (processorid_t));
1545 		cpuids_size =
1546 		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
1547 		    sizeof (ulong_t));
1548 
1549 		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1550 		bitmasks_size = (((2 + LGRP_RSRC_COUNT) * snap_nlgrpsmax) +
1551 		    1) * bitmask_size;
1552 
1553 
1554 		/*
1555 		 * Size of latency table and buffer
1556 		 */
1557 		lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
1558 		    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));
1559 
1560 		bufsize = snap_hdr_size + info_size + cpuids_size +
1561 		    bitmasks_size + lats_size;
1562 		return (bufsize);
1563 	}
1564 #endif	/* _SYSCALL32_IMPL */
1565 
1566 	return (lgrp_snap->ss_size);
1567 }
1568 
1569 
1570 /*
1571  * Copy snapshot into given user buffer, fix up any pointers in buffer to point
1572  * into user instead of kernel address space, and return size of buffer
1573  * needed to hold snapshot
1574  */
1575 static int
lgrp_snapshot_copy(char * buf,size_t bufsize)1576 lgrp_snapshot_copy(char *buf, size_t bufsize)
1577 {
1578 	size_t			bitmask_size;
1579 	int			cpu_index;
1580 	size_t			cpuids_size;
1581 	int			i;
1582 	size_t			info_size;
1583 	lgrp_info_t		*lgrp_info;
1584 	int			retval;
1585 	size_t			snap_hdr_size;
1586 	int			snap_ncpus;
1587 	int			snap_nlgrpsmax;
1588 	lgrp_snapshot_header_t	*user_snap;
1589 	lgrp_info_t		*user_info;
1590 	lgrp_info_t		*user_info_buffer;
1591 	processorid_t		*user_cpuids;
1592 	ulong_t			*user_lgrpset;
1593 	ulong_t			*user_parents;
1594 	ulong_t			*user_children;
1595 	int			**user_lats;
1596 	int			**user_lats_buffer;
1597 	ulong_t			*user_rsets;
1598 
1599 	if (lgrp_snap == NULL)
1600 		return (0);
1601 
1602 	if (buf == NULL || bufsize <= 0)
1603 		return (lgrp_snap->ss_size);
1604 
1605 	/*
1606 	 * User needs to try getting size of buffer again
1607 	 * because given buffer size is too small.
1608 	 * The lgroup hierarchy may have changed after they asked for the size
1609 	 * but before the snapshot was taken.
1610 	 */
1611 	if (bufsize < lgrp_snap->ss_size)
1612 		return (set_errno(EAGAIN));
1613 
1614 	snap_ncpus = lgrp_snap->ss_ncpus;
1615 	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1616 
1617 	/*
1618 	 * Fill in lgrpset now because caller may have change psets
1619 	 */
1620 	kpreempt_disable();
1621 	for (i = 0; i < snap_nlgrpsmax; i++) {
1622 		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
1623 		    i)) {
1624 			BT_SET(lgrp_snap->ss_lgrpset, i);
1625 		}
1626 	}
1627 	kpreempt_enable();
1628 
1629 	/*
1630 	 * Copy lgroup snapshot (snapshot header, lgroup info, and CPU IDs)
1631 	 * into user buffer all at once
1632 	 */
1633 	if (copyout(lgrp_snap, buf, lgrp_snap->ss_size) != 0)
1634 		return (set_errno(EFAULT));
1635 
1636 	/*
1637 	 * Round up sizes of lgroup snapshot header and info for alignment
1638 	 */
1639 	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
1640 	    sizeof (void *));
1641 	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
1642 	    sizeof (processorid_t));
1643 	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
1644 	    sizeof (ulong_t));
1645 
1646 	bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1647 
1648 	/*
1649 	 * Calculate pointers into user buffer for lgroup snapshot header,
1650 	 * info, and CPU IDs
1651 	 */
1652 	user_snap = (lgrp_snapshot_header_t *)buf;
1653 	user_info = (lgrp_info_t *)((uintptr_t)user_snap + snap_hdr_size);
1654 	user_cpuids = (processorid_t *)((uintptr_t)user_info + info_size);
1655 	user_lgrpset = (ulong_t *)((uintptr_t)user_cpuids + cpuids_size);
1656 	user_parents = (ulong_t *)((uintptr_t)user_lgrpset + bitmask_size);
1657 	user_children = (ulong_t *)((uintptr_t)user_parents +
1658 	    (snap_nlgrpsmax * bitmask_size));
1659 	user_rsets = (ulong_t *)((uintptr_t)user_children +
1660 	    (snap_nlgrpsmax * bitmask_size));
1661 	user_lats = (int **)((uintptr_t)user_rsets +
1662 	    (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size));
1663 
1664 	/*
1665 	 * Copyout magic number (ie. pointer to beginning of buffer)
1666 	 */
1667 	if (copyout(&buf, &user_snap->ss_magic, sizeof (buf)) != 0)
1668 		return (set_errno(EFAULT));
1669 
1670 	/*
1671 	 * Fix up pointers in user buffer to point into user buffer
1672 	 * not kernel snapshot
1673 	 */
1674 	if (copyout(&user_info, &user_snap->ss_info, sizeof (user_info)) != 0)
1675 		return (set_errno(EFAULT));
1676 
1677 	if (copyout(&user_cpuids, &user_snap->ss_cpuids,
1678 	    sizeof (user_cpuids)) != 0)
1679 		return (set_errno(EFAULT));
1680 
1681 	if (copyout(&user_lgrpset, &user_snap->ss_lgrpset,
1682 	    sizeof (user_lgrpset)) != 0)
1683 		return (set_errno(EFAULT));
1684 
1685 	if (copyout(&user_parents, &user_snap->ss_parents,
1686 	    sizeof (user_parents)) != 0)
1687 		return (set_errno(EFAULT));
1688 
1689 	if (copyout(&user_children, &user_snap->ss_children,
1690 	    sizeof (user_children)) != 0)
1691 		return (set_errno(EFAULT));
1692 
1693 	if (copyout(&user_rsets, &user_snap->ss_rsets,
1694 	    sizeof (user_rsets)) != 0)
1695 		return (set_errno(EFAULT));
1696 
1697 	if (copyout(&user_lats, &user_snap->ss_latencies,
1698 	    sizeof (user_lats)) != 0)
1699 		return (set_errno(EFAULT));
1700 
1701 	/*
1702 	 * Make copies of lgroup info and latency table, fix up pointers,
1703 	 * and then copy them into user buffer
1704 	 */
1705 	user_info_buffer = kmem_zalloc(info_size, KM_NOSLEEP);
1706 	if (user_info_buffer == NULL)
1707 		return (set_errno(ENOMEM));
1708 
1709 	user_lats_buffer = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
1710 	    KM_NOSLEEP);
1711 	if (user_lats_buffer == NULL) {
1712 		kmem_free(user_info_buffer, info_size);
1713 		return (set_errno(ENOMEM));
1714 	}
1715 
1716 	lgrp_info = (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
1717 	bcopy(lgrp_info, user_info_buffer, info_size);
1718 
1719 	cpu_index = 0;
1720 	for (i = 0; i < snap_nlgrpsmax; i++) {
1721 		ulong_t	*snap_rset;
1722 
1723 		/*
1724 		 * Skip non-existent lgroups
1725 		 */
1726 		if (user_info_buffer[i].info_lgrpid == LGRP_NONE)
1727 			continue;
1728 
1729 		/*
1730 		 * Update free memory size since it changes frequently
1731 		 * Only do so for lgroups directly containing memory
1732 		 *
1733 		 * NOTE: This must be done before changing the pointers to
1734 		 *	 point into user space since we need to dereference
1735 		 *	 lgroup resource set
1736 		 */
1737 		snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
1738 		    BT_BITOUL(snap_nlgrpsmax)];
1739 		if (BT_TEST(snap_rset, i))
1740 			user_info_buffer[i].info_mem_free =
1741 			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
1742 
1743 		/*
1744 		 * Fix up pointers to parents, children, resources, and
1745 		 * latencies
1746 		 */
1747 		user_info_buffer[i].info_parents =
1748 		    (ulong_t *)((uintptr_t)user_parents + (i * bitmask_size));
1749 		user_info_buffer[i].info_children =
1750 		    (ulong_t *)((uintptr_t)user_children + (i * bitmask_size));
1751 		user_info_buffer[i].info_rset =
1752 		    (ulong_t *)((uintptr_t)user_rsets +
1753 		    (i * LGRP_RSRC_COUNT * bitmask_size));
1754 		user_lats_buffer[i] = (int *)((uintptr_t)user_lats +
1755 		    (snap_nlgrpsmax * sizeof (int *)) + (i * snap_nlgrpsmax *
1756 		    sizeof (int)));
1757 
1758 		/*
1759 		 * Fix up pointer to CPU IDs
1760 		 */
1761 		if (user_info_buffer[i].info_ncpus == 0) {
1762 			user_info_buffer[i].info_cpuids = NULL;
1763 			continue;
1764 		}
1765 		user_info_buffer[i].info_cpuids = &user_cpuids[cpu_index];
1766 		cpu_index += user_info_buffer[i].info_ncpus;
1767 	}
1768 	ASSERT(cpu_index == snap_ncpus);
1769 
1770 	/*
1771 	 * Copy lgroup info and latency table with pointers fixed up to point
1772 	 * into user buffer out to user buffer now
1773 	 */
1774 	retval = lgrp_snap->ss_size;
1775 	if (copyout(user_info_buffer, user_info, info_size) != 0)
1776 		retval = set_errno(EFAULT);
1777 	kmem_free(user_info_buffer, info_size);
1778 
1779 	if (copyout(user_lats_buffer, user_lats, snap_nlgrpsmax *
1780 	    sizeof (int *)) != 0)
1781 		retval = set_errno(EFAULT);
1782 	kmem_free(user_lats_buffer, snap_nlgrpsmax * sizeof (int *));
1783 
1784 	return (retval);
1785 }
1786 
1787 
#ifdef	_SYSCALL32_IMPL
/*
 * Make 32-bit copy of snapshot, fix up any pointers in buffer to point
 * into user instead of kernel address space, copy 32-bit snapshot into
 * given user buffer, and return size of buffer needed to hold snapshot
 *
 * Returns:
 * - 0 when there is no snapshot
 * - size of 32-bit snapshot when no buffer is given (buf or bufsize is 0)
 *   or when the copy succeeds
 * - result of set_errno() with EAGAIN when given buffer is too small,
 *   ENOMEM when memory cannot be allocated, or EFAULT when copyout fails
 */
static int
lgrp_snapshot_copy32(caddr32_t buf, size32_t bufsize)
{
	size32_t			bitmask_size;
	size32_t			bitmasks_size;
	size32_t			children_size;
	int				cpu_index;
	size32_t			cpuids_size;
	int				i;
	int				j;
	size32_t			info_size;
	size32_t			lats_size;
	lgrp_info_t			*lgrp_info;
	lgrp_snapshot_header32_t	*lgrp_snap32;
	lgrp_info32_t			*lgrp_info32;
	processorid_t			*lgrp_cpuids32;
	caddr32_t			*lgrp_lats32;
	int				**lgrp_lats32_kernel;
	uint_t				*lgrp_set32;
	uint_t				*lgrp_parents32;
	uint_t				*lgrp_children32;
	uint_t				*lgrp_rsets32;
	size32_t			parents_size;
	int				retval;
	size32_t			rsets_size;
	size32_t			set_size;
	size_t				snap_bitmask_size;
	size32_t			snap_hdr_size;
	int				snap_ncpus;
	int				snap_nlgrpsmax;
	size32_t			snap_size;

	if (lgrp_snap == NULL)
		return (0);

	snap_ncpus = lgrp_snap->ss_ncpus;
	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

	/*
	 * Size in bytes of one lgroup bitmask in the kernel's (native)
	 * snapshot, which is what separates consecutive bitmasks there
	 */
	snap_bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);

	/*
	 * Calculate size of buffer needed for 32-bit snapshot,
	 * rounding up size of each object to allow for alignment
	 * of next object in buffer.
	 */
	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
	    sizeof (caddr32_t));
	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
	    sizeof (processorid_t));
	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
	    sizeof (ulong_t));

	bitmask_size = BT_SIZEOFMAP32(snap_nlgrpsmax);

	set_size = bitmask_size;
	parents_size = snap_nlgrpsmax * bitmask_size;
	children_size = snap_nlgrpsmax * bitmask_size;
	rsets_size = P2ROUNDUP(LGRP_RSRC_COUNT * snap_nlgrpsmax *
	    (int)bitmask_size, sizeof (caddr32_t));

	bitmasks_size = set_size + parents_size + children_size + rsets_size;

	/*
	 * Size of latency table and buffer
	 */
	lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
	    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));

	snap_size = snap_hdr_size + info_size + cpuids_size + bitmasks_size +
	    lats_size;

	if (buf == 0 || bufsize == 0) {
		return (snap_size);
	}

	/*
	 * User needs to try getting size of buffer again
	 * because given buffer size is too small.
	 * The lgroup hierarchy may have changed after they asked for the size
	 * but before the snapshot was taken.
	 */
	if (bufsize < snap_size)
		return (set_errno(EAGAIN));

	/*
	 * Make 32-bit copy of snapshot, fix up pointers to point into user
	 * buffer not kernel, and then copy whole thing into user buffer
	 */
	lgrp_snap32 = kmem_zalloc(snap_size, KM_NOSLEEP);
	if (lgrp_snap32 == NULL)
		return (set_errno(ENOMEM));

	/*
	 * Calculate pointers into 32-bit copy of snapshot
	 * for lgroup info, CPU IDs, pset lgroup bitmask, parents, children,
	 * resources, and latency table and buffer
	 */
	lgrp_info32 = (lgrp_info32_t *)((uintptr_t)lgrp_snap32 +
	    snap_hdr_size);
	lgrp_cpuids32 = (processorid_t *)((uintptr_t)lgrp_info32 + info_size);
	lgrp_set32 = (uint_t *)((uintptr_t)lgrp_cpuids32 + cpuids_size);
	lgrp_parents32 = (uint_t *)((uintptr_t)lgrp_set32 + set_size);
	lgrp_children32 = (uint_t *)((uintptr_t)lgrp_parents32 + parents_size);
	lgrp_rsets32 = (uint_t *)((uintptr_t)lgrp_children32 + children_size);
	lgrp_lats32 = (caddr32_t *)((uintptr_t)lgrp_rsets32 + rsets_size);

	/*
	 * Make temporary lgroup latency table of pointers for kernel to use
	 * to fill in rows of table with latencies from each lgroup
	 */
	lgrp_lats32_kernel =  kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
	    KM_NOSLEEP);
	if (lgrp_lats32_kernel == NULL) {
		kmem_free(lgrp_snap32, snap_size);
		return (set_errno(ENOMEM));
	}

	/*
	 * Fill in 32-bit lgroup snapshot header
	 * (with pointers into user's buffer for lgroup info, CPU IDs,
	 * bit masks, and latencies)
	 */
	lgrp_snap32->ss_version = lgrp_snap->ss_version;
	lgrp_snap32->ss_levels = lgrp_snap->ss_levels;
	lgrp_snap32->ss_nlgrps = lgrp_snap32->ss_nlgrps_os =
	    lgrp_snap->ss_nlgrps;
	lgrp_snap32->ss_nlgrps_max = snap_nlgrpsmax;
	lgrp_snap32->ss_root = lgrp_snap->ss_root;
	lgrp_snap32->ss_ncpus = lgrp_snap->ss_ncpus;
	lgrp_snap32->ss_gen = lgrp_snap->ss_gen;
	lgrp_snap32->ss_view = LGRP_VIEW_OS;
	lgrp_snap32->ss_size = snap_size;
	lgrp_snap32->ss_magic = buf;
	lgrp_snap32->ss_info = buf + snap_hdr_size;
	lgrp_snap32->ss_cpuids = lgrp_snap32->ss_info + info_size;
	lgrp_snap32->ss_lgrpset = lgrp_snap32->ss_cpuids + cpuids_size;
	lgrp_snap32->ss_parents = lgrp_snap32->ss_lgrpset + bitmask_size;
	lgrp_snap32->ss_children = lgrp_snap32->ss_parents +
	    (snap_nlgrpsmax * bitmask_size);
	lgrp_snap32->ss_rsets = lgrp_snap32->ss_children +
	    (snap_nlgrpsmax * bitmask_size);
	lgrp_snap32->ss_latencies = lgrp_snap32->ss_rsets +
	    (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size);

	/*
	 * Fill in lgrpset now because caller may have changed psets
	 * (32-bit copy was zeroed when allocated, so only need to set bits)
	 */
	kpreempt_disable();
	for (i = 0; i < snap_nlgrpsmax; i++) {
		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
		    i)) {
			BT_SET32(lgrp_set32, i);
		}
	}
	kpreempt_enable();

	/*
	 * Fill in 32-bit copy of lgroup info and fix up pointers
	 * to point into user's buffer instead of kernel's
	 */
	cpu_index = 0;
	lgrp_info = lgrp_snap->ss_info;
	for (i = 0; i < snap_nlgrpsmax; i++) {
		uint_t	*children;
		uint_t	*lgrp_rset;
		ulong_t	*mem_rset;
		uint_t	*parents;
		ulong_t	*snap_children;
		ulong_t	*snap_parents;
		ulong_t	*snap_rset;

		/*
		 * Skip non-existent lgroups
		 */
		if (lgrp_info[i].info_lgrpid == LGRP_NONE) {
			bzero(&lgrp_info32[i], sizeof (lgrp_info32[i]));
			lgrp_info32[i].info_lgrpid = LGRP_NONE;
			continue;
		}

		/*
		 * Fill in parents, children, lgroup resource set, and
		 * latencies from snapshot
		 *
		 * NOTE: Bitmasks in the kernel's snapshot are
		 *	 snap_bitmask_size bytes apart, whereas those in the
		 *	 32-bit copy are bitmask_size bytes apart.
		 */
		parents = (uint_t *)((uintptr_t)lgrp_parents32 +
		    i * bitmask_size);
		children = (uint_t *)((uintptr_t)lgrp_children32 +
		    i * bitmask_size);
		snap_parents = (ulong_t *)((uintptr_t)lgrp_snap->ss_parents +
		    (i * snap_bitmask_size));
		snap_children = (ulong_t *)((uintptr_t)lgrp_snap->ss_children +
		    (i * snap_bitmask_size));
		snap_rset = (ulong_t *)((uintptr_t)lgrp_snap->ss_rsets +
		    (i * LGRP_RSRC_COUNT * snap_bitmask_size));
		lgrp_rset = (uint_t *)((uintptr_t)lgrp_rsets32 +
		    (i * LGRP_RSRC_COUNT * bitmask_size));
		lgrp_lats32_kernel[i] = (int *)((uintptr_t)lgrp_lats32 +
		    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
		    sizeof (int));
		for (j = 0; j < snap_nlgrpsmax; j++) {
			int	k;
			uint_t	*rset;
			ulong_t	*snap_rsrc;

			if (BT_TEST(snap_parents, j))
				BT_SET32(parents, j);

			if (BT_TEST(snap_children, j))
				BT_SET32(children, j);

			for (k = 0; k < LGRP_RSRC_COUNT; k++) {
				rset = (uint_t *)((uintptr_t)lgrp_rset +
				    k * bitmask_size);
				snap_rsrc = (ulong_t *)((uintptr_t)snap_rset +
				    k * snap_bitmask_size);
				if (BT_TEST(snap_rsrc, j))
					BT_SET32(rset, j);
			}

			lgrp_lats32_kernel[i][j] =
			    lgrp_snap->ss_latencies[i][j];
		}

		/*
		 * Fix up pointer to latency buffer
		 */
		lgrp_lats32[i] = lgrp_snap32->ss_latencies +
		    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
		    sizeof (int);

		/*
		 * Fix up pointers for parents, children, and resources
		 */
		lgrp_info32[i].info_parents = lgrp_snap32->ss_parents +
		    (i * bitmask_size);
		lgrp_info32[i].info_children = lgrp_snap32->ss_children +
		    (i * bitmask_size);
		lgrp_info32[i].info_rset = lgrp_snap32->ss_rsets +
		    (i * LGRP_RSRC_COUNT * bitmask_size);

		/*
		 * Fill in memory and CPU info
		 * Only fill in memory for lgroups directly containing memory
		 */
		mem_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
		    BT_BITOUL(snap_nlgrpsmax)];
		if (BT_TEST(mem_rset, i)) {
			lgrp_info32[i].info_mem_free = lgrp_mem_size(i,
			    LGRP_MEM_SIZE_FREE);
			lgrp_info32[i].info_mem_install =
			    lgrp_info[i].info_mem_install;
		}

		lgrp_info32[i].info_ncpus = lgrp_info[i].info_ncpus;

		lgrp_info32[i].info_lgrpid = lgrp_info[i].info_lgrpid;
		lgrp_info32[i].info_latency = lgrp_info[i].info_latency;

		if (lgrp_info32[i].info_ncpus == 0) {
			lgrp_info32[i].info_cpuids = 0;
			continue;
		}

		/*
		 * Fix up pointer for CPU IDs
		 */
		lgrp_info32[i].info_cpuids = lgrp_snap32->ss_cpuids +
		    (cpu_index * sizeof (processorid_t));
		cpu_index += lgrp_info32[i].info_ncpus;
	}
	ASSERT(cpu_index == snap_ncpus);

	/*
	 * Copy lgroup CPU IDs into 32-bit snapshot
	 * before copying it out into user's buffer
	 */
	bcopy(lgrp_snap->ss_cpuids, lgrp_cpuids32, cpuids_size);

	/*
	 * Copy 32-bit lgroup snapshot into user's buffer all at once
	 * and free temporary copies whether or not that worked
	 */
	retval = snap_size;
	if (copyout(lgrp_snap32, (void *)(uintptr_t)buf, snap_size) != 0)
		retval = set_errno(EFAULT);

	kmem_free(lgrp_snap32, snap_size);
	kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));

	return (retval);
}
#endif	/* _SYSCALL32_IMPL */
2073 
2074 
2075 int
lgrpsys(int subcode,long ia,void * ap)2076 lgrpsys(int subcode, long ia, void *ap)
2077 {
2078 	size_t	bufsize;
2079 	int	latency;
2080 
2081 	switch (subcode) {
2082 
2083 	case LGRP_SYS_AFFINITY_GET:
2084 		return (lgrp_affinity_get((lgrp_affinity_args_t *)ap));
2085 
2086 	case LGRP_SYS_AFFINITY_SET:
2087 		return (lgrp_affinity_set((lgrp_affinity_args_t *)ap));
2088 
2089 	case LGRP_SYS_GENERATION:
2090 		return (lgrp_generation(ia));
2091 
2092 	case LGRP_SYS_HOME:
2093 		return (lgrp_home_get((idtype_t)ia, (id_t)(uintptr_t)ap));
2094 
2095 	case LGRP_SYS_LATENCY:
2096 		mutex_enter(&cpu_lock);
2097 		latency = lgrp_latency(ia, (lgrp_id_t)(uintptr_t)ap);
2098 		mutex_exit(&cpu_lock);
2099 		return (latency);
2100 
2101 	case LGRP_SYS_MEMINFO:
2102 		return (meminfo(ia, (struct meminfo *)ap));
2103 
2104 	case LGRP_SYS_VERSION:
2105 		return (lgrp_version(ia));
2106 
2107 	case LGRP_SYS_SNAPSHOT:
2108 		mutex_enter(&lgrp_snap_lock);
2109 		bufsize = lgrp_snapshot();
2110 		if (ap && ia > 0) {
2111 			if (get_udatamodel() == DATAMODEL_NATIVE)
2112 				bufsize = lgrp_snapshot_copy(ap, ia);
2113 #ifdef	_SYSCALL32_IMPL
2114 			else
2115 				bufsize = lgrp_snapshot_copy32(
2116 				    (caddr32_t)(uintptr_t)ap, ia);
2117 #endif	/* _SYSCALL32_IMPL */
2118 		}
2119 		mutex_exit(&lgrp_snap_lock);
2120 		return (bufsize);
2121 
2122 	default:
2123 		break;
2124 
2125 	}
2126 
2127 	return (set_errno(EINVAL));
2128 }
2129