xref: /titanic_44/usr/src/uts/common/syscall/pset.c (revision 1e4c938b57d1656808e4112127ff1dce3eba5314)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/types.h>
26 #include <sys/systm.h>
27 #include <sys/cmn_err.h>
28 #include <sys/cpuvar.h>
29 #include <sys/thread.h>
30 #include <sys/disp.h>
31 #include <sys/kmem.h>
32 #include <sys/debug.h>
33 #include <sys/sysmacros.h>
34 #include <sys/cpupart.h>
35 #include <sys/pset.h>
36 #include <sys/modctl.h>
37 #include <sys/syscall.h>
38 #include <sys/task.h>
39 #include <sys/loadavg.h>
40 #include <sys/fss.h>
41 #include <sys/pool.h>
42 #include <sys/pool_pset.h>
43 #include <sys/policy.h>
44 #include <sys/zone.h>
45 #include <sys/contract/process_impl.h>
46 
/* Forward declaration of the pset(2) system call handler defined below. */
static int	pset(int, long, long, long, long);

/*
 * System call table entry for pset().  The leading 5 presumably is the
 * argument count (pset() takes a subcode plus four long arguments); the
 * flag and field meanings come from struct sysent -- confirm there.
 */
static struct sysent pset_sysent = {
	5,
	SE_ARGC | SE_NOUNLOAD,
	(int (*)())pset,
};

/* Module linkage element registering pset_sysent with mod_syscallops. */
static struct modlsys modlsys = {
	&mod_syscallops, "processor sets", &pset_sysent
};

#ifdef _SYSCALL32_IMPL
/*
 * Second linkage element, built only when _SYSCALL32_IMPL is defined; it
 * registers the same pset_sysent with mod_syscallops32.
 */
static struct modlsys modlsys32 = {
	&mod_syscallops32, "32-bit pset(2) syscall", &pset_sysent
};
#endif

/* NULL-terminated list of linkage elements handed to mod_install(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};

/* Non-zero when attr has any bit set other than PSET_NOESCAPE. */
#define	PSET_BADATTR(attr)	((~PSET_NOESCAPE) & (attr))
75 
76 int
77 _init(void)
78 {
79 	return (mod_install(&modlinkage));
80 }
81 
82 int
83 _info(struct modinfo *modinfop)
84 {
85 	return (mod_info(&modlinkage, modinfop));
86 }
87 
88 static int
89 pset_create(psetid_t *psetp)
90 {
91 	psetid_t newpset;
92 	int error;
93 
94 	if (secpolicy_pset(CRED()) != 0)
95 		return (set_errno(EPERM));
96 
97 	pool_lock();
98 	if (pool_state == POOL_ENABLED) {
99 		pool_unlock();
100 		return (set_errno(ENOTSUP));
101 	}
102 	error = cpupart_create(&newpset);
103 	if (error) {
104 		pool_unlock();
105 		return (set_errno(error));
106 	}
107 	if (copyout(&newpset, psetp, sizeof (psetid_t)) != 0) {
108 		(void) cpupart_destroy(newpset);
109 		pool_unlock();
110 		return (set_errno(EFAULT));
111 	}
112 	pool_unlock();
113 	return (error);
114 }
115 
116 static int
117 pset_destroy(psetid_t pset)
118 {
119 	int error;
120 
121 	if (secpolicy_pset(CRED()) != 0)
122 		return (set_errno(EPERM));
123 
124 	pool_lock();
125 	if (pool_state == POOL_ENABLED) {
126 		pool_unlock();
127 		return (set_errno(ENOTSUP));
128 	}
129 	error = cpupart_destroy(pset);
130 	pool_unlock();
131 	if (error)
132 		return (set_errno(error));
133 	else
134 		return (0);
135 }
136 
137 static int
138 pset_assign(psetid_t pset, processorid_t cpuid, psetid_t *opset, int forced)
139 {
140 	psetid_t oldpset;
141 	int	error = 0;
142 	cpu_t	*cp;
143 
144 	if (pset != PS_QUERY && secpolicy_pset(CRED()) != 0)
145 		return (set_errno(EPERM));
146 
147 	pool_lock();
148 	if (pset != PS_QUERY && pool_state == POOL_ENABLED) {
149 		pool_unlock();
150 		return (set_errno(ENOTSUP));
151 	}
152 
153 	mutex_enter(&cpu_lock);
154 	if ((cp = cpu_get(cpuid)) == NULL) {
155 		mutex_exit(&cpu_lock);
156 		pool_unlock();
157 		return (set_errno(EINVAL));
158 	}
159 
160 	oldpset = cpupart_query_cpu(cp);
161 
162 	if (pset != PS_QUERY)
163 		error = cpupart_attach_cpu(pset, cp, forced);
164 	mutex_exit(&cpu_lock);
165 	pool_unlock();
166 
167 	if (error)
168 		return (set_errno(error));
169 
170 	if (opset != NULL)
171 		if (copyout(&oldpset, opset, sizeof (psetid_t)) != 0)
172 			return (set_errno(EFAULT));
173 
174 	return (0);
175 }
176 
177 static int
178 pset_info(psetid_t pset, int *typep, uint_t *numcpusp,
179     processorid_t *cpulistp)
180 {
181 	int pset_type;
182 	uint_t user_ncpus = 0, real_ncpus, copy_ncpus;
183 	processorid_t *pset_cpus = NULL;
184 	int error = 0;
185 
186 	if (numcpusp != NULL) {
187 		if (copyin(numcpusp, &user_ncpus, sizeof (uint_t)) != 0)
188 			return (set_errno(EFAULT));
189 	}
190 
191 	if (user_ncpus > max_ncpus)	/* sanity check */
192 		user_ncpus = max_ncpus;
193 	if (user_ncpus != 0 && cpulistp != NULL)
194 		pset_cpus = kmem_alloc(sizeof (processorid_t) * user_ncpus,
195 		    KM_SLEEP);
196 
197 	real_ncpus = user_ncpus;
198 	if ((error = cpupart_get_cpus(&pset, pset_cpus, &real_ncpus)) != 0)
199 		goto out;
200 
201 	/*
202 	 * Now copyout the information about this processor set.
203 	 */
204 
205 	/*
206 	 * Get number of cpus to copy back.  If the user didn't pass in
207 	 * a big enough buffer, only copy back as many cpus as fits in
208 	 * the buffer but copy back the real number of cpus.
209 	 */
210 
211 	if (user_ncpus != 0 && cpulistp != NULL) {
212 		copy_ncpus = MIN(real_ncpus, user_ncpus);
213 		if (copyout(pset_cpus, cpulistp,
214 		    sizeof (processorid_t) * copy_ncpus) != 0) {
215 			error = EFAULT;
216 			goto out;
217 		}
218 	}
219 	if (pset_cpus != NULL)
220 		kmem_free(pset_cpus, sizeof (processorid_t) * user_ncpus);
221 	if (typep != NULL) {
222 		if (pset == PS_NONE)
223 			pset_type = PS_NONE;
224 		else
225 			pset_type = PS_PRIVATE;
226 		if (copyout(&pset_type, typep, sizeof (int)) != 0)
227 			return (set_errno(EFAULT));
228 	}
229 	if (numcpusp != NULL)
230 		if (copyout(&real_ncpus, numcpusp, sizeof (uint_t)) != 0)
231 			return (set_errno(EFAULT));
232 	return (0);
233 
234 out:
235 	if (pset_cpus != NULL)
236 		kmem_free(pset_cpus, sizeof (processorid_t) * user_ncpus);
237 	return (set_errno(error));
238 }
239 
240 static int
241 pset_bind_thread(kthread_t *tp, psetid_t pset, psetid_t *oldpset, void *projbuf,
242     void *zonebuf)
243 {
244 	int error = 0;
245 
246 	ASSERT(pool_lock_held());
247 	ASSERT(MUTEX_HELD(&cpu_lock));
248 	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
249 
250 	*oldpset = tp->t_bind_pset;
251 
252 	switch (pset) {
253 	case PS_SOFT:
254 		TB_PSET_SOFT_SET(tp);
255 		break;
256 
257 	case PS_HARD:
258 		TB_PSET_HARD_SET(tp);
259 		break;
260 
261 	case PS_QUERY:
262 		break;
263 
264 	case PS_QUERY_TYPE:
265 		*oldpset = TB_PSET_IS_SOFT(tp) ? PS_SOFT : PS_HARD;
266 		break;
267 
268 	default:
269 		/*
270 		 * Must have the same UID as the target process or
271 		 * have PRIV_PROC_OWNER privilege.
272 		 */
273 		if (!hasprocperm(tp->t_cred, CRED()))
274 			return (EPERM);
275 		/*
276 		 * Unbinding of an unbound thread should always succeed.
277 		 */
278 		if (*oldpset == PS_NONE && pset == PS_NONE)
279 			return (0);
280 		/*
281 		 * Only privileged processes can move threads from psets with
282 		 * PSET_NOESCAPE attribute.
283 		 */
284 		if ((tp->t_cpupart->cp_attr & PSET_NOESCAPE) &&
285 		    secpolicy_pbind(CRED()) != 0)
286 			return (EPERM);
287 		if ((error = cpupart_bind_thread(tp, pset, 0,
288 		    projbuf, zonebuf)) == 0)
289 			tp->t_bind_pset = pset;
290 
291 		break;
292 	}
293 
294 	return (error);
295 }
296 
297 static int
298 pset_bind_process(proc_t *pp, psetid_t pset, psetid_t *oldpset, void *projbuf,
299     void *zonebuf)
300 {
301 	int error = 0;
302 	kthread_t *tp;
303 
304 	/* skip kernel processes */
305 	if ((pset != PS_QUERY) && pp->p_flag & SSYS) {
306 		*oldpset = PS_NONE;
307 		return (ENOTSUP);
308 	}
309 
310 	mutex_enter(&pp->p_lock);
311 	tp = pp->p_tlist;
312 	if (tp != NULL) {
313 		do {
314 			int rval;
315 
316 			rval = pset_bind_thread(tp, pset, oldpset, projbuf,
317 			    zonebuf);
318 			if (error == 0)
319 				error = rval;
320 		} while ((tp = tp->t_forw) != pp->p_tlist);
321 	} else
322 		error = ESRCH;
323 	mutex_exit(&pp->p_lock);
324 
325 	return (error);
326 }
327 
328 static int
329 pset_bind_task(task_t *tk, psetid_t pset, psetid_t *oldpset, void *projbuf,
330     void *zonebuf)
331 {
332 	int error = 0;
333 	proc_t *pp;
334 
335 	ASSERT(MUTEX_HELD(&pidlock));
336 
337 	if ((pp = tk->tk_memb_list) == NULL) {
338 		return (ESRCH);
339 	}
340 
341 	do {
342 		int rval;
343 
344 		if (!(pp->p_flag & SSYS)) {
345 			rval = pset_bind_process(pp, pset, oldpset, projbuf,
346 			    zonebuf);
347 			if (error == 0)
348 				error = rval;
349 		}
350 	} while ((pp = pp->p_tasknext) != tk->tk_memb_list);
351 
352 	return (error);
353 }
354 
355 static int
356 pset_bind_project(kproject_t *kpj, psetid_t pset, psetid_t *oldpset,
357     void *projbuf, void *zonebuf)
358 {
359 	int error = 0;
360 	proc_t *pp;
361 
362 	ASSERT(MUTEX_HELD(&pidlock));
363 
364 	for (pp = practive; pp != NULL; pp = pp->p_next) {
365 		if (pp->p_tlist == NULL)
366 			continue;
367 		if (pp->p_task->tk_proj == kpj && !(pp->p_flag & SSYS)) {
368 			int rval;
369 
370 			rval = pset_bind_process(pp, pset, oldpset, projbuf,
371 			    zonebuf);
372 			if (error == 0)
373 				error = rval;
374 		}
375 	}
376 
377 	return (error);
378 }
379 
380 static int
381 pset_bind_zone(zone_t *zptr, psetid_t pset, psetid_t *oldpset, void *projbuf,
382     void *zonebuf)
383 {
384 	int error = 0;
385 	proc_t *pp;
386 
387 	ASSERT(MUTEX_HELD(&pidlock));
388 
389 	for (pp = practive; pp != NULL; pp = pp->p_next) {
390 		if (pp->p_zone == zptr && !(pp->p_flag & SSYS)) {
391 			int rval;
392 
393 			rval = pset_bind_process(pp, pset, oldpset, projbuf,
394 			    zonebuf);
395 			if (error == 0)
396 				error = rval;
397 		}
398 	}
399 
400 	return (error);
401 }
402 
403 /*
404  * Unbind all threads from the specified processor set, or from all
405  * processor sets.
406  */
407 static int
408 pset_unbind(psetid_t pset, void *projbuf, void *zonebuf, idtype_t idtype)
409 {
410 	psetid_t olbind;
411 	kthread_t *tp;
412 	int error = 0;
413 	int rval;
414 	proc_t *pp;
415 
416 	ASSERT(MUTEX_HELD(&cpu_lock));
417 
418 	if (idtype == P_PSETID && cpupart_find(pset) == NULL)
419 		return (EINVAL);
420 
421 	mutex_enter(&pidlock);
422 	for (pp = practive; pp != NULL; pp = pp->p_next) {
423 		mutex_enter(&pp->p_lock);
424 		tp = pp->p_tlist;
425 		/*
426 		 * Skip zombies and kernel processes, and processes in
427 		 * other zones, if called from a non-global zone.
428 		 */
429 		if (tp == NULL || (pp->p_flag & SSYS) ||
430 		    !HASZONEACCESS(curproc, pp->p_zone->zone_id)) {
431 			mutex_exit(&pp->p_lock);
432 			continue;
433 		}
434 		do {
435 			if ((idtype == P_PSETID && tp->t_bind_pset != pset) ||
436 			    (idtype == P_ALL && tp->t_bind_pset == PS_NONE))
437 				continue;
438 			rval = pset_bind_thread(tp, PS_NONE, &olbind,
439 			    projbuf, zonebuf);
440 			if (error == 0)
441 				error = rval;
442 		} while ((tp = tp->t_forw) != pp->p_tlist);
443 		mutex_exit(&pp->p_lock);
444 	}
445 	mutex_exit(&pidlock);
446 	return (error);
447 }
448 
449 static int
450 pset_bind_contract(cont_process_t *ctp, psetid_t pset, psetid_t *oldpset,
451     void *projbuf, void *zonebuf)
452 {
453 	int error = 0;
454 	proc_t *pp;
455 
456 	ASSERT(MUTEX_HELD(&pidlock));
457 
458 	for (pp = practive; pp != NULL; pp = pp->p_next) {
459 		if (pp->p_ct_process == ctp) {
460 			int rval;
461 
462 			rval = pset_bind_process(pp, pset, oldpset, projbuf,
463 			    zonebuf);
464 			if (error == 0)
465 				error = rval;
466 		}
467 	}
468 
469 	return (error);
470 }
471 
472 /*
473  * Bind the lwp:id of process:pid to processor set: pset
474  */
475 static int
476 pset_bind_lwp(psetid_t pset, id_t id, pid_t pid, psetid_t *opset)
477 {
478 	kthread_t	*tp;
479 	proc_t		*pp;
480 	psetid_t	oldpset;
481 	void		*projbuf, *zonebuf;
482 	int		error = 0;
483 
484 	pool_lock();
485 	mutex_enter(&cpu_lock);
486 	projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ);
487 	zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE);
488 
489 	mutex_enter(&pidlock);
490 	if ((pid == P_MYID && id == P_MYID) ||
491 	    (pid == curproc->p_pid && id == P_MYID)) {
492 		pp = curproc;
493 		tp = curthread;
494 		mutex_enter(&pp->p_lock);
495 	} else {
496 		if (pid == P_MYID) {
497 			pp = curproc;
498 		} else if ((pp = prfind(pid)) == NULL) {
499 			error = ESRCH;
500 			goto err;
501 		}
502 		if (pp != curproc && id == P_MYID) {
503 			error = EINVAL;
504 			goto err;
505 		}
506 		mutex_enter(&pp->p_lock);
507 		if ((tp = idtot(pp, id)) == NULL) {
508 			mutex_exit(&pp->p_lock);
509 			error = ESRCH;
510 			goto err;
511 		}
512 	}
513 
514 	error = pset_bind_thread(tp, pset, &oldpset, projbuf, zonebuf);
515 	mutex_exit(&pp->p_lock);
516 err:
517 	mutex_exit(&pidlock);
518 
519 	fss_freebuf(projbuf, FSS_ALLOC_PROJ);
520 	fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
521 	mutex_exit(&cpu_lock);
522 	pool_unlock();
523 	if (opset != NULL) {
524 		if (copyout(&oldpset, opset, sizeof (psetid_t)) != 0)
525 			return (set_errno(EFAULT));
526 	}
527 	if (error != 0)
528 		return (set_errno(error));
529 	return (0);
530 }
531 
532 static int
533 pset_bind(psetid_t pset, idtype_t idtype, id_t id, psetid_t *opset)
534 {
535 	kthread_t	*tp;
536 	proc_t		*pp;
537 	task_t		*tk;
538 	kproject_t	*kpj;
539 	contract_t	*ct;
540 	zone_t		*zptr;
541 	psetid_t	oldpset;
542 	int		error = 0;
543 	void		*projbuf, *zonebuf;
544 
545 	pool_lock();
546 	if ((pset != PS_QUERY) && (pset != PS_SOFT) &&
547 	    (pset != PS_HARD) && (pset != PS_QUERY_TYPE)) {
548 		/*
549 		 * Check if the set actually exists before checking
550 		 * permissions.  This is the historical error
551 		 * precedence.  Note that if pset was PS_MYID, the
552 		 * cpupart_get_cpus call will change it to the
553 		 * processor set id of the caller (or PS_NONE if the
554 		 * caller is not bound to a processor set).
555 		 */
556 		if (pool_state == POOL_ENABLED) {
557 			pool_unlock();
558 			return (set_errno(ENOTSUP));
559 		}
560 		if (cpupart_get_cpus(&pset, NULL, NULL) != 0) {
561 			pool_unlock();
562 			return (set_errno(EINVAL));
563 		} else if (pset != PS_NONE && secpolicy_pbind(CRED()) != 0) {
564 			pool_unlock();
565 			return (set_errno(EPERM));
566 		}
567 	}
568 
569 	/*
570 	 * Pre-allocate enough buffers for FSS for all active projects
571 	 * and for all active zones on the system.  Unused buffers will
572 	 * be freed later by fss_freebuf().
573 	 */
574 	mutex_enter(&cpu_lock);
575 	projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ);
576 	zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE);
577 
578 	switch (idtype) {
579 	case P_LWPID:
580 		pp = curproc;
581 		mutex_enter(&pidlock);
582 		mutex_enter(&pp->p_lock);
583 		if (id == P_MYID) {
584 			tp = curthread;
585 		} else {
586 			if ((tp = idtot(pp, id)) == NULL) {
587 				mutex_exit(&pp->p_lock);
588 				mutex_exit(&pidlock);
589 				error = ESRCH;
590 				break;
591 			}
592 		}
593 		error = pset_bind_thread(tp, pset, &oldpset, projbuf, zonebuf);
594 		mutex_exit(&pp->p_lock);
595 		mutex_exit(&pidlock);
596 		break;
597 
598 	case P_PID:
599 		mutex_enter(&pidlock);
600 		if (id == P_MYID) {
601 			pp = curproc;
602 		} else if ((pp = prfind(id)) == NULL) {
603 			mutex_exit(&pidlock);
604 			error = ESRCH;
605 			break;
606 		}
607 		error = pset_bind_process(pp, pset, &oldpset, projbuf, zonebuf);
608 		mutex_exit(&pidlock);
609 		break;
610 
611 	case P_TASKID:
612 		mutex_enter(&pidlock);
613 		if (id == P_MYID)
614 			id = curproc->p_task->tk_tkid;
615 		if ((tk = task_hold_by_id(id)) == NULL) {
616 			mutex_exit(&pidlock);
617 			error = ESRCH;
618 			break;
619 		}
620 		error = pset_bind_task(tk, pset, &oldpset, projbuf, zonebuf);
621 		mutex_exit(&pidlock);
622 		task_rele(tk);
623 		break;
624 
625 	case P_PROJID:
626 		pp = curproc;
627 		if (id == P_MYID)
628 			id = curprojid();
629 		if ((kpj = project_hold_by_id(id, pp->p_zone,
630 		    PROJECT_HOLD_FIND)) == NULL) {
631 			error = ESRCH;
632 			break;
633 		}
634 		mutex_enter(&pidlock);
635 		error = pset_bind_project(kpj, pset, &oldpset, projbuf,
636 		    zonebuf);
637 		mutex_exit(&pidlock);
638 		project_rele(kpj);
639 		break;
640 
641 	case P_ZONEID:
642 		if (id == P_MYID)
643 			id = getzoneid();
644 		if ((zptr = zone_find_by_id(id)) == NULL) {
645 			error = ESRCH;
646 			break;
647 		}
648 		mutex_enter(&pidlock);
649 		error = pset_bind_zone(zptr, pset, &oldpset, projbuf, zonebuf);
650 		mutex_exit(&pidlock);
651 		zone_rele(zptr);
652 		break;
653 
654 	case P_CTID:
655 		if (id == P_MYID)
656 			id = PRCTID(curproc);
657 		if ((ct = contract_type_ptr(process_type, id,
658 		    curproc->p_zone->zone_uniqid)) == NULL) {
659 			error = ESRCH;
660 			break;
661 		}
662 		mutex_enter(&pidlock);
663 		error = pset_bind_contract(ct->ct_data, pset, &oldpset, projbuf,
664 		    zonebuf);
665 		mutex_exit(&pidlock);
666 		contract_rele(ct);
667 		break;
668 
669 	case P_PSETID:
670 		if (id == P_MYID || pset != PS_NONE || !INGLOBALZONE(curproc)) {
671 			error = EINVAL;
672 			break;
673 		}
674 		error = pset_unbind(id, projbuf, zonebuf, idtype);
675 		break;
676 
677 	case P_ALL:
678 		if (id == P_MYID || pset != PS_NONE || !INGLOBALZONE(curproc)) {
679 			error = EINVAL;
680 			break;
681 		}
682 		error = pset_unbind(PS_NONE, projbuf, zonebuf, idtype);
683 		break;
684 
685 	default:
686 		error = EINVAL;
687 		break;
688 	}
689 
690 	fss_freebuf(projbuf, FSS_ALLOC_PROJ);
691 	fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
692 	mutex_exit(&cpu_lock);
693 	pool_unlock();
694 
695 	if (error != 0)
696 		return (set_errno(error));
697 	if (opset != NULL) {
698 		if (copyout(&oldpset, opset, sizeof (psetid_t)) != 0)
699 			return (set_errno(EFAULT));
700 	}
701 	return (0);
702 }
703 
704 /*
705  * Report load average statistics for the specified processor set.
706  */
707 static int
708 pset_getloadavg(psetid_t pset, int *buf, int nelem)
709 {
710 	int loadbuf[LOADAVG_NSTATS];
711 	int error = 0;
712 
713 	if (nelem < 0)
714 		return (set_errno(EINVAL));
715 
716 	/*
717 	 * We keep the same number of load average statistics for processor
718 	 * sets as we do for the system as a whole.
719 	 */
720 	if (nelem > LOADAVG_NSTATS)
721 		nelem = LOADAVG_NSTATS;
722 
723 	mutex_enter(&cpu_lock);
724 	error = cpupart_get_loadavg(pset, loadbuf, nelem);
725 	mutex_exit(&cpu_lock);
726 	if (!error && nelem && copyout(loadbuf, buf, nelem * sizeof (int)) != 0)
727 		error = EFAULT;
728 
729 	if (error)
730 		return (set_errno(error));
731 	else
732 		return (0);
733 }
734 
735 
736 /*
737  * Return list of active processor sets, up to a maximum indicated by
738  * numpsets.  The total number of processor sets is stored in the
739  * location pointed to by numpsets.
740  */
741 static int
742 pset_list(psetid_t *psetlist, uint_t *numpsets)
743 {
744 	uint_t user_npsets = 0;
745 	uint_t real_npsets;
746 	psetid_t *psets = NULL;
747 	int error = 0;
748 
749 	if (numpsets != NULL) {
750 		if (copyin(numpsets, &user_npsets, sizeof (uint_t)) != 0)
751 			return (set_errno(EFAULT));
752 	}
753 
754 	/*
755 	 * Get the list of all processor sets.  First we need to find
756 	 * out how many there are, so we can allocate a large enough
757 	 * buffer.
758 	 */
759 	mutex_enter(&cpu_lock);
760 	if (!INGLOBALZONE(curproc) && pool_pset_enabled()) {
761 		psetid_t psetid = zone_pset_get(curproc->p_zone);
762 
763 		if (psetid == PS_NONE) {
764 			real_npsets = 0;
765 		} else {
766 			real_npsets = 1;
767 			psets = kmem_alloc(real_npsets * sizeof (psetid_t),
768 			    KM_SLEEP);
769 			psets[0] = psetid;
770 		}
771 	} else {
772 		real_npsets = cpupart_list(0, NULL, CP_ALL);
773 		if (real_npsets) {
774 			psets = kmem_alloc(real_npsets * sizeof (psetid_t),
775 			    KM_SLEEP);
776 			(void) cpupart_list(psets, real_npsets, CP_ALL);
777 		}
778 	}
779 	mutex_exit(&cpu_lock);
780 
781 	if (user_npsets > real_npsets)
782 		user_npsets = real_npsets;
783 
784 	if (numpsets != NULL) {
785 		if (copyout(&real_npsets, numpsets, sizeof (uint_t)) != 0)
786 			error = EFAULT;
787 		else if (psetlist != NULL && user_npsets != 0) {
788 			if (copyout(psets, psetlist,
789 			    user_npsets * sizeof (psetid_t)) != 0)
790 				error = EFAULT;
791 		}
792 	}
793 
794 	if (real_npsets)
795 		kmem_free(psets, real_npsets * sizeof (psetid_t));
796 
797 	if (error)
798 		return (set_errno(error));
799 	else
800 		return (0);
801 }
802 
803 static int
804 pset_setattr(psetid_t pset, uint_t attr)
805 {
806 	int error;
807 
808 	if (secpolicy_pset(CRED()) != 0)
809 		return (set_errno(EPERM));
810 	pool_lock();
811 	if (pool_state == POOL_ENABLED) {
812 		pool_unlock();
813 		return (set_errno(ENOTSUP));
814 	}
815 	if (pset == PS_QUERY || PSET_BADATTR(attr)) {
816 		pool_unlock();
817 		return (set_errno(EINVAL));
818 	}
819 	if ((error = cpupart_setattr(pset, attr)) != 0) {
820 		pool_unlock();
821 		return (set_errno(error));
822 	}
823 	pool_unlock();
824 	return (0);
825 }
826 
827 static int
828 pset_getattr(psetid_t pset, uint_t *attrp)
829 {
830 	int error = 0;
831 	uint_t attr;
832 
833 	if (pset == PS_QUERY)
834 		return (set_errno(EINVAL));
835 	if ((error = cpupart_getattr(pset, &attr)) != 0)
836 		return (set_errno(error));
837 	if (copyout(&attr, attrp, sizeof (uint_t)) != 0)
838 		return (set_errno(EFAULT));
839 	return (0);
840 }
841 
842 static int
843 pset(int subcode, long arg1, long arg2, long arg3, long arg4)
844 {
845 	switch (subcode) {
846 	case PSET_CREATE:
847 		return (pset_create((psetid_t *)arg1));
848 	case PSET_DESTROY:
849 		return (pset_destroy((psetid_t)arg1));
850 	case PSET_ASSIGN:
851 		return (pset_assign((psetid_t)arg1,
852 		    (processorid_t)arg2, (psetid_t *)arg3, 0));
853 	case PSET_INFO:
854 		return (pset_info((psetid_t)arg1, (int *)arg2,
855 		    (uint_t *)arg3, (processorid_t *)arg4));
856 	case PSET_BIND:
857 		return (pset_bind((psetid_t)arg1, (idtype_t)arg2,
858 		    (id_t)arg3, (psetid_t *)arg4));
859 	case PSET_BIND_LWP:
860 		return (pset_bind_lwp((psetid_t)arg1, (id_t)arg2,
861 		    (pid_t)arg3, (psetid_t *)arg4));
862 	case PSET_GETLOADAVG:
863 		return (pset_getloadavg((psetid_t)arg1, (int *)arg2,
864 		    (int)arg3));
865 	case PSET_LIST:
866 		return (pset_list((psetid_t *)arg1, (uint_t *)arg2));
867 	case PSET_SETATTR:
868 		return (pset_setattr((psetid_t)arg1, (uint_t)arg2));
869 	case PSET_GETATTR:
870 		return (pset_getattr((psetid_t)arg1, (uint_t *)arg2));
871 	case PSET_ASSIGN_FORCED:
872 		return (pset_assign((psetid_t)arg1,
873 		    (processorid_t)arg2, (psetid_t *)arg3, 1));
874 	default:
875 		return (set_errno(EINVAL));
876 	}
877 }
878