1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 #include <sys/pool.h>
30 #include <sys/pool_impl.h>
31 #include <sys/pool_pset.h>
32 #include <sys/cpuvar.h>
33 #include <sys/cpupart.h>
34 #include <sys/mutex.h>
35 #include <sys/errno.h>
36 #include <sys/systm.h>
37 #include <sys/cmn_err.h>
38 #include <sys/fss.h>
39 #include <sys/exacct.h>
40 #include <sys/time.h>
41 #include <sys/policy.h>
42 #include <sys/class.h>
43 #include <sys/list.h>
44 #include <sys/cred.h>
45 #include <sys/zone.h>
46
47 /*
48 * Processor set plugin for pools.
49 *
50 * This file contains various routines used by the common pools layer to create,
51 * modify, and destroy processor sets. All processor sets created by this
52 * plug-in are stored in the pool_pset_list doubly-linked list, which is
53 * guaranteed to always have an entry for the default processor set,
54 * pool_pset_default.
55 *
56 * Interaction with zones:
57 *
58 * If pools are enabled, non-global zones only have visibility into the
59 * pset of the pool to which they are bound. This is accomplished by
60 * changing the set of processors and processor sets which are visible
61 * through both systemcall interfaces and system kstats.
62 *
63 * To avoid grabbing pool_lock() during cpu change operations, we cache
64 * the pset the zone is currently bound to, and can read this value
65 * while under cpu_lock. The special psetid_t token ZONE_PS_INVAL means
66 * that pools are disabled, and provides a mechanism for determining the
67 * status of pools without grabbing pool_lock().
68 *
69 * To avoid grabbing any locks to determine the instantaneous value of
70 * the number of configured and online cpus in the zone, we also cache
71 * these values in a zone_t. If these values are zero, the pools
72 * facility must be disabled, in which case relevant systemcall
73 * interfaces will return the values for the system as a whole.
74 *
75 * The various kstat interfaces are dealt with as follows: if pools are
76 * disabled all cpu-related kstats should be exported to all zones.
77 * When pools are enabled we begin maintaining a list of "permitted
78 * zones" on a per-kstat basis. There are various hooks throughout the
79 * code to update this list when certain pools- or cpu-related events
80 * occur.
81 */
82
83 static list_t pool_pset_list; /* doubly-linked list of psets */
84 pool_pset_t *pool_pset_default; /* default pset */
85 hrtime_t pool_pset_mod; /* last modification time for psets */
86 hrtime_t pool_cpu_mod; /* last modification time for CPUs */
87
88 static pool_pset_t *
pool_lookup_pset_by_id(psetid_t psetid)89 pool_lookup_pset_by_id(psetid_t psetid)
90 {
91 pool_pset_t *pset = pool_pset_default;
92
93 ASSERT(pool_lock_held());
94
95 for (pset = list_head(&pool_pset_list); pset;
96 pset = list_next(&pool_pset_list, pset)) {
97 if (pset->pset_id == psetid)
98 return (pset);
99 }
100 return (NULL);
101 }
102
103 struct setup_arg {
104 psetid_t psetid;
105 cpu_t *cpu;
106 cpu_setup_t what;
107 };
108
109 /*
110 * Callback function used to apply a cpu configuration event to a zone.
111 */
112 static int
pool_pset_setup_cb(zone_t * zone,void * arg)113 pool_pset_setup_cb(zone_t *zone, void *arg)
114 {
115 struct setup_arg *sa = arg;
116
117 ASSERT(MUTEX_HELD(&cpu_lock));
118 ASSERT(INGLOBALZONE(curproc));
119 ASSERT(zone != NULL);
120
121 if (zone == global_zone)
122 return (0);
123 if (zone_pset_get(zone) != sa->psetid)
124 return (0); /* ignore */
125 switch (sa->what) {
126 case CPU_CONFIG:
127 cpu_visibility_configure(sa->cpu, zone);
128 break;
129 case CPU_UNCONFIG:
130 cpu_visibility_unconfigure(sa->cpu, zone);
131 break;
132 case CPU_ON:
133 cpu_visibility_online(sa->cpu, zone);
134 break;
135 case CPU_OFF:
136 cpu_visibility_offline(sa->cpu, zone);
137 break;
138 case CPU_CPUPART_IN:
139 cpu_visibility_add(sa->cpu, zone);
140 break;
141 case CPU_CPUPART_OUT:
142 cpu_visibility_remove(sa->cpu, zone);
143 break;
144 default:
145 cmn_err(CE_PANIC, "invalid cpu_setup_t value %d", sa->what);
146 }
147 return (0);
148 }
149
150 /*
151 * Callback function to be executed when a noteworthy cpu event takes
152 * place. Will ensure that the event is reflected by the zones which
153 * were affected by it.
154 */
155 /* ARGSUSED */
156 static int
pool_pset_cpu_setup(cpu_setup_t what,int id,void * arg)157 pool_pset_cpu_setup(cpu_setup_t what, int id, void *arg)
158 {
159 processorid_t cpuid = id;
160 struct setup_arg sarg;
161 int error;
162 cpu_t *c;
163
164 ASSERT(MUTEX_HELD(&cpu_lock));
165 ASSERT(INGLOBALZONE(curproc));
166
167 if (!pool_pset_enabled())
168 return (0);
169 if (what != CPU_CONFIG && what != CPU_UNCONFIG &&
170 what != CPU_ON && what != CPU_OFF &&
171 what != CPU_CPUPART_IN && what != CPU_CPUPART_OUT)
172 return (0);
173 c = cpu_get(cpuid);
174 ASSERT(c != NULL);
175 sarg.psetid = cpupart_query_cpu(c);
176 sarg.cpu = c;
177 sarg.what = what;
178
179 error = zone_walk(pool_pset_setup_cb, &sarg);
180 ASSERT(error == 0);
181 return (0);
182 }
183
184 /*
185 * Initialize processor set plugin. Called once at boot time.
186 */
187 void
pool_pset_init(void)188 pool_pset_init(void)
189 {
190 ASSERT(pool_pset_default == NULL);
191 pool_pset_default = kmem_zalloc(sizeof (pool_pset_t), KM_SLEEP);
192 pool_pset_default->pset_id = PS_NONE;
193 pool_pset_default->pset_npools = 1; /* for pool_default */
194 pool_default->pool_pset = pool_pset_default;
195 list_create(&pool_pset_list, sizeof (pool_pset_t),
196 offsetof(pool_pset_t, pset_link));
197 list_insert_head(&pool_pset_list, pool_pset_default);
198 mutex_enter(&cpu_lock);
199 register_cpu_setup_func(pool_pset_cpu_setup, NULL);
200 mutex_exit(&cpu_lock);
201 }
202
203 /*
204 * Dummy wrapper function that returns 0 to satisfy zone_walk().
205 */
206 static int
pool_pset_zone_pset_set(zone_t * zone,void * arg)207 pool_pset_zone_pset_set(zone_t *zone, void *arg)
208 {
209 psetid_t psetid = (psetid_t)(uintptr_t)arg;
210
211 ASSERT(MUTEX_HELD(&cpu_lock));
212 zone_pset_set(zone, psetid);
213 return (0);
214 }
215
216 /*
217 * Enable processor set plugin.
218 */
219 int
pool_pset_enable(void)220 pool_pset_enable(void)
221 {
222 int error;
223 nvlist_t *props;
224
225 ASSERT(pool_lock_held());
226 ASSERT(INGLOBALZONE(curproc));
227 /*
228 * Can't enable pools if there are existing cpu partitions.
229 */
230 mutex_enter(&cpu_lock);
231 if (cp_numparts > 1) {
232 mutex_exit(&cpu_lock);
233 return (EEXIST);
234 }
235
236 /*
237 * We want to switch things such that everything that was tagged with
238 * the special ALL_ZONES token now is explicitly visible to all zones:
239 * first add individual zones to the visibility list then remove the
240 * special "ALL_ZONES" token. There must only be the default pset
241 * (PS_NONE) active if pools are being enabled, so we only need to
242 * deal with it.
243 *
244 * We want to make pool_pset_enabled() start returning B_TRUE before
245 * we call any of the visibility update functions.
246 */
247 global_zone->zone_psetid = PS_NONE;
248 /*
249 * We need to explicitly handle the global zone since
250 * zone_pset_set() won't modify it.
251 */
252 pool_pset_visibility_add(PS_NONE, global_zone);
253 /*
254 * A NULL argument means the ALL_ZONES token.
255 */
256 pool_pset_visibility_remove(PS_NONE, NULL);
257 error = zone_walk(pool_pset_zone_pset_set, (void *)PS_NONE);
258 ASSERT(error == 0);
259
260 /*
261 * It is safe to drop cpu_lock here. We're still
262 * holding pool_lock so no new cpu partitions can
263 * be created while we're here.
264 */
265 mutex_exit(&cpu_lock);
266 (void) nvlist_alloc(&pool_pset_default->pset_props,
267 NV_UNIQUE_NAME, KM_SLEEP);
268 props = pool_pset_default->pset_props;
269 (void) nvlist_add_string(props, "pset.name", "pset_default");
270 (void) nvlist_add_string(props, "pset.comment", "");
271 (void) nvlist_add_int64(props, "pset.sys_id", PS_NONE);
272 (void) nvlist_add_string(props, "pset.units", "population");
273 (void) nvlist_add_byte(props, "pset.default", 1);
274 (void) nvlist_add_uint64(props, "pset.max", 65536);
275 (void) nvlist_add_uint64(props, "pset.min", 1);
276 pool_pset_mod = pool_cpu_mod = gethrtime();
277 return (0);
278 }
279
280 /*
281 * Disable processor set plugin.
282 */
283 int
pool_pset_disable(void)284 pool_pset_disable(void)
285 {
286 processorid_t cpuid;
287 cpu_t *cpu;
288 int error;
289
290 ASSERT(pool_lock_held());
291 ASSERT(INGLOBALZONE(curproc));
292
293 mutex_enter(&cpu_lock);
294 if (cp_numparts > 1) { /* make sure only default pset is left */
295 mutex_exit(&cpu_lock);
296 return (EBUSY);
297 }
298 /*
299 * Remove all non-system CPU and processor set properties
300 */
301 for (cpuid = 0; cpuid < NCPU; cpuid++) {
302 if ((cpu = cpu_get(cpuid)) == NULL)
303 continue;
304 if (cpu->cpu_props != NULL) {
305 (void) nvlist_free(cpu->cpu_props);
306 cpu->cpu_props = NULL;
307 }
308 }
309
310 /*
311 * We want to switch things such that everything is now visible
312 * to ALL_ZONES: first add the special "ALL_ZONES" token to the
313 * visibility list then remove individual zones. There must
314 * only be the default pset active if pools are being disabled,
315 * so we only need to deal with it.
316 */
317 error = zone_walk(pool_pset_zone_pset_set, (void *)ZONE_PS_INVAL);
318 ASSERT(error == 0);
319 pool_pset_visibility_add(PS_NONE, NULL);
320 pool_pset_visibility_remove(PS_NONE, global_zone);
321 /*
322 * pool_pset_enabled() will henceforth return B_FALSE.
323 */
324 global_zone->zone_psetid = ZONE_PS_INVAL;
325 mutex_exit(&cpu_lock);
326 if (pool_pset_default->pset_props != NULL) {
327 nvlist_free(pool_pset_default->pset_props);
328 pool_pset_default->pset_props = NULL;
329 }
330 return (0);
331 }
332
333 /*
334 * Create new processor set and give it a temporary name.
335 */
336 int
pool_pset_create(psetid_t * id)337 pool_pset_create(psetid_t *id)
338 {
339 char pset_name[40];
340 pool_pset_t *pset;
341 psetid_t psetid;
342 int err;
343
344 ASSERT(pool_lock_held());
345 if ((err = cpupart_create(&psetid)) != 0)
346 return (err);
347 pset = kmem_alloc(sizeof (pool_pset_t), KM_SLEEP);
348 pset->pset_id = *id = psetid;
349 pset->pset_npools = 0;
350 (void) nvlist_alloc(&pset->pset_props, NV_UNIQUE_NAME, KM_SLEEP);
351 (void) nvlist_add_int64(pset->pset_props, "pset.sys_id", psetid);
352 (void) nvlist_add_byte(pset->pset_props, "pset.default", 0);
353 pool_pset_mod = gethrtime();
354 (void) snprintf(pset_name, sizeof (pset_name), "pset_%lld",
355 pool_pset_mod);
356 (void) nvlist_add_string(pset->pset_props, "pset.name", pset_name);
357 list_insert_tail(&pool_pset_list, pset);
358 return (0);
359 }
360
361 /*
362 * Destroy existing processor set.
363 */
364 int
pool_pset_destroy(psetid_t psetid)365 pool_pset_destroy(psetid_t psetid)
366 {
367 pool_pset_t *pset;
368 int ret;
369
370 ASSERT(pool_lock_held());
371
372 if (psetid == PS_NONE)
373 return (EINVAL);
374 if ((pset = pool_lookup_pset_by_id(psetid)) == NULL)
375 return (ESRCH);
376 if (pset->pset_npools > 0) /* can't destroy associated psets */
377 return (EBUSY);
378 if ((ret = cpupart_destroy(pset->pset_id)) != 0)
379 return (ret);
380 (void) nvlist_free(pset->pset_props);
381 list_remove(&pool_pset_list, pset);
382 pool_pset_mod = gethrtime();
383 kmem_free(pset, sizeof (pool_pset_t));
384 return (0);
385 }
386
387 /*
388 * Change the visibility of a pset (and all contained cpus) in a zone.
389 * A NULL zone argument implies the special ALL_ZONES token.
390 */
391 static void
pool_pset_visibility_change(psetid_t psetid,zone_t * zone,boolean_t add)392 pool_pset_visibility_change(psetid_t psetid, zone_t *zone, boolean_t add)
393 {
394 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
395 cpupart_t *cp;
396 cpu_t *c;
397
398 ASSERT(MUTEX_HELD(&cpu_lock));
399 ASSERT(psetid != ZONE_PS_INVAL);
400
401 cp = cpupart_find(psetid);
402 ASSERT(cp != NULL);
403 if (cp->cp_kstat != NULL) {
404 if (add)
405 kstat_zone_add(cp->cp_kstat, zoneid);
406 else
407 kstat_zone_remove(cp->cp_kstat, zoneid);
408 }
409
410 c = cpu_list;
411 do {
412 ASSERT(c != NULL);
413 if (c->cpu_part == cp && !cpu_is_poweredoff(c)) {
414 if (add)
415 cpu_visibility_add(c, zone);
416 else
417 cpu_visibility_remove(c, zone);
418 }
419 } while ((c = c->cpu_next) != cpu_list);
420 }
421
422 /*
423 * Make the processor set visible to the zone. A NULL value for
424 * the zone means that the special ALL_ZONES token should be added to
425 * the visibility list.
426 */
427 void
pool_pset_visibility_add(psetid_t psetid,zone_t * zone)428 pool_pset_visibility_add(psetid_t psetid, zone_t *zone)
429 {
430 pool_pset_visibility_change(psetid, zone, B_TRUE);
431 }
432
433 /*
434 * Remove zone's visibility into the processor set. A NULL value for
435 * the zone means that the special ALL_ZONES token should be removed
436 * from the visibility list.
437 */
438 void
pool_pset_visibility_remove(psetid_t psetid,zone_t * zone)439 pool_pset_visibility_remove(psetid_t psetid, zone_t *zone)
440 {
441 pool_pset_visibility_change(psetid, zone, B_FALSE);
442 }
443
444 /*
445 * Quick way of seeing if pools are enabled (as far as processor sets are
446 * concerned) without holding pool_lock().
447 */
448 boolean_t
pool_pset_enabled(void)449 pool_pset_enabled(void)
450 {
451 ASSERT(MUTEX_HELD(&cpu_lock));
452
453 return (zone_pset_get(global_zone) != ZONE_PS_INVAL);
454 }
455
456 struct assoc_zone_arg {
457 poolid_t poolid;
458 psetid_t newpsetid;
459 };
460
461 /*
462 * Callback function to update a zone's processor set visibility when
463 * a pool is associated with a processor set.
464 */
465 static int
pool_pset_assoc_zone_cb(zone_t * zone,void * arg)466 pool_pset_assoc_zone_cb(zone_t *zone, void *arg)
467 {
468 struct assoc_zone_arg *aza = arg;
469 pool_t *pool;
470 zoneid_t zoneid = zone->zone_id;
471
472 ASSERT(pool_lock_held());
473 ASSERT(MUTEX_HELD(&cpu_lock));
474
475 if (zoneid == GLOBAL_ZONEID)
476 return (0);
477 pool = zone_pool_get(zone);
478 if (pool->pool_id == aza->poolid)
479 zone_pset_set(zone, aza->newpsetid);
480 return (0);
481 }
482
483 /*
484 * Associate pool with new processor set.
485 */
486 int
pool_pset_assoc(poolid_t poolid,psetid_t psetid)487 pool_pset_assoc(poolid_t poolid, psetid_t psetid)
488 {
489 pool_t *pool;
490 pool_pset_t *pset, *oldpset;
491 int err = 0;
492
493 ASSERT(pool_lock_held());
494
495 if ((pool = pool_lookup_pool_by_id(poolid)) == NULL ||
496 (pset = pool_lookup_pset_by_id(psetid)) == NULL) {
497 return (ESRCH);
498 }
499 if (pool->pool_pset->pset_id == psetid) {
500 /*
501 * Already associated.
502 */
503 return (0);
504 }
505
506 /*
507 * Hang the new pset off the pool, and rebind all of the pool's
508 * processes to it. If pool_do_bind fails, all processes will remain
509 * bound to the old set.
510 */
511 oldpset = pool->pool_pset;
512 pool->pool_pset = pset;
513 err = pool_do_bind(pool, P_POOLID, poolid, POOL_BIND_PSET);
514 if (err) {
515 pool->pool_pset = oldpset;
516 } else {
517 struct assoc_zone_arg azarg;
518
519 /*
520 * Update zones' visibility to reflect changes.
521 */
522 azarg.poolid = poolid;
523 azarg.newpsetid = pset->pset_id;
524 mutex_enter(&cpu_lock);
525 err = zone_walk(pool_pset_assoc_zone_cb, &azarg);
526 ASSERT(err == 0);
527 mutex_exit(&cpu_lock);
528
529 oldpset->pset_npools--;
530 pset->pset_npools++;
531 }
532 return (err);
533 }
534
535 /*
536 * Transfer specified CPUs between processor sets.
537 */
538 int
pool_pset_xtransfer(psetid_t src,psetid_t dst,size_t size,id_t * ids)539 pool_pset_xtransfer(psetid_t src, psetid_t dst, size_t size, id_t *ids)
540 {
541 struct cpu *cpu;
542 int ret = 0;
543 int id;
544
545 ASSERT(pool_lock_held());
546 ASSERT(INGLOBALZONE(curproc));
547
548 if (size == 0 || size > max_ncpus) /* quick sanity check */
549 return (EINVAL);
550
551 mutex_enter(&cpu_lock);
552 for (id = 0; id < size; id++) {
553 if ((cpu = cpu_get((processorid_t)ids[id])) == NULL ||
554 cpupart_query_cpu(cpu) != src) {
555 ret = EINVAL;
556 break;
557 }
558 if ((ret = cpupart_attach_cpu(dst, cpu, 1)) != 0)
559 break;
560 }
561 mutex_exit(&cpu_lock);
562 if (ret == 0)
563 pool_pset_mod = gethrtime();
564 return (ret);
565 }
566
567 /*
568 * Bind process to processor set. This should never fail because
569 * we should've done all preliminary checks before calling it.
570 */
571 void
pool_pset_bind(proc_t * p,psetid_t psetid,void * projbuf,void * zonebuf)572 pool_pset_bind(proc_t *p, psetid_t psetid, void *projbuf, void *zonebuf)
573 {
574 kthread_t *t;
575 int ret;
576
577 ASSERT(pool_lock_held());
578 ASSERT(MUTEX_HELD(&cpu_lock));
579 ASSERT(MUTEX_HELD(&pidlock));
580 ASSERT(MUTEX_HELD(&p->p_lock));
581
582 if ((t = p->p_tlist) == NULL)
583 return;
584 do {
585 ret = cpupart_bind_thread(t, psetid, 0, projbuf, zonebuf);
586 ASSERT(ret == 0);
587 t->t_bind_pset = psetid;
588 } while ((t = t->t_forw) != p->p_tlist);
589 }
590
591 /*
592 * See the comment above pool_do_bind() for the semantics of the pset_bind_*()
593 * functions. These must be kept in sync with cpupart_move_thread, and
594 * anything else that could fail a pool_pset_bind.
595 *
596 * Returns non-zero errno on failure and zero on success.
597 * Iff successful, cpu_lock is held on return.
598 */
599 int
pset_bind_start(proc_t ** procs,pool_t * pool)600 pset_bind_start(proc_t **procs, pool_t *pool)
601 {
602 cred_t *pcred;
603 proc_t *p, **pp;
604 kthread_t *t;
605 cpupart_t *newpp;
606 int ret;
607
608 extern int cpupart_movable_thread(kthread_id_t, cpupart_t *, int);
609
610 ASSERT(pool_lock_held());
611 ASSERT(INGLOBALZONE(curproc));
612
613 mutex_enter(&cpu_lock);
614 weakbinding_stop();
615
616 newpp = cpupart_find(pool->pool_pset->pset_id);
617 ASSERT(newpp != NULL);
618 if (newpp->cp_cpulist == NULL) {
619 weakbinding_start();
620 mutex_exit(&cpu_lock);
621 return (ENOTSUP);
622 }
623
624 pcred = crgetcred();
625
626 /*
627 * Check for the PRIV_PROC_PRIOCNTL privilege that is required
628 * to enter and exit scheduling classes. If other privileges
629 * are required by CL_ENTERCLASS/CL_CANEXIT types of routines
630 * in the future, this code will have to be updated.
631 */
632 if (secpolicy_setpriority(pcred) != 0) {
633 weakbinding_start();
634 mutex_exit(&cpu_lock);
635 crfree(pcred);
636 return (EPERM);
637 }
638
639 for (pp = procs; (p = *pp) != NULL; pp++) {
640 mutex_enter(&p->p_lock);
641 if ((t = p->p_tlist) == NULL) {
642 mutex_exit(&p->p_lock);
643 continue;
644 }
645 /*
646 * Check our basic permissions to control this process.
647 */
648 if (!prochasprocperm(p, curproc, pcred)) {
649 mutex_exit(&p->p_lock);
650 weakbinding_start();
651 mutex_exit(&cpu_lock);
652 crfree(pcred);
653 return (EPERM);
654 }
655 do {
656 /*
657 * Check that all threads can be moved to
658 * a new processor set.
659 */
660 thread_lock(t);
661 ret = cpupart_movable_thread(t, newpp, 0);
662 thread_unlock(t);
663 if (ret != 0) {
664 mutex_exit(&p->p_lock);
665 weakbinding_start();
666 mutex_exit(&cpu_lock);
667 crfree(pcred);
668 return (ret);
669 }
670 } while ((t = t->t_forw) != p->p_tlist);
671 mutex_exit(&p->p_lock);
672 }
673 crfree(pcred);
674 return (0); /* with cpu_lock held and weakbinding stopped */
675 }
676
677 /*ARGSUSED*/
678 void
pset_bind_abort(proc_t ** procs,pool_t * pool)679 pset_bind_abort(proc_t **procs, pool_t *pool)
680 {
681 mutex_exit(&cpu_lock);
682 }
683
684 void
pset_bind_finish(void)685 pset_bind_finish(void)
686 {
687 weakbinding_start();
688 mutex_exit(&cpu_lock);
689 }
690
691 static pool_property_t pool_pset_props[] = {
692 { "pset.name", DATA_TYPE_STRING, PP_RDWR },
693 { "pset.comment", DATA_TYPE_STRING, PP_RDWR },
694 { "pset.sys_id", DATA_TYPE_UINT64, PP_READ },
695 { "pset.units", DATA_TYPE_STRING, PP_RDWR },
696 { "pset.default", DATA_TYPE_BYTE, PP_READ },
697 { "pset.min", DATA_TYPE_UINT64, PP_RDWR },
698 { "pset.max", DATA_TYPE_UINT64, PP_RDWR },
699 { "pset.size", DATA_TYPE_UINT64, PP_READ },
700 { "pset.load", DATA_TYPE_UINT64, PP_READ },
701 { "pset.poold.objectives", DATA_TYPE_STRING,
702 PP_RDWR | PP_OPTIONAL },
703 { NULL, 0, 0 }
704 };
705
706 static pool_property_t pool_cpu_props[] = {
707 { "cpu.sys_id", DATA_TYPE_UINT64, PP_READ },
708 { "cpu.comment", DATA_TYPE_STRING, PP_RDWR },
709 { "cpu.status", DATA_TYPE_STRING, PP_RDWR },
710 { "cpu.pinned", DATA_TYPE_BYTE,
711 PP_RDWR | PP_OPTIONAL },
712 { NULL, 0, 0 }
713 };
714
715 /*
716 * Put property on the specified processor set.
717 */
718 int
pool_pset_propput(psetid_t psetid,nvpair_t * pair)719 pool_pset_propput(psetid_t psetid, nvpair_t *pair)
720 {
721 pool_pset_t *pset;
722 int ret;
723
724 ASSERT(pool_lock_held());
725
726 if ((pset = pool_lookup_pset_by_id(psetid)) == NULL)
727 return (ESRCH);
728 ret = pool_propput_common(pset->pset_props, pair, pool_pset_props);
729 if (ret == 0)
730 pool_pset_mod = gethrtime();
731 return (ret);
732 }
733
734 /*
735 * Remove existing processor set property.
736 */
737 int
pool_pset_proprm(psetid_t psetid,char * name)738 pool_pset_proprm(psetid_t psetid, char *name)
739 {
740 pool_pset_t *pset;
741 int ret;
742
743 ASSERT(pool_lock_held());
744
745 if ((pset = pool_lookup_pset_by_id(psetid)) == NULL)
746 return (EINVAL);
747 ret = pool_proprm_common(pset->pset_props, name, pool_pset_props);
748 if (ret == 0)
749 pool_pset_mod = gethrtime();
750 return (ret);
751 }
752
753 /*
754 * Put new CPU property.
755 * Handle special case of "cpu.status".
756 */
757 int
pool_cpu_propput(processorid_t cpuid,nvpair_t * pair)758 pool_cpu_propput(processorid_t cpuid, nvpair_t *pair)
759 {
760 int ret = 0;
761 cpu_t *cpu;
762
763 ASSERT(pool_lock_held());
764 ASSERT(INGLOBALZONE(curproc));
765
766 if (nvpair_type(pair) == DATA_TYPE_STRING &&
767 strcmp(nvpair_name(pair), "cpu.status") == 0) {
768 char *val;
769 int status;
770 int old_status;
771 (void) nvpair_value_string(pair, &val);
772 if (strcmp(val, PS_OFFLINE) == 0)
773 status = P_OFFLINE;
774 else if (strcmp(val, PS_ONLINE) == 0)
775 status = P_ONLINE;
776 else if (strcmp(val, PS_NOINTR) == 0)
777 status = P_NOINTR;
778 else if (strcmp(val, PS_FAULTED) == 0)
779 status = P_FAULTED;
780 else if (strcmp(val, PS_SPARE) == 0)
781 status = P_SPARE;
782 else
783 return (EINVAL);
784 ret = p_online_internal(cpuid, status, &old_status);
785 } else {
786 mutex_enter(&cpu_lock);
787 if ((cpu = cpu_get(cpuid)) == NULL)
788 ret = EINVAL;
789 if (cpu->cpu_props == NULL) {
790 (void) nvlist_alloc(&cpu->cpu_props,
791 NV_UNIQUE_NAME, KM_SLEEP);
792 (void) nvlist_add_string(cpu->cpu_props,
793 "cpu.comment", "");
794 }
795 ret = pool_propput_common(cpu->cpu_props, pair, pool_cpu_props);
796 if (ret == 0)
797 pool_cpu_mod = gethrtime();
798 mutex_exit(&cpu_lock);
799 }
800 return (ret);
801 }
802
803 /*
804 * Remove existing CPU property.
805 */
806 int
pool_cpu_proprm(processorid_t cpuid,char * name)807 pool_cpu_proprm(processorid_t cpuid, char *name)
808 {
809 int ret;
810 cpu_t *cpu;
811
812 ASSERT(pool_lock_held());
813 ASSERT(INGLOBALZONE(curproc));
814
815 mutex_enter(&cpu_lock);
816 if ((cpu = cpu_get(cpuid)) == NULL || cpu_is_poweredoff(cpu)) {
817 ret = EINVAL;
818 } else {
819 if (cpu->cpu_props == NULL)
820 ret = EINVAL;
821 else
822 ret = pool_proprm_common(cpu->cpu_props, name,
823 pool_cpu_props);
824 }
825 if (ret == 0)
826 pool_cpu_mod = gethrtime();
827 mutex_exit(&cpu_lock);
828 return (ret);
829 }
830
/*
 * This macro returns load average multiplied by 1000 w/o losing precision.
 *
 * "f" is treated as a fixed-point value with 16 fractional bits: the
 * integer part (f >> 16) and the fractional part (f & 0xffff) are
 * scaled separately and then summed.  The argument is parenthesized at
 * every use so that an arbitrary expression can be passed safely.  It
 * is evaluated twice, so it must not have side effects.
 */
#define	PSET_LOAD(f)	\
	((((f) >> 16) * 1000) + ((((f) & 0xffff) * 1000) / 0xffff))
835
836 /*
837 * Take a snapshot of the current state of processor sets and CPUs,
838 * pack it in the exacct format, and attach it to specified exacct record.
839 */
840 int
pool_pset_pack(ea_object_t * eo_system)841 pool_pset_pack(ea_object_t *eo_system)
842 {
843 ea_object_t *eo_pset, *eo_cpu;
844 cpupart_t *cpupart;
845 psetid_t mypsetid;
846 pool_pset_t *pset;
847 nvlist_t *nvl;
848 size_t bufsz;
849 cpu_t *cpu;
850 char *buf;
851 int ncpu;
852
853 ASSERT(pool_lock_held());
854
855 mutex_enter(&cpu_lock);
856 mypsetid = zone_pset_get(curproc->p_zone);
857 for (pset = list_head(&pool_pset_list); pset;
858 pset = list_next(&pool_pset_list, pset)) {
859 psetid_t psetid = pset->pset_id;
860
861 if (!INGLOBALZONE(curproc) && mypsetid != psetid)
862 continue;
863 cpupart = cpupart_find(psetid);
864 ASSERT(cpupart != NULL);
865 eo_pset = ea_alloc_group(EXT_GROUP |
866 EXC_LOCAL | EXD_GROUP_PSET);
867 (void) ea_attach_item(eo_pset, &psetid, sizeof (id_t),
868 EXC_LOCAL | EXD_PSET_PSETID | EXT_UINT32);
869 /*
870 * Pack info for all CPUs in this processor set.
871 */
872 ncpu = 0;
873 cpu = cpu_list;
874 do {
875 if (cpu->cpu_part != cpupart) /* not our pset */
876 continue;
877 ncpu++;
878 eo_cpu = ea_alloc_group(EXT_GROUP
879 | EXC_LOCAL | EXD_GROUP_CPU);
880 (void) ea_attach_item(eo_cpu, &cpu->cpu_id,
881 sizeof (processorid_t),
882 EXC_LOCAL | EXD_CPU_CPUID | EXT_UINT32);
883 if (cpu->cpu_props == NULL) {
884 (void) nvlist_alloc(&cpu->cpu_props,
885 NV_UNIQUE_NAME, KM_SLEEP);
886 (void) nvlist_add_string(cpu->cpu_props,
887 "cpu.comment", "");
888 }
889 (void) nvlist_dup(cpu->cpu_props, &nvl, KM_SLEEP);
890 (void) nvlist_add_int64(nvl, "cpu.sys_id", cpu->cpu_id);
891 (void) nvlist_add_string(nvl, "cpu.status",
892 (char *)cpu_get_state_str(cpu));
893 buf = NULL;
894 bufsz = 0;
895 (void) nvlist_pack(nvl, &buf, &bufsz,
896 NV_ENCODE_NATIVE, 0);
897 (void) ea_attach_item(eo_cpu, buf, bufsz,
898 EXC_LOCAL | EXD_CPU_PROP | EXT_RAW);
899 (void) nvlist_free(nvl);
900 kmem_free(buf, bufsz);
901 (void) ea_attach_to_group(eo_pset, eo_cpu);
902 } while ((cpu = cpu->cpu_next) != cpu_list);
903
904 (void) nvlist_dup(pset->pset_props, &nvl, KM_SLEEP);
905 (void) nvlist_add_uint64(nvl, "pset.size", ncpu);
906 (void) nvlist_add_uint64(nvl, "pset.load",
907 (uint64_t)PSET_LOAD(cpupart->cp_hp_avenrun[0]));
908 buf = NULL;
909 bufsz = 0;
910 (void) nvlist_pack(nvl, &buf, &bufsz, NV_ENCODE_NATIVE, 0);
911 (void) ea_attach_item(eo_pset, buf, bufsz,
912 EXC_LOCAL | EXD_PSET_PROP | EXT_RAW);
913 (void) nvlist_free(nvl);
914 kmem_free(buf, bufsz);
915
916 (void) ea_attach_to_group(eo_system, eo_pset);
917 }
918 mutex_exit(&cpu_lock);
919 return (0);
920 }
921
922 /*
923 * Get dynamic property for processor sets.
924 * The only dynamic property currently implemented is "pset.load".
925 */
926 int
pool_pset_propget(psetid_t psetid,char * name,nvlist_t * nvl)927 pool_pset_propget(psetid_t psetid, char *name, nvlist_t *nvl)
928 {
929 cpupart_t *cpupart;
930 pool_pset_t *pset;
931 int ret = ESRCH;
932
933 ASSERT(pool_lock_held());
934
935 mutex_enter(&cpu_lock);
936 pset = pool_lookup_pset_by_id(psetid);
937 cpupart = cpupart_find(psetid);
938 if (cpupart == NULL || pset == NULL) {
939 mutex_exit(&cpu_lock);
940 return (EINVAL);
941 }
942 if (strcmp(name, "pset.load") == 0)
943 ret = nvlist_add_uint64(nvl, "pset.load",
944 (uint64_t)PSET_LOAD(cpupart->cp_hp_avenrun[0]));
945 else
946 ret = EINVAL;
947 mutex_exit(&cpu_lock);
948 return (ret);
949 }
950
951 /*
952 * Get dynamic property for CPUs.
953 * The only dynamic property currently implemented is "cpu.status".
954 */
955 int
pool_cpu_propget(processorid_t cpuid,char * name,nvlist_t * nvl)956 pool_cpu_propget(processorid_t cpuid, char *name, nvlist_t *nvl)
957 {
958 int ret = ESRCH;
959 cpu_t *cpu;
960
961 ASSERT(pool_lock_held());
962
963 mutex_enter(&cpu_lock);
964 if ((cpu = cpu_get(cpuid)) == NULL) {
965 mutex_exit(&cpu_lock);
966 return (ESRCH);
967 }
968 if (strcmp(name, "cpu.status") == 0) {
969 ret = nvlist_add_string(nvl, "cpu.status",
970 (char *)cpu_get_state_str(cpu));
971 } else {
972 ret = EINVAL;
973 }
974 mutex_exit(&cpu_lock);
975 return (ret);
976 }
977