1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Copyright 2019 Joyent, Inc.
29 */
30
31 #include <sys/pool.h>
32 #include <sys/pool_impl.h>
33 #include <sys/pool_pset.h>
34 #include <sys/cpuvar.h>
35 #include <sys/cpupart.h>
36 #include <sys/mutex.h>
37 #include <sys/errno.h>
38 #include <sys/systm.h>
39 #include <sys/cmn_err.h>
40 #include <sys/fss.h>
41 #include <sys/exacct.h>
42 #include <sys/time.h>
43 #include <sys/policy.h>
44 #include <sys/class.h>
45 #include <sys/list.h>
46 #include <sys/cred.h>
47 #include <sys/zone.h>
48
49 /*
50 * Processor set plugin for pools.
51 *
52 * This file contains various routines used by the common pools layer to create,
53 * modify, and destroy processor sets. All processor sets created by this
54 * plug-in are stored in the pool_pset_list doubly-linked list, which is
55 * guaranteed to always have an entry for the default processor set,
56 * pool_pset_default.
57 *
58 * Interaction with zones:
59 *
60 * If pools are enabled, non-global zones only have visibility into the
61 * pset of the pool to which they are bound. This is accomplished by
62 * changing the set of processors and processor sets which are visible
63 * through both systemcall interfaces and system kstats.
64 *
65 * To avoid grabbing pool_lock() during cpu change operations, we cache
66 * the pset the zone is currently bound to, and can read this value
67 * while under cpu_lock. The special psetid_t token ZONE_PS_INVAL means
68 * that pools are disabled, and provides a mechanism for determining the
69 * status of pools without grabbing pool_lock().
70 *
71 * To avoid grabbing any locks to determine the instantaneous value of
72 * the number of configured and online cpus in the zone, we also cache
73 * these values in a zone_t. If these values are zero, the pools
74 * facility must be disabled, in which case relevant systemcall
75 * interfaces will return the values for the system as a whole.
76 *
77 * The various kstat interfaces are dealt with as follows: if pools are
78 * disabled all cpu-related kstats should be exported to all zones.
79 * When pools are enabled we begin maintaining a list of "permitted
80 * zones" on a per-kstat basis. There are various hooks throughout the
81 * code to update this list when certain pools- or cpu-related events
82 * occur.
83 */
84
85 static list_t pool_pset_list; /* doubly-linked list of psets */
86 pool_pset_t *pool_pset_default; /* default pset */
87 hrtime_t pool_pset_mod; /* last modification time for psets */
88 hrtime_t pool_cpu_mod; /* last modification time for CPUs */
89
90 static pool_pset_t *
pool_lookup_pset_by_id(psetid_t psetid)91 pool_lookup_pset_by_id(psetid_t psetid)
92 {
93 pool_pset_t *pset = pool_pset_default;
94
95 ASSERT(pool_lock_held());
96
97 for (pset = list_head(&pool_pset_list); pset;
98 pset = list_next(&pool_pset_list, pset)) {
99 if (pset->pset_id == psetid)
100 return (pset);
101 }
102 return (NULL);
103 }
104
105 struct setup_arg {
106 psetid_t psetid;
107 cpu_t *cpu;
108 cpu_setup_t what;
109 };
110
111 /*
112 * Callback function used to apply a cpu configuration event to a zone.
113 */
114 static int
pool_pset_setup_cb(zone_t * zone,void * arg)115 pool_pset_setup_cb(zone_t *zone, void *arg)
116 {
117 struct setup_arg *sa = arg;
118
119 ASSERT(MUTEX_HELD(&cpu_lock));
120 ASSERT(INGLOBALZONE(curproc));
121 ASSERT(zone != NULL);
122
123 if (zone == global_zone)
124 return (0);
125 if (zone_pset_get(zone) != sa->psetid)
126 return (0); /* ignore */
127 switch (sa->what) {
128 case CPU_CONFIG:
129 cpu_visibility_configure(sa->cpu, zone);
130 break;
131 case CPU_UNCONFIG:
132 cpu_visibility_unconfigure(sa->cpu, zone);
133 break;
134 case CPU_ON:
135 cpu_visibility_online(sa->cpu, zone);
136 break;
137 case CPU_OFF:
138 cpu_visibility_offline(sa->cpu, zone);
139 break;
140 case CPU_CPUPART_IN:
141 cpu_visibility_add(sa->cpu, zone);
142 break;
143 case CPU_CPUPART_OUT:
144 cpu_visibility_remove(sa->cpu, zone);
145 break;
146 default:
147 cmn_err(CE_PANIC, "invalid cpu_setup_t value %d", sa->what);
148 }
149 return (0);
150 }
151
152 /*
153 * Callback function to be executed when a noteworthy cpu event takes
154 * place. Will ensure that the event is reflected by the zones which
155 * were affected by it.
156 */
157 /* ARGSUSED */
158 static int
pool_pset_cpu_setup(cpu_setup_t what,int id,void * arg)159 pool_pset_cpu_setup(cpu_setup_t what, int id, void *arg)
160 {
161 processorid_t cpuid = id;
162 struct setup_arg sarg;
163 int error;
164 cpu_t *c;
165
166 ASSERT(MUTEX_HELD(&cpu_lock));
167 ASSERT(INGLOBALZONE(curproc));
168
169 if (!pool_pset_enabled())
170 return (0);
171 if (what != CPU_CONFIG && what != CPU_UNCONFIG &&
172 what != CPU_ON && what != CPU_OFF &&
173 what != CPU_CPUPART_IN && what != CPU_CPUPART_OUT)
174 return (0);
175 c = cpu_get(cpuid);
176 ASSERT(c != NULL);
177 sarg.psetid = cpupart_query_cpu(c);
178 sarg.cpu = c;
179 sarg.what = what;
180
181 error = zone_walk(pool_pset_setup_cb, &sarg);
182 ASSERT(error == 0);
183 return (0);
184 }
185
186 /*
187 * Initialize processor set plugin. Called once at boot time.
188 */
189 void
pool_pset_init(void)190 pool_pset_init(void)
191 {
192 ASSERT(pool_pset_default == NULL);
193 pool_pset_default = kmem_zalloc(sizeof (pool_pset_t), KM_SLEEP);
194 pool_pset_default->pset_id = PS_NONE;
195 pool_pset_default->pset_npools = 1; /* for pool_default */
196 pool_default->pool_pset = pool_pset_default;
197 list_create(&pool_pset_list, sizeof (pool_pset_t),
198 offsetof(pool_pset_t, pset_link));
199 list_insert_head(&pool_pset_list, pool_pset_default);
200 mutex_enter(&cpu_lock);
201 register_cpu_setup_func(pool_pset_cpu_setup, NULL);
202 mutex_exit(&cpu_lock);
203 }
204
205 /*
206 * Dummy wrapper function that returns 0 to satisfy zone_walk().
207 */
208 static int
pool_pset_zone_pset_set(zone_t * zone,void * arg)209 pool_pset_zone_pset_set(zone_t *zone, void *arg)
210 {
211 psetid_t psetid = (psetid_t)(uintptr_t)arg;
212
213 ASSERT(MUTEX_HELD(&cpu_lock));
214 zone_pset_set(zone, psetid);
215 return (0);
216 }
217
218 /*
219 * Enable processor set plugin.
220 */
221 int
pool_pset_enable(void)222 pool_pset_enable(void)
223 {
224 int error;
225 nvlist_t *props;
226
227 ASSERT(pool_lock_held());
228 ASSERT(INGLOBALZONE(curproc));
229 /*
230 * Can't enable pools if there are existing cpu partitions.
231 */
232 mutex_enter(&cpu_lock);
233 if (cp_numparts > 1) {
234 mutex_exit(&cpu_lock);
235 return (EEXIST);
236 }
237
238 /*
239 * We want to switch things such that everything that was tagged with
240 * the special ALL_ZONES token now is explicitly visible to all zones:
241 * first add individual zones to the visibility list then remove the
242 * special "ALL_ZONES" token. There must only be the default pset
243 * (PS_NONE) active if pools are being enabled, so we only need to
244 * deal with it.
245 *
246 * We want to make pool_pset_enabled() start returning B_TRUE before
247 * we call any of the visibility update functions.
248 */
249 global_zone->zone_psetid = PS_NONE;
250 /*
251 * We need to explicitly handle the global zone since
252 * zone_pset_set() won't modify it.
253 */
254 pool_pset_visibility_add(PS_NONE, global_zone);
255 /*
256 * A NULL argument means the ALL_ZONES token.
257 */
258 pool_pset_visibility_remove(PS_NONE, NULL);
259 error = zone_walk(pool_pset_zone_pset_set, (void *)PS_NONE);
260 ASSERT(error == 0);
261
262 /*
263 * It is safe to drop cpu_lock here. We're still
264 * holding pool_lock so no new cpu partitions can
265 * be created while we're here.
266 */
267 mutex_exit(&cpu_lock);
268 (void) nvlist_alloc(&pool_pset_default->pset_props,
269 NV_UNIQUE_NAME, KM_SLEEP);
270 props = pool_pset_default->pset_props;
271 (void) nvlist_add_string(props, "pset.name", "pset_default");
272 (void) nvlist_add_string(props, "pset.comment", "");
273 (void) nvlist_add_int64(props, "pset.sys_id", PS_NONE);
274 (void) nvlist_add_string(props, "pset.units", "population");
275 (void) nvlist_add_byte(props, "pset.default", 1);
276 (void) nvlist_add_uint64(props, "pset.max", 65536);
277 (void) nvlist_add_uint64(props, "pset.min", 1);
278 pool_pset_mod = pool_cpu_mod = gethrtime();
279 return (0);
280 }
281
282 /*
283 * Disable processor set plugin.
284 */
285 int
pool_pset_disable(void)286 pool_pset_disable(void)
287 {
288 processorid_t cpuid;
289 cpu_t *cpu;
290 int error;
291
292 ASSERT(pool_lock_held());
293 ASSERT(INGLOBALZONE(curproc));
294
295 mutex_enter(&cpu_lock);
296 if (cp_numparts > 1) { /* make sure only default pset is left */
297 mutex_exit(&cpu_lock);
298 return (EBUSY);
299 }
300 /*
301 * Remove all non-system CPU and processor set properties
302 */
303 for (cpuid = 0; cpuid < NCPU; cpuid++) {
304 if ((cpu = cpu_get(cpuid)) == NULL)
305 continue;
306 if (cpu->cpu_props != NULL) {
307 (void) nvlist_free(cpu->cpu_props);
308 cpu->cpu_props = NULL;
309 }
310 }
311
312 /*
313 * We want to switch things such that everything is now visible
314 * to ALL_ZONES: first add the special "ALL_ZONES" token to the
315 * visibility list then remove individual zones. There must
316 * only be the default pset active if pools are being disabled,
317 * so we only need to deal with it.
318 */
319 error = zone_walk(pool_pset_zone_pset_set, (void *)ZONE_PS_INVAL);
320 ASSERT(error == 0);
321 pool_pset_visibility_add(PS_NONE, NULL);
322 pool_pset_visibility_remove(PS_NONE, global_zone);
323 /*
324 * pool_pset_enabled() will henceforth return B_FALSE.
325 */
326 global_zone->zone_psetid = ZONE_PS_INVAL;
327 mutex_exit(&cpu_lock);
328 if (pool_pset_default->pset_props != NULL) {
329 nvlist_free(pool_pset_default->pset_props);
330 pool_pset_default->pset_props = NULL;
331 }
332 return (0);
333 }
334
335 /*
336 * Create new processor set and give it a temporary name.
337 */
338 int
pool_pset_create(psetid_t * id)339 pool_pset_create(psetid_t *id)
340 {
341 char pset_name[40];
342 pool_pset_t *pset;
343 psetid_t psetid;
344 int err;
345
346 ASSERT(pool_lock_held());
347 if ((err = cpupart_create(&psetid)) != 0)
348 return (err);
349 pset = kmem_alloc(sizeof (pool_pset_t), KM_SLEEP);
350 pset->pset_id = *id = psetid;
351 pset->pset_npools = 0;
352 (void) nvlist_alloc(&pset->pset_props, NV_UNIQUE_NAME, KM_SLEEP);
353 (void) nvlist_add_int64(pset->pset_props, "pset.sys_id", psetid);
354 (void) nvlist_add_byte(pset->pset_props, "pset.default", 0);
355 pool_pset_mod = gethrtime();
356 (void) snprintf(pset_name, sizeof (pset_name), "pset_%lld",
357 pool_pset_mod);
358 (void) nvlist_add_string(pset->pset_props, "pset.name", pset_name);
359 list_insert_tail(&pool_pset_list, pset);
360 return (0);
361 }
362
363 /*
364 * Destroy existing processor set.
365 */
366 int
pool_pset_destroy(psetid_t psetid)367 pool_pset_destroy(psetid_t psetid)
368 {
369 pool_pset_t *pset;
370 int ret;
371
372 ASSERT(pool_lock_held());
373
374 if (psetid == PS_NONE)
375 return (EINVAL);
376 if ((pset = pool_lookup_pset_by_id(psetid)) == NULL)
377 return (ESRCH);
378 if (pset->pset_npools > 0) /* can't destroy associated psets */
379 return (EBUSY);
380 if ((ret = cpupart_destroy(pset->pset_id)) != 0)
381 return (ret);
382 (void) nvlist_free(pset->pset_props);
383 list_remove(&pool_pset_list, pset);
384 pool_pset_mod = gethrtime();
385 kmem_free(pset, sizeof (pool_pset_t));
386 return (0);
387 }
388
389 /*
390 * Change the visibility of a pset (and all contained cpus) in a zone.
391 * A NULL zone argument implies the special ALL_ZONES token.
392 */
393 static void
pool_pset_visibility_change(psetid_t psetid,zone_t * zone,boolean_t add)394 pool_pset_visibility_change(psetid_t psetid, zone_t *zone, boolean_t add)
395 {
396 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES;
397 cpupart_t *cp;
398 cpu_t *c;
399
400 ASSERT(MUTEX_HELD(&cpu_lock));
401 ASSERT(psetid != ZONE_PS_INVAL);
402
403 cp = cpupart_find(psetid);
404 ASSERT(cp != NULL);
405 if (cp->cp_kstat != NULL) {
406 if (add)
407 kstat_zone_add(cp->cp_kstat, zoneid);
408 else
409 kstat_zone_remove(cp->cp_kstat, zoneid);
410 }
411
412 c = cpu_list;
413 do {
414 ASSERT(c != NULL);
415 if (c->cpu_part == cp && !cpu_is_poweredoff(c)) {
416 if (add)
417 cpu_visibility_add(c, zone);
418 else
419 cpu_visibility_remove(c, zone);
420 }
421 } while ((c = c->cpu_next) != cpu_list);
422 }
423
424 /*
425 * Make the processor set visible to the zone. A NULL value for
426 * the zone means that the special ALL_ZONES token should be added to
427 * the visibility list.
428 */
429 void
pool_pset_visibility_add(psetid_t psetid,zone_t * zone)430 pool_pset_visibility_add(psetid_t psetid, zone_t *zone)
431 {
432 pool_pset_visibility_change(psetid, zone, B_TRUE);
433 }
434
435 /*
436 * Remove zone's visibility into the processor set. A NULL value for
437 * the zone means that the special ALL_ZONES token should be removed
438 * from the visibility list.
439 */
440 void
pool_pset_visibility_remove(psetid_t psetid,zone_t * zone)441 pool_pset_visibility_remove(psetid_t psetid, zone_t *zone)
442 {
443 pool_pset_visibility_change(psetid, zone, B_FALSE);
444 }
445
446 /*
447 * Quick way of seeing if pools are enabled (as far as processor sets are
448 * concerned) without holding pool_lock().
449 */
450 boolean_t
pool_pset_enabled(void)451 pool_pset_enabled(void)
452 {
453 ASSERT(MUTEX_HELD(&cpu_lock));
454
455 return (zone_pset_get(global_zone) != ZONE_PS_INVAL);
456 }
457
458 struct assoc_zone_arg {
459 poolid_t poolid;
460 psetid_t newpsetid;
461 };
462
463 /*
464 * Callback function to update a zone's processor set visibility when
465 * a pool is associated with a processor set.
466 */
467 static int
pool_pset_assoc_zone_cb(zone_t * zone,void * arg)468 pool_pset_assoc_zone_cb(zone_t *zone, void *arg)
469 {
470 struct assoc_zone_arg *aza = arg;
471 pool_t *pool;
472 zoneid_t zoneid = zone->zone_id;
473
474 ASSERT(pool_lock_held());
475 ASSERT(MUTEX_HELD(&cpu_lock));
476
477 if (zoneid == GLOBAL_ZONEID)
478 return (0);
479 pool = zone_pool_get(zone);
480 if (pool->pool_id == aza->poolid)
481 zone_pset_set(zone, aza->newpsetid);
482 return (0);
483 }
484
485 /*
486 * Associate pool with new processor set.
487 */
488 int
pool_pset_assoc(poolid_t poolid,psetid_t psetid)489 pool_pset_assoc(poolid_t poolid, psetid_t psetid)
490 {
491 pool_t *pool;
492 pool_pset_t *pset, *oldpset;
493 int err = 0;
494
495 ASSERT(pool_lock_held());
496
497 if ((pool = pool_lookup_pool_by_id(poolid)) == NULL ||
498 (pset = pool_lookup_pset_by_id(psetid)) == NULL) {
499 return (ESRCH);
500 }
501 if (pool->pool_pset->pset_id == psetid) {
502 /*
503 * Already associated.
504 */
505 return (0);
506 }
507
508 /*
509 * Hang the new pset off the pool, and rebind all of the pool's
510 * processes to it. If pool_do_bind fails, all processes will remain
511 * bound to the old set.
512 */
513 oldpset = pool->pool_pset;
514 pool->pool_pset = pset;
515 err = pool_do_bind(pool, P_POOLID, poolid, POOL_BIND_PSET);
516 if (err) {
517 pool->pool_pset = oldpset;
518 } else {
519 struct assoc_zone_arg azarg;
520
521 /*
522 * Update zones' visibility to reflect changes.
523 */
524 azarg.poolid = poolid;
525 azarg.newpsetid = pset->pset_id;
526 mutex_enter(&cpu_lock);
527 err = zone_walk(pool_pset_assoc_zone_cb, &azarg);
528 ASSERT(err == 0);
529 mutex_exit(&cpu_lock);
530
531 oldpset->pset_npools--;
532 pset->pset_npools++;
533 }
534 return (err);
535 }
536
537 /*
538 * Transfer specified CPUs between processor sets.
539 */
540 int
pool_pset_xtransfer(psetid_t src,psetid_t dst,size_t size,id_t * ids)541 pool_pset_xtransfer(psetid_t src, psetid_t dst, size_t size, id_t *ids)
542 {
543 struct cpu *cpu;
544 int ret = 0;
545 int id;
546
547 ASSERT(pool_lock_held());
548 ASSERT(INGLOBALZONE(curproc));
549
550 if (size == 0 || size > max_ncpus) /* quick sanity check */
551 return (EINVAL);
552
553 mutex_enter(&cpu_lock);
554 for (id = 0; id < size; id++) {
555 if ((cpu = cpu_get((processorid_t)ids[id])) == NULL ||
556 cpupart_query_cpu(cpu) != src) {
557 ret = EINVAL;
558 break;
559 }
560 if ((ret = cpupart_attach_cpu(dst, cpu, 1)) != 0)
561 break;
562 }
563 mutex_exit(&cpu_lock);
564 if (ret == 0)
565 pool_pset_mod = gethrtime();
566 return (ret);
567 }
568
569 /*
570 * Bind process to processor set. This should never fail because
571 * we should've done all preliminary checks before calling it.
572 */
573 void
pool_pset_bind(proc_t * p,psetid_t psetid,void * projbuf,void * zonebuf)574 pool_pset_bind(proc_t *p, psetid_t psetid, void *projbuf, void *zonebuf)
575 {
576 kthread_t *t;
577 int ret;
578
579 ASSERT(pool_lock_held());
580 ASSERT(MUTEX_HELD(&cpu_lock));
581 ASSERT(MUTEX_HELD(&pidlock));
582 ASSERT(MUTEX_HELD(&p->p_lock));
583
584 if ((t = p->p_tlist) == NULL)
585 return;
586 do {
587 ret = cpupart_bind_thread(t, psetid, 0, projbuf, zonebuf);
588 ASSERT(ret == 0);
589 t->t_bind_pset = psetid;
590 } while ((t = t->t_forw) != p->p_tlist);
591 }
592
593 /*
594 * See the comment above pool_do_bind() for the semantics of the pset_bind_*()
595 * functions. These must be kept in sync with cpupart_move_thread, and
596 * anything else that could fail a pool_pset_bind.
597 *
598 * Returns non-zero errno on failure and zero on success.
599 * Iff successful, cpu_lock is held on return.
600 */
601 int
pset_bind_start(proc_t ** procs,pool_t * pool)602 pset_bind_start(proc_t **procs, pool_t *pool)
603 {
604 cred_t *pcred;
605 proc_t *p, **pp;
606 kthread_t *t;
607 cpupart_t *newpp;
608 int ret;
609
610 extern int cpupart_movable_thread(kthread_id_t, cpupart_t *, int);
611
612 ASSERT(pool_lock_held());
613 ASSERT(INGLOBALZONE(curproc));
614
615 mutex_enter(&cpu_lock);
616 weakbinding_stop();
617
618 newpp = cpupart_find(pool->pool_pset->pset_id);
619 ASSERT(newpp != NULL);
620 if (newpp->cp_cpulist == NULL) {
621 weakbinding_start();
622 mutex_exit(&cpu_lock);
623 return (ENOTSUP);
624 }
625
626 pcred = crgetcred();
627
628 /*
629 * Check for the PRIV_PROC_PRIOCNTL privilege that is required
630 * to enter and exit scheduling classes. If other privileges
631 * are required by CL_ENTERCLASS/CL_CANEXIT types of routines
632 * in the future, this code will have to be updated.
633 */
634 if (secpolicy_setpriority(pcred) != 0) {
635 weakbinding_start();
636 mutex_exit(&cpu_lock);
637 crfree(pcred);
638 return (EPERM);
639 }
640
641 for (pp = procs; (p = *pp) != NULL; pp++) {
642 mutex_enter(&p->p_lock);
643 if ((t = p->p_tlist) == NULL) {
644 mutex_exit(&p->p_lock);
645 continue;
646 }
647 /*
648 * Check our basic permissions to control this process.
649 */
650 if (!prochasprocperm(p, curproc, pcred)) {
651 mutex_exit(&p->p_lock);
652 weakbinding_start();
653 mutex_exit(&cpu_lock);
654 crfree(pcred);
655 return (EPERM);
656 }
657 do {
658 /*
659 * Check that all threads can be moved to
660 * a new processor set.
661 */
662 thread_lock(t);
663 ret = cpupart_movable_thread(t, newpp, 0);
664 thread_unlock(t);
665 if (ret != 0) {
666 mutex_exit(&p->p_lock);
667 weakbinding_start();
668 mutex_exit(&cpu_lock);
669 crfree(pcred);
670 return (ret);
671 }
672 } while ((t = t->t_forw) != p->p_tlist);
673 mutex_exit(&p->p_lock);
674 }
675 crfree(pcred);
676 return (0); /* with cpu_lock held and weakbinding stopped */
677 }
678
679 /*ARGSUSED*/
680 void
pset_bind_abort(proc_t ** procs,pool_t * pool)681 pset_bind_abort(proc_t **procs, pool_t *pool)
682 {
683 mutex_exit(&cpu_lock);
684 }
685
686 void
pset_bind_finish(void)687 pset_bind_finish(void)
688 {
689 weakbinding_start();
690 mutex_exit(&cpu_lock);
691 }
692
693 static pool_property_t pool_pset_props[] = {
694 { "pset.name", DATA_TYPE_STRING, PP_RDWR },
695 { "pset.comment", DATA_TYPE_STRING, PP_RDWR },
696 { "pset.sys_id", DATA_TYPE_UINT64, PP_READ },
697 { "pset.units", DATA_TYPE_STRING, PP_RDWR },
698 { "pset.default", DATA_TYPE_BYTE, PP_READ },
699 { "pset.min", DATA_TYPE_UINT64, PP_RDWR },
700 { "pset.max", DATA_TYPE_UINT64, PP_RDWR },
701 { "pset.size", DATA_TYPE_UINT64, PP_READ },
702 { "pset.load", DATA_TYPE_UINT64, PP_READ },
703 { "pset.poold.objectives", DATA_TYPE_STRING,
704 PP_RDWR | PP_OPTIONAL },
705 { NULL, 0, 0 }
706 };
707
708 static pool_property_t pool_cpu_props[] = {
709 { "cpu.sys_id", DATA_TYPE_UINT64, PP_READ },
710 { "cpu.comment", DATA_TYPE_STRING, PP_RDWR },
711 { "cpu.status", DATA_TYPE_STRING, PP_RDWR },
712 { "cpu.pinned", DATA_TYPE_BYTE,
713 PP_RDWR | PP_OPTIONAL },
714 { NULL, 0, 0 }
715 };
716
717 /*
718 * Put property on the specified processor set.
719 */
720 int
pool_pset_propput(psetid_t psetid,nvpair_t * pair)721 pool_pset_propput(psetid_t psetid, nvpair_t *pair)
722 {
723 pool_pset_t *pset;
724 int ret;
725
726 ASSERT(pool_lock_held());
727
728 if ((pset = pool_lookup_pset_by_id(psetid)) == NULL)
729 return (ESRCH);
730 ret = pool_propput_common(pset->pset_props, pair, pool_pset_props);
731 if (ret == 0)
732 pool_pset_mod = gethrtime();
733 return (ret);
734 }
735
736 /*
737 * Remove existing processor set property.
738 */
739 int
pool_pset_proprm(psetid_t psetid,char * name)740 pool_pset_proprm(psetid_t psetid, char *name)
741 {
742 pool_pset_t *pset;
743 int ret;
744
745 ASSERT(pool_lock_held());
746
747 if ((pset = pool_lookup_pset_by_id(psetid)) == NULL)
748 return (EINVAL);
749 ret = pool_proprm_common(pset->pset_props, name, pool_pset_props);
750 if (ret == 0)
751 pool_pset_mod = gethrtime();
752 return (ret);
753 }
754
755 /*
756 * Put new CPU property.
757 * Handle special case of "cpu.status".
758 */
759 int
pool_cpu_propput(processorid_t cpuid,nvpair_t * pair)760 pool_cpu_propput(processorid_t cpuid, nvpair_t *pair)
761 {
762 int ret = 0;
763 cpu_t *cpu;
764
765 ASSERT(pool_lock_held());
766 ASSERT(INGLOBALZONE(curproc));
767
768 if (nvpair_type(pair) == DATA_TYPE_STRING &&
769 strcmp(nvpair_name(pair), "cpu.status") == 0) {
770 char *val;
771 int status;
772 int old_status;
773 (void) nvpair_value_string(pair, &val);
774 if (strcmp(val, PS_OFFLINE) == 0)
775 status = P_OFFLINE;
776 else if (strcmp(val, PS_ONLINE) == 0)
777 status = P_ONLINE;
778 else if (strcmp(val, PS_NOINTR) == 0)
779 status = P_NOINTR;
780 else if (strcmp(val, PS_FAULTED) == 0)
781 status = P_FAULTED;
782 else if (strcmp(val, PS_SPARE) == 0)
783 status = P_SPARE;
784 else
785 return (EINVAL);
786 ret = p_online_internal(cpuid, status, &old_status);
787 } else {
788 mutex_enter(&cpu_lock);
789 if ((cpu = cpu_get(cpuid)) == NULL)
790 ret = EINVAL;
791 if (cpu->cpu_props == NULL) {
792 (void) nvlist_alloc(&cpu->cpu_props,
793 NV_UNIQUE_NAME, KM_SLEEP);
794 (void) nvlist_add_string(cpu->cpu_props,
795 "cpu.comment", "");
796 }
797 ret = pool_propput_common(cpu->cpu_props, pair, pool_cpu_props);
798 if (ret == 0)
799 pool_cpu_mod = gethrtime();
800 mutex_exit(&cpu_lock);
801 }
802 return (ret);
803 }
804
805 /*
806 * Remove existing CPU property.
807 */
808 int
pool_cpu_proprm(processorid_t cpuid,char * name)809 pool_cpu_proprm(processorid_t cpuid, char *name)
810 {
811 int ret;
812 cpu_t *cpu;
813
814 ASSERT(pool_lock_held());
815 ASSERT(INGLOBALZONE(curproc));
816
817 mutex_enter(&cpu_lock);
818 if ((cpu = cpu_get(cpuid)) == NULL || cpu_is_poweredoff(cpu)) {
819 ret = EINVAL;
820 } else {
821 if (cpu->cpu_props == NULL)
822 ret = EINVAL;
823 else
824 ret = pool_proprm_common(cpu->cpu_props, name,
825 pool_cpu_props);
826 }
827 if (ret == 0)
828 pool_cpu_mod = gethrtime();
829 mutex_exit(&cpu_lock);
830 return (ret);
831 }
832
/*
 * Convert a fixed-point load average (integer part above bit 16,
 * fraction in the low 16 bits) into the load average multiplied by 1000.
 * The integer and fractional parts are scaled separately.
 *
 * The argument is fully parenthesized so that an expression such as
 * `a | b' is treated as one value. Note that it is evaluated twice, so
 * it must not have side effects.
 */
#define	PSET_LOAD(f)	\
	((((f) >> 16) * 1000) + ((((f) & 0xffff) * 1000) / 0xffff))
837
838 /*
839 * Take a snapshot of the current state of processor sets and CPUs,
840 * pack it in the exacct format, and attach it to specified exacct record.
841 */
842 int
pool_pset_pack(ea_object_t * eo_system)843 pool_pset_pack(ea_object_t *eo_system)
844 {
845 ea_object_t *eo_pset, *eo_cpu;
846 cpupart_t *cpupart;
847 psetid_t mypsetid;
848 pool_pset_t *pset;
849 nvlist_t *nvl;
850 size_t bufsz;
851 cpu_t *cpu;
852 char *buf;
853 int ncpu;
854
855 ASSERT(pool_lock_held());
856
857 mutex_enter(&cpu_lock);
858 mypsetid = zone_pset_get(curproc->p_zone);
859 for (pset = list_head(&pool_pset_list); pset;
860 pset = list_next(&pool_pset_list, pset)) {
861 psetid_t psetid = pset->pset_id;
862
863 if (!INGLOBALZONE(curproc) && mypsetid != psetid)
864 continue;
865 cpupart = cpupart_find(psetid);
866 ASSERT(cpupart != NULL);
867 eo_pset = ea_alloc_group(EXT_GROUP |
868 EXC_LOCAL | EXD_GROUP_PSET);
869 (void) ea_attach_item(eo_pset, &psetid, sizeof (id_t),
870 EXC_LOCAL | EXD_PSET_PSETID | EXT_UINT32);
871 /*
872 * Pack info for all CPUs in this processor set.
873 */
874 ncpu = 0;
875 cpu = cpu_list;
876 do {
877 if (cpu->cpu_part != cpupart) /* not our pset */
878 continue;
879 ncpu++;
880 eo_cpu = ea_alloc_group(EXT_GROUP
881 | EXC_LOCAL | EXD_GROUP_CPU);
882 (void) ea_attach_item(eo_cpu, &cpu->cpu_id,
883 sizeof (processorid_t),
884 EXC_LOCAL | EXD_CPU_CPUID | EXT_UINT32);
885 if (cpu->cpu_props == NULL) {
886 (void) nvlist_alloc(&cpu->cpu_props,
887 NV_UNIQUE_NAME, KM_SLEEP);
888 (void) nvlist_add_string(cpu->cpu_props,
889 "cpu.comment", "");
890 }
891 (void) nvlist_dup(cpu->cpu_props, &nvl, KM_SLEEP);
892 (void) nvlist_add_int64(nvl, "cpu.sys_id", cpu->cpu_id);
893 (void) nvlist_add_string(nvl, "cpu.status",
894 (char *)cpu_get_state_str(cpu->cpu_flags));
895 buf = NULL;
896 bufsz = 0;
897 (void) nvlist_pack(nvl, &buf, &bufsz,
898 NV_ENCODE_NATIVE, 0);
899 (void) ea_attach_item(eo_cpu, buf, bufsz,
900 EXC_LOCAL | EXD_CPU_PROP | EXT_RAW);
901 (void) nvlist_free(nvl);
902 kmem_free(buf, bufsz);
903 (void) ea_attach_to_group(eo_pset, eo_cpu);
904 } while ((cpu = cpu->cpu_next) != cpu_list);
905
906 (void) nvlist_dup(pset->pset_props, &nvl, KM_SLEEP);
907 (void) nvlist_add_uint64(nvl, "pset.size", ncpu);
908 (void) nvlist_add_uint64(nvl, "pset.load",
909 (uint64_t)PSET_LOAD(cpupart->cp_hp_avenrun[0]));
910 buf = NULL;
911 bufsz = 0;
912 (void) nvlist_pack(nvl, &buf, &bufsz, NV_ENCODE_NATIVE, 0);
913 (void) ea_attach_item(eo_pset, buf, bufsz,
914 EXC_LOCAL | EXD_PSET_PROP | EXT_RAW);
915 (void) nvlist_free(nvl);
916 kmem_free(buf, bufsz);
917
918 (void) ea_attach_to_group(eo_system, eo_pset);
919 }
920 mutex_exit(&cpu_lock);
921 return (0);
922 }
923
924 /*
925 * Get dynamic property for processor sets.
926 * The only dynamic property currently implemented is "pset.load".
927 */
928 int
pool_pset_propget(psetid_t psetid,char * name,nvlist_t * nvl)929 pool_pset_propget(psetid_t psetid, char *name, nvlist_t *nvl)
930 {
931 cpupart_t *cpupart;
932 pool_pset_t *pset;
933 int ret = ESRCH;
934
935 ASSERT(pool_lock_held());
936
937 mutex_enter(&cpu_lock);
938 pset = pool_lookup_pset_by_id(psetid);
939 cpupart = cpupart_find(psetid);
940 if (cpupart == NULL || pset == NULL) {
941 mutex_exit(&cpu_lock);
942 return (EINVAL);
943 }
944 if (strcmp(name, "pset.load") == 0)
945 ret = nvlist_add_uint64(nvl, "pset.load",
946 (uint64_t)PSET_LOAD(cpupart->cp_hp_avenrun[0]));
947 else
948 ret = EINVAL;
949 mutex_exit(&cpu_lock);
950 return (ret);
951 }
952
953 /*
954 * Get dynamic property for CPUs.
955 * The only dynamic property currently implemented is "cpu.status".
956 */
957 int
pool_cpu_propget(processorid_t cpuid,char * name,nvlist_t * nvl)958 pool_cpu_propget(processorid_t cpuid, char *name, nvlist_t *nvl)
959 {
960 int ret = ESRCH;
961 cpu_t *cpu;
962
963 ASSERT(pool_lock_held());
964
965 mutex_enter(&cpu_lock);
966 if ((cpu = cpu_get(cpuid)) == NULL) {
967 mutex_exit(&cpu_lock);
968 return (ESRCH);
969 }
970 if (strcmp(name, "cpu.status") == 0) {
971 ret = nvlist_add_string(nvl, "cpu.status",
972 (char *)cpu_get_state_str(cpu->cpu_flags));
973 } else {
974 ret = EINVAL;
975 }
976 mutex_exit(&cpu_lock);
977 return (ret);
978 }
979