1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2015 Joyent, Inc.
26 */
27
28 /*
29 * lgroup system calls
30 */
31
32 #include <sys/types.h>
33 #include <sys/errno.h>
34 #include <sys/sunddi.h>
35 #include <sys/systm.h>
36 #include <sys/mman.h>
37 #include <sys/cpupart.h>
38 #include <sys/lgrp.h>
39 #include <sys/lgrp_user.h>
40 #include <sys/promif.h> /* for prom_printf() */
41 #include <sys/sysmacros.h>
42 #include <sys/policy.h>
43
44 #include <vm/as.h>
45
46
47 /* definitions for mi_validity */
48 #define VALID_ADDR 1
49 #define VALID_REQ 2
50
51 /*
52 * run through the given number of addresses and requests and return the
53 * corresponding memory information for each address
54 */
55 static int
56 meminfo(int addr_count, struct meminfo *mip)
57 {
58 size_t in_size, out_size, req_size, val_size;
59 struct as *as;
60 struct hat *hat;
61 int i, j, out_idx, info_count;
62 lgrp_t *lgrp;
63 pfn_t pfn;
64 ssize_t pgsz;
65 int *req_array, *val_array;
66 uint64_t *in_array, *out_array;
67 uint64_t addr, paddr;
68 uintptr_t vaddr;
69 int ret = 0;
70 struct meminfo minfo;
71 #if defined(_SYSCALL32_IMPL)
72 struct meminfo32 minfo32;
73 #endif
74
75 /*
76 * Make sure that there is at least one address to translate and
77 * limit how many virtual addresses the kernel will translate per call
78 */
79 if (addr_count < 1)
80 return (set_errno(EINVAL));
81 else if (addr_count > MAX_MEMINFO_CNT)
82 addr_count = MAX_MEMINFO_CNT;
83
84 if (get_udatamodel() == DATAMODEL_NATIVE) {
85 if (copyin(mip, &minfo, sizeof (struct meminfo)))
86 return (set_errno(EFAULT));
87 }
88 #if defined(_SYSCALL32_IMPL)
89 else {
90 bzero(&minfo, sizeof (minfo));
91 if (copyin(mip, &minfo32, sizeof (struct meminfo32)))
92 return (set_errno(EFAULT));
93 minfo.mi_inaddr = (const uint64_t *)(uintptr_t)
94 minfo32.mi_inaddr;
95 minfo.mi_info_req = (const uint_t *)(uintptr_t)
96 minfo32.mi_info_req;
97 minfo.mi_info_count = minfo32.mi_info_count;
98 minfo.mi_outdata = (uint64_t *)(uintptr_t)
99 minfo32.mi_outdata;
100 minfo.mi_validity = (uint_t *)(uintptr_t)
101 minfo32.mi_validity;
102 }
103 #endif
104 /*
105 * all the input parameters have been copied in:-
106 * addr_count - number of input addresses
107 * minfo.mi_inaddr - array of input addresses
108 * minfo.mi_info_req - array of types of information requested
109 * minfo.mi_info_count - no. of pieces of info requested for each addr
110 * minfo.mi_outdata - array into which the results are placed
111 * minfo.mi_validity - array containing bitwise result codes; 0th bit
112 * evaluates validity of corresponding input
113 * address, 1st bit validity of response to first
114 * member of info_req, etc.
115 */
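	/*
	 * Worked example (illustrative, not part of the original comment):
	 * with info_count == 2 and both requests satisfied for input
	 * address i, mi_validity[i] ends up as
	 * VALID_ADDR | (VALID_REQ << 0) | (VALID_REQ << 1), i.e. 0x7;
	 * an address with no mapping at all leaves mi_validity[i] == 0.
	 */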
116
117 /* make sure mi_info_count is within limit */
118 info_count = minfo.mi_info_count;
119 if (info_count < 1 || info_count > MAX_MEMINFO_REQ)
120 return (set_errno(EINVAL));
121
122 /*
123 * allocate buffer in_array for the input addresses and copy them in
124 */
125 in_size = sizeof (uint64_t) * addr_count;
126 in_array = kmem_alloc(in_size, KM_SLEEP);
127 if (copyin(minfo.mi_inaddr, in_array, in_size)) {
128 kmem_free(in_array, in_size);
129 return (set_errno(EFAULT));
130 }
131
132 /*
133 * allocate buffer req_array for the input info_reqs and copy them in
134 */
135 req_size = sizeof (uint_t) * info_count;
136 req_array = kmem_alloc(req_size, KM_SLEEP);
137 if (copyin(minfo.mi_info_req, req_array, req_size)) {
138 kmem_free(req_array, req_size);
139 kmem_free(in_array, in_size);
140 return (set_errno(EFAULT));
141 }
142
143 /*
144 * Validate privs for each req.
145 */
146 for (i = 0; i < info_count; i++) {
147 switch (req_array[i] & MEMINFO_MASK) {
148 case MEMINFO_VLGRP:
149 case MEMINFO_VPAGESIZE:
150 break;
151 default:
152 if (secpolicy_meminfo(CRED()) != 0) {
153 kmem_free(req_array, req_size);
154 kmem_free(in_array, in_size);
155 return (set_errno(EPERM));
156 }
157 break;
158 }
159 }
160
161 /*
162 * allocate buffer out_array which holds the results and will have
163 * to be copied out later
164 */
165 out_size = sizeof (uint64_t) * addr_count * info_count;
166 out_array = kmem_alloc(out_size, KM_SLEEP);
167
168 /*
169 * allocate buffer val_array which holds the validity bits and will
170 * have to be copied out later
171 */
172 val_size = sizeof (uint_t) * addr_count;
173 val_array = kmem_alloc(val_size, KM_SLEEP);
174
175 if ((req_array[0] & MEMINFO_MASK) == MEMINFO_PLGRP) {
176 /* find the corresponding lgroup for each physical address */
177 for (i = 0; i < addr_count; i++) {
178 paddr = in_array[i];
179 pfn = btop(paddr);
180 lgrp = lgrp_pfn_to_lgrp(pfn);
181 if (lgrp) {
182 out_array[i] = lgrp->lgrp_id;
183 val_array[i] = VALID_ADDR | VALID_REQ;
184 } else {
185 out_array[i] = 0;
186 val_array[i] = 0;
187 }
188 }
189 } else {
190 /* get the corresponding memory info for each virtual address */
191 as = curproc->p_as;
192
193 AS_LOCK_ENTER(as, RW_READER);
194 hat = as->a_hat;
195 for (i = out_idx = 0; i < addr_count; i++, out_idx +=
196 info_count) {
197 addr = in_array[i];
198 vaddr = (uintptr_t)(addr & ~PAGEOFFSET);
199 if (!as_segat(as, (caddr_t)vaddr)) {
200 val_array[i] = 0;
201 continue;
202 }
203 val_array[i] = VALID_ADDR;
204 pfn = hat_getpfnum(hat, (caddr_t)vaddr);
205 if (pfn != PFN_INVALID) {
206 paddr = (uint64_t)((pfn << PAGESHIFT) |
207 (addr & PAGEOFFSET));
208 for (j = 0; j < info_count; j++) {
209 switch (req_array[j] & MEMINFO_MASK) {
210 case MEMINFO_VPHYSICAL:
211 /*
212 * return the physical address
213 * corresponding to the input
214 * virtual address
215 */
216 out_array[out_idx + j] = paddr;
217 val_array[i] |= VALID_REQ << j;
218 break;
219 case MEMINFO_VLGRP:
220 /*
221 * return the lgroup of physical
222 * page corresponding to the
223 * input virtual address
224 */
225 lgrp = lgrp_pfn_to_lgrp(pfn);
226 if (lgrp) {
227 out_array[out_idx + j] =
228 lgrp->lgrp_id;
229 val_array[i] |=
230 VALID_REQ << j;
231 }
232 break;
233 case MEMINFO_VPAGESIZE:
234 /*
235 * return the size of physical
236 * page corresponding to the
237 * input virtual address
238 */
239 pgsz = hat_getpagesize(hat,
240 (caddr_t)vaddr);
241 if (pgsz != -1) {
242 out_array[out_idx + j] =
243 pgsz;
244 val_array[i] |=
245 VALID_REQ << j;
246 }
247 break;
248 case MEMINFO_VREPLCNT:
249 /*
250 * for future use:-
251 * return the no. of replicated
252 * physical pages corresponding
253 * to the input virtual address,
254 * so it is always 0 at the
255 * moment
256 */
257 out_array[out_idx + j] = 0;
258 val_array[i] |= VALID_REQ << j;
259 break;
260 case MEMINFO_VREPL:
261 /*
262 * for future use:-
263 * return the nth physical
264 * replica of the specified
265 * virtual address
266 */
267 break;
268 case MEMINFO_VREPL_LGRP:
269 /*
270 * for future use:-
271 * return the lgroup of nth
272 * physical replica of the
273 * specified virtual address
274 */
275 break;
276 case MEMINFO_PLGRP:
277 /*
278 * this is for physical address
279 * only, shouldn't mix with
280 * virtual address
281 */
282 break;
283 default:
284 break;
285 }
286 }
287 }
288 }
289 AS_LOCK_EXIT(as);
290 }
291
292 /* copy out the results and validity bits and free the buffers */
293 if ((copyout(out_array, minfo.mi_outdata, out_size) != 0) ||
294 (copyout(val_array, minfo.mi_validity, val_size) != 0))
295 ret = set_errno(EFAULT);
296
297 kmem_free(in_array, in_size);
298 kmem_free(out_array, out_size);
299 kmem_free(req_array, req_size);
300 kmem_free(val_array, val_size);
301
302 return (ret);
303 }
304
305
306 /*
307 * Initialize lgroup affinities for thread
308 */
309 void
310 lgrp_affinity_init(lgrp_affinity_t **bufaddr)
311 {
312 if (bufaddr)
313 *bufaddr = NULL;
314 }
315
316
317 /*
318 * Free lgroup affinities for thread and set to NULL
319 * just in case thread gets recycled
320 */
321 void
322 lgrp_affinity_free(lgrp_affinity_t **bufaddr)
323 {
324 if (bufaddr && *bufaddr) {
325 kmem_free(*bufaddr, nlgrpsmax * sizeof (lgrp_affinity_t));
326 *bufaddr = NULL;
327 }
328 }
329
330
331 #define P_ANY -2 /* cookie specifying any ID */
332
333
334 /*
335 * Find LWP with given ID in specified process and get its affinity for
336 * specified lgroup
337 */
338 lgrp_affinity_t
339 lgrp_affinity_get_thread(proc_t *p, id_t lwpid, lgrp_id_t lgrp)
340 {
341 lgrp_affinity_t aff;
342 int found;
343 kthread_t *t;
344
345 ASSERT(MUTEX_HELD(&p->p_lock));
346
347 aff = LGRP_AFF_NONE;
348 found = 0;
349 t = p->p_tlist;
350 /*
351 * The process may be executing in proc_exit() and its p->p_tlist may
352 * already be NULL.
353 */
354 if (t == NULL)
355 return (set_errno(ESRCH));
356
357 do {
358 if (t->t_tid == lwpid || lwpid == P_ANY) {
359 thread_lock(t);
360 /*
361 * Check to see whether caller has permission to get
362 * affinity for LWP
363 */
364 if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
365 thread_unlock(t);
366 return (set_errno(EPERM));
367 }
368
369 if (t->t_lgrp_affinity)
370 aff = t->t_lgrp_affinity[lgrp];
371 thread_unlock(t);
372 found = 1;
373 break;
374 }
375 } while ((t = t->t_forw) != p->p_tlist);
376 if (!found)
377 aff = set_errno(ESRCH);
378
379 return (aff);
380 }
381
382
383 /*
384 * Get lgroup affinity for given LWP
385 */
386 lgrp_affinity_t
387 lgrp_affinity_get(lgrp_affinity_args_t *ap)
388 {
389 lgrp_affinity_t aff;
390 lgrp_affinity_args_t args;
391 id_t id;
392 idtype_t idtype;
393 lgrp_id_t lgrp;
394 proc_t *p;
395 kthread_t *t;
396
397 /*
398 * Copyin arguments
399 */
400 if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
401 return (set_errno(EFAULT));
402
403 id = args.id;
404 idtype = args.idtype;
405 lgrp = args.lgrp;
406
407 /*
408 * Check for invalid lgroup
409 */
410 if (lgrp < 0 || lgrp == LGRP_NONE)
411 return (set_errno(EINVAL));
412
413 /*
414 * Check for existing lgroup
415 */
416 if (lgrp > lgrp_alloc_max)
417 return (set_errno(ESRCH));
418
419 /*
420 * Get lgroup affinity for given LWP or process
421 */
422 switch (idtype) {
423
424 case P_LWPID:
425 /*
426 * LWP in current process
427 */
428 p = curproc;
429 mutex_enter(&p->p_lock);
430 if (id != P_MYID) /* different thread */
431 aff = lgrp_affinity_get_thread(p, id, lgrp);
432 else { /* current thread */
433 aff = LGRP_AFF_NONE;
434 t = curthread;
435 thread_lock(t);
436 if (t->t_lgrp_affinity)
437 aff = t->t_lgrp_affinity[lgrp];
438 thread_unlock(t);
439 }
440 mutex_exit(&p->p_lock);
441 break;
442
443 case P_PID:
444 /*
445 * Process
446 */
447 mutex_enter(&pidlock);
448
449 if (id == P_MYID)
450 p = curproc;
451 else {
452 p = prfind(id);
453 if (p == NULL) {
454 mutex_exit(&pidlock);
455 return (set_errno(ESRCH));
456 }
457 }
458
459 mutex_enter(&p->p_lock);
460 aff = lgrp_affinity_get_thread(p, P_ANY, lgrp);
461 mutex_exit(&p->p_lock);
462
463 mutex_exit(&pidlock);
464 break;
465
466 default:
467 aff = set_errno(EINVAL);
468 break;
469 }
470
471 return (aff);
472 }
473
474
475 /*
476 * Find lgroup for which this thread has most affinity in specified partition
477 * starting from home lgroup unless specified starting lgroup is preferred
478 */
479 lpl_t *
480 lgrp_affinity_best(kthread_t *t, struct cpupart *cpupart, lgrp_id_t start,
481 boolean_t prefer_start)
482 {
483 lgrp_affinity_t *affs;
484 lgrp_affinity_t best_aff;
485 lpl_t *best_lpl;
486 lgrp_id_t finish;
487 lgrp_id_t home;
488 lgrp_id_t lgrpid;
489 lpl_t *lpl;
490
491 ASSERT(t != NULL);
492 ASSERT((MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0) ||
493 (MUTEX_HELD(&ttoproc(t)->p_lock) && THREAD_LOCK_HELD(t)));
494 ASSERT(cpupart != NULL);
495
496 if (t->t_lgrp_affinity == NULL)
497 return (NULL);
498
499 affs = t->t_lgrp_affinity;
500
501 /*
502 * Thread bound to CPU
503 */
504 if (t->t_bind_cpu != PBIND_NONE) {
505 cpu_t *cp;
506
507 /*
508 * Find which lpl has most affinity among leaf lpl directly
509 * containing CPU and its ancestor lpls
510 */
511 cp = cpu[t->t_bind_cpu];
512
513 best_lpl = lpl = cp->cpu_lpl;
514 best_aff = affs[best_lpl->lpl_lgrpid];
515 while (lpl->lpl_parent != NULL) {
516 lpl = lpl->lpl_parent;
517 lgrpid = lpl->lpl_lgrpid;
518 if (affs[lgrpid] > best_aff) {
519 best_lpl = lpl;
520 best_aff = affs[lgrpid];
521 }
522 }
523 return (best_lpl);
524 }
525
526 /*
527 * Start searching from home lgroup unless given starting lgroup is
528 * preferred or home lgroup isn't in given pset. Use root lgroup as
529 * starting point if both home and starting lgroups aren't in given
530 * pset.
531 */
532 ASSERT(start >= 0 && start <= lgrp_alloc_max);
533 home = t->t_lpl->lpl_lgrpid;
534 if (!prefer_start && LGRP_CPUS_IN_PART(home, cpupart))
535 lgrpid = home;
536 else if (start != LGRP_NONE && LGRP_CPUS_IN_PART(start, cpupart))
537 lgrpid = start;
538 else
539 lgrpid = LGRP_ROOTID;
540
541 best_lpl = &cpupart->cp_lgrploads[lgrpid];
542 best_aff = affs[lgrpid];
543 finish = lgrpid;
544 do {
545 /*
546 * Skip any lgroups that don't have CPU resources
547 * in this processor set.
548 */
549 if (!LGRP_CPUS_IN_PART(lgrpid, cpupart)) {
550 if (++lgrpid > lgrp_alloc_max)
551 lgrpid = 0; /* wrap the search */
552 continue;
553 }
554
555 /*
556 * Find lgroup with most affinity
557 */
558 lpl = &cpupart->cp_lgrploads[lgrpid];
559 if (affs[lgrpid] > best_aff) {
560 best_aff = affs[lgrpid];
561 best_lpl = lpl;
562 }
563
564 if (++lgrpid > lgrp_alloc_max)
565 lgrpid = 0; /* wrap the search */
566
567 } while (lgrpid != finish);
568
569 /*
570 * No lgroup (in this pset) with any affinity
571 */
572 if (best_aff == LGRP_AFF_NONE)
573 return (NULL);
574
575 lgrpid = best_lpl->lpl_lgrpid;
576 ASSERT(LGRP_CPUS_IN_PART(lgrpid, cpupart) && best_lpl->lpl_ncpu > 0);
577
578 return (best_lpl);
579 }
580
581
582 /*
583 * Set thread's affinity for given lgroup
584 */
585 int
586 lgrp_affinity_set_thread(kthread_t *t, lgrp_id_t lgrp, lgrp_affinity_t aff,
587 lgrp_affinity_t **aff_buf)
588 {
589 lgrp_affinity_t *affs;
590 lgrp_id_t best;
591 lpl_t *best_lpl;
592 lgrp_id_t home;
593 int retval;
594
595 ASSERT(t != NULL);
596 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
597
598 retval = 0;
599
600 thread_lock(t);
601
602 /*
603 * Check to see whether caller has permission to set affinity for
604 * thread
605 */
606 if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
607 thread_unlock(t);
608 return (set_errno(EPERM));
609 }
610
611 if (t->t_lgrp_affinity == NULL) {
612 if (aff == LGRP_AFF_NONE) {
613 thread_unlock(t);
614 return (0);
615 }
616 ASSERT(aff_buf != NULL && *aff_buf != NULL);
617 t->t_lgrp_affinity = *aff_buf;
618 *aff_buf = NULL;
619 }
620
621 affs = t->t_lgrp_affinity;
622 affs[lgrp] = aff;
623
624 /*
625 * Find lgroup for which thread has most affinity,
626 * starting with lgroup for which affinity being set
627 */
628 best_lpl = lgrp_affinity_best(t, t->t_cpupart, lgrp, B_TRUE);
629
630 /*
631 * Rehome if we found an lgroup with more affinity than home, or if the
632 * lgroup for which affinity is being set has the same affinity as home
633 */
634 home = t->t_lpl->lpl_lgrpid;
635 if (best_lpl != NULL && best_lpl != t->t_lpl) {
636 best = best_lpl->lpl_lgrpid;
637 if (affs[best] > affs[home] || (affs[best] == affs[home] &&
638 best == lgrp))
639 lgrp_move_thread(t, best_lpl, 1);
640 }
641
642 thread_unlock(t);
643
644 return (retval);
645 }
646
647
648 /*
649 * Set process' affinity for specified lgroup
650 */
651 int
652 lgrp_affinity_set_proc(proc_t *p, lgrp_id_t lgrp, lgrp_affinity_t aff,
653 lgrp_affinity_t **aff_buf_array)
654 {
655 lgrp_affinity_t *buf;
656 int err = 0;
657 int i;
658 int retval;
659 kthread_t *t;
660
661 ASSERT(MUTEX_HELD(&pidlock) && MUTEX_HELD(&p->p_lock));
662 ASSERT(aff_buf_array != NULL);
663
664 i = 0;
665 t = p->p_tlist;
666 if (t != NULL) {
667 do {
668 /*
669 * Set lgroup affinity for thread
670 */
671 buf = aff_buf_array[i];
672 retval = lgrp_affinity_set_thread(t, lgrp, aff, &buf);
673
674 if (err == 0 && retval != 0)
675 err = retval;
676
677 /*
678 * Advance pointer to next buffer
679 */
680 if (buf == NULL) {
681 ASSERT(i < p->p_lwpcnt);
682 aff_buf_array[i] = NULL;
683 i++;
684 }
685
686 } while ((t = t->t_forw) != p->p_tlist);
687 }
688 return (err);
689 }
690
691
692 /*
693 * Set LWP's or process' affinity for specified lgroup
694 *
695 * When setting affinities, pidlock, process p_lock, and thread_lock()
696 * need to be held in that order to protect target thread's pset, process,
697 * process contents, and thread contents. thread_lock() does splhigh(),
698 * so it ends up having a similar effect to kpreempt_disable() and will
699 * protect calls to lgrp_move_thread() and lgrp_choose() from pset changes.
700 */
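/*
 * A sketch of that ordering as used by the P_PID case below (illustrative
 * only; the thread_lock()/thread_unlock() pair is taken inside
 * lgrp_affinity_set_thread()):
 *
 *	mutex_enter(&pidlock);
 *	mutex_enter(&p->p_lock);
 *	thread_lock(t);
 *	... update t->t_lgrp_affinity, possibly lgrp_move_thread() ...
 *	thread_unlock(t);
 *	mutex_exit(&p->p_lock);
 *	mutex_exit(&pidlock);
 */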
701 int
702 lgrp_affinity_set(lgrp_affinity_args_t *ap)
703 {
704 lgrp_affinity_t aff;
705 lgrp_affinity_t *aff_buf;
706 lgrp_affinity_args_t args;
707 id_t id;
708 idtype_t idtype;
709 lgrp_id_t lgrp;
710 int nthreads;
711 proc_t *p;
712 int retval;
713
714 /*
715 * Copyin arguments
716 */
717 if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
718 return (set_errno(EFAULT));
719
720 idtype = args.idtype;
721 id = args.id;
722 lgrp = args.lgrp;
723 aff = args.aff;
724
725 /*
726 * Check for invalid lgroup
727 */
728 if (lgrp < 0 || lgrp == LGRP_NONE)
729 return (set_errno(EINVAL));
730
731 /*
732 * Check for existing lgroup
733 */
734 if (lgrp > lgrp_alloc_max)
735 return (set_errno(ESRCH));
736
737 /*
738 * Check for legal affinity
739 */
740 if (aff != LGRP_AFF_NONE && aff != LGRP_AFF_WEAK &&
741 aff != LGRP_AFF_STRONG)
742 return (set_errno(EINVAL));
743
744 /*
745 * Must be process or LWP ID
746 */
747 if (idtype != P_LWPID && idtype != P_PID)
748 return (set_errno(EINVAL));
749
750 retval = EINVAL;
751 /*
752 * Set given LWP's or process' affinity for specified lgroup
753 */
754 switch (idtype) {
755
756 case P_LWPID:
757 /*
758 * Allocate memory for thread's lgroup affinities
759 * ahead of time w/o holding locks
760 */
761 aff_buf = kmem_zalloc(nlgrpsmax * sizeof (lgrp_affinity_t),
762 KM_SLEEP);
763
764 p = curproc;
765
766 /*
767 * Set affinity for thread
768 */
769 mutex_enter(&p->p_lock);
770 if (id == P_MYID) { /* current thread */
771 retval = lgrp_affinity_set_thread(curthread, lgrp, aff,
772 &aff_buf);
773 } else if (p->p_tlist == NULL) {
774 retval = set_errno(ESRCH);
775 } else { /* other thread */
776 int found = 0;
777 kthread_t *t;
778
779 t = p->p_tlist;
780 do {
781 if (t->t_tid == id) {
782 retval = lgrp_affinity_set_thread(t,
783 lgrp, aff, &aff_buf);
784 found = 1;
785 break;
786 }
787 } while ((t = t->t_forw) != p->p_tlist);
788 if (!found)
789 retval = set_errno(ESRCH);
790 }
791 mutex_exit(&p->p_lock);
792
793 /*
794 * Free memory for lgroup affinities,
795 * since thread didn't need it
796 */
797 if (aff_buf)
798 kmem_free(aff_buf,
799 nlgrpsmax * sizeof (lgrp_affinity_t));
800
801 break;
802
803 case P_PID:
804
805 do {
806 lgrp_affinity_t **aff_buf_array;
807 int i;
808 size_t size;
809
810 /*
811 * Get process
812 */
813 mutex_enter(&pidlock);
814
815 if (id == P_MYID)
816 p = curproc;
817 else
818 p = prfind(id);
819
820 if (p == NULL) {
821 mutex_exit(&pidlock);
822 return (set_errno(ESRCH));
823 }
824
825 /*
826 * Get number of threads in process
827 *
828 * NOTE: Only care about user processes,
829 * so p_lwpcnt should be number of threads.
830 */
831 mutex_enter(&p->p_lock);
832 nthreads = p->p_lwpcnt;
833 mutex_exit(&p->p_lock);
834
835 mutex_exit(&pidlock);
836
837 if (nthreads < 1)
838 return (set_errno(ESRCH));
839
840 /*
841 * Preallocate memory for lgroup affinities for
842 * each thread in process now to avoid holding
843 * any locks. Allocate an array to hold a buffer
844 * for each thread.
845 */
846 aff_buf_array = kmem_zalloc(nthreads *
847 sizeof (lgrp_affinity_t *), KM_SLEEP);
848
849 size = nlgrpsmax * sizeof (lgrp_affinity_t);
850 for (i = 0; i < nthreads; i++)
851 aff_buf_array[i] = kmem_zalloc(size, KM_SLEEP);
852
853 mutex_enter(&pidlock);
854
855 /*
856 * Get process again since dropped locks to allocate
857 * memory (except current process)
858 */
859 if (id != P_MYID)
860 p = prfind(id);
861
862 /*
863 * Process went away after we dropped locks and before
864 * reacquiring them, so drop locks, free memory, and
865 * return.
866 */
867 if (p == NULL) {
868 mutex_exit(&pidlock);
869 for (i = 0; i < nthreads; i++)
870 kmem_free(aff_buf_array[i], size);
871 kmem_free(aff_buf_array,
872 nthreads * sizeof (lgrp_affinity_t *));
873 return (set_errno(ESRCH));
874 }
875
876 mutex_enter(&p->p_lock);
877
878 /*
879 * See whether number of threads is same
880 * If not, drop locks, free memory, and try again
881 */
882 if (nthreads != p->p_lwpcnt) {
883 mutex_exit(&p->p_lock);
884 mutex_exit(&pidlock);
885 for (i = 0; i < nthreads; i++)
886 kmem_free(aff_buf_array[i], size);
887 kmem_free(aff_buf_array,
888 nthreads * sizeof (lgrp_affinity_t *));
889 continue;
890 }
891
892 /*
893 * Set lgroup affinity for threads in process
894 */
895 retval = lgrp_affinity_set_proc(p, lgrp, aff,
896 aff_buf_array);
897
898 mutex_exit(&p->p_lock);
899 mutex_exit(&pidlock);
900
901 /*
902 * Free any leftover memory, since some threads may
903 * have already allocated memory and set lgroup
904 * affinities before
905 */
906 for (i = 0; i < nthreads; i++)
907 if (aff_buf_array[i] != NULL)
908 kmem_free(aff_buf_array[i], size);
909 kmem_free(aff_buf_array,
910 nthreads * sizeof (lgrp_affinity_t *));
911
912 break;
913
914 } while (nthreads != p->p_lwpcnt);
915
916 break;
917
918 default:
919 retval = set_errno(EINVAL);
920 break;
921 }
922
923 return (retval);
924 }
925
926
927 /*
928 * Return the latest generation number for the lgroup hierarchy
929 * with the given view
930 */
931 lgrp_gen_t
932 lgrp_generation(lgrp_view_t view)
933 {
934 cpupart_t *cpupart;
935 uint_t gen;
936
937 kpreempt_disable();
938
939 /*
940 * Determine generation number for given view
941 */
942 if (view == LGRP_VIEW_OS)
943 /*
944 * Return generation number of lgroup hierarchy for OS view
945 */
946 gen = lgrp_gen;
947 else {
948 /*
949 * For caller's view, use generation numbers for lgroup
950 * hierarchy and caller's pset
951 * NOTE: Caller needs to check for change in pset ID
952 */
953 cpupart = curthread->t_cpupart;
954 ASSERT(cpupart);
955 gen = lgrp_gen + cpupart->cp_gen;
956 }
957
958 kpreempt_enable();
959
960 return (gen);
961 }
962
963
964 lgrp_id_t
965 lgrp_home_thread(kthread_t *t)
966 {
967 lgrp_id_t home;
968
969 ASSERT(t != NULL);
970 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
971
972 thread_lock(t);
973
974 /*
975 * Check to see whether caller has permission to examine the
976 * thread
977 */
978 if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
979 thread_unlock(t);
980 return (set_errno(EPERM));
981 }
982
983 home = lgrp_home_id(t);
984
985 thread_unlock(t);
986 return (home);
987 }
988
989
990 /*
991 * Get home lgroup of given process or thread
992 */
993 lgrp_id_t
994 lgrp_home_get(idtype_t idtype, id_t id)
995 {
996 proc_t *p;
997 lgrp_id_t retval;
998 kthread_t *t;
999
1000 /*
1001 * Get home lgroup of given LWP or process
1002 */
1003 switch (idtype) {
1004
1005 case P_LWPID:
1006 p = curproc;
1007
1008 /*
1009 * Get home lgroup of thread
1010 */
1011 mutex_enter(&p->p_lock);
1012 if (id == P_MYID) { /* current thread */
1013 retval = lgrp_home_thread(curthread);
1014 } else if (p->p_tlist == NULL) {
1015 retval = set_errno(ESRCH);
1016 } else { /* other thread */
1017 int found = 0;
1018
1019 t = p->p_tlist;
1020 do {
1021 if (t->t_tid == id) {
1022 retval = lgrp_home_thread(t);
1023 found = 1;
1024 break;
1025 }
1026 } while ((t = t->t_forw) != p->p_tlist);
1027 if (!found)
1028 retval = set_errno(ESRCH);
1029 }
1030 mutex_exit(&p->p_lock);
1031 break;
1032
1033 case P_PID:
1034 /*
1035 * Get process
1036 */
1037 mutex_enter(&pidlock);
1038
1039 if (id == P_MYID)
1040 p = curproc;
1041 else
1042 p = prfind(id);
1043
1044 if (p == NULL) {
1045 mutex_exit(&pidlock);
1046 return (set_errno(ESRCH));
1047 }
1048
1049 mutex_enter(&p->p_lock);
1050 t = p->p_tlist;
1051 if (t == NULL)
1052 retval = set_errno(ESRCH);
1053 else
1054 retval = lgrp_home_thread(t);
1055 mutex_exit(&p->p_lock);
1056
1057 mutex_exit(&pidlock);
1058
1059 break;
1060
1061 default:
1062 retval = set_errno(EINVAL);
1063 break;
1064 }
1065
1066 return (retval);
1067 }
1068
1069
1070 /*
1071 * Return latency between "from" and "to" lgroups
1072 *
1073 * This latency number can only be used for relative comparison
1074 * between lgroups on the running system, cannot be used across platforms,
1075 * and may not reflect the actual latency. It is platform and implementation
1076 * specific, so the platform gets to decide its value. It would be nice if the
1077 * number was at least proportional to make comparisons more meaningful though.
1078 */
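/*
 * For example (illustrative numbers only), a platform might report a local
 * latency of 10 and a remote latency of 20; neither value means anything by
 * itself, but the 2:1 ratio is meaningful for comparisons on that system.
 */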
1079 int
1080 lgrp_latency(lgrp_id_t from, lgrp_id_t to)
1081 {
1082 lgrp_t *from_lgrp;
1083 int i;
1084 int latency;
1085 int latency_max;
1086 lgrp_t *to_lgrp;
1087
1088 ASSERT(MUTEX_HELD(&cpu_lock));
1089
1090 if (from < 0 || to < 0)
1091 return (set_errno(EINVAL));
1092
1093 if (from > lgrp_alloc_max || to > lgrp_alloc_max)
1094 return (set_errno(ESRCH));
1095
1096 from_lgrp = lgrp_table[from];
1097 to_lgrp = lgrp_table[to];
1098
1099 if (!LGRP_EXISTS(from_lgrp) || !LGRP_EXISTS(to_lgrp)) {
1100 return (set_errno(ESRCH));
1101 }
1102
1103 /*
1104 * Get latency for same lgroup
1105 */
1106 if (from == to) {
1107 latency = from_lgrp->lgrp_latency;
1108 return (latency);
1109 }
1110
1111 /*
1112 * Get latency between leaf lgroups
1113 */
1114 if (from_lgrp->lgrp_childcnt == 0 && to_lgrp->lgrp_childcnt == 0)
1115 return (lgrp_plat_latency(from_lgrp->lgrp_plathand,
1116 to_lgrp->lgrp_plathand));
1117
1118 /*
1119 * Determine max latency between resources in two lgroups
1120 */
1121 latency_max = 0;
1122 for (i = 0; i <= lgrp_alloc_max; i++) {
1123 lgrp_t *from_rsrc;
1124 int j;
1125 lgrp_t *to_rsrc;
1126
1127 from_rsrc = lgrp_table[i];
1128 if (!LGRP_EXISTS(from_rsrc) ||
1129 !klgrpset_ismember(from_lgrp->lgrp_set[LGRP_RSRC_CPU], i))
1130 continue;
1131
1132 for (j = 0; j <= lgrp_alloc_max; j++) {
1133 to_rsrc = lgrp_table[j];
1134 if (!LGRP_EXISTS(to_rsrc) ||
1135 klgrpset_ismember(to_lgrp->lgrp_set[LGRP_RSRC_MEM],
1136 j) == 0)
1137 continue;
1138 latency = lgrp_plat_latency(from_rsrc->lgrp_plathand,
1139 to_rsrc->lgrp_plathand);
1140 if (latency > latency_max)
1141 latency_max = latency;
1142 }
1143 }
1144 return (latency_max);
1145 }
1146
1147
1148 /*
1149 * Return lgroup interface version number
1150 * 0 - none
1151 * 1 - original
1152 * 2 - lgrp_latency_cookie() and lgrp_resources() added
1153 */
1154 int
1155 lgrp_version(int version)
1156 {
1157 /*
1158 * Return LGRP_VER_NONE when requested version isn't supported
1159 */
1160 if (version < LGRP_VER_NONE || version > LGRP_VER_CURRENT)
1161 return (LGRP_VER_NONE);
1162
1163 /*
1164 * Return current version when LGRP_VER_NONE passed in
1165 */
1166 if (version == LGRP_VER_NONE)
1167 return (LGRP_VER_CURRENT);
1168
1169 /*
1170 * Otherwise, return supported version.
1171 */
1172 return (version);
1173 }
1174
1175
1176 /*
1177 * Snapshot of lgroup hierarchy
1178 *
1179 * One snapshot is kept and is based on the kernel's native data model, so
1180 * a 32-bit snapshot is kept for the 32-bit kernel and a 64-bit one for the
1181 * 64-bit kernel. If a 32-bit user wants a snapshot from the 64-bit kernel,
1182 * the kernel generates a 32-bit snapshot from the data in its 64-bit snapshot.
1183 *
1184 * The format is defined by lgroup snapshot header and the layout of
1185 * the snapshot in memory is as follows:
1186 * 1) lgroup snapshot header
1187 * - specifies format of snapshot
1188 * - defined by lgrp_snapshot_header_t
1189 * 2) lgroup info array
1190 * - contains information about each lgroup
1191 * - one element for each lgroup
1192 * - each element is defined by lgrp_info_t
1193 * 3) lgroup CPU ID array
1194 * - contains list (array) of CPU IDs for each lgroup
1195 * - lgrp_info_t points into array and specifies how many CPUs belong to
1196 * given lgroup
1197 * 4) lgroup parents array
1198 * - contains lgroup bitmask of parents for each lgroup
1199 * - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1200 * 5) lgroup children array
1201 * - contains lgroup bitmask of children for each lgroup
1202 * - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1203 * 6) lgroup resources array
1204 * - contains lgroup bitmask of resources for each lgroup
1205 * - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1206 * 7) lgroup latency table
1207 * - contains latency from each lgroup to each of other lgroups
1208 *
1209 * NOTE: Must use nlgrpsmax for per lgroup data structures because lgroups
1210 * may be sparsely allocated.
1211 */
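/*
 * Rough picture of a snapshot buffer (illustrative; each object is rounded
 * up so that the next one is suitably aligned):
 *
 *	+-------------------------------------------+ <- ss_magic (buffer start)
 *	| lgrp_snapshot_header_t                     |
 *	+-------------------------------------------+ <- ss_info
 *	| lgrp_info_t[nlgrpsmax]                     |
 *	+-------------------------------------------+ <- ss_cpuids
 *	| processorid_t[ncpus]                       |
 *	+-------------------------------------------+ <- ss_lgrpset
 *	| pset lgroup bitmask                        |
 *	+-------------------------------------------+ <- ss_parents, ss_children
 *	| parent and child bitmasks, one per lgroup  |
 *	+-------------------------------------------+ <- ss_rsets
 *	| resource bitmasks, LGRP_RSRC_COUNT/lgroup  |
 *	+-------------------------------------------+ <- ss_latencies
 *	| latency pointer table and latency buffer   |
 *	+-------------------------------------------+
 */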
1212 lgrp_snapshot_header_t *lgrp_snap = NULL; /* lgroup snapshot */
1213 static kmutex_t lgrp_snap_lock; /* snapshot lock */
1214
1215
1216 /*
1217 * Take a snapshot of lgroup hierarchy and return size of buffer
1218 * needed to hold snapshot
1219 */
1220 static int
1221 lgrp_snapshot(void)
1222 {
1223 size_t bitmask_size;
1224 size_t bitmasks_size;
1225 size_t bufsize;
1226 int cpu_index;
1227 size_t cpuids_size;
1228 int i;
1229 int j;
1230 size_t info_size;
1231 size_t lats_size;
1232 ulong_t *lgrp_children;
1233 processorid_t *lgrp_cpuids;
1234 lgrp_info_t *lgrp_info;
1235 int **lgrp_lats;
1236 ulong_t *lgrp_parents;
1237 ulong_t *lgrp_rsets;
1238 ulong_t *lgrpset;
1239 int snap_ncpus;
1240 int snap_nlgrps;
1241 int snap_nlgrpsmax;
1242 size_t snap_hdr_size;
1243 #ifdef _SYSCALL32_IMPL
1244 model_t model = DATAMODEL_NATIVE;
1245
1246 /*
1247 * If we already have an up-to-date snapshot, check whether the caller is
1248 * a 32-bit program and, if so, return the size of the 32-bit snapshot now.
1249 */
1250 model = get_udatamodel();
1251 if (model == DATAMODEL_ILP32 && lgrp_snap &&
1252 lgrp_snap->ss_gen == lgrp_gen) {
1253
1254 snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1255
1256 /*
1257 * Calculate size of buffer needed for 32-bit snapshot,
1258 * rounding up size of each object to allow for alignment
1259 * of next object in buffer.
1260 */
1261 snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
1262 sizeof (caddr32_t));
1263 info_size =
1264 P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
1265 sizeof (processorid_t));
1266 cpuids_size =
1267 P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
1268 sizeof (ulong_t));
1269
1270 /*
1271 * lgroup bitmasks needed for parents, children, and resources
1272 * for each lgroup and pset lgroup set
1273 */
1274 bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1275 bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
1276 snap_nlgrpsmax) + 1) * bitmask_size;
1277
1278 /*
1279 * Size of latency table and buffer
1280 */
1281 lats_size = snap_nlgrpsmax * sizeof (caddr32_t) +
1282 snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);
1283
1284 bufsize = snap_hdr_size + info_size + cpuids_size +
1285 bitmasks_size + lats_size;
1286 return (bufsize);
1287 }
1288 #endif /* _SYSCALL32_IMPL */
1289
1290 /*
1291 * Check whether snapshot is up-to-date
1292 * Free it and take another one if not
1293 */
1294 if (lgrp_snap) {
1295 if (lgrp_snap->ss_gen == lgrp_gen)
1296 return (lgrp_snap->ss_size);
1297
1298 kmem_free(lgrp_snap, lgrp_snap->ss_size);
1299 lgrp_snap = NULL;
1300 }
1301
1302 /*
1303 * Allocate memory for snapshot
1304 * w/o holding cpu_lock while waiting for memory
1305 */
1306 while (lgrp_snap == NULL) {
1307 int old_generation;
1308
1309 /*
1310 * Take snapshot of lgroup generation number
1311 * and configuration size dependent information
1312 * NOTE: Only count number of online CPUs,
1313 * since only online CPUs appear in lgroups.
1314 */
1315 mutex_enter(&cpu_lock);
1316 old_generation = lgrp_gen;
1317 snap_ncpus = ncpus_online;
1318 snap_nlgrps = nlgrps;
1319 snap_nlgrpsmax = nlgrpsmax;
1320 mutex_exit(&cpu_lock);
1321
1322 /*
1323 * Calculate size of buffer needed for snapshot,
1324 * rounding up size of each object to allow for alignment
1325 * of next object in buffer.
1326 */
1327 snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
1328 sizeof (void *));
1329 info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
1330 sizeof (processorid_t));
1331 cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
1332 sizeof (ulong_t));
1333 /*
1334 * lgroup bitmasks needed for pset lgroup set and parents,
1335 * children, and resource sets for each lgroup
1336 */
1337 bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1338 bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
1339 snap_nlgrpsmax) + 1) * bitmask_size;
1340
1341 /*
1342 * Size of latency table and buffer
1343 */
1344 lats_size = snap_nlgrpsmax * sizeof (int *) +
1345 snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);
1346
1347 bufsize = snap_hdr_size + info_size + cpuids_size +
1348 bitmasks_size + lats_size;
1349
1350 /*
1351 * Allocate memory for buffer
1352 */
1353 lgrp_snap = kmem_zalloc(bufsize, KM_NOSLEEP);
1354 if (lgrp_snap == NULL)
1355 return (set_errno(ENOMEM));
1356
1357 /*
1358 * Check whether generation number has changed
1359 */
1360 mutex_enter(&cpu_lock);
1361 if (lgrp_gen == old_generation)
1362 break; /* hasn't changed, so done. */
1363
1364 /*
1365 * Generation number changed, so free memory and try again.
1366 */
1367 mutex_exit(&cpu_lock);
1368 kmem_free(lgrp_snap, bufsize);
1369 lgrp_snap = NULL;
1370 }
1371
1372 /*
1373 * Fill in lgroup snapshot header
1374 * (including pointers to tables of lgroup info, CPU IDs, and parents
1375 * and children)
1376 */
1377 lgrp_snap->ss_version = LGRP_VER_CURRENT;
1378
1379 /*
1380 * XXX For now, liblgrp only needs to know whether the hierarchy
1381 * XXX only has one level or not
1382 */
1383 if (snap_nlgrps == 1)
1384 lgrp_snap->ss_levels = 1;
1385 else
1386 lgrp_snap->ss_levels = 2;
1387
1388 lgrp_snap->ss_root = LGRP_ROOTID;
1389
1390 lgrp_snap->ss_nlgrps = lgrp_snap->ss_nlgrps_os = snap_nlgrps;
1391 lgrp_snap->ss_nlgrps_max = snap_nlgrpsmax;
1392 lgrp_snap->ss_ncpus = snap_ncpus;
1393 lgrp_snap->ss_gen = lgrp_gen;
1394 lgrp_snap->ss_view = LGRP_VIEW_OS;
1395 lgrp_snap->ss_pset = 0; /* NOTE: caller should set if needed */
1396 lgrp_snap->ss_size = bufsize;
1397 lgrp_snap->ss_magic = (uintptr_t)lgrp_snap;
1398
1399 lgrp_snap->ss_info = lgrp_info =
1400 (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
1401
1402 lgrp_snap->ss_cpuids = lgrp_cpuids =
1403 (processorid_t *)((uintptr_t)lgrp_info + info_size);
1404
1405 lgrp_snap->ss_lgrpset = lgrpset =
1406 (ulong_t *)((uintptr_t)lgrp_cpuids + cpuids_size);
1407
1408 lgrp_snap->ss_parents = lgrp_parents =
1409 (ulong_t *)((uintptr_t)lgrpset + bitmask_size);
1410
1411 lgrp_snap->ss_children = lgrp_children =
1412 (ulong_t *)((uintptr_t)lgrp_parents + (snap_nlgrpsmax *
1413 bitmask_size));
1414
1415 lgrp_snap->ss_rsets = lgrp_rsets =
1416 (ulong_t *)((uintptr_t)lgrp_children + (snap_nlgrpsmax *
1417 bitmask_size));
1418
1419 lgrp_snap->ss_latencies = lgrp_lats =
1420 (int **)((uintptr_t)lgrp_rsets + (LGRP_RSRC_COUNT *
1421 snap_nlgrpsmax * bitmask_size));
1422
1423 /*
1424 * Fill in lgroup information
1425 */
1426 cpu_index = 0;
1427 for (i = 0; i < snap_nlgrpsmax; i++) {
1428 struct cpu *cp;
1429 int cpu_count;
1430 struct cpu *head;
1431 int k;
1432 lgrp_t *lgrp;
1433
1434 lgrp = lgrp_table[i];
1435 if (!LGRP_EXISTS(lgrp)) {
1436 bzero(&lgrp_info[i], sizeof (lgrp_info[i]));
1437 lgrp_info[i].info_lgrpid = LGRP_NONE;
1438 continue;
1439 }
1440
1441 lgrp_info[i].info_lgrpid = i;
1442 lgrp_info[i].info_latency = lgrp->lgrp_latency;
1443
1444 /*
1445 * Fill in parents, children, and lgroup resources
1446 */
1447 lgrp_info[i].info_parents =
1448 (ulong_t *)((uintptr_t)lgrp_parents + (i * bitmask_size));
1449
1450 if (lgrp->lgrp_parent)
1451 BT_SET(lgrp_info[i].info_parents,
1452 lgrp->lgrp_parent->lgrp_id);
1453
1454 lgrp_info[i].info_children =
1455 (ulong_t *)((uintptr_t)lgrp_children + (i * bitmask_size));
1456
1457 for (j = 0; j < snap_nlgrpsmax; j++)
1458 if (klgrpset_ismember(lgrp->lgrp_children, j))
1459 BT_SET(lgrp_info[i].info_children, j);
1460
1461 lgrp_info[i].info_rset =
1462 (ulong_t *)((uintptr_t)lgrp_rsets +
1463 (i * LGRP_RSRC_COUNT * bitmask_size));
1464
1465 for (j = 0; j < LGRP_RSRC_COUNT; j++) {
1466 ulong_t *rset;
1467
1468 rset = (ulong_t *)((uintptr_t)lgrp_info[i].info_rset +
1469 (j * bitmask_size));
1470 for (k = 0; k < snap_nlgrpsmax; k++)
1471 if (klgrpset_ismember(lgrp->lgrp_set[j], k))
1472 BT_SET(rset, k);
1473 }
1474
1475 /*
1476 * Fill in CPU IDs
1477 */
1478 cpu_count = 0;
1479 lgrp_info[i].info_cpuids = NULL;
1480 cp = head = lgrp->lgrp_cpu;
1481 if (head != NULL) {
1482 lgrp_info[i].info_cpuids = &lgrp_cpuids[cpu_index];
1483 do {
1484 lgrp_cpuids[cpu_index] = cp->cpu_id;
1485 cpu_index++;
1486 cpu_count++;
1487 cp = cp->cpu_next_lgrp;
1488 } while (cp != head);
1489 }
1490 ASSERT(cpu_count == lgrp->lgrp_cpucnt);
1491 lgrp_info[i].info_ncpus = cpu_count;
1492
1493 /*
1494 * Fill in memory sizes for lgroups that directly contain
1495 * memory
1496 */
1497 if (klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM], i)) {
1498 lgrp_info[i].info_mem_free =
1499 lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
1500 lgrp_info[i].info_mem_install =
1501 lgrp_mem_size(i, LGRP_MEM_SIZE_INSTALL);
1502 }
1503
1504 /*
1505 * Fill in latency table and buffer
1506 */
1507 lgrp_lats[i] = (int *)((uintptr_t)lgrp_lats + snap_nlgrpsmax *
1508 sizeof (int *) + i * snap_nlgrpsmax * sizeof (int));
1509 for (j = 0; j < snap_nlgrpsmax; j++) {
1510 lgrp_t *to;
1511
1512 to = lgrp_table[j];
1513 if (!LGRP_EXISTS(to))
1514 continue;
1515 lgrp_lats[i][j] = lgrp_latency(lgrp->lgrp_id,
1516 to->lgrp_id);
1517 }
1518 }
1519 ASSERT(cpu_index == snap_ncpus);
1520
1521
1522 mutex_exit(&cpu_lock);
1523
1524 #ifdef _SYSCALL32_IMPL
1525 /*
1526 * Check whether the caller is a 32-bit program and, if so, return the
1527 * size of the 32-bit snapshot now that the snapshot has been
1528 * taken/updated. We may not have been able to do this earlier if the
1529 * snapshot was out of date or didn't exist yet.
1530 */
1531 if (model == DATAMODEL_ILP32) {
1532
1533 snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1534
1535 /*
1536 * Calculate size of buffer needed for 32-bit snapshot,
1537 * rounding up size of each object to allow for alignment
1538 * of next object in buffer.
1539 */
1540 snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
1541 sizeof (caddr32_t));
1542 info_size =
1543 P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
1544 sizeof (processorid_t));
1545 cpuids_size =
1546 P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
1547 sizeof (ulong_t));
1548
1549 bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1550 bitmasks_size = (((2 + LGRP_RSRC_COUNT) * snap_nlgrpsmax) +
1551 1) * bitmask_size;
1552
1553
1554 /*
1555 * Size of latency table and buffer
1556 */
1557 lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
1558 (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));
1559
1560 bufsize = snap_hdr_size + info_size + cpuids_size +
1561 bitmasks_size + lats_size;
1562 return (bufsize);
1563 }
1564 #endif /* _SYSCALL32_IMPL */
1565
1566 return (lgrp_snap->ss_size);
1567 }
1568
1569
1570 /*
1571 * Copy snapshot into given user buffer, fix up any pointers in buffer to point
1572 * into user instead of kernel address space, and return size of buffer
1573 * needed to hold snapshot
1574 */
1575 static int
1576 lgrp_snapshot_copy(char *buf, size_t bufsize)
1577 {
1578 size_t bitmask_size;
1579 int cpu_index;
1580 size_t cpuids_size;
1581 int i;
1582 size_t info_size;
1583 lgrp_info_t *lgrp_info;
1584 int retval;
1585 size_t snap_hdr_size;
1586 int snap_ncpus;
1587 int snap_nlgrpsmax;
1588 lgrp_snapshot_header_t *user_snap;
1589 lgrp_info_t *user_info;
1590 lgrp_info_t *user_info_buffer;
1591 processorid_t *user_cpuids;
1592 ulong_t *user_lgrpset;
1593 ulong_t *user_parents;
1594 ulong_t *user_children;
1595 int **user_lats;
1596 int **user_lats_buffer;
1597 ulong_t *user_rsets;
1598
1599 if (lgrp_snap == NULL)
1600 return (0);
1601
1602 if (buf == NULL || bufsize <= 0)
1603 return (lgrp_snap->ss_size);
1604
1605 /*
1606 * User needs to try getting size of buffer again
1607 * because given buffer size is too small.
1608 * The lgroup hierarchy may have changed after they asked for the size
1609 * but before the snapshot was taken.
1610 */
1611 if (bufsize < lgrp_snap->ss_size)
1612 return (set_errno(EAGAIN));
1613
1614 snap_ncpus = lgrp_snap->ss_ncpus;
1615 snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1616
1617 /*
1618 * Fill in lgrpset now because caller may have changed psets
1619 */
1620 kpreempt_disable();
1621 for (i = 0; i < snap_nlgrpsmax; i++) {
1622 if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
1623 i)) {
1624 BT_SET(lgrp_snap->ss_lgrpset, i);
1625 }
1626 }
1627 kpreempt_enable();
1628
1629 /*
1630 * Copy lgroup snapshot (snapshot header, lgroup info, and CPU IDs)
1631 * into user buffer all at once
1632 */
1633 if (copyout(lgrp_snap, buf, lgrp_snap->ss_size) != 0)
1634 return (set_errno(EFAULT));
1635
1636 /*
1637 * Round up sizes of lgroup snapshot header and info for alignment
1638 */
1639 snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
1640 sizeof (void *));
1641 info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
1642 sizeof (processorid_t));
1643 cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
1644 sizeof (ulong_t));
1645
1646 bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1647
1648 /*
1649 * Calculate pointers into user buffer for lgroup snapshot header,
1650 * info, and CPU IDs
1651 */
1652 user_snap = (lgrp_snapshot_header_t *)buf;
1653 user_info = (lgrp_info_t *)((uintptr_t)user_snap + snap_hdr_size);
1654 user_cpuids = (processorid_t *)((uintptr_t)user_info + info_size);
1655 user_lgrpset = (ulong_t *)((uintptr_t)user_cpuids + cpuids_size);
1656 user_parents = (ulong_t *)((uintptr_t)user_lgrpset + bitmask_size);
1657 user_children = (ulong_t *)((uintptr_t)user_parents +
1658 (snap_nlgrpsmax * bitmask_size));
1659 user_rsets = (ulong_t *)((uintptr_t)user_children +
1660 (snap_nlgrpsmax * bitmask_size));
1661 user_lats = (int **)((uintptr_t)user_rsets +
1662 (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size));
1663
1664 /*
1665 * Copyout magic number (ie. pointer to beginning of buffer)
1666 */
1667 if (copyout(&buf, &user_snap->ss_magic, sizeof (buf)) != 0)
1668 return (set_errno(EFAULT));
1669
1670 /*
1671 * Fix up pointers in user buffer to point into user buffer
1672 * not kernel snapshot
1673 */
1674 if (copyout(&user_info, &user_snap->ss_info, sizeof (user_info)) != 0)
1675 return (set_errno(EFAULT));
1676
1677 if (copyout(&user_cpuids, &user_snap->ss_cpuids,
1678 sizeof (user_cpuids)) != 0)
1679 return (set_errno(EFAULT));
1680
1681 if (copyout(&user_lgrpset, &user_snap->ss_lgrpset,
1682 sizeof (user_lgrpset)) != 0)
1683 return (set_errno(EFAULT));
1684
1685 if (copyout(&user_parents, &user_snap->ss_parents,
1686 sizeof (user_parents)) != 0)
1687 return (set_errno(EFAULT));
1688
1689 if (copyout(&user_children, &user_snap->ss_children,
1690 sizeof (user_children)) != 0)
1691 return (set_errno(EFAULT));
1692
1693 if (copyout(&user_rsets, &user_snap->ss_rsets,
1694 sizeof (user_rsets)) != 0)
1695 return (set_errno(EFAULT));
1696
1697 if (copyout(&user_lats, &user_snap->ss_latencies,
1698 sizeof (user_lats)) != 0)
1699 return (set_errno(EFAULT));
1700
1701 /*
1702 * Make copies of lgroup info and latency table, fix up pointers,
1703 * and then copy them into user buffer
1704 */
1705 user_info_buffer = kmem_zalloc(info_size, KM_NOSLEEP);
1706 if (user_info_buffer == NULL)
1707 return (set_errno(ENOMEM));
1708
1709 user_lats_buffer = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
1710 KM_NOSLEEP);
1711 if (user_lats_buffer == NULL) {
1712 kmem_free(user_info_buffer, info_size);
1713 return (set_errno(ENOMEM));
1714 }
1715
1716 lgrp_info = (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
1717 bcopy(lgrp_info, user_info_buffer, info_size);
1718
1719 cpu_index = 0;
1720 for (i = 0; i < snap_nlgrpsmax; i++) {
1721 ulong_t *snap_rset;
1722
1723 /*
1724 * Skip non-existent lgroups
1725 */
1726 if (user_info_buffer[i].info_lgrpid == LGRP_NONE)
1727 continue;
1728
1729 /*
1730 * Update free memory size since it changes frequently
1731 * Only do so for lgroups directly containing memory
1732 *
1733 * NOTE: This must be done before changing the pointers to
1734 * point into user space since we need to dereference
1735 * lgroup resource set
1736 */
1737 snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
1738 BT_BITOUL(snap_nlgrpsmax)];
1739 if (BT_TEST(snap_rset, i))
1740 user_info_buffer[i].info_mem_free =
1741 lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
1742
1743 /*
1744 * Fix up pointers to parents, children, resources, and
1745 * latencies
1746 */
1747 user_info_buffer[i].info_parents =
1748 (ulong_t *)((uintptr_t)user_parents + (i * bitmask_size));
1749 user_info_buffer[i].info_children =
1750 (ulong_t *)((uintptr_t)user_children + (i * bitmask_size));
1751 user_info_buffer[i].info_rset =
1752 (ulong_t *)((uintptr_t)user_rsets +
1753 (i * LGRP_RSRC_COUNT * bitmask_size));
1754 user_lats_buffer[i] = (int *)((uintptr_t)user_lats +
1755 (snap_nlgrpsmax * sizeof (int *)) + (i * snap_nlgrpsmax *
1756 sizeof (int)));
1757
1758 /*
1759 * Fix up pointer to CPU IDs
1760 */
1761 if (user_info_buffer[i].info_ncpus == 0) {
1762 user_info_buffer[i].info_cpuids = NULL;
1763 continue;
1764 }
1765 user_info_buffer[i].info_cpuids = &user_cpuids[cpu_index];
1766 cpu_index += user_info_buffer[i].info_ncpus;
1767 }
1768 ASSERT(cpu_index == snap_ncpus);
1769
1770 /*
1771 * Copy lgroup info and latency table with pointers fixed up to point
1772 * into user buffer out to user buffer now
1773 */
1774 retval = lgrp_snap->ss_size;
1775 if (copyout(user_info_buffer, user_info, info_size) != 0)
1776 retval = set_errno(EFAULT);
1777 kmem_free(user_info_buffer, info_size);
1778
1779 if (copyout(user_lats_buffer, user_lats, snap_nlgrpsmax *
1780 sizeof (int *)) != 0)
1781 retval = set_errno(EFAULT);
1782 kmem_free(user_lats_buffer, snap_nlgrpsmax * sizeof (int *));
1783
1784 return (retval);
1785 }
1786
1787
1788 #ifdef _SYSCALL32_IMPL
1789 /*
1790 * Make 32-bit copy of snapshot, fix up any pointers in buffer to point
1791 * into user instead of kernel address space, copy 32-bit snapshot into
1792 * given user buffer, and return size of buffer needed to hold snapshot
1793 */
1794 static int
1795 lgrp_snapshot_copy32(caddr32_t buf, size32_t bufsize)
1796 {
1797 size32_t bitmask_size;
1798 size32_t bitmasks_size;
1799 size32_t children_size;
1800 int cpu_index;
1801 size32_t cpuids_size;
1802 int i;
1803 int j;
1804 size32_t info_size;
1805 size32_t lats_size;
1806 lgrp_info_t *lgrp_info;
1807 lgrp_snapshot_header32_t *lgrp_snap32;
1808 lgrp_info32_t *lgrp_info32;
1809 processorid_t *lgrp_cpuids32;
1810 caddr32_t *lgrp_lats32;
1811 int **lgrp_lats32_kernel;
1812 uint_t *lgrp_set32;
1813 uint_t *lgrp_parents32;
1814 uint_t *lgrp_children32;
1815 uint_t *lgrp_rsets32;
1816 size32_t parents_size;
1817 size32_t rsets_size;
1818 size32_t set_size;
1819 size32_t snap_hdr_size;
1820 int snap_ncpus;
1821 int snap_nlgrpsmax;
1822 size32_t snap_size;
1823
1824 if (lgrp_snap == NULL)
1825 return (0);
1826
1827 snap_ncpus = lgrp_snap->ss_ncpus;
1828 snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1829
1830 /*
1831 * Calculate size of buffer needed for 32-bit snapshot,
1832 * rounding up size of each object to allow for alignment
1833 * of next object in buffer.
1834 */
1835 snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
1836 sizeof (caddr32_t));
1837 info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
1838 sizeof (processorid_t));
1839 cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
1840 sizeof (ulong_t));
1841
1842 bitmask_size = BT_SIZEOFMAP32(snap_nlgrpsmax);
1843
1844 set_size = bitmask_size;
1845 parents_size = snap_nlgrpsmax * bitmask_size;
1846 children_size = snap_nlgrpsmax * bitmask_size;
1847 rsets_size = P2ROUNDUP(LGRP_RSRC_COUNT * snap_nlgrpsmax *
1848 (int)bitmask_size, sizeof (caddr32_t));
1849
1850 bitmasks_size = set_size + parents_size + children_size + rsets_size;
1851
1852 /*
1853 * Size of latency table and buffer
1854 */
1855 lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
1856 (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));
1857
1858 snap_size = snap_hdr_size + info_size + cpuids_size + bitmasks_size +
1859 lats_size;
1860
1861 if (buf == 0 || bufsize <= 0) {
1862 return (snap_size);
1863 }
1864
1865 /*
1866 * User needs to try getting size of buffer again
1867 * because given buffer size is too small.
1868 * The lgroup hierarchy may have changed after they asked for the size
1869 * but before the snapshot was taken.
1870 */
1871 if (bufsize < snap_size)
1872 return (set_errno(EAGAIN));
1873
1874 /*
1875 * Make 32-bit copy of snapshot, fix up pointers to point into user
1876 * buffer not kernel, and then copy whole thing into user buffer
1877 */
1878 lgrp_snap32 = kmem_zalloc(snap_size, KM_NOSLEEP);
1879 if (lgrp_snap32 == NULL)
1880 return (set_errno(ENOMEM));
1881
1882 /*
1883 * Calculate pointers into 32-bit copy of snapshot
1884 * for lgroup info, CPU IDs, pset lgroup bitmask, parents, children,
1885 * resources, and latency table and buffer
1886 */
1887 lgrp_info32 = (lgrp_info32_t *)((uintptr_t)lgrp_snap32 +
1888 snap_hdr_size);
1889 lgrp_cpuids32 = (processorid_t *)((uintptr_t)lgrp_info32 + info_size);
1890 lgrp_set32 = (uint_t *)((uintptr_t)lgrp_cpuids32 + cpuids_size);
1891 lgrp_parents32 = (uint_t *)((uintptr_t)lgrp_set32 + set_size);
1892 lgrp_children32 = (uint_t *)((uintptr_t)lgrp_parents32 + parents_size);
1893 lgrp_rsets32 = (uint_t *)((uintptr_t)lgrp_children32 + children_size);
1894 lgrp_lats32 = (caddr32_t *)((uintptr_t)lgrp_rsets32 + rsets_size);
1895
1896 /*
1897 * Make temporary lgroup latency table of pointers for kernel to use
1898 * to fill in rows of table with latencies from each lgroup
1899 */
1900 lgrp_lats32_kernel = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
1901 KM_NOSLEEP);
1902 if (lgrp_lats32_kernel == NULL) {
1903 kmem_free(lgrp_snap32, snap_size);
1904 return (set_errno(ENOMEM));
1905 }
1906
1907 /*
1908 * Fill in 32-bit lgroup snapshot header
1909 * (with pointers into user's buffer for lgroup info, CPU IDs,
1910 * bit masks, and latencies)
1911 */
1912 lgrp_snap32->ss_version = lgrp_snap->ss_version;
1913 lgrp_snap32->ss_levels = lgrp_snap->ss_levels;
1914 lgrp_snap32->ss_nlgrps = lgrp_snap32->ss_nlgrps_os =
1915 lgrp_snap->ss_nlgrps;
1916 lgrp_snap32->ss_nlgrps_max = snap_nlgrpsmax;
1917 lgrp_snap32->ss_root = lgrp_snap->ss_root;
1918 lgrp_snap32->ss_ncpus = lgrp_snap->ss_ncpus;
1919 lgrp_snap32->ss_gen = lgrp_snap->ss_gen;
1920 lgrp_snap32->ss_view = LGRP_VIEW_OS;
1921 lgrp_snap32->ss_size = snap_size;
1922 lgrp_snap32->ss_magic = buf;
1923 lgrp_snap32->ss_info = buf + snap_hdr_size;
1924 lgrp_snap32->ss_cpuids = lgrp_snap32->ss_info + info_size;
1925 lgrp_snap32->ss_lgrpset = lgrp_snap32->ss_cpuids + cpuids_size;
1926 lgrp_snap32->ss_parents = lgrp_snap32->ss_lgrpset + bitmask_size;
1927 lgrp_snap32->ss_children = lgrp_snap32->ss_parents +
1928 (snap_nlgrpsmax * bitmask_size);
1929 lgrp_snap32->ss_rsets = lgrp_snap32->ss_children +
1930 (snap_nlgrpsmax * bitmask_size);
1931 lgrp_snap32->ss_latencies = lgrp_snap32->ss_rsets +
1932 (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size);
1933
1934 /*
1935 * Fill in lgrpset now because caller may have changed psets
1936 */
1937 kpreempt_disable();
1938 for (i = 0; i < snap_nlgrpsmax; i++) {
1939 if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
1940 i)) {
1941 BT_SET32(lgrp_set32, i);
1942 }
1943 }
1944 kpreempt_enable();
1945
1946 /*
1947 * Fill in 32-bit copy of lgroup info and fix up pointers
1948 * to point into user's buffer instead of kernel's
1949 */
1950 cpu_index = 0;
1951 lgrp_info = lgrp_snap->ss_info;
1952 for (i = 0; i < snap_nlgrpsmax; i++) {
1953 uint_t *children;
1954 uint_t *lgrp_rset;
1955 uint_t *parents;
1956 ulong_t *snap_rset;
1957
1958 /*
1959 * Skip non-existent lgroups
1960 */
1961 if (lgrp_info[i].info_lgrpid == LGRP_NONE) {
1962 bzero(&lgrp_info32[i], sizeof (lgrp_info32[i]));
1963 lgrp_info32[i].info_lgrpid = LGRP_NONE;
1964 continue;
1965 }
1966
1967 /*
1968 * Fill in parents, children, lgroup resource set, and
1969 * latencies from snapshot
1970 */
1971 parents = (uint_t *)((uintptr_t)lgrp_parents32 +
1972 i * bitmask_size);
1973 children = (uint_t *)((uintptr_t)lgrp_children32 +
1974 i * bitmask_size);
1975 snap_rset = (ulong_t *)((uintptr_t)lgrp_snap->ss_rsets +
1976 (i * LGRP_RSRC_COUNT * BT_SIZEOFMAP(snap_nlgrpsmax)));
1977 lgrp_rset = (uint_t *)((uintptr_t)lgrp_rsets32 +
1978 (i * LGRP_RSRC_COUNT * bitmask_size));
1979 lgrp_lats32_kernel[i] = (int *)((uintptr_t)lgrp_lats32 +
1980 snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
1981 sizeof (int));
1982 for (j = 0; j < snap_nlgrpsmax; j++) {
1983 int k;
1984 uint_t *rset;
1985
1986 if (BT_TEST(&lgrp_snap->ss_parents[i], j))
1987 BT_SET32(parents, j);
1988
1989 if (BT_TEST(&lgrp_snap->ss_children[i], j))
1990 BT_SET32(children, j);
1991
1992 for (k = 0; k < LGRP_RSRC_COUNT; k++) {
1993 rset = (uint_t *)((uintptr_t)lgrp_rset +
1994 k * bitmask_size);
1995 if (BT_TEST(&snap_rset[k], j))
1996 BT_SET32(rset, j);
1997 }
1998
1999 lgrp_lats32_kernel[i][j] =
2000 lgrp_snap->ss_latencies[i][j];
2001 }
2002
2003 /*
2004 * Fix up pointer to latency buffer
2005 */
2006 lgrp_lats32[i] = lgrp_snap32->ss_latencies +
2007 snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
2008 sizeof (int);
2009
2010 /*
2011 * Fix up pointers for parents, children, and resources
2012 */
2013 lgrp_info32[i].info_parents = lgrp_snap32->ss_parents +
2014 (i * bitmask_size);
2015 lgrp_info32[i].info_children = lgrp_snap32->ss_children +
2016 (i * bitmask_size);
2017 lgrp_info32[i].info_rset = lgrp_snap32->ss_rsets +
2018 (i * LGRP_RSRC_COUNT * bitmask_size);
2019
2020 /*
2021 * Fill in memory and CPU info
2022 * Only fill in memory for lgroups directly containing memory
2023 */
2024 snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
2025 BT_BITOUL(snap_nlgrpsmax)];
2026 if (BT_TEST(snap_rset, i)) {
2027 lgrp_info32[i].info_mem_free = lgrp_mem_size(i,
2028 LGRP_MEM_SIZE_FREE);
2029 lgrp_info32[i].info_mem_install =
2030 lgrp_info[i].info_mem_install;
2031 }
2032
2033 lgrp_info32[i].info_ncpus = lgrp_info[i].info_ncpus;
2034
2035 lgrp_info32[i].info_lgrpid = lgrp_info[i].info_lgrpid;
2036 lgrp_info32[i].info_latency = lgrp_info[i].info_latency;
2037
2038 if (lgrp_info32[i].info_ncpus == 0) {
2039 lgrp_info32[i].info_cpuids = 0;
2040 continue;
2041 }
2042
2043 /*
2044 * Fix up pointer for CPU IDs
2045 */
2046 lgrp_info32[i].info_cpuids = lgrp_snap32->ss_cpuids +
2047 (cpu_index * sizeof (processorid_t));
2048 cpu_index += lgrp_info32[i].info_ncpus;
2049 }
2050 ASSERT(cpu_index == snap_ncpus);
2051
2052 /*
2053 * Copy lgroup CPU IDs into 32-bit snapshot
2054 * before copying it out into user's buffer
2055 */
2056 bcopy(lgrp_snap->ss_cpuids, lgrp_cpuids32, cpuids_size);
2057
2058 /*
2059 * Copy 32-bit lgroup snapshot into user's buffer all at once
2060 */
2061 if (copyout(lgrp_snap32, (void *)(uintptr_t)buf, snap_size) != 0) {
2062 kmem_free(lgrp_snap32, snap_size);
2063 kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));
2064 return (set_errno(EFAULT));
2065 }
2066
2067 kmem_free(lgrp_snap32, snap_size);
2068 kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));
2069
2070 return (snap_size);
2071 }
2072 #endif /* _SYSCALL32_IMPL */
2073
2074
2075 int
2076 lgrpsys(int subcode, long ia, void *ap)
2077 {
2078 size_t bufsize;
2079 int latency;
2080
2081 switch (subcode) {
2082
2083 case LGRP_SYS_AFFINITY_GET:
2084 return (lgrp_affinity_get((lgrp_affinity_args_t *)ap));
2085
2086 case LGRP_SYS_AFFINITY_SET:
2087 return (lgrp_affinity_set((lgrp_affinity_args_t *)ap));
2088
2089 case LGRP_SYS_GENERATION:
2090 return (lgrp_generation(ia));
2091
2092 case LGRP_SYS_HOME:
2093 return (lgrp_home_get((idtype_t)ia, (id_t)(uintptr_t)ap));
2094
2095 case LGRP_SYS_LATENCY:
2096 mutex_enter(&cpu_lock);
2097 latency = lgrp_latency(ia, (lgrp_id_t)(uintptr_t)ap);
2098 mutex_exit(&cpu_lock);
2099 return (latency);
2100
2101 case LGRP_SYS_MEMINFO:
2102 return (meminfo(ia, (struct meminfo *)ap));
2103
2104 case LGRP_SYS_VERSION:
2105 return (lgrp_version(ia));
2106
2107 case LGRP_SYS_SNAPSHOT:
2108 mutex_enter(&lgrp_snap_lock);
2109 bufsize = lgrp_snapshot();
2110 if (ap && ia > 0) {
2111 if (get_udatamodel() == DATAMODEL_NATIVE)
2112 bufsize = lgrp_snapshot_copy(ap, ia);
2113 #ifdef _SYSCALL32_IMPL
2114 else
2115 bufsize = lgrp_snapshot_copy32(
2116 (caddr32_t)(uintptr_t)ap, ia);
2117 #endif /* _SYSCALL32_IMPL */
2118 }
2119 mutex_exit(&lgrp_snap_lock);
2120 return (bufsize);
2121
2122 default:
2123 break;
2124
2125 }
2126
2127 return (set_errno(EINVAL));
2128 }
2129