/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * lgroup system calls
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/sunddi.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/cpupart.h>
#include <sys/lgrp.h>
#include <sys/lgrp_user.h>
#include <sys/promif.h>		/* for prom_printf() */
#include <sys/sysmacros.h>

#include <vm/as.h>


/* definitions for mi_validity */
#define	VALID_ADDR	1
#define	VALID_REQ	2

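/*
 * For example, an address that was translated successfully and had both of
 * two requested pieces of information filled in ends up with a validity
 * word of VALID_ADDR | (VALID_REQ << 0) | (VALID_REQ << 1) == 0x7.
 */
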
/*
 * run through the given number of addresses and requests and return the
 * corresponding memory information for each address
 */
static int
meminfo(int addr_count, struct meminfo *mip)
{
	size_t		in_size, out_size, req_size, val_size;
	struct as	*as;
	struct hat	*hat;
	int		i, j, out_idx, info_count;
	lgrp_t		*lgrp;
	pfn_t		pfn;
	ssize_t		pgsz;
	int		*req_array, *val_array;
	uint64_t	*in_array, *out_array;
	uint64_t	addr, paddr;
	uintptr_t	vaddr;
	int		ret = 0;
	struct meminfo	minfo;
#if defined(_SYSCALL32_IMPL)
	struct meminfo32 minfo32;
#endif

	/*
	 * Make sure that there is at least one address to translate and
	 * limit how many virtual addresses the kernel can do per call
	 */
	if (addr_count < 1)
		return (set_errno(EINVAL));
	else if (addr_count > MAX_MEMINFO_CNT)
		addr_count = MAX_MEMINFO_CNT;

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin(mip, &minfo, sizeof (struct meminfo)))
			return (set_errno(EFAULT));
	}
#if defined(_SYSCALL32_IMPL)
	else {
		bzero(&minfo, sizeof (minfo));
		if (copyin(mip, &minfo32, sizeof (struct meminfo32)))
			return (set_errno(EFAULT));
		minfo.mi_inaddr = (const uint64_t *)(uintptr_t)
		    minfo32.mi_inaddr;
		minfo.mi_info_req = (const uint_t *)(uintptr_t)
		    minfo32.mi_info_req;
		minfo.mi_info_count = minfo32.mi_info_count;
		minfo.mi_outdata = (uint64_t *)(uintptr_t)
		    minfo32.mi_outdata;
		minfo.mi_validity = (uint_t *)(uintptr_t)
		    minfo32.mi_validity;
	}
#endif
	/*
	 * all the input parameters have been copied in:-
	 * addr_count - number of input addresses
	 * minfo.mi_inaddr - array of input addresses
	 * minfo.mi_info_req - array of types of information requested
	 * minfo.mi_info_count - no. of pieces of info requested for each addr
	 * minfo.mi_outdata - array into which the results are placed
	 * minfo.mi_validity - array containing bitwise result codes; 0th bit
	 *	evaluates validity of corresponding input
	 *	address, 1st bit validity of response to first
	 *	member of info_req, etc.
	 */
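	/*
	 * Illustrative sketch (hypothetical, not part of this file) of a
	 * user-level call that lands here via meminfo(2), asking for the
	 * physical address and home lgroup of one virtual address:
	 *
	 *	uint64_t inaddr = (uintptr_t)&obj, outdata[2];
	 *	uint_t info_req[] = { MEMINFO_VPHYSICAL, MEMINFO_VLGRP };
	 *	uint_t validity;
	 *
	 *	if (meminfo(&inaddr, 1, info_req, 2, outdata, &validity)
	 *	    == 0 && (validity & VALID_ADDR))
	 *		(outdata[0] is the paddr, outdata[1] the lgroup)
	 */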

	/* make sure mi_info_count is within limit */
	info_count = minfo.mi_info_count;
	if (info_count < 1 || info_count > MAX_MEMINFO_REQ)
		return (set_errno(EINVAL));

	/*
	 * allocate buffer in_array for the input addresses and copy them in
	 */
	in_size = sizeof (uint64_t) * addr_count;
	in_array = kmem_alloc(in_size, KM_SLEEP);
	if (copyin(minfo.mi_inaddr, in_array, in_size)) {
		kmem_free(in_array, in_size);
		return (set_errno(EFAULT));
	}

	/*
	 * allocate buffer req_array for the input info_reqs and copy them in
	 */
	req_size = sizeof (uint_t) * info_count;
	req_array = kmem_alloc(req_size, KM_SLEEP);
	if (copyin(minfo.mi_info_req, req_array, req_size)) {
		kmem_free(req_array, req_size);
		kmem_free(in_array, in_size);
		return (set_errno(EFAULT));
	}

	/*
	 * allocate buffer out_array which holds the results and will have
	 * to be copied out later
	 */
	out_size = sizeof (uint64_t) * addr_count * info_count;
	out_array = kmem_alloc(out_size, KM_SLEEP);

	/*
	 * allocate buffer val_array which holds the validity bits and will
	 * have to be copied out later
	 */
	val_size = sizeof (uint_t) * addr_count;
	val_array = kmem_alloc(val_size, KM_SLEEP);

	if ((req_array[0] & MEMINFO_MASK) == MEMINFO_PLGRP) {
		/* find the corresponding lgroup for each physical address */
		for (i = 0; i < addr_count; i++) {
			paddr = in_array[i];
			pfn = btop(paddr);
			lgrp = lgrp_pfn_to_lgrp(pfn);
			if (lgrp) {
				out_array[i] = lgrp->lgrp_id;
				val_array[i] = VALID_ADDR | VALID_REQ;
			} else {
				out_array[i] = 0;
				val_array[i] = 0;
			}
		}
	} else {
		/* get the corresponding memory info for each virtual address */
		as = curproc->p_as;

		AS_LOCK_ENTER(as, RW_READER);
		hat = as->a_hat;
		for (i = out_idx = 0; i < addr_count; i++, out_idx +=
		    info_count) {
			addr = in_array[i];
			vaddr = (uintptr_t)(addr & ~PAGEOFFSET);
			if (!as_segat(as, (caddr_t)vaddr)) {
				val_array[i] = 0;
				continue;
			}
			val_array[i] = VALID_ADDR;
			pfn = hat_getpfnum(hat, (caddr_t)vaddr);
			if (pfn != PFN_INVALID) {
				paddr = (uint64_t)((pfn << PAGESHIFT) |
				    (addr & PAGEOFFSET));
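				/*
				 * e.g., with 8K pages (PAGESHIFT == 13),
				 * pfn 0x1234 and page offset 0x56 yield
				 * paddr (0x1234 << 13) | 0x56 == 0x2468056.
				 */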
				for (j = 0; j < info_count; j++) {
					switch (req_array[j] & MEMINFO_MASK) {
					case MEMINFO_VPHYSICAL:
						/*
						 * return the physical address
						 * corresponding to the input
						 * virtual address
						 */
						out_array[out_idx + j] = paddr;
						val_array[i] |= VALID_REQ << j;
						break;
					case MEMINFO_VLGRP:
						/*
						 * return the lgroup of physical
						 * page corresponding to the
						 * input virtual address
						 */
						lgrp = lgrp_pfn_to_lgrp(pfn);
						if (lgrp) {
							out_array[out_idx + j] =
							    lgrp->lgrp_id;
							val_array[i] |=
							    VALID_REQ << j;
						}
						break;
					case MEMINFO_VPAGESIZE:
						/*
						 * return the size of physical
						 * page corresponding to the
						 * input virtual address
						 */
						pgsz = hat_getpagesize(hat,
						    (caddr_t)vaddr);
						if (pgsz != -1) {
							out_array[out_idx + j] =
							    pgsz;
							val_array[i] |=
							    VALID_REQ << j;
						}
						break;
					case MEMINFO_VREPLCNT:
						/*
						 * for future use:-
						 * return the no. replicated
						 * physical pages corresponding
						 * to the input virtual address,
						 * so it is always 0 at the
						 * moment
						 */
						out_array[out_idx + j] = 0;
						val_array[i] |= VALID_REQ << j;
						break;
					case MEMINFO_VREPL:
						/*
						 * for future use:-
						 * return the nth physical
						 * replica of the specified
						 * virtual address
						 */
						break;
					case MEMINFO_VREPL_LGRP:
						/*
						 * for future use:-
						 * return the lgroup of nth
						 * physical replica of the
						 * specified virtual address
						 */
						break;
					case MEMINFO_PLGRP:
						/*
						 * this is for physical address
						 * only, shouldn't mix with
						 * virtual address
						 */
						break;
					default:
						break;
					}
				}
			}
		}
		AS_LOCK_EXIT(as);
	}

	/* copy out the results and validity bits and free the buffers */
	if ((copyout(out_array, minfo.mi_outdata, out_size) != 0) ||
	    (copyout(val_array, minfo.mi_validity, val_size) != 0))
		ret = set_errno(EFAULT);

	kmem_free(in_array, in_size);
	kmem_free(out_array, out_size);
	kmem_free(req_array, req_size);
	kmem_free(val_array, val_size);

	return (ret);
}


/*
 * Initialize lgroup affinities for thread
 */
void
lgrp_affinity_init(lgrp_affinity_t **bufaddr)
{
	if (bufaddr)
		*bufaddr = NULL;
}


/*
 * Free lgroup affinities for thread and set to NULL
 * just in case thread gets recycled
 */
void
lgrp_affinity_free(lgrp_affinity_t **bufaddr)
{
	if (bufaddr && *bufaddr) {
		kmem_free(*bufaddr, nlgrpsmax * sizeof (lgrp_affinity_t));
		*bufaddr = NULL;
	}
}


#define	P_ANY	-2	/* cookie specifying any ID */


/*
 * Find LWP with given ID in specified process and get its affinity for
 * specified lgroup
 */
lgrp_affinity_t
lgrp_affinity_get_thread(proc_t *p, id_t lwpid, lgrp_id_t lgrp)
{
	lgrp_affinity_t	aff;
	int		found;
	kthread_t	*t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	aff = LGRP_AFF_NONE;
	found = 0;
	t = p->p_tlist;
	/*
	 * The process may be executing in proc_exit() and its p->p_tlist may
	 * already be NULL.
	 */
	if (t == NULL)
		return (set_errno(ESRCH));

	do {
		if (t->t_tid == lwpid || lwpid == P_ANY) {
			thread_lock(t);
			/*
			 * Check to see whether caller has permission to get
			 * affinity for LWP
			 */
			if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
				thread_unlock(t);
				return (set_errno(EPERM));
			}

			if (t->t_lgrp_affinity)
				aff = t->t_lgrp_affinity[lgrp];
			thread_unlock(t);
			found = 1;
			break;
		}
	} while ((t = t->t_forw) != p->p_tlist);
	if (!found)
		aff = set_errno(ESRCH);

	return (aff);
}


/*
 * Get lgroup affinity for given LWP
 */
lgrp_affinity_t
lgrp_affinity_get(lgrp_affinity_args_t *ap)
{
	lgrp_affinity_t		aff;
	lgrp_affinity_args_t	args;
	id_t			id;
	idtype_t		idtype;
	lgrp_id_t		lgrp;
	proc_t			*p;
	kthread_t		*t;

	/*
	 * Copyin arguments
	 */
	if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
		return (set_errno(EFAULT));

	id = args.id;
	idtype = args.idtype;
	lgrp = args.lgrp;

	/*
	 * Check for invalid lgroup
	 */
	if (lgrp < 0 || lgrp == LGRP_NONE)
		return (set_errno(EINVAL));

	/*
	 * Check for existing lgroup
	 */
	if (lgrp > lgrp_alloc_max)
		return (set_errno(ESRCH));

	/*
	 * Get lgroup affinity for given LWP or process
	 */
	switch (idtype) {

	case P_LWPID:
		/*
		 * LWP in current process
		 */
		p = curproc;
		mutex_enter(&p->p_lock);
		if (id != P_MYID)	/* different thread */
			aff = lgrp_affinity_get_thread(p, id, lgrp);
		else {			/* current thread */
			aff = LGRP_AFF_NONE;
			t = curthread;
			thread_lock(t);
			if (t->t_lgrp_affinity)
				aff = t->t_lgrp_affinity[lgrp];
			thread_unlock(t);
		}
		mutex_exit(&p->p_lock);
		break;

	case P_PID:
		/*
		 * Process
		 */
		mutex_enter(&pidlock);

		if (id == P_MYID)
			p = curproc;
		else {
			p = prfind(id);
			if (p == NULL) {
				mutex_exit(&pidlock);
				return (set_errno(ESRCH));
			}
		}

		mutex_enter(&p->p_lock);
		aff = lgrp_affinity_get_thread(p, P_ANY, lgrp);
		mutex_exit(&p->p_lock);

		mutex_exit(&pidlock);
		break;

	default:
		aff = set_errno(EINVAL);
		break;
	}

	return (aff);
}
lpl_t *
lgrp_affinity_best(kthread_t *t, struct cpupart *cpupart, lgrp_id_t start,
    boolean_t prefer_start)
{
	lgrp_affinity_t	*affs;
	lgrp_affinity_t	best_aff;
	lpl_t		*best_lpl;
	lgrp_id_t	finish;
	lgrp_id_t	home;
	lgrp_id_t	lgrpid;
	lpl_t		*lpl;

	ASSERT(t != NULL);
	ASSERT((MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0) ||
	    (MUTEX_HELD(&ttoproc(t)->p_lock) && THREAD_LOCK_HELD(t)));
	ASSERT(cpupart != NULL);

	if (t->t_lgrp_affinity == NULL)
		return (NULL);

	affs = t->t_lgrp_affinity;

	/*
	 * Thread bound to CPU
	 */
	if (t->t_bind_cpu != PBIND_NONE) {
		cpu_t	*cp;

		/*
		 * Find which lpl has most affinity among leaf lpl directly
		 * containing CPU and its ancestor lpls
		 */
		cp = cpu[t->t_bind_cpu];

		best_lpl = lpl = cp->cpu_lpl;
		best_aff = affs[best_lpl->lpl_lgrpid];
		while (lpl->lpl_parent != NULL) {
			lpl = lpl->lpl_parent;
			lgrpid = lpl->lpl_lgrpid;
			if (affs[lgrpid] > best_aff) {
				best_lpl = lpl;
				best_aff = affs[lgrpid];
			}
		}
		return (best_lpl);
	}

	/*
	 * Start searching from home lgroup unless given starting lgroup is
	 * preferred or home lgroup isn't in given pset.  Use root lgroup as
	 * starting point if both home and starting lgroups aren't in given
	 * pset.
	 */
	ASSERT(start >= 0 && start <= lgrp_alloc_max);
	home = t->t_lpl->lpl_lgrpid;
	if (!prefer_start && LGRP_CPUS_IN_PART(home, cpupart))
		lgrpid = home;
	else if (start != LGRP_NONE && LGRP_CPUS_IN_PART(start, cpupart))
		lgrpid = start;
	else
		lgrpid = LGRP_ROOTID;

	best_lpl = &cpupart->cp_lgrploads[lgrpid];
	best_aff = affs[lgrpid];
	finish = lgrpid;
	do {
		/*
		 * Skip any lgroups that don't have CPU resources
		 * in this processor set.
		 */
		if (!LGRP_CPUS_IN_PART(lgrpid, cpupart)) {
			if (++lgrpid > lgrp_alloc_max)
				lgrpid = 0;	/* wrap the search */
			continue;
		}

		/*
		 * Find lgroup with most affinity
		 */
		lpl = &cpupart->cp_lgrploads[lgrpid];
		if (affs[lgrpid] > best_aff) {
			best_aff = affs[lgrpid];
			best_lpl = lpl;
		}

		if (++lgrpid > lgrp_alloc_max)
			lgrpid = 0;	/* wrap the search */

	} while (lgrpid != finish);

	/*
	 * No lgroup (in this pset) with any affinity
	 */
	if (best_aff == LGRP_AFF_NONE)
		return (NULL);

	lgrpid = best_lpl->lpl_lgrpid;
	ASSERT(LGRP_CPUS_IN_PART(lgrpid, cpupart) && best_lpl->lpl_ncpu > 0);

	return (best_lpl);
}


/*
 * Set thread's affinity for given lgroup
 */
int
lgrp_affinity_set_thread(kthread_t *t, lgrp_id_t lgrp, lgrp_affinity_t aff,
    lgrp_affinity_t **aff_buf)
{
	lgrp_affinity_t	*affs;
	lgrp_id_t	best;
	lpl_t		*best_lpl;
	lgrp_id_t	home;
	int		retval;

	ASSERT(t != NULL);
	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	retval = 0;

	thread_lock(t);

	/*
	 * Check to see whether caller has permission to set affinity for
	 * thread
	 */
	if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
		thread_unlock(t);
		return (set_errno(EPERM));
	}

	if (t->t_lgrp_affinity == NULL) {
		if (aff == LGRP_AFF_NONE) {
			thread_unlock(t);
			return (0);
		}
		ASSERT(aff_buf != NULL && *aff_buf != NULL);
		t->t_lgrp_affinity = *aff_buf;
		*aff_buf = NULL;
	}

	affs = t->t_lgrp_affinity;
	affs[lgrp] = aff;

	/*
	 * Find lgroup for which thread has most affinity,
	 * starting with lgroup for which affinity being set
	 */
	best_lpl = lgrp_affinity_best(t, t->t_cpupart, lgrp, B_TRUE);

	/*
	 * Rehome thread if it found an lgroup with more affinity than home,
	 * or if the lgroup for which affinity is being set has the same
	 * affinity as home
	 */
	home = t->t_lpl->lpl_lgrpid;
	if (best_lpl != NULL && best_lpl != t->t_lpl) {
		best = best_lpl->lpl_lgrpid;
		if (affs[best] > affs[home] || (affs[best] == affs[home] &&
		    best == lgrp))
			lgrp_move_thread(t, best_lpl, 1);
	}

	thread_unlock(t);

	return (retval);
}


/*
 * Set process' affinity for specified lgroup
 */
int
lgrp_affinity_set_proc(proc_t *p, lgrp_id_t lgrp, lgrp_affinity_t aff,
    lgrp_affinity_t **aff_buf_array)
{
	lgrp_affinity_t	*buf;
	int		err = 0;
	int		i;
	int		retval;
	kthread_t	*t;

	ASSERT(MUTEX_HELD(&pidlock) && MUTEX_HELD(&p->p_lock));
	ASSERT(aff_buf_array != NULL);

	i = 0;
	t = p->p_tlist;
	if (t != NULL) {
		do {
			/*
			 * Set lgroup affinity for thread
			 */
			buf = aff_buf_array[i];
			retval = lgrp_affinity_set_thread(t, lgrp, aff, &buf);

			if (err == 0 && retval != 0)
				err = retval;

			/*
			 * Advance pointer to next buffer
			 */
			if (buf == NULL) {
				ASSERT(i < p->p_lwpcnt);
				aff_buf_array[i] = NULL;
				i++;
			}

		} while ((t = t->t_forw) != p->p_tlist);
	}
	return (err);
}


/*
 * Set LWP's or process' affinity for specified lgroup
 *
 * When setting affinities, pidlock, process p_lock, and thread_lock()
 * need to be held in that order to protect target thread's pset, process,
 * process contents, and thread contents.  thread_lock() does splhigh(),
 * so it ends up having a similar effect to kpreempt_disable(), and it will
 * protect calls to lgrp_move_thread() and lgrp_choose() from pset changes.
 */
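/*
 * For example, the P_PID case of lgrp_affinity_set() below acquires the
 * locks in exactly that order:
 *
 *	mutex_enter(&pidlock);
 *	mutex_enter(&p->p_lock);
 *	thread_lock(t);		(in lgrp_affinity_set_thread())
 */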
int
lgrp_affinity_set(lgrp_affinity_args_t *ap)
{
	lgrp_affinity_t	aff;
	lgrp_affinity_t	*aff_buf;
	lgrp_affinity_args_t	args;
	id_t		id;
	idtype_t	idtype;
	lgrp_id_t	lgrp;
	int		nthreads;
	proc_t		*p;
	int		retval;

	/*
	 * Copyin arguments
	 */
	if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
		return (set_errno(EFAULT));

	idtype = args.idtype;
	id = args.id;
	lgrp = args.lgrp;
	aff = args.aff;

	/*
	 * Check for invalid lgroup
	 */
	if (lgrp < 0 || lgrp == LGRP_NONE)
		return (set_errno(EINVAL));

	/*
	 * Check for existing lgroup
	 */
	if (lgrp > lgrp_alloc_max)
		return (set_errno(ESRCH));

	/*
	 * Check for legal affinity
	 */
	if (aff != LGRP_AFF_NONE && aff != LGRP_AFF_WEAK &&
	    aff != LGRP_AFF_STRONG)
		return (set_errno(EINVAL));

	/*
	 * Must be process or LWP ID
	 */
	if (idtype != P_LWPID && idtype != P_PID)
		return (set_errno(EINVAL));

	/*
	 * Set given LWP's or process' affinity for specified lgroup
	 */
	switch (idtype) {

	case P_LWPID:
		/*
		 * Allocate memory for thread's lgroup affinities
		 * ahead of time w/o holding locks
		 */
		aff_buf = kmem_zalloc(nlgrpsmax * sizeof (lgrp_affinity_t),
		    KM_SLEEP);

		p = curproc;

		/*
		 * Set affinity for thread
		 */
		mutex_enter(&p->p_lock);
		if (id == P_MYID) {	/* current thread */
			retval = lgrp_affinity_set_thread(curthread, lgrp, aff,
			    &aff_buf);
		} else if (p->p_tlist == NULL) {
			retval = set_errno(ESRCH);
		} else {		/* other thread */
			int	found = 0;
			kthread_t	*t;

			t = p->p_tlist;
			do {
				if (t->t_tid == id) {
					retval = lgrp_affinity_set_thread(t,
					    lgrp, aff, &aff_buf);
					found = 1;
					break;
				}
			} while ((t = t->t_forw) != p->p_tlist);
			if (!found)
				retval = set_errno(ESRCH);
		}
		mutex_exit(&p->p_lock);

		/*
		 * Free memory for lgroup affinities,
		 * since thread didn't need it
		 */
		if (aff_buf)
			kmem_free(aff_buf,
			    nlgrpsmax * sizeof (lgrp_affinity_t));

		break;

	case P_PID:

		do {
			lgrp_affinity_t **aff_buf_array;
			int i;
			size_t size;

			/*
			 * Get process
			 */
			mutex_enter(&pidlock);

			if (id == P_MYID)
				p = curproc;
			else
				p = prfind(id);

			if (p == NULL) {
				mutex_exit(&pidlock);
				return (set_errno(ESRCH));
			}

			/*
			 * Get number of threads in process
			 *
			 * NOTE: Only care about user processes,
			 *	 so p_lwpcnt should be number of threads.
			 */
			mutex_enter(&p->p_lock);
			nthreads = p->p_lwpcnt;
			mutex_exit(&p->p_lock);

			mutex_exit(&pidlock);

			if (nthreads < 1)
				return (set_errno(ESRCH));

			/*
			 * Preallocate memory for lgroup affinities for
			 * each thread in process now to avoid holding
			 * any locks.  Allocate an array to hold a buffer
			 * for each thread.
			 */
			aff_buf_array = kmem_zalloc(nthreads *
			    sizeof (lgrp_affinity_t *), KM_SLEEP);

			size = nlgrpsmax * sizeof (lgrp_affinity_t);
			for (i = 0; i < nthreads; i++)
				aff_buf_array[i] = kmem_zalloc(size, KM_SLEEP);

			mutex_enter(&pidlock);

			/*
			 * Get process again since we dropped locks to
			 * allocate memory (except for current process)
			 */
			if (id != P_MYID)
				p = prfind(id);

			/*
			 * Process went away after we dropped locks and before
			 * reacquiring them, so drop locks, free memory, and
			 * return.
			 */
			if (p == NULL) {
				mutex_exit(&pidlock);
				for (i = 0; i < nthreads; i++)
					kmem_free(aff_buf_array[i], size);
				kmem_free(aff_buf_array,
				    nthreads * sizeof (lgrp_affinity_t *));
				return (set_errno(ESRCH));
			}

			mutex_enter(&p->p_lock);

			/*
			 * See whether number of threads is the same.
			 * If not, drop locks, free memory, and try again.
			 */
			if (nthreads != p->p_lwpcnt) {
				mutex_exit(&p->p_lock);
				mutex_exit(&pidlock);
				for (i = 0; i < nthreads; i++)
					kmem_free(aff_buf_array[i], size);
				kmem_free(aff_buf_array,
				    nthreads * sizeof (lgrp_affinity_t *));
				continue;
			}

			/*
			 * Set lgroup affinity for threads in process
			 */
			retval = lgrp_affinity_set_proc(p, lgrp, aff,
			    aff_buf_array);

			mutex_exit(&p->p_lock);
			mutex_exit(&pidlock);

			/*
			 * Free any leftover memory, since some threads may
			 * have already allocated memory and set lgroup
			 * affinities before
			 */
			for (i = 0; i < nthreads; i++)
				if (aff_buf_array[i] != NULL)
					kmem_free(aff_buf_array[i], size);
			kmem_free(aff_buf_array,
			    nthreads * sizeof (lgrp_affinity_t *));

			break;

		} while (nthreads != p->p_lwpcnt);

		break;

	default:
		retval = set_errno(EINVAL);
		break;
	}

	return (retval);
}


/*
 * Return the latest generation number for the lgroup hierarchy
 * with the given view
 */
lgrp_gen_t
lgrp_generation(lgrp_view_t view)
{
	cpupart_t	*cpupart;
	uint_t		gen;

	kpreempt_disable();

	/*
	 * Determine generation number for given view
	 */
	if (view == LGRP_VIEW_OS)
		/*
		 * Return generation number of lgroup hierarchy for OS view
		 */
		gen = lgrp_gen;
	else {
		/*
		 * For caller's view, use generation numbers for lgroup
		 * hierarchy and caller's pset
		 * NOTE: Caller needs to check for change in pset ID
		 */
		cpupart = curthread->t_cpupart;
		ASSERT(cpupart);
		gen = lgrp_gen + cpupart->cp_gen;
	}

	kpreempt_enable();

	return (gen);
}


lgrp_id_t
lgrp_home_thread(kthread_t *t)
{
	lgrp_id_t	home;

	ASSERT(t != NULL);
	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	thread_lock(t);

	/*
	 * Check to see whether caller has permission to get home lgroup
	 * of thread
	 */
	if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
		thread_unlock(t);
		return (set_errno(EPERM));
	}

	home = lgrp_home_id(t);

	thread_unlock(t);
	return (home);
}


/*
 * Get home lgroup of given process or thread
 */
lgrp_id_t
lgrp_home_get(idtype_t idtype, id_t id)
{
	proc_t		*p;
	lgrp_id_t	retval;
	kthread_t	*t;

	/*
	 * Get home lgroup of given LWP or process
	 */
	switch (idtype) {

	case P_LWPID:
		p = curproc;

		/*
		 * Get home lgroup of thread
		 */
		mutex_enter(&p->p_lock);
		if (id == P_MYID) {	/* current thread */
			retval = lgrp_home_thread(curthread);
		} else if (p->p_tlist == NULL) {
			retval = set_errno(ESRCH);
		} else {		/* other thread */
			int	found = 0;

			t = p->p_tlist;
			do {
				if (t->t_tid == id) {
					retval = lgrp_home_thread(t);
					found = 1;
					break;
				}
			} while ((t = t->t_forw) != p->p_tlist);
			if (!found)
				retval = set_errno(ESRCH);
		}
		mutex_exit(&p->p_lock);
		break;

	case P_PID:
		/*
		 * Get process
		 */
		mutex_enter(&pidlock);

		if (id == P_MYID)
			p = curproc;
		else
			p = prfind(id);

		if (p == NULL) {
			mutex_exit(&pidlock);
			return (set_errno(ESRCH));
		}

		mutex_enter(&p->p_lock);
		t = p->p_tlist;
		if (t == NULL)
			retval = set_errno(ESRCH);
		else
			retval = lgrp_home_thread(t);
		mutex_exit(&p->p_lock);

		mutex_exit(&pidlock);

		break;

	default:
		retval = set_errno(EINVAL);
		break;
	}

	return (retval);
}


/*
 * Return latency between "from" and "to" lgroups
 *
 * This latency number can only be used for relative comparison
 * between lgroups on the running system, cannot be used across platforms,
 * and may not reflect the actual latency.  It is platform and implementation
 * specific, so platform gets to decide its value.  It would be nice if the
 * number was at least proportional to make comparisons more meaningful though.
 */
int
lgrp_latency(lgrp_id_t from, lgrp_id_t to)
{
	lgrp_t		*from_lgrp;
	int		i;
	int		latency;
	int		latency_max;
	lgrp_t		*to_lgrp;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (from < 0 || to < 0)
		return (set_errno(EINVAL));

	if (from > lgrp_alloc_max || to > lgrp_alloc_max)
		return (set_errno(ESRCH));

	from_lgrp = lgrp_table[from];
	to_lgrp = lgrp_table[to];

	if (!LGRP_EXISTS(from_lgrp) || !LGRP_EXISTS(to_lgrp)) {
		return (set_errno(ESRCH));
	}

	/*
	 * Get latency for same lgroup
	 */
	if (from == to) {
		latency = from_lgrp->lgrp_latency;
		return (latency);
	}

	/*
	 * Get latency between leaf lgroups
	 */
	if (from_lgrp->lgrp_childcnt == 0 && to_lgrp->lgrp_childcnt == 0)
		return (lgrp_plat_latency(from_lgrp->lgrp_plathand,
		    to_lgrp->lgrp_plathand));

	/*
	 * Determine max latency between resources in two lgroups
	 */
	latency_max = 0;
	for (i = 0; i <= lgrp_alloc_max; i++) {
		lgrp_t	*from_rsrc;
		int	j;
		lgrp_t	*to_rsrc;

		from_rsrc = lgrp_table[i];
		if (!LGRP_EXISTS(from_rsrc) ||
		    !klgrpset_ismember(from_lgrp->lgrp_set[LGRP_RSRC_CPU], i))
			continue;

		for (j = 0; j <= lgrp_alloc_max; j++) {
			to_rsrc = lgrp_table[j];
			if (!LGRP_EXISTS(to_rsrc) ||
			    klgrpset_ismember(to_lgrp->lgrp_set[LGRP_RSRC_MEM],
			    j) == 0)
				continue;
			latency = lgrp_plat_latency(from_rsrc->lgrp_plathand,
			    to_rsrc->lgrp_plathand);
			if (latency > latency_max)
				latency_max = latency;
		}
	}
	return (latency_max);
}


/*
 * Return lgroup interface version number
 * 0 - none
 * 1 - original
 * 2 - lgrp_latency_cookie() and lgrp_resources() added
 */
int
lgrp_version(int version)
{
	/*
	 * Return LGRP_VER_NONE when requested version isn't supported
	 */
	if (version < LGRP_VER_NONE || version > LGRP_VER_CURRENT)
		return (LGRP_VER_NONE);

	/*
	 * Return current version when LGRP_VER_NONE passed in
	 */
	if (version == LGRP_VER_NONE)
		return (LGRP_VER_CURRENT);

	/*
	 * Otherwise, return supported version.
	 */
	return (version);
}


/*
 * Snapshot of lgroup hierarchy
 *
 * One snapshot is kept and is based on the kernel's native data model, so
 * a 32-bit snapshot is kept for the 32-bit kernel and a 64-bit one for the
 * 64-bit kernel.  If a 32-bit user wants a snapshot from the 64-bit kernel,
 * the kernel generates a 32-bit snapshot from the data in its 64-bit snapshot.
 *
 * The format is defined by lgroup snapshot header and the layout of
 * the snapshot in memory is as follows:
 * 1) lgroup snapshot header
 *    - specifies format of snapshot
 *    - defined by lgrp_snapshot_header_t
 * 2) lgroup info array
 *    - contains information about each lgroup
 *    - one element for each lgroup
 *    - each element is defined by lgrp_info_t
 * 3) lgroup CPU ID array
 *    - contains list (array) of CPU IDs for each lgroup
 *    - lgrp_info_t points into array and specifies how many CPUs belong to
 *      given lgroup
 * 4) lgroup parents array
 *    - contains lgroup bitmask of parents for each lgroup
 *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
 * 5) lgroup children array
 *    - contains lgroup bitmask of children for each lgroup
 *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
 * 6) lgroup resources array
 *    - contains lgroup bitmask of resources for each lgroup
 *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
 * 7) lgroup latency table
 *    - contains latency from each lgroup to each of other lgroups
 *
 * NOTE: Must use nlgrpsmax for per lgroup data structures because lgroups
 *	 may be sparsely allocated.
 */
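/*
 * As a sketch, lgrp_snapshot() below sizes the native buffer from this
 * layout, rounding each piece up so the next one is aligned:
 *
 *	bufsize = snap_hdr_size + info_size + cpuids_size +
 *	    bitmasks_size + lats_size;
 *
 * where bitmasks_size covers the pset lgroup set plus parents, children,
 * and LGRP_RSRC_COUNT resource sets for each of nlgrpsmax possible lgroups.
 */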
lgrp_snapshot_header_t	*lgrp_snap = NULL;	/* lgroup snapshot */
static kmutex_t		lgrp_snap_lock;		/* snapshot lock */


/*
 * Take a snapshot of lgroup hierarchy and return size of buffer
 * needed to hold snapshot
 */
static int
lgrp_snapshot(void)
{
	size_t		bitmask_size;
	size_t		bitmasks_size;
	size_t		bufsize;
	int		cpu_index;
	size_t		cpuids_size;
	int		i;
	int		j;
	size_t		info_size;
	size_t		lats_size;
	ulong_t		*lgrp_children;
	processorid_t	*lgrp_cpuids;
	lgrp_info_t	*lgrp_info;
	int		**lgrp_lats;
	ulong_t		*lgrp_parents;
	ulong_t		*lgrp_rsets;
	ulong_t		*lgrpset;
	int		snap_ncpus;
	int		snap_nlgrps;
	int		snap_nlgrpsmax;
	size_t		snap_hdr_size;
#ifdef	_SYSCALL32_IMPL
	model_t		model = DATAMODEL_NATIVE;

	/*
	 * Check whether we already have an up-to-date snapshot and the
	 * caller is a 32-bit program, in which case the size of the 32-bit
	 * snapshot needs to be returned now.
	 */
	model = get_udatamodel();
	if (model == DATAMODEL_ILP32 && lgrp_snap &&
	    lgrp_snap->ss_gen == lgrp_gen) {

		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

		/*
		 * Calculate size of buffer needed for 32-bit snapshot,
		 * rounding up size of each object to allow for alignment
		 * of next object in buffer.
		 */
		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
		    sizeof (caddr32_t));
		info_size =
		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
		    sizeof (processorid_t));
		cpuids_size =
		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
		    sizeof (ulong_t));

		/*
		 * lgroup bitmasks needed for parents, children, and resources
		 * for each lgroup and pset lgroup set
		 */
		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
		    snap_nlgrpsmax) + 1) * bitmask_size;

		/*
		 * Size of latency table and buffer
		 */
		lats_size = snap_nlgrpsmax * sizeof (caddr32_t) +
		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);

		bufsize = snap_hdr_size + info_size + cpuids_size +
		    bitmasks_size + lats_size;
		return (bufsize);
	}
#endif	/* _SYSCALL32_IMPL */

	/*
	 * Check whether snapshot is up-to-date
	 * Free it and take another one if not
	 */
	if (lgrp_snap) {
		if (lgrp_snap->ss_gen == lgrp_gen)
			return (lgrp_snap->ss_size);

		kmem_free(lgrp_snap, lgrp_snap->ss_size);
		lgrp_snap = NULL;
	}

	/*
	 * Allocate memory for snapshot
	 * w/o holding cpu_lock while waiting for memory
	 */
	while (lgrp_snap == NULL) {
		int	old_generation;

		/*
		 * Take snapshot of lgroup generation number
		 * and configuration size dependent information
		 * NOTE: Only count number of online CPUs,
		 * since only online CPUs appear in lgroups.
		 */
		mutex_enter(&cpu_lock);
		old_generation = lgrp_gen;
		snap_ncpus = ncpus_online;
		snap_nlgrps = nlgrps;
		snap_nlgrpsmax = nlgrpsmax;
		mutex_exit(&cpu_lock);

		/*
		 * Calculate size of buffer needed for snapshot,
		 * rounding up size of each object to allow for alignment
		 * of next object in buffer.
		 */
		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
		    sizeof (void *));
		info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
		    sizeof (processorid_t));
		cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
		    sizeof (ulong_t));
		/*
		 * lgroup bitmasks needed for pset lgroup set and parents,
		 * children, and resource sets for each lgroup
		 */
		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
		    snap_nlgrpsmax) + 1) * bitmask_size;

		/*
		 * Size of latency table and buffer
		 */
		lats_size = snap_nlgrpsmax * sizeof (int *) +
		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);

		bufsize = snap_hdr_size + info_size + cpuids_size +
		    bitmasks_size + lats_size;

		/*
		 * Allocate memory for buffer
		 */
		lgrp_snap = kmem_zalloc(bufsize, KM_NOSLEEP);
		if (lgrp_snap == NULL)
			return (set_errno(ENOMEM));

		/*
		 * Check whether generation number has changed
		 */
		mutex_enter(&cpu_lock);
		if (lgrp_gen == old_generation)
			break;		/* hasn't changed, so done. */

		/*
		 * Generation number changed, so free memory and try again.
		 */
		mutex_exit(&cpu_lock);
		kmem_free(lgrp_snap, bufsize);
		lgrp_snap = NULL;
	}

	/*
	 * Fill in lgroup snapshot header
	 * (including pointers to tables of lgroup info, CPU IDs, and parents
	 * and children)
	 */
	lgrp_snap->ss_version = LGRP_VER_CURRENT;

	/*
	 * XXX For now, liblgrp only needs to know whether the hierarchy
	 * XXX only has one level or not
	 */
	if (snap_nlgrps == 1)
		lgrp_snap->ss_levels = 1;
	else
		lgrp_snap->ss_levels = 2;

	lgrp_snap->ss_root = LGRP_ROOTID;

	lgrp_snap->ss_nlgrps = lgrp_snap->ss_nlgrps_os = snap_nlgrps;
	lgrp_snap->ss_nlgrps_max = snap_nlgrpsmax;
	lgrp_snap->ss_ncpus = snap_ncpus;
	lgrp_snap->ss_gen = lgrp_gen;
	lgrp_snap->ss_view = LGRP_VIEW_OS;
	lgrp_snap->ss_pset = 0;		/* NOTE: caller should set if needed */
	lgrp_snap->ss_size = bufsize;
	lgrp_snap->ss_magic = (uintptr_t)lgrp_snap;

	lgrp_snap->ss_info = lgrp_info =
	    (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);

	lgrp_snap->ss_cpuids = lgrp_cpuids =
	    (processorid_t *)((uintptr_t)lgrp_info + info_size);

	lgrp_snap->ss_lgrpset = lgrpset =
	    (ulong_t *)((uintptr_t)lgrp_cpuids + cpuids_size);

	lgrp_snap->ss_parents = lgrp_parents =
	    (ulong_t *)((uintptr_t)lgrpset + bitmask_size);

	lgrp_snap->ss_children = lgrp_children =
	    (ulong_t *)((uintptr_t)lgrp_parents + (snap_nlgrpsmax *
	    bitmask_size));

	lgrp_snap->ss_rsets = lgrp_rsets =
	    (ulong_t *)((uintptr_t)lgrp_children + (snap_nlgrpsmax *
	    bitmask_size));

	lgrp_snap->ss_latencies = lgrp_lats =
	    (int **)((uintptr_t)lgrp_rsets + (LGRP_RSRC_COUNT *
	    snap_nlgrpsmax * bitmask_size));

	/*
	 * Fill in lgroup information
	 */
	cpu_index = 0;
	for (i = 0; i < snap_nlgrpsmax; i++) {
		struct cpu	*cp;
		int		cpu_count;
		struct cpu	*head;
		int		k;
		lgrp_t		*lgrp;

		lgrp = lgrp_table[i];
		if (!LGRP_EXISTS(lgrp)) {
			bzero(&lgrp_info[i], sizeof (lgrp_info[i]));
			lgrp_info[i].info_lgrpid = LGRP_NONE;
			continue;
		}

		lgrp_info[i].info_lgrpid = i;
		lgrp_info[i].info_latency = lgrp->lgrp_latency;

		/*
		 * Fill in parents, children, and lgroup resources
		 */
		lgrp_info[i].info_parents =
		    (ulong_t *)((uintptr_t)lgrp_parents + (i * bitmask_size));

		if (lgrp->lgrp_parent)
			BT_SET(lgrp_info[i].info_parents,
			    lgrp->lgrp_parent->lgrp_id);

		lgrp_info[i].info_children =
		    (ulong_t *)((uintptr_t)lgrp_children + (i * bitmask_size));

		for (j = 0; j < snap_nlgrpsmax; j++)
			if (klgrpset_ismember(lgrp->lgrp_children, j))
				BT_SET(lgrp_info[i].info_children, j);

		lgrp_info[i].info_rset =
		    (ulong_t *)((uintptr_t)lgrp_rsets +
		    (i * LGRP_RSRC_COUNT * bitmask_size));

		for (j = 0; j < LGRP_RSRC_COUNT; j++) {
			ulong_t	*rset;

			rset = (ulong_t *)((uintptr_t)lgrp_info[i].info_rset +
			    (j * bitmask_size));
			for (k = 0; k < snap_nlgrpsmax; k++)
				if (klgrpset_ismember(lgrp->lgrp_set[j], k))
					BT_SET(rset, k);
		}

		/*
		 * Fill in CPU IDs
		 */
		cpu_count = 0;
		lgrp_info[i].info_cpuids = NULL;
		cp = head = lgrp->lgrp_cpu;
		if (head != NULL) {
			lgrp_info[i].info_cpuids = &lgrp_cpuids[cpu_index];
			do {
				lgrp_cpuids[cpu_index] = cp->cpu_id;
				cpu_index++;
				cpu_count++;
				cp = cp->cpu_next_lgrp;
			} while (cp != head);
		}
		ASSERT(cpu_count == lgrp->lgrp_cpucnt);
		lgrp_info[i].info_ncpus = cpu_count;

		/*
		 * Fill in memory sizes for lgroups that directly contain
		 * memory
		 */
		if (klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM], i)) {
			lgrp_info[i].info_mem_free =
			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
			lgrp_info[i].info_mem_install =
			    lgrp_mem_size(i, LGRP_MEM_SIZE_INSTALL);
		}

		/*
		 * Fill in latency table and buffer
		 */
		lgrp_lats[i] = (int *)((uintptr_t)lgrp_lats + snap_nlgrpsmax *
		    sizeof (int *) + i * snap_nlgrpsmax * sizeof (int));
		for (j = 0; j < snap_nlgrpsmax; j++) {
			lgrp_t	*to;

			to = lgrp_table[j];
			if (!LGRP_EXISTS(to))
				continue;
			lgrp_lats[i][j] = lgrp_latency(lgrp->lgrp_id,
			    to->lgrp_id);
		}
	}
	ASSERT(cpu_index == snap_ncpus);


	mutex_exit(&cpu_lock);

#ifdef	_SYSCALL32_IMPL
	/*
	 * Check whether the caller is a 32-bit program that needs the size
	 * of the 32-bit snapshot returned now that the snapshot has been
	 * taken/updated.  This may not have been possible earlier if the
	 * snapshot was out of date or didn't exist yet.
	 */
	if (model == DATAMODEL_ILP32) {

		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

		/*
		 * Calculate size of buffer needed for 32-bit snapshot,
		 * rounding up size of each object to allow for alignment
		 * of next object in buffer.
		 */
		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
		    sizeof (caddr32_t));
		info_size =
		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
		    sizeof (processorid_t));
		cpuids_size =
		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
		    sizeof (ulong_t));

		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
		bitmasks_size = (((2 + LGRP_RSRC_COUNT) * snap_nlgrpsmax) +
		    1) * bitmask_size;


		/*
		 * Size of latency table and buffer
		 */
		lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
		    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));

		bufsize = snap_hdr_size + info_size + cpuids_size +
		    bitmasks_size + lats_size;
		return (bufsize);
	}
#endif	/* _SYSCALL32_IMPL */

	return (lgrp_snap->ss_size);
}


/*
 * Copy snapshot into given user buffer, fix up any pointers in buffer to point
 * into user instead of kernel address space, and return size of buffer
 * needed to hold snapshot
 */
static int
lgrp_snapshot_copy(char *buf, size_t bufsize)
{
	size_t			bitmask_size;
	int			cpu_index;
	size_t			cpuids_size;
	int			i;
	size_t			info_size;
	lgrp_info_t		*lgrp_info;
	int			retval;
	size_t			snap_hdr_size;
	int			snap_ncpus;
	int			snap_nlgrpsmax;
	lgrp_snapshot_header_t	*user_snap;
	lgrp_info_t		*user_info;
	lgrp_info_t		*user_info_buffer;
	processorid_t		*user_cpuids;
	ulong_t			*user_lgrpset;
	ulong_t			*user_parents;
	ulong_t			*user_children;
	int			**user_lats;
	int			**user_lats_buffer;
	ulong_t			*user_rsets;

	if (lgrp_snap == NULL)
		return (0);

	if (buf == NULL || bufsize <= 0)
		return (lgrp_snap->ss_size);

	/*
	 * User needs to try getting size of buffer again
	 * because given buffer size is too small.
	 * The lgroup hierarchy may have changed after they asked for the size
	 * but before the snapshot was taken.
	 */
	if (bufsize < lgrp_snap->ss_size)
		return (set_errno(EAGAIN));

	snap_ncpus = lgrp_snap->ss_ncpus;
	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

	/*
	 * Fill in lgrpset now because caller may have changed psets
	 */
	kpreempt_disable();
	for (i = 0; i < snap_nlgrpsmax; i++) {
		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
		    i)) {
			BT_SET(lgrp_snap->ss_lgrpset, i);
		}
	}
	kpreempt_enable();

	/*
	 * Copy lgroup snapshot (snapshot header, lgroup info, and CPU IDs)
	 * into user buffer all at once
	 */
	if (copyout(lgrp_snap, buf, lgrp_snap->ss_size) != 0)
		return (set_errno(EFAULT));

	/*
	 * Round up sizes of lgroup snapshot header and info for alignment
	 */
	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
	    sizeof (void *));
	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
	    sizeof (processorid_t));
	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
	    sizeof (ulong_t));

	bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);

	/*
	 * Calculate pointers into user buffer for lgroup snapshot header,
	 * info, and CPU IDs
	 */
	user_snap = (lgrp_snapshot_header_t *)buf;
	user_info = (lgrp_info_t *)((uintptr_t)user_snap + snap_hdr_size);
	user_cpuids = (processorid_t *)((uintptr_t)user_info + info_size);
	user_lgrpset = (ulong_t *)((uintptr_t)user_cpuids + cpuids_size);
	user_parents = (ulong_t *)((uintptr_t)user_lgrpset + bitmask_size);
	user_children = (ulong_t *)((uintptr_t)user_parents +
	    (snap_nlgrpsmax * bitmask_size));
	user_rsets = (ulong_t *)((uintptr_t)user_children +
	    (snap_nlgrpsmax * bitmask_size));
	user_lats = (int **)((uintptr_t)user_rsets +
	    (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size));

	/*
	 * Copyout magic number (ie. pointer to beginning of buffer)
	 */
	if (copyout(&buf, &user_snap->ss_magic, sizeof (buf)) != 0)
		return (set_errno(EFAULT));

	/*
	 * Fix up pointers in user buffer to point into user buffer
	 * not kernel snapshot
	 */
	if (copyout(&user_info, &user_snap->ss_info, sizeof (user_info)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_cpuids, &user_snap->ss_cpuids,
	    sizeof (user_cpuids)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_lgrpset, &user_snap->ss_lgrpset,
	    sizeof (user_lgrpset)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_parents, &user_snap->ss_parents,
	    sizeof (user_parents)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_children, &user_snap->ss_children,
	    sizeof (user_children)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_rsets, &user_snap->ss_rsets,
	    sizeof (user_rsets)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_lats, &user_snap->ss_latencies,
	    sizeof (user_lats)) != 0)
		return (set_errno(EFAULT));

	/*
	 * Make copies of lgroup info and latency table, fix up pointers,
	 * and then copy them into user buffer
	 */
	user_info_buffer = kmem_zalloc(info_size, KM_NOSLEEP);
	if (user_info_buffer == NULL)
		return (set_errno(ENOMEM));

	user_lats_buffer = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
	    KM_NOSLEEP);
	if (user_lats_buffer == NULL) {
		kmem_free(user_info_buffer, info_size);
		return (set_errno(ENOMEM));
	}

	lgrp_info = (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
	bcopy(lgrp_info, user_info_buffer, info_size);

	cpu_index = 0;
	for (i = 0; i < snap_nlgrpsmax; i++) {
		ulong_t	*snap_rset;

		/*
		 * Skip non-existent lgroups
		 */
		if (user_info_buffer[i].info_lgrpid == LGRP_NONE)
			continue;

		/*
		 * Update free memory size since it changes frequently
		 * Only do so for lgroups directly containing memory
		 *
		 * NOTE: This must be done before changing the pointers to
		 *	 point into user space since we need to dereference
		 *	 lgroup resource set
		 */
		snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
		    BT_BITOUL(snap_nlgrpsmax)];
		if (BT_TEST(snap_rset, i))
			user_info_buffer[i].info_mem_free =
			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);

		/*
		 * Fix up pointers to parents, children, resources, and
		 * latencies
		 */
		user_info_buffer[i].info_parents =
		    (ulong_t *)((uintptr_t)user_parents + (i * bitmask_size));
		user_info_buffer[i].info_children =
		    (ulong_t *)((uintptr_t)user_children + (i * bitmask_size));
		user_info_buffer[i].info_rset =
		    (ulong_t *)((uintptr_t)user_rsets +
		    (i * LGRP_RSRC_COUNT * bitmask_size));
		user_lats_buffer[i] = (int *)((uintptr_t)user_lats +
		    (snap_nlgrpsmax * sizeof (int *)) + (i * snap_nlgrpsmax *
		    sizeof (int)));

		/*
		 * Fix up pointer to CPU IDs
		 */
		if (user_info_buffer[i].info_ncpus == 0) {
			user_info_buffer[i].info_cpuids = NULL;
			continue;
		}
		user_info_buffer[i].info_cpuids = &user_cpuids[cpu_index];
		cpu_index += user_info_buffer[i].info_ncpus;
	}
	ASSERT(cpu_index == snap_ncpus);

	/*
	 * Copy lgroup info and latency table with pointers fixed up to point
	 * into user buffer out to user buffer now
	 */
	retval = lgrp_snap->ss_size;
	if (copyout(user_info_buffer, user_info, info_size) != 0)
		retval = set_errno(EFAULT);
	kmem_free(user_info_buffer, info_size);

	if (copyout(user_lats_buffer, user_lats, snap_nlgrpsmax *
	    sizeof (int *)) != 0)
		retval = set_errno(EFAULT);
	kmem_free(user_lats_buffer, snap_nlgrpsmax * sizeof (int *));

	return (retval);
}


#ifdef	_SYSCALL32_IMPL
/*
 * Make 32-bit copy of snapshot, fix up any pointers in buffer to point
 * into user instead of kernel address space, copy 32-bit snapshot into
 * given user buffer, and return size of buffer needed to hold snapshot
 */
static int
lgrp_snapshot_copy32(caddr32_t buf, size32_t bufsize)
{
	size32_t			bitmask_size;
	size32_t			bitmasks_size;
	size32_t			children_size;
	int				cpu_index;
	size32_t			cpuids_size;
	int				i;
	int				j;
	size32_t			info_size;
	size32_t			lats_size;
	lgrp_info_t			*lgrp_info;
	lgrp_snapshot_header32_t	*lgrp_snap32;
	lgrp_info32_t			*lgrp_info32;
	processorid_t			*lgrp_cpuids32;
	caddr32_t			*lgrp_lats32;
	int				**lgrp_lats32_kernel;
	uint_t				*lgrp_set32;
	uint_t				*lgrp_parents32;
	uint_t				*lgrp_children32;
	uint_t				*lgrp_rsets32;
	size32_t			parents_size;
	size32_t			rsets_size;
	size32_t			set_size;
	size32_t			snap_hdr_size;
	int				snap_ncpus;
	int				snap_nlgrpsmax;
	size32_t			snap_size;

	if (lgrp_snap == NULL)
		return (0);

	snap_ncpus = lgrp_snap->ss_ncpus;
	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

	/*
	 * Calculate size of buffer needed for 32-bit snapshot,
	 * rounding up size of each object to allow for alignment
	 * of next object in buffer.
	 */
	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
	    sizeof (caddr32_t));
	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
	    sizeof (processorid_t));
	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
	    sizeof (ulong_t));

	bitmask_size = BT_SIZEOFMAP32(snap_nlgrpsmax);

	set_size = bitmask_size;
	parents_size = snap_nlgrpsmax * bitmask_size;
	children_size = snap_nlgrpsmax * bitmask_size;
	rsets_size = P2ROUNDUP(LGRP_RSRC_COUNT * snap_nlgrpsmax *
	    (int)bitmask_size, sizeof (caddr32_t));

	bitmasks_size = set_size + parents_size + children_size + rsets_size;

	/*
	 * Size of latency table and buffer
	 */
	lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
	    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));

	snap_size = snap_hdr_size + info_size + cpuids_size + bitmasks_size +
	    lats_size;

	if (buf == NULL || bufsize <= 0) {
		return (snap_size);
	}

	/*
	 * User needs to try getting size of buffer again
	 * because given buffer size is too small.
	 * The lgroup hierarchy may have changed after they asked for the size
	 * but before the snapshot was taken.
	 */
	if (bufsize < snap_size)
		return (set_errno(EAGAIN));

	/*
	 * Make 32-bit copy of snapshot, fix up pointers to point into user
	 * buffer not kernel, and then copy whole thing into user buffer
	 */
	lgrp_snap32 = kmem_zalloc(snap_size, KM_NOSLEEP);
	if (lgrp_snap32 == NULL)
		return (set_errno(ENOMEM));

	/*
	 * Calculate pointers into 32-bit copy of snapshot
	 * for lgroup info, CPU IDs, pset lgroup bitmask, parents, children,
	 * resources, and latency table and buffer
	 */
	lgrp_info32 = (lgrp_info32_t *)((uintptr_t)lgrp_snap32 +
	    snap_hdr_size);
	lgrp_cpuids32 = (processorid_t *)((uintptr_t)lgrp_info32 + info_size);
	lgrp_set32 = (uint_t *)((uintptr_t)lgrp_cpuids32 + cpuids_size);
	lgrp_parents32 = (uint_t *)((uintptr_t)lgrp_set32 + set_size);
	lgrp_children32 = (uint_t *)((uintptr_t)lgrp_parents32 + parents_size);
	lgrp_rsets32 = (uint_t *)((uintptr_t)lgrp_children32 + children_size);
	lgrp_lats32 = (caddr32_t *)((uintptr_t)lgrp_rsets32 + rsets_size);

	/*
	 * Make temporary lgroup latency table of pointers for kernel to use
	 * to fill in rows of table with latencies from each lgroup
	 */
	lgrp_lats32_kernel = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
	    KM_NOSLEEP);
	if (lgrp_lats32_kernel == NULL) {
		kmem_free(lgrp_snap32, snap_size);
		return (set_errno(ENOMEM));
	}

	/*
	 * Fill in 32-bit lgroup snapshot header
	 * (with pointers into user's buffer for lgroup info, CPU IDs,
	 * bit masks, and latencies)
	 */
	lgrp_snap32->ss_version = lgrp_snap->ss_version;
	lgrp_snap32->ss_levels = lgrp_snap->ss_levels;
	lgrp_snap32->ss_nlgrps = lgrp_snap32->ss_nlgrps_os =
	    lgrp_snap->ss_nlgrps;
	lgrp_snap32->ss_nlgrps_max = snap_nlgrpsmax;
	lgrp_snap32->ss_root = lgrp_snap->ss_root;
	lgrp_snap32->ss_ncpus = lgrp_snap->ss_ncpus;
	lgrp_snap32->ss_gen = lgrp_snap->ss_gen;
	lgrp_snap32->ss_view = LGRP_VIEW_OS;
	lgrp_snap32->ss_size = snap_size;
	lgrp_snap32->ss_magic = buf;
	lgrp_snap32->ss_info = buf + snap_hdr_size;
	lgrp_snap32->ss_cpuids = lgrp_snap32->ss_info + info_size;
	lgrp_snap32->ss_lgrpset = lgrp_snap32->ss_cpuids + cpuids_size;
	lgrp_snap32->ss_parents = lgrp_snap32->ss_lgrpset + bitmask_size;
	lgrp_snap32->ss_children = lgrp_snap32->ss_parents +
	    (snap_nlgrpsmax * bitmask_size);
	lgrp_snap32->ss_rsets = lgrp_snap32->ss_children +
	    (snap_nlgrpsmax * bitmask_size);
	lgrp_snap32->ss_latencies = lgrp_snap32->ss_rsets +
	    (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size);

	/*
	 * Fill in lgrpset now because caller may have changed psets
	 */
	kpreempt_disable();
	for (i = 0; i < snap_nlgrpsmax; i++) {
		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
		    i)) {
			BT_SET32(lgrp_set32, i);
		}
	}
	kpreempt_enable();

	/*
	 * Fill in 32-bit copy of lgroup info and fix up pointers
	 * to point into user's buffer instead of kernel's
	 */
	cpu_index = 0;
	lgrp_info = lgrp_snap->ss_info;
	for (i = 0; i < snap_nlgrpsmax; i++) {
		uint_t	*children;
		uint_t	*lgrp_rset;
		uint_t	*parents;
		ulong_t	*snap_rset;

		/*
		 * Skip non-existent lgroups
		 */
		if (lgrp_info[i].info_lgrpid == LGRP_NONE) {
			bzero(&lgrp_info32[i], sizeof (lgrp_info32[i]));
			lgrp_info32[i].info_lgrpid = LGRP_NONE;
			continue;
		}

		/*
		 * Fill in parents, children, lgroup resource set, and
		 * latencies from snapshot
		 */
		parents = (uint_t *)((uintptr_t)lgrp_parents32 +
		    i * bitmask_size);
		children = (uint_t *)((uintptr_t)lgrp_children32 +
		    i * bitmask_size);
		snap_rset = (ulong_t *)((uintptr_t)lgrp_snap->ss_rsets +
		    (i * LGRP_RSRC_COUNT * BT_SIZEOFMAP(snap_nlgrpsmax)));
		lgrp_rset = (uint_t *)((uintptr_t)lgrp_rsets32 +
		    (i * LGRP_RSRC_COUNT * bitmask_size));
		lgrp_lats32_kernel[i] = (int *)((uintptr_t)lgrp_lats32 +
		    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
		    sizeof (int));
		for (j = 0; j < snap_nlgrpsmax; j++) {
			int	k;
			uint_t	*rset;

			if (BT_TEST(&lgrp_snap->ss_parents[i], j))
				BT_SET32(parents, j);

			if (BT_TEST(&lgrp_snap->ss_children[i], j))
				BT_SET32(children, j);

			for (k = 0; k < LGRP_RSRC_COUNT; k++) {
				rset = (uint_t *)((uintptr_t)lgrp_rset +
				    k * bitmask_size);
				if (BT_TEST(&snap_rset[k], j))
					BT_SET32(rset, j);
			}

			lgrp_lats32_kernel[i][j] =
			    lgrp_snap->ss_latencies[i][j];
		}

		/*
		 * Fix up pointer to latency buffer
		 */
		lgrp_lats32[i] = lgrp_snap32->ss_latencies +
		    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
		    sizeof (int);

		/*
		 * Fix up pointers for parents, children, and resources
		 */
		lgrp_info32[i].info_parents = lgrp_snap32->ss_parents +
		    (i * bitmask_size);
		lgrp_info32[i].info_children = lgrp_snap32->ss_children +
		    (i * bitmask_size);
		lgrp_info32[i].info_rset = lgrp_snap32->ss_rsets +
		    (i * LGRP_RSRC_COUNT * bitmask_size);

		/*
		 * Fill in memory and CPU info
		 * Only fill in memory for lgroups directly containing memory
		 */
		snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
		    BT_BITOUL(snap_nlgrpsmax)];
		if (BT_TEST(snap_rset, i)) {
			lgrp_info32[i].info_mem_free = lgrp_mem_size(i,
			    LGRP_MEM_SIZE_FREE);
			lgrp_info32[i].info_mem_install =
			    lgrp_info[i].info_mem_install;
		}

		lgrp_info32[i].info_ncpus = lgrp_info[i].info_ncpus;

		lgrp_info32[i].info_lgrpid = lgrp_info[i].info_lgrpid;
		lgrp_info32[i].info_latency = lgrp_info[i].info_latency;

		if (lgrp_info32[i].info_ncpus == 0) {
			lgrp_info32[i].info_cpuids = 0;
			continue;
		}

		/*
		 * Fix up pointer for CPU IDs
		 */
		lgrp_info32[i].info_cpuids = lgrp_snap32->ss_cpuids +
		    (cpu_index * sizeof (processorid_t));
		cpu_index += lgrp_info32[i].info_ncpus;
	}
	ASSERT(cpu_index == snap_ncpus);

	/*
	 * Copy lgroup CPU IDs into 32-bit snapshot
	 * before copying it out into user's buffer
	 */
	bcopy(lgrp_snap->ss_cpuids, lgrp_cpuids32, cpuids_size);

	/*
	 * Copy 32-bit lgroup snapshot into user's buffer all at once
	 */
	if (copyout(lgrp_snap32, (void *)(uintptr_t)buf, snap_size) != 0) {
		kmem_free(lgrp_snap32, snap_size);
		kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));
		return (set_errno(EFAULT));
	}

	kmem_free(lgrp_snap32, snap_size);
	kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));

	return (snap_size);
}
#endif	/* _SYSCALL32_IMPL */


int
lgrpsys(int subcode, long ia, void *ap)
{
	size_t	bufsize;
	int	latency;

	switch (subcode) {

	case LGRP_SYS_AFFINITY_GET:
		return (lgrp_affinity_get((lgrp_affinity_args_t *)ap));

	case LGRP_SYS_AFFINITY_SET:
		return (lgrp_affinity_set((lgrp_affinity_args_t *)ap));

	case LGRP_SYS_GENERATION:
		return (lgrp_generation(ia));

	case LGRP_SYS_HOME:
		return (lgrp_home_get((idtype_t)ia, (id_t)(uintptr_t)ap));

	case LGRP_SYS_LATENCY:
		mutex_enter(&cpu_lock);
		latency = lgrp_latency(ia, (lgrp_id_t)(uintptr_t)ap);
		mutex_exit(&cpu_lock);
		return (latency);

	case LGRP_SYS_MEMINFO:
		return (meminfo(ia, (struct meminfo *)ap));

	case LGRP_SYS_VERSION:
		return (lgrp_version(ia));

	case LGRP_SYS_SNAPSHOT:
		mutex_enter(&lgrp_snap_lock);
		bufsize = lgrp_snapshot();
		if (ap && ia > 0) {
			if (get_udatamodel() == DATAMODEL_NATIVE)
				bufsize = lgrp_snapshot_copy(ap, ia);
#ifdef	_SYSCALL32_IMPL
			else
				bufsize = lgrp_snapshot_copy32(
				    (caddr32_t)(uintptr_t)ap, ia);
#endif	/* _SYSCALL32_IMPL */
		}
		mutex_exit(&lgrp_snap_lock);
		return (bufsize);

	default:
		break;

	}

	return (set_errno(EINVAL));
}