/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E%	SMI"

/*
 * Basic NUMA support in terms of locality groups
 *
 * Solaris needs to know which CPUs, memory, etc. are near each other to
 * provide good performance on NUMA machines by optimizing for locality.
 * In order to do this, a new abstraction called a "locality group (lgroup)"
 * has been introduced to keep track of which CPU-like and memory-like hardware
 * resources are close to each other.  Currently, latency is the only measure
 * used to determine how to group hardware resources into lgroups, but this
 * does not limit the groupings to be based solely on latency.  Other factors
 * may be used to determine the groupings in the future.
 *
 * Lgroups are organized into a hierarchy or topology that represents the
 * latency topology of the machine.  There is always at least a root lgroup in
 * the system.  It represents all the hardware resources in the machine at a
 * latency big enough that any hardware resource can at least access any other
 * hardware resource within that latency.  A Uniform Memory Access (UMA)
 * machine is represented with one lgroup (the root).  In contrast, a NUMA
 * machine is represented at least by the root lgroup and some number of leaf
 * lgroups where the leaf lgroups contain the hardware resources within the
 * least latency of each other and the root lgroup still contains all the
 * resources in the machine.  Some number of intermediate lgroups may exist
 * which represent more levels of locality than just the local latency of the
 * leaf lgroups and the system latency of the root lgroup.  Non-leaf lgroups
 * (eg. root and intermediate lgroups) contain the next nearest resources to
 * its children lgroups.  Thus, the lgroup hierarchy from a given leaf lgroup
 * to the root lgroup shows the hardware resources from closest to farthest
 * from the leaf lgroup such that each successive ancestor lgroup contains
 * the next nearest resources at the next level of locality from the previous.
 *
 * The kernel uses the lgroup abstraction to know how to allocate resources
 * near a given process/thread.  At fork() and lwp/thread_create() time, a
 * "home" lgroup is chosen for a thread.  This is done by picking the lgroup
 * with the lowest load average.  Binding to a processor or processor set will
 * change the home lgroup for a thread.  The scheduler has been modified to try
 * to dispatch a thread on a CPU in its home lgroup.  Physical memory
 * allocation is lgroup aware too, so memory will be allocated from the current
 * thread's home lgroup if possible.  If the desired resources are not
 * available, the kernel traverses the lgroup hierarchy going to the parent
 * lgroup to find resources at the next level of locality until it reaches the
 * root lgroup.
697c478bd9Sstevel@tonic-gate */ 707c478bd9Sstevel@tonic-gate 717c478bd9Sstevel@tonic-gate #include <sys/lgrp.h> 727c478bd9Sstevel@tonic-gate #include <sys/lgrp_user.h> 737c478bd9Sstevel@tonic-gate #include <sys/types.h> 747c478bd9Sstevel@tonic-gate #include <sys/mman.h> 757c478bd9Sstevel@tonic-gate #include <sys/param.h> 767c478bd9Sstevel@tonic-gate #include <sys/var.h> 777c478bd9Sstevel@tonic-gate #include <sys/thread.h> 787c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 797c478bd9Sstevel@tonic-gate #include <sys/cpupart.h> 807c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 817c478bd9Sstevel@tonic-gate #include <vm/seg.h> 827c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 837c478bd9Sstevel@tonic-gate #include <vm/seg_spt.h> 847c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h> 857c478bd9Sstevel@tonic-gate #include <vm/as.h> 867c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 877c478bd9Sstevel@tonic-gate #include <sys/systm.h> 887c478bd9Sstevel@tonic-gate #include <sys/errno.h> 897c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 907c478bd9Sstevel@tonic-gate #include <sys/kstat.h> 917c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 927c478bd9Sstevel@tonic-gate #include <sys/chip.h> 937c478bd9Sstevel@tonic-gate #include <sys/promif.h> 947c478bd9Sstevel@tonic-gate #include <sys/sdt.h> 957c478bd9Sstevel@tonic-gate 967c478bd9Sstevel@tonic-gate lgrp_gen_t lgrp_gen = 0; /* generation of lgroup hierarchy */ 977c478bd9Sstevel@tonic-gate lgrp_t *lgrp_table[NLGRPS_MAX]; /* table of all initialized lgrp_t structs */ 987c478bd9Sstevel@tonic-gate /* indexed by lgrp_id */ 997c478bd9Sstevel@tonic-gate int nlgrps; /* number of lgroups in machine */ 1007c478bd9Sstevel@tonic-gate int lgrp_alloc_hint = -1; /* hint for where to try to allocate next */ 1017c478bd9Sstevel@tonic-gate int lgrp_alloc_max = 0; /* max lgroup ID allocated so far */ 1027c478bd9Sstevel@tonic-gate 1037c478bd9Sstevel@tonic-gate /* 1047c478bd9Sstevel@tonic-gate * Kstat data for lgroups. 
1057c478bd9Sstevel@tonic-gate * 1067c478bd9Sstevel@tonic-gate * Actual kstat data is collected in lgrp_stats array. 1077c478bd9Sstevel@tonic-gate * The lgrp_kstat_data array of named kstats is used to extract data from 1087c478bd9Sstevel@tonic-gate * lgrp_stats and present it to kstat framework. It is protected from partallel 1097c478bd9Sstevel@tonic-gate * modifications by lgrp_kstat_mutex. This may cause some contention when 1107c478bd9Sstevel@tonic-gate * several kstat commands run in parallel but this is not the 1117c478bd9Sstevel@tonic-gate * performance-critical path. 1127c478bd9Sstevel@tonic-gate */ 1137c478bd9Sstevel@tonic-gate extern struct lgrp_stats lgrp_stats[]; /* table of per-lgrp stats */ 1147c478bd9Sstevel@tonic-gate 1157c478bd9Sstevel@tonic-gate /* 1167c478bd9Sstevel@tonic-gate * Declare kstat names statically for enums as defined in the header file. 1177c478bd9Sstevel@tonic-gate */ 1187c478bd9Sstevel@tonic-gate LGRP_KSTAT_NAMES; 1197c478bd9Sstevel@tonic-gate 1207c478bd9Sstevel@tonic-gate static void lgrp_kstat_init(void); 1217c478bd9Sstevel@tonic-gate static int lgrp_kstat_extract(kstat_t *, int); 1227c478bd9Sstevel@tonic-gate static void lgrp_kstat_reset(lgrp_id_t); 1237c478bd9Sstevel@tonic-gate 1247c478bd9Sstevel@tonic-gate static struct kstat_named lgrp_kstat_data[LGRP_NUM_STATS]; 1257c478bd9Sstevel@tonic-gate static kmutex_t lgrp_kstat_mutex; 1267c478bd9Sstevel@tonic-gate 1277c478bd9Sstevel@tonic-gate 1287c478bd9Sstevel@tonic-gate /* 1297c478bd9Sstevel@tonic-gate * max number of lgroups supported by the platform 1307c478bd9Sstevel@tonic-gate */ 1317c478bd9Sstevel@tonic-gate int nlgrpsmax = 0; 1327c478bd9Sstevel@tonic-gate 1337c478bd9Sstevel@tonic-gate /* 1347c478bd9Sstevel@tonic-gate * The root lgroup. Represents the set of resources at the system wide 1357c478bd9Sstevel@tonic-gate * level of locality. 
1367c478bd9Sstevel@tonic-gate */ 1377c478bd9Sstevel@tonic-gate lgrp_t *lgrp_root = NULL; 1387c478bd9Sstevel@tonic-gate 1397c478bd9Sstevel@tonic-gate /* 1407c478bd9Sstevel@tonic-gate * During system bootstrap cp_default does not contain the list of lgrp load 1417c478bd9Sstevel@tonic-gate * averages (cp_lgrploads). The list is allocated after the first CPU is brought 1427c478bd9Sstevel@tonic-gate * on-line when cp_default is initialized by cpupart_initialize_default(). 1437c478bd9Sstevel@tonic-gate * Configuring CPU0 may create a two-level topology with root and one leaf node 1447c478bd9Sstevel@tonic-gate * containing CPU0. This topology is initially constructed in a special 1457c478bd9Sstevel@tonic-gate * statically allocated 2-element lpl list lpl_bootstrap_list and later cloned 1467c478bd9Sstevel@tonic-gate * to cp_default when cp_default is initialized. The lpl_bootstrap_list is used 1477c478bd9Sstevel@tonic-gate * for all lpl operations until cp_default is fully constructed. 1487c478bd9Sstevel@tonic-gate * 1497c478bd9Sstevel@tonic-gate * The lpl_bootstrap_list is maintained by the code in lgrp.c. Every other 1507c478bd9Sstevel@tonic-gate * consumer who needs default lpl should use lpl_bootstrap which is a pointer to 1517c478bd9Sstevel@tonic-gate * the first element of lpl_bootstrap_list. 152394b433dSesaxe * 153394b433dSesaxe * CPUs that are added to the system, but have not yet been assigned to an 154394b433dSesaxe * lgrp will use lpl_bootstrap as a default lpl. This is necessary because 155394b433dSesaxe * on some architectures (x86) it's possible for the slave CPU startup thread 156394b433dSesaxe * to enter the dispatcher or allocate memory before calling lgrp_cpu_init(). 
1577c478bd9Sstevel@tonic-gate */ 1587c478bd9Sstevel@tonic-gate #define LPL_BOOTSTRAP_SIZE 2 1597c478bd9Sstevel@tonic-gate static lpl_t lpl_bootstrap_list[LPL_BOOTSTRAP_SIZE]; 1607c478bd9Sstevel@tonic-gate lpl_t *lpl_bootstrap; 1617c478bd9Sstevel@tonic-gate 162394b433dSesaxe /* 163394b433dSesaxe * If cp still references the bootstrap lpl, it has not yet been added to 164394b433dSesaxe * an lgrp. lgrp_mem_choose() uses this macro to detect the case where 165394b433dSesaxe * a thread is trying to allocate memory close to a CPU that has no lgrp. 166394b433dSesaxe */ 167394b433dSesaxe #define LGRP_CPU_HAS_NO_LGRP(cp) ((cp)->cpu_lpl == lpl_bootstrap) 168394b433dSesaxe 1697c478bd9Sstevel@tonic-gate static lgrp_t lroot; 1707c478bd9Sstevel@tonic-gate 1717c478bd9Sstevel@tonic-gate /* 1727c478bd9Sstevel@tonic-gate * Size, in bytes, beyond which random memory allocation policy is applied 1737c478bd9Sstevel@tonic-gate * to non-shared memory. Default is the maximum size, so random memory 1747c478bd9Sstevel@tonic-gate * allocation won't be used for non-shared memory by default. 1757c478bd9Sstevel@tonic-gate */ 1767c478bd9Sstevel@tonic-gate size_t lgrp_privm_random_thresh = (size_t)(-1); 1777c478bd9Sstevel@tonic-gate 178c6402783Sakolb /* the maximum effect that a single thread can have on it's lgroup's load */ 179c6402783Sakolb #define LGRP_LOADAVG_MAX_EFFECT(ncpu) \ 180c6402783Sakolb ((lgrp_loadavg_max_effect) / (ncpu)) 181c6402783Sakolb uint32_t lgrp_loadavg_max_effect = LGRP_LOADAVG_THREAD_MAX; 182c6402783Sakolb 183c6402783Sakolb 1847c478bd9Sstevel@tonic-gate /* 1857c478bd9Sstevel@tonic-gate * Size, in bytes, beyond which random memory allocation policy is applied to 1867c478bd9Sstevel@tonic-gate * shared memory. Default is 8MB (2 ISM pages). 
1877c478bd9Sstevel@tonic-gate */ 1887c478bd9Sstevel@tonic-gate size_t lgrp_shm_random_thresh = 8*1024*1024; 1897c478bd9Sstevel@tonic-gate 1907c478bd9Sstevel@tonic-gate /* 1917c478bd9Sstevel@tonic-gate * Whether to do processor set aware memory allocation by default 1927c478bd9Sstevel@tonic-gate */ 1937c478bd9Sstevel@tonic-gate int lgrp_mem_pset_aware = 0; 1947c478bd9Sstevel@tonic-gate 1957c478bd9Sstevel@tonic-gate /* 1967c478bd9Sstevel@tonic-gate * Set the default memory allocation policy for root lgroup 1977c478bd9Sstevel@tonic-gate */ 1987c478bd9Sstevel@tonic-gate lgrp_mem_policy_t lgrp_mem_policy_root = LGRP_MEM_POLICY_RANDOM; 1997c478bd9Sstevel@tonic-gate 2007c478bd9Sstevel@tonic-gate /* 2017c478bd9Sstevel@tonic-gate * Set the default memory allocation policy. For most platforms, 2027c478bd9Sstevel@tonic-gate * next touch is sufficient, but some platforms may wish to override 2037c478bd9Sstevel@tonic-gate * this. 2047c478bd9Sstevel@tonic-gate */ 2057c478bd9Sstevel@tonic-gate lgrp_mem_policy_t lgrp_mem_default_policy = LGRP_MEM_POLICY_NEXT; 2067c478bd9Sstevel@tonic-gate 2077c478bd9Sstevel@tonic-gate 2087c478bd9Sstevel@tonic-gate /* 2097c478bd9Sstevel@tonic-gate * lgroup CPU event handlers 2107c478bd9Sstevel@tonic-gate */ 2117c478bd9Sstevel@tonic-gate static void lgrp_cpu_init(struct cpu *); 2127c478bd9Sstevel@tonic-gate static void lgrp_cpu_fini(struct cpu *, lgrp_id_t); 2137c478bd9Sstevel@tonic-gate static lgrp_t *lgrp_cpu_to_lgrp(struct cpu *); 2147c478bd9Sstevel@tonic-gate 2157c478bd9Sstevel@tonic-gate /* 2167c478bd9Sstevel@tonic-gate * lgroup memory event handlers 2177c478bd9Sstevel@tonic-gate */ 2187c478bd9Sstevel@tonic-gate static void lgrp_mem_init(int, lgrp_handle_t, boolean_t); 2197c478bd9Sstevel@tonic-gate static void lgrp_mem_fini(int, lgrp_handle_t, boolean_t); 2207c478bd9Sstevel@tonic-gate static void lgrp_mem_rename(int, lgrp_handle_t, lgrp_handle_t); 2217c478bd9Sstevel@tonic-gate 2227c478bd9Sstevel@tonic-gate /* 2237c478bd9Sstevel@tonic-gate * 
lgroup CPU partition event handlers 2247c478bd9Sstevel@tonic-gate */ 2257c478bd9Sstevel@tonic-gate static void lgrp_part_add_cpu(struct cpu *, lgrp_id_t); 2267c478bd9Sstevel@tonic-gate static void lgrp_part_del_cpu(struct cpu *); 2277c478bd9Sstevel@tonic-gate 2287c478bd9Sstevel@tonic-gate static void lgrp_root_init(void); 2297c478bd9Sstevel@tonic-gate 2307c478bd9Sstevel@tonic-gate /* 2317c478bd9Sstevel@tonic-gate * lpl topology 2327c478bd9Sstevel@tonic-gate */ 2337c478bd9Sstevel@tonic-gate static void lpl_init(lpl_t *, lpl_t *, lgrp_t *); 2347c478bd9Sstevel@tonic-gate static void lpl_clear(lpl_t *); 2357c478bd9Sstevel@tonic-gate static void lpl_leaf_insert(lpl_t *, struct cpupart *); 2367c478bd9Sstevel@tonic-gate static void lpl_leaf_remove(lpl_t *, struct cpupart *); 2377c478bd9Sstevel@tonic-gate static void lpl_rset_add(lpl_t *, lpl_t *); 2387c478bd9Sstevel@tonic-gate static void lpl_rset_del(lpl_t *, lpl_t *); 2397c478bd9Sstevel@tonic-gate static int lpl_rset_contains(lpl_t *, lpl_t *); 2407c478bd9Sstevel@tonic-gate static void lpl_cpu_adjcnt(lpl_act_t, struct cpu *); 2417c478bd9Sstevel@tonic-gate static void lpl_child_update(lpl_t *, struct cpupart *); 2427c478bd9Sstevel@tonic-gate static int lpl_pick(lpl_t *, lpl_t *); 2437c478bd9Sstevel@tonic-gate static void lpl_verify_wrapper(struct cpupart *); 2447c478bd9Sstevel@tonic-gate 2457c478bd9Sstevel@tonic-gate /* 2467c478bd9Sstevel@tonic-gate * defines for lpl topology verifier return codes 2477c478bd9Sstevel@tonic-gate */ 2487c478bd9Sstevel@tonic-gate 2497c478bd9Sstevel@tonic-gate #define LPL_TOPO_CORRECT 0 2507c478bd9Sstevel@tonic-gate #define LPL_TOPO_PART_HAS_NO_LPL -1 2517c478bd9Sstevel@tonic-gate #define LPL_TOPO_CPUS_NOT_EMPTY -2 2527c478bd9Sstevel@tonic-gate #define LPL_TOPO_LGRP_MISMATCH -3 2537c478bd9Sstevel@tonic-gate #define LPL_TOPO_MISSING_PARENT -4 2547c478bd9Sstevel@tonic-gate #define LPL_TOPO_PARENT_MISMATCH -5 2557c478bd9Sstevel@tonic-gate #define LPL_TOPO_BAD_CPUCNT -6 
2567c478bd9Sstevel@tonic-gate #define LPL_TOPO_RSET_MISMATCH -7 2577c478bd9Sstevel@tonic-gate #define LPL_TOPO_LPL_ORPHANED -8 2587c478bd9Sstevel@tonic-gate #define LPL_TOPO_LPL_BAD_NCPU -9 2597c478bd9Sstevel@tonic-gate #define LPL_TOPO_RSET_MSSNG_LF -10 2607c478bd9Sstevel@tonic-gate #define LPL_TOPO_CPU_HAS_BAD_LPL -11 2617c478bd9Sstevel@tonic-gate #define LPL_TOPO_BOGUS_HINT -12 2627c478bd9Sstevel@tonic-gate #define LPL_TOPO_NONLEAF_HAS_CPUS -13 2637c478bd9Sstevel@tonic-gate #define LPL_TOPO_LGRP_NOT_LEAF -14 2647c478bd9Sstevel@tonic-gate #define LPL_TOPO_BAD_RSETCNT -15 2657c478bd9Sstevel@tonic-gate 2667c478bd9Sstevel@tonic-gate /* 2677c478bd9Sstevel@tonic-gate * Return whether lgroup optimizations should be enabled on this system 2687c478bd9Sstevel@tonic-gate */ 2697c478bd9Sstevel@tonic-gate int 2707c478bd9Sstevel@tonic-gate lgrp_optimizations(void) 2717c478bd9Sstevel@tonic-gate { 2727c478bd9Sstevel@tonic-gate /* 2737c478bd9Sstevel@tonic-gate * System must have more than 2 lgroups to enable lgroup optimizations 2747c478bd9Sstevel@tonic-gate * 2757c478bd9Sstevel@tonic-gate * XXX This assumes that a 2 lgroup system has an empty root lgroup 2767c478bd9Sstevel@tonic-gate * with one child lgroup containing all the resources. A 2 lgroup 2777c478bd9Sstevel@tonic-gate * system with a root lgroup directly containing CPUs or memory might 2787c478bd9Sstevel@tonic-gate * need lgroup optimizations with its child lgroup, but there 2797c478bd9Sstevel@tonic-gate * isn't such a machine for now.... 
2807c478bd9Sstevel@tonic-gate */ 2817c478bd9Sstevel@tonic-gate if (nlgrps > 2) 2827c478bd9Sstevel@tonic-gate return (1); 2837c478bd9Sstevel@tonic-gate 2847c478bd9Sstevel@tonic-gate return (0); 2857c478bd9Sstevel@tonic-gate } 2867c478bd9Sstevel@tonic-gate 2877c478bd9Sstevel@tonic-gate /* 2887c478bd9Sstevel@tonic-gate * Build full lgroup topology 2897c478bd9Sstevel@tonic-gate */ 2907c478bd9Sstevel@tonic-gate static void 2917c478bd9Sstevel@tonic-gate lgrp_root_init(void) 2927c478bd9Sstevel@tonic-gate { 2937c478bd9Sstevel@tonic-gate lgrp_handle_t hand; 2947c478bd9Sstevel@tonic-gate int i; 2957c478bd9Sstevel@tonic-gate lgrp_id_t id; 2967c478bd9Sstevel@tonic-gate 2977c478bd9Sstevel@tonic-gate /* 2987c478bd9Sstevel@tonic-gate * Create the "root" lgroup 2997c478bd9Sstevel@tonic-gate */ 3007c478bd9Sstevel@tonic-gate ASSERT(nlgrps == 0); 3017c478bd9Sstevel@tonic-gate id = nlgrps++; 3027c478bd9Sstevel@tonic-gate 3037c478bd9Sstevel@tonic-gate lgrp_root = &lroot; 3047c478bd9Sstevel@tonic-gate 3057c478bd9Sstevel@tonic-gate lgrp_root->lgrp_cpu = NULL; 3067c478bd9Sstevel@tonic-gate lgrp_root->lgrp_mnodes = 0; 3077c478bd9Sstevel@tonic-gate lgrp_root->lgrp_nmnodes = 0; 3087c478bd9Sstevel@tonic-gate hand = lgrp_plat_root_hand(); 3097c478bd9Sstevel@tonic-gate lgrp_root->lgrp_plathand = hand; 3107c478bd9Sstevel@tonic-gate 3117c478bd9Sstevel@tonic-gate lgrp_root->lgrp_id = id; 3127c478bd9Sstevel@tonic-gate lgrp_root->lgrp_cpucnt = 0; 3137c478bd9Sstevel@tonic-gate lgrp_root->lgrp_childcnt = 0; 3147c478bd9Sstevel@tonic-gate klgrpset_clear(lgrp_root->lgrp_children); 3157c478bd9Sstevel@tonic-gate klgrpset_clear(lgrp_root->lgrp_leaves); 3167c478bd9Sstevel@tonic-gate lgrp_root->lgrp_parent = NULL; 3177c478bd9Sstevel@tonic-gate lgrp_root->lgrp_chips = NULL; 3187c478bd9Sstevel@tonic-gate lgrp_root->lgrp_chipcnt = 0; 3197c478bd9Sstevel@tonic-gate lgrp_root->lgrp_latency = lgrp_plat_latency(hand, hand); 3207c478bd9Sstevel@tonic-gate 3217c478bd9Sstevel@tonic-gate for (i = 0; i < LGRP_RSRC_COUNT; 
i++) 3227c478bd9Sstevel@tonic-gate klgrpset_clear(lgrp_root->lgrp_set[i]); 3237c478bd9Sstevel@tonic-gate 3247c478bd9Sstevel@tonic-gate lgrp_root->lgrp_kstat = NULL; 3257c478bd9Sstevel@tonic-gate 3267c478bd9Sstevel@tonic-gate lgrp_table[id] = lgrp_root; 3277c478bd9Sstevel@tonic-gate 3287c478bd9Sstevel@tonic-gate /* 3297c478bd9Sstevel@tonic-gate * Setup initial lpl list for CPU0 and initial t0 home. 3307c478bd9Sstevel@tonic-gate * The only lpl space we have so far is lpl_bootstrap. It is used for 331394b433dSesaxe * all topology operations until cp_default is initialized at which 332394b433dSesaxe * point t0.t_lpl will be updated. 3337c478bd9Sstevel@tonic-gate */ 3347c478bd9Sstevel@tonic-gate lpl_bootstrap = lpl_bootstrap_list; 3357c478bd9Sstevel@tonic-gate t0.t_lpl = lpl_bootstrap; 3367c478bd9Sstevel@tonic-gate cp_default.cp_nlgrploads = LPL_BOOTSTRAP_SIZE; 3377c478bd9Sstevel@tonic-gate lpl_bootstrap_list[1].lpl_lgrpid = 1; 3387c478bd9Sstevel@tonic-gate cp_default.cp_lgrploads = lpl_bootstrap; 3397c478bd9Sstevel@tonic-gate } 3407c478bd9Sstevel@tonic-gate 3417c478bd9Sstevel@tonic-gate /* 3427c478bd9Sstevel@tonic-gate * Initialize the lgroup framework and allow the platform to do the same 3437c478bd9Sstevel@tonic-gate */ 3447c478bd9Sstevel@tonic-gate void 3457c478bd9Sstevel@tonic-gate lgrp_init(void) 3467c478bd9Sstevel@tonic-gate { 3477c478bd9Sstevel@tonic-gate /* 3487c478bd9Sstevel@tonic-gate * Initialize the platform 3497c478bd9Sstevel@tonic-gate */ 3507c478bd9Sstevel@tonic-gate lgrp_plat_init(); 3517c478bd9Sstevel@tonic-gate 3527c478bd9Sstevel@tonic-gate /* 3537c478bd9Sstevel@tonic-gate * Set max number of lgroups supported on this platform which must be 3547c478bd9Sstevel@tonic-gate * less than the max number of lgroups supported by the common lgroup 3557c478bd9Sstevel@tonic-gate * framework (eg. NLGRPS_MAX is max elements in lgrp_table[], etc.) 
3567c478bd9Sstevel@tonic-gate */ 3577c478bd9Sstevel@tonic-gate nlgrpsmax = lgrp_plat_max_lgrps(); 3587c478bd9Sstevel@tonic-gate ASSERT(nlgrpsmax <= NLGRPS_MAX); 3597c478bd9Sstevel@tonic-gate } 3607c478bd9Sstevel@tonic-gate 3617c478bd9Sstevel@tonic-gate /* 3627c478bd9Sstevel@tonic-gate * Create the root and cpu0's lgroup, and set t0's home. 3637c478bd9Sstevel@tonic-gate */ 3647c478bd9Sstevel@tonic-gate void 3657c478bd9Sstevel@tonic-gate lgrp_setup(void) 3667c478bd9Sstevel@tonic-gate { 3677c478bd9Sstevel@tonic-gate /* 3687c478bd9Sstevel@tonic-gate * Setup the root lgroup 3697c478bd9Sstevel@tonic-gate */ 3707c478bd9Sstevel@tonic-gate lgrp_root_init(); 3717c478bd9Sstevel@tonic-gate 3727c478bd9Sstevel@tonic-gate /* 3737c478bd9Sstevel@tonic-gate * Add cpu0 to an lgroup 3747c478bd9Sstevel@tonic-gate */ 3757c478bd9Sstevel@tonic-gate lgrp_config(LGRP_CONFIG_CPU_ADD, (uintptr_t)CPU, 0); 3767c478bd9Sstevel@tonic-gate lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)CPU, 0); 3777c478bd9Sstevel@tonic-gate } 3787c478bd9Sstevel@tonic-gate 3797c478bd9Sstevel@tonic-gate /* 3807c478bd9Sstevel@tonic-gate * Lgroup initialization is split in two parts. The first part 3817c478bd9Sstevel@tonic-gate * (lgrp_main_init()) is called right before start_other_cpus() in main. The 3827c478bd9Sstevel@tonic-gate * second part (lgrp_main_mp_init()) is called right after start_other_cpus() 3837c478bd9Sstevel@tonic-gate * when all CPUs are brought online and all distance information is available. 3847c478bd9Sstevel@tonic-gate * 3857c478bd9Sstevel@tonic-gate * When lgrp_main_init() is complete it sets lgrp_initialized. The 3867c478bd9Sstevel@tonic-gate * lgrp_main_mp_init() sets lgrp_topo_initialized. 3877c478bd9Sstevel@tonic-gate */ 3887c478bd9Sstevel@tonic-gate 3897c478bd9Sstevel@tonic-gate /* 3907c478bd9Sstevel@tonic-gate * true when lgrp initialization has been completed. 
3917c478bd9Sstevel@tonic-gate */ 3927c478bd9Sstevel@tonic-gate int lgrp_initialized = 0; 3937c478bd9Sstevel@tonic-gate 3947c478bd9Sstevel@tonic-gate /* 3957c478bd9Sstevel@tonic-gate * True when lgrp topology is constructed. 3967c478bd9Sstevel@tonic-gate */ 3977c478bd9Sstevel@tonic-gate int lgrp_topo_initialized = 0; 3987c478bd9Sstevel@tonic-gate 3997c478bd9Sstevel@tonic-gate /* 4007c478bd9Sstevel@tonic-gate * Init routine called after startup(), /etc/system has been processed, 4017c478bd9Sstevel@tonic-gate * and cpu0 has been added to an lgroup. 4027c478bd9Sstevel@tonic-gate */ 4037c478bd9Sstevel@tonic-gate void 4047c478bd9Sstevel@tonic-gate lgrp_main_init(void) 4057c478bd9Sstevel@tonic-gate { 4067c478bd9Sstevel@tonic-gate cpu_t *cp = CPU; 4077c478bd9Sstevel@tonic-gate lgrp_id_t lgrpid; 4087c478bd9Sstevel@tonic-gate int i; 4097c478bd9Sstevel@tonic-gate /* 4107c478bd9Sstevel@tonic-gate * Enforce a valid lgrp_mem_default_policy 4117c478bd9Sstevel@tonic-gate */ 4127c478bd9Sstevel@tonic-gate if ((lgrp_mem_default_policy <= LGRP_MEM_POLICY_DEFAULT) || 4137c478bd9Sstevel@tonic-gate (lgrp_mem_default_policy >= LGRP_NUM_MEM_POLICIES)) 4147c478bd9Sstevel@tonic-gate lgrp_mem_default_policy = LGRP_MEM_POLICY_NEXT; 4157c478bd9Sstevel@tonic-gate 4167c478bd9Sstevel@tonic-gate /* 4177c478bd9Sstevel@tonic-gate * See if mpo should be disabled. 4187c478bd9Sstevel@tonic-gate * This may happen in the case of null proc LPA on Starcat. 4197c478bd9Sstevel@tonic-gate * The platform won't be able to detect null proc LPA until after 4207c478bd9Sstevel@tonic-gate * cpu0 and memory have already been added to lgroups. 4217c478bd9Sstevel@tonic-gate * When and if it is detected, the Starcat platform will return 4227c478bd9Sstevel@tonic-gate * a different platform handle for cpu0 which is what we check for 4237c478bd9Sstevel@tonic-gate * here. If mpo should be disabled move cpu0 to it's rightful place 4247c478bd9Sstevel@tonic-gate * (the root), and destroy the remaining lgroups. 
This effectively 4257c478bd9Sstevel@tonic-gate * provides an UMA lgroup topology. 4267c478bd9Sstevel@tonic-gate */ 4277c478bd9Sstevel@tonic-gate lgrpid = cp->cpu_lpl->lpl_lgrpid; 4287c478bd9Sstevel@tonic-gate if (lgrp_table[lgrpid]->lgrp_plathand != 4297c478bd9Sstevel@tonic-gate lgrp_plat_cpu_to_hand(cp->cpu_id)) { 4307c478bd9Sstevel@tonic-gate lgrp_part_del_cpu(cp); 4317c478bd9Sstevel@tonic-gate lgrp_cpu_fini(cp, lgrpid); 4327c478bd9Sstevel@tonic-gate 4337c478bd9Sstevel@tonic-gate lgrp_cpu_init(cp); 4347c478bd9Sstevel@tonic-gate lgrp_part_add_cpu(cp, cp->cpu_lpl->lpl_lgrpid); 4357c478bd9Sstevel@tonic-gate 4367c478bd9Sstevel@tonic-gate ASSERT(cp->cpu_lpl->lpl_lgrpid == LGRP_ROOTID); 4377c478bd9Sstevel@tonic-gate 4388c6a5496Sjjc /* 4398c6a5496Sjjc * Destroy all lgroups except for root 4408c6a5496Sjjc */ 4417c478bd9Sstevel@tonic-gate for (i = 0; i <= lgrp_alloc_max; i++) { 4427c478bd9Sstevel@tonic-gate if (LGRP_EXISTS(lgrp_table[i]) && 4437c478bd9Sstevel@tonic-gate lgrp_table[i] != lgrp_root) 4447c478bd9Sstevel@tonic-gate lgrp_destroy(lgrp_table[i]); 4457c478bd9Sstevel@tonic-gate } 4468c6a5496Sjjc 4478c6a5496Sjjc /* 4488c6a5496Sjjc * Fix up root to point at itself for leaves and resources 4498c6a5496Sjjc * and not have any children 4508c6a5496Sjjc */ 4518c6a5496Sjjc lgrp_root->lgrp_childcnt = 0; 4528c6a5496Sjjc klgrpset_clear(lgrp_root->lgrp_children); 4538c6a5496Sjjc klgrpset_clear(lgrp_root->lgrp_leaves); 4548c6a5496Sjjc klgrpset_add(lgrp_root->lgrp_leaves, LGRP_ROOTID); 4557c478bd9Sstevel@tonic-gate klgrpset_clear(lgrp_root->lgrp_set[LGRP_RSRC_MEM]); 4567c478bd9Sstevel@tonic-gate klgrpset_add(lgrp_root->lgrp_set[LGRP_RSRC_MEM], LGRP_ROOTID); 4577c478bd9Sstevel@tonic-gate } 4587c478bd9Sstevel@tonic-gate 4597c478bd9Sstevel@tonic-gate /* 4607c478bd9Sstevel@tonic-gate * Initialize kstats framework. 
4617c478bd9Sstevel@tonic-gate */ 4627c478bd9Sstevel@tonic-gate lgrp_kstat_init(); 4637c478bd9Sstevel@tonic-gate /* 4647c478bd9Sstevel@tonic-gate * cpu0 is finally where it should be, so create it's lgroup's kstats 4657c478bd9Sstevel@tonic-gate */ 4667c478bd9Sstevel@tonic-gate mutex_enter(&cpu_lock); 4677c478bd9Sstevel@tonic-gate lgrp_kstat_create(cp); 4687c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock); 4697c478bd9Sstevel@tonic-gate 4707c478bd9Sstevel@tonic-gate lgrp_plat_main_init(); 4717c478bd9Sstevel@tonic-gate lgrp_initialized = 1; 4727c478bd9Sstevel@tonic-gate } 4737c478bd9Sstevel@tonic-gate 4747c478bd9Sstevel@tonic-gate /* 4757c478bd9Sstevel@tonic-gate * Finish lgrp initialization after all CPUS are brought on-line. 4767c478bd9Sstevel@tonic-gate * This routine is called after start_other_cpus(). 4777c478bd9Sstevel@tonic-gate */ 4787c478bd9Sstevel@tonic-gate void 4797c478bd9Sstevel@tonic-gate lgrp_main_mp_init(void) 4807c478bd9Sstevel@tonic-gate { 4817c478bd9Sstevel@tonic-gate klgrpset_t changed; 4827c478bd9Sstevel@tonic-gate 4837c478bd9Sstevel@tonic-gate /* 4847c478bd9Sstevel@tonic-gate * Update lgroup topology (if necessary) 4857c478bd9Sstevel@tonic-gate */ 4867c478bd9Sstevel@tonic-gate klgrpset_clear(changed); 4877c478bd9Sstevel@tonic-gate (void) lgrp_topo_update(lgrp_table, lgrp_alloc_max + 1, &changed); 4887c478bd9Sstevel@tonic-gate lgrp_topo_initialized = 1; 4897c478bd9Sstevel@tonic-gate } 4907c478bd9Sstevel@tonic-gate 4917c478bd9Sstevel@tonic-gate /* 492*03400a71Sjjc * Change latency of lgroup with specified lgroup platform handle (if one is 493*03400a71Sjjc * given) or change all lgroups with old latency to new latency 494*03400a71Sjjc */ 495*03400a71Sjjc void 496*03400a71Sjjc lgrp_latency_change(lgrp_handle_t hand, u_longlong_t oldtime, 497*03400a71Sjjc u_longlong_t newtime) 498*03400a71Sjjc { 499*03400a71Sjjc lgrp_t *lgrp; 500*03400a71Sjjc int i; 501*03400a71Sjjc 502*03400a71Sjjc for (i = 0; i <= lgrp_alloc_max; i++) { 503*03400a71Sjjc lgrp = 
	    lgrp_table[i];

		if (!LGRP_EXISTS(lgrp))
			continue;

		/*
		 * A null handle means "update every lgroup whose latency
		 * matches oldtime"; a real handle updates only the lgroup
		 * owning that platform handle.
		 */
		if ((hand == LGRP_NULL_HANDLE &&
		    lgrp->lgrp_latency == oldtime) ||
		    (hand != LGRP_NULL_HANDLE && lgrp->lgrp_plathand == hand))
			lgrp->lgrp_latency = (int)newtime;
	}
}

/*
 * Handle lgroup (re)configuration events (eg. addition of CPU, etc.)
 *
 * "resource" and "where" are event-specific: typically a cpu_t pointer or
 * memnode number, and a target lgroup ID / handle / argument struct.
 * Most events bump the lgrp_gen generation count so consumers can detect
 * that the topology changed.
 */
void
lgrp_config(lgrp_config_flag_t event, uintptr_t resource, uintptr_t where)
{
	klgrpset_t	changed;
	cpu_t		*cp;
	lgrp_id_t	id;
	int		rc;

	switch (event) {
	/*
	 * The following (re)configuration events are common code
	 * initiated. lgrp_plat_config() is called here to inform the
	 * platform of the reconfiguration event.
	 */
	case LGRP_CONFIG_CPU_ADD:
		cp = (cpu_t *)resource;

		/*
		 * Initialize the new CPU's lgrp related next/prev
		 * links, and give it a bootstrap lpl so that it can
		 * survive should it need to enter the dispatcher.
		 */
		cp->cpu_next_lpl = cp;
		cp->cpu_prev_lpl = cp;
		cp->cpu_next_lgrp = cp;
		cp->cpu_prev_lgrp = cp;
		cp->cpu_lpl = lpl_bootstrap;

		lgrp_plat_config(event, resource);
		atomic_add_32(&lgrp_gen, 1);

		break;
	case LGRP_CONFIG_CPU_DEL:
		lgrp_plat_config(event, resource);
		atomic_add_32(&lgrp_gen, 1);

		break;
	case LGRP_CONFIG_CPU_ONLINE:
		cp = (cpu_t *)resource;
		lgrp_cpu_init(cp);
		lgrp_part_add_cpu(cp, cp->cpu_lpl->lpl_lgrpid);
		/* Sanity-check the partition's lpl topology after the add */
		rc = lpl_topo_verify(cp->cpu_part);
		if (rc != LPL_TOPO_CORRECT) {
			panic("lpl_topo_verify failed: %d", rc);
		}
		lgrp_plat_config(event, resource);
		atomic_add_32(&lgrp_gen, 1);

		break;
	case LGRP_CONFIG_CPU_OFFLINE:
		cp = (cpu_t *)resource;
		/* Capture the lgroup ID before the lpl linkage is torn down */
		id = cp->cpu_lpl->lpl_lgrpid;
		lgrp_part_del_cpu(cp);
		lgrp_cpu_fini(cp, id);
		rc = lpl_topo_verify(cp->cpu_part);
		if (rc != LPL_TOPO_CORRECT) {
			panic("lpl_topo_verify failed: %d", rc);
		}
		lgrp_plat_config(event, resource);
		atomic_add_32(&lgrp_gen, 1);

		break;
	/*
	 * NOTE(review): the partition add/del events below do not bump
	 * lgrp_gen, unlike the CPU events above — presumably because
	 * partition moves don't change the physical topology; confirm.
	 */
	case LGRP_CONFIG_CPUPART_ADD:
		cp = (cpu_t *)resource;
		lgrp_part_add_cpu((cpu_t *)resource, (lgrp_id_t)where);
		rc = lpl_topo_verify(cp->cpu_part);
		if (rc != LPL_TOPO_CORRECT) {
			panic("lpl_topo_verify failed: %d", rc);
		}
		lgrp_plat_config(event, resource);

		break;
	case LGRP_CONFIG_CPUPART_DEL:
		cp = (cpu_t *)resource;
		lgrp_part_del_cpu((cpu_t *)resource);
		rc = lpl_topo_verify(cp->cpu_part);
		if (rc != LPL_TOPO_CORRECT) {
			panic("lpl_topo_verify failed: %d", rc);
		}
		lgrp_plat_config(event, resource);

		break;
	/*
	 * The following events are initiated by the memnode
	 * subsystem.
	 */
	case LGRP_CONFIG_MEM_ADD:
		lgrp_mem_init((int)resource, where, B_FALSE);
		atomic_add_32(&lgrp_gen, 1);

		break;
	case LGRP_CONFIG_MEM_DEL:
		lgrp_mem_fini((int)resource, where, B_FALSE);
		atomic_add_32(&lgrp_gen, 1);

		break;
	case LGRP_CONFIG_MEM_RENAME: {
		/* "where" carries a struct with the from/to handles */
		lgrp_config_mem_rename_t *ren_arg =
		    (lgrp_config_mem_rename_t *)where;

		lgrp_mem_rename((int)resource,
		    ren_arg->lmem_rename_from,
		    ren_arg->lmem_rename_to);
		atomic_add_32(&lgrp_gen, 1);

		break;
	}
	case LGRP_CONFIG_GEN_UPDATE:
		atomic_add_32(&lgrp_gen, 1);

		break;
	case LGRP_CONFIG_FLATTEN:
		/*
		 * where == 0 just records the desired number of topology
		 * levels; otherwise actually flatten the topology to
		 * "resource" levels.
		 */
		if (where == 0)
			lgrp_topo_levels = (int)resource;
		else
			(void) lgrp_topo_flatten(resource,
			    lgrp_table, lgrp_alloc_max, &changed);

		break;
	/*
	 * Update any lgroups with old latency to new latency
	 */
	case LGRP_CONFIG_LAT_CHANGE_ALL:
		lgrp_latency_change(LGRP_NULL_HANDLE, (u_longlong_t)resource,
		    (u_longlong_t)where);

		break;
	/*
	 * Update lgroup with specified lgroup platform handle to have
	 * new latency
	 */
	case LGRP_CONFIG_LAT_CHANGE:
		lgrp_latency_change((lgrp_handle_t)resource, 0,
		    (u_longlong_t)where);

		break;
	case LGRP_CONFIG_NOP:

		break;
	default:
		break;
	}

}

/*
 * Called to add lgrp info into cpu structure from cpu_add_unit;
 * do not assume cpu is in cpu[] yet!
 *
 * CPUs are brought online with all other CPUs paused so we can't
 * allocate memory or we could deadlock the system, so we rely on
 * the platform to statically allocate as much space as we need
 * for the lgrp structs and stats.
 */
static void
lgrp_cpu_init(struct cpu *cp)
{
	klgrpset_t	changed;
	int		count;
	lgrp_handle_t	hand;
	int		first_cpu;	/* nonzero on the very first call */
	lgrp_t		*my_lgrp;
	lgrp_id_t	lgrpid;
	struct cpu	*cptr;
	struct chip	*chp;

	/*
	 * This is the first time through if the resource set
	 * for the root lgroup is empty. After cpu0 has been
	 * initially added to an lgroup, the root's CPU resource
	 * set can never be empty, since the system's last CPU
	 * cannot be offlined.
	 */
	if (klgrpset_isempty(lgrp_root->lgrp_set[LGRP_RSRC_CPU])) {
		/*
		 * First time through.
		 */
		first_cpu = 1;
	} else {
		/*
		 * If cpu0 needs to move lgroups, we may come
		 * through here again, at which time cpu_lock won't
		 * be held, and lgrp_initialized will be false.
		 */
		ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);
		ASSERT(cp->cpu_part != NULL);
		first_cpu = 0;
	}

	/* Map the CPU to its platform handle, then to an existing lgroup */
	hand = lgrp_plat_cpu_to_hand(cp->cpu_id);
	my_lgrp = lgrp_hand_to_lgrp(hand);

	if (my_lgrp == NULL) {
		/*
		 * Create new lgrp and add it to lgroup topology
		 */
		my_lgrp = lgrp_create();
		my_lgrp->lgrp_plathand = hand;
		my_lgrp->lgrp_latency = lgrp_plat_latency(hand, hand);
		lgrpid = my_lgrp->lgrp_id;
		klgrpset_add(my_lgrp->lgrp_leaves, lgrpid);
		klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);

		count = 0;
		klgrpset_clear(changed);
		count += lgrp_leaf_add(my_lgrp, lgrp_table, lgrp_alloc_max + 1,
		    &changed);
		/*
		 * May have added new intermediate lgroups, so need to add
		 * resources other than CPUs which are added below
		 */
		(void) lgrp_mnode_update(changed, NULL);
	} else if (my_lgrp->lgrp_latency == 0 && lgrp_plat_latency(hand, hand)
	    > 0) {
		/*
		 * Leaf lgroup was created, but latency wasn't available
		 * then.  So, set latency for it and fill in rest of lgroup
		 * topology now that we know how far it is from other leaf
		 * lgroups.
		 */
		lgrpid = my_lgrp->lgrp_id;
		klgrpset_clear(changed);
		if (!klgrpset_ismember(my_lgrp->lgrp_set[LGRP_RSRC_CPU],
		    lgrpid))
			klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);
		count = lgrp_leaf_add(my_lgrp, lgrp_table, lgrp_alloc_max + 1,
		    &changed);

		/*
		 * May have added new intermediate lgroups, so need to add
		 * resources other than CPUs which are added below
		 */
		(void) lgrp_mnode_update(changed, NULL);
	} else if (!klgrpset_ismember(my_lgrp->lgrp_set[LGRP_RSRC_CPU],
	    my_lgrp->lgrp_id)) {
		int	i;

		/*
		 * Update existing lgroup and lgroups containing it with CPU
		 * resource
		 */
		lgrpid = my_lgrp->lgrp_id;
		klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);
		for (i = 0; i <= lgrp_alloc_max; i++) {
			lgrp_t		*lgrp;

			lgrp = lgrp_table[i];
			if (!LGRP_EXISTS(lgrp) ||
			    !lgrp_rsets_member(lgrp->lgrp_set, lgrpid))
				continue;

			klgrpset_add(lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);
		}
	}

	/* Point the CPU at its partition's per-lgroup load structure */
	lgrpid = my_lgrp->lgrp_id;
	cp->cpu_lpl = &cp->cpu_part->cp_lgrploads[lgrpid];

	/*
	 * For multi-lgroup systems, need to setup lpl for CPU0 or CPU0 will
	 * end up in lpl for lgroup 0 whether it is supposed to be in there or
	 * not since none of lgroup IDs in the lpl's have been set yet.
	 */
	if (first_cpu && nlgrpsmax > 1 && lgrpid != cp->cpu_lpl->lpl_lgrpid)
		cp->cpu_lpl->lpl_lgrpid = lgrpid;

	/*
	 * link the CPU into the lgrp's CPU list (circular doubly-linked,
	 * inserted before the current head)
	 */
	if (my_lgrp->lgrp_cpucnt == 0) {
		my_lgrp->lgrp_cpu = cp;
		cp->cpu_next_lgrp = cp->cpu_prev_lgrp = cp;
	} else {
		cptr = my_lgrp->lgrp_cpu;
		cp->cpu_next_lgrp = cptr;
		cp->cpu_prev_lgrp = cptr->cpu_prev_lgrp;
		cptr->cpu_prev_lgrp->cpu_next_lgrp = cp;
		cptr->cpu_prev_lgrp = cp;
	}
	my_lgrp->lgrp_cpucnt++;

	/*
	 * Add this cpu's chip to the per lgroup list
	 * if necessary
	 */
	if (cp->cpu_chip->chip_lgrp == NULL) {
		struct chip	*lcpr;
		chp = cp->cpu_chip;

		/* Insert the chip into the lgroup's circular chip list */
		if (my_lgrp->lgrp_chipcnt == 0) {
			my_lgrp->lgrp_chips = chp;
			chp->chip_next_lgrp =
			    chp->chip_prev_lgrp = chp;
		} else {
			lcpr = my_lgrp->lgrp_chips;
			chp->chip_next_lgrp = lcpr;
			chp->chip_prev_lgrp =
			    lcpr->chip_prev_lgrp;
			lcpr->chip_prev_lgrp->chip_next_lgrp =
			    chp;
			lcpr->chip_prev_lgrp = chp;
		}
		chp->chip_lgrp = my_lgrp;
		chp->chip_balance = chp->chip_next_lgrp;
		my_lgrp->lgrp_chipcnt++;
	}
}

/*
 * Allocate (or recycle) an lgroup table slot, reset the lgroup's fields to
 * their empty state, install it in lgrp_table, and return it.  Panics if the
 * platform cannot supply another lgroup.
 */
lgrp_t *
lgrp_create(void)
{
	lgrp_t		*my_lgrp;
	lgrp_id_t	lgrpid;
	int		i;

	ASSERT(!lgrp_initialized || MUTEX_HELD(&cpu_lock));

	/*
	 * Find an open slot in the lgroup table and recycle unused lgroup
	 * left there if any
	 */
	my_lgrp = NULL;
	if (lgrp_alloc_hint == -1)
		/*
		 * Allocate from end when hint not set yet because no lgroups
		 * have been deleted yet
		 */
		lgrpid = nlgrps++;
	else {
		/*
		 * Start looking for next open slot from hint and leave hint
		 * at slot allocated
		 *
		 * NOTE(review): if this scan finds no free slot before
		 * nlgrpsmax, lgrpid is read uninitialized by the assignment
		 * below; the nlgrps > nlgrpsmax panic further down only
		 * fires afterwards.  Confirm the hint invariant guarantees
		 * a free slot always exists here.
		 */
		for (i = lgrp_alloc_hint; i < nlgrpsmax; i++) {
			my_lgrp = lgrp_table[i];
			if (!LGRP_EXISTS(my_lgrp)) {
				lgrpid = i;
				nlgrps++;
				break;
			}
		}
		lgrp_alloc_hint = lgrpid;
	}

	/*
	 * Keep track of max lgroup ID allocated so far to cut down on searches
	 */
	if (lgrpid > lgrp_alloc_max)
		lgrp_alloc_max = lgrpid;

	/*
	 * Need to allocate new lgroup if next open slot didn't have one
	 * for recycling
	 */
	if (my_lgrp == NULL)
		my_lgrp = lgrp_plat_alloc(lgrpid);

	if (nlgrps > nlgrpsmax || my_lgrp == NULL)
		panic("Too many lgrps for platform (%d)", nlgrps);

	/* Reset all fields to the empty state before handing it out */
	my_lgrp->lgrp_id = lgrpid;
	my_lgrp->lgrp_latency = 0;
	my_lgrp->lgrp_plathand = LGRP_NULL_HANDLE;
	my_lgrp->lgrp_parent = NULL;
	my_lgrp->lgrp_childcnt = 0;
	my_lgrp->lgrp_mnodes = (mnodeset_t)0;
	my_lgrp->lgrp_nmnodes = 0;
	klgrpset_clear(my_lgrp->lgrp_children);
	klgrpset_clear(my_lgrp->lgrp_leaves);
	for (i = 0; i < LGRP_RSRC_COUNT; i++)
		klgrpset_clear(my_lgrp->lgrp_set[i]);

	my_lgrp->lgrp_cpu = NULL;
	my_lgrp->lgrp_cpucnt = 0;
	my_lgrp->lgrp_chips = NULL;
	my_lgrp->lgrp_chipcnt = 0;

	/* Recycled slot may still have kstats attached; zero them */
	if (my_lgrp->lgrp_kstat != NULL)
		lgrp_kstat_reset(lgrpid);

	lgrp_table[my_lgrp->lgrp_id] = my_lgrp;

	return (my_lgrp);
}

/*
 * Tear down an lgroup and mark its lgrp_table slot for recycling by
 * lgrp_create().  The lgrp_t itself is not freed.
 */
void
lgrp_destroy(lgrp_t *lgrp)
{
	int	i;

	/*
	 * Unless this lgroup is being destroyed on behalf of
	 * the boot CPU, cpu_lock must be held
	 */
	ASSERT(!lgrp_initialized || MUTEX_HELD(&cpu_lock));

	/* The last remaining lgroup can never be destroyed */
	if (nlgrps == 1)
		cmn_err(CE_PANIC, "Can't destroy only lgroup!");

	if (!LGRP_EXISTS(lgrp))
		return;

	/*
	 * Set hint to lgroup being deleted and try to keep lower numbered
	 * hints to facilitate finding empty slots
	 */
	if (lgrp_alloc_hint == -1 || lgrp->lgrp_id < lgrp_alloc_hint)
		lgrp_alloc_hint = lgrp->lgrp_id;

	/*
	 * Mark this lgroup to be recycled by setting its lgroup ID to
	 * LGRP_NONE and clear relevant fields
	 */
	lgrp->lgrp_id = LGRP_NONE;
	lgrp->lgrp_latency = 0;
	lgrp->lgrp_plathand = LGRP_NULL_HANDLE;
	lgrp->lgrp_parent = NULL;
	lgrp->lgrp_childcnt = 0;

	klgrpset_clear(lgrp->lgrp_children);
	klgrpset_clear(lgrp->lgrp_leaves);
	for (i = 0; i < LGRP_RSRC_COUNT; i++)
		klgrpset_clear(lgrp->lgrp_set[i]);

	lgrp->lgrp_mnodes = (mnodeset_t)0;
	lgrp->lgrp_nmnodes = 0;

	lgrp->lgrp_cpu = NULL;
	lgrp->lgrp_cpucnt = 0;
	lgrp->lgrp_chipcnt = 0;
	lgrp->lgrp_chips = NULL;

	nlgrps--;
}

/*
 * Initialize kstat data. Called from lgrp initialization code.
 */
static void
lgrp_kstat_init(void)
{
	lgrp_stat_t	stat;

	mutex_init(&lgrp_kstat_mutex, NULL, MUTEX_DEFAULT, NULL);

	/* Name each per-lgroup statistic in the shared kstat template */
	for (stat = 0; stat < LGRP_NUM_STATS; stat++)
		kstat_named_init(&lgrp_kstat_data[stat],
		    lgrp_kstat_names[stat], KSTAT_DATA_INT64);
}

/*
 * initialize an lgrp's kstats if needed
 * called with cpu_lock held but not with cpus paused.
 * we don't tear these down now because we don't know about
 * memory leaving the lgrp yet...
 */

void
lgrp_kstat_create(cpu_t *cp)
{
	kstat_t		*lgrp_kstat;
	lgrp_id_t	lgrpid;
	lgrp_t		*my_lgrp;

	ASSERT(MUTEX_HELD(&cpu_lock));

	lgrpid = cp->cpu_lpl->lpl_lgrpid;
	my_lgrp = lgrp_table[lgrpid];

	if (my_lgrp->lgrp_kstat != NULL)
		return;	/* already initialized */

	/*
	 * Virtual kstat: all lgroups share lgrp_kstat_data, with
	 * ks_private identifying which lgroup to extract on update.
	 */
	lgrp_kstat = kstat_create("lgrp", lgrpid, NULL, "misc",
	    KSTAT_TYPE_NAMED, LGRP_NUM_STATS,
	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);

	/* kstat_create() can fail; simply skip installation if it did */
	if (lgrp_kstat != NULL) {
		lgrp_kstat->ks_lock = &lgrp_kstat_mutex;
		lgrp_kstat->ks_private = my_lgrp;
		lgrp_kstat->ks_data = &lgrp_kstat_data;
		lgrp_kstat->ks_update = lgrp_kstat_extract;
		my_lgrp->lgrp_kstat = lgrp_kstat;
		kstat_install(lgrp_kstat);
	}
}

/*
 * this will do something when we manage to remove now unused lgrps
 */

/* ARGSUSED */
void
lgrp_kstat_destroy(cpu_t *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
}

/*
 * Called when a CPU is off-lined.
 */
static void
lgrp_cpu_fini(struct cpu *cp, lgrp_id_t lgrpid)
{
	lgrp_t		*my_lgrp;
	struct cpu	*prev;
	struct cpu	*next;
	chip_t		*chp;

	ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);

	/* Unlink the CPU from the lgroup's circular CPU list */
	prev = cp->cpu_prev_lgrp;
	next = cp->cpu_next_lgrp;

	prev->cpu_next_lgrp = next;
	next->cpu_prev_lgrp = prev;

	/*
	 * just because I'm paranoid doesn't mean...
	 */

	cp->cpu_next_lgrp = cp->cpu_prev_lgrp = NULL;

	my_lgrp = lgrp_table[lgrpid];
	my_lgrp->lgrp_cpucnt--;

	/*
	 * If the last CPU on its chip is being offlined
	 * then remove this chip from the per lgroup list.
	 *
	 * This is also done for the boot CPU when it needs
	 * to move between lgroups as a consequence of
	 * null proc lpa.
	 */
	chp = cp->cpu_chip;
	if (chp->chip_ncpu == 0 || !lgrp_initialized) {

		chip_t	*chpp;

		if (--my_lgrp->lgrp_chipcnt == 0)
			my_lgrp->lgrp_chips = NULL;
		else if (my_lgrp->lgrp_chips == chp)
			my_lgrp->lgrp_chips = chp->chip_next_lgrp;

		/*
		 * Walk this lgroup's chip list looking for chips that
		 * may try to balance against the one that's leaving
		 */
		for (chpp = chp->chip_next_lgrp; chpp != chp;
		    chpp = chpp->chip_next_lgrp) {
			if (chpp->chip_balance == chp)
				chpp->chip_balance = chp->chip_next_lgrp;
		}

		/* Unlink the chip and clear its lgroup affiliation */
		chp->chip_prev_lgrp->chip_next_lgrp = chp->chip_next_lgrp;
		chp->chip_next_lgrp->chip_prev_lgrp = chp->chip_prev_lgrp;

		chp->chip_next_lgrp = chp->chip_prev_lgrp = NULL;
		chp->chip_lgrp = NULL;
		chp->chip_balance = NULL;
	}

	/*
	 * Removing last CPU in lgroup, so update lgroup topology
	 */
	if (my_lgrp->lgrp_cpucnt == 0) {
		klgrpset_t	changed;
		int		count;
		int		i;

		my_lgrp->lgrp_cpu = NULL;

		/*
		 * Remove this lgroup from its lgroup CPU resources and remove
		 * lgroup from lgroup topology if it doesn't have any more
		 * resources in it now
		 */
		klgrpset_del(my_lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);
		if (lgrp_rsets_empty(my_lgrp->lgrp_set)) {
			count = 0;
			klgrpset_clear(changed);
			count += lgrp_leaf_delete(my_lgrp, lgrp_table,
			    lgrp_alloc_max + 1, &changed);
			return;
		}

		/*
		 * This lgroup isn't empty, so just remove it from CPU
		 * resources of any lgroups that contain it as such
		 */
		for (i = 0; i <= lgrp_alloc_max; i++) {
			lgrp_t		*lgrp;

			lgrp = lgrp_table[i];
			if (!LGRP_EXISTS(lgrp) ||
			    !klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_CPU],
			    lgrpid))
				continue;

			klgrpset_del(lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);
		}
		return;
	}

	/* Lgroup still has CPUs; repoint the list head if it was this CPU */
	if (my_lgrp->lgrp_cpu == cp)
		my_lgrp->lgrp_cpu = next;

}

/*
 * Update memory nodes in target lgroups and return ones that get changed
 */
int
lgrp_mnode_update(klgrpset_t target, klgrpset_t *changed)
{
	int	count;		/* number of (lgroup, resource) updates made */
	int	i;
	int	j;
	lgrp_t	*lgrp;
	lgrp_t	*lgrp_rsrc;

	count = 0;
	if (changed)
		klgrpset_clear(*changed);

	if (klgrpset_isempty(target))
		return (0);

	/*
	 * Find each lgroup in target lgroups
	 */
	for (i = 0; i <= lgrp_alloc_max; i++) {
		/*
		 * Skip any lgroups that don't exist or aren't in target group
		 */
		lgrp = lgrp_table[i];
		if (!klgrpset_ismember(target, i) || !LGRP_EXISTS(lgrp)) {
			continue;
		}

		/*
		 * Initialize memnodes for intermediate lgroups to 0
		 * and update them from scratch since they may have completely
		 * changed
		 */
		if (lgrp->lgrp_childcnt && lgrp != lgrp_root) {
			lgrp->lgrp_mnodes = (mnodeset_t)0;
			lgrp->lgrp_nmnodes = 0;
		}

		/*
		 * Update memory nodes of target lgroup with memory nodes
		 * from each lgroup in its lgroup memory resource set
		 */
		for (j = 0; j <= lgrp_alloc_max; j++) {
			int	k;

			/*
			 * Skip any lgroups that don't exist or aren't in
			 * memory resources of target lgroup
			 */
			lgrp_rsrc = lgrp_table[j];
			if (!LGRP_EXISTS(lgrp_rsrc) ||
			    !klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM],
			    j))
				continue;

			/*
			 * Update target lgroup's memnodes to include memnodes
			 * of this lgroup (scan each bit of the mnode set)
			 */
			for (k = 0; k < sizeof (mnodeset_t) * NBBY; k++) {
				mnodeset_t	mnode_mask;

				mnode_mask = (mnodeset_t)1 << k;
				if ((lgrp_rsrc->lgrp_mnodes & mnode_mask) &&
				    !(lgrp->lgrp_mnodes & mnode_mask)) {
					lgrp->lgrp_mnodes |= mnode_mask;
					lgrp->lgrp_nmnodes++;
				}
			}
			count++;
			if (changed)
				klgrpset_add(*changed, lgrp->lgrp_id);
		}
	}

	return (count);
}

/*
 * Memory copy-rename. Called when the "mnode" containing the kernel cage memory
 * is moved from one board to another. The "from" and "to" arguments specify the
 * source and the destination of the move.
 *
 * See plat_lgrp_config() for a detailed description of the copy-rename
 * semantics.
 *
 * The lgrp_mem_rename() is called by the platform copy-rename code to update
 * the lgroup topology which is changing as memory moves from one lgroup to
 * another. It removes the mnode from the source lgroup and re-inserts it in the
 * target lgroup.
 *
 * The lgrp_mem_rename() function passes a flag to lgrp_mem_init() and
 * lgrp_mem_fini() telling that the insertion and deletion are part of a DR
 * copy-rename operation.
 *
 * There is one case which requires special handling. If the system contains
 * only two boards (mnodes), the lgrp_mem_fini() removes the only mnode from the
 * lgroup hierarchy. This mnode is soon re-inserted back in the hierarchy by
 * lgrp_mem_init), but there is a window when the system has no memory in the
 * lgroup hierarchy.
If another thread tries to allocate memory during this 12307c478bd9Sstevel@tonic-gate * window, the allocation will fail, although the system has physical memory. 12317c478bd9Sstevel@tonic-gate * This may cause a system panic or a deadlock (some sleeping memory allocations 12327c478bd9Sstevel@tonic-gate * happen with cpu_lock held which prevents lgrp_mem_init() from re-inserting 12337c478bd9Sstevel@tonic-gate * the mnode back). 12347c478bd9Sstevel@tonic-gate * 12357c478bd9Sstevel@tonic-gate * The lgrp_memnode_choose() function walks the lgroup hierarchy looking for the 12367c478bd9Sstevel@tonic-gate * lgrp with non-empty lgrp_mnodes. To deal with the special case above, 12377c478bd9Sstevel@tonic-gate * lgrp_mem_fini() does not remove the last mnode from the lroot->lgrp_mnodes, 12387c478bd9Sstevel@tonic-gate * but it updates the rest of the lgroup topology as if the mnode was actually 12397c478bd9Sstevel@tonic-gate * removed. The lgrp_mem_init() function recognizes that the mnode being 12407c478bd9Sstevel@tonic-gate * inserted represents such a special case and updates the topology 12417c478bd9Sstevel@tonic-gate * appropriately. 12427c478bd9Sstevel@tonic-gate */ 12437c478bd9Sstevel@tonic-gate void 12447c478bd9Sstevel@tonic-gate lgrp_mem_rename(int mnode, lgrp_handle_t from, lgrp_handle_t to) 12457c478bd9Sstevel@tonic-gate { 12467c478bd9Sstevel@tonic-gate /* 12477c478bd9Sstevel@tonic-gate * Remove the memory from the source node and add it to the destination 12487c478bd9Sstevel@tonic-gate * node. 12497c478bd9Sstevel@tonic-gate */ 12507c478bd9Sstevel@tonic-gate lgrp_mem_fini(mnode, from, B_TRUE); 12517c478bd9Sstevel@tonic-gate lgrp_mem_init(mnode, to, B_TRUE); 12527c478bd9Sstevel@tonic-gate } 12537c478bd9Sstevel@tonic-gate 12547c478bd9Sstevel@tonic-gate /* 12557c478bd9Sstevel@tonic-gate * Called to indicate that the lgrp with platform handle "hand" now 12567c478bd9Sstevel@tonic-gate * contains the memory identified by "mnode". 
 *
 * LOCKING for this routine is a bit tricky. Usually it is called without
 * cpu_lock and it must grab cpu_lock here to prevent racing with other
 * callers. During DR of the board containing the caged memory it may be called
 * with cpu_lock already held and CPUs paused.
 *
 * If the insertion is part of the DR copy-rename and the inserted mnode (and
 * only this mnode) is already present in the lgrp_root->lgrp_mnodes set, we are
 * dealing with the special case of DR copy-rename described in
 * lgrp_mem_rename().
 */
void
lgrp_mem_init(int mnode, lgrp_handle_t hand, boolean_t is_copy_rename)
{
	klgrpset_t	changed;	/* lgroups whose rsets were updated */
	int		count;		/* number of lgroups changed */
	int		i;
	lgrp_t		*my_lgrp;	/* lgroup for platform handle "hand" */
	lgrp_id_t	lgrpid;
	mnodeset_t	mnodes_mask = ((mnodeset_t)1 << mnode);
	boolean_t	drop_lock = B_FALSE;	/* must we drop cpu_lock? */
	boolean_t	need_synch = B_FALSE;	/* must we pause CPUs? */

	/*
	 * Grab CPU lock (if we haven't already)
	 */
	if (!MUTEX_HELD(&cpu_lock)) {
		mutex_enter(&cpu_lock);
		drop_lock = B_TRUE;
	}

	/*
	 * This routine may be called from a context where we already
	 * hold cpu_lock, and have already paused cpus.
	 */
	if (!cpus_paused())
		need_synch = B_TRUE;

	/*
	 * Check if this mnode is already configured and return immediately if
	 * it is.
	 *
	 * NOTE: in special case of copy-rename of the only remaining mnode,
	 * lgrp_mem_fini() refuses to remove the last mnode from the root, so we
	 * recognize this case and continue as usual, but skip the update to
	 * the lgrp_mnodes and the lgrp_nmnodes. This restores the consistency
	 * in topology, temporarily broken by lgrp_mem_fini().
	 */
	if (!(is_copy_rename && (lgrp_root->lgrp_mnodes == mnodes_mask)) &&
	    lgrp_root->lgrp_mnodes & mnodes_mask) {
		if (drop_lock)
			mutex_exit(&cpu_lock);
		return;
	}

	/*
	 * Update lgroup topology with new memory resources, keeping track of
	 * which lgroups change
	 */
	count = 0;
	klgrpset_clear(changed);
	my_lgrp = lgrp_hand_to_lgrp(hand);
	if (my_lgrp == NULL) {
		/* new lgrp */
		my_lgrp = lgrp_create();
		lgrpid = my_lgrp->lgrp_id;
		my_lgrp->lgrp_plathand = hand;
		my_lgrp->lgrp_latency = lgrp_plat_latency(hand, hand);
		klgrpset_add(my_lgrp->lgrp_leaves, lgrpid);
		klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);

		/* Splicing a new leaf into the topology requires paused CPUs */
		if (need_synch)
			pause_cpus(NULL);
		count = lgrp_leaf_add(my_lgrp, lgrp_table, lgrp_alloc_max + 1,
		    &changed);
		if (need_synch)
			start_cpus();
	} else if (my_lgrp->lgrp_latency == 0 && lgrp_plat_latency(hand, hand)
	    > 0) {
		/*
		 * Leaf lgroup was created, but latency wasn't available
		 * then. So, set latency for it and fill in rest of lgroup
		 * topology now that we know how far it is from other leaf
		 * lgroups.
		 */
		klgrpset_clear(changed);
		lgrpid = my_lgrp->lgrp_id;
		if (!klgrpset_ismember(my_lgrp->lgrp_set[LGRP_RSRC_MEM],
		    lgrpid))
			klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);
		if (need_synch)
			pause_cpus(NULL);
		count = lgrp_leaf_add(my_lgrp, lgrp_table, lgrp_alloc_max + 1,
		    &changed);
		if (need_synch)
			start_cpus();
	} else if (!klgrpset_ismember(my_lgrp->lgrp_set[LGRP_RSRC_MEM],
	    my_lgrp->lgrp_id)) {
		/*
		 * Add new lgroup memory resource to existing lgroup
		 */
		lgrpid = my_lgrp->lgrp_id;
		klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);
		klgrpset_add(changed, lgrpid);
		count++;
		for (i = 0; i <= lgrp_alloc_max; i++) {
			lgrp_t		*lgrp;

			lgrp = lgrp_table[i];
			if (!LGRP_EXISTS(lgrp) ||
			    !lgrp_rsets_member(lgrp->lgrp_set, lgrpid))
				continue;

			klgrpset_add(lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);
			klgrpset_add(changed, lgrp->lgrp_id);
			count++;
		}
	}

	/*
	 * Add memory node to lgroup and remove lgroup from ones that need
	 * to be updated
	 *
	 * NOTE(review): if none of the three branches above was taken (lgroup
	 * exists, has a latency, and already lists itself as a memory
	 * resource), "lgrpid" reaches the klgrpset_del() below unassigned —
	 * confirm that at least one branch always applies on every path that
	 * gets past the early return.
	 */
	if (!(my_lgrp->lgrp_mnodes & mnodes_mask)) {
		my_lgrp->lgrp_mnodes |= mnodes_mask;
		my_lgrp->lgrp_nmnodes++;
	}
	klgrpset_del(changed, lgrpid);

	/*
	 * Update memory node information for all lgroups that changed and
	 * contain new memory node as a resource
	 */
	if (count)
		(void) lgrp_mnode_update(changed, NULL);

	if (drop_lock)
		mutex_exit(&cpu_lock);
}

/*
 * Called to indicate that the lgroup associated with the platform
 * handle "hand" no longer contains given memory node
 *
 * LOCKING for this routine is a bit tricky. Usually it is called without
 * cpu_lock and it must grab cpu_lock here to prevent racing with other
 * callers. During DR of the board containing the caged memory it may be called
 * with cpu_lock already held and CPUs paused.
 *
 * If the deletion is part of the DR copy-rename and the deleted mnode is the
 * only one present in the lgrp_root->lgrp_mnodes, all the topology is updated,
 * but lgrp_root->lgrp_mnodes is left intact. Later, lgrp_mem_init() will insert
 * the same mnode back into the topology. See lgrp_mem_rename() and
 * lgrp_mem_init() for additional details.
 */
void
lgrp_mem_fini(int mnode, lgrp_handle_t hand, boolean_t is_copy_rename)
{
	klgrpset_t	changed;	/* lgroups whose rsets were updated */
	int		count;		/* number of lgroups changed */
	int		i;
	lgrp_t		*my_lgrp;	/* lgroup for platform handle "hand" */
	lgrp_id_t	lgrpid;
	mnodeset_t	mnodes_mask;
	boolean_t	drop_lock = B_FALSE;	/* must we drop cpu_lock? */
	boolean_t	need_synch = B_FALSE;	/* must we pause CPUs? */

	/*
	 * Grab CPU lock (if we haven't already)
	 */
	if (!MUTEX_HELD(&cpu_lock)) {
		mutex_enter(&cpu_lock);
		drop_lock = B_TRUE;
	}

	/*
	 * This routine may be called from a context where we already
	 * hold cpu_lock and have already paused cpus.
	 */
	if (!cpus_paused())
		need_synch = B_TRUE;

	my_lgrp = lgrp_hand_to_lgrp(hand);

	/*
	 * The lgrp *must* be pre-existing
	 */
	ASSERT(my_lgrp != NULL);

	/*
	 * Delete memory node from lgroups which contain it
	 */
	mnodes_mask = ((mnodeset_t)1 << mnode);
	for (i = 0; i <= lgrp_alloc_max; i++) {
		lgrp_t *lgrp = lgrp_table[i];
		/*
		 * Skip any non-existent lgroups and any lgroups that don't
		 * contain leaf lgroup of memory as a memory resource
		 */
		if (!LGRP_EXISTS(lgrp) ||
		    !(lgrp->lgrp_mnodes & mnodes_mask))
			continue;

		/*
		 * Avoid removing the last mnode from the root in the DR
		 * copy-rename case. See lgrp_mem_rename() for details.
		 */
		if (is_copy_rename &&
		    (lgrp == lgrp_root) && (lgrp->lgrp_mnodes == mnodes_mask))
			continue;

		/*
		 * Remove memory node from lgroup.
		 */
		lgrp->lgrp_mnodes &= ~mnodes_mask;
		lgrp->lgrp_nmnodes--;
		ASSERT(lgrp->lgrp_nmnodes >= 0);
	}
	/* The root must never be left without memory */
	ASSERT(lgrp_root->lgrp_nmnodes > 0);

	/*
	 * Don't need to update lgroup topology if this lgroup still has memory.
	 *
	 * In the special case of DR copy-rename with the only mnode being
	 * removed, the lgrp_mnodes for the root is always non-zero, but we
	 * still need to update the lgroup topology.
	 */
	if ((my_lgrp->lgrp_nmnodes > 0) &&
	    !(is_copy_rename &&
	    (my_lgrp == lgrp_root) &&
	    (my_lgrp->lgrp_mnodes == mnodes_mask))) {
		if (drop_lock)
			mutex_exit(&cpu_lock);
		return;
	}

	/*
	 * This lgroup does not contain any memory now
	 */
	klgrpset_clear(my_lgrp->lgrp_set[LGRP_RSRC_MEM]);

	/*
	 * Remove this lgroup from lgroup topology if it does not contain any
	 * resources now
	 */
	lgrpid = my_lgrp->lgrp_id;
	count = 0;
	klgrpset_clear(changed);
	if (lgrp_rsets_empty(my_lgrp->lgrp_set)) {
		/*
		 * Delete lgroup when no more resources.  Unsplicing a leaf
		 * from the topology requires paused CPUs.
		 */
		if (need_synch)
			pause_cpus(NULL);
		count = lgrp_leaf_delete(my_lgrp, lgrp_table,
		    lgrp_alloc_max + 1, &changed);
		ASSERT(count > 0);
		if (need_synch)
			start_cpus();
	} else {
		/*
		 * Remove lgroup from memory resources of any lgroups that
		 * contain it as such
		 */
		for (i = 0; i <= lgrp_alloc_max; i++) {
			lgrp_t		*lgrp;

			lgrp = lgrp_table[i];
			if (!LGRP_EXISTS(lgrp) ||
			    !klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM],
			    lgrpid))
				continue;

			klgrpset_del(lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);
		}
	}
	if (drop_lock)
		mutex_exit(&cpu_lock);
}

/*
 * Return lgroup with given platform handle, or NULL if no existing lgroup
 * matches (or the handle is LGRP_NULL_HANDLE).
 */
lgrp_t *
lgrp_hand_to_lgrp(lgrp_handle_t hand)
{
	int	i;
	lgrp_t	*lgrp;

	if (hand == LGRP_NULL_HANDLE)
		return (NULL);

	for (i = 0; i <= lgrp_alloc_max; i++) {
		lgrp = lgrp_table[i];
		if (LGRP_EXISTS(lgrp) && lgrp->lgrp_plathand == hand)
			return (lgrp);
	}
	return (NULL);
}

/*
 * Return the home lgroup of the current thread.
 * We must do this with kernel preemption disabled, since we don't want our
 * thread to be re-homed while we're poking around with its lpl, and the lpl
 * should never be NULL.
 *
 * NOTE: Can't guarantee that lgroup will be valid once kernel preemption
 * is enabled because of DR. Callers can use disable kernel preemption
 * around this call to guarantee that the lgroup will be valid beyond this
 * routine, since kernel preemption can be recursive.
15677c478bd9Sstevel@tonic-gate */ 15687c478bd9Sstevel@tonic-gate lgrp_t * 15697c478bd9Sstevel@tonic-gate lgrp_home_lgrp(void) 15707c478bd9Sstevel@tonic-gate { 15717c478bd9Sstevel@tonic-gate lgrp_t *lgrp; 15727c478bd9Sstevel@tonic-gate lpl_t *lpl; 15737c478bd9Sstevel@tonic-gate 15747c478bd9Sstevel@tonic-gate kpreempt_disable(); 15757c478bd9Sstevel@tonic-gate 15767c478bd9Sstevel@tonic-gate lpl = curthread->t_lpl; 15777c478bd9Sstevel@tonic-gate ASSERT(lpl != NULL); 15787c478bd9Sstevel@tonic-gate ASSERT(lpl->lpl_lgrpid >= 0 && lpl->lpl_lgrpid <= lgrp_alloc_max); 15797c478bd9Sstevel@tonic-gate ASSERT(LGRP_EXISTS(lgrp_table[lpl->lpl_lgrpid])); 15807c478bd9Sstevel@tonic-gate lgrp = lgrp_table[lpl->lpl_lgrpid]; 15817c478bd9Sstevel@tonic-gate 15827c478bd9Sstevel@tonic-gate kpreempt_enable(); 15837c478bd9Sstevel@tonic-gate 15847c478bd9Sstevel@tonic-gate return (lgrp); 15857c478bd9Sstevel@tonic-gate } 15867c478bd9Sstevel@tonic-gate 15877c478bd9Sstevel@tonic-gate /* 15887c478bd9Sstevel@tonic-gate * Return ID of home lgroup for given thread 15897c478bd9Sstevel@tonic-gate * (See comments for lgrp_home_lgrp() for special care and handling 15907c478bd9Sstevel@tonic-gate * instructions) 15917c478bd9Sstevel@tonic-gate */ 15927c478bd9Sstevel@tonic-gate lgrp_id_t 15937c478bd9Sstevel@tonic-gate lgrp_home_id(kthread_t *t) 15947c478bd9Sstevel@tonic-gate { 15957c478bd9Sstevel@tonic-gate lgrp_id_t lgrp; 15967c478bd9Sstevel@tonic-gate lpl_t *lpl; 15977c478bd9Sstevel@tonic-gate 15987c478bd9Sstevel@tonic-gate ASSERT(t != NULL); 15997c478bd9Sstevel@tonic-gate /* 16007c478bd9Sstevel@tonic-gate * We'd like to ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)), but we 16017c478bd9Sstevel@tonic-gate * cannot since the HAT layer can call into this routine to 16027c478bd9Sstevel@tonic-gate * determine the locality for its data structures in the context 16037c478bd9Sstevel@tonic-gate * of a page fault. 
16047c478bd9Sstevel@tonic-gate */ 16057c478bd9Sstevel@tonic-gate 16067c478bd9Sstevel@tonic-gate kpreempt_disable(); 16077c478bd9Sstevel@tonic-gate 16087c478bd9Sstevel@tonic-gate lpl = t->t_lpl; 16097c478bd9Sstevel@tonic-gate ASSERT(lpl != NULL); 16107c478bd9Sstevel@tonic-gate ASSERT(lpl->lpl_lgrpid >= 0 && lpl->lpl_lgrpid <= lgrp_alloc_max); 16117c478bd9Sstevel@tonic-gate lgrp = lpl->lpl_lgrpid; 16127c478bd9Sstevel@tonic-gate 16137c478bd9Sstevel@tonic-gate kpreempt_enable(); 16147c478bd9Sstevel@tonic-gate 16157c478bd9Sstevel@tonic-gate return (lgrp); 16167c478bd9Sstevel@tonic-gate } 16177c478bd9Sstevel@tonic-gate 16187c478bd9Sstevel@tonic-gate /* 16197c478bd9Sstevel@tonic-gate * Return lgroup containing the physical memory for the given page frame number 16207c478bd9Sstevel@tonic-gate */ 16217c478bd9Sstevel@tonic-gate lgrp_t * 16227c478bd9Sstevel@tonic-gate lgrp_pfn_to_lgrp(pfn_t pfn) 16237c478bd9Sstevel@tonic-gate { 16247c478bd9Sstevel@tonic-gate lgrp_handle_t hand; 16257c478bd9Sstevel@tonic-gate int i; 16267c478bd9Sstevel@tonic-gate lgrp_t *lgrp; 16277c478bd9Sstevel@tonic-gate 16287c478bd9Sstevel@tonic-gate hand = lgrp_plat_pfn_to_hand(pfn); 16297c478bd9Sstevel@tonic-gate if (hand != LGRP_NULL_HANDLE) 16307c478bd9Sstevel@tonic-gate for (i = 0; i <= lgrp_alloc_max; i++) { 16317c478bd9Sstevel@tonic-gate lgrp = lgrp_table[i]; 16327c478bd9Sstevel@tonic-gate if (LGRP_EXISTS(lgrp) && lgrp->lgrp_plathand == hand) 16337c478bd9Sstevel@tonic-gate return (lgrp); 16347c478bd9Sstevel@tonic-gate } 16357c478bd9Sstevel@tonic-gate return (NULL); 16367c478bd9Sstevel@tonic-gate } 16377c478bd9Sstevel@tonic-gate 16387c478bd9Sstevel@tonic-gate /* 16397c478bd9Sstevel@tonic-gate * Return lgroup containing the physical memory for the given page frame number 16407c478bd9Sstevel@tonic-gate */ 16417c478bd9Sstevel@tonic-gate lgrp_t * 16427c478bd9Sstevel@tonic-gate lgrp_phys_to_lgrp(u_longlong_t physaddr) 16437c478bd9Sstevel@tonic-gate { 16447c478bd9Sstevel@tonic-gate lgrp_handle_t hand; 
16457c478bd9Sstevel@tonic-gate int i; 16467c478bd9Sstevel@tonic-gate lgrp_t *lgrp; 16477c478bd9Sstevel@tonic-gate pfn_t pfn; 16487c478bd9Sstevel@tonic-gate 16497c478bd9Sstevel@tonic-gate pfn = btop(physaddr); 16507c478bd9Sstevel@tonic-gate hand = lgrp_plat_pfn_to_hand(pfn); 16517c478bd9Sstevel@tonic-gate if (hand != LGRP_NULL_HANDLE) 16527c478bd9Sstevel@tonic-gate for (i = 0; i <= lgrp_alloc_max; i++) { 16537c478bd9Sstevel@tonic-gate lgrp = lgrp_table[i]; 16547c478bd9Sstevel@tonic-gate if (LGRP_EXISTS(lgrp) && lgrp->lgrp_plathand == hand) 16557c478bd9Sstevel@tonic-gate return (lgrp); 16567c478bd9Sstevel@tonic-gate } 16577c478bd9Sstevel@tonic-gate return (NULL); 16587c478bd9Sstevel@tonic-gate } 16597c478bd9Sstevel@tonic-gate 16607c478bd9Sstevel@tonic-gate /* 16617c478bd9Sstevel@tonic-gate * Return the leaf lgroup containing the given CPU 1662394b433dSesaxe * 1663394b433dSesaxe * The caller needs to take precautions necessary to prevent 1664394b433dSesaxe * "cpu" from going away across a call to this function. 1665394b433dSesaxe * hint: kpreempt_disable()/kpreempt_enable() 16667c478bd9Sstevel@tonic-gate */ 16677c478bd9Sstevel@tonic-gate static lgrp_t * 16687c478bd9Sstevel@tonic-gate lgrp_cpu_to_lgrp(cpu_t *cpu) 16697c478bd9Sstevel@tonic-gate { 1670ab761399Sesaxe return (cpu->cpu_lpl->lpl_lgrp); 16717c478bd9Sstevel@tonic-gate } 16727c478bd9Sstevel@tonic-gate 16737c478bd9Sstevel@tonic-gate /* 16747c478bd9Sstevel@tonic-gate * Return the sum of the partition loads in an lgrp divided by 16757c478bd9Sstevel@tonic-gate * the number of CPUs in the lgrp. This is our best approximation 16767c478bd9Sstevel@tonic-gate * of an 'lgroup load average' for a useful per-lgroup kstat. 
16777c478bd9Sstevel@tonic-gate */ 16787c478bd9Sstevel@tonic-gate static uint64_t 16797c478bd9Sstevel@tonic-gate lgrp_sum_loadavgs(lgrp_t *lgrp) 16807c478bd9Sstevel@tonic-gate { 16817c478bd9Sstevel@tonic-gate cpu_t *cpu; 16827c478bd9Sstevel@tonic-gate int ncpu; 16837c478bd9Sstevel@tonic-gate uint64_t loads = 0; 16847c478bd9Sstevel@tonic-gate 16857c478bd9Sstevel@tonic-gate mutex_enter(&cpu_lock); 16867c478bd9Sstevel@tonic-gate 16877c478bd9Sstevel@tonic-gate cpu = lgrp->lgrp_cpu; 16887c478bd9Sstevel@tonic-gate ncpu = lgrp->lgrp_cpucnt; 16897c478bd9Sstevel@tonic-gate 16907c478bd9Sstevel@tonic-gate if (cpu == NULL || ncpu == 0) { 16917c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock); 16927c478bd9Sstevel@tonic-gate return (0ull); 16937c478bd9Sstevel@tonic-gate } 16947c478bd9Sstevel@tonic-gate 16957c478bd9Sstevel@tonic-gate do { 16967c478bd9Sstevel@tonic-gate loads += cpu->cpu_lpl->lpl_loadavg; 16977c478bd9Sstevel@tonic-gate cpu = cpu->cpu_next_lgrp; 16987c478bd9Sstevel@tonic-gate } while (cpu != lgrp->lgrp_cpu); 16997c478bd9Sstevel@tonic-gate 17007c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock); 17017c478bd9Sstevel@tonic-gate 17027c478bd9Sstevel@tonic-gate return (loads / ncpu); 17037c478bd9Sstevel@tonic-gate } 17047c478bd9Sstevel@tonic-gate 17057c478bd9Sstevel@tonic-gate void 17067c478bd9Sstevel@tonic-gate lgrp_stat_add(lgrp_id_t lgrpid, lgrp_stat_t stat, int64_t val) 17077c478bd9Sstevel@tonic-gate { 17087c478bd9Sstevel@tonic-gate struct lgrp_stats *pstats; 17097c478bd9Sstevel@tonic-gate 17107c478bd9Sstevel@tonic-gate /* 17117c478bd9Sstevel@tonic-gate * Verify that the caller isn't trying to add to 17127c478bd9Sstevel@tonic-gate * a statistic for an lgroup that has gone away 17137c478bd9Sstevel@tonic-gate */ 17147c478bd9Sstevel@tonic-gate if (lgrpid < 0 || lgrpid > lgrp_alloc_max) 17157c478bd9Sstevel@tonic-gate return; 17167c478bd9Sstevel@tonic-gate 17177c478bd9Sstevel@tonic-gate pstats = &lgrp_stats[lgrpid]; 17187c478bd9Sstevel@tonic-gate atomic_add_64((uint64_t 
*)LGRP_STAT_WRITE_PTR(pstats, stat), val); 17197c478bd9Sstevel@tonic-gate } 17207c478bd9Sstevel@tonic-gate 17217c478bd9Sstevel@tonic-gate int64_t 17227c478bd9Sstevel@tonic-gate lgrp_stat_read(lgrp_id_t lgrpid, lgrp_stat_t stat) 17237c478bd9Sstevel@tonic-gate { 17247c478bd9Sstevel@tonic-gate uint64_t val; 17257c478bd9Sstevel@tonic-gate struct lgrp_stats *pstats; 17267c478bd9Sstevel@tonic-gate 17277c478bd9Sstevel@tonic-gate if (lgrpid < 0 || lgrpid > lgrp_alloc_max) 17287c478bd9Sstevel@tonic-gate return ((int64_t)0); 17297c478bd9Sstevel@tonic-gate 17307c478bd9Sstevel@tonic-gate pstats = &lgrp_stats[lgrpid]; 17317c478bd9Sstevel@tonic-gate LGRP_STAT_READ(pstats, stat, val); 17327c478bd9Sstevel@tonic-gate return (val); 17337c478bd9Sstevel@tonic-gate } 17347c478bd9Sstevel@tonic-gate 17357c478bd9Sstevel@tonic-gate /* 17367c478bd9Sstevel@tonic-gate * Reset all kstats for lgrp specified by its lgrpid. 17377c478bd9Sstevel@tonic-gate */ 17387c478bd9Sstevel@tonic-gate static void 17397c478bd9Sstevel@tonic-gate lgrp_kstat_reset(lgrp_id_t lgrpid) 17407c478bd9Sstevel@tonic-gate { 17417c478bd9Sstevel@tonic-gate lgrp_stat_t stat; 17427c478bd9Sstevel@tonic-gate 17437c478bd9Sstevel@tonic-gate if (lgrpid < 0 || lgrpid > lgrp_alloc_max) 17447c478bd9Sstevel@tonic-gate return; 17457c478bd9Sstevel@tonic-gate 17467c478bd9Sstevel@tonic-gate for (stat = 0; stat < LGRP_NUM_COUNTER_STATS; stat++) { 17477c478bd9Sstevel@tonic-gate LGRP_STAT_RESET(&lgrp_stats[lgrpid], stat); 17487c478bd9Sstevel@tonic-gate } 17497c478bd9Sstevel@tonic-gate } 17507c478bd9Sstevel@tonic-gate 17517c478bd9Sstevel@tonic-gate /* 17527c478bd9Sstevel@tonic-gate * Collect all per-lgrp statistics for the lgrp associated with this 17537c478bd9Sstevel@tonic-gate * kstat, and store them in the ks_data array. 17547c478bd9Sstevel@tonic-gate * 17557c478bd9Sstevel@tonic-gate * The superuser can reset all the running counter statistics for an 17567c478bd9Sstevel@tonic-gate * lgrp by writing to any of the lgrp's stats. 
17577c478bd9Sstevel@tonic-gate */ 17587c478bd9Sstevel@tonic-gate static int 17597c478bd9Sstevel@tonic-gate lgrp_kstat_extract(kstat_t *ksp, int rw) 17607c478bd9Sstevel@tonic-gate { 17617c478bd9Sstevel@tonic-gate lgrp_stat_t stat; 17627c478bd9Sstevel@tonic-gate struct kstat_named *ksd; 17637c478bd9Sstevel@tonic-gate lgrp_t *lgrp; 17647c478bd9Sstevel@tonic-gate lgrp_id_t lgrpid; 17657c478bd9Sstevel@tonic-gate 17667c478bd9Sstevel@tonic-gate lgrp = (lgrp_t *)ksp->ks_private; 17677c478bd9Sstevel@tonic-gate 17687c478bd9Sstevel@tonic-gate ksd = (struct kstat_named *)ksp->ks_data; 17697c478bd9Sstevel@tonic-gate ASSERT(ksd == (struct kstat_named *)&lgrp_kstat_data); 17707c478bd9Sstevel@tonic-gate 17717c478bd9Sstevel@tonic-gate lgrpid = lgrp->lgrp_id; 17727c478bd9Sstevel@tonic-gate 17737c478bd9Sstevel@tonic-gate if (lgrpid == LGRP_NONE) { 17747c478bd9Sstevel@tonic-gate /* 17757c478bd9Sstevel@tonic-gate * Return all zeroes as stats for freed lgrp. 17767c478bd9Sstevel@tonic-gate */ 17777c478bd9Sstevel@tonic-gate for (stat = 0; stat < LGRP_NUM_COUNTER_STATS; stat++) { 17787c478bd9Sstevel@tonic-gate ksd[stat].value.i64 = 0; 17797c478bd9Sstevel@tonic-gate } 17807c478bd9Sstevel@tonic-gate ksd[stat + LGRP_NUM_CPUS].value.i64 = 0; 17817c478bd9Sstevel@tonic-gate ksd[stat + LGRP_NUM_PG_INSTALL].value.i64 = 0; 17827c478bd9Sstevel@tonic-gate ksd[stat + LGRP_NUM_PG_AVAIL].value.i64 = 0; 17837c478bd9Sstevel@tonic-gate ksd[stat + LGRP_NUM_PG_FREE].value.i64 = 0; 17847c478bd9Sstevel@tonic-gate ksd[stat + LGRP_LOADAVG].value.i64 = 0; 17857c478bd9Sstevel@tonic-gate } else if (rw != KSTAT_WRITE) { 17867c478bd9Sstevel@tonic-gate /* 17877c478bd9Sstevel@tonic-gate * Handle counter stats 17887c478bd9Sstevel@tonic-gate */ 17897c478bd9Sstevel@tonic-gate for (stat = 0; stat < LGRP_NUM_COUNTER_STATS; stat++) { 17907c478bd9Sstevel@tonic-gate ksd[stat].value.i64 = lgrp_stat_read(lgrpid, stat); 17917c478bd9Sstevel@tonic-gate } 17927c478bd9Sstevel@tonic-gate 17937c478bd9Sstevel@tonic-gate /* 
17947c478bd9Sstevel@tonic-gate * Handle kernel data snapshot stats 17957c478bd9Sstevel@tonic-gate */ 17967c478bd9Sstevel@tonic-gate ksd[stat + LGRP_NUM_CPUS].value.i64 = lgrp->lgrp_cpucnt; 17977c478bd9Sstevel@tonic-gate ksd[stat + LGRP_NUM_PG_INSTALL].value.i64 = 17987c478bd9Sstevel@tonic-gate lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_INSTALL); 17997c478bd9Sstevel@tonic-gate ksd[stat + LGRP_NUM_PG_AVAIL].value.i64 = 18007c478bd9Sstevel@tonic-gate lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_AVAIL); 18017c478bd9Sstevel@tonic-gate ksd[stat + LGRP_NUM_PG_FREE].value.i64 = 18027c478bd9Sstevel@tonic-gate lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_FREE); 18037c478bd9Sstevel@tonic-gate ksd[stat + LGRP_LOADAVG].value.i64 = lgrp_sum_loadavgs(lgrp); 1804c6402783Sakolb ksd[stat + LGRP_LOADAVG_SCALE].value.i64 = 1805c6402783Sakolb lgrp_loadavg_max_effect; 18067c478bd9Sstevel@tonic-gate } else { 18077c478bd9Sstevel@tonic-gate lgrp_kstat_reset(lgrpid); 18087c478bd9Sstevel@tonic-gate } 18097c478bd9Sstevel@tonic-gate 18107c478bd9Sstevel@tonic-gate return (0); 18117c478bd9Sstevel@tonic-gate } 18127c478bd9Sstevel@tonic-gate 18137c478bd9Sstevel@tonic-gate int 18147c478bd9Sstevel@tonic-gate lgrp_query_cpu(processorid_t id, lgrp_id_t *lp) 18157c478bd9Sstevel@tonic-gate { 18167c478bd9Sstevel@tonic-gate cpu_t *cp; 18177c478bd9Sstevel@tonic-gate 18187c478bd9Sstevel@tonic-gate mutex_enter(&cpu_lock); 18197c478bd9Sstevel@tonic-gate 18207c478bd9Sstevel@tonic-gate if ((cp = cpu_get(id)) == NULL) { 18217c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock); 18227c478bd9Sstevel@tonic-gate return (EINVAL); 18237c478bd9Sstevel@tonic-gate } 18247c478bd9Sstevel@tonic-gate 18257c478bd9Sstevel@tonic-gate if (cpu_is_offline(cp) || cpu_is_poweredoff(cp)) { 18267c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock); 18277c478bd9Sstevel@tonic-gate return (EINVAL); 18287c478bd9Sstevel@tonic-gate } 18297c478bd9Sstevel@tonic-gate 18307c478bd9Sstevel@tonic-gate ASSERT(cp->cpu_lpl != NULL); 18317c478bd9Sstevel@tonic-gate 
18327c478bd9Sstevel@tonic-gate *lp = cp->cpu_lpl->lpl_lgrpid; 18337c478bd9Sstevel@tonic-gate 18347c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock); 18357c478bd9Sstevel@tonic-gate 18367c478bd9Sstevel@tonic-gate return (0); 18377c478bd9Sstevel@tonic-gate } 18387c478bd9Sstevel@tonic-gate 18397c478bd9Sstevel@tonic-gate int 18407c478bd9Sstevel@tonic-gate lgrp_query_load(processorid_t id, lgrp_load_t *lp) 18417c478bd9Sstevel@tonic-gate { 18427c478bd9Sstevel@tonic-gate cpu_t *cp; 18437c478bd9Sstevel@tonic-gate 18447c478bd9Sstevel@tonic-gate mutex_enter(&cpu_lock); 18457c478bd9Sstevel@tonic-gate 18467c478bd9Sstevel@tonic-gate if ((cp = cpu_get(id)) == NULL) { 18477c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock); 18487c478bd9Sstevel@tonic-gate return (EINVAL); 18497c478bd9Sstevel@tonic-gate } 18507c478bd9Sstevel@tonic-gate 18517c478bd9Sstevel@tonic-gate ASSERT(cp->cpu_lpl != NULL); 18527c478bd9Sstevel@tonic-gate 18537c478bd9Sstevel@tonic-gate *lp = cp->cpu_lpl->lpl_loadavg; 18547c478bd9Sstevel@tonic-gate 18557c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock); 18567c478bd9Sstevel@tonic-gate 18577c478bd9Sstevel@tonic-gate return (0); 18587c478bd9Sstevel@tonic-gate } 18597c478bd9Sstevel@tonic-gate 18607c478bd9Sstevel@tonic-gate /* 18617c478bd9Sstevel@tonic-gate * Add a resource named by lpl_leaf to rset of lpl_target 18627c478bd9Sstevel@tonic-gate * 18637c478bd9Sstevel@tonic-gate * This routine also adjusts ncpu and nrset if the call succeeds in adding a 18647c478bd9Sstevel@tonic-gate * resource. It is adjusted here, as this is presently the only place that we 18657c478bd9Sstevel@tonic-gate * can be certain a resource addition has succeeded. 18667c478bd9Sstevel@tonic-gate * 18677c478bd9Sstevel@tonic-gate * We keep the list of rsets sorted so that the dispatcher can quickly walk the 18687c478bd9Sstevel@tonic-gate * list in order until it reaches a NULL. (This list is required to be NULL 18697c478bd9Sstevel@tonic-gate * terminated, too). 
This is done so that we can mark start pos + 1, so that 18707c478bd9Sstevel@tonic-gate * each lpl is traversed sequentially, but in a different order. We hope this 18717c478bd9Sstevel@tonic-gate * will improve performance a bit. (Hopefully, less read-to-own traffic...) 18727c478bd9Sstevel@tonic-gate */ 18737c478bd9Sstevel@tonic-gate 18747c478bd9Sstevel@tonic-gate void 18757c478bd9Sstevel@tonic-gate lpl_rset_add(lpl_t *lpl_target, lpl_t *lpl_leaf) 18767c478bd9Sstevel@tonic-gate { 18777c478bd9Sstevel@tonic-gate int i; 18787c478bd9Sstevel@tonic-gate int entry_slot = 0; 18797c478bd9Sstevel@tonic-gate 18807c478bd9Sstevel@tonic-gate /* return if leaf is already present */ 18817c478bd9Sstevel@tonic-gate for (i = 0; i < lpl_target->lpl_nrset; i++) { 18827c478bd9Sstevel@tonic-gate if (lpl_target->lpl_rset[i] == lpl_leaf) { 18837c478bd9Sstevel@tonic-gate return; 18847c478bd9Sstevel@tonic-gate } 18857c478bd9Sstevel@tonic-gate 18867c478bd9Sstevel@tonic-gate if (lpl_target->lpl_rset[i]->lpl_lgrpid > 18877c478bd9Sstevel@tonic-gate lpl_leaf->lpl_lgrpid) { 18887c478bd9Sstevel@tonic-gate break; 18897c478bd9Sstevel@tonic-gate } 18907c478bd9Sstevel@tonic-gate } 18917c478bd9Sstevel@tonic-gate 18927c478bd9Sstevel@tonic-gate /* insert leaf, update counts */ 18937c478bd9Sstevel@tonic-gate entry_slot = i; 18947c478bd9Sstevel@tonic-gate i = lpl_target->lpl_nrset++; 18957c478bd9Sstevel@tonic-gate if (lpl_target->lpl_nrset >= LPL_RSET_MAX) { 18967c478bd9Sstevel@tonic-gate panic("More leaf lgrps in system than are supported!\n"); 18977c478bd9Sstevel@tonic-gate } 18987c478bd9Sstevel@tonic-gate 18997c478bd9Sstevel@tonic-gate /* 19007c478bd9Sstevel@tonic-gate * Start at the end of the rset array and work backwards towards the 19017c478bd9Sstevel@tonic-gate * slot into which the new lpl will be inserted. 
This effectively 19027c478bd9Sstevel@tonic-gate * preserves the current ordering by scooting everybody over one entry, 19037c478bd9Sstevel@tonic-gate * and placing the new entry into the space created. 19047c478bd9Sstevel@tonic-gate */ 19057c478bd9Sstevel@tonic-gate 19067c478bd9Sstevel@tonic-gate while (i-- > entry_slot) { 19077c478bd9Sstevel@tonic-gate lpl_target->lpl_rset[i + 1] = lpl_target->lpl_rset[i]; 19087c478bd9Sstevel@tonic-gate } 19097c478bd9Sstevel@tonic-gate 19107c478bd9Sstevel@tonic-gate lpl_target->lpl_rset[entry_slot] = lpl_leaf; 19117c478bd9Sstevel@tonic-gate lpl_target->lpl_ncpu += lpl_leaf->lpl_ncpu; 19127c478bd9Sstevel@tonic-gate } 19137c478bd9Sstevel@tonic-gate 19147c478bd9Sstevel@tonic-gate /* 19157c478bd9Sstevel@tonic-gate * Update each of lpl_parent's children with a proper hint and 19167c478bd9Sstevel@tonic-gate * a reference to their parent. 19177c478bd9Sstevel@tonic-gate * The lgrp topology is used as the reference since it is fully 19187c478bd9Sstevel@tonic-gate * consistent and correct at this point. 19197c478bd9Sstevel@tonic-gate * 19207c478bd9Sstevel@tonic-gate * Each child's hint will reference an element in lpl_parent's 19217c478bd9Sstevel@tonic-gate * rset that designates where the child should start searching 19227c478bd9Sstevel@tonic-gate * for CPU resources. The hint selected is the highest order leaf present 19237c478bd9Sstevel@tonic-gate * in the child's lineage. 19247c478bd9Sstevel@tonic-gate * 19257c478bd9Sstevel@tonic-gate * This should be called after any potential change in lpl_parent's 19267c478bd9Sstevel@tonic-gate * rset. 
19277c478bd9Sstevel@tonic-gate */ 19287c478bd9Sstevel@tonic-gate static void 19297c478bd9Sstevel@tonic-gate lpl_child_update(lpl_t *lpl_parent, struct cpupart *cp) 19307c478bd9Sstevel@tonic-gate { 19317c478bd9Sstevel@tonic-gate klgrpset_t children, leaves; 19327c478bd9Sstevel@tonic-gate lpl_t *lpl; 19337c478bd9Sstevel@tonic-gate int hint; 19347c478bd9Sstevel@tonic-gate int i, j; 19357c478bd9Sstevel@tonic-gate 19367c478bd9Sstevel@tonic-gate children = lgrp_table[lpl_parent->lpl_lgrpid]->lgrp_children; 19377c478bd9Sstevel@tonic-gate if (klgrpset_isempty(children)) 19387c478bd9Sstevel@tonic-gate return; /* nothing to do */ 19397c478bd9Sstevel@tonic-gate 19407c478bd9Sstevel@tonic-gate for (i = 0; i <= lgrp_alloc_max; i++) { 19417c478bd9Sstevel@tonic-gate if (klgrpset_ismember(children, i)) { 19427c478bd9Sstevel@tonic-gate 19437c478bd9Sstevel@tonic-gate /* 19447c478bd9Sstevel@tonic-gate * Given the set of leaves in this child's lineage, 19457c478bd9Sstevel@tonic-gate * find the highest order leaf present in the parent's 19467c478bd9Sstevel@tonic-gate * rset. Select this as the hint for the child. 19477c478bd9Sstevel@tonic-gate */ 19487c478bd9Sstevel@tonic-gate leaves = lgrp_table[i]->lgrp_leaves; 19497c478bd9Sstevel@tonic-gate hint = 0; 19507c478bd9Sstevel@tonic-gate for (j = 0; j < lpl_parent->lpl_nrset; j++) { 19517c478bd9Sstevel@tonic-gate lpl = lpl_parent->lpl_rset[j]; 19527c478bd9Sstevel@tonic-gate if (klgrpset_ismember(leaves, lpl->lpl_lgrpid)) 19537c478bd9Sstevel@tonic-gate hint = j; 19547c478bd9Sstevel@tonic-gate } 19557c478bd9Sstevel@tonic-gate cp->cp_lgrploads[i].lpl_hint = hint; 19567c478bd9Sstevel@tonic-gate 19577c478bd9Sstevel@tonic-gate /* 19587c478bd9Sstevel@tonic-gate * (Re)set the parent. It may be incorrect if 19597c478bd9Sstevel@tonic-gate * lpl_parent is new in the topology. 
19607c478bd9Sstevel@tonic-gate */ 19617c478bd9Sstevel@tonic-gate cp->cp_lgrploads[i].lpl_parent = lpl_parent; 19627c478bd9Sstevel@tonic-gate } 19637c478bd9Sstevel@tonic-gate } 19647c478bd9Sstevel@tonic-gate } 19657c478bd9Sstevel@tonic-gate 19667c478bd9Sstevel@tonic-gate /* 19677c478bd9Sstevel@tonic-gate * Delete resource lpl_leaf from rset of lpl_target, assuming it's there. 19687c478bd9Sstevel@tonic-gate * 19697c478bd9Sstevel@tonic-gate * This routine also adjusts ncpu and nrset if the call succeeds in deleting a 19707c478bd9Sstevel@tonic-gate * resource. The values are adjusted here, as this is the only place that we can 19717c478bd9Sstevel@tonic-gate * be certain a resource was successfully deleted. 19727c478bd9Sstevel@tonic-gate */ 19737c478bd9Sstevel@tonic-gate void 19747c478bd9Sstevel@tonic-gate lpl_rset_del(lpl_t *lpl_target, lpl_t *lpl_leaf) 19757c478bd9Sstevel@tonic-gate { 19767c478bd9Sstevel@tonic-gate int i; 19777c478bd9Sstevel@tonic-gate 19787c478bd9Sstevel@tonic-gate /* find leaf in intermediate node */ 19797c478bd9Sstevel@tonic-gate for (i = 0; i < lpl_target->lpl_nrset; i++) { 19807c478bd9Sstevel@tonic-gate if (lpl_target->lpl_rset[i] == lpl_leaf) 19817c478bd9Sstevel@tonic-gate break; 19827c478bd9Sstevel@tonic-gate } 19837c478bd9Sstevel@tonic-gate 19847c478bd9Sstevel@tonic-gate /* return if leaf not found */ 19857c478bd9Sstevel@tonic-gate if (lpl_target->lpl_rset[i] != lpl_leaf) 19867c478bd9Sstevel@tonic-gate return; 19877c478bd9Sstevel@tonic-gate 19887c478bd9Sstevel@tonic-gate /* prune leaf, compress array */ 19897c478bd9Sstevel@tonic-gate ASSERT(lpl_target->lpl_nrset < LPL_RSET_MAX); 19907c478bd9Sstevel@tonic-gate lpl_target->lpl_rset[lpl_target->lpl_nrset--] = NULL; 19917c478bd9Sstevel@tonic-gate lpl_target->lpl_ncpu--; 19927c478bd9Sstevel@tonic-gate do { 19937c478bd9Sstevel@tonic-gate lpl_target->lpl_rset[i] = lpl_target->lpl_rset[i + 1]; 19947c478bd9Sstevel@tonic-gate } while (i++ < lpl_target->lpl_nrset); 19957c478bd9Sstevel@tonic-gate } 
19967c478bd9Sstevel@tonic-gate 19977c478bd9Sstevel@tonic-gate /* 19987c478bd9Sstevel@tonic-gate * Check to see if the resource set of the target lpl contains the 19997c478bd9Sstevel@tonic-gate * supplied leaf lpl. This returns 1 if the lpl is found, 0 if it is not. 20007c478bd9Sstevel@tonic-gate */ 20017c478bd9Sstevel@tonic-gate 20027c478bd9Sstevel@tonic-gate int 20037c478bd9Sstevel@tonic-gate lpl_rset_contains(lpl_t *lpl_target, lpl_t *lpl_leaf) 20047c478bd9Sstevel@tonic-gate { 20057c478bd9Sstevel@tonic-gate int i; 20067c478bd9Sstevel@tonic-gate 20077c478bd9Sstevel@tonic-gate for (i = 0; i < lpl_target->lpl_nrset; i++) { 20087c478bd9Sstevel@tonic-gate if (lpl_target->lpl_rset[i] == lpl_leaf) 20097c478bd9Sstevel@tonic-gate return (1); 20107c478bd9Sstevel@tonic-gate } 20117c478bd9Sstevel@tonic-gate 20127c478bd9Sstevel@tonic-gate return (0); 20137c478bd9Sstevel@tonic-gate } 20147c478bd9Sstevel@tonic-gate 20157c478bd9Sstevel@tonic-gate /* 20167c478bd9Sstevel@tonic-gate * Called when we change cpu lpl membership. This increments or decrements the 20177c478bd9Sstevel@tonic-gate * per-cpu counter in every lpl in which our leaf appears. 
20187c478bd9Sstevel@tonic-gate */ 20197c478bd9Sstevel@tonic-gate void 20207c478bd9Sstevel@tonic-gate lpl_cpu_adjcnt(lpl_act_t act, cpu_t *cp) 20217c478bd9Sstevel@tonic-gate { 20227c478bd9Sstevel@tonic-gate cpupart_t *cpupart; 20237c478bd9Sstevel@tonic-gate lgrp_t *lgrp_leaf; 20247c478bd9Sstevel@tonic-gate lgrp_t *lgrp_cur; 20257c478bd9Sstevel@tonic-gate lpl_t *lpl_leaf; 20267c478bd9Sstevel@tonic-gate lpl_t *lpl_cur; 20277c478bd9Sstevel@tonic-gate int i; 20287c478bd9Sstevel@tonic-gate 20297c478bd9Sstevel@tonic-gate ASSERT(act == LPL_DECREMENT || act == LPL_INCREMENT); 20307c478bd9Sstevel@tonic-gate 20317c478bd9Sstevel@tonic-gate cpupart = cp->cpu_part; 20327c478bd9Sstevel@tonic-gate lpl_leaf = cp->cpu_lpl; 20337c478bd9Sstevel@tonic-gate lgrp_leaf = lgrp_table[lpl_leaf->lpl_lgrpid]; 20347c478bd9Sstevel@tonic-gate 20357c478bd9Sstevel@tonic-gate for (i = 0; i <= lgrp_alloc_max; i++) { 20367c478bd9Sstevel@tonic-gate lgrp_cur = lgrp_table[i]; 20377c478bd9Sstevel@tonic-gate 20387c478bd9Sstevel@tonic-gate /* 20397c478bd9Sstevel@tonic-gate * Don't adjust if the lgrp isn't there, if we're the leaf lpl 20407c478bd9Sstevel@tonic-gate * for the cpu in question, or if the current lgrp and leaf 20417c478bd9Sstevel@tonic-gate * don't share the same resources. 
20427c478bd9Sstevel@tonic-gate */ 20437c478bd9Sstevel@tonic-gate 20447c478bd9Sstevel@tonic-gate if (!LGRP_EXISTS(lgrp_cur) || (lgrp_cur == lgrp_leaf) || 20457c478bd9Sstevel@tonic-gate !klgrpset_intersects(lgrp_leaf->lgrp_set[LGRP_RSRC_CPU], 20467c478bd9Sstevel@tonic-gate lgrp_cur->lgrp_set[LGRP_RSRC_CPU])) 20477c478bd9Sstevel@tonic-gate continue; 20487c478bd9Sstevel@tonic-gate 20497c478bd9Sstevel@tonic-gate 20507c478bd9Sstevel@tonic-gate lpl_cur = &cpupart->cp_lgrploads[lgrp_cur->lgrp_id]; 20517c478bd9Sstevel@tonic-gate 20527c478bd9Sstevel@tonic-gate if (lpl_cur->lpl_nrset > 0) { 20537c478bd9Sstevel@tonic-gate if (act == LPL_INCREMENT) { 20547c478bd9Sstevel@tonic-gate lpl_cur->lpl_ncpu++; 20557c478bd9Sstevel@tonic-gate } else if (act == LPL_DECREMENT) { 20567c478bd9Sstevel@tonic-gate lpl_cur->lpl_ncpu--; 20577c478bd9Sstevel@tonic-gate } 20587c478bd9Sstevel@tonic-gate } 20597c478bd9Sstevel@tonic-gate } 20607c478bd9Sstevel@tonic-gate } 20617c478bd9Sstevel@tonic-gate 20627c478bd9Sstevel@tonic-gate /* 20637c478bd9Sstevel@tonic-gate * Initialize lpl with given resources and specified lgrp 20647c478bd9Sstevel@tonic-gate */ 20657c478bd9Sstevel@tonic-gate 20667c478bd9Sstevel@tonic-gate void 20677c478bd9Sstevel@tonic-gate lpl_init(lpl_t *lpl, lpl_t *lpl_leaf, lgrp_t *lgrp) 20687c478bd9Sstevel@tonic-gate { 20697c478bd9Sstevel@tonic-gate lpl->lpl_lgrpid = lgrp->lgrp_id; 20707c478bd9Sstevel@tonic-gate lpl->lpl_loadavg = 0; 20717c478bd9Sstevel@tonic-gate if (lpl == lpl_leaf) 20727c478bd9Sstevel@tonic-gate lpl->lpl_ncpu = 1; 20737c478bd9Sstevel@tonic-gate else 20747c478bd9Sstevel@tonic-gate lpl->lpl_ncpu = lpl_leaf->lpl_ncpu; 20757c478bd9Sstevel@tonic-gate lpl->lpl_nrset = 1; 20767c478bd9Sstevel@tonic-gate lpl->lpl_rset[0] = lpl_leaf; 20777c478bd9Sstevel@tonic-gate lpl->lpl_lgrp = lgrp; 20787c478bd9Sstevel@tonic-gate lpl->lpl_parent = NULL; /* set by lpl_leaf_insert() */ 20797c478bd9Sstevel@tonic-gate lpl->lpl_cpus = NULL; /* set by lgrp_part_add_cpu() */ 
20807c478bd9Sstevel@tonic-gate } 20817c478bd9Sstevel@tonic-gate 20827c478bd9Sstevel@tonic-gate /* 20837c478bd9Sstevel@tonic-gate * Clear an unused lpl 20847c478bd9Sstevel@tonic-gate */ 20857c478bd9Sstevel@tonic-gate 20867c478bd9Sstevel@tonic-gate void 20877c478bd9Sstevel@tonic-gate lpl_clear(lpl_t *lpl) 20887c478bd9Sstevel@tonic-gate { 2089ab761399Sesaxe lgrp_id_t lid; 20907c478bd9Sstevel@tonic-gate 20917c478bd9Sstevel@tonic-gate /* save lid for debugging purposes */ 20927c478bd9Sstevel@tonic-gate lid = lpl->lpl_lgrpid; 20937c478bd9Sstevel@tonic-gate bzero(lpl, sizeof (lpl_t)); 20947c478bd9Sstevel@tonic-gate lpl->lpl_lgrpid = lid; 20957c478bd9Sstevel@tonic-gate } 20967c478bd9Sstevel@tonic-gate 20977c478bd9Sstevel@tonic-gate /* 20987c478bd9Sstevel@tonic-gate * Given a CPU-partition, verify that the lpl topology in the CPU-partition 20997c478bd9Sstevel@tonic-gate * is in sync with the lgroup toplogy in the system. The lpl topology may not 21007c478bd9Sstevel@tonic-gate * make full use of all of the lgroup topology, but this checks to make sure 21017c478bd9Sstevel@tonic-gate * that for the parts that it does use, it has correctly understood the 21027c478bd9Sstevel@tonic-gate * relationships that exist. This function returns 21037c478bd9Sstevel@tonic-gate * 0 if the topology is correct, and a non-zero error code, for non-debug 21047c478bd9Sstevel@tonic-gate * kernels if incorrect. Asserts are spread throughout the code to aid in 21057c478bd9Sstevel@tonic-gate * debugging on a DEBUG kernel. 
21067c478bd9Sstevel@tonic-gate */ 21077c478bd9Sstevel@tonic-gate int 21087c478bd9Sstevel@tonic-gate lpl_topo_verify(cpupart_t *cpupart) 21097c478bd9Sstevel@tonic-gate { 21107c478bd9Sstevel@tonic-gate lgrp_t *lgrp; 21117c478bd9Sstevel@tonic-gate lpl_t *lpl; 21127c478bd9Sstevel@tonic-gate klgrpset_t rset; 21137c478bd9Sstevel@tonic-gate klgrpset_t cset; 21147c478bd9Sstevel@tonic-gate cpu_t *cpu; 21157c478bd9Sstevel@tonic-gate cpu_t *cp_start; 21167c478bd9Sstevel@tonic-gate int i; 21177c478bd9Sstevel@tonic-gate int j; 21187c478bd9Sstevel@tonic-gate int sum; 21197c478bd9Sstevel@tonic-gate 21207c478bd9Sstevel@tonic-gate /* topology can't be incorrect if it doesn't exist */ 21217c478bd9Sstevel@tonic-gate if (!lgrp_topo_initialized || !lgrp_initialized) 21227c478bd9Sstevel@tonic-gate return (LPL_TOPO_CORRECT); 21237c478bd9Sstevel@tonic-gate 21247c478bd9Sstevel@tonic-gate ASSERT(cpupart != NULL); 21257c478bd9Sstevel@tonic-gate 21267c478bd9Sstevel@tonic-gate for (i = 0; i <= lgrp_alloc_max; i++) { 21277c478bd9Sstevel@tonic-gate lgrp = lgrp_table[i]; 21287c478bd9Sstevel@tonic-gate lpl = NULL; 21297c478bd9Sstevel@tonic-gate /* make sure lpls are allocated */ 21307c478bd9Sstevel@tonic-gate ASSERT(cpupart->cp_lgrploads); 21317c478bd9Sstevel@tonic-gate if (!cpupart->cp_lgrploads) 21327c478bd9Sstevel@tonic-gate return (LPL_TOPO_PART_HAS_NO_LPL); 21337c478bd9Sstevel@tonic-gate 21347c478bd9Sstevel@tonic-gate lpl = &cpupart->cp_lgrploads[i]; 21357c478bd9Sstevel@tonic-gate /* make sure our index is good */ 21367c478bd9Sstevel@tonic-gate ASSERT(i < cpupart->cp_nlgrploads); 21377c478bd9Sstevel@tonic-gate 21387c478bd9Sstevel@tonic-gate /* if lgroup doesn't exist, make sure lpl is empty */ 21397c478bd9Sstevel@tonic-gate if (!LGRP_EXISTS(lgrp)) { 21407c478bd9Sstevel@tonic-gate ASSERT(lpl->lpl_ncpu == 0); 21417c478bd9Sstevel@tonic-gate if (lpl->lpl_ncpu > 0) { 21427c478bd9Sstevel@tonic-gate return (LPL_TOPO_CPUS_NOT_EMPTY); 21437c478bd9Sstevel@tonic-gate } else { 
21447c478bd9Sstevel@tonic-gate continue; 21457c478bd9Sstevel@tonic-gate } 21467c478bd9Sstevel@tonic-gate } 21477c478bd9Sstevel@tonic-gate 21487c478bd9Sstevel@tonic-gate /* verify that lgroup and lpl are identically numbered */ 21497c478bd9Sstevel@tonic-gate ASSERT(lgrp->lgrp_id == lpl->lpl_lgrpid); 21507c478bd9Sstevel@tonic-gate 21517c478bd9Sstevel@tonic-gate /* if lgroup isn't in our partition, make sure lpl is empty */ 21527c478bd9Sstevel@tonic-gate if (!klgrpset_intersects(lgrp->lgrp_leaves, 21537c478bd9Sstevel@tonic-gate cpupart->cp_lgrpset)) { 21547c478bd9Sstevel@tonic-gate ASSERT(lpl->lpl_ncpu == 0); 21557c478bd9Sstevel@tonic-gate if (lpl->lpl_ncpu > 0) { 21567c478bd9Sstevel@tonic-gate return (LPL_TOPO_CPUS_NOT_EMPTY); 21577c478bd9Sstevel@tonic-gate } 21587c478bd9Sstevel@tonic-gate /* 21597c478bd9Sstevel@tonic-gate * lpl is empty, and lgroup isn't in partition. verify 21607c478bd9Sstevel@tonic-gate * that lpl doesn't show up in anyone else's rsets (in 21617c478bd9Sstevel@tonic-gate * this partition, anyway) 21627c478bd9Sstevel@tonic-gate */ 21637c478bd9Sstevel@tonic-gate 21647c478bd9Sstevel@tonic-gate for (j = 0; j < cpupart->cp_nlgrploads; j++) { 21657c478bd9Sstevel@tonic-gate lpl_t *i_lpl; /* lpl we're iterating over */ 21667c478bd9Sstevel@tonic-gate 21677c478bd9Sstevel@tonic-gate i_lpl = &cpupart->cp_lgrploads[j]; 21687c478bd9Sstevel@tonic-gate 21697c478bd9Sstevel@tonic-gate ASSERT(!lpl_rset_contains(i_lpl, lpl)); 21707c478bd9Sstevel@tonic-gate if (lpl_rset_contains(i_lpl, lpl)) { 21717c478bd9Sstevel@tonic-gate return (LPL_TOPO_LPL_ORPHANED); 21727c478bd9Sstevel@tonic-gate } 21737c478bd9Sstevel@tonic-gate } 21747c478bd9Sstevel@tonic-gate /* lgroup is empty, and everything is ok. 
continue */ 21757c478bd9Sstevel@tonic-gate continue; 21767c478bd9Sstevel@tonic-gate } 21777c478bd9Sstevel@tonic-gate 21787c478bd9Sstevel@tonic-gate 21797c478bd9Sstevel@tonic-gate /* lgroup is in this partition, now check it against lpl */ 21807c478bd9Sstevel@tonic-gate 21817c478bd9Sstevel@tonic-gate /* do both have matching lgrps? */ 21827c478bd9Sstevel@tonic-gate ASSERT(lgrp == lpl->lpl_lgrp); 21837c478bd9Sstevel@tonic-gate if (lgrp != lpl->lpl_lgrp) { 21847c478bd9Sstevel@tonic-gate return (LPL_TOPO_LGRP_MISMATCH); 21857c478bd9Sstevel@tonic-gate } 21867c478bd9Sstevel@tonic-gate 21877c478bd9Sstevel@tonic-gate /* do the parent lgroups exist and do they match? */ 21887c478bd9Sstevel@tonic-gate if (lgrp->lgrp_parent) { 21897c478bd9Sstevel@tonic-gate ASSERT(lpl->lpl_parent); 21907c478bd9Sstevel@tonic-gate ASSERT(lgrp->lgrp_parent->lgrp_id == 21917c478bd9Sstevel@tonic-gate lpl->lpl_parent->lpl_lgrpid); 21927c478bd9Sstevel@tonic-gate 21937c478bd9Sstevel@tonic-gate if (!lpl->lpl_parent) { 21947c478bd9Sstevel@tonic-gate return (LPL_TOPO_MISSING_PARENT); 21957c478bd9Sstevel@tonic-gate } else if (lgrp->lgrp_parent->lgrp_id != 21967c478bd9Sstevel@tonic-gate lpl->lpl_parent->lpl_lgrpid) { 21977c478bd9Sstevel@tonic-gate return (LPL_TOPO_PARENT_MISMATCH); 21987c478bd9Sstevel@tonic-gate } 21997c478bd9Sstevel@tonic-gate } 22007c478bd9Sstevel@tonic-gate 22017c478bd9Sstevel@tonic-gate /* only leaf lgroups keep a cpucnt, only check leaves */ 22027c478bd9Sstevel@tonic-gate if ((lpl->lpl_nrset == 1) && (lpl == lpl->lpl_rset[0])) { 22037c478bd9Sstevel@tonic-gate 22047c478bd9Sstevel@tonic-gate /* verify that lgrp is also a leaf */ 22057c478bd9Sstevel@tonic-gate ASSERT((lgrp->lgrp_childcnt == 0) && 22067c478bd9Sstevel@tonic-gate (klgrpset_ismember(lgrp->lgrp_leaves, 22077c478bd9Sstevel@tonic-gate lpl->lpl_lgrpid))); 22087c478bd9Sstevel@tonic-gate 22097c478bd9Sstevel@tonic-gate if ((lgrp->lgrp_childcnt > 0) || 22107c478bd9Sstevel@tonic-gate (!klgrpset_ismember(lgrp->lgrp_leaves, 
22117c478bd9Sstevel@tonic-gate lpl->lpl_lgrpid))) { 22127c478bd9Sstevel@tonic-gate return (LPL_TOPO_LGRP_NOT_LEAF); 22137c478bd9Sstevel@tonic-gate } 22147c478bd9Sstevel@tonic-gate 22157c478bd9Sstevel@tonic-gate ASSERT((lgrp->lgrp_cpucnt >= lpl->lpl_ncpu) && 22167c478bd9Sstevel@tonic-gate (lpl->lpl_ncpu > 0)); 22177c478bd9Sstevel@tonic-gate if ((lgrp->lgrp_cpucnt < lpl->lpl_ncpu) || 22187c478bd9Sstevel@tonic-gate (lpl->lpl_ncpu <= 0)) { 22197c478bd9Sstevel@tonic-gate return (LPL_TOPO_BAD_CPUCNT); 22207c478bd9Sstevel@tonic-gate } 22217c478bd9Sstevel@tonic-gate 22227c478bd9Sstevel@tonic-gate /* 22237c478bd9Sstevel@tonic-gate * Check that lpl_ncpu also matches the number of 22247c478bd9Sstevel@tonic-gate * cpus in the lpl's linked list. This only exists in 22257c478bd9Sstevel@tonic-gate * leaves, but they should always match. 22267c478bd9Sstevel@tonic-gate */ 22277c478bd9Sstevel@tonic-gate j = 0; 22287c478bd9Sstevel@tonic-gate cpu = cp_start = lpl->lpl_cpus; 22297c478bd9Sstevel@tonic-gate while (cpu != NULL) { 22307c478bd9Sstevel@tonic-gate j++; 22317c478bd9Sstevel@tonic-gate 22327c478bd9Sstevel@tonic-gate /* check to make sure cpu's lpl is leaf lpl */ 22337c478bd9Sstevel@tonic-gate ASSERT(cpu->cpu_lpl == lpl); 22347c478bd9Sstevel@tonic-gate if (cpu->cpu_lpl != lpl) { 22357c478bd9Sstevel@tonic-gate return (LPL_TOPO_CPU_HAS_BAD_LPL); 22367c478bd9Sstevel@tonic-gate } 22377c478bd9Sstevel@tonic-gate 22387c478bd9Sstevel@tonic-gate /* check next cpu */ 22397c478bd9Sstevel@tonic-gate if ((cpu = cpu->cpu_next_lpl) != cp_start) { 22407c478bd9Sstevel@tonic-gate continue; 22417c478bd9Sstevel@tonic-gate } else { 22427c478bd9Sstevel@tonic-gate cpu = NULL; 22437c478bd9Sstevel@tonic-gate } 22447c478bd9Sstevel@tonic-gate } 22457c478bd9Sstevel@tonic-gate 22467c478bd9Sstevel@tonic-gate ASSERT(j == lpl->lpl_ncpu); 22477c478bd9Sstevel@tonic-gate if (j != lpl->lpl_ncpu) { 22487c478bd9Sstevel@tonic-gate return (LPL_TOPO_LPL_BAD_NCPU); 22497c478bd9Sstevel@tonic-gate } 
22507c478bd9Sstevel@tonic-gate 22517c478bd9Sstevel@tonic-gate /* 22527c478bd9Sstevel@tonic-gate * Also, check that leaf lpl is contained in all 22537c478bd9Sstevel@tonic-gate * intermediate lpls that name the leaf as a descendant 22547c478bd9Sstevel@tonic-gate */ 22557c478bd9Sstevel@tonic-gate 22567c478bd9Sstevel@tonic-gate for (j = 0; j <= lgrp_alloc_max; j++) { 22577c478bd9Sstevel@tonic-gate klgrpset_t intersect; 22587c478bd9Sstevel@tonic-gate lgrp_t *lgrp_cand; 22597c478bd9Sstevel@tonic-gate lpl_t *lpl_cand; 22607c478bd9Sstevel@tonic-gate 22617c478bd9Sstevel@tonic-gate lgrp_cand = lgrp_table[j]; 22627c478bd9Sstevel@tonic-gate intersect = klgrpset_intersects( 22637c478bd9Sstevel@tonic-gate lgrp_cand->lgrp_set[LGRP_RSRC_CPU], 22647c478bd9Sstevel@tonic-gate cpupart->cp_lgrpset); 22657c478bd9Sstevel@tonic-gate 22667c478bd9Sstevel@tonic-gate if (!LGRP_EXISTS(lgrp_cand) || 22677c478bd9Sstevel@tonic-gate !klgrpset_intersects(lgrp_cand->lgrp_leaves, 22687c478bd9Sstevel@tonic-gate cpupart->cp_lgrpset) || 22697c478bd9Sstevel@tonic-gate (intersect == 0)) 22707c478bd9Sstevel@tonic-gate continue; 22717c478bd9Sstevel@tonic-gate 22727c478bd9Sstevel@tonic-gate lpl_cand = 22737c478bd9Sstevel@tonic-gate &cpupart->cp_lgrploads[lgrp_cand->lgrp_id]; 22747c478bd9Sstevel@tonic-gate 22757c478bd9Sstevel@tonic-gate if (klgrpset_ismember(intersect, 22767c478bd9Sstevel@tonic-gate lgrp->lgrp_id)) { 22777c478bd9Sstevel@tonic-gate ASSERT(lpl_rset_contains(lpl_cand, 22787c478bd9Sstevel@tonic-gate lpl)); 22797c478bd9Sstevel@tonic-gate 22807c478bd9Sstevel@tonic-gate if (!lpl_rset_contains(lpl_cand, lpl)) { 22817c478bd9Sstevel@tonic-gate return (LPL_TOPO_RSET_MSSNG_LF); 22827c478bd9Sstevel@tonic-gate } 22837c478bd9Sstevel@tonic-gate } 22847c478bd9Sstevel@tonic-gate } 22857c478bd9Sstevel@tonic-gate 22867c478bd9Sstevel@tonic-gate } else { /* non-leaf specific checks */ 22877c478bd9Sstevel@tonic-gate 22887c478bd9Sstevel@tonic-gate /* 22897c478bd9Sstevel@tonic-gate * Non-leaf lpls should have 
lpl_cpus == NULL 22907c478bd9Sstevel@tonic-gate * verify that this is so 22917c478bd9Sstevel@tonic-gate */ 22927c478bd9Sstevel@tonic-gate ASSERT(lpl->lpl_cpus == NULL); 22937c478bd9Sstevel@tonic-gate if (lpl->lpl_cpus != NULL) { 22947c478bd9Sstevel@tonic-gate return (LPL_TOPO_NONLEAF_HAS_CPUS); 22957c478bd9Sstevel@tonic-gate } 22967c478bd9Sstevel@tonic-gate 22977c478bd9Sstevel@tonic-gate /* 22987c478bd9Sstevel@tonic-gate * verify that the sum of the cpus in the leaf resources 22997c478bd9Sstevel@tonic-gate * is equal to the total ncpu in the intermediate 23007c478bd9Sstevel@tonic-gate */ 23017c478bd9Sstevel@tonic-gate for (j = sum = 0; j < lpl->lpl_nrset; j++) { 23027c478bd9Sstevel@tonic-gate sum += lpl->lpl_rset[j]->lpl_ncpu; 23037c478bd9Sstevel@tonic-gate } 23047c478bd9Sstevel@tonic-gate 23057c478bd9Sstevel@tonic-gate ASSERT(sum == lpl->lpl_ncpu); 23067c478bd9Sstevel@tonic-gate if (sum != lpl->lpl_ncpu) { 23077c478bd9Sstevel@tonic-gate return (LPL_TOPO_LPL_BAD_NCPU); 23087c478bd9Sstevel@tonic-gate } 23097c478bd9Sstevel@tonic-gate } 23107c478bd9Sstevel@tonic-gate 23117c478bd9Sstevel@tonic-gate /* 23127c478bd9Sstevel@tonic-gate * check on lpl_hint. Don't check root, since it has no parent. 
23137c478bd9Sstevel@tonic-gate */ 23147c478bd9Sstevel@tonic-gate if (lpl->lpl_parent != NULL) { 23157c478bd9Sstevel@tonic-gate int hint; 23167c478bd9Sstevel@tonic-gate lpl_t *hint_lpl; 23177c478bd9Sstevel@tonic-gate 23187c478bd9Sstevel@tonic-gate /* make sure hint is within limits of nrset */ 23197c478bd9Sstevel@tonic-gate hint = lpl->lpl_hint; 23207c478bd9Sstevel@tonic-gate ASSERT(lpl->lpl_parent->lpl_nrset >= hint); 23217c478bd9Sstevel@tonic-gate if (lpl->lpl_parent->lpl_nrset < hint) { 23227c478bd9Sstevel@tonic-gate return (LPL_TOPO_BOGUS_HINT); 23237c478bd9Sstevel@tonic-gate } 23247c478bd9Sstevel@tonic-gate 23257c478bd9Sstevel@tonic-gate /* make sure hint points to valid lpl */ 23267c478bd9Sstevel@tonic-gate hint_lpl = lpl->lpl_parent->lpl_rset[hint]; 23277c478bd9Sstevel@tonic-gate ASSERT(hint_lpl->lpl_ncpu > 0); 23287c478bd9Sstevel@tonic-gate if (hint_lpl->lpl_ncpu <= 0) { 23297c478bd9Sstevel@tonic-gate return (LPL_TOPO_BOGUS_HINT); 23307c478bd9Sstevel@tonic-gate } 23317c478bd9Sstevel@tonic-gate } 23327c478bd9Sstevel@tonic-gate 23337c478bd9Sstevel@tonic-gate /* 23347c478bd9Sstevel@tonic-gate * Check the rset of the lpl in question. Make sure that each 23357c478bd9Sstevel@tonic-gate * rset contains a subset of the resources in 23367c478bd9Sstevel@tonic-gate * lgrp_set[LGRP_RSRC_CPU] and in cp_lgrpset. This also makes 23377c478bd9Sstevel@tonic-gate * sure that each rset doesn't include resources that are 23387c478bd9Sstevel@tonic-gate * outside of that set. (Which would be resources somehow not 23397c478bd9Sstevel@tonic-gate * accounted for). 
23407c478bd9Sstevel@tonic-gate */ 23417c478bd9Sstevel@tonic-gate 23427c478bd9Sstevel@tonic-gate klgrpset_clear(rset); 23437c478bd9Sstevel@tonic-gate for (j = 0; j < lpl->lpl_nrset; j++) { 23447c478bd9Sstevel@tonic-gate klgrpset_add(rset, lpl->lpl_rset[j]->lpl_lgrpid); 23457c478bd9Sstevel@tonic-gate } 23467c478bd9Sstevel@tonic-gate klgrpset_copy(cset, rset); 23477c478bd9Sstevel@tonic-gate /* make sure lpl rset matches lgrp rset */ 23487c478bd9Sstevel@tonic-gate klgrpset_diff(rset, lgrp->lgrp_set[LGRP_RSRC_CPU]); 23497c478bd9Sstevel@tonic-gate /* make sure rset is contained with in partition, too */ 23507c478bd9Sstevel@tonic-gate klgrpset_diff(cset, cpupart->cp_lgrpset); 23517c478bd9Sstevel@tonic-gate 23527c478bd9Sstevel@tonic-gate ASSERT(klgrpset_isempty(rset) && 23537c478bd9Sstevel@tonic-gate klgrpset_isempty(cset)); 23547c478bd9Sstevel@tonic-gate if (!klgrpset_isempty(rset) || 23557c478bd9Sstevel@tonic-gate !klgrpset_isempty(cset)) { 23567c478bd9Sstevel@tonic-gate return (LPL_TOPO_RSET_MISMATCH); 23577c478bd9Sstevel@tonic-gate } 23587c478bd9Sstevel@tonic-gate 23597c478bd9Sstevel@tonic-gate /* 23607c478bd9Sstevel@tonic-gate * check to make sure lpl_nrset matches the number of rsets 23617c478bd9Sstevel@tonic-gate * contained in the lpl 23627c478bd9Sstevel@tonic-gate */ 23637c478bd9Sstevel@tonic-gate 23647c478bd9Sstevel@tonic-gate for (j = 0; (lpl->lpl_rset[j] != NULL) && (j < LPL_RSET_MAX); 23657c478bd9Sstevel@tonic-gate j++); 23667c478bd9Sstevel@tonic-gate 23677c478bd9Sstevel@tonic-gate ASSERT(j == lpl->lpl_nrset); 23687c478bd9Sstevel@tonic-gate if (j != lpl->lpl_nrset) { 23697c478bd9Sstevel@tonic-gate return (LPL_TOPO_BAD_RSETCNT); 23707c478bd9Sstevel@tonic-gate } 23717c478bd9Sstevel@tonic-gate 23727c478bd9Sstevel@tonic-gate } 23737c478bd9Sstevel@tonic-gate return (LPL_TOPO_CORRECT); 23747c478bd9Sstevel@tonic-gate } 23757c478bd9Sstevel@tonic-gate 23767c478bd9Sstevel@tonic-gate /* 23777c478bd9Sstevel@tonic-gate * Flatten lpl topology to given number of levels. 
This is presently only
 * implemented for a flatten to 2 levels, which will prune out the intermediates
 * and home the leaf lpls to the root lpl.
 *
 * Returns 0 if asked for an unsupported level count, otherwise the number of
 * levels flattened to (i.e. the argument).
 */
int
lpl_topo_flatten(int levels)
{
	int		i;
	uint_t		sum;
	lgrp_t		*lgrp_cur;
	lpl_t		*lpl_cur;
	lpl_t		*lpl_root;
	cpupart_t	*cp;

	/* only a flatten to two levels (root + leaves) is supported */
	if (levels != 2)
		return (0);

	/* called w/ cpus paused - grab no locks! */
	ASSERT(MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0 ||
	    !lgrp_initialized);

	/* repeat the flatten for every cpu partition's lpl list */
	cp = cp_list_head;
	do {
		lpl_root = &cp->cp_lgrploads[lgrp_root->lgrp_id];
		ASSERT(LGRP_EXISTS(lgrp_root) && (lpl_root->lpl_ncpu > 0));

		for (i = 0; i <= lgrp_alloc_max; i++) {
			lgrp_cur = lgrp_table[i];
			lpl_cur = &cp->cp_lgrploads[i];

			/* skip the root and entries that are already empty */
			if ((lgrp_cur == lgrp_root) ||
			    (!LGRP_EXISTS(lgrp_cur) &&
			    (lpl_cur->lpl_ncpu == 0)))
				continue;

			if (!LGRP_EXISTS(lgrp_cur) && (lpl_cur->lpl_ncpu > 0)) {
				/*
				 * this should be a deleted intermediate, so
				 * clear it
				 */
				lpl_clear(lpl_cur);
			} else if ((lpl_cur->lpl_nrset == 1) &&
			    (lpl_cur->lpl_rset[0] == lpl_cur) &&
			    ((lpl_cur->lpl_parent->lpl_ncpu == 0) ||
			    (!LGRP_EXISTS(lpl_cur->lpl_parent->lpl_lgrp)))) {
				/*
				 * this is a leaf whose parent was deleted, or
				 * whose parent had their lgrp deleted. (And
				 * whose parent will soon be deleted). Point
				 * this guy back to the root lpl.
				 */
				lpl_cur->lpl_parent = lpl_root;
				lpl_rset_add(lpl_root, lpl_cur);
			}

		}

		/*
		 * Now that we're done, make sure the count on the root lpl is
		 * correct, and update the hints of the children for the sake of
		 * thoroughness
		 */
		for (i = sum = 0; i < lpl_root->lpl_nrset; i++) {
			sum += lpl_root->lpl_rset[i]->lpl_ncpu;
		}
		lpl_root->lpl_ncpu = sum;
		lpl_child_update(lpl_root, cp);

		cp = cp->cp_next;
	} while (cp != cp_list_head);

	return (levels);
}

/*
 * Insert a
lpl into the resource hierarchy and create any additional lpls that
 * are necessary to represent the varying states of locality for the cpu
 * resources newly added to the partition.
 *
 * This routine is clever enough that it can correctly add resources from the
 * new leaf into both direct and indirect resource sets in the hierarchy. (Ie,
 * those for which the lpl is a leaf as opposed to simply a named equally local
 * resource). The one special case that needs additional processing is when a
 * new intermediate lpl is introduced. Since the main loop only traverses
 * looking to add the leaf resource where it does not yet exist, additional work
 * is necessary to add other leaf resources that may need to exist in the newly
 * created intermediate. This is performed by the second inner loop, and is
 * only done when the check for more than one overlapping resource succeeds.
 */

void
lpl_leaf_insert(lpl_t *lpl_leaf, cpupart_t *cpupart)
{
	int		i;
	int		j;
	int		hint;
	int		rset_num_intersect;
	lgrp_t		*lgrp_cur;
	lpl_t		*lpl_cur;
	lpl_t		*lpl_parent;
	lgrp_id_t	parent_id;
	klgrpset_t	rset_intersect; /* resources in cpupart and lgrp */

	for (i = 0; i <= lgrp_alloc_max; i++) {
		lgrp_cur = lgrp_table[i];

		/*
		 * Don't insert if the lgrp isn't there, if the leaf isn't
		 * contained within the current lgrp, or if the current lgrp has
		 * no leaves in this partition
		 */

		if (!LGRP_EXISTS(lgrp_cur) ||
		    !klgrpset_ismember(lgrp_cur->lgrp_set[LGRP_RSRC_CPU],
		    lpl_leaf->lpl_lgrpid) ||
		    !klgrpset_intersects(lgrp_cur->lgrp_leaves,
		    cpupart->cp_lgrpset))
			continue;

		lpl_cur = &cpupart->cp_lgrploads[lgrp_cur->lgrp_id];
		if (lgrp_cur->lgrp_parent != NULL) {
			/* if lgrp has a parent, assign it properly */
			parent_id = lgrp_cur->lgrp_parent->lgrp_id;
			lpl_parent = &cpupart->cp_lgrploads[parent_id];
		} else {
			/* if not, make sure parent ptr gets set to null */
			lpl_parent = NULL;
		}

		if (lpl_cur == lpl_leaf) {
			/*
			 * Almost all leaf state was initialized elsewhere. The
			 * only thing left to do is to set the parent.
			 */
			lpl_cur->lpl_parent = lpl_parent;
			continue;
		}

		/*
		 * Initialize intermediate lpl
		 * Save this lpl's hint though. Since we're changing this
		 * lpl's resources, we need to update the hint in this lpl's
		 * children, but the hint in this lpl is unaffected and
		 * should be preserved.
		 */
		hint = lpl_cur->lpl_hint;

		lpl_clear(lpl_cur);
		lpl_init(lpl_cur, lpl_leaf, lgrp_cur);

		lpl_cur->lpl_hint = hint;
		lpl_cur->lpl_parent = lpl_parent;

		/* does new lpl need to be populated with other resources? */
		rset_intersect =
		    klgrpset_intersects(lgrp_cur->lgrp_set[LGRP_RSRC_CPU],
		    cpupart->cp_lgrpset);
		klgrpset_nlgrps(rset_intersect, rset_num_intersect);

		if (rset_num_intersect > 1) {
			/*
			 * If so, figure out what lpls have resources that
			 * intersect this one, and add them.
			 */
			for (j = 0; j <= lgrp_alloc_max; j++) {
				lgrp_t	*lgrp_cand;	/* candidate lgrp */
				lpl_t	*lpl_cand;	/* candidate lpl */

				lgrp_cand = lgrp_table[j];
				if (!LGRP_EXISTS(lgrp_cand) ||
				    !klgrpset_ismember(rset_intersect,
				    lgrp_cand->lgrp_id))
					continue;
				lpl_cand =
				    &cpupart->cp_lgrploads[lgrp_cand->lgrp_id];
				lpl_rset_add(lpl_cur, lpl_cand);
			}
		}
		/*
		 * This lpl's rset has changed. Update the hint in its
		 * children.
		 */
		lpl_child_update(lpl_cur, cpupart);
	}
}

/*
 * remove a lpl from the hierarchy of resources, clearing its state when
 * finished.
If the lpls at the intermediate levels of the hierarchy have no
 * remaining resources, or no longer name a leaf resource in the cpu-partition,
 * delete them as well.
 */

void
lpl_leaf_remove(lpl_t *lpl_leaf, cpupart_t *cpupart)
{
	int		i;
	lgrp_t		*lgrp_cur;
	lpl_t		*lpl_cur;
	klgrpset_t	leaf_intersect;	/* intersection of leaves */

	for (i = 0; i <= lgrp_alloc_max; i++) {
		lgrp_cur = lgrp_table[i];

		/*
		 * Don't attempt to remove from lgrps that aren't there, that
		 * don't contain our leaf, or from the leaf itself. (We do that
		 * later)
		 */

		if (!LGRP_EXISTS(lgrp_cur))
			continue;

		lpl_cur = &cpupart->cp_lgrploads[lgrp_cur->lgrp_id];

		if (!klgrpset_ismember(lgrp_cur->lgrp_set[LGRP_RSRC_CPU],
		    lpl_leaf->lpl_lgrpid) ||
		    (lpl_cur == lpl_leaf)) {
			continue;
		}

		/*
		 * This is a slightly sleazy simplification in that we have
		 * already marked the cp_lgrpset as no longer containing the
		 * leaf we've deleted. Any lpls that pass the above checks
		 * based upon lgrp membership but not necessarily cpu-part
		 * membership also get cleared by the checks below. Currently
		 * this is harmless, as the lpls should be empty anyway.
		 *
		 * In particular, we want to preserve lpls that have additional
		 * leaf resources, even though we don't yet have a processor
		 * architecture that represents resources this way.
		 */

		leaf_intersect = klgrpset_intersects(lgrp_cur->lgrp_leaves,
		    cpupart->cp_lgrpset);

		lpl_rset_del(lpl_cur, lpl_leaf);
		if ((lpl_cur->lpl_nrset == 0) || (!leaf_intersect)) {
			lpl_clear(lpl_cur);
		} else {
			/*
			 * Update this lpl's children
			 */
			lpl_child_update(lpl_cur, cpupart);
		}
	}
	/* finally, clear the state of the leaf itself */
	lpl_clear(lpl_leaf);
}

/*
 * add a cpu to a partition in terms of lgrp load avg bookkeeping
 *
 * The lpl (cpu partition load average information) is now arranged in a
 * hierarchical fashion whereby resources that are closest, ie. most local, to
 * the cpu in question are considered to be leaves in a tree of resources.
 * There are two general cases for cpu addition:
 *
 * 1. A lpl structure that contains resources already in the hierarchy tree.
 * In this case, all of the associated lpl relationships have been defined, and
 * all that is necessary is that we link the new cpu into the per-lpl list of
 * cpus, and increment the ncpu count of all places where this cpu resource will
 * be accounted for.
lpl_cpu_adjcnt updates the cpu count, and the cpu pointer
 * pushing is accomplished by this routine.
 *
 * 2. The lpl to contain the resources in this cpu-partition for this lgrp does
 * not exist yet. In this case, it is necessary to build the leaf lpl, and
 * construct the hierarchy of state necessary to name its more distant
 * resources, if they should exist. The leaf structure is initialized by this
 * routine, as is the cpu-partition state for the lgrp membership. This routine
 * also calls lpl_leaf_insert() which inserts the named lpl into the hierarchy
 * and builds all of the "ancestral" state necessary to identify resources at
 * differing levels of locality.
 */
void
lgrp_part_add_cpu(cpu_t *cp, lgrp_id_t lgrpid)
{
	cpupart_t	*cpupart;
	lgrp_t		*lgrp_leaf;
	lpl_t		*lpl_leaf;

	/* called sometimes w/ cpus paused - grab no locks */
	ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);

	cpupart = cp->cpu_part;
	lgrp_leaf = lgrp_table[lgrpid];

	/* don't add non-existent lgrp */
	ASSERT(LGRP_EXISTS(lgrp_leaf));
	lpl_leaf = &cpupart->cp_lgrploads[lgrpid];
	cp->cpu_lpl = lpl_leaf;

	/* only leaf lpls contain cpus */

	if (lpl_leaf->lpl_ncpu++ == 0) {
		/*
		 * first cpu in this lgrp for this partition: build the
		 * leaf and splice it into the hierarchy
		 */
		lpl_init(lpl_leaf, lpl_leaf, lgrp_leaf);
		klgrpset_add(cpupart->cp_lgrpset, lgrpid);
		lpl_leaf_insert(lpl_leaf, cpupart);
	} else {
		/*
		 * the lpl should already exist in the parent, so just update
		 * the count of available CPUs
		 */
		lpl_cpu_adjcnt(LPL_INCREMENT, cp);
	}

	/* link cpu into list of cpus in lpl */

	if (lpl_leaf->lpl_cpus) {
		/* insert at the tail of the circular doubly-linked list */
		cp->cpu_next_lpl = lpl_leaf->lpl_cpus;
		cp->cpu_prev_lpl = lpl_leaf->lpl_cpus->cpu_prev_lpl;
		lpl_leaf->lpl_cpus->cpu_prev_lpl->cpu_next_lpl = cp;
		lpl_leaf->lpl_cpus->cpu_prev_lpl = cp;
	} else {
		/*
		 * We increment ncpu immediately after we create a new leaf
		 * lpl, so assert that ncpu == 1 for the case where we don't
		 * have any cpu pointers yet.
		 */
		ASSERT(lpl_leaf->lpl_ncpu == 1);
		lpl_leaf->lpl_cpus = cp->cpu_next_lpl = cp->cpu_prev_lpl = cp;
	}

}


/*
 * remove a cpu from a partition in terms of lgrp load avg bookkeeping
 *
 * The lpl (cpu partition load average information) is now arranged in a
 * hierarchical fashion whereby resources that are closest, ie. most local, to
 * the cpu in question are considered to be leaves in a tree of resources.
 * There are two removal cases in question:
 *
 * 1. Removal of the resource in the leaf leaves other resources remaining in
 * that leaf. (Another cpu still exists at this level of locality). In this
 * case, the count of available cpus is decremented in all associated lpls by
 * calling lpl_cpu_adjcnt(), and the pointer to the removed cpu is pruned
 * from the per-cpu lpl list.
 *
 * 2. Removal of the resource results in the lpl containing no resources.
(It's
 * empty) In this case, all of what has occurred for the first step must take
 * place; however, additionally we must remove the lpl structure itself, prune
 * out any stranded lpls that do not directly name a leaf resource, and mark the
 * cpu partition in question as no longer containing resources from the lgrp of
 * the lpl that has been deleted. Cpu-partition changes are handled by this
 * method, but the lpl_leaf_remove function deals with the details of pruning
 * out the empty lpl and any of its orphaned direct ancestors.
 */
void
lgrp_part_del_cpu(cpu_t *cp)
{
	lpl_t		*lpl;
	lpl_t		*leaf_lpl;
	lgrp_t		*lgrp_leaf;

	/* called sometimes w/ cpus paused - grab no locks */

	ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);

	lpl = leaf_lpl = cp->cpu_lpl;
	lgrp_leaf = leaf_lpl->lpl_lgrp;

	/* don't delete a leaf that isn't there */
	ASSERT(LGRP_EXISTS(lgrp_leaf));

	/* no double-deletes */
	ASSERT(lpl->lpl_ncpu);
	if (--lpl->lpl_ncpu == 0) {
		/*
		 * This was the last cpu in this lgroup for this partition,
		 * clear its bit in the partition's lgroup bitmask
		 */
		klgrpset_del(cp->cpu_part->cp_lgrpset, lpl->lpl_lgrpid);

		/* eliminate remaining lpl link pointers in cpu, lpl */
		lpl->lpl_cpus = cp->cpu_next_lpl = cp->cpu_prev_lpl = NULL;

		/* prune the now-empty leaf and any stranded ancestors */
		lpl_leaf_remove(leaf_lpl, cp->cpu_part);
	} else {

		/* unlink cpu from lists of cpus in lpl */
		cp->cpu_prev_lpl->cpu_next_lpl = cp->cpu_next_lpl;
		cp->cpu_next_lpl->cpu_prev_lpl = cp->cpu_prev_lpl;
		if (lpl->lpl_cpus == cp) {
			lpl->lpl_cpus = cp->cpu_next_lpl;
		}

		/*
		 * Update the cpu count in the lpls associated with parent
		 * lgroups.
		 */
		lpl_cpu_adjcnt(LPL_DECREMENT, cp);

	}
	/* clear cpu's lpl ptr when we're all done */
	cp->cpu_lpl = NULL;
}

/*
 * Recompute load average for the specified partition/lgrp fragment.
 *
 * We rely on the fact that this routine is called from the clock thread
 * at a point before the clock thread can block (i.e. before its first
 * lock request).
Since the clock thread can not be preempted (since it
 * runs at highest priority), we know that cpu partitions can not change
 * (since doing so would require either the repartition requester or the
 * cpu_pause thread to run on this cpu), so we can update the cpu's load
 * without grabbing cpu_lock.
 */
void
lgrp_loadavg(lpl_t *lpl, uint_t nrcpus, int ageflag)
{
	uint_t	ncpu;
	int64_t	old, new, f;

	/*
	 * 1 - exp(-1/(20 * ncpu)) << 13 = 400 for 1 cpu...
	 */
	static short expval[] = {
	    0, 3196, 1618, 1083,
	    814, 652, 543, 466,
	    408, 363, 326, 297,
	    272, 251, 233, 218,
	    204, 192, 181, 172,
	    163, 155, 148, 142,
	    136, 130, 125, 121,
	    116, 112, 109, 105
	};

	/* ASSERT (called from clock level) */

	if ((lpl == NULL) ||	/* we're booting - this is easiest for now */
	    ((ncpu = lpl->lpl_ncpu) == 0)) {
		return;
	}

	/* walk up the hierarchy, updating this lpl and every ancestor */
	for (;;) {

		if (ncpu >= sizeof (expval) / sizeof (expval[0]))
			f = expval[1]/ncpu; /* good approx. for large ncpu */
		else
			f = expval[ncpu];

		/*
		 * Modify the load average atomically to avoid losing
		 * anticipatory load updates (see lgrp_move_thread()).
		 */
		if (ageflag) {
			/*
			 * We're supposed to both update and age the load.
			 * This happens 10 times/sec. per cpu. We do a
			 * little hoop-jumping to avoid integer overflow.
			 */
			int64_t		q, r;

			do {
				old = new = lpl->lpl_loadavg;
				q = (old >> 16) << 7;
				r = (old & 0xffff) << 7;
				new += ((long long)(nrcpus - q) * f -
				    ((r * f) >> 16)) >> 7;

				/*
				 * Check for overflow
				 */
				if (new > LGRP_LOADAVG_MAX)
					new = LGRP_LOADAVG_MAX;
				else if (new < 0)
					new = 0;
			} while (cas32((lgrp_load_t *)&lpl->lpl_loadavg, old,
			    new) != old);
		} else {
			/*
			 * We're supposed to update the load, but not age it.
			 * This option is used to update the load (which either
			 * has already been aged in this 1/10 sec. interval or
			 * soon will be) to account for a remotely executing
			 * thread.
			 */
			do {
				old = new = lpl->lpl_loadavg;
				new += f;
				/*
				 * Check for overflow
				 * Underflow not possible here
				 */
				if (new < old)
					new = LGRP_LOADAVG_MAX;
			} while (cas32((lgrp_load_t *)&lpl->lpl_loadavg, old,
			    new) != old);
		}

		/*
		 * Do the same for this lpl's parent
		 */
		if ((lpl = lpl->lpl_parent) == NULL)
			break;
		ncpu = lpl->lpl_ncpu;
	}
}

/*
 * Initialize lpl topology in the target based on topology currently present in
 * lpl_bootstrap.
 *
 * lpl_topo_bootstrap is only called once from cpupart_initialize_default() to
 * initialize cp_default list of lpls. Up to this point all topology operations
 * were performed using lpl_bootstrap. Now cp_default has its own list of lpls
 * and all subsequent lpl operations should use it instead of lpl_bootstrap. The
 * `target' points to the list of lpls in cp_default and `size' is the size of
 * this list.
 *
 * This function walks the lpl topology in lpl_bootstrap and does four things:
 *
 * 1) Copies all fields from lpl_bootstrap to the target.
 *
 * 2) Sets CPU0 lpl pointer to the correct element of the target list.
 *
 * 3) Updates lpl_parent pointers to point to the lpls in the target list
 * instead of lpl_bootstrap.
 *
 * 4) Updates pointers in the resource list of the target to point to the lpls
 * in the target list instead of lpl_bootstrap.
 *
 * After lpl_topo_bootstrap() completes, target contains the same information
 * that would be present there if it were used during boot instead of
 * lpl_bootstrap. There is no need for the information in lpl_bootstrap after
 * this and it is bzeroed.
 */
void
lpl_topo_bootstrap(lpl_t *target, int size)
{
	lpl_t	*lpl = lpl_bootstrap;
	lpl_t	*target_lpl = target;
	int	howmany;
	int	id;
	int	i;

	/*
	 * The only target that should be passed here is cp_default lpl list.
	 */
	ASSERT(target == cp_default.cp_lgrploads);
	ASSERT(size == cp_default.cp_nlgrploads);
	ASSERT(!lgrp_topo_initialized);
	ASSERT(ncpus == 1);

	howmany = MIN(LPL_BOOTSTRAP_SIZE, size);
	for (i = 0; i < howmany; i++, lpl++, target_lpl++) {
		/*
		 * Copy all fields from lpl.
		 */

		*target_lpl = *lpl;

		/*
		 * Substitute CPU0 lpl pointer with one relative to target.
		 */
		if (lpl->lpl_cpus == CPU) {
			ASSERT(CPU->cpu_lpl == lpl);
			CPU->cpu_lpl = target_lpl;
		}

		/*
		 * Substitute parent information with parent relative to target.
		 * (same offset from the start of target as the original parent
		 * had from the start of lpl_bootstrap)
		 */
		if (lpl->lpl_parent != NULL)
			target_lpl->lpl_parent = (lpl_t *)
			    (((uintptr_t)lpl->lpl_parent -
			    (uintptr_t)lpl_bootstrap) +
			    (uintptr_t)target);

		/*
		 * Walk over resource set substituting pointers relative to
		 * lpl_bootstrap to pointers relative to target.
		 */
		ASSERT(lpl->lpl_nrset <= 1);

		for (id = 0; id < lpl->lpl_nrset; id++) {
			if (lpl->lpl_rset[id] != NULL) {
				target_lpl->lpl_rset[id] =
				    (lpl_t *)
				    (((uintptr_t)lpl->lpl_rset[id] -
				    (uintptr_t)lpl_bootstrap) +
				    (uintptr_t)target);
			}
		}
	}

	/*
	 * Topology information in lpl_bootstrap is no longer needed.
	 */
	bzero(lpl_bootstrap_list, sizeof (lpl_bootstrap_list));
}

/*
 * If the lowest load among the lgroups a process' threads are currently
 * spread across is greater than lgrp_expand_proc_thresh, we'll consider
 * expanding the process to a new lgroup.
 */
#define	LGRP_EXPAND_PROC_THRESH_DEFAULT 62250
lgrp_load_t	lgrp_expand_proc_thresh = LGRP_EXPAND_PROC_THRESH_DEFAULT;

/* Per-lgroup expansion threshold, scaled down by the lgroup's CPU count */
#define	LGRP_EXPAND_PROC_THRESH(ncpu) \
	((lgrp_expand_proc_thresh) / (ncpu))

/*
 * A process will be expanded to a new lgroup only if the difference between
 * the lowest load on the lgroups the process' thread's are currently spread
 * across and the lowest load on the other lgroups in the process' partition
 * is greater than lgrp_expand_proc_diff.
 */
#define	LGRP_EXPAND_PROC_DIFF_DEFAULT 60000
lgrp_load_t	lgrp_expand_proc_diff = LGRP_EXPAND_PROC_DIFF_DEFAULT;

/* Per-lgroup expansion load difference, scaled down by the CPU count */
#define	LGRP_EXPAND_PROC_DIFF(ncpu) \
	((lgrp_expand_proc_diff) / (ncpu))

/*
 * The loadavg tolerance accounts for "noise" inherent in the load, which may
 * be present due to impreciseness of the load average decay algorithm.
 *
 * The default tolerance is lgrp_loadavg_max_effect. Note that the tunable
 * tolerance is scaled by the number of cpus in the lgroup just like
 * lgrp_loadavg_max_effect.  For example, if lgrp_loadavg_tolerance = 0x10000,
 * and ncpu = 4, then lgrp_choose will consider differences in lgroup loads
 * of: 0x10000 / 4 => 0x4000 or greater to be significant.
 */
uint32_t	lgrp_loadavg_tolerance = LGRP_LOADAVG_THREAD_MAX;
#define	LGRP_LOADAVG_TOLERANCE(ncpu)	\
	((lgrp_loadavg_tolerance) / ncpu)

/*
 * lgrp_choose() will choose root lgroup as home when lowest lgroup load
 * average is above this threshold
 */
uint32_t	lgrp_load_thresh = UINT32_MAX;

/*
 * lgrp_choose() will try to skip any lgroups with less memory
 * than this free when choosing a home lgroup
 */
pgcnt_t	lgrp_mem_free_thresh = 0;

/*
 * When choosing between similarly loaded lgroups, lgrp_choose() will pick
 * one based on one of the following policies:
 * - Random selection
 * - Pseudo round robin placement
 * - Longest time since a thread was last placed
 */
#define	LGRP_CHOOSE_RANDOM	1
#define	LGRP_CHOOSE_RR		2
#define	LGRP_CHOOSE_TIME	3

/* Current placement policy; LGRP_CHOOSE_TIME is the default */
int	lgrp_choose_policy = LGRP_CHOOSE_TIME;

/*
 * Choose a suitable leaf lgroup for a kthread.  The kthread is assumed not to
 * be bound to a CPU or processor set.
 *
 * Arguments:
 *	t		The thread
 *	cpupart		The partition the thread belongs to.
 *
 * Returns a pointer to the chosen lpl (load representation of the home
 * lgroup) within cpupart's cp_lgrploads array; never returns NULL.
 *
 * NOTE: Should at least be called with the cpu_lock held, kernel preemption
 *	 disabled, or thread_lock held (at splhigh) to protect against the CPU
 *	 partitions changing out from under us and assumes that given thread is
 *	 protected.  Also, called sometimes w/ cpus paused or kernel preemption
 *	 disabled, so don't grab any locks because we should never block under
 *	 those conditions.
 */
lpl_t *
lgrp_choose(kthread_t *t, cpupart_t *cpupart)
{
	lgrp_load_t	bestload, bestrload;
	int		lgrpid_offset, lgrp_count;
	lgrp_id_t	lgrpid, lgrpid_start;
	lpl_t		*lpl, *bestlpl, *bestrlpl;
	klgrpset_t	lgrpset;
	proc_t		*p;

	ASSERT(t != NULL);
	ASSERT(MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0 ||
	    THREAD_LOCK_HELD(t));
	ASSERT(cpupart != NULL);

	p = t->t_procp;

	/* A process should always be in an active partition */
	ASSERT(!klgrpset_isempty(cpupart->cp_lgrpset));

	/*
	 * bestlpl tracks the best candidate among lgroups the process already
	 * has threads on; bestrlpl ("remote") tracks the best among the rest.
	 */
	bestlpl = bestrlpl = NULL;
	bestload = bestrload = LGRP_LOADAVG_MAX;
	lgrpset = cpupart->cp_lgrpset;

	/*
	 * Pick a starting lgroup for the circular search below, either the
	 * next member after the partition's round-robin hint, or a
	 * pseudo-random member derived from the low bits of the hires clock.
	 */
	switch (lgrp_choose_policy) {
	case LGRP_CHOOSE_RR:
		lgrpid = cpupart->cp_lgrp_hint;
		do {
			if (++lgrpid > lgrp_alloc_max)
				lgrpid = 0;
		} while (!klgrpset_ismember(lgrpset, lgrpid));

		break;
	default:
	case LGRP_CHOOSE_TIME:
	case LGRP_CHOOSE_RANDOM:
		klgrpset_nlgrps(lgrpset, lgrp_count);
		lgrpid_offset =
		    (((ushort_t)(gethrtime() >> 4)) % lgrp_count) + 1;
		for (lgrpid = 0; ; lgrpid++) {
			if (klgrpset_ismember(lgrpset, lgrpid)) {
				if (--lgrpid_offset == 0)
					break;
			}
		}
		break;
	}

	lgrpid_start = lgrpid;

	DTRACE_PROBE2(lgrp_choose_start, lgrp_id_t, lgrpid_start,
	    lgrp_id_t, cpupart->cp_lgrp_hint);

	/*
	 * Use lgroup affinities (if any) to choose best lgroup
	 *
	 * NOTE: Assumes that thread is protected from going away and its
	 *	 lgroup affinities won't change (ie. p_lock, or
	 *	 thread_lock() being held and/or CPUs paused)
	 */
	if (t->t_lgrp_affinity) {
		lpl = lgrp_affinity_best(t, cpupart, lgrpid_start, B_FALSE);
		if (lpl != NULL)
			return (lpl);
	}

	ASSERT(klgrpset_ismember(lgrpset, lgrpid_start));

	/*
	 * Walk once around all lgroup IDs starting from lgrpid_start,
	 * classifying each eligible leaf lgroup as "preferred" (process
	 * already present) or "remote" and remembering the best of each.
	 */
	do {
		pgcnt_t	npgs;

		/*
		 * Skip any lgroups outside of thread's pset
		 */
		if (!klgrpset_ismember(lgrpset, lgrpid)) {
			if (++lgrpid > lgrp_alloc_max)
				lgrpid = 0;	/* wrap the search */
			continue;
		}

		/*
		 * Skip any non-leaf lgroups
		 *
		 * NOTE(review): this branch does not advance lgrpid before
		 * continuing; presumably cp_lgrpset never contains a non-leaf
		 * lgroup (else this would spin) -- confirm against the
		 * partition-set maintenance code.
		 */
		if (lgrp_table[lgrpid]->lgrp_childcnt != 0)
			continue;

		/*
		 * Skip any lgroups without enough free memory
		 * (when threshold set to nonzero positive value)
		 */
		if (lgrp_mem_free_thresh > 0) {
			npgs = lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_FREE);
			if (npgs < lgrp_mem_free_thresh) {
				if (++lgrpid > lgrp_alloc_max)
					lgrpid = 0;	/* wrap the search */
				continue;
			}
		}

		lpl = &cpupart->cp_lgrploads[lgrpid];
		if (klgrpset_isempty(p->p_lgrpset) ||
		    klgrpset_ismember(p->p_lgrpset, lgrpid)) {
			/*
			 * Either this is a new process or the process already
			 * has threads on this lgrp, so this is a preferred
			 * lgroup for the thread.
			 */
			if (bestlpl == NULL ||
			    lpl_pick(lpl, bestlpl)) {
				bestload = lpl->lpl_loadavg;
				bestlpl = lpl;
			}
		} else {
			/*
			 * The process doesn't have any threads on this lgrp,
			 * but we're willing to consider this lgrp if the load
			 * difference is big enough to justify splitting up
			 * the process' threads.
			 */
			if (bestrlpl == NULL ||
			    lpl_pick(lpl, bestrlpl)) {
				bestrload = lpl->lpl_loadavg;
				bestrlpl = lpl;
			}
		}
		if (++lgrpid > lgrp_alloc_max)
			lgrpid = 0;	/* wrap the search */
	} while (lgrpid != lgrpid_start);

	/*
	 * Return root lgroup if threshold isn't set to maximum value and
	 * lowest lgroup load average more than a certain threshold
	 */
	if (lgrp_load_thresh != UINT32_MAX &&
	    bestload >= lgrp_load_thresh && bestrload >= lgrp_load_thresh)
		return (&cpupart->cp_lgrploads[lgrp_root->lgrp_id]);

	/*
	 * If all the lgroups over which the thread's process is spread are
	 * heavily loaded, or otherwise undesirable, we'll consider placing
	 * the thread on one of the other leaf lgroups in the thread's
	 * partition.
	 */
	if ((bestlpl == NULL) ||
	    ((bestload > LGRP_EXPAND_PROC_THRESH(bestlpl->lpl_ncpu)) &&
	    (bestrload < bestload) &&	/* paranoid about wraparound */
	    (bestrload + LGRP_EXPAND_PROC_DIFF(bestrlpl->lpl_ncpu) <
	    bestload))) {
		bestlpl = bestrlpl;
	}

	if (bestlpl == NULL) {
		/*
		 * No lgroup looked particularly good, but we still
		 * have to pick something. Go with the randomly selected
		 * legal lgroup we started with above.
		 */
		bestlpl = &cpupart->cp_lgrploads[lgrpid_start];
	}

	/* Remember the choice for round-robin, and timestamp it for */
	/* the LGRP_CHOOSE_TIME tie-breaking in lpl_pick() */
	cpupart->cp_lgrp_hint = bestlpl->lpl_lgrpid;
	bestlpl->lpl_homed_time = gethrtime_unscaled();

	ASSERT(bestlpl->lpl_ncpu > 0);
	return (bestlpl);
}

/*
 * Decide if lpl1 is a better candidate than lpl2 for lgrp homing.
 * Returns non-zero if lpl1 is a better candidate, and 0 otherwise.
 */
static int
lpl_pick(lpl_t *lpl1, lpl_t *lpl2)
{
	lgrp_load_t	l1, l2;
	/* Tolerance is scaled by the candidate's CPU count */
	lgrp_load_t	tolerance = LGRP_LOADAVG_TOLERANCE(lpl1->lpl_ncpu);

	l1 = lpl1->lpl_loadavg;
	l2 = lpl2->lpl_loadavg;

	/*
	 * The extra (l1 < l2) check guards against unsigned wraparound in
	 * the l1 + tolerance sum.
	 */
	if ((l1 + tolerance < l2) && (l1 < l2)) {
		/* lpl1 is significantly less loaded than lpl2 */
		return (1);
	}

	if (lgrp_choose_policy == LGRP_CHOOSE_TIME &&
	    l1 + tolerance >= l2 && l1 < l2 &&
	    lpl1->lpl_homed_time < lpl2->lpl_homed_time) {
		/*
		 * lpl1's load is within the tolerance of lpl2.  We're
		 * willing to consider it to be better however if
		 * it has been longer since we last homed a thread there
		 */
		return (1);
	}

	return (0);
}

/*
 * An LWP is expected to be assigned to an lgroup for at least this long
 * for its anticipatory load to be justified.  NOTE that this value should
 * not be set extremely huge (say, larger than 100 years), to avoid problems
 * with overflow in the calculation that uses it.
 */
#define	LGRP_MIN_NSEC	(NANOSEC / 10)		/* 1/10 of a second */
hrtime_t	lgrp_min_nsec = LGRP_MIN_NSEC;

/*
 * Routine to change a thread's lgroup affiliation.  This routine updates
 * the thread's kthread_t struct and its process' proc_t struct to note the
 * thread's new lgroup affiliation, and its lgroup affinities.
 *
 * Note that this is the only routine that modifies a thread's t_lpl field,
 * and that adds in or removes anticipatory load.
 *
 * If the thread is exiting, newlpl is NULL.
 *
 * Locking:
 *	The following lock must be held on entry:
 *	cpu_lock, kpreempt_disable(), or thread_lock -- to assure t's new lgrp
 *	doesn't get removed from t's partition
 *
 *	This routine is not allowed to grab any locks, since it may be called
 *	with cpus paused (such as from cpu_offline).
 */
void
lgrp_move_thread(kthread_t *t, lpl_t *newlpl, int do_lgrpset_delete)
{
	proc_t		*p;
	lpl_t		*lpl, *oldlpl;
	lgrp_id_t	oldid;
	kthread_t	*tp;
	uint_t		ncpu;
	lgrp_load_t	old, new;

	ASSERT(t);
	ASSERT(MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0 ||
	    THREAD_LOCK_HELD(t));

	/*
	 * If not changing lpls, just return
	 */
	if ((oldlpl = t->t_lpl) == newlpl)
		return;

	/*
	 * Make sure the thread's lwp hasn't exited (if so, this thread is now
	 * associated with process 0 rather than with its original process).
	 */
	if (t->t_proc_flag & TP_LWPEXIT) {
		if (newlpl != NULL) {
			t->t_lpl = newlpl;
		}
		return;
	}

	p = ttoproc(t);

	/*
	 * If the thread had a previous lgroup, update its process' p_lgrpset
	 * to account for it being moved from its old lgroup.
	 */
	if ((oldlpl != NULL) &&	/* thread had a previous lgroup */
	    (p->p_tlist != NULL)) {
		oldid = oldlpl->lpl_lgrpid;

		if (newlpl != NULL)
			lgrp_stat_add(oldid, LGRP_NUM_MIGR, 1);

		if ((do_lgrpset_delete) &&
		    (klgrpset_ismember(p->p_lgrpset, oldid))) {
			/* Scan the process's thread list for a co-resident */
			for (tp = p->p_tlist->t_forw; ; tp = tp->t_forw) {
				/*
				 * Check if a thread other than the thread
				 * that's moving is assigned to the same
				 * lgroup as the thread that's moving.  Note
				 * that we have to compare lgroup IDs, rather
				 * than simply comparing t_lpl's, since the
				 * threads may belong to different partitions
				 * but be assigned to the same lgroup.
				 */
				ASSERT(tp->t_lpl != NULL);

				if ((tp != t) &&
				    (tp->t_lpl->lpl_lgrpid == oldid)) {
					/*
					 * Another thread is assigned to the
					 * same lgroup as the thread that's
					 * moving, p_lgrpset doesn't change.
					 */
					break;
				} else if (tp == p->p_tlist) {
					/*
					 * No other thread is assigned to the
					 * same lgroup as the exiting thread,
					 * clear the lgroup's bit in p_lgrpset.
					 */
					klgrpset_del(p->p_lgrpset, oldid);
					break;
				}
			}
		}

		/*
		 * If this thread was assigned to its old lgroup for such a
		 * short amount of time that the anticipatory load that was
		 * added on its behalf has aged very little, remove that
		 * anticipatory load.
		 */
		if ((t->t_anttime + lgrp_min_nsec > gethrtime()) &&
		    ((ncpu = oldlpl->lpl_ncpu) > 0)) {
			/*
			 * Walk from the old leaf lpl up through its
			 * ancestors, atomically (cas32 retry loop) backing
			 * the anticipatory load out of each level's loadavg.
			 */
			lpl = oldlpl;
			for (;;) {
				do {
					old = new = lpl->lpl_loadavg;
					new -= LGRP_LOADAVG_MAX_EFFECT(ncpu);
					if (new > old) {
						/*
						 * this can happen if the load
						 * average was aged since we
						 * added in the anticipatory
						 * load
						 */
						new = 0;
					}
				} while (cas32(
				    (lgrp_load_t *)&lpl->lpl_loadavg, old,
				    new) != old);

				lpl = lpl->lpl_parent;
				if (lpl == NULL)
					break;

				ncpu = lpl->lpl_ncpu;
				ASSERT(ncpu > 0);
			}
		}
	}
	/*
	 * If the thread has a new lgroup (i.e. it's not exiting), update its
	 * t_lpl and its process' p_lgrpset, and apply an anticipatory load
	 * to its new lgroup to account for its move to its new lgroup.
	 */
	if (newlpl != NULL) {
		/*
		 * This thread is moving to a new lgroup
		 */
		t->t_lpl = newlpl;

		/*
		 * Reflect move in load average of new lgroup
		 * unless it is root lgroup
		 */
		if (lgrp_table[newlpl->lpl_lgrpid] == lgrp_root)
			return;

		if (!klgrpset_ismember(p->p_lgrpset, newlpl->lpl_lgrpid)) {
			klgrpset_add(p->p_lgrpset, newlpl->lpl_lgrpid);
		}

		/*
		 * It'll take some time for the load on the new lgroup
		 * to reflect this thread's placement on it.  We'd
		 * like not, however, to have all threads between now
		 * and then also piling on to this lgroup.  To avoid
		 * this pileup, we anticipate the load this thread
		 * will generate on its new lgroup.  The goal is to
		 * make the lgroup's load appear as though the thread
		 * had been there all along.  We're very conservative
		 * in calculating this anticipatory load, we assume
		 * the worst case case (100% CPU-bound thread).  This
		 * may be modified in the future to be more accurate.
		 */
		lpl = newlpl;
		for (;;) {
			/* Atomically add the load at each ancestor level */
			ncpu = lpl->lpl_ncpu;
			ASSERT(ncpu > 0);
			do {
				old = new = lpl->lpl_loadavg;
				new += LGRP_LOADAVG_MAX_EFFECT(ncpu);
				/*
				 * Check for overflow
				 * Underflow not possible here
				 */
				if (new < old)
					new = UINT32_MAX;
			} while (cas32((lgrp_load_t *)&lpl->lpl_loadavg, old,
			    new) != old);

			lpl = lpl->lpl_parent;
			if (lpl == NULL)
				break;
		}
		/* Timestamp used above to decide whether to back load out */
		t->t_anttime = gethrtime();
	}
}

/*
 * Return lgroup memory allocation policy given advice from madvise(3C)
 */
lgrp_mem_policy_t
lgrp_madv_to_policy(uchar_t advice, size_t size, int type) 34527c478bd9Sstevel@tonic-gate { 34537c478bd9Sstevel@tonic-gate switch (advice) { 34547c478bd9Sstevel@tonic-gate case MADV_ACCESS_LWP: 34557c478bd9Sstevel@tonic-gate return (LGRP_MEM_POLICY_NEXT); 34567c478bd9Sstevel@tonic-gate case MADV_ACCESS_MANY: 34577c478bd9Sstevel@tonic-gate return (LGRP_MEM_POLICY_RANDOM); 34587c478bd9Sstevel@tonic-gate default: 34597c478bd9Sstevel@tonic-gate return (lgrp_mem_policy_default(size, type)); 34607c478bd9Sstevel@tonic-gate } 34617c478bd9Sstevel@tonic-gate } 34627c478bd9Sstevel@tonic-gate 34637c478bd9Sstevel@tonic-gate /* 34647c478bd9Sstevel@tonic-gate * Figure out default policy 34657c478bd9Sstevel@tonic-gate */ 34667c478bd9Sstevel@tonic-gate lgrp_mem_policy_t 34677c478bd9Sstevel@tonic-gate lgrp_mem_policy_default(size_t size, int type) 34687c478bd9Sstevel@tonic-gate { 34697c478bd9Sstevel@tonic-gate cpupart_t *cp; 34707c478bd9Sstevel@tonic-gate lgrp_mem_policy_t policy; 34717c478bd9Sstevel@tonic-gate size_t pset_mem_size; 34727c478bd9Sstevel@tonic-gate 34737c478bd9Sstevel@tonic-gate /* 34747c478bd9Sstevel@tonic-gate * Randomly allocate memory across lgroups for shared memory 34757c478bd9Sstevel@tonic-gate * beyond a certain threshold 34767c478bd9Sstevel@tonic-gate */ 34777c478bd9Sstevel@tonic-gate if ((type != MAP_SHARED && size > lgrp_privm_random_thresh) || 34787c478bd9Sstevel@tonic-gate (type == MAP_SHARED && size > lgrp_shm_random_thresh)) { 34797c478bd9Sstevel@tonic-gate /* 34807c478bd9Sstevel@tonic-gate * Get total memory size of current thread's pset 34817c478bd9Sstevel@tonic-gate */ 34827c478bd9Sstevel@tonic-gate kpreempt_disable(); 34837c478bd9Sstevel@tonic-gate cp = curthread->t_cpupart; 34847c478bd9Sstevel@tonic-gate klgrpset_totalsize(cp->cp_lgrpset, pset_mem_size); 34857c478bd9Sstevel@tonic-gate kpreempt_enable(); 34867c478bd9Sstevel@tonic-gate 34877c478bd9Sstevel@tonic-gate /* 34887c478bd9Sstevel@tonic-gate * Choose policy to randomly allocate memory across 
34897c478bd9Sstevel@tonic-gate * lgroups in pset if it will fit and is not default 34907c478bd9Sstevel@tonic-gate * partition. Otherwise, allocate memory randomly 34917c478bd9Sstevel@tonic-gate * across machine. 34927c478bd9Sstevel@tonic-gate */ 34937c478bd9Sstevel@tonic-gate if (lgrp_mem_pset_aware && size < pset_mem_size) 34947c478bd9Sstevel@tonic-gate policy = LGRP_MEM_POLICY_RANDOM_PSET; 34957c478bd9Sstevel@tonic-gate else 34967c478bd9Sstevel@tonic-gate policy = LGRP_MEM_POLICY_RANDOM; 34977c478bd9Sstevel@tonic-gate } else 34987c478bd9Sstevel@tonic-gate /* 34997c478bd9Sstevel@tonic-gate * Apply default policy for private memory and 35007c478bd9Sstevel@tonic-gate * shared memory under the respective random 35017c478bd9Sstevel@tonic-gate * threshold. 35027c478bd9Sstevel@tonic-gate */ 35037c478bd9Sstevel@tonic-gate policy = lgrp_mem_default_policy; 35047c478bd9Sstevel@tonic-gate 35057c478bd9Sstevel@tonic-gate return (policy); 35067c478bd9Sstevel@tonic-gate } 35077c478bd9Sstevel@tonic-gate 35087c478bd9Sstevel@tonic-gate /* 35097c478bd9Sstevel@tonic-gate * Get memory allocation policy for this segment 35107c478bd9Sstevel@tonic-gate */ 35117c478bd9Sstevel@tonic-gate lgrp_mem_policy_info_t * 35127c478bd9Sstevel@tonic-gate lgrp_mem_policy_get(struct seg *seg, caddr_t vaddr) 35137c478bd9Sstevel@tonic-gate { 35147c478bd9Sstevel@tonic-gate lgrp_mem_policy_info_t *policy_info; 35157c478bd9Sstevel@tonic-gate extern struct seg_ops segspt_ops; 35167c478bd9Sstevel@tonic-gate extern struct seg_ops segspt_shmops; 35177c478bd9Sstevel@tonic-gate 35187c478bd9Sstevel@tonic-gate /* 35197c478bd9Sstevel@tonic-gate * This is for binary compatibility to protect against third party 35207c478bd9Sstevel@tonic-gate * segment drivers which haven't recompiled to allow for 35217c478bd9Sstevel@tonic-gate * SEGOP_GETPOLICY() 35227c478bd9Sstevel@tonic-gate */ 35237c478bd9Sstevel@tonic-gate if (seg->s_ops != &segvn_ops && seg->s_ops != &segspt_ops && 35247c478bd9Sstevel@tonic-gate seg->s_ops != 
	    &segspt_shmops)
		return (NULL);

	policy_info = NULL;
	if (seg->s_ops->getpolicy != NULL)
		policy_info = SEGOP_GETPOLICY(seg, vaddr);

	return (policy_info);
}

/*
 * Set policy for allocating private memory given desired policy, policy info,
 * size in bytes of memory that policy is being applied.
 * Return 0 if policy wasn't set already and 1 if policy was set already
 */
int
lgrp_privm_policy_set(lgrp_mem_policy_t policy,
    lgrp_mem_policy_info_t *policy_info, size_t size)
{

	ASSERT(policy_info != NULL);

	/* Resolve LGRP_MEM_POLICY_DEFAULT to the size-based default */
	if (policy == LGRP_MEM_POLICY_DEFAULT)
		policy = lgrp_mem_policy_default(size, MAP_PRIVATE);

	/*
	 * Policy set already?
	 */
	if (policy == policy_info->mem_policy)
		return (1);

	/*
	 * Set policy (a policy change resets the reserved indicator)
	 */
	policy_info->mem_policy = policy;
	policy_info->mem_reserved = 0;

	return (0);
}


/*
 * Get shared memory allocation policy with given tree and offset
 */
lgrp_mem_policy_info_t *
lgrp_shm_policy_get(struct anon_map *amp, ulong_t anon_index, vnode_t *vp,
    u_offset_t vn_off)
{
	u_offset_t		off;
	lgrp_mem_policy_info_t	*policy_info;
	lgrp_shm_policy_seg_t	*policy_seg;
	lgrp_shm_locality_t	*shm_locality;
	avl_tree_t		*tree;
	avl_index_t		where;

	/*
	 * Get policy segment tree from anon_map or vnode and use specified
	 * anon index or vnode offset as offset
	 *
	 * Assume that no lock needs to be held on anon_map or vnode, since
	 * they should be protected by their reference count which must be
	 * nonzero for an existing segment
	 */
	if (amp) {
		ASSERT(amp->refcnt != 0);
35897c478bd9Sstevel@tonic-gate shm_locality = amp->locality; 35907c478bd9Sstevel@tonic-gate if (shm_locality == NULL) 35917c478bd9Sstevel@tonic-gate return (NULL); 35927c478bd9Sstevel@tonic-gate tree = shm_locality->loc_tree; 35937c478bd9Sstevel@tonic-gate off = ptob(anon_index); 35947c478bd9Sstevel@tonic-gate } else if (vp) { 35957c478bd9Sstevel@tonic-gate shm_locality = vp->v_locality; 35967c478bd9Sstevel@tonic-gate if (shm_locality == NULL) 35977c478bd9Sstevel@tonic-gate return (NULL); 35987c478bd9Sstevel@tonic-gate ASSERT(shm_locality->loc_count != 0); 35997c478bd9Sstevel@tonic-gate tree = shm_locality->loc_tree; 36007c478bd9Sstevel@tonic-gate off = vn_off; 36017c478bd9Sstevel@tonic-gate } 36027c478bd9Sstevel@tonic-gate 36037c478bd9Sstevel@tonic-gate if (tree == NULL) 36047c478bd9Sstevel@tonic-gate return (NULL); 36057c478bd9Sstevel@tonic-gate 36067c478bd9Sstevel@tonic-gate /* 36077c478bd9Sstevel@tonic-gate * Lookup policy segment for offset into shared object and return 36087c478bd9Sstevel@tonic-gate * policy info 36097c478bd9Sstevel@tonic-gate */ 36107c478bd9Sstevel@tonic-gate rw_enter(&shm_locality->loc_lock, RW_READER); 36117c478bd9Sstevel@tonic-gate policy_info = NULL; 36127c478bd9Sstevel@tonic-gate policy_seg = avl_find(tree, &off, &where); 36137c478bd9Sstevel@tonic-gate if (policy_seg) 36147c478bd9Sstevel@tonic-gate policy_info = &policy_seg->shm_policy; 36157c478bd9Sstevel@tonic-gate rw_exit(&shm_locality->loc_lock); 36167c478bd9Sstevel@tonic-gate 36177c478bd9Sstevel@tonic-gate return (policy_info); 36187c478bd9Sstevel@tonic-gate } 36197c478bd9Sstevel@tonic-gate 36207c478bd9Sstevel@tonic-gate /* 3621611ffe8aSesaxe * Default memory allocation policy for kernel segmap pages 3622611ffe8aSesaxe */ 3623611ffe8aSesaxe lgrp_mem_policy_t lgrp_segmap_default_policy = LGRP_MEM_POLICY_RANDOM; 3624611ffe8aSesaxe 3625611ffe8aSesaxe /* 36267c478bd9Sstevel@tonic-gate * Return lgroup to use for allocating memory 36277c478bd9Sstevel@tonic-gate * given the segment and 
address 36287c478bd9Sstevel@tonic-gate * 36297c478bd9Sstevel@tonic-gate * There isn't any mutual exclusion that exists between calls 36307c478bd9Sstevel@tonic-gate * to this routine and DR, so this routine and whomever calls it 36317c478bd9Sstevel@tonic-gate * should be mindful of the possibility that the lgrp returned 36327c478bd9Sstevel@tonic-gate * may be deleted. If this happens, dereferences of the lgrp 36337c478bd9Sstevel@tonic-gate * pointer will still be safe, but the resources in the lgrp will 36347c478bd9Sstevel@tonic-gate * be gone, and LGRP_EXISTS() will no longer be true. 36357c478bd9Sstevel@tonic-gate */ 36367c478bd9Sstevel@tonic-gate lgrp_t * 36377c478bd9Sstevel@tonic-gate lgrp_mem_choose(struct seg *seg, caddr_t vaddr, size_t pgsz) 36387c478bd9Sstevel@tonic-gate { 36397c478bd9Sstevel@tonic-gate int i; 36407c478bd9Sstevel@tonic-gate lgrp_t *lgrp; 36417c478bd9Sstevel@tonic-gate klgrpset_t lgrpset; 36427c478bd9Sstevel@tonic-gate int lgrps_spanned; 36437c478bd9Sstevel@tonic-gate unsigned long off; 36447c478bd9Sstevel@tonic-gate lgrp_mem_policy_t policy; 36457c478bd9Sstevel@tonic-gate lgrp_mem_policy_info_t *policy_info; 36467c478bd9Sstevel@tonic-gate ushort_t random; 36477c478bd9Sstevel@tonic-gate int stat = 0; 3648611ffe8aSesaxe extern struct seg *segkmap; 36497c478bd9Sstevel@tonic-gate 36507c478bd9Sstevel@tonic-gate /* 36517c478bd9Sstevel@tonic-gate * Just return null if the lgrp framework hasn't finished 36527c478bd9Sstevel@tonic-gate * initializing or if this is a UMA machine. 
36537c478bd9Sstevel@tonic-gate */ 36547c478bd9Sstevel@tonic-gate if (nlgrps == 1 || !lgrp_initialized) 36557c478bd9Sstevel@tonic-gate return (lgrp_root); 36567c478bd9Sstevel@tonic-gate 36577c478bd9Sstevel@tonic-gate /* 36587c478bd9Sstevel@tonic-gate * Get memory allocation policy for this segment 36597c478bd9Sstevel@tonic-gate */ 36607c478bd9Sstevel@tonic-gate policy = lgrp_mem_default_policy; 36617c478bd9Sstevel@tonic-gate if (seg != NULL) { 36627c478bd9Sstevel@tonic-gate if (seg->s_as == &kas) { 3663611ffe8aSesaxe if (seg == segkmap) 3664611ffe8aSesaxe policy = lgrp_segmap_default_policy; 36657c478bd9Sstevel@tonic-gate if (policy == LGRP_MEM_POLICY_RANDOM_PROC || 36667c478bd9Sstevel@tonic-gate policy == LGRP_MEM_POLICY_RANDOM_PSET) 36677c478bd9Sstevel@tonic-gate policy = LGRP_MEM_POLICY_RANDOM; 36687c478bd9Sstevel@tonic-gate } else { 36697c478bd9Sstevel@tonic-gate policy_info = lgrp_mem_policy_get(seg, vaddr); 36707c478bd9Sstevel@tonic-gate if (policy_info != NULL) 36717c478bd9Sstevel@tonic-gate policy = policy_info->mem_policy; 36727c478bd9Sstevel@tonic-gate } 36737c478bd9Sstevel@tonic-gate } 36747c478bd9Sstevel@tonic-gate lgrpset = 0; 36757c478bd9Sstevel@tonic-gate 36767c478bd9Sstevel@tonic-gate /* 36777c478bd9Sstevel@tonic-gate * Initialize lgroup to home by default 36787c478bd9Sstevel@tonic-gate */ 36797c478bd9Sstevel@tonic-gate lgrp = lgrp_home_lgrp(); 36807c478bd9Sstevel@tonic-gate 36817c478bd9Sstevel@tonic-gate /* 36827c478bd9Sstevel@tonic-gate * When homing threads on root lgrp, override default memory 36837c478bd9Sstevel@tonic-gate * allocation policies with root lgroup memory allocation policy 36847c478bd9Sstevel@tonic-gate */ 36857c478bd9Sstevel@tonic-gate if (lgrp == lgrp_root) 36867c478bd9Sstevel@tonic-gate policy = lgrp_mem_policy_root; 36877c478bd9Sstevel@tonic-gate 36887c478bd9Sstevel@tonic-gate /* 36897c478bd9Sstevel@tonic-gate * Implement policy 36907c478bd9Sstevel@tonic-gate */ 36917c478bd9Sstevel@tonic-gate switch (policy) { 
36927c478bd9Sstevel@tonic-gate case LGRP_MEM_POLICY_NEXT_CPU: 36937c478bd9Sstevel@tonic-gate 36947c478bd9Sstevel@tonic-gate /* 36957c478bd9Sstevel@tonic-gate * Return lgroup of current CPU which faulted on memory 3696394b433dSesaxe * If the CPU isn't currently in an lgrp, then opt to 3697394b433dSesaxe * allocate from the root. 3698394b433dSesaxe * 3699394b433dSesaxe * Kernel preemption needs to be disabled here to prevent 3700394b433dSesaxe * the current CPU from going away before lgrp is found. 37017c478bd9Sstevel@tonic-gate */ 3702394b433dSesaxe if (LGRP_CPU_HAS_NO_LGRP(CPU)) { 3703394b433dSesaxe lgrp = lgrp_root; 3704394b433dSesaxe } else { 3705394b433dSesaxe kpreempt_disable(); 37067c478bd9Sstevel@tonic-gate lgrp = lgrp_cpu_to_lgrp(CPU); 3707394b433dSesaxe kpreempt_enable(); 3708394b433dSesaxe } 37097c478bd9Sstevel@tonic-gate break; 37107c478bd9Sstevel@tonic-gate 37117c478bd9Sstevel@tonic-gate case LGRP_MEM_POLICY_NEXT: 37127c478bd9Sstevel@tonic-gate case LGRP_MEM_POLICY_DEFAULT: 37137c478bd9Sstevel@tonic-gate default: 37147c478bd9Sstevel@tonic-gate 37157c478bd9Sstevel@tonic-gate /* 37167c478bd9Sstevel@tonic-gate * Just return current thread's home lgroup 37177c478bd9Sstevel@tonic-gate * for default policy (next touch) 37187c478bd9Sstevel@tonic-gate * If the thread is homed to the root, 37197c478bd9Sstevel@tonic-gate * then the default policy is random across lgroups. 37207c478bd9Sstevel@tonic-gate * Fallthrough to the random case. 
37217c478bd9Sstevel@tonic-gate */ 37227c478bd9Sstevel@tonic-gate if (lgrp != lgrp_root) { 37237c478bd9Sstevel@tonic-gate if (policy == LGRP_MEM_POLICY_NEXT) 37247c478bd9Sstevel@tonic-gate lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_NEXT, 1); 37257c478bd9Sstevel@tonic-gate else 37267c478bd9Sstevel@tonic-gate lgrp_stat_add(lgrp->lgrp_id, 37277c478bd9Sstevel@tonic-gate LGRP_NUM_DEFAULT, 1); 37287c478bd9Sstevel@tonic-gate break; 37297c478bd9Sstevel@tonic-gate } 37307c478bd9Sstevel@tonic-gate /* LINTED fallthrough on case statement */ 37317c478bd9Sstevel@tonic-gate case LGRP_MEM_POLICY_RANDOM: 37327c478bd9Sstevel@tonic-gate 37337c478bd9Sstevel@tonic-gate /* 37347c478bd9Sstevel@tonic-gate * Return a random leaf lgroup with memory 37357c478bd9Sstevel@tonic-gate */ 37367c478bd9Sstevel@tonic-gate lgrpset = lgrp_root->lgrp_set[LGRP_RSRC_MEM]; 37377c478bd9Sstevel@tonic-gate /* 37387c478bd9Sstevel@tonic-gate * Count how many lgroups are spanned 37397c478bd9Sstevel@tonic-gate */ 37407c478bd9Sstevel@tonic-gate klgrpset_nlgrps(lgrpset, lgrps_spanned); 37417c478bd9Sstevel@tonic-gate 37427c478bd9Sstevel@tonic-gate /* 37437c478bd9Sstevel@tonic-gate * There may be no memnodes in the root lgroup during DR copy 37447c478bd9Sstevel@tonic-gate * rename on a system with only two boards (memnodes) 37457c478bd9Sstevel@tonic-gate * configured. In this case just return the root lgrp. 
37467c478bd9Sstevel@tonic-gate */ 37477c478bd9Sstevel@tonic-gate if (lgrps_spanned == 0) { 37487c478bd9Sstevel@tonic-gate lgrp = lgrp_root; 37497c478bd9Sstevel@tonic-gate break; 37507c478bd9Sstevel@tonic-gate } 37517c478bd9Sstevel@tonic-gate 37527c478bd9Sstevel@tonic-gate /* 37537c478bd9Sstevel@tonic-gate * Pick a random offset within lgroups spanned 37547c478bd9Sstevel@tonic-gate * and return lgroup at that offset 37557c478bd9Sstevel@tonic-gate */ 37567c478bd9Sstevel@tonic-gate random = (ushort_t)gethrtime() >> 4; 37577c478bd9Sstevel@tonic-gate off = random % lgrps_spanned; 37587c478bd9Sstevel@tonic-gate ASSERT(off <= lgrp_alloc_max); 37597c478bd9Sstevel@tonic-gate 37607c478bd9Sstevel@tonic-gate for (i = 0; i <= lgrp_alloc_max; i++) { 37617c478bd9Sstevel@tonic-gate if (!klgrpset_ismember(lgrpset, i)) 37627c478bd9Sstevel@tonic-gate continue; 37637c478bd9Sstevel@tonic-gate if (off) 37647c478bd9Sstevel@tonic-gate off--; 37657c478bd9Sstevel@tonic-gate else { 37667c478bd9Sstevel@tonic-gate lgrp = lgrp_table[i]; 37677c478bd9Sstevel@tonic-gate lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_RANDOM, 37687c478bd9Sstevel@tonic-gate 1); 37697c478bd9Sstevel@tonic-gate break; 37707c478bd9Sstevel@tonic-gate } 37717c478bd9Sstevel@tonic-gate } 37727c478bd9Sstevel@tonic-gate break; 37737c478bd9Sstevel@tonic-gate 37747c478bd9Sstevel@tonic-gate case LGRP_MEM_POLICY_RANDOM_PROC: 37757c478bd9Sstevel@tonic-gate 37767c478bd9Sstevel@tonic-gate /* 37777c478bd9Sstevel@tonic-gate * Grab copy of bitmask of lgroups spanned by 37787c478bd9Sstevel@tonic-gate * this process 37797c478bd9Sstevel@tonic-gate */ 37807c478bd9Sstevel@tonic-gate klgrpset_copy(lgrpset, curproc->p_lgrpset); 37817c478bd9Sstevel@tonic-gate stat = LGRP_NUM_RANDOM_PROC; 37827c478bd9Sstevel@tonic-gate 37837c478bd9Sstevel@tonic-gate /* LINTED fallthrough on case statement */ 37847c478bd9Sstevel@tonic-gate case LGRP_MEM_POLICY_RANDOM_PSET: 37857c478bd9Sstevel@tonic-gate 37867c478bd9Sstevel@tonic-gate if (!stat) 
37877c478bd9Sstevel@tonic-gate stat = LGRP_NUM_RANDOM_PSET; 37887c478bd9Sstevel@tonic-gate 37897c478bd9Sstevel@tonic-gate if (klgrpset_isempty(lgrpset)) { 37907c478bd9Sstevel@tonic-gate /* 37917c478bd9Sstevel@tonic-gate * Grab copy of bitmask of lgroups spanned by 37927c478bd9Sstevel@tonic-gate * this processor set 37937c478bd9Sstevel@tonic-gate */ 37947c478bd9Sstevel@tonic-gate kpreempt_disable(); 37957c478bd9Sstevel@tonic-gate klgrpset_copy(lgrpset, 37967c478bd9Sstevel@tonic-gate curthread->t_cpupart->cp_lgrpset); 37977c478bd9Sstevel@tonic-gate kpreempt_enable(); 37987c478bd9Sstevel@tonic-gate } 37997c478bd9Sstevel@tonic-gate 38007c478bd9Sstevel@tonic-gate /* 38017c478bd9Sstevel@tonic-gate * Count how many lgroups are spanned 38027c478bd9Sstevel@tonic-gate */ 38037c478bd9Sstevel@tonic-gate klgrpset_nlgrps(lgrpset, lgrps_spanned); 38047c478bd9Sstevel@tonic-gate ASSERT(lgrps_spanned <= nlgrps); 38057c478bd9Sstevel@tonic-gate 38067c478bd9Sstevel@tonic-gate /* 38077c478bd9Sstevel@tonic-gate * Probably lgrps_spanned should be always non-zero, but to be 38087c478bd9Sstevel@tonic-gate * on the safe side we return lgrp_root if it is empty. 
38097c478bd9Sstevel@tonic-gate */ 38107c478bd9Sstevel@tonic-gate if (lgrps_spanned == 0) { 38117c478bd9Sstevel@tonic-gate lgrp = lgrp_root; 38127c478bd9Sstevel@tonic-gate break; 38137c478bd9Sstevel@tonic-gate } 38147c478bd9Sstevel@tonic-gate 38157c478bd9Sstevel@tonic-gate /* 38167c478bd9Sstevel@tonic-gate * Pick a random offset within lgroups spanned 38177c478bd9Sstevel@tonic-gate * and return lgroup at that offset 38187c478bd9Sstevel@tonic-gate */ 38197c478bd9Sstevel@tonic-gate random = (ushort_t)gethrtime() >> 4; 38207c478bd9Sstevel@tonic-gate off = random % lgrps_spanned; 38217c478bd9Sstevel@tonic-gate ASSERT(off <= lgrp_alloc_max); 38227c478bd9Sstevel@tonic-gate 38237c478bd9Sstevel@tonic-gate for (i = 0; i <= lgrp_alloc_max; i++) { 38247c478bd9Sstevel@tonic-gate if (!klgrpset_ismember(lgrpset, i)) 38257c478bd9Sstevel@tonic-gate continue; 38267c478bd9Sstevel@tonic-gate if (off) 38277c478bd9Sstevel@tonic-gate off--; 38287c478bd9Sstevel@tonic-gate else { 38297c478bd9Sstevel@tonic-gate lgrp = lgrp_table[i]; 38307c478bd9Sstevel@tonic-gate lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_RANDOM, 38317c478bd9Sstevel@tonic-gate 1); 38327c478bd9Sstevel@tonic-gate break; 38337c478bd9Sstevel@tonic-gate } 38347c478bd9Sstevel@tonic-gate } 38357c478bd9Sstevel@tonic-gate break; 38367c478bd9Sstevel@tonic-gate 38377c478bd9Sstevel@tonic-gate case LGRP_MEM_POLICY_ROUNDROBIN: 38387c478bd9Sstevel@tonic-gate 38397c478bd9Sstevel@tonic-gate /* 38407c478bd9Sstevel@tonic-gate * Use offset within segment to determine 38417c478bd9Sstevel@tonic-gate * offset from home lgroup to choose for 38427c478bd9Sstevel@tonic-gate * next lgroup to allocate memory from 38437c478bd9Sstevel@tonic-gate */ 38447c478bd9Sstevel@tonic-gate off = ((unsigned long)(vaddr - seg->s_base) / pgsz) % 38457c478bd9Sstevel@tonic-gate (lgrp_alloc_max + 1); 38467c478bd9Sstevel@tonic-gate 38477c478bd9Sstevel@tonic-gate kpreempt_disable(); 38487c478bd9Sstevel@tonic-gate lgrpset = lgrp_root->lgrp_set[LGRP_RSRC_MEM]; 
38497c478bd9Sstevel@tonic-gate i = lgrp->lgrp_id; 38507c478bd9Sstevel@tonic-gate kpreempt_enable(); 38517c478bd9Sstevel@tonic-gate 38527c478bd9Sstevel@tonic-gate while (off > 0) { 38537c478bd9Sstevel@tonic-gate i = (i + 1) % (lgrp_alloc_max + 1); 38547c478bd9Sstevel@tonic-gate lgrp = lgrp_table[i]; 38557c478bd9Sstevel@tonic-gate if (klgrpset_ismember(lgrpset, i)) 38567c478bd9Sstevel@tonic-gate off--; 38577c478bd9Sstevel@tonic-gate } 38587c478bd9Sstevel@tonic-gate lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ROUNDROBIN, 1); 38597c478bd9Sstevel@tonic-gate 38607c478bd9Sstevel@tonic-gate break; 38617c478bd9Sstevel@tonic-gate } 38627c478bd9Sstevel@tonic-gate 38637c478bd9Sstevel@tonic-gate ASSERT(lgrp != NULL); 38647c478bd9Sstevel@tonic-gate return (lgrp); 38657c478bd9Sstevel@tonic-gate } 38667c478bd9Sstevel@tonic-gate 38677c478bd9Sstevel@tonic-gate /* 38687c478bd9Sstevel@tonic-gate * Return the number of pages in an lgroup 38697c478bd9Sstevel@tonic-gate * 38707c478bd9Sstevel@tonic-gate * NOTE: NUMA test (numat) driver uses this, so changing arguments or semantics 38717c478bd9Sstevel@tonic-gate * could cause tests that rely on the numat driver to fail.... 
 */
pgcnt_t
lgrp_mem_size(lgrp_id_t lgrpid, lgrp_mem_query_t query)
{
	lgrp_t *lgrp;

	/*
	 * Answer 0 for lgroups that don't exist or that own no memory
	 * resources (including the case where this lgroup's memory lives
	 * only in other lgroups' resource sets); otherwise defer to the
	 * platform layer for the actual count.
	 */
	lgrp = lgrp_table[lgrpid];
	if (!LGRP_EXISTS(lgrp) ||
	    klgrpset_isempty(lgrp->lgrp_set[LGRP_RSRC_MEM]) ||
	    !klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid))
		return (0);

	return (lgrp_plat_mem_size(lgrp->lgrp_plathand, query));
}

/*
 * Initialize lgroup shared memory allocation policy support
 *
 * Exactly one of amp/vp is used: an anon_map gets a locality struct hung
 * off amp->locality; a vnode gets one hung off vp->v_locality plus the
 * V_LOCALITY flag and a reference count of mapping segments.  In both
 * paths the lock is dropped around the sleeping allocation and the
 * "did someone beat us to it" race is re-checked after reacquiring.
 */
void
lgrp_shm_policy_init(struct anon_map *amp, vnode_t *vp)
{
	lgrp_shm_locality_t	*shm_locality;

	/*
	 * Initialize locality field in anon_map
	 * Don't need any locks because this is called when anon_map is
	 * allocated, but not used anywhere yet.
	 */
	if (amp) {
		ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
		if (amp->locality == NULL) {
			/*
			 * Allocate and initialize shared memory locality info
			 * and set anon_map locality pointer to it
			 * Drop lock across kmem_alloc(KM_SLEEP)
			 */
			ANON_LOCK_EXIT(&amp->a_rwlock);
			shm_locality = kmem_alloc(sizeof (*shm_locality),
			    KM_SLEEP);
			rw_init(&shm_locality->loc_lock, NULL, RW_DEFAULT,
			    NULL);
			shm_locality->loc_count = 1;	/* not used for amp */
			shm_locality->loc_tree = NULL;

			/*
			 * Reacquire lock and check to see whether anyone beat
			 * us to initializing the locality info
			 */
			ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
			if (amp->locality != NULL) {
				/* lost the race: discard our copy */
				rw_destroy(&shm_locality->loc_lock);
				kmem_free(shm_locality,
				    sizeof (*shm_locality));
			} else
				amp->locality = shm_locality;
		}
		ANON_LOCK_EXIT(&amp->a_rwlock);
		return;
	}

	/*
	 * Allocate shared vnode policy info if vnode is not locality aware yet
	 */
	mutex_enter(&vp->v_lock);
	if ((vp->v_flag & V_LOCALITY) == 0) {
		/*
		 * Allocate and initialize shared memory locality info
		 * (v_lock dropped across the sleeping allocation)
		 */
		mutex_exit(&vp->v_lock);
		shm_locality = kmem_alloc(sizeof (*shm_locality), KM_SLEEP);
		rw_init(&shm_locality->loc_lock, NULL, RW_DEFAULT, NULL);
		shm_locality->loc_count = 1;
		shm_locality->loc_tree = NULL;

		/*
		 * Point vnode locality field at shared vnode policy info
		 * and set locality aware flag in vnode
		 */
		mutex_enter(&vp->v_lock);
		if ((vp->v_flag & V_LOCALITY) == 0) {
			vp->v_locality = shm_locality;
			vp->v_flag |= V_LOCALITY;
		} else {
			/*
			 * Lost race so free locality info and increment count.
			 */
			rw_destroy(&shm_locality->loc_lock);
			kmem_free(shm_locality, sizeof (*shm_locality));
			shm_locality = vp->v_locality;
			shm_locality->loc_count++;
		}
		mutex_exit(&vp->v_lock);

		return;
	}

	/*
	 * Increment reference count of number of segments mapping this vnode
	 * shared
	 */
	shm_locality = vp->v_locality;
	shm_locality->loc_count++;
	mutex_exit(&vp->v_lock);
}

/*
 * Destroy the given shared memory policy segment tree
 *
 * Frees every policy segment in the tree and then the tree itself.
 * NULL is accepted (nothing to do).
 */
void
lgrp_shm_policy_tree_destroy(avl_tree_t *tree)
{
	lgrp_shm_policy_seg_t	*cur;
	lgrp_shm_policy_seg_t	*next;

	if (tree == NULL)
		return;

	/* capture the successor before removing/freeing the current node */
	cur = (lgrp_shm_policy_seg_t *)avl_first(tree);
	while (cur != NULL) {
		next = AVL_NEXT(tree, cur);
		avl_remove(tree, cur);
		kmem_free(cur, sizeof (*cur));
		cur = next;
	}
	kmem_free(tree, sizeof (avl_tree_t));
}

/*
 * Uninitialize lgroup shared memory allocation policy support
 *
 * Mirror of lgrp_shm_policy_init(): for an anon_map, free everything
 * outright (the anon_map is being destroyed, so no locking is needed);
 * for a vnode, drop one segment reference and free only when the count
 * reaches zero.
 */
void
lgrp_shm_policy_fini(struct anon_map *amp, vnode_t *vp)
{
	lgrp_shm_locality_t	*shm_locality;

	/*
	 * For anon_map, deallocate shared memory policy tree and
	 * zero locality field
	 * Don't need any locks because anon_map is being freed
	 */
	if (amp) {
		if (amp->locality == NULL)
			return;
		shm_locality = amp->locality;
		shm_locality->loc_count = 0;	/* not really used for amp */
		rw_destroy(&shm_locality->loc_lock);
		lgrp_shm_policy_tree_destroy(shm_locality->loc_tree);
		kmem_free(shm_locality, sizeof (*shm_locality));
		amp->locality = 0;
		return;
	}

	/*
	 * For vnode, decrement reference count of segments mapping this vnode
	 * shared and delete locality info if reference count drops to 0
	 */
	mutex_enter(&vp->v_lock);
	shm_locality = vp->v_locality;
	shm_locality->loc_count--;

	if (shm_locality->loc_count == 0) {
		rw_destroy(&shm_locality->loc_lock);
		lgrp_shm_policy_tree_destroy(shm_locality->loc_tree);
		kmem_free(shm_locality, sizeof (*shm_locality));
		vp->v_locality = 0;
		vp->v_flag &= ~V_LOCALITY;
	}
	mutex_exit(&vp->v_lock);
}

/*
 * Compare two shared memory policy segments
 * Used by AVL tree code for searching
 *
 * Note this is a range comparison, not a point comparison: any offset
 * falling inside [b->shm_off, b->shm_off + b->shm_size) compares equal
 * to segment b, so avl_find() locates the segment containing an offset.
 * Lookups pass a bare u_offset_t as the search key, which presumably
 * aliases shm_off as the first struct member — TODO confirm layout.
 */
int
lgrp_shm_policy_compar(const void *x, const void *y)
{
	lgrp_shm_policy_seg_t *a = (lgrp_shm_policy_seg_t *)x;
	lgrp_shm_policy_seg_t *b = (lgrp_shm_policy_seg_t *)y;

	if (a->shm_off < b->shm_off)
		return (-1);
	if (a->shm_off >= b->shm_off + b->shm_size)
		return (1);
	return (0);
}

/*
 * Concatenate seg1 with seg2 and remove seg2
 *
 * Only merges when seg2 starts exactly where seg1 ends and both carry the
 * same memory policy; returns -1 without side effects otherwise, 0 on
 * success (seg2 is removed from the tree and freed).
 */
static int
lgrp_shm_policy_concat(avl_tree_t *tree, lgrp_shm_policy_seg_t *seg1,
    lgrp_shm_policy_seg_t *seg2)
{
	if (!seg1 || !seg2 ||
	    seg1->shm_off + seg1->shm_size != seg2->shm_off ||
	    seg1->shm_policy.mem_policy != seg2->shm_policy.mem_policy)
		return (-1);

	seg1->shm_size += seg2->shm_size;
	avl_remove(tree, seg2);
	kmem_free(seg2, sizeof (*seg2));
	return (0);
}

/*
 * Split segment at given offset and return rightmost (uppermost) segment
 * Assumes that there are no overlapping segments
 *
 * Boundary cases: an offset exactly at either end of the segment needs no
 * split and returns the segment itself; an out-of-range offset returns
 * NULL (also caught by the ASSERTs on DEBUG kernels).  Otherwise the
 * segment is truncated on the left and a new right-hand segment carrying
 * the same policy is inserted into the tree.
 */
static lgrp_shm_policy_seg_t *
lgrp_shm_policy_split(avl_tree_t *tree, lgrp_shm_policy_seg_t *seg,
    u_offset_t off)
{
	lgrp_shm_policy_seg_t	*newseg;
	avl_index_t		where;

	ASSERT(seg != NULL);
	ASSERT(off >= seg->shm_off && off <= seg->shm_off + seg->shm_size);

	if (!seg || off < seg->shm_off || off > seg->shm_off +
	    seg->shm_size)
		return (NULL);

	if (off == seg->shm_off || off == seg->shm_off + seg->shm_size)
		return (seg);

	/*
	 * Adjust size of left segment and allocate new (right) segment
	 */
	newseg = kmem_alloc(sizeof (lgrp_shm_policy_seg_t), KM_SLEEP);
	newseg->shm_policy = seg->shm_policy;
	newseg->shm_off = off;
	newseg->shm_size = seg->shm_size - (off - seg->shm_off);
	seg->shm_size = off - seg->shm_off;

	/*
	 * Find where to insert new segment in AVL tree and insert it
	 */
	(void) avl_find(tree, &off, &where);
	avl_insert(tree, newseg, where);

	return (newseg);
}

/*
 * Set shared memory allocation policy on specified shared object at given
 * offset and length
 *
 * Return 0 if policy wasn't set already, 1 if policy was set already, and
 * -1 if can't set policy.
 */
int
lgrp_shm_policy_set(lgrp_mem_policy_t policy, struct anon_map *amp,
    ulong_t anon_index, vnode_t *vp, u_offset_t vn_off, size_t len)
{
	u_offset_t		eoff;
	lgrp_shm_policy_seg_t	*next;
	lgrp_shm_policy_seg_t	*newseg;
	u_offset_t		off;
	u_offset_t		oldeoff;
	lgrp_shm_policy_seg_t	*prev;
	int			retval;
	lgrp_shm_policy_seg_t	*seg;
	lgrp_shm_locality_t	*shm_locality;
	avl_tree_t		*tree;
	avl_index_t		where;

	ASSERT(amp || vp);
	ASSERT((len & PAGEOFFSET) == 0);
41417c478bd9Sstevel@tonic-gate 41427c478bd9Sstevel@tonic-gate if (len == 0) 41437c478bd9Sstevel@tonic-gate return (-1); 41447c478bd9Sstevel@tonic-gate 41457c478bd9Sstevel@tonic-gate retval = 0; 41467c478bd9Sstevel@tonic-gate 41477c478bd9Sstevel@tonic-gate /* 41487c478bd9Sstevel@tonic-gate * Get locality info and starting offset into shared object 41497c478bd9Sstevel@tonic-gate * Try anon map first and then vnode 41507c478bd9Sstevel@tonic-gate * Assume that no locks need to be held on anon_map or vnode, since 41517c478bd9Sstevel@tonic-gate * it should be protected by its reference count which must be nonzero 41527c478bd9Sstevel@tonic-gate * for an existing segment. 41537c478bd9Sstevel@tonic-gate */ 41547c478bd9Sstevel@tonic-gate if (amp) { 41557c478bd9Sstevel@tonic-gate /* 41567c478bd9Sstevel@tonic-gate * Get policy info from anon_map 41577c478bd9Sstevel@tonic-gate * 41587c478bd9Sstevel@tonic-gate */ 41597c478bd9Sstevel@tonic-gate ASSERT(amp->refcnt != 0); 41607c478bd9Sstevel@tonic-gate if (amp->locality == NULL) 41617c478bd9Sstevel@tonic-gate lgrp_shm_policy_init(amp, NULL); 41627c478bd9Sstevel@tonic-gate shm_locality = amp->locality; 41637c478bd9Sstevel@tonic-gate off = ptob(anon_index); 41647c478bd9Sstevel@tonic-gate } else if (vp) { 41657c478bd9Sstevel@tonic-gate /* 41667c478bd9Sstevel@tonic-gate * Get policy info from vnode 41677c478bd9Sstevel@tonic-gate */ 41687c478bd9Sstevel@tonic-gate if ((vp->v_flag & V_LOCALITY) == 0 || vp->v_locality == NULL) 41697c478bd9Sstevel@tonic-gate lgrp_shm_policy_init(NULL, vp); 41707c478bd9Sstevel@tonic-gate shm_locality = vp->v_locality; 41717c478bd9Sstevel@tonic-gate ASSERT(shm_locality->loc_count != 0); 41727c478bd9Sstevel@tonic-gate off = vn_off; 41737c478bd9Sstevel@tonic-gate } else 41747c478bd9Sstevel@tonic-gate return (-1); 41757c478bd9Sstevel@tonic-gate 41767c478bd9Sstevel@tonic-gate ASSERT((off & PAGEOFFSET) == 0); 41777c478bd9Sstevel@tonic-gate 41787c478bd9Sstevel@tonic-gate /* 41797c478bd9Sstevel@tonic-gate * Figure 
out default policy 41807c478bd9Sstevel@tonic-gate */ 41817c478bd9Sstevel@tonic-gate if (policy == LGRP_MEM_POLICY_DEFAULT) 41827c478bd9Sstevel@tonic-gate policy = lgrp_mem_policy_default(len, MAP_SHARED); 41837c478bd9Sstevel@tonic-gate 41847c478bd9Sstevel@tonic-gate /* 41857c478bd9Sstevel@tonic-gate * Create AVL tree if there isn't one yet 41867c478bd9Sstevel@tonic-gate * and set locality field to point at it 41877c478bd9Sstevel@tonic-gate */ 41887c478bd9Sstevel@tonic-gate rw_enter(&shm_locality->loc_lock, RW_WRITER); 41897c478bd9Sstevel@tonic-gate tree = shm_locality->loc_tree; 41907c478bd9Sstevel@tonic-gate if (!tree) { 41917c478bd9Sstevel@tonic-gate rw_exit(&shm_locality->loc_lock); 41927c478bd9Sstevel@tonic-gate 41937c478bd9Sstevel@tonic-gate tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); 41947c478bd9Sstevel@tonic-gate 41957c478bd9Sstevel@tonic-gate rw_enter(&shm_locality->loc_lock, RW_WRITER); 41967c478bd9Sstevel@tonic-gate if (shm_locality->loc_tree == NULL) { 41977c478bd9Sstevel@tonic-gate avl_create(tree, lgrp_shm_policy_compar, 41987c478bd9Sstevel@tonic-gate sizeof (lgrp_shm_policy_seg_t), 41997c478bd9Sstevel@tonic-gate offsetof(lgrp_shm_policy_seg_t, shm_tree)); 42007c478bd9Sstevel@tonic-gate shm_locality->loc_tree = tree; 42017c478bd9Sstevel@tonic-gate } else { 42027c478bd9Sstevel@tonic-gate /* 42037c478bd9Sstevel@tonic-gate * Another thread managed to set up the tree 42047c478bd9Sstevel@tonic-gate * before we could. Free the tree we allocated 42057c478bd9Sstevel@tonic-gate * and use the one that's already there. 
42067c478bd9Sstevel@tonic-gate */ 42077c478bd9Sstevel@tonic-gate kmem_free(tree, sizeof (*tree)); 42087c478bd9Sstevel@tonic-gate tree = shm_locality->loc_tree; 42097c478bd9Sstevel@tonic-gate } 42107c478bd9Sstevel@tonic-gate } 42117c478bd9Sstevel@tonic-gate 42127c478bd9Sstevel@tonic-gate /* 42137c478bd9Sstevel@tonic-gate * Set policy 42147c478bd9Sstevel@tonic-gate * 42157c478bd9Sstevel@tonic-gate * Need to maintain hold on writer's lock to keep tree from 42167c478bd9Sstevel@tonic-gate * changing out from under us 42177c478bd9Sstevel@tonic-gate */ 42187c478bd9Sstevel@tonic-gate while (len != 0) { 42197c478bd9Sstevel@tonic-gate /* 42207c478bd9Sstevel@tonic-gate * Find policy segment for specified offset into shared object 42217c478bd9Sstevel@tonic-gate */ 42227c478bd9Sstevel@tonic-gate seg = avl_find(tree, &off, &where); 42237c478bd9Sstevel@tonic-gate 42247c478bd9Sstevel@tonic-gate /* 42257c478bd9Sstevel@tonic-gate * Didn't find any existing segment that contains specified 42267c478bd9Sstevel@tonic-gate * offset, so allocate new segment, insert it, and concatenate 42277c478bd9Sstevel@tonic-gate * with adjacent segments if possible 42287c478bd9Sstevel@tonic-gate */ 42297c478bd9Sstevel@tonic-gate if (seg == NULL) { 42307c478bd9Sstevel@tonic-gate newseg = kmem_alloc(sizeof (lgrp_shm_policy_seg_t), 42317c478bd9Sstevel@tonic-gate KM_SLEEP); 42327c478bd9Sstevel@tonic-gate newseg->shm_policy.mem_policy = policy; 42337c478bd9Sstevel@tonic-gate newseg->shm_policy.mem_reserved = 0; 42347c478bd9Sstevel@tonic-gate newseg->shm_off = off; 42357c478bd9Sstevel@tonic-gate avl_insert(tree, newseg, where); 42367c478bd9Sstevel@tonic-gate 42377c478bd9Sstevel@tonic-gate /* 42387c478bd9Sstevel@tonic-gate * Check to see whether new segment overlaps with next 42397c478bd9Sstevel@tonic-gate * one, set length of new segment accordingly, and 42407c478bd9Sstevel@tonic-gate * calculate remaining length and next offset 42417c478bd9Sstevel@tonic-gate */ 42427c478bd9Sstevel@tonic-gate seg = 
AVL_NEXT(tree, newseg); 42437c478bd9Sstevel@tonic-gate if (seg == NULL || off + len <= seg->shm_off) { 42447c478bd9Sstevel@tonic-gate newseg->shm_size = len; 42457c478bd9Sstevel@tonic-gate len = 0; 42467c478bd9Sstevel@tonic-gate } else { 42477c478bd9Sstevel@tonic-gate newseg->shm_size = seg->shm_off - off; 42487c478bd9Sstevel@tonic-gate off = seg->shm_off; 42497c478bd9Sstevel@tonic-gate len -= newseg->shm_size; 42507c478bd9Sstevel@tonic-gate } 42517c478bd9Sstevel@tonic-gate 42527c478bd9Sstevel@tonic-gate /* 42537c478bd9Sstevel@tonic-gate * Try to concatenate new segment with next and 42547c478bd9Sstevel@tonic-gate * previous ones, since they might have the same policy 42557c478bd9Sstevel@tonic-gate * now. Grab previous and next segments first because 42567c478bd9Sstevel@tonic-gate * they will change on concatenation. 42577c478bd9Sstevel@tonic-gate */ 42587c478bd9Sstevel@tonic-gate prev = AVL_PREV(tree, newseg); 42597c478bd9Sstevel@tonic-gate next = AVL_NEXT(tree, newseg); 42607c478bd9Sstevel@tonic-gate (void) lgrp_shm_policy_concat(tree, newseg, next); 42617c478bd9Sstevel@tonic-gate (void) lgrp_shm_policy_concat(tree, prev, newseg); 42627c478bd9Sstevel@tonic-gate 42637c478bd9Sstevel@tonic-gate continue; 42647c478bd9Sstevel@tonic-gate } 42657c478bd9Sstevel@tonic-gate 42667c478bd9Sstevel@tonic-gate eoff = off + len; 42677c478bd9Sstevel@tonic-gate oldeoff = seg->shm_off + seg->shm_size; 42687c478bd9Sstevel@tonic-gate 42697c478bd9Sstevel@tonic-gate /* 42707c478bd9Sstevel@tonic-gate * Policy set already? 
42717c478bd9Sstevel@tonic-gate */ 42727c478bd9Sstevel@tonic-gate if (policy == seg->shm_policy.mem_policy) { 42737c478bd9Sstevel@tonic-gate /* 42747c478bd9Sstevel@tonic-gate * Nothing left to do if offset and length 42757c478bd9Sstevel@tonic-gate * fall within this segment 42767c478bd9Sstevel@tonic-gate */ 42777c478bd9Sstevel@tonic-gate if (eoff <= oldeoff) { 42787c478bd9Sstevel@tonic-gate retval = 1; 42797c478bd9Sstevel@tonic-gate break; 42807c478bd9Sstevel@tonic-gate } else { 42817c478bd9Sstevel@tonic-gate len = eoff - oldeoff; 42827c478bd9Sstevel@tonic-gate off = oldeoff; 42837c478bd9Sstevel@tonic-gate continue; 42847c478bd9Sstevel@tonic-gate } 42857c478bd9Sstevel@tonic-gate } 42867c478bd9Sstevel@tonic-gate 42877c478bd9Sstevel@tonic-gate /* 42887c478bd9Sstevel@tonic-gate * Specified offset and length match existing segment exactly 42897c478bd9Sstevel@tonic-gate */ 42907c478bd9Sstevel@tonic-gate if (off == seg->shm_off && len == seg->shm_size) { 42917c478bd9Sstevel@tonic-gate /* 42927c478bd9Sstevel@tonic-gate * Set policy and update current length 42937c478bd9Sstevel@tonic-gate */ 42947c478bd9Sstevel@tonic-gate seg->shm_policy.mem_policy = policy; 42957c478bd9Sstevel@tonic-gate seg->shm_policy.mem_reserved = 0; 42967c478bd9Sstevel@tonic-gate len = 0; 42977c478bd9Sstevel@tonic-gate 42987c478bd9Sstevel@tonic-gate /* 42997c478bd9Sstevel@tonic-gate * Try concatenating new segment with previous and next 43007c478bd9Sstevel@tonic-gate * segments, since they might have the same policy now. 43017c478bd9Sstevel@tonic-gate * Grab previous and next segments first because they 43027c478bd9Sstevel@tonic-gate * will change on concatenation. 
43037c478bd9Sstevel@tonic-gate */ 43047c478bd9Sstevel@tonic-gate prev = AVL_PREV(tree, seg); 43057c478bd9Sstevel@tonic-gate next = AVL_NEXT(tree, seg); 43067c478bd9Sstevel@tonic-gate (void) lgrp_shm_policy_concat(tree, seg, next); 43077c478bd9Sstevel@tonic-gate (void) lgrp_shm_policy_concat(tree, prev, seg); 43087c478bd9Sstevel@tonic-gate } else { 43097c478bd9Sstevel@tonic-gate /* 43107c478bd9Sstevel@tonic-gate * Specified offset and length only apply to part of 43117c478bd9Sstevel@tonic-gate * existing segment 43127c478bd9Sstevel@tonic-gate */ 43137c478bd9Sstevel@tonic-gate 43147c478bd9Sstevel@tonic-gate /* 43157c478bd9Sstevel@tonic-gate * New segment starts in middle of old one, so split 43167c478bd9Sstevel@tonic-gate * new one off near beginning of old one 43177c478bd9Sstevel@tonic-gate */ 43187c478bd9Sstevel@tonic-gate newseg = NULL; 43197c478bd9Sstevel@tonic-gate if (off > seg->shm_off) { 43207c478bd9Sstevel@tonic-gate newseg = lgrp_shm_policy_split(tree, seg, off); 43217c478bd9Sstevel@tonic-gate 43227c478bd9Sstevel@tonic-gate /* 43237c478bd9Sstevel@tonic-gate * New segment ends where old one did, so try 43247c478bd9Sstevel@tonic-gate * to concatenate with next segment 43257c478bd9Sstevel@tonic-gate */ 43267c478bd9Sstevel@tonic-gate if (eoff == oldeoff) { 43277c478bd9Sstevel@tonic-gate newseg->shm_policy.mem_policy = policy; 43287c478bd9Sstevel@tonic-gate newseg->shm_policy.mem_reserved = 0; 43297c478bd9Sstevel@tonic-gate (void) lgrp_shm_policy_concat(tree, 43307c478bd9Sstevel@tonic-gate newseg, AVL_NEXT(tree, newseg)); 43317c478bd9Sstevel@tonic-gate break; 43327c478bd9Sstevel@tonic-gate } 43337c478bd9Sstevel@tonic-gate } 43347c478bd9Sstevel@tonic-gate 43357c478bd9Sstevel@tonic-gate /* 43367c478bd9Sstevel@tonic-gate * New segment ends before old one, so split off end of 43377c478bd9Sstevel@tonic-gate * old one 43387c478bd9Sstevel@tonic-gate */ 43397c478bd9Sstevel@tonic-gate if (eoff < oldeoff) { 43407c478bd9Sstevel@tonic-gate if (newseg) { 
43417c478bd9Sstevel@tonic-gate (void) lgrp_shm_policy_split(tree, 43427c478bd9Sstevel@tonic-gate newseg, eoff); 43437c478bd9Sstevel@tonic-gate newseg->shm_policy.mem_policy = policy; 43447c478bd9Sstevel@tonic-gate newseg->shm_policy.mem_reserved = 0; 43457c478bd9Sstevel@tonic-gate } else { 43467c478bd9Sstevel@tonic-gate (void) lgrp_shm_policy_split(tree, seg, 43477c478bd9Sstevel@tonic-gate eoff); 43487c478bd9Sstevel@tonic-gate seg->shm_policy.mem_policy = policy; 43497c478bd9Sstevel@tonic-gate seg->shm_policy.mem_reserved = 0; 43507c478bd9Sstevel@tonic-gate } 43517c478bd9Sstevel@tonic-gate 43527c478bd9Sstevel@tonic-gate if (off == seg->shm_off) 43537c478bd9Sstevel@tonic-gate (void) lgrp_shm_policy_concat(tree, 43547c478bd9Sstevel@tonic-gate AVL_PREV(tree, seg), seg); 43557c478bd9Sstevel@tonic-gate break; 43567c478bd9Sstevel@tonic-gate } 43577c478bd9Sstevel@tonic-gate 43587c478bd9Sstevel@tonic-gate /* 43597c478bd9Sstevel@tonic-gate * Calculate remaining length and next offset 43607c478bd9Sstevel@tonic-gate */ 43617c478bd9Sstevel@tonic-gate len = eoff - oldeoff; 43627c478bd9Sstevel@tonic-gate off = oldeoff; 43637c478bd9Sstevel@tonic-gate } 43647c478bd9Sstevel@tonic-gate } 43657c478bd9Sstevel@tonic-gate 43667c478bd9Sstevel@tonic-gate rw_exit(&shm_locality->loc_lock); 43677c478bd9Sstevel@tonic-gate return (retval); 43687c478bd9Sstevel@tonic-gate } 43697c478bd9Sstevel@tonic-gate 43707c478bd9Sstevel@tonic-gate /* 43717c478bd9Sstevel@tonic-gate * Return the best memnode from which to allocate memory given 43727c478bd9Sstevel@tonic-gate * an lgroup. 43737c478bd9Sstevel@tonic-gate * 43747c478bd9Sstevel@tonic-gate * "c" is for cookie, which is good enough for me. 43757c478bd9Sstevel@tonic-gate * It references a cookie struct that should be zero'ed to initialize. 43767c478bd9Sstevel@tonic-gate * The cookie should live on the caller's stack. 
43777c478bd9Sstevel@tonic-gate * 43787c478bd9Sstevel@tonic-gate * The routine returns -1 when: 43797c478bd9Sstevel@tonic-gate * - traverse is 0, and all the memnodes in "lgrp" have been returned. 43807c478bd9Sstevel@tonic-gate * - traverse is 1, and all the memnodes in the system have been 43817c478bd9Sstevel@tonic-gate * returned. 43827c478bd9Sstevel@tonic-gate */ 43837c478bd9Sstevel@tonic-gate int 43847c478bd9Sstevel@tonic-gate lgrp_memnode_choose(lgrp_mnode_cookie_t *c) 43857c478bd9Sstevel@tonic-gate { 43867c478bd9Sstevel@tonic-gate lgrp_t *lp = c->lmc_lgrp; 43877c478bd9Sstevel@tonic-gate mnodeset_t nodes = c->lmc_nodes; 43887c478bd9Sstevel@tonic-gate int cnt = c->lmc_cnt; 43897c478bd9Sstevel@tonic-gate int offset, mnode; 43907c478bd9Sstevel@tonic-gate 43917c478bd9Sstevel@tonic-gate extern int max_mem_nodes; 43927c478bd9Sstevel@tonic-gate 43937c478bd9Sstevel@tonic-gate /* 43947c478bd9Sstevel@tonic-gate * If the set is empty, and the caller is willing, traverse 43957c478bd9Sstevel@tonic-gate * up the hierarchy until we find a non-empty set. 43967c478bd9Sstevel@tonic-gate */ 43977c478bd9Sstevel@tonic-gate while (nodes == (mnodeset_t)0 || cnt <= 0) { 43987c478bd9Sstevel@tonic-gate if (c->lmc_scope == LGRP_SRCH_LOCAL || 43997c478bd9Sstevel@tonic-gate ((lp = lp->lgrp_parent) == NULL)) 44007c478bd9Sstevel@tonic-gate return (-1); 44017c478bd9Sstevel@tonic-gate 44027c478bd9Sstevel@tonic-gate nodes = lp->lgrp_mnodes & ~(c->lmc_tried); 44037c478bd9Sstevel@tonic-gate cnt = lp->lgrp_nmnodes - c->lmc_ntried; 44047c478bd9Sstevel@tonic-gate } 44057c478bd9Sstevel@tonic-gate 44067c478bd9Sstevel@tonic-gate /* 44077c478bd9Sstevel@tonic-gate * Select a memnode by picking one at a "random" offset. 44087c478bd9Sstevel@tonic-gate * Because of DR, memnodes can come and go at any time. 
44097c478bd9Sstevel@tonic-gate * This code must be able to cope with the possibility 44107c478bd9Sstevel@tonic-gate * that the nodes count "cnt" is inconsistent with respect 44117c478bd9Sstevel@tonic-gate * to the number of elements actually in "nodes", and 44127c478bd9Sstevel@tonic-gate * therefore that the offset chosen could be greater than 44137c478bd9Sstevel@tonic-gate * the number of elements in the set (some memnodes may 44147c478bd9Sstevel@tonic-gate * have dissapeared just before cnt was read). 44157c478bd9Sstevel@tonic-gate * If this happens, the search simply wraps back to the 44167c478bd9Sstevel@tonic-gate * beginning of the set. 44177c478bd9Sstevel@tonic-gate */ 44187c478bd9Sstevel@tonic-gate ASSERT(nodes != (mnodeset_t)0 && cnt > 0); 44197c478bd9Sstevel@tonic-gate offset = c->lmc_rand % cnt; 44207c478bd9Sstevel@tonic-gate do { 44217c478bd9Sstevel@tonic-gate for (mnode = 0; mnode < max_mem_nodes; mnode++) 44227c478bd9Sstevel@tonic-gate if (nodes & ((mnodeset_t)1 << mnode)) 44237c478bd9Sstevel@tonic-gate if (!offset--) 44247c478bd9Sstevel@tonic-gate break; 44257c478bd9Sstevel@tonic-gate } while (mnode >= max_mem_nodes); 44267c478bd9Sstevel@tonic-gate 44277c478bd9Sstevel@tonic-gate /* Found a node. Store state before returning. */ 44287c478bd9Sstevel@tonic-gate c->lmc_lgrp = lp; 44297c478bd9Sstevel@tonic-gate c->lmc_nodes = (nodes & ~((mnodeset_t)1 << mnode)); 44307c478bd9Sstevel@tonic-gate c->lmc_cnt = cnt - 1; 44317c478bd9Sstevel@tonic-gate c->lmc_tried = (c->lmc_tried | ((mnodeset_t)1 << mnode)); 44327c478bd9Sstevel@tonic-gate c->lmc_ntried++; 44337c478bd9Sstevel@tonic-gate 44347c478bd9Sstevel@tonic-gate return (mnode); 44357c478bd9Sstevel@tonic-gate } 4436